feat(awooop): show callback gap freshness

2026-05-25 21:20:10 +08:00
parent 101b08946a
commit dcde86c7f9
7 changed files with 236 additions and 17 deletions
--- a/apps/api/src/api/v1/platform/operator_runs.py
+++ b/apps/api/src/api/v1/platform/operator_runs.py
@@ -120,7 +120,13 @@ class CallbackReplyAuditSummary(BaseModel):
    outbound_incident_ref_total: int
    outbound_reply_markup_total: int = 0
    outbound_reply_markup_missing_incident_ref_total: int = 0
+    outbound_reply_markup_missing_incident_ref_recent_1h_total: int = 0
+    outbound_reply_markup_missing_incident_ref_recent_24h_total: int = 0
+    outbound_reply_markup_missing_incident_ref_latest_sent_at: datetime | None = None
    outbound_reply_markup_missing_trace_ref_total: int = 0
+    outbound_reply_markup_missing_trace_ref_recent_1h_total: int = 0
+    outbound_reply_markup_missing_trace_ref_recent_24h_total: int = 0
+    outbound_reply_markup_missing_trace_ref_latest_sent_at: datetime | None = None
    outbound_reply_markup_missing_incident_ref_top_prefixes: list[
        OutboundReplyMarkupGapPrefix
    ] = Field(default_factory=list)
--- a/apps/api/src/services/platform_operator_service.py
+++ b/apps/api/src/services/platform_operator_service.py
@@ -527,10 +527,47 @@ async def _fetch_callback_reply_audit_summary(
                        '[]'::jsonb
                      ) = '[]'::jsonb
                ) AS outbound_reply_markup_missing_incident_ref_total,
+                COUNT(*) FILTER (
+                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
+                      AND COALESCE(
+                        source_envelope #> '{source_refs,incident_ids}',
+                        '[]'::jsonb
+                      ) = '[]'::jsonb
+                      AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '1 hour'
+                ) AS outbound_reply_markup_missing_incident_ref_recent_1h_total,
+                COUNT(*) FILTER (
+                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
+                      AND COALESCE(
+                        source_envelope #> '{source_refs,incident_ids}',
+                        '[]'::jsonb
+                      ) = '[]'::jsonb
+                      AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '24 hours'
+                ) AS outbound_reply_markup_missing_incident_ref_recent_24h_total,
+                MAX(COALESCE(sent_at, queued_at)) FILTER (
+                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
+                      AND COALESCE(
+                        source_envelope #> '{source_refs,incident_ids}',
+                        '[]'::jsonb
+                      ) = '[]'::jsonb
+                ) AS outbound_reply_markup_missing_incident_ref_latest_sent_at,
                COUNT(*) FILTER (
                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
                      AND NOT has_trace_ref
                ) AS outbound_reply_markup_missing_trace_ref_total,
+                COUNT(*) FILTER (
+                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
+                      AND NOT has_trace_ref
+                      AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '1 hour'
+                ) AS outbound_reply_markup_missing_trace_ref_recent_1h_total,
+                COUNT(*) FILTER (
+                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
+                      AND NOT has_trace_ref
+                      AND COALESCE(sent_at, queued_at) >= NOW() - INTERVAL '24 hours'
+                ) AS outbound_reply_markup_missing_trace_ref_recent_24h_total,
+                MAX(COALESCE(sent_at, queued_at)) FILTER (
+                    WHERE source_envelope #>> '{reply_markup,present}' = 'true'
+                      AND NOT has_trace_ref
+                ) AS outbound_reply_markup_missing_trace_ref_latest_sent_at,
                COALESCE((
                    SELECT jsonb_agg(
                        jsonb_build_object(
@@ -739,9 +776,27 @@ def _callback_reply_audit_summary_from_row(
        "outbound_reply_markup_missing_incident_ref_total": _safe_int(
            row.get("outbound_reply_markup_missing_incident_ref_total")
        ),
+        "outbound_reply_markup_missing_incident_ref_recent_1h_total": _safe_int(
+            row.get("outbound_reply_markup_missing_incident_ref_recent_1h_total")
+        ),
+        "outbound_reply_markup_missing_incident_ref_recent_24h_total": _safe_int(
+            row.get("outbound_reply_markup_missing_incident_ref_recent_24h_total")
+        ),
+        "outbound_reply_markup_missing_incident_ref_latest_sent_at": row.get(
+            "outbound_reply_markup_missing_incident_ref_latest_sent_at"
+        ),
        "outbound_reply_markup_missing_trace_ref_total": _safe_int(
            row.get("outbound_reply_markup_missing_trace_ref_total")
        ),
+        "outbound_reply_markup_missing_trace_ref_recent_1h_total": _safe_int(
+            row.get("outbound_reply_markup_missing_trace_ref_recent_1h_total")
+        ),
+        "outbound_reply_markup_missing_trace_ref_recent_24h_total": _safe_int(
+            row.get("outbound_reply_markup_missing_trace_ref_recent_24h_total")
+        ),
+        "outbound_reply_markup_missing_trace_ref_latest_sent_at": row.get(
+            "outbound_reply_markup_missing_trace_ref_latest_sent_at"
+        ),
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            top_missing_prefixes
        ),
--- a/apps/api/tests/test_awooop_operator_timeline_labels.py
+++ b/apps/api/tests/test_awooop_operator_timeline_labels.py
@@ -670,7 +670,17 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None:
            "outbound_incident_ref_total": 80,
            "outbound_reply_markup_total": 30,
            "outbound_reply_markup_missing_incident_ref_total": 4,
+            "outbound_reply_markup_missing_incident_ref_recent_1h_total": 1,
+            "outbound_reply_markup_missing_incident_ref_recent_24h_total": 2,
+            "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
+                datetime(2026, 5, 18, 8, 15, 0)
+            ),
            "outbound_reply_markup_missing_trace_ref_total": 2,
+            "outbound_reply_markup_missing_trace_ref_recent_1h_total": 0,
+            "outbound_reply_markup_missing_trace_ref_recent_24h_total": 1,
+            "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
+                datetime(2026, 5, 18, 7, 40, 0)
+            ),
            "outbound_reply_markup_missing_incident_ref_top_prefixes": [
                {
                    "prefix": "silence",
@@ -732,7 +742,19 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None:
    assert dumped["summary"]["outbound_trace_ref_total"] == 92
    assert dumped["summary"]["outbound_reply_markup_total"] == 30
    assert dumped["summary"]["outbound_reply_markup_missing_incident_ref_total"] == 4
+    assert dumped["summary"][
+        "outbound_reply_markup_missing_incident_ref_recent_1h_total"
+    ] == 1
+    assert dumped["summary"][
+        "outbound_reply_markup_missing_incident_ref_latest_sent_at"
+    ] == "2026-05-18T08:15:00"
    assert dumped["summary"]["outbound_reply_markup_missing_trace_ref_total"] == 2
+    assert dumped["summary"][
+        "outbound_reply_markup_missing_trace_ref_recent_24h_total"
+    ] == 1
+    assert dumped["summary"][
+        "outbound_reply_markup_missing_trace_ref_latest_sent_at"
+    ] == "2026-05-18T07:40:00"
    assert dumped["summary"][
        "outbound_reply_markup_missing_incident_ref_top_prefixes"
    ][0] == {
@@ -774,7 +796,17 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None:
            "outbound_incident_ref_total": 3200,
            "outbound_reply_markup_total": 100,
            "outbound_reply_markup_missing_incident_ref_total": 12,
+            "outbound_reply_markup_missing_incident_ref_recent_1h_total": 2,
+            "outbound_reply_markup_missing_incident_ref_recent_24h_total": 3,
+            "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
+                datetime(2026, 5, 25, 8, 42, 22)
+            ),
            "outbound_reply_markup_missing_trace_ref_total": 5,
+            "outbound_reply_markup_missing_trace_ref_recent_1h_total": 1,
+            "outbound_reply_markup_missing_trace_ref_recent_24h_total": 2,
+            "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
+                datetime(2026, 5, 25, 8, 42, 22)
+            ),
            "outbound_reply_markup_missing_incident_ref_top_prefixes": [
                {
                    "prefix": "silence",
@@ -819,6 +851,14 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None:
    assert summary["outbound_total"] == 5256
    assert summary["outbound_trace_ref_total"] == 4300
    assert summary["outbound_reply_markup_missing_trace_ref_total"] == 5
+    assert summary["outbound_reply_markup_missing_trace_ref_recent_1h_total"] == 1
+    assert summary["outbound_reply_markup_missing_trace_ref_recent_24h_total"] == 2
+    assert summary["outbound_reply_markup_missing_trace_ref_latest_sent_at"] == (
+        datetime(2026, 5, 25, 8, 42, 22)
+    )
+    assert summary[
+        "outbound_reply_markup_missing_incident_ref_recent_24h_total"
+    ] == 3
    assert summary["outbound_reply_markup_missing_trace_ref_top_prefixes"][0][
        "prefix"
    ] == "unknown"
@@ -844,7 +884,17 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() ->
            "outbound_incident_ref_total": 920,
            "outbound_reply_markup_total": 1322,
            "outbound_reply_markup_missing_incident_ref_total": 684,
+            "outbound_reply_markup_missing_incident_ref_recent_1h_total": 0,
+            "outbound_reply_markup_missing_incident_ref_recent_24h_total": 0,
+            "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
+                datetime(2026, 5, 25, 10, 59, 49)
+            ),
            "outbound_reply_markup_missing_trace_ref_total": 154,
+            "outbound_reply_markup_missing_trace_ref_recent_1h_total": 0,
+            "outbound_reply_markup_missing_trace_ref_recent_24h_total": 23,
+            "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
+                datetime(2026, 5, 25, 12, 13, 1)
+            ),
            "outbound_reply_markup_missing_incident_ref_top_prefixes": [
                {
                    "prefix": "silence",
@@ -894,6 +944,11 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() ->
    assert summary["callback_snapshot_captured_total"] == 1
    assert summary["outbound_trace_ref_total"] == 4230
    assert summary["outbound_reply_markup_missing_trace_ref_total"] == 154
+    assert summary["outbound_reply_markup_missing_trace_ref_recent_1h_total"] == 0
+    assert summary["outbound_reply_markup_missing_trace_ref_recent_24h_total"] == 23
+    assert summary["outbound_reply_markup_missing_trace_ref_latest_sent_at"] == (
+        datetime(2026, 5, 25, 12, 13, 1)
+    )
    assert summary["outbound_reply_markup_missing_trace_ref_top_prefixes"][0][
        "recent_24h_total"
    ] == 23
--- a/apps/web/messages/en.json
+++ b/apps/web/messages/en.json
@@ -3060,6 +3060,8 @@
          "outbound": "Outbound mirror",
          "outboundDetail": "source_refs {sourceRefs}; trace refs {traceRefs}; incident refs {incidentRefs}; coverage {coverage}",
          "outboundReplyMarkupDetail": "reply_markup {replyMarkup}; missing trace refs {missingTraceRefs}; missing incident refs {missingIncidentRefs}",
+          "outboundReplyMarkupTraceFreshness": "Missing trace activity: 1h {recent1h} / 24h {recent24h} / latest {latest}",
+          "outboundReplyMarkupIncidentFreshness": "Missing incident activity: 1h {recent1h} / 24h {recent24h} / latest {latest}",
          "outboundReplyMarkupTopPrefixes": "Missing incident top prefixes: {prefixes}",
          "outboundReplyMarkupTraceTopPrefixes": "Missing trace top prefixes: {prefixes}",
          "outboundReplyMarkupTopPrefixItem": "{prefix} {total} (24h {recent}, last {last})",
--- a/apps/web/messages/zh-TW.json
+++ b/apps/web/messages/zh-TW.json
@@ -3061,6 +3061,8 @@
          "outbound": "出站鏡像",
          "outboundDetail": "source_refs {sourceRefs}；trace refs {traceRefs}；incident refs {incidentRefs}；覆蓋 {coverage}",
          "outboundReplyMarkupDetail": "reply_markup {replyMarkup}；缺 trace refs {missingTraceRefs}；缺 incident refs {missingIncidentRefs}",
+          "outboundReplyMarkupTraceFreshness": "缺 trace 活躍度：1h {recent1h} / 24h {recent24h} / 最新 {latest}",
+          "outboundReplyMarkupIncidentFreshness": "缺 incident 活躍度：1h {recent1h} / 24h {recent24h} / 最新 {latest}",
          "outboundReplyMarkupTopPrefixes": "缺 incident top prefixes：{prefixes}",
          "outboundReplyMarkupTraceTopPrefixes": "缺 trace top prefixes：{prefixes}",
          "outboundReplyMarkupTopPrefixItem": "{prefix} {total}（24h {recent}，最後 {last}）",
--- a/apps/web/src/app/[locale]/awooop/runs/page.tsx
+++ b/apps/web/src/app/[locale]/awooop/runs/page.tsx
@@ -153,7 +153,13 @@ interface CallbackReplyAuditSummary {
  outbound_incident_ref_total?: number;
  outbound_reply_markup_total?: number;
  outbound_reply_markup_missing_incident_ref_total?: number;
+  outbound_reply_markup_missing_incident_ref_recent_1h_total?: number;
+  outbound_reply_markup_missing_incident_ref_recent_24h_total?: number;
+  outbound_reply_markup_missing_incident_ref_latest_sent_at?: string | null;
  outbound_reply_markup_missing_trace_ref_total?: number;
+  outbound_reply_markup_missing_trace_ref_recent_1h_total?: number;
+  outbound_reply_markup_missing_trace_ref_recent_24h_total?: number;
+  outbound_reply_markup_missing_trace_ref_latest_sent_at?: string | null;
  outbound_reply_markup_missing_incident_ref_top_prefixes?: Array<{
    prefix?: string | null;
    total?: number | null;
@@ -2476,6 +2482,15 @@ function CallbackReplyAuditSummaryPanel({
    summary.outbound_trace_ref_total ?? summary.outbound_incident_ref_total ?? 0,
    outboundTotal
  );
+  // Render a timestamp string as a short month/day hour:minute label.
+  // Returns "--" when the value is missing or cannot be parsed into a Date.
+  // NOTE(review): the locale is pinned to "zh-TW" although this page lives
+  // under a [locale] route — confirm that is intended for the en locale.
+  // NOTE(review): the API serializes these fields without a UTC offset, and
+  // offset-less date-time strings are parsed as browser-local time — confirm
+  // the backend values are meant to be read that way.
+  const formatShortDateTime = (value?: string | null) => {
+    if (!value) return "--";
+    const parsed = new Date(value);
+    // Without this guard an unparseable string renders as "Invalid Date".
+    if (Number.isNaN(parsed.getTime())) return "--";
+    return parsed.toLocaleString("zh-TW", {
+      month: "2-digit",
+      day: "2-digit",
+      hour: "2-digit",
+      minute: "2-digit",
+    });
+  };
  const formatPrefixGaps = (
    items?: Array<{
      prefix?: string | null;
@@ -2487,19 +2502,11 @@ function CallbackReplyAuditSummaryPanel({
    items ?? []
  ).slice(0, 3)
    .map((item) => {
-      const lastSeen = item.last_sent_at
-        ? new Date(item.last_sent_at).toLocaleString("zh-TW", {
-            month: "2-digit",
-            day: "2-digit",
-            hour: "2-digit",
-            minute: "2-digit",
-          })
-        : "--";
      return t("outboundReplyMarkupTopPrefixItem", {
        prefix: item.prefix || "--",
        total: item.total ?? 0,
        recent: item.recent_24h_total ?? 0,
-        last: lastSeen,
+        last: formatShortDateTime(item.last_sent_at),
      });
    })
    .join(" / ") || "--";
@@ -2513,14 +2520,13 @@ function CallbackReplyAuditSummaryPanel({
    summary.callback_snapshot_captured_total ?? 0,
    callbackTotal
  );
-  const latestCallback = summary.latest_callback_at
-    ? new Date(summary.latest_callback_at).toLocaleString("zh-TW", {
-        month: "2-digit",
-        day: "2-digit",
-        hour: "2-digit",
-        minute: "2-digit",
-      })
-    : "--";
+  const latestCallback = formatShortDateTime(summary.latest_callback_at);
+  const latestMissingTraceRef = formatShortDateTime(
+    summary.outbound_reply_markup_missing_trace_ref_latest_sent_at
+  );
+  const latestMissingIncidentRef = formatShortDateTime(
+    summary.outbound_reply_markup_missing_incident_ref_latest_sent_at
+  );
  const snapshotClass = {
    captured: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
    partial: "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]",
@@ -2556,6 +2562,30 @@ function CallbackReplyAuditSummaryPanel({
              summary.outbound_reply_markup_missing_trace_ref_total ?? 0,
          })}
        </p>
+        <p className="mt-1 text-xs leading-5 text-[#5f5b52]">
+          {t("outboundReplyMarkupTraceFreshness", {
+            recent1h:
+              summary.outbound_reply_markup_missing_trace_ref_recent_1h_total ??
+              0,
+            recent24h:
+              summary.outbound_reply_markup_missing_trace_ref_recent_24h_total ??
+              0,
+            latest: latestMissingTraceRef,
+          })}
+        </p>
+        <p className="mt-1 text-xs leading-5 text-[#5f5b52]">
+          {t("outboundReplyMarkupIncidentFreshness", {
+            recent1h:
+              summary
+                .outbound_reply_markup_missing_incident_ref_recent_1h_total ??
+              0,
+            recent24h:
+              summary
+                .outbound_reply_markup_missing_incident_ref_recent_24h_total ??
+              0,
+            latest: latestMissingIncidentRef,
+          })}
+        </p>
        <p className="mt-1 text-xs leading-5 text-[#5f5b52]">
          {t("outboundReplyMarkupTraceTopPrefixes", {
            prefixes: topMissingTracePrefixes,
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -21217,3 +21217,72 @@ GET /api/v1/health
 - KM governance：約 84.6%。
 - AI Provider lane visibility：約 92.2%。
 - 完整 AI 自動化管理產品化：約 97.85%。
+
+### 2026-05-25 — T192 Callback gap freshness（pre-deploy）
+
+**背景**：T191 已把缺 trace refs 與缺 incident refs 的 top prefixes 分開，但
+operator 仍只看到累積總數，例如 `missing_trace_ref_total = 417`，無法立即判斷
+它是歷史債還是 deploy 後仍在新增。T192 增加 freshness metrics，讓前端同時顯示
+1h / 24h / latest sent time。
+
+**完成變更**：
+
+- `/api/v1/platform/runs/callback-replies` summary 新增：
+  - `outbound_reply_markup_missing_trace_ref_recent_1h_total`
+  - `outbound_reply_markup_missing_trace_ref_recent_24h_total`
+  - `outbound_reply_markup_missing_trace_ref_latest_sent_at`
+  - `outbound_reply_markup_missing_incident_ref_recent_1h_total`
+  - `outbound_reply_markup_missing_incident_ref_recent_24h_total`
+  - `outbound_reply_markup_missing_incident_ref_latest_sent_at`
+- AwoooP Runs / TG Callback Evidence 前端新增：
+  - 缺 trace 活躍度：1h / 24h / 最新
+  - 缺 incident 活躍度：1h / 24h / 最新
+- 測試補上 Pydantic response serialization 與 service row mapping。
+
+**local validation（完成）**：
+
+```text
+python3 -m py_compile apps/api/src/services/platform_operator_service.py apps/api/src/api/v1/platform/operator_runs.py apps/api/tests/test_awooop_operator_timeline_labels.py
+jq empty apps/web/messages/zh-TW.json apps/web/messages/en.json
+PYTHONPATH=. DATABASE_URL='postgresql+asyncpg://test:test@localhost/test' /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_awooop_operator_timeline_labels.py -q
+  53 passed in 0.88s
+pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-t192-tsconfig.tsbuildinfo
+  pass
+pnpm --dir apps/web lint -- --file 'src/app/[locale]/awooop/runs/page.tsx'
+  pass with pre-existing i18next/no-literal-string and unused icon warnings
+NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build
+  pass; Sentry global-error / instrumentation-client warnings are pre-existing
+git diff --check
+  pass
+```
+
+**production SQL dry-run（完成，read-only）**：
+
+> 注意：`awooop_outbound_message` 有 RLS。直接用 `DATABASE_URL` 查詢時若未先
+> `set_config('app.project_id', 'awoooi', true)`（第三個參數 `true` 表示設定只在當前 transaction 內有效，必須與查詢放在同一個 transaction），會正確隔離成 0 筆。API path
+> 經由 `get_db_context()` 會自動設定 tenant context。
+
+```text
+missing_trace_total = 417
+missing_trace_recent_1h_total = 0
+missing_trace_recent_24h_total = 43
+missing_trace_latest_sent_at = 2026-05-25 12:13:01.534615
+missing_incident_total = 684
+missing_incident_recent_1h_total = 0
+missing_incident_recent_24h_total = 43
+missing_incident_latest_sent_at = 2026-05-25 12:13:01.534615
+```
+
+**目前整體進度（pre-deploy）**：
+
+- AwoooP 告警可觀測鏈：約 99.78%。
+- 低風險自動修復閉環：約 95.8%。
+- 前端 AI 自動化管理介面同步：約 99.65%。
+- Telegram outbound / callback DB coverage 可視化：約 99.75%。
+- callback / DB replayability：約 98.75%。
+- MCP / 自建 MCP 可視化：約 95.1%。
+- Sentry / SigNoz source correlation：約 94.5%。
+- Ansible / PlayBook 可視化：約 92.6%。
+- KM governance：約 84.6%。
+- AI Provider lane visibility：約 92.2%。
+- 完整 AI 自動化管理產品化：約 97.9%。