feat(awooop): classify callback trace gaps

2026-05-25 21:32:37 +08:00
parent f52fdebe0a
commit 32e172ed8b
7 changed files with 159 additions and 7 deletions
--- a/apps/api/src/api/v1/platform/operator_runs.py
+++ b/apps/api/src/api/v1/platform/operator_runs.py
@@ -127,6 +127,8 @@ class CallbackReplyAuditSummary(BaseModel):
    outbound_reply_markup_missing_trace_ref_recent_1h_total: int = 0
    outbound_reply_markup_missing_trace_ref_recent_24h_total: int = 0
    outbound_reply_markup_missing_trace_ref_latest_sent_at: datetime | None = None
+    outbound_reply_markup_trace_ref_gap_status: str = "clean"
+    outbound_reply_markup_trace_ref_gap_next_action: str = "none"
    outbound_reply_markup_missing_incident_ref_top_prefixes: list[
        OutboundReplyMarkupGapPrefix
    ] = Field(default_factory=list)
--- a/apps/api/src/services/platform_operator_service.py
+++ b/apps/api/src/services/platform_operator_service.py
@@ -738,6 +738,20 @@ def _callback_reply_audit_summary_from_row(
    top_missing_trace_prefixes = _reply_markup_gap_prefixes_from_value(
        row.get("outbound_reply_markup_missing_trace_ref_top_prefixes")
    )
+    missing_trace_total = _safe_int(
+        row.get("outbound_reply_markup_missing_trace_ref_total")
+    )
+    missing_trace_recent_1h = _safe_int(
+        row.get("outbound_reply_markup_missing_trace_ref_recent_1h_total")
+    )
+    missing_trace_recent_24h = _safe_int(
+        row.get("outbound_reply_markup_missing_trace_ref_recent_24h_total")
+    )
+    trace_gap_status, trace_gap_next_action = _trace_ref_gap_decision(
+        total=missing_trace_total,
+        recent_1h=missing_trace_recent_1h,
+        recent_24h=missing_trace_recent_24h,
+    )

    if callback_total <= 0:
        snapshot_status = "no_callback"
@@ -785,18 +799,18 @@ def _callback_reply_audit_summary_from_row(
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": row.get(
            "outbound_reply_markup_missing_incident_ref_latest_sent_at"
        ),
-        "outbound_reply_markup_missing_trace_ref_total": _safe_int(
-            row.get("outbound_reply_markup_missing_trace_ref_total")
+        "outbound_reply_markup_missing_trace_ref_total": missing_trace_total,
+        "outbound_reply_markup_missing_trace_ref_recent_1h_total": (
+            missing_trace_recent_1h
        ),
-        "outbound_reply_markup_missing_trace_ref_recent_1h_total": _safe_int(
-            row.get("outbound_reply_markup_missing_trace_ref_recent_1h_total")
-        ),
-        "outbound_reply_markup_missing_trace_ref_recent_24h_total": _safe_int(
-            row.get("outbound_reply_markup_missing_trace_ref_recent_24h_total")
+        "outbound_reply_markup_missing_trace_ref_recent_24h_total": (
+            missing_trace_recent_24h
        ),
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": row.get(
            "outbound_reply_markup_missing_trace_ref_latest_sent_at"
        ),
+        "outbound_reply_markup_trace_ref_gap_status": trace_gap_status,
+        "outbound_reply_markup_trace_ref_gap_next_action": trace_gap_next_action,
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            top_missing_prefixes
        ),
@@ -822,6 +836,22 @@ def _callback_reply_audit_summary_from_row(
    }


+def _trace_ref_gap_decision(
+    *,
+    total: int,
+    recent_1h: int,
+    recent_24h: int,
+) -> tuple[str, str]:
+    """Classify missing-trace-ref reply_markup counts into (status, next_action)."""
+    if total <= 0:  # checked first, so a zero total wins over the window counts
+        return "clean", "none"
+    if recent_1h > 0:  # gaps still appearing within the last hour
+        return "active_gap", "inspect_recent_outbound_source_refs"
+    if recent_24h > 0:  # nothing in the last hour, but some within 24h
+        return "recent_backlog", "watch_24h_decay"
+    return "legacy_backlog", "backfill_or_archive_legacy_callbacks"
+
+
 def _reply_markup_gap_prefixes_from_value(value: Any) -> list[dict[str, Any]]:
    if not isinstance(value, list):
        return []
--- a/apps/api/tests/test_awooop_operator_timeline_labels.py
+++ b/apps/api/tests/test_awooop_operator_timeline_labels.py
@@ -681,6 +681,8 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None:
            "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
                datetime(2026, 5, 18, 7, 40, 0)
            ),
+            "outbound_reply_markup_trace_ref_gap_status": "recent_backlog",
+            "outbound_reply_markup_trace_ref_gap_next_action": "watch_24h_decay",
            "outbound_reply_markup_missing_incident_ref_top_prefixes": [
                {
                    "prefix": "silence",
@@ -755,6 +757,12 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None:
    assert dumped["summary"][
        "outbound_reply_markup_missing_trace_ref_latest_sent_at"
    ] == "2026-05-18T07:40:00"
+    assert dumped["summary"]["outbound_reply_markup_trace_ref_gap_status"] == (
+        "recent_backlog"
+    )
+    assert dumped["summary"]["outbound_reply_markup_trace_ref_gap_next_action"] == (
+        "watch_24h_decay"
+    )
    assert dumped["summary"][
        "outbound_reply_markup_missing_incident_ref_top_prefixes"
    ][0] == {
@@ -856,6 +864,10 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None:
    assert summary["outbound_reply_markup_missing_trace_ref_latest_sent_at"] == (
        datetime(2026, 5, 25, 8, 42, 22)
    )
+    assert summary["outbound_reply_markup_trace_ref_gap_status"] == "active_gap"
+    assert summary["outbound_reply_markup_trace_ref_gap_next_action"] == (
+        "inspect_recent_outbound_source_refs"
+    )
    assert summary[
        "outbound_reply_markup_missing_incident_ref_recent_24h_total"
    ] == 3
@@ -949,6 +961,10 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() ->
    assert summary["outbound_reply_markup_missing_trace_ref_latest_sent_at"] == (
        datetime(2026, 5, 25, 12, 13, 1)
    )
+    assert summary["outbound_reply_markup_trace_ref_gap_status"] == "recent_backlog"
+    assert summary["outbound_reply_markup_trace_ref_gap_next_action"] == (
+        "watch_24h_decay"
+    )
    assert summary["outbound_reply_markup_missing_trace_ref_top_prefixes"][0][
        "recent_24h_total"
    ] == 23
--- a/apps/web/messages/en.json
+++ b/apps/web/messages/en.json
@@ -3061,6 +3061,7 @@
          "outboundDetail": "source_refs {sourceRefs}; trace refs {traceRefs}; incident refs {incidentRefs}; coverage {coverage}",
          "outboundReplyMarkupDetail": "reply_markup {replyMarkup}; missing trace refs {missingTraceRefs}; missing incident refs {missingIncidentRefs}",
          "outboundReplyMarkupTraceFreshness": "Missing trace activity: 1h {recent1h} / 24h {recent24h} / latest {latest}",
+          "outboundReplyMarkupTraceDecision": "Trace gap decision: {status}; next: {action}",
          "outboundReplyMarkupIncidentFreshness": "Missing incident activity: 1h {recent1h} / 24h {recent24h} / latest {latest}",
          "outboundReplyMarkupTopPrefixes": "Missing incident top prefixes: {prefixes}",
          "outboundReplyMarkupTraceTopPrefixes": "Missing trace top prefixes: {prefixes}",
@@ -3080,6 +3081,20 @@
            "no_callback": "No callback yet",
            "observed": "Recorded"
          },
+          "traceGapStatuses": {
+            "clean": "Clean",
+            "active_gap": "Active gap",
+            "recent_backlog": "Recent backlog",
+            "legacy_backlog": "Legacy backlog",
+            "observed": "Recorded"
+          },
+          "traceGapNextActions": {
+            "none": "No follow-up needed",
+            "inspect_recent_outbound_source_refs": "Inspect outbound source_refs from the last hour",
+            "watch_24h_decay": "Watch the 24h window decay to zero",
+            "backfill_or_archive_legacy_callbacks": "Backfill or archive legacy callback gaps",
+            "observed": "Wait for the next outbound evidence"
+          },
          "nextActions": {
            "none": "No follow-up needed",
            "press_telegram_detail_or_history": "Press Telegram Detail / History once to create callback evidence",
--- a/apps/web/messages/zh-TW.json
+++ b/apps/web/messages/zh-TW.json
@@ -3062,6 +3062,7 @@
          "outboundDetail": "source_refs {sourceRefs}；trace refs {traceRefs}；incident refs {incidentRefs}；覆蓋 {coverage}",
          "outboundReplyMarkupDetail": "reply_markup {replyMarkup}；缺 trace refs {missingTraceRefs}；缺 incident refs {missingIncidentRefs}",
          "outboundReplyMarkupTraceFreshness": "缺 trace 活躍度：1h {recent1h} / 24h {recent24h} / 最新 {latest}",
+          "outboundReplyMarkupTraceDecision": "缺 trace 判讀：{status}；下一步：{action}",
          "outboundReplyMarkupIncidentFreshness": "缺 incident 活躍度：1h {recent1h} / 24h {recent24h} / 最新 {latest}",
          "outboundReplyMarkupTopPrefixes": "缺 incident top prefixes：{prefixes}",
          "outboundReplyMarkupTraceTopPrefixes": "缺 trace top prefixes：{prefixes}",
@@ -3081,6 +3082,20 @@
            "no_callback": "尚無 callback",
            "observed": "已記錄"
          },
+          "traceGapStatuses": {
+            "clean": "乾淨",
+            "active_gap": "新缺口",
+            "recent_backlog": "近期歷史債",
+            "legacy_backlog": "歷史待清",
+            "observed": "已記錄"
+          },
+          "traceGapNextActions": {
+            "none": "不需補動作",
+            "inspect_recent_outbound_source_refs": "檢查近 1 小時 outbound source_refs",
+            "watch_24h_decay": "觀察 24 小時窗口自然歸零",
+            "backfill_or_archive_legacy_callbacks": "歸檔或回補舊 callback 缺口",
+            "observed": "等待下一次 outbound evidence"
+          },
          "nextActions": {
            "none": "不需補動作",
            "press_telegram_detail_or_history": "按一次 Telegram 詳情 / 歷史產生 callback evidence",
--- a/apps/web/src/app/[locale]/awooop/runs/page.tsx
+++ b/apps/web/src/app/[locale]/awooop/runs/page.tsx
@@ -160,6 +160,8 @@ interface CallbackReplyAuditSummary {
  outbound_reply_markup_missing_trace_ref_recent_1h_total?: number;
  outbound_reply_markup_missing_trace_ref_recent_24h_total?: number;
  outbound_reply_markup_missing_trace_ref_latest_sent_at?: string | null;
+  outbound_reply_markup_trace_ref_gap_status?: string | null;
+  outbound_reply_markup_trace_ref_gap_next_action?: string | null;
  outbound_reply_markup_missing_incident_ref_top_prefixes?: Array<{
    prefix?: string | null;
    total?: number | null;
@@ -2527,6 +2529,22 @@ function CallbackReplyAuditSummaryPanel({
  const latestMissingIncidentRef = formatShortDateTime(
    summary.outbound_reply_markup_missing_incident_ref_latest_sent_at
  );
+  const traceGapStatusRaw =
+    summary.outbound_reply_markup_trace_ref_gap_status ?? "observed";
+  const traceGapStatusKey = (
+    traceGapStatusRaw === "clean" ||
+    traceGapStatusRaw === "active_gap" ||
+    traceGapStatusRaw === "recent_backlog" ||
+    traceGapStatusRaw === "legacy_backlog"
+  ) ? traceGapStatusRaw : "observed";
+  const traceGapNextActionRaw =
+    summary.outbound_reply_markup_trace_ref_gap_next_action ?? "observed";
+  const traceGapNextActionKey = (
+    traceGapNextActionRaw === "none" ||
+    traceGapNextActionRaw === "inspect_recent_outbound_source_refs" ||
+    traceGapNextActionRaw === "watch_24h_decay" ||
+    traceGapNextActionRaw === "backfill_or_archive_legacy_callbacks"
+  ) ? traceGapNextActionRaw : "observed";
  const snapshotClass = {
    captured: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
    partial: "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]",
@@ -2573,6 +2591,12 @@ function CallbackReplyAuditSummaryPanel({
            latest: latestMissingTraceRef,
          })}
        </p>
+        <p className="mt-1 text-xs font-semibold leading-5 text-[#141413]">
+          {t("outboundReplyMarkupTraceDecision", {
+            status: t(`traceGapStatuses.${traceGapStatusKey}` as never),
+            action: t(`traceGapNextActions.${traceGapNextActionKey}` as never),
+          })}
+        </p>
        <p className="mt-1 text-xs leading-5 text-[#5f5b52]">
          {t("outboundReplyMarkupIncidentFreshness", {
            recent1h:
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -21361,3 +21361,53 @@ GET /api/v1/health
 - KM governance：約 84.6%。
 - AI Provider lane visibility：約 92.2%。
 - 完整 AI 自動化管理產品化：約 97.95%。
+
+### 2026-05-25 — T193 Callback trace gap decision（pre-deploy）
+
+**背景**：T192 已把 callback trace gap 的 freshness 顯示到前端，但值班者仍需要自行
+把 `total / 1h / 24h / latest` 解讀成下一步。T193 把這段判讀搬進 API summary，
+前端直接顯示「缺 trace 判讀」與「下一步」。
+
+**完成變更**：
+
+- `/api/v1/platform/runs/callback-replies` summary 新增：
+  - `outbound_reply_markup_trace_ref_gap_status`
+  - `outbound_reply_markup_trace_ref_gap_next_action`
+- 判讀規則：
+  - `clean`：缺 trace refs 總數為 0。
+  - `active_gap`：近 1 小時仍新增缺 trace refs。
+  - `recent_backlog`：近 1 小時沒有新增，但 24 小時窗口內仍有歷史缺口。
+  - `legacy_backlog`：只剩 24 小時外的舊缺口。
+- AwoooP Runs / TG Callback Evidence 前端新增：
+  - `缺 trace 判讀：{status}；下一步：{action}`
+
+**local validation（完成）**：
+
+```text
+python3 -m py_compile apps/api/src/services/platform_operator_service.py apps/api/src/api/v1/platform/operator_runs.py apps/api/tests/test_awooop_operator_timeline_labels.py
+jq empty apps/web/messages/zh-TW.json apps/web/messages/en.json
+PYTHONPATH=. DATABASE_URL='postgresql+asyncpg://test:test@localhost/test' /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_awooop_operator_timeline_labels.py -q
+  53 passed in 1.12s
+pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-t193-tsconfig.tsbuildinfo
+  pass
+pnpm --dir apps/web lint -- --file 'src/app/[locale]/awooop/runs/page.tsx'
+  pass with pre-existing i18next/no-literal-string and unused icon warnings
+NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build
+  pass; Sentry global-error / instrumentation-client warnings are pre-existing
+git diff --check
+  pass
+```
+
+**目前整體進度（pre-deploy）**：
+
+- AwoooP 告警可觀測鏈：約 99.82%。
+- 低風險自動修復閉環：約 95.85%。
+- 前端 AI 自動化管理介面同步：約 99.72%。
+- Telegram outbound / callback DB coverage 可視化：約 99.82%。
+- callback / DB replayability：約 98.9%。
+- MCP / 自建 MCP 可視化：約 95.1%。
+- Sentry / SigNoz source correlation：約 94.5%。
+- Ansible / PlayBook 可視化：約 92.6%。
+- KM governance：約 84.6%。
+- AI Provider lane visibility：約 92.2%。
+- 完整 AI 自動化管理產品化：約 98.0%。