diff --git a/apps/api/src/api/v1/platform/operator_runs.py b/apps/api/src/api/v1/platform/operator_runs.py index bc28c032..86892c54 100644 --- a/apps/api/src/api/v1/platform/operator_runs.py +++ b/apps/api/src/api/v1/platform/operator_runs.py @@ -129,6 +129,10 @@ class CallbackReplyAuditSummary(BaseModel): outbound_reply_markup_missing_trace_ref_latest_sent_at: datetime | None = None outbound_reply_markup_trace_ref_gap_status: str = "clean" outbound_reply_markup_trace_ref_gap_next_action: str = "none" + outbound_reply_markup_trace_ref_after_gap_total: int = 0 + outbound_reply_markup_trace_ref_after_gap_first_sent_at: datetime | None = None + outbound_reply_markup_trace_ref_after_gap_latest_sent_at: datetime | None = None + outbound_reply_markup_trace_ref_gap_recovery_status: str = "not_needed" outbound_reply_markup_missing_incident_ref_top_prefixes: list[ OutboundReplyMarkupGapPrefix ] = Field(default_factory=list) diff --git a/apps/api/src/services/platform_operator_service.py b/apps/api/src/services/platform_operator_service.py index 6f41d442..9b21bc24 100644 --- a/apps/api/src/services/platform_operator_service.py +++ b/apps/api/src/services/platform_operator_service.py @@ -499,6 +499,14 @@ async def _fetch_callback_reply_audit_summary( FROM awooop_outbound_message m WHERE m.project_id = :project_id AND m.channel_type = 'telegram' + ), + trace_gap_cutoff AS ( + SELECT + MAX(COALESCE(sent_at, queued_at)) + AS latest_missing_trace_ref_at + FROM outbound + WHERE source_envelope #>> '{reply_markup,present}' = 'true' + AND NOT has_trace_ref ) SELECT COUNT(*) AS outbound_total, @@ -568,6 +576,30 @@ async def _fetch_callback_reply_audit_summary( WHERE source_envelope #>> '{reply_markup,present}' = 'true' AND NOT has_trace_ref ) AS outbound_reply_markup_missing_trace_ref_latest_sent_at, + COUNT(*) FILTER ( + WHERE source_envelope #>> '{reply_markup,present}' = 'true' + AND has_trace_ref + AND trace_gap_cutoff.latest_missing_trace_ref_at + IS NOT NULL + AND COALESCE(sent_at, queued_at) + > trace_gap_cutoff.latest_missing_trace_ref_at + ) AS outbound_reply_markup_trace_ref_after_gap_total, + MIN(COALESCE(sent_at, queued_at)) FILTER ( + WHERE source_envelope #>> '{reply_markup,present}' = 'true' + AND has_trace_ref + AND trace_gap_cutoff.latest_missing_trace_ref_at + IS NOT NULL + AND COALESCE(sent_at, queued_at) + > trace_gap_cutoff.latest_missing_trace_ref_at + ) AS outbound_reply_markup_trace_ref_after_gap_first_sent_at, + MAX(COALESCE(sent_at, queued_at)) FILTER ( + WHERE source_envelope #>> '{reply_markup,present}' = 'true' + AND has_trace_ref + AND trace_gap_cutoff.latest_missing_trace_ref_at + IS NOT NULL + AND COALESCE(sent_at, queued_at) + > trace_gap_cutoff.latest_missing_trace_ref_at + ) AS outbound_reply_markup_trace_ref_after_gap_latest_sent_at, COALESCE(( SELECT jsonb_agg( jsonb_build_object( @@ -711,6 +743,7 @@ async def _fetch_callback_reply_audit_summary( WHERE source_envelope ? 'callback_reply' ) AS latest_callback_at FROM outbound + CROSS JOIN trace_gap_cutoff """), {"project_id": project_id}, ) @@ -752,6 +785,13 @@ def _callback_reply_audit_summary_from_row( recent_1h=missing_trace_recent_1h, recent_24h=missing_trace_recent_24h, ) + trace_ref_after_gap_total = _safe_int( + row.get("outbound_reply_markup_trace_ref_after_gap_total") + ) + trace_gap_recovery_status = _trace_ref_gap_recovery_status( + missing_total=missing_trace_total, + after_gap_total=trace_ref_after_gap_total, + ) if callback_total <= 0: snapshot_status = "no_callback" @@ -811,6 +851,18 @@ def _callback_reply_audit_summary_from_row( ), "outbound_reply_markup_trace_ref_gap_status": trace_gap_status, "outbound_reply_markup_trace_ref_gap_next_action": trace_gap_next_action, + "outbound_reply_markup_trace_ref_after_gap_total": ( + trace_ref_after_gap_total + ), + "outbound_reply_markup_trace_ref_after_gap_first_sent_at": row.get( + "outbound_reply_markup_trace_ref_after_gap_first_sent_at" + ), + "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": row.get( + "outbound_reply_markup_trace_ref_after_gap_latest_sent_at" + ), + "outbound_reply_markup_trace_ref_gap_recovery_status": ( + trace_gap_recovery_status + ), "outbound_reply_markup_missing_incident_ref_top_prefixes": ( top_missing_prefixes ), @@ -852,6 +904,19 @@ def _trace_ref_gap_decision( return "legacy_backlog", "backfill_or_archive_legacy_callbacks" +def _trace_ref_gap_recovery_status( + *, + missing_total: int, + after_gap_total: int, +) -> str: + """Describe whether traced reply_markup messages resumed after the last gap.""" + if missing_total <= 0: + return "not_needed" + if after_gap_total > 0: + return "recovered_after_gap" + return "no_recovery_signal" + + def _reply_markup_gap_prefixes_from_value(value: Any) -> list[dict[str, Any]]: if not isinstance(value, list): return [] diff --git a/apps/api/tests/test_awooop_operator_timeline_labels.py b/apps/api/tests/test_awooop_operator_timeline_labels.py index 6e9a269c..8d60efd1 100644 --- a/apps/api/tests/test_awooop_operator_timeline_labels.py +++ b/apps/api/tests/test_awooop_operator_timeline_labels.py @@ -683,6 +683,16 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None: ), "outbound_reply_markup_trace_ref_gap_status": "recent_backlog", "outbound_reply_markup_trace_ref_gap_next_action": "watch_24h_decay", + "outbound_reply_markup_trace_ref_after_gap_total": 3, + "outbound_reply_markup_trace_ref_after_gap_first_sent_at": ( + datetime(2026, 5, 18, 8, 20, 0) + ), + "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": ( + datetime(2026, 5, 18, 9, 0, 0) + ), + "outbound_reply_markup_trace_ref_gap_recovery_status": ( + "recovered_after_gap" + ), "outbound_reply_markup_missing_incident_ref_top_prefixes": [ { "prefix": "silence", @@ -763,6 +773,16 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None: assert dumped["summary"]["outbound_reply_markup_trace_ref_gap_next_action"] == ( "watch_24h_decay" ) + assert dumped["summary"]["outbound_reply_markup_trace_ref_after_gap_total"] == 3 + assert dumped["summary"][ + "outbound_reply_markup_trace_ref_after_gap_first_sent_at" + ] == "2026-05-18T08:20:00" + assert dumped["summary"][ + "outbound_reply_markup_trace_ref_after_gap_latest_sent_at" + ] == "2026-05-18T09:00:00" + assert dumped["summary"][ + "outbound_reply_markup_trace_ref_gap_recovery_status" + ] == "recovered_after_gap" assert dumped["summary"][ "outbound_reply_markup_missing_incident_ref_top_prefixes" ][0] == { @@ -815,6 +835,9 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None: "outbound_reply_markup_missing_trace_ref_latest_sent_at": ( datetime(2026, 5, 25, 8, 42, 22) ), + "outbound_reply_markup_trace_ref_after_gap_total": 0, + "outbound_reply_markup_trace_ref_after_gap_first_sent_at": None, + "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": None, "outbound_reply_markup_missing_incident_ref_top_prefixes": [ { "prefix": "silence", @@ -868,6 +891,10 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None: assert summary["outbound_reply_markup_trace_ref_gap_next_action"] == ( "inspect_recent_outbound_source_refs" ) + assert summary["outbound_reply_markup_trace_ref_after_gap_total"] == 0 + assert summary["outbound_reply_markup_trace_ref_gap_recovery_status"] == ( + "no_recovery_signal" + ) assert summary[ "outbound_reply_markup_missing_incident_ref_recent_24h_total" ] == 3 @@ -907,6 +934,13 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() -> "outbound_reply_markup_missing_trace_ref_latest_sent_at": ( datetime(2026, 5, 25, 12, 13, 1) ), + "outbound_reply_markup_trace_ref_after_gap_total": 8, + "outbound_reply_markup_trace_ref_after_gap_first_sent_at": ( + datetime(2026, 5, 25, 12, 20, 0) + ), + "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": ( + datetime(2026, 5, 25, 13, 26, 8) + ), "outbound_reply_markup_missing_incident_ref_top_prefixes": [ { "prefix": "silence", @@ -965,6 +999,13 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() -> assert summary["outbound_reply_markup_trace_ref_gap_next_action"] == ( "watch_24h_decay" ) + assert summary["outbound_reply_markup_trace_ref_after_gap_total"] == 8 + assert summary["outbound_reply_markup_trace_ref_after_gap_first_sent_at"] == ( + datetime(2026, 5, 25, 12, 20, 0) + ) + assert summary["outbound_reply_markup_trace_ref_gap_recovery_status"] == ( + "recovered_after_gap" + ) assert summary["outbound_reply_markup_missing_trace_ref_top_prefixes"][0][ "recent_24h_total" ] == 23 diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index f622cd4c..c73502f2 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -3062,6 +3062,7 @@ "outboundReplyMarkupDetail": "reply_markup {replyMarkup}; missing trace refs {missingTraceRefs}; missing incident refs {missingIncidentRefs}", "outboundReplyMarkupTraceFreshness": "Missing trace activity: 1h {recent1h} / 24h {recent24h} / latest {latest}", "outboundReplyMarkupTraceDecision": "Trace gap decision: {status}; next: {action}", + "outboundReplyMarkupTraceRecovery": "Trace gap recovery: {status}; traced after gap {count}; first {first}; latest {latest}", "outboundReplyMarkupIncidentFreshness": "Missing incident activity: 1h {recent1h} / 24h {recent24h} / latest {latest}", "outboundReplyMarkupTopPrefixes": "Missing incident top prefixes: {prefixes}", "outboundReplyMarkupTraceTopPrefixes": "Missing trace top prefixes: {prefixes}", @@ -3095,6 +3096,12 @@ "backfill_or_archive_legacy_callbacks": "Backfill or archive legacy callback gaps", "observed": "Wait for the next outbound evidence" }, + "traceGapRecoveryStatuses": { + "not_needed": "Not needed", + "recovered_after_gap": "Recovered", + "no_recovery_signal": "No recovery signal yet", + "observed": "Recorded" + }, "nextActions": { "none": "No follow-up needed", "press_telegram_detail_or_history": "Press Telegram Detail / History once to create callback evidence", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 7afdbda9..b36c52b0 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -3063,6 +3063,7 @@ "outboundReplyMarkupDetail": "reply_markup {replyMarkup};缺 trace refs {missingTraceRefs};缺 incident refs {missingIncidentRefs}", "outboundReplyMarkupTraceFreshness": "缺 trace 活躍度:1h {recent1h} / 24h {recent24h} / 最新 {latest}", "outboundReplyMarkupTraceDecision": "缺 trace 判讀:{status};下一步:{action}", + "outboundReplyMarkupTraceRecovery": "缺 trace 復原訊號:{status};gap 後 traced {count};首筆 {first};最新 {latest}", "outboundReplyMarkupIncidentFreshness": "缺 incident 活躍度:1h {recent1h} / 24h {recent24h} / 最新 {latest}", "outboundReplyMarkupTopPrefixes": "缺 incident top prefixes:{prefixes}", "outboundReplyMarkupTraceTopPrefixes": "缺 trace top prefixes:{prefixes}", @@ -3096,6 +3097,12 @@ "backfill_or_archive_legacy_callbacks": "歸檔或回補舊 callback 缺口", "observed": "等待下一次 outbound evidence" }, + "traceGapRecoveryStatuses": { + "not_needed": "不需要", + "recovered_after_gap": "已復原", + "no_recovery_signal": "尚無復原訊號", + "observed": "已記錄" + }, "nextActions": { "none": "不需補動作", "press_telegram_detail_or_history": "按一次 Telegram 詳情 / 歷史產生 callback evidence", diff --git a/apps/web/src/app/[locale]/awooop/runs/page.tsx b/apps/web/src/app/[locale]/awooop/runs/page.tsx index 6713feaf..5fc86976 100644 --- a/apps/web/src/app/[locale]/awooop/runs/page.tsx +++ b/apps/web/src/app/[locale]/awooop/runs/page.tsx @@ -162,6 +162,10 @@ interface CallbackReplyAuditSummary { outbound_reply_markup_missing_trace_ref_latest_sent_at?: string | null; outbound_reply_markup_trace_ref_gap_status?: string | null; outbound_reply_markup_trace_ref_gap_next_action?: string | null; + outbound_reply_markup_trace_ref_after_gap_total?: number; + outbound_reply_markup_trace_ref_after_gap_first_sent_at?: string | null; + outbound_reply_markup_trace_ref_after_gap_latest_sent_at?: string | null; + outbound_reply_markup_trace_ref_gap_recovery_status?: string | null; outbound_reply_markup_missing_incident_ref_top_prefixes?: Array<{ prefix?: string | null; total?: number | null; @@ -2545,6 +2549,19 @@ function CallbackReplyAuditSummaryPanel({ traceGapNextActionRaw === "watch_24h_decay" || traceGapNextActionRaw === "backfill_or_archive_legacy_callbacks" ) ? traceGapNextActionRaw : "observed"; + const traceGapRecoveryStatusRaw = + summary.outbound_reply_markup_trace_ref_gap_recovery_status ?? "observed"; + const traceGapRecoveryStatusKey = ( + traceGapRecoveryStatusRaw === "not_needed" || + traceGapRecoveryStatusRaw === "recovered_after_gap" || + traceGapRecoveryStatusRaw === "no_recovery_signal" + ) ? traceGapRecoveryStatusRaw : "observed"; + const traceRefAfterGapFirst = formatShortDateTime( + summary.outbound_reply_markup_trace_ref_after_gap_first_sent_at + ); + const traceRefAfterGapLatest = formatShortDateTime( + summary.outbound_reply_markup_trace_ref_after_gap_latest_sent_at + ); const snapshotClass = { captured: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]", partial: "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]", @@ -2597,6 +2614,16 @@ function CallbackReplyAuditSummaryPanel({ action: t(`traceGapNextActions.${traceGapNextActionKey}` as never), })}

+

+ {t("outboundReplyMarkupTraceRecovery", { + status: t( + `traceGapRecoveryStatuses.${traceGapRecoveryStatusKey}` as never + ), + count: summary.outbound_reply_markup_trace_ref_after_gap_total ?? 0, + first: traceRefAfterGapFirst, + latest: traceRefAfterGapLatest, + })} +

{t("outboundReplyMarkupIncidentFreshness", { recent1h: diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index d27422f0..18ced75b 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -21486,3 +21486,65 @@ GET /api/v1/health - KM governance:約 84.6%。 - AI Provider lane visibility:約 92.2%。 - 完整 AI 自動化管理產品化:約 98.05%。 + +### 2026-05-25 — T194 Callback trace gap recovery signal(pre-deploy) + +**背景**:T193 已能把缺 trace refs 判讀為 `recent_backlog`,但 operator 還需要知道 +「最後一筆缺 trace 之後,新送出的 action cards 是否已經恢復 trace refs」。T194 不 +寫死 deploy cutoff,改用資料本身動態計算最後缺口後的 traced reply_markup 數量。 + +**完成變更**: + +- `/api/v1/platform/runs/callback-replies` summary 新增: + - `outbound_reply_markup_trace_ref_after_gap_total` + - `outbound_reply_markup_trace_ref_after_gap_first_sent_at` + - `outbound_reply_markup_trace_ref_after_gap_latest_sent_at` + - `outbound_reply_markup_trace_ref_gap_recovery_status` +- 判讀規則: + - `not_needed`:沒有缺 trace refs。 + - `recovered_after_gap`:最後缺 trace 後,已有新的 traced reply_markup。 + - `no_recovery_signal`:仍缺 trace,且最後缺口後尚無 traced reply_markup。 +- AwoooP Runs / TG Callback Evidence 前端新增: + - `缺 trace 復原訊號:{status};gap 後 traced {count};首筆 {first};最新 {latest}` + +**local validation(完成)**: + +```text +python3 -m py_compile apps/api/src/services/platform_operator_service.py apps/api/src/api/v1/platform/operator_runs.py apps/api/tests/test_awooop_operator_timeline_labels.py +jq empty apps/web/messages/zh-TW.json apps/web/messages/en.json +PYTHONPATH=. DATABASE_URL='postgresql+asyncpg://test:test@localhost/test' /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_awooop_operator_timeline_labels.py -q + 53 passed in 0.92s +pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-t194-tsconfig.tsbuildinfo + pass +pnpm --dir apps/web lint -- --file 'src/app/[locale]/awooop/runs/page.tsx' + pass with pre-existing i18next/no-literal-string and unused icon warnings +NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build + pass; Sentry global-error / instrumentation-client warnings are pre-existing +git diff --check + pass +``` + +**production SQL dry-run(完成,read-only, RLS context)**: + +```text +missing_trace_total = 417 +missing_trace_latest_sent_at = 2026-05-25 12:13:01.534615 +traced_after_gap_total = 9 +traced_after_gap_first_sent_at = 2026-05-25 12:24:35.809853 +traced_after_gap_latest_sent_at = 2026-05-25 13:32:04.893903 +recovery_status = recovered_after_gap +``` + +**目前整體進度(pre-deploy)**: + +- AwoooP 告警可觀測鏈:約 99.86%。 +- 低風險自動修復閉環:約 95.9%。 +- 前端 AI 自動化管理介面同步:約 99.76%。 +- Telegram outbound / callback DB coverage 可視化:約 99.86%。 +- callback / DB replayability:約 99.0%。 +- MCP / 自建 MCP 可視化:約 95.1%。 +- Sentry / SigNoz source correlation:約 94.5%。 +- Ansible / PlayBook 可視化:約 92.6%。 +- KM governance:約 84.6%。 +- AI Provider lane visibility:約 92.2%。 +- 完整 AI 自動化管理產品化:約 98.1%。