diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index c73502f2..25828582 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -2225,6 +2225,7 @@ "remediationQueue": "Every degraded / failed / timeout row must map to replay, reverify, ticket, or manual review", "telegramCallbacks": "Detail and history buttons cannot depend only on Redis TTL or stale snapshots", "callbackOwnerReview": "Telegram detail/history callbacks without a KM owner-review link must become trackable work items", + "callbackTraceRecoveryBacklog": "Callback trace gaps must show recovery signal, 24h decay, and the backlog next step", "ciSecretHygiene": "Workflows must not mount 機密設定 in step env / action inputs; historical logs still need rotation and retention governance", "governanceDispatch": "Governance alerts must enter dispatch and expose skipped / pending / repaired", "knowledgeHealthcheck": "knowledge_degradation must show Hermes / OpenClaw / ElephantAlpha ownership, current stage, and owner review point", @@ -2270,6 +2271,10 @@ "callbackOwnerReviewAutomation": "Automation: {state}; safe auto-repair={safe}", "callbackOwnerReviewBlocker": "Blocker: {reason}", "callbackOwnerReviewEmpty": "Recent callback evidence is matched or no data is available yet", + "callbackTraceRecoveryBacklog": "Callback trace backlog: missing trace {missing}; 1h {recent1h}; 24h {recent24h}; traced after gap {recovered}; recovery {status}", + "callbackTraceRecoveryDecision": "Decision: {gap}; next: {next}", + "callbackTraceRecoveryLatest": "Last gap: {missing}; recovery first: {first}; recovery latest: {latest}", + "callbackTraceRecoveryUnavailable": "Callback trace recovery summary has not returned yet", "ciSecretHygiene": "Repo-controlled step env / action input exposure is guarded; key rotation and log retention remain", "governance": "Unresolved governance alerts: {unresolved}; 待派送: {queued}", "governanceUnavailable": "Governance events API is not responding; 待派送: {queued}", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index b36c52b0..487d25ec 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -2226,6 +2226,7 @@ "remediationQueue": "每筆 degraded / failed / timeout 都必須映射到重跑、重驗、Ticket 或人工檢查", "telegramCallbacks": "按下詳情與歷史不能再只依賴 Redis TTL 或舊快照", "callbackOwnerReview": "Telegram 詳情 / 歷史若未連到 KM owner-review,必須變成可追蹤工作項", + "callbackTraceRecoveryBacklog": "Callback trace 缺口必須顯示復原訊號、24h decay 與 backlog 下一步", "ciSecretHygiene": "workflow 不可再把 機密設定 掛在 step env / action input;歷史 log 需另做輪換與保留期治理", "governanceDispatch": "治理告警必須進 dispatch,並標示 skipped / pending / repaired", "knowledgeHealthcheck": "knowledge_degradation 必須顯示 Hermes / OpenClaw / ElephantAlpha 分工、目前階段與 owner 審核點", @@ -2271,6 +2272,10 @@ "callbackOwnerReviewAutomation": "自動化:{state};可安全自動修復={safe}", "callbackOwnerReviewBlocker": "卡點:{reason}", "callbackOwnerReviewEmpty": "近期 callback evidence 均已匹配或尚無資料", + "callbackTraceRecoveryBacklog": "Callback trace backlog:缺 trace {missing};1h {recent1h};24h {recent24h};gap 後 traced {recovered};復原 {status}", + "callbackTraceRecoveryDecision": "判讀:{gap};下一步:{next}", + "callbackTraceRecoveryLatest": "最後缺口:{missing};復原首筆:{first};復原最新:{latest}", + "callbackTraceRecoveryUnavailable": "Callback trace recovery summary 尚未回傳", "ciSecretHygiene": "repo 可控 step env / action input 泄漏面已加 guard;仍需 key rotation 與 log retention 收斂", "governance": "未解治理告警:{unresolved};待派送:{queued}", "governanceUnavailable": "治理事件 API 目前無法回應;待派送:{queued}", diff --git a/apps/web/src/app/[locale]/awooop/work-items/page.tsx b/apps/web/src/app/[locale]/awooop/work-items/page.tsx index 4cb802fe..36bb29ec 100644 --- a/apps/web/src/app/[locale]/awooop/work-items/page.tsx +++ b/apps/web/src/app/[locale]/awooop/work-items/page.tsx @@ -827,11 +827,25 @@ type CallbackReplyWorkItemEvent = { km_stale_completion_summary?: KmStaleCallbackCompletionSummary | null; }; +type CallbackReplyAuditSummary = { + outbound_reply_markup_missing_trace_ref_total?: number; + outbound_reply_markup_missing_trace_ref_recent_1h_total?: number; + outbound_reply_markup_missing_trace_ref_recent_24h_total?: number; + outbound_reply_markup_missing_trace_ref_latest_sent_at?: string | null; + outbound_reply_markup_trace_ref_gap_status?: string | null; + outbound_reply_markup_trace_ref_gap_next_action?: string | null; + outbound_reply_markup_trace_ref_after_gap_total?: number; + outbound_reply_markup_trace_ref_after_gap_first_sent_at?: string | null; + outbound_reply_markup_trace_ref_after_gap_latest_sent_at?: string | null; + outbound_reply_markup_trace_ref_gap_recovery_status?: string | null; +}; + type CallbackRepliesWorkItemResponse = { items?: CallbackReplyWorkItemEvent[]; total: number; page: number; per_page: number; + summary?: CallbackReplyAuditSummary | null; }; type AiRouteRepairEvidence = { @@ -1610,6 +1624,27 @@ function callbackOwnerReviewOpenEvents( }); } +function callbackTraceRecoveryStatus( + summary: CallbackReplyAuditSummary | null | undefined +): WorkStatus { + if (!summary) { + return "blocked"; + } + const missingTrace = summary.outbound_reply_markup_missing_trace_ref_total ?? 0; + if (missingTrace <= 0) { + return "live"; + } + const gapStatus = summary.outbound_reply_markup_trace_ref_gap_status; + const recoveryStatus = summary.outbound_reply_markup_trace_ref_gap_recovery_status; + if (gapStatus === "active_gap" || recoveryStatus === "no_recovery_signal") { + return "blocked"; + } + if (recoveryStatus === "recovered_after_gap") { + return "in_progress"; + } + return "watching"; +} + function buildWorkItems( telemetry: Telemetry, t: ReturnType @@ -1643,6 +1678,7 @@ function buildWorkItems( latestCallbackOwnerReview?.km_stale_completion_summary ?? null; const latestCallbackWorkItem = latestCallbackSummary?.work_item ?? null; const latestCallbackTriage = latestCallbackWorkItem?.triage ?? null; + const callbackTraceSummary = telemetry.callbackReplies?.summary ?? null; const aiRoute = telemetry.aiRouteStatus; const aiRouteRepairEvidence = aiRoute?.repair_evidence ?? null; const aiRouteWorkItem = aiRouteRepairEvidence?.work_item ?? null; @@ -1993,6 +2029,58 @@ function buildWorkItems( : [t("evidence.callbackOwnerReviewEmpty")], href: latestCallbackWorkItem?.target_href ?? "/awooop/runs", }, + { + id: "callbackTraceRecoveryBacklog", + phase: "T195", + status: callbackTraceRecoveryStatus(callbackTraceSummary), + surfaceKey: "workItems", + source: "/api/v1/platform/runs/callback-replies summary", + gateKey: "callbackTraceRecoveryBacklog", + evidence: t("evidence.callbackTraceRecoveryBacklog", { + missing: + callbackTraceSummary?.outbound_reply_markup_missing_trace_ref_total ?? + 0, + recent1h: + callbackTraceSummary + ?.outbound_reply_markup_missing_trace_ref_recent_1h_total ?? 0, + recent24h: + callbackTraceSummary + ?.outbound_reply_markup_missing_trace_ref_recent_24h_total ?? 0, + recovered: + callbackTraceSummary?.outbound_reply_markup_trace_ref_after_gap_total ?? + 0, + status: + callbackTraceSummary + ?.outbound_reply_markup_trace_ref_gap_recovery_status ?? "--", + }), + evidenceDetails: callbackTraceSummary + ? [ + t("evidence.callbackTraceRecoveryDecision", { + gap: + callbackTraceSummary.outbound_reply_markup_trace_ref_gap_status ?? + "--", + next: + callbackTraceSummary + .outbound_reply_markup_trace_ref_gap_next_action ?? "--", + }), + t("evidence.callbackTraceRecoveryLatest", { + missing: + callbackTraceSummary + .outbound_reply_markup_missing_trace_ref_latest_sent_at ?? + "--", + first: + callbackTraceSummary + .outbound_reply_markup_trace_ref_after_gap_first_sent_at ?? + "--", + latest: + callbackTraceSummary + .outbound_reply_markup_trace_ref_after_gap_latest_sent_at ?? + "--", + }), + ] + : [t("evidence.callbackTraceRecoveryUnavailable")], + href: "/awooop/runs?project_id=awoooi", + }, { id: "ciSecretHygiene", phase: "T44", diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 23d76da1..4eeb34e6 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -21620,3 +21620,54 @@ GET /api/v1/health - KM governance:約 84.6%。 - AI Provider lane visibility:約 92.2%。 - 完整 AI 自動化管理產品化:約 98.15%。 + +### 2026-05-25 — T195 Callback trace recovery backlog work item(pre-deploy) + +**背景**:T194 已在 Runs / TG Callback Evidence 顯示 `recovered_after_gap`,但它 +仍只是摘要文字。T195 把同一份 callback summary 接進 AwoooP Work Items,讓 +`recovered_after_gap + 24h decay` 進入 backlog 視角,而不是靠值班者記住。 + +**完成變更**: + +- AwoooP Work Items 讀取 `/api/v1/platform/runs/callback-replies` 的 `summary`。 +- 新增 `callbackTraceRecoveryBacklog` 工作項投影: + - `missing_trace_ref_total` + - `recent_1h / recent_24h` + - `trace_ref_after_gap_total` + - `trace_ref_gap_recovery_status` + - `trace_ref_gap_status / next_action` +- 狀態規則: + - 無 summary:`blocked`。 + - 缺 trace = 0:`live`。 + - `active_gap` 或 `no_recovery_signal`:`blocked`。 + - `recovered_after_gap`:`in_progress`,代表舊 backlog 正在 24h decay。 + - 其他:`watching`。 + +**local validation(完成)**: + +```text +jq empty apps/web/messages/zh-TW.json apps/web/messages/en.json +pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-t195-tsconfig.tsbuildinfo + pass +pnpm --dir apps/web lint -- --file 'src/app/[locale]/awooop/work-items/page.tsx' + pass; no warnings +NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build + pass; Sentry global-error / instrumentation-client warnings are pre-existing +git diff --check + pass +``` + +**目前整體進度(pre-deploy)**: + +- AwoooP 告警可觀測鏈:約 99.88%。 +- 低風險自動修復閉環:約 95.9%。 +- 前端 AI 自動化管理介面同步:約 99.8%。 +- Telegram outbound / callback DB coverage 可視化:約 99.88%。 +- callback / DB replayability:約 99.1%。 +- Work Items / backlog 可追蹤性:約 96.5%。 +- MCP / 自建 MCP 可視化:約 95.1%。 +- Sentry / SigNoz source correlation:約 94.5%。 +- Ansible / PlayBook 可視化:約 92.6%。 +- KM governance:約 84.6%。 +- AI Provider lane visibility:約 92.2%。 +- 完整 AI 自動化管理產品化:約 98.2%。