From e6a433da222a67a450689367c755fcc778b13abf Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 18:38:07 +0800 Subject: [PATCH] fix(web): surface incident audit chain in work items --- apps/web/messages/en.json | 33 +++ apps/web/messages/zh-TW.json | 33 +++ .../app/[locale]/awooop/work-items/page.tsx | 253 ++++++++++++++++++ docs/LOGBOOK.md | 53 ++++ 4 files changed, 372 insertions(+) diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index a3330aa2..4d28e587 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -3044,6 +3044,39 @@ "handoffStatus": "交接狀態:{status}" } }, + "incidentAudit": { + "title": "焦點事件稽核鏈", + "emptyIncident": "尚未選到 Incident", + "empty": "目前工作項尚未連到 Incident;先從重複告警或補救佇列選取工作項。", + "openRuns": "回 Run 監控", + "flowTitle": "處理流程", + "loading": "正在讀取 incident timeline,先顯示焦點事件與等待資料。", + "timelineEmpty": "Incident timeline 尚未回應,不能判定流程階段。", + "evidenceTitle": "執行與學習證據", + "executor": "Executor", + "ansible": "Ansible / PlayBook", + "mcp": "MCP 調查", + "km": "KM / Learning", + "metrics": { + "stages": "階段", + "events": "事件", + "source": "Direct / Candidate / Applied", + "verification": "最終驗證" + }, + "statusLabels": { + "success": "成功", + "completed": "已完成", + "warning": "警告", + "warn": "警告", + "failed": "失敗", + "error": "錯誤", + "blocked": "阻塞", + "pending": "等待中", + "info": "資訊", + "skipped": "已略過", + "unknown": "未知" + } + }, "recurrence": { "title": "重複告警工作項", "subtitle": "把 run_completed_no_repair、修復失敗與人工閘門接成可追蹤 work item", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index a3330aa2..4d28e587 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -3044,6 +3044,39 @@ "handoffStatus": "交接狀態:{status}" } }, + "incidentAudit": { + "title": "焦點事件稽核鏈", + "emptyIncident": "尚未選到 Incident", + "empty": "目前工作項尚未連到 Incident;先從重複告警或補救佇列選取工作項。", + "openRuns": "回 Run 監控", + "flowTitle": "處理流程", + "loading": "正在讀取 incident timeline,先顯示焦點事件與等待資料。", + "timelineEmpty": 
"Incident timeline 尚未回應,不能判定流程階段。", + "evidenceTitle": "執行與學習證據", + "executor": "Executor", + "ansible": "Ansible / PlayBook", + "mcp": "MCP 調查", + "km": "KM / Learning", + "metrics": { + "stages": "階段", + "events": "事件", + "source": "Direct / Candidate / Applied", + "verification": "最終驗證" + }, + "statusLabels": { + "success": "成功", + "completed": "已完成", + "warning": "警告", + "warn": "警告", + "failed": "失敗", + "error": "錯誤", + "blocked": "阻塞", + "pending": "等待中", + "info": "資訊", + "skipped": "已略過", + "unknown": "未知" + } + }, "recurrence": { "title": "重複告警工作項", "subtitle": "把 run_completed_no_repair、修復失敗與人工閘門接成可追蹤 work item", diff --git a/apps/web/src/app/[locale]/awooop/work-items/page.tsx b/apps/web/src/app/[locale]/awooop/work-items/page.tsx index 9cff56ee..97916e91 100644 --- a/apps/web/src/app/[locale]/awooop/work-items/page.tsx +++ b/apps/web/src/app/[locale]/awooop/work-items/page.tsx @@ -922,9 +922,42 @@ type Telemetry = { driftFingerprintState: DriftFingerprintState | null; callbackReplies: CallbackRepliesWorkItemResponse | null; statusChain: AwoooPStatusChain | null; + incidentTimeline: IncidentTimelineResponse | null; aiRouteStatus: AiRouteStatusResponse | null; }; +type IncidentTimelineEvent = { + stage: string; + status: string; + title: string; + description?: string | null; + actor?: string | null; + timestamp?: string | null; + source_table?: string | null; + data?: Record; +}; + +type IncidentTimelineStage = IncidentTimelineEvent & { + label: string; + events?: IncidentTimelineEvent[]; +}; + +type IncidentTimelineResponse = { + incident_id: string; + title: string; + status: string; + severity: string; + started_at?: string | null; + updated_at?: string | null; + resolved_at?: string | null; + affected_services?: string[]; + approval_ids?: string[]; + timeline: IncidentTimelineStage[]; + events: IncidentTimelineEvent[]; + ascii_timeline: string; + reconciliation?: Record; +}; + type WorkItem = { id: string; phase: string; @@ -1043,6 +1076,37 @@ 
function selectStatusChainIncidentId( ); } +function auditStatusClass(status?: string | null) { /* Maps an audit status to its border/background/text class string; any unrecognised value (including null/undefined) falls through to the neutral classes. */ + if (status === "success" || status === "completed" || status === "resolved") { + return "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]"; + } + if (status === "warning" || status === "warn" || status === "pending" || status === "info") { + return "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"; + } + if (status === "failed" || status === "error" || status === "blocked") { + return "border-[#e2a29b] bg-[#fff0ef] text-[#9f2f25]"; + } + return "border-[#d8d3c7] bg-[#faf9f3] text-[#5f5b52]"; +} + +function auditStatusLabelKey(status?: string | null) { /* Maps an audit status to a key that exists under "statusLabels" in the message catalog; returns "unknown" for anything without a label. */ + if ( + status === "success" || + status === "completed" || + status === "warning" || + status === "warn" || + status === "failed" || + status === "error" || + status === "blocked" || + status === "pending" || + status === "info" || + status === "skipped" + ) { + return status; + } + /* "resolved" is styled as success by auditStatusClass but has no statusLabels entry, so reuse "completed" instead of labelling a success-styled status "unknown". */ return status === "resolved" ? "completed" : "unknown"; +} + function recurrenceRepairStatusKey(status?: string | null) { if ( status === "auto_repair_verified" || @@ -2380,6 +2444,178 @@ function ProductionClaimBanner({ ); } +function WorkItemIncidentAuditPanel({ + timeline, + chain, + focusedIncidentId, + projectId, + loading, +}: { + timeline: IncidentTimelineResponse | null; + chain: AwoooPStatusChain | null; + focusedIncidentId: string | null; + projectId: string; + loading: boolean; +}) { + const t = useTranslations("awooop.workItems.incidentAudit"); + const incidentId = focusedIncidentId ?? chain?.source_id ?? timeline?.incident_id ?? null; + const stages = timeline?.timeline?.filter((stage) => stage.status !== "skipped") ??
[]; + const executor = timeline?.timeline?.find((stage) => stage.stage === "executor"); + const verifier = timeline?.timeline?.find((stage) => stage.stage === "verifier"); + const km = timeline?.timeline?.find((stage) => stage.stage === "km"); + const investigator = timeline?.timeline?.find((stage) => stage.stage === "investigator"); + const sourceCorrelation = chain?.source_refs?.correlation; + const ansible = chain?.execution?.ansible; + const timelineLoaded = Boolean(timeline); + const importantEvents = (timeline?.events ?? []) + .filter((event) => ( + event.source_table === "automation_operation_log" || + event.source_table === "knowledge_entries" || + event.source_table === "incident_evidence" || + event.source_table === "alert_operation_log" || + event.stage === "executor" || + event.stage === "verifier" || + event.stage === "km" || + event.stage === "ai_router" + )) + .slice(-5) + .reverse(); + + return ( +
+
+
+
+ + {t("openRuns")} +
+ + {!incidentId ? ( +
+ {t("empty")} +
+ ) : ( + <> +
+
+

{t("metrics.stages")}

+

+ {timelineLoaded ? stages.length : "--"} +

+
+
+

{t("metrics.events")}

+

+ {timelineLoaded ? timeline?.events?.length ?? 0 : "--"} +

+
+
+

{t("metrics.source")}

+

+ {sourceCorrelation + ? `${sourceCorrelation.direct_ref_total ?? 0}/${sourceCorrelation.candidate_total ?? 0}/${sourceCorrelation.applied_link_total ?? 0}` + : "--"} +

+
+
+

{t("metrics.verification")}

+ + {t(`statusLabels.${auditStatusLabelKey(verifier?.status ?? chain?.verification)}` as never)} + +
+
+ +
+
+
+
+ {timeline?.ascii_timeline ? ( +

+ {timeline.ascii_timeline} +

+ ) : ( +

+ {loading && !timelineLoaded ? t("loading") : t("timelineEmpty")} +

+ )} + {stages.length > 0 ? ( +
+ {stages.slice(0, 6).map((stage) => ( +
+
+ {stage.label} + + {t(`statusLabels.${auditStatusLabelKey(stage.status)}` as never)} + +
+

+ {stage.title} +

+
+ ))} +
+ ) : null} +
+ +
+
+
+
+ {[ + [t("executor"), executor?.title ?? chain?.execution?.latest_operation_type ?? "--"], + [t("ansible"), ansible?.latest_playbook_path ?? ansible?.latest_catalog_id ?? "--"], + [t("mcp"), investigator?.title ?? "--"], + [t("km"), km?.title ?? "--"], + ].map(([label, value]) => ( +
+

{label}

+

+ {value} +

+
+ ))} +
+ {importantEvents.length > 0 ? ( +
+ {importantEvents.map((event, index) => ( +
+
+ + {t(`statusLabels.${auditStatusLabelKey(event.status)}` as never)} + + {event.source_table ?? "--"} +
+

+ {event.title} +

+
+ ))} +
+ ) : null} +
+
+ + )} +
+ ); +} + function RecurrenceWorkQueuePanel({ recurrence, focusedWorkItemId, @@ -4902,6 +5138,7 @@ export default function AwoooPWorkItemsPage() { driftFingerprintState: null, callbackReplies: null, statusChain: null, + incidentTimeline: null, aiRouteStatus: null, }); const [loading, setLoading] = useState(true); @@ -4980,6 +5217,13 @@ export default function AwoooPWorkItemsPage() { 12000 ); } + const timelineIncidentId = statusChain?.source_id ?? statusChainIncidentId; + const incidentTimeline = timelineIncidentId + ? await fetchJson( + `${API_BASE}/api/v1/incidents/${encodeURIComponent(timelineIncidentId)}/timeline`, + 12000 + ) + : null; setTelemetry({ quality, @@ -4999,6 +5243,7 @@ export default function AwoooPWorkItemsPage() { driftFingerprintState, callbackReplies, statusChain, + incidentTimeline, aiRouteStatus, }); setLastUpdated(new Date()); @@ -5108,6 +5353,14 @@ export default function AwoooPWorkItemsPage() { + + pass +python3 -m json.tool apps/web/messages/en.json -> pass +git diff --check -> pass +pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-work-items-incident-audit-20260531.tsbuildinfo -> pass +python3 scripts/security/security-mirror-progress-guard.py -> SECURITY_MIRROR_PROGRESS_GUARD_OK +NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build -> pass +``` + +**Browser smoke(local production build)**: + +```text +http://127.0.0.1:3107/zh-TW/awooop/work-items?project_id=awoooi&incident_id=INC-20260530-0DD83C + visible: 焦點事件稽核鏈 + visible: 處理流程 + visible: 執行與學習證據 + visible: 正在讀取 incident timeline,先顯示焦點事件與等待資料 + canScroll=true + horizontalOverflow=false +``` + +**技術債 / 現場清理**: + +- 本機 `/System/Volumes/Data` 一度只剩約 103MiB,導致 Git 無法寫入 `FETCH_HEAD`,且 build 先前出現 webpack cache ENOSPC 警告。 +- 已只清理本 worktree 產生物 `apps/web/.next`,釋放約 1.5GiB;未刪資料庫、原始碼或任何 production 狀態。 +- 後續仍應把 local runner / build cache 空間列為開發機維運項,避免前端驗證被快取空間污染。 + +**目前整體進度(local ready, pending Gitea deploy)**: + +- Telegram / Run / Work Items 單一 Incident 
drill-down:約 88%;Run detail 已 production,Work Items 已 local 驗證,待推版與 production smoke。 +- MCP / Sentry / SigNoz / KM / PlayBook / Ansible 的跨頁透明度:約 84%;Work Items 已能承接同一條 timeline,但 Approvals / Tickets 還需同樣接入。 +- 前端 AI 自動化管理介面同步:約 85%;工作鏈路頁開始成為操作員入口,不再只靠 Telegram 按鈕。 +- 整體 AI 自動化飛輪:約 73%;仍不能宣稱 24h 全自動 repair 閉環,需以 production evidence 持續補齊。 +- 24h 完整 AI Agent 自動修復 production claim:0%;仍維持嚴格口徑,只能宣稱「已驗證的特定 controlled apply / drill-down 能被追蹤」。 + ## 2026-05-31|Ollama 111 local fallback 復原確認 **背景**: