diff --git a/apps/api/src/api/v1/platform/operator_runs.py b/apps/api/src/api/v1/platform/operator_runs.py index ae858d22..d39ee79f 100644 --- a/apps/api/src/api/v1/platform/operator_runs.py +++ b/apps/api/src/api/v1/platform/operator_runs.py @@ -107,7 +107,7 @@ async def list_runs( state: str | None = Query(None, description="Run 狀態 filter(可選)"), remediation_status: str | None = Query( None, - description="AI 補救證據狀態 filter(no_evidence/read_only_dry_run/write_observed/blocked/observed)", + description="AI 證據狀態 filter(no_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed)", ), incident_id: str | None = Query(None, description="關聯 Incident ID filter(可選)"), page: int = Query(1, ge=1, description="頁碼,從 1 開始"), @@ -152,7 +152,7 @@ async def list_approvals( run_id: str | None = Query(None, description="Run ID(可選,M8 詳情頁查單筆)"), remediation_status: str | None = Query( None, - description="AI 補救證據狀態 filter(no_evidence/read_only_dry_run/write_observed/blocked/observed)", + description="AI 證據狀態 filter(no_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed)", ), ) -> dict[str, Any]: return await list_approvals_svc( diff --git a/apps/api/src/services/platform_operator_service.py b/apps/api/src/services/platform_operator_service.py index 419a5d37..6654f2e5 100644 --- a/apps/api/src/services/platform_operator_service.py +++ b/apps/api/src/services/platform_operator_service.py @@ -50,6 +50,7 @@ _MAX_STEP_SUMMARY_CHARS = 128 _REMEDIATION_HISTORY_LIMIT = 20 _INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{4,}\b") _REMEDIATION_STATUS_FILTERS = { + "mcp_observed", "no_evidence", "read_only_dry_run", "write_observed", @@ -443,6 +444,22 @@ def _route_label_from_remediation(item: dict[str, Any]) -> str: ) or "--" +def _route_label_from_legacy_mcp(record: dict[str, Any]) -> str: + """Render self-built/legacy MCP evidence as agent/tool/scope for list UX.""" + tool = record.get("tool_name") + server = record.get("mcp_server") + tool_label = ".".join(str(part) for part in (server, tool) if part) or tool + return "/".join( + str(part) + for part in ( + record.get("agent_role"), + tool_label, + "read", + ) + if part + ) or "--" + + def _remediation_timeline_status(item: dict[str, Any]) -> str: if item.get("success") is False or item.get("allowed") is False: return "failed" @@ -485,18 +502,31 @@ def _run_remediation_list_summary( run: AwoooPRunState, incident_ids: list[str], items: list[dict[str, Any]], + legacy_mcp_records: list[dict[str, Any]] | None = None, errors: list[dict[str, str]] | None = None, ) -> dict[str, Any]: - """Summarize durable ADR-100 dry-run evidence for list-level UX.""" + """Summarize durable ADR-100 dry-run and MCP investigation evidence for list UX.""" sorted_items = sorted( (item for item in items if isinstance(item, dict)), key=lambda item: str(item.get("created_at") or ""), reverse=True, ) + sorted_mcp_records = sorted( + (record for record in (legacy_mcp_records or []) if isinstance(record, dict)), + key=lambda record: str(record.get("created_at") or ""), + reverse=True, + ) latest = sorted_items[0] if sorted_items else {} + latest_mcp = sorted_mcp_records[0] if sorted_mcp_records else {} writes_incident = latest.get("writes_incident_state") writes_auto_repair = latest.get("writes_auto_repair_result") - route = _route_label_from_remediation(latest) if latest else "--" + route = ( + _route_label_from_remediation(latest) + if latest + else _route_label_from_legacy_mcp(latest_mcp) + if latest_mcp + else "--" + ) write_observed = writes_incident is True or writes_auto_repair is True is_read_only = ( bool(latest) @@ -504,9 +534,12 @@ def _run_remediation_list_summary( and writes_incident is False and writes_auto_repair is False ) + mcp_total = len(sorted_mcp_records) + mcp_success = sum(1 for record in sorted_mcp_records if record.get("success") is True) + mcp_failed = sum(1 for record in sorted_mcp_records if record.get("success") is False) if not sorted_items: - status_value = "no_evidence" + status_value = "mcp_observed" if mcp_total > 0 else "no_evidence" elif latest.get("success") is False or latest.get("allowed") is False: status_value = "blocked" elif write_observed: @@ -518,22 +551,28 @@ def _run_remediation_list_summary( return { "schema_version": "awooop_run_remediation_summary_v1", - "source": "alert_operation_log", + "source": "alert_operation_log" if sorted_items else "mcp_audit_log" if mcp_total > 0 else "none", "incident_ids": incident_ids, "total": len(sorted_items), + "evidence_total": len(sorted_items) + mcp_total, "status": status_value, "has_dry_run": bool(sorted_items), + "has_mcp_investigation": mcp_total > 0, "is_read_only": is_read_only, "human_gate_open": run.state == "waiting_approval", "latest_at": latest.get("created_at"), "latest_preview": latest.get("verification_result_preview"), "latest_mode": latest.get("mode"), "latest_route": route, - "latest_agent_id": latest.get("agent_id"), - "latest_tool_name": latest.get("tool_name"), - "latest_required_scope": latest.get("required_scope"), + "latest_agent_id": latest.get("agent_id") or latest_mcp.get("agent_role"), + "latest_tool_name": latest.get("tool_name") or latest_mcp.get("tool_name"), + "latest_required_scope": latest.get("required_scope") or ("read" if latest_mcp else None), "writes_incident_state": writes_incident, "writes_auto_repair_result": writes_auto_repair, + "mcp_observation_total": mcp_total, + "mcp_observation_success": mcp_success, + "mcp_observation_failed": mcp_failed, + "latest_mcp_server": latest_mcp.get("mcp_server"), "errors": errors or [], } @@ -602,6 +641,7 @@ async def _build_run_remediation_summaries( _append_unique(all_incident_ids, incident_id) histories_by_incident: dict[str, list[dict[str, Any]]] = {} + legacy_mcp_by_incident: dict[str, list[dict[str, Any]]] = {} errors_by_incident: dict[str, dict[str, str]] = {} if all_incident_ids: from src.services.adr100_remediation_service import Adr100RemediationService @@ -628,20 +668,27 @@ async def _build_run_remediation_summaries( "incident_id": incident_id, "error": str(exc), } + legacy_mcp_by_incident = await _fetch_legacy_mcp_by_incident_ids( + all_incident_ids, + limit=min(max(len(all_incident_ids) * _REMEDIATION_HISTORY_LIMIT, 100), 5_000), + ) summaries: dict[UUID, dict[str, Any]] = {} for run in runs: incident_ids = incident_ids_by_run.get(run.run_id, []) items: list[dict[str, Any]] = [] + legacy_mcp_records: list[dict[str, Any]] = [] errors: list[dict[str, str]] = [] for incident_id in incident_ids: items.extend(histories_by_incident.get(incident_id, [])) + legacy_mcp_records.extend(legacy_mcp_by_incident.get(incident_id, [])) if incident_id in errors_by_incident: errors.append(errors_by_incident[incident_id]) summaries[run.run_id] = _run_remediation_list_summary( run=run, incident_ids=incident_ids, items=items, + legacy_mcp_records=legacy_mcp_records, errors=errors, ) return summaries @@ -744,6 +791,31 @@ def _legacy_mcp_record(row: MCPAuditLog) -> dict[str, Any]: } +async def _fetch_legacy_mcp_by_incident_ids( + incident_ids: list[str], + *, + limit: int, +) -> dict[str, list[dict[str, Any]]]: + """Fetch legacy/self-built MCP rows for list evidence summaries.""" + if not incident_ids: + return {} + + async with get_db_context("awoooi") as db: + result = await db.execute( + select(MCPAuditLog) + .where(MCPAuditLog.incident_id.in_(incident_ids)) + .order_by(MCPAuditLog.created_at.desc()) + .limit(limit) + ) + rows = list(result.scalars().all()) + + by_incident: dict[str, list[dict[str, Any]]] = defaultdict(list) + for row in rows: + if row.incident_id: + by_incident[row.incident_id].append(_legacy_mcp_record(row)) + return dict(by_incident) + + async def _fetch_run_legacy_mcp_history( incident_ids: list[str], *, diff --git a/apps/api/tests/test_awooop_operator_timeline_labels.py b/apps/api/tests/test_awooop_operator_timeline_labels.py index 0930881e..50c3ce46 100644 --- a/apps/api/tests/test_awooop_operator_timeline_labels.py +++ b/apps/api/tests/test_awooop_operator_timeline_labels.py @@ -163,6 +163,45 @@ def test_run_remediation_list_summary_marks_read_only_dry_run() -> None: assert summary["latest_route"] == "auto_repair_executor/ssh_diagnose/read" +def test_run_remediation_list_summary_marks_mcp_observed_without_dry_run() -> None: + run = SimpleNamespace(state="completed") + + summary = _run_remediation_list_summary( + run=run, + incident_ids=["INC-20260518-792684"], + items=[], + legacy_mcp_records=[ + { + "created_at": "2026-05-18T04:31:30+00:00", + "incident_id": "INC-20260518-792684", + "agent_role": "pre_decision_investigator", + "mcp_server": "ssh_host", + "tool_name": "ssh_diagnose", + "success": True, + }, + { + "created_at": "2026-05-18T04:31:29+00:00", + "incident_id": "INC-20260518-792684", + "agent_role": "pre_decision_investigator", + "mcp_server": "signoz", + "tool_name": "query_logs", + "success": False, + }, + ], + ) + + assert summary["status"] == "mcp_observed" + assert summary["source"] == "mcp_audit_log" + assert summary["total"] == 0 + assert summary["evidence_total"] == 2 + assert summary["has_dry_run"] is False + assert summary["has_mcp_investigation"] is True + assert summary["mcp_observation_total"] == 2 + assert summary["mcp_observation_success"] == 1 + assert summary["mcp_observation_failed"] == 1 + assert summary["latest_route"] == "pre_decision_investigator/ssh_host.ssh_diagnose/read" + + def test_run_remediation_list_summary_flags_write_observed() -> None: run = SimpleNamespace(state="completed") @@ -188,6 +227,10 @@ def test_run_remediation_list_summary_flags_write_observed() -> None: def test_remediation_summary_matches_status_filter() -> None: + assert _remediation_summary_matches_status( + {"status": "mcp_observed"}, + "mcp_observed", + ) assert _remediation_summary_matches_status( {"status": "read_only_dry_run"}, "read_only_dry_run", diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 626b10b2..c15ee00c 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -1821,8 +1821,9 @@ "listEvidence": { "column": "AI Evidence", "count": "{count} dry-runs", + "mcpCount": "{count} MCP investigations", "route": "MCP: {route}", - "emptyShort": "No remediation dry-run linked", + "emptyShort": "No AI evidence linked", "manualGate": "Next: human approval", "filters": { "label": "AI evidence filter", @@ -1838,29 +1839,33 @@ }, "statuses": { "noEvidence": "No dry-run yet", + "mcpObserved": "MCP investigated", "readOnlyDryRun": "AI dry-run: read-only", "writeObserved": "Write flag observed", "blocked": "Dry-run blocked", "observed": "Evidence linked" }, "details": { - "noEvidence": "This row is not linked to ADR-100 remediation dry-run records in alert_operation_log yet.", + "noEvidence": "This row is not linked to ADR-100 remediation dry-run or MCP investigation evidence yet.", + "mcpObserved": "AI has gathered evidence through MCP / self-built MCP, but no remediation dry-run or execution has started.", "readOnlyDryRun": "AI has run the remediation dry-run and the latest record did not write incident or auto-repair state.", "writeObserved": "The latest remediation record contains write flags; verify the state-change source before approval.", "blocked": "The remediation dry-run failed or was blocked by a gate; human review is required.", "observed": "This row is linked to remediation history; open Run Timeline for the full evidence." }, "summary": { + "mcpObserved": "MCP investigated", + "mcpObservedDetail": "List rows are linked to MCP / self-built MCP investigation evidence", "readOnly": "Read-only dry-run", "readOnlyDetail": "Latest evidence shows AI trialed the action without writing state", "manualGate": "Human gate", "manualGateDetail": "AI is stopped at the approval gate and needs approve / reject", "writeObserved": "Write flags", "writeObservedDetail": "Verify whether this is the expected auto-repair result", - "noEvidence": "Missing evidence", - "noEvidenceDetail": "The list row is not linked to ADR-100 dry-run history yet", + "noEvidence": "Missing AI evidence", + "noEvidenceDetail": "The list row is not linked to ADR-100 dry-run or MCP evidence yet", "approvalReadOnlyDetail": "Read-only remediation evidence is visible before approval", - "approvalNoEvidenceDetail": "Approval still lacks remediation dry-run evidence; inspect Run Timeline" + "approvalNoEvidenceDetail": "Approval still lacks AI evidence; inspect Run Timeline" } }, "incidentEvidence": { diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 4c54dff5..580c0013 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -1822,8 +1822,9 @@ "listEvidence": { "column": "AI 證據", "count": "試跑 {count} 次", + "mcpCount": "MCP 調查 {count} 次", "route": "MCP:{route}", - "emptyShort": "尚未連到補救試跑", + "emptyShort": "尚未連到 AI 證據", "manualGate": "下一步:人工審批", "filters": { "label": "AI 證據篩選", @@ -1839,29 +1840,33 @@ }, "statuses": { "noEvidence": "尚無試跑", + "mcpObserved": "MCP 已調查", "readOnlyDryRun": "AI 已試跑:只讀", "writeObserved": "有寫入旗標", "blocked": "試跑受阻", "observed": "有補救證據" }, "details": { - "noEvidence": "此列尚未從 alert_operation_log 連到 ADR-100 補救試跑。", + "noEvidence": "此列尚未連到 ADR-100 補救試跑或 MCP 調查證據。", + "mcpObserved": "AI 已透過 MCP / 自建 MCP 收集證據,但尚未進入補救試跑或執行。", "readOnlyDryRun": "AI 已走補救試跑,且最新紀錄沒有寫入 incident 或 auto-repair 狀態。", "writeObserved": "最新補救紀錄含寫入旗標,審批前需確認狀態變更來源。", "blocked": "補救試跑未通過或被 gate 阻擋,需人工確認卡點。", "observed": "此列已連到補救歷史,請進入 Run Timeline 查看完整證據。" }, "summary": { + "mcpObserved": "MCP 已調查", + "mcpObservedDetail": "列表已連到 MCP / 自建 MCP 調查證據", "readOnly": "只讀試跑", "readOnlyDetail": "最新證據顯示 AI 已試跑且未寫狀態", "manualGate": "人工閘門", "manualGateDetail": "AI 已停在 approval gate,需 approve / reject", "writeObserved": "寫入旗標", "writeObservedDetail": "需確認是否為預期自動修復結果", - "noEvidence": "缺補救證據", - "noEvidenceDetail": "列表尚未連到 ADR-100 dry-run history", + "noEvidence": "缺 AI 證據", + "noEvidenceDetail": "列表尚未連到 ADR-100 dry-run 或 MCP evidence", "approvalReadOnlyDetail": "審批前已有只讀補救證據可回看", - "approvalNoEvidenceDetail": "審批前仍缺補救試跑證據,需進 Run Timeline 檢查" + "approvalNoEvidenceDetail": "審批前仍缺 AI 證據,需進 Run Timeline 檢查" } }, "incidentEvidence": { diff --git a/apps/web/src/app/[locale]/awooop/approvals/page.tsx b/apps/web/src/app/[locale]/awooop/approvals/page.tsx index e12d4204..a1ff614f 100644 --- a/apps/web/src/app/[locale]/awooop/approvals/page.tsx +++ b/apps/web/src/app/[locale]/awooop/approvals/page.tsx @@ -28,6 +28,7 @@ import { Link } from "@/i18n/routing"; type RemediationStatus = | "no_evidence" + | "mcp_observed" | "read_only_dry_run" | "write_observed" | "blocked" @@ -36,12 +37,17 @@ type RemediationStatus = interface RemediationSummary { incident_ids?: string[]; total?: number; + evidence_total?: number; status?: RemediationStatus | string; + has_mcp_investigation?: boolean; human_gate_open?: boolean; latest_route?: string | null; latest_preview?: string | null; writes_incident_state?: boolean | null; writes_auto_repair_result?: boolean | null; + mcp_observation_total?: number; + mcp_observation_success?: number; + mcp_observation_failed?: number; } interface Approval { @@ -101,6 +107,12 @@ const REMEDIATION_STATUS_CONFIG: Record< icon: AlertCircle, className: "border-[#d8d3c7] bg-[#faf9f3] text-[#5f5b52]", }, + mcp_observed: { + labelKey: "statuses.mcpObserved", + detailKey: "details.mcpObserved", + icon: SearchCheck, + className: "border-[#9bb6d9] bg-[#eef5ff] text-[#1f5b9b]", + }, read_only_dry_run: { labelKey: "statuses.readOnlyDryRun", detailKey: "details.readOnlyDryRun", @@ -127,6 +139,7 @@ const REMEDIATION_STATUS_CONFIG: Record< }, }; const REMEDIATION_FILTER_OPTIONS: RemediationStatus[] = [ + "mcp_observed", "read_only_dry_run", "write_observed", "blocked", @@ -137,6 +150,7 @@ const REMEDIATION_FILTER_OPTIONS: RemediationStatus[] = [ function normalizeRemediationStatus(summary?: RemediationSummary | null): RemediationStatus { const statusValue = summary?.status; if ( + statusValue === "mcp_observed" || statusValue === "read_only_dry_run" || statusValue === "write_observed" || statusValue === "blocked" || @@ -153,9 +167,14 @@ function RemediationEvidenceCell({ summary }: { summary?: RemediationSummary | n const config = REMEDIATION_STATUS_CONFIG[status]; const Icon = config.icon; const total = summary?.total ?? 0; + const mcpTotal = summary?.mcp_observation_total ?? 0; + const evidenceTotal = summary?.evidence_total ?? total + mcpTotal; const route = summary?.latest_route && summary.latest_route !== "--" ? summary.latest_route : null; + const countText = status === "mcp_observed" + ? t("mcpCount", { count: mcpTotal || evidenceTotal }) + : t("count", { count: total }); return (
@@ -169,9 +188,9 @@ function RemediationEvidenceCell({ summary }: { summary?: RemediationSummary | n
-
+
{queueSummary.map((item) => { const Icon = item.icon; return ( diff --git a/apps/web/src/app/[locale]/awooop/runs/page.tsx b/apps/web/src/app/[locale]/awooop/runs/page.tsx index 35795d67..f6c8b8b0 100644 --- a/apps/web/src/app/[locale]/awooop/runs/page.tsx +++ b/apps/web/src/app/[locale]/awooop/runs/page.tsx @@ -42,6 +42,7 @@ type RunState = type RunLane = "intake" | "diagnosis" | "approval" | "execution" | "done" | "manual"; type RemediationStatus = | "no_evidence" + | "mcp_observed" | "read_only_dry_run" | "write_observed" | "blocked" @@ -52,8 +53,10 @@ interface RemediationSummary { source?: string; incident_ids?: string[]; total?: number; + evidence_total?: number; status?: RemediationStatus | string; has_dry_run?: boolean; + has_mcp_investigation?: boolean; is_read_only?: boolean; human_gate_open?: boolean; latest_at?: string | null; @@ -65,6 +68,10 @@ interface RemediationSummary { latest_required_scope?: string | null; writes_incident_state?: boolean | null; writes_auto_repair_result?: boolean | null; + mcp_observation_total?: number; + mcp_observation_success?: number; + mcp_observation_failed?: number; + latest_mcp_server?: string | null; } interface Run { @@ -235,6 +242,12 @@ const REMEDIATION_STATUS_CONFIG: Record< icon: AlertCircle, className: "border-[#d8d3c7] bg-[#faf9f3] text-[#5f5b52]", }, + mcp_observed: { + labelKey: "statuses.mcpObserved", + detailKey: "details.mcpObserved", + icon: SearchCheck, + className: "border-[#9bb6d9] bg-[#eef5ff] text-[#1f5b9b]", + }, read_only_dry_run: { labelKey: "statuses.readOnlyDryRun", detailKey: "details.readOnlyDryRun", @@ -261,6 +274,7 @@ const REMEDIATION_STATUS_CONFIG: Record< }, }; const REMEDIATION_FILTER_OPTIONS: RemediationStatus[] = [ + "mcp_observed", "read_only_dry_run", "write_observed", "blocked", @@ -330,6 +344,7 @@ function RunLaneBadge({ state }: { state: RunState }) { function normalizeRemediationStatus(summary?: RemediationSummary | null): RemediationStatus { const statusValue = summary?.status; if ( + statusValue === "mcp_observed" || statusValue === "read_only_dry_run" || statusValue === "write_observed" || statusValue === "blocked" || @@ -346,9 +361,14 @@ function RemediationEvidenceCell({ summary }: { summary?: RemediationSummary | n const config = REMEDIATION_STATUS_CONFIG[status]; const Icon = config.icon; const total = summary?.total ?? 0; + const mcpTotal = summary?.mcp_observation_total ?? 0; + const evidenceTotal = summary?.evidence_total ?? total + mcpTotal; const route = summary?.latest_route && summary.latest_route !== "--" ? summary.latest_route : null; + const countText = status === "mcp_observed" + ? t("mcpCount", { count: mcpTotal || evidenceTotal }) + : t("count", { count: total }); return (
@@ -362,9 +382,9 @@ function RemediationEvidenceCell({ summary }: { summary?: RemediationSummary | n
-
+
{[ + { + label: tEvidence("summary.mcpObserved"), + value: evidenceSummary.mcpObserved, + detail: tEvidence("summary.mcpObservedDetail"), + icon: SearchCheck, + className: "border-[#9bb6d9] bg-[#eef5ff] text-[#1f5b9b]", + }, { label: tEvidence("summary.readOnly"), value: evidenceSummary.readOnly,