diff --git a/apps/api/src/services/awooop_truth_chain_service.py b/apps/api/src/services/awooop_truth_chain_service.py index 01736c90..f296ce87 100644 --- a/apps/api/src/services/awooop_truth_chain_service.py +++ b/apps/api/src/services/awooop_truth_chain_service.py @@ -101,6 +101,55 @@ def _auto_repair_ids(auto_repair_executions: list[dict[str, Any]]) -> list[str]: return [str(row["id"]) for row in auto_repair_executions if row.get("id")] +def _looks_like_no_action(value: Any) -> bool: + text = str(value or "").upper() + return ( + "NO_ACTION" in text + or "NO-ACTION" in text + or "NOACTION" in text + or text.startswith("OBSERVE") + or text.startswith("INVESTIGATE") + ) + + +def _approval_has_no_action(approvals: list[dict[str, Any]]) -> bool: + return any(_looks_like_no_action(row.get("action")) for row in approvals) + + +def _is_no_action_operation(row: dict[str, Any]) -> bool: + """Return true for durable audit rows that represent observation, not repair.""" + if str(row.get("operation_type") or "") != "playbook_executed": + return False + return any( + _looks_like_no_action(row.get(key)) + for key in ( + "input_action", + "output_action", + "output_reason", + "output_not_used_reason", + ) + ) + + +def _is_audit_only_operation(row: dict[str, Any]) -> bool: + operation_type = str(row.get("operation_type") or "") + status = str(row.get("status") or "").lower() + if status == "dry_run": + return True + return operation_type in { + "ansible_candidate_matched", + "ansible_execution_skipped", + } + + +def _effective_execution_ops(automation_ops: list[dict[str, Any]]) -> list[dict[str, Any]]: + return [ + row + for row in automation_ops + if not _is_no_action_operation(row) and not _is_audit_only_operation(row) + ] + + def build_incident_reconciliation( *, incident: dict[str, Any] | None, @@ -255,7 +304,8 @@ def _truth_status( if incident is not None: incident_status = str(incident.get("status") or "unknown") repair_rows = auto_repair_executions or [] - has_execution_records = bool(automation_ops or repair_rows) + effective_ops = _effective_execution_ops(automation_ops) + has_execution_records = bool(effective_ops or repair_rows) stage = "received" stage_status = incident_status.lower() if incident_status in {"RESOLVED", "CLOSED"}: @@ -275,23 +325,24 @@ def _truth_status( approval_statuses = {str(row.get("status") or "").upper() for row in approvals} approval_actions = " ".join(str(row.get("action") or "") for row in approvals).upper() + approval_no_action = _approval_has_no_action(approvals) if any(status in {"PENDING", "WAITING_APPROVAL"} for status in approval_statuses): stage = "approval_required" stage_status = "waiting" needs_human = True - elif "APPROVED" in approval_statuses and not has_execution_records: - if "NO_ACTION" in approval_actions: + elif not has_execution_records and (approval_no_action or "NO_ACTION" in approval_actions): + if approval_statuses: stage = "manual_required" stage_status = "blocked" needs_human = True blockers.append("approval_resolved_no_action_without_execution") - else: - stage = "execution_missing" - stage_status = "blocked" - needs_human = True - blockers.append("approved_without_execution_record") + elif "APPROVED" in approval_statuses and not has_execution_records: + stage = "execution_missing" + stage_status = "blocked" + needs_human = True + blockers.append("approved_without_execution_record") - op_statuses = {str(row.get("status") or "").lower() for row in automation_ops} + op_statuses = {str(row.get("status") or "").lower() for row in effective_ops} repair_successes = {row.get("success") for row in repair_rows} if op_statuses or repair_successes: if (op_statuses & {"success", "completed"}) or True in repair_successes: @@ -372,12 +423,16 @@ def build_automation_quality( evidence_succeeded = sum(int(row.get("sensors_succeeded") or 0) for row in evidence_rows) gateway_total = int(gateway_mcp_summary.get("total") or 0) legacy_total = int(legacy_mcp_summary.get("total") or 0) - automation_statuses = {str(row.get("status") or "").lower() for row in automation_ops} + effective_ops = _effective_execution_ops(automation_ops) + noop_ops = [row for row in automation_ops if _is_no_action_operation(row)] + audit_only_ops = [row for row in automation_ops if _is_audit_only_operation(row)] + automation_statuses = {str(row.get("status") or "").lower() for row in effective_ops} auto_repair_successes = {row.get("success") for row in auto_repair_executions} - has_execution = bool(automation_ops or auto_repair_executions) + has_execution = bool(effective_ops or auto_repair_executions) verification_result = _latest_verification_result(incident, evidence_rows) approval_statuses = {str(row.get("status") or "").upper() for row in approvals} approval_actions = " ".join(str(row.get("action") or "") for row in approvals).upper() + approval_no_action = _approval_has_no_action(approvals) gate("source_persisted", "passed", str(incident.get("incident_id"))) gate("outbound_recorded", "passed" if outbound_rows else "missing", str(len(outbound_rows))) @@ -399,14 +454,14 @@ def build_automation_quality( if any(status in {"PENDING", "WAITING_APPROVAL"} for status in approval_statuses): gate("approval_state", "warning", "waiting_approval") - elif "APPROVED" in approval_statuses and "NO_ACTION" in approval_actions and not has_execution: + elif approval_statuses and (approval_no_action or "NO_ACTION" in approval_actions) and not has_execution: gate("approval_state", "failed", "approved_no_action_without_execution") elif approvals: gate("approval_state", "passed", ",".join(sorted(approval_statuses))) else: gate("approval_state", "not_applicable", "no approval") - gate("execution_recorded", "passed" if has_execution else "missing", str(len(automation_ops) + len(auto_repair_executions))) + gate("execution_recorded", "passed" if has_execution else "missing", str(len(effective_ops) + len(auto_repair_executions))) gate("auto_repair_recorded", "passed" if auto_repair_executions else "missing", str(len(auto_repair_executions))) if not has_execution: @@ -433,7 +488,7 @@ def build_automation_quality( verdict = "execution_failed" elif has_execution: verdict = "execution_unverified" - elif "APPROVED" in approval_statuses and "NO_ACTION" in approval_actions: + elif approval_statuses and (approval_no_action or "NO_ACTION" in approval_actions): verdict = "manual_required_no_action" elif any(status in {"PENDING", "WAITING_APPROVAL"} for status in approval_statuses): verdict = "approval_required" @@ -479,6 +534,9 @@ def build_automation_quality( "legacy_mcp_total": legacy_total, "approvals": len(approvals), "automation_operation_records": len(automation_ops), + "effective_execution_records": len(effective_ops), + "noop_operation_records": len(noop_ops), + "audit_only_operation_records": len(audit_only_ops), "auto_repair_execution_records": len(auto_repair_executions), "verification_result": verification_result, "knowledge_entries": len(km_entries), @@ -960,6 +1018,7 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[ error, duration_ms, tags, + input ->> 'action' AS input_action, input ->> 'executor' AS input_executor, input ->> 'execution_backend' AS input_execution_backend, input ->> 'playbook_id' AS input_playbook_id, @@ -967,6 +1026,8 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[ input ->> 'ansible_playbook_path' AS input_ansible_playbook_path, input ->> 'check_mode' AS input_check_mode, input ->> 'not_used_reason' AS input_not_used_reason, + output ->> 'action' AS output_action, + output ->> 'reason' AS output_reason, output ->> 'executor' AS output_executor, output ->> 'execution_backend' AS output_execution_backend, output ->> 'playbook_id' AS output_playbook_id, diff --git a/apps/api/tests/test_awooop_truth_chain_service.py b/apps/api/tests/test_awooop_truth_chain_service.py index 777d5580..53f5c3c0 100644 --- a/apps/api/tests/test_awooop_truth_chain_service.py +++ b/apps/api/tests/test_awooop_truth_chain_service.py @@ -58,6 +58,38 @@ def test_truth_status_marks_no_action_approval_as_manual_required() -> None: assert "awooop_mcp_gateway_audit_empty" in status["blockers"] +def test_truth_status_does_not_treat_no_action_audit_as_execution() -> None: + status = _truth_status( + incident={"incident_id": "INC-1", "status": "RESOLVED"}, + approvals=[{"status": "EXECUTION_SUCCESS", "action": "未知操作 | NO_ACTION"}], + evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 6}], + automation_ops=[ + { + "operation_type": "playbook_executed", + "status": "success", + "actor": "approval_execution", + "output_reason": "NO_ACTION", + "output_action": "未知操作 | NO_ACTION", + }, + { + "operation_type": "ansible_candidate_matched", + "status": "dry_run", + "output_not_used_reason": "Ansible check-mode is not wired yet", + }, + ], + drift=None, + drift_repeat_count=0, + gateway_mcp_total=8, + legacy_mcp_total=8, + outbound_visible_total=1, + ) + + assert status["current_stage"] == "manual_required" + assert status["stage_status"] == "blocked" + assert status["needs_human"] is True + assert "approval_resolved_no_action_without_execution" in status["blockers"] + + def test_truth_status_marks_repeated_pending_drift_as_human_needed() -> None: status = _truth_status( incident=None, @@ -249,6 +281,43 @@ def test_automation_quality_marks_no_action_without_execution() -> None: assert "execution_recorded" in quality["blockers"] +def test_automation_quality_ignores_no_action_audit_rows_as_execution() -> None: + quality = build_automation_quality( + incident={"incident_id": "INC-1", "status": "RESOLVED"}, + approvals=[{"status": "EXECUTION_SUCCESS", "action": "未知操作 | NO_ACTION"}], + evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 6}], + automation_ops=[ + { + "operation_type": "playbook_executed", + "status": "success", + "actor": "approval_execution", + "output_reason": "NO_ACTION", + "output_action": "未知操作 | NO_ACTION", + }, + { + "operation_type": "ansible_candidate_matched", + "status": "dry_run", + "output_not_used_reason": "Ansible check-mode is not wired yet", + }, + ], + auto_repair_executions=[], + gateway_mcp_summary={"total": 8}, + legacy_mcp_summary={"total": 8}, + outbound_rows=[{"message_id": "m1"}], + km_entries=[{"id": "km-1"}], + timeline_events=[{"id": "tl-1"}], + ) + + gates = {row["name"]: row["status"] for row in quality["gates"]} + assert quality["verdict"] == "manual_required_no_action" + assert quality["facts"]["automation_operation_records"] == 2 + assert quality["facts"]["effective_execution_records"] == 0 + assert quality["facts"]["noop_operation_records"] == 1 + assert quality["facts"]["audit_only_operation_records"] == 1 + assert gates["execution_recorded"] == "missing" + assert gates["verification_recorded"] == "not_applicable" + + def test_automation_quality_marks_verified_auto_repair() -> None: quality = build_automation_quality( incident={ diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index f251e9ca..b9ede896 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -7947,3 +7947,141 @@ curl http://127.0.0.1:3000/zh-TW/awooop -> 200 ``` **目前整體進度**:約 75%。 + +**production deploy / smoke 追加(完成)**: + +```text +Gitea: +2050 ai-code-review 356bfce2 -> success +2049 CD Pipeline 356bfce2 -> success +2048 ai-code-review e4203060 -> success +2047 CD Pipeline e4203060 -> success +Deploy marker: 90156a7c chore(cd): deploy 356bfce [skip ci] + +K8s image: +awoooi-api 192.168.0.110:5000/awoooi/api:356bfce2c8663c46933df4a9050dfaa9f594436a +awoooi-worker 192.168.0.110:5000/awoooi/api:356bfce2c8663c46933df4a9050dfaa9f594436a +awoooi-web 192.168.0.110:5000/awoooi/web:356bfce2c8663c46933df4a9050dfaa9f594436a + +health: +https://awoooi.wooo.work/api/v1/health -> 200 + +summary endpoint smoke: +GET /api/v1/platform/truth-chain/quality/summary?project_id=awoooi&hours=24&limit=10 -> 200 +schema_version=automation_quality_summary_v1 +incident_total=10 +evaluated_total=10 +verified_auto_repair_total=0 +production_claim.can_claim_full_auto_repair=false +examples_len=0 +visibility_note=Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details. +score_buckets={green: 0, yellow: 3, red: 7} +by_verdict: + received_only=4 + execution_unverified=3 + manual_required_no_action=3 +top gate_failures: + auto_repair_recorded=10 + evidence_collected=7 + execution_recorded=7 + mcp_gateway_observed=4 + outbound_recorded=4 + timeline_recorded=4 + approval_state=3 + verification_recorded=3 + +formal page smoke: +https://awoooi.wooo.work/zh-TW/awooop -> 200 +HTML contains: AwoooP 治理總覽 / 自動化品質 / 不可宣稱完整閉環 +``` + +判讀: + +- T12d 已部署:Operator Console 首頁現在能直接顯示最近告警的自動化品質總覽。 +- summary endpoint 是 public aggregate 讀取面,刻意清空 `examples`;逐筆 source-level truth-chain 仍走 `/truth-chain/{source_id}` operator auth。 +- 產線資料仍顯示 `verified_auto_repair_total=0`、`production_claim=false`,因此目前正確說法是「真相可見度已補上」,不是「完整 AI 自動修復閉環已完成」。 +- 下一步要進 T13:收斂 `execution_unverified`,把 post-execution verification / auto_repair durable record / learning writeback 的缺口從可見化推進到真正閉環。 +- 目前整體進度更新:約 76%。 + +### 2026-05-13 — AwoooP truth-chain T13:NO_ACTION / audit-only 不再誤算成自動修復執行(production verified) + +**目的**: + +- T12d production quality summary 顯示不少 `execution_unverified`,但 live trace 發現其中多數其實是 `NO_ACTION` 或 `ansible_candidate_matched/dry_run` audit row。 +- 這些 row 是「純觀察 / 候選稽核」,不是 AI 自動修復執行;若算成 execution,Operator 會誤以為「AI 修了但沒驗證」。 + +**變更**: + +- truth-chain 新增 effective execution 判定: + - `playbook_executed` 且 `output.reason=NO_ACTION` / action 含 `NO_ACTION` / `OBSERVE` / `INVESTIGATE` → `noop_operation_records` + - `status=dry_run`、`ansible_candidate_matched`、`ansible_execution_skipped` → audit-only,不算有效修復執行 +- `automation_quality.facts` 新增: + - `effective_execution_records` + - `noop_operation_records` + - `audit_only_operation_records` +- `_truth_status()` / `build_automation_quality()` 都改用 effective execution,避免 NO_ACTION 把 stage 推成 `execution_succeeded` 或 verdict 推成 `execution_unverified`。 + +**local verification**: + +```text +DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q +21 passed + +ruff check --select F821 src/services/awooop_truth_chain_service.py tests/test_awooop_truth_chain_service.py +OK + +python3 -m py_compile src/services/awooop_truth_chain_service.py tests/test_awooop_truth_chain_service.py +OK +``` + +**production deploy / smoke(完成)**: + +```text +Gitea: +2054 ai-code-review cecadb33 -> success +2053 CD Pipeline cecadb33 -> success + tests -> success + build-and-deploy -> success + post-deploy-checks -> success +Deploy marker: 2314bade chore(cd): deploy cecadb3 [skip ci] + +K8s image: +awoooi-api 192.168.0.110:5000/awoooi/api:cecadb331badac7aa4fb07922b892875c28a891a +awoooi-worker 192.168.0.110:5000/awoooi/api:cecadb331badac7aa4fb07922b892875c28a891a +awoooi-web 192.168.0.110:5000/awoooi/web:cecadb331badac7aa4fb07922b892875c28a891a + +health: +https://awoooi.wooo.work/api/v1/health -> 200 + +summary endpoint smoke, hours=24, limit=30: +schema_version=automation_quality_summary_v1 +incident_total=30 +evaluated_total=30 +verified_auto_repair_total=0 +production_claim.can_claim_full_auto_repair=false +score_buckets={green: 0, yellow: 0, red: 30} +by_verdict: + manual_required_no_action=18 + received_only=12 + execution_unverified=0 +top gate_failures: + auto_repair_recorded=30 + execution_recorded=30 + evidence_collected=22 + approval_state=18 + mcp_gateway_observed=12 + outbound_recorded=12 + timeline_recorded=12 + +detail smoke: +INC-20260513-CF5DCE -> stage=manual_required / verdict=manual_required_no_action / effective_execution_records=0 / noop_operation_records=1 +INC-20260513-553113 -> stage=manual_required / verdict=manual_required_no_action / effective_execution_records=0 / noop_operation_records=1 / audit_only_operation_records=1 +INC-20260513-42FCEC -> stage=manual_required / verdict=manual_required_no_action / effective_execution_records=0 / noop_operation_records=1 +``` + +判讀: + +- T13 已完成並推版:現在 Operator Console / truth-chain 不會再把 NO_ACTION 或 Ansible candidate audit 誤認為真正修復執行。 +- 產線結果更誠實:目前不是「修了但未驗證」,而是「18 筆需人工判斷、12 筆只收到告警、0 筆可宣稱已驗證自動修復」。 +- 下一步 T14 應從「分類校正」進到真正閉環:讓可安全處理的低風險事件產生 durable `auto_repair_executions`、post-execution `verification_result`、KM / learning writeback;不能再用 NO_ACTION 假裝自動修復。 +- 目前整體進度更新:約 78%。 diff --git a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md index 7ee6cb5e..eff02b1e 100644 --- a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md +++ b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md @@ -2015,6 +2015,23 @@ Phase 6 完成後 - `post_verify_first_class=5` - 邊界:T8 只完成修復後 read-only 驗證 path。approval execution SSH / write-admin MCP / Ansible check-mode / apply / rollback 仍未完成,不能宣稱真正自動修復閉環已全面完成。 +**T9-T12 AwoooP truth-chain / quality visibility production verified(2026-05-13 台北)**: +- T9:approved SSH execution 已經過 first-class `McpGateway`,`approval_executor` write tools 需有 active contract / grants / Gate 5 approval;production smoke `trace_id=codex-t9-approval-smoke-eb44cd4a` 證明 `gateway_path=awooop_mcp_gateway`、`policy_enforced=true`、`required_scope=write`、`is_shadow=false`。provider error 來自安全測試位址不在 `SSH_MCP_ALLOWED_HOSTS`,不是 Gateway 黑盒。 +- T10:truth-chain 新增 MCP Gateway summary,可把 `first_class_total`、`legacy_bridge_total`、`policy_enforced_total`、agent/tool/scope 與 provider failure stage 一次輸出,避免 Telegram 卡片只看到「失敗」而不知道卡在 Gate 還是底層 provider。 +- T11:Telegram incident detail 與 AwoooP Run Detail 都接上 MCP Gateway summary;production smoke 以 detail formatter 驗證,不發真實 Telegram 訊息避免洗版。 +- T12a:Telegram outbound mirror 新增 `sent_at` 與 structured `source_refs`,新出站訊息能以 incident / code refs 回查,不再只靠 preview 文字猜關聯。 +- T12b:truth-chain 新增 `automation_quality` gate,逐筆回答是否 `auto_repaired_verified`、`execution_unverified`、`manual_required_no_action` 等;Telegram detail 顯示品質摘要。 +- T12c:`GET /api/v1/platform/truth-chain/quality/summary` 新增全體品質總覽;production 50 筆 smoke 顯示 `verified_auto_repair_total=0`、`production_claim.can_claim_full_auto_repair=false`,因此仍不能宣稱完整 AI 自動修復閉環。 +- T12d:Operator Console `/awooop` 首頁新增「自動化品質」面板;summary endpoint 改為 public aggregate 且清空 `examples`,source-level `/truth-chain/{source_id}` 仍需 operator auth。production image `356bfce2c8663c46933df4a9050dfaa9f594436a`、Gitea runs `2050/2049` success、health 200、頁面含 `自動化品質` / `不可宣稱完整閉環`。 +- 目前總體判讀:真相鏈可見度已從 Telegram 卡片補到 DB / truth-chain API / Run Detail / Operator Console,但 latest production aggregate 仍是 `verified_auto_repair_total=0`。下一步 T13 必須收斂 `execution_unverified`:post-execution verification、auto-repair durable record、learning / KM writeback 缺口要從「可見」推到「閉環」。 + +**T13 NO_ACTION / audit-only quality classification production verified(2026-05-13 台北)**: +- 觸發:T12d production summary 顯示 `execution_unverified`,但 live trace 證實多數其實是 `NO_ACTION` 或 `ansible_candidate_matched/dry_run` audit row,被 truth-chain 誤算成有效修復執行。 +- 修正:truth-chain 新增 effective execution 判定;NO_ACTION / OBSERVE / INVESTIGATE row 計入 `noop_operation_records`,`status=dry_run`、`ansible_candidate_matched`、`ansible_execution_skipped` 計為 audit-only,不再推動 `execution_succeeded` / `execution_unverified`。 +- Production:`cecadb33 fix(awooop): exclude audit-only ops from repair quality` 已部署,Gitea runs `2054/2053` success,deploy marker `2314bade`,API/Worker/Web image 均為 `cecadb331badac7aa4fb07922b892875c28a891a`,health 200。 +- Smoke:quality summary `hours=24&limit=30` 由舊的 `execution_unverified=11` 校正為 `manual_required_no_action=18`、`received_only=12`、`execution_unverified=0`、`verified_auto_repair_total=0`、`production_claim=false`。 +- 判讀:T13 完成的是「真相分類校正」,不是自動修復閉環。下一步 T14 必須讓可安全處理的低風險事件產生 durable `auto_repair_executions`、post-execution `verification_result`、KM / learning writeback;禁止再用 NO_ACTION 或 dry-run audit 假裝自動修復。 + --- ### 2026-04-20 晚 (台北) — C1-C4 全流程串接 — Playbook 鏈路保護(commit de2d34d) diff --git a/k8s/awoooi-prod/kustomization.yaml b/k8s/awoooi-prod/kustomization.yaml index 8babe885..72e47048 100644 --- a/k8s/awoooi-prod/kustomization.yaml +++ b/k8s/awoooi-prod/kustomization.yaml @@ -40,7 +40,7 @@ resources: images: - name: 192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER newName: 192.168.0.110:5000/awoooi/api - newTag: e4203060f3a417e879c2ad3b32894e69444105ad + newTag: cecadb331badac7aa4fb07922b892875c28a891a - name: 192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER newName: 192.168.0.110:5000/awoooi/web - newTag: e4203060f3a417e879c2ad3b32894e69444105ad + newTag: cecadb331badac7aa4fb07922b892875c28a891a