Merge remote-tracking branch 'gitea/main' into codex/security-supply-chain-contracts-20260512

2026-05-13 18:47:51 +08:00
parent a334ae48cc a28baa6197
commit 035d0de4c7
5 changed files with 301 additions and 16 deletions
--- a/apps/api/src/services/awooop_truth_chain_service.py
+++ b/apps/api/src/services/awooop_truth_chain_service.py
@@ -101,6 +101,55 @@ def _auto_repair_ids(auto_repair_executions: list[dict[str, Any]]) -> list[str]:
    return [str(row["id"]) for row in auto_repair_executions if row.get("id")]


+def _looks_like_no_action(value: Any) -> bool:
+    """Report whether ``value`` reads as a "no action" style marker.
+
+    The value is stringified (falsy values become ``""``) and upper-cased.
+    It matches when the text starts with ``OBSERVE`` or ``INVESTIGATE``, or
+    contains ``NO_ACTION``, ``NO-ACTION`` or ``NOACTION`` anywhere.
+    """
+    normalized = str(value or "").upper()
+    # Prefix markers are anchored at the very start; no whitespace is trimmed.
+    if normalized.startswith(("OBSERVE", "INVESTIGATE")):
+        return True
+    return any(marker in normalized for marker in ("NO_ACTION", "NO-ACTION", "NOACTION"))
+
+
+def _approval_has_no_action(approvals: list[dict[str, Any]]) -> bool:
+    """Return True when at least one approval row's ``action`` looks like a no-action marker."""
+    for approval in approvals:
+        if _looks_like_no_action(approval.get("action")):
+            return True
+    return False
+
+
+def _is_no_action_operation(row: dict[str, Any]) -> bool:
+    """Tell whether an audit row records an observation rather than a repair.
+
+    Only rows whose ``operation_type`` is exactly ``playbook_executed`` can
+    qualify. Such a row counts as no-action when any of its ``input_action``,
+    ``output_action``, ``output_reason`` or ``output_not_used_reason`` values
+    looks like a no-action marker.
+    """
+    if str(row.get("operation_type") or "") != "playbook_executed":
+        return False
+    inspected_fields = (
+        "input_action",
+        "output_action",
+        "output_reason",
+        "output_not_used_reason",
+    )
+    for field in inspected_fields:
+        if _looks_like_no_action(row.get(field)):
+            return True
+    return False
+
+
+def _is_audit_only_operation(row: dict[str, Any]) -> bool:
+    """Tell whether an operation row is audit-only.
+
+    A row is audit-only when its ``status`` equals ``dry_run`` (compared
+    case-insensitively), or when its ``operation_type`` is exactly
+    ``ansible_candidate_matched`` or ``ansible_execution_skipped``.
+    """
+    if str(row.get("status") or "").lower() == "dry_run":
+        return True
+    # The operation type is compared case-sensitively, unlike the status.
+    kind = str(row.get("operation_type") or "")
+    return kind in ("ansible_candidate_matched", "ansible_execution_skipped")
+
+
+def _effective_execution_ops(automation_ops: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Return the operation rows that are neither no-action nor audit-only.
+
+    Input order is preserved and the row dicts are returned as-is (not copied).
+    """
+    effective: list[dict[str, Any]] = []
+    for op in automation_ops:
+        if _is_no_action_operation(op) or _is_audit_only_operation(op):
+            continue
+        effective.append(op)
+    return effective
+
+
 def build_incident_reconciliation(
    *,
    incident: dict[str, Any] | None,
@@ -255,7 +304,8 @@ def _truth_status(
    if incident is not None:
        incident_status = str(incident.get("status") or "unknown")
        repair_rows = auto_repair_executions or []
-        has_execution_records = bool(automation_ops or repair_rows)
+        effective_ops = _effective_execution_ops(automation_ops)
+        has_execution_records = bool(effective_ops or repair_rows)
        stage = "received"
        stage_status = incident_status.lower()
        if incident_status in {"RESOLVED", "CLOSED"}:
@@ -275,23 +325,24 @@ def _truth_status(

        approval_statuses = {str(row.get("status") or "").upper() for row in approvals}
        approval_actions = " ".join(str(row.get("action") or "") for row in approvals).upper()
+        approval_no_action = _approval_has_no_action(approvals)
        if any(status in {"PENDING", "WAITING_APPROVAL"} for status in approval_statuses):
            stage = "approval_required"
            stage_status = "waiting"
            needs_human = True
-        elif "APPROVED" in approval_statuses and not has_execution_records:
-            if "NO_ACTION" in approval_actions:
+        elif not has_execution_records and (approval_no_action or "NO_ACTION" in approval_actions):
+            if approval_statuses:
                stage = "manual_required"
                stage_status = "blocked"
                needs_human = True
                blockers.append("approval_resolved_no_action_without_execution")
-            else:
-                stage = "execution_missing"
-                stage_status = "blocked"
-                needs_human = True
-                blockers.append("approved_without_execution_record")
+        elif "APPROVED" in approval_statuses and not has_execution_records:
+            stage = "execution_missing"
+            stage_status = "blocked"
+            needs_human = True
+            blockers.append("approved_without_execution_record")

-        op_statuses = {str(row.get("status") or "").lower() for row in automation_ops}
+        op_statuses = {str(row.get("status") or "").lower() for row in effective_ops}
        repair_successes = {row.get("success") for row in repair_rows}
        if op_statuses or repair_successes:
            if (op_statuses & {"success", "completed"}) or True in repair_successes:
@@ -372,12 +423,16 @@ def build_automation_quality(
    evidence_succeeded = sum(int(row.get("sensors_succeeded") or 0) for row in evidence_rows)
    gateway_total = int(gateway_mcp_summary.get("total") or 0)
    legacy_total = int(legacy_mcp_summary.get("total") or 0)
-    automation_statuses = {str(row.get("status") or "").lower() for row in automation_ops}
+    effective_ops = _effective_execution_ops(automation_ops)
+    noop_ops = [row for row in automation_ops if _is_no_action_operation(row)]
+    audit_only_ops = [row for row in automation_ops if _is_audit_only_operation(row)]
+    automation_statuses = {str(row.get("status") or "").lower() for row in effective_ops}
    auto_repair_successes = {row.get("success") for row in auto_repair_executions}
-    has_execution = bool(automation_ops or auto_repair_executions)
+    has_execution = bool(effective_ops or auto_repair_executions)
    verification_result = _latest_verification_result(incident, evidence_rows)
    approval_statuses = {str(row.get("status") or "").upper() for row in approvals}
    approval_actions = " ".join(str(row.get("action") or "") for row in approvals).upper()
+    approval_no_action = _approval_has_no_action(approvals)

    gate("source_persisted", "passed", str(incident.get("incident_id")))
    gate("outbound_recorded", "passed" if outbound_rows else "missing", str(len(outbound_rows)))
@@ -399,14 +454,14 @@ def build_automation_quality(

    if any(status in {"PENDING", "WAITING_APPROVAL"} for status in approval_statuses):
        gate("approval_state", "warning", "waiting_approval")
-    elif "APPROVED" in approval_statuses and "NO_ACTION" in approval_actions and not has_execution:
+    elif approval_statuses and (approval_no_action or "NO_ACTION" in approval_actions) and not has_execution:
        gate("approval_state", "failed", "approved_no_action_without_execution")
    elif approvals:
        gate("approval_state", "passed", ",".join(sorted(approval_statuses)))
    else:
        gate("approval_state", "not_applicable", "no approval")

-    gate("execution_recorded", "passed" if has_execution else "missing", str(len(automation_ops) + len(auto_repair_executions)))
+    gate("execution_recorded", "passed" if has_execution else "missing", str(len(effective_ops) + len(auto_repair_executions)))
    gate("auto_repair_recorded", "passed" if auto_repair_executions else "missing", str(len(auto_repair_executions)))

    if not has_execution:
@@ -433,7 +488,7 @@ def build_automation_quality(
        verdict = "execution_failed"
    elif has_execution:
        verdict = "execution_unverified"
-    elif "APPROVED" in approval_statuses and "NO_ACTION" in approval_actions:
+    elif approval_statuses and (approval_no_action or "NO_ACTION" in approval_actions):
        verdict = "manual_required_no_action"
    elif any(status in {"PENDING", "WAITING_APPROVAL"} for status in approval_statuses):
        verdict = "approval_required"
@@ -479,6 +534,9 @@ def build_automation_quality(
            "legacy_mcp_total": legacy_total,
            "approvals": len(approvals),
            "automation_operation_records": len(automation_ops),
+            "effective_execution_records": len(effective_ops),
+            "noop_operation_records": len(noop_ops),
+            "audit_only_operation_records": len(audit_only_ops),
            "auto_repair_execution_records": len(auto_repair_executions),
            "verification_result": verification_result,
            "knowledge_entries": len(km_entries),
@@ -960,6 +1018,7 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
                    error,
                    duration_ms,
                    tags,
+                    input ->> 'action' AS input_action,
                    input ->> 'executor' AS input_executor,
                    input ->> 'execution_backend' AS input_execution_backend,
                    input ->> 'playbook_id' AS input_playbook_id,
@@ -967,6 +1026,8 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
                    input ->> 'ansible_playbook_path' AS input_ansible_playbook_path,
                    input ->> 'check_mode' AS input_check_mode,
                    input ->> 'not_used_reason' AS input_not_used_reason,
+                    output ->> 'action' AS output_action,
+                    output ->> 'reason' AS output_reason,
                    output ->> 'executor' AS output_executor,
                    output ->> 'execution_backend' AS output_execution_backend,
                    output ->> 'playbook_id' AS output_playbook_id,
--- a/apps/api/tests/test_awooop_truth_chain_service.py
+++ b/apps/api/tests/test_awooop_truth_chain_service.py
@@ -58,6 +58,38 @@ def test_truth_status_marks_no_action_approval_as_manual_required() -> None:
    assert "awooop_mcp_gateway_audit_empty" in status["blockers"]


+def test_truth_status_does_not_treat_no_action_audit_as_execution() -> None:
+    """A NO_ACTION playbook row plus a dry-run candidate row must not count as execution."""
+    no_action_row = {
+        "operation_type": "playbook_executed",
+        "status": "success",
+        "actor": "approval_execution",
+        "output_reason": "NO_ACTION",
+        "output_action": "未知操作 | NO_ACTION",
+    }
+    dry_run_candidate_row = {
+        "operation_type": "ansible_candidate_matched",
+        "status": "dry_run",
+        "output_not_used_reason": "Ansible check-mode is not wired yet",
+    }
+
+    status = _truth_status(
+        incident={"incident_id": "INC-1", "status": "RESOLVED"},
+        approvals=[{"status": "EXECUTION_SUCCESS", "action": "未知操作 | NO_ACTION"}],
+        evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 6}],
+        automation_ops=[no_action_row, dry_run_candidate_row],
+        drift=None,
+        drift_repeat_count=0,
+        gateway_mcp_total=8,
+        legacy_mcp_total=8,
+        outbound_visible_total=1,
+    )
+
+    # With both rows filtered out, the approval is treated as resolved without execution.
+    assert status["current_stage"] == "manual_required"
+    assert status["stage_status"] == "blocked"
+    assert status["needs_human"] is True
+    assert "approval_resolved_no_action_without_execution" in status["blockers"]
+
+
 def test_truth_status_marks_repeated_pending_drift_as_human_needed() -> None:
    status = _truth_status(
        incident=None,
@@ -249,6 +281,43 @@ def test_automation_quality_marks_no_action_without_execution() -> None:
    assert "execution_recorded" in quality["blockers"]


+def test_automation_quality_ignores_no_action_audit_rows_as_execution() -> None:
+    """NO_ACTION and dry-run audit rows are counted as facts but not as execution."""
+    no_action_row = {
+        "operation_type": "playbook_executed",
+        "status": "success",
+        "actor": "approval_execution",
+        "output_reason": "NO_ACTION",
+        "output_action": "未知操作 | NO_ACTION",
+    }
+    dry_run_candidate_row = {
+        "operation_type": "ansible_candidate_matched",
+        "status": "dry_run",
+        "output_not_used_reason": "Ansible check-mode is not wired yet",
+    }
+
+    quality = build_automation_quality(
+        incident={"incident_id": "INC-1", "status": "RESOLVED"},
+        approvals=[{"status": "EXECUTION_SUCCESS", "action": "未知操作 | NO_ACTION"}],
+        evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 6}],
+        automation_ops=[no_action_row, dry_run_candidate_row],
+        auto_repair_executions=[],
+        gateway_mcp_summary={"total": 8},
+        legacy_mcp_summary={"total": 8},
+        outbound_rows=[{"message_id": "m1"}],
+        km_entries=[{"id": "km-1"}],
+        timeline_events=[{"id": "tl-1"}],
+    )
+
+    assert quality["verdict"] == "manual_required_no_action"
+
+    # Both rows remain visible in the raw count, but neither is effective execution.
+    expected_facts = {
+        "automation_operation_records": 2,
+        "effective_execution_records": 0,
+        "noop_operation_records": 1,
+        "audit_only_operation_records": 1,
+    }
+    for fact_name, expected_count in expected_facts.items():
+        assert quality["facts"][fact_name] == expected_count
+
+    gate_status_by_name = {gate["name"]: gate["status"] for gate in quality["gates"]}
+    assert gate_status_by_name["execution_recorded"] == "missing"
+    assert gate_status_by_name["verification_recorded"] == "not_applicable"
+
+
 def test_automation_quality_marks_verified_auto_repair() -> None:
    quality = build_automation_quality(
        incident={
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -7947,3 +7947,141 @@ curl http://127.0.0.1:3000/zh-TW/awooop -> 200
 ```

 **目前整體進度**：約 75%。
+
+**production deploy / smoke 追加（完成）**：
+
+```text
+Gitea:
+2050 ai-code-review 356bfce2 -> success
+2049 CD Pipeline 356bfce2 -> success
+2048 ai-code-review e4203060 -> success
+2047 CD Pipeline e4203060 -> success
+Deploy marker: 90156a7c chore(cd): deploy 356bfce [skip ci]
+
+K8s image:
+awoooi-api    192.168.0.110:5000/awoooi/api:356bfce2c8663c46933df4a9050dfaa9f594436a
+awoooi-worker 192.168.0.110:5000/awoooi/api:356bfce2c8663c46933df4a9050dfaa9f594436a
+awoooi-web    192.168.0.110:5000/awoooi/web:356bfce2c8663c46933df4a9050dfaa9f594436a
+
+health:
+https://awoooi.wooo.work/api/v1/health -> 200
+
+summary endpoint smoke:
+GET /api/v1/platform/truth-chain/quality/summary?project_id=awoooi&hours=24&limit=10 -> 200
+schema_version=automation_quality_summary_v1
+incident_total=10
+evaluated_total=10
+verified_auto_repair_total=0
+production_claim.can_claim_full_auto_repair=false
+examples_len=0
+visibility_note=Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details.
+score_buckets={green: 0, yellow: 3, red: 7}
+by_verdict:
+  received_only=4
+  execution_unverified=3
+  manual_required_no_action=3
+top gate_failures:
+  auto_repair_recorded=10
+  evidence_collected=7
+  execution_recorded=7
+  mcp_gateway_observed=4
+  outbound_recorded=4
+  timeline_recorded=4
+  approval_state=3
+  verification_recorded=3
+
+formal page smoke:
+https://awoooi.wooo.work/zh-TW/awooop -> 200
+HTML contains: AwoooP 治理總覽 / 自動化品質 / 不可宣稱完整閉環
+```
+
+判讀：
+
+- T12d 已部署：Operator Console 首頁現在能直接顯示最近告警的自動化品質總覽。
+- summary endpoint 是 public aggregate 讀取面，刻意清空 `examples`；逐筆 source-level truth-chain 仍走 `/truth-chain/{source_id}` operator auth。
+- 產線資料仍顯示 `verified_auto_repair_total=0`、`production_claim=false`，因此目前正確說法是「真相可見度已補上」，不是「完整 AI 自動修復閉環已完成」。
+- 下一步要進 T13：收斂 `execution_unverified`，把 post-execution verification / auto_repair durable record / learning writeback 的缺口從可見化推進到真正閉環。
+- 目前整體進度更新：約 76%。
+
+### 2026-05-13 — AwoooP truth-chain T13：NO_ACTION / audit-only 不再誤算成自動修復執行（production verified）
+
+**目的**：
+
+- T12d production quality summary 顯示不少 `execution_unverified`，但 live trace 發現其中多數其實是 `NO_ACTION` 或 `ansible_candidate_matched/dry_run` audit row。
+- 這些 row 是「純觀察 / 候選稽核」，不是 AI 自動修復執行；若算成 execution，Operator 會誤以為「AI 修了但沒驗證」。
+
+**變更**：
+
+- truth-chain 新增 effective execution 判定：
+  - `playbook_executed` 且 `output.reason=NO_ACTION` / action 含 `NO_ACTION` / `OBSERVE` / `INVESTIGATE` → `noop_operation_records`
+  - `status=dry_run`、`ansible_candidate_matched`、`ansible_execution_skipped` → audit-only，不算有效修復執行
+- `automation_quality.facts` 新增：
+  - `effective_execution_records`
+  - `noop_operation_records`
+  - `audit_only_operation_records`
+- `_truth_status()` / `build_automation_quality()` 都改用 effective execution，避免 NO_ACTION 把 stage 推成 `execution_succeeded` 或 verdict 推成 `execution_unverified`。
+
+**local verification**：
+
+```text
+DATABASE_URL=postgresql+asyncpg://u:p@localhost:5432/db pytest tests/test_awooop_truth_chain_service.py tests/test_platform_router_order.py -q
+21 passed
+
+ruff check --select F821 src/services/awooop_truth_chain_service.py tests/test_awooop_truth_chain_service.py
+OK
+
+python3 -m py_compile src/services/awooop_truth_chain_service.py tests/test_awooop_truth_chain_service.py
+OK
+```
+
+**production deploy / smoke（完成）**：
+
+```text
+Gitea:
+2054 ai-code-review cecadb33 -> success
+2053 CD Pipeline cecadb33 -> success
+  tests -> success
+  build-and-deploy -> success
+  post-deploy-checks -> success
+Deploy marker: 2314bade chore(cd): deploy cecadb3 [skip ci]
+
+K8s image:
+awoooi-api    192.168.0.110:5000/awoooi/api:cecadb331badac7aa4fb07922b892875c28a891a
+awoooi-worker 192.168.0.110:5000/awoooi/api:cecadb331badac7aa4fb07922b892875c28a891a
+awoooi-web    192.168.0.110:5000/awoooi/web:cecadb331badac7aa4fb07922b892875c28a891a
+
+health:
+https://awoooi.wooo.work/api/v1/health -> 200
+
+summary endpoint smoke, hours=24, limit=30:
+schema_version=automation_quality_summary_v1
+incident_total=30
+evaluated_total=30
+verified_auto_repair_total=0
+production_claim.can_claim_full_auto_repair=false
+score_buckets={green: 0, yellow: 0, red: 30}
+by_verdict:
+  manual_required_no_action=18
+  received_only=12
+  execution_unverified=0
+top gate_failures:
+  auto_repair_recorded=30
+  execution_recorded=30
+  evidence_collected=22
+  approval_state=18
+  mcp_gateway_observed=12
+  outbound_recorded=12
+  timeline_recorded=12
+
+detail smoke:
+INC-20260513-CF5DCE -> stage=manual_required / verdict=manual_required_no_action / effective_execution_records=0 / noop_operation_records=1
+INC-20260513-553113 -> stage=manual_required / verdict=manual_required_no_action / effective_execution_records=0 / noop_operation_records=1 / audit_only_operation_records=1
+INC-20260513-42FCEC -> stage=manual_required / verdict=manual_required_no_action / effective_execution_records=0 / noop_operation_records=1
+```
+
+判讀：
+
+- T13 已完成並推版：現在 Operator Console / truth-chain 不會再把 NO_ACTION 或 Ansible candidate audit 誤認為真正修復執行。
+- 產線結果更誠實：目前不是「修了但未驗證」，而是「18 筆需人工判斷、12 筆只收到告警、0 筆可宣稱已驗證自動修復」。
+- 下一步 T14 應從「分類校正」進到真正閉環：讓可安全處理的低風險事件產生 durable `auto_repair_executions`、post-execution `verification_result`、KM / learning writeback；不能再用 NO_ACTION 假裝自動修復。
+- 目前整體進度更新：約 78%。
--- a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md
+++ b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md
@@ -2015,6 +2015,23 @@ Phase 6 完成後
  - `post_verify_first_class=5`
 - 邊界：T8 只完成修復後 read-only 驗證 path。approval execution SSH / write-admin MCP / Ansible check-mode / apply / rollback 仍未完成，不能宣稱真正自動修復閉環已全面完成。

+**T9-T12 AwoooP truth-chain / quality visibility production verified（2026-05-13 台北）**：
+- T9：approved SSH execution 已經過 first-class `McpGateway`，`approval_executor` write tools 需有 active contract / grants / Gate 5 approval；production smoke `trace_id=codex-t9-approval-smoke-eb44cd4a` 證明 `gateway_path=awooop_mcp_gateway`、`policy_enforced=true`、`required_scope=write`、`is_shadow=false`。provider error 來自安全測試位址不在 `SSH_MCP_ALLOWED_HOSTS`，不是 Gateway 黑盒。
+- T10：truth-chain 新增 MCP Gateway summary，可把 `first_class_total`、`legacy_bridge_total`、`policy_enforced_total`、agent/tool/scope 與 provider failure stage 一次輸出，避免 Telegram 卡片只看到「失敗」而不知道卡在 Gate 還是底層 provider。
+- T11：Telegram incident detail 與 AwoooP Run Detail 都接上 MCP Gateway summary；production smoke 以 detail formatter 驗證，不發真實 Telegram 訊息避免洗版。
+- T12a：Telegram outbound mirror 新增 `sent_at` 與 structured `source_refs`，新出站訊息能以 incident / code refs 回查，不再只靠 preview 文字猜關聯。
+- T12b：truth-chain 新增 `automation_quality` gate，逐筆回答是否 `auto_repaired_verified`、`execution_unverified`、`manual_required_no_action` 等；Telegram detail 顯示品質摘要。
+- T12c：`GET /api/v1/platform/truth-chain/quality/summary` 新增全體品質總覽；production 50 筆 smoke 顯示 `verified_auto_repair_total=0`、`production_claim.can_claim_full_auto_repair=false`，因此仍不能宣稱完整 AI 自動修復閉環。
+- T12d：Operator Console `/awooop` 首頁新增「自動化品質」面板；summary endpoint 改為 public aggregate 且清空 `examples`，source-level `/truth-chain/{source_id}` 仍需 operator auth。production image `356bfce2c8663c46933df4a9050dfaa9f594436a`、Gitea runs `2050/2049` success、health 200、頁面含 `自動化品質` / `不可宣稱完整閉環`。
+- 目前總體判讀：真相鏈可見度已從 Telegram 卡片補到 DB / truth-chain API / Run Detail / Operator Console，但 latest production aggregate 仍是 `verified_auto_repair_total=0`。下一步 T13 必須收斂 `execution_unverified`：post-execution verification、auto-repair durable record、learning / KM writeback 缺口要從「可見」推到「閉環」。
+
+**T13 NO_ACTION / audit-only quality classification production verified（2026-05-13 台北）**：
+- 觸發：T12d production summary 顯示 `execution_unverified`，但 live trace 證實多數其實是 `NO_ACTION` 或 `ansible_candidate_matched/dry_run` audit row，被 truth-chain 誤算成有效修復執行。
+- 修正：truth-chain 新增 effective execution 判定；NO_ACTION / OBSERVE / INVESTIGATE row 計入 `noop_operation_records`，`status=dry_run`、`ansible_candidate_matched`、`ansible_execution_skipped` 計為 audit-only，不再推動 `execution_succeeded` / `execution_unverified`。
+- Production：`cecadb33 fix(awooop): exclude audit-only ops from repair quality` 已部署，Gitea runs `2054/2053` success，deploy marker `2314bade`，API/Worker/Web image 均為 `cecadb331badac7aa4fb07922b892875c28a891a`，health 200。
+- Smoke：quality summary `hours=24&limit=30` 由舊的 `execution_unverified=11` 校正為 `manual_required_no_action=18`、`received_only=12`、`execution_unverified=0`、`verified_auto_repair_total=0`、`production_claim=false`。
+- 判讀：T13 完成的是「真相分類校正」，不是自動修復閉環。下一步 T14 必須讓可安全處理的低風險事件產生 durable `auto_repair_executions`、post-execution `verification_result`、KM / learning writeback；禁止再用 NO_ACTION 或 dry-run audit 假裝自動修復。
+
 ---

 ### 2026-04-20 晚 (台北) — C1-C4 全流程串接 — Playbook 鏈路保護（commit de2d34d）
--- a/k8s/awoooi-prod/kustomization.yaml
+++ b/k8s/awoooi-prod/kustomization.yaml
@@ -40,7 +40,7 @@ resources:
 images:
 - name: 192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER
  newName: 192.168.0.110:5000/awoooi/api
-  newTag: e4203060f3a417e879c2ad3b32894e69444105ad
+  newTag: cecadb331badac7aa4fb07922b892875c28a891a
 - name: 192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER
  newName: 192.168.0.110:5000/awoooi/web
-  newTag: e4203060f3a417e879c2ad3b32894e69444105ad
+  newTag: cecadb331badac7aa4fb07922b892875c28a891a