From d7b3997b4aca20f41ba6ddc9661fa8eba8367bca Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 25 Jun 2026 20:52:22 +0800 Subject: [PATCH] fix(api): distinguish ansible dry run from repair --- apps/api/src/services/operator_outcome.py | 20 +++++ .../src/services/platform_operator_service.py | 41 ++++++++- apps/api/src/services/telegram_gateway.py | 88 +++++++++++++++---- .../test_awooop_operator_timeline_labels.py | 76 ++++++++++++++++ apps/api/tests/test_operator_outcome.py | 29 ++++++ .../tests/test_telegram_message_templates.py | 68 ++++++++++++++ docs/LOGBOOK.md | 24 +++++ ...6-25-awoooi-navigation-ia-consolidation.md | 16 +++- ...026-06-25-awoooi-product-uiux-inventory.md | 23 ++++- 9 files changed, 362 insertions(+), 23 deletions(-) diff --git a/apps/api/src/services/operator_outcome.py b/apps/api/src/services/operator_outcome.py index 818bb899..a580be31 100644 --- a/apps/api/src/services/operator_outcome.py +++ b/apps/api/src/services/operator_outcome.py @@ -106,6 +106,14 @@ def _build_execution_result( failure_status = "no_command_failed" summary = "已執行成功,但缺少修復驗證結果" terminal = False + elif state == "dry_run_only_owner_review_required": + approval_status = "owner_review_required" + completion_status = "dry_run_completed_no_apply" + command_status = "check_mode_succeeded" + repair_status = "not_executed" + failure_status = "not_applicable" + summary = "只完成 Ansible check-mode 乾跑,尚未執行修復" + terminal = False elif state == "no_action_manual_review": approval_status = "pending_manual_review" completion_status = "not_started_no_action" @@ -218,6 +226,11 @@ def build_operator_outcome( has_repair_execution = _safe_int(facts.get("effective_execution_records")) > 0 or _safe_int( facts.get("auto_repair_execution_records") ) > 0 + ansible_dry_run_only = ( + _safe_int(facts.get("ansible_check_mode_total")) > 0 + and _safe_int(facts.get("ansible_apply_total")) == 0 + and _safe_int(facts.get("auto_repair_execution_records")) == 0 + ) has_nonrepair_operation = ( _safe_int(facts.get("automation_operation_records")) > 0 and not has_repair_execution @@ -260,6 +273,13 @@ def build_operator_outcome( next_action = "manual_verify_or_repair" summary = "已執行但驗證退化,需人工確認" reason = first_blocker or f"verification={verification}" + elif verdict == "ansible_check_mode_only" or ansible_dry_run_only: + state = "dry_run_only_owner_review_required" + severity = "warning" + needs_human = True + next_action = "owner_review_apply_gate_or_create_verifier_plan" + summary = "只完成 Ansible check-mode 乾跑,尚未執行修復" + reason = first_blocker or "ansible_check_mode_without_apply" elif verdict == "execution_unverified" or ( has_repair_execution and verification == "missing" ): diff --git a/apps/api/src/services/platform_operator_service.py b/apps/api/src/services/platform_operator_service.py index 851ac72f..e93bc7fe 100644 --- a/apps/api/src/services/platform_operator_service.py +++ b/apps/api/src/services/platform_operator_service.py @@ -3963,6 +3963,22 @@ def _has_nonrepair_operation_evidence(facts: dict[str, Any]) -> bool: ) +def _status_chain_ansible_dry_run_only( + execution_section: dict[str, Any], + facts: dict[str, Any], +) -> bool: + ansible = execution_section.get("ansible") + if not isinstance(ansible, dict): + return False + return ( + _safe_int(ansible.get("check_mode_total")) > 0 + and _safe_int(ansible.get("apply_total")) == 0 + and not bool(ansible.get("applied")) + and not bool(ansible.get("controlled_apply")) + and _safe_int(facts.get("auto_repair_execution_records")) == 0 + ) + + def _latest_remediation_history_item( history: dict[str, Any] | None, ) -> dict[str, Any]: @@ -4795,6 +4811,12 @@ def _build_awooop_status_chain( mcp_section = _status_chain_mcp_section(truth_chain) execution_section = _status_chain_execution_section(truth_chain) + ansible_dry_run_only = _status_chain_ansible_dry_run_only(execution_section, facts) + if ansible_dry_run_only: + verdict = "ansible_check_mode_only" + repair_state = "ansible_check_mode_only" + next_step = "owner_review_apply_gate_or_create_verifier_plan" + needs_human = True source_section = _status_chain_source_section(truth_chain) if source_correlation is not None: source_section["correlation"] = source_correlation @@ -4809,7 +4831,23 @@ def _build_awooop_status_chain( if fetch_error: blockers.append("truth_chain_fetch_failed") outcome = {} - if isinstance(quality.get("operator_outcome"), dict): + if ansible_dry_run_only: + outcome_quality = dict(quality) + outcome_facts = dict(facts) + ansible = execution_section.get("ansible") if isinstance(execution_section.get("ansible"), dict) else {} + outcome_facts["ansible_check_mode_total"] = _safe_int(ansible.get("check_mode_total")) + outcome_facts["ansible_apply_total"] = _safe_int(ansible.get("apply_total")) + outcome_quality["facts"] = outcome_facts + outcome_quality["verdict"] = "ansible_check_mode_only" + outcome_quality.pop("operator_outcome", None) + outcome = build_operator_outcome( + truth_status=truth_status, + automation_quality=outcome_quality, + remediation_state=remediation_state, + fetch_error=fetch_error, + source_id=source_id, + ) + elif isinstance(quality.get("operator_outcome"), dict): outcome = dict(quality["operator_outcome"]) else: outcome = build_operator_outcome( @@ -4849,6 +4887,7 @@ def _build_awooop_status_chain( "latest_mode": latest.get("mode"), "latest_at": latest.get("created_at"), "latest_preview": latest.get("verification_result_preview"), + "ansible_dry_run_only": ansible_dry_run_only, }, "writes": { "incident": latest.get("writes_incident_state"), diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py index c398cc9b..efba14c5 100644 --- a/apps/api/src/services/telegram_gateway.py +++ b/apps/api/src/services/telegram_gateway.py @@ -1248,6 +1248,20 @@ def _has_nonrepair_operation_evidence(facts: dict[str, object]) -> bool: ) +def _ansible_dry_run_only_from_execution( + execution: dict[str, object], + facts: dict[str, object], +) -> bool: + ansible = execution.get("ansible") if isinstance(execution.get("ansible"), dict) else {} + return ( + _safe_int(ansible.get("check_mode_total")) > 0 + and _safe_int(ansible.get("apply_total")) == 0 + and not bool(ansible.get("applied")) + and not bool(ansible.get("controlled_apply")) + and _safe_int(facts.get("auto_repair_execution_records")) == 0 + ) + + def _bool_code(value: object, *, unknown_when_none: bool = False) -> str: if value is None and unknown_when_none: return "unknown" @@ -1308,10 +1322,30 @@ def _format_awooop_status_chain_lines( has_repair_execution = _has_repair_execution_evidence(facts) has_nonrepair_operation = _has_nonrepair_operation_evidence(facts) + execution = _callback_reply_awooop_execution_snapshot(truth_chain) + ansible = execution.get("ansible") if isinstance(execution.get("ansible"), dict) else {} + ansible_considered = bool(ansible.get("considered")) + ansible_check_total = _safe_int(ansible.get("check_mode_total")) + ansible_apply_total = _safe_int(ansible.get("apply_total")) + ansible_latest_status = str(ansible.get("latest_status") or "--") + ansible_latest_operation = str(ansible.get("latest_operation_type") or "--") + ansible_latest_rc = str(ansible.get("latest_returncode") if ansible.get("latest_returncode") not in (None, "") else "--") + ansible_playbook = str( + ansible.get("latest_catalog_id") + or ansible.get("latest_playbook_path") + or "--" + ) + ansible_approval = str(ansible.get("approval_source") or "--") + ansible_dry_run_only = _ansible_dry_run_only_from_execution(execution, facts) + if ansible_dry_run_only: + verdict = "ansible_check_mode_only" if verdict == "auto_repaired_verified": repair_state = "auto_repaired_verified" next_step = "monitor_for_regression" + elif ansible_dry_run_only: + repair_state = "ansible_check_mode_only" + next_step = "owner_review_apply_gate_or_create_verifier_plan" elif has_repair_execution: repair_state = "executed_pending_verification" if verification == "missing" else "executed" next_step = "verify_execution_result" @@ -1341,28 +1375,23 @@ def _format_awooop_status_chain_lines( and repair_state != "auto_repaired_verified" ): needs_human = True + outcome_quality = quality + if ansible_dry_run_only: + outcome_quality = dict(quality) + outcome_facts = dict(facts) + outcome_facts["ansible_check_mode_total"] = ansible_check_total + outcome_facts["ansible_apply_total"] = ansible_apply_total + outcome_quality["facts"] = outcome_facts + outcome_quality["verdict"] = "ansible_check_mode_only" + outcome_quality.pop("operator_outcome", None) outcome = _operator_outcome_from_blocks( truth_status=truth_status, - quality=quality, + quality=outcome_quality, remediation_state=remediation_state, ) if outcome: needs_human = bool(needs_human or outcome.get("needs_human")) next_step = str(outcome.get("next_action") or next_step) - execution = _callback_reply_awooop_execution_snapshot(truth_chain) - ansible = execution.get("ansible") if isinstance(execution.get("ansible"), dict) else {} - ansible_considered = bool(ansible.get("considered")) - ansible_check_total = _safe_int(ansible.get("check_mode_total")) - ansible_apply_total = _safe_int(ansible.get("apply_total")) - ansible_latest_status = str(ansible.get("latest_status") or "--") - ansible_latest_operation = str(ansible.get("latest_operation_type") or "--") - ansible_latest_rc = str(ansible.get("latest_returncode") if ansible.get("latest_returncode") not in (None, "") else "--") - ansible_playbook = str( - ansible.get("latest_catalog_id") - or ansible.get("latest_playbook_path") - or "--" - ) - ansible_approval = str(ansible.get("approval_source") or "--") lines = [ "", @@ -2613,10 +2642,25 @@ def _callback_reply_awooop_status_chain_snapshot( has_repair_execution = _has_repair_execution_evidence(facts) has_nonrepair_operation = _has_nonrepair_operation_evidence(facts) + execution_snapshot = _callback_reply_awooop_execution_snapshot(truth_chain) + ansible = ( + execution_snapshot.get("ansible") + if isinstance(execution_snapshot.get("ansible"), dict) + else {} + ) + ansible_dry_run_only = _ansible_dry_run_only_from_execution( + execution_snapshot, + facts, + ) + if ansible_dry_run_only: + verdict = "ansible_check_mode_only" if verdict == "auto_repaired_verified": repair_state = "auto_repaired_verified" next_step = "monitor_for_regression" + elif ansible_dry_run_only: + repair_state = "ansible_check_mode_only" + next_step = "owner_review_apply_gate_or_create_verifier_plan" elif has_repair_execution: repair_state = "executed_pending_verification" if verification == "missing" else "executed" next_step = "verify_execution_result" @@ -2646,9 +2690,18 @@ def _callback_reply_awooop_status_chain_snapshot( and repair_state != "auto_repaired_verified" ): needs_human = True + outcome_quality = quality + if ansible_dry_run_only: + outcome_quality = dict(quality) + outcome_facts = dict(facts) + outcome_facts["ansible_check_mode_total"] = _safe_int(ansible.get("check_mode_total")) + outcome_facts["ansible_apply_total"] = _safe_int(ansible.get("apply_total")) + outcome_quality["facts"] = outcome_facts + outcome_quality["verdict"] = "ansible_check_mode_only" + outcome_quality.pop("operator_outcome", None) outcome = _operator_outcome_from_blocks( truth_status=truth_status, - quality=quality, + quality=outcome_quality, remediation_state=remediation_state, source_id=incident_id, ) @@ -2694,13 +2747,14 @@ def _callback_reply_awooop_status_chain_snapshot( "latest_mode": latest.get("mode"), "latest_at": latest.get("created_at"), "latest_preview": latest.get("verification_result_preview"), + "ansible_dry_run_only": ansible_dry_run_only, }, "writes": { "incident": latest.get("writes_incident_state"), "auto_repair": latest.get("writes_auto_repair_result"), }, "mcp": _callback_reply_awooop_mcp_snapshot(truth_chain), - "execution": _callback_reply_awooop_execution_snapshot(truth_chain), + "execution": execution_snapshot, "source_refs": _callback_reply_awooop_source_snapshot( truth_chain, source_correlation, diff --git a/apps/api/tests/test_awooop_operator_timeline_labels.py b/apps/api/tests/test_awooop_operator_timeline_labels.py index 83e17cc8..7b1dccca 100644 --- a/apps/api/tests/test_awooop_operator_timeline_labels.py +++ b/apps/api/tests/test_awooop_operator_timeline_labels.py @@ -1732,6 +1732,82 @@ def test_awooop_status_chain_surfaces_controlled_ansible_apply_proof() -> None: assert ansible["approval_source"] == "user_chat_approved_continue" +def test_awooop_status_chain_does_not_treat_ansible_check_mode_as_repair() -> None: + chain = _build_awooop_status_chain( + incident_ids=["INC-20260625-977E5F"], + source_id="INC-20260625-977E5F", + truth_chain={ + "truth_status": { + "current_stage": "execution_succeeded", + "stage_status": "success", + "needs_human": True, + "blockers": ["incident_open_after_successful_execution"], + }, + "automation_quality": { + "verdict": "execution_unverified", + "facts": { + "auto_repair_execution_records": 0, + "automation_operation_records": 2, + "effective_execution_records": 1, + "verification_result": None, + "mcp_gateway_total": 8, + "knowledge_entries": 0, + }, + "blockers": ["verification_recorded"], + "operator_outcome": { + "state": "execution_unverified_manual_required", + "needs_human": True, + "next_action": "run_or_review_post_execution_verification", + }, + }, + "execution": { + "automation_operation_log": [ + { + "operation_type": "ansible_check_mode_executed", + "status": "success", + "actor": "ansible_check_mode_worker", + "input_executor": "ansible", + "input_catalog_id": "ansible:188-ai-web", + "input_playbook_path": "infra/ansible/playbooks/188-ai-web-readonly.yml", + } + ], + "ansible": { + "considered": True, + "records": [ + { + "operation_type": "ansible_check_mode_executed", + "status": "success", + "actor": "ansible_check_mode_worker", + "catalog_id": "ansible:188-ai-web", + "playbook_path": "infra/ansible/playbooks/188-ai-web-readonly.yml", + "execution_mode": "check_mode", + "check_mode": True, + "apply_executed": False, + "returncode": 0, + } + ], + "candidate_catalog": {"candidates": []}, + }, + }, + }, + remediation_history={"total": 0}, + ) + + assert chain["verdict"] == "ansible_check_mode_only" + assert chain["repair_state"] == "ansible_check_mode_only" + assert chain["next_step"] == "owner_review_apply_gate_or_create_verifier_plan" + assert chain["needs_human"] is True + assert chain["evidence"]["ansible_dry_run_only"] is True + assert chain["operator_outcome"]["state"] == "dry_run_only_owner_review_required" + assert ( + chain["operator_outcome"]["execution_result"]["completion_status"] + == "dry_run_completed_no_apply" + ) + assert chain["execution"]["ansible"]["check_mode_total"] == 1 + assert chain["execution"]["ansible"]["apply_total"] == 0 + assert chain["execution"]["ansible"]["applied"] is False + + def test_awooop_status_chain_includes_source_provider_correlation() -> None: chain = _build_awooop_status_chain( incident_ids=["INC-20260520-4D1124"], diff --git a/apps/api/tests/test_operator_outcome.py b/apps/api/tests/test_operator_outcome.py index 7bd17bc4..630b4992 100644 --- a/apps/api/tests/test_operator_outcome.py +++ b/apps/api/tests/test_operator_outcome.py @@ -66,6 +66,35 @@ def test_operator_outcome_marks_unverified_execution_as_human_review() -> None: assert outcome["next_action"] == "run_or_review_post_execution_verification" +def test_operator_outcome_marks_ansible_check_mode_as_dry_run_only() -> None: + outcome = build_operator_outcome( + truth_status={ + "current_stage": "execution_succeeded", + "stage_status": "success", + "needs_human": False, + "blockers": [], + }, + automation_quality={ + "verdict": "ansible_check_mode_only", + "facts": { + "effective_execution_records": 1, + "auto_repair_execution_records": 0, + "ansible_check_mode_total": 1, + "ansible_apply_total": 0, + "verification_result": None, + }, + "blockers": [], + }, + ) + + assert outcome["state"] == "dry_run_only_owner_review_required" + assert outcome["needs_human"] is True + assert outcome["next_action"] == "owner_review_apply_gate_or_create_verifier_plan" + assert outcome["execution_result"]["completion_status"] == "dry_run_completed_no_apply" + assert outcome["execution_result"]["command_status"] == "check_mode_succeeded" + assert outcome["execution_result"]["repair_status"] == "not_executed" + + def test_operator_outcome_marks_verified_repair_as_result_only() -> None: outcome = build_operator_outcome( truth_status={ diff --git a/apps/api/tests/test_telegram_message_templates.py b/apps/api/tests/test_telegram_message_templates.py index 2b4f503c..3898b754 100644 --- a/apps/api/tests/test_telegram_message_templates.py +++ b/apps/api/tests/test_telegram_message_templates.py @@ -708,6 +708,74 @@ def test_awooop_status_chain_lines_do_not_treat_audit_ops_as_repair() -> None: assert "通知:telegram_sre_war_room,awooop_operator_console" in joined +def test_awooop_status_chain_lines_mark_ansible_check_mode_as_dry_run_only() -> None: + lines = telegram_gateway_module._format_awooop_status_chain_lines( + truth_chain={ + "truth_status": { + "current_stage": "execution_succeeded", + "stage_status": "success", + "needs_human": True, + "blockers": ["incident_open_after_successful_execution"], + }, + "automation_quality": { + "verdict": "execution_unverified", + "facts": { + "auto_repair_execution_records": 0, + "automation_operation_records": 2, + "effective_execution_records": 1, + "verification_result": None, + "mcp_gateway_total": 8, + "knowledge_entries": 0, + }, + "blockers": ["verification_recorded"], + "operator_outcome": { + "state": "execution_unverified_manual_required", + "needs_human": True, + "next_action": "run_or_review_post_execution_verification", + }, + }, + "execution": { + "automation_operation_log": [ + { + "operation_type": "ansible_check_mode_executed", + "status": "success", + "actor": "ansible_check_mode_worker", + "input_executor": "ansible", + "input_catalog_id": "ansible:188-ai-web", + } + ], + "ansible": { + "considered": True, + "records": [ + { + "operation_type": "ansible_check_mode_executed", + "status": "success", + "actor": "ansible_check_mode_worker", + "catalog_id": "ansible:188-ai-web", + "playbook_path": "infra/ansible/playbooks/188-ai-web-readonly.yml", + "execution_mode": "check_mode", + "check_mode": True, + "apply_executed": False, + "returncode": 0, + } + ], + "candidate_catalog": {"candidates": []}, + }, + }, + }, + remediation_history={"total": 0}, + ) + + joined = "\n".join(lines) + assert "ansible_check_mode_only" in joined + assert "owner_review_apply_gate_or_create_verifier_plan" in joined + assert "dry_run_only_owner_review_required" in joined + assert "dry_run_completed_no_apply" in joined + assert "只完成 Ansible check-mode 乾跑,尚未執行修復" in joined + assert "executed_pending_verification" not in joined + assert "run_or_review_post_execution_verification" not in joined + + def test_awooop_agent_evidence_lines_show_mcp_source_execution_playbook_km() -> None: """Telegram 詳情/歷史要像前端一樣顯示五段 AI Agent 證據鏈。""" lines = telegram_gateway_module._format_awooop_agent_evidence_lines( diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 9c4823f9..ff8843be 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,27 @@ +## 2026-06-25|Ansible check-mode 乾跑不再誤標為已修復 + +**背景**:使用者貼出的 `node-exporter-188` Telegram 告警顯示已批准後仍像「沒有自動化」。正式 API 讀回 `INC-20260625-977E5F` 時發現更精準的真相:系統有 Ansible `check_mode` 證據,`check_mode_total=1`、`apply_total=0`、`auto_repair_execution_records=0`,但 AwoooP status chain 仍把它標成 `execution_succeeded / executed_pending_verification`。這會讓 operator 誤以為 AI 已執行修復,只缺 verifier;實際上它只是乾跑成功,尚未進入 apply / verifier。 + +**完成**: +- `operator_outcome` 新增 `dry_run_only_owner_review_required` 狀態。 +- AwoooP status chain 新增 `ansible_check_mode_only` 判定:當 Ansible 只有 `check_mode_total>0` 且 `apply_total=0`、`applied=false`、`controlled_apply=false`、`auto_repair_execution_records=0` 時,不再顯示 `executed_pending_verification`。 +- Telegram 詳情 / 歷史與 callback snapshot 同步套用同一判定,下一步改為 `owner_review_apply_gate_or_create_verifier_plan`。 +- status chain evidence 新增 `ansible_dry_run_only=true`,讓前端 / Telegram / API 能清楚分辨「乾跑完成」與「修復已執行」。 + +**驗證**: +- `python3 -m py_compile apps/api/src/services/operator_outcome.py apps/api/src/services/platform_operator_service.py apps/api/src/services/telegram_gateway.py` 通過。 +- `DATABASE_URL=sqlite:///tmp/awoooi-test.db pytest apps/api/tests/test_operator_outcome.py apps/api/tests/test_awooop_operator_timeline_labels.py apps/api/tests/test_telegram_message_templates.py -q`:`150 passed`。 +- `git diff --check` 通過。 +- `python3 scripts/security/source-control-owner-response-guard.py --root .`:`SOURCE_CONTROL_OWNER_RESPONSE_GUARD_OK`。 +- `python3 scripts/security/security-mirror-progress-guard.py --root .`:`SECURITY_MIRROR_PROGRESS_GUARD_OK`。 + +**完成度同步**: +- Ansible check-mode truth-chain 語意修正:`100%`。 +- Telegram / AwoooP 乾跑誤導修正:source-side `100%`,production deploy 待本段推送後驗證。 +- AI 自動化 verified repair 成功率:不因本段提高,仍需以真正 apply + verifier + KM / PlayBook writeback 計算。 + +**邊界**:本段只修狀態鏈與 operator outcome;沒有執行 Ansible apply、沒有 restart、沒有 SSH 主機修改、沒有發 Telegram、沒有開 runtime gate、沒有把 check-mode 當修復完成。 + ## 2026-06-25|110 orphan Chrome 精準清理與重啟 SOP v1.59 證據同步 **背景**:20:23 post-start quick check 仍顯示 110 load 偏高。只讀 `ps` / `vmstat` 分流後確認兩組 `stockplatform-review-bulk-ux` headless Chrome process group 已跑約 5 小時,root Chrome process `PPID=1`,沒有活躍測試父程序;GPU process 各吃約 96% CPU,renderer 各約 22% CPU。這符合 SOP 內「orphan browser smoke」分流條件,不是 Docker、Nginx、K3s、MOMO、Harbor、Sentry 或 Wazuh 服務事故。 diff --git a/docs/workplans/2026-06-25-awoooi-navigation-ia-consolidation.md b/docs/workplans/2026-06-25-awoooi-navigation-ia-consolidation.md index f07c4e48..af9a5c81 100644 --- a/docs/workplans/2026-06-25-awoooi-navigation-ia-consolidation.md +++ b/docs/workplans/2026-06-25-awoooi-navigation-ia-consolidation.md @@ -152,12 +152,24 @@ | 驗證 | JSON parse、i18n mirror、web typecheck、diff check 通過 | | 邊界 | 只讀狀態顯示;不新增執行、重啟、Telegram send 或 PlayBook apply 入口 | +## 4.8 2026-06-25 Ansible check-mode 乾跑 truth-chain 修正 + +| 項目 | 結果 | +|---|---| +| 核心修正 | `INC-20260625-977E5F` 類 Ansible `check_mode` 乾跑不再被 AwoooP / Telegram 誤標為已執行修復 | +| 判定條件 | `check_mode_total>0`、`apply_total=0`、`applied=false`、`controlled_apply=false`、`auto_repair_execution_records=0` | +| 新狀態 | `ansible_check_mode_only`、`dry_run_only_owner_review_required` | +| 下一步 | `owner_review_apply_gate_or_create_verifier_plan` | +| 產品含義 | Runs / Approvals / Telegram 必須把「乾跑完成」與「修復已套用」分成不同階段;後續 Situation Strip 也要把 dry-run、apply、verifier、KM / PlayBook writeback 分層 | +| 驗證 | `py_compile` 通過;targeted API / Telegram tests `150 passed`;diff check 與 security guards 通過 | +| 邊界 | 不執行 Ansible apply、不新增主機命令、不發 Telegram、不開 runtime gate;本段只修 truth-chain 與 operator outcome 語意 | + ## 5. 下一輪必做 | 優先級 | 工作 | 驗收 | |---|---|---| -| P0 | AwoooP Runs 共用 Situation Strip / Agent Flow / Action Rail | Recurrence Work Item status chip 已完成;下一步補首屏 Situation Strip,讓 Runs 一眼看懂卡點、owner、補救試跑、verifier 與下一步 | -| P0 | Repair candidate draft readback 串接 | 後端已拆出 `draft_ready_for_owner_review`,Runs 已顯示 Work Item 草案狀態;下一步 Work Items / KB 顯示草案 ID、owner、rollback、verifier、KM / PlayBook / script / schedule 資產狀態 | +| P0 | AwoooP Runs 共用 Situation Strip / Agent Flow / Action Rail | Recurrence Work Item status chip 與 Ansible dry-run truth-chain 已完成;下一步補首屏 Situation Strip,讓 Runs 一眼看懂卡點、owner、dry-run、apply、verifier 與下一步 | +| P0 | Repair candidate draft readback 串接 | 後端已拆出 `draft_ready_for_owner_review` 與 `ansible_check_mode_only`;下一步 Work Items / KB 顯示草案 ID、owner、rollback、verifier、KM / PlayBook / script / schedule 資產狀態 | | P0 | Tenants 舊表格 responsive 化 | route / source 已完成卡片化;下一步處理租戶資料表 drawer 與產品 topology drilldown | | P0 | Observability topology | 主機 / 服務 / 網站 / 告警 / SLO 關聯可視化 | | P0 | Knowledge / Automation trust ledger | KM、PlayBook、腳本、排程、dry-run、verifier 有統一沉澱面板 | diff --git a/docs/workplans/2026-06-25-awoooi-product-uiux-inventory.md b/docs/workplans/2026-06-25-awoooi-product-uiux-inventory.md index dae797ce..52d81dbd 100644 --- a/docs/workplans/2026-06-25-awoooi-product-uiux-inventory.md +++ b/docs/workplans/2026-06-25-awoooi-product-uiux-inventory.md @@ -200,6 +200,23 @@ Tenants 目前已讀到: 完成度同步:AwoooP Runs 可判讀性 `62% -> 65%`;全站 UI/UX 專業化 `55% -> 56%`;AwoooP AI 自動化真相鏈仍 `64%`。 +### 2.5.6 Ansible check-mode 乾跑不再誤標為已修復 + +2026-06-25 正式 API 讀回 `INC-20260625-977E5F` 時確認:`node-exporter-188` 類告警已出現 Ansible `check_mode` 證據,但實際只有乾跑,沒有 apply,也沒有 verifier。舊狀態鏈把它顯示成 `execution_succeeded / executed_pending_verification`,會讓 operator 誤以為 AI 已經執行修復,實際上只是完成安全乾跑。 + +| 項目 | 完成 | +|---|---| +| 真相來源 | `check_mode_total=1`、`apply_total=0`、`auto_repair_execution_records=0` | +| 狀態鏈 | 新增 `ansible_check_mode_only`,不再顯示 `executed_pending_verification` | +| Operator outcome | 新增 `dry_run_only_owner_review_required` | +| 下一步 | `owner_review_apply_gate_or_create_verifier_plan` | +| API evidence | `ansible_dry_run_only=true` | +| Telegram / Callback | 詳情、歷史與 callback snapshot 套用同一判定,避免 TG 把 dry-run 講成已修復 | +| 驗證 | `py_compile` 通過;`DATABASE_URL=sqlite:///tmp/awoooi-test.db pytest apps/api/tests/test_operator_outcome.py apps/api/tests/test_awooop_operator_timeline_labels.py apps/api/tests/test_telegram_message_templates.py -q`:`150 passed` | +| 邊界 | 不執行 Ansible apply、不 restart、不 SSH、不發 Telegram、不開 runtime gate;只修正狀態鏈語意 | + +完成度同步:AwoooP status-chain 真相語意 `64% -> 67%`;Telegram / AwoooP 乾跑誤導修正 source-side `100%`;真正 AI 自動化 verified repair 成功率不提高,仍需以 apply + verifier + KM / PlayBook writeback 計算。 + ## 3. 頁面 UI/UX 現況盤點 2026-06-25 對正式站桌機 / mobile 抽查: @@ -207,7 +224,7 @@ Tenants 目前已讀到: | 頁面 | 現況 | 主要問題 | 優先級 | |---|---|---|---| | `/zh-TW/awooop` | Google Ads 式 shell 已存在,AwoooP 概覽與 AI 自動化真相帶可見 | 仍需把 Runs / Work Items / Approvals 的卡點與 owner SOP rail 串回同一首屏;後續要減少下方長文字卡 | P0 | -| `/zh-TW/awooop/runs` | Runs 狀態鏈、provider evidence 與 recurrence Work Item 草案狀態 chip 可見 | mobile 有長字串、內部 provider URL / key 類資訊外露風險;仍需補 Situation Strip / Agent Flow / Action Rail 才能一眼分辨「AI 已做什麼、卡哪裡、下一步誰做」 | P0 | +| `/zh-TW/awooop/runs` | Runs 狀態鏈、provider evidence、recurrence Work Item 草案狀態 chip 可見;後端已能分辨 Ansible check-mode 乾跑與真正修復執行 | mobile 有長字串、內部 provider URL / key 類資訊外露風險;仍需補 Situation Strip / Agent Flow / Action Rail 才能一眼分辨「乾跑、已套用、待 verifier、需 owner apply gate」 | P0 | | `/zh-TW/awooop/work-items` | 工作項目、repair candidate 與 operator SOP rail 可見;已能一眼看到人工卡點、候選品質、沉澱資產與負責人接手 | 底部長表格仍需 responsive data grid / drawer 化;每筆 item 還要補更細的 owner、rollback、verifier receipt 與狀態回寫 | P0 | | `/zh-TW/awooop/approvals` | approval 狀態鏈、AI evidence 與審批決策 handoff rail 可見;已能一眼看到阻塞、證據、接手包與安全閘門;後端已能把 prefilled host-service 草案標成 owner-review-ready | 表格仍需 drawer 化;Runs / Work Items 仍要讀回 draft-ready 狀態、owner、rollback、verifier receipt 與資產 ID | P0 | | `/zh-TW/awooop/tenants` | 16 產品、31 路由、10 repo 資產可讀;作戰圖與 route/source 手機卡片已上線 | 下一步補產品拓樸 drilldown 與租戶資料表 drawer | P0 | @@ -311,7 +328,7 @@ AWOOOI 缺的是: |---|---|---| | P0-UX-001 | 凍結目前資訊架構,不再往頁面塞新文字卡 | 新增 UI 變更需指定進 Situation / Flow / Matrix / Topology / Evidence / Action 其中一層 | | P0-UX-002 | AwoooP Runs 移除 raw internal provider URL / key 顯示 | 正式 DOM 不出現 LAN IP / raw provider endpoint;改顯 provider alias、owner、route order、fallback reason | -| P0-UX-003 | Work Items / Approvals / Runs 加人工處置 SOP rail | Work Items 與 Approvals 首屏 rail 已上線;下一步 Runs 要同樣顯示 owner、why blocked、safe next action、evidence missing、rollback / verifier status | +| P0-UX-003 | Work Items / Approvals / Runs 加人工處置 SOP rail | Work Items 與 Approvals 首屏 rail 已上線;下一步 Runs 要同樣顯示 owner、why blocked、safe next action、dry-run / apply / verifier 差異、evidence missing、rollback / verifier status | | P0-UX-004 | Tenants 改成全產品 Coverage Heatmap + Topology | 第一刀已上線;下一步移除舊寬表格依賴,改成 responsive cards / drawer | | P0-UX-005 | Observability 修 `0% / -- / error` 語義 | 所有 0 要分成 no data、not connected、blocked、healthy zero、stale、error | | P0-UX-006 | Knowledge Base 修讀取狀態與沉澱總帳 | 顯示 KM / PlayBook / scripts / schedules / owner / stale / trust score,不再像資料消失 | @@ -395,7 +412,7 @@ AWOOOI 缺的是: |---|---:| | 全站 UI/UX 專業化 | `56%` | | 導航 / IA 整合 | `64%` | -| AwoooP 操作台產品化 | `65%` | +| AwoooP 操作台產品化 | `66%` | | Tenants 全產品資產中心 | `68%` | | Observability 專業拓樸 / 告警中心 | `38%` | | Knowledge / PlayBook 沉澱可視化 | `34%` |