def _safe_dict(value) -> dict:
    """Return *value* unchanged when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _safe_str(value) -> str:
    """Return *value* unchanged when it is a str, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def _safe_str_list(value) -> list[str]:
    """Return the str items of *value* as a new list; ``[]`` for non-lists."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, str)]


def _safe_dict_list(value) -> list[dict]:
    """Return shallow copies of the dict items of *value*; ``[]`` for non-lists.

    Copying keeps the caller's response from aliasing the dicts stored on the
    approval, so mutating the response cannot change the approval metadata.
    """
    if not isinstance(value, list):
        return []
    return [dict(item) for item in value if isinstance(item, dict)]


def _first_text(*candidates, default: str = "") -> str:
    """Return the first candidate that is a non-blank str, else *default*.

    Whitespace-only strings count as missing so they cannot short-circuit a
    fallback chain. A selected value is returned unmodified (not stripped).
    """
    for candidate in candidates:
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return default


def _build_no_action_manual_handoff_payload(approval) -> dict:
    """Expose the next manual handoff state when approval has no executable repair.

    NO_ACTION approvals are intentionally blocked from executor scheduling, but
    the operator still needs a concrete next state instead of a dead-end approval
    receipt. Keep the payload redacted and focused on AwoooP work tracking.

    Args:
        approval: Object whose optional ``metadata`` attribute holds a dict.
            Anything else (including ``None`` or a non-dict ``metadata``) is
            treated as empty metadata.

    Returns:
        A dict of response fields. Every value is read defensively from
        ``metadata["repair_candidate_draft_package"]`` first, then from the
        flat ``repair_candidate_*`` metadata keys, then from a fixed default.
        List values are always fresh lists that contain only well-typed items.
    """
    metadata = _safe_dict(getattr(approval, "metadata", None))
    package = _safe_dict(metadata.get("repair_candidate_draft_package"))
    work_item = _safe_dict(package.get("awooop_work_item"))

    next_action = _first_text(
        package.get("next_step"),
        metadata.get("repair_candidate_next_step"),
        default="open_repair_candidate_work_item_or_reanalyze",
    )
    work_item_id = _first_text(
        work_item.get("work_item_id"),
        metadata.get("repair_candidate_work_item_id"),
    )
    work_item_href = _first_text(
        work_item.get("work_item_url"),
        work_item.get("work_item_href"),
        metadata.get("repair_candidate_work_item_href"),
    )
    blocker = _first_text(
        package.get("blocker"),
        metadata.get("repair_candidate_blocker_summary"),
        metadata.get("repair_candidate_status"),
        default="repair_candidate_missing",
    )

    return {
        "message": "ApprovedForManualHandoff",
        "manual_handoff_required": True,
        # NOTE(review): reported as scheduled even when no work item id/href
        # was found above -- confirm that is the intended contract.
        "manual_handoff_scheduled": True,
        "manual_handoff_kind": "repair_candidate_draft",
        "next_action": next_action,
        "operator_guidance": (
            "此批准沒有執行命令;請開啟處置包或重診,補齊專屬 PlayBook、"
            "rollback、verifier 與 owner review 後再進入執行 gate。"
        ),
        "work_item_id": work_item_id,
        "work_item_href": work_item_href,
        "repair_candidate_blocker": blocker,
        "required_fields": _safe_str_list(package.get("required_fields")),
        "blocked_operations": _safe_str_list(package.get("blocked_operations")),
        "required_writebacks": _safe_str_list(package.get("required_writebacks")),
        # Sanitised like the other list fields: only dict entries survive and
        # each one is copied so the response never aliases approval metadata.
        "automation_asset_requirements": _safe_dict_list(
            package.get("automation_asset_requirements")
        ),
    }
response = { "ok": True, - "message": ( - "ApprovedWithoutExecution" - if execution_suppressed - else ("Approved" if execution_triggered else "Signed") - ), + "message": "Approved" if execution_triggered else "Signed", "approval_id": approval_id, "status": status_value, "execution_triggered": execution_triggered, "execution_scheduled": execution_scheduled, "execution_suppressed": execution_suppressed, } + if execution_suppressed: + response.update(_build_no_action_manual_handoff_payload(approval)) + return response elif action == "reject": approval, msg = await service.reject_approval( diff --git a/apps/api/src/services/repair_candidate_service.py b/apps/api/src/services/repair_candidate_service.py index d6de5fc6..68bb5c72 100644 --- a/apps/api/src/services/repair_candidate_service.py +++ b/apps/api/src/services/repair_candidate_service.py @@ -478,12 +478,56 @@ class RepairCandidateService: "rollback_command", "verifier_plan", "owner_review", + "script_or_ansible_ref", + "schedule_or_monitoring_rule_ref", + "km_update_plan", + "automation_asset_record", ] blocked_operations = [ "auto_execute", "approve_no_action_as_repair", "generic_fallback_repair", ] + automation_asset_requirements = [ + { + "asset_type": "KM", + "required_record": "incident_root_cause_and_resolution_summary", + "visibility": "knowledge_base", + "owner_review_required": True, + }, + { + "asset_type": "PlayBook", + "required_record": "service_specific_repair_steps_with_trust_policy", + "visibility": "awooop_work_items", + "owner_review_required": True, + }, + { + "asset_type": "ScriptOrAnsible", + "required_record": "safe_route_command_or_ansible_playbook_ref", + "visibility": "runs_and_work_items", + "owner_review_required": True, + }, + { + "asset_type": "ScheduleOrMonitoringRule", + "required_record": "recurrence_detection_or_alert_rule_gap", + "visibility": "observability_and_reports", + "owner_review_required": True, + }, + { + "asset_type": "Verifier", + "required_record": 
"success_failure_and_rollback_verification_plan", + "visibility": "incident_timeline", + "owner_review_required": True, + }, + ] + required_writebacks = [ + "incident_timeline_stage_update", + "execution_or_manual_handoff_result", + "verifier_result", + "km_update_draft", + "playbook_trust_update", + "automation_asset_inventory_record", + ] coverage_gap = self._build_coverage_gap( blockers=blockers, lane=lane, @@ -532,6 +576,8 @@ class RepairCandidateService: "reason": ",".join(blockers), "next_step": next_step, "required_fields": required_fields, + "automation_asset_requirements": automation_asset_requirements, + "required_writebacks": required_writebacks, "coverage_gap": coverage_gap, "playbook_draft_template": playbook_draft_template, "blocked_operations": blocked_operations, @@ -558,6 +604,8 @@ class RepairCandidateService: "matched_playbook_name": playbook.name if playbook else None, "evidence_snapshot_id": evidence_ref, "required_fields": required_fields, + "automation_asset_requirements": automation_asset_requirements, + "required_writebacks": required_writebacks, "coverage_gap": coverage_gap, "playbook_draft_template": playbook_draft_template, "blocked_operations": blocked_operations, @@ -605,6 +653,9 @@ class RepairCandidateService: "confirm_fingerprint_recurrence_stops_or_decreases", "write_execution_result_and_verifier_outcome", "update_km_and_playbook_trust_after_owner_review", + "record_script_or_ansible_asset_ref", + "record_schedule_or_monitoring_rule_gap", + "publish_assets_to_runs_work_items_and_knowledge_base", ] if target_kind == "k8s_workload": @@ -751,6 +802,10 @@ class RepairCandidateService: "verifier_plan", "owner_review_record", "trust_score_update_policy", + "script_or_ansible_ref", + "schedule_or_alert_rule_ref", + "automation_asset_record", + "dashboard_visibility_refs", ], "blocked_operations": [ "auto_execute", diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py index d6513ec0..00782463 
100644 --- a/apps/api/src/services/telegram_gateway.py +++ b/apps/api/src/services/telegram_gateway.py @@ -304,7 +304,8 @@ def _format_manual_handoff_package_lines( f"├ 1. 開 Runs / 真相鏈確認 {html.escape(incident_ref)} 仍在 firing 或 recurrence", f"├ 2. 補證據:{html.escape(evidence_hint)}", "├ 3. 在 AwoooP 建立修復候選:命令、風險、rollback、verifier、owner", - "└ 4. 修復後回寫:execution result、verifier、KM / PlayBook trust", + "├ 4. 沉澱資產:KM、PlayBook、腳本/Ansible、排程/監控規則、Verifier 結果", + "└ 5. 頁面可查:Runs、Work Items、Knowledge Base 要顯示資產 ID、owner、狀態與下一步", ] insert_at = 3 if repair_candidate_blocker_summary: @@ -8950,7 +8951,7 @@ class TelegramGateway: ) if no_action_approval: status_emoji = "🟠" - suffix = "已記錄;此卡沒有可執行修復,等待補修復候選" + suffix = "已轉人工處置包;請按處置包或重診補修復候選,這不是執行中" else: suffix = "⚡ 執行中..." if execution_triggered else "已簽核,等待更多簽核" else: diff --git a/apps/api/tests/test_repair_candidate_service.py b/apps/api/tests/test_repair_candidate_service.py index 4b3805e1..8a99049d 100644 --- a/apps/api/tests/test_repair_candidate_service.py +++ b/apps/api/tests/test_repair_candidate_service.py @@ -234,10 +234,24 @@ async def test_candidate_blocked_when_playbook_is_generic_fallback() -> None: result.metadata["repair_candidate_next_step"] ) assert "repair_command" in result.metadata["repair_candidate_draft_package"]["required_fields"] + assert "script_or_ansible_ref" in result.metadata["repair_candidate_draft_package"]["required_fields"] + assert "automation_asset_record" in result.metadata["repair_candidate_draft_package"]["required_fields"] + assert "km_update_draft" in result.metadata["repair_candidate_draft_package"]["required_writebacks"] + asset_requirements = result.metadata["repair_candidate_draft_package"][ + "automation_asset_requirements" + ] + assert [item["asset_type"] for item in asset_requirements] == [ + "KM", + "PlayBook", + "ScriptOrAnsible", + "ScheduleOrMonitoringRule", + "Verifier", + ] coverage_gap = result.metadata["repair_candidate_draft_package"]["coverage_gap"] assert 
coverage_gap["schema_version"] == "repair_candidate_coverage_gap_v1" assert coverage_gap["coverage_key"] == "unknownalert:awoooi-api" assert coverage_gap["blocking_stage"] == "service_playbook_coverage" + assert "automation_asset_record" in coverage_gap["playbook_template_fields"] assert coverage_gap["next_owner_lane"] == "create_service_specific_repair_playbook" assert coverage_gap["mcp_evidence_ready"] is True assert coverage_gap["runtime_execution_authorized"] is False diff --git a/apps/api/tests/test_telegram_message_templates.py b/apps/api/tests/test_telegram_message_templates.py index 6c60c503..d7e5888e 100644 --- a/apps/api/tests/test_telegram_message_templates.py +++ b/apps/api/tests/test_telegram_message_templates.py @@ -1190,7 +1190,8 @@ class TestTelegramMessageFormat: assert "人工處置包" in result assert "補證據:node_exporter target up" in result assert "AwoooP 建立修復候選" in result - assert "execution result、verifier、KM / PlayBook trust" in result + assert "沉澱資產:KM、PlayBook、腳本/Ansible、排程/監控規則、Verifier 結果" in result + assert "Runs、Work Items、Knowledge Base 要顯示資產 ID、owner、狀態與下一步" in result assert "等待人工批准" not in result def test_telegram_message_diagnosis_state_is_not_auto_repair(self): diff --git a/apps/api/tests/test_telegram_webhook_execution_handoff.py b/apps/api/tests/test_telegram_webhook_execution_handoff.py index c0e981bc..e16a6141 100644 --- a/apps/api/tests/test_telegram_webhook_execution_handoff.py +++ b/apps/api/tests/test_telegram_webhook_execution_handoff.py @@ -125,6 +125,41 @@ async def test_telegram_approval_suppresses_executor_for_no_action(monkeypatch): status=SimpleNamespace(value="approved"), incident_id="INC-20260611-NOEXEC", action="NO_ACTION - REPAIR_CANDIDATE_MISSING: LLM 分析失敗", + metadata={ + "repair_candidate_blocker_summary": "只命中通用兜底 PlayBook", + "repair_candidate_draft_package": { + "next_step": "建立專屬 PlayBook 草案", + "required_fields": [ + "alertname", + "target_selector", + "repair_command", + "rollback_command", + "verifier_plan", + 
"owner_review", + ], + "required_writebacks": [ + "incident_timeline_stage_update", + "km_update_draft", + "playbook_trust_update", + ], + "automation_asset_requirements": [ + {"asset_type": "KM", "visibility": "knowledge_base"}, + {"asset_type": "PlayBook", "visibility": "awooop_work_items"}, + {"asset_type": "ScriptOrAnsible", "visibility": "runs_and_work_items"}, + ], + "blocked_operations": ["approve_no_action_as_repair"], + "awooop_work_item": { + "work_item_id": ( + "repair-candidate-draft:awoooi:INC-20260611-NOEXEC:" + "create_service_specific_repair" + ), + "work_item_url": ( + "https://awoooi.wooo.work/zh-TW/awooop/work-items?" + "project_id=awoooi&incident_id=INC-20260611-NOEXEC" + ), + }, + }, + }, ) finalizer_calls: list[dict] = [] op_log_repo = _FakeAlertOperationLogRepository() @@ -157,10 +192,30 @@ async def test_telegram_approval_suppresses_executor_for_no_action(monkeypatch): result = await telegram_api.telegram_webhook(_callback_update(f"approve:{approval_id}:ts:nonce")) assert result["ok"] is True - assert result["message"] == "ApprovedWithoutExecution" + assert result["message"] == "ApprovedForManualHandoff" assert result["execution_triggered"] is True assert result["execution_scheduled"] is False assert result["execution_suppressed"] is True + assert result["manual_handoff_required"] is True + assert result["manual_handoff_scheduled"] is True + assert result["manual_handoff_kind"] == "repair_candidate_draft" + assert result["next_action"] == "建立專屬 PlayBook 草案" + assert result["repair_candidate_blocker"] == "只命中通用兜底 PlayBook" + assert result["work_item_id"] == ( + "repair-candidate-draft:awoooi:INC-20260611-NOEXEC:" + "create_service_specific_repair" + ) + assert "project_id=awoooi" in result["work_item_href"] + assert "repair_command" in result["required_fields"] + assert result["blocked_operations"] == ["approve_no_action_as_repair"] + assert result["required_writebacks"] == [ + "incident_timeline_stage_update", + "km_update_draft", + 
"playbook_trust_update", + ] + assert result["automation_asset_requirements"][0]["asset_type"] == "KM" + assert result["automation_asset_requirements"][1]["visibility"] == "awooop_work_items" + assert "此批准沒有執行命令" in result["operator_guidance"] assert finalizer_calls == [{ "approval_id": approval_id, "execution_triggered": True,