fix(api): 補修復候選人工草案包
All checks were successful
CD Pipeline / tests (push) Successful in 1m28s
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / build-and-deploy (push) Successful in 4m0s
CD Pipeline / post-deploy-checks (push) Successful in 1m48s

This commit is contained in:
Your Name
2026-06-11 18:23:49 +08:00
parent 99efc62745
commit febe9ecfcd
5 changed files with 238 additions and 2 deletions

View File

@@ -595,6 +595,10 @@ async def _push_to_telegram_background(
fingerprint: str = "",
# P2.4 中間態清理 2026-04-24 ogt + Claude Sonnet 4.6
placeholder_message_id: int | None = None,
# 2026-06-11 Codex: 修復候選阻擋時,把下一步與草案欄位直接帶到 Telegram 卡片。
repair_candidate_blocker_summary: str = "",
repair_candidate_next_step: str = "",
repair_candidate_required_fields: list[str] | None = None,
) -> None:
"""
背景任務: 推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)
@@ -688,6 +692,9 @@ async def _push_to_telegram_background(
# ADR-075 斷點 B 修復: 傳入分類以啟用動態按鈕
alert_category=alert_category,
notification_type=notification_type,
repair_candidate_blocker_summary=repair_candidate_blocker_summary,
repair_candidate_next_step=repair_candidate_next_step,
repair_candidate_required_fields=repair_candidate_required_fields,
)
logger.info(
@@ -2316,9 +2323,17 @@ async def _process_new_alert_background(
repair_candidate_result.metadata.get("repair_candidate_blocker_summary")
or ", ".join(blockers)
)
next_step = str(
repair_candidate_result.metadata.get("repair_candidate_next_step")
or "建立人工處置包並補 PlayBook 草案欄位;完成 owner review 後再重跑候選生成。"
)
fallback_create = ApprovalRequestCreate(
action=f"NO_ACTION - REPAIR_CANDIDATE_MISSING: {blocker_text}",
description=f"[LLM Failed] {message}\n修復候選阻擋:{blocker_text}",
description=(
f"[LLM Failed] {message}\n"
f"修復候選阻擋:{blocker_text}\n"
f"下一步:{next_step}"
),
risk_level=RiskLevel.LOW,
blast_radius=BlastRadius(
affected_pods=1,
@@ -2331,6 +2346,11 @@ async def _process_new_alert_background(
name="MCP/PlayBook candidate gate",
passed=False,
message=blocker_text[:240],
),
DryRunCheck(
name="Repair PlayBook draft package",
passed=False,
message=next_step[:240],
)
],
requested_by="OpenClaw (fallback candidate gate)",
@@ -2338,7 +2358,9 @@ async def _process_new_alert_background(
metadata=_approval_metadata_cs4,
matched_playbook_id=_matched_playbook_id_cs4,
)
telegram_root_cause = f"LLM fallback 後未產生修復候選;阻擋:{blocker_text}"
telegram_root_cause = (
f"LLM fallback 後未產生修復候選;阻擋:{blocker_text};下一步:{next_step}"
)
primary_responsibility = "HUMAN"
approval = await service.create_approval_with_fingerprint(
@@ -2457,6 +2479,19 @@ async def _process_new_alert_background(
notification_type=notification_type,
alert_category=alert_category,
fingerprint=fingerprint,
repair_candidate_blocker_summary=str(
_approval_metadata_cs4.get("repair_candidate_blocker_summary") or ""
),
repair_candidate_next_step=str(
_approval_metadata_cs4.get("repair_candidate_next_step") or ""
),
repair_candidate_required_fields=(
_approval_metadata_cs4.get("repair_candidate_draft_package", {}).get(
"required_fields", []
)
if isinstance(_approval_metadata_cs4.get("repair_candidate_draft_package"), dict)
else []
),
)
except Exception as e:

View File

@@ -322,6 +322,14 @@ class RepairCandidateService:
metadata["repair_candidate_blocker_summary"] = self._humanize_blockers(
metadata["repair_candidate_blockers"]
)
draft_package = self._build_draft_package(
blockers=metadata["repair_candidate_blockers"],
playbook=playbook,
evidence=evidence,
)
metadata["playbook_draft_required"] = True
metadata["repair_candidate_next_step"] = draft_package["next_step"]
metadata["repair_candidate_draft_package"] = draft_package
metadata["fallback_action"] = fallback_action
return RepairCandidateResult(
evidence=evidence,
@@ -367,6 +375,93 @@ class RepairCandidateService:
}
return "".join(labels.get(blocker, blocker) for blocker in blockers)
def _build_draft_package(
    self,
    *,
    blockers: list[str],
    playbook: Playbook | None,
    evidence: EvidenceSnapshot | None,
) -> dict[str, Any]:
    """Describe the concrete owner-review package needed to unblock repair.

    The package is a handoff contract only. It must not be interpreted as
    approval to mutate runtime state or auto-create an approved PlayBook.

    Args:
        blockers: Blocker codes collected while building the repair
            candidate. ``None`` or an empty list is treated as "no specific
            blocker" and selects the generic owner-review lane.
        playbook: The matched PlayBook, if any; only ``playbook_id`` and
            ``name`` are read.
        evidence: The evidence snapshot, if any; only ``snapshot_id`` is read.

    Returns:
        A dict with the schema version, status, selected lane, next-step
        text, matched PlayBook identifiers, evidence snapshot id, the draft
        fields to fill in, and the operations that stay blocked.
    """
    # This runs on a failure/fallback path, so a missing blocker list must
    # not raise; it simply falls through to the default lane below.
    blocker_set = set(blockers or ())

    # Ordered by precedence: the first rule sharing a blocker with
    # ``blocker_set`` wins, mirroring the order in which problems have to be
    # resolved (truth chain -> evidence -> PlayBook match -> ...).
    lane_rules = (
        (
            {"incident_not_found"},
            "restore_truth_chain_before_repair",
            "先修復 incident / approval 真相鏈綁定,再重跑 MCP evidence 與 PlayBook 匹配。",
        ),
        (
            {"mcp_evidence_missing"},
            "rerun_mcp_evidence_collection",
            (
                "先按重診收集 MCP evidence成功後再建立服務專屬 PlayBook 草案,"
                "禁止只憑通用規則批准修復。"
            ),
        ),
        (
            {
                "playbook_not_matched",
                "playbook_not_found",
                "playbook_generic_fallback_not_repair",
            },
            "create_service_specific_repair_playbook",
            (
                "建立專屬 PlayBook 草案:綁定 alertname / target selector補 MCP evidence refs、"
                "修復命令、rollback、verifier plan 與 owner review通用兜底不可執行。"
            ),
        ),
        (
            {"playbook_observe_only"},
            "promote_diagnostic_to_repair_playbook",
            (
                "把診斷命令保留為 MCP evidence collector另建獨立修復步驟、rollback "
                "與 verifier經 owner review 後才可進入批准。"
            ),
        ),
        (
            {"playbook_command_not_safely_routable"},
            "route_command_through_safe_mcp_or_ansible",
            (
                "將命令改走 allowlisted MCP / Ansible route補 blast radius、rollback "
                "與 verifier plan再送 owner review。"
            ),
        ),
        (
            {"playbook_not_approved", "playbook_trust_below_gate"},
            "owner_review_playbook_trust_gate",
            "由 owner review PlayBook 狀態與 trust score補成功/失敗證據後才可進入修復候選。",
        ),
    )

    # Default lane when no rule matches (unknown or absent blockers).
    lane = "repair_candidate_owner_review"
    next_step = "建立人工處置包並補 PlayBook 草案欄位;完成 owner review 後再重跑候選生成。"
    for triggers, rule_lane, rule_next_step in lane_rules:
        if triggers & blocker_set:
            lane, next_step = rule_lane, rule_next_step
            break

    # Only expose a snapshot reference when one actually exists.
    evidence_ref = None
    if evidence and evidence.snapshot_id:
        evidence_ref = evidence.snapshot_id

    return {
        "schema_version": "repair_candidate_draft_package_v1",
        "status": "draft_required",
        "lane": lane,
        "next_step": next_step,
        "matched_playbook_id": playbook.playbook_id if playbook else None,
        "matched_playbook_name": playbook.name if playbook else None,
        "evidence_snapshot_id": evidence_ref,
        "required_fields": [
            "alertname",
            "target_selector",
            "mcp_evidence_refs",
            "repair_command",
            "rollback_command",
            "verifier_plan",
            "owner_review",
        ],
        "blocked_operations": [
            "auto_execute",
            "approve_no_action_as_repair",
            "generic_fallback_repair",
        ],
    }
def _build_description(
self,
*,

View File

@@ -270,6 +270,9 @@ def _format_manual_handoff_package_lines(
alert_category: str = "",
suggested_action: str | None = None,
verdict: str | None = None,
repair_candidate_blocker_summary: str = "",
repair_candidate_next_step: str = "",
repair_candidate_required_fields: list[str] | None = None,
compact: bool = False,
) -> list[str]:
"""Build a safe manual handoff package for no-action / degraded alerts.
@@ -287,6 +290,11 @@ def _format_manual_handoff_package_lines(
evidence_hint = _manual_evidence_hint(resource_name, alert_category)
incident_ref = incident_id or "--"
required_fields = [
str(field)
for field in (repair_candidate_required_fields or [])
if str(field).strip()
]
lines = [
"",
"🧰 <b>人工處置包</b>",
@@ -296,6 +304,25 @@ def _format_manual_handoff_package_lines(
"├ 3. 在 AwoooP 建立修復候選命令、風險、rollback、verifier、owner",
"└ 4. 修復後回寫execution result、verifier、KM / PlayBook trust",
]
insert_at = 3
if repair_candidate_blocker_summary:
lines.insert(
insert_at,
f"├ 阻擋:{html.escape(str(repair_candidate_blocker_summary)[:260])}",
)
insert_at += 1
if repair_candidate_next_step:
lines.insert(
insert_at,
f"├ 下一步:{html.escape(str(repair_candidate_next_step)[:360])}",
)
insert_at += 1
if required_fields:
field_text = ", ".join(required_fields[:7])
lines.insert(
insert_at,
f"├ PlayBook 草案欄位:<code>{html.escape(field_text)}</code>",
)
if not compact:
lines.append("按鈕:<b>處置包</b> 看完整證據,<b>重診</b> 重新收集,<b>Runs</b> 追蹤狀態")
return lines
@@ -1938,6 +1965,9 @@ class TelegramMessage:
automation_state: str = "" # diagnosis_collected_manual_required / diagnosis_failed_manual_required
automation_quality: dict | None = None # truth-chain automation_quality 摘要
remediation_summary: dict | None = None # ADR-100 read-only dry-run history 摘要
repair_candidate_blocker_summary: str = "" # 修復候選阻擋原因摘要
repair_candidate_next_step: str = "" # 修復候選阻擋後的下一步
repair_candidate_required_fields: list[str] | None = None # PlayBook 草案必填欄位
# ==========================================================================
# Phase 22: Nemotron 協作欄位 (ADR-044)
@@ -2117,6 +2147,9 @@ class TelegramMessage:
alert_category=self.alert_category,
suggested_action=self.suggested_action,
verdict=verdict,
repair_candidate_blocker_summary=self.repair_candidate_blocker_summary,
repair_candidate_next_step=self.repair_candidate_next_step,
repair_candidate_required_fields=self.repair_candidate_required_fields,
)
if not lines:
return ""
@@ -4126,6 +4159,10 @@ class TelegramGateway:
# 2026-04-16 ogt + Claude Sonnet 4.6: 修復鏈路顯示 (ADR-076)
playbook_name: str = "",
automation_state: str = "",
# 2026-06-11 Codex: no-action 修復候選阻擋時的人工處置包欄位。
repair_candidate_blocker_summary: str = "",
repair_candidate_next_step: str = "",
repair_candidate_required_fields: list[str] | None = None,
) -> dict:
"""
推送待簽核卡片到 Telegram (v7.0 含 SignOz 整合)
@@ -4232,6 +4269,9 @@ class TelegramGateway:
automation_state=automation_state,
automation_quality=automation_quality,
remediation_summary=remediation_summary,
repair_candidate_blocker_summary=repair_candidate_blocker_summary,
repair_candidate_next_step=repair_candidate_next_step,
repair_candidate_required_fields=repair_candidate_required_fields,
)
# 格式化訊息 — Phase 22: 如果 Nemotron 啟用,使用雙軌格式

View File

@@ -220,6 +220,54 @@ async def test_candidate_blocked_when_playbook_is_generic_fallback() -> None:
assert result.candidate_found is False
assert "playbook_generic_fallback_not_repair" in result.blockers
assert "通用兜底" in result.metadata["repair_candidate_blocker_summary"]
assert result.metadata["playbook_draft_required"] is True
assert result.metadata["repair_candidate_draft_package"]["schema_version"] == (
"repair_candidate_draft_package_v1"
)
assert result.metadata["repair_candidate_draft_package"]["lane"] == (
"create_service_specific_repair_playbook"
)
assert "建立專屬 PlayBook 草案" in result.metadata["repair_candidate_next_step"]
assert "repair_command" in result.metadata["repair_candidate_draft_package"]["required_fields"]
@pytest.mark.asyncio
async def test_candidate_blocked_observe_only_prompts_repair_playbook_draft() -> None:
    """An observe-only PlayBook is blocked and yields the promote-to-repair draft lane."""

    class NoRouteAutoRepairService:
        """Auto-repair stub whose SSH/MCP route preview always answers False."""

        def preview_write_ssh_mcp_route(self, incident, command):
            return False

    incident_under_test = _incident()

    # A PlayBook whose only step is a read-only diagnostic SSH command.
    diagnostic_playbook = _playbook(
        "ssh 192.168.0.188 'uptime; ps aux --sort=-%cpu | head -20; docker stats --no-stream'",
        risk_level=PlaybookRiskLevel.LOW,
    )
    diagnostic_playbook.repair_steps[0].action_type = ActionType.SSH_COMMAND

    candidate_service = RepairCandidateService(
        incident_service=FakeIncidentService(),
        investigator=FakeInvestigator(_evidence(incident_under_test.incident_id)),
        playbook_repository=FakePlaybookRepository(diagnostic_playbook),
        auto_repair_service=FakeAutoRepairService(),
    )
    # Swap in the no-route stub after construction.
    candidate_service._auto_repair = NoRouteAutoRepairService()

    outcome = await candidate_service.build_from_incident(
        incident=incident_under_test,
        alertname="NodeExporterDown",
        target_resource="node-exporter-188",
        namespace="awoooi-prod",
        message="node exporter is down",
        fallback_action="NO_ACTION - REPAIR_CANDIDATE_MISSING",
        matched_playbook_id="PB-REPAIR-001",
        severity="medium",
    )

    assert outcome.candidate_found is False
    assert "playbook_observe_only" in outcome.blockers

    draft_package = outcome.metadata["repair_candidate_draft_package"]
    assert draft_package["lane"] == "promote_diagnostic_to_repair_playbook"

    next_step_text = outcome.metadata["repair_candidate_next_step"]
    assert "診斷命令保留為 MCP evidence collector" in next_step_text
def test_approval_record_data_uses_preallocated_id_without_leaking_metadata() -> None:

View File

@@ -41,6 +41,20 @@ def test_repair_candidate_missing_card_exposes_manual_handoff_package() -> None:
primary_responsibility="INFRA",
confidence=0.0,
alert_category="host_resource",
repair_candidate_blocker_summary="只命中通用兜底 PlayBook禁止當成修復命令",
repair_candidate_next_step=(
"建立專屬 PlayBook 草案:綁定 alertname / target selector補 MCP evidence refs、"
"修復命令、rollback、verifier plan 與 owner review。"
),
repair_candidate_required_fields=[
"alertname",
"target_selector",
"mcp_evidence_refs",
"repair_command",
"rollback_command",
"verifier_plan",
"owner_review",
],
)
body = message.format()
@@ -48,6 +62,10 @@ def test_repair_candidate_missing_card_exposes_manual_handoff_package() -> None:
assert "缺少可執行修復候選,已產生人工處置包" in body
assert "Mode<code>repair_candidate_missing_manual_handoff</code>" in body
assert "人工處置包" in body
assert "只命中通用兜底 PlayBook" in body
assert "建立專屬 PlayBook 草案" in body
assert "PlayBook 草案欄位" in body
assert "repair_command" in body
assert "補證據node_exporter target up" in body
assert "AwoooP 建立修復候選" in body
assert "按鈕:<b>處置包</b>" in body