From 67f437043ae4e81bfb28caaf3a0957bd4b502eb5 Mon Sep 17 00:00:00 2001 From: OG T Date: Wed, 15 Apr 2026 21:41:31 +0800 Subject: [PATCH] =?UTF-8?q?fix(prod):=20=E4=BF=AE=E5=BE=A9=E5=9B=9B?= =?UTF-8?q?=E5=80=8B=E7=94=9F=E7=94=A2=E8=87=B4=E5=91=BD=20bug=20=E2=80=94?= =?UTF-8?q?=20outcome=20=E5=AF=AB=E5=85=A5=20/=20OpenClaw=20/=20Telegram?= =?UTF-8?q?=20=E9=80=9A=E7=9F=A5=20/=20LLM=20=E8=A6=8F=E5=89=87=E9=A1=AF?= =?UTF-8?q?=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. decision_manager: 移除 UPDATE incidents 中的 verification_result 欄位 (incidents 表無此欄位 → 所有 outcome 寫入失敗 outcome_write_failed) 2. failure_watcher: get_openclaw_service → get_openclaw (函數名錯誤 → OpenClaw 分析全部 ImportError 崩潰) 3. failure_watcher: tg.send_message → tg.send_notification (TelegramGateway 無 send_message 方法 → 修復通知無法送出) 4. decision_manager: expert_analyze 補齊 initial_diagnosis / diagnosis_description key (openclaw.py 讀這兩個 key,但 expert_analyze 只有 matched_rule / description → LLM 永遠看到 Matched Rule=unknown,無法正確分析) 2026-04-15 ogt + Claude Sonnet 4.6(亞太): 生產緊急修復 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/decision_manager.py | 10 ++++------ apps/api/src/services/failure_watcher.py | 8 ++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py index e8dcbbae..999a7064 100644 --- a/apps/api/src/services/decision_manager.py +++ b/apps/api/src/services/decision_manager.py @@ -842,15 +842,11 @@ async def _push_auto_repair_result( from src.db.models import IncidentRecord from sqlalchemy import update as _upd_outcome _outcome = "auto_repaired" if success else "auto_repair_failed" - _verification = ( - f"自動修復{'成功' if success else '失敗'}:{action[:120] if action else '未知'}" - + (f" | 錯誤:{error[:80]}" if error else "") - ) async with get_db_context() as _odb: await _odb.execute( _upd_outcome(IncidentRecord) .where(IncidentRecord.incident_id == inc_id) - .values(outcome=_outcome, verification_result=_verification) + .values(outcome=_outcome) ) await _odb.commit() logger.info("outcome_written", incident_id=inc_id, outcome=_outcome) @@ -1018,13 +1014,15 @@ def expert_analyze(incident: Incident) -> dict[str, Any]: "source": "expert_system", "action": rule["action"].format(target=target), "description": rule["description"], + "diagnosis_description": rule["description"], # openclaw.py reads this key "risk_level": rule["risk_level"], "reasoning": f"[規則匹配] {rule['reasoning']}", # 明確標示來源 "confidence": 0.0, # 🔴 規則匹配不是 AI 仲裁,信心度設 0 "kubectl_command": rule["action"].format(target=target), "matched_rule": matched_rule, + "initial_diagnosis": matched_rule, # openclaw.py reads this key "from_cache": False, - "is_rule_based": True, # 新增標記 + "is_rule_based": True, } diff --git a/apps/api/src/services/failure_watcher.py b/apps/api/src/services/failure_watcher.py index c5426a36..bf00d4d9 100644 --- a/apps/api/src/services/failure_watcher.py +++ b/apps/api/src/services/failure_watcher.py @@ -569,9 +569,9 @@ class FailureWatcherService(IFailureWatcher): 整合 SignOz 監控數據提供更精準的 RCA。 """ try: - from src.services.openclaw import get_openclaw_service + from src.services.openclaw import get_openclaw - openclaw = get_openclaw_service() + openclaw = get_openclaw() # 建構告警上下文 alert_context = { @@ -738,7 +738,7 @@ class FailureWatcherService(IFailureWatcher): f"└ 💡 建議: {analysis.get('suggested_repair', '需人工分析')}\n\n" f"請在 Dashboard 授權或使用 /repair {audit_log_id[:8]}" ) - await tg.send_message(message) + await tg.send_notification(message) logger.info( "repair_request_sent", @@ -770,7 +770,7 @@ class FailureWatcherService(IFailureWatcher): f"├ 📋 AuditLog: {audit_log_id[:8]}...\n" f"└ 📝 結果: {repair_result}" ) - await tg.send_message(message) + await tg.send_notification(message) except Exception as e: logger.warning(