fix(webhook+decision): ADR-089 async webhook + 超時髒資料修復
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 10m16s
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 10m16s
P0 — Webhook async (ADR-089):
- Alertmanager 收到告警立即回 202,不再同步等 90s LLM
- 新增 _process_new_alert_background():LLM 分析/Approval/Incident/Telegram 全進背景
- 根治 Alertmanager Fallback 風暴(超時 → 重送 → 指數退避風暴)

P1 — 超時髒資料 (decision_manager):
- _package_to_proposal_data: blocked_reason 禁止進 desc_parts(禁進卡片)
- _push_decision_to_telegram: suggested_action fallback 改「待分析」,禁止 description 流入

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1055,6 +1055,219 @@ def is_internal_ip(client_ip: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
async def _process_new_alert_background(
    alert_context: dict,
    alert_id: str,
    fingerprint: str,
    target_resource: str,
    namespace: str,
    alert_type: str,
    message: str,
    alertname: str,
    severity: str,
    alert_labels: dict,
    notification_type: str,
    alert_category: str,
    can_auto_repair: bool,
) -> None:
    """
    Background task: LLM analysis + Approval/Incident creation + Telegram push.

    ADR-089 (2026-04-17 ogt + Claude Sonnet 4.6):
    The webhook answers Alertmanager with HTTP 202 right away and all AI
    reasoning runs here in the background, so Alertmanager no longer waits
    >90s and triggers a fallback storm.

    Flow:
      1. Ask OpenClaw to analyze ``alert_context``.
      2. If an analysis came back: create the approval (keyed by
         ``fingerprint``), create the matching incident, write the incident id
         back onto the approval (best effort), optionally start auto-repair,
         push the Telegram card and record alert-chain success.
      3. If no analysis came back: create an "OBSERVE" fallback approval and
         incident with default values and push a Telegram card for human
         review.

    Args:
        alert_context: Payload handed to ``openclaw.analyze_alert``.
        alert_id: Only used to tag the error log if this task fails.
        fingerprint: Passed to ``create_approval_with_fingerprint``.
        target_resource: Resource name forwarded to incident, auto-repair and
            Telegram calls.
        namespace: Namespace forwarded to the same calls.
        alert_type: Alert type forwarded to incident and auto-repair calls.
        message: Alert message; also the root-cause fallback when the analysis
            has no description.
        alertname: Alert name; used for the heartbeat check and logging.
        severity: Accepted for interface compatibility; not read in this
            function.
        alert_labels: Labels forwarded to ``create_incident_for_approval``.
        notification_type: Forwarded to incident and Telegram calls.
        alert_category: Forwarded to incident and Telegram calls.
        can_auto_repair: When False, auto-repair is skipped and a
            GUARDRAIL_BLOCKED operation-log entry is written.

    Returns:
        None. Any exception is logged (with traceback) and swallowed.
    """
    try:
        service = get_approval_service()
        openclaw = get_openclaw()

        # The raw LLM response is returned by analyze_alert but not needed here.
        (
            analysis_result,
            ai_provider,
            _raw_response,
            signoz_metrics,
            signoz_trace_url,
            ai_tokens,
            ai_cost,
        ) = await openclaw.analyze_alert(alert_context)

        if analysis_result:
            # Translate the analysis risk level; values missing from the
            # mapping fall back to MEDIUM.
            risk_mapping = {
                "low": RiskLevel.LOW,
                "medium": RiskLevel.MEDIUM,
                "critical": RiskLevel.CRITICAL,
            }
            risk_level = risk_mapping.get(analysis_result.risk_level.value, RiskLevel.MEDIUM)

            # blast_radius may be absent; every use below falls back to defaults.
            blast = analysis_result.blast_radius
            impact_mapping = {
                "NONE": DataImpact.NONE,
                "READ_ONLY": DataImpact.READ_ONLY,
                "WRITE": DataImpact.WRITE,
                "DESTRUCTIVE": DataImpact.DESTRUCTIVE,
            }
            data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE) if blast else DataImpact.NONE

            approval_create = ApprovalRequestCreate(
                action=f"{analysis_result.action_title} | {analysis_result.kubectl_command}",
                description=f"[AI: {ai_provider}] {analysis_result.description}",
                risk_level=risk_level,
                blast_radius=BlastRadius(
                    affected_pods=blast.affected_pods if blast else 1,
                    estimated_downtime=blast.estimated_downtime if blast else "~30s",
                    # Union of blast-radius services and analysis services, de-duplicated.
                    related_services=list(set((blast.related_services if blast else []) + analysis_result.affected_services)),
                    data_impact=data_impact,
                ),
                dry_run_checks=[
                    DryRunCheck(name="AI 信心度", passed=analysis_result.confidence >= 0.7, message=f"{analysis_result.confidence:.0%}"),
                    DryRunCheck(name="來源", passed=True, message="alertmanager"),
                ],
                requested_by=f"OpenClaw ({ai_provider})",
            )

            approval = await service.create_approval_with_fingerprint(
                request=approval_create,
                fingerprint=fingerprint,
            )

            # The incident is always created together with the approval.
            incident_id = await create_incident_for_approval(
                approval_id=str(approval.id),
                risk_level=risk_level.value,
                target_resource=target_resource,
                namespace=namespace,
                alert_type=alert_type,
                message=message,
                source="alertmanager",
                alertname=alertname,
                alert_labels=alert_labels,
                notification_type=notification_type,
                alert_category=alert_category,
            )

            # Best effort: failing to write the incident id back onto the
            # approval must not stop auto-repair or the Telegram push.
            try:
                await service.update_incident_id(approval.id, incident_id)
                approval.incident_id = incident_id
            except Exception as _meta_err:
                logger.warning(
                    "approval_incident_id_update_failed",
                    approval_id=str(approval.id),
                    incident_id=incident_id,
                    error=str(_meta_err),
                )

            root_cause = analysis_result.description or message
            estimated_downtime = blast.estimated_downtime if blast else "~30s"
            primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
            confidence = analysis_result.confidence

            # Heartbeat alerts never go through auto-repair.
            _is_heartbeat = is_heartbeat_alertname(alertname)
            if _is_heartbeat:
                logger.info(
                    "auto_repair_skipped_heartbeat",
                    incident_id=incident_id,
                    alertname=alertname,
                )

            if can_auto_repair and not _is_heartbeat:
                await _try_auto_repair_background(
                    incident_id=incident_id,
                    approval_id=str(approval.id),
                    alert_type=alert_type,
                    target_resource=target_resource,
                    namespace=namespace,
                )
            elif not can_auto_repair:
                # Only record GUARDRAIL_BLOCKED when the rule really disabled
                # auto-repair. A heartbeat alert whose rule allows auto-repair
                # was previously logged here as "auto_repair=false", which was
                # inaccurate; it is already covered by the heartbeat log above.
                from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
                _op_log_rule = get_alert_operation_log_repository()
                await _op_log_rule.append(
                    "GUARDRAIL_BLOCKED",
                    incident_id=incident_id,
                    approval_id=str(approval.id),
                    actor="prometheus-rule",
                    action_detail=f"Prometheus rule 設定 auto_repair=false,強制人工審核: {alertname}",
                    success=False,
                    context={"alertname": alertname, "auto_repair_flag": False},
                )

            await _push_to_telegram_background(
                approval_id=str(approval.id),
                risk_level=risk_level.value,
                resource_name=target_resource,
                root_cause=root_cause,
                # Prefer the concrete kubectl command; otherwise the action enum value.
                suggested_action=(analysis_result.kubectl_command or "").strip() or analysis_result.suggested_action.value,
                estimated_downtime=estimated_downtime,
                hit_count=1,
                primary_responsibility=primary_responsibility,
                confidence=confidence,
                namespace=namespace,
                signoz_rps=signoz_metrics.rps if signoz_metrics else 0,
                signoz_rps_trend=signoz_metrics.rps_trend if signoz_metrics else "stable",
                signoz_error_rate=signoz_metrics.error_rate if signoz_metrics else 0,
                signoz_p99_latency=signoz_metrics.p99_latency_ms if signoz_metrics else 0,
                signoz_latency_trend=signoz_metrics.latency_trend if signoz_metrics else "stable",
                signoz_trace_url=signoz_trace_url or "",
                ai_tokens=ai_tokens,
                ai_cost=ai_cost,
                ai_provider=ai_provider,
                incident_id=incident_id,
                notification_type=notification_type,
                alert_category=alert_category,
            )

            record_alert_chain_success("alertmanager")

        else:
            # LLM analysis failed - fall back to default values.
            fallback_create = ApprovalRequestCreate(
                action="OBSERVE",
                description=f"[LLM Failed] {message}",
                risk_level=RiskLevel.MEDIUM,
                blast_radius=BlastRadius(
                    affected_pods=1,
                    estimated_downtime="unknown",
                    related_services=[],
                    data_impact=DataImpact.NONE,
                ),
                dry_run_checks=[],
                requested_by="OpenClaw (fallback)",
            )

            approval = await service.create_approval_with_fingerprint(
                request=fallback_create,
                fingerprint=fingerprint,
            )

            # The incident is created even when the LLM produced nothing.
            fallback_incident_id = await create_incident_for_approval(
                approval_id=str(approval.id),
                risk_level="medium",
                target_resource=target_resource,
                namespace=namespace,
                alert_type=alert_type,
                message=message,
                source="alertmanager",
                alertname=alertname,
                alert_labels=alert_labels,
                notification_type=notification_type,
                alert_category=alert_category,
            )

            await _push_to_telegram_background(
                approval_id=str(approval.id),
                risk_level="medium",
                resource_name=target_resource,
                root_cause=message,
                suggested_action="OBSERVE",
                estimated_downtime="unknown",
                hit_count=1,
                primary_responsibility="HUMAN",
                confidence=0.0,
                namespace=namespace,
                incident_id=fallback_incident_id,
                notification_type=notification_type,
                alert_category=alert_category,
            )

    except Exception as e:
        # Nothing is re-raised from here, so keep the traceback in the log
        # instead of only the exception message.
        logger.error(
            "process_new_alert_background_error",
            alert_id=alert_id,
            alertname=alertname,
            error=str(e),
            exc_info=True,
        )
|
||||
|
||||
|
||||
@router.post(
|
||||
"/alertmanager",
|
||||
response_model=AlertResponse,
|
||||
@@ -1375,11 +1588,10 @@ async def alertmanager_webhook(
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# 新告警 - LLM 分析
|
||||
# ADR-089 (2026-04-17 ogt + Claude Sonnet 4.6): 新告警 — 背景 LLM 分析
|
||||
# 立即回傳 202,AI 辯證在背景非同步執行
|
||||
# 修復根因: 同步等待 90s LLM → Alertmanager 超時 → Fallback 風暴
|
||||
# ==========================================================================
|
||||
# 2026-04-16 ogt + Claude Sonnet 4.6: 修復 — alertname 置頂,LLM 才能知道是什麼告警
|
||||
# 舊版 alertname 埋在 labels 中,alert_type 永遠是 "custom"
|
||||
# → LLM 全部輸出「重啟 AWOOOI 服務」(見 INC-20260416-C365D0 postgres 磁碟告警事故)
|
||||
alert_context = {
|
||||
"alertname": alertname, # 主要識別符 — LLM 必讀
|
||||
"alert_category": alert_category, # kubernetes/database/storage/host_resource/ssl_cert
|
||||
@@ -1394,241 +1606,30 @@ async def alertmanager_webhook(
|
||||
"labels": alert.labels,
|
||||
}
|
||||
|
||||
# 2026-03-29 ogt: 加入 Token/Cost 追蹤
|
||||
openclaw = get_openclaw()
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
|
||||
background_tasks.add_task(
|
||||
_process_new_alert_background,
|
||||
alert_context=alert_context,
|
||||
alert_id=alert_id,
|
||||
fingerprint=fingerprint,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
alert_type=alert_type,
|
||||
message=message,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
alert_labels=alert.labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
can_auto_repair=_can_auto_repair_by_rule,
|
||||
)
|
||||
|
||||
if analysis_result:
|
||||
# analysis_result 是 OpenClawDecision Pydantic 模型
|
||||
# 轉換風險等級
|
||||
risk_mapping = {
|
||||
"low": RiskLevel.LOW,
|
||||
"medium": RiskLevel.MEDIUM,
|
||||
"critical": RiskLevel.CRITICAL,
|
||||
}
|
||||
risk_level = risk_mapping.get(analysis_result.risk_level.value, RiskLevel.MEDIUM)
|
||||
|
||||
# 提取爆炸半徑
|
||||
blast = analysis_result.blast_radius
|
||||
impact_mapping = {
|
||||
"NONE": DataImpact.NONE,
|
||||
"READ_ONLY": DataImpact.READ_ONLY,
|
||||
"WRITE": DataImpact.WRITE,
|
||||
"DESTRUCTIVE": DataImpact.DESTRUCTIVE,
|
||||
}
|
||||
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE) if blast else DataImpact.NONE
|
||||
|
||||
# 建立 ApprovalRequestCreate (同 /alerts 流程)
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=f"{analysis_result.action_title} | {analysis_result.kubectl_command}",
|
||||
description=f"[AI: {ai_provider}] {analysis_result.description}",
|
||||
risk_level=risk_level,
|
||||
blast_radius=BlastRadius(
|
||||
affected_pods=blast.affected_pods if blast else 1,
|
||||
estimated_downtime=blast.estimated_downtime if blast else "~30s",
|
||||
related_services=list(set((blast.related_services if blast else []) + analysis_result.affected_services)),
|
||||
data_impact=data_impact,
|
||||
),
|
||||
dry_run_checks=[
|
||||
DryRunCheck(name="AI 信心度", passed=analysis_result.confidence >= 0.7, message=f"{analysis_result.confidence:.0%}"),
|
||||
DryRunCheck(name="來源", passed=True, message="alertmanager"),
|
||||
],
|
||||
requested_by=f"OpenClaw ({ai_provider})",
|
||||
)
|
||||
|
||||
# 使用 create_approval_with_fingerprint (同 /alerts)
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
request=approval_create,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
|
||||
# ================================================================
|
||||
# Incident-Approval 同步 (鐵律: 必須同時創建)
|
||||
# ================================================================
|
||||
incident_id = await create_incident_for_approval(
|
||||
approval_id=str(approval.id),
|
||||
risk_level=risk_level.value,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
alert_type=alert_type,
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert.labels, # Phase 1: 完整 labels 供 _extract_affected_services
|
||||
notification_type=notification_type, # ADR-073 Phase 2-2
|
||||
alert_category=alert_category, # ADR-073 Phase 2-2
|
||||
)
|
||||
|
||||
# 2026-04-06 ogt: Phase 26 — 回寫 incident_id 到 Approval
|
||||
# 這樣 Playbook 萃取和 KM 寫入才能找到對應的 Incident
|
||||
try:
|
||||
await service.update_incident_id(approval.id, incident_id)
|
||||
approval.incident_id = incident_id
|
||||
except Exception as _meta_err:
|
||||
logger.warning(
|
||||
"approval_incident_id_update_failed",
|
||||
approval_id=str(approval.id),
|
||||
incident_id=incident_id,
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
root_cause = analysis_result.description or message
|
||||
estimated_downtime = blast.estimated_downtime if blast else "~30s"
|
||||
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
|
||||
confidence = analysis_result.confidence
|
||||
|
||||
# ================================================================
|
||||
# 2026-04-05 ogt: 自動修復評估 (ADR-058 閉環)
|
||||
# Incident 建立後立即評估是否可自動修復
|
||||
# P2 以下 + 高品質 Playbook + 低風險 → 背景自動執行
|
||||
# Sprint 5.1 Q9: auto_repair=false 旗標 → 強制 HITL,不觸發背景任務
|
||||
# (2026-04-08 Claude Sonnet 4.6 Asia/Taipei,ADR-062)
|
||||
# ================================================================
|
||||
# 2026-04-10 Claude Sonnet 4.6 Asia/Taipei: 心跳/看門狗告警不進飛輪
|
||||
# NoAlertsReceived2Hours 等代表監控系統狀態,不是服務故障
|
||||
_is_heartbeat = is_heartbeat_alertname(alertname)
|
||||
if _is_heartbeat:
|
||||
logger.info(
|
||||
"auto_repair_skipped_heartbeat",
|
||||
incident_id=incident_id,
|
||||
alertname=alertname,
|
||||
)
|
||||
|
||||
if _can_auto_repair_by_rule and not _is_heartbeat:
|
||||
background_tasks.add_task(
|
||||
_try_auto_repair_background,
|
||||
incident_id=incident_id,
|
||||
approval_id=str(approval.id),
|
||||
alert_type=alert_type,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
)
|
||||
else:
|
||||
# auto_repair=false → 記錄 GUARDRAIL_BLOCKED,不觸發自動修復
|
||||
from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
|
||||
_op_log_rule = get_alert_operation_log_repository()
|
||||
background_tasks.add_task(
|
||||
_op_log_rule.append,
|
||||
"GUARDRAIL_BLOCKED",
|
||||
incident_id=incident_id,
|
||||
approval_id=str(approval.id),
|
||||
actor="prometheus-rule",
|
||||
action_detail=f"Prometheus rule 設定 auto_repair=false,強制人工審核: {alertname}",
|
||||
success=False,
|
||||
context={"alertname": alertname, "auto_repair_flag": False},
|
||||
)
|
||||
|
||||
# 推送 Telegram
|
||||
background_tasks.add_task(
|
||||
_push_to_telegram_background,
|
||||
approval_id=str(approval.id),
|
||||
risk_level=risk_level.value,
|
||||
resource_name=target_resource,
|
||||
root_cause=root_cause,
|
||||
suggested_action=(analysis_result.kubectl_command or "").strip() or analysis_result.suggested_action.value,
|
||||
estimated_downtime=estimated_downtime,
|
||||
hit_count=1,
|
||||
primary_responsibility=primary_responsibility,
|
||||
confidence=confidence,
|
||||
namespace=namespace,
|
||||
signoz_rps=signoz_metrics.rps if signoz_metrics else 0,
|
||||
signoz_rps_trend=signoz_metrics.rps_trend if signoz_metrics else "stable",
|
||||
signoz_error_rate=signoz_metrics.error_rate if signoz_metrics else 0,
|
||||
signoz_p99_latency=signoz_metrics.p99_latency_ms if signoz_metrics else 0,
|
||||
signoz_latency_trend=signoz_metrics.latency_trend if signoz_metrics else "stable",
|
||||
signoz_trace_url=signoz_trace_url or "",
|
||||
# 2026-03-29 ogt: AI Token/Cost 追蹤
|
||||
ai_tokens=ai_tokens,
|
||||
ai_cost=ai_cost,
|
||||
ai_provider=ai_provider,
|
||||
# 2026-04-08 ogt: 補傳 incident_id 以啟用詳情/重診/歷史按鈕
|
||||
incident_id=incident_id,
|
||||
# ADR-073: 路由 TYPE-4D → send_drift_card
|
||||
notification_type=notification_type,
|
||||
# ADR-075 斷點 E 修復: 路由 TYPE-8M → send_meta_alert
|
||||
alert_category=alert_category,
|
||||
)
|
||||
|
||||
record_alert_chain_success("alertmanager")
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message=f"✅ LLM 分析完成 (via {ai_provider})",
|
||||
alert_id=alert_id,
|
||||
approval_created=True,
|
||||
approval_id=str(approval.id),
|
||||
risk_level=risk_level.value,
|
||||
suggested_action=approval_create.action,
|
||||
hit_count=1,
|
||||
converged=False,
|
||||
)
|
||||
else:
|
||||
# LLM 失敗 - 使用預設值
|
||||
fallback_create = ApprovalRequestCreate(
|
||||
action="OBSERVE",
|
||||
description=f"[LLM Failed] {message}",
|
||||
risk_level=RiskLevel.MEDIUM,
|
||||
blast_radius=BlastRadius(
|
||||
affected_pods=1,
|
||||
estimated_downtime="unknown",
|
||||
related_services=[],
|
||||
data_impact=DataImpact.NONE,
|
||||
),
|
||||
dry_run_checks=[],
|
||||
requested_by="OpenClaw (fallback)",
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
request=fallback_create,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
|
||||
# ================================================================
|
||||
# Incident-Approval 同步 (鐵律: 即使 LLM 失敗也必須創建)
|
||||
# ================================================================
|
||||
fallback_incident_id = await create_incident_for_approval(
|
||||
approval_id=str(approval.id),
|
||||
risk_level="medium",
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
alert_type=alert_type,
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert.labels, # Phase 1: 完整 labels
|
||||
notification_type=notification_type, # ADR-073 Phase 2-2
|
||||
alert_category=alert_category, # ADR-073 Phase 2-2
|
||||
)
|
||||
|
||||
background_tasks.add_task(
|
||||
_push_to_telegram_background,
|
||||
approval_id=str(approval.id),
|
||||
risk_level="medium",
|
||||
resource_name=target_resource,
|
||||
root_cause=message,
|
||||
suggested_action="OBSERVE",
|
||||
estimated_downtime="unknown",
|
||||
hit_count=1,
|
||||
primary_responsibility="HUMAN",
|
||||
confidence=0.0,
|
||||
namespace=namespace,
|
||||
incident_id=fallback_incident_id,
|
||||
# ADR-073: 路由 TYPE-4D → send_drift_card
|
||||
notification_type=notification_type,
|
||||
# ADR-075 斷點 E 修復: 路由 TYPE-8M → send_meta_alert
|
||||
alert_category=alert_category,
|
||||
)
|
||||
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message="⚠️ LLM 分析失敗,使用預設值",
|
||||
alert_id=alert_id,
|
||||
approval_created=True,
|
||||
approval_id=str(approval.id),
|
||||
risk_level="medium",
|
||||
suggested_action="OBSERVE",
|
||||
hit_count=1,
|
||||
converged=False,
|
||||
)
|
||||
record_alert_chain_success("alertmanager")
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message="✅ 告警已排入背景分析 (202 Accepted)",
|
||||
alert_id=alert_id,
|
||||
approval_created=False,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("alertmanager_error", error=str(e))
|
||||
|
||||
@@ -408,7 +408,11 @@ async def _push_decision_to_telegram(
|
||||
risk_level=risk_level,
|
||||
resource_name=target[:50],
|
||||
root_cause=_card_root_cause,
|
||||
suggested_action=action[:120] if action else (description[:120] if description else "待分析"),
|
||||
# 2026-04-17 ogt + Claude Sonnet 4.6(亞太): 修復超時降級髒資料
|
||||
# 舊:action="" 時 fallback 到 description,而 description 可能是「待分析」或診斷摘要
|
||||
# 這導致 description 中的診斷文字(如「根因:...」)出現在「建議修復動作」欄位
|
||||
# 新:action="" 時固定顯示「待分析」,禁止 description 流進 suggested_action
|
||||
suggested_action=action[:120] if action else "待分析",
|
||||
estimated_downtime="5-15 min",
|
||||
primary_responsibility="INFRA",
|
||||
confidence=confidence,
|
||||
@@ -1085,8 +1089,10 @@ def _package_to_proposal_data(package: Any) -> dict[str, Any]:
|
||||
if plan and getattr(plan, "top_candidate", None):
|
||||
c = plan.top_candidate
|
||||
desc_parts.append(f"方案:{c.action[:100]}")
|
||||
if package.blocked_reason:
|
||||
desc_parts.append(f"備注:{package.blocked_reason[:100]}")
|
||||
# blocked_reason 是系統內部診斷,不能放進 description(Telegram 卡片顯示用)
|
||||
# 2026-04-17 ogt + Claude Sonnet 4.6(亞太): 修復超時髒資料污染卡片
|
||||
# 舊:blocked_reason → desc_parts → description → suggested_action 欄位顯示「備注:全局超時 > 90.0s」
|
||||
# 新:blocked_reason 只寫入 proposal_data["blocked_reason"],供下游閘門邏輯用,禁止進卡片顯示
|
||||
description = ";".join(desc_parts) if desc_parts else (action[:200] if action else "待分析")
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user