diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py index 4d2f4a83..c29b1f20 100644 --- a/apps/api/src/services/telegram_gateway.py +++ b/apps/api/src/services/telegram_gateway.py @@ -4212,6 +4212,14 @@ class WeeklyReportMessage: k3s_source_ok: bool = True git_source_ok: bool = True cost_source_ok: bool = False + ai_slo_source_ok: bool = True + ai_slo_auto_execute_success_rate: float | None = None + ai_slo_auto_execute_sample_count: int = 0 + ai_slo_auto_execute_threshold: float = 0.85 + ai_slo_auto_execute_violated: bool = False + ai_slo_top_failure: str = "" + ai_slo_verifier_coverage_rate: float | None = None + ai_slo_unverified_auto_count: int = 0 all_zero_actionable_anomaly: bool = False report_source_confidence_percent: int = 0 report_source_ok_count: int = 0 @@ -4230,7 +4238,9 @@ class WeeklyReportMessage: self.k3s_source_ok, self.git_source_ok, self.cost_source_ok, + self.ai_slo_source_ok, ]) + source_total_count = 5 all_zero = ( self.alert_total == 0 and self.ai_proposal_count == 0 @@ -4241,9 +4251,10 @@ class WeeklyReportMessage: and self.disposition_total == 0 ) actionable_all_zero = self.all_zero_actionable_anomaly or all_zero - report_trust = "低可信" if actionable_all_zero or source_ok_count < 4 else "可參考" + report_trust = "低可信" if actionable_all_zero or source_ok_count < source_total_count else "可參考" source_status = ( f"統計={'正常' if self.stats_source_ok else '失效'} / " + f"AI SLO={'正常' if self.ai_slo_source_ok else '失效'} / " f"K3s={'正常' if self.k3s_source_ok else '失效'} / " f"Git={'正常' if self.git_source_ok else '失效'} / " f"成本={'正常' if self.cost_source_ok else '缺資料'}" @@ -4251,6 +4262,8 @@ class WeeklyReportMessage: source_gaps: list[str] = [] if not self.stats_source_ok: source_gaps.append("告警 / AI 統計資料源失效:建立 report-source-gap:stats_api") + if not self.ai_slo_source_ok: + source_gaps.append("AI SLO 真相資料源失效:建立 report-source-gap:ai_slo") if not self.k3s_source_ok: source_gaps.append("K3s 指標資料源失效:建立 report-source-gap:k3s_metrics") if not self.git_source_ok: @@ -4282,6 +4295,54 @@ class WeeklyReportMessage: f"{formatted_assets}\n" ) + def _code_metric( + value: int | float | str, + *, + source_ok: bool, + suffix: str = "", + precision: int | None = None, + ) -> str: + if not source_ok: + return "缺資料" + if isinstance(value, float) and precision is not None: + rendered = f"{value:.{precision}f}" + elif isinstance(value, int): + rendered = f"{value:,}" + else: + rendered = str(value) + return f"{html.escape(rendered)}{suffix}" + + ai_slo_block = "" + if ( + not self.ai_slo_source_ok + or self.ai_slo_auto_execute_sample_count > 0 + or self.ai_slo_top_failure + ): + if self.ai_slo_source_ok and self.ai_slo_auto_execute_success_rate is not None: + slo_pct = self.ai_slo_auto_execute_success_rate * 100 + threshold_pct = self.ai_slo_auto_execute_threshold * 100 + slo_status = "違反" if self.ai_slo_auto_execute_violated else "合格" + verifier_text = ( + f"{self.ai_slo_verifier_coverage_rate * 100:.1f}%" + if self.ai_slo_verifier_coverage_rate is not None + else "缺資料" + ) + top_failure = self.ai_slo_top_failure or "目前無 top failure" + ai_slo_block = ( + f"━━━━━━━━━━━━━━━━━━━\n" + f"🧠 AI 自動化 SLO\n" + f"├ 自動執行成功率: {slo_pct:.1f}% / 目標 {threshold_pct:.0f}%({slo_status})\n" + f"├ 樣本: {self.ai_slo_auto_execute_sample_count} | Verifier 覆蓋: {verifier_text}\n" + f"├ 未驗證自動執行: {self.ai_slo_unverified_auto_count}\n" + f"└ Top failure: {html.escape(top_failure[:180])}\n" + ) + else: + ai_slo_block = ( + f"━━━━━━━━━━━━━━━━━━━\n" + f"🧠 AI 自動化 SLO\n" + f"└ 資料源缺口:無法判定 AI 自動化是否真的接管。\n" + ) + message = ( f"═══════════════════════════\n" f"📊 AWOOOI 週報\n" @@ -4294,29 +4355,30 @@ class WeeklyReportMessage: f"└ 全 0: {'actionable_anomaly' if actionable_all_zero else 'no'}\n" f"━━━━━━━━━━━━━━━━━━━\n" f"{alert_health} 告警統計\n" - f"├ 總數: {self.alert_total}\n" - f"├ Critical: {self.alert_critical}\n" - f"├ 已解決: {self.alert_resolved}\n" - f"└ 解決率: {self.resolved_rate:.1f}%\n" + f"├ 總數: {_code_metric(self.alert_total, source_ok=self.stats_source_ok)}\n" + f"├ Critical: {_code_metric(self.alert_critical, source_ok=self.stats_source_ok)}\n" + f"├ 已解決: {_code_metric(self.alert_resolved, source_ok=self.stats_source_ok)}\n" + f"└ 解決率: {_code_metric(self.resolved_rate, source_ok=self.stats_source_ok, suffix='%', precision=1)}\n" f"━━━━━━━━━━━━━━━━━━━\n" f"{ai_health} AI 效能\n" - f"├ 提案數: {self.ai_proposal_count}\n" - f"├ 執行數: {self.ai_executed_count}\n" - f"├ 成功率: {self.ai_success_rate:.1f}%\n" - f"└ 平均回應: {self.avg_response_minutes:.1f} 分鐘\n" + f"├ 提案數: {_code_metric(self.ai_proposal_count, source_ok=self.stats_source_ok)}\n" + f"├ 執行數: {_code_metric(self.ai_executed_count, source_ok=self.stats_source_ok)}\n" + f"├ 成功率: {_code_metric(self.ai_success_rate, source_ok=self.stats_source_ok, suffix='%', precision=1)}\n" + f"└ 平均回應: {_code_metric(self.avg_response_minutes, source_ok=self.stats_source_ok, precision=1)} 分鐘\n" + f"{ai_slo_block}" f"━━━━━━━━━━━━━━━━━━━\n" f"{k3s_health} K3s 健康\n" - f"├ Uptime: {self.k3s_uptime_pct:.2f}%\n" - f"├ Pod 重啟: {self.pod_restart_total}\n" - f"└ HPA 擴縮: {self.hpa_scale_events} 次\n" + f"├ Uptime: {_code_metric(self.k3s_uptime_pct, source_ok=self.k3s_source_ok, suffix='%', precision=2)}\n" + f"├ Pod 重啟: {_code_metric(self.pod_restart_total, source_ok=self.k3s_source_ok)}\n" + f"└ HPA 擴縮: {_code_metric(self.hpa_scale_events, source_ok=self.k3s_source_ok)} 次\n" f"━━━━━━━━━━━━━━━━━━━\n" f"📦 開發活動\n" - f"├ Commits: {self.commits_count}\n" - f"└ 部署: {self.deploy_count} 次\n" + f"├ Commits: {_code_metric(self.commits_count, source_ok=self.git_source_ok)}\n" + f"└ 部署: {_code_metric(self.deploy_count, source_ok=self.git_source_ok)} 次\n" f"━━━━━━━━━━━━━━━━━━━\n" f"💰 AI 成本\n" - f"├ 費用: ${self.ai_cost_week:.2f}\n" - f"└ Tokens: {self.ai_tokens_week:,}\n" + f"├ 費用: ${_code_metric(self.ai_cost_week, source_ok=self.cost_source_ok, precision=2)}\n" + f"└ Tokens: {_code_metric(self.ai_tokens_week, source_ok=self.cost_source_ok)}\n" f"━━━━━━━━━━━━━━━━━━━\n" f"🧩 資料缺口 / 下一步\n" f"{gap_lines}\n" @@ -4338,7 +4400,7 @@ class WeeklyReportMessage: f"└ 自動化率: {auto_rate}%" ) - return message[:2400] + return message[:3600] @dataclass diff --git a/apps/api/src/services/weekly_report_service.py b/apps/api/src/services/weekly_report_service.py index d2905eb1..1366eea3 100644 --- a/apps/api/src/services/weekly_report_service.py +++ b/apps/api/src/services/weekly_report_service.py @@ -210,6 +210,49 @@ class WeeklyReportService: except Exception as _disp_e: logger.warning("weekly_report_disposition_failed", error=str(_disp_e)) + ai_slo_source_ok = True + ai_slo_auto_execute_success_rate: float | None = None + ai_slo_auto_execute_sample_count = 0 + ai_slo_auto_execute_threshold = 0.85 + ai_slo_auto_execute_violated = False + ai_slo_top_failure = "" + ai_slo_verifier_coverage_rate: float | None = None + ai_slo_unverified_auto_count = 0 + try: + from src.services.adr100_slo_status_service import ( + get_adr100_slo_status_service, + ) + from src.services.ai_slo_calculator import AiSloCalculator + + slo_report = await AiSloCalculator(project_id="awoooi").calculate() + auto_metric = next( + (metric for metric in slo_report.metrics if metric.name == "auto_execute_success_rate"), + None, + ) + if auto_metric is not None: + ai_slo_auto_execute_success_rate = auto_metric.value + ai_slo_auto_execute_sample_count = auto_metric.sample_count + ai_slo_auto_execute_threshold = auto_metric.threshold + ai_slo_auto_execute_violated = auto_metric.violated + + diagnostics = slo_report.diagnostics.get("auto_execute_success_rate") or {} + top_failure = (diagnostics.get("top_failure_groups") or [{}])[0] + if top_failure: + ai_slo_top_failure = ( + f"{top_failure.get('alertname') or 'unknown'} / " + f"{top_failure.get('playbook_id') or 'unknown'} ×" + f"{int(top_failure.get('count') or 0)}: " + f"{str(top_failure.get('error_signature') or '')[:90]}" + ) + + adr100_report = await get_adr100_slo_status_service("awoooi").fetch_report() + verification = adr100_report.get("verification_coverage") or {} + ai_slo_verifier_coverage_rate = verification.get("coverage_rate") + ai_slo_unverified_auto_count = int(verification.get("unverified_auto") or 0) + except Exception as _slo_e: + ai_slo_source_ok = False + logger.warning("weekly_report_ai_slo_failed", error=str(_slo_e)) + report_source_confidence = 0 report_source_ok = 0 report_source_total = 0 @@ -267,6 +310,14 @@ class WeeklyReportService: k3s_source_ok=k3s_source_ok, git_source_ok=git_source_ok, cost_source_ok=False, + ai_slo_source_ok=ai_slo_source_ok, + ai_slo_auto_execute_success_rate=ai_slo_auto_execute_success_rate, + ai_slo_auto_execute_sample_count=ai_slo_auto_execute_sample_count, + ai_slo_auto_execute_threshold=ai_slo_auto_execute_threshold, + ai_slo_auto_execute_violated=ai_slo_auto_execute_violated, + ai_slo_top_failure=ai_slo_top_failure, + ai_slo_verifier_coverage_rate=ai_slo_verifier_coverage_rate, + ai_slo_unverified_auto_count=ai_slo_unverified_auto_count, all_zero_actionable_anomaly=( total_incidents == 0 and ai_proposals == 0 diff --git a/apps/api/tests/test_telegram_message_templates.py b/apps/api/tests/test_telegram_message_templates.py index 1b45d074..3aa58b0d 100644 --- a/apps/api/tests/test_telegram_message_templates.py +++ b/apps/api/tests/test_telegram_message_templates.py @@ -469,6 +469,9 @@ def test_weekly_report_marks_all_zero_as_low_trust_anomaly() -> None: assert "Git=失效" in body assert "成本=缺資料" in body assert "全 0: actionable_anomaly" in body + assert "總數: 缺資料" in body + assert "Commits: 缺資料" in body + assert "Tokens: 缺資料" in body assert "資料缺口 / 下一步" in body assert "全 0 不是健康" in body assert "report-source-gap:stats_api" in body @@ -500,6 +503,45 @@ def test_weekly_report_keeps_nonzero_source_status_visible() -> None: assert "Tokens: 1,200" in body +def test_weekly_report_includes_ai_slo_truth_when_available() -> None: + report = WeeklyReportMessage( + week_range="2026-W26", + report_date="2026-06-27 15:40", + alert_total=3, + ai_proposal_count=4, + ai_executed_count=2, + ai_success_rate=50.0, + commits_count=6, + deploy_count=2, + ai_tokens_week=1200, + stats_source_ok=True, + k3s_source_ok=True, + git_source_ok=True, + cost_source_ok=True, + ai_slo_source_ok=True, + ai_slo_auto_execute_success_rate=0.5, + ai_slo_auto_execute_sample_count=14, + ai_slo_auto_execute_threshold=0.85, + ai_slo_auto_execute_violated=True, + ai_slo_top_failure=( + "DockerContainerMissingResourceLimit / ansible:188-ai-web ×5: " + "role host-textfile-exporters not found" + ), + ai_slo_verifier_coverage_rate=0.857, + ai_slo_unverified_auto_count=2, + ) + + body = report.format() + + assert "AI 自動化 SLO" in body + assert "自動執行成功率: 50.0%" in body + assert "目標 85%" in body + assert "樣本: 14" in body + assert "Verifier 覆蓋: 85.7%" in body + assert "未驗證自動執行: 2" in body + assert "DockerContainerMissingResourceLimit / ansible:188-ai-web" in body + + def test_weekly_report_includes_report_source_health_assets() -> None: report = WeeklyReportMessage( week_range="2026-W25",