fix(api): expose AI SLO truth in weekly reports
This commit is contained in:
@@ -4212,6 +4212,14 @@ class WeeklyReportMessage:
|
||||
k3s_source_ok: bool = True
|
||||
git_source_ok: bool = True
|
||||
cost_source_ok: bool = False
|
||||
ai_slo_source_ok: bool = True
|
||||
ai_slo_auto_execute_success_rate: float | None = None
|
||||
ai_slo_auto_execute_sample_count: int = 0
|
||||
ai_slo_auto_execute_threshold: float = 0.85
|
||||
ai_slo_auto_execute_violated: bool = False
|
||||
ai_slo_top_failure: str = ""
|
||||
ai_slo_verifier_coverage_rate: float | None = None
|
||||
ai_slo_unverified_auto_count: int = 0
|
||||
all_zero_actionable_anomaly: bool = False
|
||||
report_source_confidence_percent: int = 0
|
||||
report_source_ok_count: int = 0
|
||||
@@ -4230,7 +4238,9 @@ class WeeklyReportMessage:
|
||||
self.k3s_source_ok,
|
||||
self.git_source_ok,
|
||||
self.cost_source_ok,
|
||||
self.ai_slo_source_ok,
|
||||
])
|
||||
source_total_count = 5
|
||||
all_zero = (
|
||||
self.alert_total == 0
|
||||
and self.ai_proposal_count == 0
|
||||
@@ -4241,9 +4251,10 @@ class WeeklyReportMessage:
|
||||
and self.disposition_total == 0
|
||||
)
|
||||
actionable_all_zero = self.all_zero_actionable_anomaly or all_zero
|
||||
report_trust = "低可信" if actionable_all_zero or source_ok_count < 4 else "可參考"
|
||||
report_trust = "低可信" if actionable_all_zero or source_ok_count < source_total_count else "可參考"
|
||||
source_status = (
|
||||
f"統計={'正常' if self.stats_source_ok else '失效'} / "
|
||||
f"AI SLO={'正常' if self.ai_slo_source_ok else '失效'} / "
|
||||
f"K3s={'正常' if self.k3s_source_ok else '失效'} / "
|
||||
f"Git={'正常' if self.git_source_ok else '失效'} / "
|
||||
f"成本={'正常' if self.cost_source_ok else '缺資料'}"
|
||||
@@ -4251,6 +4262,8 @@ class WeeklyReportMessage:
|
||||
source_gaps: list[str] = []
|
||||
if not self.stats_source_ok:
|
||||
source_gaps.append("告警 / AI 統計資料源失效:建立 report-source-gap:stats_api")
|
||||
if not self.ai_slo_source_ok:
|
||||
source_gaps.append("AI SLO 真相資料源失效:建立 report-source-gap:ai_slo")
|
||||
if not self.k3s_source_ok:
|
||||
source_gaps.append("K3s 指標資料源失效:建立 report-source-gap:k3s_metrics")
|
||||
if not self.git_source_ok:
|
||||
@@ -4282,6 +4295,54 @@ class WeeklyReportMessage:
|
||||
f"{formatted_assets}\n"
|
||||
)
|
||||
|
||||
def _code_metric(
|
||||
value: int | float | str,
|
||||
*,
|
||||
source_ok: bool,
|
||||
suffix: str = "",
|
||||
precision: int | None = None,
|
||||
) -> str:
|
||||
if not source_ok:
|
||||
return "<code>缺資料</code>"
|
||||
if isinstance(value, float) and precision is not None:
|
||||
rendered = f"{value:.{precision}f}"
|
||||
elif isinstance(value, int):
|
||||
rendered = f"{value:,}"
|
||||
else:
|
||||
rendered = str(value)
|
||||
return f"<code>{html.escape(rendered)}</code>{suffix}"
|
||||
|
||||
ai_slo_block = ""
|
||||
if (
|
||||
not self.ai_slo_source_ok
|
||||
or self.ai_slo_auto_execute_sample_count > 0
|
||||
or self.ai_slo_top_failure
|
||||
):
|
||||
if self.ai_slo_source_ok and self.ai_slo_auto_execute_success_rate is not None:
|
||||
slo_pct = self.ai_slo_auto_execute_success_rate * 100
|
||||
threshold_pct = self.ai_slo_auto_execute_threshold * 100
|
||||
slo_status = "違反" if self.ai_slo_auto_execute_violated else "合格"
|
||||
verifier_text = (
|
||||
f"{self.ai_slo_verifier_coverage_rate * 100:.1f}%"
|
||||
if self.ai_slo_verifier_coverage_rate is not None
|
||||
else "缺資料"
|
||||
)
|
||||
top_failure = self.ai_slo_top_failure or "目前無 top failure"
|
||||
ai_slo_block = (
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"🧠 <b>AI 自動化 SLO</b>\n"
|
||||
f"├ 自動執行成功率: <code>{slo_pct:.1f}%</code> / 目標 <code>{threshold_pct:.0f}%</code>({slo_status})\n"
|
||||
f"├ 樣本: <code>{self.ai_slo_auto_execute_sample_count}</code> | Verifier 覆蓋: <code>{verifier_text}</code>\n"
|
||||
f"├ 未驗證自動執行: <code>{self.ai_slo_unverified_auto_count}</code>\n"
|
||||
f"└ Top failure: <code>{html.escape(top_failure[:180])}</code>\n"
|
||||
)
|
||||
else:
|
||||
ai_slo_block = (
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"🧠 <b>AI 自動化 SLO</b>\n"
|
||||
f"└ <code>資料源缺口</code>:無法判定 AI 自動化是否真的接管。\n"
|
||||
)
|
||||
|
||||
message = (
|
||||
f"═══════════════════════════\n"
|
||||
f"📊 <b>AWOOOI 週報</b>\n"
|
||||
@@ -4294,29 +4355,30 @@ class WeeklyReportMessage:
|
||||
f"└ 全 0: <code>{'actionable_anomaly' if actionable_all_zero else 'no'}</code>\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"{alert_health} <b>告警統計</b>\n"
|
||||
f"├ 總數: <code>{self.alert_total}</code>\n"
|
||||
f"├ Critical: <code>{self.alert_critical}</code>\n"
|
||||
f"├ 已解決: <code>{self.alert_resolved}</code>\n"
|
||||
f"└ 解決率: <code>{self.resolved_rate:.1f}%</code>\n"
|
||||
f"├ 總數: {_code_metric(self.alert_total, source_ok=self.stats_source_ok)}\n"
|
||||
f"├ Critical: {_code_metric(self.alert_critical, source_ok=self.stats_source_ok)}\n"
|
||||
f"├ 已解決: {_code_metric(self.alert_resolved, source_ok=self.stats_source_ok)}\n"
|
||||
f"└ 解決率: {_code_metric(self.resolved_rate, source_ok=self.stats_source_ok, suffix='%', precision=1)}\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"{ai_health} <b>AI 效能</b>\n"
|
||||
f"├ 提案數: <code>{self.ai_proposal_count}</code>\n"
|
||||
f"├ 執行數: <code>{self.ai_executed_count}</code>\n"
|
||||
f"├ 成功率: <code>{self.ai_success_rate:.1f}%</code>\n"
|
||||
f"└ 平均回應: <code>{self.avg_response_minutes:.1f}</code> 分鐘\n"
|
||||
f"├ 提案數: {_code_metric(self.ai_proposal_count, source_ok=self.stats_source_ok)}\n"
|
||||
f"├ 執行數: {_code_metric(self.ai_executed_count, source_ok=self.stats_source_ok)}\n"
|
||||
f"├ 成功率: {_code_metric(self.ai_success_rate, source_ok=self.stats_source_ok, suffix='%', precision=1)}\n"
|
||||
f"└ 平均回應: {_code_metric(self.avg_response_minutes, source_ok=self.stats_source_ok, precision=1)} 分鐘\n"
|
||||
f"{ai_slo_block}"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"{k3s_health} <b>K3s 健康</b>\n"
|
||||
f"├ Uptime: <code>{self.k3s_uptime_pct:.2f}%</code>\n"
|
||||
f"├ Pod 重啟: <code>{self.pod_restart_total}</code>\n"
|
||||
f"└ HPA 擴縮: <code>{self.hpa_scale_events}</code> 次\n"
|
||||
f"├ Uptime: {_code_metric(self.k3s_uptime_pct, source_ok=self.k3s_source_ok, suffix='%', precision=2)}\n"
|
||||
f"├ Pod 重啟: {_code_metric(self.pod_restart_total, source_ok=self.k3s_source_ok)}\n"
|
||||
f"└ HPA 擴縮: {_code_metric(self.hpa_scale_events, source_ok=self.k3s_source_ok)} 次\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"📦 <b>開發活動</b>\n"
|
||||
f"├ Commits: <code>{self.commits_count}</code>\n"
|
||||
f"└ 部署: <code>{self.deploy_count}</code> 次\n"
|
||||
f"├ Commits: {_code_metric(self.commits_count, source_ok=self.git_source_ok)}\n"
|
||||
f"└ 部署: {_code_metric(self.deploy_count, source_ok=self.git_source_ok)} 次\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"💰 <b>AI 成本</b>\n"
|
||||
f"├ 費用: $<code>{self.ai_cost_week:.2f}</code>\n"
|
||||
f"└ Tokens: <code>{self.ai_tokens_week:,}</code>\n"
|
||||
f"├ 費用: ${_code_metric(self.ai_cost_week, source_ok=self.cost_source_ok, precision=2)}\n"
|
||||
f"└ Tokens: {_code_metric(self.ai_tokens_week, source_ok=self.cost_source_ok)}\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━\n"
|
||||
f"🧩 <b>資料缺口 / 下一步</b>\n"
|
||||
f"{gap_lines}\n"
|
||||
@@ -4338,7 +4400,7 @@ class WeeklyReportMessage:
|
||||
f"└ 自動化率: <b>{auto_rate}%</b>"
|
||||
)
|
||||
|
||||
return message[:2400]
|
||||
return message[:3600]
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -210,6 +210,49 @@ class WeeklyReportService:
|
||||
except Exception as _disp_e:
|
||||
logger.warning("weekly_report_disposition_failed", error=str(_disp_e))
|
||||
|
||||
ai_slo_source_ok = True
|
||||
ai_slo_auto_execute_success_rate: float | None = None
|
||||
ai_slo_auto_execute_sample_count = 0
|
||||
ai_slo_auto_execute_threshold = 0.85
|
||||
ai_slo_auto_execute_violated = False
|
||||
ai_slo_top_failure = ""
|
||||
ai_slo_verifier_coverage_rate: float | None = None
|
||||
ai_slo_unverified_auto_count = 0
|
||||
try:
|
||||
from src.services.adr100_slo_status_service import (
|
||||
get_adr100_slo_status_service,
|
||||
)
|
||||
from src.services.ai_slo_calculator import AiSloCalculator
|
||||
|
||||
slo_report = await AiSloCalculator(project_id="awoooi").calculate()
|
||||
auto_metric = next(
|
||||
(metric for metric in slo_report.metrics if metric.name == "auto_execute_success_rate"),
|
||||
None,
|
||||
)
|
||||
if auto_metric is not None:
|
||||
ai_slo_auto_execute_success_rate = auto_metric.value
|
||||
ai_slo_auto_execute_sample_count = auto_metric.sample_count
|
||||
ai_slo_auto_execute_threshold = auto_metric.threshold
|
||||
ai_slo_auto_execute_violated = auto_metric.violated
|
||||
|
||||
diagnostics = slo_report.diagnostics.get("auto_execute_success_rate") or {}
|
||||
top_failure = (diagnostics.get("top_failure_groups") or [{}])[0]
|
||||
if top_failure:
|
||||
ai_slo_top_failure = (
|
||||
f"{top_failure.get('alertname') or 'unknown'} / "
|
||||
f"{top_failure.get('playbook_id') or 'unknown'} ×"
|
||||
f"{int(top_failure.get('count') or 0)}: "
|
||||
f"{str(top_failure.get('error_signature') or '')[:90]}"
|
||||
)
|
||||
|
||||
adr100_report = await get_adr100_slo_status_service("awoooi").fetch_report()
|
||||
verification = adr100_report.get("verification_coverage") or {}
|
||||
ai_slo_verifier_coverage_rate = verification.get("coverage_rate")
|
||||
ai_slo_unverified_auto_count = int(verification.get("unverified_auto") or 0)
|
||||
except Exception as _slo_e:
|
||||
ai_slo_source_ok = False
|
||||
logger.warning("weekly_report_ai_slo_failed", error=str(_slo_e))
|
||||
|
||||
report_source_confidence = 0
|
||||
report_source_ok = 0
|
||||
report_source_total = 0
|
||||
@@ -267,6 +310,14 @@ class WeeklyReportService:
|
||||
k3s_source_ok=k3s_source_ok,
|
||||
git_source_ok=git_source_ok,
|
||||
cost_source_ok=False,
|
||||
ai_slo_source_ok=ai_slo_source_ok,
|
||||
ai_slo_auto_execute_success_rate=ai_slo_auto_execute_success_rate,
|
||||
ai_slo_auto_execute_sample_count=ai_slo_auto_execute_sample_count,
|
||||
ai_slo_auto_execute_threshold=ai_slo_auto_execute_threshold,
|
||||
ai_slo_auto_execute_violated=ai_slo_auto_execute_violated,
|
||||
ai_slo_top_failure=ai_slo_top_failure,
|
||||
ai_slo_verifier_coverage_rate=ai_slo_verifier_coverage_rate,
|
||||
ai_slo_unverified_auto_count=ai_slo_unverified_auto_count,
|
||||
all_zero_actionable_anomaly=(
|
||||
total_incidents == 0
|
||||
and ai_proposals == 0
|
||||
|
||||
@@ -469,6 +469,9 @@ def test_weekly_report_marks_all_zero_as_low_trust_anomaly() -> None:
|
||||
assert "Git=失效" in body
|
||||
assert "成本=缺資料" in body
|
||||
assert "全 0: <code>actionable_anomaly</code>" in body
|
||||
assert "總數: <code>缺資料</code>" in body
|
||||
assert "Commits: <code>缺資料</code>" in body
|
||||
assert "Tokens: <code>缺資料</code>" in body
|
||||
assert "資料缺口 / 下一步" in body
|
||||
assert "全 0 不是健康" in body
|
||||
assert "report-source-gap:stats_api" in body
|
||||
@@ -500,6 +503,45 @@ def test_weekly_report_keeps_nonzero_source_status_visible() -> None:
|
||||
assert "Tokens: <code>1,200</code>" in body
|
||||
|
||||
|
||||
def test_weekly_report_includes_ai_slo_truth_when_available() -> None:
    """With every source healthy and AI SLO data supplied, the formatted body shows the SLO section."""
    top_failure_text = (
        "DockerContainerMissingResourceLimit / ansible:188-ai-web ×5: "
        "role host-textfile-exporters not found"
    )
    report_fields = {
        "week_range": "2026-W26",
        "report_date": "2026-06-27 15:40",
        "alert_total": 3,
        "ai_proposal_count": 4,
        "ai_executed_count": 2,
        "ai_success_rate": 50.0,
        "commits_count": 6,
        "deploy_count": 2,
        "ai_tokens_week": 1200,
        "stats_source_ok": True,
        "k3s_source_ok": True,
        "git_source_ok": True,
        "cost_source_ok": True,
        "ai_slo_source_ok": True,
        "ai_slo_auto_execute_success_rate": 0.5,
        "ai_slo_auto_execute_sample_count": 14,
        "ai_slo_auto_execute_threshold": 0.85,
        "ai_slo_auto_execute_violated": True,
        "ai_slo_top_failure": top_failure_text,
        "ai_slo_verifier_coverage_rate": 0.857,
        "ai_slo_unverified_auto_count": 2,
    }

    body = WeeklyReportMessage(**report_fields).format()

    # Each fragment must appear verbatim in the rendered report.
    expected_fragments = (
        "AI 自動化 SLO",
        "自動執行成功率: <code>50.0%</code>",
        "目標 <code>85%</code>",
        "樣本: <code>14</code>",
        "Verifier 覆蓋: <code>85.7%</code>",
        "未驗證自動執行: <code>2</code>",
        "DockerContainerMissingResourceLimit / ansible:188-ai-web",
    )
    for fragment in expected_fragments:
        assert fragment in body
|
||||
|
||||
|
||||
def test_weekly_report_includes_report_source_health_assets() -> None:
|
||||
report = WeeklyReportMessage(
|
||||
week_range="2026-W25",
|
||||
|
||||
Reference in New Issue
Block a user