Files
ewoooc/tests/test_token_report_service.py
OoO bb891f1a6e feat(observability): ai_call_logger + 23:55 Telegram token 日報
services/ai_call_logger.py(300 行)— 統一 LLM 遙測層
- context manager log_ai_call() / decorator logged_ai_call()
- async fire-and-forget 寫 ai_calls,DB 失敗永不影響主流程
- kill-switch:連續 10 次失敗自動降級為 logger.info
- env AI_CALL_LOGGING_ENABLED=false 一鍵關閉
- COST_TABLE 集中 13 個模型計費(gemini/claude/nim/ollama)
- PII 保護:meta 只存 prompt_hash[:12],不存原文
- 22 unit tests 全綠

services/token_report_service.py(580 行)— 6 段落每日 23:55 日報
- Section 1-6: 總覽 / 供應商分布 / TOP10 caller / 成本預算 / 趨勢 / 告警建議
- 7 條告警規則 + Hermes 規則引擎智能建議
- HTML escape + 4096 字元雙保險
- Telegram 失敗 fallback 訊息
- ai_insights 寫入 PII safe(無 chat_id/username 落地)
- 30 unit tests 全綠

A11 critic 護欄:H6 chat_id PII fix(services/openclaw_bot_routes 4 處 → SHA1[:8])

Operation Ollama-First v5.0 / Phase 1 A4+A5

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 23:04:58 +08:00

527 lines
25 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_token_report_service.py
LLM Token 日報服務單元測試 (Operation Ollama-First v5.0 — Phase 1 收尾)
測試紀律:
- 不真連 DB:mock _exec_query 返回固定資料
- 不真連 Telegram:mock send_telegram_with_result
- 不真寫 ai_insights:mock _persist_to_ai_insights
- 7 個告警規則各自獨立觸發測試
- HTML escape 驗證(caller 名含 < / & 不破版)
- 訊息字數 ≤ 4096 驗證
"""
from __future__ import annotations
import os
import sys
from datetime import date, datetime, timedelta, timezone
from typing import Any, Dict, List
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import services.token_report_service as svc
# ─────────────────────────────────────────────────────────────────────────────
# 共用 fixtures
# ─────────────────────────────────────────────────────────────────────────────
# Report date passed to the service functions under test.
TARGET_DATE = date(2026, 5, 3)
def _make_summary(**overrides) -> Dict[str, Any]:
base = {
'total_tokens': 3_142_891,
'total_calls': 2_847,
'total_cost_usd': 0.36,
'avg_duration_ms': 1847.0,
'success_rate': 98.7,
'failed_calls': 37,
'ollama_pct': 64.3,
'prev_total_tokens': 2_905_000,
'wow_pct': 8.2,
}
base.update(overrides)
return base
def _make_by_provider(**overrides) -> List[Dict[str, Any]]:
"""7 個 provider 的預設配置,可用 overrides={'gemini': {'pct': 50}} 覆寫"""
defaults = {
'gcp_ollama': {'tokens': 2_021_000, 'pct': 64.3, 'calls': 2103, 'cost_usd': 0.0, 'avg_duration_ms': 1200},
'ollama_111': {'tokens': 12_000, 'pct': 0.4, 'calls': 18, 'cost_usd': 0.0, 'avg_duration_ms': 2400},
'gemini': {'tokens': 892_000, 'pct': 28.4, 'calls': 589, 'cost_usd': 0.31, 'avg_duration_ms': 2100},
'claude': {'tokens': 178_000, 'pct': 5.7, 'calls': 98, 'cost_usd': 0.04, 'avg_duration_ms': 3200},
'nim': {'tokens': 28_000, 'pct': 0.9, 'calls': 24, 'cost_usd': 0.0, 'avg_duration_ms': 1800},
'openrouter': {'tokens': 12_000, 'pct': 0.4, 'calls': 15, 'cost_usd': 0.01, 'avg_duration_ms': 2900},
'nim_via_elephant': {'tokens': 27_000, 'pct': 0.9, 'calls': 12, 'cost_usd': 0.0, 'avg_duration_ms': 3100},
}
for k, v in (overrides or {}).items():
defaults.setdefault(k, {}).update(v)
return [{'provider': k, **v} for k, v in defaults.items()]
def _make_top_callers() -> List[Dict[str, Any]]:
return [
{'caller': 'km_embedding_worker', 'provider': 'gcp_ollama',
'model': 'bge-m3:latest', 'tokens': 892_000, 'calls': 1247, 'delta_pct': 5.0},
{'caller': 'hermes_analyst', 'provider': 'gcp_ollama',
'model': 'hermes3:latest', 'tokens': 482_000, 'calls': 72, 'delta_pct': -2.0},
{'caller': 'code_review_hermes', 'provider': 'claude',
'model': 'claude-opus-4-7', 'tokens': 158_000, 'calls': 8, 'delta_pct': 42.0},
]
def _make_trends() -> Dict[str, Any]:
return {
'today_total_tokens': 3_142_000,
'today_gemini_tokens': 892_000,
'today_ollama_tokens': 2_033_000,
'today_claude_tokens': 178_000,
'today_avg_duration': 1847.0,
'today_error_rate': 1.3,
'today_gcp_hit_pct': 99.6,
'7d_avg_total': 2_905_000,
'7d_avg_gemini': 948_000,
'7d_avg_ollama': 1_712_000,
'7d_avg_claude': 165_000,
'7d_avg_duration': 1920.0,
'7d_error_rate': 1.8,
'7d_total_tokens': 18_832_000,
'7d_total_cost': 11.84,
'7d_gcp_hit_pct_7d': 98.9,
'7d_gcp_hit_pct': 98.9,
}
def _make_budgets(**overrides) -> Dict[str, Any]:
base = {
'daily_spent': 0.36,
'weekly_spent': 1.92,
'monthly_spent': 5.84,
'daily_budget': 1.00,
'weekly_budget': 5.00,
'monthly_budget': 20.00,
}
base.update(overrides)
return base
def _make_cache_stats(**overrides) -> Dict[str, Any]:
base = {
'claude': {'total': 98, 'hits': 62, 'pct': 63.3},
'gemini': {'total': 0, 'hits': 0, 'pct': 0.0},
}
base.update(overrides)
return base
# ─────────────────────────────────────────────────────────────────────────────
# 1. 報表組裝測試 — generate_daily_report 路徑
# ─────────────────────────────────────────────────────────────────────────────
class TestReportFormat:
    """Checks that the rendered report has its main sections and a sane length."""

    @staticmethod
    def _render(**replacements):
        """Call ``svc._format_report`` with fixture defaults, overridden by keyword."""
        arguments = {
            'target_date': TARGET_DATE,
            'summary': _make_summary(),
            'by_provider': _make_by_provider(),
            'top_callers': _make_top_callers(),
            'costs': [],
            'trends': _make_trends(),
            'budgets': _make_budgets(),
            'cache_stats': _make_cache_stats(),
            'alerts': [],
            'insights': [],
        }
        arguments.update(replacements)
        return svc._format_report(**arguments)

    def test_format_report_contains_all_six_sections(self):
        """All six section headings must appear in the report."""
        rendered = self._render(
            costs=[{'provider': 'gemini', 'model': 'gemini-2.5-flash', 'cost_usd': 0.26, 'calls': 50}],
            insights=[{'icon': '', 'text': 'Ollama-First 達標'}],
        )
        expected_headings = (
            '【1】今日總覽',
            '【2】供應商分布',
            '【3】呼叫點 TOP',
            '【4】成本分析',
            '【5】趨勢與洞察',
            '【6】告警與建議',
        )
        for heading in expected_headings:
            assert heading in rendered

    def test_format_report_under_telegram_limit(self):
        """A full report (10 callers, 12 cost items, several alerts) keeps a bounded raw length."""
        many_callers = _make_top_callers() * 4  # 12 rows, sliced to 10 below
        many_costs = [
            {'provider': 'p', 'model': f'model-{index}', 'cost_usd': 0.01, 'calls': 1}
            for index in range(12)
        ]
        many_alerts = [
            {'level': 'P1', 'icon': '🔴', 'title': 'X' * 80, 'suggestion': 'Y' * 80}
            for _ in range(5)
        ]
        out = self._render(
            top_callers=many_callers[:10],
            costs=many_costs,
            alerts=many_alerts,
        )
        # Per the original note, send_daily_report truncates at 4000 characters
        # (HTML-safe); here only the raw, pre-truncation length is bounded.
        assert len(out) < 6000, f"原始報表 {len(out)} 字元,可能需縮減欄位寬度"

    def test_format_report_html_escape_caller_name(self):
        """A caller name containing ``<script>`` must not be emitted verbatim."""
        hostile_row = {
            'caller': 'evil<script>',
            'provider': 'gcp_ollama',
            'model': 'a&b<c>',
            'tokens': 100,
            'calls': 1,
            'delta_pct': None,
        }
        rendered = self._render(top_callers=[hostile_row])
        assert '<script>' not in rendered, "caller 含 <script> 必須被 escape"
        assert '&lt;script&gt;' in rendered
        assert '&amp;' in rendered

    def test_failure_report_html_safe(self):
        """The fallback message for a DB failure must HTML-escape the error text."""
        raw_error = 'DB error: <a href="x">x</a>'
        rendered = svc._format_failure_report(TARGET_DATE, raw_error)
        assert '日報生成失敗' in rendered
        assert '&lt;a href' in rendered  # the "<" has been escaped
# ─────────────────────────────────────────────────────────────────────────────
# 2. 告警規則測試 — _detect_alerts 7 條規則
# ─────────────────────────────────────────────────────────────────────────────
class TestAlertRules:
    """One independent test per alert rule, each asserting the rule fires."""

    @staticmethod
    def _detect(*, summary=None, by_provider=None, callers=None,
                trends=None, budgets=None, cache_stats=None):
        """Run ``svc._detect_alerts`` with healthy fixtures for every omitted input."""
        return svc._detect_alerts(
            _make_summary() if summary is None else summary,
            _make_by_provider() if by_provider is None else by_provider,
            [] if callers is None else callers,
            _make_trends() if trends is None else trends,
            _make_budgets() if budgets is None else budgets,
            _make_cache_stats() if cache_stats is None else cache_stats,
        )

    def test_rule1_caller_token_spike(self):
        """R1: a single caller spikes by >= +40% (factor=1.4)."""
        spiking_caller = {
            'caller': 'code_review_hermes', 'provider': 'claude',
            'model': 'claude-opus-4-7', 'tokens': 158_000,
            'calls': 8, 'delta_pct': 42.0,
        }
        alerts = self._detect(callers=[spiking_caller])
        fired = any('暴增' in alert['title'] and alert['level'] == 'P2' for alert in alerts)
        assert fired, f"R1 未觸發alerts={alerts}"

    def test_rule2_gemini_share_too_high(self):
        """R2: Gemini share > 35% -> "Ollama-First lost" alert."""
        providers = _make_by_provider()
        for row in providers:
            if row['provider'] == 'gemini':
                row['pct'] = 50.0
        alerts = self._detect(by_provider=providers)
        fired = any('Gemini 占比' in alert['title'] for alert in alerts)
        assert fired, f"R2 未觸發alerts={alerts}"

    def test_rule3_error_rate_critical(self):
        """R3: global failure rate > 5% -> P1."""
        failing_summary = _make_summary(failed_calls=300, total_calls=2000)  # 15%
        alerts = self._detect(summary=failing_summary)
        p1 = [
            alert for alert in alerts
            if alert['level'] == 'P1' and '失敗率' in alert['title']
        ]
        assert p1, f"R3 未觸發alerts={alerts}"

    def test_rule4_budget_overrun(self):
        """R4: monthly cost reaches 80% of budget -> P1."""
        near_limit = _make_budgets(monthly_spent=18.0, monthly_budget=20.0)  # 90%
        alerts = self._detect(budgets=near_limit)
        fired = any('月成本' in alert['title'] and alert['level'] == 'P1' for alert in alerts)
        assert fired, f"R4 未觸發alerts={alerts}"

    def test_rule5_gcp_hit_low(self):
        """R5: GCP Ollama hit rate < 90% -> P2 (requires Ollama traffic)."""
        degraded_trends = _make_trends()
        degraded_trends['today_gcp_hit_pct'] = 70.0
        alerts = self._detect(trends=degraded_trends)
        fired = any('GCP Ollama 命中率' in alert['title'] for alert in alerts)
        assert fired, f"R5 未觸發alerts={alerts}"

    def test_rule6_claude_cache_low(self):
        """R6: Claude cache hit rate < 40% (checked only with >= 10 calls) -> INFO."""
        cold_cache = _make_cache_stats(claude={'total': 100, 'hits': 20, 'pct': 20.0})
        alerts = self._detect(cache_stats=cold_cache)
        fired = any('Claude prompt cache' in alert['title'] for alert in alerts)
        assert fired, f"R6 未觸發alerts={alerts}"

    def test_rule6_claude_cache_low_skipped_when_few_calls(self):
        """R6 boundary: with < 10 calls no alert fires (sample too small)."""
        tiny_sample = _make_cache_stats(claude={'total': 5, 'hits': 0, 'pct': 0.0})
        alerts = self._detect(cache_stats=tiny_sample)
        cache_alerts = [alert for alert in alerts if 'Claude prompt cache' in alert['title']]
        assert not cache_alerts, "樣本不足時不應告警"

    def test_no_alerts_when_healthy(self):
        """A healthy day must produce no P1/P2 alerts."""
        calm_callers = _make_top_callers()[:2]  # leaves out the +42% spike row
        alerts = self._detect(callers=calm_callers)
        critical = [alert for alert in alerts if alert['level'] in ('P1', 'P2')]
        assert not critical, f"健康狀態不應有 P1/P2 告警;得到:{critical}"
# ─────────────────────────────────────────────────────────────────────────────
# 3. 智能建議測試 — _generate_insights
# ─────────────────────────────────────────────────────────────────────────────
class TestInsights:
    """Checks on the suggestions returned by ``svc._generate_insights``."""

    def test_ollama_first_target_met(self):
        """Ollama share >= 60% -> a "target met" suggestion is present."""
        summary = _make_summary(ollama_pct=64.3)
        providers = _make_by_provider()
        insights = svc._generate_insights(TARGET_DATE, summary, providers)
        assert any('達標' in item['text'] for item in insights)

    def test_ollama_first_target_missed(self):
        """Ollama share < 60% -> a "target missed" suggestion is present."""
        summary = _make_summary(ollama_pct=45.0)
        providers = _make_by_provider()
        insights = svc._generate_insights(TARGET_DATE, summary, providers)
        assert any('未達' in item['text'] for item in insights)

    def test_nim_low_usage_suggestion(self):
        """NIM usage < 100K tokens -> suggest taking NIM offline."""
        nim_providers = ('nim', 'nim_via_elephant')
        providers = _make_by_provider()
        for row in providers:
            if row['provider'] in nim_providers:
                row['tokens'] = 5000
        insights = svc._generate_insights(TARGET_DATE, _make_summary(), providers)
        assert any('NIM 用量' in item['text'] for item in insights)
# ─────────────────────────────────────────────────────────────────────────────
# 4. SQL 查詢測試 — mock _exec_query 驗證 SQL 結構正確
# ─────────────────────────────────────────────────────────────────────────────
class TestQueriesViaMock:
    """Query helpers exercised with ``svc._exec_query`` replaced by a stub."""

    def test_query_summary_calls_two_windows(self, monkeypatch):
        """``_query_summary`` runs two queries: the target day and the previous day."""
        recorded: List[Dict] = []

        def fake_exec(sql, params):
            first_line = sql.strip().split('\n', 1)[0]
            recorded.append({'sql_head': first_line, 'params': dict(params)})
            # The stub picks its reply from the SQL text: the query containing
            # COUNT(*) gets the target-day row, any other gets the previous-day row.
            if 'COUNT(*)' not in sql:
                return [{'prev_total_tokens': 90_000}]
            return [{
                'total_tokens': 100_000, 'total_calls': 50,
                'total_cost_usd': 0.5, 'avg_duration_ms': 1500,
                'ok_calls': 49, 'ollama_tokens': 70_000,
            }]

        monkeypatch.setattr(svc, '_exec_query', fake_exec)
        result = svc._query_summary(TARGET_DATE)

        assert len(recorded) == 2
        first_call, second_call = recorded[0], recorded[1]
        # The second window must end where the first begins (previous-day window).
        assert second_call['params']['end'] == first_call['params']['start']
        assert result['total_tokens'] == 100_000
        assert result['ollama_pct'] == pytest.approx(70.0, rel=0.01)
        assert result['success_rate'] == pytest.approx(98.0, rel=0.01)
        assert result['failed_calls'] == 1
        assert result['wow_pct'] == pytest.approx(11.11, rel=0.01)

    def test_query_by_provider_returns_all_eight_providers(self, monkeypatch):
        """Eight provider rows come back even when only one provider has data.

        Per the critic-A11 B4 patch note, adding ``ollama_secondary`` raised the
        count from 7 to 8 (three-host architecture consistency).
        """
        def fake_exec(sql, params):
            return [{
                'provider': 'gcp_ollama', 'tokens': 1000, 'calls': 5,
                'cost_usd': 0.0, 'avg_duration_ms': 1000,
            }]

        monkeypatch.setattr(svc, '_exec_query', fake_exec)
        result = svc._query_by_provider(TARGET_DATE)

        def row_for(name):
            return next(row for row in result if row['provider'] == name)

        assert len(result) == 8
        assert row_for('gcp_ollama')['tokens'] == 1000
        assert row_for('ollama_secondary')['tokens'] == 0  # no data -> zero placeholder
        assert row_for('gemini')['tokens'] == 0  # no data -> zero placeholder

    def test_query_top_callers_orders_by_tokens(self, monkeypatch):
        """Rows keep the stub's order and ``delta_pct`` is derived from the 7-day average."""
        def fake_exec(sql, params):
            with_baseline = {
                'caller': 'a', 'provider': 'gcp_ollama', 'top_model': 'm1',
                'tokens': 500, 'calls': 5, 'avg_tokens_7d': 400,
            }
            without_baseline = {
                'caller': 'b', 'provider': 'gemini', 'top_model': 'm2',
                'tokens': 200, 'calls': 2, 'avg_tokens_7d': 0,
            }
            return [with_baseline, without_baseline]

        monkeypatch.setattr(svc, '_exec_query', fake_exec)
        result = svc._query_top_callers(TARGET_DATE, limit=10)

        assert len(result) == 2
        leader, runner_up = result[0], result[1]
        assert leader['caller'] == 'a'
        # delta = (500 - 400) / 400 = 25%
        assert leader['delta_pct'] == pytest.approx(25.0, rel=0.01)
        # A zero baseline yields delta_pct=None (avoids dividing by zero).
        assert runner_up['delta_pct'] is None

    def test_query_cost_breakdown_filters_zero_cost(self, monkeypatch):
        """Zero-cost models (e.g. Ollama) must be filtered out by the SQL itself."""
        statements = []

        def fake_exec(sql, params):
            statements.append(sql)
            return []

        monkeypatch.setattr(svc, '_exec_query', fake_exec)
        svc._query_cost_breakdown(TARGET_DATE)
        assert 'cost_usd > 0' in statements[0]
# ─────────────────────────────────────────────────────────────────────────────
# 5. send_daily_report 整合 — mock 整條鏈
# ─────────────────────────────────────────────────────────────────────────────
class TestSendDailyReport:
    """``send_daily_report`` / ``generate_daily_report`` with every collaborator stubbed."""

    def test_send_happy_path(self, monkeypatch):
        """The whole chain runs: generate -> send -> persist are each invoked."""
        monkeypatch.setattr(svc, 'generate_daily_report', lambda d: '<b>OK</b>')

        deliveries = []

        def fake_send(text, **kwargs):
            deliveries.append({'text': text, 'kwargs': kwargs})
            return {'ok': True, 'sent': 1, 'failed': 0, 'chat_ids': [-1], 'errors': []}

        # Stub telegram_templates.send_telegram_with_result.
        import services.telegram_templates as tg
        monkeypatch.setattr(tg, 'send_telegram_with_result', fake_send)

        persisted = []

        def fake_persist(d, c, r):
            persisted.append((d, c, r))

        monkeypatch.setattr(svc, '_persist_to_ai_insights', fake_persist)

        result = svc.send_daily_report(TARGET_DATE)

        assert result['ok'] is True
        assert result['sent'] == 1
        assert len(deliveries) == 1
        assert deliveries[0]['kwargs'].get('parse_mode') == 'HTML'
        assert len(persisted) == 1
        assert persisted[0][0] == TARGET_DATE

    def test_send_truncates_oversized_message(self, monkeypatch):
        """A message longer than 4000 chars is truncated and gets a truncation marker."""
        oversized = 'X' * 5000
        monkeypatch.setattr(svc, 'generate_daily_report', lambda d: oversized)

        outgoing = []

        def fake_send(text, **kwargs):
            outgoing.append(text)
            return {'ok': True, 'sent': 1, 'failed': 0, 'chat_ids': [], 'errors': []}

        import services.telegram_templates as tg
        monkeypatch.setattr(tg, 'send_telegram_with_result', fake_send)
        monkeypatch.setattr(svc, '_persist_to_ai_insights', lambda *a, **k: None)

        svc.send_daily_report(TARGET_DATE)

        assert len(outgoing) == 1
        delivered = outgoing[0]
        assert len(delivered) <= svc._TELEGRAM_MAX_CHARS
        assert '截斷' in delivered

    def test_send_resilient_to_telegram_failure(self, monkeypatch):
        """When the Telegram send raises, ``send_daily_report`` still returns a dict."""
        monkeypatch.setattr(svc, 'generate_daily_report', lambda d: 'msg')

        def boom(text, **kwargs):
            raise RuntimeError("network down")

        import services.telegram_templates as tg
        monkeypatch.setattr(tg, 'send_telegram_with_result', boom)
        monkeypatch.setattr(svc, '_persist_to_ai_insights', lambda *a, **k: None)

        result = svc.send_daily_report(TARGET_DATE)

        assert result['ok'] is False
        assert any('telegram' in message for message in result['errors'])

    def test_generate_returns_failure_msg_when_db_dies(self, monkeypatch):
        """On a DB exception ``generate_daily_report`` returns a fallback string, not a raise."""
        def boom(*a, **kw):
            raise RuntimeError("DB connection refused")

        monkeypatch.setattr(svc, '_query_summary', boom)
        fallback = svc.generate_daily_report(TARGET_DATE)

        assert '日報生成失敗' in fallback
        assert '<code>' in fallback  # the fallback wraps the escaped error text
# ─────────────────────────────────────────────────────────────────────────────
# 6. telegram_templates.daily_token_report 包裝測試
# ─────────────────────────────────────────────────────────────────────────────
class TestTelegramTemplate:
    """Checks on the ``telegram_templates.daily_token_report`` wrapper."""

    def test_daily_token_report_appends_footer(self):
        """Both the body and the footer URL appear in the output."""
        from services.telegram_templates import daily_token_report
        rendered = daily_token_report("body", footer_url="http://x/y")
        for fragment in ('body', 'http://x/y'):
            assert fragment in rendered

    def test_daily_token_report_truncates_to_4096(self):
        """An oversized body is cut to at most 4096 chars and marked as truncated."""
        from services.telegram_templates import daily_token_report
        oversized = 'A' * 5000
        rendered = daily_token_report(oversized)
        assert len(rendered) <= 4096
        assert '截斷' in rendered

    def test_daily_token_report_escapes_footer_url(self):
        """Special characters in ``footer_url`` must be escaped."""
        from services.telegram_templates import daily_token_report
        rendered = daily_token_report("body", footer_url="http://x/?a=1&b=<2>")
        assert '<2>' not in rendered  # must have been escaped
        assert any(escaped in rendered for escaped in ('&amp;', '&lt;2&gt;'))
# ─────────────────────────────────────────────────────────────────────────────
# 7. 格式化工具測試
# ─────────────────────────────────────────────────────────────────────────────
class TestFormatHelpers:
    """Checks on the small formatting helpers of the report service."""

    def test_fmt_kb(self):
        """``_fmt_kb`` renders plain, K-scaled and M-scaled token counts."""
        assert svc._fmt_kb(0) == '0'
        assert svc._fmt_kb(500) == '500'
        assert svc._fmt_kb(1500) == '2K'  # rounded
        assert svc._fmt_kb(2_021_000) == '2.0M'

    def test_esc_handles_none(self):
        """``_esc`` maps ``None`` to an empty string and escapes ``<``, ``>`` and ``&``."""
        assert svc._esc(None) == ''
        assert svc._esc('<a>') == '&lt;a&gt;'
        assert svc._esc('a&b') == 'a&amp;b'

    def test_budget_line_zero_budget(self):
        """A zero budget is reported as "budget not set"."""
        line = svc._budget_line("📅 本日", 0.5, 0.0)
        assert '未設定預算' in line

    def test_trend_line_handles_zero_baseline(self):
        """Without a baseline the trend line shows the "—" placeholder."""
        line = svc._trend_line("X", 100.0, 0.0)
        # The expected literal was empty in this copy of the file, which made
        # the assertion vacuous ('' is contained in every string). It is
        # restored from the accompanying comment, which names "—" as the
        # placeholder.
        # NOTE(review): confirm "—" is the exact character _trend_line emits.
        assert '—' in line