diff --git a/apps/api/src/services/governance_agent.py b/apps/api/src/services/governance_agent.py
index 76a0f159..86bafd0c 100644
--- a/apps/api/src/services/governance_agent.py
+++ b/apps/api/src/services/governance_agent.py
@@ -5,10 +5,12 @@
 2. knowledge_degradation — KM 7 天未更新 > 20% 總量 → 告警知識衰退
 3. llm_hallucination — 近 100 筆 evidence verification_result=failed 比例 > 10%
 4. execution_blast_radius — 近 100 筆 auto_repair_executions.success=False 比例 > 15%
+5. slo_compliance — 4 個 SLO 合規性檢查(ADR-100),違反時降級飛輪行為
 
 所有 check 互相隔離(try/except),任一失敗不阻斷其他項目。
 
 2026-04-26 P2.2 by Claude
+2026-04-27 P3.4 by Claude — 新增 SLO 合規性自檢(ADR-100)
 """
 
 from __future__ import annotations
@@ -49,9 +51,13 @@ RECENT_LIMIT = 100 # 最近幾筆做統計
 # =============================================================================
 
 class GovernanceAgent:
-    """AI 自我治理 Agent — 4 項自檢 + 1h 排程
+    """AI 自我治理 Agent — 5 項自檢 + 1h 排程
+
+    1-4: trust_drift / knowledge_degradation / llm_hallucination / execution_blast_radius
+    5: slo_compliance(ADR-100 SLO 合規性)
 
     2026-04-26 P2.2 by Claude
+    2026-04-27 P3.4 by Claude — 加入第 5 項 slo_compliance
     """
 
     def __init__(self, alerter=None) -> None:
@@ -241,14 +247,151 @@ class GovernanceAgent:
             )
         return {"total": total, "failed": failed, "rate": round(rate, 3)}
 
+    # =========================================================================
+    # 5. SLO compliance (ADR-100)
+    # =========================================================================
+
+    @staticmethod
+    def _parse_sli_sample(payload: dict[str, Any]) -> float | None:
+        """Return the first sample value of a Prometheus instant-query payload.
+
+        Returns ``None`` when the result vector is empty or the sample is
+        NaN/Inf, so callers can tell "no data" apart from a measured 0.0.
+        """
+        import math
+
+        series = (payload.get("data") or {}).get("result") or []
+        if not series:
+            return None
+        # Instant-vector sample shape: {"metric": {...}, "value": [<ts>, "<value>"]}
+        sample = float(series[0]["value"][1])
+        return sample if math.isfinite(sample) else None
+
+    async def check_slo_compliance(self) -> dict[str, Any]:
+        """Check the four AI SLOs (ADR-100) against their hard red lines.
+
+        Each SLI is read from a Prometheus recording rule via an instant query.
+        A value below its hard red line is reported through
+        ``self._alert("slo_<name>_violation", ...)`` and logged as a warning.
+
+        SLO 1 autonomy rate:          sli:autonomy_rate:5m           red line < 0.70
+        SLO 2 decision accuracy:      sli:decision_accuracy:5m       red line < 0.85
+        SLO 3 confidence calibration: sli:confidence_calibration:1h  red line < 0.70
+        SLO 4 KM growth rate:         sli:km_growth_rate:24h         red line < 5
+
+        Returns:
+            A dict keyed by SLO name. A successful lookup yields ``value``,
+            ``slo_target``, ``hard_red_line`` and ``violated``; a failed one
+            yields ``error`` (``"no_data"`` when Prometheus has no usable
+            sample). Failures are isolated per SLO and never raised.
+
+        2026-04-27 P3.4 by Claude — AI SLO (ADR-100)
+        """
+        import httpx
+
+        from src.core.config import settings
+
+        default_url = "http://prometheus.observability.svc:9090"
+        # ``or`` also covers a setting that exists but is empty or None.
+        prom_url = str(getattr(settings, "PROMETHEUS_URL", None) or default_url).rstrip("/")
+
+        # name -> (recording rule, SLO target, hard red line). Alerts fire on the
+        # hard red line; the target is only reported.
+        slo_table: dict[str, tuple[str, float, float]] = {
+            "autonomy_rate": ("sli:autonomy_rate:5m", 0.80, 0.70),
+            "decision_accuracy": ("sli:decision_accuracy:5m", 0.90, 0.85),
+            "confidence_calibration": ("sli:confidence_calibration:1h", 0.80, 0.70),
+            "km_growth_rate": ("sli:km_growth_rate:24h", 20.0, 5.0),
+        }
+
+        results: dict[str, Any] = {}
+
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            for name, (query, target, red_line) in slo_table.items():
+                try:
+                    resp = await client.get(
+                        f"{prom_url}/api/v1/query",
+                        params={"query": query},
+                    )
+                    try:
+                        payload = resp.json()
+                    except ValueError:
+                        # Non-JSON body (e.g. a proxy error page): surface the HTTP
+                        # status instead of an opaque decode error.
+                        resp.raise_for_status()
+                        raise
+                    if payload.get("status") != "success":
+                        results[name] = {
+                            "error": "prometheus_query_failed",
+                            "status": payload.get("status"),
+                        }
+                        logger.warning(
+                            "governance_slo_prometheus_error",
+                            slo=name,
+                            query=query,
+                            response_status=payload.get("status"),
+                        )
+                        continue
+                    value = self._parse_sli_sample(payload)
+                except Exception as e:  # deliberate: one broken SLO must not block the rest
+                    results[name] = {"error": str(e)}
+                    logger.warning("governance_slo_check_error", slo=name, error=str(e))
+                    continue
+
+                if value is None:
+                    # An empty vector or NaN means the SLI is unknown, not 0.0;
+                    # treating it as 0.0 would alert on a fabricated breach.
+                    results[name] = {"error": "no_data"}
+                    logger.warning("governance_slo_no_data", slo=name, query=query)
+                    continue
+
+                rounded = round(value, 4)
+                violated = value < red_line
+                results[name] = {
+                    "value": rounded,
+                    "slo_target": target,
+                    "hard_red_line": red_line,
+                    "violated": violated,
+                }
+                if not violated:
+                    logger.info("governance_slo_ok", slo=name, value=rounded, target=target)
+                    continue
+
+                logger.warning(
+                    "governance_slo_violated",
+                    slo=name,
+                    value=rounded,
+                    hard_red_line=red_line,
+                )
+                try:
+                    await self._alert(
+                        f"slo_{name}_violation",
+                        {
+                            "slo_name": name,
+                            "current_value": rounded,
+                            "hard_red_line": red_line,
+                            "slo_target": target,
+                            "gap": round(red_line - value, 4),
+                        },
+                    )
+                except Exception as e:
+                    # Keep the measured violation in the result even if delivery failed.
+                    results[name]["alert_error"] = str(e)
+                    logger.warning("governance_slo_alert_error", slo=name, error=str(e))
+
+        violated_count = sum(1 for entry in results.values() if entry.get("violated"))
+        logger.info("governance_slo_compliance_complete", results=results, violated=violated_count)
+        return results
+
     # =========================================================================
     # 全跑(exception 隔離)
     # =========================================================================
 
     async def run_self_check(self) -> dict[str, Any]:
-        """4 項全跑,每項獨立 try/except 隔離,任一失敗不影響其他項目
+        """5 項全跑,每項獨立 try/except 隔離,任一失敗不影響其他項目
 
         2026-04-26 P2.2 by Claude
+        2026-04-27 P3.4 by Claude — 加入第 5 項 slo_compliance(ADR-100)
         """
         results: dict[str, Any] = {}
         checks = [
@@ -256,6 +399,7 @@ class GovernanceAgent:
             ("knowledge_degradation", self.check_knowledge_degradation),
             ("llm_hallucination", self.check_llm_hallucination),
             ("execution_blast_radius", self.check_execution_blast_radius),
+            ("slo_compliance", self.check_slo_compliance),
         ]
 
         for check_name, check_func in checks:
@@ -278,7 +422,7 @@ class GovernanceAgent:
                 "governance_self_failure",
                 {
                     "failed_checks": failed_checks,
-                    "total_checks": 4,
+                    "total_checks": 5,  # 2026-04-27 P3.4 by Claude — 加入 slo_compliance 後共 5 項
                     "errors": {k: results[k].get("error") for k in failed_checks},
                 },
             )
diff --git a/apps/api/tests/integration/setup_test_schema.sql 
b/apps/api/tests/integration/setup_test_schema.sql
index 8cc42765..a4ed6c30 100644
--- a/apps/api/tests/integration/setup_test_schema.sql
+++ b/apps/api/tests/integration/setup_test_schema.sql
@@ -95,6 +95,18 @@ BEGIN
     END IF;
 END $$;
 
+-- 2026-04-27 P3.2.2 — AI provider version history table (mirrors p3_2_provider_version_history.sql)
+CREATE TABLE IF NOT EXISTS ai_provider_version_history (
+    id SERIAL PRIMARY KEY,
+    provider VARCHAR(40) NOT NULL,
+    model VARCHAR(100) NOT NULL,
+    version VARCHAR(200),
+    digest VARCHAR(80),
+    captured_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    prev_version VARCHAR(200),
+    changed BOOLEAN NOT NULL DEFAULT FALSE
+);
+
 CREATE TABLE IF NOT EXISTS knowledge_entries (
     id VARCHAR(36) PRIMARY KEY,
     title VARCHAR NOT NULL,
diff --git a/apps/api/tests/test_model_version_probe.py b/apps/api/tests/test_model_version_probe.py
new file mode 100644
index 00000000..bf1a438e
--- /dev/null
+++ b/apps/api/tests/test_model_version_probe.py
@@ -0,0 +1,343 @@
+# apps/api/tests/test_model_version_probe.py
+# 2026-04-27 P3.2.1 by Claude
+"""
+Unit tests for model_version_probe
+==================================
+Coverage:
+- probe_ollama_version: success / model not found / HTTP error / timeout
+- probe_gemini_version: success / API key not set / HTTP error
+- probe_claude_version: success / API key not set
+- probe_openclaw_nemo_version: model found / model missing from tags
+  (graceful fallback) / model setting not configured
+- probe_all_providers: all succeed / partial failure does not crash
+
+Category: unit (httpx and settings are mocked; no DB / Redis dependency)
+"""
+from __future__ import annotations
+
+import json
+from datetime import datetime, timedelta, timezone
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from src.services.model_version_probe import (
+    ProviderVersionInfo,
+    probe_all_providers,
+    probe_claude_version,
+    probe_gemini_version,
+    probe_ollama_version,
+    probe_openclaw_nemo_version,
+)
+
+TAIPEI_TZ = timezone(timedelta(hours=8))
+
+# Dotted path of the module under test; patches target names as that module sees them.
+PROBE_MODULE = "src.services.model_version_probe"
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+def _mock_response(status_code: int, body: dict) -> MagicMock:
+    """Build a fake ``httpx.Response``; 4xx/5xx make ``raise_for_status`` raise."""
+    resp = MagicMock(spec=httpx.Response)
+    resp.status_code = status_code
+    resp.json.return_value = body
+    resp.raise_for_status = MagicMock()
+    if status_code >= 400:
+        resp.raise_for_status.side_effect = httpx.HTTPStatusError(
+            f"HTTP {status_code}",
+            request=MagicMock(),
+            response=resp,
+        )
+    return resp
+
+
+def _tags_body(models: list[dict]) -> dict:
+    """Wrap model entries in the ``{"models": [...]}`` body used by the tag tests."""
+    return {"models": models}
+
+
+def _mock_client(
+    response: MagicMock | None = None,
+    *,
+    get_error: Exception | None = None,
+) -> AsyncMock:
+    """Build an async-context-manager HTTP client whose ``get`` returns or raises."""
+    client = AsyncMock()
+    client.__aenter__ = AsyncMock(return_value=client)
+    client.__aexit__ = AsyncMock(return_value=False)
+    if get_error is not None:
+        client.get = AsyncMock(side_effect=get_error)
+    else:
+        client.get = AsyncMock(return_value=response)
+    return client
+
+
+def _patch_settings(**values):
+    """Swap the probe module's ``settings`` for a mock carrying ``values``."""
+    return patch(f"{PROBE_MODULE}.settings", MagicMock(**values))
+
+
+def _patch_http(client: AsyncMock):
+    """Make ``httpx.AsyncClient(...)`` hand back the prepared fake client."""
+    return patch("httpx.AsyncClient", return_value=client)
+
+
+# =============================================================================
+# probe_ollama_version
+# =============================================================================
+
+class TestProbeOllamaVersion:
+    @pytest.mark.asyncio
+    async def test_success_111_provider(self):
+        """The .111 host maps to provider 'ollama'; version and digest are parsed."""
+        entry = {
+            "name": "qwen2.5:7b-instruct",
+            "modified_at": "2026-04-01T00:00:00Z",
+            "digest": "sha256:abc123",
+        }
+        client = _mock_client(_mock_response(200, _tags_body([entry])))
+
+        with _patch_http(client):
+            info = await probe_ollama_version(
+                "http://192.168.0.111:11434", "qwen2.5:7b-instruct"
+            )
+
+        assert info.provider == "ollama"
+        assert info.model == "qwen2.5:7b-instruct"
+        assert info.version == "2026-04-01T00:00:00Z"
+        assert info.digest == "sha256:abc123"
+        assert isinstance(info.captured_at, datetime)
+
+    @pytest.mark.asyncio
+    async def test_success_188_provider(self):
+        """The .188 host maps to provider 'ollama_188'."""
+        entry = {
+            "name": "deepseek-r1:14b",
+            "modified_at": "2026-04-02T00:00:00Z",
+            "digest": "sha256:def456",
+        }
+        client = _mock_client(_mock_response(200, _tags_body([entry])))
+
+        with _patch_http(client):
+            info = await probe_ollama_version(
+                "http://192.168.0.188:11434", "deepseek-r1:14b"
+            )
+
+        assert info.provider == "ollama_188"
+
+    @pytest.mark.asyncio
+    async def test_model_not_found_raises(self):
+        """A model missing from the tag list raises ValueError."""
+        other = {"name": "other-model:7b", "modified_at": "", "digest": ""}
+        client = _mock_client(_mock_response(200, _tags_body([other])))
+
+        with _patch_http(client):
+            with pytest.raises(ValueError, match="not found"):
+                await probe_ollama_version(
+                    "http://192.168.0.111:11434", "qwen2.5:7b-instruct"
+                )
+
+    @pytest.mark.asyncio
+    async def test_http_error_propagates(self):
+        """HTTP 500 propagates as HTTPStatusError."""
+        client = _mock_client(_mock_response(500, {}))
+
+        with _patch_http(client):
+            with pytest.raises(httpx.HTTPStatusError):
+                await probe_ollama_version(
+                    "http://192.168.0.111:11434", "qwen2.5:7b-instruct"
+                )
+
+    @pytest.mark.asyncio
+    async def test_timeout_propagates(self):
+        """A connection timeout propagates as TimeoutException."""
+        client = _mock_client(get_error=httpx.TimeoutException("timeout"))
+
+        with _patch_http(client):
+            with pytest.raises(httpx.TimeoutException):
+                await probe_ollama_version(
+                    "http://192.168.0.111:11434", "qwen2.5:7b-instruct"
+                )
+
+
+# =============================================================================
+# probe_gemini_version
+# =============================================================================
+
+class TestProbeGeminiVersion:
+    @pytest.mark.asyncio
+    async def test_success(self):
+        """With GEMINI_API_KEY set, a gemini model from the listing is reported."""
+        body = {
+            "models": [
+                {
+                    "name": "models/gemini-1.5-flash",
+                    "supportedGenerationMethods": ["generateContent"],
+                },
+            ]
+        }
+        client = _mock_client(_mock_response(200, body))
+
+        with _patch_settings(GEMINI_API_KEY="fake-key"), _patch_http(client):
+            info = await probe_gemini_version()
+
+        assert info.provider == "gemini"
+        assert "gemini" in info.model
+        assert info.digest is None
+
+    @pytest.mark.asyncio
+    async def test_missing_api_key_raises(self):
+        """An empty GEMINI_API_KEY raises RuntimeError."""
+        with _patch_settings(GEMINI_API_KEY=""):
+            with pytest.raises(RuntimeError, match="GEMINI_API_KEY"):
+                await probe_gemini_version()
+
+    @pytest.mark.asyncio
+    async def test_http_error_propagates(self):
+        """HTTP 403 from the Gemini API propagates as HTTPStatusError."""
+        client = _mock_client(_mock_response(403, {}))
+
+        with _patch_settings(GEMINI_API_KEY="fake-key"), _patch_http(client):
+            with pytest.raises(httpx.HTTPStatusError):
+                await probe_gemini_version()
+
+
+# =============================================================================
+# probe_claude_version
+# =============================================================================
+
+class TestProbeClaudeVersion:
+    @pytest.mark.asyncio
+    async def test_success(self):
+        """With CLAUDE_API_KEY set, a 'claude' provider record is returned."""
+        with _patch_settings(CLAUDE_API_KEY="sk-fake"):
+            info = await probe_claude_version()
+
+        assert info.provider == "claude"
+        assert "claude" in info.model
+        assert info.version == info.model
+        assert info.digest is None
+
+    @pytest.mark.asyncio
+    async def test_missing_api_key_raises(self):
+        """An empty CLAUDE_API_KEY raises RuntimeError."""
+        with _patch_settings(CLAUDE_API_KEY=""):
+            with pytest.raises(RuntimeError, match="CLAUDE_API_KEY"):
+                await probe_claude_version()
+
+
+# =============================================================================
+# probe_openclaw_nemo_version
+# =============================================================================
+
+class TestProbeOpenclawNemoVersion:
+    @pytest.mark.asyncio
+    async def test_success_model_found(self):
+        """A model present in the tag list is reported with its digest."""
+        entry = {
+            "name": "deepseek-r1:14b",
+            "modified_at": "2026-04-03T00:00:00Z",
+            "digest": "sha256:nemo999",
+        }
+        client = _mock_client(_mock_response(200, _tags_body([entry])))
+
+        with _patch_settings(
+            OPENCLAW_DEFAULT_MODEL="deepseek-r1:14b",
+            OLLAMA_FALLBACK_URL="http://192.168.0.188:11434",
+        ), _patch_http(client):
+            info = await probe_openclaw_nemo_version()
+
+        assert info.provider == "openclaw_nemo"
+        assert info.model == "deepseek-r1:14b"
+        assert info.digest == "sha256:nemo999"
+
+    @pytest.mark.asyncio
+    async def test_model_not_in_tags_graceful(self):
+        """A model missing from the tag list falls back to version == model name."""
+        other = {"name": "other:7b", "modified_at": "", "digest": ""}
+        client = _mock_client(_mock_response(200, _tags_body([other])))
+
+        with _patch_settings(
+            OPENCLAW_DEFAULT_MODEL="deepseek-r1:14b",
+            OLLAMA_FALLBACK_URL="http://192.168.0.188:11434",
+        ), _patch_http(client):
+            info = await probe_openclaw_nemo_version()
+
+        # Must not raise; a record is still returned without a digest.
+        assert info.provider == "openclaw_nemo"
+        assert info.version == "deepseek-r1:14b"
+        assert info.digest is None
+
+    @pytest.mark.asyncio
+    async def test_missing_model_config_raises(self):
+        """An empty OPENCLAW_DEFAULT_MODEL raises RuntimeError."""
+        with _patch_settings(OPENCLAW_DEFAULT_MODEL=""):
+            with pytest.raises(RuntimeError, match="OPENCLAW_DEFAULT_MODEL"):
+                await probe_openclaw_nemo_version()
+
+
+# =============================================================================
+# probe_all_providers
+# =============================================================================
+
+class TestProbeAllProviders:
+    # Settings shared by both tests: the two Ollama hosts and the health-check model.
+    OLLAMA_SETTINGS = {
+        "OLLAMA_URL": "http://192.168.0.111:11434",
+        "OLLAMA_FALLBACK_URL": "http://192.168.0.188:11434",
+        "OLLAMA_HEALTH_CHECK_MODEL": "qwen2.5:7b-instruct",
+    }
+
+    @pytest.mark.asyncio
+    async def test_all_success(self):
+        """All five providers succeed, so five records come back."""
+        fake_results = [
+            ProviderVersionInfo(provider="ollama", model="qwen2.5:7b-instruct", version="v1"),
+            ProviderVersionInfo(provider="ollama_188", model="qwen2.5:7b-instruct", version="v1"),
+            ProviderVersionInfo(provider="gemini", model="gemini-1.5-flash", version="gemini-1.5-flash"),
+            ProviderVersionInfo(provider="claude", model="claude-sonnet-4-6", version="claude-sonnet-4-6"),
+            ProviderVersionInfo(provider="openclaw_nemo", model="deepseek-r1:14b", version="v1"),
+        ]
+
+        with patch(f"{PROBE_MODULE}.probe_ollama_version", side_effect=fake_results[:2]), \
+                patch(f"{PROBE_MODULE}.probe_gemini_version", return_value=fake_results[2]), \
+                patch(f"{PROBE_MODULE}.probe_claude_version", return_value=fake_results[3]), \
+                patch(f"{PROBE_MODULE}.probe_openclaw_nemo_version", return_value=fake_results[4]), \
+                _patch_settings(**self.OLLAMA_SETTINGS):
+            results = await probe_all_providers()
+
+        assert len(results) == 5
+
+    @pytest.mark.asyncio
+    async def test_partial_failure_no_crash(self):
+        """Two providers fail; only the three successful records are returned."""
+        good = ProviderVersionInfo(provider="ollama", model="qwen2.5:7b-instruct", version="v1")
+        claude = ProviderVersionInfo(
+            provider="claude", model="claude-sonnet-4-6", version="claude-sonnet-4-6"
+        )
+        nemo = ProviderVersionInfo(provider="openclaw_nemo", model="deepseek-r1:14b", version="v1")
+
+        async def _fail():
+            raise RuntimeError("simulated failure")
+
+        async def _fail_ollama(url, model):
+            if "188" in url:
+                raise RuntimeError("188 offline")
+            return good
+
+        with patch(f"{PROBE_MODULE}.probe_ollama_version", side_effect=_fail_ollama), \
+                patch(f"{PROBE_MODULE}.probe_gemini_version", side_effect=_fail), \
+                patch(f"{PROBE_MODULE}.probe_claude_version", return_value=claude), \
+                patch(f"{PROBE_MODULE}.probe_openclaw_nemo_version", return_value=nemo), \
+                _patch_settings(**self.OLLAMA_SETTINGS):
+            results = await probe_all_providers()
+
+        # ollama(ok) + ollama_188(fail) + gemini(fail) + claude(ok) + openclaw_nemo(ok) -> 3
+        assert len(results) == 3
+        providers = {r.provider for r in results}
+        assert providers == {"ollama", "claude", "openclaw_nemo"}
diff --git a/apps/api/tests/test_model_version_tracker.py b/apps/api/tests/test_model_version_tracker.py
new file mode 100644
index 00000000..e5a03d29
--- /dev/null
+++ b/apps/api/tests/test_model_version_tracker.py
@@ -0,0 +1,212 @@
+# apps/api/tests/test_model_version_tracker.py
+# 2026-04-27 P3.2.2 by Claude
+"""
+Unit tests for ModelVersionTracker
+==================================
+Coverage:
+- first write: 5 rows, all changed=True (prev_version=None)
+- same data again: 5 rows, all changed=False
+- digest change: that provider changed=True, the rest changed=False
+- run_probe_cycle returns a dict with "probed" and "changed"
+- probe_all_providers returns nothing (every probe failed): no rows written
+
+Category: unit (DB session and probe_all_providers are mocked; no real DB)
+"""
+from __future__ import annotations
+
+import itertools
+from collections.abc import Sequence
+from contextlib import asynccontextmanager
+from datetime import datetime, timedelta, timezone
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from src.services.model_version_probe import ProviderVersionInfo
+from src.services.model_version_tracker import ModelVersionTracker
+
+TAIPEI_TZ = timezone(timedelta(hours=8))
+
+# Dotted path of the module under test; patches target names as that module sees them.
+TRACKER_MODULE = "src.services.model_version_tracker"
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+def _make_info(
+    provider: str,
+    version: str = "v1",
+    digest: str | None = "sha256:abc",
+) -> ProviderVersionInfo:
+    """Build a probe record for ``provider`` stamped with the current Taipei time."""
+    return ProviderVersionInfo(
+        provider=provider,
+        model=f"model-{provider}",
+        version=version,
+        digest=digest,
+        captured_at=datetime.now(TAIPEI_TZ),
+    )
+
+
+def _make_five() -> list[ProviderVersionInfo]:
+    """Build one record per provider; gemini and claude carry no digest."""
+    return [
+        _make_info("ollama"),
+        _make_info("ollama_188"),
+        _make_info("gemini", digest=None),
+        _make_info("claude", digest=None),
+        _make_info("openclaw_nemo"),
+    ]
+
+
+def _mock_db_session(last_records: dict[str, MagicMock | None]):
+    """Build an AsyncMock DB session that records ``add`` calls in ``db._added``.
+
+    NOTE(review): ``last_records`` is accepted but never consulted (every
+    lookup answers ``None``), and no test in this module calls this helper.
+    """
+    db = AsyncMock()
+    added: list = []
+
+    async def _execute(stmt):
+        result = MagicMock()
+        result.scalar_one_or_none = MagicMock(return_value=None)
+        return result
+
+    db.execute = AsyncMock(side_effect=_execute)
+    db.add = MagicMock(side_effect=added.append)
+    db.commit = AsyncMock()
+    db._added = added
+    return db
+
+
+def _last_record(version: str, digest: str | None) -> MagicMock:
+    """Build a stand-in for the most recently stored history row."""
+    return MagicMock(version=version, digest=digest)
+
+
+def _fake_db_context(added_rows: list, last_records: Sequence[object] = ()):
+    """Build a ``get_db_context`` replacement backed by an in-memory fake session.
+
+    The n-th ``execute`` call answers ``scalar_one_or_none()`` with the n-th
+    entry of ``last_records`` (wrapping around); with no entries every lookup
+    answers ``None``. This assumes the tracker issues one lookup per provider
+    in probe order — TODO confirm against the tracker. Objects passed to
+    ``add`` are appended to ``added_rows``.
+    """
+    lookups = itertools.cycle(last_records) if last_records else itertools.repeat(None)
+
+    class FakeDB:
+        async def execute(self, stmt):
+            result = MagicMock()
+            result.scalar_one_or_none = MagicMock(return_value=next(lookups))
+            return result
+
+        def add(self, obj):
+            added_rows.append(obj)
+
+        async def commit(self):
+            pass
+
+    @asynccontextmanager
+    async def fake_ctx():
+        yield FakeDB()
+
+    return fake_ctx
+
+
+async def _run_cycle(
+    infos: list[ProviderVersionInfo],
+    added_rows: list,
+    last_records: Sequence[object] = (),
+) -> dict:
+    """Run one probe cycle with the probe and the DB context both faked."""
+    tracker = ModelVersionTracker()
+    with patch(f"{TRACKER_MODULE}.probe_all_providers", return_value=infos), \
+            patch(f"{TRACKER_MODULE}.get_db_context", _fake_db_context(added_rows, last_records)):
+        return await tracker.run_probe_cycle()
+
+
+# =============================================================================
+# Test Cases
+# =============================================================================
+
+@pytest.mark.integration
+class TestModelVersionTracker:
+    """Probe-cycle tests for the tracker, kept under the ``integration`` marker.
+
+    NOTE(review): the original note said these need a PG connection, yet every
+    test patches ``get_db_context`` with an in-memory fake — confirm whether
+    the marker is still needed.
+    """
+
+    @pytest.mark.asyncio
+    async def test_first_write_all_changed(self):
+        """With no stored history, all five rows are written with changed=True."""
+        added_rows: list = []
+
+        result = await _run_cycle(_make_five(), added_rows)
+
+        assert result["probed"] == 5
+        assert len(result["changed"]) == 5
+        assert len(added_rows) == 5
+        for row in added_rows:
+            assert row.changed is True
+            assert row.prev_version is None
+
+    @pytest.mark.asyncio
+    async def test_same_data_no_change(self):
+        """When stored version and digest match, rows are written with changed=False."""
+        five = _make_five()
+        lasts = [_last_record(info.version, info.digest) for info in five]
+        added_rows: list = []
+
+        result = await _run_cycle(five, added_rows, lasts)
+
+        assert result["probed"] == 5
+        assert len(result["changed"]) == 0
+        # Guard the loop below against passing vacuously on an empty list.
+        assert len(added_rows) == 5
+        for row in added_rows:
+            assert row.changed is False
+
+    @pytest.mark.asyncio
+    async def test_digest_change_detected(self):
+        """Only the provider whose stored digest differs is reported as changed."""
+        five = _make_five()
+        changed_provider = "ollama"
+        lasts = [
+            _last_record(
+                info.version,
+                "sha256:OLD_DIGEST" if info.provider == changed_provider else info.digest,
+            )
+            for info in five
+        ]
+        added_rows: list = []
+
+        result = await _run_cycle(five, added_rows, lasts)
+
+        assert result["probed"] == 5
+        assert changed_provider in result["changed"]
+        # Exactly one provider changed, both in the summary and in the rows written.
+        assert len(result["changed"]) == 1
+        assert sum(1 for row in added_rows if row.changed is True) == 1
+
+    @pytest.mark.asyncio
+    async def test_probe_failure_does_not_crash(self):
+        """An empty probe result (every provider failed) writes no rows."""
+        added_rows: list = []
+
+        async def _bad_probe():
+            return []  # every probe failed, so the aggregate is empty
+
+        tracker = ModelVersionTracker()
+        with patch(f"{TRACKER_MODULE}.probe_all_providers", side_effect=_bad_probe), \
+                patch(f"{TRACKER_MODULE}.get_db_context", _fake_db_context(added_rows)):
+            result = await tracker.run_probe_cycle()
+
+        assert result["probed"] == 0
+        assert result["changed"] == []
+        assert len(added_rows) == 0
diff --git a/ops/monitoring/grafana/dashboards/ai-slo-dashboard.json 
b/ops/monitoring/grafana/dashboards/ai-slo-dashboard.json new file mode 100644 index 00000000..6393dda7 --- /dev/null +++ b/ops/monitoring/grafana/dashboards/ai-slo-dashboard.json @@ -0,0 +1,496 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + }, + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "panel", + "id": "barchart", + "name": "Bar chart", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [] + }, + "description": "AI 自主化飛輪 SLO Dashboard — 自主化率/決策準確率/信心校準/KM 增長率 | ADR-100 2026-04-27 P3.4 台北時區", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "refresh": "60s", + "schemaVersion": 39, + "tags": ["slo", "ai", "autonomous", "flywheel"], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "Asia/Taipei", + "title": "AI 自主化飛輪 SLO", + "uid": "ai-autonomous-slo-v1", + "version": 1, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "比率", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + 
"scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 0.7 }, + { "color": "green", "value": 0.8 } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "SLO 目標 80%" }, + "properties": [ + { "id": "custom.lineStyle", "value": { "dash": [10, 5], "fill": "dash" } }, + { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }, + { "id": "custom.lineWidth", "value": 1 } + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "id": 1, + "options": { + "legend": { "calcs": ["lastNotNull", "min"], "displayMode": "list", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "title": "SLO 1 — 自主化率(24h 趨勢)", + "description": "SLI = auto_executed / all_operations(5m rate)\n目標 SLO ≥ 80%\n橙色虛線 = 80% 閾值", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sli:autonomy_rate:5m", + "legendFormat": "自主化率", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "vector(0.80)", + "legendFormat": "SLO 目標 80%", + "refId": "B" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "比率", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false + }, + "mappings": [], + "max": 1, + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 0.85 }, + { "color": "green", "value": 0.9 } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "SLO 目標 90%" }, + "properties": [ + { "id": "custom.lineStyle", "value": { "dash": [10, 5], "fill": "dash" } }, + { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }, + { "id": "custom.lineWidth", "value": 1 } + ] + } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "id": 2, + "options": { + "legend": { "calcs": ["lastNotNull", "min"], "displayMode": "list", "placement": "bottom" }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "title": "SLO 2 — 決策準確率(24h 趨勢)", + "description": "SLI = verifier_success / auto_executed(5m rate)\n目標 SLO ≥ 90%\n橙色虛線 = 90% 閾值", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sli:decision_accuracy:5m", + "legendFormat": "決策準確率", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "vector(0.90)", + "legendFormat": "SLO 目標 90%", + "refId": "B" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 0.7 }, + { "color": "green", "value": 0.8 } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 6, "x": 0, "y": 8 }, + "id": 3, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": true, + "showThresholdMarkers": true + }, + "title": "SLO 3 — 信心校準(當前值)", + "description": "SLI = 
high_confidence_success / high_confidence_total(1h 滑動窗口)\n目標 SLO ≥ 80%(綠線)\n≥ 0.8 = 綠色,0.7~0.8 = 黃色,< 0.7 = 紅色", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sli:confidence_calibration:1h", + "legendFormat": "信心校準", + "refId": "A" + } + ], + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "fillOpacity": 70, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 5 }, + { "color": "green", "value": 20 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 6, "x": 6, "y": 8 }, + "id": 4, + "options": { + "barRadius": 0, + "barWidth": 0.8, + "colorByField": "Value", + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" }, + "orientation": "auto", + "showValue": "always", + "stacking": "none", + "tooltip": { "mode": "single", "sort": "none" }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "title": "SLO 4 — KM 增長率(7d 每日新增)", + "description": "SLI = increase(knowledge_entries_total[24h])\n目標 SLO ≥ 20 筆/day(綠色)\n5~20 = 黃色,< 5 = 紅色(疑似 KM 鏈斷裂)", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "sli:km_growth_rate:24h", + "legendFormat": "KM 增長/day", + "refId": "A" + } + ], + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { "from": 0, "to": 0, "result": { "color": "red", "text": "已耗盡" } }, + "type": "range" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": 
"absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 0.25 }, + { "color": "green", "value": 0.5 } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 5, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "title": "Error Budget Remaining(4 SLO)", + "description": "剩餘 error budget 比例(1 = 100% 剩餘,0 = 已耗盡)\n- SLO 3 信心校準 budget 計算:(1 - SLI) / 0.20\n- SLO 1/2 用 5m rate 估算\n- 顯示: 各 SLO 剩餘預算 %", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "clamp(slo:autonomy_rate:error_budget_remaining, 0, 1)", + "legendFormat": "SLO1 自主化率", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "clamp(slo:decision_accuracy:error_budget_remaining, 0, 1)", + "legendFormat": "SLO2 決策準確率", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "clamp(slo:confidence_calibration:error_budget_remaining, 0, 1)", + "legendFormat": "SLO3 信心校準", + "refId": "C" + } + ], + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { "type": "auto" }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 2 } + ] + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "alertname" }, + "properties": [{ "id": "custom.width", "value": 300 }] + }, + { + "matcher": { "id": "byName", "options": "severity" }, + "properties": [ + { 
"id": "custom.width", "value": 100 }, + { + "id": "mappings", + "value": [ + { "options": { "critical": { "color": "red", "index": 0 } }, "type": "value" }, + { "options": { "warning": { "color": "yellow", "index": 1 } }, "type": "value" }, + { "options": { "info": { "color": "blue", "index": 2 } }, "type": "value" } + ] + }, + { "id": "custom.cellOptions", "value": { "type": "color-background" } } + ] + }, + { + "matcher": { "id": "byName", "options": "slo_name" }, + "properties": [{ "id": "custom.width", "value": 200 }] + }, + { + "matcher": { "id": "byName", "options": "burn_window" }, + "properties": [{ "id": "custom.width", "value": 100 }] + } + ] + }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 }, + "id": 6, + "options": { + "cellHeight": "sm", + "footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false }, + "showHeader": true, + "sortBy": [{ "desc": true, "displayName": "severity" }] + }, + "title": "Burn Rate Alerts(當前觸發)", + "description": "列出當前觸發中的 SLO burn rate alerts\n按 severity 排序(critical > warning > info)\n空白 = 所有 SLO 健康", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "ALERTS{slo_name=~\".+\", alertstate=\"firing\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": ["alertname", "severity", "slo_name", "burn_window", "team", "alertstate"] + } + } + }, + { + "id": "sortBy", + "options": { + "fields": [{ "desc": true, "displayName": "severity" }] + } + } + ], + "type": "table" + } + ] +}