Wave 8 P3.2 模型版本追蹤 + ADR-100 SLO 自我治理 + 配套: P3.2 — Model Version Tracking: - model_version_probe.py (268 行) — 探測 Ollama / OpenRouter 等 provider 的 model version - model_version_tracker.py (101 行) — 對齊 PG provider_version_history 表 - migrations/p3_2_provider_version_history.sql + rollback — 25 行 schema - db/models.py +32 行 — ProviderVersionHistory ORM ADR-100 — AI 自主化 SLO: - docs/adr/ADR-100-ai-autonomous-slo.md (167 行) — 飛輪 SLO 設計與閾值 - ops/monitoring/slo-rules.yml (254 行) — Prometheus SLO recording rules + alerts - ops/monitoring/tests/test_slo_rules.yaml (242 行) — promtool unit tests 整合修改: - main.py +72 行 — Lifespan 啟動 model_version_probe + KB rot cleaner schedule - gitea_webhook.py +45 行 — webhook 接收 model 版本變化通知 - ci_auto_repair.py / evidence_snapshot.py / pre_decision_investigator.py — 配合接線 新測試: - test_kb_rot_cleaner_schedule.py (120 行) — 9 tests pass - test_slo_rules.yaml — promtool 驗收 Tests: 9 passed (test_kb_rot_cleaner_schedule) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Co-Authored-By: Multiple Engineers (P3.2 + ADR-100) <noreply@anthropic.com>
269 lines
9.0 KiB
Python
269 lines
9.0 KiB
Python
"""
AI provider version probes — provides a get_version() for each provider.

Each probe function works independently: a failure only affects that
provider and does not crash the whole batch.

Providers:
- ollama        : Ollama at 192.168.0.111 (primary)
- ollama_188    : Ollama at 192.168.0.188 (fallback)
- gemini        : Google Gemini API (version = model name)
- claude        : Anthropic Claude (version = model name)
- openclaw_nemo : OpenClaw NemoTron (version = OPENCLAW_DEFAULT_MODEL)

# 2026-04-27 P3.2.1 by Claude
"""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime, timedelta, timezone
|
||
|
||
import structlog
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
# Fixed UTC+8 offset; every snapshot is timestamped in this zone.
TAIPEI_TZ = timezone(timedelta(hours=8))


def _now_in_taipei() -> datetime:
    """Return the current time as an aware datetime in TAIPEI_TZ."""
    return datetime.now(tz=TAIPEI_TZ)


@dataclass
class ProviderVersionInfo:
    """Snapshot of the model version an AI provider is currently serving."""

    # Provider label: "ollama" / "ollama_188" / "gemini" / "claude" / "openclaw_nemo"
    provider: str
    model: str
    # Version string or tag (the Ollama probes use modified_at, the others a model name)
    version: str
    # SHA256 digest; only the Ollama-backed probes fill it in
    digest: str | None = None
    # Capture time; defaults to "now" in TAIPEI_TZ when not supplied
    captured_at: datetime = field(default_factory=_now_in_taipei)
|
||
|
||
|
||
# =============================================================================
|
||
# Ollama Probe
|
||
# =============================================================================
|
||
|
||
async def probe_ollama_version(url: str, model: str) -> ProviderVersionInfo:
    """Probe an Ollama server for the digest and modification time of one model.

    Issues ``GET {url}/api/tags`` and returns the entry whose ``name`` matches
    ``model``.

    Args:
        url: Ollama base URL, e.g. "http://192.168.0.111:11434". A trailing
            slash is tolerated.
        model: Model name, e.g. "qwen2.5:7b-instruct". A name without a tag
            also matches the listed "<name>:latest" entry.

    Returns:
        ProviderVersionInfo whose ``provider`` is "ollama" when ``url``
        contains "192.168.0.111" and "ollama_188" otherwise. ``version`` is
        the entry's ``modified_at`` ("" when absent) and ``digest`` is the
        entry's ``digest``.

    Raises:
        ValueError: the model is not in the server's list.
        httpx.HTTPError: connection failure or non-success HTTP status.
    """
    import httpx

    # Provider label is derived purely from the URL text (primary host = .111).
    provider_name = "ollama" if "192.168.0.111" in url else "ollama_188"

    # Avoid "//api/tags" when the configured base URL ends with a slash.
    base_url = url.rstrip("/")

    async with httpx.AsyncClient(timeout=5.0) as client:
        resp = await client.get(f"{base_url}/api/tags")
        resp.raise_for_status()
        # "models" may be present but null; treat that as an empty list so the
        # documented ValueError is raised instead of a TypeError.
        models = resp.json().get("models") or []

    # Ollama lists every model as "name:tag"; a missing tag means ":latest".
    wanted = {model}
    if ":" not in model.rsplit("/", 1)[-1]:
        wanted.add(f"{model}:latest")

    for entry in models:
        if entry.get("name") in wanted:
            return ProviderVersionInfo(
                provider=provider_name,
                model=model,
                # Keep version a str even if modified_at is missing or null.
                version=entry.get("modified_at") or "",
                digest=entry.get("digest"),
            )

    raise ValueError(f"Model {model!r} not found at {url}; available: {[m.get('name') for m in models]}")
|
||
|
||
|
||
# =============================================================================
|
||
# Gemini Probe
|
||
# =============================================================================
|
||
|
||
async def probe_gemini_version() -> ProviderVersionInfo:
    """Probe Gemini by listing models and using a model name as the version.

    Calls the Generative Language ``v1beta/models`` endpoint (first page only,
    ``pageSize=50``) and picks the first entry whose name contains "gemini"
    and whose ``supportedGenerationMethods`` includes "generateContent". The
    "models/" prefix is stripped from the name. If no entry qualifies, the
    placeholder "gemini-unknown" is used.

    Returns:
        ProviderVersionInfo with ``provider="gemini"``; ``model`` and
        ``version`` are both the selected model name, ``digest`` is None.

    Raises:
        RuntimeError: GEMINI_API_KEY is not configured.
        httpx.HTTPError: connection failure or non-success HTTP status.
    """
    from src.core.config import settings

    api_key = settings.GEMINI_API_KEY
    if not api_key:
        raise RuntimeError("GEMINI_API_KEY not configured")

    import httpx

    async with httpx.AsyncClient(timeout=8.0) as client:
        resp = await client.get(
            "https://generativelanguage.googleapis.com/v1beta/models",
            params={"pageSize": 50},
            # Send the key as a header, not a query parameter: httpx embeds
            # the request URL in HTTPStatusError messages, and probe failures
            # are logged via str(exc), which would leak the key.
            headers={"x-goog-api-key": api_key},
        )
        resp.raise_for_status()
        data = resp.json()

    # First gemini model that supports generateContent; tolerate null fields.
    gemini_model = "gemini-unknown"
    for entry in data.get("models") or []:
        name = entry.get("name") or ""
        methods = entry.get("supportedGenerationMethods") or []
        if "gemini" in name and "generateContent" in methods:
            gemini_model = name.replace("models/", "")
            break

    return ProviderVersionInfo(
        provider="gemini",
        model=gemini_model,
        version=gemini_model,
        digest=None,
    )
|
||
|
||
|
||
# =============================================================================
|
||
# Claude Probe
|
||
# =============================================================================
|
||
|
||
async def probe_claude_version() -> ProviderVersionInfo:
    """Report the Claude version as a fixed model name (e.g. "claude-sonnet-4-6").

    No network request is made. The provider counts as available as soon as
    CLAUDE_API_KEY is set, and the version string is the model name that is
    hard-coded below.

    NOTE(review): the original comment stated that Anthropic offers no
    list-models endpoint (as of 2026-04) — confirm before relying on it.

    Returns:
        ProviderVersionInfo with ``provider="claude"``; ``model`` and
        ``version`` are both the hard-coded model name, ``digest`` is None.

    Raises:
        RuntimeError: CLAUDE_API_KEY is not configured.
    """
    from src.core.config import settings

    if not settings.CLAUDE_API_KEY:
        raise RuntimeError("CLAUDE_API_KEY not configured")

    # Meant to mirror the claude model used by ai_router — TODO confirm they
    # stay in sync, nothing in this function enforces it.
    pinned_model = "claude-sonnet-4-6"

    return ProviderVersionInfo(
        provider="claude",
        model=pinned_model,
        version=pinned_model,
        digest=None,
    )
|
||
|
||
|
||
# =============================================================================
|
||
# OpenClaw NemoTron Probe
|
||
# =============================================================================
|
||
|
||
async def probe_openclaw_nemo_version() -> ProviderVersionInfo:
    """Probe the Ollama backend behind OpenClaw NemoTron for its model version.

    The model name comes from ``settings.OPENCLAW_DEFAULT_MODEL``. The Ollama
    base URL is ``settings.OLLAMA_FALLBACK_URL`` when set; otherwise it is
    built from the host of ``settings.OPENCLAW_URL`` with port 11434. The
    probe then issues ``GET {base}/api/tags`` and looks for an entry whose
    ``name`` equals the model.

    Returns:
        ProviderVersionInfo with ``provider="openclaw_nemo"``. When the model
        is listed, ``version`` is its ``modified_at`` (the model name if that
        is missing) and ``digest`` is its ``digest``. When it is not listed,
        a warning is logged and ``version`` is the model name with
        ``digest=None``.

    Raises:
        RuntimeError: OPENCLAW_DEFAULT_MODEL is not configured, or no Ollama
            URL can be derived (OLLAMA_FALLBACK_URL unset and OPENCLAW_URL
            has no host).
        httpx.HTTPError: connection failure or non-success HTTP status.
    """
    from src.core.config import settings

    model = settings.OPENCLAW_DEFAULT_MODEL
    if not model:
        raise RuntimeError("OPENCLAW_DEFAULT_MODEL not configured")

    ollama_188_url = settings.OLLAMA_FALLBACK_URL
    if not ollama_188_url:
        # Fallback: reuse the OpenClaw host with Ollama's port 11434.
        from urllib.parse import urlparse

        parsed = urlparse(settings.OPENCLAW_URL or "")
        host = parsed.hostname
        if not host:
            # Without this guard the URL would become "://None:11434".
            raise RuntimeError(
                "Cannot derive Ollama URL: OLLAMA_FALLBACK_URL is not set and OPENCLAW_URL has no host"
            )
        if ":" in host:
            # IPv6 literals must be bracketed inside a URL authority.
            host = f"[{host}]"
        ollama_188_url = f"{parsed.scheme or 'http'}://{host}:11434"

    # Avoid "//api/tags" when the configured base URL ends with a slash.
    base_url = ollama_188_url.rstrip("/")

    import httpx

    async with httpx.AsyncClient(timeout=5.0) as client:
        resp = await client.get(f"{base_url}/api/tags")
        resp.raise_for_status()
        # "models" may be present but null; treat that as an empty list.
        models = resp.json().get("models") or []

    for entry in models:
        if entry.get("name") == model:
            return ProviderVersionInfo(
                provider="openclaw_nemo",
                model=model,
                # Fall back to the model name if modified_at is missing or null.
                version=entry.get("modified_at") or model,
                digest=entry.get("digest"),
            )

    # Model not listed: report the model name as the version, without a digest.
    logger.warning("openclaw_nemo_model_not_in_tags", model=model, url=ollama_188_url)
    return ProviderVersionInfo(
        provider="openclaw_nemo",
        model=model,
        version=model,
        digest=None,
    )
|
||
|
||
|
||
# =============================================================================
|
||
# Probe All
|
||
# =============================================================================
|
||
|
||
async def probe_all_providers() -> list[ProviderVersionInfo]:
    """Probe every configured AI provider concurrently, skipping the ones that fail.

    Probes run through ``asyncio.gather(..., return_exceptions=True)``, so one
    failing provider never prevents the others from being reported. Every
    failure is logged as a ``provider_probe_failed`` warning.

    The fallback Ollama probe ("ollama_188") runs only when
    ``settings.OLLAMA_FALLBACK_URL`` is set and differs from
    ``settings.OLLAMA_URL``. Otherwise it would re-probe the primary server
    and return a duplicate entry.

    Returns:
        The ProviderVersionInfo objects of the probes that succeeded
        (length 0 to 5).
    """
    from src.core.config import settings

    primary_url = settings.OLLAMA_URL
    fallback_url = settings.OLLAMA_FALLBACK_URL
    health_model = settings.OLLAMA_HEALTH_CHECK_MODEL

    # (log label, coroutine) pairs; keeping them together means a label can
    # never drift out of step with its probe.
    probes = [("ollama", probe_ollama_version(primary_url, health_model))]
    if fallback_url and fallback_url != primary_url:
        probes.append(("ollama_188", probe_ollama_version(fallback_url, health_model)))
    probes.extend(
        [
            ("gemini", probe_gemini_version()),
            ("claude", probe_claude_version()),
            ("openclaw_nemo", probe_openclaw_nemo_version()),
        ]
    )

    outcomes = await asyncio.gather(
        *(coro for _, coro in probes),
        return_exceptions=True,
    )

    results: list[ProviderVersionInfo] = []
    for (label, _), outcome in zip(probes, outcomes):
        if isinstance(outcome, ProviderVersionInfo):
            results.append(outcome)
        else:
            logger.warning(
                "provider_probe_failed",
                provider=label,
                error=str(outcome),
                # str() of many timeout exceptions is empty; the type name
                # keeps the log entry meaningful.
                error_type=type(outcome).__name__,
            )

    return results
|