diff --git a/apps/api/src/services/heartbeat_report_service.py b/apps/api/src/services/heartbeat_report_service.py index 3d856293..dc6b9b74 100644 --- a/apps/api/src/services/heartbeat_report_service.py +++ b/apps/api/src/services/heartbeat_report_service.py @@ -108,6 +108,7 @@ class HeartbeatReport: timestamp: datetime ai_services: dict[str, ProbeResult] = field(default_factory=dict) ollama_models: dict[str, bool] = field(default_factory=dict) + ollama_endpoints: dict[str, ProbeResult] = field(default_factory=dict) mcp_providers: dict[str, ProbeResult] = field(default_factory=dict) flywheel: FlywheelStats = field(default_factory=FlywheelStats) infra: dict[str, ProbeResult] = field(default_factory=dict) @@ -181,6 +182,7 @@ class HeartbeatReportService: ollama_data = collected["_ollama"] or {} report.ai_services["ollama"] = ollama_data.get("probe", ProbeResult(False, "❌ 無回應")) report.ollama_models = ollama_data.get("models", {}) + report.ollama_endpoints = ollama_data.get("endpoints", {}) report.ai_services["nemotron"] = collected["_nemotron"] or ProbeResult(False, "❌ 無回應") report.ai_services["gemini"] = collected["_gemini"] or ProbeResult(False, "❌ 無回應") report.ai_services["claude"] = collected["_claude"] or ProbeResult(False, "❌ 無回應") @@ -224,37 +226,62 @@ class HeartbeatReportService: async def _probe_ollama(self) -> dict: """探測 Ollama 服務 + 逐一確認所需模型""" - try: - async with httpx.AsyncClient(timeout=_PROBE_TIMEOUT) as client: + endpoints = [ + ("GCP-A", settings.OLLAMA_URL), + ("GCP-B", getattr(settings, "OLLAMA_SECONDARY_URL", "")), + ("111", getattr(settings, "OLLAMA_FALLBACK_URL", "")), + ] + + async def _probe_endpoint( + client: httpx.AsyncClient, + label: str, + url: str, + ) -> tuple[str, ProbeResult, set[str]]: + if not url: + return label, ProbeResult(False, "⚠️ 未設定"), set() + try: t0 = asyncio.get_event_loop().time() - resp = await client.get(f"{settings.OLLAMA_URL}/api/tags") + resp = await client.get(f"{url}/api/tags") latency = (asyncio.get_event_loop().time() - t0) * 1000 + if resp.status_code != 200: + return label, ProbeResult(False, f"❌ HTTP {resp.status_code}", latency), set() + available = {m["name"] for m in resp.json().get("models", [])} + return label, ProbeResult(True, "✅ 正常", round(latency, 1)), available + except Exception as e: + return label, ProbeResult(False, f"❌ {str(e)[:60]}"), set() - if resp.status_code != 200: - return { - "probe": ProbeResult(False, f"❌ HTTP {resp.status_code}", latency), - "models": {}, - } + async with httpx.AsyncClient(timeout=_PROBE_TIMEOUT) as client: + results = await asyncio.gather( + *[_probe_endpoint(client, label, url) for label, url in endpoints], + ) - available = {m["name"] for m in resp.json().get("models", [])} + endpoint_status = {label: probe for label, probe, _available in results} + primary_probe = endpoint_status.get("GCP-A", ProbeResult(False, "❌ 無回應")) + primary_available = next( + (available for label, _probe, available in results if label == "GCP-A"), + set(), + ) + + if primary_probe.ok: # 也把 short name(無 :tag)加進去方便匹配 - available_short = {n.split(":")[0] for n in available} + available_short = {n.split(":")[0] for n in primary_available} model_status: dict[str, bool] = {} for required in settings.OLLAMA_REQUIRED_MODELS: req_short = required.split(":")[0] - ok = required in available or req_short in available_short + ok = required in primary_available or req_short in available_short model_status[required] = ok - return { - "probe": ProbeResult(True, "✅ 正常", round(latency, 1)), + "probe": primary_probe, "models": model_status, + "endpoints": endpoint_status, } - except Exception as e: - return { - "probe": ProbeResult(False, f"❌ {str(e)[:60]}"), - "models": {}, - } + + return { + "probe": primary_probe, + "models": {}, + "endpoints": endpoint_status, + } async def _probe_nemotron(self) -> ProbeResult: """探測 Nemotron NIM API""" @@ -437,9 +464,11 @@ class HeartbeatReportService: try: # KM 向量化率(DB 查詢) + from sqlalchemy import func, select + from sqlalchemy import text as sa_text + from src.db.base import get_db_context from src.db.models import KnowledgeEntryRecord - from sqlalchemy import func, select, text as sa_text async with get_db_context() as db: # KM 總數 km_total = await db.scalar(select(func.count()).select_from(KnowledgeEntryRecord)) @@ -490,8 +519,9 @@ class HeartbeatReportService: """查 24h 告警流水線統計(approval_records)""" stats = AlertPipelineStats() try: - from src.db.base import get_db_context from sqlalchemy import text as sa_text + + from src.db.base import get_db_context async with get_db_context() as db: r = await db.execute(sa_text(""" SELECT @@ -517,8 +547,9 @@ class HeartbeatReportService: """探測 PostgreSQL 與 Redis 連線健康""" s = DbRedisStats() try: - from src.db.base import get_db_context from sqlalchemy import text as sa_text + + from src.db.base import get_db_context async with get_db_context() as db: await db.execute(sa_text("SELECT 1")) s.db_ok = True @@ -652,8 +683,9 @@ class HeartbeatReportService: logger.debug("heartbeat_automation_redis_failed", error=str(e)) try: - from src.db.base import get_db_context from sqlalchemy import text as sa_text + + from src.db.base import get_db_context async with get_db_context() as db: # 今日新增 KM(timestamptz 直接比較,不需 AT TIME ZONE) km_today = await db.scalar(sa_text( @@ -686,6 +718,10 @@ class HeartbeatReportService: if not loaded: warnings.append(f"{model} 未載入,相關功能失效") + for name, probe in report.ollama_endpoints.items(): + if not probe.ok and not probe.status.startswith("⚠️ 未設定"): + warnings.append(f"Ollama {name} 異常: {probe.status}") + # AI 服務異常 for name, probe in report.ai_services.items(): if not probe.ok and not probe.status.startswith("⚠️"): @@ -816,6 +852,12 @@ def report_to_telegram_html(report: HeartbeatReport) -> str: lines.append("🤖 AI 服務") lines.append(f"├─ Ollama: {ollama.status}{ollama_lat} {html.escape(models_str)}") + if report.ollama_endpoints: + endpoint_items = list(report.ollama_endpoints.items()) + for idx, (name, probe) in enumerate(endpoint_items): + branch = "└" if idx == len(endpoint_items) - 1 else "├" + latency = f" {probe.latency_ms:.0f}ms" if probe.latency_ms else "" + lines.append(f"│ {branch}─ {html.escape(name)}: {probe.status}{latency}") lines.append(f"├─ Nemotron NIM: {nem.status}" + (f" {nem.latency_ms:.0f}ms" if nem.latency_ms else "")) lines.append(f"├─ Gemini API: {gem.status}" + (f" {gem.latency_ms:.0f}ms" if gem.latency_ms else "")) lines.append(f"└─ Claude API: {cla.status}" + (f" {cla.latency_ms:.0f}ms" if cla.latency_ms else "")) diff --git a/apps/api/tests/test_heartbeat_ollama_endpoints.py b/apps/api/tests/test_heartbeat_ollama_endpoints.py new file mode 100644 index 00000000..c015db85 --- /dev/null +++ b/apps/api/tests/test_heartbeat_ollama_endpoints.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Any + +import pytest + +from src.services import heartbeat_report_service as heartbeat + + +class _FakeResponse: + def __init__(self, status_code: int, payload: dict[str, Any] | None = None) -> None: + self.status_code = status_code + self._payload = payload or {} + + def json(self) -> dict[str, Any]: + return self._payload + + +class _FakeAsyncClient: + def __init__(self, *_args: Any, **_kwargs: Any) -> None: + pass + + async def __aenter__(self) -> "_FakeAsyncClient": + return self + + async def __aexit__(self, *_args: Any) -> None: + return None + + async def get(self, url: str) -> _FakeResponse: + if url.startswith("http://gcp-a"): + return _FakeResponse( + 200, + {"models": [{"name": "qwen3:14b"}, {"name": "bge-m3:latest"}]}, + ) + if url.startswith("http://gcp-b"): + return _FakeResponse(200, {"models": [{"name": "gemma3:4b"}]}) + raise TimeoutError("connect failed") + + +@pytest.mark.asyncio +async def test_probe_ollama_reports_each_endpoint(monkeypatch) -> None: + monkeypatch.setattr(heartbeat.httpx, "AsyncClient", _FakeAsyncClient) + monkeypatch.setattr(heartbeat.settings, "OLLAMA_URL", "http://gcp-a:11434") + monkeypatch.setattr(heartbeat.settings, "OLLAMA_SECONDARY_URL", "http://gcp-b:11434") + monkeypatch.setattr(heartbeat.settings, "OLLAMA_FALLBACK_URL", "http://local-111:11434") + monkeypatch.setattr(heartbeat.settings, "OLLAMA_REQUIRED_MODELS", ["qwen3:14b", "bge-m3:latest"]) + + result = await heartbeat.HeartbeatReportService()._probe_ollama() + + assert result["probe"].ok is True + assert result["models"] == {"qwen3:14b": True, "bge-m3:latest": True} + assert result["endpoints"]["GCP-A"].ok is True + assert result["endpoints"]["GCP-B"].ok is True + assert result["endpoints"]["111"].ok is False + + +def test_report_to_telegram_html_renders_ollama_endpoint_statuses() -> None: + report = heartbeat.HeartbeatReport(timestamp=datetime(2026, 5, 6, 18, 0)) + report.ai_services["ollama"] = heartbeat.ProbeResult(True, "✅ 正常", 1200) + report.ai_services["nemotron"] = heartbeat.ProbeResult(True, "✅ 正常", 900) + report.ai_services["gemini"] = heartbeat.ProbeResult(True, "✅ 正常", 800) + report.ai_services["claude"] = heartbeat.ProbeResult(True, "✅ 正常", 700) + report.ollama_models = {"qwen3:14b": True} + report.ollama_endpoints = { + "GCP-A": heartbeat.ProbeResult(True, "✅ 正常", 1000), + "GCP-B": heartbeat.ProbeResult(True, "✅ 正常", 1100), + "111": heartbeat.ProbeResult(False, "❌ connect failed"), + } + report.warnings = heartbeat.HeartbeatReportService()._build_warnings(report) + + text = heartbeat.report_to_telegram_html(report) + + assert "GCP-A: ✅ 正常" in text + assert "GCP-B: ✅ 正常" in text + assert "111: ❌ connect failed" in text + assert "Ollama 111 異常" in "\n".join(report.warnings) diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 11e457aa..a51b7822 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -3986,3 +3986,31 @@ ruff check --select F401,F821,I001 apps/api/src/services/mcp_audit_context.py ap - `ollama188-retirement-gate.sh` 預設 24 小時窗口仍會看到退場前歷史 POST,因此短期會 fail;判斷「現在是否仍打 188」需用較短觀察窗口。 - 後續若要讓 111 真正成為第三順位可用 fallback,需要先修通 K8s / API Pod 到 `192.168.0.111:11434` 的網路路徑。 + +--- + +## 2026-05-06(台北)— 心跳報告列出 Ollama 三段式端點 + +**觸發**:系統報告原本只顯示單一 `Ollama: 正常`,容易讓人誤判 111、GCP-A、GCP-B 的實際狀態;在 111 網路不可達時,報告仍可能看起來「全系統正常」。 + +### 已修正 + +| 範圍 | 結果 | +|------|------| +| `heartbeat_report_service.py` | `_probe_ollama()` 改為同時探測 GCP-A、GCP-B、111 fallback,保留主 Ollama models 檢查 | +| Telegram 心跳 HTML | AI 服務區新增三個子列:`GCP-A`、`GCP-B`、`111`,各自顯示狀態與延遲 | +| warnings | 任一已設定 Ollama endpoint 異常時,明確加入 `Ollama {name} 異常` | +| 測試 | 新增 `test_heartbeat_ollama_endpoints.py`,鎖住三端點顯示與 warning 行為 | + +### 驗證 + +```text +pytest apps/api/tests/test_heartbeat_ollama_endpoints.py apps/api/tests/test_heartbeat_pod_state_machine.py apps/api/tests/test_mcp_audit_context.py apps/api/tests/test_mcp_audit_service.py +# 19 passed + +py_compile apps/api/src/services/heartbeat_report_service.py apps/api/tests/test_heartbeat_ollama_endpoints.py +# 通過 + +ruff check --select F401,F821,I001 apps/api/src/services/heartbeat_report_service.py apps/api/tests/test_heartbeat_ollama_endpoints.py +# All checks passed +```