fix(api): align kb extractor ollama model
This commit is contained in:
@@ -4,7 +4,7 @@ Knowledge Extractor Service — KB Phase 2-A
|
||||
Incident resolved 後自動萃取 KB 草稿。
|
||||
|
||||
設計原則:
|
||||
- 使用 Ollama llama3.2:3b,依全域順序 GCP-A → GCP-B → 111 嘗試
|
||||
- 使用 `settings.OLLAMA_TOOL_MODEL`,依全域順序 GCP-A → GCP-B → 111 嘗試
|
||||
- fire-and-forget:失敗不影響 resolve 主流程
|
||||
- logger.exception 保留完整 Stack Trace 供 Prompt 調優
|
||||
|
||||
@@ -17,10 +17,19 @@ logger = structlog.get_logger(__name__)
|
||||
|
||||
# 2026-05-19 Codex: 統帥校正,全 Ollama workload 固定 GCP-A → GCP-B → 111。
|
||||
def _get_ollama_endpoints():
    """Return the ordered Ollama endpoints to try for KB extraction.

    Delegates to ``resolve_ollama_order_with_cooldown`` for the ``"hermes"``
    workload. Callers iterate the result in order and read ``url`` and
    ``provider_name`` from each entry.
    """
    # Kept as a function-scope import, as in the original.
    # NOTE(review): presumably this avoids an import cycle — confirm.
    from src.services.ollama_endpoint_circuit_breaker import (
        resolve_ollama_order_with_cooldown,
    )

    return resolve_ollama_order_with_cooldown("hermes")
|
||||
|
||||
|
||||
def _get_extract_model() -> str:
    """Return the Ollama model name used for KB extraction.

    Reads ``settings.OLLAMA_TOOL_MODEL`` on every call, so runtime changes to
    the setting take effect without re-importing this module.

    Returns:
        The configured model name with surrounding whitespace removed, or
        ``"hermes3:latest"`` when the setting is missing, empty, otherwise
        falsy, or whitespace only.
    """
    # Kept as a function-scope import, as in the original.
    from src.core.config import settings

    fallback = "hermes3:latest"
    configured = getattr(settings, "OLLAMA_TOOL_MODEL", fallback) or fallback
    # A whitespace-only value is truthy but is not a usable model name;
    # strip it and fall back to the default when nothing is left.
    return str(configured).strip() or fallback
|
||||
|
||||
|
||||
return resolve_ollama_order("deep_rca")
|
||||
_EXTRACT_MODEL = "llama3.2:3b"
|
||||
_EXTRACT_TIMEOUT = 30.0 # 秒,容忍慢速
|
||||
|
||||
# Linear / Nothing.tech 風格的 SRE KB Prompt
|
||||
@@ -72,7 +81,7 @@ class KnowledgeExtractorService:
|
||||
"""
|
||||
Incident → KB 草稿自動萃取器
|
||||
|
||||
使用 Ollama llama3.2:3b 本地推理,產生 Markdown 格式的 SRE 知識條目。
|
||||
使用目前配置的 Ollama tool model 產生 Markdown 格式的 SRE 知識條目。
|
||||
"""
|
||||
|
||||
async def extract_from_incident(self, incident) -> bool:
|
||||
@@ -103,11 +112,12 @@ class KnowledgeExtractorService:
|
||||
|
||||
# 2. 呼叫 Ollama(直接 HTTP,不走 AIRouter 避免路由邏輯開銷)
|
||||
markdown_content = await self._call_ollama(prompt)
|
||||
model = _get_extract_model()
|
||||
if not markdown_content:
|
||||
logger.warning(
|
||||
"kb_extract_empty_response",
|
||||
incident_id=incident.incident_id,
|
||||
model=_EXTRACT_MODEL,
|
||||
model=model,
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -142,7 +152,7 @@ class KnowledgeExtractorService:
|
||||
incident_id=incident.incident_id,
|
||||
title=title,
|
||||
category=category,
|
||||
model=_EXTRACT_MODEL,
|
||||
model=model,
|
||||
)
|
||||
return True
|
||||
|
||||
@@ -165,6 +175,7 @@ class KnowledgeExtractorService:
|
||||
import httpx
|
||||
|
||||
endpoints = _get_ollama_endpoints()
|
||||
model = _get_extract_model()
|
||||
async with httpx.AsyncClient(timeout=_EXTRACT_TIMEOUT) as client:
|
||||
for endpoint in endpoints:
|
||||
if not endpoint.url:
|
||||
@@ -173,7 +184,7 @@ class KnowledgeExtractorService:
|
||||
r = await client.post(
|
||||
f"{endpoint.url}/api/generate",
|
||||
json={
|
||||
"model": _EXTRACT_MODEL,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
@@ -188,15 +199,25 @@ class KnowledgeExtractorService:
|
||||
if text:
|
||||
logger.info(
|
||||
"kb_ollama_call_success",
|
||||
model=_EXTRACT_MODEL,
|
||||
model=model,
|
||||
provider=endpoint.provider_name,
|
||||
base=endpoint.url,
|
||||
)
|
||||
from src.services.ollama_endpoint_circuit_breaker import (
|
||||
record_ollama_endpoint_success,
|
||||
)
|
||||
|
||||
record_ollama_endpoint_success(endpoint.url)
|
||||
return text
|
||||
except Exception as e:
|
||||
from src.services.ollama_endpoint_circuit_breaker import (
|
||||
record_ollama_endpoint_failure,
|
||||
)
|
||||
|
||||
record_ollama_endpoint_failure(endpoint.url)
|
||||
logger.warning(
|
||||
"kb_ollama_call_failed",
|
||||
model=_EXTRACT_MODEL,
|
||||
model=model,
|
||||
provider=endpoint.provider_name,
|
||||
base=endpoint.url,
|
||||
error=str(e),
|
||||
@@ -204,7 +225,7 @@ class KnowledgeExtractorService:
|
||||
|
||||
logger.error(
|
||||
"kb_ollama_all_endpoints_failed",
|
||||
model=_EXTRACT_MODEL,
|
||||
model=model,
|
||||
attempted=[endpoint.provider_name for endpoint in endpoints],
|
||||
)
|
||||
return None
|
||||
|
||||
16
apps/api/tests/test_knowledge_extractor_model.py
Normal file
16
apps/api/tests/test_knowledge_extractor_model.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from src.core.config import settings
|
||||
from src.services import knowledge_extractor_service
|
||||
|
||||
|
||||
def test_extract_model_uses_configured_ollama_tool_model(monkeypatch):
    """The extractor resolves its model from ``settings.OLLAMA_TOOL_MODEL``."""
    # Arrange: pin the setting to a known model name.
    monkeypatch.setattr(settings, "OLLAMA_TOOL_MODEL", "hermes3:latest")

    # Act
    resolved_model = knowledge_extractor_service._get_extract_model()

    # Assert
    assert resolved_model == "hermes3:latest"
|
||||
|
||||
|
||||
def test_extract_model_falls_back_to_hermes_when_config_empty(monkeypatch):
    """An empty setting resolves to hermes3, never the removed llama3.2 model."""
    # Arrange: simulate an empty runtime configuration value.
    monkeypatch.setattr(settings, "OLLAMA_TOOL_MODEL", "")

    # Act
    resolved_model = knowledge_extractor_service._get_extract_model()

    # Assert
    assert resolved_model == "hermes3:latest"
|
||||
@@ -1,3 +1,45 @@
|
||||
## 2026-05-31|KB extractor Ollama 模型漂移修復
|
||||
|
||||
**背景**:
|
||||
|
||||
- `INC-20260531-D6A3C4` resolve path 已寫入 KM / Postmortem,但 API log 同時出現 `kb_ollama_all_endpoints_failed`。
|
||||
- 直接探測 110 proxy 後確認:
|
||||
- `11435` / `11436` 的 `/api/tags` 都是 `200`,GCP-A/B Ollama 服務不是整台掛掉。
|
||||
- `11435` / `11436` 對 `llama3.2:3b` 的 `/api/generate` 回 `404 model not found`。
|
||||
- `11436` 對 `hermes3:latest` 的 `/api/generate` 回 `200`。
|
||||
- `11437` local fallback 仍為 `502 Bad Gateway`,保留為獨立基礎設施紅燈。
|
||||
- 根因是 `KnowledgeExtractorService` 還硬打舊模型 `llama3.2:3b`,但目前 GCP-A/B 實際可用模型包含 `hermes3:latest` / `deepseek-r1:14b`。
|
||||
|
||||
**本次調整**:
|
||||
|
||||
- `apps/api/src/services/knowledge_extractor_service.py`:
|
||||
- KB extractor 改用既有 `settings.OLLAMA_TOOL_MODEL`,預設 fallback `hermes3:latest`。
|
||||
- endpoint workload 從 `deep_rca` 改成 `hermes`。
|
||||
- 接上 `resolve_ollama_order_with_cooldown()`,失敗 endpoint 短期 cooldown,成功後清除 cooldown,避免 GCP-A timeout 反覆拖慢背景 KB 萃取。
|
||||
- 新增 `apps/api/tests/test_knowledge_extractor_model.py`,鎖住 extractor 不再回退到已移除的 `llama3.2:3b`。
|
||||
|
||||
**驗證**:
|
||||
|
||||
```text
|
||||
live probe:
|
||||
11435 /api/tags -> 200
|
||||
11435 llama3.2:3b /api/generate -> 404 model not found
|
||||
11436 /api/tags -> 200
|
||||
11436 llama3.2:3b /api/generate -> 404 model not found
|
||||
11436 hermes3:latest /api/generate -> 200, response=OK
|
||||
11437 /api/tags and /api/generate -> 502 Bad Gateway
|
||||
|
||||
local:
|
||||
python3 -m py_compile knowledge_extractor_service.py test_knowledge_extractor_model.py -> pass
|
||||
ruff check knowledge_extractor_service.py test_knowledge_extractor_model.py --select E9,F401,F821,F841 -> pass
|
||||
pytest tests/test_knowledge_extractor_model.py -q -> 2 passed
|
||||
```
|
||||
|
||||
**判讀**:
|
||||
|
||||
- 這次修的是 KB extractor 的模型漂移,不是改 AI Router 主路由,也沒有新增付費 provider。
|
||||
- GCP-A/B Ollama proxy 可列模型;GCP-B `hermes3:latest` 可生成。11437 local fallback 仍需後續基礎設施修復,不能宣稱三層 Ollama 全綠。
|
||||
|
||||
## 2026-05-31|IwoooS 側欄單一資安入口收斂
|
||||
|
||||
**背景**:
|
||||
|
||||
@@ -2716,6 +2716,12 @@ Phase 6 完成後
|
||||
- Verification:production status-chain 回 `repair_state=auto_repaired_verified`、`operator_outcome.state=completed_verified`、`needs_human=false`、`execution_result.summary_zh=已完成:修復指令成功,且驗證通過`、`execution.ansible.latest_operation_type=ansible_apply_executed`、`latest_returncode=0`、`controlled_apply=true`。Resolve path 已寫 incident case KM `9207f7e1-ee6f-4a4d-981f-4676c04a5d61`、postmortem KM `c28c5f56-d4b3-4314-b961-31d0b69a9c05`,Telegram postmortem sent。
|
||||
- 判讀:T154g 是使用者批准後的 controlled apply + 人工驗證收斂,不是 24h 全自動修復宣稱;但此 incident 已不需人工介入,後續只需監控是否回歸。
|
||||
|
||||
**T154h KB extractor Ollama model drift repair(2026-05-31 台北)**:
|
||||
- 觸發:D6A3C4 resolve path 成功寫 KM / Postmortem 後,背景 `KnowledgeExtractorService` 仍記錄 `kb_ollama_all_endpoints_failed`。Live probe 證明 GCP-A/B proxy `/api/tags` 為 200,問題不是 GCP-A/B 整台掛掉,而是 extractor 還硬打不存在的 `llama3.2:3b`;GCP-B `hermes3:latest` `/api/generate` 可回 200。
|
||||
- 修正:`KnowledgeExtractorService` 改用 `settings.OLLAMA_TOOL_MODEL`,預設 `hermes3:latest`;endpoint workload 改為 `hermes`;接上 `resolve_ollama_order_with_cooldown()`,失敗 endpoint 短期 cooldown、成功後清除 cooldown,避免 GCP-A timeout 反覆拖慢背景 KB 萃取。新增 `test_knowledge_extractor_model.py` 鎖住空設定 fallback 也不再回到 `llama3.2:3b`。
|
||||
- Verification:live probe `11436 hermes3:latest /api/generate -> 200 response=OK`;`11435/11436 llama3.2:3b -> 404 model not found`;`11437 -> 502` 仍是 local fallback 紅燈。Local `py_compile` pass、ruff `E9/F401/F821/F841` pass、pytest `tests/test_knowledge_extractor_model.py` 2 passed。
|
||||
- 判讀:T154h 修的是背景 KB extractor 的模型漂移,不改 AI Router 主路由、不新增付費 provider;不能因此宣稱三層 Ollama 全綠,11437 local fallback 仍需後續基礎設施修復。
|
||||
|
||||
**T152 Ansible runtime readiness surfaced(2026-05-24 台北)**:
|
||||
- 觸發:T151 已讓首頁看到 execution backend / Ansible attribution,但 operator 仍看不到 runtime 端缺什麼,容易把「Ansible 有候選」誤解成「Ansible 已能自動修復」。
|
||||
- 修正:API image 複製 `infra/ansible/` 作 read-only catalog;`truth-chain/quality/summary` 新增 `ansible_runtime`,回報 playbook binary、catalog、inventory、playbook_count、can_run_check_mode、blockers。首頁 execution evidence 同步顯示 runtime 狀態;目前 production 顯示 `runtime 未就緒:ansible_playbook_binary_missing`。未安裝 `ansible-core`、未啟用 check-mode / apply。
|
||||
|
||||
Reference in New Issue
Block a user