This commit is contained in:
@@ -363,12 +363,13 @@ OLLAMA_MODEL=gemma3:4b
|
||||
OLLAMA_TIMEOUT=120
|
||||
OLLAMA_COPY_TIMEOUT=180
|
||||
OLLAMA_EMBED_TIMEOUT=45
|
||||
# 111 是 Mac final fallback,不承接 7B+ / vision / long-context 模型長駐;落到 111 時自動降級與縮短常駐。
|
||||
# 111 是 Mac final fallback,不承接 7B+ / vision / long-context / 長輸出任務;落到 111 時自動降級與縮短常駐。
|
||||
OLLAMA_111_MODEL_FALLBACK=llama3.2:latest
|
||||
OLLAMA_111_MODEL_DOWNGRADE_PATTERNS=qwen3:*,deepseek-r1:*,hermes3:*,llama3.1:*,qwen2.5:*,qwen2.5-coder:*,gemma3:*,minicpm-v:*,llava:*,*:7b*,*:8b*,*:14b*,*:32b*,*:70b*
|
||||
OLLAMA_111_KEEP_ALIVE=5m
|
||||
OLLAMA_111_MAX_TIMEOUT=45
|
||||
OLLAMA_111_MAX_TIMEOUT=20
|
||||
OLLAMA_111_NUM_CTX=4096
|
||||
OLLAMA_111_NUM_PREDICT=512
|
||||
|
||||
# [預設 true] OpenClaw Q&A 先走 Ollama,品質不足或失敗時才 fallback Gemini/NIM
|
||||
# 主機不提供單 caller override;一律走 OLLAMA_HOST_PRIMARY → OLLAMA_HOST_SECONDARY → OLLAMA_HOST_FALLBACK
|
||||
|
||||
@@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.379"
|
||||
SYSTEM_VERSION = "V10.380"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
> **最後更新**: 2026-05-21 (台北時間)
|
||||
> **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉
|
||||
> **適用版本**: V10.377
|
||||
> **適用版本**: V10.380
|
||||
|
||||
---
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
- Gemini API 出站有第二道 kill switch:`GEMINI_FALLBACK_ENABLED` 預設為 `false`。即使 `GEMINI_API_KEY` 存在,通用 AI fallback、OpenClaw 報告/QA/PPT/圖片、MCP Grounding 與 Code Review L3 都不得呼叫 Gemini;只有操作員明確設為 `true` 時,Gemini 才能作緊急備援。
|
||||
- Gemini 不可被任何狀態面板或 router 推薦為主提供者:`AIProviderService._get_recommended_provider()` 不得回傳 `gemini`,只能顯示為 fallback 狀態;`llm_model_router` 的 `ea_engine` 若收到 `gemini-*` default 必須改回 `hermes3:latest`,需要深推理時才升本地 `deepseek-r1:14b`。
|
||||
- ElephantAlpha prompt / agent registry 不得再把 OpenClaw 描述為 Gemini 主模型;OpenClaw 是 `qwen2.5-coder:7b` / `qwen3:14b` Ollama-first 策略師,Gemini 僅能在 guard 顯式解鎖後作 emergency fallback。
|
||||
- 111 `192.168.0.111` 只是最後一道 Mac fallback,不承接 7B+、vision、long-context 模型長駐;`OllamaService.generate()` 落到 111 時會將 `qwen3`、`deepseek-r1`、`hermes3`、`qwen2.5*`、`gemma3`、`llava`、`minicpm-v` 與 7B+ 模型依 `OLLAMA_111_MODEL_DOWNGRADE_PATTERNS` 降級到 `OLLAMA_111_MODEL_FALLBACK=llama3.2:latest`,並以 `OLLAMA_111_KEEP_ALIVE=5m`、`OLLAMA_111_MAX_TIMEOUT=45`、`OLLAMA_111_NUM_CTX=4096` 封頂,避免 16GB RAM 主機被大 context runner 與 24h keep-alive 壓到 swap。
|
||||
- 111 `192.168.0.111` 只是最後一道 Mac fallback,不承接 7B+、vision、long-context 模型長駐;`OllamaService.generate()` 落到 111 時會將 `qwen3`、`deepseek-r1`、`hermes3`、`qwen2.5*`、`gemma3`、`llava`、`minicpm-v` 與 7B+ 模型依 `OLLAMA_111_MODEL_DOWNGRADE_PATTERNS` 降級到 `OLLAMA_111_MODEL_FALLBACK=llama3.2:latest`,並以 `OLLAMA_111_KEEP_ALIVE=5m`、`OLLAMA_111_MAX_TIMEOUT=20`、`OLLAMA_111_NUM_CTX=4096`、`OLLAMA_111_NUM_PREDICT=512` 封頂,避免 16GB RAM 主機被大 context runner、長輸出與 24h keep-alive 壓到 swap。
|
||||
|
||||
## 一、四 AI Agent 路由架構
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
## 📅 詳細更新日誌 (考古存檔)
|
||||
|
||||
### 2026-05-21:瀏覽器測試守門與 PChome 熱路徑優化
|
||||
- **V10.380 111 Ollama final fallback 收斂**: 111 Mac fallback 從救急路徑改成更短的保護路徑,`OLLAMA_111_MAX_TIMEOUT` 預設由 45s 收緊到 20s,並新增 `OLLAMA_111_NUM_PREDICT=512` 輸出上限;落到 111 時仍會降級重模型到 `llama3.2:latest`、縮 `num_ctx=4096`、`keep_alive=5m`,避免 GCP-A/GCP-B 短暫 timeout 後把長篇 Hermes/OpenClaw 工作轉嫁到 111 造成 swap 與 load 飆高。
|
||||
- **V10.379 MCP runtime promotion gate**: 新增 `mcp_runtime_promotion` read-only builder、GET/POST endpoint、UI promotion package 審核面板與 deployment readiness smoke target,將 MCP activation evidence 與 runtime smoke receipt 合併審核,讓 completion audit 的 runtime 缺口可由人工收據明確補齊。
|
||||
- **V10.379 只讀安全邊界**: 本階段不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler,也不會因 promotion 通過自動打開人工 fetch gate;正式 fetch / DB write / scheduler attach 仍需各自獨立 gate。
|
||||
- **V10.378 AI 推薦頁首屏 Gemini 防漏**: `/ai_recommend` 首屏狀態快照新增 provider sanitization,即使舊 cache / env 內出現 `default_provider='gemini'` 或 `recommended_provider='gemini'`,也會回到 `ollama`,避免 UI 把 Gemini 顯示成主推薦路徑;`/api/ai/set_provider` 同步正規化 provider 輸入,保留 Gemini 只能作 Ollama 失敗備援的拒絕訊息。
|
||||
|
||||
@@ -57,8 +57,9 @@ TIMEOUT = int(os.getenv('OLLAMA_TIMEOUT', '120')) # 秒 - 2 分鐘
|
||||
COPY_TIMEOUT = int(os.getenv('OLLAMA_COPY_TIMEOUT', '180')) # 文案生成專用超時 - 3 分鐘
|
||||
EMBED_TIMEOUT = int(os.getenv('OLLAMA_EMBED_TIMEOUT', os.getenv('EMBEDDING_TIMEOUT', '45')))
|
||||
FALLBACK_111_KEEP_ALIVE = os.getenv('OLLAMA_111_KEEP_ALIVE', '5m')
|
||||
FALLBACK_111_MAX_TIMEOUT = int(os.getenv('OLLAMA_111_MAX_TIMEOUT', '45'))
|
||||
FALLBACK_111_MAX_TIMEOUT = int(os.getenv('OLLAMA_111_MAX_TIMEOUT', '20'))
|
||||
FALLBACK_111_NUM_CTX = int(os.getenv('OLLAMA_111_NUM_CTX', '4096'))
|
||||
FALLBACK_111_NUM_PREDICT = int(os.getenv('OLLAMA_111_NUM_PREDICT', '512'))
|
||||
FALLBACK_111_MODEL = os.getenv('OLLAMA_111_MODEL_FALLBACK', 'llama3.2:latest')
|
||||
FALLBACK_111_MODEL_PATTERNS = tuple(
|
||||
pattern.strip().lower()
|
||||
@@ -142,13 +143,19 @@ def _effective_timeout_for_host(timeout_s: int, host: str) -> int:
|
||||
|
||||
|
||||
def _cap_111_options(options: Dict[str, Any]) -> None:
    """Clamp ``num_ctx`` and ``num_predict`` in place for the 111 fallback host.

    Both keys are always written back, bounded above by
    ``FALLBACK_111_NUM_CTX`` and ``FALLBACK_111_NUM_PREDICT`` respectively, so
    the last-resort host is not dragged into high load by a large context
    window or a long generation.

    A requested value that is missing, falsy, non-numeric, or not positive is
    replaced by the cap itself.

    Args:
        options: Ollama request options; mutated in place.
    """
    limits = (
        ("num_ctx", FALLBACK_111_NUM_CTX),
        ("num_predict", FALLBACK_111_NUM_PREDICT),
    )
    for key, cap in limits:
        try:
            requested = int(options.get(key) or cap)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")); treat it like any other
            # unusable value and fall back to the cap.
            requested = cap
        # A bare min() would let negative values through unchanged. Ollama
        # documents num_predict=-1 as infinite generation, which would defeat
        # the cap entirely, so non-positive requests are pinned to the cap.
        options[key] = min(requested, cap) if requested > 0 else cap
|
||||
|
||||
|
||||
def _canonical_host_chain() -> List[str]:
|
||||
"""Return the approved static fallback chain without duplicates."""
|
||||
|
||||
@@ -243,8 +243,9 @@ def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch
|
||||
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "qwen2.5:7b-instruct")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))
|
||||
|
||||
fake_resp = MagicMock(status_code=200)
|
||||
@@ -257,13 +258,19 @@ def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch
|
||||
svc = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")
|
||||
|
||||
with patch("services.ollama_service.requests.post", return_value=fake_resp) as mock_post:
|
||||
resp = svc.generate("hi", timeout=120, keep_alive="24h")
|
||||
resp = svc.generate(
|
||||
"hi",
|
||||
timeout=120,
|
||||
keep_alive="24h",
|
||||
options={"num_ctx": 131072, "num_predict": 4096},
|
||||
)
|
||||
|
||||
payload = mock_post.call_args.kwargs["json"]
|
||||
assert payload["model"] == "qwen2.5:7b-instruct"
|
||||
assert payload["keep_alive"] == "5m"
|
||||
assert payload["options"]["num_ctx"] == 4096
|
||||
assert mock_post.call_args.kwargs["timeout"] == 45
|
||||
assert payload["options"]["num_predict"] == 512
|
||||
assert mock_post.call_args.kwargs["timeout"] == 20
|
||||
assert resp.model == "qwen2.5:7b-instruct"
|
||||
|
||||
|
||||
@@ -272,15 +279,16 @@ def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
|
||||
from services import ollama_service as oss
|
||||
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
|
||||
svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")
|
||||
|
||||
with patch("services.ollama_service.requests.post", side_effect=Timeout):
|
||||
resp = svc.generate("hi", timeout=120, keep_alive="24h")
|
||||
|
||||
assert resp.success is False
|
||||
assert "timeout (45s)" in resp.error
|
||||
assert "timeout (20s)" in resp.error
|
||||
|
||||
|
||||
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
|
||||
@@ -288,8 +296,9 @@ def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
|
||||
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "llama3.2:latest")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",))
|
||||
|
||||
fake_resp = MagicMock(status_code=200)
|
||||
@@ -308,4 +317,5 @@ def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
|
||||
assert payload["model"] == "llama3.2:latest"
|
||||
assert payload["keep_alive"] == "5m"
|
||||
assert payload["options"]["num_ctx"] == 4096
|
||||
assert payload["options"]["num_predict"] == 512
|
||||
assert resp.model == "llama3.2:latest"
|
||||
|
||||
Reference in New Issue
Block a user