diff --git a/config.py b/config.py index 137a96d..192ba10 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.426" +SYSTEM_VERSION = "V10.427" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 9593239..115a613 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-24 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉 -> **適用版本**: V10.426 +> **適用版本**: V10.427 --- @@ -32,6 +32,7 @@ - ElephantAlpha prompt / agent registry 不得再把 OpenClaw 描述為 Gemini 主模型;OpenClaw 是 `qwen2.5-coder:7b` / `qwen3:14b` Ollama-first 策略師,Gemini 僅能在 guard 顯式解鎖後作 emergency fallback。 - 111 `192.168.0.111` 只是最後一道 Mac fallback,不承接 7B+、vision、long-context 模型長駐;`OllamaService.generate()` 落到 111 時會將 `qwen3`、`deepseek-r1`、`hermes3`、`qwen2.5*`、`gemma3`、`llava`、`minicpm-v` 與 7B+ 模型依 `OLLAMA_111_MODEL_DOWNGRADE_PATTERNS` 降級到 `OLLAMA_111_MODEL_FALLBACK=llama3.2:latest`,並以 `OLLAMA_111_KEEP_ALIVE=5m`、`OLLAMA_111_MAX_TIMEOUT=20`、`OLLAMA_111_NUM_CTX=4096`、`OLLAMA_111_NUM_PREDICT=512` 封頂。OpenClaw 報告型路徑的業務 keep-alive 預設 `5m`;Code Review 以 `CODE_REVIEW_ALLOW_111_FALLBACK=false`、Hermes 以 `HERMES_ALLOW_111_FALLBACK=false` 預設跳過 111,避免 16GB RAM 主機與 GCP-B 被長駐 runner、長輸出與 24h keep-alive 壓到高 load。 - Scheduler 每 15 分鐘執行 `run_ollama_111_usage_guard_check()`,只讀 `ai_calls` 統計最近視窗的 GCP-A / GCP-B / 111 呼叫量;預設 60 分鐘內 Ollama 呼叫至少 20 次、111 至少 3 次且占比 >= 5% 才推 Telegram。這是觀測護欄,不改路由、不寫 DB、不自動重啟服務。 +- `OllamaService` 對 111 final fallback 有 circuit breaker:預設最近 60 分鐘 Ollama 呼叫至少 20 次、111 至少 5 次且占比 >= 5% 時,短暫跳過 111(`OLLAMA_111_CIRCUIT_CACHE_SEC=60`),避免 111 在已偏高時繼續承接長任務;DB 觀測失敗時 fail-open,不讓主要 GCP-A/GCP-B 路由被觀測層中斷。 - 111 的 LAN 
入口必須經 `scripts/ops/ollama111_allow_proxy.py` allowlist proxy:真實 Ollama 綁 `127.0.0.1:11434`,proxy 綁 `192.168.0.111:11434`,預設只允許 111 本機與 188 生產宿主;110 / 121 / 其他 LAN client 不能直接打 111,避免跨專案 CI 或 VM 繞過 momo-pro router 載入 7B+ runner。111 上以 `scripts/ops/install_ollama111_allow_proxy.sh` 安裝 user LaunchAgent,安裝器會把 proxy script 複製到 `~/.local/share/momo-pro-system/ollama111_allow_proxy.py`,讓 LaunchAgent 不依賴 iCloud repo 掛載路徑,並讓 proxy 與 `OLLAMA_HOST=127.0.0.1:11434` 在登入/重啟後自動恢復。拒絕日誌以 `OLLAMA111_PROXY_REJECT_LOG_DEDUP_SEC=60` 去重,避免 121 這類旁路探測刷爆 111 磁碟日誌。 - ElephantAlpha 的 `price_drop_alert` / `market_opportunity` Telegram HITL 告警必須把同款證據獨立呈現,至少包含 `match_type`、`price_basis`、`alert_tier` 與 `match_score`;沒有高信心同款與總價可比證據時,不得把 PChome/MOMO 價差寫成可直接跟價建議。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 9a6be27..28dc421 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.427 111 fallback circuit breaker**: `OllamaService` 在選到 111 final fallback 前先讀 `ai_calls` 近 60 分鐘比例;若 Ollama 呼叫 >=20、111 >=5 且占比 >=5%,會短暫跳過 111 並清除 resolved host cache,避免 111 在已偏高時繼續承接長任務。DB 觀測失敗採 fail-open,避免觀測層故障反向中斷 GCP-A/GCP-B 正常路由。 - **V10.426 111 proxy 拒絕日誌去重**: `ollama111_allow_proxy.py` 對同一來源 IP 的 reject log 預設 60 秒去重,保留 110 / 121 被擋的可觀測性,同時避免旁路 VM 持續探測時把 111 的 proxy log 與磁碟 I/O 刷高。 - **V10.425 111 fallback 使用率護欄**: Scheduler 每 15 分鐘只讀 `ai_calls` 檢查 111 Ollama fallback 使用率,預設 60 分鐘內 Ollama 呼叫 >=20、111 呼叫 >=3 且占比 >=5% 才推 Telegram,並列出 111 caller Top 5;此護欄只觀測與告警,不改路由、不寫 DB、不重啟服務,讓 111 被異常承接高負載時可即早發現。 - **V10.424 111 proxy LaunchAgent 安裝路徑穩定化**: `install_ollama111_allow_proxy.sh` 會把 proxy script 複製到 `~/.local/share/momo-pro-system/ollama111_allow_proxy.py` 後再寫入 LaunchAgent,避免 111 重啟或 iCloud repo 路徑未掛載時代理失效;同時清空舊 stderr log,讓安裝後狀態更容易判讀。 diff --git a/services/ollama_service.py b/services/ollama_service.py index ff9bb98..8c1aa7a 100644 --- a/services/ollama_service.py +++ 
# How long (seconds) one circuit-breaker verdict for the 111 host is reused
# before ai_calls is queried again.
FALLBACK_111_CIRCUIT_CACHE_SEC = int(os.getenv('OLLAMA_111_CIRCUIT_CACHE_SEC', '60'))

# Last circuit-breaker verdict for the 111 fallback host. 'ts' is the
# time.time() value of the evaluation; 0 means "never evaluated".
_fallback_111_circuit_cache: dict = {'blocked': False, 'reason': '', 'ts': 0}


def _env_flag(name: str, default: bool = False) -> bool:
    """Read a boolean feature flag from the environment.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is not set at all.

    Returns:
        True when the variable is set to one of ``1``, ``true``, ``yes`` or
        ``on`` (case-insensitive, surrounding whitespace ignored); False for
        any other set value; ``default`` when the variable is unset.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return str(raw).strip().lower() in {'1', 'true', 'yes', 'on'}


def _clear_resolved_host_cache() -> None:
    """Invalidate the resolved-host cache so the next resolve re-evaluates hosts."""
    _resolved_host_cache['host'] = None
    _resolved_host_cache['ts'] = 0


def _fallback_111_block_reason(host: str) -> Tuple[bool, str]:
    """Decide whether the 111 final-fallback host must be skipped for this request.

    The check only does work when ``host`` is the 111 fallback host, so
    requests routed to other hosts never pay the DB round-trip. The verdict is
    cached for ``FALLBACK_111_CIRCUIT_CACHE_SEC`` seconds.

    The breaker is fail-open: any failure while evaluating it (database
    unavailable, query error, or a malformed ``OLLAMA_111_CIRCUIT_*`` value)
    yields "not blocked" instead of raising into the caller.

    Environment variables read per call:
        OLLAMA_111_FALLBACK_ENABLED: when false, 111 is always blocked.
        OLLAMA_111_CIRCUIT_BREAKER_ENABLED: when false, the breaker is bypassed.
        OLLAMA_111_CIRCUIT_WINDOW_MINUTES: look-back window (default 60).
        OLLAMA_111_CIRCUIT_PCT: minimum 111 share in percent (default 5).
        OLLAMA_111_CIRCUIT_MIN_TOTAL: minimum Ollama calls in window (default 20).
        OLLAMA_111_CIRCUIT_MIN_111: minimum 111 calls in window (default 5).

    Args:
        host: Base URL of the host that was selected for the request.

    Returns:
        ``(blocked, reason)``. ``reason`` is a human-readable explanation when
        ``blocked`` is True and an empty string otherwise.
    """
    if not _is_111_fallback_host(host):
        return False, ''
    if not _env_flag('OLLAMA_111_FALLBACK_ENABLED', True):
        return True, '111 fallback disabled by OLLAMA_111_FALLBACK_ENABLED=false'
    if not _env_flag('OLLAMA_111_CIRCUIT_BREAKER_ENABLED', True):
        return False, ''

    import time
    now = time.time()
    cached_ts = float(_fallback_111_circuit_cache.get('ts') or 0)
    # Require a non-negative age: if the wall clock was stepped backwards the
    # cached verdict would otherwise look "fresh" until the clock caught up.
    if 0 <= now - cached_ts < FALLBACK_111_CIRCUIT_CACHE_SEC:
        return (
            bool(_fallback_111_circuit_cache.get('blocked')),
            str(_fallback_111_circuit_cache.get('reason') or ''),
        )

    blocked = False
    reason = ''
    try:
        # Threshold parsing lives inside the try block on purpose: a typo in
        # one of these variables must degrade to fail-open, not raise
        # ValueError into generate()/embed().
        window_minutes = int(os.getenv('OLLAMA_111_CIRCUIT_WINDOW_MINUTES', '60'))
        threshold_pct = float(os.getenv('OLLAMA_111_CIRCUIT_PCT', '5'))
        min_total = int(os.getenv('OLLAMA_111_CIRCUIT_MIN_TOTAL', '20'))
        min_111 = int(os.getenv('OLLAMA_111_CIRCUIT_MIN_111', '5'))

        from sqlalchemy import text as sa_text
        from database.manager import get_session

        session = get_session()
        try:
            row = session.execute(
                sa_text("""
                    SELECT
                        COUNT(*) FILTER (
                            WHERE provider IN ('gcp_ollama','ollama_secondary','ollama_111')
                        ) AS total_ollama,
                        COUNT(*) FILTER (WHERE provider = 'ollama_111') AS host_111
                    FROM ai_calls
                    WHERE called_at >= NOW() - (:window_minutes || ' minutes')::interval
                """),
                {'window_minutes': window_minutes},
            ).fetchone()
        finally:
            session.close()

        total_ollama = int(row[0] or 0)
        host_111 = int(row[1] or 0)
        rate_pct = (host_111 / total_ollama * 100.0) if total_ollama else 0.0
        # All three conditions must hold, so a handful of calls in a quiet
        # window cannot trip the breaker on percentage alone.
        blocked = (
            total_ollama >= min_total
            and host_111 >= min_111
            and rate_pct >= threshold_pct
        )
        if blocked:
            reason = (
                f'111 circuit breaker active: {host_111}/{total_ollama} '
                f'ollama calls in {window_minutes}m ({rate_pct:.1f}% >= {threshold_pct:.1f}%)'
            )
    except Exception as exc:
        # Fail-open. Logged at warning level (at most once per cache window)
        # so a permanently broken guard is visible; traceback stays at debug.
        logger.warning('[Ollama111Circuit] fail-open: %s', exc)
        logger.debug('[Ollama111Circuit] fail-open traceback', exc_info=True)
        blocked = False
        reason = ''

    _fallback_111_circuit_cache.update({'blocked': blocked, 'reason': reason, 'ts': now})
    return blocked, reason
def test_generate_skips_111_when_circuit_breaker_blocks_fallback():
    """When the 111 breaker is active, generate() must not send a third attempt to 111."""
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = [
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_PRIMARY,
    ]

    def fake_111_circuit(host):
        # Block only the 111 fallback host; every other host stays allowed.
        if host == oss.OLLAMA_HOST_FALLBACK:
            return True, '111 circuit breaker active'
        return False, ''

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch('services.ollama_service._fallback_111_block_reason', side_effect=fake_111_circuit), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('gcp timeout')) as mock_post:
        resp = svc.generate('test')

    posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list]
    assert resp.success is False
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
    assert '111 circuit breaker active' in (resp.error or '')


def test_111_circuit_breaker_blocks_when_recent_share_is_high(monkeypatch):
    """The breaker reports blocked when ai_calls shows a high recent 111 share."""
    from services import ollama_service as oss

    class FakeResult:
        def fetchone(self):
            # (total_ollama, host_111): 12 of 100 calls hit 111 -> 12%.
            return (100, 12)

    class FakeSession:
        def execute(self, *args, **kwargs):
            return FakeResult()

        def close(self):
            pass

    # Ambient environment must not be able to short-circuit the breaker.
    monkeypatch.delenv('OLLAMA_111_FALLBACK_ENABLED', raising=False)
    monkeypatch.delenv('OLLAMA_111_CIRCUIT_BREAKER_ENABLED', raising=False)
    # The cache TTL is read once at import time, so setting the environment
    # variable here would be a no-op; patch the module constant instead.
    monkeypatch.setattr(oss, 'FALLBACK_111_CIRCUIT_CACHE_SEC', 60)
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_WINDOW_MINUTES', '60')
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_PCT', '5')
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_MIN_TOTAL', '20')
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_MIN_111', '5')
    monkeypatch.setattr('database.manager.get_session', lambda: FakeSession())
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})

    blocked, reason = oss._fallback_111_block_reason(oss.OLLAMA_HOST_FALLBACK)

    assert blocked is True
    assert '111 circuit breaker active' in reason
    assert '12/100' in reason
    # The verdict must be cached so follow-up requests skip the DB query.
    assert oss._fallback_111_circuit_cache['blocked'] is True


def test_111_circuit_breaker_fails_open_when_db_is_unavailable(monkeypatch):
    """A failing DB observation must not block the Ollama fallback path."""
    from services import ollama_service as oss

    def broken_get_session():
        raise RuntimeError('db down')

    # Ambient environment must not force a "blocked" verdict before the DB
    # path under test is even reached.
    monkeypatch.delenv('OLLAMA_111_FALLBACK_ENABLED', raising=False)
    monkeypatch.delenv('OLLAMA_111_CIRCUIT_BREAKER_ENABLED', raising=False)
    monkeypatch.setattr('database.manager.get_session', broken_get_session)
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})

    blocked, reason = oss._fallback_111_block_reason(oss.OLLAMA_HOST_FALLBACK)

    assert blocked is False
    assert reason == ''