diff --git a/apps/api/src/services/ollama_failover_manager.py b/apps/api/src/services/ollama_failover_manager.py index f09a8557..dc0e7c86 100644 --- a/apps/api/src/services/ollama_failover_manager.py +++ b/apps/api/src/services/ollama_failover_manager.py @@ -316,22 +316,22 @@ class OllamaFailoverManager: now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat() - # GCP-A 健康 → 主 GCP-A + # GCP-A 健康 → 主 GCP-A,Gemini 永遠在 Ollama 鏈最後(與舊 111 行為一致) if health_gcp_a.status == HealthStatus.HEALTHY: return OllamaRoutingResult( primary=ep_gcp_a, - fallback_chain=[ep_gcp_b, ep_local], + fallback_chain=[ep_gcp_b, ep_local, _GEMINI_ENDPOINT], routing_reason="GCP-A HEALTHY → primary GCP-A", health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, health_local=health_local, ) - # GCP-A 不健康,GCP-B 健康 → 切 GCP-B + # GCP-A 不健康,GCP-B 健康 → 切 GCP-B,Gemini 在鏈尾 if health_gcp_b.status == HealthStatus.HEALTHY: return OllamaRoutingResult( primary=ep_gcp_b, - fallback_chain=[ep_local], + fallback_chain=[ep_local, _GEMINI_ENDPOINT], routing_reason=f"GCP-A {health_gcp_a.status.value} → 切 GCP-B at {now_ts}", health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b,