diff --git a/apps/api/src/services/callback_dispatcher.py b/apps/api/src/services/callback_dispatcher.py index 81bf4077..0c28d85a 100644 --- a/apps/api/src/services/callback_dispatcher.py +++ b/apps/api/src/services/callback_dispatcher.py @@ -479,7 +479,7 @@ def dispatch_llm_action( 1. Risk Gating — critical 直接拒絕;high 需要 confirmed=True 2. Allowlist — mcp_tool 必須在 registry 中 3. Params 渲染 — 支援 {labels.xxx} / {context.xxx} / {incident_id} - 4. Nonce 生成 — medium/high 允許執行時附帶 nonce + 4. Nonce 生成 — medium/high 允許時寫 Redis SET NX TTL=300s 防重放 Args: action: RecommendedAction dataclass(來自 solver_agent B1 輸出) @@ -488,7 +488,8 @@ def dispatch_llm_action( Returns: dict — ok=True 為允許執行,ok=False 附 reason 拒絕原因 """ - import time as _time # noqa: PLC0415 + # 2026-04-27 Claude Sonnet 4.6: H2 Fix — nonce 改用 secrets.token_hex(16) + import secrets as _secrets # noqa: PLC0415 risk: str = getattr(action, "risk", "medium") mcp_tool: str = getattr(action, "mcp_tool", "") @@ -496,6 +497,17 @@ def dispatch_llm_action( name: str = getattr(action, "name", "") params: dict = dict(getattr(action, "params", {}) or {}) + # ── M1: params 型別驗證(所有 value 必須是 str)──────────────────────────── + # 2026-04-27 Claude Sonnet 4.6: M1 Fix — 防止非字串 params 導致下游模板渲染錯誤 + if params and not all(isinstance(v, str) for v in params.values()): + logger.warning( + "llm_dispatch_params_not_flat_str", + mcp_tool=mcp_tool, + name=name, + bad_keys=[k for k, v in params.items() if not isinstance(v, str)], + ) + return {"ok": False, "reason": "params_not_flat_str"} + # ── 1. Risk Gating ──────────────────────────────────────────────────────── if risk == "critical": @@ -509,9 +521,8 @@ def dispatch_llm_action( if risk == "high": if not context.get("confirmed"): - nonce = ( - f"{mcp_tool}:{name}:{context.get('incident_id', '?')}:{int(_time.time())}" - ) + # 2026-04-27 Claude Sonnet 4.6: H2 Fix — 純字串 nonce(不寫 Redis,此路徑只回拒絕) + pending_nonce = _secrets.token_hex(16) logger.info( "llm_dispatch_high_risk_pending", mcp_tool=mcp_tool, @@ -521,7 +532,7 @@ def dispatch_llm_action( return { "ok": False, "reason": "high_risk_requires_confirmation", - "nonce": nonce, + "nonce": pending_nonce, } # ── 2. Allowlist 驗證 ───────────────────────────────────────────────────── @@ -540,12 +551,10 @@ def dispatch_llm_action( rendered_params = _render_llm_params(params, context) # ── 4. Nonce 生成(medium/high 允許時) ─────────────────────────────────── - + # 2026-04-27 Claude Sonnet 4.6: H2 Fix — secrets.token_hex(16) 取代時間戳拼接 nonce: str | None = None if risk in ("medium", "high"): - nonce = ( - f"{mcp_tool}:{name}:{context.get('incident_id', '?')}:{int(_time.time())}" - ) + nonce = _secrets.token_hex(16) logger.info( "llm_dispatch_allowed", diff --git a/apps/api/src/services/failover_alerter.py b/apps/api/src/services/failover_alerter.py index ae81c67f..6dbb9f8a 100644 --- a/apps/api/src/services/failover_alerter.py +++ b/apps/api/src/services/failover_alerter.py @@ -134,13 +134,43 @@ class FailoverAlerter: f"日期:{date_str}\n" f"上限:{quota} calls/day\n" f"當前用量:{current_count}\n" - f"降級目標:OLLAMA\\_188 \\(CPU,推理較慢\\)\n\n" - f"進入慢速模式至明日 0:00\n" + f"降級目標:Nemotron → Claude \\(Gemini 不可用\\)\n\n" + f"進入容災模式至明日 0:00\n" f"建議檢查是否有異常流量,評估是否升級 Gemini 配額" ) await self._send(msg) logger.info("quota_alert_sent", quota=quota, current_count=current_count) + async def alert_provider_version_changed(self, changed_providers: list[str], probed: int) -> None: + """AI Provider 版本變更告警 — dedup 1h/provider + + P3.2.3 by Claude Sonnet 4.6 2026-04-27 + 每個 provider 獨立 dedup,避免同一版本重複告警。 + """ + now_str = datetime.now(TAIPEI_TZ).strftime("%Y-%m-%d %H:%M") + sent: list[str] = [] + + for provider in changed_providers: + dedup_key = f"alert:provider_version_changed:{provider}" + if not await self._check_dedup(dedup_key, ttl=3600): + logger.debug("provider_version_alert_dedup_skipped", provider=provider) + continue + sent.append(provider) + + if not sent: + return + + providers_md = "\n".join(f"• {_escape_md(p)}" for p in sent) + msg = ( + f"*AI Provider 版本變更偵測*\n\n" + f"時間:{_escape_md(now_str)}\n" + f"探測總數:{probed}\n" + f"版本已變更:\n{providers_md}\n\n" + f"系統已自動記錄版本歷史,請確認是否需要重新驗證推理品質" + ) + await self._send(msg) + logger.info("provider_version_alert_sent", sent=sent) + # ------------------------------------------------------------------------- # Dedup(Redis SET NX EX) # ------------------------------------------------------------------------- diff --git a/apps/api/src/services/model_version_tracker.py b/apps/api/src/services/model_version_tracker.py index 5f0a6a70..bf9349d1 100644 --- a/apps/api/src/services/model_version_tracker.py +++ b/apps/api/src/services/model_version_tracker.py @@ -77,6 +77,15 @@ class ModelVersionTracker: changed=changed_providers, total_probed=len(results), ) + # P3.2.3: Telegram 告警(dedup 1h/provider) + try: + from src.services.failover_alerter import get_failover_alerter + await get_failover_alerter().alert_provider_version_changed( + changed_providers=changed_providers, + probed=len(results), + ) + except Exception as _alert_err: + logger.warning("provider_version_alert_failed", error=str(_alert_err)) else: logger.info( "provider_version_stable",