"""FailoverAlerter 單元測試 — P1.5 Telegram 容災告警 四大 testcase(覆蓋 status 文件 line 99 指定範圍): 1. test_alert_failover_dedup — 同 to_provider 第二次被 10min dedup 2. test_alert_recovery_send — 正常發送 + Markdown 訊息結構 3. test_no_telegram_chat_id_noop — chat_id 缺失時不發送(fail-soft) 4. test_quota_alert_dedup_24h — quota 告警 86400s TTL(每日一次) 2026-04-26 P1.5 補測 by Claude Opus 4.7 """ from __future__ import annotations from unittest.mock import AsyncMock, MagicMock, patch import pytest from src.services.failover_alerter import ( DEDUP_TTL_SEC, QUOTA_DEDUP_TTL_SEC, FailoverAlerter, _lines_from_list, _sanitize_telegram_error, configure_alerter, format_governance_alert_card, get_failover_alerter, reset_failover_alerter, ) @pytest.fixture(autouse=True) def _reset_singleton(): """每個 test 前後重置 singleton,避免 state 洩漏""" reset_failover_alerter() yield reset_failover_alerter() @pytest.fixture def mock_redis(): """Mock Redis:set 第一次回 True(NX 成功),第二次回 None(已存在)""" redis = MagicMock() redis.set = AsyncMock(side_effect=[True, None, True, None]) return redis @pytest.fixture def mock_telegram_send(): """Mock TelegramGateway.send_alert_notification + settings.SRE_GROUP_CHAT_ID `_send()` 在函式內 inline import,必須 mock 來源 module 而非 alerter module。 """ with patch("src.services.telegram_gateway.get_telegram_gateway") as mock_gw, \ patch("src.core.config.get_settings") as mock_settings: gateway = MagicMock() gateway.send_alert_notification = AsyncMock() mock_gw.return_value = gateway mock_settings.return_value = MagicMock(SRE_GROUP_CHAT_ID="-100123", OPENCLAW_TG_CHAT_ID="-100456") yield gateway # ============================================================================= # Case 1: failover dedup(同 to_provider 第二次被攔) # ============================================================================= @pytest.mark.asyncio async def test_alert_failover_dedup(mock_redis, mock_telegram_send): alerter = FailoverAlerter(redis_client=mock_redis) event = { "to_provider": "gemini", "reason": "111 unhealthy", "model": "qwen3:8b", "fallback_chain_str": "gemini → ollama_local", } # 第 1 次:dedup pass,發送 await alerter.alert_failover(event) assert mock_telegram_send.send_alert_notification.await_count == 1 # 第 2 次:dedup hit,不發送 await alerter.alert_failover(event) assert mock_telegram_send.send_alert_notification.await_count == 1 # 仍是 1 # 驗證 dedup TTL = 10min assert mock_redis.set.await_args_list[0].kwargs["ex"] == DEDUP_TTL_SEC assert mock_redis.set.await_args_list[0].kwargs["nx"] is True # ============================================================================= # Case 2: recovery 正常發送 # ============================================================================= @pytest.mark.asyncio async def test_alert_recovery_send(mock_redis, mock_telegram_send): alerter = FailoverAlerter(redis_client=mock_redis) await alerter.alert_recovery({ "from_provider": "gemini", "to_provider": "ollama_111", "stable_count": 3, }) assert mock_telegram_send.send_alert_notification.await_count == 1 sent_kwargs = mock_telegram_send.send_alert_notification.await_args.kwargs assert sent_kwargs["parse_mode"] == "MarkdownV2" # 訊息應提及恢復 + 連續 3 次 HEALTHY assert "Ollama 自動恢復" in sent_kwargs["text"] assert "連續 3" in sent_kwargs["text"] # ============================================================================= # Case 3: chat_id 缺失 → fail-soft(不發送,不 raise) # ============================================================================= @pytest.mark.asyncio async def test_no_telegram_chat_id_noop(mock_redis): alerter = FailoverAlerter(redis_client=mock_redis) with patch("src.services.telegram_gateway.get_telegram_gateway") as mock_gw, \ patch("src.core.config.get_settings") as mock_settings: gateway = MagicMock() gateway.send_alert_notification = AsyncMock() mock_gw.return_value = gateway mock_settings.return_value = MagicMock(SRE_GROUP_CHAT_ID=None, OPENCLAW_TG_CHAT_ID=None) # 不該 raise,dedup pass 但 send 因 chat_id 缺直接 return await alerter.alert_failover({"to_provider": "gemini"}) assert gateway.send_alert_notification.await_count == 0 # ============================================================================= # Case 4: quota 告警 24h dedup # ============================================================================= @pytest.mark.asyncio async def test_quota_alert_dedup_24h(mock_redis, mock_telegram_send): alerter = FailoverAlerter(redis_client=mock_redis) await alerter.alert_gemini_quota_exceeded({ "quota": 1000, "current_count": 1003, }) # 訊息發出 assert mock_telegram_send.send_alert_notification.await_count == 1 sent = mock_telegram_send.send_alert_notification.await_args.kwargs["text"] assert "Gemini 每日配額耗盡" in sent assert "1000" in sent assert "1003" in sent assert "\\-" in sent # MarkdownV2 date hyphens must be escaped # 驗證 dedup TTL = 24h assert mock_redis.set.await_args_list[0].kwargs["ex"] == QUOTA_DEDUP_TTL_SEC assert QUOTA_DEDUP_TTL_SEC == 86400 # sanity check 常數本身 # ============================================================================= # 額外:configure_alerter / get_failover_alerter 行為驗證 # ============================================================================= def test_configure_alerter_replaces_singleton(mock_redis): """configure_alerter() 應替換現有 singleton 並注入 redis""" a1 = get_failover_alerter() assert a1._redis is None # 預設無 redis configure_alerter(mock_redis) a2 = get_failover_alerter() assert a2._redis is mock_redis # 注入後 redis 可用 assert a1 is not a2 # 是新 instance @pytest.mark.asyncio async def test_dedup_fail_open_when_no_redis(): """Redis 為 None 時 dedup 第一次應允許送出(in-memory dedup,非 fail-open 對所有次數)""" alerter = FailoverAlerter(redis_client=None) # 第一次:無記錄 → 允許 assert await alerter._check_dedup("any:key", ttl=600) is True # ============================================================================= # Wave8-X2: dedup in-memory fallback 新增測試 # ============================================================================= @pytest.mark.asyncio async def test_dedup_redis_unavailable_uses_memory(): """Redis 拋出例外時,in-memory dedup 仍生效(不 fail-open 狂發) Wave8-X2 fix:原 fail-open 改為 in-memory dedup fallback。 驗證:Redis set() raise → 第二次 _check_dedup 同 key 應回 False。 """ bad_redis = MagicMock() bad_redis.set = AsyncMock(side_effect=ConnectionError("Redis is down")) alerter = FailoverAlerter(redis_client=bad_redis) key = "alert:test:dedup_memory" ttl = 600 # 第 1 次:in-memory 無記錄 → 允許 result1 = await alerter._check_dedup(key, ttl=ttl) assert result1 is True # 第 2 次:in-memory 已有記錄(未過 TTL)→ 拒絕 result2 = await alerter._check_dedup(key, ttl=ttl) assert result2 is False @pytest.mark.asyncio async def test_memory_dedup_max_size_gc(): """超過 1000 entries 時 GC 清除過期 entry,防 dict 無限成長 Wave8-X2 fix:_memory_dedup_max_size = 1000,超過時 GC。 驗證:注入 999 個已過期 entry + 1 個未過期 → GC 後 dict size 應減少。 """ import time alerter = FailoverAlerter(redis_client=None) # 注入 999 個「已過期」entry(last_sent = 0.0,TTL=600s,均已過期) for i in range(999): alerter._memory_dedup[f"stale:key:{i}"] = 0.0 # expired: now - 0.0 > 600 # 注入 1 個「未過期」entry alerter._memory_dedup["fresh:key"] = time.time() # 此時 dict size = 1000,達 _memory_dedup_max_size assert len(alerter._memory_dedup) == 1000 # 觸發 GC:新 key check 讓 len >= max_size → 清理 result = await alerter._check_dedup("trigger:gc:key", ttl=600) assert result is True # 新 key 應被允許 # GC 後:999 個 stale entry 被清除,只剩 fresh:key + trigger:gc:key assert len(alerter._memory_dedup) <= 3 # fresh + trigger + 可能有邊界差1 def test_lines_from_list_escapes_markdown_v2_numbered_periods() -> None: rendered = _lines_from_list(["修復 node-exporter-110"]) assert "1\\." in rendered assert "node\\-exporter\\-110" in rendered def test_sanitize_telegram_error_redacts_bot_token_url() -> None: raw = "HTTP error for https://api.telegram.org/bot123456:SECRET/sendMessage" sanitized = _sanitize_telegram_error(raw) assert "SECRET" not in sanitized assert "bot" in sanitized def test_governance_alert_card_formats_knowledge_degradation() -> None: card = format_governance_alert_card( "knowledge_degradation", { "status": "warning", "impact": { "stale_count": 948, "stale_days": 7, "stale_ratio": 0.521, "threshold": 0.2, "total_count": 1819, }, "remediation": { "items": [ "啟動 KM 反查與自動補齊流程", "關鍵服務告警自動同步到 KM 任務", ], "next_action": "run_kb_growth_healthcheck", }, "actionable": { "items": [ "每日檢查 ANTI_PATTERN 更新結果", "安排 owner 對 stale 條目做快速人工審核", ] }, }, ) assert "*AI 治理警報|KM 需要更新" in card assert "💬 *白話說明*" in card assert "🧩 *AI 流程狀態*" in card assert "👥 *負責分工*" in card assert "主責:Hermes" in card assert "OpenClaw:提供告警分類" in card assert "ElephantAlpha:read\\-only 稽核" in card assert "人工覆核:KM owner / SRE owner" in card assert "✅ *現在要做*" in card assert "queued\\_kb\\_healthcheck" in card assert "AwoooP Work Items" in card assert "🧭 *影響摘要*" in card assert "陳舊 KM:948" in card assert "陳舊比例:52\\.1%" in card assert "▶️ 下一步:run\\_kb\\_growth\\_healthcheck" in card assert "📎 *補充欄位*" not in card assert "欄位快覽" not in card def test_governance_alert_card_accepts_legacy_knowledge_degradation_payload() -> None: card = format_governance_alert_card( "knowledge_degradation", { "status": "warning", "stale_count": 1425, "total": 1856, "stale_ratio": 0.768, "threshold": 0.2, "stale_days": 7, "remediation": [ "啟動 KM 反查與自動補齊流程", "關鍵服務告警自動同步到 KM 任務", ], "next_step": "run_kb_growth_healthcheck", "automatable_work": [ "每日檢查 ANTI_PATTERN 更新結果", "安排至少 2 位 owner 對 stale 條目做快速人工審核", ], }, ) assert "1425 / 1856 筆 KM" in card assert "陳舊 KM:1425" in card assert "總 KM:1856" in card assert "陳舊比例:76\\.8%" in card assert "主責:Hermes" in card assert "人工覆核:KM owner / SRE owner" in card assert "▶️ 下一步:run\\_kb\\_growth\\_healthcheck" in card assert "每日檢查 ANTI\\_PATTERN 更新結果" in card assert "📎 *補充欄位*" not in card assert "? / ?" not in card def test_governance_alert_card_limits_fallback_fields() -> None: card = format_governance_alert_card( "custom_signal", { "status": "warning", "field_a": "a", "field_b": "b", "field_c": "c", "field_d": "d", "field_e": "e", }, ) assert "📎 *補充欄位*" in card assert "更多欄位已收斂至 AwoooP 稽核資料" in card assert "field\\_e" not in card