承接 Wave 6/7/8 多 engineer 在 agent 限額前完成的代碼,補 commit 解 production HEAD 隱性 import error(decision_fusion 已被 decision_manager 引用但檔案 untracked)。 新增(後端核心): - decision_fusion.py (562 行) — P2.1 方法 III(OpenClaw + Hermes + Elephant 三 LLM 融合) - aiops_timeline.py + aiops_timeline_service.py — critic B4 修復 /api/v1/aiops/timeline endpoint,DB 存取抽到 service 層遵守 leWOOOgo 積木化 - migrations/p2_decision_fusion_columns.sql + rollback — approval_records fusion 欄位 修改(後端整合): - decision_manager.py — fusion 三斷鏈修補(critic B1+B2+B3): · B1: 寫 _evidence_snapshot_ref 到 token.proposal_data · B2: fusion 前計算 complexity_score 並寫 token · B3: fusion composite 寫 token.proposal_data["decision_fusion"] - auto_approve.py — fusion + consensus 認識(critic B3+B5): · composite > 0.7 → auto_execute_eligible bypass min_confidence · source=consensus_engine + score>=0.6 → 規則可信路徑 - consensus_engine.py — db-fix _save_consensus 重用 agent_sessions - governance_agent.py — db-fix _alert PG 寫入 ai_governance_events - approval_db.py — fusion 3 欄位 + 2 partial index + CheckConstraint - db/models.py — schema 對齊 migration - core/config.py — vuln #1 修復:OLLAMA_URL/_FALLBACK_URL field_validator 拒絕公網 IP + 外部域名,僅允許私網/loopback/K8s SVC 白名單 - core/feature_flags.py — P2 fusion + consensus flags - main.py — governance_agent lifespan 啟動 - failover_alerter.py — Wave8-X2: in-memory dedup fallback(Redis 拒絕後不 fail-open) - ollama_*.py — metrics 整合 + recovery 改善 - auto_repair_service.py — verifier 接線 新增(測試 2438 行): - test_decision_fusion.py / test_governance_agent.py / test_consensus_integration.py - test_p2_db_fixes.py / test_wave8_fusion_fixes.py - test_config_url_validation.py(vuln #1 12 tests) - test_failover_alerter.py +Wave8-X2 in-memory dedup 補測 驗收: 116 tests pass (decision_fusion + wave8_fusion + config_url + consensus + governance + p2_db_fixes + failover_alerter) Conflict resolution: - 3 檔(config.py + auto_approve.py + decision_manager.py)git stash pop 衝突 保留 stashed (engineer 最終版),補回 ValueError 「公網 IP」字樣對齊 test Note: 此 commit 解 production 
HEAD 隱性 import error。 仍未修: vuln #4 prompt injection / debugger B14 quota fail-closed / B25-B26 drain_pending_tasks / B8 governance fail alert Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Co-Authored-By: Multiple Engineers (Wave 6/7/8) <noreply@anthropic.com>
233 lines
8.3 KiB
Python
233 lines
8.3 KiB
Python
"""FailoverAlerter 單元測試 — P1.5 Telegram 容災告警
|
||
|
||
四大 testcase(覆蓋 status 文件 line 99 指定範圍):
|
||
1. test_alert_failover_dedup — 同 to_provider 第二次被 10min dedup
|
||
2. test_alert_recovery_send — 正常發送 + Markdown 訊息結構
|
||
3. test_no_telegram_chat_id_noop — chat_id 缺失時不發送(fail-soft)
|
||
4. test_quota_alert_dedup_24h — quota 告警 86400s TTL(每日一次)
|
||
|
||
2026-04-26 P1.5 補測 by Claude Opus 4.7
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from src.services.failover_alerter import (
|
||
DEDUP_TTL_SEC,
|
||
QUOTA_DEDUP_TTL_SEC,
|
||
FailoverAlerter,
|
||
configure_alerter,
|
||
get_failover_alerter,
|
||
reset_failover_alerter,
|
||
)
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def _reset_singleton():
    """Reset the alerter singleton before and after every test.

    Runs automatically (autouse) so that state held by the module-level
    singleton cannot leak from one test into the next.
    """
    # Setup: start each test from a clean singleton.
    reset_failover_alerter()

    yield

    # Teardown: drop whatever the test configured.
    reset_failover_alerter()
|
||
|
||
|
||
@pytest.fixture
def mock_redis():
    """Provide a fake Redis client whose ``set`` alternates True / None.

    The scripted results model ``SET ... NX``: ``True`` means the key was
    newly written, ``None`` means the key already existed. The sequence
    covers four awaited calls in total.
    """
    scripted_results = [True, None, True, None]
    client = MagicMock()
    client.set = AsyncMock(side_effect=scripted_results)
    return client
|
||
|
||
|
||
@pytest.fixture
def mock_telegram_send():
    """Patch the Telegram gateway factory and settings; yield the fake gateway.

    The gateway exposes an awaitable ``send_notification`` and the fake
    settings carry ``OPENCLAW_TG_CHAT_ID="-100123"``.

    Per the original author's note, ``_send()`` imports its dependencies
    inline inside the function, so the patches target the source modules
    rather than the alerter module.
    """
    fake_gateway = MagicMock()
    fake_gateway.send_notification = AsyncMock()
    fake_settings = MagicMock(OPENCLAW_TG_CHAT_ID="-100123")

    with patch(
        "src.services.telegram_gateway.get_telegram_gateway",
        return_value=fake_gateway,
    ):
        with patch("src.core.config.get_settings", return_value=fake_settings):
            yield fake_gateway
|
||
|
||
|
||
# =============================================================================
|
||
# Case 1: failover dedup(同 to_provider 第二次被攔)
|
||
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_alert_failover_dedup(mock_redis, mock_telegram_send):
    """A repeated failover alert for the same event is sent only once."""
    failover_event = {
        "to_provider": "gemini",
        "reason": "111 unhealthy",
        "model": "qwen3:8b",
        "fallback_chain_str": "gemini → ollama_188",
    }
    notifier = FailoverAlerter(redis_client=mock_redis)
    send = mock_telegram_send.send_notification

    # First call: the mocked Redis SET returns True, so the alert goes out.
    await notifier.alert_failover(failover_event)
    assert send.await_count == 1

    # Second call: the mocked Redis SET returns None, so nothing new is sent.
    await notifier.alert_failover(failover_event)
    assert send.await_count == 1  # still 1

    # The dedup key must be written with the failover TTL and NX semantics.
    first_set_kwargs = mock_redis.set.await_args_list[0].kwargs
    assert first_set_kwargs["ex"] == DEDUP_TTL_SEC
    assert first_set_kwargs["nx"] is True
|
||
|
||
|
||
# =============================================================================
|
||
# Case 2: recovery 正常發送
|
||
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_alert_recovery_send(mock_redis, mock_telegram_send):
    """A recovery alert is sent once, as MarkdownV2, with the expected wording."""
    recovery_event = {
        "from_provider": "gemini",
        "to_provider": "ollama_111",
        "stable_count": 3,
    }
    notifier = FailoverAlerter(redis_client=mock_redis)

    await notifier.alert_recovery(recovery_event)

    send = mock_telegram_send.send_notification
    assert send.await_count == 1

    call_kwargs = send.await_args.kwargs
    assert call_kwargs["parse_mode"] == "MarkdownV2"

    # The message should mention the recovery and the 3 consecutive checks.
    message = call_kwargs["text"]
    assert "Ollama 自動恢復" in message
    assert "連續 3" in message
|
||
|
||
|
||
# =============================================================================
|
||
# Case 3: chat_id 缺失 → fail-soft(不發送,不 raise)
|
||
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_no_telegram_chat_id_noop(mock_redis):
    """With no chat id configured, alerting neither sends nor raises."""
    notifier = FailoverAlerter(redis_client=mock_redis)

    fake_gateway = MagicMock()
    fake_gateway.send_notification = AsyncMock()
    settings_without_chat = MagicMock(OPENCLAW_TG_CHAT_ID=None)

    with patch(
        "src.services.telegram_gateway.get_telegram_gateway",
        return_value=fake_gateway,
    ):
        with patch(
            "src.core.config.get_settings",
            return_value=settings_without_chat,
        ):
            # Must not raise: dedup passes, but the send step is expected to
            # return early because the chat id is missing.
            await notifier.alert_failover({"to_provider": "gemini"})
            assert fake_gateway.send_notification.await_count == 0
|
||
|
||
|
||
# =============================================================================
|
||
# Case 4: quota 告警 24h dedup
|
||
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_quota_alert_dedup_24h(mock_redis, mock_telegram_send):
    """The Gemini quota alert is sent and its dedup key uses the 24h TTL."""
    quota_event = {
        "quota": 1000,
        "current_count": 1003,
    }
    notifier = FailoverAlerter(redis_client=mock_redis)

    await notifier.alert_gemini_quota_exceeded(quota_event)

    # The message was sent and carries both the quota and the current count.
    send = mock_telegram_send.send_notification
    assert send.await_count == 1
    message = send.await_args.kwargs["text"]
    assert "Gemini 每日配額耗盡" in message
    assert "1000" in message
    assert "1003" in message

    # The dedup key must be written with the quota TTL (one alert per day).
    first_set_call = mock_redis.set.await_args_list[0]
    assert first_set_call.kwargs["ex"] == QUOTA_DEDUP_TTL_SEC
    assert QUOTA_DEDUP_TTL_SEC == 86400  # sanity check on the constant itself
|
||
|
||
|
||
# =============================================================================
|
||
# 額外:configure_alerter / get_failover_alerter 行為驗證
|
||
# =============================================================================
|
||
|
||
def test_configure_alerter_replaces_singleton(mock_redis):
    """configure_alerter() swaps in a new singleton wired to the given Redis."""
    before = get_failover_alerter()
    assert before._redis is None  # the default singleton has no Redis client

    configure_alerter(mock_redis)

    after = get_failover_alerter()
    assert after._redis is mock_redis  # Redis is available after injection
    assert before is not after  # a fresh instance, not a mutated one
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_dedup_fail_open_when_no_redis():
    """Without Redis, dedup allows the first alert and suppresses the repeat.

    The test name is historical: since the Wave8-X2 change the no-Redis path
    is an in-memory dedup, not a fail-open that lets every call through.
    Checking only the first call would also pass for a fail-open
    implementation, so the repeat call is asserted as well.
    """
    alerter = FailoverAlerter(redis_client=None)
    key = "any:key"

    # First call: no record for this key yet, so the alert is allowed.
    assert await alerter._check_dedup(key, ttl=600) is True

    # Second call, same key, well inside the TTL: it must be suppressed,
    # otherwise the no-Redis path has regressed to fail-open.
    assert await alerter._check_dedup(key, ttl=600) is False
|
||
|
||
|
||
# =============================================================================
|
||
# Wave8-X2: dedup in-memory fallback 新增測試
|
||
# =============================================================================
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_dedup_redis_unavailable_uses_memory():
    """When Redis raises, in-memory dedup still suppresses repeats.

    Wave8-X2 fix: the former fail-open behaviour was replaced by an
    in-memory dedup fallback. Here ``set()`` always raises, and a second
    ``_check_dedup`` for the same key must return False.
    """
    dedup_key = "alert:test:dedup_memory"
    dedup_ttl = 600

    broken_redis = MagicMock()
    broken_redis.set = AsyncMock(side_effect=ConnectionError("Redis is down"))
    notifier = FailoverAlerter(redis_client=broken_redis)

    # First check: no in-memory record yet, so the alert is allowed.
    first_outcome = await notifier._check_dedup(dedup_key, ttl=dedup_ttl)
    assert first_outcome is True

    # Second check: the in-memory record is still within TTL, so it is rejected.
    second_outcome = await notifier._check_dedup(dedup_key, ttl=dedup_ttl)
    assert second_outcome is False
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_memory_dedup_max_size_gc():
    """Reaching 1000 in-memory entries triggers GC of the expired ones.

    Wave8-X2 fix: ``_memory_dedup_max_size`` is 1000 (per the original
    author's note) and expired entries are collected once that size is
    reached, so the dict cannot grow without bound. The test seeds 999
    expired entries plus one fresh entry and expects the dict to shrink.
    """
    import time

    notifier = FailoverAlerter(redis_client=None)

    # Seed 999 expired entries: last-sent timestamp 0.0 is far older than 600s.
    notifier._memory_dedup.update(
        {f"stale:key:{idx}": 0.0 for idx in range(999)}
    )

    # Seed one entry that has not expired.
    notifier._memory_dedup["fresh:key"] = time.time()

    # The dict now holds exactly 1000 entries, i.e. the maximum size.
    assert len(notifier._memory_dedup) == 1000

    # Checking a brand-new key is what triggers the cleanup.
    allowed = await notifier._check_dedup("trigger:gc:key", ttl=600)
    assert allowed is True  # a new key must be allowed

    # After GC only fresh:key and trigger:gc:key should remain; the bound of
    # 3 leaves room for an off-by-one at the boundary.
    assert len(notifier._memory_dedup) <= 3
|