Files
awoooi/apps/api/tests/test_failover_alerter.py
Your Name cc547736ab feat(wave6-8): P2.1 fusion + P2.2 governance + P2.4 consensus + Wave 7/8 BLOCKER 修復
承接 Wave 6/7/8 多 engineer 在 agent 限額前完成的代碼,補 commit 解 production
HEAD 隱性 import error(decision_fusion 已被 decision_manager 引用但檔案 untracked)。

新增(後端核心):
- decision_fusion.py (562 行) — P2.1 方法 III(OpenClaw + Hermes + Elephant 三 LLM 融合)
- aiops_timeline.py + aiops_timeline_service.py — critic B4 修復
  /api/v1/aiops/timeline endpoint,DB 存取抽到 service 層遵守 leWOOOgo 積木化
- migrations/p2_decision_fusion_columns.sql + rollback — approval_records fusion 欄位

修改(後端整合):
- decision_manager.py — fusion 三斷鏈修補(critic B1+B2+B3):
  · B1: 寫 _evidence_snapshot_ref 到 token.proposal_data
  · B2: fusion 前計算 complexity_score 並寫 token
  · B3: fusion composite 寫 token.proposal_data["decision_fusion"]
- auto_approve.py — fusion + consensus 認識(critic B3+B5):
  · composite > 0.7 → auto_execute_eligible bypass min_confidence
  · source=consensus_engine + score>=0.6 → 規則可信路徑
- consensus_engine.py — db-fix _save_consensus 重用 agent_sessions
- governance_agent.py — db-fix _alert PG 寫入 ai_governance_events
- approval_db.py — fusion 3 欄位 + 2 partial index + CheckConstraint
- db/models.py — schema 對齊 migration
- core/config.py — vuln #1 修復:OLLAMA_URL/_FALLBACK_URL field_validator
  拒絕公網 IP + 外部域名,僅允許私網/loopback/K8s SVC 白名單
- core/feature_flags.py — P2 fusion + consensus flags
- main.py — governance_agent lifespan 啟動
- failover_alerter.py — Wave8-X2: in-memory dedup fallback(Redis 拒絕後不 fail-open)
- ollama_*.py — metrics 整合 + recovery 改善
- auto_repair_service.py — verifier 接線

新增(測試 2438 行):
- test_decision_fusion.py / test_governance_agent.py / test_consensus_integration.py
- test_p2_db_fixes.py / test_wave8_fusion_fixes.py
- test_config_url_validation.py(vuln #1 12 tests)
- test_failover_alerter.py +Wave8-X2 in-memory dedup 補測

驗收: 116 tests pass (decision_fusion + wave8_fusion + config_url + consensus +
                      governance + p2_db_fixes + failover_alerter)

Conflict resolution:
- 3 檔(config.py + auto_approve.py + decision_manager.py)git stash pop 衝突
  保留 stashed (engineer 最終版),補回 ValueError 「公網 IP」字樣對齊 test

Note: 此 commit 解 production HEAD 隱性 import error
仍未修: vuln #4 prompt injection / debugger B14 quota fail-closed
       / B25-B26 drain_pending_tasks / B8 governance fail alert

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Multiple Engineers (Wave 6/7/8) <noreply@anthropic.com>
2026-04-27 08:11:40 +08:00

233 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""FailoverAlerter 單元測試 — P1.5 Telegram 容災告警
四大 testcase覆蓋 status 文件 line 99 指定範圍):
1. test_alert_failover_dedup — 同 to_provider 第二次被 10min dedup
2. test_alert_recovery_send — 正常發送 + Markdown 訊息結構
3. test_no_telegram_chat_id_noop — chat_id 缺失時不發送fail-soft
4. test_quota_alert_dedup_24h — quota 告警 86400s TTL每日一次
2026-04-26 P1.5 補測 by Claude Opus 4.7
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.failover_alerter import (
DEDUP_TTL_SEC,
QUOTA_DEDUP_TTL_SEC,
FailoverAlerter,
configure_alerter,
get_failover_alerter,
reset_failover_alerter,
)
@pytest.fixture(autouse=True)
def _reset_singleton():
    """Reset the module-level alerter singleton before and after every test.

    Declared ``autouse`` so each test in this module starts from, and leaves
    behind, a clean singleton; this keeps state from leaking between cases.
    """
    # Setup: discard any singleton a previous test may have left behind.
    reset_failover_alerter()
    yield
    # Teardown: discard whatever singleton this test created or configured.
    reset_failover_alerter()
@pytest.fixture
def mock_redis():
    """Provide a fake Redis client whose async ``set`` returns scripted results.

    Successive awaits of ``set`` yield ``True``, ``None``, ``True``, ``None``:
    ``True`` stands for a successful NX write (key was absent) and ``None``
    for "key already exists". A fifth await exhausts the script and raises.
    """
    scripted_results = [True, None, True, None]
    client = MagicMock()
    client.set = AsyncMock(side_effect=scripted_results)
    return client
@pytest.fixture
def mock_telegram_send():
    """Patch the Telegram gateway factory and settings; yield the fake gateway.

    ``get_telegram_gateway`` is replaced so it returns a mock whose
    ``send_notification`` is an ``AsyncMock``, and ``get_settings`` returns an
    object with ``OPENCLAW_TG_CHAT_ID="-100123"``.

    Both names are patched on their defining modules rather than on the
    alerter module: per the original author's note, ``_send()`` imports them
    inline inside the function, so the lookup happens at the source module.
    """
    fake_gateway = MagicMock()
    fake_gateway.send_notification = AsyncMock()
    fake_settings = MagicMock(OPENCLAW_TG_CHAT_ID="-100123")

    gateway_patch = patch(
        "src.services.telegram_gateway.get_telegram_gateway",
        return_value=fake_gateway,
    )
    settings_patch = patch(
        "src.core.config.get_settings",
        return_value=fake_settings,
    )
    with gateway_patch, settings_patch:
        yield fake_gateway
# =============================================================================
# Case 1: failover dedup (a second alert for the same to_provider is blocked)
# =============================================================================
@pytest.mark.asyncio
async def test_alert_failover_dedup(mock_redis, mock_telegram_send):
    """The same failover event sent twice produces exactly one notification.

    ``mock_redis.set`` answers True then None, so the first alert passes the
    dedup check and the second is treated as a duplicate.
    """
    alerter = FailoverAlerter(redis_client=mock_redis)
    send = mock_telegram_send.send_notification
    failover_event = dict(
        to_provider="gemini",
        reason="111 unhealthy",
        model="qwen3:8b",
        fallback_chain_str="gemini → ollama_188",
    )

    # First alert: dedup passes, notification goes out.
    await alerter.alert_failover(failover_event)
    assert send.await_count == 1

    # Second alert: dedup hit, the count must stay at 1.
    await alerter.alert_failover(failover_event)
    assert send.await_count == 1

    # The first Redis write must be an NX set carrying the failover dedup TTL.
    first_set_kwargs = mock_redis.set.await_args_list[0].kwargs
    assert first_set_kwargs["ex"] == DEDUP_TTL_SEC
    assert first_set_kwargs["nx"] is True
# =============================================================================
# Case 2: recovery alert is sent normally
# =============================================================================
@pytest.mark.asyncio
async def test_alert_recovery_send(mock_redis, mock_telegram_send):
    """A recovery event sends one MarkdownV2 message with the expected wording."""
    alerter = FailoverAlerter(redis_client=mock_redis)
    recovery_event = {
        "from_provider": "gemini",
        "to_provider": "ollama_111",
        "stable_count": 3,
    }
    await alerter.alert_recovery(recovery_event)

    send = mock_telegram_send.send_notification
    assert send.await_count == 1

    call_kwargs = send.await_args.kwargs
    assert call_kwargs["parse_mode"] == "MarkdownV2"

    # The text must mention the recovery and the 3 consecutive healthy checks.
    message = call_kwargs["text"]
    for fragment in ("Ollama 自動恢復", "連續 3"):
        assert fragment in message
# =============================================================================
# Case 3: missing chat_id -> fail-soft (nothing sent, nothing raised)
# =============================================================================
@pytest.mark.asyncio
async def test_no_telegram_chat_id_noop(mock_redis):
    """With ``OPENCLAW_TG_CHAT_ID`` set to None the alerter sends nothing."""
    alerter = FailoverAlerter(redis_client=mock_redis)

    gateway = MagicMock()
    gateway.send_notification = AsyncMock()
    settings_without_chat = MagicMock(OPENCLAW_TG_CHAT_ID=None)

    gateway_patch = patch(
        "src.services.telegram_gateway.get_telegram_gateway",
        return_value=gateway,
    )
    settings_patch = patch(
        "src.core.config.get_settings",
        return_value=settings_without_chat,
    )
    with gateway_patch, settings_patch:
        # Must not raise: dedup passes, but the send is skipped because
        # there is no chat id to deliver to.
        await alerter.alert_failover({"to_provider": "gemini"})

    assert gateway.send_notification.await_count == 0
# =============================================================================
# Case 4: quota alert uses a 24h dedup window
# =============================================================================
@pytest.mark.asyncio
async def test_quota_alert_dedup_24h(mock_redis, mock_telegram_send):
    """The quota-exceeded alert is sent and its dedup key uses the 24h TTL."""
    alerter = FailoverAlerter(redis_client=mock_redis)
    quota_event = {"quota": 1000, "current_count": 1003}
    await alerter.alert_gemini_quota_exceeded(quota_event)

    # The message went out and carries the headline plus both numbers.
    send = mock_telegram_send.send_notification
    assert send.await_count == 1
    message = send.await_args.kwargs["text"]
    for expected in ("Gemini 每日配額耗盡", "1000", "1003"):
        assert expected in message

    # The first Redis write must use the quota-specific dedup TTL.
    dedup_call = mock_redis.set.await_args_list[0]
    assert dedup_call.kwargs["ex"] == QUOTA_DEDUP_TTL_SEC
    # Sanity check on the constant itself: 24 hours in seconds.
    assert QUOTA_DEDUP_TTL_SEC == 86400
# =============================================================================
# Extra: configure_alerter / get_failover_alerter behaviour
# =============================================================================
def test_configure_alerter_replaces_singleton(mock_redis):
    """``configure_alerter()`` swaps in a new singleton holding the given Redis."""
    before = get_failover_alerter()
    # The default singleton is created without a Redis client.
    assert before._redis is None

    configure_alerter(mock_redis)

    after = get_failover_alerter()
    # The injected client is the one the new singleton holds.
    assert after._redis is mock_redis
    # It is a fresh instance, not the old one mutated in place.
    assert after is not before
@pytest.mark.asyncio
async def test_dedup_fail_open_when_no_redis():
    """Without a Redis client, dedup allows the first call and blocks the repeat.

    The test name is historical. The intended behaviour is an in-memory dedup
    fallback, not failing open on every call. Asserting only the first call
    would also pass against a fully fail-open implementation, so the repeated
    call with the same key is checked as well.
    """
    alerter = FailoverAlerter(redis_client=None)
    key = "any:key"

    # First call: no record for this key yet, so the alert is allowed.
    assert await alerter._check_dedup(key, ttl=600) is True

    # Second call, same key, well inside the TTL: the in-memory record must
    # suppress it; this is what distinguishes the fallback from fail-open.
    assert await alerter._check_dedup(key, ttl=600) is False
# =============================================================================
# Wave8-X2: tests added for the in-memory dedup fallback
# =============================================================================
@pytest.mark.asyncio
async def test_dedup_redis_unavailable_uses_memory():
    """When Redis raises, in-memory dedup still suppresses the repeated key.

    Wave8-X2 fix: the former fail-open path was replaced by an in-memory
    fallback. Here ``set()`` always raises ``ConnectionError``; the second
    ``_check_dedup`` for the same key must therefore return False.
    """
    failing_redis = MagicMock()
    failing_redis.set = AsyncMock(side_effect=ConnectionError("Redis is down"))
    alerter = FailoverAlerter(redis_client=failing_redis)

    dedup_key, ttl_sec = "alert:test:dedup_memory", 600

    # First check: nothing recorded in memory yet, so it is allowed.
    first = await alerter._check_dedup(dedup_key, ttl=ttl_sec)
    assert first is True

    # Second check: the in-memory record is still within its TTL, so rejected.
    second = await alerter._check_dedup(dedup_key, ttl=ttl_sec)
    assert second is False
@pytest.mark.asyncio
async def test_memory_dedup_max_size_gc():
    """Reaching 1000 in-memory entries triggers a GC of expired ones.

    Wave8-X2 fix: per the original note, ``_memory_dedup_max_size`` is 1000
    and a cleanup runs once that size is reached, so the dict cannot grow
    without bound. The test seeds 999 expired entries plus one fresh entry,
    performs one more check, and expects the dict to have shrunk.
    """
    import time

    alerter = FailoverAlerter(redis_client=None)

    # Seed 999 expired entries: last-sent timestamp 0.0 is far older than the
    # 600s TTL used below.
    stale_keys = (f"stale:key:{i}" for i in range(999))
    for stale_key in stale_keys:
        alerter._memory_dedup[stale_key] = 0.0

    # Seed a single entry that has not expired.
    alerter._memory_dedup["fresh:key"] = time.time()

    # The dict now holds exactly 1000 entries, the stated maximum size.
    assert len(alerter._memory_dedup) == 1000

    # Checking a brand-new key at the size limit should trigger the cleanup.
    allowed = await alerter._check_dedup("trigger:gc:key", ttl=600)
    assert allowed is True  # a new key is always allowed

    # After GC only fresh:key and trigger:gc:key should remain; the bound of
    # 3 leaves room for an off-by-one at the boundary.
    assert len(alerter._memory_dedup) <= 3