Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m59s
Wave 8 P3.1-T2 PathA 啟用 + Solver F4 安全強化 + test 對齊:
PathA — DiagnosisAggregator 信號分類層補 PDI:
- ENABLE_DIAGNOSIS_AGGREGATOR default=False → True
· PathA 純信號分類層(OOMKilled/CrashLoop 等業務邏輯)
· 不重複呼叫 K8s/SignOz API(只取 PDI 已收集的 raw 資料)
· 安全 default on — 純邏輯處理,無外部依賴重疊
- diagnosis_aggregator.py +155 行(PathA 實作)
- pre_decision_investigator.py 已接 (commit 3a2cd151)
F4 — Solver critical risk reject:
- solver_agent.py: _validate_recommended_action 拒絕 risk=critical
· 鐵律:critical 動作必須走人工審批,不可變 Telegram 按鈕
· log warning + return None(被 _extract 過濾掉)
- _extract_recommended_actions 改返回 (list, status_str) tuple
· status="ok"/"empty"/"all_invalid" 供呼叫端決策
- protocol.py +16 / metrics.py +9 / ai_router.py +18 — 配套 metric + protocol field
測試對齊:
- test_solver_recommended_actions.py 拆 test_all_valid → low/medium/high accepted +
test_critical_rejected
- result tuple unpack: result, _ = _extract_recommended_actions(...)
- test_diagnosis_aggregator_stub.py: feature flag default 改 True 對齊 PathA
Tests: 51 passed (solver 28 + aggregator 16 + router fallback 8)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Multiple Engineers (Wave 8 P3.1-T2 PathA + F4) <noreply@anthropic.com>
277 lines
13 KiB
Python
277 lines
13 KiB
Python
"""
DiagnosisAggregator Path A integration tests
============================================
P3.1-T2-PathA by Claude 2026-04-27 — DiagAggregator signal-classification layer supplementing PDI

Path A strategy: ENABLE_DIAGNOSIS_AGGREGATOR=True (enabled by default).
DA only classifies signals from the raw data PDI has already collected; it does not call the K8s/SigNoz APIs again.

Verifies:
1. ENABLE_DIAGNOSIS_AGGREGATOR=True (Path A enabled by default)
2. _collect_diagnosis_aggregator goes through classify_signals_from_raw (no external API calls)
3. A failing aggregator call does not affect the main path (exception isolation)
4. EvidenceSnapshot.extra_diagnosis is structured dict data
5. build_summary() includes the Signal Classification section
6. DiagnosisAggregator.classify_signals_from_raw is pure logic with no IO

Note: no dependency on a real K8s/SigNoz — everything is tested with mocks
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import pytest
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Test: EvidenceSnapshot.extra_diagnosis field
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestEvidenceSnapshotExtraDiagnosis:
    """Checks for the ``extra_diagnosis`` attribute of EvidenceSnapshot.

    Covers the attribute's initial value, how ``build_summary()`` reacts to
    it, and that the IncidentEvidence DB model has no attribute of that name.
    """

    def test_extra_diagnosis_field_exists(self):
        """EvidenceSnapshot exposes extra_diagnosis and it starts out as None."""
        from src.services.evidence_snapshot import EvidenceSnapshot

        snapshot = EvidenceSnapshot(incident_id="INC-001")

        assert hasattr(snapshot, "extra_diagnosis")
        assert snapshot.extra_diagnosis is None

    def test_build_summary_includes_signal_classification(self):
        """With signals in extra_diagnosis, build_summary mentions Signal Classification."""
        from src.services.evidence_snapshot import EvidenceSnapshot

        crash_loop = {"signal_type": "crash_loop", "severity": "critical", "message": "CrashLoopBackOff"}
        oom_killed = {"signal_type": "oom_killed", "severity": "critical", "message": "OOMKilled"}

        snapshot = EvidenceSnapshot(incident_id="INC-001")
        snapshot.extra_diagnosis = {
            "signal_count": 2,
            "signals": [crash_loop, oom_killed],
        }
        rendered = snapshot.build_summary()

        # Both the section heading and one of the signal types must show up.
        for expected in ("Signal Classification", "crash_loop"):
            assert expected in rendered

    def test_build_summary_no_extra_diagnosis_no_section(self):
        """With extra_diagnosis=None, build_summary omits Signal Classification."""
        from src.services.evidence_snapshot import EvidenceSnapshot

        snapshot = EvidenceSnapshot(incident_id="INC-001")
        snapshot.extra_diagnosis = None

        rendered = snapshot.build_summary()

        assert "Signal Classification" not in rendered

    def test_build_summary_empty_signals_no_section(self):
        """With an empty signals list, build_summary omits Signal Classification."""
        from src.services.evidence_snapshot import EvidenceSnapshot

        snapshot = EvidenceSnapshot(incident_id="INC-001")
        snapshot.extra_diagnosis = {"signal_count": 0, "signals": []}

        rendered = snapshot.build_summary()

        assert "Signal Classification" not in rendered

    def test_extra_diagnosis_not_persisted_to_db_record(self):
        """The IncidentEvidence DB model has no extra_diagnosis attribute.

        Only the model class is inspected; no save() call is exercised here.
        """
        # EvidenceSnapshot is imported but not otherwise used in this test.
        from src.services.evidence_snapshot import EvidenceSnapshot
        from src.db.models import IncidentEvidence

        # The DB model must not carry the field (in-memory-only design).
        model_has_field = hasattr(IncidentEvidence, "extra_diagnosis")
        assert not model_has_field, (
            "extra_diagnosis 應為 in-memory only,不應存在於 DB model"
        )
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Test: ENABLE_DIAGNOSIS_AGGREGATOR feature flag
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestDiagnosisAggregatorFeatureFlag:
    """Path A tests around the ENABLE_DIAGNOSIS_AGGREGATOR feature flag.

    Covers the flag's default, the flag guard inside
    ``PreDecisionInvestigator.investigate``, and the behaviour of
    ``_collect_diagnosis_aggregator`` / ``classify_signals_from_raw`` with the
    aggregator replaced by mocks.
    """

    def test_feature_flag_exists_in_settings(self):
        """The settings object has ENABLE_DIAGNOSIS_AGGREGATOR and it is True (Path A)."""
        from src.core.config import settings

        assert hasattr(settings, "ENABLE_DIAGNOSIS_AGGREGATOR")
        # Path A enabled: the default is True
        assert settings.ENABLE_DIAGNOSIS_AGGREGATOR is True

    def test_feature_flag_default_true(self):
        """The Settings class source declares the flag itself with default=True.

        The check is scoped to the flag's own declaration so that another
        field declared with ``default=True`` cannot make this test pass.
        """
        import inspect
        import re

        from src.core.config import Settings

        source = inspect.getsource(Settings)
        assert "ENABLE_DIAGNOSIS_AGGREGATOR" in source

        # Capture from the flag's annotated declaration up to the next line
        # that starts another annotated attribute (or the end of the class).
        # Assumes the flag is declared as an annotated class attribute —
        # TODO confirm against the Settings definition.
        declaration = re.search(
            r"^[ \t]*ENABLE_DIAGNOSIS_AGGREGATOR[ \t]*:.*?(?=^[ \t]*\w+[ \t]*:|\Z)",
            source,
            flags=re.MULTILINE | re.DOTALL,
        )
        assert declaration is not None, (
            "ENABLE_DIAGNOSIS_AGGREGATOR declaration not found in Settings source"
        )
        assert "default=True" in declaration.group(0)

    def test_aggregator_guarded_by_flag_in_investigate(self):
        """investigate() references the feature flag and the collector (source inspection)."""
        import inspect

        from src.services.pre_decision_investigator import PreDecisionInvestigator

        source = inspect.getsource(PreDecisionInvestigator.investigate)
        assert "ENABLE_DIAGNOSIS_AGGREGATOR" in source, \
            "investigate() 應有 ENABLE_DIAGNOSIS_AGGREGATOR feature flag 守門"
        assert "_collect_diagnosis_aggregator" in source, \
            "investigate() 應呼叫 _collect_diagnosis_aggregator"

    @pytest.mark.asyncio
    async def test_collect_diagnosis_aggregator_uses_classify_signals_from_raw(self):
        """Path A: the collector calls classify_signals_from_raw, not collect_pod_diagnosis."""
        from src.services.pre_decision_investigator import PreDecisionInvestigator
        from src.services.evidence_snapshot import EvidenceSnapshot

        investigator = PreDecisionInvestigator()

        class _Inc:
            incident_id = "INC-TEST-002"
            signals = []

        snap = EvidenceSnapshot(incident_id="INC-TEST-002")
        snap.k8s_state = {"phase": "CrashLoopBackOff"}
        snap.recent_logs = None
        snap.metrics_snapshot = None

        mock_aggregator = MagicMock()
        mock_aggregator.classify_signals_from_raw = MagicMock(return_value=[])
        mock_aggregator.collect_pod_diagnosis = AsyncMock()  # must not be called

        # Replace the aggregator factory with one that returns the mock.
        with patch("src.services.diagnosis_aggregator.get_diagnosis_aggregator", return_value=mock_aggregator):
            await investigator._collect_diagnosis_aggregator(snap, _Inc())

        # Path A: classify_signals_from_raw is called, collect_pod_diagnosis is not
        mock_aggregator.classify_signals_from_raw.assert_called_once()
        mock_aggregator.collect_pod_diagnosis.assert_not_called()

    def test_da_classify_signals_no_api_call(self):
        """Path A core check: classify_signals_from_raw never asks for the external clients."""
        from src.services.diagnosis_aggregator import DiagnosisAggregator

        # Bypass __init__ so no k8s_service / signoz_client is set up.
        agg = DiagnosisAggregator.__new__(DiagnosisAggregator)

        with patch("src.services.diagnosis_aggregator.get_k8s_diagnostics_service") as k8s_factory, \
             patch("src.services.diagnosis_aggregator.get_signoz_client") as signoz_factory:
            signals = agg.classify_signals_from_raw(
                k8s_data={"phase": "CrashLoopBackOff", "restart_count": 10},
                logs_data=None,
                metrics_data={"error_rate": 25.0},
            )

        # Neither client factory may be touched while classifying.
        k8s_factory.assert_not_called()
        signoz_factory.assert_not_called()

        # Signals must be produced from the raw data alone.
        assert len(signals) >= 1
        signal_types = [s.signal_type for s in signals]
        assert "crash_loop" in signal_types
        assert "high_error_rate" in signal_types

    @pytest.mark.asyncio
    async def test_collect_diagnosis_aggregator_fills_extra_diagnosis_dict(self):
        """Path A: extra_diagnosis is filled with a structured dict, not a string."""
        from src.services.pre_decision_investigator import PreDecisionInvestigator
        from src.services.evidence_snapshot import EvidenceSnapshot
        from src.services.diagnosis_aggregator import DiagnosisSignal, DiagnosisSeverity

        investigator = PreDecisionInvestigator()

        class _Inc:
            incident_id = "INC-TEST-003"
            signals = []

        snap = EvidenceSnapshot(incident_id="INC-TEST-003")
        snap.k8s_state = {"phase": "CrashLoopBackOff"}
        snap.recent_logs = None
        snap.metrics_snapshot = None

        fake_signal = DiagnosisSignal(
            source="k8s_state",
            signal_type="crash_loop",
            severity=DiagnosisSeverity.CRITICAL,
            message="CrashLoopBackOff detected",
        )

        mock_aggregator = MagicMock()
        mock_aggregator.classify_signals_from_raw = MagicMock(return_value=[fake_signal])

        with patch("src.services.diagnosis_aggregator.get_diagnosis_aggregator", return_value=mock_aggregator):
            await investigator._collect_diagnosis_aggregator(snap, _Inc())

        assert snap.extra_diagnosis is not None
        assert isinstance(snap.extra_diagnosis, dict)
        assert snap.extra_diagnosis["signal_count"] == 1
        assert snap.extra_diagnosis["signals"][0]["signal_type"] == "crash_loop"

    def test_da_signals_appear_in_evidence_summary(self):
        """Path A: extra_diagnosis signals show up in build_summary's Signal Classification section."""
        from src.services.evidence_snapshot import EvidenceSnapshot

        snap = EvidenceSnapshot(incident_id="INC-TEST-SC-001")
        snap.extra_diagnosis = {
            "signal_count": 1,
            "signals": [{"signal_type": "oom_killed", "severity": "critical", "message": "OOMKilled"}],
        }
        summary = snap.build_summary()
        assert "Signal Classification" in summary
        assert "oom_killed" in summary

    @pytest.mark.asyncio
    async def test_collect_diagnosis_aggregator_exception_isolated(self):
        """Path A: when the aggregator raises, the collector returns and extra_diagnosis stays None."""
        from src.services.pre_decision_investigator import PreDecisionInvestigator
        from src.services.evidence_snapshot import EvidenceSnapshot

        investigator = PreDecisionInvestigator()

        class _Inc:
            incident_id = "INC-TEST-004"
            signals = []

        snap = EvidenceSnapshot(incident_id="INC-TEST-004")

        mock_aggregator = MagicMock()
        mock_aggregator.classify_signals_from_raw = MagicMock(
            side_effect=Exception("classify error")
        )

        # The collector is expected to swallow the error itself; if it
        # propagated, the await below would fail this test.
        with patch("src.services.diagnosis_aggregator.get_diagnosis_aggregator", return_value=mock_aggregator):
            await investigator._collect_diagnosis_aggregator(snap, _Inc())

        assert snap.extra_diagnosis is None
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Test: DiagnosisAggregator basic structure
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestDiagnosisAggregatorInterface:
    """Structural checks on the diagnosis aggregator module and its collaborators."""

    def test_get_diagnosis_aggregator_singleton(self):
        """Two consecutive get_diagnosis_aggregator() calls return the same object."""
        import src.services.diagnosis_aggregator as aggregator_module

        # Clear the module-level instance for the duration of the test and
        # put the previous value back afterwards, whatever the outcome.
        saved_instance = aggregator_module._aggregator
        aggregator_module._aggregator = None
        try:
            first = aggregator_module.get_diagnosis_aggregator()
            second = aggregator_module.get_diagnosis_aggregator()
            assert first is second
        finally:
            aggregator_module._aggregator = saved_instance

    def test_collect_pod_diagnosis_method_exists(self):
        """A DiagnosisAggregator instance has a callable collect_pod_diagnosis."""
        from src.services.diagnosis_aggregator import DiagnosisAggregator

        # Build the instance without running __init__.
        instance = DiagnosisAggregator.__new__(DiagnosisAggregator)

        assert hasattr(instance, "collect_pod_diagnosis")
        assert callable(instance.collect_pod_diagnosis)

    def test_collect_service_diagnosis_method_exists(self):
        """The DiagnosisAggregator class has a collect_service_diagnosis attribute."""
        from src.services.diagnosis_aggregator import DiagnosisAggregator

        assert hasattr(DiagnosisAggregator, "collect_service_diagnosis")

    def test_diagnosis_context_get_llm_prompt_context(self):
        """DiagnosisContext.get_llm_prompt_context() returns a str naming the target."""
        from src.services.diagnosis_aggregator import DiagnosisContext

        context = DiagnosisContext(target="test-pod", namespace="awoooi-prod")
        prompt_text = context.get_llm_prompt_context()

        assert isinstance(prompt_text, str)
        assert "test-pod" in prompt_text

    def test_pre_decision_investigator_has_collect_diagnosis_method(self):
        """PreDecisionInvestigator has a callable _collect_diagnosis_aggregator."""
        from src.services.pre_decision_investigator import PreDecisionInvestigator

        assert hasattr(PreDecisionInvestigator, "_collect_diagnosis_aggregator")
        assert callable(PreDecisionInvestigator._collect_diagnosis_aggregator)
|