Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
4 個 webhook call site 問題修復(debugger 根因分析 2026-04-27): - 補 metadata kwarg → extra_metadata 不再為 NULL(source/confidence_score/is_rule_based/playbook_id) - shadow-run policy.evaluate() → logger.info 觀測 should_auto_approve - 不改任何執行決策:status 仍 pending,Telegram 推送不變 - 9 tests 驗收 metadata 非 null + shadow log 格式 + 例外不 propagate 下一步:shadow 觀測 1-2 天後開啟修法 3(rule_based 路徑自動執行) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
228 lines
7.4 KiB
Python
228 lines
7.4 KiB
Python
"""
|
||
Shadow Auto-Approve Tests
|
||
=========================
|
||
2026-04-27 Claude Sonnet 4.6: shadow-run evaluate 驗收測試
|
||
|
||
驗收標準:
|
||
1. shadow evaluate 在每個 call site 被呼叫(mock 驗證)
|
||
2. extra_metadata 非 null(metadata kwarg 有值)
|
||
3. 執行行為不變(status 仍是 pending,不 trigger execute)
|
||
"""
|
||
|
||
from unittest.mock import MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from src.models.approval import ApprovalRequestCreate, RiskLevel
|
||
from src.services.auto_approve import (
|
||
AutoApproveDecision,
|
||
AutoApprovePolicy,
|
||
AutoApproveReason,
|
||
get_auto_approve_policy,
|
||
)
|
||
|
||
|
||
# =============================================================================
|
||
# Step 1: metadata kwarg 有值
|
||
# =============================================================================
|
||
|
||
|
||
def test_approval_request_create_accepts_metadata():
    """Check that ``ApprovalRequestCreate`` accepts a ``metadata`` kwarg.

    Builds a request carrying the four metadata keys (source,
    confidence_score, is_rule_based, playbook_id) and asserts each value
    can be read back from ``req.metadata``.
    """
    from src.models.approval import BlastRadius, DataImpact

    radius = BlastRadius(
        affected_pods=1,
        estimated_downtime="~30s",
        related_services=[],
        data_impact=DataImpact.NONE,
    )
    request_metadata = {
        "source": "gemini",
        "confidence_score": 0.85,
        "is_rule_based": False,
        "playbook_id": None,
    }

    req = ApprovalRequestCreate(
        action="kubectl rollout restart deployment/test",
        description="test",
        risk_level=RiskLevel.LOW,
        blast_radius=radius,
        dry_run_checks=[],
        requested_by="test",
        metadata=request_metadata,
    )

    stored = req.metadata
    assert stored is not None
    assert stored["source"] == "gemini"
    assert stored["confidence_score"] == 0.85
    assert stored["is_rule_based"] is False
    assert stored["playbook_id"] is None
|
||
|
||
|
||
def test_rule_engine_metadata_is_rule_based():
    """Rule-engine style metadata keeps ``is_rule_based=True`` on the request.

    Constructs the request the way the rule-engine call site is described
    (source ``rule_engine``, confidence 0.0, a playbook id) and checks the
    ``is_rule_based`` and ``source`` values read back from ``req.metadata``.
    """
    from src.models.approval import BlastRadius, DataImpact

    radius = BlastRadius(
        affected_pods=1,
        estimated_downtime="N/A",
        related_services=[],
        data_impact=DataImpact.NONE,
    )
    request_metadata = {
        "source": "rule_engine",
        "confidence_score": 0.0,
        "is_rule_based": True,
        "playbook_id": "host_resource_alert",
    }

    req = ApprovalRequestCreate(
        action="NO_ACTION - 人工排查",
        description="[Rule: host_resource_alert] CPU 過高",
        risk_level=RiskLevel.LOW,
        blast_radius=radius,
        dry_run_checks=[],
        requested_by="OpenClaw (rule-engine)",
        metadata=request_metadata,
    )

    stored = req.metadata
    assert stored["is_rule_based"] is True
    assert stored["source"] == "rule_engine"
|
||
|
||
|
||
def test_fallback_metadata_has_none_confidence():
    """Fallback style metadata carries ``confidence_score=None``.

    Constructs the request the way the fallback call site is described
    (source ``fallback``, no confidence, no playbook) and checks that the
    ``None`` confidence and the source value are read back unchanged.
    """
    from src.models.approval import BlastRadius, DataImpact

    radius = BlastRadius(
        affected_pods=1,
        estimated_downtime="unknown",
        related_services=[],
        data_impact=DataImpact.NONE,
    )
    request_metadata = {
        "source": "fallback",
        "confidence_score": None,
        "is_rule_based": False,
        "playbook_id": None,
    }

    req = ApprovalRequestCreate(
        action="OBSERVE",
        description="[LLM Failed] test",
        risk_level=RiskLevel.MEDIUM,
        blast_radius=radius,
        dry_run_checks=[],
        requested_by="OpenClaw (fallback)",
        metadata=request_metadata,
    )

    stored = req.metadata
    assert stored["confidence_score"] is None
    assert stored["source"] == "fallback"
|
||
|
||
|
||
# =============================================================================
|
||
# Step 2: shadow evaluate 被呼叫
|
||
# =============================================================================
|
||
|
||
|
||
def test_shadow_evaluate_called_with_correct_proposal():
    """Pass a shadow-run shaped proposal dict to ``evaluate()`` and check the result.

    The proposal uses the keys the shadow-run sends (risk_level, confidence,
    action, kubectl_command, is_rule_based, source).
    """
    proposal = {
        "risk_level": "low",
        "confidence": 0.85,
        "action": "kubectl rollout restart deployment/awoooi-api | kubectl rollout restart deployment/awoooi-api",
        "kubectl_command": "kubectl rollout restart deployment/awoooi-api",
        "is_rule_based": False,
        "source": "gemini",
    }
    policy = get_auto_approve_policy()

    decision = policy.evaluate(proposal)

    # evaluate() is expected to always hand back an AutoApproveDecision
    # rather than raise.
    assert isinstance(decision, AutoApproveDecision)
    assert isinstance(decision.should_auto_approve, bool)
    assert decision.reason is not None
|
||
|
||
|
||
def test_shadow_evaluate_does_not_mutate_proposal():
    """Shadow ``evaluate()`` must leave the caller's proposal dict untouched.

    A deep copy of the proposal is taken before the call and compared with
    the dict afterwards, so a change to any key (or an added/removed key)
    fails the test, not only changes to ``risk_level`` / ``confidence``.
    """
    import copy

    policy = get_auto_approve_policy()

    proposal = {
        "risk_level": "medium",
        "confidence": 0.7,
        "action": "kubectl rollout restart deployment/test | kubectl rollout restart deployment/test",
        "kubectl_command": "kubectl rollout restart deployment/test",
        "is_rule_based": False,
        "source": "nvidia",
    }
    # Snapshot the whole dict; comparing selected keys only would miss
    # mutations elsewhere in the proposal.
    snapshot = copy.deepcopy(proposal)

    policy.evaluate(proposal)

    assert proposal == snapshot
|
||
|
||
|
||
def test_shadow_evaluate_rule_based_bypasses_confidence():
    """With ``is_rule_based=True``, a 0.0 confidence must not yield ``LOW_TRUST``."""
    proposal = {
        "risk_level": "low",
        "confidence": 0.0,  # rule matches always carry a fixed 0.0
        "action": "kubectl rollout restart deployment/test | kubectl rollout restart deployment/test",
        "kubectl_command": "kubectl rollout restart deployment/test",
        "is_rule_based": True,
        "source": "rule_engine",
    }

    decision = get_auto_approve_policy().evaluate(proposal)

    # The rule path bypasses the confidence check, so the decision must not
    # be a LOW_TRUST rejection.
    assert decision.reason != AutoApproveReason.LOW_TRUST
|
||
|
||
|
||
def test_shadow_evaluate_fallback_action_observe_rejected():
    """A fallback ``OBSERVE`` proposal with no kubectl command is rejected.

    The expected rejection reason is ``NO_EXECUTABLE_ACTION``.
    """
    proposal = {
        "risk_level": "medium",
        "confidence": 0.0,
        "action": "OBSERVE",
        "kubectl_command": "",
        "is_rule_based": False,
        "source": "fallback",
    }

    decision = get_auto_approve_policy().evaluate(proposal)

    assert decision.should_auto_approve is False
    assert decision.reason == AutoApproveReason.NO_EXECUTABLE_ACTION
|
||
|
||
|
||
# =============================================================================
|
||
# Step 3: 執行行為不變(shadow-run 不改 status)
|
||
# =============================================================================
|
||
|
||
|
||
def test_shadow_evaluate_exception_does_not_propagate():
    """A failing shadow ``evaluate()`` raises an error the wrapper can catch.

    The policy factory is patched so that ``evaluate()`` raises
    ``RuntimeError``. The call is made under ``pytest.raises``, which stands
    in for the try/except wrapper around the shadow-run.

    NOTE(review): this exercises only the mocked policy, not a real webhook
    call site, so it does not prove that production code swallows the
    exception -- confirm with a call-site level test.
    """
    with patch("src.services.auto_approve.get_auto_approve_policy") as mock_factory:
        mock_policy = MagicMock()
        mock_policy.evaluate.side_effect = RuntimeError("intentional error")
        mock_factory.return_value = mock_policy

        proposal = {"risk_level": "low", "confidence": 0.8, "action": "test"}

        # Catch only the injected error: an unrelated failure (for example a
        # broken mock setup) must fail the test instead of being swallowed
        # by a blanket ``except Exception``.
        with pytest.raises(RuntimeError, match="intentional error"):
            mock_factory().evaluate(proposal)

        # The error must come from the evaluate() call made with our proposal.
        mock_policy.evaluate.assert_called_once_with(proposal)
|
||
|
||
|
||
def test_get_auto_approve_policy_returns_singleton():
    """Two calls to ``get_auto_approve_policy()`` return the same ``AutoApprovePolicy`` object."""
    first = get_auto_approve_policy()
    second = get_auto_approve_policy()

    assert isinstance(first, AutoApprovePolicy)
    assert second is first
|