W1 第二波:onboarder 飛輪 80→90 路徑剩餘兩件 PR。 ## PR-R1 — 25 條 yaml 規則 → DRAFT Playbook 遷移 斷鏈背景(onboarder C2):alert_rules.yaml 25 條規則 68% 寫死 RESTART, 沒有對應 Playbook → RAG 永遠 generic_fallback → 規則命中率沒回饋給 catalog。 修法: - 新建 services/rule_to_playbook_migrator.py - 自動從 alert_rules.yaml 解析每條 rule - 產生 PlaybookRecord(status=DRAFT, ai_confidence=0.3, source=YAML_RULE) - 誠實標示信心 0.3(非假 1.0,違反 feedback_confidence_truthfulness) - INSERT ON CONFLICT 冪等(name LIKE 'AutoMigrated: %' 去重,不擾動 seed) - 新建 scripts/migrate_rules_to_playbooks.py(CLI: --dry-run/--commit/--disable-flag) - ENABLE_RULE_MIGRATION_DRAFT=true(rollback flag) - 23 測試覆蓋(parse / build_dict / idempotent / dry_run / action_type / severity_map / feature_flag / wildcard_filter / partial_existing 等) ## PR-K1 — timeline_events 防禦性 ALTER(db-expert finding) 任務原前提錯誤:onboarder 報告的 C7 斷鏈(incident_id 欄位)在 2026-04-24 P1.6 已修復 ORM。但生產環境若在 P1.6 前已建表,create_all 跳過 已存在的表 → ORM 寫入 SELECT 仍可能找不到 column。 修法: - db/base.py:init_db() 補防禦性 ALTER: ALTER TABLE timeline_events ADD COLUMN IF NOT EXISTS incident_id VARCHAR(64); CREATE INDEX IF NOT EXISTS ix_timeline_incident_id ON timeline_events(incident_id); - IF NOT EXISTS 為 no-op 安全(已有 column 不做事) - stage 欄位是任務描述的幻覺(codebase 0 writer),不新增 未做: - alembic migration(專案不用 alembic,遵循既有 init_db ALTER pattern) - onboarder C7 在 ORM 層已修,本 commit 確保 prod schema 對齊 ## 驗證 - 1608 unit tests 全綠(+23 from 1585) - PR-R1 23 個測試獨立通過 ## 期望影響 - 飛輪 RAG 終於有 25 條 DRAFT Playbook 可查 → +5 分 - prod schema 對齊保險 → 防 ORM SELECT 失敗 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
455 lines
15 KiB
Python
455 lines
15 KiB
Python
"""
|
||
test_rule_to_playbook_migrator.py — 規則 → Playbook 遷移測試
|
||
=============================================================
|
||
W1 PR-R1: 覆蓋遷移邏輯,不真寫 DB
|
||
|
||
測試策略:
|
||
- 所有測試用 conftest.py 設定 MOCK_MODE=true,避免 DB 連線
|
||
- DB 寫入用 AsyncMock 模擬
|
||
- yaml 解析用臨時 fixture 檔案(不依賴實際 alert_rules.yaml 避免路徑問題)
|
||
|
||
2026-04-28 ogt + Claude Sonnet 4.6
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import textwrap
|
||
from pathlib import Path
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
import yaml
|
||
|
||
|
||
# =============================================================================
# Fixtures
# =============================================================================
|
||
|
||
# Sample rule whose remediation command is a kubectl invocation.
# Shaped like one entry of the `rules` list in alert_rules.yaml; the string
# values are runtime data consumed by the migrator and are kept verbatim.
MINIMAL_RULE_KUBECTL = {
    "id": "test_pod_crash",
    "priority": 60,
    "description": "Test Pod CrashLoopBackOff",
    "match": {
        "alertname": ["KubePodCrashLooping", "PodCrashLoopBackOff"],
        "alert_type": ["pod_crash"],
        "message": ["crashloop", "crash"],
    },
    "response": {
        "action_title": "診斷 CrashLoop 根因",
        "description": "⚙️ 規則匹配: Pod 進入 CrashLoopBackOff",
        "suggested_action": "NO_ACTION",
        "kubectl_command": "kubectl logs {target} -n {namespace} --previous --tail=50",
        "risk": "critical",
        "responsibility": "BE",
        "optimization": [
            {
                "type": "LIVENESS_PROBE",
                "description": "調整 liveness probe",
                "command": "# 調整 initialDelaySeconds",
            }
        ],
    },
}
|
||
|
||
# Sample rule whose remediation command is an ssh invocation (the key is still
# named `kubectl_command` in the rule schema used by these fixtures).
MINIMAL_RULE_SSH = {
    "id": "test_ollama_down",
    "priority": 90,
    "description": "Test Ollama Down",
    "match": {
        "alertname": ["OllamaDown"],
        "message": ["ollama"],
    },
    "response": {
        "action_title": "重啟 Ollama",
        "description": "⚙️ Ollama 下線",
        "suggested_action": "RESTART_DEPLOYMENT",
        "kubectl_command": "ssh {host} 'systemctl restart ollama'",
        "risk": "medium",
        "responsibility": "INFRA",
        "optimization": [],
    },
}
|
||
|
||
# Sample rule with an empty command: nothing can be executed automatically,
# so only the human-readable description is available.
MINIMAL_RULE_NO_ACTION = {
    "id": "test_no_action",
    "priority": 110,
    "description": "Test No Action Rule",
    "match": {
        "alertname": ["GiteaDown"],
        "message": ["gitea"],
    },
    "response": {
        "action_title": "Gitea 下線 — 人工確認",
        "description": "⚠️ Gitea 無法連線,不自動修復",
        "suggested_action": "NO_ACTION",
        "kubectl_command": "",
        "risk": "critical",
        "responsibility": "INFRA",
        "optimization": [],
    },
}
|
||
|
||
# (yaml `risk` value, expected risk-level string) pairs used to parametrize
# the severity-mapping test. Empty and missing values expect "MEDIUM".
SEVERITY_TEST_CASES = [
    ("low", "LOW"),
    ("medium", "MEDIUM"),
    ("high", "HIGH"),
    ("critical", "CRITICAL"),
    ("", "MEDIUM"),
    (None, "MEDIUM"),
]
|
||
|
||
|
||
@pytest.fixture
def minimal_yaml(tmp_path: Path) -> Path:
    """Write a minimal YAML fixture containing three rules and return its path.

    The file is created under pytest's per-test ``tmp_path`` so the tests never
    depend on the real alert_rules.yaml.
    """
    data = {
        "version": "1.0.0",
        "updated_at": "2026-04-28",
        "rules": [
            MINIMAL_RULE_KUBECTL,
            MINIMAL_RULE_SSH,
            MINIMAL_RULE_NO_ACTION,
        ],
    }
    yaml_file = tmp_path / "test_alert_rules.yaml"
    # allow_unicode=True writes the non-ASCII rule text as-is instead of escapes.
    yaml_file.write_text(yaml.dump(data, allow_unicode=True), encoding="utf-8")
    return yaml_file
|
||
|
||
|
||
@pytest.fixture
def empty_yaml(tmp_path: Path) -> Path:
    """Write a YAML fixture whose ``rules`` list is empty and return its path."""
    data = {"version": "1.0.0", "rules": []}
    yaml_file = tmp_path / "empty_rules.yaml"
    yaml_file.write_text(yaml.dump(data), encoding="utf-8")
    return yaml_file
|
||
|
||
|
||
# =============================================================================
# 1. test_parse_yaml_rule_extracts_alertname
# =============================================================================
|
||
|
||
def test_parse_yaml_rule_extracts_alertname(minimal_yaml: Path) -> None:
    """parse_yaml_rules reads the alertname list of each rule correctly."""
    from src.services.rule_to_playbook_migrator import parse_yaml_rules

    rules = parse_yaml_rules(minimal_yaml)

    assert len(rules) == 3
    # Alert names of the first rule (the kubectl sample).
    alertnames = rules[0]["match"]["alertname"]
    assert "KubePodCrashLooping" in alertnames
    assert "PodCrashLoopBackOff" in alertnames
|
||
|
||
|
||
def test_parse_yaml_rule_all_fields(minimal_yaml: Path) -> None:
    """parse_yaml_rules keeps the rule's id, priority and response fields."""
    from src.services.rule_to_playbook_migrator import parse_yaml_rules

    rules = parse_yaml_rules(minimal_yaml)

    rule = rules[0]
    assert rule["id"] == "test_pod_crash"
    assert rule["priority"] == 60
    assert "response" in rule
    assert rule["response"]["risk"] == "critical"
|
||
|
||
|
||
# =============================================================================
# 2. test_migration_creates_draft_playbook
# =============================================================================
|
||
|
||
def test_build_playbook_dict_creates_draft() -> None:
    """build_playbook_dict yields a draft dict sourced from a yaml rule.

    Confidence and trust are both expected to be 0.3 for migrated rules.
    """
    from src.services.rule_to_playbook_migrator import build_playbook_dict

    pb_dict = build_playbook_dict(MINIMAL_RULE_KUBECTL)

    assert pb_dict["status"] == "draft"
    assert pb_dict["source"] == "yaml_rule"
    assert pb_dict["ai_confidence"] == 0.3
    assert pb_dict["trust_score"] == 0.3
|
||
|
||
|
||
def test_build_playbook_dict_name_prefix() -> None:
    """The playbook name must have the form 'AutoMigrated: {rule_id}'."""
    from src.services.rule_to_playbook_migrator import build_playbook_dict

    pb_dict = build_playbook_dict(MINIMAL_RULE_KUBECTL)

    assert pb_dict["name"] == "AutoMigrated: test_pod_crash"
|
||
|
||
|
||
def test_build_playbook_dict_symptom_pattern_alertnames() -> None:
    """symptom_pattern.alert_names is populated from the rule's match.alertname."""
    from src.services.rule_to_playbook_migrator import build_playbook_dict

    pb_dict = build_playbook_dict(MINIMAL_RULE_KUBECTL)
    sp = pb_dict["symptom_pattern"]

    assert "KubePodCrashLooping" in sp["alert_names"]
    assert "PodCrashLoopBackOff" in sp["alert_names"]
|
||
|
||
|
||
def test_build_playbook_dict_kubectl_action_type() -> None:
    """A kubectl command produces a first repair step with action_type 'kubectl'."""
    from src.services.rule_to_playbook_migrator import build_playbook_dict

    pb_dict = build_playbook_dict(MINIMAL_RULE_KUBECTL)
    step = pb_dict["repair_steps"][0]

    assert step["action_type"] == "kubectl"
    assert "kubectl logs" in step["command"]
|
||
|
||
|
||
def test_build_playbook_dict_ssh_action_type() -> None:
    """An ssh command produces a first repair step with action_type 'ssh_command'."""
    from src.services.rule_to_playbook_migrator import build_playbook_dict

    pb_dict = build_playbook_dict(MINIMAL_RULE_SSH)
    step = pb_dict["repair_steps"][0]

    assert step["action_type"] == "ssh_command"
    assert step["command"].startswith("ssh ")
|
||
|
||
|
||
def test_build_playbook_dict_no_action_uses_manual() -> None:
    """An empty kubectl_command yields a 'manual' step that requires approval.

    The step's command is expected to carry descriptive text rather than be empty.
    """
    from src.services.rule_to_playbook_migrator import build_playbook_dict

    pb_dict = build_playbook_dict(MINIMAL_RULE_NO_ACTION)
    step = pb_dict["repair_steps"][0]

    assert step["action_type"] == "manual"
    assert len(step["command"]) > 0  # descriptive text is present
    assert step["requires_approval"] is True
|
||
|
||
|
||
# =============================================================================
# 3. test_migration_idempotent_on_conflict
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_migration_idempotent_on_conflict(minimal_yaml: Path) -> None:
    """Rules whose 'AutoMigrated: ...' name already exists are skipped, not re-created."""
    from src.services.rule_to_playbook_migrator import migrate_yaml_rules_to_playbooks

    # Simulate a DB that already holds all three migrated rules.
    existing_names = {
        "AutoMigrated: test_pod_crash",
        "AutoMigrated: test_ollama_down",
        "AutoMigrated: test_no_action",
    }

    # The mocked query result returns one-element row tuples from fetchall().
    mock_result = MagicMock()
    mock_result.fetchall.return_value = [(name,) for name in existing_names]

    mock_db = AsyncMock()
    mock_db.execute = AsyncMock(return_value=mock_result)

    # Async context manager stand-in that hands out the mocked session.
    mock_cm = MagicMock()
    mock_cm.__aenter__ = AsyncMock(return_value=mock_db)
    mock_cm.__aexit__ = AsyncMock(return_value=False)

    mock_repo = MagicMock()
    mock_repo.create = AsyncMock(side_effect=AssertionError("不應呼叫 create"))

    # Patch src.db.base.get_db_context (the interception point for the lazy import).
    with (
        patch("src.db.base.get_db_context", return_value=mock_cm),
        patch("src.repositories.playbook_repository.get_playbook_repository", return_value=mock_repo),
    ):
        report = await migrate_yaml_rules_to_playbooks(
            yaml_path=minimal_yaml,
            dry_run=False,
            enable_migration=True,
        )

    assert report.created == 0
    assert report.skipped == 3
    assert report.failed == 0
    # Checked explicitly: the AssertionError side effect alone could be swallowed
    # by a broad `except Exception` in the code under test.
    mock_repo.create.assert_not_called()
|
||
|
||
|
||
# =============================================================================
# 4. test_migration_dry_run_no_db_write
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_migration_dry_run_no_db_write(minimal_yaml: Path) -> None:
    """With dry_run=True the migrator neither queries nor writes the DB."""
    from src.services.rule_to_playbook_migrator import migrate_yaml_rules_to_playbooks

    with (
        patch("src.db.base.get_db_context") as mock_db_ctx,
        patch("src.repositories.playbook_repository.get_playbook_repository") as mock_repo,
    ):
        report = await migrate_yaml_rules_to_playbooks(
            yaml_path=minimal_yaml,
            dry_run=True,
            enable_migration=True,
        )

    # Neither the DB context nor the repository factory may be touched.
    mock_db_ctx.assert_not_called()
    mock_repo.assert_not_called()

    # In dry-run mode `created` equals the rule count (all are "to be created").
    assert report.dry_run is True
    assert report.total_rules == 3
    assert report.created == 3
    assert report.failed == 0
|
||
|
||
|
||
# =============================================================================
# 5. test_kubectl_command_validation_via_regex
# =============================================================================
|
||
|
||
def test_infer_action_type_kubectl() -> None:
    """Commands starting with kubectl are classified as 'kubectl'."""
    from src.services.rule_to_playbook_migrator import _infer_action_type

    assert _infer_action_type("kubectl delete pod foo -n default") == "kubectl"
    assert _infer_action_type("kubectl rollout restart deployment/api -n awoooi") == "kubectl"
|
||
|
||
|
||
def test_infer_action_type_ssh() -> None:
    """Commands starting with ssh are classified as 'ssh_command'.

    Both a templated host placeholder and a literal user@host are covered.
    """
    from src.services.rule_to_playbook_migrator import _infer_action_type

    assert _infer_action_type("ssh {host} 'docker restart minio'") == "ssh_command"
    assert _infer_action_type("ssh root@192.168.0.111 'systemctl restart ollama'") == "ssh_command"
|
||
|
||
|
||
def test_infer_action_type_empty() -> None:
    """Empty or whitespace-only commands are classified as 'manual'."""
    from src.services.rule_to_playbook_migrator import _infer_action_type

    assert _infer_action_type("") == "manual"
    assert _infer_action_type(" ") == "manual"
|
||
|
||
|
||
# =============================================================================
# 6. test_severity_to_risk_level_mapping
# =============================================================================
|
||
|
||
@pytest.mark.parametrize("risk_str,expected", SEVERITY_TEST_CASES)
def test_severity_to_risk_level_mapping(risk_str: str | None, expected: str) -> None:
    """The yaml `risk` field maps to the expected risk-level string.

    Cases come from SEVERITY_TEST_CASES, including empty and None inputs.
    """
    from src.services.rule_to_playbook_migrator import _infer_risk_level

    result = _infer_risk_level(risk_str)
    assert result == expected
|
||
|
||
|
||
# =============================================================================
# 7. test_feature_flag_disabled_skips_db_insert
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_feature_flag_disabled_skips_db_insert(minimal_yaml: Path) -> None:
    """With enable_migration=False an empty report is returned and the DB is untouched."""
    from src.services.rule_to_playbook_migrator import migrate_yaml_rules_to_playbooks

    with (
        patch("src.db.base.get_db_context") as mock_db_ctx,
        patch("src.repositories.playbook_repository.get_playbook_repository") as mock_repo,
    ):
        report = await migrate_yaml_rules_to_playbooks(
            yaml_path=minimal_yaml,
            dry_run=False,
            enable_migration=False,
        )

    mock_db_ctx.assert_not_called()
    mock_repo.assert_not_called()

    # All counters stay at zero when the migration is disabled.
    assert report.total_rules == 0
    assert report.created == 0
    assert report.skipped == 0
|
||
|
||
|
||
# =============================================================================
# 8. Integration path: 1 rule already exists + 2 newly created
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_migration_partial_existing(minimal_yaml: Path) -> None:
    """One pre-existing rule gives skipped=1; the other two give created=2."""
    from src.services.rule_to_playbook_migrator import migrate_yaml_rules_to_playbooks

    # Only the first rule is already present in the DB.
    existing_names = {"AutoMigrated: test_pod_crash"}

    # The mocked query result returns one-element row tuples from fetchall().
    mock_result = MagicMock()
    mock_result.fetchall.return_value = [(name,) for name in existing_names]

    mock_db = AsyncMock()
    mock_db.execute = AsyncMock(return_value=mock_result)

    # Async context manager stand-in that hands out the mocked session.
    mock_cm = MagicMock()
    mock_cm.__aenter__ = AsyncMock(return_value=mock_db)
    mock_cm.__aexit__ = AsyncMock(return_value=False)

    # create() echoes its argument back so each call counts as a success.
    mock_repo = MagicMock()
    mock_repo.create = AsyncMock(side_effect=lambda pb: pb)

    # Both the repository class and its factory are replaced with the mock.
    with (
        patch("src.db.base.get_db_context", return_value=mock_cm),
        patch("src.repositories.playbook_repository.PlaybookRepository", return_value=mock_repo),
        patch("src.repositories.playbook_repository.get_playbook_repository", return_value=mock_repo),
    ):
        report = await migrate_yaml_rules_to_playbooks(
            yaml_path=minimal_yaml,
            dry_run=False,
            enable_migration=True,
        )

    assert report.skipped == 1
    assert report.created == 2
    assert report.failed == 0
    assert mock_repo.create.call_count == 2
|
||
|
||
|
||
# =============================================================================
# 9. YAML file missing → a report containing an error is returned
# =============================================================================
|
||
|
||
@pytest.mark.asyncio
async def test_migration_yaml_not_found(tmp_path: Path) -> None:
    """A missing yaml file yields a report carrying one error instead of raising."""
    from src.services.rule_to_playbook_migrator import migrate_yaml_rules_to_playbooks

    nonexistent = tmp_path / "nonexistent.yaml"

    report = await migrate_yaml_rules_to_playbooks(
        yaml_path=nonexistent,
        dry_run=False,
        enable_migration=True,
    )

    # The missing file is reported via `errors`, not counted in `failed`.
    assert report.failed == 0
    assert len(report.errors) == 1
    assert "不存在" in report.errors[0]
|
||
|
||
|
||
# =============================================================================
# 10. symptom_pattern keywords filter out the wildcard
# =============================================================================
|
||
|
||
def test_build_symptom_pattern_filters_wildcard() -> None:
    """The generic_fallback rule's alertname=['*'] must not leak into keywords."""
    from src.services.rule_to_playbook_migrator import _build_symptom_pattern

    generic_rule = {
        "id": "generic_fallback",
        "match": {
            "alertname": ["*"],
            "message": ["fallback"],
        },
        "response": {"risk": "medium"},
    }
    sp = _build_symptom_pattern(generic_rule)

    # '*' appearing in alert_names is acceptable (it is the wildcard itself);
    # only keywords must be free of it.
    assert "*" not in sp["keywords"]
|