diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile
index 7aacca1e..dc44364e 100644
--- a/apps/api/Dockerfile
+++ b/apps/api/Dockerfile
@@ -60,6 +60,8 @@ COPY k8s/ ./k8s/
 # 2026-04-10 Claude Sonnet 4.6: RAG 知識庫索引來源 (ADR-067 Phase 33)
 COPY docs/ ./docs/
 COPY .agents/skills/ ./.agents/skills/
+# 2026-04-12 ogt (ADR-073 P2-1): CronJob scripts — standalone scripts replace inline Python
+COPY scripts/ ./scripts/
 
 # Install openssh-client + curl — SSH_COMMAND Playbook + healthcheck
 # Install kubectl — drift_detector 需要 kubectl 讀取 K8s 實際狀態
diff --git a/apps/api/tests/test_classify_alert_early.py b/apps/api/tests/test_classify_alert_early.py
new file mode 100644
index 00000000..4e13c28b
--- /dev/null
+++ b/apps/api/tests/test_classify_alert_early.py
@@ -0,0 +1,155 @@
+"""
+Unit tests for the classify_alert_early() classifier — ADR-073 Phase 2-2
+
+Covers the happy path and edge cases of the 7 classification rules:
+  config_drift, info, backup, infrastructure, kubernetes, database, general
+
+2026-04-12 ogt (ADR-073 P2-2 test reinforcement)
+"""
+
+import pytest
+from src.services.incident_service import classify_alert_early
+
+
+# --------------------------------------------------------------------------- #
+# TYPE-4D: Config Drift
+# --------------------------------------------------------------------------- #
+
+class TestConfigDrift:
+    def test_configuration_drift(self):
+        ac, nt = classify_alert_early("ConfigurationDrift", "critical", {})
+        assert nt == "TYPE-4D"
+        assert ac == "config_drift"
+
+    def test_kube_config_drift(self):
+        ac, nt = classify_alert_early("KubeConfigDrift", "warning", {})
+        assert nt == "TYPE-4D"
+        assert ac == "config_drift"
+
+    def test_config_drift_case_sensitive(self):
+        # A variant that is not on the allowlist → must not match config_drift
+        ac, nt = classify_alert_early("configurationdrift", "critical", {})
+        assert ac != "config_drift"
+
+
+# --------------------------------------------------------------------------- #
+# TYPE-1: Info / Heartbeat
+# --------------------------------------------------------------------------- #
+
+class TestInfoAlerts:
+    def test_severity_info(self):
+        ac, nt = classify_alert_early("SomeAlert", "info", {})
+        assert nt == "TYPE-1"
+        assert ac == "info"
+
+    def test_severity_none(self):
+        ac, nt = classify_alert_early("SomeAlert", "none", {})
+        assert nt == "TYPE-1"
+        assert ac == "info"
+
+    def test_backup_keyword(self):
+        ac, nt = classify_alert_early("BackupJobFailed", "warning", {})
+        assert nt == "TYPE-1"
+        assert ac == "backup"
+
+    def test_heartbeat_keyword(self):
+        ac, nt = classify_alert_early("WatchdogHeartbeat", "warning", {})
+        assert nt == "TYPE-1"
+        assert ac == "backup"
+
+    def test_backup_case_insensitive(self):
+        ac, nt = classify_alert_early("BACKUP_MISSING", "critical", {})
+        assert nt == "TYPE-1"
+        assert ac == "backup"
+
+
+# --------------------------------------------------------------------------- #
+# TYPE-3: Infrastructure (Docker / Host)
+# --------------------------------------------------------------------------- #
+
+class TestInfrastructure:
+    def test_docker_prefix(self):
+        ac, nt = classify_alert_early("DockerContainerOOM", "critical", {})
+        assert nt == "TYPE-3"
+        assert ac == "infrastructure"
+
+    def test_host_prefix(self):
+        ac, nt = classify_alert_early("HostHighCpuLoad", "warning", {})
+        assert nt == "TYPE-3"
+        assert ac == "infrastructure"
+
+    def test_docker_restart(self):
+        ac, nt = classify_alert_early("DockerContainerRestarting", "warning", {})
+        assert ac == "infrastructure"
+
+
+# --------------------------------------------------------------------------- #
+# TYPE-3: Kubernetes
+# --------------------------------------------------------------------------- #
+
+class TestKubernetes:
+    @pytest.mark.parametrize("alertname", [
+        "KubePodCrashLooping",
+        "PodHighMemory",
+        "DeploymentReplicasMismatch",
+        "NodeNotReady",
+        "ArgoCDSyncFailed",
+    ])
+    def test_k8s_prefixes(self, alertname):
+        ac, nt = classify_alert_early(alertname, "critical", {})
+        assert nt == "TYPE-3"
+        assert ac == "kubernetes"
+
+    def test_velero_backup_keyword_wins(self):
+        # VeleroBackupFailed contains "backup" → the backup rule takes priority over the kubernetes prefix
+        ac, nt = classify_alert_early("VeleroBackupFailed", "critical", {})
+        assert nt == "TYPE-1"
+        assert ac == "backup"
+
+
+# --------------------------------------------------------------------------- #
+# TYPE-3: Database
+# --------------------------------------------------------------------------- #
+
+class TestDatabase:
+    def test_postgres(self):
+        ac, nt = classify_alert_early("PostgresDown", "critical", {})
+        assert nt == "TYPE-3"
+        assert ac == "database"
+
+    def test_redis(self):
+        ac, nt = classify_alert_early("RedisMemoryHigh", "warning", {})
+        assert nt == "TYPE-3"
+        assert ac == "database"
+
+
+# --------------------------------------------------------------------------- #
+# TYPE-3: General (fallback)
+# --------------------------------------------------------------------------- #
+
+class TestGeneral:
+    def test_unknown_alert(self):
+        ac, nt = classify_alert_early("SomeUnknownAlert", "warning", {})
+        assert nt == "TYPE-3"
+        assert ac == "general"
+
+    def test_empty_alertname(self):
+        ac, nt = classify_alert_early("", "warning", {})
+        assert nt == "TYPE-3"
+        assert ac == "general"
+
+
+# --------------------------------------------------------------------------- #
+# Priority checks — config_drift and info should take precedence over prefix rules
+# --------------------------------------------------------------------------- #
+
+class TestPriority:
+    def test_config_drift_beats_severity(self):
+        # ConfigurationDrift should match config_drift even when severity=warning
+        ac, nt = classify_alert_early("ConfigurationDrift", "warning", {})
+        assert ac == "config_drift"
+
+    def test_info_severity_beats_docker_prefix(self):
+        # Docker prefix + severity=info → the info rule matches first
+        ac, nt = classify_alert_early("DockerContainerOOM", "info", {})
+        assert ac == "info"
diff --git a/k8s/awoooi-prod/15-cronjob-km-vectorize.yaml b/k8s/awoooi-prod/15-cronjob-km-vectorize.yaml
index 7234db7b..77c97068 100644
--- a/k8s/awoooi-prod/15-cronjob-km-vectorize.yaml
+++ 
b/k8s/awoooi-prod/15-cronjob-km-vectorize.yaml
@@ -41,15 +41,7 @@ spec:
               imagePullPolicy: Always
               command:
                 - python
-                - -c
-                - |
-                  import asyncio, httpx, os
-                  async def main():
-                      api = os.environ.get("INTERNAL_API_URL", "http://awoooi-api:8000")
-                      async with httpx.AsyncClient(timeout=120) as c:
-                          r = await c.post(f"{api}/api/v1/knowledge/embed-all")
-                          print("embed-all:", r.status_code, r.text[:200])
-                  asyncio.run(main())
+                - /app/scripts/cron_km_vectorize.py
               env:
                 - name: TZ
                   value: "Asia/Taipei"
diff --git a/scripts/cron_km_vectorize.py b/scripts/cron_km_vectorize.py
new file mode 100644
index 00000000..0dd7f4a1
--- /dev/null
+++ b/scripts/cron_km_vectorize.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+KM Vectorize CronJob entry-point script — ADR-073 Phase 4-3
+
+Invoked daily by a K8s CronJob to vectorize newly added KM entries so that
+RAG queries can reach the latest knowledge (the flywheel's "learning
+consolidation" node).
+
+2026-04-12 ogt (ADR-073 Phase 4-3, P2-1 refactor)
+"""
+
+import asyncio
+import os
+import sys
+
+import httpx
+
+# In-cluster service address used when INTERNAL_API_URL is unset or blank.
+DEFAULT_API_URL = "http://awoooi-api.awoooi-prod.svc.cluster.local:8000"
+
+
+async def main() -> int:
+    """POST to the KM embed-all endpoint and return a process exit code.
+
+    The API base URL comes from ``INTERNAL_API_URL``. An unset or blank value
+    falls back to ``DEFAULT_API_URL``, and trailing slashes are dropped so the
+    joined path never contains ``//``.
+
+    Returns:
+        0 when the endpoint answers with a status below 400; 1 on an HTTP
+        error status, a transport failure, or a malformed URL.
+    """
+    # `or` instead of a .get() default so an env var set to "" also falls back.
+    api_base = os.environ.get("INTERNAL_API_URL", "").strip() or DEFAULT_API_URL
+    url = f"{api_base.rstrip('/')}/api/v1/knowledge/embed-all"
+
+    async with httpx.AsyncClient(timeout=120) as client:
+        # Keep the try body to the single call that can raise.
+        try:
+            resp = await client.post(url)
+        except (httpx.RequestError, httpx.InvalidURL) as exc:
+            print(f"ERROR: request failed — {exc}", file=sys.stderr)
+            return 1
+
+    print(f"embed-all: {resp.status_code} {resp.text[:200]}")
+    if resp.status_code >= 400:
+        print(f"ERROR: embed-all returned {resp.status_code}", file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))