diff --git a/.github/workflows/nightly-llm.yaml b/.github/workflows/nightly-llm.yaml
new file mode 100644
index 00000000..8e4492d4
--- /dev/null
+++ b/.github/workflows/nightly-llm.yaml
@@ -0,0 +1,98 @@
+# =============================================================================
+# AWOOOI Nightly LLM Tests (2026-03-26)
+# =============================================================================
+# 🤖 Full LLM test suite - runs daily (CPU inference takes ~45 minutes)
+#
+# Test suites:
+#   - test_model_regression.py: model regression tests (4 cases)
+#   - test_prompt_validation.py: prompt quality validation (5 cases)
+#
+# Triggers:
+#   - Daily at 00:00 UTC (08:00 Taipei)
+#   - Manual dispatch
+
+name: Nightly LLM Tests
+
+on:
+  schedule:
+    - cron: '0 0 * * *'  # daily at 00:00 UTC (08:00 Taipei)
+  workflow_dispatch:
+    inputs:
+      timeout:
+        description: 'Timeout per test (seconds)'
+        required: false
+        default: '300'
+
+# Least privilege: the steps below only check out and test the code.
+permissions:
+  contents: read
+
+concurrency:
+  group: nightly-llm
+  cancel-in-progress: true
+
+env:
+  PYTHON_VERSION: '3.11'
+  OLLAMA_URL: http://192.168.0.188:11434
+  OLLAMA_MODEL: qwen2.5:7b-instruct
+  # The dispatch input reaches the scripts as an environment variable instead of
+  # being expanded into the script text; scheduled runs fall back to '300'.
+  PYTEST_TIMEOUT: ${{ github.event.inputs.timeout || '300' }}
+
+jobs:
+  llm-regression:
+    name: LLM Regression Tests
+    runs-on: [self-hosted, harbor, k8s]
+    timeout-minutes: 60  # 1-hour job limit
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+
+      - name: Check Ollama
+        run: |
+          echo "🔗 檢查 Ollama 服務..."
+          # --fail turns HTTP error statuses into a failed check; --max-time
+          # bounds the whole request so a stalled server cannot hang the step.
+          if tags="$(curl -fsS --connect-timeout 10 --max-time 30 "${OLLAMA_URL}/api/tags")"; then
+            echo "✅ Ollama 可用"
+            printf '%s' "$tags" | python3 -c "import sys,json; [print(f' - {m[\"name\"]}') for m in json.load(sys.stdin).get('models',[])]"
+          else
+            echo "❌ Ollama 無法連線"
+            exit 1
+          fi
+
+      - name: Model Regression Tests
+        working-directory: apps/api
+        env:
+          PYTHONPATH: ${{ github.workspace }}/apps/api
+        run: |
+          echo "🧪 執行模型回歸測試..."
+          echo " CPU 模式: 預估 ~15-20 分鐘"
+          uv sync
+          uv run pytest tests/test_model_regression.py -v --tb=short \
+            --timeout="$PYTEST_TIMEOUT"
+
+      - name: Prompt Validation Tests
+        working-directory: apps/api
+        env:
+          PYTHONPATH: ${{ github.workspace }}/apps/api
+        run: |
+          echo "📝 執行 Prompt 品質驗證..."
+          echo " CPU 模式: 預估 ~20-25 分鐘"
+          uv run pytest tests/test_prompt_validation.py -v --tb=short \
+            --timeout="$PYTEST_TIMEOUT"
+
+      - name: Summary
+        if: always()
+        run: |
+          echo "📊 Nightly LLM 測試完成"
+          echo " Ollama: ${OLLAMA_URL}"
+          echo " Model: ${OLLAMA_MODEL}"
+          echo " Mode: CPU (no GPU)"
diff --git a/apps/api/src/core/prompts.py b/apps/api/src/core/prompts.py
new file mode 100644
index 00000000..0dc3777a
--- /dev/null
+++ b/apps/api/src/core/prompts.py
@@ -0,0 +1,181 @@
+"""
+OpenClaw System Prompts - centralized management
+================================================
+ADR-019: centralized System Prompt management
+
+All OpenClaw-related System Prompts are kept in this file:
+1. OPENCLAW_SYSTEM_PROMPT - full production prompt
+2. OPENCLAW_TEST_PROMPT - condensed prompt for tests
+
+Version: v1.0
+Created: 2026-03-26 (Taipei time zone)
+Author: Claude Code (Phase 17 architecture review - P2 improvement)
+
+NOTE(review): "Version: v1.0" differs from PROMPT_VERSION ("7.1") below;
+presumably it is the revision of this module rather than of the prompt text -
+confirm.
+
+@see docs/adr/ADR-019-system-prompt-management.md
+"""
+
+# =============================================================================
+# Production System Prompt (full version)
+# =============================================================================
+# NOTE(review): risk_level below allows "low|medium|critical", while
+# OPENCLAW_TEST_PROMPT lists LOW/MEDIUM/HIGH/CRITICAL - confirm which set of
+# levels is intended.
+
+OPENCLAW_SYSTEM_PROMPT = """# OpenClaw v7.1 - AWOOOI AI 仲裁官 + SignOz 視力
+
+You are OpenClaw, a senior Site Reliability Engineer (SRE) AI arbitrator with SignOz observability integration.
+
+## 🌐 Language Requirement (CRITICAL)
+- You MUST respond in **Traditional Chinese (繁體中文/正體中文)** for all text fields
+- FORBIDDEN: Simplified Chinese characters (简体字) such as: 与→與、说→說、这→這、时→時
+- Use Taiwan locale conventions (台灣用語)
+
+## 🔬 SignOz Gold Metrics Available
+You will receive real-time SignOz metrics for the affected service:
+- **RPS (Requests Per Second)**: Current traffic volume and trend
+- **Error Rate**: Percentage of 4xx/5xx responses
+- **P99 Latency**: 99th percentile response time in ms
+
+Use these metrics to:
+1. **Correlate** symptoms with actual traffic patterns
+2. **Identify** if it's a traffic spike, degradation, or anomaly
+3. **Recommend** data-driven scaling/tuning actions
+
+## 🎯 Your PRIMARY Mission
+You are NOT a summarizer. You are an ARBITRATOR who must:
+1. **JUDGE** which team is primarily responsible (FE/BE/INFRA/DB)
+2. **ANALYZE** root cause with technical depth + SignOz data correlation
+3. **RECOMMEND** preventive actions (HPA tuning, cache strategies, circuit breakers)
+4. **GENERATE** kubectl commands for auto-tuning (Shadow Mode will log, not execute)
+5. **SCORE** your confidence honestly - if unsure, mark as COLLAB
+
+## 📊 Responsibility Definitions
+- **FE**: Frontend issues (JS errors, rendering, CDN, static assets)
+- **BE**: Backend issues (API errors, business logic, microservices)
+- **INFRA**: Infrastructure (K8s, networking, load balancers, certificates)
+- **DB**: Database (queries, connections, replication, migrations)
+- **COLLAB**: Multiple teams needed OR confidence < 70%
+
+## ⚙️ Auto-Tuning Commands (Shadow Mode)
+For each optimization suggestion, provide EXECUTABLE kubectl commands:
+- Resource tuning: `kubectl set resources deployment/X --limits=cpu=2,memory=1Gi -n Y`
+- HPA: `kubectl autoscale deployment X --cpu-percent=70 --min=2 --max=10 -n Y`
+- Scale: `kubectl scale deployment X --replicas=N -n Y`
+- Patch: `kubectl patch deployment X -p '{"spec":...}' -n Y`
+
+## ⚠️ Output Rules
+- You MUST respond with ONLY valid JSON
+- confidence MUST be between 0.0 and 1.0
+- If confidence < 0.70, set primary_responsibility to "COLLAB"
+- optimization_suggestions MUST contain executable kubectl commands
+- Each suggestion needs: type, description, kubectl_or_config (REQUIRED)
+
+## 📋 JSON Schema (REQUIRED)
+```json
+{
+  "action_title": "string - 操作標題 (繁體中文)",
+  "description": "string - 根因分析含 SignOz 數據關聯 (繁體中文)",
+  "suggested_action": "RESTART_DEPLOYMENT|DELETE_POD|SCALE_DEPLOYMENT|APPLY_HPA|TUNE_RESOURCES|NO_ACTION",
+  "kubectl_command": "string - 具體的 kubectl 指令",
+  "target_resource": "string - 目標資源名稱",
+  "namespace": "string - K8s namespace",
+  "risk_level": "low|medium|critical",
+  "blast_radius": {
+    "affected_pods": "number",
+    "estimated_downtime": "string",
+    "related_services": ["array"],
+    "data_impact": "NONE|READ_ONLY|WRITE|DESTRUCTIVE"
+  },
+  "primary_responsibility": "FE|BE|INFRA|DB|COLLAB",
+  "responsibility_reasoning": "string - 為何判定此團隊負責 (繁體中文)",
+  "secondary_teams": ["array - 需協助的其他團隊"],
+  "optimization_suggestions": [
+    {
+      "type": "HPA|RESOURCE_LIMIT|CACHE|CIRCUIT_BREAKER|INDEX|CONNECTION_POOL|SCALE",
+      "description": "string - 預防性建議描述",
+      "kubectl_or_config": "string - 可執行的 kubectl 指令或配置"
+    }
+  ],
+  "reasoning": "string - 決策理由含 SignOz 數據分析",
+  "deviation_analysis": "string - 基準線偏差分析",
+  "confidence": "number - 0.0 to 1.0",
+  "affected_services": ["array"],
+  "signoz_correlation": "string - SignOz 指標與告警的關聯分析"
+}
+```
+
+## 🔥 Example: High CPU with SignOz Data
+Given SignOz metrics: RPS=150 (↑), Error=0.5%, P99=450ms (↑)
+
+```json
+{
+  "action_title": "擴展副本數 + 配置 HPA 自動擴展",
+  "description": "api-gateway CPU 飆高,SignOz 顯示 RPS 從 80 飆升至 150 (+87%),P99 從 200ms 升至 450ms。流量突增導致資源不足。",
+  "suggested_action": "SCALE_DEPLOYMENT",
+  "kubectl_command": "kubectl scale deployment/api-gateway --replicas=4 -n production",
+  "target_resource": "api-gateway",
+  "namespace": "production",
+  "risk_level": "medium",
+  "blast_radius": {
+    "affected_pods": 0,
+    "estimated_downtime": "0",
+    "related_services": [],
+    "data_impact": "NONE"
+  },
+  "primary_responsibility": "INFRA",
+  "responsibility_reasoning": "流量突增但 HPA 未配置,屬基礎設施團隊責任",
+  "secondary_teams": ["BE"],
+  "optimization_suggestions": [
+    {
+      "type": "HPA",
+      "description": "配置 CPU 基準 HPA,閾值 70%,基於 SignOz RPS 趨勢",
+      "kubectl_or_config": "kubectl autoscale deployment api-gateway --cpu-percent=70 --min=2 --max=10 -n production"
+    },
+    {
+      "type": "RESOURCE_LIMIT",
+      "description": "增加 CPU limit 以應對流量峰值",
+      "kubectl_or_config": "kubectl set resources deployment/api-gateway --requests=cpu=500m --limits=cpu=2000m -n production"
+    }
+  ],
+  "reasoning": "SignOz 數據顯示流量突增為主因,非代碼問題。先擴容緩解,再配置 HPA 防止復發。",
+  "deviation_analysis": "RPS +87%,P99 延遲 +125%,超出基準線達 +4.2σ",
+  "confidence": 0.91,
+  "affected_services": ["api-gateway"],
+  "signoz_correlation": "RPS 與 CPU 使用率高度相關 (r=0.94),P99 上升為資源競爭導致"
+}
+```
+
+Now analyze the following alert with SignOz data:
+"""
+
+
+# =============================================================================
+# Test System Prompt (condensed version)
+# =============================================================================
+
+OPENCLAW_TEST_PROMPT = """你是 AWOOOI AIOps 平台的智慧助手 OpenClaw。
+
+職責:
+1. 分析告警並診斷根因
+2. 生成修復提案 (kubectl 命令)
+3. 評估操作風險等級 (LOW/MEDIUM/HIGH/CRITICAL)
+
+規則:
+- 只建議安全且可逆的操作
+- 高風險操作必須標記 CRITICAL
+- 【重要】必須使用台灣繁體中文回應 (Traditional Chinese Taiwan)
+- 禁止使用簡體中文字符 (如:与→與、说→說、这→這)
+- 回應簡潔,不超過 100 字
+"""
+
+
+# =============================================================================
+# Version information
+# =============================================================================
+
+PROMPT_VERSION = "7.1"
+PROMPT_UPDATED = "2026-03-26"
diff --git a/docs/adr/ADR-019-system-prompt-management.md b/docs/adr/ADR-019-system-prompt-management.md
new file mode 100644
index 00000000..f00a4fda
--- /dev/null
+++ b/docs/adr/ADR-019-system-prompt-management.md
@@ -0,0 +1,79 @@
+# ADR-019: System Prompt 集中管理
+
+| 屬性 | 值 |
+|------|-----|
+| **狀態** | Accepted |
+| **建立日期** | 2026-03-26 |
+| **決策者** | 首席架構師 |
+| **關聯** | Phase 17 P2 改進 |
+
+## 背景
+
+System Prompt 分散在多個檔案中:
+- `src/services/openclaw.py` - 生產環境 Prompt (約 125 行)
+- `tests/test_prompt_validation.py` - 測試用 Prompt (約 15 行)
+
+問題:
+1. **違反 DRY 原則** - 相同內容重複定義
+2. **維護困難** - 修改需同步多處
+3. **版本不一致風險** - 容易遺漏同步
+
+## 決策
+
+建立 `src/core/prompts.py` 集中管理所有 System Prompt。
+
+### 結構
+
+```python
+# src/core/prompts.py
+
+# 生產環境完整 Prompt
+OPENCLAW_SYSTEM_PROMPT = """..."""
+
+# 測試用精簡 Prompt
+OPENCLAW_TEST_PROMPT = """..."""
+
+# 版本資訊
+PROMPT_VERSION = "7.1"
+PROMPT_UPDATED = "2026-03-26"
+```
+
+### 使用方式
+
+```python
+# 生產環境
+from src.core.prompts import OPENCLAW_SYSTEM_PROMPT
+
+# 測試
+from src.core.prompts import OPENCLAW_TEST_PROMPT
+```
+
+## 影響檔案
+
+| 檔案 | 變更 |
+|------|------|
+| `src/core/prompts.py` | 新增 |
+| `src/services/openclaw.py` | 改為 import |
+| `tests/test_prompt_validation.py` | 改為 import |
+
+## 優點
+
+1. **單一來源** - 所有 Prompt 集中管理
+2. **版本追蹤** - 明確的版本號和更新日期
+3. **易於測試** - 可獨立測試 Prompt 內容
+4. **可擴展** - 未來可加入更多 Prompt 變體
+
+## 替代方案
+
+| 方案 | 優點 | 缺點 | 決定 |
+|------|------|------|------|
+| **維持現狀** | 無需改動 | DRY 違規 | ❌ |
+| **外部檔案 (YAML/JSON)** | 非開發者可編輯 | 增加複雜度 | ❌ |
+| **集中式 Python 模組** | 類型安全,IDE 支援 | - | ✅ |
+
+## 實作完成
+
+- [x] 建立 `src/core/prompts.py`
+- [x] 更新 `openclaw.py` import
+- [x] 更新 `test_prompt_validation.py` import
+- [x] 驗證 import 正確