Compare commits

..

7 Commits

Author SHA1 Message Date
Your Name
2d37383fc6 fix(monitoring): fix false positive NoAlertsReceived2Hours by filtering only alertmanager source 2026-05-28 15:33:17 +08:00
Your Name
3779f6f1e0 fix(metrics): 串入飛輪指標到 /metrics 主端點,修復 FlywheelExecutionRateMissing 死告警
INC-20260507-99ADF2 根因(feedback_full_chain_first_then_fix.md 全鏈分析):

【鏈路斷點】規則層(5/3 加)vs 指標層(5/6 改)vs scrape 層(從沒同步)
- 577250a6(5/3)「反消音化」commit 加了 FlywheelExecutionRateMissing
  rule,要求 110 Prom scrape 到 awoooi_flywheel_execution_success_rate;
- a2c4b3d4(5/6)Codex 改 FlywheelStatsService 用 auto_repair_executions
  作 source of truth(24h 樣本 1-9 筆回 None 給 W-3b watchdog 接管);
- 但 awoooi_flywheel_* 指標自始至終只在 /api/v1/stats/flywheel/metrics
  暴露,110 Prom awoooi-api job 抓的是 /metrics → absent() 永遠 1
  → 自 2026-05-06T04:14 UTC 起 firing 26h+ 屬 dead alert

【修法】只動 awoooi-api 一處,不碰 Codex 設計、不碰 110 Prom 配置:
- main.py /metrics endpoint 改 async,在 generate_latest() 後串入
  FlywheelStatsService.compute() → to_prometheus_lines()。
- 既有 awoooi-api scrape job 自動拿到飛輪指標。
- 完全保留 Codex a2c4b3d4 設計:1-9 筆回 None 讓 W-3b watchdog 雙保險。

【不碰的部分】
- flywheel_stats_service.py 不動:Codex 5/6 LOGBOOK 已明確說明
  「Redis playbook counter 失準 → 用 auto_repair_executions 為唯一信任源」,
  1-9 筆 return None 是配合 ai_slo_watchdog_job W-3b grace+30min 設計的
  反消音化雙保險,不是 bug。

驗證計畫(部署後):
1. curl /metrics | grep awoooi_flywheel  → 看到飛輪指標
2. Prom query awoooi_flywheel_execution_success_rate  → 非空
3. ALERTS{alertname="FlywheelExecutionRateMissing"}  → resolved
4. 30 分鐘觀察 Telegram 不再收 INC-20260507-99ADF2

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-07 15:32:47 +08:00
Your Name
c38227e945 fix(ai): remove 188 ollama provider 2026-05-06 14:33:16 +08:00
Your Name
1b4a6c1e8c fix(awooop): align console with flywheel execution metrics 2026-05-06 00:44:53 +08:00
Your Name
894174da5b fix(ops): harden cold-start schedule recovery 2026-05-05 22:14:54 +08:00
Your Name
10cd9fc025 fix(openclaw): gate alert cloud fallback behind flag 2026-05-05 20:53:12 +08:00
Your Name
8161ccf83f fix(ops): persist host resource guardrails 2026-05-05 16:13:02 +08:00
1866 changed files with 7733 additions and 754989 deletions

832
.claude/settings.json Normal file
View File

@@ -0,0 +1,832 @@
{
"permissions": {
"allow": [
"Read(**)",
"Glob(**)",
"Grep(**)",
"Bash(curl *)",
"Bash(kubectl get *)",
"Bash(kubectl describe *)",
"Bash(kubectl logs *)",
"Bash(kubectl rollout status *)",
"Bash(docker ps *)",
"Bash(docker logs *)",
"Bash(ls *)",
"Bash(cat *)",
"Bash(head *)",
"Bash(tail *)",
"Bash(grep *)",
"Bash(find *)",
"Bash(pwd)",
"Bash(which *)",
"Bash(echo *)",
"Bash(git status *)",
"Bash(git log *)",
"Bash(git diff *)",
"Bash(git branch *)",
"Bash(git remote *)",
"Edit(**)",
"Write(apps/**)",
"Write(packages/**)",
"Write(docs/**)",
"Write(.agents/**)",
"Write(k8s/**)",
"Write(scripts/**)",
"Bash(pnpm *)",
"Bash(npm *)",
"Bash(npx *)",
"Bash(node *)",
"Bash(python *)",
"Bash(python3 *)",
"Bash(pip *)",
"Bash(cd *)",
"Bash(mkdir *)",
"Bash(touch *)",
"Bash(cp *)",
"Bash(mv *)",
"Bash(chmod *)",
"Bash(pytest *)",
"Bash(playwright *)",
"Bash(git add *)",
"Bash(git commit *)",
"Bash(git stash *)",
"Bash(ssh *)",
"Bash(scp *)",
"Bash(export KUBECONFIG=*)",
"Bash(git push:*)",
"Bash(claude --version)",
"Bash(git check-ignore:*)",
"WebSearch",
"Bash(claude plugin:*)",
"Bash(claude --channels)",
"Bash(claude --channels plugin:telegram@claude-plugins-official --help)",
"Bash(bash)",
"Bash(source ~/.zshrc)",
"Bash(~/.bun/bin/bun --version)",
"Bash(env)",
"Bash(claude upgrade:*)",
"Bash(/Users/ogt/.local/bin/claude --help)",
"Bash(CLAUDE_CODE_EXPERIMENTAL_CHANNELS=1 claude --help)",
"Bash(claude --channels plugin:telegram@claude-plugins-official --print \"hello\")",
"Bash(mkdir -p ~/.claude/channels/telegram)",
"Bash(~/.claude/channels/telegram/.env)",
"Bash(~/.bun/bin/bun run:*)",
"Bash(sudo ln:*)",
"Bash(ln -sf ~/.bun/bin/bun /opt/homebrew/bin/bun)",
"Bash(xargs python:*)",
"Bash(uv --version)",
"Bash(pip3 install:*)",
"Bash(pip3 show:*)",
"Bash(ruff *)",
"Bash(mypy *)",
"Bash(black *)",
"Bash(isort *)",
"Bash(timeout *)",
"Bash(wc *)",
"Bash(sort *)",
"Bash(uniq *)",
"Bash(awk *)",
"Bash(sed *)",
"Bash(tr *)",
"Bash(tee *)",
"Bash(xargs *)",
"Bash(test *)",
"Bash([ *)",
"Bash(true)",
"Bash(false)",
"Bash(date *)",
"Bash(sleep *)",
"Bash(kill *)",
"Bash(pkill *)",
"Bash(ps *)",
"Bash(top *)",
"Bash(htop *)",
"Bash(df *)",
"Bash(du *)",
"Bash(free *)",
"Bash(uname *)",
"Bash(hostname *)",
"Bash(whoami)",
"Bash(id *)",
"Bash(groups *)",
"Bash(stat *)",
"Bash(file *)",
"Bash(realpath *)",
"Bash(dirname *)",
"Bash(basename *)",
"Bash(type *)",
"Bash(command *)",
"Bash(hash *)",
"Bash(alias *)",
"Bash(set *)",
"Bash(unset *)",
"Bash(printenv *)",
"Bash(diff *)",
"Bash(cmp *)",
"Bash(comm *)",
"Bash(join *)",
"Bash(paste *)",
"Bash(cut *)",
"Bash(rev *)",
"Bash(nl *)",
"Bash(fmt *)",
"Bash(fold *)",
"Bash(pr *)",
"Bash(expand *)",
"Bash(unexpand *)",
"Bash(od *)",
"Bash(xxd *)",
"Bash(hexdump *)",
"Bash(strings *)",
"Bash(base64 *)",
"Bash(md5sum *)",
"Bash(sha256sum *)",
"Bash(jq *)",
"Bash(yq *)",
"Bash(gh *)",
"Bash(docker build *)",
"Bash(docker run *)",
"Bash(docker exec *)",
"Bash(docker compose *)",
"Bash(docker-compose *)",
"Bash(docker images *)",
"Bash(docker inspect *)",
"Bash(docker network *)",
"Bash(docker volume *)",
"Bash(kubectl apply *)",
"Bash(kubectl create *)",
"Bash(kubectl exec *)",
"Bash(kubectl port-forward *)",
"Bash(kubectl config *)",
"Bash(helm *)",
"Bash(terraform *)",
"Bash(ansible *)",
"Bash(bun *)",
"Bash(deno *)",
"Bash(cargo *)",
"Bash(rustc *)",
"Bash(go *)",
"Bash(java *)",
"Bash(javac *)",
"Bash(gradle *)",
"Bash(mvn *)",
"Bash(make *)",
"Bash(cmake *)",
"Bash(ninja *)",
"Bash(uv *)",
"Bash(poetry *)",
"Bash(pipx *)",
"Bash(virtualenv *)",
"Bash(venv *)",
"Bash(conda *)",
"Bash(brew *)",
"Bash(apt *)",
"Bash(apt-get *)",
"Bash(yum *)",
"Bash(dnf *)",
"Bash(pacman *)",
"Bash(snap *)",
"Bash(flatpak *)",
"Bash(systemctl status *)",
"Bash(journalctl *)",
"Bash(service * status)",
"Bash(nc *)",
"Bash(netstat *)",
"Bash(ss *)",
"Bash(lsof *)",
"Bash(nmap *)",
"Bash(dig *)",
"Bash(nslookup *)",
"Bash(host *)",
"Bash(ping *)",
"Bash(traceroute *)",
"Bash(mtr *)",
"Bash(wget *)",
"Bash(http *)",
"Bash(httpie *)",
"Bash(hadolint apps/api/Dockerfile)",
"Bash(docker info:*)",
"Bash(kubectl cluster-info:*)",
"Read(//var/run/**)",
"Bash(open -a Docker)",
"Bash(git rm:*)",
"Bash(git reset:*)",
"Bash(kubectl --kubeconfig ~/.kube/config get pods -n awoooi -o wide)",
"Bash(kubectl scale:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollam@192.168.0.188 \"docker ps -a | grep -i claw\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker start clawbot && sleep 3 && docker logs clawbot --tail=10\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep clawbot && docker port clawbot\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=30\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot/.env | grep -E ''\\(TG_|TELEGRAM\\)'' | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Mounts}}{{.Source}}:{{.Destination}} {{end}}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep -E ''\\(TG_|TELEGRAM|ENABLED\\)''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''logout\\\\|log.out\\\\|shutdown\\\\|stop'' | tail -20\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''\\(getMe|getUpdates|sendMessage\\).*200'' | tail -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''success\\\\|started\\\\|初始化'' | head -20\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''2026-03-\\(19|20|21\\)'' | grep -i ''error\\\\|fail\\\\|logout\\\\|400\\\\|401'' | head -20\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop clawbot && docker rm clawbot && echo ''✅ OpenClaw 已永久停用''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose ps 2>/dev/null || ls -la docker-compose.yml 2>/dev/null || find /home/ollama -name ''docker-compose*.yml'' -type f 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose up -d && sleep 3 && docker-compose ps\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d 2>&1 || docker run -d --name clawbot --restart unless-stopped -p 8088:8088 -v /var/run/docker.sock:/var/run/docker.sock 192.168.0.110:5000/library/clawbot:stable-v6 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=15 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep -E ''clawbot|litellm''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAHrJ5CMOb4rP0IYJrCUiDViLsnpK69uEUI|'' .env && grep TELEGRAM_BOT_TOKEN .env\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''{{.Names}}'' | grep -i alert\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop alertmanager && docker rm alertmanager && echo ''✅ 舊 AIOPS Alertmanager 已停用''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Image}}\\\\t{{.Status}}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/prometheus/alert_rules.yml 2>/dev/null | grep -A5 ''ClawbotDown\\\\|telegram\\\\|AIOPS'' | head -30\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"find /home/ollama -name ''*.yml'' -type f 2>/dev/null | xargs grep -l ''ClawbotDown\\\\|telegram'' 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''協同警報\\\\|ClawbotDown'' /app 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/prometheus.yml 2>/dev/null | grep -A10 ''alerting\\\\|alertmanager''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -i alert || echo ''✅ 沒有 alertmanager 在運行''\")",
"Bash(jq -r '.status, .components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep clawbot && docker logs clawbot --tail=15\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep TELEGRAM\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAFjDyjAN94QQrjn1gBnFXAyS20EUyozH8c|'' .env && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''ClawBotDown\\\\|ClawbotDown'' /app 2>/dev/null | head -5 || echo ''在程式碼中找不到''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -A10 ''ClawBot\\\\|clawbot'' | head -30\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -i ''clawbot\\\\|claw'' -A5 -B5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since=5m 2>&1 | grep -i ''clawbot\\\\|incident\\\\|alert'' | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 50 2>&1\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|polling\\\\|bot'' | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Ports}}'' | grep -E ''claw|NAME''\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''telegram|Telegram|error|Error'' | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep ollama\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sed -i ''s|host.docker.internal|172.17.0.1|g'' /home/ollama/clawbot-v5/.env && cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose restart clawbot && sleep 3 && docker logs clawbot --tail 30 2>&1\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose restart clawbot && sleep 5 && docker logs clawbot --tail 30 2>&1\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 200\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -10\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -iE ''error|telegram|polling|alert|send'' | tail -30\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d --force-recreate clawbot && sleep 5 && docker logs clawbot 2>&1 | tail -20\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 100\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since 5m 2>&1 | tail -30\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db psql -U postgres -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec -e PGPASSWORD=clawbot123 momo-db psql -U clawbot -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -E ''postgres|db''\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db env | grep -i postgres\")",
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"PGPASSWORD=AwoooiProd2026 psql -h localhost -U awoooi -d awoooi_prod -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
"Bash(KUBECONFIG=~/.kube/config kubectl config get-contexts)",
"Bash(docker tag:*)",
"Bash(docker push:*)",
"Bash(ssh ollama@192.168.0.188 \"cd ~/awoooi-build && find apps/web/src -name ''''*.ts'''' -o -name ''''*.tsx'''' | head -30 | xargs md5sum\")",
"Bash(rsync -avz --exclude 'node_modules' --exclude '.next' --exclude '.turbo' --exclude '*.log' /Users/ogt/awoooi/ ollama@192.168.0.188:~/awoooi-build/)",
"Bash(gh run:*)",
"Bash(APPROVAL_ID=\"ea43578e-17cd-40b9-b4c3-8fe8e92f225c\" __NEW_LINE_76dc92b2699cd7d5__ echo \"=== 檢查 Approval Metadata ===\" curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
"Bash(APPROVAL_ID=\"865ab726-c3b9-447e-86a9-65a6227516e6\" __NEW_LINE_db14ef76ca26af32__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Test resolution\"\"\"\"}')",
"Read(//Users/ogt/awoooi/**)",
"Bash(APPROVAL_ID=\"e9445e68-6c3e-4899-b507-3b9b7bcaf0a7\" __NEW_LINE_680ad94d4896e58a__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Final test\"\"\"\"}')",
"Bash(APPROVAL_ID=\"eb0afb4e-834b-4af7-9ae0-3c58232fdd99\" INCIDENT=\"INC-20260323-F05CD6\" __NEW_LINE_47f1c3803a64b43c__ echo \"=== 簽核前 Incident 狀態 ===\" curl -s \"https://awoooi.wooo.work/api/v1/incidents/$INCIDENT\")",
"Bash(mkdir -p /Users/ogt/awoooi/.claude/hooks)",
"Bash(/Users/ogt/awoooi/.claude/hooks/pre-commit-check.sh:*)",
"Bash(git -C /Users/ogt/awoooi status packages/lewooogo-core/)",
"Bash(git -C /Users/ogt/awoooi ls-files packages/lewooogo-core/src/)",
"Bash(git -C /Users/ogt/awoooi status --short)",
"Bash(git -C /Users/ogt/awoooi add apps/api/pyproject.toml apps/api/scripts/ apps/api/src/ apps/web/.eslintrc.js apps/web/src/ packages/lewooogo-core/.eslintrc.js)",
"Bash(git -C /Users/ogt/awoooi diff --cached --stat)",
"Bash(git -C:*)",
"Bash(for wf:*)",
"Bash(do)",
"Bash(done)",
"Bash(jq 'if type == \"\"\"\"array\"\"\"\" then .[0] | {incident_id, status, decision} else . end')",
"Bash(PYTHONPATH=. python -c \"from src.api.v1.stats import router; print\\(''✅ stats.py 載入成功,路由數:'', len\\(router.routes\\)\\)\")",
"Bash(PYTHONPATH=. pytest tests/ -v --tb=short)",
"Bash(PYTHONPATH=. pytest tests/test_stats_api.py -v --tb=short)",
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=long)",
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=short)",
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py -v --tb=short)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns awoooi && kubectl get all -n awoooi')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=50')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=100 | grep -i telegram')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=50 | grep -i webhook')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=30')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n monitoring | grep alertmanager')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | head -50\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc -n awoooi-prod')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl patch configmap alertmanager-config -n monitoring --type merge -p ''{\"\"data\"\":{\"\"alertmanager.yml\"\":\"\"global:\\\\n resolve_timeout: 5m\\\\n\\\\nroute:\\\\n group_by: [\\\\\"\"alertname\\\\\"\", \\\\\"\"severity\\\\\"\"]\\\\n group_wait: 30s\\\\n group_interval: 5m\\\\n repeat_interval: 4h\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n routes:\\\\n - match:\\\\n severity: critical\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 10s\\\\n repeat_interval: 1h\\\\n - match:\\\\n severity: warning\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 1m\\\\n repeat_interval: 4h\\\\n\\\\nreceivers:\\\\n - name: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n webhook_configs:\\\\n - url: \\\\\"\"http://192.168.0.120:32334/api/v1/webhook/alertmanager\\\\\"\"\\\\n send_resolved: true\\\\n\\\\ninhibit_rules:\\\\n - source_match:\\\\n severity: \\\\\"\"critical\\\\\"\"\\\\n target_match:\\\\n severity: \\\\\"\"warning\\\\\"\"\\\\n equal: [\\\\\"\"alertname\\\\\"\", \\\\\"\"instance\\\\\"\"]\\\\n\"\"}}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl rollout restart deployment/alertmanager -n monitoring && kubectl rollout status deployment/alertmanager -n monitoring')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | grep -A 3 ''url:''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o jsonpath=\"\"{range .items[*]}{.metadata.name}{\\\\\"\" \\\\\"\"}{.spec.containers[*].image}{\\\\\"\"\\\\\\\\n\\\\\"\"}{end}\"\"')",
"Bash(git mv:*)",
"Bash(for file:*)",
"Bash(do echo:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 wooo@192.168.0.120 \"echo ''Connected''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get deployment -n awoooi-prod -o jsonpath=''{range .items[*]}{.metadata.name}{\"\" selector: \"\"}{.spec.selector.matchLabels}{\"\"\\\\n\"\"}{end}''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl delete deployment awoooi-api awoooi-web awoooi-worker -n awoooi-prod\")",
"WebFetch(domain:awoooi.wooo.work)",
"WebFetch(domain:api.awoooi.wooo.work)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o wide')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc,ingress -n awoooi-prod')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'curl -sf http://10.43.125.201:8000/api/v1/health 2>&1 || echo \"\"FAILED\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo nginx -t 2>&1 && sudo cat /etc/nginx/sites-enabled/awoooi* 2>/dev/null || sudo cat /etc/nginx/conf.d/awoooi* 2>/dev/null || echo \"\"No awoooi nginx config found\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'cat /etc/nginx/sites-enabled/* 2>/dev/null | grep -A5 awoooi || cat /etc/nginx/conf.d/* 2>/dev/null | grep -A5 awoooi || ls -la /etc/nginx/ 2>/dev/null || echo \"\"No nginx on this host\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'ls /etc/nginx/sites-enabled/ 2>/dev/null && cat /etc/nginx/sites-enabled/*awoooi* 2>/dev/null || echo \"\"Checking conf.d...\"\" && ls /etc/nginx/conf.d/ 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -l awoooi /etc/nginx/sites-enabled/* 2>/dev/null || grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -r \"\"awoooi\\\\|32334\\\\|32335\"\" /etc/nginx/ 2>/dev/null | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/ && echo \"\"Config copied\"\" && sudo nginx -t 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo \"\"No ssl dir\"\" && sudo ls -la /etc/letsencrypt/live/ 2>/dev/null | head -10')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S sed -i \"\"s|/etc/nginx/ssl/awoooi.crt|/etc/letsencrypt/live/awoooi.wooo.work/fullchain.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && sudo sed -i \"\"s|/etc/nginx/ssl/awoooi.key|/etc/letsencrypt/live/awoooi.wooo.work/privkey.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && echo \"\"Paths fixed\"\" && sudo nginx -t 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"Nginx reloaded!\"\" && sleep 2')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -5')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S grep -rl \"\"awoooi.wooo.work\"\" /etc/nginx/ 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo \"\"FAILED to reach 121\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && sudo nginx -t && sudo nginx -s reload && echo \"\"Cleaned up duplicate config\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -30 /var/log/nginx/error.log 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"api.awoooi\"\" /etc/nginx/ 2>/dev/null || echo \"\"No api.awoooi config found\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep -A5 NEXT_PUBLIC')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get deployment awoooi-web -n awoooi-prod -o yaml | grep -A20 \"\"env:\"\" | head -25')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -10 /var/log/nginx/access.log 2>/dev/null | grep awoooi')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -5 /var/log/nginx/error.log 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S stat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null | grep -E \"\"Modify|Change|Birth\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs -n awoooi-prod -l app=awoooi-web --tail=30 2>/dev/null | grep -i \"\"api\\\\|error\\\\|fetch\"\" | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/access.log 2>/dev/null | grep -E \"\"awoooi.*api\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/awoooi-prod-access.log 2>/dev/null')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- env | grep -i api')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"grep -r \\\\\"\"NEXT_PUBLIC_API_URL\\\\|api.awoooi\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -5 || grep -r \\\\\"\"awoooi.wooo.work\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -3\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"find /app/.next -name \\\\\"\"*.js\\\\\"\" -exec grep -l \\\\\"\"awoooi\\\\\"\" {} \\\\; 2>/dev/null | head -3\"\"')",
"Bash(./scripts/qa-zero-touch.sh)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi.wooo.work.conf /etc/nginx/sites-available/awoooi.wooo.work.conf && sudo nginx -t 2>&1')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"✅ Nginx reloaded with load balancing!\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt && sudo ls -la sentry 2>/dev/null || echo \"\"Sentry 目錄不存在,需要建立\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'sudo mkdir -p /opt/sentry && sudo chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -5')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"0936223270\"\" | sudo -S mkdir -p /opt/sentry && echo \"\"0936223270\"\" | sudo -S chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -10')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ls -la 2>&1 | head -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && git describe --tags 2>/dev/null || git rev-parse --short HEAD')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ./install.sh --help 2>&1 | head -30 || echo \"\"No help available, checking script...\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && nohup ./install.sh --skip-user-creation --no-report-self-hosted-issues > /tmp/sentry-install.log 2>&1 &')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'tail -30 /tmp/sentry-install.log 2>/dev/null || echo \"\"日誌檔案尚未建立,等待中...\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^\\\\▶|^Creating|^Starting|^Error|^✓|Pulling\"\" /tmp/sentry-install.log 2>/dev/null | tail -40')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近進度 ===\"\" && tail -10 /tmp/sentry-install.log')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting\"\" /tmp/sentry-install.log | tail -20')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近 20 行 ===\"\" && tail -20 /tmp/sentry-install.log')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|Building|DONE\"\" /tmp/sentry-install.log | tail -30')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|DONE|Completed|success\"\" /tmp/sentry-install.log | tail -25')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^▶|✓|Error|Completed|success|fail\"\" /tmp/sentry-install.log | tail -15')",
"Bash(redis-cli -h 192.168.0.188 -p 6380 KEYS incident:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/alertmanager.yml 2>/dev/null || cat /etc/alertmanager/alertmanager.yml 2>/dev/null || echo ''Config not found''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 30 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 20 2>&1 | grep -iE ''telegram|send|alert|incident|error''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''TELEGRAM|TG_'' | head -5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''REDIS|POSTGRES|DATABASE'' | head -5\")",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts?active=true\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Active alerts: {len\\(alerts\\)}\\\\\"\"\\)\"\"')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Total alerts: {len\\(alerts\\)}\\\\\"\"\\); [print\\(a[\\\\\"\"labels\\\\\"\"][\\\\\"\"alertname\\\\\"\"]\\) for a in alerts[:5]]\"\"')",
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Status: {d.get\\(\\\\\"\"status\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\(\\\\\"\"message_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"chat_id: {d.get\\(\\\\\"\"chat_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\)\"\"')",
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"status: {d.get\\('status'\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\('message_id'\\)}\\\\\"\"\\); print\\(f\\\\\"\"created_at: {d.get\\('created_at'\\)}\\\\\"\"\\)\"\"')",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *approval*)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *incident*)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *pending*)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/k3s-prod.yaml get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
"Bash(python3 -c \":*)",
"Bash(/tmp/awoooi-tg-secret.yaml:*)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl apply -f /tmp/awoooi-tg-secret.yaml)",
"Bash(for pod:*)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password wooo@192.168.0.188 \"echo connected && ollama --version\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password ollama@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S curl -fsSL https://ollama.com/install.sh | sudo -S sh\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"ollama --version\")",
"Bash(__NEW_LINE_95e9df111552805b__ echo:*)",
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/nginx/awoooi-prod.conf ollama@192.168.0.188:/tmp/awoooi-prod.conf)",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo ''No ssl dir''; echo ''0936223270'' | sudo -S ls -la /etc/nginx/conf.d/ 2>/dev/null | head -10\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -r ''ssl_certificate'' /etc/nginx/ 2>/dev/null | head -5\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -A 20 ''server_name awoooi'' /etc/nginx/sites-enabled/all-sites.conf 2>/dev/null | head -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/sites-enabled/ 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S nginx -s reload 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S systemctl reload nginx 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs openclaw 2>&1 | tail -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Image}}'' 2>&1 | head -15\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i telegram | tail -20\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml 2>&1 | head -30\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"curl -sf ''http://localhost:9093/api/v1/alerts'' | jq ''.data | length'' 2>/dev/null || curl -sf ''http://localhost:9093/api/v2/alerts'' | jq ''length'' 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager wget -qO- ''http://localhost:9093/api/v2/alerts'' 2>&1 | head -100\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n awoooi-prod logs -l app=awoooi-worker --tail=50 2>&1\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"cat /home/ollama/alertmanager/alertmanager.yml 2>/dev/null || docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker cp /tmp/alertmanager.yml alertmanager:/etc/alertmanager/alertmanager.yml && docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml && docker kill -s SIGHUP alertmanager\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker inspect alertmanager --format ''{{range .Mounts}}{{.Source}} -> {{.Destination}}{{println}}{{end}}''\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker restart alertmanager && sleep 3 && docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|webhook\\\\|alert'' | tail -10\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -E ''''POST|webhook|alertmanager|ManualTest''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -iE ''''POST|webhook''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>/dev/null | grep -iE ''''POST.*webhook|alertmanager_webhook|NewFingerprint''''\")",
"Bash(kustomize build:*)",
"Bash(KUBECONFIG=~/.kube/config kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl exec deploy/awoooi-api -n awoooi-prod -- env)",
"Bash(git checkout:*)",
"Bash(jq -r '.status // \"\"\"\"failed\"\"\"\"')",
"Bash(jq -r '.total // \"\"\"\"error\"\"\"\"')",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XLEN awoooi:signals)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XRANGE awoooi:signals - + COUNT 5)",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/sentry/projects/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" __NEW_LINE_583db0bbb6875db0__ echo \"=== Alert Rules ===\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get nodes -o wide && echo ''---'' && kubectl top nodes 2>/dev/null || echo ''metrics-server not installed''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide && echo ''---'' && kubectl get pvc -n awoooi-prod 2>/dev/null && echo ''---'' && kubectl get sc 2>/dev/null && echo ''---'' && kubectl get deploy -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get ns && echo ''---'' && kubectl get svc -A | grep -E ''prometheus|grafana|metrics|signoz|longhorn|argocd'' || echo ''No monitoring/gitops services found''\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || echo ''--- K3s default config \\(no custom config.yaml\\) ---'' && echo ''---'' && sudo k3s check-config 2>/dev/null | head -30 || echo ''check-config not available''\")",
"Bash(ssh wooo@192.168.0.120 \"free -h && echo ''---'' && swapon --show && echo ''---'' && df -h /var/lib/rancher/k3s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n cnpg-system && echo ''---'' && kubectl get svc -n monitoring\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get all -n awoooi-prod -o wide 2>/dev/null && echo ''---QUOTA---'' && kubectl describe quota -n awoooi-prod 2>/dev/null && echo ''---EVENTS---'' && kubectl get events -n awoooi-prod --sort-by=''.lastTimestamp'' 2>/dev/null | tail -20\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get helmcharts -A 2>/dev/null || echo ''No HelmCharts'' && echo ''---'' && kubectl get helmreleases -A 2>/dev/null || echo ''No HelmReleases'' && echo ''---'' && kubectl api-resources | grep -E ''argo|flux|velero|longhorn'' || echo ''No GitOps/Backup CRDs''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get ds -A && echo ''---'' && kubectl get cm -n kube-system | grep -E ''traefik|coredns'' && echo ''---REGISTRIES---'' && sudo cat /etc/rancher/k3s/registries.yaml 2>/dev/null || echo ''No registries.yaml''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get ingress -A 2>/dev/null || echo ''No Ingress'' && echo ''---HPA---'' && kubectl get hpa -A 2>/dev/null || echo ''No HPA'' && echo ''---PDB---'' && kubectl get pdb -A 2>/dev/null || echo ''No PDB'' && echo ''---SYSCTL---'' && cat /proc/sys/net/core/somaxconn && cat /proc/sys/fs/file-max\")",
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s | head -20 && echo ''---K3S-VERSION---'' && k3s --version && echo ''---ETCD-STATUS---'' && sudo k3s etcd-snapshot list 2>/dev/null | head -5 || echo ''No etcd snapshots''\")",
"Bash(ssh wooo@192.168.0.121 \"free -h && swapon --show && echo ''---DISK---'' && df -h /var/lib/rancher/k3s 2>/dev/null\")",
"Bash(ssh wooo@192.168.0.120 \"sudo ls -la /var/lib/rancher/k3s/server/db/ 2>/dev/null && echo ''---TOKEN---'' && sudo cat /var/lib/rancher/k3s/server/token 2>/dev/null | head -1 | cut -c1-20\")",
"Bash(ssh -o ConnectTimeout=10 wooo@192.168.0.120 \"ps aux | grep k3s | grep -v grep | head -3 && echo ''---'' && sudo cat /etc/systemd/system/k3s.service 2>/dev/null | grep -E ''ExecStart|datastore''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S mkdir -p /backup/k3s_etcd 2>/dev/null && echo ''0936223270'' | sudo -S chown ollama:ollama /backup/k3s_etcd 2>/dev/null && echo ''=== 188 備份目錄 ==='' && ls -la /backup/\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"mkdir -p ~/.ssh && chmod 700 ~/.ssh && echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCnTnbjtSPwrI/pN6DByDxsFDOR4+sVnk7hb+eOr+Pb4e7o7QGbyKaJC2eKP7uRBilPqeScuvNKZhwmY8ZOuhjId+ZyLK0jZXHdq3a6tjsQ4MwPGyT2aMaD7x2jKzPbFojR0P5lmQWH2zjxeVuB7UeBIejaYk3gQEMFVES8Xh84yxFvy9jlwKmZFAI0gIhx0nPOTPB7onTyb8L5snUbwQQntoHWYFbb83+wui/kM15aLT5r8uvS2yZdsWWrDvAyuIShde1ceTBevwwqxezH1egXGoGkvZYYF7vHFu3X6jF7Nfp4qVfo0EfFV3omy90HzoFvoEXCC+jIWU0TjUqdEgGIEj2b+YXw3bIs+k+g/0/iJzA5LLUNb2vHVHoUmah4ZNlfiGU7e6hTYXjLjoXJlz9gfv6LYywhgktdThi9sUCn6rzbatlMrY0HNUE6uOwRTugMq1YUEJCvRqeFmtX5yF6xGp+FbOjIr1kMmplbRQRqKIrpQoqEn0+UBXC7OwJNCk8= wooo@mon'' >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && echo ''SSH key 已加入''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCs3nQ11B+V/VEchNR9Uzj57JoKXOJ8S1UVjCTHkUDL8FnrbdPFr0zvpYgX0a/Ipj9wHkqU6z6Ho6MQj3X2+HaK5fC0fZ3aZE1QT2df/x0xXdyka9XSaTFaymKzNTvfmum40koBkNccKyO5SLSjTcoTZCDHP4RqHHu/MYjQMejG7yeyCFmgumrHh5T/0DXPf5zl0Ff1C5U3VCLPxz5vq63JB2dTfrjQLg3sO0ZI3KTZE8aFj3txKz5snDZX3nE1tHZMKLecwwEqi130BtVZcm8zXDqX83gtUDp/WLfPyKCmzZzGf6YgEofIsyrVup8XnD9xNoFmbEeBdFocGWeoIVIn+faOpU22fvQ34L57GHhNQwygZOPKsZa9XNKjayKdKQl3gcAA2wnkZgN0cyIEYvTd3O+Z5Xvff2dat+0sDMK571V+0JEdAMOpQjFO7DkwjKHn/gHLmvRjYLiUOItX9JysFgYuHs8omad2LmeUIkQrBD2I2hyvY49HaJKWctk4Jm0= root@mon'' >> ~/.ssh/authorized_keys && echo ''Root SSH key added''\")",
"Bash(grep -r \"\"\"zod\"\"\" /Users/ogt/awoooi/package.json /Users/ogt/awoooi/apps/*/package.json /Users/ogt/awoooi/packages/*/package.json)",
"Bash(__NEW_LINE_144503b060dfd3dd__ echo:*)",
"Bash(__NEW_LINE_ae2a22b14586d7aa__ echo:*)",
"Bash(__NEW_LINE_e17561a4e55f74d4__ echo:*)",
"Bash(ssh wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | sed ''''s|https://127.0.0.1:6443|https://192.168.0.125:6443|g''''\")",
"Bash(KUBECONFIG=/tmp/kubeconfig-vip.yaml kubectl get nodes)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A --no-headers)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get jobs -A --no-headers)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod --no-headers)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml delete job api-watchdog-29556380 -n wooo-aiops-uat)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -n awoooi-prod)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A)",
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get svc -A)",
"Bash(PGPASSWORD=changeme psql -h 192.168.0.188 -U awoooi -d awoooi_prod -f /Users/ogt/awoooi/apps/api/scripts/migrate_phase18_audit_logs.sql)",
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list)",
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list --workers=1)",
"Bash(KUBECONFIG=~/.kube/config kubectl get nodes --server=https://192.168.0.125:6443 --insecure-skip-tls-verify)",
"Bash(source .venv/bin/activate)",
"Read(//etc/postgresql/14/main/**)",
"Bash(for port:*)",
"Bash(kubectl top:*)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl top pods -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
"Bash(jq -r '.components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
"Bash(tar -xzf velero-v1.13.0-darwin-arm64.tar.gz)",
"Bash(sudo mv:*)",
"Bash(velero version:*)",
"Bash(mkdir -p ~/bin)",
"Bash(mv velero-v1.13.0-darwin-arm64/velero ~/bin/)",
"Bash(~/bin/velero version:*)",
"Bash(k8s/velero/00-namespace.yaml:*)",
"Bash(k8s/velero/01-credentials.yaml:*)",
"Bash(k8s/velero/02-velero-install.yaml:*)",
"Bash(tar -xzf velero.tar.gz)",
"Bash(/tmp/velero-credentials:*)",
"Bash(__NEW_LINE_e85d95513fc16492__ ~/bin/velero install --provider aws --plugins velero/velero-plugin-for-aws:v1.9.0 --bucket velero-backups --secret-file /tmp/velero-credentials --backup-location-config region=minio,s3ForcePathStyle=true,s3Url=http://192.168.0.188:9000 --use-volume-snapshots=false --dry-run -o yaml)",
"Bash(__NEW_LINE_e85d95513fc16492__ head:*)",
"Bash(k8s/velero/README.md:*)",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl apply -f /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml)",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"whoami && hostname && cat /etc/sudoers.d/* 2>/dev/null | head -5 || echo ''no sudoers.d files''\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get nodes 2>&1 || echo ''kubectl failed, checking k3s kubeconfig...'' && ls -la /etc/rancher/k3s/k3s.yaml 2>&1\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo -l 2>&1 | head -20\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''09362233270'' | sudo -S -l 2>&1\")",
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get nodes 2>&1\")",
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml wooo@192.168.0.120:/tmp/velero-install-full.yaml)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S kubectl apply -f /tmp/velero-install-full.yaml 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backupstoragelocation -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=30 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=10 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get secret cloud-credentials -n velero -o jsonpath=''{.data.cloud}'' 2>&1 | base64 -d\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S curl -s http://192.168.0.188:9000/velero-backups/ 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl rollout restart deployment/velero -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backups -n velero 2>&1\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl describe backup test-backup-20260328-2114 -n velero 2>&1 | tail -30\")",
"Bash(sshpass -p:*)",
"Read(//Users/ogt/awoooi/=== 測試 /approvals/**)",
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get svc -n velero -o wide)",
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get pods -n velero -o wide)",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get svc -n velero)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get pods -A | grep -E \\\\\"\"kube-state|state-metrics\\\\\"\"\"\"')",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get ns | grep -E \\\\\"\"wooo|aiops|legacy|old\\\\\"\"\"\"')",
"Bash(KUBECONFIG=~/.kube/config kubectl get ns --no-headers)",
"WebFetch(domain:build.nvidia.com)",
"WebFetch(domain:ollama.com)",
"WebFetch(domain:docs.api.nvidia.com)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"curl -s ''http://admin:admin@localhost:3002/api/search?type=dash-db'' | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''Dashboard 數量: {len\\(d\\)}''\\); [print\\(f\\\\\"\" - {i[''title'']}\\\\\"\"\\) for i in d[:10]]\"\"\")",
"Bash(jq '.ai_provider // .data.ai_provider // \"\"\"\"not found\"\"\"\"')",
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod deployment/awoooi-api --tail=50)",
"Bash(export NVIDIA_API_KEY=\"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\")",
"Bash(curl -s -X POST \"https://integrate.api.nvidia.com/v1/chat/completions\" -H \"Content-Type: application/json\" -H \"Authorization: Bearer $NVIDIA_API_KEY\" -d '{:*)",
"Bash(/tmp/fix-network-policy.yaml:*)",
"Bash(__NEW_LINE_acde7a92ceae01f6__ scp:*)",
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/alertmanager -H 'Content-Type: application/json' -d '{:*)",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"health\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/rules\"\" 2>/dev/null | grep -o \"\"\\\\\"\"name\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"job\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c | sort -rn')",
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/query?query=up\"\" 2>/dev/null | grep -o \"\"\\\\\"\"instance\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
"Bash(for i:*)",
"Bash(do sleep:*)",
"Bash(kubectl patch:*)",
"Bash(ssh wooo@192.168.0.110 \"cat /tmp/runner_clean.log 2>/dev/null; echo ''---''; ps aux | grep ''Runner.Listener'' | grep -v grep | wc -l\")",
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod -l app=awoooi-api --tail=200)",
"Bash(/Users/ogt/awoooi/ops/monitoring/deploy-exporters.sh:*)",
"WebFetch(domain:github.com)",
"WebFetch(domain:docs.ollama.com)",
"Skill(telegram:configure)",
"Skill(telegram:configure:*)",
"Bash(USE_NEW_ENGINE=true pytest tests/test_incident*.py -v --tb=short -x)",
"Bash(USE_NEW_ENGINE=true pytest tests/test_approval_field_alignment.py tests/test_learning_service.py -v --tb=short)",
"Bash(/tmp/debug_approval.py:*)",
"Bash(/tmp/debug_approval2.py:*)",
"Bash(/tmp/bulk_sign.sh:*)",
"Bash(bash /tmp/bulk_sign.sh)",
"Bash(/tmp/check_deploy.py:*)",
"Bash(/tmp/check_buttons.py:*)",
"Bash(ssh ollama@192.168.0.188 \"docker logs openclaw --since=10s 2>&1 | grep -Ev ''\\(GET|POST\\) /health'' | tail -10 && echo ''---'' && docker exec openclaw env | grep OPENAI_API_KEY | cut -c1-30\")",
"Read(//Users/ogt/awoooi/https:/awoooi.wooo.work/_next/static/chunks/app/%5Blocale%5D/**)",
"Bash(find /Users/ogt/awoooi/apps/web -type f \\\\\\(-name *.spec.ts -o -name *.spec.tsx \\\\\\))",
"Bash(kubectl -n awoooi-prod get pods)",
"Bash(kubectl -n production get pods)",
"Bash(ssh -o StrictHostKeyChecking=no wooo@192.168.0.121 \"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl get deployment awoooi-web -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].image}'' && echo '''' && sudo kubectl get pods -n awoooi-prod -l app=awoooi-web --no-headers\")",
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get pods -n awoooi-prod)",
"Bash(for run_id in 166 165)",
"mcp__plugin_playwright_playwright__browser_navigate",
"mcp__plugin_playwright_playwright__browser_take_screenshot",
"Bash(open \"http://192.168.0.110:3001/wooo/awoooi/actions\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/166/jobs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=10\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runners\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/admin/runners\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=3\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/169/jobs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/179/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=180 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=2\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=181 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/172/jobs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/182/logs\" -H \"Authorization: token $TOKEN\")",
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/178\" -H \"Authorization: token $TOKEN\")",
"mcp__plugin_playwright_playwright__browser_snapshot",
"mcp__plugin_playwright_playwright__browser_fill_form",
"mcp__plugin_playwright_playwright__browser_click",
"Bash(GITEA_TOKEN=\"e6c9fecb1f0148939493ae0fa30407d28c91279d\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $GITEA_TOKEN\")",
<<<<<<< Updated upstream
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 /tmp/a4_smoke.py)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.aider_event_service import classify_severity, should_create_incident, build_signal_data; print\\('✓ All imports successful'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py::test_build_signal_data_redacts_secrets_in_annotations -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.workers.aider_event_processor import AiderEventProcessor, get_aider_event_processor, run_aider_event_processor_loop; print\\('✓ All imports successful'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.ai_router import AIRouter; from src.db.base import get_session_factory; print\\('✓ Imports successful, no circular imports'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py tests/test_aider_event_service.py -v --tb=short)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.api.v1 import aider_events; from src.workers.aider_event_processor import run_aider_event_processor_loop; from src.core.config import settings; print\\('AIDER_WEBHOOK_SECRET' in settings.__fields__, 'USE_AIDER_FEEDBACK' in settings.__fields__\\)\")",
"Bash(AIDER_WEBHOOK_SECRET=testsecret /Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.main import app; print\\('app OK; title:', app.title\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -q)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e .[dev] --quiet)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e '.[dev]' --quiet)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from aider_watch_client.aiderw import main as awmain; from aider_watch_client.cli import main as climain; print\\('✓ imports ok'\\)\")",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip show aider-watch-client)",
"Bash(tailscale status *)",
"Bash(kubectl rollout *)",
"Bash(bash /Users/ogt/awoooi/scripts/aider_watch_client/scripts/install.sh)",
"Bash(git rebase *)",
"Bash(/opt/homebrew/bin/aiderw --message \"add docstring to hello function\" --exit)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')",
"Bash(kubectl -n awoooi-prod exec awoooi-api-7b9464c969-8ml88 -- python -c ' *)",
"Bash(kubectl -n awoooi-prod rollout restart deployment/awoooi-api)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api --no-headers)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=120s)",
"Bash(/opt/homebrew/bin/aider-watch flush *)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o wide)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=30s)",
"Bash(kubectl -n awoooi-prod exec awoooi-api-6657fb9cf7-47lcg -- python -c \"import src.services.telegram_gateway as tg; import inspect; lines = inspect.getsource\\(tg\\); idx = lines.find\\('response_body=e.response.text'\\); print\\('FOUND' if idx >= 0 else 'NOT FOUND'\\)\")",
"Read(//opt/gitea/**)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -q)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/unit/test_aider_event_service.py tests/unit/test_aider_model.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_aider_event_service.py tests/test_aider_event_processor.py -v)",
"Bash(kubectl -n awoooi-prod get svc)",
"Bash(kubectl -n openclaw get pod)",
"Bash(kubectl -n awoooi-prod exec awoooi-api-7cd784c875-r4qkz -- python -c ' *)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=10m)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=15m)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=20m)",
"Bash(kubectl -n awoooi-prod get secret awoooi-secrets -o yaml)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=30m)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=2h)",
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2)",
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name} {.status.containerStatuses[0].imageID}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get ingress)",
"Bash(kubectl -n awoooi-prod get svc awoooi-api-svc)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=60s --prefix)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=5m --prefix)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=5m)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=10m)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=90s --prefix)",
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-4x69p --since=5m)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 SCAN 0 MATCH \"playbook:PB-*\" COUNT 500)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 DBSIZE)",
"Bash(wait)",
"Read(//Users/**)",
"Read(//Users/ooo/.claude/**)",
"Bash(mkdir -p /Users/ogt/awoooi/.claude/agents)",
"Bash(cp /Users/ogt/.claude/agents/*.md /Users/ogt/awoooi/.claude/agents/)",
"Bash(kubectl -n awoooi-prod logs --tail=400 -l app=awoooi-api --prefix=true)",
"Bash(kubectl -n awoooi-prod logs --tail=300 awoooi-api-65c69fd649-bxbwp)",
"Bash(kubectl -n awoooi-prod logs --tail=20000 -l app=awoooi-api --prefix=false --since=24h)",
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-bxbwp)",
"Bash(kubectl -n awoooi-prod logs --since=24h -l app=awoooi-api --prefix=false)",
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-fmbxd)",
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-fmbxd)",
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-bxbwp)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=30 --since=30m)",
"Bash(kubectl -n awoooi-prod get pods -o wide)",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{.items[0].metadata.creationTimestamp}')",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=5 --since=5m)",
"Bash(kubectl -n awoooi-prod describe pod -l app=awoooi-api)",
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=20 --since=10m)",
"Bash(kubectl -n awoooi-prod exec deployment/awoooi-api -- python3 -c ' *)",
"Bash(PGPASSWORD=\"\" psql -h 188.188.188.188 -U aiops -d aiops -c \"\\\\d timeline_events\")",
"Bash(kubectl -n awoooi-prod get deploy awoooi-api -o yaml)",
"Bash(PGPASSWORD=\"\" psql --version)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- env)",
"Bash(kubectl -n awoooi-prod logs --tail=500 deploy/awoooi-api)",
"Bash(kubectl cp *)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=up\" 2>&1 | head -c 400')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"sum\\(rate\\(http_requests_total{status=~\\\\\"5..\\\\\"}[5m]\\)\\) / sum\\(rate\\(http_requests_total[5m]\\)\\)\" \"avg\\(rate\\(container_cpu_usage_seconds_total{namespace=\\\\\"awoooi-prod\\\\\",container=\\\\\"awoooi-api\\\\\"}[5m]\\)\\)\" \"pg_stat_activity_count{datname=\\\\\"awoooi\\\\\"}\" \"increase\\(kube_pod_container_status_restarts_total{namespace=\\\\\"awoooi-prod\\\\\"}[15m]\\)\"; do echo \"---- $q\"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=$q\" 2>&1 | head -c 250; echo; done')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT metric_name, count\\(*\\), max\\(trained_at\\) FROM dynamic_baseline_record GROUP BY metric_name;\" 2>&1 | head -20')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT count\\(*\\) as asset_count FROM asset_inventory; SELECT count\\(*\\) as coverage_count FROM asset_coverage_snapshot; SELECT count\\(*\\) as host_cap_count FROM host_capacity_snapshot; SELECT count\\(*\\) as compl_count FROM asset_compliance_snapshot; SELECT count\\(*\\) as rule_cat FROM alert_rule_catalog; SELECT count\\(*\\) as log_cluster FROM log_cluster_record;\" 2>&1')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'python3 -c \" *)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- python3 -c ' *)",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"http_requests_total\" \"container_cpu_usage_seconds_total\" \"container_memory_usage_bytes\" \"kube_pod_container_status_restarts_total\" \"pg_stat_activity_count\" \"node_cpu_seconds_total\" \"node_load1\"; do echo -n \"$q => \"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=count\\($q\\)\" 2>&1 | head -c 180; echo; done')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=container_cpu_usage_seconds_total\" 2>&1 | python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); rs=d[\\\\\"data\\\\\"][\\\\\"result\\\\\"][:3]; [print\\(r[\\\\\"metric\\\\\"]\\) for r in rs]; print\\(\\\\\"total series:\\\\\", len\\(d[\\\\\"data\\\\\"][\\\\\"result\\\\\"]\\)\\)\"')",
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'which kubectl 2>&1; kubectl version --client 2>&1 | head -3; kubectl -n awoooi-prod get deploy awoooi-api 2>&1 | head -3')",
"Bash(kubectl -n awoooi-prod logs --tail=2000 deploy/awoooi-api)",
"Bash(psql --version)",
"WebFetch(domain:core.telegram.org)",
"mcp__plugin_context7_context7__resolve-library-id",
"mcp__plugin_context7_context7__query-docs",
"WebFetch(domain:docs.claude.com)",
"Bash(git tag *)",
"Read(//usr/**)",
"Bash(psql -h 192.168.0.110 -U awoooi_user -d awoooi -c \"SELECT id, alertname, status, confidence, description, created_at FROM approval_records WHERE status='PENDING' AND DATE\\(created_at AT TIME ZONE 'Asia/Taipei'\\) = CURRENT_DATE AT TIME ZONE 'Asia/Taipei' ORDER BY created_at DESC LIMIT 10;\")",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].imagePullPolicy}{\"\\\\n\"}{.spec.template.metadata.labels}{\"\\\\n\"}')",
"Bash(kubectl kustomize *)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=60s)",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --no-headers)",
"Bash(kubectl -n awoooi-prod patch deployment awoooi-api -p '{\"spec\":{\"template\":{\"spec\":{\"containers\":[{\"name\":\"api\",\"image\":\"192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14\"}]}}}}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}{\"\\\\n\"}')",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get pdb awoooi-api-pdb -o jsonpath='{.spec.minAvailable}')",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o wide)",
"Bash(kubectl -n awoooi-prod describe rs -l app=awoooi-api)",
"Bash(kubectl -n awoooi-prod get events --sort-by='.lastTimestamp')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.replicas}{\"\\\\n\"}{.status.replicas}{\"\\\\n\"}{.status.readyReplicas}{\"\\\\n\"}{.status.updatedReplicas}{\"\\\\n\"}')",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.metadata.creationTimestamp}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.conditions[*]}')",
"Bash(kubectl -n awoooi-prod describe deployment awoooi-api)",
"Bash(kubectl -n awoooi-prod get rs -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.spec.template.spec.containers[0].image}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o yaml)",
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=180s)",
"Bash(kubectl -n awoooi-prod set image deployment/awoooi-api api=192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14 --record=false)",
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\t\"}{.status.phase}{\"\\\\n\"}{end}')",
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.replicas}{\"\\\\t\"}{.status.readyReplicas}{\"\\\\t\"}{.status.updatedReplicas}')",
"Bash(bash /tmp/diagnostic.sh)",
"WebFetch(domain:docs.github.com)",
"WebFetch(domain:docs.sonarsource.com)",
"WebFetch(domain:gitea.com)",
"WebFetch(domain:docs.gitea.com)",
"WebFetch(domain:www.sonarsource.com)",
"WebFetch(domain:golangci-lint.run)",
"WebFetch(domain:www.uber.com)",
"Bash(bash scripts/ops/deploy-alerts.sh --dry-run)",
"Bash(bash scripts/ops/deploy-alerts.sh)",
"Bash(promtool check *)",
"WebFetch(domain:openrouter.ai)",
"WebFetch(domain:qwenlm.github.io)",
"WebFetch(domain:aclanthology.org)",
"WebFetch(domain:datanorth.ai)",
"WebFetch(domain:www.infoq.com)",
"WebFetch(domain:aws.amazon.com)",
"WebFetch(domain:artificialanalysis.ai)",
"WebFetch(domain:www.alibabacloud.com)",
"WebFetch(domain:docs.langchain.com)",
"WebFetch(domain:arxiv.org)",
"WebFetch(domain:blog.kilo.ai)",
"WebFetch(domain:www.siliconflow.com)",
"WebFetch(domain:aicompetence.org)",
"Bash(redis-cli -h 192.168.0.188 -p 6380 ping)",
"Bash(redis-cli ping *)"
=======
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest apps/api/tests/test_aider_event_models.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py -v --collect-only)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py --collect-only)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")"
>>>>>>> Stashed changes
],
"deny": [
"Bash(rm -rf *)",
"Bash(git push --force *)",
"Bash(git reset --hard *)",
"Bash(kubectl delete *)",
"Bash(docker rm -f *)"
],
"additionalDirectories": [
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
"/Users/ogt/awoooi/.claude/hooks",
"/Users/ogt/.claude/channels/telegram",
<<<<<<< Updated upstream
"/Users/ogt",
"/Users/ogt/.claude",
"/Users/ogt/awoooi/apps/web/src/app/[locale]/aiops"
]
},
"hooks": {
"PreToolUse": [
{
"matcher": "",
"hooks": [
{
"type": "command",
"command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/awoooi-guard.js 2>/dev/null || true"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/branch-protection.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/commit-quality.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/large-file-warner.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/mcp-health.js"
}
]
}
],
"PostToolUse": [
{
"matcher": "",
"hooks": [
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/audit-log.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/suggest-compact.js"
}
]
}
],
"Stop": [
{
"matcher": "",
"hooks": [
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/cost-tracker.js"
},
{
"type": "command",
"command": "node /Users/ogt/.claude/hooks/session-summary.js"
}
]
}
=======
"/Users/ogt/aider-watch"
>>>>>>> Stashed changes
]
}
}

View File

@@ -0,0 +1,827 @@
{
"permissions": {
"allow": [
"Bash(pnpm install:*)",
"Bash(npm --version)",
"Bash(npm install:*)",
"Bash(pnpm --version)",
"Bash(pnpm dev:*)",
"Bash(pnpm add:*)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/next.config.*)",
"Bash(pkill -f \"next dev\")",
"Bash(curl -sL http://localhost:3000/zh-TW)",
"Bash(curl -s http://localhost:3000/zh-TW)",
"Bash(pnpm --filter web build)",
"Bash(curl -s http://localhost:3001/zh-TW)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3000/zh-TW)",
"Bash(kubectl apply:*)",
"Bash(chmod +x /Users/ogt/awoooi/deploy-infra.sh)",
"Bash(./deploy-infra.sh)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"mkdir -p /tmp/awoooi-k8s\")",
"Bash(sshpass -p '0936223270' scp -o StrictHostKeyChecking=no /Users/ogt/awoooi/k8s/awoooi-prod/01-namespace-quota.yaml /Users/ogt/awoooi/k8s/awoooi-prod/02-network-policy.yaml /Users/ogt/awoooi/k8s/awoooi-prod/04-configmap.yaml wooo@192.168.0.120:/tmp/awoooi-k8s/)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/02-network-policy.yaml 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/04-configmap.yaml 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get ns awoooi-prod -o wide 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get networkpolicy -n awoooi-prod 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get resourcequota,limitrange,configmap -n awoooi-prod 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"rm -rf /tmp/awoooi-k8s\")",
"Bash(PYTHONPATH=. python -c \"from src.main import app; print\\(''Import OK''\\)\")",
"Bash(curl -s http://localhost:8000/api/v1/health/ready)",
"Bash(curl -s http://localhost:8000/api/v1/health/live)",
"Bash(curl -s http://localhost:8000/)",
"Bash(pkill -f \"uvicorn src.main:app\")",
"Bash(pkill -f \"node.*next\")",
"Bash(curl -s http://localhost:8000/api/v1/health)",
"Read(//Users/ogt/awoooi/apps/api/**)",
"Bash(pnpm typecheck:*)",
"Read(//Users/ogt/awoooi/apps/web/**)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/dashboard/demo/spike/clear)",
"Read(//Users/ogt/awoooi/=== 驗證英文頁面 \\(/en/**)",
"Bash(jq \".devDependencies | keys | map\\(select\\(startswith\\(\"\"@playwright\"\"\\) or startswith\\(\"\"playwright\"\"\\)\\)\\)\")",
"Bash(npx playwright:*)",
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
"Bash(__NEW_LINE_ef548029029cdfac__ echo:*)",
"Bash(curl -s http://localhost:8000/api/v1/health -o /dev/null -w \"Backend: HTTP %{http_code}\\\\n\")",
"Bash(echo '=== 已產出的截圖 ===' find /Users/ogt/awoooi/apps/web/test-results -name *.png)",
"Bash(echo '=== Playwright E2E 測試結果 ===' echo echo '📸 截圖證據 \\(test-results/screenshots/\\):' ls -la /Users/ogt/awoooi/apps/web/test-results/screenshots/ __NEW_LINE_db74e5f56e34db17__ echo echo '🎬 錄影證據 \\(.webm\\):' find /Users/ogt/awoooi/apps/web/test-results -name *.webm -exec ls -la {})",
"Bash(__NEW_LINE_db74e5f56e34db17__ echo:*)",
"Bash(source .venv/bin/activate)",
"Bash(python scripts/demo_multisig.py)",
"Bash(python -c \"from src.api.v1.approvals import router; print\\(''✅ Approvals router loaded:'', len\\(router.routes\\), ''routes''\\)\")",
"Bash(npx tsc:*)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
"Bash(python -c \"from src.main import app; print\\(''✅ API loads successfully''\\)\")",
"Bash(jq)",
"Bash(/Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals\" -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://localhost:8000/api/v1/openapi.json)",
"Bash(python -c \":*)",
"Bash(curl -s http://localhost:3000 -o /dev/null -w \"%{http_code}\")",
"Bash(lsof -ti:3000,3001,8000)",
"Bash(curl -s http://localhost:8000/health)",
"Bash(curl -s http://localhost:8000/api/v1/approvals/pending)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3001/zh-TW/demo)",
"Bash(ls -la test-results/*.png)",
"Bash(cp test-results/cpo102-*.png /Users/ogt/awoooi/docs/screenshots/)",
"Bash(ssh ogt@192.168.0.120 'cat /etc/rancher/k3s/k3s.yaml')",
"Bash(python -c \"from src.main import app; print\\(''✅ main.py imports OK''\\)\")",
"Bash(curl -s http://localhost:8000/api/v1/approvals/k8s-test)",
"Bash(sqlite3 awoooi.db \".tables\")",
"Bash(sshpass -p 0936223270 ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo cat /etc/rancher/k3s/k3s.yaml')",
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/apps/api/k3s-prod.yaml get deployments -n awoooi-prod)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -n awoooi-prod 2>/dev/null\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -A 2>/dev/null\")",
"Bash(curl -s -X POST http://localhost:8000/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(APPROVAL_ID=\"b58a0d86-fa4e-43ca-881c-02e978cd7943\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{:*)",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT operation_type, target_resource, namespace, success, dry_run_passed, dry_run_message, error_message, execution_duration_ms, created_at FROM audit_logs ORDER BY created_at DESC LIMIT 1;\" -header -column)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n monitoring -l app=grafana 2>/dev/null\")",
"Bash(curl -s http://192.168.0.188:11434/api/tags)",
"Bash(python -c \"from src.main import app; print\\(''✅ Compile OK''\\)\")",
"Bash(curl -s http://localhost:8000/api/v1/ai/status)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}')",
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Output only JSON: {\\\\\"\"\"\"action\\\\\"\"\"\":\\\\\"\"\"\"test\\\\\"\"\"\"}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\"}' --max-time 30)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 60)",
"Bash(PROMPT='你是 ClawBot AI。分析以下監控數據輸出純 JSON無其他文字。:*)",
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d \"{\"\"model\"\":\"\"llama3.2:1b\"\",\"\"prompt\"\":\"\"$PROMPT\"\",\"\"stream\"\":false,\"\"format\"\":\"\"json\"\",\"\"options\"\":{\"\"num_predict\"\":256,\"\"temperature\"\":0.1}}\" --max-time 60)",
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Harbor service returning 404. Output JSON: {\\\\\"\"\"\"suggested_action\\\\\"\"\"\":\\\\\"\"\"\"RESTART_DEPLOYMENT\\\\\"\"\"\",\\\\\"\"\"\"target_resource\\\\\"\"\"\":\\\\\"\"\"\"harbor\\\\\"\"\"\",\\\\\"\"\"\"namespace\\\\\"\"\"\":\\\\\"\"\"\"default\\\\\"\"\"\",\\\\\"\"\"\"risk_level\\\\\"\"\"\":\\\\\"\"\"\"medium\\\\\"\"\"\",\\\\\"\"\"\"reasoning\\\\\"\"\"\":\\\\\"\"\"\"Service down\\\\\"\"\"\",\\\\\"\"\"\"confidence\\\\\"\"\"\":0.8,\\\\\"\"\"\"affected_services\\\\\"\"\"\":[]}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\",\"\"\"\"options\"\"\"\":{\"\"\"\"num_predict\"\"\"\":128,\"\"\"\"temperature\"\"\"\":0.1}}' --max-time 30)",
"Bash(curl -v -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Say hello\"\"\"\",\"\"\"\"stream\"\"\"\":false}' --max-time 30)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 120)",
"Bash(curl -s http://localhost:8000/api/v1/ai/analyze-and-propose -X POST -H \"Content-Type: application/json\")",
"Bash(curl -s http://localhost:8000/api/v1/dashboard)",
"Bash(ls -la ~/Downloads/image*.png)",
"Bash(ls -la ~/Desktop/image*.png)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/public/*.png)",
"WebFetch(domain:openclaw.ai)",
"Bash(ls -la /Users/ogt/Downloads/*.png)",
"Bash(ls -la /Users/ogt/.gemini/antigravity/brain/*/image*.png)",
"Bash(ls -lat /Users/ogt/Downloads/*.png)",
"Bash(curl -s http://localhost:8000/api/v1/approvals)",
"Bash(curl -s -X GET http://localhost:8000/api/v1/approvals/)",
"Bash(APPROVAL_ID=\"4989729e-e518-4e7e-8dff-5c3269e0c82b\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"ciso-001\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Demo CISO\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"資安確認,核准執行\"\"\"\"}')",
"Bash(curl -s http://localhost:8000/api/v1/webhooks/health)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://localhost:3000)",
"Bash(ls -la apps/web/test-results/*.png)",
"Bash(curl -s http://localhost:3000/zh-TW/demo)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3333/zh-TW/demo)",
"Bash(curl -s http://localhost:8001/api/v1/approvals/pending)",
"Bash(curl -s -X POST http://localhost:8001/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://localhost:8001/openapi.json)",
"Bash(curl -s http://localhost:8001/docs)",
"Bash(curl -s http://localhost:8001/api/v1/webhooks/grafana -X OPTIONS)",
"Bash(pnpm run:*)",
"Bash(node scripts/screenshot-rbac.mjs)",
"Bash(pnpm exec:*)",
"Bash(curl -s http://localhost:3333 -o /dev/null -w \"%{http_code}\")",
"Bash(curl -s http://localhost:3333/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Count: {d[count]}''''\\); [print\\(f''''- {a[id][:8]}... risk={a[risk_level]}''''\\) for a in d[''''approvals''''][:3]]\")",
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Connected: {d[\"\"success\"\"]}''''\\); print\\(f'''' Namespaces: {d[\"\"namespaces\"\"][:3]}...''''\\)\" __NEW_LINE_57ae1c1c812968e7__ echo \"\" echo \"3. 資料庫持久化:\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as approvals FROM approval_records;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as timeline FROM timeline_events;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as audits FROM audit_logs;\")",
"Bash(head -2 __NEW_LINE_9bf9481fbdf30d4e__ echo \"\" echo \"2. 告警收斂跳過 LLM 日誌 \\(應該有 4 次\\):\" grep -c \"alert_converged_skip_llm\" /tmp/api-server.log)",
"Bash(python -m json.tool)",
"Bash(__NEW_LINE_7463bff94cecc20f__ echo:*)",
"Bash(__NEW_LINE_13846c8488c5fa9a__ echo:*)",
"Bash(__NEW_LINE_13846c8488c5fa9a__ ls:*)",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Status: {d[\"\"status\"\"]}''''\\)\" __NEW_LINE_32366ca1bb050259__ echo \"\" echo \"2. 待簽核記錄 \\(含 hit_count\\):\" curl -s http://localhost:8000/api/v1/approvals/pending)",
"Read(//Users/ogt/awoooi/**)",
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=10)",
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=5)",
"Bash(ls -la /Users/ogt/awoooi/apps/api/*.txt /Users/ogt/awoooi/apps/api/*.toml)",
"Bash(ls -la /Users/ogt/awoooi/docker-compose*.yml)",
"Bash(ls /Users/ogt/awoooi/k8s/awoooi-prod/*rbac* /Users/ogt/awoooi/k8s/awoooi-prod/*service-account*)",
"Bash(kubectl kustomize:*)",
"Bash(docker compose:*)",
"Bash(docker info:*)",
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(''''API Status:'''', d.get\\(''''status'''', ''''unknown''''\\)\\)\")",
"Bash(pkill -9 -f uvicorn)",
"Bash(lsof -ti:8000)",
"Bash(open -a Docker)",
"Bash(docker stop:*)",
"Bash(lsof -ti:3000)",
"Bash(docker start:*)",
"Bash(docker ps:*)",
"Bash(curl -s http://localhost:3000 -o /dev/null -w 'HTTP Status: %{http_code}\\\\n')",
"Bash(curl -I http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:8000/openapi.json)",
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3 -w \"\\\\n--- HTTP Status: %{http_code} ---\\\\n\")",
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3)",
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\")",
"Bash(curl -s -D - http://localhost:8000/api/v1/dashboard/stream --max-time 2)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/deploy-infra.sh)",
"Bash(./scripts/deploy-infra.sh)",
"Bash(pnpm --filter @awoooi/web build)",
"Bash(timeout 10 env MOCK_MODE=true OTEL_ENABLED=false uvicorn src.main:app --host 0.0.0.0 --port 8099)",
"Bash(timeout 8 pnpm --filter @awoooi/web dev)",
"Bash(git diff:*)",
"Bash(curl -s -I http://localhost:8000/api/v1/dashboard/stream)",
"Bash(timeout 3 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
"Bash(grep -n \"NEXT_PUBLIC\\\\|API_URL\\\\|localhost\" /Users/ogt/awoooi/apps/web/.env*)",
"Bash(timeout 2 curl -s -D - -N http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:3000/)",
"Bash(python -m py_compile scripts/fire_test_alert.py)",
"Bash(python -m scripts.fire_test_alert --help)",
"Bash(python -m scripts.fire_test_alert)",
"Bash(python -m scripts.fire_test_alert --type k8s_pod_crash)",
"Bash(timeout 3 curl -s -N -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
"Bash(python -m scripts.fire_test_alert --type disk_full)",
"Bash(docker restart:*)",
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://localhost:3000)",
"Bash(docker exec:*)",
"Bash(docker rmi:*)",
"Bash(timeout 5 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:3000 -w \"\\\\nHTTP: %{http_code}\\\\n\")",
"Bash(timeout 120 docker logs awoooi-api -f --since 1s)",
"Bash(curl -s -I -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
"Bash(curl -s -X OPTIONS -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" http://localhost:8000/api/v1/dashboard/stream -I)",
"Bash(node /Users/ogt/awoooi/scripts/verify-sse.js)",
"Bash(python -m scripts.fire_test_alert --type db_connection_timeout)",
"Bash(npm run:*)",
"Bash(docker-compose down:*)",
"Bash(docker-compose build:*)",
"Bash(docker-compose up:*)",
"Bash(pkill -f 'next dev')",
"Bash(node /Users/ogt/awoooi/scripts/test-approval-flow.js)",
"Bash(python -m scripts.fire_test_alert --type pod_crash)",
"Bash(node /Users/ogt/awoooi/scripts/test-k8s-executor.js)",
"Bash(kubectl cluster-info:*)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl cluster-info)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/src/app/[locale]/)",
"Bash(python -c \"from src.api.v1 import audit_logs; print\\(''API module loads OK''\\)\")",
"Bash(curl -s http://localhost:3000/zh-TW/action-logs)",
"Bash(pnpm build:*)",
"Bash(curl -s http://localhost:8000/api/v1/audit-logs)",
"Bash(xargs -r kill -9 2)",
"Bash(/dev/null source:*)",
"Bash(python -c \"from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor; print\\(''''httpx ok''''\\)\")",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT * FROM audit_logs ORDER BY created_at DESC LIMIT 5;\")",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT name FROM sqlite_master WHERE type=''table'';\")",
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT id, event_type, status, title, created_at FROM timeline_events ORDER BY created_at DESC LIMIT 5;\")",
"Bash(curl -s http://localhost:8000/api/v1/audit-logs/stats)",
"Bash(curl -s http://localhost:8000/api/v1/timeline?limit=10)",
"Bash(curl -s \"http://localhost:8000/api/v1/timeline\")",
"Bash(curl -s http://localhost:8000/api/v1/docs)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/setup-guardrails.sh /Users/ogt/awoooi/scripts/ai_code_reviewer.py)",
"Bash(ls -la /Users/ogt/awoooi/apps/web/.eslintrc*)",
"Bash(ls -la scripts/*.py scripts/*.sh .pre-commit-config.yaml .secrets.baseline apps/web/.eslintrc.js)",
"Bash(python -m src.services.test_context_gatherer)",
"Bash(python -m pytest src/services/test_context_gatherer.py -v)",
"Bash(grep -r \"ClawBot\\\\|clawbot\\\\|CLAWBOT\" --include=*.py --include=*.ts --include=*.tsx apps/)",
"Bash(python scripts/e2e_openclaw_test.py)",
"Bash(python -m pytest tests/e2e_network_test.py -v --tb=short)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/apply_prometheus_config.sh /Users/ogt/awoooi/apps/api/scripts/fire_live_alert.py)",
"Bash(./scripts/apply_prometheus_config.sh)",
"Bash(python scripts/fire_live_alert.py oomkilled)",
"Bash(python scripts/fire_live_alert.py oomkilled --api-url http://localhost:8000)",
"Bash(python scripts/fire_live_alert.py highcpu --api-url http://localhost:8000)",
"Bash(python scripts/fire_live_alert.py podcrash --api-url http://localhost:8000)",
"Bash(python -m pytest tests/test_webhook_telegram_integration.py -v)",
"Bash(ls -la /Users/ogt/awoooi/apps/api/.env*)",
"Bash(ls -la /Users/ogt/wooo-aiops/.env*)",
"Bash(ls -la /Users/ogt/AIOps/.env*)",
"Bash(/Users/ogt/awoooi/apps/api/.env:*)",
"Bash(/tmp/deploy-188-home.sh:*)",
"Bash(chmod +x /tmp/deploy-188-home.sh)",
"Bash(scp /tmp/awoooi-api-deploy.tar.gz /tmp/deploy-188-home.sh ollama@192.168.0.188:/tmp/)",
"Bash(ssh ollama@192.168.0.188 \"bash /tmp/deploy-188-home.sh\")",
"Bash(ssh ollama@192.168.0.188 \"curl -s http://localhost:8000/api/v1/webhooks/health\")",
"Bash(ssh ollama@192.168.0.188 \"tail -50 /tmp/openclaw.log\")",
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && source .venv/bin/activate && pip install sqlalchemy aiosqlite -q && pip install httpx python-dotenv pydantic-settings -q\")",
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''uvicorn src.main:app'' 2>/dev/null; sleep 1; source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > /tmp/openclaw.log 2>&1 & sleep 3 && curl -s http://localhost:8000/api/v1/webhooks/health\")",
"Bash(ssh ollama@192.168.0.188:*)",
"Bash(pkill -f ngrok)",
"Bash(pkill -f \"ssh -fN.*8001\")",
"Bash(ssh -fN -L 8001:localhost:8000 ollama@192.168.0.188)",
"Bash(curl -s http://localhost:8001/api/v1/webhooks/health)",
"Bash(BOT_TOKEN=\"8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk\" curl -s \"https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo\")",
"Bash(curl -s https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo)",
"Bash(curl -s http://localhost:8001/api/v1/webhooks/)",
"Bash(curl -s http://localhost:8001/)",
"Bash(curl -s http://localhost:8001/api/v1/health)",
"Bash(scp /tmp/awoooi-api-v7.tar.gz ollama@192.168.0.188:/tmp/)",
"Bash(tar -czvf /tmp/awoooi-api-v7.1.tar.gz src/ requirements.txt pyproject.toml)",
"Bash(scp /tmp/awoooi-api-v7.1.tar.gz ollama@192.168.0.188:/tmp/)",
"Bash(ssh ollama@192.168.0.188 \"tail -10 /tmp/openclaw.log | grep -E ''''clickhouse|signoz_gold''''\")",
"Bash(ssh ogt@192.168.0.188 \"cd /home/ollama/awoooi-api && tail -50 nohup.out 2>/dev/null || journalctl -u awoooi-api --no-pager -n 50 2>/dev/null || echo ''請手動檢查日誌''\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8123/ -d \"SELECT 1 FORMAT JSONEachRow\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:11434/api/tags)",
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 root@192.168.0.188 \"echo ok\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8001/health)",
"Bash(ssh root@192.168.0.188 \"cat /tmp/openclaw.log 2>/dev/null | tail -100 || echo ''Log file not found''\")",
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/signoz_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/telegram_gateway.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/webhooks.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/models/ai.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/models/)",
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''''uvicorn src.main:app'''' && sleep 2 && nohup .venv/bin/python3 -m uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/health)",
"Bash(curl -s --connect-timeout 10 http://192.168.0.188:8000/health)",
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% test\"\"}')",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% - 統帥全自主驗收 v2\"\"}')",
"Bash(curl -s --connect-timeout 30 --max-time 120 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s --connect-timeout 30 --max-time 180 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"k8s_pod_crash\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"inventory-api\"\",\"\"namespace\"\":\"\"commerce\"\",\"\"message\"\":\"\"Pod crash - 統帥終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo OK && ps aux | grep uvicorn | grep -v grep | head -2\")",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"ssl_expiry\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"nginx-ingress\"\",\"\"namespace\"\":\"\"ingress\"\",\"\"message\"\":\"\"SSL 即將過期 - 終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"db_connection_timeout\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"postgres-primary\"\",\"\"namespace\"\":\"\"database\"\",\"\"message\"\":\"\"DB 連線逾時 - SignOz 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"service_404\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"auth-service\"\",\"\"namespace\"\":\"\"identity\"\",\"\"message\"\":\"\"Service 404 - SignOz + Ollama 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"warning\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"recommendation-engine\"\",\"\"namespace\"\":\"\"ml\"\",\"\"message\"\":\"\"CPU 78% - Ollama 最終測試\"\"}' --connect-timeout 30 --max-time 200)",
"Bash(scp apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/openclaw.py)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/http_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/main.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/config.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/health.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
"Bash(ssh -o ConnectTimeout=5 ollama@192.168.0.188 \"ps aux | grep uvicorn | grep -v grep\")",
"Bash(curl -s -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" -X OPTIONS http://192.168.0.188:8000/api/v1/health -v)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/health)",
"Bash(curl -s -N --max-time 3 http://192.168.0.188:8000/api/v1/dashboard/stream)",
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"%{http_code}\")",
"Bash(open http://localhost:3000/zh-TW)",
"Bash(open http://localhost:3001/zh-TW)",
"Bash(curl -s -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/dashboard/stream --max-time 3)",
"Bash(curl -s -I -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/health)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/pending)",
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals?status=pending_approval\")",
"Bash(xargs sed:*)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/history?limit=5\")",
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/approved)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline?limit=10\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/action-logs\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=10\")",
"Bash(ssh ogt@192.168.0.188 \"kubectl get nodes\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/k8s-test\")",
"Bash(scp /Users/ogt/awoooi/apps/api/k3s-prod.yaml ogt@192.168.0.188:~/awoooi-api/k3s-prod.yaml)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=5\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"cat /etc/rancher/k3s/k3s.yaml\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"echo ''SSH OK'' && pwd\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''SSH OK'' && pwd && ls -la ~/awoooi-api/ 2>/dev/null || echo ''Directory not found''\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sshpass -p ''0936223270'' scp -o StrictHostKeyChecking=no wooo@192.168.0.120:/etc/rancher/k3s/k3s.yaml ~/awoooi-api/k3s-prod.yaml && sed -i ''s/127.0.0.1/192.168.0.120/g'' ~/awoooi-api/k3s-prod.yaml && echo ''Kubeconfig deployed!'' && head -10 ~/awoooi-api/k3s-prod.yaml\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn'' 2>/dev/null; sleep 1; nohup .venv/bin/uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload > nohup.out 2>&1 & sleep 3; echo ''=== API Restarted ==='' && tail -20 nohup.out\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn src.main'' || true\")",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 5)",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ollama@192.168.0.188 \"cd ~/awoooi-api && source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
"Bash(sshpass -p:*)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 10)",
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=8\")",
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -s http://localhost:8000/api/v1/approvals/pending | jq -r \"\".approvals[] | \\\\\"\"ID: \\\\\\(.id\\) | Action: \\\\\\(.action\\)\\\\\"\"\"\"')",
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/health)",
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/approvals/pending)",
"Bash(ssh ollama@192.168.70.188 \"ps aux | grep uvicorn | grep -v grep | head -3\")",
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.70.188 \"echo ''SSH Connected''\")",
"Bash(ping -c 2 -t 5 192.168.70.188)",
"Bash(curl -s --connect-timeout 10 https://awoooi.wooo.tw/api/v1/health)",
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo ''SSH Connected to 188 Base''\")",
"Bash(grep -B 5 -A 30 \"async def add_signature\" /Users/ogt/awoooi/apps/api/src/services/*.py)",
"Bash(ssh ogt@192.168.0.188 \"cd /home/ogt/awoooi && docker compose ps\")",
"Bash(ls -la .env*)",
"Bash(.env:*)",
"Bash(timeout 15 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
"Bash(timeout 20 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
"Bash(timeout 25 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ogt@192.168.0.188 \"cd /home/ogt/wooo-aiops && docker compose ps clawbot 2>/dev/null || docker ps | grep -i claw\")",
"Bash(ls -la ~/.ssh/*.pub)",
"Bash(ssh -i ~/.ssh/id_rsa -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o PasswordAuthentication=no ogt@192.168.0.188 \"echo connected\")",
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/logOut\")",
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/close\")",
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/getUpdates?timeout=3&limit=1\")",
"Bash(ping -c 1 192.168.0.188)",
"Bash(python -m tests.test_redis_multisig)",
"Bash(curl -v -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(python3 -c \":*)",
"Bash(echo ' 無法連線' __NEW_LINE_8fc87454f9798a7d__ echo echo [結論]: echo ' /signals 端點尚未部署到 .188' echo ' 程式碼已完成,需要執行:' echo \" cd apps/api && docker build -t awoooi-api . && docker-compose up -d\")",
"Bash(__NEW_LINE_dc88f37970737861__ cd:*)",
"Bash(__NEW_LINE_dc88f37970737861__ echo:*)",
"Read(//Users/**)",
"Bash(tail -20 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 2] 等待容器啟動 \\(10 秒\\)...\" sleep 10 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 3] 檢查容器狀態...\" docker compose ps)",
"Bash(tail -5 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.2] 重建 API 容器 \\(含 Signal Worker\\)...\" docker compose build api)",
"Bash(1 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.4] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.5] 檢查容器狀態...\" docker compose ps)",
"Bash(__NEW_LINE_f4c8301ec5249760__ echo:*)",
"Bash(__NEW_LINE_21ba3cf3700d942d__ cd:*)",
"Bash(1 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.3] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.4] 檢查容器狀態...\" docker compose ps api)",
"Bash(1 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[2] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[3] 發送測試 Signal...\" curl -s -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(__NEW_LINE_564908ddf866c081__ echo:*)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase63_aggregation.py)",
"Bash(python scripts/test_phase63_aggregation.py)",
"Bash(xargs -r docker exec -i awoooi-redis redis-cli DEL)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_race_condition.py)",
"Bash(python scripts/test_race_condition.py)",
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase64_proposal.py)",
"Bash(python scripts/test_phase64_proposal.py)",
"Bash(python agent.py --alert FINAL_PHASE_6_TEST)",
"Bash(AWOOOI_REDIS_URL=\"redis://localhost:6379/0\" python agent.py --alert FINAL_PHASE_6_TEST)",
"Bash(curl -s http://localhost:8000/api/v1/incidents)",
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-06085B/proposal)",
"Bash(grep -r \"mock\\\\|Mock\\\\|MOCK\\\\|fake\\\\|Fake\\\\|dummy\\\\|hardcode\" /Users/ogt/awoooi/apps/web/src --include=*.tsx --include=*.ts -l)",
"Bash(NEXT_PUBLIC_API_URL=http://localhost:8000 pnpm next build --no-lint)",
"Bash(grep -v \"Traceback\\\\|File \"\"/usr\\\\|^\\\\s*$\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Signal Count: {len\\(d[\"\"signals\"\"]\\)}''''\\); [print\\(f'''' - {s[\"\"alert_name\"\"]} \\({s[\"\"signal_id\"\"]}\\)''''\\) for s in d[''''signals'''']]\")",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3003/zh-TW)",
"Bash(curl -s -X GET \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3003\" -H \"Access-Control-Request-Method: GET\" -v)",
"Bash(grep -r TELEGRAM /Users/ogt/awoooi/apps/api/.env*)",
"Bash(grep -r TELEGRAM_BOT_TOKEN /Users/ogt/awoooi --include=*.env* --include=*.yaml --include=*.yml)",
"Bash(curl -s -I -X OPTIONS \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\")",
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
"Bash(python /tmp/e2e_drill.py)",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); i=[x for x in d[''''incidents''''] if x[''''incident_id'''']==''''INC-20260322-06085B''''][0]; print\\(f\"\"Incident: {i[''''incident_id'''']}\"\"\\); print\\(f\"\"Signals: {i[''''signal_count'''']}\"\"\\); print\\(f\"\"Updated: {i[''''updated_at'''']}\"\"\\)\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test\")",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"15ab6844-ca4e-4a13-aead-dc71cd342445\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"api-gateway\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL - PodCrashLoopBackOff\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"RESTART_DEPLOYMENT\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
"Bash(curl -s -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\" http://localhost:3000/zh-TW)",
"Bash(curl -s -I \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-19DF60/proposal)",
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"942e762e-fb97-480f-b21a-d3be67fa70b1\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"core-system\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL TAKE 2 - 二次實彈演習\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"INVESTIGATE_SERVICE\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Accept: application/json\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Incidents: {d[\"\"count\"\"]}''''\\); [print\\(f'''' - {i[\"\"incident_id\"\"]} | {i[\"\"severity\"\"]} | {i[\"\"signal_count\"\"]} signals | {i[\"\"affected_services\"\"]}''''\\) for i in d[''''incidents'''']]\")",
"Bash(curl -s \"http://localhost:8000/api/v1/approvals/pending\" -H \"Origin: http://localhost:3000\")",
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Pending: {d[\"\"count\"\"]} approvals''''\\); [print\\(f'''' - {a[\"\"id\"\"][:8]}... | {a[\"\"risk_level\"\"]} | {a[\"\"action\"\"][:30]}...''''\\) for a in d[''''approvals''''][:3]]\")",
"Bash(mkdir -p /Users/ogt/awoooi/apps/web/public/fonts)",
"Bash(curl -sL -o DSEG7Classic-Bold.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff2\")",
"Bash(curl -sL -o DSEG7Classic-Bold.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff\")",
"Bash(curl -sL -o DSEG7Classic-Regular.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff2\")",
"Bash(curl -sL -o DSEG7Classic-Regular.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff\")",
"Bash(pnpm next:*)",
"Bash(chmod +x /Users/ogt/awoooi/scripts/bootstrap_prod.sh)",
"Bash(/Users/ogt/awoooi/.env:*)",
"Bash(grep -E \"^\\\\.env$|03-secrets\\\\.yaml\" .gitignore)",
"Bash(echo 'Adding to .gitignore...' if ! grep -q ^.env$ .gitignore)",
"Bash(then echo:*)",
"Bash(git add:*)",
"Bash(git commit:*)",
"Bash(git push:*)",
"Bash(git remote:*)",
"Bash(gh repo:*)",
"Bash(gh api:*)",
"Bash(gh run:*)",
"Bash(ls -la pnpm-*.yaml package.json turbo.json)",
"Bash(git status:*)",
"Bash(gh workflow:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod 2>&1 | grep -i ''cors'' -A 5 -B 5\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-79948cbbbf-b8cgj -n awoooi-prod --tail=100\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o name | tail -1 | xargs kubectl logs -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.OPENCLAW_TG_USER_WHITELIST}'' | base64 -d\")",
"Bash(ssh wooo@192.168.0.120 'kubectl patch secret awoooi-secrets -n awoooi-prod --type='\"''\"'json'\"''\"' -p='\"''\"'[:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api -n awoooi-prod && kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod && kubectl rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-747967b787-fcx2r -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.110 \"ps aux | grep -E ''actions-runner|Runner'' | grep -v grep\")",
"Bash(curl -sf http://192.168.0.120:32334/api/v1/health)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fd795cd87-rdpgn -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health | jq .status\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://localhost:32334/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.125.201:8000/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.105.105:3000/ -o /dev/null && echo ''Web OK''\")",
"Bash(ssh ogt@192.168.0.188 \"ls -la /etc/nginx/sites-available/\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-795c95ff76-wch2p -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && ss -tlnp | grep 32334\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://127.0.0.1:32334/api/v1/health | head -c 200\")",
"Bash(ssh wooo@192.168.0.120 \"sudo ufw status 2>/dev/null || sudo iptables -L INPUT -n | head -20\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health | head -c 100\")",
"Bash(ssh wooo@192.168.0.110 \"curl -v --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 | head -30\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service 2>/dev/null | grep -i exec || ps aux | grep k3s | head -3\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service\")",
"Bash(ssh wooo@192.168.0.120 \"netstat -tlnp 2>/dev/null | grep 32334 || ss -tlnp | grep 32334\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:31234/health 2>&1 | head -c 100\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-nginx-ingress -n awoooi-prod -o yaml\")",
"Bash(curl -sk https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -sk -I -X OPTIONS https://awoooi.wooo.work/api/v1/health -H \"Origin: https://awoooi.wooo.work\" -H \"Access-Control-Request-Method: GET\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
"Bash(ssh wooo@192.168.0.120 \"sudo iptables -t nat -L KUBE-NODEPORTS -n 2>/dev/null | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"sudo netstat -tlnp | grep -E ''32334|32335''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tlnp 2>/dev/null | grep -E ''32334|32335'' || netstat -tln | grep -E ''32334|32335''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tln\")",
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120; /home/wooo/bin/kubectl get svc -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"which kubectl || find /usr -name kubectl 2>/dev/null | head -1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod && kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80\")",
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"ls -la /home/wooo/.kube/ && cat /home/wooo/.kube/config-120 2>/dev/null | head -20 || cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"sudo cat /etc/rancher/k3s/k3s.yaml | head -20\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"which kubectl 2>/dev/null || find /home/wooo -name kubectl 2>/dev/null | head -1 || ls -la /home/wooo/bin/\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl describe pod awoooi-api-546b88465d-lb8zm -n awoooi-prod | tail -40\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- wget -qO- http://localhost:8000/api/v1/health 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 20 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 120:32334''\")",
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 121:32334''\")",
"Bash(ssh wooo@192.168.0.110 \"ssh wooo@192.168.0.120 ''cat /etc/rancher/k3s/k3s.yaml 2>/dev/null || echo No k3s.yaml''\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide | grep Running\")",
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.121 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
"Bash(pip3 show:*)",
"Bash(docker build:*)",
"Bash(docker version:*)",
"Bash(docker run:*)",
"Bash(curl -vI -H \"Origin: https://awoooi.wooo.work\" http://localhost:8889/api/v1/health)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get endpoints awoooi-api-svc -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"sudo -n ufw status 2>/dev/null || sudo -n iptables -L INPUT -n 2>/dev/null | head -20 || echo ''Need sudo for firewall check''\")",
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
"Bash(ssh wooo@192.168.0.121 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
"Bash(ssh wooo@192.168.0.120 \"ps aux | grep -E ''kube-proxy|k3s'' | grep -v grep | head -5\")",
"Bash(ssh wooo@192.168.0.120 \"cat /proc/sys/net/ipv4/ip_forward\")",
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s 2>/dev/null | head -15 || ps aux | grep ''k3s server'' | grep -v grep\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://127.0.0.1:32334/api/v1/health 2>&1 || echo ''LOCALHOST NodePort FAILED''\")",
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''EXTERNAL IP NodePort FAILED''\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/iptables/rules.v4 2>/dev/null || iptables-save 2>/dev/null | grep -E ''DROP|REJECT|32334|32335'' | head -10 || echo ''Cannot read iptables without sudo''\")",
"Bash(ssh wooo@192.168.0.121 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''Worker->Master NodePort FAILED''\")",
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || ls -la /etc/rancher/k3s/ 2>/dev/null || echo ''No K3s config found''\")",
"Bash(ssh wooo@192.168.0.120 \"netstat -an 2>/dev/null | grep 32334 || ss -an | grep 32334 || echo ''No socket found for 32334''\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L INPUT -n 2>&1 | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | head -20\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L KUBE-ROUTER-INPUT -n 2>&1 | head -30\")",
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | grep -i awoooi || echo ''NO AWOOOI RULES FOUND''\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc awoooi-api-svc -n awoooi-prod -o yaml | grep -A5 ''spec:''\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get networkpolicy -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl apply -f - 2>&1\")",
"Bash(curl -sf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -skf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -sI https://awoooi.wooo.work/)",
"Bash(curl -skI https://awoooi.wooo.work/)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl rollout restart deployment/awoooi-api -n awoooi-prod && /home/wooo/kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
"Bash(curl -sf https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -skf https://awoooi.wooo.work/api/v1/health)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 40 2>&1\")",
"Bash(for i:*)",
"Bash(do curl:*)",
"Bash(echo \"Request $i sent\")",
"Bash(done)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30 2>&1\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep OTEL\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- env | grep OTEL\")",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c \"\"import socket; s=socket.socket\\(\\); s.settimeout\\(5\\); s.connect\\(\\(''192.168.0.188'', 24317\\)\\); print\\(''✅ Connection to 24317 OK''\\); s.close\\(\\)\"\" 2>&1\")",
"Bash(curl -vI https://awoooi.wooo.work)",
"Bash(curl -vI https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -sf -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{\"\"source\"\": \"\"prometheus\"\", \"\"severity\"\": \"\"P1\"\", \"\"message\"\": \"\"Test alert from CLI\"\"}')",
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data.WEBHOOK_HMAC_SECRET}'''' 2>/dev/null\")",
"Bash(timeout 15 curl -N -s https://awoooi.wooo.work/api/v1/dashboard/stream)",
"Bash(bash:*)",
"Bash(curl -s https://awoooi.wooo.work/api/v1/metrics/gold)",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT metric_name FROM signoz_metrics.distributed_samples_v4 WHERE unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 1800\\) * 1000 LIMIT 20 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as trace_count FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' | python3 -m json.tool 2>/dev/null | head -30\")",
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"which kubectl || ls -la ~/bin/kubectl 2>/dev/null || ls -la /usr/local/bin/kubectl 2>/dev/null || echo ''kubectl not found''\")",
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"ls -la ~/.kube/ 2>/dev/null; cat ~/.kube/config 2>/dev/null | head -20 || echo ''checking k3s default...''; sudo cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -5 || echo ''no k3s config''\")",
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl get configmap awoooi-config -n awoooi-prod -o yaml 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
"Bash(nc -zv 192.168.0.188 24317)",
"Bash(curl -s http://192.168.0.188:24318/v1/traces -X POST -H \"Content-Type: application/json\" -d '{}')",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 24 HOUR GROUP BY serviceName ORDER BY cnt DESC LIMIT 20 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT service_name, count\\(\\) as cnt FROM signoz_logs.distributed_logs WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE GROUP BY service_name ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SHOW TABLES FROM signoz_logs FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as total FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT JSONExtractString\\(resources_string, ''service.name''\\) as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_logs.distributed_logs_v2 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT resources_string[''service.name''] as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 300000000000\\) GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT body, resources_string FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 60000000000\\) LIMIT 1 FORMAT JSONEachRow\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 2 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, timestamp FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE ORDER BY timestamp DESC LIMIT 5 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, formatDateTime\\(timestamp, ''%Y-%m-%d %H:%M:%S''\\) as ts FROM signoz_traces.distributed_signoz_index_v2 ORDER BY timestamp DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_spans FORMAT TabSeparated\")",
"Bash(ssh wooo@192.168.0.188 \"docker ps | grep -E ''otel|signoz''\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT metric_name, sum\\(value\\) as total FROM signoz_metrics.distributed_samples_v4 WHERE metric_name LIKE ''otelcol%span%'' AND unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 300\\) * 1000 GROUP BY metric_name FORMAT TabSeparated\")",
"Bash(for t:*)",
"Bash(do)",
"Bash(echo -n \"$t: \")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.$t FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \":*)",
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v3 FORMAT TabSeparated\")",
"Bash(AWOOOI_API_URL=https://awoooi.wooo.work WEBHOOK_HMAC_SECRET=\"CHANGE_ME_TO_RANDOM_64_CHARS\" python scripts/fire_live_alert.py oomkilled)",
"Bash(timeout 10 curl -sN https://awoooi.wooo.work/api/v1/dashboard/stream)",
"Bash(curl -s https://awoooi.wooo.work/api/v1/dashboard)",
"Bash(npm list:*)",
"Bash(node scripts/verify-frontend.js)",
"Bash(node /Users/ogt/awoooi/scripts/verify-frontend.js)",
"Bash(python -c \"from src.services.proposal_service import ProposalService; print\\(''''✅ ProposalService OK''''\\)\")",
"Bash(python -c \"from src.services.openclaw import OpenClawService; print\\(''''✅ OpenClawService OK''''\\)\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents)",
"Bash(jq -r \".incidents[:2] | .[] | \"\"\\\\\\(.incident_id\\) - \\\\\\(.status\\) - \\\\\\(.severity\\)\"\"\")",
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
"Bash(kubectl logs:*)",
"Bash(ssh ogt@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30\")",
"Bash(curl -sv -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/health)",
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152\")",
"Bash(curl -sv \"http://192.168.0.120:32334/api/v1/incidents\")",
"Bash(curl -s --retry 3 --retry-delay 2 \"http://192.168.0.120:32334/api/v1/health\")",
"Bash(curl -s --retry 3 --retry-delay 2 http://192.168.0.120:32334/api/v1/health)",
"Bash(do echo:*)",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/proposal\" -H \"Content-Type: application/json\")",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-D6C6A0/proposal\" -H \"Content-Type: application/json\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/approvals/pending)",
"Bash(kubectl get:*)",
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://192.168.0.120:32334/api/v1/health)",
"Bash(curl -s http://awoooi.wooo.work/api/v1/health)",
"Bash(curl -s http://awoooi.wooo.work/api/v1/approvals/pending)",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/approvals/pending -k)",
"Bash(ssh root@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
"Bash(ssh root@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=30\")",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/timeline -k)",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/incidents -k)",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals?include_history=true\" -k)",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152\" -k)",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/audit-logs?limit=10\" -k)",
"Bash(curl -sL https://awoooi.wooo.work/api/v1/audit-logs?limit=10 -k)",
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=100\")",
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-web --tail=50\")",
"Bash(ssh ogt@192.168.0.188 \"kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml logs -n awoooi-prod -l app=awoooi-api --tail=100 2>/dev/null || docker logs awoooi-api --tail=100 2>/dev/null\")",
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals/pending\" -k -w \"\\\\n\\\\nHTTP: %{http_code}\\\\nTime: %{time_total}s\\\\n\")",
"Bash(curl -sL -X POST https://awoooi.wooo.work/api/v1/approvals/182e07c1-118a-49d7-b71c-7d33c5484d9b/sign -H 'Content-Type: application/json' -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"test-debug\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Debug Test\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"Testing\"\"\"\"}' -k)",
"Bash(curl -s https://wwooo.aiops.tw/api/v1/health)",
"Bash(curl -s https://wwooo.aiops.tw/api/v1/incidents?limit=5)",
"Bash(curl -s https://wwooo.aiops.tw/api/v1/approvals/pending)",
"Bash(curl -v -s \"https://wwooo.aiops.tw/api/v1/health\")",
"Bash(curl -s \"https://wwooo.aiops.tw/\")",
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/health\")",
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/incidents?limit=5\")",
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-867f67f55d-kvdl2 -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod --show-labels | grep worker\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector/matchLabels\"\", \"\"value\"\": {\"\"system\"\": \"\"awoooi\"\"}}]''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=15\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=40\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -E ''signal_worker|redis_pool|INFO'' | tail -10\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health\")",
"Bash(ssh wooo@192.168.0.120 'curl -s -X POST \"\"http://localhost:32334/api/v1/webhooks/signals\"\" -H \"\"Content-Type: application/json\"\" -d \"\"{:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker|api''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && echo ''==='' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/incidents?limit=5\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/approvals/pending\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | head -50\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''.components''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret -n awoooi-prod -o name\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.WEBHOOK_HMAC_SECRET}'' | base64 -d\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=20 2>&1 | grep -E ''signal|incident|telegram|INFO''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=5''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''telegram|notification|send'' | tail -10\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=2'' && echo ''---'' && curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker && echo ''---'' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod --tail=40\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o jsonpath=''{.spec.podSelector}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector\"\", \"\"value\"\": {\"\"matchLabels\"\": {\"\"system\"\": \"\"awoooi\"\"}}}]''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=20\")",
"Bash(ls -la /Users/ogt/awoooi/apps/api/scripts/fire*.py)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=3''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''proposal|approval|llm|ai|ollama|generate'' | tail -20\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''''{.data}''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data}'' | tr '','' ''\\\\n''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c ''import os; print\\(os.getenv\\(\"\"DATABASE_URL\"\", \"\"NOT SET\"\"\\)[:50]\\)''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep DATABASE\")",
"Bash(ssh wooo@192.168.0.120 \"PGPASSWORD=''CHANGE_ME'' psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c ''SELECT 1'' 2>&1 || echo ''Connection failed''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
"Bash(curl -sv http://192.168.0.120:32334/api/v1/health)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=20 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-7fb7d5b55f-n48gk -n awoooi-prod --tail=20 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-api-75ffbfb88b -n awoooi-prod --replicas=0\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-worker-7fb7d5b55f -n awoooi-prod --replicas=0\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=10\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy -n awoooi-prod -o wide\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-api -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=5s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout history deployment/awoooi-api -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-worker -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=30s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs awoooi-api-6687db5564 -n awoooi-prod -o jsonpath=''{.metadata.annotations.deployment\\\\.kubernetes\\\\.io/revision}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-api-7f487f7cbb-5f88g -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod --to-revision=46\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=15\")",
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents?limit=3)",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=2m\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --since=2m | grep -i webhook\")",
"Bash(curl -sv -X POST http://192.168.0.120:32334/api/v1/webhooks/alertmanager -H \"Content-Type: application/json\" -d '{:*)",
"Bash(ssh wooo@192.168.0.120 \"kubectl get endpoints -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''{status}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=30s\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fc4744758-7wfv5 -n awoooi-prod --tail=30 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6fc548887b-b9mtf -n awoooi-prod --tail=30 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o yaml\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data}''''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pod awoooi-worker-6fc548887b-b9mtf -n awoooi-prod -o jsonpath=''{.metadata.labels}''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o yaml\")",
"Bash(ssh wooo@192.168.0.120 'kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=json -p=\"\"[{\\\\\"\"op\\\\\"\": \\\\\"\"replace\\\\\"\", \\\\\"\"path\\\\\"\": \\\\\"\"/spec/podSelector/matchLabels\\\\\"\", \\\\\"\"value\\\\\"\": {\\\\\"\"system\\\\\"\": \\\\\"\"awoooi\\\\\"\"}}]\"\"')",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-6c69b77894-d6jqq -n awoooi-prod --tail=20\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl run nc-test --rm -it --restart=Never --image=busybox -- nc -zv 192.168.0.188 5432\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o=custom-columns=''NAME:.metadata.name,IMAGE:.spec.containers[0].image''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- ls -la *.db 2>/dev/null || echo ''No SQLite files''\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep -E ''MOCK|DATABASE|SQLITE''\")",
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/approvals\")",
"Bash(python -m py_compile src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/skills/loader.py)",
"Bash(python packages/lewooogo-brain/tests/test_skill_loader.py)",
"Bash(python packages/lewooogo-brain/tests/test_incident_engine.py)",
"Bash(python packages/lewooogo-brain/tests/test_guardrails.py)",
"Bash(python -m py_compile src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/skills/loader.py)",
"Bash(PYTHONPATH=/Users/ogt/awoooi/packages/lewooogo-brain/src python -c \":*)",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/health)",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals?status=pending\")",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/incidents\")",
"Bash(uv sync:*)",
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router 語法驗證通過''\\)\")",
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/incidents\" --connect-timeout 10)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" \"https://awoooi.wooo.work\" --connect-timeout 10)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work\" --connect-timeout 10)",
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/test-123/propose\" -H \"Content-Type: application/json\" -d '{\"\"require_dry_run\"\": true}' --connect-timeout 10)",
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ollama@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs awoooi-api-64c8659cff-grslz -n awoooi-prod --tail=50)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.DATABASE_URL}')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-api)",
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work/zh-TW\" --connect-timeout 10)",
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router import successful''\\)\")",
"Bash(PGPASSWORD=postgres psql -h 192.168.0.188 -U awoooi -d awoooi_dev -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
"Bash(PGPASSWORD=AwoooiProd2026 psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
"Bash(curl -sf http://192.168.0.120:32334/api/v1/incidents)",
"Bash(curl -v \"http://192.168.0.120:32334/api/v1/incidents\")",
"Bash(export KUBECONFIG=/Users/ogt/.kube/config-120)",
"Bash(curl -sI \"http://awoooi.wooo.work/\")",
"Bash(openssl s_client -servername awoooi.wooo.work -connect awoooi.wooo.work:443)",
"Bash(openssl x509:*)",
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260323-7DE10B/propose\" -H \"Content-Type: application/json\" -d '{\"\"\"\"require_dry_run\"\"\"\": true}')",
"Bash(python -c \"from src.services.executor import execute_approved_proposal, get_executor, ActionExecutor; print\\(''✅ Import successful''\\)\")",
"Bash(curl -s https://awoooi.woooo.cc/api/v1/incidents)",
"Bash(curl -s https://awoooi.woooo.cc/api/v1/health)",
"Bash(curl -s --connect-timeout 10 https://awoooi.woooo.cc/api/v1/health)",
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi 2>/dev/null\")",
"Bash(curl -s --connect-timeout 5 http://192.168.70.200:8000/api/v1/health)",
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
"Bash(ssh -o StrictHostKeyChecking=no ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -A)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-7479556d76-jbbps --tail 30)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-api --tail 20)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- curl -s http://localhost:8000/api/v1/incidents)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import httpx; r = httpx.get\\(''http://localhost:8000/api/v1/incidents''\\); print\\(r.text\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get ingress -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].env}')",
"Bash(curl -s --connect-timeout 5 http://192.168.70.202:32334/api/v1/health)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-worker -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-api -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secrets -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.REDIS_URL}')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-worker -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-worker)",
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.work/api/v1/health)",
"Bash(curl -s https://awoooi.wooo.work/api/v1/incidents)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 10)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n wooo-aiops-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -A)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-76bdf9786d-rvtmz --tail 15)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import os; print\\(os.getenv\\(''REDIS_URL'', ''NOT_SET''\\)\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-865cdc97db-6mpzz --tail 20)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod -l app=redis)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n wooo-aiops-prod redis-6c6fcd64b8-8wznx -- redis-cli ping)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import redis; r=redis.Redis\\(host=''10.43.239.47'', port=6379, db=10\\); print\\(r.ping\\(\\)\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -A)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type='json' -p='[{\"\"op\"\": \"\"add\"\", \"\"path\"\": \"\"/spec/egress/0/ports/-\"\", \"\"value\"\": {\"\"port\"\": 6379, \"\"protocol\"\": \"\"TCP\"\"}}]')",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-5fcc484b85-qpwt6 --tail 15)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import os; print\\(''REDIS_URL:'', os.getenv\\(''REDIS_URL''\\)\\); import redis; r=redis.Redis.from_url\\(os.getenv\\(''REDIS_URL''\\)\\); print\\(''PING:'', r.ping\\(\\)\\)\")",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-59d7588d75-p5tht --tail 20)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 30)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o yaml)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -n awoooi-prod -o wide)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl apply -f -)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-6cd7dcbc9-5mtfq --tail 15)",
"Bash(jq .incidents[0])",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath='{.data.OPENCLAW_URL}')",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/health)",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/)",
"Bash(nc -zv 192.168.0.188 8088 -w 5)",
"Bash(ping -c 2 192.168.0.188)",
"Bash(ping -c 2 192.168.70.202)",
"Bash(grep -n \"mapToDualState\" /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx -A 30)",
"Bash(head -40 /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx)",
"Bash(ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw; docker start openclaw 2>/dev/null || docker start clawbot 2>/dev/null || echo ''Container not found, listing all:'' && docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -10\")",
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8089/health)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=60s)",
"Bash(grep -rn \"clawbot\\\\|ClawBot\" /Users/ogt/awoooi/ --include=*.yaml --include=*.yml --include=*.json)",
"Bash(grep -rn \"ClawBot\\\\|clawbot\" /Users/ogt/awoooi/apps/ --include=*.py --include=*.ts --include=*.tsx)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=100)",
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200)",
"Bash(export KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml)",
"Bash(ssh root@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|exception|execute|background|parse'' | tail -40\")",
"Bash(curl -s https://awoooi.wooo.work/api/v1/approvals)",
"Bash(ssh k3s@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
"Bash(ssh ubuntu@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse|skip'' | tail -50\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=500 2>&1 | grep -iE ''background_execution|approve_action|reject|k8s_executor'' | tail -30\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy,sts -n awoooi-prod\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s 2>&1\")",
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>&1 | grep -iE ''background_execution|k8s_executor|parse'' | tail -10\")"
],
"additionalDirectories": [
"/Users/ogt/awoooi/docs",
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
"/Users/ogt/awoooi/apps/web/src/app",
"/Users/ogt/awoooi/apps/api",
"/Users/ogt/awoooi/apps/api/http:/localhost:8000/api/v1",
"/Users/ogt/awoooi/apps/web/public",
"/Users/ogt/Downloads",
"/Users/ogt/awoooi/apps/web/test-results",
"/Users/ogt/awoooi",
"/Users/ogt/awoooi/apps/web/src/app/[locale]",
"/tmp"
]
}
}

View File

@@ -19,18 +19,10 @@
# 文件與腳本(不需要進 image
# 注意: docs/runbooks/, docs/adr/, .agents/skills/ 供 RAG 索引 (ADR-067 Phase 33)
# scripts/ 大部分不需要進 image僅白名單 production runtime/ops 種子腳本
# scripts/ 大部分不需要進 image但 CronJob 腳本需要
# 2026-04-12 ogt (ADR-073 P2-1): 白名單允許 cron_km_vectorize.py
# 2026-05-13 codex: 白名單 T16 auto-repair canary PlayBook seed script
# 2026-05-31 codex: MOMO backup Ansible playbook copies the backup script from
# the controller image; keep only this backup script in the runtime context.
scripts/**
!scripts/
scripts
!scripts/cron_km_vectorize.py
!scripts/backup/
!scripts/backup/backup-momo-188-pg.sh
!scripts/ops/
!scripts/ops/awooop-seed-auto-repair-canary-playbook.py
# Node 快取monorepo 根目錄)
node_modules
@@ -59,6 +51,3 @@ apps/web/.env*
# memory/ADR不影響 build
memory
# 2026-05-02 trigger CI rebuild after runner restart
# 2026-06-12 Codex: trigger P2-403N production verification deploy, no runtime behavior change.
# 2026-06-12 Codex: retry P2-404 deploy after transient Harbor 502, no runtime behavior change.
# 2026-06-19 Codex: trigger P2-111 Code Review Gate production deploy, no runtime behavior change.

View File

@@ -1,581 +0,0 @@
# =============================================================================
# AWOOOI Agent Market Watch (Gitea Actions)
# =============================================================================
# Weekly read-only AI Agent market scan. This workflow detects primary-source
# changes only; it does not install SDKs, call LLM APIs, commit reports, approve
# shadow/canary, or change production routing.
name: Agent Market Watch
on:
workflow_dispatch:
schedule:
- cron: '0 1 * * 1' # 每週一 09:00 台北 (UTC+8)
env:
GITEA_ACTIONS_URL: http://192.168.0.110:3001/wooo/awoooi/actions
SRE_GROUP_CHAT_ID: "-1003711974679"
jobs:
market-watch:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- name: Run read-only market watch
id: watch
run: |
set -euo pipefail
REPORT="/tmp/agent_market_watch_report.json"
PREVIOUS_REPORT="$(find docs/evaluations -maxdepth 1 -type f -name 'agent_market_watch_report_*.json' | sort | tail -n 1 || true)"
PREVIOUS_ARGS=()
if [ -n "$PREVIOUS_REPORT" ]; then
PREVIOUS_ARGS=(--previous-report "$PREVIOUS_REPORT")
echo "Using previous committed market watch baseline: $PREVIOUS_REPORT"
else
echo "No previous committed market watch baseline found; running first live baseline."
fi
python3 scripts/agents/agent-market-watch.py \
--registry docs/ai/agent-market-watch-sources.v1.json \
--output "$REPORT" \
--mode live \
--timeout-seconds 12 \
"${PREVIOUS_ARGS[@]}"
python3 -m json.tool "$REPORT" >/dev/null
python3 - "$REPORT" <<'PY'
import json
import os
import sys
report_path = sys.argv[1]
with open(report_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "agent_market_watch_report_v1":
raise SystemExit("unexpected market watch schema_version")
if data.get("mode") != "live":
raise SystemExit("market watch workflow must run in live mode")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("missing market watch summary")
required = [
"candidate_count",
"source_count",
"changed_candidates",
"watch_only_candidates",
"integration_queue_count",
"failure_count",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"missing market watch summary keys: {missing}")
integration_queue = data.get("integration_queue")
if not isinstance(integration_queue, list):
raise SystemExit("integration_queue must be a list")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("## Agent Market Watch\n\n")
handle.write(f"- Candidates: {summary['candidate_count']}\n")
handle.write(f"- Sources: {summary['source_count']}\n")
handle.write(f"- Changed candidates: {summary['changed_candidates']}\n")
handle.write(f"- Integration queue: {summary['integration_queue_count']}\n")
handle.write(f"- Source failures: {summary['failure_count']}\n")
handle.write("\nPolicy: read-only watch; no SDK/API/prod change is approved by this workflow.\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY
- name: Run read-only integration review
id: review
run: |
set -euo pipefail
REVIEW="/tmp/agent_market_integration_review.json"
python3 scripts/agents/agent-market-integration-review.py \
--watch-report /tmp/agent_market_watch_report.json \
--candidates docs/ai/agent-replacement-candidates.v1.json \
--scorecard docs/evaluations/agent_market_capability_scorecard_2026-06-01.json \
--review-scope all \
--output "$REVIEW"
python3 -m json.tool "$REVIEW" >/dev/null
python3 - "$REVIEW" <<'PY'
import json
import os
import sys
review_path = sys.argv[1]
with open(review_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "agent_market_integration_review_v1":
raise SystemExit("unexpected integration review schema_version")
policy = data.get("policy") or {}
forbidden = [
"production_changes_approved",
"replacement_decision_allowed",
"sdk_installation_approved",
"paid_api_calls_approved",
"shadow_or_canary_approved",
]
unsafe = [key for key in forbidden if policy.get(key) is not False]
if unsafe:
raise SystemExit(f"integration review policy must stay false: {unsafe}")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("missing integration review summary")
required = [
"reviewed_candidates",
"blocked_from_integration",
"requires_cost_approval",
"requires_dependency_approval",
"source_failures",
"production_changes_approved",
"shadow_or_canary_approved",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"missing integration review summary keys: {missing}")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("\n## Agent Integration Review\n\n")
handle.write("- Review scope: all candidates\n")
handle.write(f"- Reviewed candidates: {summary['reviewed_candidates']}\n")
handle.write(f"- Blocked from integration: {summary['blocked_from_integration']}\n")
handle.write(f"- Cost approvals required: {summary['requires_cost_approval']}\n")
handle.write(f"- Dependency approvals required: {summary['requires_dependency_approval']}\n")
handle.write(f"- Production changes approved: {summary['production_changes_approved']}\n")
handle.write(f"- Shadow/canary approved: {summary['shadow_or_canary_approved']}\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY
- name: Run read-only discovery review
id: discovery
run: |
set -euo pipefail
DISCOVERY="/tmp/agent_market_discovery_review.json"
PREVIOUS_DISCOVERY="$(find docs/evaluations -maxdepth 1 -type f -name 'agent_market_discovery_review_*.json' | sort | tail -n 1 || true)"
PREVIOUS_ARGS=()
if [ -n "$PREVIOUS_DISCOVERY" ]; then
PREVIOUS_ARGS=(--previous-review "$PREVIOUS_DISCOVERY")
echo "Using previous committed discovery review baseline: $PREVIOUS_DISCOVERY"
else
echo "No previous committed discovery review baseline found; running first discovery intake."
fi
python3 scripts/agents/agent-market-discovery-review.py \
--watch-report /tmp/agent_market_watch_report.json \
--candidates docs/ai/agent-replacement-candidates.v1.json \
--source-registry docs/ai/agent-market-watch-sources.v1.json \
--output "$DISCOVERY" \
"${PREVIOUS_ARGS[@]}"
python3 -m json.tool "$DISCOVERY" >/dev/null
python3 - "$DISCOVERY" <<'PY'
import json
import os
import sys
discovery_path = sys.argv[1]
with open(discovery_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "agent_market_discovery_review_v1":
raise SystemExit("unexpected discovery review schema_version")
policy = data.get("policy") or {}
forbidden = [
"auto_registry_addition_approved",
"sdk_installation_approved",
"paid_api_calls_approved",
"production_changes_approved",
"shadow_or_canary_approved",
"replacement_decision_allowed",
]
unsafe = [key for key in forbidden if policy.get(key) is not False]
if unsafe:
raise SystemExit(f"discovery review policy must stay false: {unsafe}")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("missing discovery review summary")
required = [
"discovery_sources",
"discovered_items",
"unique_repositories",
"already_watched_or_registered",
"manual_classification_required",
"new_manual_classification_required",
"source_failures",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"missing discovery review summary keys: {missing}")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("\n## Agent Discovery Review\n\n")
handle.write(f"- Discovery sources: {summary['discovery_sources']}\n")
handle.write(f"- Unique repositories: {summary['unique_repositories']}\n")
handle.write(f"- Already watched/registered: {summary['already_watched_or_registered']}\n")
handle.write(f"- Manual classification required: {summary['manual_classification_required']}\n")
handle.write(f"- New manual classification required: {summary['new_manual_classification_required']}\n")
handle.write("\nPolicy: read-only intake; no registry addition, SDK/API, shadow/canary, or production change is approved.\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY
- name: Run read-only discovery classification
id: classify
if: ${{ steps.discovery.outputs.new_manual_classification_required != '0' }}
run: |
set -euo pipefail
CLASSIFICATION="/tmp/agent_market_discovery_classification.json"
python3 scripts/agents/agent-market-discovery-classify.py \
--discovery-review /tmp/agent_market_discovery_review.json \
--output "$CLASSIFICATION" \
--timeout-seconds 12
python3 -m json.tool "$CLASSIFICATION" >/dev/null
python3 - "$CLASSIFICATION" <<'PY'
import json
import os
import sys
classification_path = sys.argv[1]
with open(classification_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "agent_market_discovery_classification_v1":
raise SystemExit("unexpected discovery classification schema_version")
policy = data.get("policy") or {}
forbidden = [
"auto_watch_registry_addition_approved",
"sdk_installation_approved",
"paid_api_calls_approved",
"production_changes_approved",
"shadow_or_canary_approved",
"replacement_decision_allowed",
]
unsafe = [key for key in forbidden if policy.get(key) is not False]
if unsafe:
raise SystemExit(f"discovery classification policy must stay false: {unsafe}")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("missing discovery classification summary")
required = [
"classified_repositories",
"recommended_watch_additions",
"watch_only_or_defer",
"production_changes_approved",
"shadow_or_canary_approved",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"missing discovery classification summary keys: {missing}")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("\n## Agent Discovery Classification\n\n")
handle.write(f"- Classified repositories: {summary['classified_repositories']}\n")
handle.write(f"- Recommended watch additions: {summary['recommended_watch_additions']}\n")
handle.write(f"- Watch-only/defer: {summary['watch_only_or_defer']}\n")
handle.write("\nPolicy: read-only classification; no watch registry addition, SDK/API, replay, shadow/canary, or production change is approved.\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY
- name: Run read-only watch promotion review
id: promote
run: |
set -euo pipefail
PROMOTION="/tmp/agent_market_watch_promotion_review.json"
CLASSIFICATION="/tmp/agent_market_discovery_classification.json"
if [ ! -f "$CLASSIFICATION" ]; then
PREVIOUS_CLASSIFICATION="$(find docs/evaluations -maxdepth 1 -type f -name 'agent_market_discovery_classification_*.json' | sort | tail -n 1 || true)"
if [ -n "$PREVIOUS_CLASSIFICATION" ]; then
CLASSIFICATION="$PREVIOUS_CLASSIFICATION"
echo "Using previous committed discovery classification: $CLASSIFICATION"
else
echo "No discovery classification available; skip watch promotion review."
exit 0
fi
fi
python3 scripts/agents/agent-market-watch-promotion-review.py \
--watch-report /tmp/agent_market_watch_report.json \
--integration-review /tmp/agent_market_integration_review.json \
--discovery-classification "$CLASSIFICATION" \
--candidates docs/ai/agent-replacement-candidates.v1.json \
--output "$PROMOTION"
python3 -m json.tool "$PROMOTION" >/dev/null
python3 - "$PROMOTION" <<'PY'
import json
import os
import sys
promotion_path = sys.argv[1]
with open(promotion_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "agent_market_watch_promotion_review_v1":
raise SystemExit("unexpected watch promotion review schema_version")
policy = data.get("policy") or {}
forbidden = [
"priority_upgrade_approved",
"market_scorecard_update_approved",
"replay_candidate_approved",
"sdk_installation_approved",
"paid_api_calls_approved",
"production_changes_approved",
"shadow_or_canary_approved",
"replacement_decision_allowed",
]
unsafe = [key for key in forbidden if policy.get(key) is not False]
if unsafe:
raise SystemExit(f"watch promotion policy must stay false: {unsafe}")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("missing watch promotion summary")
required = [
"watch_only_candidates_reviewed",
"eligible_for_market_scorecard_prescreen",
"remain_watch_only",
"priority_upgrades_approved",
"market_scorecard_updates_approved",
"replay_candidates_approved",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"missing watch promotion summary keys: {missing}")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("\n## Agent Watch Promotion Review\n\n")
handle.write(f"- Watch-only candidates reviewed: {summary['watch_only_candidates_reviewed']}\n")
handle.write(f"- Eligible for scorecard prescreen: {summary['eligible_for_market_scorecard_prescreen']}\n")
handle.write(f"- Remain watch-only: {summary['remain_watch_only']}\n")
handle.write(f"- Priority upgrades approved: {summary['priority_upgrades_approved']}\n")
handle.write(f"- Replay candidates approved: {summary['replay_candidates_approved']}\n")
handle.write("\nPolicy: read-only promotion readiness; no priority upgrade, scorecard update, replay, SDK/API, shadow/canary, or production change is approved.\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY
- name: Build read-only governance snapshot
id: snapshot
run: |
set -euo pipefail
SNAPSHOT="/tmp/agent_market_governance_snapshot.json"
CLASSIFICATION="/tmp/agent_market_discovery_classification.json"
if [ ! -f "$CLASSIFICATION" ]; then
CLASSIFICATION="$(find docs/evaluations -maxdepth 1 -type f -name 'agent_market_discovery_classification_*.json' | sort | tail -n 1 || true)"
fi
PROMOTION="/tmp/agent_market_watch_promotion_review.json"
if [ ! -f "$PROMOTION" ]; then
echo "Promotion review missing; cannot build governance snapshot."
exit 1
fi
python3 scripts/agents/agent-market-governance-snapshot.py \
--watch-report /tmp/agent_market_watch_report.json \
--integration-review /tmp/agent_market_integration_review.json \
--discovery-classification "$CLASSIFICATION" \
--promotion-review "$PROMOTION" \
--candidates docs/ai/agent-replacement-candidates.v1.json \
--output "$SNAPSHOT"
python3 -m json.tool "$SNAPSHOT" >/dev/null
python3 - "$SNAPSHOT" <<'PY'
import json
import os
import sys
snapshot_path = sys.argv[1]
with open(snapshot_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "agent_market_governance_snapshot_v1":
raise SystemExit("unexpected governance snapshot schema_version")
policy = data.get("policy") or {}
forbidden = [
"priority_upgrade_approved",
"market_scorecard_update_approved",
"replay_candidate_approved",
"sdk_installation_approved",
"paid_api_calls_approved",
"production_changes_approved",
"shadow_or_canary_approved",
"replacement_decision_allowed",
]
unsafe = [key for key in forbidden if policy.get(key) is not False]
if unsafe:
raise SystemExit(f"governance snapshot policy must stay false: {unsafe}")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("missing governance snapshot summary")
required = [
"candidate_count",
"source_count",
"blocked_from_integration",
"eligible_for_market_scorecard_prescreen",
"replacement_decisions_approved",
"replay_candidates_approved",
"production_changes_approved",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"missing governance snapshot summary keys: {missing}")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("\n## Agent Market Governance Snapshot\n\n")
handle.write(f"- Current decision: {data['current_decision']}\n")
handle.write(f"- Candidates: {summary['candidate_count']}\n")
handle.write(f"- Sources: {summary['source_count']}\n")
handle.write(f"- Blocked from integration: {summary['blocked_from_integration']}\n")
handle.write(f"- Scorecard prescreen eligible: {summary['eligible_for_market_scorecard_prescreen']}\n")
handle.write(f"- Replacement approvals: {summary['replacement_decisions_approved']}\n")
handle.write(f"- Replay approvals: {summary['replay_candidates_approved']}\n")
handle.write(f"- Production approvals: {summary['production_changes_approved']}\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY
- name: Summarize actionable change or failure
if: always()
env:
TG_CHAT_ID: ${{ env.SRE_GROUP_CHAT_ID }}
JOB_STATUS: ${{ job.status }}
CANDIDATE_COUNT: ${{ steps.watch.outputs.candidate_count }}
SOURCE_COUNT: ${{ steps.watch.outputs.source_count }}
CHANGED_CANDIDATES: ${{ steps.watch.outputs.changed_candidates }}
INTEGRATION_QUEUE_COUNT: ${{ steps.watch.outputs.integration_queue_count }}
FAILURE_COUNT: ${{ steps.watch.outputs.failure_count }}
REVIEWED_CANDIDATES: ${{ steps.review.outputs.reviewed_candidates }}
BLOCKED_FROM_INTEGRATION: ${{ steps.review.outputs.blocked_from_integration }}
REVIEW_COST_APPROVALS: ${{ steps.review.outputs.requires_cost_approval }}
REVIEW_DEPENDENCY_APPROVALS: ${{ steps.review.outputs.requires_dependency_approval }}
DISCOVERY_MANUAL_REQUIRED: ${{ steps.discovery.outputs.manual_classification_required }}
DISCOVERY_NEW_MANUAL_REQUIRED: ${{ steps.discovery.outputs.new_manual_classification_required }}
DISCOVERY_UNIQUE_REPOSITORIES: ${{ steps.discovery.outputs.unique_repositories }}
CLASSIFIED_REPOSITORIES: ${{ steps.classify.outputs.classified_repositories }}
RECOMMENDED_WATCH_ADDITIONS: ${{ steps.classify.outputs.recommended_watch_additions }}
WATCH_PROMOTION_ELIGIBLE: ${{ steps.promote.outputs.eligible_for_market_scorecard_prescreen }}
WATCH_PROMOTION_APPROVED: ${{ steps.promote.outputs.priority_upgrades_approved }}
REPLAY_CANDIDATES_APPROVED: ${{ steps.promote.outputs.replay_candidates_approved }}
GITEA_ACTIONS_URL: ${{ env.GITEA_ACTIONS_URL }}
run: |
set -euo pipefail
CHANGED="${CHANGED_CANDIDATES:-0}"
QUEUE="${INTEGRATION_QUEUE_COUNT:-0}"
FAILURES="${FAILURE_COUNT:-0}"
NEW_DISCOVERY="${DISCOVERY_NEW_MANUAL_REQUIRED:-0}"
if [ "$JOB_STATUS" = "success" ] && [ "$CHANGED" = "0" ] && [ "$QUEUE" = "0" ] && [ "$FAILURES" = "0" ] && [ "$NEW_DISCOVERY" = "0" ]; then
echo "No actionable market changes; keep Telegram quiet."
exit 0
fi
python3 - <<'PY'
import os
from datetime import datetime
from zoneinfo import ZoneInfo
status = os.environ.get("JOB_STATUS", "unknown")
changed = os.environ.get("CHANGED_CANDIDATES") or "0"
queue = os.environ.get("INTEGRATION_QUEUE_COUNT") or "0"
failures = os.environ.get("FAILURE_COUNT") or "0"
reviewed = os.environ.get("REVIEWED_CANDIDATES") or "0"
blocked = os.environ.get("BLOCKED_FROM_INTEGRATION") or "0"
cost_approvals = os.environ.get("REVIEW_COST_APPROVALS") or "0"
dependency_approvals = os.environ.get("REVIEW_DEPENDENCY_APPROVALS") or "0"
discovery_manual = os.environ.get("DISCOVERY_MANUAL_REQUIRED") or "0"
discovery_new = os.environ.get("DISCOVERY_NEW_MANUAL_REQUIRED") or "0"
discovery_repos = os.environ.get("DISCOVERY_UNIQUE_REPOSITORIES") or "0"
classified_repos = os.environ.get("CLASSIFIED_REPOSITORIES") or "0"
recommended_watch_additions = os.environ.get("RECOMMENDED_WATCH_ADDITIONS") or "0"
watch_promotion_eligible = os.environ.get("WATCH_PROMOTION_ELIGIBLE") or "0"
watch_promotion_approved = os.environ.get("WATCH_PROMOTION_APPROVED") or "0"
replay_candidates_approved = os.environ.get("REPLAY_CANDIDATES_APPROVED") or "0"
candidates = os.environ.get("CANDIDATE_COUNT") or "0"
sources = os.environ.get("SOURCE_COUNT") or "0"
actions_url = os.environ.get("GITEA_ACTIONS_URL", "")
generated = datetime.now(ZoneInfo("Asia/Taipei")).strftime("%Y-%m-%d %H:%M")
title = "Agent Market Watch 需要複核" if status == "success" else "Agent Market Watch 執行失敗"
lines = [
f"## {title}",
"",
f"- 時間:`{generated}`",
f"- 狀態:`{status}`",
f"- 候選 / 來源:`{candidates}` / `{sources}`",
f"- 變動候選 / 整合佇列 / 來源失敗:`{changed}` / `{queue}` / `{failures}`",
f"- Review已審 `{reviewed}`;擋下整合 `{blocked}`;成本批准需求 `{cost_approvals}`;依賴批准需求 `{dependency_approvals}`",
f"- Discoveryunique repo `{discovery_repos}`;需人工分類 `{discovery_manual}`;新未分類 `{discovery_new}`;已分類 `{classified_repos}`;建議 watch `{recommended_watch_additions}`",
f"- Promotionscorecard prescreen eligible `{watch_promotion_eligible}`priority upgrade approved `{watch_promotion_approved}`replay approved `{replay_candidates_approved}`",
"",
"政策:此 workflow 只建立市場觀察、整合審查、discovery intake/classification 訊號,不批准 SDK 安裝、付費 API、replay、shadow/canary 或 OpenClaw 取代。",
f"Log{actions_url}",
]
summary = "\n".join(lines) + "\n"
print(summary)
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write(summary)
PY

View File

@@ -1,110 +0,0 @@
# =============================================================================
# AWOOOI AI Technology Watch (Gitea Actions)
# =============================================================================
# 每 6 小時只讀監控主流 AI 技術 primary sources。此 workflow 只產生
# Gitea step summary不安裝 SDK、不呼叫 LLM API、不 commit report、不發
# Telegram、不切換 provider route、不修改 production。
name: AI 技術雷達監控
on:
workflow_dispatch:
schedule:
- cron: '0 */6 * * *'
jobs:
ai-technology-watch:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- name: 執行只讀 AI 技術雷達監控
id: watch
run: |
set -euo pipefail
REPORT="/tmp/ai_technology_watch_report.json"
PREVIOUS_REPORT="$(find docs/evaluations -maxdepth 1 -type f -name 'ai_technology_watch_report_*.json' | sort | tail -n 1 || true)"
PREVIOUS_ARGS=()
if [ -n "$PREVIOUS_REPORT" ]; then
PREVIOUS_ARGS=(--previous-report "$PREVIOUS_REPORT")
echo "使用已提交的上一份 AI 技術雷達 baseline: $PREVIOUS_REPORT"
else
echo "找不到已提交的 AI 技術雷達 baseline執行第一次 live baseline。"
fi
python3 scripts/agents/ai-technology-watch.py \
--registry docs/ai/ai-technology-watch-sources.v1.json \
--output "$REPORT" \
--mode live \
--timeout-seconds 12 \
"${PREVIOUS_ARGS[@]}"
python3 -m json.tool "$REPORT" >/dev/null
python3 - "$REPORT" <<'PY'
import json
import os
import sys
report_path = sys.argv[1]
with open(report_path, encoding="utf-8") as handle:
data = json.load(handle)
if data.get("schema_version") != "ai_technology_watch_report_v1":
raise SystemExit("AI 技術雷達 schema_version 不正確")
if data.get("mode") != "live":
raise SystemExit("AI 技術雷達 workflow 必須以 live mode 執行")
policy = data.get("policy") or {}
forbidden = [
"sdk_installation_approved",
"paid_api_calls_approved",
"production_routing_approved",
"telegram_send_approved",
"model_provider_switch_approved",
"host_write_approved",
]
unsafe = [key for key in forbidden if policy.get(key) is not False]
if unsafe:
raise SystemExit(f"AI 技術雷達 policy 必須維持 false: {unsafe}")
if policy.get("read_only") is not True:
raise SystemExit("AI 技術雷達必須維持 read_only")
summary = data.get("summary")
if not isinstance(summary, dict):
raise SystemExit("缺少 AI 技術雷達 summary")
required = [
"technology_count",
"technology_area_count",
"source_count",
"changed_technologies",
"watch_only_technologies",
"review_queue_count",
"source_failure_count",
"high_priority_count",
]
missing = [key for key in required if key not in summary]
if missing:
raise SystemExit(f"缺少 AI 技術雷達 summary keys: {missing}")
output_path = os.environ.get("GITHUB_OUTPUT")
if output_path:
with open(output_path, "a", encoding="utf-8") as handle:
for key in required:
handle.write(f"{key}={summary.get(key, 0)}\n")
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary_path:
with open(step_summary_path, "a", encoding="utf-8") as handle:
handle.write("## AI 技術雷達監控\n\n")
handle.write(f"- 技術項目:{summary['technology_count']}\n")
handle.write(f"- 技術領域:{summary['technology_area_count']}\n")
handle.write(f"- 來源數:{summary['source_count']}\n")
handle.write(f"- 變更技術:{summary['changed_technologies']}\n")
handle.write(f"- 審核佇列:{summary['review_queue_count']}\n")
handle.write(f"- 來源失敗:{summary['source_failure_count']}\n")
handle.write(f"- 高優先級技術:{summary['high_priority_count']}\n")
handle.write("\nPolicy: 只讀監控;此 workflow 不批准 SDK/API/provider/Telegram/host/production 變更。\n")
print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
PY

View File

@@ -1,49 +1,22 @@
name: Ansible / Reboot Recovery Contract
name: Ansible Lint
on:
push:
branches: [main]
paths:
- 'infra/ansible/**'
- 'ops/monitoring/**'
- 'ops/reboot-recovery/**'
- 'scripts/backup/**'
- 'scripts/ops/**'
- 'scripts/reboot-recovery/**'
- 'docs/**'
- '.gitea/workflows/**'
pull_request:
paths:
- 'infra/ansible/**'
- 'ops/monitoring/**'
- 'ops/reboot-recovery/**'
- 'scripts/backup/**'
- 'scripts/ops/**'
- 'scripts/reboot-recovery/**'
- 'docs/**'
- '.gitea/workflows/**'
workflow_dispatch:
jobs:
validate:
lint:
runs-on: self-hosted
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Bootstrap Ansible validation env
run: bash scripts/ops/bootstrap-ansible-validation-env.sh
- name: Install ansible-lint
run: pip install ansible-lint
- name: Run Ansible and reboot-recovery validation
run: |
set -euo pipefail
export PATH="${ANSIBLE_VALIDATION_VENV:-/tmp/awoooi-ansible-venv}/bin:$PATH"
bash scripts/ops/ansible-validate.sh
python3 scripts/ops/doc-secrets-sanity-check.py docs .gitea
python3 scripts/ops/backup-alert-label-contract-check.py
python3 scripts/ops/recovery-scorecard-contract-check.py
python3 -m py_compile scripts/ops/backup-alert-live-visibility-check.py
bash -n scripts/reboot-recovery/full-stack-recovery-scorecard.sh
bash -n scripts/reboot-recovery/dr-offsite-operator-checklist.sh
bash -n scripts/reboot-recovery/verify-cold-start-monitor-deploy.sh
bash scripts/reboot-recovery/reboot-recovery-readiness-audit.sh --no-color
- name: Run ansible-lint
run: ansible-lint infra/ansible/playbooks/
working-directory: ${{ github.workspace }}

View File

@@ -19,7 +19,7 @@ concurrency:
env:
HARBOR: 192.168.0.110:5000
HARBOR_MIRROR: 192.168.0.110:5001
SRE_GROUP_CHAT_ID: "-1003711974679"
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
OTEL_EXPORTER_OTLP_ENDPOINT: http://192.168.0.188:24318
OTEL_SERVICE_NAME: awoooi-cd-dev
OTEL_RESOURCE_ATTRIBUTES: service.version=${{ github.sha }},deployment.environment=dev
@@ -43,19 +43,10 @@ jobs:
├ 📝 ${{ steps.commit.outputs.message }}
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
└ 🌿 dev branch"
if AWOOI_CICD_STATUS=running \
AWOOI_CICD_STAGE=dev-deploy \
AWOOI_CICD_JOB_NAME="[DEV] 部署開始" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Dev deploy start notification mirrored through AWOOI API"
else
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
fi
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
# API 測試 (同 prod CI確保 dev 也通過)
- name: Run API Tests
@@ -87,18 +78,11 @@ jobs:
echo "✅ API 測試通過"
- name: Login to Harbor
run: |
HARBOR_USERNAME="$(cat <<'AWOOOI_SECRET_HARBOR_USERNAME'
${{ secrets.HARBOR_USERNAME }}
AWOOOI_SECRET_HARBOR_USERNAME
)"
HARBOR_PASSWORD="$(cat <<'AWOOOI_SECRET_HARBOR_PASSWORD'
${{ secrets.HARBOR_PASSWORD }}
AWOOOI_SECRET_HARBOR_PASSWORD
)"
printf '%s' "$HARBOR_PASSWORD" | docker login "${{ env.HARBOR }}" \
-u "$HARBOR_USERNAME" \
--password-stdin
uses: docker/login-action@v3
with:
registry: ${{ env.HARBOR }}
username: ${{ secrets.HARBOR_USERNAME }}
password: ${{ secrets.HARBOR_PASSWORD }}
# Dev API 鏡像:強制重建,不用 cache確保 models.json 等配置文件更新)
- name: Build and Push API (Dev)
@@ -114,63 +98,36 @@ jobs:
# 注入 Dev K8s Secrets
- name: Inject Dev K8s Secrets
env:
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
run: |
secret_b64() {
python3 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
}
write_deploy_key() {
mkdir -p ~/.ssh
umask 077
cat > ~/.ssh/deploy_key <<'AWOOOI_DEPLOY_KEY'
${{ secrets.DEPLOY_SSH_KEY }}
AWOOOI_DEPLOY_KEY
chmod 600 ~/.ssh/deploy_key
}
TG_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_SRE_GROUP_CHAT_ID_COMPAT'
${{ secrets.SRE_GROUP_CHAT_ID }}
AWOOOI_SECRET_SRE_GROUP_CHAT_ID_COMPAT
)"
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
${{ secrets.NVIDIA_API_KEY }}
AWOOOI_SECRET_NVIDIA_API_KEY
)"
GEMINI_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_GEMINI_API_KEY'
${{ secrets.GEMINI_API_KEY }}
AWOOOI_SECRET_GEMINI_API_KEY
)"
mkdir -p ~/.ssh
write_deploy_key
# Keep deploy-time host keys separate from the runner user's global
# known_hosts, which is also used by reboot/cold-start checks.
DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"
ssh-keyscan -T 5 -t ed25519,rsa,ecdsa 192.168.0.120 > "${DEPLOY_KNOWN_HOSTS}" 2>/dev/null
test -s "${DEPLOY_KNOWN_HOSTS}" || { echo "❌ K8S host keyscan failed: 192.168.0.120"; exit 1; }
SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${DEPLOY_KNOWN_HOSTS} -i ~/.ssh/deploy_key"
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
chmod 600 ~/.ssh/deploy_key
# 2026-05-05 Codex: kubectl runs on 120 control-plane. 121 is a
# worker and its local kubeconfig points at 127.0.0.1:6443.
ssh $SSH_OPTS wooo@192.168.0.120 << SECRETS
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << SECRETS
set -e
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"'"$(echo -n "${TG_BOT_TOKEN}" | base64 -w 0)"'"},
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"'"$(echo -n "${TG_CHAT_ID}" | base64 -w 0)"'"}
]' || echo "⚠️ Telegram Secrets patch 跳過"
if [ -n "${NVIDIA_API_KEY_B64}" ]; then
if [ -n "${NVIDIA_API_KEY}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"'"$(echo -n "${NVIDIA_API_KEY}" | base64 -w 0)"'"}
]' && echo "✅ NVIDIA_API_KEY 已注入 dev"
fi
if [ -n "${GEMINI_API_KEY_B64}" ]; then
if [ -n "${GEMINI_API_KEY}" ]; then
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"'"$(echo -n "${GEMINI_API_KEY}" | base64 -w 0)"'"}
]' && echo "✅ GEMINI_API_KEY 已注入 dev"
fi
@@ -179,16 +136,14 @@ jobs:
# 部署到 awoooi-dev
- name: Deploy to Dev K8s
env:
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
run: |
DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"
ssh-keyscan -T 5 -t ed25519,rsa,ecdsa 192.168.0.120 > "${DEPLOY_KNOWN_HOSTS}" 2>/dev/null
test -s "${DEPLOY_KNOWN_HOSTS}" || { echo "❌ K8S host keyscan failed: 192.168.0.120"; exit 1; }
SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${DEPLOY_KNOWN_HOSTS} -i ~/.ssh/deploy_key"
cat k8s/awoooi-dev/02-configmap.yaml | \
ssh $SSH_OPTS wooo@192.168.0.120 \
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 \
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
ssh $SSH_OPTS wooo@192.168.0.120 << 'DEPLOY'
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << 'DEPLOY'
set -e
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
@@ -229,20 +184,10 @@ jobs:
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
├ ⏱️ 耗時: ${MINUTES}m ${SECONDS}s
└ 🩺 http://192.168.0.125:32344/api/v1/health"
if AWOOI_CICD_STATUS=success \
AWOOI_CICD_STAGE=dev-deploy \
AWOOI_CICD_JOB_NAME="[DEV] 部署完成" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Dev deploy success notification mirrored through AWOOI API"
else
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
fi
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
- name: Notify Dev Deploy Failure
if: failure()
@@ -251,16 +196,7 @@ jobs:
├ 📝 ${{ steps.commit.outputs.message }}
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
└ 🔗 <a href=\"http://192.168.0.110:3001/wooo/awoooi/actions\">查看日誌</a>"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=dev-deploy \
AWOOI_CICD_JOB_NAME="[DEV] 部署失敗" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Dev deploy failure notification mirrored through AWOOI API"
else
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"
fi
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d "parse_mode=HTML" \
--data-urlencode "text@-"

File diff suppressed because it is too large Load Diff

View File

@@ -19,7 +19,7 @@ concurrency:
env:
REPORT_URL: https://mo.wooo.work/code-review/
GITEA_ACTIONS_URL: http://192.168.0.110:3001/wooo/awoooi/actions
SRE_GROUP_CHAT_ID: "-1003711974679"
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
jobs:
ai-code-review:
@@ -30,9 +30,6 @@ jobs:
with:
fetch-depth: 50
- name: Guard Workflow Secret Surfaces
run: node scripts/ci/check-gitea-step-env-secrets.js
- name: Skip Stale Main Push
id: stale
run: |
@@ -105,40 +102,26 @@ jobs:
- name: Notify Code Review Start
if: steps.stale.outputs.skip != 'true'
env:
SRE_GROUP_CHAT_ID: ${{ env.SRE_GROUP_CHAT_ID }}
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
BRANCH: ${{ steps.ctx.outputs.branch }}
COMMIT_MSG: ${{ steps.ctx.outputs.commit_msg }}
FILES_DISPLAY: ${{ steps.ctx.outputs.files_display }}
run: |
set -euo pipefail
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
echo "Telegram secret missing; skip start notification"
exit 0
fi
html_escape() { sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g'; }
COMMIT_ESC="$(printf '%s' "$COMMIT_MSG" | html_escape)"
FILES_ESC="$(printf '%s\n' "$FILES_DISPLAY" | html_escape)"
MSG="$(printf '🔍 <b>Code Review 啟動</b>\n──────────────────────\n📦 Commit <code>%s</code> 🌿 <code>%s</code>\n📝 <code>%s</code>\n📁 <b>變更檔案:</b>\n%s\n──────────────────────\n🤖 <b>Hermes → OpenClaw → Elephant Alpha → NemoTron</b>\n📊 即時進度:<a href=\"%s\">%s</a>' "$SHORT_SHA" "$BRANCH" "$COMMIT_ESC" "$FILES_ESC" "$REPORT_URL" "$REPORT_URL")"
if AWOOI_CICD_STATUS=running \
AWOOI_CICD_STAGE=code-review \
AWOOI_CICD_JOB_NAME="Code Review 啟動" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Code review start notification mirrored through AWOOI API"
else
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${SRE_GROUP_CHAT_ID:-}" ]; then
echo "Telegram secret missing and AWOOI API notify failed; skip start notification"
exit 0
fi
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$SRE_GROUP_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
fi
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
- name: Run Deterministic Review
if: steps.stale.outputs.skip != 'true'
@@ -156,14 +139,15 @@ jobs:
- name: Notify Code Review Completion
if: always() && steps.stale.outputs.skip != 'true'
env:
SRE_GROUP_CHAT_ID: ${{ env.SRE_GROUP_CHAT_ID }}
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
run: |
set -euo pipefail
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_BOT_TOKEN
)"
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
echo "Telegram secret missing; skip completion notification"
exit 0
fi
REPORT=/tmp/code-review-report.json
if [ ! -s "$REPORT" ]; then
cat > "$REPORT" <<'JSON'
@@ -196,25 +180,7 @@ jobs:
TOP_ESC="$(printf '%s' "$TOP_ISSUE" | html_escape)"
MSG="$(printf '%s <b>Code Review 完成・%s</b>\n──────────────────────\n🔴 CRITICAL <code>%s</code> 🟠 HIGH <code>%s</code> 🟡 MEDIUM <code>%s</code> 🟢 LOW <code>%s</code>\n──────────────────────\n⚠ <b>主要問題</b>\n%s\n\n🔍 <b>整體風險等級</b>\n%s%s\n\n⚠ <b>最高關注問題</b>\n1. %s\n──────────────────────\n🤖 Elephant Alpha<b>%s</b> ✅ %s\n📊 完整報告:<a href=\"%s\">%s</a>' "$STATUS" "$SHORT_SHA" "$CRITICAL" "$HIGH" "$MEDIUM" "$LOW" "$ISSUE_LINE" "$RISK" "$SUMMARY_ESC" "$TOP_ESC" "$RISK" "$ACTION_ESC" "$REPORT_URL" "$REPORT_URL")"
CICD_STATUS=success
if [ "$RISK" = "MEDIUM" ]; then CICD_STATUS=pending; fi
if [ "$RISK" = "HIGH" ] || [ "$RISK" = "CRITICAL" ]; then CICD_STATUS=failed; fi
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
AWOOI_CICD_STAGE=code-review \
AWOOI_CICD_JOB_NAME="Code Review 完成・${RISK}" \
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
AWOOI_CICD_SUMMARY="CRITICAL=${CRITICAL}; HIGH=${HIGH}; MEDIUM=${MEDIUM}; LOW=${LOW}; ${SUMMARY}" \
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Code review completion notification mirrored through AWOOI API"
else
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${SRE_GROUP_CHAT_ID:-}" ]; then
echo "Telegram secret missing and AWOOI API notify failed; skip completion notification"
exit 0
fi
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$SRE_GROUP_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null
fi
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
>/dev/null

View File

@@ -1,7 +1,7 @@
# =============================================================================
# Deploy Prometheus Alert Rules (獨立 workflow)
# 2026-04-05 Claude Code (ADR-039 I3): 從 cd.yaml 分離
# 觸發條件: ops/monitoring/alerts-unified.yml / slo-rules.yml 有變更 或 workflow_dispatch
# 觸發條件: ops/monitoring/alerts-unified.yml 有變更 或 workflow_dispatch
# 說明: 告警規則部署不依賴應用構建,獨立觸發以加快響應速度
# =============================================================================
@@ -12,12 +12,10 @@ on:
branches: [main]
paths:
- 'ops/monitoring/alerts-unified.yml'
- 'ops/monitoring/slo-rules.yml'
- 'scripts/ops/deploy-alerts.sh'
workflow_dispatch:
env:
SRE_GROUP_CHAT_ID: "-1003711974679"
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
jobs:
deploy-alerts:
@@ -32,15 +30,11 @@ jobs:
run: |
pip3 install -q pyyaml 2>/dev/null || pip install -q pyyaml
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/alerts-unified.yml')); print('YAML OK')"
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/slo-rules.yml')); print('SLO YAML OK')"
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
umask 077
cat > ~/.ssh/id_ed25519 <<'AWOOOI_DEPLOY_KEY'
${{ secrets.DEPLOY_SSH_KEY }}
AWOOOI_DEPLOY_KEY
echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts
@@ -56,17 +50,6 @@ jobs:
SHORT_SHA="${{ github.sha }}"
SHORT_SHA="${SHORT_SHA:0:7}"
MSG="${EMOJI} Prometheus 告警規則部署 ${STATUS} (${SHORT_SHA})"
CICD_STATUS="success"
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
AWOOI_CICD_STAGE=deploy-alerts \
AWOOI_CICD_JOB_NAME="Prometheus 告警規則部署" \
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
AWOOI_CICD_SUMMARY="${MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Alert rule deploy notification mirrored through AWOOI API"
else
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
--data-urlencode "text=${MSG}" || true
fi
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
--data-urlencode "text=${MSG}" || true

View File

@@ -19,7 +19,7 @@ env:
OTEL_EXPORTER_OTLP_ENDPOINT: http://192.168.0.188:24318
OTEL_SERVICE_NAME: awoooi-e2e
OTEL_RESOURCE_ATTRIBUTES: deployment.environment=production
SRE_GROUP_CHAT_ID: "-1003711974679"
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
jobs:
e2e-health:
@@ -51,52 +51,10 @@ jobs:
echo "status=failed" >> $GITHUB_OUTPUT
exit 1
- name: Source Provider Freshness Smoke
run: |
SOURCE_CANARY_RUN_REF="gitea-e2e-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}"
echo "SOURCE_CANARY_RUN_REF=${SOURCE_CANARY_RUN_REF}" >> "$GITHUB_ENV"
echo "SOURCE_LINK_CANARY_WORK_ITEM_ID=source-evidence:sentry:upstream_canary:awoooi-source-link-canary-${SOURCE_CANARY_RUN_REF}" >> "$GITHUB_ENV"
OPERATOR_KEY="$(cat <<'AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY'
${{ secrets.AWOOOP_OPERATOR_API_KEY }}
AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY
)"
AWOOOP_OPERATOR_API_KEY="${OPERATOR_KEY}" \
AWOOOP_OPERATOR_ID=gitea-e2e-health \
python3 scripts/alert_chain_smoke_test.py \
--api-url https://awoooi.wooo.work \
--metrics-api-url http://192.168.0.125:32334 \
--source-provider-heartbeat \
--source-provider-upstream-canary \
--run-ref "${SOURCE_CANARY_RUN_REF}" \
--source-link-canary-target-incident-id INC-20260505-25E744 \
--json
- name: Source Correlation Applied-Link Smoke
run: |
python3 scripts/awooop_source_correlation_apply_smoke.py \
--api-url https://awoooi.wooo.work \
--target-incident-id INC-20260505-25E744 \
--allow-existing-apply \
--refresh-if-stale-days 6 \
--refresh-work-item-id "${SOURCE_LINK_CANARY_WORK_ITEM_ID}" \
--verify-refresh-candidate \
--reviewer-id gitea_e2e_source_link_canary \
--operator-note "T124 dedicated source-link canary refresh; append-only status-chain proof"
- name: Notify Telegram on Failure
if: failure()
run: |
MSG="E2E Health Check 失敗API 健康檢查未通過"
if AWOOI_CICD_STATUS=failed \
AWOOI_CICD_STAGE=e2e-health \
AWOOI_CICD_JOB_NAME="E2E Health Check" \
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
AWOOI_CICD_SUMMARY="${MSG}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "E2E failure notification mirrored through AWOOI API"
else
curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
-d chat_id="${{ env.SRE_GROUP_CHAT_ID }}" \
-d parse_mode="HTML" \
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
fi
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
-d parse_mode="HTML" \
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"

View File

@@ -17,10 +17,9 @@ on:
branches: [main]
paths:
- 'apps/api/migrations/*.sql'
workflow_dispatch:
env:
SRE_GROUP_CHAT_ID: "-1003711974679"
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
jobs:
migrate:
@@ -57,101 +56,45 @@ jobs:
- name: Identify new migrations
id: diff
run: |
ALL_NEW_FILES=$(git diff --no-renames --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
NEW_FILES=$(echo "$ALL_NEW_FILES" | grep -Ev '(_down|rollback)\.sql$' || true)
SKIPPED_ROLLBACK_FILES=$(echo "$ALL_NEW_FILES" | grep -E '(_down|rollback)\.sql$' || true)
NEW_FILES=$(git diff --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
echo "new_files<<EOF" >> $GITHUB_OUTPUT
echo "$NEW_FILES" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "=== New migration files ==="
echo "$NEW_FILES"
if [ -n "$SKIPPED_ROLLBACK_FILES" ]; then
echo "=== Rollback/down migrations skipped by design ==="
echo "$SKIPPED_ROLLBACK_FILES"
fi
- name: Apply new migrations
if: steps.diff.outputs.new_files != ''
env:
# 從 Gitea secrets 取,不直接明碼
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
run: |
set -euo pipefail
# 從 Gitea secrets 取,不放 step-level env避免 runner log 展開。
# MIGRATION_DATABASE_URL 是限權帳號DATABASE_URL 只在 PostgreSQL
# 明確回報「必須是 table owner」時作為受控 fallback。
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
${{ secrets.MIGRATION_DATABASE_URL }}
AWOOOI_SECRET_MIGRATION_DATABASE_URL
)"
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
${{ secrets.DATABASE_URL }}
AWOOOI_SECRET_DATABASE_URL
)"
if [ -z "$PGURL" ]; then
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
exit 1
fi
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
apply_migration() {
local url="$1"
local file="$2"
psql "$url" \
-v ON_ERROR_STOP=1 \
--single-transaction \
-f "$file"
}
# 套用每個新檔 (single transaction per file)
echo "${{ steps.diff.outputs.new_files }}" | while IFS= read -r file; do
[ -z "$file" ] && continue
echo "=== Applying: $file ==="
migration_err="$(mktemp)"
if ! apply_migration "$PGURL_PSQL" "$file" 2>"$migration_err"; then
if grep -Eq "(must be owner of table|permission denied for table)" "$migration_err"; then
if [ -z "$OWNER_PGURL_PSQL" ]; then
cat "$migration_err" >&2
echo "::error::migration requires table owner but DATABASE_URL secret is not set"
exit 1
fi
echo "::warning::migration requires table owner; retrying with owner connection"
apply_migration "$OWNER_PGURL_PSQL" "$file"
else
cat "$migration_err" >&2
exit 1
fi
fi
rm -f "$migration_err"
psql "$PGURL_PSQL" \
-v ON_ERROR_STOP=1 \
--single-transaction \
-f "$file"
echo "=== OK: $file ==="
done
- name: Seed asset_discovery_run (audit)
if: steps.diff.outputs.new_files != ''
env:
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
run: |
set -euo pipefail
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
${{ secrets.MIGRATION_DATABASE_URL }}
AWOOOI_SECRET_MIGRATION_DATABASE_URL
)"
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
${{ secrets.DATABASE_URL }}
AWOOOI_SECRET_DATABASE_URL
)"
if [ -z "$PGURL" ]; then
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
exit 1
fi
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
FILES_JSON=$(echo "${{ steps.diff.outputs.new_files }}" | jq -Rn '[inputs | select(length > 0)]')
SUMMARY_JSON=$(jq -cn \
--arg commit_sha "${{ github.sha }}" \
--argjson files "$FILES_JSON" \
'{type: "ci_migration", commit_sha: $commit_sha, files: $files}')
SUMMARY_JSON_SQL=${SUMMARY_JSON//\'/\'\'}
seed_audit() {
local url="$1"
psql "$url" -v ON_ERROR_STOP=1 <<SQL
psql "$PGURL_PSQL" -c "
INSERT INTO asset_discovery_run (
run_id, triggered_by, scope, scan_depth, status,
started_at, ended_at, tools_used, summary
@@ -163,52 +106,26 @@ jobs:
'success',
NOW(),
NOW(),
'{"psql": 1, "gitea_ci": 1}'::jsonb,
'${SUMMARY_JSON_SQL}'::jsonb
'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb,
jsonb_build_object(
'type', 'ci_migration',
'commit_sha', '${{ github.sha }}',
'files', $FILES_JSON
)
);
SQL
}
audit_err="$(mktemp)"
if ! seed_audit "$PGURL_PSQL" 2>"$audit_err"; then
if grep -q "permission denied for table asset_discovery_run" "$audit_err"; then
if [ -z "$OWNER_PGURL_PSQL" ]; then
cat "$audit_err" >&2
echo "::error::audit requires table insert privilege but DATABASE_URL secret is not set"
exit 1
fi
echo "::warning::audit requires owner connection; retrying with owner connection"
seed_audit "$OWNER_PGURL_PSQL"
else
cat "$audit_err" >&2
exit 1
fi
fi
rm -f "$audit_err"
"
- name: Notify Telegram (if configured)
if: always()
env:
TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TG_CHAT: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
run: |
TG_TOKEN="$(cat <<'AWOOOI_SECRET_TG_TOKEN'
${{ secrets.TELEGRAM_BOT_TOKEN }}
AWOOOI_SECRET_TG_TOKEN
)"
STATUS="${{ job.status }}"
CICD_STATUS="success"
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
AWOOI_CICD_STAGE=run-migration \
AWOOI_CICD_JOB_NAME="Migration CI" \
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
AWOOI_CICD_SUMMARY="Migration CI: ${STATUS}" \
scripts/ci/notify-awoooi-cicd.sh; then
echo "Migration notification mirrored through AWOOI API"
exit 0
fi
if [ -n "$TG_TOKEN" ] && [ -n "${{ env.SRE_GROUP_CHAT_ID }}" ]; then
if [ -n "$TG_TOKEN" ] && [ -n "$TG_CHAT" ]; then
STATUS="${{ job.status }}"
MSG="🗄️ Migration CI: \`${STATUS}\` — commit ${{ github.sha }}"
curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
-d chat_id="${{ env.SRE_GROUP_CHAT_ID }}" \
-d chat_id="${TG_CHAT}" \
-d parse_mode="Markdown" \
-d text="${MSG}" || true
fi

View File

@@ -13,10 +13,12 @@
name: CD
# 2026-05-12 Codex: GitHub 僅保留唯讀備份;生產 CI/CD 只能從 Gitea 執行。
# 本 workflow 曾可 push / workflow_dispatch 後 build、patch secret、kubectl apply
# 會和 `.gitea/workflows/cd.yaml` 競爭 K3s production 狀態,因此硬停用。
on:
push:
branches: [main]
paths-ignore:
- 'docs/**'
- '*.md'
workflow_dispatch:
inputs:
force_deploy:
@@ -58,7 +60,6 @@ jobs:
# ==================== Pre-flight Check (10s Fail-Fast) ====================
pre-flight-check:
name: "Pre-flight Check"
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
timeout-minutes: 1
steps:
@@ -132,7 +133,6 @@ jobs:
# 2026-03-29 Claude Code: 確保監控覆蓋率 >= 90%
monitoring-coverage:
name: "Monitoring Coverage"
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
needs: pre-flight-check
timeout-minutes: 2
@@ -152,7 +152,6 @@ jobs:
# ==================== 路徑偵測 (使用 dorny/paths-filter) ====================
detect-changes:
name: Detect Changes
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
needs: [pre-flight-check, monitoring-coverage]
timeout-minutes: 1
@@ -198,7 +197,11 @@ jobs:
runs-on: [self-hosted, harbor, k8s]
needs: [detect-changes, build-web]
timeout-minutes: 20
if: ${{ false }}
if: |
!inputs.skip_api && (
needs.detect-changes.outputs.api == 'true' ||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
)
outputs:
image_tag: ${{ steps.tag.outputs.tag }}
steps:
@@ -235,7 +238,11 @@ jobs:
runs-on: [self-hosted, harbor, k8s]
needs: detect-changes
timeout-minutes: 20
if: ${{ false }}
if: |
!inputs.skip_web && (
needs.detect-changes.outputs.web == 'true' ||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
)
outputs:
image_tag: ${{ steps.tag.outputs.tag }}
steps:
@@ -286,7 +293,7 @@ jobs:
concurrency:
group: runner-awoooi-cd-mutex
cancel-in-progress: false
if: ${{ false }}
if: always() && (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') && (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
environment: production
steps:
# 2026-03-29: Runner 診斷檔案清理 (防止並行衝突)

View File

@@ -14,10 +14,15 @@
name: Deploy to Production
# 2026-05-12 Codex: GitHub 是唯讀備份production deploy 只能從 Gitea 進入。
# 這份歷史 workflow 仍含 Harbor build/push 與 kubectl apply/rollout會和 Gitea CD 競爭。
# 保留檔案供稽核,但停用所有 job。
on:
push:
branches:
- main
paths:
- 'apps/api/**'
- 'apps/web/**'
- 'k8s/awoooi-prod/**'
- '.github/workflows/deploy-prod.yml'
workflow_dispatch:
inputs:
deploy_api:
@@ -65,7 +70,6 @@ jobs:
# ===========================================================================
build:
name: "Build Images"
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
outputs:
image_tag: ${{ steps.meta.outputs.tag }}
@@ -134,7 +138,6 @@ jobs:
deploy:
name: "Deploy to K3s"
needs: build
if: ${{ false }}
runs-on: [self-hosted, harbor, k8s]
steps:
@@ -207,7 +210,7 @@ jobs:
smoke-test:
name: "Smoke Tests"
needs: deploy
if: ${{ false }}
if: ${{ !inputs.skip_tests }}
runs-on: [self-hosted, harbor, k8s]
steps:
@@ -245,7 +248,7 @@ jobs:
notify:
name: "Send Notification"
needs: [build, deploy, smoke-test]
if: ${{ false }}
if: always()
runs-on: [self-hosted, harbor, k8s]
steps:

1
.gitignore vendored
View File

@@ -93,4 +93,3 @@ tsconfig.tsbuildinfo
!.aiderignore
.claude/settings.local.json
.claude/settings.json
.claude/settings.json.bak*

View File

@@ -31,9 +31,6 @@
## 🔴 絕對禁止 → [HARD_RULES.md](docs/HARD_RULES.md)
## 🔴 文件語言鐵律 → [文件語言規範](docs/HARD_RULES.md#文件語言規範)
Markdown、ADR、LOGBOOK、Runbook、交接文件與計畫文件一律使用繁體中文程式符號、API、指令、錯誤碼、服務名稱與原始 log 可保留英文。
## 🔴 紅區治理 → [RED_ZONES.md](docs/RED_ZONES.md)
Tier 3 核心檔案 (decision_manager, trust_engine, config 等) 修改需首席架構師授權

View File

@@ -1 +1 @@
# 2026-06-18 p2-405e-p2-406a telegram rehearsal deploy trigger after runner cache repair
# 2026-04-05 warm-up deploy triggered

View File

@@ -44,6 +44,28 @@ FROM python:3.11-slim
WORKDIR /app
# Copy installed packages from builder
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
# 2026-04-01 ogt: CACHE_BUST 強制失效 src/ 和 models.json 層
# deps 層 (pip install) 仍可 cache代碼/配置變更必須重建
ARG CACHE_BUST=none
COPY apps/api/src/ ./src/
COPY apps/api/models.json ./models.json
# 2026-04-09 ogt: 規則引擎配置 — alert_rule_engine.py 從此檔載入規則
COPY apps/api/alert_rules.yaml ./alert_rules.yaml
# 2026-04-10 Claude Sonnet 4.6: drift_detector 需要 k8s/ YAML 做 Git state 比對
COPY k8s/ ./k8s/
# 2026-04-10 Claude Sonnet 4.6: RAG 知識庫索引來源 (ADR-067 Phase 33)
COPY docs/ ./docs/
COPY .agents/skills/ ./.agents/skills/
# 2026-05-04 Claude Sonnet 4.6 (Task 1.2): hermes agent_loader 的 system prompt 來源
# agent_loader.py 預設讀 /app/.claude/agents/,對應 K8s AGENTS_DIR 環境變數
COPY .claude/agents/ ./.claude/agents/
# 2026-04-12 ogt (ADR-073 P2-1): CronJob 腳本 — 獨立腳本取代 inline Python
COPY scripts/ ./scripts/
# Install openssh-client + curl — SSH_COMMAND Playbook + healthcheck
# Install kubectl — drift_detector 需要 kubectl 讀取 K8s 實際狀態
# (2026-04-09 Claude Sonnet 4.6 Asia/Taipei, Bug #6 修正 — python:3.11-slim 無 openssh-client)
@@ -53,38 +75,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends openssh-client
chmod +x kubectl && mv kubectl /usr/local/bin/kubectl && \
rm -rf /var/lib/apt/lists/*
# Create non-root user before copying app artifacts so COPY --chown can avoid
# an expensive full-tree chown layer on every source-only rebuild.
RUN useradd -m -u 1000 appuser
# Copy installed packages from builder
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
# 2026-04-01 ogt: CACHE_BUST 強制失效 src/ 和 models.json 層
# deps 層 (pip install) 仍可 cache代碼/配置變更必須重建
ARG CACHE_BUST=none
COPY --chown=appuser:appuser apps/api/src/ ./src/
# 2026-04-09 ogt: 規則引擎配置 — alert_rule_engine.py 從此檔載入規則
COPY --chown=appuser:appuser apps/api/models.json ./models.json
COPY --chown=appuser:appuser apps/api/alert_rules.yaml ./alert_rules.yaml
# 2026-04-10 Claude Sonnet 4.6: drift_detector 需要 k8s/ YAML 做 Git state 比對
COPY --chown=appuser:appuser k8s/ ./k8s/
# 2026-05-24 Codex: truth-chain / Ansible readiness needs the repo-known
# playbook catalog in the API image.
# 2026-05-31 Codex: ansible-core is now installed through pyproject.toml so
# this catalog can graduate from visibility-only to check-mode runtime-ready
# once repair SSH material is mounted and readable. This still does not enable
# automatic apply; approval/execution code remains the gate.
COPY --chown=appuser:appuser infra/ansible/ ./infra/ansible/
# 2026-04-10 Claude Sonnet 4.6: RAG 知識庫索引來源 (ADR-067 Phase 33)
COPY --chown=appuser:appuser docs/ ./docs/
COPY --chown=appuser:appuser .agents/skills/ ./.agents/skills/
# 2026-05-04 Claude Sonnet 4.6 (Task 1.2): hermes agent_loader 的 system prompt 來源
# agent_loader.py 預設讀 /app/.claude/agents/,對應 K8s AGENTS_DIR 環境變數
COPY --chown=appuser:appuser .claude/agents/ ./.claude/agents/
# 2026-04-12 ogt (ADR-073 P2-1): CronJob 腳本 — 獨立腳本取代 inline Python
COPY --chown=appuser:appuser scripts/ ./scripts/
# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
# Expose port

View File

@@ -809,9 +809,6 @@ rules:
alertname:
- MoWoooWorkDown
- MoWoooDevDown
- TsenyangWebsiteDown
- StockWoooWorkDown
- BitanWoooWorkDown
- ExternalSiteDown
- WebsiteDown
- BlackboxProbeFailed

View File

@@ -1,49 +0,0 @@
-- ADR-090 capacity_violation_event metric violation types
-- 日期2026-05-07台北
-- 目的:讓 capacity_scanner_job.py 寫入的 cpu/mem/swap 細項違規符合 DB constraint。
--
-- 背景:
-- capacity_scanner_job.py 會寫入:
-- - cpu_over_threshold
-- - mem_over_threshold
-- - swap_over_threshold
-- 但原始 ADR-090 DDL 只允許較粗的 host_saturation導致 production 出現
-- capacity_violation_event_type_valid check violation容量治理事件漏記。
BEGIN;
ALTER TABLE capacity_violation_event
DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid;
ALTER TABLE capacity_violation_event
ADD CONSTRAINT capacity_violation_event_type_valid
CHECK (violation_type IN (
'no_limit_set',
'over_request',
'over_limit',
'host_saturation',
'over_sla_budget',
'unauthorized_new_deploy',
'cpu_over_threshold',
'mem_over_threshold',
'swap_over_threshold',
'load_over_threshold'
));
COMMIT;
-- Rollback需人工確認後執行
-- BEGIN;
-- ALTER TABLE capacity_violation_event
-- DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid;
-- ALTER TABLE capacity_violation_event
-- ADD CONSTRAINT capacity_violation_event_type_valid
-- CHECK (violation_type IN (
-- 'no_limit_set',
-- 'over_request',
-- 'over_limit',
-- 'host_saturation',
-- 'over_sla_budget',
-- 'unauthorized_new_deploy'
-- ));
-- COMMIT;

View File

@@ -1,36 +0,0 @@
-- ADR-090-D: automation_operation_log.operation_type adds Ansible executor audit states
-- Created: 2026-05-12 Taipei
--
-- Purpose:
-- T3 Ansible declarative executor visibility. These operation types allow
-- the AI automation truth chain to record that Ansible was matched,
-- check-mode executed, applied, rolled back, or explicitly skipped.
--
-- Safety:
-- This migration only expands the CHECK allowlist. It does not execute
-- Ansible, change approval behavior, or create auto-remediation rows.
ALTER TABLE automation_operation_log
DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid;
ALTER TABLE automation_operation_log
ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
'monitor_configured','monitor_removed',
'alert_fired','alert_suppressed','alert_routed',
'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
'playbook_generated','playbook_updated','playbook_executed',
'remediation_executed','remediation_verified','remediation_rolled_back',
'self_correction_attempted',
'km_created','km_updated','km_linked',
'asset_discovered','coverage_recalculated',
'capacity_recommendation','quota_enforced',
'notification_formatted',
'ansible_candidate_matched',
'ansible_check_mode_executed',
'ansible_apply_executed',
'ansible_rollback_executed',
'ansible_execution_skipped'
));
COMMENT ON CONSTRAINT automation_operation_log_type_valid ON automation_operation_log IS
'ADR-090-D: allow first-class Ansible executor audit states for AwoooP truth-chain visibility.';

View File

@@ -1,19 +0,0 @@
-- ADR-090-D rollback: remove Ansible executor audit states from operation_type allowlist.
-- Only apply after confirming no automation_operation_log rows use ansible_* operation types.
ALTER TABLE automation_operation_log
DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid;
ALTER TABLE automation_operation_log
ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
'monitor_configured','monitor_removed',
'alert_fired','alert_suppressed','alert_routed',
'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
'playbook_generated','playbook_updated','playbook_executed',
'remediation_executed','remediation_verified','remediation_rolled_back',
'self_correction_attempted',
'km_created','km_updated','km_linked',
'asset_discovered','coverage_recalculated',
'capacity_recommendation','quota_enforced',
'notification_formatted'
));

View File

@@ -1,164 +0,0 @@
-- T9: approved SSH execution MCP Gateway seed
-- 目的:讓 Telegram/Approval 已批准的 SSH 修復動作通過 AwoooP Gateway 五閘門。
-- 邊界:只授權 approval_executorwrite/admin 仍需 Gate 5 短效 approval key。
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH agent_body AS (
SELECT jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', 'approval_executor',
'display_name', 'Approval Executor',
'project_id', 'awoooi',
'purpose', 'Approved SSH execution through AwoooP MCP Gateway',
'allowed_scopes', jsonb_build_array('read', 'write', 'admin'),
'requires_gate5_for_scopes', jsonb_build_array('write', 'admin'),
'stage', 't9_ssh_approval_gateway'
) AS body_json
),
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
'approval_executor',
1,
0,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t9_ssh_approval_gateway',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'approval_executor'
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
SELECT 'approval_executor_active_contracts', count(*) FROM upsert_pointer;
WITH gateway_tools(tool_name, description, required_scope) AS (
VALUES
('ssh_diagnose', 'SSH host diagnosis read', 'read'),
('ssh_docker_restart', 'Approved Docker container restart over SSH', 'write'),
('ssh_docker_compose_restart', 'Approved Docker Compose service restart over SSH', 'write'),
('ssh_systemctl_restart', 'Approved systemd service restart over SSH', 'write'),
('ssh_clear_docker_logs', 'Approved Docker log truncation over SSH', 'write'),
('ssh_renew_ssl', 'Approved certbot renewal over SSH', 'write'),
('ssh_reload_nginx', 'Approved nginx config test and reload over SSH', 'write'),
('ssh_docker_prune', 'Approved Docker prune over SSH with provider disk guard', 'admin')
),
upsert_tools AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
jsonb_build_array(required_scope),
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM gateway_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id, tool_name, allowed_scopes
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
'approval_executor',
tool_id,
'migration:t9_ssh_approval_gateway',
allowed_scopes,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'approval_executor_ssh_gateway',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -1,43 +0,0 @@
-- Rollback for T9 approved SSH execution MCP Gateway seed.
-- Contract revisions are append-only; rollback revokes approval_executor grants
-- and deactivates only the write/admin tools introduced here.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t9_ssh_approval_gateway'
WHERE project_id = 'awoooi'
AND agent_id = 'approval_executor'
AND granted_by = 'migration:t9_ssh_approval_gateway'
AND is_revoked = FALSE;
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name IN (
'ssh_docker_restart',
'ssh_docker_compose_restart',
'ssh_systemctl_restart',
'ssh_clear_docker_logs',
'ssh_renew_ssl',
'ssh_reload_nginx',
'ssh_docker_prune'
);
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'approval_executor';
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'approval_executor'
AND publisher_id = 'migration:t9_ssh_approval_gateway'
AND lifecycle_status = 'active';

View File

@@ -1,159 +0,0 @@
-- T24: auto-repair executor Docker restart MCP Gateway grant
-- 目的:讓已由 PlayBook 標記為 requires_approval=false 的安全容器重啟,
-- 透過 AwoooP MCP Gateway + Gate 5 policy projection 執行與稽核。
-- 邊界:僅授權 ssh_docker_restart/write複雜 shell、systemctl、prune 仍不得自動執行。
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH agent_body AS (
SELECT jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', 'auto_repair_executor',
'display_name', 'Auto Repair Executor',
'project_id', 'awoooi',
'purpose', 'Auto repair diagnostics and safe Docker container restart through AwoooP MCP Gateway',
'allowed_scopes', jsonb_build_array('read', 'write'),
'requires_gate5_for_scopes', jsonb_build_array('write'),
'write_scope_constraints', jsonb_build_object(
'allowed_tools', jsonb_build_array('ssh_docker_restart'),
'required_playbook_requires_approval', false,
'required_trust_score_min', 0.8,
'forbidden_shell_patterns', jsonb_build_array('command_substitution', 'pipe', 'fallback_shell', 'systemd', 'prune')
),
'stage', 't24_auto_repair_docker_restart_gateway'
) AS body_json
),
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
'auto_repair_executor',
1,
1,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.1',
'migration:t24_auto_repair_docker_restart_gateway',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND version_major = 1
AND version_minor = 1
AND lifecycle_status = 'active'
),
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
),
upsert_tool AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
VALUES (
'awoooi',
'ssh_docker_restart',
'mcp_server',
'Policy-approved Docker container restart over SSH for auto-repair',
'["write"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
)
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id, allowed_scopes
),
upsert_grant AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
'auto_repair_executor',
tool_id,
'migration:t24_auto_repair_docker_restart_gateway',
allowed_scopes,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tool
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'auto_repair_executor_docker_restart_gateway',
(SELECT count(*) FROM upsert_pointer) AS active_contract_rows,
(SELECT count(*) FROM upsert_tool) AS tool_rows,
(SELECT count(*) FROM upsert_grant) AS grant_rows;

View File

@@ -1,37 +0,0 @@
-- Rollback T24: revoke auto_repair_executor Docker restart write grant.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t24_auto_repair_docker_restart_gateway'
WHERE project_id = 'awoooi'
AND agent_id = 'auto_repair_executor'
AND granted_by = 'migration:t24_auto_repair_docker_restart_gateway';
WITH previous_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
ORDER BY revision_id DESC
LIMIT 1
)
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT project_id, contract_family, contract_id, revision_id, NOW()
FROM previous_revision
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW();

View File

@@ -1,166 +0,0 @@
-- T23: auto-repair executor read-only MCP Gateway seed
-- 目的:讓 YAML_RULE/PlayBook 的只讀 SSH 診斷步驟經過 AwoooP MCP Gateway。
-- 邊界:只授權 read scopewrite/admin SSH 工具仍必須走 approval_executor + Gate 5。
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH agent_body AS (
SELECT jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', 'auto_repair_executor',
'display_name', 'Auto Repair Executor',
'project_id', 'awoooi',
'purpose', 'Read-only auto-repair diagnostics through AwoooP MCP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't23_auto_repair_diagnostic_gateway'
) AS body_json
),
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
'auto_repair_executor',
1,
0,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t23_auto_repair_executor_read_gateway',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
SELECT 'auto_repair_executor_active_contracts', count(*) FROM upsert_pointer;
WITH read_tools(tool_name, description) AS (
VALUES
('ssh_diagnose', 'SSH host/container diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read')
),
upsert_tools AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id, tool_name, allowed_scopes
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
'auto_repair_executor',
tool_id,
'migration:t23_auto_repair_executor_read_gateway',
allowed_scopes,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'auto_repair_executor_read_gateway',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -1,24 +0,0 @@
-- Rollback T23 auto-repair executor read-only MCP Gateway grant.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t23_auto_repair_executor_read_gateway'
WHERE project_id = 'awoooi'
AND agent_id = 'auto_repair_executor'
AND granted_by = 'migration:t23_auto_repair_executor_read_gateway';
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor';
UPDATE awooop_contract_revisions
SET lifecycle_status = 'retired'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND publisher_id = 'migration:t23_auto_repair_executor_read_gateway'
AND lifecycle_status = 'active';

View File

@@ -1,25 +0,0 @@
-- =============================================================================
-- AwoooP / AWOOOI MCP Gateway Shadow Onboarding
-- 2026-05-13 Codex + ogt
--
-- 背景:
-- AWOOOI 已完成 read-only MCP tool registry / grants seed但 project 本身仍停在
-- legacy_awoooi_default會被 MCP Gateway Gate 1 正確攔截。
--
-- 邊界:
-- 只把 AWOOOI 租戶升到 shadow讓既有 Gate 1 生效。
-- write/admin tool 仍未授權;自動修復/破壞性動作不因本 migration 開放。
-- =============================================================================
BEGIN;
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_projects
SET
migration_mode = 'shadow',
updated_at = NOW()
WHERE project_id = 'awoooi'
AND migration_mode = 'legacy_awoooi_default';
COMMIT;

View File

@@ -1,20 +0,0 @@
-- =============================================================================
-- Rollback: AwoooP / AWOOOI MCP Gateway Shadow Onboarding
-- 2026-05-13 Codex + ogt
--
-- 只回退仍停在 shadow 的 AWOOOI若已由人工/後續 migration 推進到 canary/active
-- 不自動降級。
-- =============================================================================
BEGIN;
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_projects
SET
migration_mode = 'legacy_awoooi_default',
updated_at = NOW()
WHERE project_id = 'awoooi'
AND migration_mode = 'shadow';
COMMIT;

View File

@@ -1,211 +0,0 @@
-- T7: awoooi read-only MCP Gateway seed
-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3產生 first-class audit。
-- 邊界:只授權 read scope不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH agent_seed(agent_id, display_name) AS (
VALUES
('pre_decision_investigator', 'Pre-decision Investigator'),
('post_execution_verifier', 'Post-execution Verifier')
),
agent_body AS (
SELECT
agent_id,
jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', agent_id,
'display_name', display_name,
'project_id', 'awoooi',
'purpose', 'Read-only MCP sensing through AwoooP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't7_mcp_gateway_read_sense'
) AS body_json
FROM agent_seed
),
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
agent_id,
1,
0,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t7_mcp_gateway_read_seed',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN (SELECT agent_id FROM agent_seed)
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;
WITH read_tools(tool_name, description) AS (
VALUES
('k8s_get_pod_logs', 'Kubernetes pod logs read'),
('k8s_get_events', 'Kubernetes events read'),
('k8s_describe_pod', 'Kubernetes pod describe read'),
('k8s_get_hpa_status', 'Kubernetes HPA status read'),
('k8s_get_node_conditions', 'Kubernetes node conditions read'),
('ssh_diagnose', 'SSH host diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read'),
('prometheus_query', 'Prometheus instant query read'),
('prometheus_query_range', 'Prometheus range query read'),
('prometheus_get_alert_history', 'Prometheus alert history read'),
('gold_metrics', 'SigNoz gold metrics read'),
('trace_url', 'SigNoz trace URL read'),
('system_metrics', 'SigNoz system metrics read'),
('query_logs', 'SigNoz logs read'),
('error_logs_summary', 'SigNoz error logs summary read'),
('list_approvals', 'Approval records read'),
('get_approval', 'Approval detail read'),
('list_incidents', 'Incident records read'),
('list_timeline', 'Timeline records read'),
('read_file', 'Filesystem allowlisted file read'),
('list_directory', 'Filesystem allowlisted directory read'),
('search_in_file', 'Filesystem allowlisted file search'),
('list_dashboards', 'Grafana dashboards read'),
('get_dashboard', 'Grafana dashboard read'),
('get_panel_data', 'Grafana panel data read'),
('generate_dashboard_url', 'Grafana dashboard URL read'),
('search_runbook', 'Runbook semantic search read'),
('get_index_stats', 'Runbook index stats read'),
('argocd_list_apps', 'ArgoCD apps read'),
('argocd_get_app_status', 'ArgoCD app status read'),
('argocd_get_sync_history', 'ArgoCD sync history read'),
('sentry_list_issues', 'Sentry issues read'),
('sentry_get_issue', 'Sentry issue detail read'),
('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id
),
grant_agents(agent_id) AS (
VALUES
('pre_decision_investigator'),
('post_execution_verifier')
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
grant_agents.agent_id,
upsert_tools.tool_id,
'migration:t7_mcp_gateway_read_seed',
'["read"]'::jsonb,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
CROSS JOIN grant_agents
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'awoooi_read_tools',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -1,77 +0,0 @@
-- Rollback for T7 awoooi read-only MCP Gateway seed.
-- Contract revisions are append-only; rollback revokes grants and deactivates the seeded read tools.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t7_mcp_gateway_read_seed'
WHERE project_id = 'awoooi'
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND granted_by = 'migration:t7_mcp_gateway_read_seed'
AND is_revoked = FALSE;
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name IN (
'k8s_get_pod_logs',
'k8s_get_events',
'k8s_describe_pod',
'k8s_get_hpa_status',
'k8s_get_node_conditions',
'ssh_diagnose',
'ssh_get_top_processes',
'ssh_get_disk_usage',
'ssh_get_memory_info',
'ssh_get_container_logs',
'ssh_get_container_status',
'ssh_get_service_status',
'ssh_check_port',
'ssh_get_nginx_error_log',
'ssh_get_swap_info',
'prometheus_query',
'prometheus_query_range',
'prometheus_get_alert_history',
'gold_metrics',
'trace_url',
'system_metrics',
'query_logs',
'error_logs_summary',
'list_approvals',
'get_approval',
'list_incidents',
'list_timeline',
'read_file',
'list_directory',
'search_in_file',
'list_dashboards',
'get_dashboard',
'get_panel_data',
'generate_dashboard_url',
'search_runbook',
'get_index_stats',
'argocd_list_apps',
'argocd_get_app_status',
'argocd_get_sync_history',
'sentry_list_issues',
'sentry_get_issue',
'sentry_search_issues'
);
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier');
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
AND lifecycle_status = 'active';

View File

@@ -1,213 +0,0 @@
-- T7: awoooi read-only MCP Gateway seed
-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3產生 first-class audit。
-- 邊界:只授權 read scope不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH agent_seed(agent_id, display_name) AS (
VALUES
('pre_decision_investigator', 'Pre-decision Investigator'),
('post_execution_verifier', 'Post-execution Verifier')
),
agent_body AS (
SELECT
agent_id,
jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', agent_id,
'display_name', display_name,
'project_id', 'awoooi',
'purpose', 'Read-only MCP sensing through AwoooP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't7_mcp_gateway_read_sense'
) AS body_json
FROM agent_seed
),
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
agent_id,
1,
0,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t7_mcp_gateway_read_seed',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN (SELECT agent_id FROM agent_seed)
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;
WITH read_tools(tool_name, description) AS (
VALUES
('k8s_get_pod_logs', 'Kubernetes pod logs read'),
('k8s_get_events', 'Kubernetes events read'),
('k8s_describe_pod', 'Kubernetes pod describe read'),
('k8s_get_hpa_status', 'Kubernetes HPA status read'),
('k8s_get_node_conditions', 'Kubernetes node conditions read'),
('ssh_diagnose', 'SSH host diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read'),
('prometheus_query', 'Prometheus instant query read'),
('prometheus_query_range', 'Prometheus range query read'),
('prometheus_get_alert_history', 'Prometheus alert history read'),
('gold_metrics', 'SigNoz gold metrics read'),
('trace_url', 'SigNoz trace URL read'),
('system_metrics', 'SigNoz system metrics read'),
('query_logs', 'SigNoz logs read'),
('error_logs_summary', 'SigNoz error logs summary read'),
('list_approvals', 'Approval records read'),
('get_approval', 'Approval detail read'),
('list_incidents', 'Incident records read'),
('list_timeline', 'Timeline records read'),
('read_file', 'Filesystem allowlisted file read'),
('list_directory', 'Filesystem allowlisted directory read'),
('search_in_file', 'Filesystem allowlisted file search'),
('list_dashboards', 'Grafana dashboards read'),
('get_dashboard', 'Grafana dashboard read'),
('get_panel_data', 'Grafana panel data read'),
('generate_dashboard_url', 'Grafana dashboard URL read'),
('search_runbook', 'Runbook semantic search read'),
('get_index_stats', 'Runbook index stats read'),
('argocd_list_apps', 'ArgoCD apps read'),
('argocd_get_app_status', 'ArgoCD app status read'),
('argocd_get_sync_history', 'ArgoCD sync history read'),
('sentry_list_issues', 'Sentry issues read'),
('sentry_get_issue', 'Sentry issue detail read'),
('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id
),
grant_agents(agent_id) AS (
VALUES
('pre_decision_investigator'),
('post_execution_verifier')
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
grant_agents.agent_id,
upsert_tools.tool_id,
'migration:t7_mcp_gateway_read_seed',
'["read"]'::jsonb,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
CROSS JOIN grant_agents
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'awoooi_read_tools',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;
-- v4 exists only to retrigger run-migration after Gitea skipped the v2->v3 rename-only push.

View File

@@ -1,79 +0,0 @@
-- Rollback for T7 awoooi read-only MCP Gateway seed.
-- Contract revisions are append-only; rollback revokes grants and deactivates the seeded read tools.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t7_mcp_gateway_read_seed'
WHERE project_id = 'awoooi'
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND granted_by = 'migration:t7_mcp_gateway_read_seed'
AND is_revoked = FALSE;
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name IN (
'k8s_get_pod_logs',
'k8s_get_events',
'k8s_describe_pod',
'k8s_get_hpa_status',
'k8s_get_node_conditions',
'ssh_diagnose',
'ssh_get_top_processes',
'ssh_get_disk_usage',
'ssh_get_memory_info',
'ssh_get_container_logs',
'ssh_get_container_status',
'ssh_get_service_status',
'ssh_check_port',
'ssh_get_nginx_error_log',
'ssh_get_swap_info',
'prometheus_query',
'prometheus_query_range',
'prometheus_get_alert_history',
'gold_metrics',
'trace_url',
'system_metrics',
'query_logs',
'error_logs_summary',
'list_approvals',
'get_approval',
'list_incidents',
'list_timeline',
'read_file',
'list_directory',
'search_in_file',
'list_dashboards',
'get_dashboard',
'get_panel_data',
'generate_dashboard_url',
'search_runbook',
'get_index_stats',
'argocd_list_apps',
'argocd_get_app_status',
'argocd_get_sync_history',
'sentry_list_issues',
'sentry_get_issue',
'sentry_search_issues'
);
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier');
UPDATE awooop_contract_revisions
SET lifecycle_status = 'revoked'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
AND lifecycle_status = 'active';
-- v4 rollback companion for the retrigger migration.

View File

@@ -1,77 +0,0 @@
-- T16 verifier gap: allow rollout status evidence through AwoooP MCP Gateway.
-- Boundary: read-only scope only; no restart/delete/scale grant is added here.
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH upsert_tool AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
VALUES (
'awoooi',
'k8s_watch_rollout',
'mcp_server',
'Kubernetes deployment rollout status read',
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
)
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id
),
grant_agents(agent_id) AS (
VALUES
('pre_decision_investigator'),
('post_execution_verifier')
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
grant_agents.agent_id,
upsert_tool.tool_id,
'migration:t16_rollout_verifier_seed',
'["read"]'::jsonb,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tool
CROSS JOIN grant_agents
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'k8s_watch_rollout_read_grants' AS seed,
(SELECT count(*) FROM upsert_tool) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -1,24 +0,0 @@
-- Roll back T16 rollout verifier read grant seed.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET
is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'migration:t16_rollout_verifier_seed_down'
WHERE project_id = 'awoooi'
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
AND tool_id IN (
SELECT tool_id
FROM awooop_mcp_tool_registry
WHERE project_id = 'awoooi'
AND tool_name = 'k8s_watch_rollout'
);
UPDATE awooop_mcp_tool_registry
SET
is_active = FALSE,
updated_at = NOW()
WHERE project_id = 'awoooi'
AND tool_name = 'k8s_watch_rollout';

View File

@@ -1,14 +0,0 @@
-- AwoooP Phase 5bMCP Gateway blocked call 稽核覆蓋
-- 日期2026-05-06
-- 維護者Codex
--
-- Gate 1 / Gate 2 / 未知工具的 blocked call 可能發生在 tool registry row
-- 取得之前。這些安全決策仍必須落稽核紀錄,因此 tool_id 允許為 NULL
-- 但 tool_name 仍維持必填,作為未知工具與早期 gate block 的追蹤線索。
BEGIN;
ALTER TABLE awooop_mcp_gateway_audit
ALTER COLUMN tool_id DROP NOT NULL;
COMMIT;

View File

@@ -1,21 +0,0 @@
-- AwoooP Phase 7 T15b: inbound event truth-chain columns
--
-- Purpose:
-- Telegram cards are only the notification surface. Operators need a
-- redacted replay envelope for inbound alerts so Alertmanager, Sentry, and
-- SignOz events can be correlated with incidents, approvals, logs, and
-- automation decisions without storing raw secrets or PII.
ALTER TABLE awooop_conversation_event
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
COMMENT ON COLUMN awooop_conversation_event.content_redacted IS
'Full inbound event content after audit_sink redaction; raw unredacted payload text is not stored.';
COMMENT ON COLUMN awooop_conversation_event.redaction_version IS
'Redaction algorithm/version used for content_redacted and source_envelope.';
COMMENT ON COLUMN awooop_conversation_event.source_envelope IS
'Redacted source metadata for inbound replay/audit, including payload hash, provider, source refs, and log correlation hints.';

View File

@@ -1,6 +0,0 @@
-- Rollback for AwoooP Phase 7 T15b inbound truth-chain columns.
-- Safe only if no consumers depend on the redacted replay fields.
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS source_envelope;
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS redaction_version;
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS content_redacted;

View File

@@ -1,21 +0,0 @@
-- AwoooP Phase 7 T1: outbound message truth-chain columns
--
-- Purpose:
-- Telegram must remain a summary channel, but the operator console needs a
-- complete redacted replay of the rendered card and the source envelope that
-- produced it. Store redacted content only; raw unredacted Telegram text stays
-- out of PostgreSQL.
ALTER TABLE awooop_outbound_message
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
COMMENT ON COLUMN awooop_outbound_message.content_redacted IS
'Full rendered outbound content after audit_sink redaction; raw unredacted text is not stored.';
COMMENT ON COLUMN awooop_outbound_message.redaction_version IS
'Redaction algorithm/version used for content_redacted and source_envelope.';
COMMENT ON COLUMN awooop_outbound_message.source_envelope IS
'Redacted source metadata for replay/audit, including payload hash and adapter context.';

View File

@@ -1,6 +0,0 @@
-- Rollback for AwoooP Phase 7 T1 outbound truth-chain columns.
-- Safe only if no consumers depend on the redacted replay fields.
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS source_envelope;
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS redaction_version;
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS content_redacted;

View File

@@ -10,8 +10,7 @@
-- 2. rag_chunks.embedding vector(768) → vector(1024)
-- 3. playbook_embeddings.embedding vector(768) → vector(1024)
--
-- 遷移策略:僅在欄位不是 vector(1024) 時清空現有向量資料,切換維度後由 re-embed script 重新嵌入
-- 已經是 vector(1024) 的環境重跑本 migration 時,必須保留既有向量資料。
-- 遷移策略:清空現有向量資料,切換維度後由 re-embed script 重新嵌入
-- 現有向量資料若要保留,需先 dump 用 nomic 格式備份(舊維度無法轉換)
--
-- 執行前置條件:
@@ -24,38 +23,17 @@
BEGIN;
-- 1. knowledge_entries備份舊向量並清空變更欄位維度
DO $$
DECLARE
v_dim integer;
BEGIN
SELECT a.atttypmod INTO v_dim
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
WHERE c.relname = 'knowledge_entries'
AND a.attname = 'embedding';
CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
SELECT
id,
embedding::text AS embedding_768,
NOW() AS backed_up_at
FROM knowledge_entries
WHERE embedding IS NOT NULL;
IF v_dim IS DISTINCT FROM 1024 THEN
EXECUTE $sql$
CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
SELECT
id,
embedding::text AS embedding_768,
NOW() AS backed_up_at
FROM knowledge_entries
WHERE embedding IS NOT NULL
$sql$;
EXECUTE $sql$
ALTER TABLE knowledge_entries
ALTER COLUMN embedding TYPE vector(1024)
USING NULL
$sql$;
RAISE NOTICE 'knowledge_entries.embedding migrated from vector(%) to vector(1024); old embeddings were backed up and cleared', v_dim;
ELSE
RAISE NOTICE 'knowledge_entries.embedding already vector(1024); existing embeddings preserved';
END IF;
END $$;
ALTER TABLE knowledge_entries
ALTER COLUMN embedding TYPE vector(1024)
USING NULL; -- 清空現有 768 維向量(維度不可轉換)
COMMENT ON COLUMN knowledge_entries.embedding IS
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';
@@ -63,29 +41,11 @@ COMMENT ON COLUMN knowledge_entries.embedding IS
-- 2. rag_chunks清空向量資料變更欄位維度
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
DO $$
DECLARE
v_dim integer;
BEGIN
SELECT a.atttypmod INTO v_dim
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
WHERE c.relname = 'rag_chunks'
AND a.attname = 'embedding';
DROP INDEX IF EXISTS idx_rag_chunks_embedding;
IF v_dim IS DISTINCT FROM 1024 THEN
EXECUTE 'DROP INDEX IF EXISTS idx_rag_chunks_embedding';
EXECUTE $sql$
ALTER TABLE rag_chunks
ALTER COLUMN embedding TYPE vector(1024)
USING NULL
$sql$;
RAISE NOTICE 'rag_chunks.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
ELSE
RAISE NOTICE 'rag_chunks.embedding already vector(1024); existing embeddings preserved';
END IF;
END $$;
ALTER TABLE rag_chunks
ALTER COLUMN embedding TYPE vector(1024)
USING NULL; -- 清空現有 768 維向量(維度不可轉換)
-- 重建 ivfflat indexlists=100 適合 ~10k 筆以下資料)
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
@@ -98,29 +58,11 @@ COMMENT ON COLUMN rag_chunks.embedding IS
-- 3. playbook_embeddings清空向量資料變更欄位維度
DO $$
DECLARE
v_dim integer;
BEGIN
SELECT a.atttypmod INTO v_dim
FROM pg_attribute a
JOIN pg_class c ON a.attrelid = c.oid
WHERE c.relname = 'playbook_embeddings'
AND a.attname = 'embedding';
DROP INDEX IF EXISTS ix_playbook_embeddings_vec;
IF v_dim IS DISTINCT FROM 1024 THEN
EXECUTE 'DROP INDEX IF EXISTS ix_playbook_embeddings_vec';
EXECUTE $sql$
ALTER TABLE playbook_embeddings
ALTER COLUMN embedding TYPE vector(1024)
USING NULL
$sql$;
RAISE NOTICE 'playbook_embeddings.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
ELSE
RAISE NOTICE 'playbook_embeddings.embedding already vector(1024); existing embeddings preserved';
END IF;
END $$;
ALTER TABLE playbook_embeddings
ALTER COLUMN embedding TYPE vector(1024)
USING NULL; -- 清空現有 768 維向量
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
ON playbook_embeddings

View File

@@ -25,7 +25,7 @@
"log_anomaly": "deepseek-r1:14b",
"nemoclaw": "deepseek-r1:14b",
"playbook_draft": "qwen3:14b",
"code_review": "qwen2.5-coder:7b",
"code_review": "qwen2.5-coder:32b",
"embedding": "bge-m3:latest",
"rag_generate": "qwen3:14b",
"image_analysis": "minicpm-v:latest",
@@ -175,7 +175,7 @@
},
"pr_code_review": {
"phase": 32,
"model": "qwen2.5-coder:7b",
"model": "qwen2.5-coder:32b",
"timeout_seconds": 120,
"purpose": "Gitea PR 自動審查"
},

View File

@@ -46,10 +46,6 @@ dependencies = [
# 2026-04-16 ogt + Claude Sonnet 4.6: SSH MCP sensor 修復 — asyncssh 缺失導致 sensors_succeeded=0
# 根因: ssh_provider.py 中 import asyncssh 在 try/except 外,所有 15 個 SSH tool 直接 ImportError
"asyncssh>=2.14.0",
# 2026-05-31 Codex: AwoooP truth-chain Ansible runtime gate 需要
# production API image 內真的存在 ansible-playbook否則只能顯示
# candidate audit無法進入 check-mode executor readiness。
"ansible-core>=2.16.0,<2.18.0",
]
# [tool.uv.sources]

View File

@@ -58,8 +58,3 @@ pytest>=7.4.0
pytest-asyncio>=0.23.0
ruff>=0.1.0
sentry-sdk[fastapi]>=2.0.0
# AwoooP Ansible runtime readiness
# 2026-05-31 Codex: production API image must include ansible-playbook before
# truth-chain can honestly mark check-mode executor readiness as available.
ansible-core>=2.16.0,<2.18.0

View File

@@ -9,7 +9,7 @@ AwoooP Phase 1 Batch 1 回填腳本
awooop_phase1_batch1_rls_2026-05-04.sql Step AADD COLUMN nullable已執行
執行方式:
從 secret manager / operator vault 設定 DATABASE_URL禁止在指令或檔案中寫入 URL。
export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
cd apps/api && python scripts/awooop_phase1_batch1_backfill.py
2026-05-04 ogt + Claude Sonnet 4.6ADR-118 Batch 1 C-3 修正)

View File

@@ -37,7 +37,6 @@ logging = structlog.get_logger(__name__)
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://34.143.170.20:11434")
EMBEDDING_MODEL = "bge-m3:latest"
EXPECTED_DIM = 1024
PROJECT_ID = os.getenv("AWOOOP_PROJECT_ID", "awoooi")
async def embed_text(client: httpx.AsyncClient, text: str) -> list[float]:
@@ -163,7 +162,6 @@ async def main(dry_run: bool, batch_size: int) -> None:
conn = await asyncpg.connect(database_url)
try:
await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID)
# 統計待嵌入筆數
rag_null = await conn.fetchval("SELECT COUNT(*) FROM rag_chunks WHERE embedding IS NULL")
pb_null = await conn.fetchval("SELECT COUNT(*) FROM playbook_embeddings WHERE embedding IS NULL")

View File

@@ -15,7 +15,7 @@ from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine
# 2026-04-22 ogt: 移除硬碼 changeme改為讀取環境變數強制要求設定
# 執行前: 從 secret manager / operator vault 設定 DATABASE_URL禁止在指令或檔案中寫入 URL。
# 執行前: export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
DATABASE_URL = os.environ["DATABASE_URL"]
MIGRATION_SQLS = [

View File

@@ -28,7 +28,7 @@ except ImportError:
# ============================================================================
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.110:11435")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.188:11434")
if not NVIDIA_API_KEY:
print("❌ 請設定 NVIDIA_API_KEY 環境變數")

View File

@@ -227,13 +227,12 @@ Phase 4 動態異常偵測AI 主動巡檢結果,可作為高信心佐證)
latency_ms: int,
reason: str = "unknown",
) -> DiagnosisReport:
"""熔斷降級:只保留已知告警事實,不把 Docker/host memory 誤寫成 K8s OOM。"""
"""熔斷降級:rule-based mock用 alert_category 作簡單假設)"""
category = _guess_category_from_snapshot(snapshot)
description = _build_degraded_description(snapshot, reason, category)
return DiagnosisReport(
hypotheses=[
Hypothesis(
description=description,
description=f"[降級] 無法完成 LLM 分析(原因: {reason})。基於告警類別推測: {category}",
confidence=0.2,
evidence_chain=[],
category=category,
@@ -301,48 +300,11 @@ def _extract_hypotheses(parsed: dict[str, Any]) -> list[Hypothesis]:
return hypotheses
def _build_degraded_description(
snapshot: "EvidenceSnapshot",
reason: str,
category: str,
) -> str:
"""組裝降級診斷文案,明確標示這不是 LLM 根因判定。"""
alert_name, labels = _alert_identity(snapshot)
parts = [f"[降級] 無法完成 LLM 分析(原因: {reason}"]
if alert_name:
parts.append(f"保留原始告警: {alert_name}")
target = _first_label(labels, "container_name", "name", "pod", "resource", "service")
host = _first_label(labels, "host", "exported_host", "instance")
if target:
parts.append(f"target={target}")
if host:
parts.append(f"host={host}")
parts.append(f"降級分類: {category}")
return "".join(parts)
def _guess_category_from_snapshot(snapshot: "EvidenceSnapshot") -> str:
"""降級時從 snapshot 推導保守分類,優先保留原始 alertname"""
alert_name, labels = _alert_identity(snapshot)
if alert_name:
return alert_name
"""降級時從 snapshot 猜測告警類別(最粗粒度兜底)"""
summary = (snapshot.evidence_summary or "").lower()
layer = str(labels.get("layer") or "").lower()
job = str(labels.get("job") or "").lower()
has_container = bool(_first_label(labels, "container_name", "container", "name"))
has_k8s_pod = bool(_first_label(labels, "pod")) or "k8s" in summary or "kubernetes" in summary
has_memory_signal = _contains_memory_signal(summary)
if has_memory_signal and (
layer == "docker" or "cadvisor" in job or has_container
):
return "DockerContainerMemoryPressure"
if "oom" in summary and has_k8s_pod:
if "oom" in summary or "memory" in summary:
return "KubePodOOM"
if has_memory_signal:
return "MemoryPressure"
if "crashloop" in summary:
return "KubePodCrashLoop"
if "disk" in summary:
@@ -354,56 +316,6 @@ def _guess_category_from_snapshot(snapshot: "EvidenceSnapshot") -> str:
return "Unknown"
def _alert_identity(snapshot: "EvidenceSnapshot") -> tuple[str, dict[str, Any]]:
"""Extract alertname and labels from structured alert_info when available."""
info = getattr(snapshot, "alert_info", None) or {}
labels = info.get("labels") if isinstance(info, dict) else {}
if not isinstance(labels, dict):
labels = {}
alert_name = ""
if isinstance(info, dict):
alert_name = str(info.get("alert_name") or "").strip()
if not alert_name:
alert_name = str(labels.get("alertname") or "").strip()
if not alert_name:
alert_name = _extract_alertname_from_summary(getattr(snapshot, "evidence_summary", "") or "")
return alert_name, labels
def _contains_memory_signal(summary: str) -> bool:
return any(term in summary for term in ("memory", "mem", "記憶體", "內存"))
def _extract_alertname_from_summary(summary: str) -> str:
"""Best-effort parse for older snapshots whose structured alert_info is absent."""
marker = "'alert_name': '"
if marker in summary:
after = summary.split(marker, 1)[1]
return after.split("'", 1)[0].strip()
marker = '"alert_name": "'
if marker in summary:
after = summary.split(marker, 1)[1]
return after.split('"', 1)[0].strip()
marker = "'alertname': '"
if marker in summary:
after = summary.split(marker, 1)[1]
return after.split("'", 1)[0].strip()
marker = '"alertname": "'
if marker in summary:
after = summary.split(marker, 1)[1]
return after.split('"', 1)[0].strip()
return ""
def _first_label(labels: dict[str, Any], *keys: str) -> str:
for key in keys:
value = labels.get(key)
if value:
return str(value).strip()
return ""
def compute_input_hash(snapshot: "EvidenceSnapshot") -> str:
"""計算 Diagnostician 輸入的 fingerprint用於 AgentSession input_hash"""
key = (snapshot.snapshot_id or "") + (snapshot.evidence_summary or "")[:100]

File diff suppressed because it is too large Load Diff

View File

@@ -22,48 +22,17 @@ from datetime import datetime
from typing import Annotated
import structlog
from fastapi import APIRouter, HTTPException, Query
from fastapi import APIRouter, Query
from src.models.governance import (
GovernanceEventsResponse,
GovernanceQueueResponse,
GovernanceSummaryResponse,
KnowledgeReviewDraftArchiveRequest,
KnowledgeReviewDraftArchiveResponse,
KnowledgeReviewDraftDedupeResponse,
KnowledgeStaleCandidatesResponse,
KnowledgeStaleOwnerReviewBatchQueueRequest,
KnowledgeStaleOwnerReviewBatchQueueResponse,
KnowledgeStaleOwnerReviewBurnDownResponse,
KnowledgeStaleOwnerReviewCompleteRequest,
KnowledgeStaleOwnerReviewCompleteResponse,
KnowledgeStaleOwnerReviewCompletionBatchPreviewRequest,
KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse,
KnowledgeStaleOwnerReviewCompletionQueueResponse,
KnowledgeStaleOwnerReviewInboxResponse,
KnowledgeStaleOwnerReviewRequest,
KnowledgeStaleOwnerReviewResponse,
)
from src.services.governance_km_review_service import (
KmReviewDraftArchiveError,
archive_km_review_draft_duplicates,
)
from src.services.governance_km_stale_review_service import (
KmStaleOwnerReviewError,
batch_queue_km_stale_owner_reviews,
complete_km_stale_owner_review,
preview_km_stale_owner_review_completion_batch,
query_km_stale_owner_review_burndown,
query_km_stale_owner_review_completion_queue,
query_km_stale_owner_review_inbox,
queue_km_stale_owner_review,
)
from src.services.governance_query_service import (
query_governance_events,
query_governance_queue,
query_governance_summary,
query_km_review_draft_dedupe,
query_km_stale_candidates,
)
logger = structlog.get_logger(__name__)
@@ -77,7 +46,6 @@ router = APIRouter()
@router.get("/ai/governance/events", response_model=GovernanceEventsResponse)
async def get_governance_events(
event_id: Annotated[list[str] | None, Query(alias="event_id")] = None,
event_type: Annotated[list[str] | None, Query(alias="event_type")] = None,
from_: Annotated[datetime | None, Query(alias="from")] = None,
to: Annotated[datetime | None, Query(alias="to")] = None,
@@ -90,7 +58,6 @@ async def get_governance_events(
查詢 AI 治理事件列表(分頁)。
- event_type: 多值過濾(可重複傳)
- event_id: 多值精準過濾(可重複傳),供 Telegram 詳情 / 歷史與 Work Items 錨點回看
- from / to: ISO 8601 時間範圍URL 傳 from 參數)
- status: resolved / unresolved
- severity: critical / warning / info由 event_type 映射決定)
@@ -99,7 +66,6 @@ async def get_governance_events(
"""
logger.debug(
"governance_events_request",
event_ids=event_id,
event_types=event_type,
from_=from_,
to=to,
@@ -109,7 +75,6 @@ async def get_governance_events(
size=size,
)
return await query_governance_events(
event_ids=event_id,
event_types=event_type,
from_dt=from_,
to_dt=to,
@@ -128,9 +93,8 @@ async def get_governance_events(
async def get_governance_queue(
dispatch_status: Annotated[
str,
Query(pattern="^(all|pending|dispatched|executing|succeeded|failed|skipped|cancelled)$"),
Query(pattern="^(pending|dispatched|succeeded|failed)$"),
] = "pending",
event_type: Annotated[list[str] | None, Query(alias="event_type")] = None,
page: Annotated[int, Query(ge=1)] = 1,
size: Annotated[int, Query(ge=10, le=100)] = 20,
) -> GovernanceQueueResponse:
@@ -140,360 +104,22 @@ async def get_governance_queue(
governance_remediation_dispatch 表由 Track D 建立,尚未完成時
本 endpoint 回傳 { table_pending: true, items: [], total: 0 },不拋 500。
- dispatch_status: pendingdefault/ dispatched / executing / succeeded / failed / skipped / cancelled / all
- event_type: 多值過濾(可重複傳)
- dispatch_status: pendingdefault/ dispatched / succeeded / failed
- page / size: 分頁
"""
logger.debug(
"governance_queue_request",
dispatch_status=dispatch_status,
event_type=event_type,
page=page,
size=size,
)
return await query_governance_queue(
dispatch_status=dispatch_status,
event_types=event_type,
page=page,
size=size,
)
# =============================================================================
# GET /api/v1/ai/governance/km-review-drafts/dedupe
# =============================================================================
@router.get(
"/ai/governance/km-review-drafts/dedupe",
response_model=KnowledgeReviewDraftDedupeResponse,
)
async def get_km_review_draft_dedupe(
limit: Annotated[int, Query(ge=10, le=200)] = 100,
) -> KnowledgeReviewDraftDedupeResponse:
"""
查詢 Hermes KM healthcheck review drafts 的去重 read model。
這是 read-only owner review surface只回傳 canonical / duplicate /
owner_action不自動 archive、不自動 approve/publish KM。
"""
logger.debug("km_review_draft_dedupe_request", limit=limit)
return await query_km_review_draft_dedupe(limit=limit)
# =============================================================================
# POST /api/v1/ai/governance/km-review-drafts/dedupe/{event_id}/archive-duplicates
# =============================================================================
@router.post(
"/ai/governance/km-review-drafts/dedupe/{governance_event_id}/archive-duplicates",
response_model=KnowledgeReviewDraftArchiveResponse,
)
async def post_km_review_draft_archive_duplicates(
governance_event_id: str,
request: KnowledgeReviewDraftArchiveRequest,
) -> KnowledgeReviewDraftArchiveResponse:
"""
Owner 審核後封存 Hermes KM healthcheck duplicate review drafts。
這不是 read endpoint必須明確傳 owner_approved=true且後端會重新比對
最新 dedupe plan。封存為 KnowledgeEntry.status=archived不刪除資料。
"""
logger.info(
"km_review_draft_archive_request",
governance_event_id=governance_event_id,
canonical_entry_id=request.canonical_entry_id,
duplicate_count=len(request.duplicate_entry_ids),
owner=request.owner,
dry_run=request.dry_run,
owner_approved=request.owner_approved,
)
try:
return await archive_km_review_draft_duplicates(
governance_event_id=governance_event_id,
request=request,
)
except KmReviewDraftArchiveError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# GET /api/v1/ai/governance/km-stale-candidates
# =============================================================================
@router.get(
"/ai/governance/km-stale-candidates",
response_model=KnowledgeStaleCandidatesResponse,
)
async def get_km_stale_candidates(
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
limit: Annotated[int, Query(ge=5, le=100)] = 20,
) -> KnowledgeStaleCandidatesResponse:
"""
查詢 stale KM 的 read-only 優先處理清單。
Hermes 可以用這個 read model 產生 KM 更新草稿owner console 則能先看
哪些條目有 Incident / Sentry / SigNoz / PlayBook 脈絡,避免只看到總數。
"""
logger.debug(
"km_stale_candidates_request",
project_id=project_id,
limit=limit,
)
return await query_km_stale_candidates(project_id=project_id, limit=limit)
# =============================================================================
# GET /api/v1/ai/governance/km-stale-owner-reviews
# =============================================================================
@router.get(
"/ai/governance/km-stale-owner-reviews",
response_model=KnowledgeStaleOwnerReviewInboxResponse,
)
async def get_km_stale_owner_reviews(
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
dispatch_status: Annotated[
str,
Query(pattern="^(all|pending|dispatched|executing|succeeded|failed|skipped|cancelled)$"),
] = "pending",
limit: Annotated[int, Query(ge=5, le=100)] = 20,
) -> KnowledgeStaleOwnerReviewInboxResponse:
"""
查詢 stale KM owner-review 工作台。
這是 read-only inbox把 dispatch trail 與 KM priority context 合併,
讓 operator 可以依 P0/P1、score、batch 來源與流程階段逐筆 completion。
"""
logger.debug(
"km_stale_owner_reviews_request",
project_id=project_id,
dispatch_status=dispatch_status,
limit=limit,
)
try:
return await query_km_stale_owner_review_inbox(
project_id=project_id,
dispatch_status=dispatch_status,
limit=limit,
)
except KmStaleOwnerReviewError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# GET /api/v1/ai/governance/km-stale-owner-review-burndown
# =============================================================================
@router.get(
"/ai/governance/km-stale-owner-review-burndown",
response_model=KnowledgeStaleOwnerReviewBurnDownResponse,
)
async def get_km_stale_owner_review_burndown(
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
limit: Annotated[int, Query(ge=1, le=100)] = 20,
) -> KnowledgeStaleOwnerReviewBurnDownResponse:
"""
查詢 stale KM owner-review 完成與 stale ratio burn-down 狀態。
這是 read-only dashboard把 pending review、completion audit、recheck
snapshot 與距離治理門檻的剩餘筆數放在同一個前端面板。
"""
logger.debug(
"km_stale_owner_review_burndown_request",
project_id=project_id,
limit=limit,
)
return await query_km_stale_owner_review_burndown(
project_id=project_id,
limit=limit,
)
# =============================================================================
# GET /api/v1/ai/governance/km-stale-owner-review-completion-queue
# =============================================================================
@router.get(
"/ai/governance/km-stale-owner-review-completion-queue",
response_model=KnowledgeStaleOwnerReviewCompletionQueueResponse,
)
async def get_km_stale_owner_review_completion_queue(
project_id: Annotated[str, Query(min_length=1, max_length=64)] = "awoooi",
status_bucket: Annotated[
str,
Query(pattern="^(all|ready|blocked|completed|failed|pending)$"),
] = "all",
priority_tier: Annotated[list[str] | None, Query(alias="priority_tier")] = None,
recommended_completion_outcome: Annotated[
str,
Query(pattern="^(all|refresh_with_evidence|archive|supersede)$"),
] = "all",
batch_governance_event_id: Annotated[str | None, Query(max_length=120)] = None,
can_preview: bool | None = None,
limit: Annotated[int, Query(ge=1, le=100)] = 20,
) -> KnowledgeStaleOwnerReviewCompletionQueueResponse:
"""
查詢 stale KM owner-review completion 分流。
這是 read-only queue把 active / completed / failed dispatch 拆成
ready、blocked、completed、failed讓前端呈現下一步卡點打開頁面不寫 KM。
"""
logger.debug(
"km_stale_owner_review_completion_queue_request",
project_id=project_id,
status_bucket=status_bucket,
priority_tiers=priority_tier,
recommended_completion_outcome=recommended_completion_outcome,
batch_governance_event_id=batch_governance_event_id,
can_preview=can_preview,
limit=limit,
)
try:
return await query_km_stale_owner_review_completion_queue(
project_id=project_id,
status_bucket=status_bucket,
priority_tiers=priority_tier,
recommended_completion_outcome=recommended_completion_outcome,
batch_governance_event_id=batch_governance_event_id,
can_preview=can_preview,
limit=limit,
)
except KmStaleOwnerReviewError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# POST /api/v1/ai/governance/km-stale-owner-review-completion-queue/batch-preview
# =============================================================================
@router.post(
"/ai/governance/km-stale-owner-review-completion-queue/batch-preview",
response_model=KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse,
)
async def post_km_stale_owner_review_completion_batch_preview(
request: KnowledgeStaleOwnerReviewCompletionBatchPreviewRequest,
) -> KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse:
"""
Preview a bounded set of owner-review completion candidates.
This endpoint is intentionally dry-run only: it does not write KM, does not
enqueue a batch executor, and does not create governance audit rows. Each
item must still be completed through the single-item dry-run + owner confirm
endpoint.
"""
logger.info(
"km_stale_owner_review_completion_batch_preview_request",
project_id=request.project_id,
status_bucket=request.status_bucket,
priority_tiers=request.priority_tiers,
recommended_completion_outcome=request.recommended_completion_outcome,
batch_governance_event_id=request.batch_governance_event_id,
limit=request.limit,
owner=request.owner,
)
try:
return await preview_km_stale_owner_review_completion_batch(request=request)
except KmStaleOwnerReviewError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# POST /api/v1/ai/governance/km-stale-candidates/batch-queue-review
# =============================================================================
@router.post(
"/ai/governance/km-stale-candidates/batch-queue-review",
response_model=KnowledgeStaleOwnerReviewBatchQueueResponse,
)
async def post_km_stale_candidate_batch_queue_review(
request: KnowledgeStaleOwnerReviewBatchQueueRequest,
) -> KnowledgeStaleOwnerReviewBatchQueueResponse:
"""
將 P0/P1 stale KM 批次排入 owner review。
這個 endpoint 只建立 batch audit 與逐筆 owner-review dispatch不改寫 KM。
真正 refresh / archive / supersede 仍需單筆 dry-run fingerprint + owner approval。
"""
logger.info(
"km_stale_candidate_batch_queue_review_request",
project_id=request.project_id,
priority_tiers=request.priority_tiers,
limit=request.limit,
owner=request.owner,
dry_run=request.dry_run,
)
try:
return await batch_queue_km_stale_owner_reviews(request=request)
except KmStaleOwnerReviewError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# POST /api/v1/ai/governance/km-stale-candidates/{entry_id}/queue-review
# =============================================================================
@router.post(
"/ai/governance/km-stale-candidates/{entry_id}/queue-review",
response_model=KnowledgeStaleOwnerReviewResponse,
)
async def post_km_stale_candidate_queue_review(
entry_id: str,
request: KnowledgeStaleOwnerReviewRequest,
) -> KnowledgeStaleOwnerReviewResponse:
"""
將單筆 stale KM candidate 排入 owner review。
這個 endpoint 只建立治理事件與 dispatch work item不修改 KM 內容。
實際 refresh / archive / supersede 仍需 owner 在後續流程確認。
"""
logger.info(
"km_stale_candidate_queue_review_request",
entry_id=entry_id,
owner=request.owner,
dry_run=request.dry_run,
)
try:
return await queue_km_stale_owner_review(entry_id=entry_id, request=request)
except KmStaleOwnerReviewError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# POST /api/v1/ai/governance/km-stale-candidates/{entry_id}/complete-review
# =============================================================================
@router.post(
"/ai/governance/km-stale-candidates/{entry_id}/complete-review",
response_model=KnowledgeStaleOwnerReviewCompleteResponse,
)
async def post_km_stale_candidate_complete_review(
entry_id: str,
request: KnowledgeStaleOwnerReviewCompleteRequest,
) -> KnowledgeStaleOwnerReviewCompleteResponse:
"""
Owner 審核後完成 stale KM 的 refresh / archive / supersede 流程。
必須先 dry-run 取得 fingerprint真正寫入時需 owner_approved=true。
後端會寫 KM、terminal audit dispatch 與 stale ratio recheck dispatch。
"""
logger.info(
"km_stale_candidate_complete_review_request",
entry_id=entry_id,
dispatch_id=request.dispatch_id,
owner=request.owner,
review_outcome=request.review_outcome,
dry_run=request.dry_run,
owner_approved=request.owner_approved,
)
try:
return await complete_km_stale_owner_review(
entry_id=entry_id,
request=request,
)
except KmStaleOwnerReviewError as exc:
raise HTTPException(status_code=exc.status_code, detail=exc.detail) from exc
# =============================================================================
# GET /api/v1/ai/governance/summary
# =============================================================================

View File

@@ -18,15 +18,8 @@ Endpoints:
from __future__ import annotations
import structlog
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from fastapi import APIRouter, Query
from src.services.adr100_remediation_service import (
RemediationMode,
RemediationNotFoundError,
get_adr100_remediation_service,
)
from src.services.adr100_slo_status_service import get_adr100_slo_status_service
from src.services.ai_slo_calculator import AiSloCalculator
logger = structlog.get_logger(__name__)
@@ -34,27 +27,6 @@ logger = structlog.get_logger(__name__)
router = APIRouter()
class RemediationPreviewRequest(BaseModel):
"""ADR-100 remediation preview request."""
work_item_id: str = Field(min_length=1)
mode: RemediationMode = "auto"
class RemediationDryRunRequest(BaseModel):
"""ADR-100 remediation dry-run request."""
work_item_id: str = Field(min_length=1)
mode: RemediationMode = "auto"
class RemediationApprovalRequest(BaseModel):
"""ADR-100 record-only approval request."""
work_item_id: str = Field(min_length=1)
mode: RemediationMode = "approval"
@router.get("/ai/slo")
async def get_ai_slo(
force_refresh: bool = Query(False, description="忽略快取,強制重算"),
@@ -78,80 +50,9 @@ async def get_ai_slo(
if cached:
data = cached.to_dict()
data["cache_hit"] = True
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
return data
report = await calc.run()
data = report.to_dict()
data["cache_hit"] = False
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
return data
@router.get("/ai/slo/remediation/preview")
async def preview_ai_slo_remediation(
work_item_id: str = Query(..., min_length=1),
mode: RemediationMode = Query("auto"),
) -> dict:
"""Preview the safe remediation plan for one ADR-100 queue item."""
try:
return await get_adr100_remediation_service().preview(work_item_id, mode)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.post("/ai/slo/remediation/preview")
async def preview_ai_slo_remediation_post(request: RemediationPreviewRequest) -> dict:
"""POST variant for clients that prefer JSON bodies."""
try:
return await get_adr100_remediation_service().preview(
request.work_item_id,
request.mode,
)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.post("/ai/slo/remediation/dry-run")
async def dry_run_ai_slo_remediation(request: RemediationDryRunRequest) -> dict:
"""Run a read-only ADR-100 remediation dry-run."""
try:
return await get_adr100_remediation_service().dry_run(
request.work_item_id,
request.mode,
)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.post("/ai/slo/remediation/approval-request")
async def create_ai_slo_remediation_approval_request(
request: RemediationApprovalRequest,
) -> dict:
"""Create a record-only approval request for ADR-100 remediation."""
try:
return await get_adr100_remediation_service().create_approval_request(
request.work_item_id,
request.mode,
)
except RemediationNotFoundError as exc:
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
@router.get("/ai/slo/remediation/history")
async def list_ai_slo_remediation_history(
limit: int = Query(50, ge=1, le=200),
incident_id: str | None = Query(default=None, min_length=1),
work_item_id: str | None = Query(default=None, min_length=1),
) -> dict:
"""List durable ADR-100 remediation dry-run history from alert_operation_log."""
return await get_adr100_remediation_service().history(
limit=limit,
incident_id=incident_id,
work_item_id=work_item_id,
)

View File

@@ -20,7 +20,6 @@ from pydantic import BaseModel
from src.core.config import settings
from src.core.logging import get_logger
from src.core.sse import EventPublisher, EventType, SSEEvent, get_publisher
from src.services.dashboard_metrics_service import fetch_pending_approval_count
from src.services.host_aggregator import AggregatedStatus, HostAggregator
router = APIRouter()
@@ -142,14 +141,12 @@ async def dashboard_update_loop(publisher: EventPublisher) -> None:
try:
# Fetch aggregated status
status = await HostAggregator.fetch_all()
pending_approvals = await fetch_pending_approval_count()
# Publish to all connected clients
event = SSEEvent(
type=EventType.HOST_UPDATE,
data={
"overall_status": status.overall_status,
"pending_approvals": pending_approvals,
"hosts": [
{
"ip": h.ip,
@@ -209,9 +206,7 @@ async def get_dashboard() -> DashboardResponse:
logger.info("dashboard_fetch")
status = await HostAggregator.fetch_all()
response = aggregated_to_response(status)
response.pending_approvals = await fetch_pending_approval_count()
return response
return aggregated_to_response(status)
@router.get("/dashboard/stream")

View File

@@ -13,12 +13,10 @@ leWOOOgo 積木化原則:
建立者: Claude Code (Phase 25 P2)
"""
from typing import Literal
from fastapi import APIRouter, BackgroundTasks, HTTPException
from pydantic import BaseModel, Field
from src.core.csrf import CSRFToken # Phase 20: CSRF Protection
from src.models.drift import (
DriftListResponse,
DriftReport,
@@ -30,10 +28,6 @@ from src.repositories.drift_repository import get_drift_repository
from src.services.drift_adopt_service import get_drift_adopt_service
from src.services.drift_analyzer import get_drift_analyzer
from src.services.drift_detector import get_drift_detector
from src.services.drift_fingerprint_state_service import (
DriftFingerprintStateNotFoundError,
get_drift_fingerprint_state_service,
)
from src.services.drift_interpreter import get_drift_interpreter
from src.services.drift_remediator import get_drift_remediator
from src.utils.timezone import now_taipei
@@ -43,42 +37,6 @@ router = APIRouter(prefix="/drift", tags=["drift"])
# 2026-04-09 Claude Sonnet 4.6: B4 drift_reports 持久化 — 改用 DB repository
class DriftFingerprintHandoffRequest(BaseModel):
"""Record-only handoff request for a stable drift fingerprint."""
report_id: str | None = Field(default=None, min_length=1)
namespace: str | None = Field(default="awoooi-prod", min_length=1)
handoff_kind: Literal[
"open_pr_review",
"manual_investigation",
"zero_diff_pr_cleanup",
] = "open_pr_review"
pr_url: str | None = Field(default=None, min_length=1)
note: str | None = Field(default=None, max_length=500)
class DriftFingerprintRemediationRequest(BaseModel):
"""Record-only remediation request for a stable drift fingerprint."""
report_id: str | None = Field(default=None, min_length=1)
namespace: str | None = Field(default="awoooi-prod", min_length=1)
remediation_kind: Literal[
"live_env_rollback",
"git_adopted",
"git_rollback",
"zero_diff_pr_cleanup",
"manual_noop",
] = "live_env_rollback"
remediation_status: Literal[
"executed_unverified",
"verified_no_drift",
"verification_failed",
] | None = None
verification_report_id: str | None = Field(default=None, min_length=1)
note: str | None = Field(default=None, max_length=1000)
commands_summary: list[str] = Field(default_factory=list, max_length=12)
@router.post("/scan", response_model=DriftScanResponse, summary="觸發漂移掃描")
async def trigger_drift_scan(
request: DriftScanRequest,
@@ -141,72 +99,6 @@ async def list_drift_reports() -> DriftListResponse:
return DriftListResponse(items=items, total=len(items))
@router.get("/fingerprints/state", summary="查詢 Config Drift fingerprint 狀態")
async def get_drift_fingerprint_state(
report_id: str | None = None,
namespace: str | None = "awoooi-prod",
) -> dict:
"""
以 stable fingerprint 聚合漂移狀態。
此 endpoint 只建立 read model重複次數、PR 狀態、是否零 diff、
人工交接歷史與下一步。它不修改 drift / incident / auto-repair 狀態。
"""
svc = get_drift_fingerprint_state_service()
try:
return await svc.get_state(report_id=report_id, namespace=namespace)
except DriftFingerprintStateNotFoundError as exc:
raise HTTPException(status_code=404, detail="drift_report_not_found") from exc
@router.post("/fingerprints/handoff", summary="記錄 Config Drift fingerprint 交接")
async def record_drift_fingerprint_handoff(
request: DriftFingerprintHandoffRequest,
) -> dict:
"""
記錄 stable fingerprint 已轉人工 / PR review 的歷史證據。
安全邊界:只寫 alert_operation_log / timeline_events不修改 drift 狀態、
incident 狀態、自動修復結果,不建立外部 ticket也不 merge PR。
"""
svc = get_drift_fingerprint_state_service()
try:
return await svc.record_handoff(
report_id=request.report_id,
namespace=request.namespace,
handoff_kind=request.handoff_kind,
pr_url=request.pr_url,
note=request.note,
)
except DriftFingerprintStateNotFoundError as exc:
raise HTTPException(status_code=404, detail="drift_report_not_found") from exc
@router.post("/fingerprints/remediation", summary="記錄 Config Drift fingerprint 修復")
async def record_drift_fingerprint_remediation(
request: DriftFingerprintRemediationRequest,
) -> dict:
"""
記錄 stable fingerprint 已完成的修復 / 驗證證據。
安全邊界:只寫 alert_operation_log / timeline_events不修改 drift 狀態、
incident 狀態、自動修復結果,不建立外部 ticket也不執行 kubectl。
"""
svc = get_drift_fingerprint_state_service()
try:
return await svc.record_remediation(
report_id=request.report_id,
namespace=request.namespace,
remediation_kind=request.remediation_kind,
remediation_status=request.remediation_status,
verification_report_id=request.verification_report_id,
note=request.note,
commands_summary=request.commands_summary,
)
except DriftFingerprintStateNotFoundError as exc:
raise HTTPException(status_code=404, detail="drift_report_not_found") from exc
@router.post("/reports/{report_id}/rollback", summary="覆蓋回 Git 狀態")
async def rollback_drift(report_id: str, _csrf_token: CSRFToken) -> dict: # Phase 20: CSRF Protection (驗證用,不需要使用值)
"""

View File

@@ -418,9 +418,7 @@ async def _send_gitea_notification(
logger.debug("gitea_tg_skipped", reason="Bot token not configured")
return
from src.services.telegram_gateway import (
get_telegram_gateway, # type: ignore[import]
)
from src.services.telegram_gateway import get_telegram_gateway # type: ignore[import]
gateway = get_telegram_gateway()
await gateway.initialize()
await gateway.send_alert_notification(message)
@@ -504,22 +502,15 @@ async def handle_pull_request(
review_id = f"gitea-pr-{payload.repository.id}-{pr.number}-{uuid.uuid4().hex[:8]}"
# 背景執行審查 (委派給 Service)
if settings.MOCK_MODE:
logger.info(
"gitea_pr_review_background_skipped_mock_mode",
review_id=review_id,
repo=payload.repository.full_name,
)
else:
service = get_gitea_webhook_service()
background_tasks.add_task(
service.review_pull_request,
repo=payload.repository,
pr=pr,
sender=payload.sender,
review_id=review_id,
action=payload.action,
)
service = get_gitea_webhook_service()
background_tasks.add_task(
service.review_pull_request,
repo=payload.repository,
pr=pr,
sender=payload.sender,
review_id=review_id,
action=payload.action,
)
logger.info(
"gitea_pr_review_scheduled",
@@ -570,24 +561,17 @@ async def handle_push(
review_id = f"gitea-push-{payload.repository.id}-{payload.after[:8]}-{uuid.uuid4().hex[:8]}"
# 背景執行審查 (委派給 Service)
if settings.MOCK_MODE:
logger.info(
"gitea_push_review_background_skipped_mock_mode",
review_id=review_id,
repo=payload.repository.full_name,
)
else:
service = get_gitea_webhook_service()
background_tasks.add_task(
service.review_push,
repo=payload.repository,
commits=commits,
sender=payload.sender,
review_id=review_id,
ref=ref,
before_sha=payload.before,
after_sha=payload.after,
)
service = get_gitea_webhook_service()
background_tasks.add_task(
service.review_push,
repo=payload.repository,
commits=commits,
sender=payload.sender,
review_id=review_id,
ref=ref,
before_sha=payload.before,
after_sha=payload.after,
)
logger.info(
"gitea_push_review_scheduled",

View File

@@ -11,7 +11,7 @@ Endpoints:
Components Checked:
- PostgreSQL (192.168.0.188:5432)
- Redis (192.168.0.188:6380)
- Ollama ADR-110 provider pool (GCP-A -> GCP-B -> 111)
- Ollama (settings.OLLAMA_URL / ADR-110 provider pool)
- OpenClaw (192.168.0.188:8089)
- SigNoz (192.168.0.188:3301)
"""
@@ -26,16 +26,9 @@ from pydantic import BaseModel
from src.core.config import settings
from src.core.logging import get_logger
from src.services.health_check_service import get_health_check_service
from src.services.ollama_endpoint_circuit_breaker import (
get_ollama_endpoint_cooldown_remaining_seconds,
record_ollama_endpoint_failure,
record_ollama_endpoint_success,
)
from src.services.ollama_endpoint_resolver import resolve_ollama_order
router = APIRouter()
logger = get_logger("awoooi.health")
CORE_COMPONENTS = ("api", "postgresql", "redis", "ollama", "openclaw", "signoz")
# =============================================================================
@@ -47,11 +40,6 @@ class ComponentHealth(BaseModel):
status: Literal["up", "down", "degraded"]
latency_ms: float | None = None
error: str | None = None
provider_name: str | None = None
diagnosis_code: str | None = None
retry_after_seconds: float | None = None
cooldown_remaining_seconds: float | None = None
is_cooldown: bool = False
class HealthResponse(BaseModel):
@@ -62,7 +50,6 @@ class HealthResponse(BaseModel):
mock_mode: bool
timestamp: datetime
components: dict[str, ComponentHealth]
ollama_route_order: list[str] = []
# =============================================================================
@@ -119,125 +106,8 @@ async def check_redis() -> ComponentHealth:
async def check_ollama() -> ComponentHealth:
"""Async aggregate Ollama health check via ADR-110 provider chain."""
aggregate, _details = await check_ollama_provider_chain()
return aggregate
async def check_ollama_provider_chain() -> tuple[ComponentHealth, dict[str, ComponentHealth]]:
"""
Check the full Ollama provider chain.
The aggregate ``ollama`` component represents route availability:
- up: GCP-A is reachable
- degraded: GCP-A is unavailable but GCP-B or 111 is reachable
- down: no configured Ollama endpoint is reachable
"""
selections = tuple(
selection
for selection in resolve_ollama_order("healthcheck")
if selection.url and selection.provider_name != "ollama_unconfigured"
)
if not selections:
aggregate = ComponentHealth(
status="down",
error="no Ollama endpoints configured",
)
return aggregate, {}
checked = await asyncio.gather(
*(
_ollama_endpoint_health_check(selection.provider_name, selection.url)
for selection in selections
)
)
details = {
selection.provider_name: result
for selection, result in zip(selections, checked, strict=False)
}
primary = selections[0]
primary_status = details[primary.provider_name].status
if primary.provider_name == "ollama_gcp_a" and primary_status == "up":
return details[primary.provider_name], details
first_available = next(
(
selection
for selection in selections
if details[selection.provider_name].status == "up"
),
None,
)
if first_available:
fallback = details[first_available.provider_name]
return (
ComponentHealth(
status="degraded",
latency_ms=fallback.latency_ms,
error=f"primary unavailable; fallback active: {first_available.provider_name}",
),
details,
)
errors = ", ".join(
f"{provider}={health.error or health.status}"
for provider, health in details.items()
)
return (
ComponentHealth(
status="down",
error=f"all Ollama endpoints unavailable: {errors}",
),
details,
)
async def _ollama_endpoint_health_check(name: str, url: str) -> ComponentHealth:
cooldown_remaining = get_ollama_endpoint_cooldown_remaining_seconds(url)
if cooldown_remaining > 0:
return ComponentHealth(
status="down",
error=f"recent endpoint failure cooldown: {cooldown_remaining:.0f}s",
provider_name=name,
diagnosis_code="endpoint_cooldown",
retry_after_seconds=round(cooldown_remaining, 1),
cooldown_remaining_seconds=round(cooldown_remaining, 1),
is_cooldown=True,
)
result = await _http_health_check(name, url, "/api/tags")
result.provider_name = name
if result.status == "up":
result.diagnosis_code = "endpoint_reachable"
record_ollama_endpoint_success(url)
else:
result.diagnosis_code = _classify_ollama_endpoint_failure(name, result.error)
record_ollama_endpoint_failure(url)
return result
def _classify_ollama_endpoint_failure(
provider_name: str,
error: str | None,
) -> str:
"""Return a stable diagnosis code for UI/alert rendering."""
normalized_error = (error or "").lower()
if "cooldown" in normalized_error:
return "endpoint_cooldown"
if "502" in normalized_error or "bad gateway" in normalized_error:
return (
"local_proxy_upstream_unreachable"
if provider_name == "ollama_local"
else "proxy_upstream_unreachable"
)
if "timeout" in normalized_error:
return "endpoint_timeout"
if "connection refused" in normalized_error:
return "endpoint_connection_refused"
if "no route to host" in normalized_error or "network is unreachable" in normalized_error:
return "endpoint_network_unreachable"
return "endpoint_unreachable"
"""Async Ollama health check via /api/tags"""
return await _http_health_check("ollama", settings.OLLAMA_URL, "/api/tags")
async def check_openclaw() -> ComponentHealth:
@@ -250,30 +120,6 @@ async def check_signoz() -> ComponentHealth:
return await _http_health_check("signoz", settings.SIGNOZ_URL, "/api/v1/health")
def _determine_overall_status(
components: dict[str, ComponentHealth],
) -> Literal["healthy", "degraded", "unhealthy"]:
"""Determine overall health from core aggregate components only."""
statuses = [
components[name].status
for name in CORE_COMPONENTS
if name in components
]
down_count = statuses.count("down")
degraded_count = statuses.count("degraded")
critical_down = (
components.get("postgresql", ComponentHealth(status="down")).status == "down"
or components.get("redis", ComponentHealth(status="down")).status == "down"
)
if critical_down or down_count >= 3:
return "unhealthy"
if down_count >= 1 or degraded_count > 0:
return "degraded"
return "healthy"
# =============================================================================
# Endpoints
# =============================================================================
@@ -296,28 +142,34 @@ async def get_health() -> HealthResponse:
results = await asyncio.gather(
check_postgresql(),
check_redis(),
check_ollama_provider_chain(),
check_ollama(),
check_openclaw(),
check_signoz(),
)
ollama_aggregate, ollama_details = results[2]
components = {
"api": ComponentHealth(status="up", latency_ms=0.0),
"postgresql": results[0],
"redis": results[1],
"ollama": ollama_aggregate,
"ollama": results[2],
"openclaw": results[3],
"signoz": results[4],
}
components.update(ollama_details)
overall_status = _determine_overall_status(components)
ollama_route_order = [
selection.provider_name
for selection in resolve_ollama_order("healthcheck")
if selection.url and selection.provider_name != "ollama_unconfigured"
]
# Determine overall status
statuses = [c.status for c in components.values()]
down_count = statuses.count("down")
degraded_count = statuses.count("degraded")
# Critical services: postgresql, redis
critical_down = components["postgresql"].status == "down" or components["redis"].status == "down"
if critical_down or down_count >= 3:
overall_status: Literal["healthy", "degraded", "unhealthy"] = "unhealthy"
elif down_count >= 1 or degraded_count > 0:
overall_status = "degraded"
else:
overall_status = "healthy"
logger.info(
"health_check_complete",
@@ -333,7 +185,6 @@ async def get_health() -> HealthResponse:
mock_mode=settings.MOCK_MODE,
timestamp=datetime.now(UTC),
components=components,
ollama_route_order=ollama_route_order,
)

View File

@@ -17,10 +17,9 @@ Phase 6.4 核心功能:
- Proposal 必須關聯到 Incident
"""
from datetime import UTC, datetime, timedelta
from typing import Any
from fastapi import APIRouter, HTTPException, Query, status
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, Field
from src.core.logging import get_logger
@@ -134,7 +133,6 @@ class IncidentTimelineResponse(BaseModel):
timeline: list[IncidentTimelineStage] = Field(default_factory=list)
events: list[IncidentTimelineEvent] = Field(default_factory=list)
ascii_timeline: str
reconciliation: dict[str, Any] = Field(default_factory=dict)
# =============================================================================
@@ -150,26 +148,18 @@ class IncidentTimelineResponse(BaseModel):
Phase 6.5 升級:
- 每個事件自動附帶 decision_token
- 預設只讀取已存在的 decision_token
- 需要新決策時改由明確的 proposal / operator run 入口觸發
- 確保 UI 永遠有決策可操作
- 雙軌引擎: LLM (主) + Expert System (備)
""",
)
async def list_incidents(
generate_missing_decisions: bool = Query(
False,
description=(
"預設 false列表查詢只讀既有 decision token"
"true 僅供明確維運操作使用,會背景產生缺少的決策。"
),
),
) -> IncidentListResponse:
async def list_incidents() -> IncidentListResponse:
"""
取得活躍事件清單
Phase 6.5: 附帶既有決策令牌
- 列表查詢必須是低成本純讀路徑
- 不可因為前端輪詢就背景觸發 LLM / Ollama / OpenClaw
- 需要新決策時,呼叫 POST /api/v1/incidents/{incident_id}/proposal
Phase 6.5: 自動為每個事件生成決策令牌
- P0/P1 事件優先處理
- 30 秒內保證有決策
- LLM 失敗時 Expert System 保底
Returns:
IncidentListResponse: 事件清單與計數 (含決策令牌)
@@ -184,6 +174,8 @@ async def list_incidents(
# 按時間排序 (最新優先)
# 2026-03-26 修復: 處理 timezone-aware 與 naive datetime 混合問題
from datetime import UTC
def safe_created_at(i: Incident) -> float:
"""安全取得 timestamp處理 timezone 混合問題"""
dt = i.created_at
@@ -197,24 +189,15 @@ async def list_incidents(
# 2026-04-09 Claude Sonnet 4.6: 效能修復 — list endpoint 不同步等待 AI
# 原設計: 每個 incident await AI 決策 (120-180s timeout),多 incident 時乘積爆炸
# 修復: 只取已存在的決策 token若無則背景觸發生成前端 poll 單筆 GET 取得結果
#
# 2026-05-06 Codex: 成本與推理槽修復 — 預設不再背景觸發 AI。
# 根因: 多個前端頁面會輪詢 GET /incidents若列表查詢偷偷 create_task
# 每次頁面載入都可能消耗 GCP Ollama / OpenClaw 推理槽,甚至 fallback 到 Gemini。
# 新規則: GET list 是純讀;生成新修復建議必須走明確 proposal/operator-run 入口。
if generate_missing_decisions:
import asyncio
import asyncio
responses = []
background_tasks = []
existing_tokens = await decision_manager._find_existing_tokens_for_incidents(
[incident.incident_id for incident in incidents]
)
for incident in incidents:
try:
# 只查已快取的決策 (不等待 AI立即返回)
existing = existing_tokens.get(incident.incident_id)
existing = await decision_manager._find_existing_token(incident.incident_id)
if existing:
decision_info = DecisionInfo(
token=existing.token,
@@ -224,20 +207,17 @@ async def list_incidents(
)
responses.append(IncidentResponse.from_incident(incident, decision_info))
else:
# 無快取 → 本次返回 None。列表查詢預設不觸發 AI
# 前端若需要修復建議,必須呼叫明確的 proposal 入口。
# 無快取 → 背景觸發,本次返回 None(前端看到 decision=null 會 poll
responses.append(IncidentResponse.from_incident(incident, None))
if not generate_missing_decisions:
continue
# 2026-04-16 Claude Sonnet 4.6: 只對 48h 內的 incident 觸發 AI 分析
# 舊 incident token 每小時過期,若不限制會反覆重新分析歷史事件 → Telegram 洪水
from datetime import datetime, timezone, timedelta
_created = getattr(incident, "created_at", None)
_too_old = False
if _created:
if _created.tzinfo is None:
_created = _created.replace(tzinfo=UTC)
_too_old = (_created < datetime.now(UTC) - timedelta(hours=48))
_created = _created.replace(tzinfo=timezone.utc)
_too_old = (_created < datetime.now(timezone.utc) - timedelta(hours=48))
if not _too_old:
timeout = 120.0 if incident.severity in (Severity.P0, Severity.P1) else 180.0
background_tasks.append(
@@ -260,7 +240,6 @@ async def list_incidents(
"incidents_listed",
count=len(incidents),
with_decisions=sum(1 for r in responses if r.decision is not None),
generate_missing_decisions=generate_missing_decisions,
)
return IncidentListResponse(

View File

@@ -1,234 +0,0 @@
"""
IwoooS 安全治理 API。
Wazuh 接線採用只讀 metadata 模式:預設關閉、不保存 raw payload、
不公開 agent 原名 / 內網 IP、不啟用 active response。
"""
from __future__ import annotations
import asyncio
import json
import os
from base64 import b64encode
from typing import Any
from urllib.parse import urljoin, urlparse
import httpx
from fastapi import APIRouter, HTTPException, status
from fastapi.responses import JSONResponse
from src.services.iwooos_runtime_security_readback import (
load_latest_iwooos_runtime_security_readback,
)
from src.services.public_redaction import redact_public_lan_topology
router = APIRouter(tags=["IwoooS Security"])
REQUEST_TIMEOUT_SECONDS = 5.0
def _wazuh_env() -> dict[str, str]:
return {
"enabled": os.getenv("IWOOOS_WAZUH_READONLY_ENABLED", "").strip().lower(),
"base_url": os.getenv("WAZUH_API_BASE_URL", "").strip(),
"username": os.getenv("WAZUH_API_USERNAME", "").strip(),
"password": os.getenv("WAZUH_API_PASSWORD", "").strip(),
"expected_min_agent_count": os.getenv("IWOOOS_WAZUH_EXPECTED_MIN_AGENT_COUNT", "").strip(),
}
def _expected_min_agent_count(value: str) -> int:
try:
return max(0, int(value))
except ValueError:
return 0
def _https_url(value: str) -> str | None:
parsed = urlparse(value)
if parsed.scheme != "https" or not parsed.netloc:
return None
return value.rstrip("/") + "/"
def _boundary_response(status_text: str, http_status: int = 200) -> JSONResponse:
return JSONResponse(
status_code=http_status,
content={
"schema_version": "iwooos_wazuh_readonly_status_v1",
"status": status_text,
"mode": "metadata_only_no_active_response_no_raw_payload",
"configured": False,
"summary": {
"wazuh_platform_reported_count": 1,
"readonly_api_enabled_count": 0,
"wazuh_manager_query_accepted_count": 0,
"wazuh_event_accepted_count": 0,
"host_forensics_accepted_count": 0,
"active_response_authorized_count": 0,
"host_write_authorized_count": 0,
"runtime_gate_count": 0,
"expected_min_agent_count": _expected_min_agent_count(_wazuh_env()["expected_min_agent_count"]),
"agent_registry_empty_count": 0,
"agent_below_expected_minimum_count": 0,
"agent_visibility_no_false_green_count": 1,
},
"boundaries": _boundaries(),
},
)
def _boundaries() -> dict[str, bool]:
return {
"active_response_authorized": False,
"host_write_authorized": False,
"secret_value_collection_allowed": False,
"raw_wazuh_payload_storage_allowed": False,
"agent_identity_public_display_allowed": False,
"internal_ip_public_display_allowed": False,
"not_authorization": True,
}
def _redacted_agent(agent: dict[str, Any], index: int) -> dict[str, Any]:
os_info = agent.get("os") if isinstance(agent.get("os"), dict) else {}
return {
"alias": f"agent-{index + 1:02d}",
"status": agent.get("status", "unknown"),
"os": os_info.get("platform") or os_info.get("name") or "unknown",
"last_seen_present": bool(agent.get("lastKeepAlive")),
}
def _int_or_default(value: Any, default: int) -> int:
return value if isinstance(value, int) else default
def _agent_visibility_status(agent_total: int, expected_min_agent_count: int) -> str:
if agent_total <= 0:
return "wazuh_agent_registry_empty"
if expected_min_agent_count > 0 and agent_total < expected_min_agent_count:
return "wazuh_agent_registry_below_expected"
return "readonly_metadata_available"
async def _fetch_json(client: httpx.AsyncClient, url: str, headers: dict[str, str]) -> dict[str, Any]:
response = await client.get(url, headers=headers)
response.raise_for_status()
payload = response.json()
return payload if isinstance(payload, dict) else {}
async def _wazuh_readonly_status() -> JSONResponse:
env = _wazuh_env()
if env["enabled"] != "true":
return _boundary_response("disabled_waiting_iwooos_wazuh_owner_gate")
base_url = _https_url(env["base_url"])
if not base_url or not env["username"] or not env["password"]:
return _boundary_response("misconfigured_missing_server_side_wazuh_env", 503)
try:
auth_header = b64encode(f"{env['username']}:{env['password']}".encode("utf-8")).decode("ascii")
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT_SECONDS) as client:
auth = await _fetch_json(
client,
urljoin(base_url, "security/user/authenticate"),
{"Authorization": f"Basic {auth_header}"},
)
token = (auth.get("data") or {}).get("token")
if not token:
return _boundary_response("wazuh_auth_token_missing", 502)
bearer_headers = {"Authorization": f"Bearer {token}"}
status_payload = await _fetch_json(
client,
urljoin(base_url, "agents/summary/status"),
bearer_headers,
)
agents_payload = await _fetch_json(
client,
urljoin(base_url, "agents?limit=100&select=id,status,os.name,os.platform,lastKeepAlive"),
bearer_headers,
)
except (httpx.HTTPError, ValueError):
return _boundary_response("wazuh_readonly_metadata_unavailable", 502)
connection = ((status_payload.get("data") or {}).get("connection") or {})
affected_items = ((agents_payload.get("data") or {}).get("affected_items") or [])
if not isinstance(affected_items, list):
affected_items = []
expected_min_agent_count = _expected_min_agent_count(env["expected_min_agent_count"])
agent_total = _int_or_default(connection.get("total"), len(affected_items))
agent_active = _int_or_default(connection.get("active"), 0)
agent_disconnected = _int_or_default(connection.get("disconnected"), 0)
agent_pending = _int_or_default(connection.get("pending"), 0)
agent_registry_empty = agent_total <= 0
agent_below_expected = expected_min_agent_count > 0 and agent_total < expected_min_agent_count
return JSONResponse(
content={
"schema_version": "iwooos_wazuh_readonly_status_v1",
"status": _agent_visibility_status(agent_total, expected_min_agent_count),
"mode": "metadata_only_no_active_response_no_raw_payload",
"configured": True,
"summary": {
"wazuh_platform_reported_count": 1,
"readonly_api_enabled_count": 1,
"agent_total": agent_total,
"agent_active": agent_active,
"agent_disconnected": agent_disconnected,
"agent_pending": agent_pending,
"expected_min_agent_count": expected_min_agent_count,
"agent_registry_empty_count": 1 if agent_registry_empty else 0,
"agent_below_expected_minimum_count": 1 if agent_below_expected else 0,
"agent_visibility_no_false_green_count": 1,
"wazuh_manager_query_accepted_count": 0,
"wazuh_event_accepted_count": 0,
"host_forensics_accepted_count": 0,
"active_response_authorized_count": 0,
"host_write_authorized_count": 0,
"runtime_gate_count": 0,
},
"agents": [_redacted_agent(agent, index) for index, agent in enumerate(affected_items[:20])],
"boundaries": _boundaries(),
},
)
@router.get("/api/iwooos/wazuh")
async def get_iwooos_wazuh_readonly_status_compat() -> JSONResponse:
return await _wazuh_readonly_status()
@router.get("/api/v1/iwooos/wazuh")
async def get_iwooos_wazuh_readonly_status_v1() -> JSONResponse:
return await _wazuh_readonly_status()
@router.get(
"/api/v1/iwooos/runtime-security-readback",
response_model=dict[str, Any],
summary="取得 IwoooS runtime security readback",
description=(
"讀取最新已提交的 IwoooS 資安只讀快照,彙總 Wazuh、Kali、SOC/SIEM、"
"告警可讀性、owner dispatch 與外部入侵防護 Gate。此端點不呼叫 Wazuh / Kali / "
"主機 / Docker / Nginx / firewall / Telegram不收集 secret不授權 runtime 寫入。"
),
)
async def get_iwooos_runtime_security_readback() -> dict[str, Any]:
"""回傳 IwoooS 資安 runtime readback 只讀總板。"""
try:
payload = await asyncio.to_thread(load_latest_iwooos_runtime_security_readback)
return redact_public_lan_topology(payload)
except FileNotFoundError as exc:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=str(exc),
) from exc
except (json.JSONDecodeError, ValueError) as exc:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"IwoooS runtime security readback 無效:{exc}",
) from exc

View File

@@ -27,23 +27,6 @@ router = APIRouter(prefix="/monitoring", tags=["Monitoring"])
TIMEOUT = 3.0
PUBLIC_TOOL_URLS = {
"Sentry": "https://sentry.wooo.work",
"Langfuse": "https://langfuse.wooo.work",
"SigNoz": "https://signoz.wooo.work",
"Gitea": "https://gitea.wooo.work",
}
def public_monitoring_tool_payload(tool: dict) -> dict:
"""Drop internal probe URLs before returning tool status to browsers."""
payload = dict(tool)
payload.pop("url", None)
public_url = PUBLIC_TOOL_URLS.get(str(payload.get("name") or ""))
if public_url:
payload["url"] = public_url
return payload
# =============================================================================
# Probes
@@ -56,16 +39,15 @@ async def _probe_grafana(client: httpx.AsyncClient) -> dict:
if r.status_code == 200:
data = r.json()
version = data.get("version")
dash_count = None
grafana_api_key = settings.GRAFANA_API_KEY.strip()
if grafana_api_key and grafana_api_key != "CHANGE_ME":
dash_r = await client.get(
f"{base}/api/search?type=dash-db",
headers={"Authorization": f"Bearer {grafana_api_key}"},
timeout=TIMEOUT,
)
if dash_r.status_code == 200 and isinstance(dash_r.json(), list):
dash_count = len(dash_r.json())
# Dashboard count requires basic auth (internal probe only)
import base64 as _b64
_token = _b64.b64encode(b"admin:WoooTech2026").decode()
dash_r = await client.get(
f"{base}/api/search?type=dash-db",
headers={"Authorization": f"Basic {_token}"},
timeout=TIMEOUT,
)
dash_count = len(dash_r.json()) if dash_r.status_code == 200 and isinstance(dash_r.json(), list) else None
return {
"name": "Grafana",
"status": "up",
@@ -260,7 +242,7 @@ async def get_monitoring_status() -> dict:
if isinstance(r, Exception):
logger.error("monitoring_probe_exception", error=str(r))
continue
tools.append({**public_monitoring_tool_payload(r), "checked_at": now})
tools.append({**r, "checked_at": now})
return {
"tools": tools,

View File

@@ -9,21 +9,14 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116
from fastapi import APIRouter
from src.api.v1.platform.contracts import router as contracts_router
from src.api.v1.platform.events import router as events_router
from src.api.v1.platform.operator_runs import router as operator_runs_router
from src.api.v1.platform.runs import router as runs_router
from src.api.v1.platform.tenants import router as tenants_router
from src.api.v1.platform.truth_chain import router as truth_chain_router
router = APIRouter()
router.include_router(events_router)
router.include_router(truth_chain_router)
# 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的
# `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成
# run_id造成前端 Run 監控頁 HTTP 422。
router.include_router(operator_runs_router)
router.include_router(runs_router)
router.include_router(tenants_router)
router.include_router(contracts_router)
router.include_router(operator_runs_router)
__all__ = ["router"]

View File

@@ -1,620 +0,0 @@
"""
AwoooP Operator Console — Channel Events API
============================================
提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。
"""
from __future__ import annotations
from datetime import UTC, datetime
from typing import Annotated, Any, Literal
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from src.core.awooop_operator_auth import (
AwoooPOperatorPrincipal,
verify_awooop_operator,
)
from src.core.context import clear_project_context, get_current_project_context, set_project_context
from src.services.channel_event_dossier_service import (
RecurrenceWorkItemHandoffKind,
RecurrenceWorkItemMode,
RecurrenceWorkItemNotFoundError,
SourceCorrelationReviewDecision,
fetch_channel_event_dossier,
fetch_channel_event_dossier_coverage,
fetch_channel_event_dossier_recurrence,
fetch_recurrence_work_item_dry_run,
fetch_recurrence_work_item_handoff,
fetch_recurrence_work_item_preview,
fetch_source_correlation_apply,
fetch_source_correlation_review_decision,
)
from src.services.channel_hub import record_external_alert_event
from src.services.platform_operator_service import list_recent_channel_events
router = APIRouter()
class _BodyProjectContext:
"""Temporarily promote body project_id into the request project context."""
def __init__(self, project_id: str | None) -> None:
self._project_id = project_id.strip() if project_id else None
self._tokens = None
def __enter__(self) -> None:
if not self._project_id:
return
current = get_current_project_context()
self._tokens = set_project_context(
project_id=self._project_id,
source="request.body",
request_id=current.get("request_id"),
)
def __exit__(self, exc_type, exc, tb) -> None:
if self._tokens is not None:
clear_project_context(self._tokens)
class ChannelEventItem(BaseModel):
event_id: UUID
project_id: str
channel_type: str
provider_event_id: str
channel_chat_id: str | None
run_id: UUID | None = None
content_type: str | None = None
content_preview: str | None
is_duplicate: bool
received_at: datetime
source_summary: dict[str, Any] = Field(default_factory=dict)
class RecentEventsResponse(BaseModel):
events: list[ChannelEventItem]
total: int
limit: int
class ChannelEventDossierItem(BaseModel):
event_id: UUID
project_id: str
channel_type: str
provider: str | None
stage: str
provider_event_id: str
content_preview: str | None
content_redacted: str | None
has_redacted_content: bool
redaction_version: str | None
source_url: str | None
content_sha256: str | None
content_length: int | None
source_refs: dict[str, Any]
source_ref_count: int
log_correlation: dict[str, Any]
alertname: str | None
severity: str | None
namespace: str | None
target_resource: str | None
fingerprint: str | None
is_duplicate: bool
provider_ts: datetime | None
received_at: datetime
class ChannelEventDossierSummary(BaseModel):
source_count: int
duplicate_total: int
redacted_total: int
source_ref_total: int
class ChannelEventDossierResponse(BaseModel):
events: list[ChannelEventDossierItem]
total: int
limit: int
summary: ChannelEventDossierSummary
class ChannelEventProviderCoverage(BaseModel):
provider: str
total: int
duplicate_total: int
redacted_total: int
source_ref_total: int
missing_source_refs_total: int
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
latest_received_at: datetime | None
class ChannelEventDossierCoverageSummary(BaseModel):
source_count: int
source_envelope_total: int
missing_source_envelope_total: int
with_source_refs_total: int
missing_source_refs_total: int
duplicate_total: int
redacted_total: int
source_ref_total: int
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
latest_received_at: datetime | None
class ChannelEventDossierCoverageResponse(BaseModel):
project_id: str
limit: int
summary: ChannelEventDossierCoverageSummary
providers: list[ChannelEventProviderCoverage]
SourceProviderName = Literal["sentry", "signoz"]
class SourceProviderHeartbeatRequest(BaseModel):
"""Low-noise freshness heartbeat for external source-provider mirrors."""
project_id: str = Field(default="awoooi", min_length=1, max_length=64)
providers: list[SourceProviderName] = Field(
default_factory=lambda: ["sentry", "signoz"],
min_length=1,
max_length=2,
)
reason: str = Field(
default="scheduled_provider_freshness_smoke",
min_length=1,
max_length=120,
)
run_ref: str | None = Field(default=None, max_length=120)
class SourceProviderHeartbeatItem(BaseModel):
provider: SourceProviderName
event_id: str
conversation_event_id: UUID
class SourceProviderHeartbeatResponse(BaseModel):
status: str
project_id: str
items: list[SourceProviderHeartbeatItem]
class ChannelEventRecurrenceSummary(BaseModel):
source_event_total: int
recurrence_group_total: int
recurrent_group_total: int
duplicate_event_total: int
linked_run_total: int
unlinked_event_total: int
auto_repair_linked_total: int = 0
verified_repair_group_total: int = 0
open_work_item_group_total: int = 0
manual_gate_group_total: int = 0
automation_gap_group_total: int = 0
failed_repair_group_total: int = 0
source_correlation_review_group_total: int = 0
source_correlation_decision_recorded_group_total: int = 0
source_correlation_applied_group_total: int = 0
latest_received_at: datetime | None
class ChannelEventRecurrenceItem(BaseModel):
recurrence_key: str
provider: str | None
alertname: str | None
severity: str | None
namespace: str | None
target_resource: str | None
fingerprint: str | None
latest_stage: str | None = None
latest_event_id: UUID | None
latest_provider_event_id: str | None
latest_content_preview: str | None
latest_run_id: UUID | None
latest_run_state: str | None
latest_agent_id: str | None
latest_incident_id: str | None = None
incident_ids: list[str] = Field(default_factory=list)
repair_summary: dict[str, Any] | None = None
work_item: dict[str, Any] | None = None
source_correlation_review: dict[str, Any] | None = None
source_correlation_apply: dict[str, Any] | None = None
occurrence_total: int
duplicate_total: int
linked_run_total: int
source_ref_total: int
missing_source_refs_total: int
sentry_ref_total: int
signoz_ref_total: int
alert_ref_total: int
stage_counts: dict[str, int] = Field(default_factory=dict)
run_state_counts: dict[str, int]
first_received_at: datetime | None
latest_received_at: datetime | None
class ChannelEventRecurrenceResponse(BaseModel):
project_id: str
limit: int
summary: ChannelEventRecurrenceSummary
items: list[ChannelEventRecurrenceItem]
class RecurrenceWorkItemDryRunRequest(BaseModel):
"""AwoooP recurrence work item dry-run request."""
project_id: str | None = Field(default=None, min_length=1)
work_item_id: str = Field(min_length=1)
mode: RecurrenceWorkItemMode = "auto"
provider: str | None = Field(default=None, min_length=1)
limit: int = Field(default=300, ge=1, le=300)
class RecurrenceWorkItemHandoffRequest(BaseModel):
"""AwoooP recurrence work item handoff request."""
project_id: str | None = Field(default=None, min_length=1)
work_item_id: str = Field(min_length=1)
mode: RecurrenceWorkItemMode = "auto"
handoff_kind: RecurrenceWorkItemHandoffKind = "ticket_proposal"
provider: str | None = Field(default=None, min_length=1)
limit: int = Field(default=300, ge=1, le=300)
class SourceCorrelationReviewDecisionRequest(BaseModel):
"""Record-only source evidence review decision."""
project_id: str | None = Field(default=None, min_length=1)
work_item_id: str = Field(min_length=1)
decision: SourceCorrelationReviewDecision
target_incident_id: str | None = Field(default=None, min_length=1, max_length=30)
reviewer_id: str = Field(default="operator_console", min_length=1, max_length=100)
operator_note: str | None = Field(default=None, max_length=500)
provider: str | None = Field(default=None, min_length=1)
limit: int = Field(default=300, ge=1, le=300)
class SourceCorrelationApplyRequest(BaseModel):
"""Append-only source evidence link apply request."""
project_id: str | None = Field(default=None, min_length=1)
work_item_id: str = Field(min_length=1)
reviewer_id: str = Field(default="operator_console", min_length=1, max_length=100)
operator_note: str | None = Field(default=None, max_length=500)
provider: str | None = Field(default=None, min_length=1)
limit: int = Field(default=300, ge=1, le=300)
@router.get(
"/events/dossier",
response_model=ChannelEventDossierResponse,
summary="查詢 Channel Event 來源卷宗",
description=(
"返回 redacted inbound source envelope供 AwoooP Run Detail 顯示"
"告警來源、source refs、Sentry / SignOz / Alertmanager 關聯與去重狀態。"
),
)
async def get_event_dossier(
project_id: str | None = Query(None, description="租戶 ID可選"),
run_id: Annotated[
UUID | None,
Query(description="Run ID可選"),
] = None,
provider_event_id: str | None = Query(
None, description="provider_event_id可選"
),
limit: int = Query(20, ge=1, le=50, description="最多返回筆數"),
) -> dict[str, Any]:
return await fetch_channel_event_dossier(
project_id=project_id,
run_id=run_id,
provider_event_id=provider_event_id,
limit=limit,
)
@router.get(
"/events/dossier/coverage",
response_model=ChannelEventDossierCoverageResponse,
summary="查詢 Channel Event 來源卷宗覆蓋率",
description=(
"返回近期 inbound event 的 source_envelope / source_refs / 去重 / "
"Sentry / SignOz 關聯覆蓋率,供 AwoooP Run List 顯示告警是否已入庫。"
),
)
async def get_event_dossier_coverage(
project_id: str | None = Query(None, description="租戶 ID可選"),
provider: str | None = Query(
None, description="provider可選如 sentry / signoz"
),
limit: int = Query(100, ge=1, le=200, description="最多納入統計筆數"),
) -> dict[str, Any]:
return await fetch_channel_event_dossier_coverage(
project_id=project_id,
provider=provider,
limit=limit,
)
@router.post(
"/events/dossier/provider-heartbeat",
response_model=SourceProviderHeartbeatResponse,
summary="寫入 Sentry / SignOz 來源卷宗 freshness heartbeat",
description=(
"受 AwoooP operator key 保護的低噪音 smoke。只寫入來源卷宗與"
"completed shadow run不建立 Incident、不送 Telegram、不宣稱真實上游告警。"
),
)
async def create_source_provider_heartbeat(
payload: SourceProviderHeartbeatRequest,
operator: Annotated[
AwoooPOperatorPrincipal,
Depends(verify_awooop_operator),
],
) -> dict[str, Any]:
timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
items: list[dict[str, Any]] = []
for provider in payload.providers:
event_id = f"heartbeat-{timestamp}"
event_uuid = await record_external_alert_event(
project_id=payload.project_id,
provider=provider,
event_id=event_id,
stage="heartbeat",
title="SourceProviderHeartbeat",
severity="info",
namespace="awoooi-prod",
target_resource="source-provider-ingestion",
fingerprint=f"source-provider-heartbeat:{provider}",
labels={
"provider": provider,
"synthetic": "true",
"alert_category": "alertchain_provider_freshness",
"telegram": "not_sent",
"incident": "not_created",
},
annotations={
"summary": (
"Low-noise provider freshness smoke; verifies AwoooP "
"source dossier ingestion without creating an incident."
),
"reason": payload.reason,
},
payload={
"reason": payload.reason,
"run_ref": payload.run_ref,
"operator_id": operator.operator_id,
"auth_method": operator.auth_method,
"synthetic": True,
"side_effects": {
"incident_created": False,
"telegram_sent": False,
"approval_created": False,
},
},
)
if event_uuid is None:
raise HTTPException(
status_code=500,
detail=f"{provider} provider heartbeat was not recorded",
)
items.append(
{
"provider": provider,
"event_id": event_id,
"conversation_event_id": event_uuid,
}
)
return {
"status": "recorded",
"project_id": payload.project_id,
"items": items,
}
@router.get(
"/events/dossier/recurrence",
response_model=ChannelEventRecurrenceResponse,
summary="查詢 Channel Event 重複發生與關聯 Run 狀態",
description=(
"將近期 inbound source events 依 fingerprint / alertname / namespace / target 分組,"
"顯示重複發生次數、去重數、source refs 與最新 linked run 狀態。"
),
)
async def get_event_dossier_recurrence(
project_id: str | None = Query(None, description="租戶 ID可選"),
provider: str | None = Query(
None, description="provider可選如 alertmanager / sentry / signoz"
),
limit: int = Query(100, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
return await fetch_channel_event_dossier_recurrence(
project_id=project_id,
provider=provider,
limit=limit,
)
@router.get(
"/events/dossier/recurrence/work-item/preview",
summary="預覽重複告警工作項的安全處理計畫",
description=(
"依 recurrence read model 找出指定 work_item返回下一步、pre-flight checks "
"與 read-only / no-write 保證;不修改 incident、auto-repair 或 ticket 狀態。"
),
)
async def preview_event_recurrence_work_item(
work_item_id: str = Query(..., min_length=1, description="recurrence work_item_id"),
project_id: str | None = Query(None, description="租戶 ID可選"),
provider: str | None = Query(
None, description="provider可選如 alertmanager / sentry / signoz"
),
mode: Annotated[
RecurrenceWorkItemMode,
Query(description="預覽模式"),
] = "auto",
limit: int = Query(300, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
try:
return await fetch_recurrence_work_item_preview(
project_id=project_id,
work_item_id=work_item_id,
mode=mode,
provider=provider,
limit=limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.post(
"/events/dossier/recurrence/work-item/dry-run",
summary="乾跑重複告警工作項的安全處理流程",
description=(
"依 recurrence read model 產生 dry-run 結果並寫入 pre-flight history"
"但不修改 incident、auto-repair 或 ticket 狀態。"
),
)
async def dry_run_event_recurrence_work_item(
request: RecurrenceWorkItemDryRunRequest,
) -> dict[str, Any]:
try:
return await fetch_recurrence_work_item_dry_run(
project_id=request.project_id,
work_item_id=request.work_item_id,
mode=request.mode,
provider=request.provider,
limit=request.limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.post(
"/events/dossier/recurrence/work-item/handoff",
summary="記錄重複告警工作項的交接提案",
description=(
"依 recurrence read model 與 dry-run 結果記錄 ticket proposal / 人工接手歷史,"
"但不修改 incident、auto-repair 或外部 ticket 狀態。"
),
)
async def handoff_event_recurrence_work_item(
request: RecurrenceWorkItemHandoffRequest,
) -> dict[str, Any]:
try:
return await fetch_recurrence_work_item_handoff(
project_id=request.project_id,
work_item_id=request.work_item_id,
mode=request.mode,
handoff_kind=request.handoff_kind,
provider=request.provider,
limit=request.limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.post(
"/events/dossier/recurrence/source-correlation/review",
summary="記錄來源證據與 Incident 配對審核結果",
description=(
"針對 source_correlation_review work item 記錄 operator 審核決定。"
"本 API 僅寫入 alert_operation_log / 可選 timeline_events"
"不修改 Incident 狀態、不回寫 source event、不建立外部 ticket。"
),
)
async def review_source_correlation_work_item(
request: SourceCorrelationReviewDecisionRequest,
) -> dict[str, Any]:
try:
with _BodyProjectContext(request.project_id):
return await fetch_source_correlation_review_decision(
project_id=request.project_id,
work_item_id=request.work_item_id,
decision=request.decision,
target_incident_id=request.target_incident_id,
reviewer_id=request.reviewer_id,
operator_note=request.operator_note,
provider=request.provider,
limit=request.limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.post(
"/events/dossier/recurrence/source-correlation/apply",
summary="套用已確認的來源證據與 Incident 配對",
description=(
"只接受已寫入 accepted review 的 source_correlation_review work item。"
"成功時以 append-only 方式新增 source_correlation_linked 來源事件,"
"並寫入 alert_operation_log / timeline_events。"
"不修改 Incident 狀態、不修改 auto-repair 結果、不建立外部 ticket。"
),
)
async def apply_source_correlation_work_item(
request: SourceCorrelationApplyRequest,
) -> dict[str, Any]:
try:
with _BodyProjectContext(request.project_id):
return await fetch_source_correlation_apply(
project_id=request.project_id,
work_item_id=request.work_item_id,
reviewer_id=request.reviewer_id,
operator_note=request.operator_note,
provider=request.provider,
limit=request.limit,
)
except RecurrenceWorkItemNotFoundError as exc:
raise HTTPException(
status_code=404,
detail="recurrence_work_item_not_found",
) from exc
@router.get(
"/events/recent",
response_model=RecentEventsResponse,
summary="列出最近 Channel Events",
description=(
"返回 awooop_conversation_event 最近事件。"
"可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。"
),
)
async def list_recent_events(
project_id: str | None = Query(None, description="租戶 ID可選"),
channel_type: str | None = Query(None, description="通道類型(可選)"),
provider_prefix: str | None = Query(
None, description="provider_event_id 前綴(可選)"
),
limit: int = Query(20, ge=1, le=100, description="最多返回筆數"),
) -> dict[str, Any]:
return await list_recent_channel_events(
project_id=project_id,
channel_type=channel_type,
provider_prefix=provider_prefix,
limit=limit,
)

View File

@@ -15,38 +15,12 @@ from decimal import Decimal
from typing import Any, Literal
from uuid import UUID
from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Query
from pydantic import BaseModel, Field
from src.core.awooop_operator_auth import (
AwoooPOperatorPrincipal,
verify_awooop_operator,
)
from src.services.platform_operator_service import (
decide_approval as decide_approval_svc,
)
from src.services.platform_operator_service import (
get_ai_route_status as get_ai_route_status_svc,
)
from src.services.platform_operator_service import (
get_awooop_status_chain as get_awooop_status_chain_svc,
)
from src.services.platform_operator_service import (
get_run_detail as get_run_detail_svc,
)
from src.services.platform_operator_service import (
list_cicd_events as list_cicd_events_svc,
)
from src.services.platform_operator_service import (
list_approvals as list_approvals_svc,
)
from src.services.platform_operator_service import (
list_callback_replies as list_callback_replies_svc,
)
from src.services.platform_operator_service import (
list_ai_alert_card_delivery_readback as list_ai_alert_card_delivery_readback_svc,
)
from src.services.platform_operator_service import (
list_runs as list_runs_svc,
)
@@ -66,8 +40,6 @@ class RunItem(BaseModel):
step_count: int
created_at: datetime
timeout_at: datetime | None
remediation_summary: dict[str, Any] | None = None
callback_reply_summary: dict[str, Any] | None = None
class ListRunsResponse(BaseModel):
@@ -77,223 +49,12 @@ class ListRunsResponse(BaseModel):
per_page: int
class OperatorSummaryCacheInfo(BaseModel):
schema_version: str = "operator_summary_cache_v1"
status: str
source: str
ttl_seconds: int
age_seconds: float = 0.0
stored_at: datetime
expires_at: datetime
class CallbackReplyItem(BaseModel):
message_id: UUID
run_id: UUID
project_id: str
status: str
needs_human: bool
action: str | None = None
incident_id: str | None = None
event_at: datetime | None = None
channel_type: str
message_type: str
send_status: str
send_error: str | None = None
provider_message_id: str | None = None
triggered_by_state: str | None = None
content_preview: str | None = None
run_state: str | None = None
agent_id: str | None = None
run_created_at: datetime | None = None
callback_reply: dict[str, Any]
awooop_status_chain: dict[str, Any] | None = None
persisted_awooop_status_chain: dict[str, Any] | None = None
km_stale_completion_summary: dict[str, Any] | None = None
persisted_km_stale_completion_summary: dict[str, Any] | None = None
evidence_capture_status: dict[str, Any] | None = None
run_detail_href: str | None = None
class AiAlertCardDeliveryItem(BaseModel):
message_id: UUID
run_id: UUID
project_id: str
event_at: datetime | None = None
channel_type: str
message_type: str
send_status: str
send_error: str | None = None
provider_message_id: str | None = None
triggered_by_state: str | None = None
event_type: str
lane: str
target: str
gates: list[str]
runtime_write_gate_count: int
runtime_write_allowed: bool
candidate_only: bool
delivery_receipt_readback_required: bool
source_refs: dict[str, Any]
run_state: str | None = None
agent_id: str | None = None
run_created_at: datetime | None = None
run_detail_href: str | None = None
class AiAlertCardDeliverySummary(BaseModel):
schema_version: str
project_id: str
event_type: str | None = None
lane: str | None = None
status: str
total: int
sent_total: int
failed_total: int
pending_total: int
shadow_total: int
delivery_receipt_required_total: int
runtime_write_gate_open_count: int
runtime_write_allowed: bool
latest_sent_at: datetime | None = None
latest_queued_at: datetime | None = None
production_write_count: int = 0
class ListAiAlertCardsResponse(BaseModel):
items: list[AiAlertCardDeliveryItem]
total: int
page: int
per_page: int
summary: AiAlertCardDeliverySummary
class OutboundReplyMarkupGapPrefix(BaseModel):
prefix: str
total: int
recent_24h_total: int = 0
first_sent_at: datetime | None = None
last_sent_at: datetime | None = None
class CallbackReplyAuditSummary(BaseModel):
schema_version: str
project_id: str
outbound_total: int
outbound_source_envelope_total: int
outbound_source_refs_total: int
outbound_trace_ref_total: int = 0
outbound_incident_ref_total: int
outbound_reply_markup_total: int = 0
outbound_reply_markup_missing_incident_ref_total: int = 0
outbound_reply_markup_missing_incident_ref_recent_1h_total: int = 0
outbound_reply_markup_missing_incident_ref_recent_24h_total: int = 0
outbound_reply_markup_missing_incident_ref_latest_sent_at: datetime | None = None
outbound_reply_markup_missing_trace_ref_total: int = 0
outbound_reply_markup_missing_trace_ref_recent_1h_total: int = 0
outbound_reply_markup_missing_trace_ref_recent_24h_total: int = 0
outbound_reply_markup_missing_trace_ref_latest_sent_at: datetime | None = None
outbound_reply_markup_trace_ref_gap_status: str = "clean"
outbound_reply_markup_trace_ref_gap_next_action: str = "none"
outbound_reply_markup_trace_ref_after_gap_total: int = 0
outbound_reply_markup_trace_ref_after_gap_first_sent_at: datetime | None = None
outbound_reply_markup_trace_ref_after_gap_latest_sent_at: datetime | None = None
outbound_reply_markup_trace_ref_gap_recovery_status: str = "not_needed"
outbound_reply_markup_missing_incident_ref_top_prefixes: list[
OutboundReplyMarkupGapPrefix
] = Field(default_factory=list)
outbound_reply_markup_missing_trace_ref_top_prefixes: list[
OutboundReplyMarkupGapPrefix
] = Field(default_factory=list)
outbound_failed_total: int
callback_total: int
callback_sent_total: int
callback_fallback_total: int
callback_rescue_total: int
callback_failed_total: int
callback_detail_total: int
callback_history_total: int
callback_snapshot_captured_total: int
callback_snapshot_partial_total: int
callback_snapshot_missing_total: int
callback_incident_total: int
inbound_callback_total: int = 0
inbound_callback_recent_24h_total: int = 0
inbound_callback_latest_at: datetime | None = None
inbound_callback_mirror_status: str = "no_callback_observed"
inbound_callback_next_action: str = "press_any_telegram_callback_after_rollout"
snapshot_status: str
next_action: str
latest_outbound_at: datetime | None = None
latest_callback_at: datetime | None = None
class ListCallbackRepliesResponse(BaseModel):
items: list[CallbackReplyItem]
total: int
page: int
per_page: int
summary: CallbackReplyAuditSummary | None = None
cache: OperatorSummaryCacheInfo | None = None
class CicdEventItem(BaseModel):
id: str
project_id: str
alertname: str
stage: str | None = None
status: str | None = None
severity: str | None = None
commit_sha: str | None = None
triggered_by: str | None = None
duration_seconds: int = 0
summary: str | None = None
description: str | None = None
workflow_url: str | None = None
alert_id: str | None = None
source: str | None = None
action_detail: str | None = None
needs_attention: bool = False
created_at: datetime
class ListCicdEventsResponse(BaseModel):
items: list[CicdEventItem]
total: int
limit: int
class AiRouteStatusResponse(BaseModel):
schema_version: str
workload_type: str
policy_order: list[dict[str, Any]]
selected_provider: str | None = None
selected_url: str | None = None
selected_model: str | None = None
fallback_chain: list[dict[str, Any]]
route_reason: str
route_source: str
route_error: str | None = None
health: dict[str, dict[str, Any]]
lane_mode: str | None = None
active_lane: dict[str, Any] | None = None
skipped_lanes: list[dict[str, Any]] = Field(default_factory=list)
operator_action: dict[str, Any] | None = None
repair_evidence: dict[str, Any] | None = None
checked_at: datetime
class ApprovalItem(BaseModel):
run_id: UUID
project_id: str
agent_id: str
trigger_type: str | None = None
trigger_ref: str | None = None
is_shadow: bool | None = None
created_at: datetime
timeout_at: datetime | None
remediation_summary: dict[str, Any] | None = None
awooop_status_chain: dict[str, Any] | None = None
class ListApprovalsResponse(BaseModel):
@@ -304,10 +65,7 @@ class ListApprovalsResponse(BaseModel):
class DecideApprovalRequest(BaseModel):
project_id: str = Field(..., description="租戶 ID")
decision: Literal["approve", "reject"] = Field(..., description="核准或拒絕")
approver_id: str | None = Field(
default=None,
description="Deprecated. Ignored; approver comes from trusted operator headers.",
)
approver_id: str = Field(..., description="審核人 IDplatform_subject_id 或 operator email")
reason: str | None = Field(None, description="決策原因(可選)")
@@ -323,8 +81,7 @@ class DecideApprovalResponse(BaseModel):
response_model=ListRunsResponse,
summary="列出 Runs",
description=(
"返回 awooop_run_state 記錄,支援 project_id / state / remediation_status / "
"callback_reply_status / incident_id filter 與分頁。\n\n"
"返回 awooop_run_state 記錄,支援 project_id / state filter 與分頁。\n\n"
"- 按 created_at DESC 排序\n"
"- 注意:此路徑為 /runs/list 以避免與 runs.py 的 /runs/{run_id} 衝突"
),
@@ -332,162 +89,11 @@ class DecideApprovalResponse(BaseModel):
async def list_runs(
project_id: str | None = Query(None, description="租戶 ID可選"),
state: str | None = Query(None, description="Run 狀態 filter可選"),
remediation_status: str | None = Query(
None,
description="AI 證據狀態 filterno_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed",
),
callback_reply_status: str | None = Query(
None,
description="Telegram callback reply 狀態 filterno_callback/sent/fallback_sent/rescue_sent/failed/observed",
),
incident_id: str | None = Query(None, description="關聯 Incident ID filter可選"),
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
per_page: int = Query(_DEFAULT_PER_PAGE, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
) -> dict[str, Any]:
return await list_runs_svc(
project_id=project_id,
state=state,
remediation_status=remediation_status,
callback_reply_status=callback_reply_status,
incident_id=incident_id,
page=page,
per_page=per_page,
)
@router.get(
"/runs/callback-replies",
response_model=ListCallbackRepliesResponse,
summary="列出 Telegram Callback Reply Evidence",
description=(
"從 AwoooP outbound mirror 查詢 Telegram 詳情 / 歷史 callback reply 的"
"送達、fallback、救援與失敗證據只讀不修改 incident、run 或 Telegram 狀態。"
),
)
async def list_callback_replies(
project_id: str | None = Query(None, description="租戶 ID可選"),
callback_reply_status: str | None = Query(
None,
description="Telegram callback reply 狀態 filtersent/fallback_sent/rescue_sent/failed/observed/no_callback",
),
action: str | None = Query(None, description="Callback action filter例如 detail/history"),
incident_id: str | None = Query(None, description="關聯 Incident ID filter可選"),
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
per_page: int = Query(20, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
refresh: bool = Query(False, description="略過短 TTL 快取並重新聚合"),
) -> dict[str, Any]:
return await list_callback_replies_svc(
project_id=project_id,
callback_reply_status=callback_reply_status,
action=action,
incident_id=incident_id,
page=page,
per_page=per_page,
refresh=refresh,
)
@router.get(
"/runs/ai-alert-cards",
response_model=ListAiAlertCardsResponse,
summary="列出 AI 自動化事件卡送達讀回",
description=(
"從 AwoooP outbound mirror 查詢 ai_automation_alert_card_v1 的"
"結構化送達讀回;只讀,不送 Telegram、不修改 incident、run 或 Wazuh 狀態。"
),
)
async def list_ai_alert_card_delivery_readback(
project_id: str | None = Query("awoooi", description="租戶 ID"),
event_type: str | None = Query(None, description="事件類型 filter"),
lane: str | None = Query(None, description="AIOps lane filter"),
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
per_page: int = Query(20, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
refresh: bool = Query(False, description="略過短 TTL 快取並重新聚合"),
) -> dict[str, Any]:
return await list_ai_alert_card_delivery_readback_svc(
project_id=project_id,
event_type=event_type,
lane=lane,
page=page,
per_page=per_page,
refresh=refresh,
)
@router.get(
"/cicd/events",
response_model=ListCicdEventsResponse,
summary="列出 CI/CD evidence events",
description=(
"從 alert_operation_log 讀取 CI/CD notification evidence供 AwoooP "
"Deployments / Run Console 顯示 rollout-risk、success、failed 等階段狀態。"
),
)
async def list_cicd_events(
project_id: str | None = Query(None, description="租戶 ID目前支援 awoooi"),
stage: str | None = Query(None, description="CI/CD stage filter可選"),
status: str | None = Query(None, description="CI/CD status filterrunning/success/failed/pending"),
limit: int = Query(12, ge=1, le=50, description="最多返回筆數"),
) -> dict[str, Any]:
return await list_cicd_events_svc(
project_id=project_id,
stage=stage,
status_filter=status,
limit=limit,
)
@router.get(
"/ai-route-status",
response_model=AiRouteStatusResponse,
summary="查詢 AI Provider 路由狀態",
description=(
"回傳目前 Ollama/Gemini 路由策略、即時 primary、fallback chain 與健康狀態;"
"只讀,不觸發推理或自動修復。"
),
)
async def get_ai_route_status(
workload_type: str | None = Query(
"deep_rca",
description="工作負載類型,例如 deep_rca/hermes/interactive/embedding/rag/code_review/image_analysis",
),
) -> dict[str, Any]:
return await get_ai_route_status_svc(workload_type=workload_type)
@router.get(
"/runs/{run_id}/detail",
summary="查詢 Run 詳細時間線",
description=(
"返回單一 Run 的主狀態、Step Journal、MCP Gateway audit、"
"入站 Channel Event 與出站訊息,供 Operator Console 顯示完整處置脈絡。"
),
)
async def get_run_detail(
run_id: str,
project_id: str | None = Query(None, description="租戶 ID可選"),
) -> dict[str, Any]:
return await get_run_detail_svc(run_id=run_id, project_id=project_id)
@router.get(
"/status-chain",
summary="查詢 AwoooP 狀態鏈",
description=(
"依 incident_id 查詢 truth-chain + ADR-100 history 合併後的只讀狀態鏈,"
"供 Work Items、Approvals、Monitoring 等操作頁面共用。"
),
)
async def get_awooop_status_chain(
project_id: str | None = Query(None, description="租戶 ID可選"),
incident_id: list[str] | None = Query(
None,
description="Incident ID可重複傳入以合併同一工作項的多個事件",
),
) -> dict[str, Any]:
return await get_awooop_status_chain_svc(
project_id=project_id,
incident_ids=incident_id or [],
project_id=project_id, state=state, page=page, per_page=per_page
)
@@ -503,16 +109,8 @@ async def get_awooop_status_chain(
async def list_approvals(
project_id: str | None = Query(None, description="租戶 ID可選"),
run_id: str | None = Query(None, description="Run ID可選M8 詳情頁查單筆)"),
remediation_status: str | None = Query(
None,
description="AI 證據狀態 filterno_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed",
),
) -> dict[str, Any]:
return await list_approvals_svc(
project_id=project_id,
run_id=run_id,
remediation_status=remediation_status,
)
return await list_approvals_svc(project_id=project_id, run_id=run_id)
@router.post(
@@ -529,12 +127,11 @@ async def list_approvals(
async def decide_approval(
run_id: str,
body: DecideApprovalRequest,
operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
return await decide_approval_svc(
run_id=run_id,
project_id=body.project_id,
decision=body.decision,
approver_id=operator.operator_id,
approver_id=body.approver_id,
reason=body.reason,
)

View File

@@ -29,89 +29,9 @@ class TenantItem(BaseModel):
created_at: datetime
class TenantAssetSummary(BaseModel):
tenant_table_count: int
product_surface_count: int
public_route_count: int
public_gateway_snapshot_route_count: int
source_candidate_repo_count: int
source_in_scope_repo_count: int
source_primary_ready_count: int
owner_response_received_count: int
owner_response_accepted_count: int
runtime_gate_count: int
action_button_count: int
class TenantProductSurface(BaseModel):
product_id: str
product_name: str
project_id: str
category: str
surface_kind: str
owner_lane: str
coverage_status: str
public_routes: list[str]
source_keys: list[str]
public_route_count: int
source_repo_count: int
missing_public_routes: list[str]
owner_response_received_count: int
owner_response_accepted_count: int
runtime_gate_count: int
action_button_count: int
class TenantPublicRouteAsset(BaseModel):
domain: str
product_id: str
product_name: str
category: str
coverage_status: str
control_tier: str
upstream_count: int
admin_route_count: int
websocket_route_count: int
public_route_smoke_required: bool
route_smoke_accepted: bool
owner_response_accepted: bool
runtime_gate_count: int
action_button_count: int
source: str
class TenantSourceRepoAsset(BaseModel):
github_repo: str
source_key: str
source_scope_id: str
source_namespace_redacted: bool
product_id: str
product_name: str
category: str
scope_status: str
readiness_state: str
risk: str
primary_ready: bool
blocker_count: int
runtime_gate_count: int
action_button_count: int
class TenantAssetInventory(BaseModel):
schema_version: str
mode: str
evidence_refs: list[str]
summary: TenantAssetSummary
products: list[TenantProductSurface]
public_routes: list[TenantPublicRouteAsset]
source_repos: list[TenantSourceRepoAsset]
boundaries: list[str]
class ListTenantsResponse(BaseModel):
tenants: list[TenantItem]
total: int
asset_inventory: TenantAssetInventory
@router.get(

View File

@@ -1,81 +0,0 @@
"""AwoooP Operator Console — truth-chain read API."""
from __future__ import annotations
from time import perf_counter
from typing import Any
from fastapi import APIRouter, Depends, Query
from src.core.awooop_operator_auth import (
AwoooPOperatorPrincipal,
verify_awooop_operator,
)
from src.services.awooop_truth_chain_service import (
fetch_automation_quality_summary,
fetch_truth_chain,
record_quality_summary_observation,
)
router = APIRouter()
@router.get(
"/truth-chain/quality/summary",
summary="查詢 AI 自動化品質總覽",
description=(
"T12c read-only aggregate endpoint. 聚合最近 incident 的 automation quality gate"
"讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
"此總覽不回傳逐筆 examplessource-level truth-chain 詳情仍需 operator auth。"
),
)
async def get_automation_quality_summary(
project_id: str = Query("awoooi", description="租戶 ID"),
hours: int = Query(24, ge=1, le=168, description="回看小時數"),
limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
refresh: bool = Query(False, description="略過短 TTL 快取並重新聚合"),
) -> dict[str, Any]:
started_at = perf_counter()
try:
summary = await fetch_automation_quality_summary(
project_id=project_id,
hours=hours,
limit=limit,
refresh=refresh,
)
except Exception as exc:
record_quality_summary_observation(
project_id=project_id,
hours=hours,
limit=limit,
cache_status="error",
success=False,
duration_seconds=perf_counter() - started_at,
error=exc.__class__.__name__,
)
raise
summary["examples"] = []
summary["visibility_note"] = (
"Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details."
)
return summary
@router.get(
"/truth-chain/{source_id}",
summary="查詢 Telegram / Incident / Drift 真相鏈",
description=(
"T0 read-only endpoint. 聚合 incident、approval、evidence、MCP、"
"automation_operation_log、drift repeat state 與 outbound mirror"
"讓 Operator Console 能判斷 Telegram 卡片目前卡在哪個流程節點。"
),
)
async def get_truth_chain(
source_id: str,
project_id: str = Query("awoooi", description="租戶 ID"),
operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
# operator dependency intentionally gates this read API even though the
# principal is not otherwise needed by the aggregation query.
_ = operator
return await fetch_truth_chain(source_id=source_id, project_id=project_id)

View File

@@ -8,10 +8,9 @@ leWOOOgo 原則: Router 只做 HTTP 轉發,業務邏輯在 KnowledgeRAGService
建立者: Claude Code (Phase 33 ADR-067)
"""
from fastapi import APIRouter, BackgroundTasks
from fastapi import APIRouter, BackgroundTasks, HTTPException
from pydantic import BaseModel
from src.core.config import get_settings
from src.services.knowledge_rag_service import get_knowledge_rag_service
router = APIRouter(prefix="/rag", tags=["RAG Knowledge Base"])
@@ -44,10 +43,9 @@ async def trigger_index(background_tasks: BackgroundTasks) -> RagIndexResponse:
- .agents/skills/*.md
"""
background_tasks.add_task(_run_index)
model = get_settings().OLLAMA_EMBEDDING_MODEL
return RagIndexResponse(
status="accepted",
message=f"索引已排程,背景執行中({model} @ Ollama GCP-A/GCP-B/111",
message="索引已排程,背景執行中(nomic-embed-text @ Ollama 111",
)
@@ -64,7 +62,6 @@ async def rag_debug() -> dict:
"""診斷用:確認容器內 docs 路徑 + Ollama 連線"""
import os
from pathlib import Path
import httpx
paths_check = {}
@@ -79,27 +76,15 @@ async def rag_debug() -> dict:
try:
async with httpx.AsyncClient(timeout=10.0) as c:
from src.core.config import get_settings as _gs
from src.services.ollama_endpoint_resolver import resolve_ollama_order
settings = _gs()
statuses: list[str] = []
for endpoint in resolve_ollama_order("embedding"):
if not endpoint.url:
continue
r = await c.post(
f"{endpoint.url}/api/embeddings",
json={"model": settings.OLLAMA_EMBEDDING_MODEL, "prompt": "test"},
)
if r.status_code == 200:
ollama_ok = True
break
statuses.append(f"{endpoint.provider_name}=http_{r.status_code}")
if ollama_ok is not True:
ollama_ok = ", ".join(statuses) or "no_endpoint"
r = await c.post(
f"{_gs().OLLAMA_URL}/api/embeddings",
json={"model": "nomic-embed-text", "prompt": "test"},
)
ollama_ok = r.status_code == 200 if r.status_code == 200 else f"http_{r.status_code}"
except Exception as e:
ollama_ok = f"error: {type(e).__name__}: {e}"
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_embedding": ollama_ok}
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_111_embed": ollama_ok}
@router.get("/stats", summary="索引統計")

View File

@@ -14,15 +14,12 @@ AWOOOI API - Sentry Webhook Handler
🔴 HARD RULE: 時間顯示使用 Asia/Taipei (UTC+8)
"""
import json
import uuid
from typing import Any
import structlog
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from pydantic import BaseModel
from src.core.awooop_operator_auth import authenticate_awooop_operator_headers
from src.core.circuit_breaker import get_openclaw_guard
from src.core.metrics import (
record_alert_chain_failure,
@@ -38,10 +35,8 @@ from src.models.approval import (
)
from src.services.anomaly_counter import get_anomaly_counter
from src.services.approval_db import get_approval_service
from src.services.channel_hub import record_external_alert_event
from src.services.openclaw_http_service import get_openclaw_http_service
from src.services.sentry_service import get_sentry_service
# 2026-04-27 P3.1-T2 by Claude — Tier-2 三服務感知強化:補 SentryWebhookService 簽章驗證
from src.services.sentry_webhook_service import (
SentrySignatureError,
@@ -92,114 +87,6 @@ async def sentry_webhook_health() -> dict:
return {"status": "ok", "webhook": "sentry"}
def _sentry_event_tag(event_data: dict[str, Any], key: str) -> str | None:
tags = event_data.get("tags") or []
for tag in tags:
if isinstance(tag, list | tuple) and len(tag) >= 2 and str(tag[0]) == key:
return str(tag[1])
if isinstance(tag, dict) and str(tag.get("key")) == key:
value = tag.get("value")
return str(value) if value is not None else None
return None
def _is_sentry_upstream_canary(payload: dict[str, Any]) -> bool:
data = payload.get("data") if isinstance(payload, dict) else None
if not isinstance(data, dict) or payload.get("action") != "triggered":
return False
issue_data = data.get("issue") if isinstance(data.get("issue"), dict) else {}
event_data = data.get("event") if isinstance(data.get("event"), dict) else {}
issue_id = str(issue_data.get("id") or "")
short_id = str(issue_data.get("shortId") or "")
title = str(issue_data.get("title") or "")
return (
issue_id.startswith("awoooi-canary-")
or short_id.upper().startswith("AWOOOI-CANARY")
or title == "AwoooPSourceProviderCanary"
or (_sentry_event_tag(event_data, "awoooi_canary") or "").lower() == "true"
)
async def _record_sentry_upstream_canary(
payload: dict[str, Any],
request: Request,
) -> dict[str, Any]:
operator = authenticate_awooop_operator_headers(
request.headers.get("x-awooop-operator-id"),
request.headers.get("x-awooop-operator-key"),
)
data = payload.get("data") if isinstance(payload.get("data"), dict) else {}
issue_data = data.get("issue") if isinstance(data.get("issue"), dict) else {}
event_data = data.get("event") if isinstance(data.get("event"), dict) else {}
issue_id = str(
issue_data.get("id")
or issue_data.get("shortId")
or _sentry_event_tag(event_data, "run_ref")
or "awoooi-canary-unknown"
)
source_url = (
issue_data.get("permalink")
or issue_data.get("web_url")
or issue_data.get("url")
)
event_uuid = await record_external_alert_event(
project_id="awoooi",
provider="sentry",
event_id=issue_id,
stage="upstream_canary",
title=str(issue_data.get("title") or "AwoooPSourceProviderCanary"),
severity=str(issue_data.get("level") or "info"),
namespace="awoooi-prod",
target_resource=str(issue_data.get("culprit") or "source-provider-ingestion"),
fingerprint=f"source-provider-canary:sentry:{issue_id}",
source_url=source_url,
labels={
"project": issue_data.get("project", {}),
"level": issue_data.get("level", "info"),
"awoooi_canary": "true",
"operator_id": operator.operator_id,
"telegram": "not_sent",
"incident": "not_created",
"approval": "not_created",
},
annotations={
"message": event_data.get("message"),
"summary": (
"Operator-signed Sentry webhook canary; records upstream "
"source evidence without creating incident, approval, or Telegram."
),
},
payload={
"raw_canary": payload,
"operator_id": operator.operator_id,
"auth_method": operator.auth_method,
"side_effects": {
"incident_created": False,
"approval_created": False,
"telegram_sent": False,
"openclaw_called": False,
},
},
)
if event_uuid is None:
raise HTTPException(
status_code=500,
detail="sentry upstream canary was not recorded",
)
return {
"status": "canary_recorded",
"provider": "sentry",
"event_id": issue_id,
"conversation_event_id": str(event_uuid),
"side_effects": {
"incident_created": False,
"approval_created": False,
"telegram_sent": False,
"openclaw_called": False,
},
}
@router.post("/error")
async def handle_sentry_error(
request: Request,
@@ -221,14 +108,6 @@ async def handle_sentry_error(
try:
# 2026-04-27 P3.1-T2 by Claude — Tier-2 三服務感知強化:接入 SentryWebhookService 簽章驗證
body = await request.body()
try:
payload_from_body = json.loads(body.decode("utf-8") or "{}")
except json.JSONDecodeError:
payload_from_body = {}
if isinstance(payload_from_body, dict) and _is_sentry_upstream_canary(payload_from_body):
return await _record_sentry_upstream_canary(payload_from_body, request)
sig_header = request.headers.get("sentry-hook-signature", "")
try:
verify_sentry_signature(body, sig_header)
@@ -245,60 +124,16 @@ async def handle_sentry_error(
# 提取錯誤資訊
issue_data = payload.get("data", {}).get("issue", {})
event_data = payload.get("data", {}).get("event", {})
issue_id = issue_data.get("id")
source_url = (
issue_data.get("permalink")
or issue_data.get("web_url")
or issue_data.get("url")
)
background_tasks.add_task(
record_external_alert_event,
project_id="awoooi",
provider="sentry",
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
stage="received",
title=str(issue_data.get("title") or "Sentry issue"),
severity=str(issue_data.get("level") or "error"),
namespace="sentry",
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
source_url=source_url,
labels={
"project": issue_data.get("project", {}),
"level": issue_data.get("level"),
"culprit": issue_data.get("culprit"),
},
annotations={"message": event_data.get("message")},
payload=payload,
)
# Phase 10.2.1: 去重檢查 (10 分鐘內不重複發送)
issue_id = issue_data.get("id")
sentry_service = get_sentry_service()
if not await sentry_service.check_dedup(issue_id, ttl=SENTRY_DEDUP_TTL):
background_tasks.add_task(
record_external_alert_event,
project_id="awoooi",
provider="sentry",
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
stage="deduplicated",
title=str(issue_data.get("title") or "Sentry issue"),
severity=str(issue_data.get("level") or "error"),
namespace="sentry",
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
source_url=source_url,
labels={"project": issue_data.get("project", {}), "level": issue_data.get("level")},
annotations={"message": event_data.get("message")},
payload={"dedup_ttl": SENTRY_DEDUP_TTL},
is_duplicate=True,
)
return {"status": "deduplicated", "issue_id": issue_id, "ttl": SENTRY_DEDUP_TTL}
event_data = payload.get("data", {}).get("event", {})
error_context = {
"issue_id": issue_data.get("id"),
"source_url": source_url,
"title": issue_data.get("title"),
"culprit": issue_data.get("culprit"),
"level": issue_data.get("level"),
@@ -334,8 +169,6 @@ async def handle_sentry_error(
"message": "Analysis scheduled"
}
except HTTPException:
raise
except Exception as e:
logger.exception("Sentry webhook processing failed")
raise HTTPException(status_code=500, detail=str(e)) from e
@@ -423,29 +256,6 @@ async def analyze_and_comment(
analysis=analysis,
anomaly_frequency=frequency_dict,
)
await record_external_alert_event(
project_id="awoooi",
provider="sentry",
event_id=str(issue_id or error_context.get("issue_id") or "unknown"),
stage="approval_linked",
title=str(error_context.get("title") or "Sentry issue"),
severity=str(error_context.get("level") or "error"),
namespace="sentry",
target_resource=str(error_context.get("culprit") or error_context.get("project") or "unknown"),
fingerprint=f"sentry-{issue_id or error_context.get('issue_id') or 'unknown'}",
approval_id=approval_id,
source_url=error_context.get("source_url"),
labels={
"project": error_context.get("project"),
"level": error_context.get("level"),
},
annotations={"message": error_context.get("message")},
payload={
"anomaly_frequency": frequency_dict,
"ai_analyzed": analysis is not None,
"ai_provider": analysis.analyzed_by if analysis else None,
},
)
# 4. 發送 Telegram 告警 (含頻率資訊)
await send_sentry_telegram_alert(

View File

@@ -1,3 +1,7 @@
from __future__ import annotations
import asyncio
"""
AWOOOI API - SignOz Webhook Handler
====================================
@@ -13,17 +17,12 @@ AWOOOI API - SignOz Webhook Handler
🔴 HARD RULE: 時間顯示使用 Asia/Taipei (UTC+8)
"""
from __future__ import annotations
import asyncio
import uuid
from typing import TYPE_CHECKING
import structlog
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from pydantic import BaseModel
from src.core.awooop_operator_auth import authenticate_awooop_operator_headers
from src.core.metrics import (
record_alert_chain_failure,
record_alert_chain_success,
@@ -38,14 +37,10 @@ from src.models.approval import (
)
from src.services.anomaly_counter import get_anomaly_counter
from src.services.approval_db import get_approval_service
from src.services.channel_hub import record_external_alert_event
from src.services.incident_service import get_incident_service
from src.services.telegram_gateway import get_telegram_gateway
from src.utils.timezone import now_taipei_iso
if TYPE_CHECKING:
from src.services.openclaw import LLMAnalysisResult
logger = structlog.get_logger(__name__)
router = APIRouter(prefix="/webhooks/signoz", tags=["SignOz Webhook"])
@@ -72,101 +67,6 @@ class SignOzAlertPayload(BaseModel):
generatorURL: str | None = None
def _is_signoz_upstream_canary(alert: dict) -> bool:
labels = alert.get("labels", {}) if isinstance(alert.get("labels"), dict) else {}
annotations = (
alert.get("annotations", {})
if isinstance(alert.get("annotations"), dict)
else {}
)
alert_name = str(alert.get("alertname") or labels.get("alertname") or "")
return (
str(labels.get("awoooi_canary", "")).lower() == "true"
or alert_name == "AwoooPSourceProviderCanary"
or str(annotations.get("awooop_canary", "")).lower() == "true"
)
async def _record_signoz_upstream_canary(
alert: dict,
request: Request,
) -> dict:
operator = authenticate_awooop_operator_headers(
request.headers.get("x-awooop-operator-id"),
request.headers.get("x-awooop-operator-key"),
)
labels = alert.get("labels", {}) if isinstance(alert.get("labels"), dict) else {}
annotations = (
alert.get("annotations", {})
if isinstance(alert.get("annotations"), dict)
else {}
)
alert_name = str(alert.get("alertname") or labels.get("alertname") or "AwoooPSourceProviderCanary")
run_ref = str(labels.get("run_ref") or labels.get("fingerprint") or "unknown")
event_id = f"awooop-canary-{run_ref}"
severity = str(labels.get("severity") or "info")
service_name = str(labels.get("service_name") or labels.get("service") or "source-provider-ingestion")
namespace = str(labels.get("namespace") or "awoooi-prod")
fingerprint = str(labels.get("fingerprint") or f"source-provider-canary:signoz:{run_ref}")
event_uuid = await record_external_alert_event(
project_id="awoooi",
provider="signoz",
event_id=event_id,
stage="upstream_canary",
title=alert_name,
severity=severity,
namespace=namespace,
target_resource=service_name,
fingerprint=fingerprint,
source_url=alert.get("generatorURL"),
labels={
**labels,
"awoooi_canary": "true",
"operator_id": operator.operator_id,
"telegram": "not_sent",
"incident": "not_created",
"approval": "not_created",
},
annotations={
**annotations,
"summary": annotations.get("summary")
or (
"Operator-signed SignOz webhook canary; records upstream "
"source evidence without creating incident, approval, or Telegram."
),
},
payload={
"raw_canary": alert,
"operator_id": operator.operator_id,
"auth_method": operator.auth_method,
"side_effects": {
"incident_created": False,
"approval_created": False,
"telegram_sent": False,
"openclaw_called": False,
},
},
)
if event_uuid is None:
raise HTTPException(
status_code=500,
detail="signoz upstream canary was not recorded",
)
return {
"status": "canary_recorded",
"provider": "signoz",
"event_id": event_id,
"alert_name": alert_name,
"conversation_event_id": str(event_uuid),
"side_effects": {
"incident_created": False,
"approval_created": False,
"telegram_sent": False,
"openclaw_called": False,
},
}
@router.post("/alert")
async def handle_signoz_alert(
request: Request,
@@ -199,35 +99,11 @@ async def handle_signoz_alert(
results.append({"status": "ignored", "reason": "not firing"})
continue
if _is_signoz_upstream_canary(alert):
results.append(await _record_signoz_upstream_canary(alert, request))
continue
# 提取告警資訊
alert_name = alert.get("alertname", alert.get("labels", {}).get("alertname", "unknown"))
labels = alert.get("labels", {})
annotations = alert.get("annotations", {})
severity = labels.get("severity", "warning")
source_url = alert.get("generatorURL")
service_name = labels.get("service_name", labels.get("service", "unknown"))
fingerprint = labels.get("fingerprint") or f"signoz-{alert_name}-{service_name}"
background_tasks.add_task(
record_external_alert_event,
project_id="awoooi",
provider="signoz",
event_id=str(fingerprint),
stage="received",
title=str(alert_name),
severity=str(severity),
namespace=str(labels.get("namespace", "signoz")),
target_resource=str(service_name),
fingerprint=str(fingerprint),
source_url=source_url,
labels=labels,
annotations=annotations,
payload=alert,
)
# 背景處理
background_tasks.add_task(
@@ -237,8 +113,6 @@ async def handle_signoz_alert(
annotations=annotations,
severity=severity,
starts_at=alert.get("startsAt"),
source_url=source_url,
raw_payload=alert,
)
results.append({
@@ -248,8 +122,6 @@ async def handle_signoz_alert(
return {"status": "ok", "processed": len(results), "results": results}
except HTTPException:
raise
except Exception as e:
logger.exception("signoz_webhook_error", error=str(e))
raise HTTPException(status_code=500, detail=str(e)) from e
@@ -261,8 +133,6 @@ async def process_signoz_alert(
annotations: dict,
severity: str,
starts_at: str | None,
source_url: str | None = None,
raw_payload: dict | None = None,
):
"""
背景處理 SignOz 告警
@@ -320,7 +190,6 @@ async def process_signoz_alert(
"annotations": annotations,
"fingerprint": f"signoz-{alert_name}-{labels.get('service_name', 'unknown')}",
}
fingerprint = signal_data["fingerprint"]
# ADR-037: 傳遞頻率統計到 Incident
incident = await incident_service.create_incident_from_signal(
signal_data, frequency_stats=anomaly_frequency
@@ -360,30 +229,6 @@ async def process_signoz_alert(
anomaly_frequency=anomaly_frequency,
analysis_result=analysis_result, # 帶入 AI 結果
)
await record_external_alert_event(
project_id="awoooi",
provider="signoz",
event_id=str(fingerprint),
stage="incident_linked",
title=str(alert_name),
severity=str(severity),
namespace=str(labels.get("namespace", "signoz")),
target_resource=str(labels.get("service_name", labels.get("service", "unknown"))),
fingerprint=str(fingerprint),
incident_id=str(incident.incident_id),
approval_id=str(approval_id),
source_url=source_url or trace_url,
labels=labels,
annotations=annotations,
payload={
"raw_alert": raw_payload or {},
"trace_url": trace_url,
"has_signoz_metrics": bool(signoz_metrics),
"ai_provider": ai_provider,
"tokens": tokens,
"cost": cost,
},
)
# =================================================================
# Step 5: 發送 Telegram 告警
@@ -437,7 +282,7 @@ async def create_signoz_approval(
severity: str,
incident_id: str,
anomaly_frequency: dict | None = None,
analysis_result: LLMAnalysisResult | None = None,
analysis_result: "LLMAnalysisResult" | None = None,
) -> str:
"""
為 SignOz 告警建立 Approval 記錄
@@ -534,7 +379,7 @@ async def send_signoz_telegram(
annotations: dict,
severity: str,
anomaly_frequency: dict | None = None,
analysis_result: LLMAnalysisResult | None = None,
analysis_result: "LLMAnalysisResult" | None = None,
ai_provider: str = "none",
):
"""
@@ -597,7 +442,6 @@ async def _send_log_summary_notification(
帶 5s 軟超時:超時後摘要繼續生成並存 Redis不阻塞告警主流程
"""
import html as _html
from src.services.log_summary_service import get_log_summary_service
from src.services.telegram_gateway import get_telegram_gateway

View File

@@ -27,20 +27,10 @@ from fastapi import APIRouter, Depends, Query, WebSocket, WebSocketDisconnect
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel, Field
from src.services.flywheel_stats_service import (
FlywheelStatsService,
get_flywheel_stats_service,
)
from src.services.k3s_monitor_service import K3sMonitorService, get_k3s_monitor_service
from src.services.report_generation_service import (
ReportGenerationService,
get_report_generation_service,
)
from src.services.stats_service import StatsService, get_stats_service
from src.services.weekly_report_service import (
WeeklyReportService,
get_weekly_report_service,
)
from src.services.k3s_monitor_service import K3sMonitorService, get_k3s_monitor_service
from src.services.weekly_report_service import WeeklyReportService, get_weekly_report_service
from src.services.flywheel_stats_service import FlywheelStatsService, get_flywheel_stats_service
router = APIRouter(prefix="/stats", tags=["Statistics"])
@@ -52,7 +42,6 @@ router = APIRouter(prefix="/stats", tags=["Statistics"])
StatsServiceDep = Annotated[StatsService, Depends(get_stats_service)]
K3sMonitorDep = Annotated[K3sMonitorService, Depends(get_k3s_monitor_service)]
WeeklyReportDep = Annotated[WeeklyReportService, Depends(get_weekly_report_service)]
DailyReportDep = Annotated[ReportGenerationService, Depends(get_report_generation_service)]
# =============================================================================
@@ -371,168 +360,6 @@ class WeeklyReportResponse(BaseModel):
ai_success_rate: float = Field(description="AI 成功率 (%)")
commits_count: int = Field(description="本週 Commits 數")
deploy_count: int = Field(description="本週部署次數")
source_ok_count: int = Field(default=0, description="報表資料源可讀數")
source_total_count: int = Field(default=0, description="報表資料源總數")
source_confidence_percent: int = Field(default=0, description="報表資料源可信度")
source_gap_ids: list[str] = Field(default_factory=list, description="報表資料源缺口工作項")
formatted_preview: str = Field(default="", description="Telegram HTML no-send preview")
class DailyReportPreviewResponse(BaseModel):
"""日報 no-send preview 回應"""
report_date: str = Field(description="報告日期時間")
alert_total: int = Field(description="24 小時告警總數")
auto_repair_success: int = Field(description="自動修復成功次數")
auto_repair_failed: int = Field(description="自動修復失敗次數")
km_new_entries: int = Field(description="新增 KM 條目")
playbook_count: int = Field(description="活躍 PlayBook 數")
source_ok_count: int = Field(default=0, description="報表資料源可讀數")
source_total_count: int = Field(default=0, description="報表資料源總數")
source_confidence_percent: int = Field(default=0, description="報表資料源可信度")
source_gap_ids: list[str] = Field(default_factory=list, description="報表資料源缺口工作項")
formatted_preview: str = Field(default="", description="Telegram HTML no-send preview")
class MonthlyReportPreviewResponse(BaseModel):
"""月報 no-send preview 回應"""
report_month: str = Field(description="報告月份")
source_ok_count: int = Field(default=0, description="報表資料源可讀數")
source_total_count: int = Field(default=0, description="報表資料源總數")
source_confidence_percent: int = Field(default=0, description="報表資料源可信度")
source_gap_ids: list[str] = Field(default_factory=list, description="報表資料源缺口工作項")
no_send_preview_count: int = Field(default=0, description="no-send preview 數量")
formatted_preview: str = Field(default="", description="Telegram HTML no-send preview")
class SreDigestPreviewResponse(BaseModel):
"""AwoooI SRE 戰情室 digest no-send preview 回應"""
report_date: str = Field(description="報告日期時間")
source_ok_count: int = Field(default=0, description="報表資料源可讀數")
source_total_count: int = Field(default=0, description="報表資料源總數")
source_confidence_percent: int = Field(default=0, description="報表資料源可信度")
source_gap_ids: list[str] = Field(default_factory=list, description="報表資料源缺口工作項")
no_send_preview_count: int = Field(default=0, description="日 / 週 / 月 no-send preview 數量")
live_send_allowed_count: int = Field(default=0, description="允許實發數")
runtime_gate_count: int = Field(default=0, description="runtime gate 數")
formatted_preview: str = Field(default="", description="Telegram HTML no-send preview")
def _report_source_preview_fields(source_health: dict[str, Any] | None) -> dict[str, Any]:
source_health = source_health or {}
rollups = source_health.get("rollups") or {}
return {
"source_ok_count": int(rollups.get("source_ok_count") or 0),
"source_total_count": int(rollups.get("source_count") or 0),
"source_confidence_percent": int(rollups.get("confidence_percent") or 0),
"source_gap_ids": [
str(source.get("work_item_id"))
for source in source_health.get("source_health", [])
if source.get("work_item_id")
][:5],
"no_send_preview_count": int(rollups.get("no_send_preview_count") or 0),
"live_send_allowed_count": int(rollups.get("live_send_allowed_count") or 0),
"runtime_gate_count": int(rollups.get("runtime_gate_count") or 0),
}
@router.get(
"/daily/preview",
response_model=DailyReportPreviewResponse,
summary="預覽日報",
)
async def preview_daily_report(
service: DailyReportDep = None,
) -> DailyReportPreviewResponse:
"""
預覽日報內容 (不發送)
這個 endpoint 只讀取 KPI 與 report source-health不寫 Gateway queue、不發 Telegram。
"""
kpi = await service.collect_daily_kpi()
source_health = await service.collect_report_source_health(days=1)
preview_fields = _report_source_preview_fields(source_health)
return DailyReportPreviewResponse(
report_date=kpi.period_end.strftime("%Y-%m-%d %H:%M"),
alert_total=kpi.total_alerts,
auto_repair_success=kpi.auto_repair_success,
auto_repair_failed=kpi.auto_repair_failed,
km_new_entries=kpi.km_new_entries,
playbook_count=kpi.playbook_count,
source_ok_count=preview_fields["source_ok_count"],
source_total_count=preview_fields["source_total_count"],
source_confidence_percent=preview_fields["source_confidence_percent"],
source_gap_ids=preview_fields["source_gap_ids"],
formatted_preview=service.format_daily_report(kpi, source_health),
)
@router.get(
"/monthly/preview",
response_model=MonthlyReportPreviewResponse,
summary="預覽月報",
)
async def preview_monthly_report(
service: DailyReportDep = None,
) -> MonthlyReportPreviewResponse:
"""
預覽月報內容 (不發送)
月報目前使用統一 report source-health / no-send preview不排程、不發送、不寫入。
"""
from src.utils.timezone import now_taipei
source_health = await service.collect_report_source_health(days=30)
preview_fields = _report_source_preview_fields(source_health)
now = now_taipei()
return MonthlyReportPreviewResponse(
report_month=now.strftime("%Y-%m"),
source_ok_count=preview_fields["source_ok_count"],
source_total_count=preview_fields["source_total_count"],
source_confidence_percent=preview_fields["source_confidence_percent"],
source_gap_ids=preview_fields["source_gap_ids"],
no_send_preview_count=preview_fields["no_send_preview_count"],
formatted_preview=service.format_monthly_report_preview(
source_health,
generated_at=now,
),
)
@router.get(
"/sre-digest/preview",
response_model=SreDigestPreviewResponse,
summary="預覽 AwoooI SRE 戰情室 digest",
)
async def preview_sre_digest(
service: DailyReportDep = None,
) -> SreDigestPreviewResponse:
"""
預覽 AwoooI SRE 戰情室 digest (不發送)
收斂日報 / 週報 / 月報 source health、資產沉澱與工作項不寫 Gateway queue。
"""
from src.utils.timezone import now_taipei
source_health = await service.collect_report_source_health(days=30)
preview_fields = _report_source_preview_fields(source_health)
now = now_taipei()
return SreDigestPreviewResponse(
report_date=now.strftime("%Y-%m-%d %H:%M"),
source_ok_count=preview_fields["source_ok_count"],
source_total_count=preview_fields["source_total_count"],
source_confidence_percent=preview_fields["source_confidence_percent"],
source_gap_ids=preview_fields["source_gap_ids"],
no_send_preview_count=preview_fields["no_send_preview_count"],
live_send_allowed_count=preview_fields["live_send_allowed_count"],
runtime_gate_count=preview_fields["runtime_gate_count"],
formatted_preview=service.format_sre_digest_preview(
source_health,
generated_at=now,
),
)
@router.get(
@@ -558,11 +385,6 @@ async def preview_weekly_report(
ai_success_rate=report.ai_success_rate,
commits_count=report.commits_count,
deploy_count=report.deploy_count,
source_ok_count=report.report_source_ok_count,
source_total_count=report.report_source_total_count,
source_confidence_percent=report.report_source_confidence_percent,
source_gap_ids=report.report_source_gap_ids,
formatted_preview=report.format(),
)

View File

@@ -19,7 +19,6 @@ Endpoints:
- 每個 Nonce 只能使用一次
"""
import asyncio
from uuid import UUID
from fastapi import APIRouter, HTTPException, status
@@ -27,10 +26,7 @@ from pydantic import BaseModel
from src.core.config import settings
from src.core.logging import get_logger
from src.services.approval_action_classifier import is_no_action_approval_action
from src.services.approval_db import get_approval_service
from src.services.approval_execution import get_execution_service
from src.services.incident_approval_service import get_incident_approval_service
from src.services.security_interceptor import (
NonceReplayError,
UserNotWhitelistedError,
@@ -68,186 +64,6 @@ class TestPushRequest(BaseModel):
incident_id: str = ""
async def _run_telegram_approved_execution(approval) -> None:
"""Run the approved action that originated from a Telegram callback."""
approval_id = str(getattr(approval, "id", ""))
incident_id = getattr(approval, "incident_id", None)
try:
result = await get_execution_service().execute_approved_action(approval)
logger.info(
"telegram_approval_execution_completed",
approval_id=approval_id,
incident_id=incident_id,
success=bool(result),
)
except Exception as exc:
logger.error(
"telegram_approval_execution_failed",
approval_id=approval_id,
incident_id=incident_id,
error=str(exc),
)
def _schedule_telegram_approved_execution(approval) -> bool:
"""Schedule execution after Telegram approval reaches required signatures."""
try:
asyncio.create_task(_run_telegram_approved_execution(approval))
logger.info(
"telegram_approval_execution_scheduled",
approval_id=str(getattr(approval, "id", "")),
incident_id=getattr(approval, "incident_id", None),
)
return True
except Exception as exc:
logger.error(
"telegram_approval_execution_schedule_failed",
approval_id=str(getattr(approval, "id", "")),
incident_id=getattr(approval, "incident_id", None),
error=str(exc),
)
return False
async def _finalize_telegram_approval(approval, execution_triggered: bool) -> bool:
"""Complete the execution handoff for Telegram approvals.
ApprovalDBService only records the signature/status transition. The actual
executor scheduling lives in API callers, so Telegram must mirror the REST
approval endpoint instead of stopping at a visual approval stamp.
"""
if not execution_triggered:
return False
approval_action = getattr(approval, "action", None)
if approval_action is not None and is_no_action_approval_action(approval_action):
logger.warning(
"telegram_approval_execution_suppressed_no_repair_action",
approval_id=str(getattr(approval, "id", "")),
incident_id=getattr(approval, "incident_id", None),
action=str(approval_action)[:200],
)
return False
return _schedule_telegram_approved_execution(approval)
def _safe_dict(value) -> dict:
return value if isinstance(value, dict) else {}
def _safe_str(value) -> str:
return value if isinstance(value, str) else ""
def _safe_str_list(value) -> list[str]:
if not isinstance(value, list):
return []
return [item for item in value if isinstance(item, str)]
def _build_no_action_manual_handoff_payload(approval) -> dict:
"""Expose the next manual handoff state when approval has no executable repair.
NO_ACTION approvals are intentionally blocked from executor scheduling, but
the operator still needs a concrete next state instead of a dead-end approval
receipt. Keep the payload redacted and focused on AwoooP work tracking.
"""
metadata = _safe_dict(getattr(approval, "metadata", None))
package = _safe_dict(metadata.get("repair_candidate_draft_package"))
work_item = _safe_dict(package.get("awooop_work_item"))
draft_ready = bool(
metadata.get("repair_candidate_draft_ready")
or package.get("status") == "owner_review_ready"
or work_item.get("status") == "owner_review_ready"
)
next_action = (
_safe_str(package.get("next_step"))
or _safe_str(metadata.get("repair_candidate_next_step"))
or "open_repair_candidate_work_item_or_reanalyze"
)
work_item_id = (
_safe_str(work_item.get("work_item_id"))
or _safe_str(metadata.get("repair_candidate_work_item_id"))
)
work_item_href = (
_safe_str(work_item.get("work_item_url"))
or _safe_str(work_item.get("work_item_href"))
or _safe_str(metadata.get("repair_candidate_work_item_href"))
)
blocker = (
_safe_str(package.get("blocker"))
or _safe_str(metadata.get("repair_candidate_blocker_summary"))
or _safe_str(metadata.get("repair_candidate_status"))
or "repair_candidate_missing"
)
promotion_contract = _safe_dict(
package.get("candidate_promotion_contract")
or metadata.get("repair_candidate_promotion_contract")
)
promotion_summary = _safe_str(metadata.get("repair_candidate_promotion_summary"))
if not promotion_summary and promotion_contract:
promotion_summary = (
f"route={promotion_contract.get('route_id') or '--'}; "
f"promotion={promotion_contract.get('ready_count') or 0}/"
f"{promotion_contract.get('total_count') or 0}; "
f"blocked={promotion_contract.get('blocked_count') or 0}; "
f"runtime=false"
)
return {
"message": "ApprovedForOwnerReviewHandoff" if draft_ready else "ApprovedForManualHandoff",
"manual_handoff_required": True,
"manual_handoff_scheduled": True,
"manual_handoff_kind": (
"repair_candidate_owner_review" if draft_ready else "repair_candidate_draft"
),
"repair_candidate_draft_ready": draft_ready,
"owner_review_required": True,
"next_action": next_action,
"operator_guidance": (
"此批准沒有執行命令;修復候選草案已建立,請 owner review 命令、"
"rollback、verifier、blast radius 與維護窗口後,再進入執行 gate。"
if draft_ready
else (
"此批准沒有執行命令;請開啟處置包或重診,補齊專屬 PlayBook、"
"rollback、verifier 與 owner review 後再進入執行 gate。"
)
),
"work_item_id": work_item_id,
"work_item_href": work_item_href,
"repair_candidate_blocker": blocker,
"repair_candidate_promotion_summary": promotion_summary,
"repair_candidate_promotion_contract": promotion_contract,
"required_fields": _safe_str_list(package.get("required_fields")),
"blocked_operations": _safe_str_list(package.get("blocked_operations")),
"required_writebacks": _safe_str_list(package.get("required_writebacks")),
"automation_asset_requirements": package.get("automation_asset_requirements")
if isinstance(package.get("automation_asset_requirements"), list)
else [],
}
async def _sync_telegram_rejection(approval_id: str) -> bool:
"""Keep Incident state aligned when an approval is rejected from Telegram."""
try:
await get_incident_approval_service().on_approval_status_change(
approval_id=approval_id,
new_status="rejected",
)
logger.info(
"telegram_rejection_incident_synced",
approval_id=approval_id,
)
return True
except Exception as exc:
logger.error(
"telegram_rejection_incident_sync_failed",
approval_id=approval_id,
error=str(exc),
)
return False
# =============================================================================
# Endpoints
# =============================================================================
@@ -323,17 +139,6 @@ async def telegram_webhook(
# =========================================================================
try:
gateway = get_telegram_gateway()
mirror_callback = getattr(gateway, "mirror_callback_query_received", None)
if callable(mirror_callback):
await mirror_callback(
update_id=update.update_id,
callback_query_id=callback_query_id,
callback_data=callback_data,
user_id=user_id,
username=username,
message_id=message_id,
chat_id=message.get("chat", {}).get("id"),
)
result = await gateway.handle_callback(
callback_query_id=callback_query_id,
callback_data=callback_data,
@@ -393,62 +198,22 @@ async def telegram_webhook(
)
if approval:
status_value = approval.status.value if hasattr(approval.status, "value") else str(approval.status)
if (
"Cannot sign" in msg
or "already signed" in msg
or "Concurrent modification" in msg
):
logger.info(
"telegram_approval_ignored_already_processed",
approval_id=approval_id,
user_id=user_id,
status=status_value,
message=msg,
)
await _log_user_action("approve_duplicate", False, getattr(approval, "incident_id", None))
return {
"ok": True,
"message": "Already processed",
"approval_id": approval_id,
"status": status_value,
"execution_triggered": False,
"execution_scheduled": False,
}
execution_scheduled = await _finalize_telegram_approval(
approval=approval,
execution_triggered=execution_triggered,
)
approval_action = getattr(approval, "action", None)
execution_suppressed = bool(
execution_triggered
and approval_action is not None
and is_no_action_approval_action(approval_action)
)
logger.info(
"telegram_approval_signed",
approval_id=approval_id,
user_id=user_id,
status=status_value,
status=approval.status.value,
execution_triggered=execution_triggered,
execution_scheduled=execution_scheduled,
execution_suppressed=execution_suppressed,
)
await _log_user_action("approve", True, getattr(approval, "incident_id", None))
response = {
return {
"ok": True,
"message": "Approved" if execution_triggered else "Signed",
"message": "Approved",
"approval_id": approval_id,
"status": status_value,
"status": approval.status.value,
"execution_triggered": execution_triggered,
"execution_scheduled": execution_scheduled,
"execution_suppressed": execution_suppressed,
}
if execution_suppressed:
response.update(_build_no_action_manual_handoff_payload(approval))
return response
elif action == "reject":
approval, msg = await service.reject_approval(
@@ -459,12 +224,10 @@ async def telegram_webhook(
)
if approval:
incident_synced = await _sync_telegram_rejection(approval_id)
logger.info(
"telegram_approval_rejected",
approval_id=approval_id,
user_id=user_id,
incident_synced=incident_synced,
)
await _log_user_action("reject", False, getattr(approval, "incident_id", None))
@@ -473,7 +236,6 @@ async def telegram_webhook(
"message": "Rejected",
"approval_id": approval_id,
"status": approval.status.value,
"incident_synced": incident_synced,
}
return {"ok": False, "message": "Unknown action"}
@@ -550,7 +312,7 @@ async def telegram_health() -> dict:
"mode": "long_polling", # Phase 5.5: 已從 webhook 切換至 long_polling
"polling_active": gateway._polling_active,
"bot_token_set": bool(settings.OPENCLAW_TG_BOT_TOKEN),
"chat_id_set": bool(settings.SRE_GROUP_CHAT_ID),
"chat_id_set": bool(settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID),
"sre_group_chat_id_set": bool(settings.SRE_GROUP_CHAT_ID),
"whitelist_count": len(settings.OPENCLAW_TG_USER_WHITELIST),
"last_update_id": gateway._last_update_id,

View File

@@ -71,29 +71,6 @@ async def telegram_webhook(request: Request) -> dict:
update_id=body.get("update_id"),
)
if update_type == "callback_query":
callback = body.get("callback_query", {}) or {}
message = callback.get("message", {}) or {}
user = callback.get("from", {}) or {}
callback_query_id = callback.get("id")
callback_data = callback.get("data")
user_id = user.get("id")
if callback_query_id and callback_data and user_id:
from src.services.telegram_gateway import get_telegram_gateway
gateway = get_telegram_gateway()
mirror_callback = getattr(gateway, "mirror_callback_query_received", None)
if callable(mirror_callback):
await mirror_callback(
update_id=body.get("update_id"),
callback_query_id=callback_query_id,
callback_data=callback_data,
user_id=user_id,
username=user.get("username") or user.get("first_name") or str(user_id),
message_id=message.get("message_id"),
chat_id=(message.get("chat") or {}).get("id"),
)
# WS5: chat_member 同步 Approvers 白名單ADR-093
if update_type in ("chat_member", "my_chat_member") or (
"chat_member" in body or "my_chat_member" in body

File diff suppressed because it is too large Load Diff

View File

@@ -1,126 +0,0 @@
"""
AwoooP Operator authentication boundary.
ADR-116 Gate 5 approval decisions must not trust browser-supplied identities.
This module accepts a short-lived operator identity only when it is paired with
the server-side AwoooP operator key.
"""
from __future__ import annotations
import re
import secrets
from dataclasses import dataclass
from typing import Annotated
import structlog
from fastapi import Header, HTTPException, status
from src.core.config import settings
logger = structlog.get_logger(__name__)
_OPERATOR_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.:@-]{1,127}$")
_PROD_ENVS = {"prod", "production"}
@dataclass(frozen=True, slots=True)
class AwoooPOperatorPrincipal:
"""Authenticated AwoooP operator principal."""
operator_id: str
auth_method: str
def _auth_error(detail: str = "Operator authentication required") -> HTTPException:
return HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=detail)
def _clean_operator_id(operator_id: str | None) -> str:
if operator_id is None:
raise _auth_error()
cleaned = operator_id.strip()
if not _OPERATOR_ID_RE.fullmatch(cleaned):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
detail="Invalid operator identity",
)
return cleaned
def authenticate_awooop_operator_headers(
operator_id: str | None,
operator_key: str | None,
*,
configured_key: str | None = None,
environment: str | None = None,
) -> AwoooPOperatorPrincipal:
"""Validate trusted AwoooP operator headers.
Args:
operator_id: Value from ``X-AwoooP-Operator-Id``.
operator_key: Value from ``X-AwoooP-Operator-Key``.
configured_key: Server-side shared key. Defaults to settings.
environment: Runtime environment. Defaults to settings.
Returns:
Authenticated operator principal.
Raises:
HTTPException: 401 when authentication is missing/invalid, or 422 for
malformed operator identity.
"""
cleaned_operator_id = _clean_operator_id(operator_id)
expected_key = (
settings.AWOOOP_OPERATOR_API_KEY
if configured_key is None
else configured_key
)
runtime_env = (environment or settings.ENVIRONMENT or "").lower()
if not expected_key:
if runtime_env in _PROD_ENVS:
logger.critical(
"awooop_operator_key_missing_in_production",
environment=runtime_env,
)
raise _auth_error()
logger.warning(
"awooop_operator_key_skipped_dev_only",
environment=runtime_env,
operator_id=cleaned_operator_id,
)
return AwoooPOperatorPrincipal(
operator_id=cleaned_operator_id,
auth_method="dev_header",
)
if not operator_key:
logger.warning("awooop_operator_key_missing", operator_id=cleaned_operator_id)
raise _auth_error()
if not secrets.compare_digest(operator_key, expected_key):
logger.warning("awooop_operator_key_invalid", operator_id=cleaned_operator_id)
raise _auth_error()
return AwoooPOperatorPrincipal(
operator_id=cleaned_operator_id,
auth_method="operator_api_key",
)
async def verify_awooop_operator(
x_awooop_operator_id: Annotated[
str | None,
Header(alias="X-AwoooP-Operator-Id"),
] = None,
x_awooop_operator_key: Annotated[
str | None,
Header(alias="X-AwoooP-Operator-Key"),
] = None,
) -> AwoooPOperatorPrincipal:
"""FastAPI dependency for operator mutation endpoints."""
return authenticate_awooop_operator_headers(
operator_id=x_awooop_operator_id,
operator_key=x_awooop_operator_key,
)

View File

@@ -215,8 +215,8 @@ class Settings(BaseSettings):
description="Phase 25 P0: DIAGNOSE NIM timeout (秒),實測 2.2-27.3s avg 10.6s60s 含 buffer",
)
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
default=300,
description="Ollama diagnose timeout (秒)。GCP qwen3:14b CPU-only can exceed the old 120s proxy limit.",
default=200,
description="Phase 25 P0: Ollama timeout (秒),實測 CPU-only 238s保留欄位但 DIAGNOSE 不再走 Ollama",
)
# ==========================================================================
@@ -370,16 +370,11 @@ class Settings(BaseSettings):
)
return v
# 2026-05-05 Codex: health inference must stay on alert-fast model; qwen2.5
# keeps reloading a 7B model on CPU-only GCP and slows incident fallback.
# 2026-04-25 Claude Engineer-C (P1.1): Ollama 健康檢測推理測試模型
OLLAMA_HEALTH_CHECK_MODEL: str = Field(
default="gemma3:4b",
default="qwen2.5:7b-instruct",
description="OllamaHealthMonitor 推理測試使用模型P1.1",
)
OLLAMA_EMBEDDING_MODEL: str = Field(
default="bge-m3:latest",
description="Ollama embedding model. ADR-110 migrated embeddings from nomic-embed-text to bge-m3.",
)
# 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級更新必要模型清單nomic→bge-m3 + 新增 qwen3:14b + hermes3
OLLAMA_REQUIRED_MODELS: list[str] = Field(
@@ -505,7 +500,7 @@ class Settings(BaseSettings):
default=False,
description=(
"Allow LocalCodeReviewService to fall back to Gemini when the "
"local Ollama code-review lane fails. Default false to avoid "
"GCP-B/Ollama code-review lane fails. Default false to avoid "
"unexpected cloud spend from Gitea push/PR alerts."
),
)
@@ -525,22 +520,6 @@ class Settings(BaseSettings):
"then local 111 before cloud backup providers such as Gemini."
),
)
ALERT_OLLAMA_MODEL: str = Field(
default="qwen3:14b",
description=(
"Ollama model used for incident/alert deep diagnosis. Alert cards "
"may wait for this model; Gemini remains a backup after GCP-A, "
"GCP-B, and 111 fail."
),
)
INCIDENT_LLM_TIMEOUT_SECONDS: int = Field(
default=360,
description=(
"Outer timeout for incident OpenClaw proposal generation. This must "
"be long enough for the GCP-A/GCP-B/111 Ollama lane to complete "
"before Gemini backup is considered useful."
),
)
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
NVIDIA_API_KEY: str = Field(
default="",
@@ -602,77 +581,6 @@ class Settings(BaseSettings):
default="",
description="API Key for K8s admin endpoints (X-K8s-Api-Key header)",
)
AWOOOP_OPERATOR_API_KEY: str = Field(
default="",
description=(
"API key for AwoooP operator mutation endpoints "
"(X-AwoooP-Operator-Key header)"
),
)
ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER: bool = Field(
default=False,
description=(
"True=consume ansible_candidate_matched AOL rows and run "
"ansible-playbook --check --diff only. Apply remains disabled."
),
)
AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS: int = Field(
default=300,
ge=60,
description="AwoooP Ansible check-mode worker polling interval.",
)
AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT: int = Field(
default=1,
ge=1,
le=5,
description="Maximum Ansible check-mode candidates claimed per worker tick.",
)
AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS: int = Field(
default=180,
ge=30,
le=600,
description="Timeout for one ansible-playbook --check --diff execution.",
)
AWOOOP_ANSIBLE_CHECK_MODE_STARTUP_SLEEP_SECONDS: int = Field(
default=120,
ge=0,
le=900,
description="Delay before the check-mode worker first tick after API startup.",
)
AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_PROFILE: str = Field(
default="ssh_mcp",
description=(
"SSH transport profile used by Ansible check-mode. Production uses "
"the existing ssh-mcp key so repair-bot forced-command remains reserved "
"for whitelist repairs."
),
)
AWOOOP_ANSIBLE_CHECK_MODE_SSH_KEY_PATH: str = Field(
default="/run/secrets/ssh_mcp_key",
description="Private key path for Ansible check-mode SSH transport.",
)
AWOOOP_ANSIBLE_CHECK_MODE_KNOWN_HOSTS_PATH: str = Field(
default="/etc/ssh-mcp/known_hosts",
description="known_hosts path for Ansible check-mode SSH transport.",
)
AWOOOP_ANSIBLE_CHECK_MODE_CANDIDATE_MAX_AGE_HOURS: int = Field(
default=24,
ge=1,
le=168,
description=(
"Only recent Ansible candidate audit rows are eligible for automatic "
"check-mode claims; older backlog remains visible but is not drained as noise."
),
)
AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_COOLDOWN_SECONDS: int = Field(
default=21_600,
ge=300,
le=86_400,
description=(
"Cooldown after transport-level check-mode blockers such as "
"forced-command repair SSH denial."
),
)
# ==========================================================================
# 統帥鐵律:禁止 SQLite (AWOOOI 憲法)

View File

@@ -37,8 +37,8 @@ REDIS_KEY_DECISION = "decision:"
APPROVAL_TO_INCIDENT_STATUS = {
"pending": "investigating",
"approved": "resolved",
"rejected": "escalated",
"expired": "escalated",
"rejected": "rejected",
"expired": "expired",
}
# Incident 狀態 → 是否活躍

View File

@@ -4,57 +4,19 @@
設計原則:
- Python asyncio.create_task() 自動繼承父任務的 ContextVar 值
- 起始流程不再在 lifespan 強制寫入固定 PROJECT_ID呼叫端需明確提供 project_id
- get_db_context() 僅接受明確參數或已注入的 contextvar 作為 tenant 來源
- startup handler 設一次 PROJECT_ID.set("awoooi"),所有 31 個 loop 自動繼承
- get_db_context() 讀此 contextvar 作為 fallback確保 RLS SET LOCAL 正確
- 多租戶未來:呼叫端傳入不同 project_id 即可隔離,無需改 loop 本體
"""
from __future__ import annotations
from contextvars import ContextVar, Token
from contextvars import ContextVar
# 追蹤當前非同步任務的 project_id
# Fail-Closed: 移除 default="awoooi",進 DB 路徑需要明確租戶標籤
PROJECT_ID: ContextVar[str | None] = ContextVar("project_id")
PROJECT_ID_SOURCE: ContextVar[str | None] = ContextVar("project_id_source")
PROJECT_ID_REQUEST_ID: ContextVar[str | None] = ContextVar("project_id_request_id")
# default="awoooi" 確保未設時也能正常查詢RLS fail-open 保護)
PROJECT_ID: ContextVar[str] = ContextVar("project_id", default="awoooi")
def set_project_context(
project_id: str | None,
source: str = "runtime",
request_id: str | None = None,
) -> tuple[Token[str | None], Token[str | None], Token[str | None]]:
"""
設定當前 request/context 的 project 上下文,並回傳 ContextVar token 供 restore。
"""
return (
PROJECT_ID.set(project_id),
PROJECT_ID_SOURCE.set(source),
PROJECT_ID_REQUEST_ID.set(request_id),
)
def clear_project_context(tokens: tuple[Token[str | None], Token[str | None], Token[str | None]]) -> None:
"""清除 request 上下文,回復前一個 ContextVar 狀態。"""
PROJECT_ID_REQUEST_ID.reset(tokens[2])
PROJECT_ID_SOURCE.reset(tokens[1])
PROJECT_ID.reset(tokens[0])
def get_project_context() -> dict[str, str | None]:
"""取得目前上下文快照(可直接寫入 audit log"""
return {
"project_id": PROJECT_ID.get(None),
"source": PROJECT_ID_SOURCE.get(None),
"request_id": PROJECT_ID_REQUEST_ID.get(None),
}
def get_current_project_id() -> str | None:
def get_current_project_id() -> str:
"""取得當前任務的 project_id給 service 層使用)"""
return PROJECT_ID.get(None)
def get_current_project_context() -> dict[str, str | None]:
"""取得可追溯上下文(同 get_project_context保留 API 命名)。"""
return get_project_context()
return PROJECT_ID.get()

View File

@@ -11,7 +11,6 @@ Features:
"""
import logging
import re
import sys
from typing import Any
@@ -20,28 +19,6 @@ from structlog.types import Processor
from src.core.config import settings
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/bot)[^/\s]+")
def _redact_sensitive_log_text(text: str) -> str:
"""遮蔽可能出現在第三方 logger 訊息中的敏感 URL。"""
return _TELEGRAM_BOT_URL_RE.sub(r"\1<redacted>", text)
class SensitiveURLRedactionFilter(logging.Filter):
"""標準 logging filter避免 httpx 等第三方 logger 把 token URL 打進 log。"""
def filter(self, record: logging.LogRecord) -> bool:
record.msg = _redact_sensitive_log_text(str(record.msg))
if isinstance(record.args, tuple):
record.args = tuple(_redact_sensitive_log_text(str(arg)) for arg in record.args)
elif isinstance(record.args, dict):
record.args = {
key: _redact_sensitive_log_text(str(value))
for key, value in record.args.items()
}
return True
def setup_logging() -> None:
"""Configure structlog for the application"""
@@ -91,15 +68,6 @@ def setup_logging() -> None:
stream=sys.stdout,
level=logging.getLevelName(settings.LOG_LEVEL),
)
redaction_filter = SensitiveURLRedactionFilter()
root_logger = logging.getLogger()
root_logger.addFilter(redaction_filter)
for handler in root_logger.handlers:
handler.addFilter(redaction_filter)
# httpx INFO 會輸出完整 request URLTelegram Bot API URL 內含 token。
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
def get_logger(name: str | None = None, **initial_context: Any) -> structlog.BoundLogger:

View File

@@ -17,7 +17,6 @@ PostgreSQL 事務管理器,確保多表操作原子性。
from typing import Any
import structlog
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
logger = structlog.get_logger(__name__)
@@ -50,20 +49,14 @@ class UnitOfWork:
- Redis 操作失敗時必須手動呼叫 rollback()
"""
def __init__(
self,
session_factory: async_sessionmaker[AsyncSession],
project_id: str | None = None,
):
def __init__(self, session_factory: async_sessionmaker[AsyncSession]):
"""
初始化 UnitOfWork
Args:
session_factory: SQLAlchemy async session factory
project_id: RLS project context. None means contextvar/default awoooi.
"""
self._session_factory = session_factory
self._project_id = project_id
self._session: AsyncSession | None = None
self._committed = False
@@ -81,18 +74,9 @@ class UnitOfWork:
async def __aenter__(self) -> "UnitOfWork":
"""進入事務"""
from src.core.context import get_current_project_id
self._session = self._session_factory()
effective_pid = (
self._project_id if self._project_id is not None else get_current_project_id()
)
await self._session.execute(
text("SELECT set_config('app.project_id', :pid, TRUE)"),
{"pid": effective_pid},
)
self._committed = False
logger.debug("uow_started", project_id=effective_pid)
logger.debug("uow_started")
return self
async def __aexit__(

View File

@@ -10,7 +10,7 @@ from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from typing import Any
from uuid import UUID
from uuid import UUID, uuid4
from sqlalchemy import (
Boolean,
@@ -577,8 +577,8 @@ class AwoooPMcpGatewayAudit(Base):
run_id: Mapped[UUID | None] = mapped_column(nullable=True)
trace_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
agent_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
tool_id: Mapped[UUID | None] = mapped_column(
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=True
tool_id: Mapped[UUID] = mapped_column(
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=False
)
tool_name: Mapped[str] = mapped_column(String(128), nullable=False)
credential_ref: Mapped[str | None] = mapped_column(String(256), nullable=True)
@@ -635,13 +635,6 @@ class AwoooPConversationEvent(Base):
content_type: Mapped[str] = mapped_column(String(32), nullable=False, default="text")
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
redaction_version: Mapped[str] = mapped_column(
String(32), nullable=False, server_default=text("'audit_sink_v1'")
)
source_envelope: Mapped[dict[str, Any]] = mapped_column(
JSONB, nullable=False, server_default=text("'{}'::jsonb")
)
attachment_sha256: Mapped[str | None] = mapped_column(String(64), nullable=True)
is_duplicate: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
provider_ts: Mapped[datetime | None] = mapped_column(nullable=True)
@@ -687,13 +680,6 @@ class AwoooPOutboundMessage(Base):
message_type: Mapped[str] = mapped_column(String(32), nullable=False)
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
redaction_version: Mapped[str] = mapped_column(
String(32), nullable=False, server_default=text("'audit_sink_v1'")
)
source_envelope: Mapped[dict[str, Any]] = mapped_column(
JSONB, nullable=False, server_default=text("'{}'::jsonb")
)
provider_message_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
send_status: Mapped[str] = mapped_column(String(16), nullable=False, default="pending")
send_error: Mapped[str | None] = mapped_column(Text, nullable=True)

View File

@@ -16,7 +16,6 @@ Features:
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from fastapi import HTTPException
from sqlalchemy import text
from sqlalchemy.ext.asyncio import (
AsyncEngine,
@@ -27,8 +26,6 @@ from sqlalchemy.ext.asyncio import (
from sqlalchemy.orm import DeclarativeBase
from src.core.config import settings
from src.core.context import get_current_project_context
from src.core.logging import get_logger
# =============================================================================
# Base Model
@@ -45,19 +42,6 @@ class Base(DeclarativeBase):
_engine: AsyncEngine | None = None
_session_factory: async_sessionmaker[AsyncSession] | None = None
logger = get_logger("awoooi.db")
def _raise_unauthorized_db_context(msg: str) -> None:
context = get_current_project_context()
logger.error(
"db_context_missing",
reason=msg,
project_id=context.get("project_id"),
project_id_source=context.get("source"),
request_id=context.get("request_id"),
)
raise HTTPException(status_code=401, detail="Missing tenant context: project_id is required")
def get_engine() -> AsyncEngine:
@@ -122,19 +106,10 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
factory = get_session_factory()
async with factory() as session:
try:
from src.core.context import get_current_project_id
# AwoooP Phase 2.3 (2026-05-04 ogt): SET LOCAL app.project_id 讓 RLS Policy 生效
# Fail-Closed RLS: 遇到未授權情境拋出錯誤而非回退到 "awoooi"
pid = get_current_project_id()
if not pid:
_raise_unauthorized_db_context(
"Unauthorized: project_id is missing in context (Fail-Closed RLS)"
)
# 預設 'awoooi',多租戶路由將在 middleware 注入實際 project_id
await session.execute(
text("SELECT set_config('app.project_id', :pid, TRUE)"),
{"pid": pid},
text("SELECT set_config('app.project_id', 'awoooi', TRUE)")
)
yield session
await session.commit()
@@ -148,12 +123,12 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
"""
Context manager for database session (non-FastAPI usage)
AwoooP Phase 2.3/2.4: 優先序 — 明確參數 > contextvar(缺失則 fail-closed
AwoooP Phase 2.3/2.4: 優先序 — 明確參數 > contextvar > "awoooi"
- Phase 2.3: 啟用 RLS tenant isolationSET LOCAL app.project_id
- Phase 2.4: 從 asyncio contextvar 讀取 background loop 的 project_id
Usage:
async with get_db_context() as db: # 繼承 contextvar(缺失將 fail-closed
async with get_db_context() as db: # 繼承 contextvar 或預設 awoooi
...
async with get_db_context("other-tenant") as db: # 明確指定 tenant
...
@@ -161,9 +136,6 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
from src.core.context import get_current_project_id
effective_pid = project_id if project_id is not None else get_current_project_id()
if not effective_pid:
_raise_unauthorized_db_context("Unauthorized: project_id is missing in context (Fail-Closed RLS)")
factory = get_session_factory()
async with factory() as session:
try:
@@ -182,9 +154,6 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
# Initialization
# =============================================================================
_DB_BOOTSTRAP_LOCK_NAME = "awoooi:init_db:ddl"
async def init_db() -> None:
"""
Initialize database tables
@@ -193,28 +162,6 @@ async def init_db() -> None:
"""
engine = get_engine()
async with engine.connect() as lock_conn:
# 2026-05-24 ogt + Codex: 兩個 API replica 同時啟動時PostgreSQL 會在
# ALTER TABLE ... IF NOT EXISTS 上互相等待並 deadlock。整段 bootstrap
# DDL 必須序列化,避免 rollout 因一個 pod CrashLoop 變成 1/2 ready。
await lock_conn.execute(
text("SELECT pg_advisory_lock(hashtext(:lock_name))"),
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
)
try:
await _run_init_db_ddl(engine)
finally:
await lock_conn.execute(
text("SELECT pg_advisory_unlock(hashtext(:lock_name))"),
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
)
async def _run_init_db_ddl(engine: AsyncEngine) -> None:
"""
Run idempotent DB bootstrap DDL while caller holds the bootstrap advisory lock.
"""
# 2026-04-15 ogt: 多 replica 並行啟動競爭修復
# 問題:單一大 transaction 裡兩個 pod 同時建 table → 其中一個 CREATE INDEX 失敗
# PostgreSQL 中 transaction 內任何錯誤導致整個 transaction ROLLBACK

View File

@@ -633,8 +633,6 @@ class AlertOperationLog(Base):
"RESOLVED", "SILENCED", "ESCALATED", "GUARDRAIL_BLOCKED",
"PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED", "BACKUP_TRIGGERED",
"BACKUP_COMPLETED", "BACKUP_FAILED", "APPROVAL_ESCALATED", "CHANGE_APPLIED",
"NOTIFICATION_CLASSIFIED", "MANUAL_FIX_RECORDED", "KM_CONVERTED",
"PLAYBOOK_DRAFT_CREATED", "STATE_GUARD_BLOCKED",
name="alert_event_type", create_type=False,
),
nullable=False, index=True,

View File

@@ -9,7 +9,6 @@ Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型111→ Tel
debugger/vuln → deepseek-r1:14b推理; code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
"""
from __future__ import annotations
import asyncio
import re
import time
@@ -18,12 +17,12 @@ import httpx
import structlog
from sqlalchemy import text
from src.core.config import settings
from src.core.redis_client import get_redis
from src.db.base import get_db_context
from src.hermes.agent_loader import get_agent_system_prompt
from src.hermes.display_names import DEFAULT_AGENT, format_response_header
from src.hermes.safety_hooks import is_dangerous_input, is_mutate_intent
from src.services.ollama_endpoint_resolver import resolve_ollama_order
logger = structlog.get_logger(__name__)
@@ -262,48 +261,42 @@ async def process_nl_message(
t0 = time.monotonic()
# 呼叫 Ollama 模型(GCP-A → GCP-B → 111零費用按 agent 選模型)
# 呼叫 Ollama 本地模型111零費用按 agent 選模型)
model = _pick_model(agent_name)
success = False
error_type: str | None = None
result_text = ""
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
for endpoint in resolve_ollama_order("hermes"):
if not endpoint.url:
continue
try:
resp = await _hc.post(
f"{endpoint.url}/api/chat",
json={
"model": model,
# Keep Hermes responses in message.content across Ollama 0.24+.
"think": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt_with_ctx},
],
"stream": False,
"options": {"num_predict": 1500, "temperature": 0.3},
},
)
resp.raise_for_status()
result_text = resp.json().get("message", {}).get("content", "")
result_text = _strip_think_tags(result_text)
if not result_text:
result_text = "_Agent 回應為空請稍後再試。_"
success = True
break
except Exception as exc:
error_type = type(exc).__name__
logger.error(
"hermes_nl_ollama_error",
error=str(exc),
agent=agent_name,
model=model,
provider=endpoint.provider_name,
exc_type=error_type,
)
if not success:
try:
ollama_base = getattr(settings, "OLLAMA_URL", "http://34.143.170.20:11434") # 2026-05-03 ogt: ADR-110 GCP-A Primary
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
resp = await _hc.post(
f"{ollama_base}/api/chat",
json={
"model": model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt_with_ctx},
],
"stream": False,
"options": {"num_predict": 1500, "temperature": 0.3},
},
)
resp.raise_for_status()
result_text = resp.json().get("message", {}).get("content", "")
result_text = _strip_think_tags(result_text)
if not result_text:
result_text = "_Agent 回應為空請稍後再試。_"
success = True
except Exception as exc:
error_type = type(exc).__name__
logger.error(
"hermes_nl_ollama_error",
error=str(exc),
agent=agent_name,
model=model,
exc_type=error_type,
)
result_text = f"_Hermes 暫時無法連線({error_type}請稍後再試。_"
latency_ms = int((time.monotonic() - t0) * 1000)

View File

@@ -46,7 +46,6 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送
_TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻
_FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限
_STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻
_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System
# 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation
# 啟動寬限期30 分鐘內可 skip「資料還沒到」噪音超過寬限期仍空 = 真資料管線斷,必須告警
@@ -108,7 +107,6 @@ async def _check_once() -> None:
# 修法dedup 用穩定 violation_codesW-N:type 格式Telegram 照常顯示動態值
violations: list[str] = []
violation_codes: list[str] = []
probable_causes: list[str] = []
# A3 修復cluster-shared grace period單次查詢供所有 W-check 使用,避免 Pod 間不一致
grace = await _is_grace_active()
@@ -118,18 +116,8 @@ async def _check_once() -> None:
report = await AiSloCalculator().calculate()
if report.any_violated:
violated = [m.name for m in report.metrics if m.violated]
if _is_observation_only_slo_violation(report, violated):
logger.info(
"watchdog_w1_slo_observation_only",
violated=violated,
reason="sealed_waiting_rolling_window",
)
else:
w1_line, w1_cause = _format_slo_violation_for_alert(report, violated)
violations.append(w1_line)
if w1_cause:
probable_causes.append(w1_cause)
violation_codes.append(f"W1:slo_violated:{','.join(sorted(violated))}")
violations.append(f"SLO 違反: {', '.join(violated)}")
violation_codes.append(f"W1:slo_violated:{','.join(sorted(violated))}")
except Exception as e:
logger.warning("watchdog_w1_slo_check_failed", error=str(e))
@@ -222,8 +210,7 @@ async def _check_once() -> None:
from src.services.governance_agent import get_governance_agent
trust_result = await get_governance_agent().check_trust_drift(emit_alert=False)
drifted = trust_result.get("drifted", 0)
drift_ratio = float(trust_result.get("drift_ratio") or 0.0)
if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO:
if drifted > 0:
auto_deprecated = trust_result.get("auto_deprecated", 0)
kept = trust_result.get("kept", 0)
violations.append(
@@ -232,13 +219,6 @@ async def _check_once() -> None:
)
# 2026-05-05 ogt W6 修復:移除動態 low_count避免 count 微變繞過 dedup
violation_codes.append("W6:trust_drift")
elif drifted > 0:
logger.info(
"watchdog_w6_trust_drift_below_meta_threshold",
drifted=drifted,
drift_ratio=round(drift_ratio, 3),
threshold=_TRUST_DRIFT_META_MIN_RATIO,
)
except Exception as e:
logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e))
@@ -272,9 +252,7 @@ async def _check_once() -> None:
*violation_lines,
]
)
probable_cause = "\n".join(probable_causes) if probable_causes else (
"治理異常與執行資料同時異常,建議先核對 AI SLO 指標與最近自修復任務執行紀錄"
)
probable_cause = "治理異常與執行資料同時異常,建議先核對 AI SLO 指標與最近自修復任務執行紀錄"
# 發送 TYPE-8M Meta-System 告警
# 重大異常:超過 2 項即升為 critical便於前線分流1-2 項走 warning
@@ -303,94 +281,6 @@ async def _check_once() -> None:
logger.error("ai_slo_watchdog_telegram_failed", error=str(e), violations=violations)
def _format_slo_violation_for_alert(report, violated: list[str]) -> tuple[str, str | None]:
"""把 W-1 診斷資料壓成 Telegram 可讀摘要dedup key 仍沿用穩定 code。"""
if "auto_execute_success_rate" not in violated:
return f"SLO 違反: {', '.join(violated)}", None
diagnostics = getattr(report, "diagnostics", {}) or {}
diag = diagnostics.get("auto_execute_success_rate") or {}
summary = diag.get("summary") or {}
total = int(summary.get("total") or 0)
success = int(summary.get("success") or 0)
rate = summary.get("rate")
threshold = summary.get("threshold")
sealed = int(diag.get("sealed_failure_group_count") or 0)
open_groups = int(diag.get("open_failure_group_count") or 0)
needed = int(diag.get("immediate_successes_needed") or 0)
projected = _short_taipei_time(diag.get("projected_green_at"))
if isinstance(rate, (int, float)) and isinstance(threshold, (int, float)):
line = (
f"SLO 違反: auto_execute_success_rate "
f"({success}/{total}={rate:.1%},門檻 {threshold:.0%}"
f"已封口群組 {sealed},待查群組 {open_groups}"
)
if projected:
line += f";預估 {projected} 回綠"
elif needed:
line += f";需新增成功 {needed}"
line += ")"
else:
line = "SLO 違反: auto_execute_success_rate診斷資料不足"
groups = diag.get("top_failure_groups") or []
group_lines = []
for group in groups[:3]:
label = group.get("closure_status") or "unknown"
group_lines.append(
f"{group.get('alertname', 'unknown')}/{group.get('playbook_id', 'unknown')}"
f"×{group.get('count', 0)}={label}"
)
cause_parts = [
f"auto_execute_success_rate 仍在 7 日滾動窗內偏低:{success}/{total}"
if total else "auto_execute_success_rate 診斷資料不足",
]
if group_lines:
cause_parts.append("Top failure groups: " + "".join(group_lines))
if sealed and not open_groups:
cause_parts.append("目前已知失敗來源已封口,狀態是等待舊失敗滾出 7 日視窗。")
if projected:
cause_parts.append(f"若沒有新失敗,預估 {projected} 自然回綠;不需要重啟服務或改寫歷史資料。")
elif needed:
cause_parts.append(f"若要立即回綠,需要新增 {needed} 次真實成功自動修復樣本。")
if open_groups:
cause_parts.append("仍有未封口失敗群組,請反查 truth-chain、PlayBook 與 MCP 執行紀錄。")
return line, "\n".join(cause_parts)
def _is_observation_only_slo_violation(report, violated: list[str]) -> bool:
"""已封口且只等 rolling window 的 W-1不再升成 Meta System 告警。"""
if set(violated) != {"auto_execute_success_rate"}:
return False
diagnostics = getattr(report, "diagnostics", {}) or {}
diag = diagnostics.get("auto_execute_success_rate") or {}
try:
open_groups = int(diag.get("open_failure_group_count") or 0)
except (TypeError, ValueError):
open_groups = 0
return (
diag.get("status") == "sealed_waiting_window"
and open_groups == 0
)
def _short_taipei_time(value: str | None) -> str | None:
if not value:
return None
try:
parsed = datetime.fromisoformat(value)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=UTC)
taipei = parsed.astimezone(now_taipei().tzinfo)
return taipei.strftime("%m/%d %H:%M")
except Exception:
return None
async def _count_pending_no_tg_sent() -> int:
"""
查詢真正靜默的 PENDING 告警PENDING 超過 30 分鐘且 telegram_message_id IS NULL。

View File

@@ -1,44 +0,0 @@
"""AwoooP Ansible check-mode worker loop.
Runs only when explicitly enabled by settings. The worker consumes pending
``ansible_candidate_matched`` rows and records check-mode evidence; it never
executes Ansible apply.
"""
from __future__ import annotations
import asyncio
import structlog
from src.core.config import settings
from src.services.awooop_ansible_check_mode_service import run_pending_check_modes_once
logger = structlog.get_logger(__name__)
async def run_awooop_ansible_check_mode_loop() -> None:
if not settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER:
logger.info("awooop_ansible_check_mode_worker_disabled")
return
logger.info(
"awooop_ansible_check_mode_worker_started",
interval_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
batch_limit=settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
timeout_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
)
await asyncio.sleep(settings.AWOOOP_ANSIBLE_CHECK_MODE_STARTUP_SLEEP_SECONDS)
while True:
try:
result = await run_pending_check_modes_once(
limit=settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT,
timeout_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS,
)
if result.get("claimed") or result.get("blockers"):
logger.info("awooop_ansible_check_mode_worker_tick", **result)
except Exception as exc:
logger.warning("awooop_ansible_check_mode_worker_failed", error=str(exc))
await asyncio.sleep(settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS)

View File

@@ -326,7 +326,7 @@ async def _send_telegram_forecast(
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
from src.services.telegram_gateway import get_telegram_gateway
target_chat_id = settings.SRE_GROUP_CHAT_ID
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
if not target_chat_id:
return False

View File

@@ -474,7 +474,7 @@ async def _send_telegram_posture(
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
from src.services.telegram_gateway import get_telegram_gateway
target_chat_id = settings.SRE_GROUP_CHAT_ID
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
if not target_chat_id:
return

View File

@@ -299,7 +299,7 @@ async def _send_telegram_gaps(
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
from src.services.telegram_gateway import get_telegram_gateway
target_chat_id = settings.SRE_GROUP_CHAT_ID
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
if not target_chat_id:
return

View File

@@ -1,308 +0,0 @@
"""
Hermes KB Growth Worker
=======================
消費 governance_remediation_dispatch 中的 hermes_kb_growth_healthcheck work item
把 knowledge_degradation 告警推進成可審核的 KM 草稿。
邊界:
- 可以建立 REVIEW 狀態的 auto_runbook 草稿,讓 owner 在前端審核。
- 不可以直接把 KM 標成 APPROVED / PUBLISHED。
- 不修改 immutable ai_governance_events流程進度寫回 dispatch.decision_context。
2026-05-19 ogt + Codex: T90 Hermes KB growth healthcheck worker。
"""
from __future__ import annotations
import asyncio
from copy import deepcopy
from typing import Any
import structlog
from src.db.base import get_db_context
from src.db.models import GovernanceRemediationDispatch
from src.models.knowledge import (
EntrySource,
EntryStatus,
EntryType,
KnowledgeEntry,
KnowledgeEntryCreate,
)
from src.repositories.governance_remediation_dispatch_repo import (
InvalidStatusTransition,
list_pending_by_executor,
transition_status,
update_decision_context,
)
from src.repositories.knowledge_repository import KnowledgeDBRepository
logger = structlog.get_logger(__name__)
EXECUTOR_TYPE = "hermes_kb_growth_healthcheck"
DEFAULT_INTERVAL_SECONDS = 300
DEFAULT_LIMIT = 20
async def run_hermes_kb_growth_once(limit: int = DEFAULT_LIMIT) -> dict[str, int]:
"""執行一輪 Hermes KB growth healthcheck。
Returns:
統計資訊,供 log / smoke test 判讀。
"""
rows = await list_pending_by_executor(EXECUTOR_TYPE, limit=limit)
result = {
"scanned": len(rows),
"processed": 0,
"skipped": 0,
"failed": 0,
}
for row in rows:
try:
await _process_dispatch(row)
result["processed"] += 1
except InvalidStatusTransition as exc:
result["skipped"] += 1
logger.info(
"hermes_kb_growth_dispatch_skipped",
dispatch_id=row.id,
event_id=row.governance_event_id,
reason=str(exc),
)
except Exception as exc:
result["failed"] += 1
logger.exception(
"hermes_kb_growth_dispatch_failed",
dispatch_id=row.id,
event_id=row.governance_event_id,
error=str(exc),
)
await _mark_failed_if_started(row.id, str(exc))
if any(result.values()):
logger.info("hermes_kb_growth_once_completed", **result)
return result
async def run_hermes_kb_growth_loop(
interval_seconds: int = DEFAULT_INTERVAL_SECONDS,
limit: int = DEFAULT_LIMIT,
) -> None:
"""背景 loop定期消費 Hermes KB growth dispatch。"""
logger.info(
"hermes_kb_growth_loop_started",
interval_seconds=interval_seconds,
limit=limit,
)
while True:
try:
await run_hermes_kb_growth_once(limit=limit)
except asyncio.CancelledError:
raise
except Exception as exc:
logger.exception("hermes_kb_growth_loop_error", error=str(exc))
await asyncio.sleep(interval_seconds)
async def _process_dispatch(row: GovernanceRemediationDispatch) -> None:
"""處理單筆 pending dispatch最後停在 waiting_owner_review。"""
dispatched = await transition_status(row.id, "pending", "dispatched")
executing = await transition_status(dispatched.id, "dispatched", "executing")
km_entry = await _create_or_get_km_review_draft(executing)
updated_context = _build_review_context(
executing.decision_context or {},
dispatch_id=executing.id,
governance_event_id=executing.governance_event_id,
km_entry_id=km_entry.id,
)
await update_decision_context(executing.id, updated_context)
await transition_status(executing.id, "executing", "succeeded")
logger.info(
"hermes_kb_growth_review_draft_ready",
dispatch_id=executing.id,
event_id=executing.governance_event_id,
km_entry_id=km_entry.id,
workflow_stage="waiting_owner_review",
)
async def _create_or_get_km_review_draft(
dispatch: GovernanceRemediationDispatch,
) -> KnowledgeEntry:
"""以 governance event tag 做冪等,建立或取得 REVIEW 狀態 KM 草稿。"""
dispatch_tag = f"dispatch:{dispatch.id}"
event_tag = f"governance_event:{dispatch.governance_event_id}"
payload = _build_km_review_entry_payload(dispatch)
async with get_db_context() as db:
repo = KnowledgeDBRepository(db)
existing, _ = await repo.list_entries(tags=[event_tag], limit=1)
if existing:
return existing[0]
existing, _ = await repo.list_entries(tags=[dispatch_tag], limit=1)
if existing:
return existing[0]
return await repo.create(payload)
def _build_km_review_entry_payload(
dispatch: GovernanceRemediationDispatch,
) -> KnowledgeEntryCreate:
"""把 governance dispatch 轉成待審核的 KM 草稿 payload。"""
context = dispatch.decision_context or {}
workflow = context.get("workflow") if isinstance(context.get("workflow"), dict) else {}
impact = workflow.get("impact") if isinstance(workflow.get("impact"), dict) else {}
extra = context.get("extra") if isinstance(context.get("extra"), dict) else {}
ownership = context.get("ownership") if isinstance(context.get("ownership"), dict) else {}
if not ownership and isinstance(extra.get("ownership"), dict):
ownership = extra["ownership"]
stale_count = _pick_first(impact, extra, key="stale_count")
total_count = _pick_first(impact, extra, key="total_count")
stale_ratio = _pick_first(impact, context, key="stale_ratio")
threshold = _pick_first(impact, context, key="threshold")
stale_days = _pick_first(impact, extra, key="stale_days")
lead_agent = ownership.get("lead_agent") or "Hermes"
human_owner = ownership.get("human_owner") or "KM owner / SRE owner"
content = "\n".join([
"# KM 健康檢查草稿",
"",
"## 來源",
f"- governance_event_id: {dispatch.governance_event_id}",
f"- dispatch_id: {dispatch.id}",
f"- executor_type: {dispatch.executor_type}",
"",
"## 影響摘要",
f"- stale_count: {_format_unknown(stale_count)}",
f"- total_count: {_format_unknown(total_count)}",
f"- stale_ratio: {_format_ratio(stale_ratio)}",
f"- threshold: {_format_ratio(threshold)}",
f"- stale_days: {_format_unknown(stale_days)}",
"",
"## AI 已完成",
"- Hermes 已接手 knowledge_degradation dispatch。",
"- 已產生 KM 更新草稿與 owner review work item。",
"- 尚未把任何條目標成 approved / published。",
"",
"## Owner 審核重點",
"- 優先反查最近被 Incident、Sentry、SigNoz、PlayBook 引用的 KM。",
"- 確認草稿內容沒有把過期處置方式寫回正式知識庫。",
"- 審核通過後再進入 km_writeback_after_approval。",
"",
"## 安全邊界",
"- writes_km_without_approval=false",
f"- lead_agent={lead_agent}",
f"- human_owner={human_owner}",
])
return KnowledgeEntryCreate(
title=f"KM healthcheck review draft - {dispatch.governance_event_id[:8]}",
content=content,
entry_type=EntryType.AUTO_RUNBOOK,
category="AI治理",
tags=[
"governance:knowledge_degradation",
"workflow:kb_growth_healthcheck",
"stage:waiting_owner_review",
"agent:Hermes",
"needs_owner_review",
f"dispatch:{dispatch.id}",
f"governance_event:{dispatch.governance_event_id}",
],
source=EntrySource.AI_EXTRACTED,
status=EntryStatus.REVIEW,
path_type="hermes_kb_growth_healthcheck",
created_by="hermes_kb_growth_worker",
)
def _build_review_context(
context: dict[str, Any],
*,
dispatch_id: str,
governance_event_id: str,
km_entry_id: str,
) -> dict[str, Any]:
"""更新 dispatch read model讓 Work Items/Telegram 可見目前停在 owner review。"""
updated = deepcopy(context)
workflow = updated.setdefault("workflow", {})
if not isinstance(workflow, dict):
workflow = {}
updated["workflow"] = workflow
stages = workflow.setdefault("stage_by_dispatch_status", {})
if not isinstance(stages, dict):
stages = {}
workflow["stage_by_dispatch_status"] = stages
stages.update({
"executing": "draft_km_updates",
"succeeded": "waiting_owner_review",
"failed": "needs_manual_km_triage",
})
workflow["current_stage"] = "waiting_owner_review"
workflow["next_action"] = "owner_review_km_draft"
workflow["needs_human_review"] = True
workflow["writes_km_without_approval"] = False
workflow["kb_draft_entry_id"] = km_entry_id
updated["next_action"] = "owner_review_km_draft"
updated["decision_path"] = "draft_created_waiting_owner_review"
updated["proposed_action"] = "Hermes 已建立 KM 更新草稿,等待 owner 審核"
updated["worker_result"] = {
"worker": "Hermes",
"executor_type": EXECUTOR_TYPE,
"dispatch_id": dispatch_id,
"governance_event_id": governance_event_id,
"km_draft_entry_id": km_entry_id,
"stage": "waiting_owner_review",
"status": "draft_created",
"writes_km_without_approval": False,
}
return updated
async def _mark_failed_if_started(dispatch_id: str, error: str) -> None:
"""若 worker 已取得 dispatch將它收斂到 failed保留錯誤。"""
for from_status in ("executing", "dispatched"):
try:
await transition_status(
dispatch_id,
from_status,
"failed",
last_error=error[:500],
)
return
except InvalidStatusTransition:
continue
except Exception as exc:
logger.warning(
"hermes_kb_growth_mark_failed_failed",
dispatch_id=dispatch_id,
from_status=from_status,
error=str(exc),
)
return
def _pick_first(*sources: dict[str, Any], key: str) -> Any:
for source in sources:
if key in source:
return source[key]
return None
def _format_unknown(value: Any) -> str:
return "unknown" if value is None else str(value)
def _format_ratio(value: Any) -> str:
try:
return f"{float(value) * 100:.1f}%"
except (TypeError, ValueError):
return "unknown"

View File

@@ -316,7 +316,7 @@ async def _send_telegram_summary(
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
from src.services.telegram_gateway import get_telegram_gateway
target_chat_id = settings.SRE_GROUP_CHAT_ID
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
if not target_chat_id:
logger.info("hermes_telegram_skip_no_chat_id")
return False

View File

@@ -1,289 +0,0 @@
"""
Incident Lifecycle Reconciler
=============================
把已有強證據的舊 stuck incident 收斂回 RESOLVED。
範圍刻意保守:
- auto_repair_executions.success = true
- approval_records.status = EXECUTION_SUCCESS
- approval_records.status = EXPIRED
不處理單純 APPROVED / NO_ACTION / manual_required避免把仍需人工的事件
誤當作自動修復完成。
"""
from __future__ import annotations
import asyncio
from dataclasses import dataclass
import httpx
import structlog
from sqlalchemy import text
from src.core.config import settings
from src.db.base import get_db_context
from src.utils.timezone import now_taipei
logger = structlog.get_logger(__name__)
BATCH_LIMIT = 100
INTERVAL_SECONDS = 1800
_PROMETHEUS_TIMEOUT_SECONDS = 5.0
@dataclass(frozen=True)
class LifecycleCandidate:
incident_id: str
resolution_type: str
reason: str
direct_db_only: bool = False
async def run_incident_lifecycle_reconciler_loop() -> None:
"""每 30 分鐘收斂一小批已有完成證據的 stuck incident。"""
while True:
try:
resolved, errors = await reconcile_stuck_incidents()
if resolved > 0 or errors > 0:
logger.info(
"incident_lifecycle_reconciler_done",
resolved=resolved,
errors=errors,
batch_limit=BATCH_LIMIT,
)
except Exception as exc:
logger.warning("incident_lifecycle_reconciler_loop_failed", error=str(exc))
await asyncio.sleep(INTERVAL_SECONDS)
async def reconcile_stuck_incidents(limit: int = BATCH_LIMIT) -> tuple[int, int]:
"""
找出已完成但仍卡在 INVESTIGATING 的 incident透過 IncidentService 統一路徑結案。
Returns:
(resolved_count, error_count)
"""
candidates = await _fetch_candidates(limit)
remaining = max(0, limit - len(candidates))
if remaining > 0:
active_alertnames = await _fetch_active_alertnames()
if active_alertnames is not None:
candidates.extend(
await _fetch_inactive_or_duplicate_alert_candidates(
limit=remaining,
active_alertnames=active_alertnames,
exclude_incident_ids={c.incident_id for c in candidates},
)
)
if not candidates:
return 0, 0
from src.services.incident_service import get_incident_service
incident_service = get_incident_service()
resolved = 0
errors = 0
for candidate in candidates:
try:
if candidate.direct_db_only:
result = await _resolve_db_only(candidate.incident_id)
else:
result = await incident_service.resolve_incident(
candidate.incident_id,
resolution_type=candidate.resolution_type,
emit_postmortem=False,
)
if not result:
continue
resolved += 1
logger.info(
"incident_lifecycle_reconciled",
incident_id=candidate.incident_id,
reason=candidate.reason,
resolution_type=candidate.resolution_type,
direct_db_only=candidate.direct_db_only,
)
except Exception as exc:
errors += 1
logger.warning(
"incident_lifecycle_reconcile_failed",
incident_id=candidate.incident_id,
reason=candidate.reason,
error=str(exc),
)
return resolved, errors
async def _fetch_active_alertnames() -> set[str] | None:
"""Read current firing alertnames from Prometheus. None means fail-closed."""
try:
async with httpx.AsyncClient(timeout=_PROMETHEUS_TIMEOUT_SECONDS) as client:
response = await client.get(
f"{settings.PROMETHEUS_URL.rstrip('/')}/api/v1/query",
params={"query": 'ALERTS{alertstate="firing"}'},
)
response.raise_for_status()
payload = response.json()
except Exception as exc:
logger.warning("incident_lifecycle_active_alerts_fetch_failed", error=str(exc))
return None
result = payload.get("data", {}).get("result", [])
active_alertnames = {
item.get("metric", {}).get("alertname")
for item in result
if item.get("metric", {}).get("alertname")
}
logger.info(
"incident_lifecycle_active_alerts_loaded",
active_alert_count=len(active_alertnames),
)
return active_alertnames
async def _resolve_db_only(incident_id: str) -> bool:
from src.repositories.incident_repository import get_incident_repository
now = now_taipei()
return await get_incident_repository().update_status(
incident_id=incident_id,
status="resolved",
updated_at=now,
resolved_at=now,
)
async def _fetch_candidates(limit: int) -> list[LifecycleCandidate]:
async with get_db_context() as db:
result = await db.execute(
text(
"""
WITH stale AS (
SELECT
i.incident_id,
i.created_at,
EXISTS (
SELECT 1
FROM auto_repair_executions are
WHERE are.incident_id = i.incident_id
AND are.success IS TRUE
) AS has_success_auto_repair,
EXISTS (
SELECT 1
FROM approval_records ar
WHERE ar.incident_id = i.incident_id
AND ar.status::text = 'EXECUTION_SUCCESS'
) AS has_execution_success,
EXISTS (
SELECT 1
FROM approval_records ar
WHERE ar.incident_id = i.incident_id
AND ar.status::text = 'EXPIRED'
) AS has_expired_approval
FROM incidents i
WHERE i.status = 'INVESTIGATING'
AND i.created_at <= now() - interval '24 hours'
)
SELECT
incident_id,
CASE
WHEN has_success_auto_repair THEN 'auto_repair'
WHEN has_execution_success THEN 'auto_repair'
ELSE 'timeout'
END AS resolution_type,
CASE
WHEN has_success_auto_repair THEN 'auto_repair_execution_success'
WHEN has_execution_success THEN 'approval_execution_success'
ELSE 'approval_expired'
END AS reason
FROM stale
WHERE has_success_auto_repair
OR has_execution_success
OR has_expired_approval
ORDER BY created_at DESC
LIMIT :limit
"""
),
{
"limit": limit,
},
)
rows = result.mappings().all()
return [
LifecycleCandidate(
incident_id=str(row["incident_id"]),
resolution_type=str(row["resolution_type"]),
reason=str(row["reason"]),
)
for row in rows
]
async def _fetch_inactive_or_duplicate_alert_candidates(
*,
limit: int,
active_alertnames: set[str],
exclude_incident_ids: set[str],
) -> list[LifecycleCandidate]:
"""
收斂 Alertmanager 已不再 firing 的舊 incident以及同一 active alertname 的舊重複案。
若 Prometheus/Alertmanager 讀不到 active alertnames上層會 fail-closed 不呼叫本函式。
"""
active_list = list(active_alertnames) or ["__no_active_alertnames__"]
exclude_list = list(exclude_incident_ids) or ["__no_excluded_incidents__"]
async with get_db_context() as db:
result = await db.execute(
text(
"""
WITH ranked AS (
SELECT
i.incident_id,
i.alertname,
i.created_at,
row_number() OVER (
PARTITION BY i.alertname
ORDER BY i.created_at DESC, i.incident_id DESC
) AS rn
FROM incidents i
WHERE i.status = 'INVESTIGATING'
AND i.created_at <= now() - interval '24 hours'
AND NOT (i.incident_id = ANY(:exclude_incident_ids))
)
SELECT
incident_id,
CASE
WHEN alertname = ANY(:active_alertnames)
THEN 'active_duplicate_stale'
ELSE 'inactive_alert_stale'
END AS reason
FROM ranked
WHERE NOT (alertname = ANY(:active_alertnames) AND rn = 1)
ORDER BY created_at ASC
LIMIT :limit
"""
),
{
"active_alertnames": active_list,
"exclude_incident_ids": exclude_list,
"limit": limit,
},
)
rows = result.mappings().all()
return [
LifecycleCandidate(
incident_id=str(row["incident_id"]),
resolution_type="timeout",
reason=str(row["reason"]),
direct_db_only=True,
)
for row in rows
]

View File

@@ -28,7 +28,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import select, update
from src.db.base import get_db_context
from src.db.base import get_session_factory
from src.db.models import AiGovernanceEvent, KnowledgeEntryRecord
from src.utils.timezone import now_taipei
@@ -129,7 +129,7 @@ class KbRotCleaner:
rot_reasons: dict[str, list[str]] = {}
total = 0
async with get_db_context() as session:
async with get_session_factory()() as session:
# 只掃 active 狀態(非 archived
q = await session.execute(
select(KnowledgeEntryRecord).where(
@@ -193,7 +193,7 @@ class KbRotCleaner:
if not result.stale_ids:
return
async with get_db_context() as session:
async with get_session_factory()() as session:
# 逐條更新(避免 bulk update 覆蓋 tags JSONB
q = await session.execute(
select(KnowledgeEntryRecord).where(
@@ -220,7 +220,7 @@ class KbRotCleaner:
async def _save_event(self, result: RotScanResult) -> None:
"""寫 kb_stale 事件到 ai_governance_events。"""
try:
async with get_db_context() as session:
async with get_session_factory()() as session:
event = AiGovernanceEvent(
event_type="kb_stale",
details=result.to_dict(),

View File

@@ -25,9 +25,7 @@ Feature Flag
from __future__ import annotations
import asyncio
import json
import structlog
from src.core.config import settings

View File

@@ -33,7 +33,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import and_, select, update
from src.db.base import get_db_context
from src.db.base import get_session_factory
from src.db.models import KnowledgeEntryRecord
from src.models.knowledge import EntryStatus
from src.utils.timezone import now_taipei
@@ -112,7 +112,8 @@ class KnowledgeDecayJob:
cutoff = now_taipei() - timedelta(days=DECAY_AGE_DAYS)
decayable_statuses = [EntryStatus.DRAFT.value, EntryStatus.REVIEW.value]
async with get_db_context() as db:
session_factory = get_session_factory()
async with session_factory() as db:
# 查30 天未引用view_count=0且 updated_at < cutoff 的 draft/review 條目
stmt = select(KnowledgeEntryRecord).where(
and_(

View File

@@ -29,7 +29,7 @@ from datetime import timedelta
import structlog
from sqlalchemy import and_, select
from src.db.base import get_db_context
from src.db.base import get_session_factory
from src.db.models import AgentSession, AiGovernanceEvent, AutoRepairExecution, IncidentEvidence
from src.utils.timezone import now_taipei
@@ -109,7 +109,9 @@ class OfflineReplayService:
async def _run_replay(self) -> OfflineReplayReport:
cutoff = now_taipei() - timedelta(days=REPLAY_LOOKBACK_DAYS)
async with get_db_context() as db:
session_factory = get_session_factory()
async with session_factory() as db:
# 1. 取最近 N 個有 AgentSession(coordinator) 的 Incident
stmt = (
select(AgentSession.incident_id)
@@ -135,7 +137,7 @@ class OfflineReplayService:
)
results: list[IncidentReplayResult] = []
async with get_db_context() as db:
async with session_factory() as db:
for incident_id in incident_ids:
r = await self._replay_one(db, incident_id)
results.append(r)

View File

@@ -20,38 +20,31 @@ Date: 2026-03-20
import asyncio
import os
from uuid import uuid4
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
import sentry_sdk
import structlog
from fastapi import FastAPI, HTTPException, Request
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, Response
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware
from src.api.v1 import agents as agents_v1 # Phase 9.5: Agent Teams API
from src.api.v1 import ai as ai_v1
from src.api.v1 import (
ai_governance as ai_governance_v1, # 2026-05-02: /governance 頁面 3 endpoints
)
from src.api.v1 import ai_slo as ai_slo_v1 # Phase 6 ADR-087: AI SLO 自我治理
from src.api.v1 import aider_events as aider_events_v1 # aider-watch v2 ADR-091
from src.api.v1 import ai_governance as ai_governance_v1 # 2026-05-02: /governance 頁面 3 endpoints
from src.api.v1 import ai_slo as ai_slo_v1 # Phase 6 ADR-087: AI SLO 自我治理
from src.api.v1 import aiops_kpi as aiops_kpi_v1 # ADR-090 § Phase 7 KPI Dashboard
from src.api.v1 import (
aiops_timeline as aiops_timeline_v1, # 2026-04-27 Wave8-X3 B4 timeline endpoint
)
from src.api.v1 import alert_operation_logs as alert_operation_logs_v1
from src.api.v1 import aiops_timeline as aiops_timeline_v1 # 2026-04-27 Wave8-X3 B4 timeline endpoint
from src.api.v1 import approvals as approvals_v1
from src.api.v1 import alert_operation_logs as alert_operation_logs_v1
from src.api.v1 import audit_logs as audit_logs_v1
from src.api.v1 import auto_repair as auto_repair_v1 # #8: 自動升級決策
from src.api.v1 import csrf as csrf_v1 # Phase 20: CSRF Protection
from src.api.v1 import dashboard as dashboard_v1
from src.api.v1 import drift as drift_v1 # Phase 25 P2: Config Drift Detection
from src.api.v1 import errors as errors_v1 # #40: Sentry 錯誤 BFF API
from src.api.v1 import (
gitea_webhook as gitea_webhook_v1, # ADR-059: Gitea → OpenClaw (GitHub → Gitea 遷移)
@@ -60,24 +53,22 @@ from src.api.v1 import (
# Import API routers
from src.api.v1 import health as health_v1
from src.api.v1 import incidents as incidents_v1 # Phase 6.4: Decision Proposal
from src.api.v1 import iwooos as iwooos_v1 # IwoooS security governance API
from src.api.v1 import knowledge as knowledge_v1 # KB Phase 1: Knowledge Base
from src.api.v1 import learning as learning_v1 # Phase D-G P0: Learning API
from src.api.v1 import metrics as metrics_v1 # Phase 7: Gold Metrics (真實血脈)
from src.api.v1 import monitoring as monitoring_v1 # 2026-04-03: 監控工具狀態
from src.api.v1 import notifications as notifications_v1 # 2026-04-10: 通知頻道狀態
from src.api.v1 import (
platform as platform_v1, # AwoooP Phase 4: Platform ShellShadow Mode
)
from src.api.v1 import playbooks as playbooks_v1 # #7: Playbook 萃取
from src.api.v1 import proposals as proposals_v1 # Phase 6.4h: Proposals CRUD API
from src.api.v1 import rag as rag_v1 # Phase 33 ADR-067: RAG 知識庫
from src.api.v1 import (
sentry_webhook as sentry_webhook_v1, # Phase 10.2.1: Sentry → Telegram
)
from src.api.v1 import (
signoz_webhook as signoz_webhook_v1, # Phase 21: SignOz → Telegram (ADR-037)
)
from src.api.v1 import drift as drift_v1 # Phase 25 P2: Config Drift Detection
from src.api.v1 import platform as platform_v1 # AwoooP Phase 4: Platform ShellShadow Mode
from src.api.v1 import rag as rag_v1 # Phase 33 ADR-067: RAG 知識庫
from src.api.v1 import monitoring as monitoring_v1 # 2026-04-03: 監控工具狀態
from src.api.v1 import notifications as notifications_v1 # 2026-04-10: 通知頻道狀態
from src.api.v1 import stats as stats_v1 # Phase 6.5: Statistics Analytics
from src.api.v1 import telegram as telegram_v1 # Phase 5.4: Telegram Gateway
from src.api.v1 import telegram_webhook as telegram_webhook_v1 # ADR-094: Webhook入口
@@ -85,13 +76,11 @@ from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
from src.api.v1 import timeline as timeline_v1
from src.api.v1 import webhooks as webhooks_v1
from src.core.config import settings
from src.core.feature_flags import aiops_flags # ADR-080: AI 自主化飛輪 feature flags 啟動驗證
from src.core.http_client import close_all_http_clients, init_all_http_clients
from src.core.logging import get_logger, setup_logging
from src.core.redis_client import (
close_redis_pool,
close_worker_redis_pool,
init_redis_pool,
)
from src.core.redis_client import close_redis_pool, init_redis_pool
from src.services.flywheel_stats_service import get_flywheel_stats_service
from src.core.sse import get_publisher
from src.core.telemetry import setup_telemetry, shutdown_telemetry
@@ -103,10 +92,7 @@ from src.routers import proposals as proposals_router
# Legacy route imports (to be migrated)
from src.routes import agent, notifications, pipelines, plugins
from src.services.adr100_slo_metrics_service import get_adr100_slo_metrics_service
from src.services.alert_chain_metrics_service import get_alert_chain_metrics_service
from src.services.executor import close_executor
from src.services.flywheel_stats_service import get_flywheel_stats_service
# Phase 5: OpenClaw AI Engine
from src.services.openclaw import close_openclaw
@@ -121,26 +107,6 @@ from src.workers import close_signal_worker, init_signal_worker
setup_logging()
logger = get_logger("awoooi.api")
ALERTMANAGER_WEBHOOK_PATH = "/api/v1/webhooks/alertmanager"
ALERTMANAGER_DEFAULT_PROJECT_ID = "awoooi"
def _resolve_request_project_context(request: Request) -> tuple[str | None, str]:
"""Resolve tenant context for RLS while keeping non-webhook routes fail-closed."""
for candidate in (
request.headers.get("X-Project-ID"),
request.headers.get("X-Tenant-ID"),
request.query_params.get("project_id"),
):
project_id = candidate.strip() if candidate else None
if project_id:
return project_id, "request.header_or_query"
if request.url.path == ALERTMANAGER_WEBHOOK_PATH:
return ALERTMANAGER_DEFAULT_PROJECT_ID, "request.alertmanager.default_project"
return None, "request.project_id.missing"
# =============================================================================
# Sentry SDK Initialization (Error Tracking - 補強 SignOz)
# Self-Hosted @ 192.168.0.110
@@ -301,55 +267,50 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
# 2026-04-05 ogt: 重開機後 Redis 清空,從 DB restore 未解決的 incidents
# 統帥批准: 數據必須長久記錄,重開機後自動恢復 Working Memory
try:
from src.services.incident_service import get_incident_service
from src.db.base import get_db_context
from src.db.models import IncidentRecord
from sqlalchemy import select
from src.db.base import get_db_context
from src.core.context import clear_project_context, set_project_context
from src.db.models import IncidentRecord
from src.models.incident import IncidentStatus
from src.services.incident_service import get_incident_service
startup_ctx_tokens = set_project_context(
project_id=settings.SYSTEM_NAME,
source="startup.warmup",
request_id="startup-warmup",
)
try:
incident_service = get_incident_service()
async with get_db_context() as db:
result = await db.execute(
select(IncidentRecord).where(
IncidentRecord.status.in_([
IncidentStatus.INVESTIGATING,
IncidentStatus.MITIGATING,
])
)
incident_service = get_incident_service()
async with get_db_context() as db:
result = await db.execute(
select(IncidentRecord).where(
IncidentRecord.status.in_(["investigating", "mitigating"])
)
records = result.scalars().all()
restored = 0
for record in records:
try:
incident = incident_service._record_to_incident(record)
if await incident_service.save_to_working_memory(incident):
restored += 1
except Exception as record_error:
# 舊資料 source 值不合法node-exporter 等)→ 跳過
logger.warning(
"working_memory_warmup_record_skipped",
incident_id=getattr(record, "incident_id", None),
error=str(record_error),
)
logger.info(
"working_memory_warmed_up",
restored=restored,
total=len(records),
startup_project_id=settings.SYSTEM_NAME,
)
finally:
clear_project_context(startup_ctx_tokens)
records = result.scalars().all()
restored = 0
for record in records:
try:
from src.models.incident import Incident
incident = Incident(
incident_id=record.incident_id,
status=record.status,
severity=record.severity,
signals=record.signals or [],
affected_services=record.affected_services or [],
decision_chain=record.decision_chain,
proposal_ids=record.proposal_ids or [],
outcome=record.outcome,
created_at=record.created_at,
updated_at=record.updated_at,
resolved_at=record.resolved_at,
closed_at=record.closed_at,
ttl_days=record.ttl_days,
vectorized=record.vectorized,
# ADR-073: 分類欄位必須還原,否則 KM 寫入時全為 "unknown"
notification_type=record.notification_type,
alert_category=record.alert_category,
)
if await incident_service.save_to_working_memory(incident):
restored += 1
except Exception:
# 舊資料 source 值不合法node-exporter 等)→ 跳過
pass
logger.info("working_memory_warmed_up", restored=restored, total=len(records))
except Exception as e:
logger.warning("working_memory_warmup_failed", error=str(e))
@@ -390,9 +351,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
logger.warning("playbook_pg_backfill_schedule_failed", error=str(e))
try:
from src.services.playbook_embedding_service import (
ensure_playbook_embeddings_indexed,
)
from src.services.playbook_embedding_service import ensure_playbook_embeddings_indexed
asyncio.create_task(ensure_playbook_embeddings_indexed())
logger.info("playbook_embedding_indexing_scheduled")
except Exception as e:
@@ -540,40 +499,6 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
except Exception as e:
logger.warning("approval_timeout_resolver_schedule_failed", error=str(e))
# T73: 已有完成證據但仍卡在 INVESTIGATING 的舊 incident 小批次收斂。
# 僅處理 auto-repair success / approval EXECUTION_SUCCESS / approval EXPIRED
# 不自動關閉 manual_required 或單純 APPROVED 事件。
try:
from src.jobs.incident_lifecycle_reconciler import (
INTERVAL_SECONDS as INCIDENT_LIFECYCLE_RECONCILER_INTERVAL,
)
from src.jobs.incident_lifecycle_reconciler import (
run_incident_lifecycle_reconciler_loop,
)
asyncio.create_task(run_incident_lifecycle_reconciler_loop())
logger.info(
"incident_lifecycle_reconciler_scheduled",
interval_sec=INCIDENT_LIFECYCLE_RECONCILER_INTERVAL,
)
except Exception as e:
logger.warning("incident_lifecycle_reconciler_schedule_failed", error=str(e))
# AwoooP Ansible check-mode worker.
# 只執行 ansible-playbook --check --diff 並回寫 automation_operation_log
# apply 仍必須走 approval gate本 worker 不寫 auto_repair_executions。
try:
from src.jobs.awooop_ansible_check_mode_job import (
run_awooop_ansible_check_mode_loop,
)
asyncio.create_task(run_awooop_ansible_check_mode_loop())
logger.info(
"awooop_ansible_check_mode_worker_scheduled",
enabled=settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER,
interval_seconds=settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS,
)
except Exception as e:
logger.warning("awooop_ansible_check_mode_worker_schedule_failed", error=str(e))
# ADR-083 Phase 3: Evolver Agent每日— Playbook 自動合併 + 低信任封存
# 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 3 初始建立
try:
@@ -585,9 +510,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
# ADR-104 T2: LLM Playbook DRAFT governance每小時
try:
from src.jobs.playbook_generation_governance_job import (
run_playbook_generation_governance_loop,
)
from src.jobs.playbook_generation_governance_job import run_playbook_generation_governance_loop
asyncio.create_task(run_playbook_generation_governance_loop())
logger.info(
"playbook_generation_governance_loop_scheduled",
@@ -631,11 +554,11 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
# 2026-04-27 P3.1-T3 by Claude
try:
from src.utils.timezone import now_taipei
from datetime import datetime as _dt
async def _run_kb_rot_cleaner_loop() -> None:
import asyncio as _asyncio
from src.jobs.kb_rot_cleaner import get_kb_rot_cleaner
import asyncio as _asyncio
while True:
try:
now = now_taipei()
@@ -726,24 +649,14 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
except Exception as e:
logger.warning("governance_dispatcher_schedule_failed", error=str(e))
# T90 2026-05-19 ogt + Codex: Hermes KB growth worker每 5 分鐘)
# 消費 knowledge_degradation 的 hermes_kb_growth_healthcheck dispatch
# 只產生 REVIEW 草稿並停在 owner review不直接批准或發布 KM。
try:
from src.jobs.hermes_kb_growth_worker import run_hermes_kb_growth_loop
asyncio.create_task(run_hermes_kb_growth_loop())
logger.info("hermes_kb_growth_worker_scheduled", interval_sec=300)
except Exception as e:
logger.warning("hermes_kb_growth_worker_schedule_failed", error=str(e))
# 2026-04-25 P1.2 by Claude Engineer-A2 — failover 整合到 ai_router + lifespan
# OllamaFailoverManager + OllamaAutoRecoveryService 飛輪接線:
# failover 切換時 → recovery_callback → set_current_primary → Redis 持久化
# recovery service 每 30s 檢查 → 111 連續 3 次 HEALTHY → 自動切回 → clear_cache
# 順序:先取 singleton → wire callback → 啟動 recovery service才能接收 callback
try:
from src.services.ollama_auto_recovery import get_ollama_auto_recovery_service
from src.services.ollama_failover_manager import get_ollama_failover_manager
from src.services.ollama_auto_recovery import get_ollama_auto_recovery_service
_failover_mgr = get_ollama_failover_manager()
_recovery_svc = get_ollama_auto_recovery_service()
@@ -756,8 +669,8 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
# alerter 還沒注入 Redis → dedup fail-open告警會送出且無 dedup 保護(重複告警風險)
# 修法configure_alerter() 提前到 start() 之前Redis pool 在 lifespan 早期已就緒
try:
from src.core.redis_client import get_redis
from src.services.failover_alerter import configure_alerter
from src.core.redis_client import get_redis
configure_alerter(get_redis())
logger.info("failover_alerter_configured")
except Exception as _alerter_err:
@@ -841,7 +754,6 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
# Phase 6.1: 關閉 Signal Worker (先關閉 Consumer)
await close_signal_worker()
await close_worker_redis_pool()
await publisher.stop()
await close_executor()
await close_openclaw()
@@ -894,8 +806,11 @@ else:
# Middleware
# =============================================================================
# 2026-04-03 ogt: Nginx 反向代理修正 — 讓 FastAPI 信任 X-Forwarded-Proto
# 避免 /api/v1/knowledge 等 redirect 在 HTTPS 反向代理後產生 http:// Location
# 2026-04-03 ogt: Nginx 反向代理修正 — 讓 FastAPI 信任 X-Forwarded-Proto
# 解決問題: /api/v1/knowledge (無結尾斜線) 307 redirect 產生 http:// Location
# 原因: FastAPI 不知道自己在 HTTPS 後面redirect 回 http://
# 效果: 有了此中間件307 Location 會是 https://
from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware
app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*")
# CORS - Strict Whitelist (Iron Law #2)
@@ -923,45 +838,27 @@ async def request_logging_middleware(request: Request, call_next):
"""
import time
from src.core.context import clear_project_context, get_current_project_context, set_project_context
request_id = request.headers.get("X-Request-ID") or str(uuid4())
project_id, source = _resolve_request_project_context(request)
context_tokens = set_project_context(
project_id=project_id,
source=source,
request_id=request_id,
)
request_id = request.headers.get("X-Request-ID", "-")
start_time = time.perf_counter()
# Bind request context for all logs in this request
structlog.contextvars.clear_contextvars()
current_context = get_current_project_context()
structlog.contextvars.bind_contextvars(
request_id=request_id,
method=request.method,
path=request.url.path,
project_id=current_context["project_id"],
project_context_source=current_context["source"],
)
log = get_logger("awoooi.http")
log.debug("request_start")
try:
response = await call_next(request)
finally:
clear_project_context(context_tokens)
response = await call_next(request)
duration_ms = (time.perf_counter() - start_time) * 1000
log.info(
"request_complete",
status_code=response.status_code,
duration_ms=round(duration_ms, 2),
project_id=current_context["project_id"],
project_context_source=current_context["source"],
has_project_context=bool(current_context["project_id"]),
)
# Add request ID to response headers
@@ -969,41 +866,11 @@ async def request_logging_middleware(request: Request, call_next):
return response
@app.get("/api/v1/security/db-context-guard")
async def db_context_guard() -> dict:
"""
Context Guard Endpoint (P1-1 runtime evidence)
- 未提供 project contextX-Project-ID / X-Tenant-ID / project_id query
時,應回傳 401代表 RLS 已採 fail-closed
- 有提供 context 時回傳 context snapshot便於稽核
"""
from src.core.context import get_current_project_context
from src.db.base import get_db_context
async with get_db_context():
return {
"status": "ok",
"project_context": get_current_project_context(),
"source": "runtime_guard",
}
# =============================================================================
# Exception Handlers
# =============================================================================
@app.exception_handler(HTTPException)
async def http_exception_handler(_request: Request, exc: HTTPException) -> JSONResponse:
"""Preserve intentional HTTP status responses (e.g. 401/403).
This is critical for P1-1 fail-closed evidence; without it, all HTTPException
is swallowed by the generic exception handler and downgraded to 500.
"""
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}, headers=exc.headers)
@app.exception_handler(Exception)
async def global_exception_handler(_request: Request, exc: Exception) -> JSONResponse:
"""
@@ -1036,7 +903,6 @@ async def global_exception_handler(_request: Request, exc: Exception) -> JSONRes
# =============================================================================
# New v1 API routes
app.include_router(iwooos_v1.router, tags=["IwoooS Security"])
app.include_router(health_v1.router, prefix="/api/v1", tags=["Health"])
app.include_router(csrf_v1.router, prefix="/api/v1", tags=["Security"]) # Phase 20
app.include_router(dashboard_v1.router, prefix="/api/v1", tags=["Dashboard"])
@@ -1140,15 +1006,6 @@ app.include_router(platform_v1.router, prefix="/api/v1/platform", tags=["AwoooP
@app.get("/metrics", include_in_schema=False)
async def prometheus_metrics() -> Response:
"""Prometheus metrics endpoint for alerting"""
# 2026-05-19 Codex — T85 Alert Chain DB evidence refresh.
# record_alert_chain_success() 是 process-local gauge部署後第一個 scrape
# 可能尚未收到新 webhook導致 smoke test 誤判 metric 不存在。
# 先用 AwoooP inbound / alert_operation_log 的 durable evidence 回填 last_success。
try:
await get_alert_chain_metrics_service().refresh_last_success_gauge()
except Exception as exc:
logger.warning("prometheus_metrics_alert_chain_evidence_error", error=str(exc))
content = generate_latest().decode("utf-8")
# 2026-05-07 ogt + Claude Sonnet 4.6 — INC-20260507-99ADF2 修復
# 飛輪指標awoooi_flywheel_*)原本只在 /api/v1/stats/flywheel/metrics 暴露,
@@ -1159,13 +1016,6 @@ async def prometheus_metrics() -> Response:
content += flywheel_metrics.to_prometheus_lines()
except Exception:
logger.warning("prometheus_metrics_flywheel_error")
# 2026-05-14 Codex — T18 ADR-100 SLO emitter
# GovernanceAgent 讀 Prometheus recording rules若 /metrics 不吐底層 DB totals
# sli:* rules 會全空並每小時重複發 governance_slo_data_gap。
try:
content += await get_adr100_slo_metrics_service().to_prometheus_lines()
except Exception as exc:
logger.warning("prometheus_metrics_adr100_slo_error", error=str(exc))
return Response(content=content, media_type=CONTENT_TYPE_LATEST)

View File

@@ -167,8 +167,6 @@ class ApprovalRequest(ApprovalRequestBase):
fingerprint: str | None = Field(default=None, description="告警指紋 Hash")
hit_count: int = Field(default=1, description="聚合觸發次數")
last_seen_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc), description="最後觸發時間")
telegram_message_id: int | None = Field(default=None, description="Telegram approval card message ID")
telegram_chat_id: int | None = Field(default=None, description="Telegram chat ID for the approval card")
# 2026-04-14 Claude Sonnet 4.6: incident_id 已移至 Base避免 ApprovalRequestCreate 缺欄位)
@property
@@ -218,10 +216,6 @@ class ApprovalRequestResponse(BaseModel):
hit_count: int = 1
last_seen_at: datetime | None = None
# Phase 6.5: Incident 關聯 (用於簽核後更新 Incident 狀態)
incident_id: str | None = None
matched_playbook_id: str | None = None
telegram_message_id: int | None = None
telegram_chat_id: int | None = None
metadata: dict | None = None
@classmethod
@@ -247,10 +241,6 @@ class ApprovalRequestResponse(BaseModel):
hit_count=approval.hit_count,
last_seen_at=approval.last_seen_at,
# Phase 6.5
incident_id=approval.incident_id,
matched_playbook_id=approval.matched_playbook_id,
telegram_message_id=approval.telegram_message_id,
telegram_chat_id=approval.telegram_chat_id,
metadata=approval.metadata,
)

View File

@@ -87,27 +87,13 @@ class DispatchItem(BaseModel):
governance_event_id: str
event_type: str
dispatch_status: str
executor_type: str | None = None
proposed_action: str = Field(description="≤120 字動作摘要")
playbook_id: str | None = None
playbook_trust: float | None = Field(default=None, ge=0.0, le=1.0)
created_at: datetime
dispatched_at: datetime | None = None
started_at: datetime | None = None
completed_at: datetime | None = None
operator_note: str | None = None
decision_path: str | None = None
workflow_stage: str | None = None
workflow_steps: list[str] = Field(default_factory=list)
next_action: str | None = None
lead_agent: str | None = None
support_agents: list[str] = Field(default_factory=list)
human_owner: str | None = None
kb_draft_entry_id: str | None = None
worker_status: str | None = None
dry_run_plan_fingerprint: str | None = None
archived_count: int | None = None
stale_ratio_snapshot: dict | None = None
class GovernanceQueueResponse(BaseModel):
@@ -121,475 +107,6 @@ class GovernanceQueueResponse(BaseModel):
)
# =============================================================================
# Endpoint 2B: KM review draft dedupe
# =============================================================================
class KnowledgeReviewDraftDedupeGroup(BaseModel):
governance_event_id: str
canonical_entry_id: str
canonical_title: str
canonical_updated_at: datetime | None = None
preferred_source: Literal["dispatch_context", "latest_review_draft"]
duplicate_entry_ids: list[str] = Field(default_factory=list)
duplicate_count: int
total_entries: int
suggested_action: str
owner_action: str
writes_on_read: bool = False
can_archive_without_owner_approval: bool = False
archive_history: list[DispatchItem] = Field(default_factory=list)
class KnowledgeReviewDraftDedupeResponse(BaseModel):
schema_version: str = "km_review_draft_dedupe_v1"
total_review_drafts: int
event_group_total: int
duplicate_draft_total: int
groups: list[KnowledgeReviewDraftDedupeGroup]
generated_at: datetime
class KnowledgeReviewDraftArchiveRequest(BaseModel):
canonical_entry_id: str = Field(min_length=1, max_length=120)
duplicate_entry_ids: list[str] = Field(min_length=1, max_length=100)
owner: str = Field(default="operator_console", min_length=1, max_length=100)
owner_approved: bool = False
dry_run: bool = False
dry_run_plan_fingerprint: str | None = Field(
default=None,
max_length=80,
description="Dry-run response fingerprint that must be echoed before a write.",
)
class KnowledgeReviewDraftStaleRatioSnapshot(BaseModel):
stale_count: int
total_count: int
stale_ratio: float
threshold: float
stale_days: int
class KnowledgeReviewDraftArchiveResponse(BaseModel):
schema_version: str = "km_review_draft_archive_v1"
governance_event_id: str
canonical_entry_id: str
requested_duplicate_entry_ids: list[str]
archived_entry_ids: list[str] = Field(default_factory=list)
skipped_entry_ids: list[str] = Field(default_factory=list)
would_archive_entry_ids: list[str] = Field(default_factory=list)
status: Literal["dry_run", "archived", "noop_already_archived"]
owner: str
owner_approved: bool
dry_run: bool
writes_km: bool
writes_governance_audit: bool
audit_dispatch_id: str | None = None
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
stale_ratio_recheck_status: Literal[
"dry_run",
"completed",
"already_active",
"not_requested",
] = "not_requested"
stale_ratio_recheck_dispatch_id: str | None = None
dry_run_plan_fingerprint: str | None = None
next_action: str = "stale_ratio_recheck"
generated_at: datetime
# =============================================================================
# Endpoint 2C: KM stale candidates
# =============================================================================
class KnowledgeStaleCandidate(BaseModel):
entry_id: str
project_id: str
title: str
entry_type: str
category: str | None = None
status: str
source: str | None = None
updated_at: datetime | None = None
stale_days: int
view_count: int
priority_score: int
priority_tier: Literal["P0", "P1", "P2"]
recommended_action: Literal[
"refresh_with_evidence",
"owner_review",
"archive_or_supersede",
]
reasons: list[str] = Field(default_factory=list)
correlation_sources: list[str] = Field(default_factory=list)
related_incident_id: str | None = None
related_playbook_id: str | None = None
related_approval_id: str | None = None
tags: list[str] = Field(default_factory=list)
owner_review_dispatch_id: str | None = None
owner_review_status: str | None = None
owner_review_stage: str | None = None
owner_review_next_action: str | None = None
class KnowledgeStaleCandidatesResponse(BaseModel):
schema_version: str = "km_stale_candidates_v1"
project_id: str
total_stale: int
returned: int
threshold_days: int
writes_on_read: bool = False
manual_review_required: bool = True
items: list[KnowledgeStaleCandidate]
generated_at: datetime
class KnowledgeStaleOwnerReviewRequest(BaseModel):
owner: str = Field(default="operator_console", min_length=1, max_length=100)
owner_note: str | None = Field(default=None, max_length=240)
dry_run: bool = False
class KnowledgeStaleOwnerReviewResponse(BaseModel):
schema_version: str = "km_stale_owner_review_v1"
entry_id: str
project_id: str
status: Literal["dry_run", "queued", "already_queued"]
governance_event_id: str | None = None
dispatch_id: str | None = None
workflow_stage: str
recommended_action: Literal[
"refresh_with_evidence",
"owner_review",
"archive_or_supersede",
]
owner: str
owner_note: str | None = None
writes_km: bool = False
writes_governance_audit: bool
next_action: str = "owner_review_stale_km_candidate"
generated_at: datetime
class KnowledgeStaleOwnerReviewBatchQueueRequest(BaseModel):
project_id: str = Field(default="awoooi", min_length=1, max_length=64)
priority_tiers: list[Literal["P0", "P1", "P2"]] = Field(
default_factory=lambda: ["P0", "P1"],
min_length=1,
max_length=3,
)
limit: int = Field(default=10, ge=1, le=50)
owner: str = Field(default="operator_console", min_length=1, max_length=100)
owner_note: str | None = Field(default=None, max_length=240)
dry_run: bool = False
dry_run_plan_fingerprint: str | None = Field(
default=None,
max_length=80,
description="Dry-run response fingerprint that must be echoed before queueing a batch.",
)
class KnowledgeStaleOwnerReviewBatchItem(BaseModel):
entry_id: str
title: str
priority_tier: Literal["P0", "P1", "P2"]
recommended_action: Literal[
"refresh_with_evidence",
"owner_review",
"archive_or_supersede",
]
status: Literal["would_queue", "queued", "already_queued", "skipped"]
reason: str | None = None
governance_event_id: str | None = None
dispatch_id: str | None = None
workflow_stage: str
class KnowledgeStaleOwnerReviewBatchQueueResponse(BaseModel):
schema_version: str = "km_stale_owner_review_batch_v1"
project_id: str
status: Literal["dry_run", "queued", "noop_already_queued"]
owner: str
owner_note: str | None = None
dry_run: bool
priority_tiers: list[str]
requested_limit: int
candidate_count: int
queued_count: int
already_queued_count: int
skipped_count: int
batch_governance_event_id: str | None = None
batch_dispatch_id: str | None = None
workflow_stage: str
writes_km: bool = False
writes_governance_audit: bool
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
dry_run_plan_fingerprint: str | None = None
items: list[KnowledgeStaleOwnerReviewBatchItem] = Field(default_factory=list)
next_action: str = "owner_review_stale_km_batch"
generated_at: datetime
class KnowledgeStaleOwnerReviewInboxItem(BaseModel):
dispatch_id: str
governance_event_id: str
entry_id: str
project_id: str
title: str
dispatch_status: str
workflow_stage: str
next_action: str | None = None
owner: str | None = None
owner_note: str | None = None
batch_governance_event_id: str | None = None
batch_dispatch_id: str | None = None
priority_tier: Literal["P0", "P1", "P2"]
priority_score: int
recommended_action: Literal[
"refresh_with_evidence",
"owner_review",
"archive_or_supersede",
]
stale_days: int
view_count: int
correlation_sources: list[str] = Field(default_factory=list)
reasons: list[str] = Field(default_factory=list)
related_incident_id: str | None = None
related_playbook_id: str | None = None
related_approval_id: str | None = None
dry_run_plan_fingerprint: str | None = None
queued_at: datetime | None = None
started_at: datetime | None = None
completed_at: datetime | None = None
class KnowledgeStaleOwnerReviewInboxResponse(BaseModel):
schema_version: str = "km_stale_owner_review_inbox_v1"
project_id: str
dispatch_status: str
total: int
returned: int
writes_on_read: bool = False
manual_review_required: bool = True
items: list[KnowledgeStaleOwnerReviewInboxItem] = Field(default_factory=list)
generated_at: datetime
class KnowledgeStaleOwnerReviewBurnDownItem(BaseModel):
completion_dispatch_id: str
governance_event_id: str
source_dispatch_id: str | None = None
recheck_dispatch_id: str | None = None
entry_id: str | None = None
project_id: str
dispatch_status: str
workflow_stage: str
review_outcome: Literal[
"refresh_with_evidence",
"archive",
"supersede",
] | None = None
owner: str | None = None
completed_at: datetime | None = None
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
stale_count_delta: int | None = None
stale_ratio_delta: float | None = None
above_threshold: bool | None = None
class KnowledgeStaleOwnerReviewBurnDownResponse(BaseModel):
schema_version: str = "km_stale_owner_review_burndown_v1"
project_id: str
burn_down_status: Literal["above_threshold", "at_or_below_threshold", "no_data"]
current_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
entries_to_threshold: int
pending_owner_reviews: int
completed_owner_reviews: int
completion_audit_total: int
stale_ratio_recheck_total: int
latest_stale_count_delta: int | None = None
latest_stale_ratio_delta: float | None = None
writes_on_read: bool = False
manual_review_required: bool = True
returned: int
items: list[KnowledgeStaleOwnerReviewBurnDownItem] = Field(default_factory=list)
generated_at: datetime
class KnowledgeStaleOwnerReviewCompletionQueueItem(BaseModel):
dispatch_id: str
governance_event_id: str
entry_id: str
project_id: str
title: str
dispatch_status: str
workflow_stage: str
readiness: Literal["ready", "blocked", "completed", "failed"]
recommended_completion_outcome: Literal[
"refresh_with_evidence",
"archive",
"supersede",
]
next_action: str
blockers: list[str] = Field(default_factory=list)
required_owner_fields: list[str] = Field(default_factory=list)
can_preview: bool
can_confirm_after_preview: bool
writes_km_on_confirm: bool
owner: str | None = None
owner_note: str | None = None
batch_governance_event_id: str | None = None
batch_dispatch_id: str | None = None
priority_tier: Literal["P0", "P1", "P2"]
priority_score: int
recommended_action: Literal[
"refresh_with_evidence",
"owner_review",
"archive_or_supersede",
]
stale_days: int
view_count: int
correlation_sources: list[str] = Field(default_factory=list)
reasons: list[str] = Field(default_factory=list)
related_incident_id: str | None = None
related_playbook_id: str | None = None
related_approval_id: str | None = None
dry_run_plan_fingerprint: str | None = None
queued_at: datetime | None = None
started_at: datetime | None = None
completed_at: datetime | None = None
class KnowledgeStaleOwnerReviewCompletionQueueResponse(BaseModel):
schema_version: str = "km_stale_owner_review_completion_queue_v1"
project_id: str
status_bucket: Literal["all", "ready", "blocked", "completed", "failed", "pending"]
priority_tiers: list[str] = Field(default_factory=list)
recommended_completion_outcome: Literal[
"all",
"refresh_with_evidence",
"archive",
"supersede",
] = "all"
batch_governance_event_id: str | None = None
can_preview: bool | None = None
total: int
returned: int
pending_count: int
ready_count: int
blocked_count: int
completed_count: int
failed_count: int
writes_on_read: bool = False
manual_review_required: bool = True
batch_writes_allowed: bool = False
items: list[KnowledgeStaleOwnerReviewCompletionQueueItem] = Field(default_factory=list)
generated_at: datetime
class KnowledgeStaleOwnerReviewCompletionBatchPreviewRequest(BaseModel):
project_id: str = Field(default="awoooi", min_length=1, max_length=64)
status_bucket: Literal["all", "ready", "blocked", "completed", "failed", "pending"] = "ready"
priority_tiers: list[Literal["P0", "P1", "P2"]] = Field(
default_factory=lambda: ["P0", "P1", "P2"],
min_length=1,
max_length=3,
)
recommended_completion_outcome: Literal[
"all",
"refresh_with_evidence",
"archive",
"supersede",
] = "all"
batch_governance_event_id: str | None = Field(default=None, max_length=120)
limit: int = Field(default=10, ge=1, le=30)
owner: str = Field(default="operator_console", min_length=1, max_length=100)
owner_note: str | None = Field(default=None, max_length=240)
class KnowledgeStaleOwnerReviewCompletionBatchPreviewResponse(BaseModel):
schema_version: str = "km_stale_owner_review_completion_batch_preview_v1"
project_id: str
status: Literal["dry_run"] = "dry_run"
owner: str
owner_note: str | None = None
status_bucket: Literal["all", "ready", "blocked", "completed", "failed", "pending"]
priority_tiers: list[str]
recommended_completion_outcome: Literal[
"all",
"refresh_with_evidence",
"archive",
"supersede",
]
batch_governance_event_id: str | None = None
requested_limit: int
candidate_count: int
previewable_count: int
blocked_count: int
completed_count: int
failed_count: int
writes_km: bool = False
writes_governance_audit: bool = False
batch_writes_allowed: bool = False
manual_review_required: bool = True
dry_run_plan_fingerprint: str
next_action: str = "preview_each_ready_item_then_confirm_single_item"
items: list[KnowledgeStaleOwnerReviewCompletionQueueItem] = Field(default_factory=list)
generated_at: datetime
class KnowledgeStaleOwnerReviewCompleteRequest(BaseModel):
dispatch_id: str | None = Field(
default=None,
max_length=120,
description="Owner-review dispatch id. Optional when the backend can resolve the active item by entry id.",
)
owner: str = Field(default="operator_console", min_length=1, max_length=100)
owner_approved: bool = False
dry_run: bool = False
review_outcome: Literal[
"refresh_with_evidence",
"archive",
"supersede",
]
owner_note: str | None = Field(default=None, max_length=500)
updated_title: str | None = Field(default=None, min_length=1, max_length=255)
updated_content: str | None = Field(default=None, min_length=1)
superseded_by_entry_id: str | None = Field(default=None, max_length=120)
dry_run_plan_fingerprint: str | None = Field(
default=None,
max_length=80,
description="Dry-run response fingerprint that must be echoed before a write.",
)
class KnowledgeStaleOwnerReviewCompleteResponse(BaseModel):
schema_version: str = "km_stale_owner_review_complete_v1"
entry_id: str
project_id: str
status: Literal["dry_run", "completed", "already_completed"]
review_outcome: Literal[
"refresh_with_evidence",
"archive",
"supersede",
]
governance_event_id: str
dispatch_id: str
audit_dispatch_id: str | None = None
stale_ratio_recheck_dispatch_id: str | None = None
workflow_stage: str
owner: str
owner_approved: bool
dry_run: bool
writes_km: bool
writes_governance_audit: bool
stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
dry_run_plan_fingerprint: str | None = None
next_action: str = "stale_ratio_recheck"
generated_at: datetime
# =============================================================================
# Endpoint 3: summary
# =============================================================================

View File

@@ -39,15 +39,14 @@ import hashlib
import json
import time
from dataclasses import dataclass, field
from datetime import UTC, datetime
from datetime import datetime, timezone
from typing import Any
from uuid import UUID
import structlog
from sqlalchemy import select
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from src.core.redis_client import get_redis
from src.db.awooop_models import (
AwoooPActiveRevision,
AwoooPMcpGatewayAudit,
@@ -278,7 +277,7 @@ class McpGateway:
self, ctx: GatewayContext, gate_result: GateCheckResult
) -> tuple[AwoooPMcpToolRegistry, AwoooPMcpGrant]:
"""Gate 3tool 在白名單 + grant 有效(未到期、未撤銷)"""
now = datetime.now(UTC)
now = datetime.now(timezone.utc)
# 查 tool registry
tool_result = await self._db.execute(
@@ -360,9 +359,14 @@ class McpGateway:
raise GateApprovalError("write/admin 操作需要 run_idapproval 追蹤用)")
try:
redis = get_redis()
import aioredis
from src.core.config import settings
redis = aioredis.from_url(settings.REDIS_URL)
approval_key = f"mcp_approval:{ctx.project_id}:{ctx.agent_id}:{ctx.tool_name}:{ctx.run_id}"
approved = await redis.get(approval_key)
await redis.aclose()
except Exception as exc:
logger.warning(
"mcp_gate5_redis_error",
@@ -388,7 +392,10 @@ class McpGateway:
parameters: dict[str, Any],
) -> MCPToolResult:
"""呼叫底層 MCP provider 執行工具"""
provider = await self._resolve_provider(ctx, tool_row)
registry = get_provider_registry()
provider = registry.get(ctx.tool_name) or registry.get(
tool_row.tool_name if tool_row else ctx.tool_name
)
# 找不到 provider → 回傳 shadow no-op
if provider is None:
@@ -404,57 +411,14 @@ class McpGateway:
)
audit_params = dict(parameters)
existing_audit = (
parameters.get("_mcp_audit")
if isinstance(parameters, dict) and isinstance(parameters.get("_mcp_audit"), dict)
else {}
)
audit_params["_mcp_audit"] = {
"project_id": ctx.project_id,
"agent_id": ctx.agent_id,
"run_id": str(ctx.run_id) if ctx.run_id else None,
"trace_id": ctx.trace_id,
"incident_id": existing_audit.get("incident_id") or ctx.trace_id,
"session_id": existing_audit.get("session_id"),
"flywheel_node": existing_audit.get("flywheel_node"),
"agent_role": existing_audit.get("agent_role") or ctx.agent_id,
"gateway_path": "awooop_mcp_gateway",
}
return await provider.execute(ctx.tool_name, audit_params)
async def _resolve_provider(
self,
ctx: GatewayContext,
tool_row: AwoooPMcpToolRegistry | None,
):
"""Find the provider that owns ctx.tool_name.
ProviderRegistry is keyed by provider name (`kubernetes`, `ssh_host`, ...),
while GatewayContext intentionally uses the governed tool name
(`kubectl_get`, `ssh_diagnose`, ...). Scan provider tool manifests as the
compatibility bridge until registry exposes a first-class tool index.
"""
registry = get_provider_registry()
direct = registry.get(ctx.tool_name)
if direct is not None:
return direct
lookup_name = tool_row.tool_name if tool_row else ctx.tool_name
for provider in registry.all():
try:
tools = await provider.list_tools()
except Exception as exc:
logger.debug(
"mcp_gateway_provider_manifest_skipped",
provider=getattr(provider, "name", None),
tool_name=lookup_name,
error=str(exc),
)
continue
if any(tool.name == lookup_name for tool in tools):
return provider
return None
# ── Audit log ─────────────────────────────────────────────────────────────
async def _write_audit(
@@ -482,15 +446,6 @@ class McpGateway:
json.dumps(result.output, sort_keys=True, default=str).encode()
).hexdigest()
gate_payload = {
**gate_result.as_dict(),
"schema_version": "awooop_mcp_gateway_audit_v1",
"gateway_path": "awooop_mcp_gateway",
"policy_enforced": True,
"is_shadow": ctx.is_shadow,
"required_scope": ctx.required_scope,
}
audit = AwoooPMcpGatewayAudit(
project_id=ctx.project_id,
run_id=ctx.run_id,
@@ -500,15 +455,16 @@ class McpGateway:
tool_name=ctx.tool_name,
input_hash=input_hash,
output_hash=output_hash,
gate_result=gate_payload,
gate_result=gate_result.as_dict(),
result_status=result_status,
block_gate=block_gate,
block_reason=block_reason,
latency_ms=latency_ms,
)
self._db.add(audit)
await self._db.flush()
if tool_row is not None:
self._db.add(audit)
await self._db.flush()
except Exception as exc:
logger.warning(
"mcp_gateway_audit_write_failed",

View File

@@ -14,7 +14,6 @@ from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
from uuid import uuid4
from src.utils.timezone import now_taipei
@@ -30,9 +29,7 @@ class MCPTool:
name: str
description: str
input_schema: dict[str, Any]
# 2026-05-06 Codex: 部分舊 provider 的 list_tools() 尚未傳 server_name
# 先給 DTO 預設值registry 會以 provider.name 補正,避免啟動登記直接 crash。
server_name: str = ""
server_name: str
@dataclass
@@ -44,21 +41,12 @@ class MCPToolResult:
"""
success: bool
execution_id: str = ""
execution_id: str
output: Any | None = None
# 2026-05-06 Codex: 舊 provider 曾使用 data=... 作為成功輸出欄位。
# 保留 alias避免 provider 成功路徑因 dataclass 參數不相容而 crash。
data: Any | None = None
error: str | None = None
duration: float = 0.0
timestamp: datetime = field(default_factory=now_taipei)
def __post_init__(self) -> None:
if not self.execution_id:
self.execution_id = f"mcp-{uuid4()}"
if self.output is None and self.data is not None:
self.output = self.data
def to_dict(self) -> dict:
return {
"success": self.success,

View File

@@ -24,7 +24,6 @@ from typing import Any
import httpx
from src.core.config import settings # P0-13: K8s namespace 由 settings.AWOOOI_K8S_NAMESPACE 提供
from src.services.mcp_audit_context import with_mcp_audit_context
from src.utils.timezone import now_taipei
logger = logging.getLogger(__name__)
@@ -519,13 +518,6 @@ class MCPBridge:
raise ValueError(f"Unknown MCP Server: {server_name}")
server = self._servers[server_name]
parameters = with_mcp_audit_context(
parameters,
session_id=f"mcp_bridge:{execution_id}",
flywheel_node="govern",
agent_role="mcp_bridge",
gateway_path="legacy_mcp_bridge",
)
result = await self._execute_tool(server, tool_name, parameters)
# ========================================

Some files were not shown because too many files have changed in this diff Show More