fix(alerts): 收斂 Telegram 告警到 SRE 戰情室
This commit is contained in:
@@ -14,7 +14,7 @@ on:
|
||||
|
||||
env:
|
||||
GITEA_ACTIONS_URL: http://192.168.0.110:3001/wooo/awoooi/actions
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
|
||||
jobs:
|
||||
market-watch:
|
||||
@@ -499,7 +499,7 @@ jobs:
|
||||
- name: Summarize actionable change or failure
|
||||
if: always()
|
||||
env:
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
TG_CHAT_ID: ${{ env.SRE_GROUP_CHAT_ID }}
|
||||
JOB_STATUS: ${{ job.status }}
|
||||
CANDIDATE_COUNT: ${{ steps.watch.outputs.candidate_count }}
|
||||
SOURCE_COUNT: ${{ steps.watch.outputs.source_count }}
|
||||
|
||||
@@ -19,7 +19,7 @@ concurrency:
|
||||
env:
|
||||
HARBOR: 192.168.0.110:5000
|
||||
HARBOR_MIRROR: 192.168.0.110:5001
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://192.168.0.188:24318
|
||||
OTEL_SERVICE_NAME: awoooi-cd-dev
|
||||
OTEL_RESOURCE_ATTRIBUTES: service.version=${{ github.sha }},deployment.environment=dev
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
echo "Dev deploy start notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
@@ -130,9 +130,9 @@ jobs:
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
|
||||
${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
AWOOOI_SECRET_TG_CHAT_ID
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_SRE_GROUP_CHAT_ID_COMPAT'
|
||||
${{ secrets.SRE_GROUP_CHAT_ID }}
|
||||
AWOOOI_SECRET_SRE_GROUP_CHAT_ID_COMPAT
|
||||
)"
|
||||
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
|
||||
${{ secrets.NVIDIA_API_KEY }}
|
||||
@@ -235,7 +235,7 @@ jobs:
|
||||
echo "Dev deploy success notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
@@ -256,7 +256,7 @@ jobs:
|
||||
echo "Dev deploy failure notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
@@ -39,7 +39,7 @@ concurrency:
|
||||
|
||||
env:
|
||||
HARBOR: 192.168.0.110:5000
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
# Harbor Proxy Cache (指向 DockerHub 的內部 Mirror,避免拉取限額)
|
||||
HARBOR_MIRROR: 192.168.0.110:5001
|
||||
# OTEL CI/CD 監控 (2026-03-31 #46c - 遷移到 Gitea)
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
echo "✅ CI/CD start notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
@@ -303,7 +303,7 @@ jobs:
|
||||
echo "✅ CI/CD tests failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
@@ -509,9 +509,9 @@ jobs:
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
|
||||
${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
AWOOOI_SECRET_TG_CHAT_ID
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_SRE_GROUP_CHAT_ID_COMPAT'
|
||||
${{ secrets.SRE_GROUP_CHAT_ID }}
|
||||
AWOOOI_SECRET_SRE_GROUP_CHAT_ID_COMPAT
|
||||
)"
|
||||
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
|
||||
${{ secrets.NVIDIA_API_KEY }}
|
||||
@@ -616,6 +616,8 @@ jobs:
|
||||
KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=\${K8S_API_SERVER}"
|
||||
|
||||
# 注入 Telegram Secrets (ADR-035 鐵律)
|
||||
# 2026-06-12 Codex: OPENCLAW_TG_CHAT_ID 僅作舊欄位相容,
|
||||
# 實際值必須與 SRE_GROUP_CHAT_ID 一致,避免正式告警旁路到其他群組。
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
|
||||
@@ -1182,7 +1184,7 @@ jobs:
|
||||
echo "✅ CI/CD build failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
@@ -1527,7 +1529,7 @@ jobs:
|
||||
echo "✅ CI/CD success notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$TG_MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
--data-urlencode "text@-" || echo "TG notify warning (non-fatal)"
|
||||
fi
|
||||
|
||||
@@ -1550,7 +1552,7 @@ jobs:
|
||||
echo "✅ CI/CD post-deploy failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
|
||||
@@ -19,7 +19,7 @@ concurrency:
|
||||
env:
|
||||
REPORT_URL: https://mo.wooo.work/code-review/
|
||||
GITEA_ACTIONS_URL: http://192.168.0.110:3001/wooo/awoooi/actions
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
|
||||
jobs:
|
||||
ai-code-review:
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
- name: Notify Code Review Start
|
||||
if: steps.stale.outputs.skip != 'true'
|
||||
env:
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
SRE_GROUP_CHAT_ID: ${{ env.SRE_GROUP_CHAT_ID }}
|
||||
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
|
||||
BRANCH: ${{ steps.ctx.outputs.branch }}
|
||||
COMMIT_MSG: ${{ steps.ctx.outputs.commit_msg }}
|
||||
@@ -130,13 +130,13 @@ jobs:
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Code review start notification mirrored through AWOOI API"
|
||||
else
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${SRE_GROUP_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing and AWOOI API notify failed; skip start notification"
|
||||
exit 0
|
||||
fi
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
-d "$(jq -n --arg c "$SRE_GROUP_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
fi
|
||||
|
||||
@@ -156,7 +156,7 @@ jobs:
|
||||
- name: Notify Code Review Completion
|
||||
if: always() && steps.stale.outputs.skip != 'true'
|
||||
env:
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
SRE_GROUP_CHAT_ID: ${{ env.SRE_GROUP_CHAT_ID }}
|
||||
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
@@ -209,12 +209,12 @@ jobs:
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Code review completion notification mirrored through AWOOI API"
|
||||
else
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${SRE_GROUP_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing and AWOOI API notify failed; skip completion notification"
|
||||
exit 0
|
||||
fi
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
-d "$(jq -n --arg c "$SRE_GROUP_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
fi
|
||||
|
||||
@@ -17,7 +17,7 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
|
||||
jobs:
|
||||
deploy-alerts:
|
||||
@@ -67,6 +67,6 @@ jobs:
|
||||
echo "Alert rule deploy notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "chat_id=${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
fi
|
||||
|
||||
@@ -19,7 +19,7 @@ env:
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://192.168.0.188:24318
|
||||
OTEL_SERVICE_NAME: awoooi-e2e
|
||||
OTEL_RESOURCE_ATTRIBUTES: deployment.environment=production
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
|
||||
jobs:
|
||||
e2e-health:
|
||||
@@ -95,8 +95,8 @@ jobs:
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "E2E failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
|
||||
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
curl -s -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d chat_id="${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d parse_mode="HTML" \
|
||||
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
|
||||
fi
|
||||
|
||||
@@ -20,7 +20,7 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
|
||||
jobs:
|
||||
migrate:
|
||||
@@ -188,8 +188,6 @@ jobs:
|
||||
|
||||
- name: Notify Telegram (if configured)
|
||||
if: always()
|
||||
env:
|
||||
TG_CHAT: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
run: |
|
||||
TG_TOKEN="$(cat <<'AWOOOI_SECRET_TG_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
@@ -207,10 +205,10 @@ jobs:
|
||||
echo "Migration notification mirrored through AWOOI API"
|
||||
exit 0
|
||||
fi
|
||||
if [ -n "$TG_TOKEN" ] && [ -n "$TG_CHAT" ]; then
|
||||
if [ -n "$TG_TOKEN" ] && [ -n "${{ env.SRE_GROUP_CHAT_ID }}" ]; then
|
||||
MSG="🗄️ Migration CI: \`${STATUS}\` — commit ${{ github.sha }}"
|
||||
curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
|
||||
-d chat_id="${TG_CHAT}" \
|
||||
-d chat_id="${{ env.SRE_GROUP_CHAT_ID }}" \
|
||||
-d parse_mode="Markdown" \
|
||||
-d text="${MSG}" || true
|
||||
fi
|
||||
|
||||
@@ -454,7 +454,7 @@ async def telegram_health() -> dict:
|
||||
"mode": "long_polling", # Phase 5.5: 已從 webhook 切換至 long_polling
|
||||
"polling_active": gateway._polling_active,
|
||||
"bot_token_set": bool(settings.OPENCLAW_TG_BOT_TOKEN),
|
||||
"chat_id_set": bool(settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID),
|
||||
"chat_id_set": bool(settings.SRE_GROUP_CHAT_ID),
|
||||
"sre_group_chat_id_set": bool(settings.SRE_GROUP_CHAT_ID),
|
||||
"whitelist_count": len(settings.OPENCLAW_TG_USER_WHITELIST),
|
||||
"last_update_id": gateway._last_update_id,
|
||||
|
||||
@@ -326,7 +326,7 @@ async def _send_telegram_forecast(
|
||||
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not target_chat_id:
|
||||
return False
|
||||
|
||||
|
||||
@@ -474,7 +474,7 @@ async def _send_telegram_posture(
|
||||
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not target_chat_id:
|
||||
return
|
||||
|
||||
|
||||
@@ -299,7 +299,7 @@ async def _send_telegram_gaps(
|
||||
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not target_chat_id:
|
||||
return
|
||||
|
||||
|
||||
@@ -316,7 +316,7 @@ async def _send_telegram_summary(
|
||||
from src.services.ai_advisory_helpers import build_ai_advisory_keyboard, is_snoozed
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not target_chat_id:
|
||||
logger.info("hermes_telegram_skip_no_chat_id")
|
||||
return False
|
||||
|
||||
@@ -276,7 +276,7 @@ class AIRateLimiter:
|
||||
from src.core.config import settings
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not settings.OPENCLAW_TG_BOT_TOKEN or not target_chat_id:
|
||||
logger.warning("telegram_not_configured_for_cost_alert")
|
||||
return
|
||||
@@ -328,7 +328,7 @@ class AIRateLimiter:
|
||||
from src.core.config import settings
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not settings.OPENCLAW_TG_BOT_TOKEN or not target_chat_id:
|
||||
return
|
||||
|
||||
|
||||
@@ -1119,7 +1119,7 @@ class ApprovalExecutionService:
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
settings = get_settings()
|
||||
gateway = get_telegram_gateway()
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID
|
||||
if not target_chat_id:
|
||||
logger.warning(
|
||||
"push_execution_result_no_target_chat",
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import hashlib
|
||||
import html
|
||||
|
||||
from src.core.config import settings
|
||||
from src.core.logging import get_logger
|
||||
from src.core.redis_client import get_redis
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
@@ -179,21 +178,6 @@ async def notify_converged_alert_recurrence(
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
private_chat_id = settings.OPENCLAW_TG_CHAT_ID
|
||||
if private_chat_id and private_chat_id != gateway.alert_chat_id:
|
||||
try:
|
||||
await gateway.send_notification(text, chat_id=private_chat_id)
|
||||
sent_count += 1
|
||||
except Exception as exc:
|
||||
failures.append(f"private:{type(exc).__name__}")
|
||||
logger.warning(
|
||||
"converged_alert_recurrence_private_mirror_failed",
|
||||
source=source,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
if sent_count:
|
||||
logger.info(
|
||||
"converged_alert_recurrence_sent",
|
||||
@@ -201,7 +185,7 @@ async def notify_converged_alert_recurrence(
|
||||
hit_count=hit_count,
|
||||
approval_id=approval_id,
|
||||
recurrence_stage=recurrence_stage,
|
||||
mirrored_to_private=bool(private_chat_id and private_chat_id != gateway.alert_chat_id),
|
||||
mirrored_to_private=False,
|
||||
sent_count=sent_count,
|
||||
)
|
||||
else:
|
||||
|
||||
@@ -252,7 +252,7 @@ class FailoverAlerter:
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
settings = get_settings()
|
||||
chat_id = getattr(settings, "SRE_GROUP_CHAT_ID", None) or getattr(settings, "OPENCLAW_TG_CHAT_ID", None)
|
||||
chat_id = getattr(settings, "SRE_GROUP_CHAT_ID", None)
|
||||
if not chat_id:
|
||||
logger.warning("telegram_chat_id_missing_failover_alert")
|
||||
return
|
||||
|
||||
@@ -4,8 +4,8 @@ Notification routing matrix — ADR-093
|
||||
單一矩陣決定每種通知類型的發送目標,取代 telegram_gateway.py 內 24 處硬碼 chat_id。
|
||||
|
||||
設計原則:
|
||||
- 正式告警目的地一律 SRE_GROUP_CHAT_ID 優先
|
||||
- OPENCLAW_TG_CHAT_ID 只在 SRE_GROUP_CHAT_ID 缺失時作 fail-soft fallback
|
||||
- 正式告警目的地一律為 SRE_GROUP_CHAT_ID
|
||||
- SRE_GROUP_CHAT_ID 缺失時必須顯示配置缺口,不得旁路到個人或其他群組
|
||||
- 未知通知類型預設發群組
|
||||
|
||||
2026-04-25 ogt + Claude Sonnet 4.6
|
||||
@@ -16,7 +16,7 @@ from enum import Enum
|
||||
|
||||
|
||||
class Destination(str, Enum):
|
||||
DM = "dm" # OPENCLAW_TG_CHAT_ID (僅缺群組設定時 fallback)
|
||||
DM = "dm" # legacy alias: 2026-06-12 起不再旁路至 DM
|
||||
GROUP = "group" # SRE_GROUP_CHAT_ID
|
||||
BOTH = "both" # legacy alias: 2026-04-30 起視為 group-first
|
||||
|
||||
@@ -28,7 +28,7 @@ class RoutingRule:
|
||||
|
||||
|
||||
# ADR-093 D1-D4 路由矩陣
|
||||
# 2026-04-30 Codex: 所有告警類型群組優先,DM 只作缺群組設定 fallback。
|
||||
# 2026-06-12 Codex: 所有正式告警只送 AwoooI SRE 戰情室;缺群組設定時回空清單。
|
||||
NOTIFICATION_ROUTING: dict[str, RoutingRule] = {
|
||||
"TYPE-1": RoutingRule(Destination.GROUP),
|
||||
"TYPE-2": RoutingRule(Destination.GROUP),
|
||||
@@ -60,7 +60,5 @@ def resolve_chat_ids(
|
||||
回傳此通知應發送的 chat_id 清單。
|
||||
tg_group_cutover 僅保留為舊 caller 相容參數;正式策略永遠群組優先。
|
||||
"""
|
||||
rule = get_routing_rule(notification_type)
|
||||
if rule.destination == Destination.DM and not group_chat_id:
|
||||
return [dm_chat_id] if dm_chat_id else []
|
||||
return [group_chat_id or dm_chat_id] if (group_chat_id or dm_chat_id) else []
|
||||
_ = get_routing_rule(notification_type)
|
||||
return [group_chat_id] if group_chat_id else []
|
||||
|
||||
@@ -29,10 +29,8 @@ class TelegramWebhookProvider(NotificationProvider):
|
||||
|
||||
@property
|
||||
def enabled(self) -> bool:
|
||||
"""檢查 Telegram bot token 與 chat ID 是否配置"""
|
||||
return bool(settings.OPENCLAW_TG_BOT_TOKEN) and bool(
|
||||
settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
)
|
||||
"""檢查 Telegram bot token 與 AwoooI SRE 戰情室是否配置。"""
|
||||
return bool(settings.OPENCLAW_TG_BOT_TOKEN) and bool(settings.SRE_GROUP_CHAT_ID)
|
||||
|
||||
def _format(self, msg: NotificationMessage) -> str:
|
||||
"""格式化執行結果為 Telegram 訊息"""
|
||||
|
||||
@@ -429,7 +429,7 @@ async def _send_rollback_proposal_alert(
|
||||
f"<i>此為提案,不會自動執行 Rollback</i>"
|
||||
)
|
||||
|
||||
target_chat_id = _settings.SRE_GROUP_CHAT_ID or _settings.OPENCLAW_TG_CHAT_ID
|
||||
target_chat_id = _settings.SRE_GROUP_CHAT_ID
|
||||
await gateway._send_request(
|
||||
"sendMessage",
|
||||
{
|
||||
|
||||
@@ -3375,8 +3375,8 @@ class TelegramGateway:
|
||||
logger.warning("telegram_gateway_disabled", reason="Bot token not configured")
|
||||
return False
|
||||
|
||||
if not settings.OPENCLAW_TG_CHAT_ID and not settings.SRE_GROUP_CHAT_ID:
|
||||
logger.warning("telegram_gateway_disabled", reason="No Telegram chat ID configured")
|
||||
if not settings.SRE_GROUP_CHAT_ID:
|
||||
logger.warning("telegram_gateway_disabled", reason="SRE_GROUP_CHAT_ID not configured")
|
||||
return False
|
||||
|
||||
# 2026-04-03 ogt: timeout 改用 httpx.Timeout 分開設定
|
||||
@@ -3400,13 +3400,13 @@ class TelegramGateway:
|
||||
|
||||
@property
|
||||
def chat_id(self) -> str:
|
||||
"""取得 Chat ID"""
|
||||
return settings.OPENCLAW_TG_CHAT_ID
|
||||
"""取得正式產品告警 Chat ID。"""
|
||||
return settings.SRE_GROUP_CHAT_ID
|
||||
|
||||
@property
|
||||
def alert_chat_id(self) -> str:
|
||||
"""告警訊息收件人:SRE 群組優先,缺設定時才回退個人頻道。"""
|
||||
return settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
"""告警訊息收件人:正式產品告警只送 AwoooI SRE 戰情室。"""
|
||||
return settings.SRE_GROUP_CHAT_ID
|
||||
|
||||
def _summarize_callback_data_for_audit(self, callback_data: str) -> dict[str, str | None]:
|
||||
"""Return a redaction-safe summary of callback_data without persisting nonce."""
|
||||
@@ -9338,16 +9338,14 @@ class TelegramGateway:
|
||||
|
||||
text = report_to_telegram_html(report)
|
||||
|
||||
# 只發到 SRE 戰情室群組
|
||||
if settings.SRE_GROUP_CHAT_ID:
|
||||
await self.send_to_group(text=text)
|
||||
else:
|
||||
# SRE_GROUP_CHAT_ID 未注入時,fallback 到個人頻道並加警告
|
||||
fallback = (
|
||||
"⚠️ <b>SRE_GROUP_CHAT_ID 未設定</b>,心跳報告暫發到個人頻道\n\n"
|
||||
+ text
|
||||
# 只發到 AwoooI SRE 戰情室;缺設定時不得旁路到個人或其他群組。
|
||||
if not settings.SRE_GROUP_CHAT_ID:
|
||||
logger.warning(
|
||||
"telegram_heartbeat_skipped",
|
||||
reason="SRE_GROUP_CHAT_ID not configured",
|
||||
)
|
||||
await self.send_notification(fallback)
|
||||
return False
|
||||
await self.send_to_group(text=text)
|
||||
|
||||
self._last_message_time = datetime.now(UTC)
|
||||
logger.info(
|
||||
|
||||
@@ -126,7 +126,7 @@ async def test_converged_recurrence_falls_back_to_milestones(monkeypatch):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_converged_recurrence_mirrors_to_private_chat(monkeypatch):
|
||||
async def test_converged_recurrence_does_not_mirror_to_private_chat(monkeypatch):
|
||||
gateway = _FakeGateway()
|
||||
|
||||
async def _always_notify(*, fingerprint, hit_count):
|
||||
@@ -134,7 +134,6 @@ async def test_converged_recurrence_mirrors_to_private_chat(monkeypatch):
|
||||
|
||||
monkeypatch.setattr(notifier, "should_notify_converged_alert_recurrence", _always_notify)
|
||||
monkeypatch.setattr(notifier, "get_telegram_gateway", lambda: gateway)
|
||||
monkeypatch.setattr(notifier.settings, "OPENCLAW_TG_CHAT_ID", "private-chat")
|
||||
|
||||
await notifier.notify_converged_alert_recurrence(
|
||||
source="alertmanager",
|
||||
@@ -151,9 +150,4 @@ async def test_converged_recurrence_mirrors_to_private_chat(monkeypatch):
|
||||
)
|
||||
|
||||
assert len(gateway.primary_messages) == 1
|
||||
assert gateway.private_messages == [
|
||||
{
|
||||
"text": gateway.primary_messages[0],
|
||||
"chat_id": "private-chat",
|
||||
}
|
||||
]
|
||||
assert gateway.private_messages == []
|
||||
|
||||
@@ -22,10 +22,10 @@ def test_all_alert_types_resolve_to_sre_group_first() -> None:
|
||||
) == ["-1003711974679"]
|
||||
|
||||
|
||||
def test_dm_is_only_fail_soft_fallback_when_group_missing() -> None:
|
||||
def test_dm_is_not_used_when_group_missing() -> None:
|
||||
assert resolve_chat_ids(
|
||||
"TYPE-3",
|
||||
dm_chat_id="5619078117",
|
||||
group_chat_id="",
|
||||
tg_group_cutover=True,
|
||||
) == ["5619078117"]
|
||||
) == []
|
||||
|
||||
@@ -195,6 +195,30 @@ class TestSREGroupCutover:
|
||||
assert "_send_approval_card_to_group(" not in fn_body
|
||||
assert "asyncio.create_task" not in fn_body
|
||||
|
||||
def test_alert_chat_id_is_sre_only(self):
|
||||
source = _read_gateway()
|
||||
match = re.search(
|
||||
r"def alert_chat_id\(self\).*?(?=\n def _summarize_callback_data_for_audit)",
|
||||
source,
|
||||
re.DOTALL,
|
||||
)
|
||||
assert match, "找不到 alert_chat_id property"
|
||||
fn_body = match.group(0)
|
||||
assert "return settings.SRE_GROUP_CHAT_ID" in fn_body
|
||||
assert "or settings.OPENCLAW_TG_CHAT_ID" not in fn_body
|
||||
|
||||
def test_default_chat_id_is_sre_only(self):
|
||||
source = _read_gateway()
|
||||
match = re.search(
|
||||
r"def chat_id\(self\).*?(?=\n @property\n def alert_chat_id)",
|
||||
source,
|
||||
re.DOTALL,
|
||||
)
|
||||
assert match, "找不到 chat_id property"
|
||||
fn_body = match.group(0)
|
||||
assert "return settings.SRE_GROUP_CHAT_ID" in fn_body
|
||||
assert "return settings.OPENCLAW_TG_CHAT_ID" not in fn_body
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test: callback handler 完整性(鬼魂按鈕鐵律)
|
||||
|
||||
@@ -1,3 +1,32 @@
|
||||
## 2026-06-12|P2-403K AwoooI SRE 戰情室路由收斂
|
||||
|
||||
**背景**:統帥指出產品告警不應分散到其他 Telegram Bot 或群組,所有 AWOOOI 產品告警必須集中到 **AwoooI SRE 戰情室**。P2-403J 已先補報表真相、日週月報、風險自動化與 TG 旁路審查;本段開始把實際 workflow、API service、ops script 與 CI guard 收斂到單一正式出口。
|
||||
|
||||
**完成**:
|
||||
|
||||
- Gitea workflow 告警路由移除舊 `TELEGRAM_ALERT_CHAT_ID` / `TELEGRAM_CHAT_ID` 路徑,direct Telegram fallback 全部改用 `SRE_GROUP_CHAT_ID`;`e2e-health` 舊 `OPENCLAW_TG_BOT_TOKEN` direct fallback 改用正式 `TELEGRAM_BOT_TOKEN` + `SRE_GROUP_CHAT_ID`。
|
||||
- CD / dev CD secret 注入已讓舊相容欄位 `OPENCLAW_TG_CHAT_ID` 取自 `SRE_GROUP_CHAT_ID`,避免舊程式碼誤用相容欄位時旁路到其他群組。
|
||||
- `telegram_gateway` 的 `chat_id` / `alert_chat_id` 預設收件人收斂為 `settings.SRE_GROUP_CHAT_ID`;缺 `SRE_GROUP_CHAT_ID` 時 Telegram Gateway / heartbeat report 不再 fallback 到個人或舊群組。
|
||||
- `notification_matrix`、Telegram provider、recurrence notifier、capacity / coverage / Hermes rule quality / compliance jobs、approval execution、AI rate limiter、post execution verifier、failover alerter 與 `/api/v1/telegram/health` 全部改為 SRE-only;recurrence notifier 移除 private mirror。
|
||||
- ops scripts `docker-health-monitor`、DR drill、PostgreSQL backup、110 backup 與 Alertmanager config deploy fallback 全部改用 `SRE_GROUP_CHAT_ID`。
|
||||
- `check-gitea-step-env-secrets.js` 新增路由 guard:禁止 workflow 重新引用 `TELEGRAM_ALERT_CHAT_ID` / `TELEGRAM_CHAT_ID`,並擋下 direct Telegram fallback 未指向 `SRE_GROUP_CHAT_ID` 的路徑。
|
||||
|
||||
**本地驗證**:
|
||||
|
||||
- `node scripts/ci/check-gitea-step-env-secrets.js`:`no Gitea step env/with secrets or legacy Telegram routes`。
|
||||
- 路由殘留掃描:`.gitea` / `apps/api/src` / `apps/api/tests` / `scripts/ops` / `k8s/awoooi-prod` 未命中舊 `TELEGRAM_ALERT_CHAT_ID`、舊 `TELEGRAM_CHAT_ID`、SRE/OpenClaw chat fallback 混用、個人 fallback 或 direct OpenClaw bot sendMessage。
|
||||
- `python3.11 -m py_compile`:Telegram gateway、notification matrix、Telegram provider、recurrence notifier、failover alerter、post verifier、rate limiter、approval execution、Telegram API 與相關 jobs 通過。
|
||||
- `bash -n`:相關 ops scripts 與 CI notify script 通過。
|
||||
- `DATABASE_URL='postgresql+asyncpg://test:test@localhost/test' PYTHONPATH=. python3.11 -m pytest -q tests/test_notification_matrix_group_cutover.py tests/test_alert_converged_recurrence.py tests/test_failover_alerter.py tests/test_telegram_button_consistency.py`:`39 passed`。
|
||||
|
||||
**完成度同步**:
|
||||
|
||||
- P2-403K AwoooI SRE 戰情室路由程式收斂:本地 `100%`,正式站待部署驗證。
|
||||
- 三 Agent 主動溝通、學習與成長證據:維持最新工作清單口徑 `100%`;本段提高告警出口一致性,但 Telegram live send、Gateway queue write、receipt worker、KM / PlayBook / timeline / replay score 寫入與 runtime worker 仍未開 gate。
|
||||
- AI Agent automation backlog 維持 `92%`;IwoooS 整體仍維持 `64%`;active runtime gate 仍 `0`。
|
||||
|
||||
**邊界**:本段未讀取 secret value、未發真實 Telegram 測試訊息、未改 Prometheus / Alertmanager route 或 receiver、未改 CronJob、未 SSH、未 active scan、未啟動 runtime repair / verifier worker、未把 SRE 路由收斂解讀為資安 owner response 或 runtime 授權。
|
||||
|
||||
## 2026-06-12|P2-403J 日週月報與風險自動化 Review
|
||||
|
||||
**背景**:統帥要求 AI Agent 產生日報、週報、月報,能看到每個 AI Agent 做了哪些工作與工作量,並以數據化、圖表化報告呈現;Agent 看過報告後要能自動分析評估並提出解決方案,高風險需統帥審核,中 / 低風險則朝自動處理並告警回報前進。本段先建立只讀 review、API、治理頁與測試,不直接開啟 runtime 執行。
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
| Nemotron 實際整合應用 | 30% | 完整回放前仍被關卡擋下 | `blocked_needs_evidence`,下一關是 `refresh_source_evidence_then_5_record_smoke_only` |
|
||||
| 工具 / 服務 / 套件 AI 自動化 | 92% | P0 已完成;P1 服務 / runtime / 監控 / provider / service health / 備份 / DR / 套件與供應鏈只讀基線已完成;P1-007 失敗限定通知合約與前端 redaction 合約已完成;下一主線是 P2-004 依賴 / 供應鏈漂移監控 | 狀態分類、盤點 schema、權限矩陣、靜態盤點種子、只讀 API、UI 骨架、驗證、自動化待辦 schema / 快照 / API / 分組 UI、Backup / DR 目標盤點、準備度矩陣、備份通知政策、Backup / DR 證據 UI、復原演練批准包模板、異地 / escrow 準備度狀態、任務批准邊界、確定性進度彙總、Python 套件 / 供應鏈只讀基線、JS pnpm/npm 只讀基線、Docker build surface 只讀基線、CVE / license / drift 嚴重度政策、定期依賴漂移與外部資料來源檢查設計、依賴升級批准包模板、runtime_surface_inventory_v1 schema / snapshot / API / UI、gitea_workflow_runner_health_v1 schema / snapshot / API / UI、observability_contract_matrix_v1 schema / snapshot / API / UI、ai_provider_route_matrix_v1 schema / snapshot / API / UI、service_health_gap_matrix_v1 schema / snapshot / API / UI、service health evidence cards UI、service_health_failure_notification_policy_v1 schema / snapshot / API / UI 已完成 |
|
||||
| OpenClaw / Hermes / NemoTron 佈建布局 | 45% | P1-401 / P1-402 已完成;仍是只讀 layout 與治理頁顯示,不是 runtime deploy | `ai_agent_deployment_layout_v1` schema、`ai_agent_deployment_layout_2026-06-11.json`、`GET /api/v1/agents/agent-deployment-layout`、治理頁自動化盤點 UI、`AI_AGENT_DEPLOYMENT_LAYOUT_2026-06-11.md` |
|
||||
| OpenClaw / Hermes / NemoTron 主動溝通、學習與成長證據 | 100% | P2-401A 已完成只讀 contract;P2-403A 已完成互動 / 接手 / 學習 / 成長證據面板;P2-403B 已完成 AgentSession / Redis Streams live read model gate;P2-403C 已完成 Redis Streams consumer group dry-run、handoff envelope、ack / dead-letter / replay gate;P2-403D 已完成 learning writeback approval package;P2-403E 已完成 Telegram receipt approval package;P2-403F 已完成 owner-approved learning dry-run preview、人工操作選項與 fixture-only dry-run 總包;P2-403G 已完成 runtime write gate review,固定雙重批准、dry-run hash、post-write verifier 與 redaction 欄位;P2-403H 已完成 post-write verifier implementation package、rollback lane、failure lane 與人工操作選項;P2-403I 已完成 runtime verifier evidence implementation review;P2-403J 已完成報表真相、告警有效性、日報、週報、月報、每個 Agent 工作量、圖表化報告、AI 分析建議與高 / 中 / 低風險自動化政策審查。runtime worker、DB migration、production Redis consumer group、Telegram 實發、Telegram route change、report delivery、AI analysis runtime、中低風險 auto worker、KM / PlayBook trust / timeline / replay score 寫入、SDK / 付費服務仍未開 gate | `ai_agent_communication_learning_contract_v1`、`ai_agent_interaction_learning_proof_v1`、`ai_agent_live_read_model_gate_v1`、`ai_agent_redis_dry_run_gate_v1`、`ai_agent_learning_writeback_approval_package_v1`、`ai_agent_telegram_receipt_approval_package_v1`、`ai_agent_owner_approved_learning_dry_run_v1`、`ai_agent_owner_approved_fixture_dry_run_v1`、`GET /api/v1/agents/agent-communication-learning-contract`、`GET /api/v1/agents/agent-interaction-learning-proof`、`GET /api/v1/agents/agent-live-read-model-gate`、`GET /api/v1/agents/agent-redis-dry-run-gate`、`GET /api/v1/agents/agent-learning-writeback-approval-package`、`GET /api/v1/agents/agent-telegram-receipt-approval-package`、`GET /api/v1/agents/agent-owner-approved-learning-dry-run`、`GET /api/v1/agents/agent-owner-approved-fixture-dry-run`、`ai_agent_runtime_write_gate_review_v1`、`GET /api/v1/agents/agent-runtime-write-gate-review`、`ai_agent_post_write_verifier_package_v1`、`GET 
/api/v1/agents/agent-post-write-verifier-package`、`ai_agent_runtime_verifier_evidence_review_v1`、`GET /api/v1/agents/agent-runtime-verifier-evidence-review`、`ai_agent_report_truth_actionability_review_v1`、`GET /api/v1/agents/agent-report-truth-actionability-review`、`ai_agent_report_automation_review_v1`、`GET /api/v1/agents/agent-report-automation-review`、`/zh-TW/governance?tab=automation-inventory`、MASTER §3.2.1b / §3.2.1d / §3.4.3 |
|
||||
| OpenClaw / Hermes / NemoTron 主動溝通、學習與成長證據 | 100% | P2-401A 已完成只讀 contract;P2-403A 已完成互動 / 接手 / 學習 / 成長證據面板;P2-403B 已完成 AgentSession / Redis Streams live read model gate;P2-403C 已完成 Redis Streams consumer group dry-run、handoff envelope、ack / dead-letter / replay gate;P2-403D 已完成 learning writeback approval package;P2-403E 已完成 Telegram receipt approval package;P2-403F 已完成 owner-approved learning dry-run preview、人工操作選項與 fixture-only dry-run 總包;P2-403G 已完成 runtime write gate review,固定雙重批准、dry-run hash、post-write verifier 與 redaction 欄位;P2-403H 已完成 post-write verifier implementation package、rollback lane、failure lane 與人工操作選項;P2-403I 已完成 runtime verifier evidence implementation review;P2-403J 已完成報表真相、告警有效性、日報、週報、月報、每個 Agent 工作量、圖表化報告、AI 分析建議與高 / 中 / 低風險自動化政策審查;P2-403K 已完成本地程式層 SRE 戰情室路由收斂,移除 Gitea / API / ops script 的舊群組與 private mirror fallback。runtime worker、DB migration、production Redis consumer group、Telegram 實發、delivery receipt E2E、report delivery、AI analysis runtime、中低風險 auto worker、KM / PlayBook trust / timeline / replay score 寫入、SDK / 付費服務仍未開 gate | `ai_agent_communication_learning_contract_v1`、`ai_agent_interaction_learning_proof_v1`、`ai_agent_live_read_model_gate_v1`、`ai_agent_redis_dry_run_gate_v1`、`ai_agent_learning_writeback_approval_package_v1`、`ai_agent_telegram_receipt_approval_package_v1`、`ai_agent_owner_approved_learning_dry_run_v1`、`ai_agent_owner_approved_fixture_dry_run_v1`、`GET /api/v1/agents/agent-communication-learning-contract`、`GET /api/v1/agents/agent-interaction-learning-proof`、`GET /api/v1/agents/agent-live-read-model-gate`、`GET /api/v1/agents/agent-redis-dry-run-gate`、`GET /api/v1/agents/agent-learning-writeback-approval-package`、`GET /api/v1/agents/agent-telegram-receipt-approval-package`、`GET /api/v1/agents/agent-owner-approved-learning-dry-run`、`GET /api/v1/agents/agent-owner-approved-fixture-dry-run`、`ai_agent_runtime_write_gate_review_v1`、`GET 
/api/v1/agents/agent-runtime-write-gate-review`、`ai_agent_post_write_verifier_package_v1`、`GET /api/v1/agents/agent-post-write-verifier-package`、`ai_agent_runtime_verifier_evidence_review_v1`、`GET /api/v1/agents/agent-runtime-verifier-evidence-review`、`ai_agent_report_truth_actionability_review_v1`、`GET /api/v1/agents/agent-report-truth-actionability-review`、`ai_agent_report_automation_review_v1`、`GET /api/v1/agents/agent-report-automation-review`、`/zh-TW/governance?tab=automation-inventory`、MASTER §3.2.1b / §3.2.1d / §3.4.3 |
|
||||
| AI Agent 主動營運委派與版本生命週期 | 100% | P2-402A / P2-402B / P2-402C / P2-402D / P2-402E / P2-402F / P2-402G 已完成;已建立 repo-only 版本新鮮度快照、工具採用批准包、Telegram action-required digest policy、Gitea PR 草案 lane、host / K3s / stateful 版本只讀盤點、API 與 governance UI。定期排程、外部版本查詢、工具安裝、CI 變更、套件升級、主機更新、container pull、實際 PR creation、auto merge、Telegram 實發、SSH、kubectl、重啟仍未開 gate | `ai_agent_proactive_operations_contract_v1`、`ai_agent_version_freshness_snapshot_v1`、`ai_agent_tool_adoption_approval_package_v1`、`ai_agent_telegram_action_required_digest_policy_v1`、`ai_agent_gitea_pr_draft_lane_v1`、`ai_agent_host_stateful_version_inventory_v1`、`GET /api/v1/agents/agent-proactive-operations-contract`、`GET /api/v1/agents/agent-version-freshness-snapshot`、`GET /api/v1/agents/agent-tool-adoption-approval-package`、`GET /api/v1/agents/agent-telegram-action-required-digest-policy`、`GET /api/v1/agents/agent-gitea-pr-draft-lane`、`GET /api/v1/agents/agent-host-stateful-version-inventory`、`/zh-TW/governance?tab=automation-inventory`、MASTER §3.2.1c |
|
||||
| 本工作清單與分析報告 | 100% | 已完成 | 本 MD 文件 |
|
||||
|
||||
@@ -20,9 +20,9 @@ AI Agent 自動化工作包目前完成度:**92%**。本工作清單文件本
|
||||
|
||||
三 Agent 佈建布局目前完成度:**45%**。第一波已完成只讀 schema / snapshot / API / 測試 / 報告,第二波已接入治理頁自動化盤點 UI;正式 runtime 佈署、Telegram E2E 發送與 AgentSession 工作流仍需逐項 gate。
|
||||
|
||||
三 Agent 主動溝通、學習與成長證據目前完成度:**100%**。已完成只讀契約、互動 / 接手 / 學習 / 成長證據面板、P2-403B live read model gate、P2-403C Redis dry-run gate、P2-403D learning writeback approval package、P2-403E Telegram receipt approval package、P2-403F owner-approved learning dry-run preview、人工操作選項與 fixture-only dry-run 總包、P2-403G runtime write gate review、P2-403H post-write verifier implementation package、P2-403I runtime verifier evidence implementation review、P2-403J 報表真相 / 告警有效性 / 日週月報 / Agent 工作量 / 圖表化報告 / AI 建議 / 風險自動化政策審查、API、治理頁顯示、測試與 MASTER 同步;目前 live AgentSession、Agent message、handoff、learning write、Telegram receipt、Gateway queue write、runtime verifier execution、report delivery、AI analysis runtime、中低風險 auto worker、Telegram route change 與 Telegram send 仍全部為 `0`,下一步依優先順序推 `P2-403K` unified report truth service / 中低風險 runtime guard / SRE 戰情室路由遷移批准包,但在批准前仍不得啟動 runtime loop。
|
||||
三 Agent 主動溝通、學習與成長證據目前完成度:**100%**。已完成只讀契約、互動 / 接手 / 學習 / 成長證據面板、P2-403B live read model gate、P2-403C Redis dry-run gate、P2-403D learning writeback approval package、P2-403E Telegram receipt approval package、P2-403F owner-approved learning dry-run preview、人工操作選項與 fixture-only dry-run 總包、P2-403G runtime write gate review、P2-403H post-write verifier implementation package、P2-403I runtime verifier evidence implementation review、P2-403J 報表真相 / 告警有效性 / 日週月報 / Agent 工作量 / 圖表化報告 / AI 建議 / 風險自動化政策審查、P2-403K SRE 戰情室路由程式收斂、API、治理頁顯示、測試與 MASTER 同步;目前 live AgentSession、Agent message、handoff、learning write、Telegram receipt、Gateway queue write、runtime verifier execution、report delivery、AI analysis runtime、中低風險 auto worker、Telegram 實發與 delivery receipt E2E 仍全部為 `0`,下一步依優先順序推 `P2-403L` delivery receipt / queue write E2E,但仍不得跳過 runtime gate。
|
||||
|
||||
AI Agent 主動營運委派與版本生命週期目前完成度:**100%**。已完成 12 類版本 domain、24 類可委派能力、5 種 cadence、8 類 MCP、4 類 RAG memory、只讀 API、`P2-402B` repo-only daily version freshness snapshot、`P2-402C` Renovate / OSV-Scanner / Trivy / Syft / Grype 工具採用批准包、`P2-402D` Telegram action-required digest policy、`P2-402E` Gitea PR 草案 lane、`P2-402F` host OS / K3s / stateful services 版本只讀盤點,以及 `P2-402G` governance UI 顯示可委派能力;`P2-403A`、`P2-403B`、`P2-403C`、`P2-403D`、`P2-403E`、`P2-403F` 、`P2-403G`、`P2-403H`、`P2-403I` 與 `P2-403J` 已先補互動、學習證據面、live read model gate、Redis dry-run gate、learning writeback approval package、Telegram receipt approval package、owner-approved learning dry-run preview、runtime write gate review、post-write verifier package、runtime verifier evidence review、報表真相、TG 戰情室收斂、日週月報、Agent 工作量、圖表化報告與風險自動化政策審查。下一步是 `P2-403K` unified report truth service / 中低風險 runtime guard / SRE 戰情室路由遷移批准包,外部 registry / package source / host probe / SSH / kubectl / 工具安裝 / CI 變更 / 實際 PR creation / Telegram 實發與 learning write 仍需 gate。
|
||||
AI Agent 主動營運委派與版本生命週期目前完成度:**100%**。已完成 12 類版本 domain、24 類可委派能力、5 種 cadence、8 類 MCP、4 類 RAG memory、只讀 API、`P2-402B` repo-only daily version freshness snapshot、`P2-402C` Renovate / OSV-Scanner / Trivy / Syft / Grype 工具採用批准包、`P2-402D` Telegram action-required digest policy、`P2-402E` Gitea PR 草案 lane、`P2-402F` host OS / K3s / stateful services 版本只讀盤點,以及 `P2-402G` governance UI 顯示可委派能力;`P2-403A`、`P2-403B`、`P2-403C`、`P2-403D`、`P2-403E`、`P2-403F`、`P2-403G`、`P2-403H`、`P2-403I`、`P2-403J` 與 `P2-403K` 已先補互動、學習證據面、live read model gate、Redis dry-run gate、learning writeback approval package、Telegram receipt approval package、owner-approved learning dry-run preview、runtime write gate review、post-write verifier package、runtime verifier evidence review、報表真相、TG 戰情室收斂、日週月報、Agent 工作量、圖表化報告、風險自動化政策審查與 SRE 戰情室路由程式收斂。下一步是 `P2-403L` delivery receipt / queue write E2E,外部 registry / package source / host probe / SSH / kubectl / 工具安裝 / CI 變更 / 實際 PR creation / Telegram 實發與 learning write 仍需 gate。
|
||||
|
||||
完成度計算模型:
|
||||
|
||||
@@ -968,6 +968,7 @@ UI:
|
||||
| P2-403G | 完成 | 100 | OpenClaw | Runtime write gate review、雙重批准、dry-run hash、post-write verifier 與 redaction gate | `ai_agent_runtime_write_gate_review_v1` / snapshot / 只讀 API / governance UI;4 個 write target、4 個 approval gate、9 個必填欄位與 live write total `0` | 不寫 KM、不更新 PlayBook trust、不寫 timeline / replay score、不發 Telegram;runtime write 仍未授權 |
|
||||
| P2-403H | 完成 | 100 | OpenClaw | Post-write verifier implementation package、rollback lane、failure lane 與人工操作選項 | `ai_agent_post_write_verifier_package_v1` / snapshot / 只讀 API / governance UI;4 個 verification target、3 個 failure lane、4 個 operator action 與 live verifier execution `0` | 不讀 canonical target、不寫 rollback work item、不發 Telegram、不寫 KM / PlayBook trust / timeline / replay score;runtime verifier 仍未授權 |
|
||||
| P2-403J | 完成 | 100 | Hermes + OpenClaw | 報表真相、告警有效性、日週月報、每個 Agent 工作量、圖表化報告、AI 分析建議與風險自動化政策審查;高風險需審核,中低風險目前只定義 policy | `ai_agent_report_truth_actionability_review_v1` + `ai_agent_report_automation_review_v1` / snapshot / 只讀 API / governance UI;5 個真相缺口、3 個日週月契約、4 個 actionability lane、4 條 TG 旁路風險、3 個報表週期、3 個 Agent 工作量、4 個 chart package、5 個 recommendation | 不發 Telegram、不改 CronJob、不改 Prometheus / Alertmanager、不改 route / receiver、不讀 secret、不寫 work item / KM / PlayBook trust、不開 runtime worker、不排程實發、不啟動中低風險 auto worker、不執行生產優化 |
|
||||
| P2-403K | 本地完成,正式站待驗證 | 100 | OpenClaw | AwoooI SRE 戰情室路由程式收斂;移除 Gitea / API / ops script 舊群組與 private mirror fallback | Gitea workflow 使用 `SRE_GROUP_CHAT_ID`;CD 舊相容欄位取自 SRE group;Telegram Gateway / notification matrix / jobs / ops scripts SRE-only;CI guard 擋舊 `TELEGRAM_ALERT_CHAT_ID` / `TELEGRAM_CHAT_ID` 與非 SRE direct fallback | 未讀 secret value、未發 Telegram live 測試、未改 Prometheus / Alertmanager route、未開 Gateway queue write / receipt worker / runtime gate |
|
||||
| P2-101 | 待辦 | 0 | OpenClaw | 定義操作類別權限模型 | 操作政策 schema | HITL 關卡 |
|
||||
| P2-102 | 待辦 | 0 | OpenClaw | 所有候選操作都要有 dry-run 證據 | dry-run 合約 | 不直接 apply |
|
||||
| P2-103 | 待辦 | 0 | Hermes | 把任務結果接回 KM / LOGBOOK / 稽核軌跡 | 證據寫入器 | 不洩漏 secret |
|
||||
|
||||
@@ -10,6 +10,7 @@ const path = require("path");
|
||||
const root = path.resolve(__dirname, "../..");
|
||||
const workflowDir = path.join(root, ".gitea", "workflows");
|
||||
const violations = [];
|
||||
const routeViolations = [];
|
||||
|
||||
for (const fileName of fs.readdirSync(workflowDir).sort()) {
|
||||
if (!fileName.endsWith(".yml") && !fileName.endsWith(".yaml")) {
|
||||
@@ -17,9 +18,29 @@ for (const fileName of fs.readdirSync(workflowDir).sort()) {
|
||||
}
|
||||
|
||||
const filePath = path.join(workflowDir, fileName);
|
||||
const lines = fs.readFileSync(filePath, "utf8").split(/\r?\n/);
|
||||
const content = fs.readFileSync(filePath, "utf8");
|
||||
const lines = content.split(/\r?\n/);
|
||||
let block = null;
|
||||
|
||||
if (content.includes("TELEGRAM_ALERT_CHAT_ID")) {
|
||||
routeViolations.push(`${filePath}: legacy TELEGRAM_ALERT_CHAT_ID is not allowed; use SRE_GROUP_CHAT_ID`);
|
||||
}
|
||||
|
||||
if (content.includes("TELEGRAM_CHAT_ID")) {
|
||||
routeViolations.push(`${filePath}: legacy TELEGRAM_CHAT_ID is not allowed for alert routing; use SRE_GROUP_CHAT_ID`);
|
||||
}
|
||||
|
||||
let lineOffset = 0;
|
||||
lines.forEach((line, index) => {
|
||||
if (
|
||||
line.includes("api.telegram.org/bot")
|
||||
&& !content.slice(Math.max(0, lineOffset - 700), lineOffset + line.length + 1200).includes("SRE_GROUP_CHAT_ID")
|
||||
) {
|
||||
routeViolations.push(`${filePath}:${index + 1}: direct Telegram fallback must target SRE_GROUP_CHAT_ID`);
|
||||
}
|
||||
lineOffset += line.length + 1;
|
||||
});
|
||||
|
||||
lines.forEach((line, index) => {
|
||||
const indent = line.match(/^\s*/)[0].length;
|
||||
const trimmed = line.trim();
|
||||
@@ -51,4 +72,12 @@ if (violations.length > 0) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log("no Gitea step env/with secrets");
|
||||
if (routeViolations.length > 0) {
|
||||
console.error("Gitea workflow Telegram route must converge on AwoooI SRE war room:");
|
||||
for (const violation of routeViolations) {
|
||||
console.error(` - ${violation}`);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log("no Gitea step env/with secrets or legacy Telegram routes");
|
||||
|
||||
@@ -59,7 +59,7 @@ notify_awoooi_ops() {
|
||||
notify_telegram_fallback() {
|
||||
local msg="$1"
|
||||
local tg_token="${TG_BOT_TOKEN:-${TELEGRAM_BOT_TOKEN:-}}"
|
||||
local tg_chat="${TELEGRAM_ALERT_CHAT_ID:-${SRE_GROUP_CHAT_ID:--1003711974679}}"
|
||||
local tg_chat="${SRE_GROUP_CHAT_ID:--1003711974679}"
|
||||
if [ -n "$tg_token" ] && [ -n "$tg_chat" ]; then
|
||||
curl -s -X POST "https://api.telegram.org/bot${tg_token}/sendMessage" \
|
||||
-d "chat_id=${tg_chat}" \
|
||||
|
||||
@@ -85,8 +85,7 @@ TELEGRAM_BOT_TOKEN="$(
|
||||
SRE_GROUP_CHAT_ID="$(
|
||||
read_secret_first_available \
|
||||
"${SRE_GROUP_CHAT_ID:-}" \
|
||||
SRE_GROUP_CHAT_ID \
|
||||
TELEGRAM_ALERT_CHAT_ID
|
||||
SRE_GROUP_CHAT_ID
|
||||
)" || die "missing SRE_GROUP_CHAT_ID"
|
||||
|
||||
[[ "$SRE_GROUP_CHAT_ID" =~ ^-?[0-9]+$ ]] || die "SRE_GROUP_CHAT_ID must be a Telegram numeric chat id"
|
||||
|
||||
@@ -87,7 +87,6 @@ deploy_to_host() {
|
||||
AWOOOI_API_URL=https://awoooi.wooo.work
|
||||
TELEGRAM_BOT_TOKEN=CHANGE_ME
|
||||
SRE_GROUP_CHAT_ID=-1003711974679
|
||||
TELEGRAM_ALERT_CHAT_ID=-1003711974679
|
||||
SEND_COOLDOWN_SECONDS=300
|
||||
SECRETS_TEMPLATE
|
||||
echo ' ⚠️ 請填寫 /etc/awoooi-ops/secrets.env.template 後重命名為 secrets.env'
|
||||
|
||||
@@ -25,7 +25,6 @@ fi
|
||||
: "${AWOOOI_API_URL:=https://awoooi.wooo.work}"
|
||||
: "${TELEGRAM_BOT_TOKEN:=}"
|
||||
: "${SRE_GROUP_CHAT_ID:=-1003711974679}"
|
||||
: "${TELEGRAM_ALERT_CHAT_ID:=${SRE_GROUP_CHAT_ID:-${TELEGRAM_CHAT_ID:-}}}"
|
||||
: "${LOG_FILE:=/var/log/docker-health-monitor.log}"
|
||||
: "${SEND_COOLDOWN_SECONDS:=300}"
|
||||
: "${COOLDOWN_DIR:=/tmp/docker-health-monitor-cooldown}"
|
||||
@@ -87,10 +86,10 @@ matches_pattern() {
|
||||
# ─── Telegram 直發 Fallback ──────────────────────────────────────────────────
|
||||
send_telegram_direct() {
|
||||
local message="$1"
|
||||
[[ -z "$TELEGRAM_BOT_TOKEN" || -z "$TELEGRAM_ALERT_CHAT_ID" ]] && return 0
|
||||
[[ -z "$TELEGRAM_BOT_TOKEN" || -z "$SRE_GROUP_CHAT_ID" ]] && return 0
|
||||
curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"chat_id\":\"${TELEGRAM_ALERT_CHAT_ID}\",\"text\":\"${message}\",\"parse_mode\":\"HTML\"}" \
|
||||
-d "{\"chat_id\":\"${SRE_GROUP_CHAT_ID}\",\"text\":\"${message}\",\"parse_mode\":\"HTML\"}" \
|
||||
> /dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ notify_telegram() {
|
||||
# 只有 API 不可達或 helper 未部署時,才使用 Telegram 直發救命旁路。
|
||||
notify_awoooi_ops "$status" "$msg" && return 0
|
||||
|
||||
local chat_id="${TELEGRAM_ALERT_CHAT_ID:-${SRE_GROUP_CHAT_ID:--1003711974679}}"
|
||||
local chat_id="${SRE_GROUP_CHAT_ID:--1003711974679}"
|
||||
if [[ -n "${TELEGRAM_BOT_TOKEN:-}" && -n "$chat_id" ]]; then
|
||||
curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
|
||||
-d "chat_id=${chat_id}" \
|
||||
|
||||
@@ -47,7 +47,7 @@ notify_telegram() {
|
||||
# 只有 API 不可達或 helper 未部署時,才使用 Telegram 直發救命旁路。
|
||||
notify_awoooi_ops "$status" "$msg" && return 0
|
||||
|
||||
local chat_id="${TELEGRAM_ALERT_CHAT_ID:-${SRE_GROUP_CHAT_ID:--1003711974679}}"
|
||||
local chat_id="${SRE_GROUP_CHAT_ID:--1003711974679}"
|
||||
if [[ -n "${TELEGRAM_BOT_TOKEN:-}" && -n "$chat_id" ]]; then
|
||||
curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
|
||||
-d "chat_id=${chat_id}" \
|
||||
|
||||
Reference in New Issue
Block a user