fix(alerts): surface legacy hitl backlog
Some checks failed
CD Pipeline / tests (push) Successful in 1m21s
Code Review / ai-code-review (push) Successful in 13s
Type Sync Check / check-type-sync (push) Failing after 40s
CD Pipeline / build-and-deploy (push) Successful in 5m22s
CD Pipeline / post-deploy-checks (push) Successful in 2m19s
Some checks failed
CD Pipeline / tests (push) Successful in 1m21s
Code Review / ai-code-review (push) Successful in 13s
Type Sync Check / check-type-sync (push) Failing after 40s
CD Pipeline / build-and-deploy (push) Successful in 5m22s
CD Pipeline / post-deploy-checks (push) Successful in 2m19s
This commit is contained in:
@@ -167,6 +167,8 @@ class ApprovalRequest(ApprovalRequestBase):
|
||||
fingerprint: str | None = Field(default=None, description="告警指紋 Hash")
|
||||
hit_count: int = Field(default=1, description="聚合觸發次數")
|
||||
last_seen_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc), description="最後觸發時間")
|
||||
telegram_message_id: int | None = Field(default=None, description="Telegram approval card message ID")
|
||||
telegram_chat_id: int | None = Field(default=None, description="Telegram chat ID for the approval card")
|
||||
# 2026-04-14 Claude Sonnet 4.6: incident_id 已移至 Base(避免 ApprovalRequestCreate 缺欄位)
|
||||
|
||||
@property
|
||||
@@ -216,6 +218,10 @@ class ApprovalRequestResponse(BaseModel):
|
||||
hit_count: int = 1
|
||||
last_seen_at: datetime | None = None
|
||||
# Phase 6.5: Incident 關聯 (用於簽核後更新 Incident 狀態)
|
||||
incident_id: str | None = None
|
||||
matched_playbook_id: str | None = None
|
||||
telegram_message_id: int | None = None
|
||||
telegram_chat_id: int | None = None
|
||||
metadata: dict | None = None
|
||||
|
||||
@classmethod
|
||||
@@ -241,6 +247,10 @@ class ApprovalRequestResponse(BaseModel):
|
||||
hit_count=approval.hit_count,
|
||||
last_seen_at=approval.last_seen_at,
|
||||
# Phase 6.5
|
||||
incident_id=approval.incident_id,
|
||||
matched_playbook_id=approval.matched_playbook_id,
|
||||
telegram_message_id=approval.telegram_message_id,
|
||||
telegram_chat_id=approval.telegram_chat_id,
|
||||
metadata=approval.metadata,
|
||||
)
|
||||
|
||||
|
||||
@@ -106,6 +106,8 @@ def _record_to_request(record: ApprovalRecord) -> ApprovalRequest:
|
||||
# B4 fix 2026-04-24 ogt + Claude Sonnet 4.6: 補回 DB 欄位(人工審核路徑讀回必要)
|
||||
incident_id=getattr(record, "incident_id", None),
|
||||
matched_playbook_id=getattr(record, "matched_playbook_id", None),
|
||||
telegram_message_id=getattr(record, "telegram_message_id", None),
|
||||
telegram_chat_id=getattr(record, "telegram_chat_id", None),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -110,10 +110,10 @@ def approval_record_to_request(record: ApprovalRecord) -> ApprovalRequest:
|
||||
hit_count=record.hit_count,
|
||||
last_seen_at=record.last_seen_at,
|
||||
# B3 fix 2026-04-24 ogt + Claude Sonnet 4.6: 補回 DB 欄位(人工審核路徑讀回必要)
|
||||
# incident_id / matched_playbook_id 在 ApprovalRequest 基礎模型中有定義
|
||||
# telegram_message_id / telegram_chat_id 只在 DB model,不在 Pydantic ApprovalRequest
|
||||
incident_id=getattr(record, "incident_id", None),
|
||||
matched_playbook_id=getattr(record, "matched_playbook_id", None),
|
||||
telegram_message_id=getattr(record, "telegram_message_id", None),
|
||||
telegram_chat_id=getattr(record, "telegram_chat_id", None),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -53,6 +53,9 @@ class AlertPipelineStats:
|
||||
total_24h: int = 0
|
||||
auto_resolved_24h: int = 0
|
||||
pending_approval: int = 0
|
||||
pending_actionable: int = 0
|
||||
pending_observe_only: int = 0
|
||||
pending_without_telegram: int = 0
|
||||
execution_success_24h: int = 0
|
||||
execution_failed_24h: int = 0
|
||||
|
||||
@@ -524,18 +527,46 @@ class HeartbeatReportService:
|
||||
from src.db.base import get_db_context
|
||||
async with get_db_context() as db:
|
||||
r = await db.execute(sa_text("""
|
||||
WITH scoped AS (
|
||||
SELECT
|
||||
*,
|
||||
(
|
||||
btrim(coalesce(action, '')) = ''
|
||||
OR UPPER(action) LIKE 'OBSERVE%'
|
||||
OR UPPER(action) LIKE 'INVESTIGATE%'
|
||||
OR UPPER(action) LIKE 'NO_ACTION%'
|
||||
OR UPPER(action) LIKE '% NO_ACTION%'
|
||||
OR UPPER(action) LIKE '%| NO_ACTION%'
|
||||
) AS is_observe_only
|
||||
FROM approval_records
|
||||
WHERE created_at >= NOW() - interval '24 hours'
|
||||
)
|
||||
SELECT
|
||||
COUNT(*) AS total,
|
||||
COUNT(*) FILTER (WHERE UPPER(status::text) = 'PENDING') AS pending,
|
||||
COUNT(*) FILTER (
|
||||
WHERE UPPER(status::text) = 'PENDING'
|
||||
AND NOT is_observe_only
|
||||
) AS pending_actionable,
|
||||
COUNT(*) FILTER (
|
||||
WHERE UPPER(status::text) = 'PENDING'
|
||||
AND is_observe_only
|
||||
) AS pending_observe_only,
|
||||
COUNT(*) FILTER (
|
||||
WHERE UPPER(status::text) = 'PENDING'
|
||||
AND telegram_message_id IS NULL
|
||||
) AS pending_without_telegram,
|
||||
COUNT(*) FILTER (WHERE UPPER(status::text) = 'EXECUTION_SUCCESS') AS success,
|
||||
COUNT(*) FILTER (WHERE UPPER(status::text) = 'EXECUTION_FAILED') AS failed,
|
||||
COUNT(*) FILTER (WHERE UPPER(status::text) IN ('APPROVED','EXECUTION_SUCCESS','EXECUTION_FAILED')) AS auto_resolved
|
||||
FROM approval_records
|
||||
WHERE created_at >= NOW() - interval '24 hours'
|
||||
FROM scoped
|
||||
"""))
|
||||
row = r.one()
|
||||
stats.total_24h = int(row.total or 0)
|
||||
stats.pending_approval = int(row.pending or 0)
|
||||
stats.pending_actionable = int(row.pending_actionable or 0)
|
||||
stats.pending_observe_only = int(row.pending_observe_only or 0)
|
||||
stats.pending_without_telegram = int(row.pending_without_telegram or 0)
|
||||
stats.execution_success_24h = int(row.success or 0)
|
||||
stats.execution_failed_24h = int(row.failed or 0)
|
||||
stats.auto_resolved_24h = int(row.auto_resolved or 0)
|
||||
@@ -762,9 +793,12 @@ class HeartbeatReportService:
|
||||
if not report.db_redis.redis_ok:
|
||||
warnings.append(f"Redis: {report.db_redis.redis_status}")
|
||||
|
||||
# Pending 積壓告警
|
||||
if report.alert_pipeline.pending_approval > 10:
|
||||
warnings.append(f"PENDING 積壓 {report.alert_pipeline.pending_approval} 筆,需人工處理")
|
||||
# Pending 積壓告警:只用可執行/有風險待審計數觸發,避免 OBSERVE/NO_ACTION 觀察卡造成假待辦。
|
||||
if report.alert_pipeline.pending_actionable > 10:
|
||||
warnings.append(
|
||||
f"待人工審核 {report.alert_pipeline.pending_actionable} 筆"
|
||||
f"(前台 /awooop/approvals;觀察類 {report.alert_pipeline.pending_observe_only} 筆另列)"
|
||||
)
|
||||
|
||||
# Pod 異常 — 2026-05-03 Claude Opus 4.7 + 統帥 ogt:P0 #3 完整 K8s pod state machine
|
||||
# K8s pod phases (https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/):
|
||||
@@ -906,6 +940,10 @@ def report_to_telegram_html(report: HeartbeatReport) -> str:
|
||||
lines.append("")
|
||||
lines.append("📊 <b>告警流水線(24h)</b>")
|
||||
lines.append(f"├─ 總計: {ap.total_24h} PENDING: {ap.pending_approval}")
|
||||
lines.append(
|
||||
f"├─ 待審拆分: 人工 {ap.pending_actionable} 觀察 {ap.pending_observe_only}"
|
||||
f" 無TG {ap.pending_without_telegram}"
|
||||
)
|
||||
if ap.execution_success_24h > 0 and ap.execution_failed_24h == 0:
|
||||
exec_icon = "✅"
|
||||
elif ap.execution_failed_24h > 0:
|
||||
|
||||
Reference in New Issue
Block a user