feat(awooop): show callback trace recovery
This commit is contained in:
@@ -129,6 +129,10 @@ class CallbackReplyAuditSummary(BaseModel):
|
||||
outbound_reply_markup_missing_trace_ref_latest_sent_at: datetime | None = None
|
||||
outbound_reply_markup_trace_ref_gap_status: str = "clean"
|
||||
outbound_reply_markup_trace_ref_gap_next_action: str = "none"
|
||||
outbound_reply_markup_trace_ref_after_gap_total: int = 0
|
||||
outbound_reply_markup_trace_ref_after_gap_first_sent_at: datetime | None = None
|
||||
outbound_reply_markup_trace_ref_after_gap_latest_sent_at: datetime | None = None
|
||||
outbound_reply_markup_trace_ref_gap_recovery_status: str = "not_needed"
|
||||
outbound_reply_markup_missing_incident_ref_top_prefixes: list[
|
||||
OutboundReplyMarkupGapPrefix
|
||||
] = Field(default_factory=list)
|
||||
|
||||
@@ -499,6 +499,14 @@ async def _fetch_callback_reply_audit_summary(
|
||||
FROM awooop_outbound_message m
|
||||
WHERE m.project_id = :project_id
|
||||
AND m.channel_type = 'telegram'
|
||||
),
|
||||
trace_gap_cutoff AS (
|
||||
SELECT
|
||||
MAX(COALESCE(sent_at, queued_at))
|
||||
AS latest_missing_trace_ref_at
|
||||
FROM outbound
|
||||
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
|
||||
AND NOT has_trace_ref
|
||||
)
|
||||
SELECT
|
||||
COUNT(*) AS outbound_total,
|
||||
@@ -568,6 +576,30 @@ async def _fetch_callback_reply_audit_summary(
|
||||
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
|
||||
AND NOT has_trace_ref
|
||||
) AS outbound_reply_markup_missing_trace_ref_latest_sent_at,
|
||||
COUNT(*) FILTER (
|
||||
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
|
||||
AND has_trace_ref
|
||||
AND trace_gap_cutoff.latest_missing_trace_ref_at
|
||||
IS NOT NULL
|
||||
AND COALESCE(sent_at, queued_at)
|
||||
> trace_gap_cutoff.latest_missing_trace_ref_at
|
||||
) AS outbound_reply_markup_trace_ref_after_gap_total,
|
||||
MIN(COALESCE(sent_at, queued_at)) FILTER (
|
||||
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
|
||||
AND has_trace_ref
|
||||
AND trace_gap_cutoff.latest_missing_trace_ref_at
|
||||
IS NOT NULL
|
||||
AND COALESCE(sent_at, queued_at)
|
||||
> trace_gap_cutoff.latest_missing_trace_ref_at
|
||||
) AS outbound_reply_markup_trace_ref_after_gap_first_sent_at,
|
||||
MAX(COALESCE(sent_at, queued_at)) FILTER (
|
||||
WHERE source_envelope #>> '{reply_markup,present}' = 'true'
|
||||
AND has_trace_ref
|
||||
AND trace_gap_cutoff.latest_missing_trace_ref_at
|
||||
IS NOT NULL
|
||||
AND COALESCE(sent_at, queued_at)
|
||||
> trace_gap_cutoff.latest_missing_trace_ref_at
|
||||
) AS outbound_reply_markup_trace_ref_after_gap_latest_sent_at,
|
||||
COALESCE((
|
||||
SELECT jsonb_agg(
|
||||
jsonb_build_object(
|
||||
@@ -711,6 +743,7 @@ async def _fetch_callback_reply_audit_summary(
|
||||
WHERE source_envelope ? 'callback_reply'
|
||||
) AS latest_callback_at
|
||||
FROM outbound
|
||||
CROSS JOIN trace_gap_cutoff
|
||||
"""),
|
||||
{"project_id": project_id},
|
||||
)
|
||||
@@ -752,6 +785,13 @@ def _callback_reply_audit_summary_from_row(
|
||||
recent_1h=missing_trace_recent_1h,
|
||||
recent_24h=missing_trace_recent_24h,
|
||||
)
|
||||
trace_ref_after_gap_total = _safe_int(
|
||||
row.get("outbound_reply_markup_trace_ref_after_gap_total")
|
||||
)
|
||||
trace_gap_recovery_status = _trace_ref_gap_recovery_status(
|
||||
missing_total=missing_trace_total,
|
||||
after_gap_total=trace_ref_after_gap_total,
|
||||
)
|
||||
|
||||
if callback_total <= 0:
|
||||
snapshot_status = "no_callback"
|
||||
@@ -811,6 +851,18 @@ def _callback_reply_audit_summary_from_row(
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_gap_status": trace_gap_status,
|
||||
"outbound_reply_markup_trace_ref_gap_next_action": trace_gap_next_action,
|
||||
"outbound_reply_markup_trace_ref_after_gap_total": (
|
||||
trace_ref_after_gap_total
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_after_gap_first_sent_at": row.get(
|
||||
"outbound_reply_markup_trace_ref_after_gap_first_sent_at"
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_after_gap_latest_sent_at": row.get(
|
||||
"outbound_reply_markup_trace_ref_after_gap_latest_sent_at"
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_gap_recovery_status": (
|
||||
trace_gap_recovery_status
|
||||
),
|
||||
"outbound_reply_markup_missing_incident_ref_top_prefixes": (
|
||||
top_missing_prefixes
|
||||
),
|
||||
@@ -852,6 +904,19 @@ def _trace_ref_gap_decision(
|
||||
return "legacy_backlog", "backfill_or_archive_legacy_callbacks"
|
||||
|
||||
|
||||
def _trace_ref_gap_recovery_status(
|
||||
*,
|
||||
missing_total: int,
|
||||
after_gap_total: int,
|
||||
) -> str:
|
||||
"""Describe whether traced reply_markup messages resumed after the last gap."""
|
||||
if missing_total <= 0:
|
||||
return "not_needed"
|
||||
if after_gap_total > 0:
|
||||
return "recovered_after_gap"
|
||||
return "no_recovery_signal"
|
||||
|
||||
|
||||
def _reply_markup_gap_prefixes_from_value(value: Any) -> list[dict[str, Any]]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
|
||||
@@ -683,6 +683,16 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None:
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_gap_status": "recent_backlog",
|
||||
"outbound_reply_markup_trace_ref_gap_next_action": "watch_24h_decay",
|
||||
"outbound_reply_markup_trace_ref_after_gap_total": 3,
|
||||
"outbound_reply_markup_trace_ref_after_gap_first_sent_at": (
|
||||
datetime(2026, 5, 18, 8, 20, 0)
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_after_gap_latest_sent_at": (
|
||||
datetime(2026, 5, 18, 9, 0, 0)
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_gap_recovery_status": (
|
||||
"recovered_after_gap"
|
||||
),
|
||||
"outbound_reply_markup_missing_incident_ref_top_prefixes": [
|
||||
{
|
||||
"prefix": "silence",
|
||||
@@ -763,6 +773,16 @@ def test_list_callback_replies_response_preserves_callback_evidence() -> None:
|
||||
assert dumped["summary"]["outbound_reply_markup_trace_ref_gap_next_action"] == (
|
||||
"watch_24h_decay"
|
||||
)
|
||||
assert dumped["summary"]["outbound_reply_markup_trace_ref_after_gap_total"] == 3
|
||||
assert dumped["summary"][
|
||||
"outbound_reply_markup_trace_ref_after_gap_first_sent_at"
|
||||
] == "2026-05-18T08:20:00"
|
||||
assert dumped["summary"][
|
||||
"outbound_reply_markup_trace_ref_after_gap_latest_sent_at"
|
||||
] == "2026-05-18T09:00:00"
|
||||
assert dumped["summary"][
|
||||
"outbound_reply_markup_trace_ref_gap_recovery_status"
|
||||
] == "recovered_after_gap"
|
||||
assert dumped["summary"][
|
||||
"outbound_reply_markup_missing_incident_ref_top_prefixes"
|
||||
][0] == {
|
||||
@@ -815,6 +835,9 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None:
|
||||
"outbound_reply_markup_missing_trace_ref_latest_sent_at": (
|
||||
datetime(2026, 5, 25, 8, 42, 22)
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_after_gap_total": 0,
|
||||
"outbound_reply_markup_trace_ref_after_gap_first_sent_at": None,
|
||||
"outbound_reply_markup_trace_ref_after_gap_latest_sent_at": None,
|
||||
"outbound_reply_markup_missing_incident_ref_top_prefixes": [
|
||||
{
|
||||
"prefix": "silence",
|
||||
@@ -868,6 +891,10 @@ def test_callback_reply_audit_summary_marks_missing_snapshots() -> None:
|
||||
assert summary["outbound_reply_markup_trace_ref_gap_next_action"] == (
|
||||
"inspect_recent_outbound_source_refs"
|
||||
)
|
||||
assert summary["outbound_reply_markup_trace_ref_after_gap_total"] == 0
|
||||
assert summary["outbound_reply_markup_trace_ref_gap_recovery_status"] == (
|
||||
"no_recovery_signal"
|
||||
)
|
||||
assert summary[
|
||||
"outbound_reply_markup_missing_incident_ref_recent_24h_total"
|
||||
] == 3
|
||||
@@ -907,6 +934,13 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() ->
|
||||
"outbound_reply_markup_missing_trace_ref_latest_sent_at": (
|
||||
datetime(2026, 5, 25, 12, 13, 1)
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_after_gap_total": 8,
|
||||
"outbound_reply_markup_trace_ref_after_gap_first_sent_at": (
|
||||
datetime(2026, 5, 25, 12, 20, 0)
|
||||
),
|
||||
"outbound_reply_markup_trace_ref_after_gap_latest_sent_at": (
|
||||
datetime(2026, 5, 25, 13, 26, 8)
|
||||
),
|
||||
"outbound_reply_markup_missing_incident_ref_top_prefixes": [
|
||||
{
|
||||
"prefix": "silence",
|
||||
@@ -965,6 +999,13 @@ def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() ->
|
||||
assert summary["outbound_reply_markup_trace_ref_gap_next_action"] == (
|
||||
"watch_24h_decay"
|
||||
)
|
||||
assert summary["outbound_reply_markup_trace_ref_after_gap_total"] == 8
|
||||
assert summary["outbound_reply_markup_trace_ref_after_gap_first_sent_at"] == (
|
||||
datetime(2026, 5, 25, 12, 20, 0)
|
||||
)
|
||||
assert summary["outbound_reply_markup_trace_ref_gap_recovery_status"] == (
|
||||
"recovered_after_gap"
|
||||
)
|
||||
assert summary["outbound_reply_markup_missing_trace_ref_top_prefixes"][0][
|
||||
"recent_24h_total"
|
||||
] == 23
|
||||
|
||||
@@ -3062,6 +3062,7 @@
|
||||
"outboundReplyMarkupDetail": "reply_markup {replyMarkup}; missing trace refs {missingTraceRefs}; missing incident refs {missingIncidentRefs}",
|
||||
"outboundReplyMarkupTraceFreshness": "Missing trace activity: 1h {recent1h} / 24h {recent24h} / latest {latest}",
|
||||
"outboundReplyMarkupTraceDecision": "Trace gap decision: {status}; next: {action}",
|
||||
"outboundReplyMarkupTraceRecovery": "Trace gap recovery: {status}; traced after gap {count}; first {first}; latest {latest}",
|
||||
"outboundReplyMarkupIncidentFreshness": "Missing incident activity: 1h {recent1h} / 24h {recent24h} / latest {latest}",
|
||||
"outboundReplyMarkupTopPrefixes": "Missing incident top prefixes: {prefixes}",
|
||||
"outboundReplyMarkupTraceTopPrefixes": "Missing trace top prefixes: {prefixes}",
|
||||
@@ -3095,6 +3096,12 @@
|
||||
"backfill_or_archive_legacy_callbacks": "Backfill or archive legacy callback gaps",
|
||||
"observed": "Wait for the next outbound evidence"
|
||||
},
|
||||
"traceGapRecoveryStatuses": {
|
||||
"not_needed": "Not needed",
|
||||
"recovered_after_gap": "Recovered",
|
||||
"no_recovery_signal": "No recovery signal yet",
|
||||
"observed": "Recorded"
|
||||
},
|
||||
"nextActions": {
|
||||
"none": "No follow-up needed",
|
||||
"press_telegram_detail_or_history": "Press Telegram Detail / History once to create callback evidence",
|
||||
|
||||
@@ -3063,6 +3063,7 @@
|
||||
"outboundReplyMarkupDetail": "reply_markup {replyMarkup};缺 trace refs {missingTraceRefs};缺 incident refs {missingIncidentRefs}",
|
||||
"outboundReplyMarkupTraceFreshness": "缺 trace 活躍度:1h {recent1h} / 24h {recent24h} / 最新 {latest}",
|
||||
"outboundReplyMarkupTraceDecision": "缺 trace 判讀:{status};下一步:{action}",
|
||||
"outboundReplyMarkupTraceRecovery": "缺 trace 復原訊號:{status};gap 後 traced {count};首筆 {first};最新 {latest}",
|
||||
"outboundReplyMarkupIncidentFreshness": "缺 incident 活躍度:1h {recent1h} / 24h {recent24h} / 最新 {latest}",
|
||||
"outboundReplyMarkupTopPrefixes": "缺 incident top prefixes:{prefixes}",
|
||||
"outboundReplyMarkupTraceTopPrefixes": "缺 trace top prefixes:{prefixes}",
|
||||
@@ -3096,6 +3097,12 @@
|
||||
"backfill_or_archive_legacy_callbacks": "歸檔或回補舊 callback 缺口",
|
||||
"observed": "等待下一次 outbound evidence"
|
||||
},
|
||||
"traceGapRecoveryStatuses": {
|
||||
"not_needed": "不需要",
|
||||
"recovered_after_gap": "已復原",
|
||||
"no_recovery_signal": "尚無復原訊號",
|
||||
"observed": "已記錄"
|
||||
},
|
||||
"nextActions": {
|
||||
"none": "不需補動作",
|
||||
"press_telegram_detail_or_history": "按一次 Telegram 詳情 / 歷史產生 callback evidence",
|
||||
|
||||
@@ -162,6 +162,10 @@ interface CallbackReplyAuditSummary {
|
||||
outbound_reply_markup_missing_trace_ref_latest_sent_at?: string | null;
|
||||
outbound_reply_markup_trace_ref_gap_status?: string | null;
|
||||
outbound_reply_markup_trace_ref_gap_next_action?: string | null;
|
||||
outbound_reply_markup_trace_ref_after_gap_total?: number;
|
||||
outbound_reply_markup_trace_ref_after_gap_first_sent_at?: string | null;
|
||||
outbound_reply_markup_trace_ref_after_gap_latest_sent_at?: string | null;
|
||||
outbound_reply_markup_trace_ref_gap_recovery_status?: string | null;
|
||||
outbound_reply_markup_missing_incident_ref_top_prefixes?: Array<{
|
||||
prefix?: string | null;
|
||||
total?: number | null;
|
||||
@@ -2545,6 +2549,19 @@ function CallbackReplyAuditSummaryPanel({
|
||||
traceGapNextActionRaw === "watch_24h_decay" ||
|
||||
traceGapNextActionRaw === "backfill_or_archive_legacy_callbacks"
|
||||
) ? traceGapNextActionRaw : "observed";
|
||||
const traceGapRecoveryStatusRaw =
|
||||
summary.outbound_reply_markup_trace_ref_gap_recovery_status ?? "observed";
|
||||
const traceGapRecoveryStatusKey = (
|
||||
traceGapRecoveryStatusRaw === "not_needed" ||
|
||||
traceGapRecoveryStatusRaw === "recovered_after_gap" ||
|
||||
traceGapRecoveryStatusRaw === "no_recovery_signal"
|
||||
) ? traceGapRecoveryStatusRaw : "observed";
|
||||
const traceRefAfterGapFirst = formatShortDateTime(
|
||||
summary.outbound_reply_markup_trace_ref_after_gap_first_sent_at
|
||||
);
|
||||
const traceRefAfterGapLatest = formatShortDateTime(
|
||||
summary.outbound_reply_markup_trace_ref_after_gap_latest_sent_at
|
||||
);
|
||||
const snapshotClass = {
|
||||
captured: "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]",
|
||||
partial: "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]",
|
||||
@@ -2597,6 +2614,16 @@ function CallbackReplyAuditSummaryPanel({
|
||||
action: t(`traceGapNextActions.${traceGapNextActionKey}` as never),
|
||||
})}
|
||||
</p>
|
||||
<p className="mt-1 text-xs font-semibold leading-5 text-[#17602a]">
|
||||
{t("outboundReplyMarkupTraceRecovery", {
|
||||
status: t(
|
||||
`traceGapRecoveryStatuses.${traceGapRecoveryStatusKey}` as never
|
||||
),
|
||||
count: summary.outbound_reply_markup_trace_ref_after_gap_total ?? 0,
|
||||
first: traceRefAfterGapFirst,
|
||||
latest: traceRefAfterGapLatest,
|
||||
})}
|
||||
</p>
|
||||
<p className="mt-1 text-xs leading-5 text-[#5f5b52]">
|
||||
{t("outboundReplyMarkupIncidentFreshness", {
|
||||
recent1h:
|
||||
|
||||
@@ -21486,3 +21486,65 @@ GET /api/v1/health
|
||||
- KM governance:約 84.6%。
|
||||
- AI Provider lane visibility:約 92.2%。
|
||||
- 完整 AI 自動化管理產品化:約 98.05%。
|
||||
|
||||
### 2026-05-25 — T194 Callback trace gap recovery signal(pre-deploy)
|
||||
|
||||
**背景**:T193 已能把缺 trace refs 判讀為 `recent_backlog`,但 operator 還需要知道
|
||||
「最後一筆缺 trace 之後,新送出的 action cards 是否已經恢復 trace refs」。T194 不
|
||||
寫死 deploy cutoff,改用資料本身動態計算最後缺口後的 traced reply_markup 數量。
|
||||
|
||||
**完成變更**:
|
||||
|
||||
- `/api/v1/platform/runs/callback-replies` summary 新增:
|
||||
- `outbound_reply_markup_trace_ref_after_gap_total`
|
||||
- `outbound_reply_markup_trace_ref_after_gap_first_sent_at`
|
||||
- `outbound_reply_markup_trace_ref_after_gap_latest_sent_at`
|
||||
- `outbound_reply_markup_trace_ref_gap_recovery_status`
|
||||
- 判讀規則:
|
||||
- `not_needed`:沒有缺 trace refs。
|
||||
- `recovered_after_gap`:最後缺 trace 後,已有新的 traced reply_markup。
|
||||
- `no_recovery_signal`:仍缺 trace,且最後缺口後尚無 traced reply_markup。
|
||||
- AwoooP Runs / TG Callback Evidence 前端新增:
|
||||
- `缺 trace 復原訊號:{status};gap 後 traced {count};首筆 {first};最新 {latest}`
|
||||
|
||||
**local validation(完成)**:
|
||||
|
||||
```text
|
||||
python3 -m py_compile apps/api/src/services/platform_operator_service.py apps/api/src/api/v1/platform/operator_runs.py apps/api/tests/test_awooop_operator_timeline_labels.py
|
||||
jq empty apps/web/messages/zh-TW.json apps/web/messages/en.json
|
||||
PYTHONPATH=. DATABASE_URL='postgresql+asyncpg://test:test@localhost/test' /Users/ogt/.pyenv/shims/pytest apps/api/tests/test_awooop_operator_timeline_labels.py -q
|
||||
53 passed in 0.92s
|
||||
pnpm --dir apps/web exec tsc --noEmit --tsBuildInfoFile /tmp/awoooi-t194-tsconfig.tsbuildinfo
|
||||
pass
|
||||
pnpm --dir apps/web lint -- --file 'src/app/[locale]/awooop/runs/page.tsx'
|
||||
pass with pre-existing i18next/no-literal-string and unused icon warnings
|
||||
NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web run build
|
||||
pass; Sentry global-error / instrumentation-client warnings are pre-existing
|
||||
git diff --check
|
||||
pass
|
||||
```
|
||||
|
||||
**production SQL dry-run(完成,read-only, RLS context)**:
|
||||
|
||||
```text
|
||||
missing_trace_total = 417
|
||||
missing_trace_latest_sent_at = 2026-05-25 12:13:01.534615
|
||||
traced_after_gap_total = 9
|
||||
traced_after_gap_first_sent_at = 2026-05-25 12:24:35.809853
|
||||
traced_after_gap_latest_sent_at = 2026-05-25 13:32:04.893903
|
||||
recovery_status = recovered_after_gap
|
||||
```
|
||||
|
||||
**目前整體進度(pre-deploy)**:
|
||||
|
||||
- AwoooP 告警可觀測鏈:約 99.86%。
|
||||
- 低風險自動修復閉環:約 95.9%。
|
||||
- 前端 AI 自動化管理介面同步:約 99.76%。
|
||||
- Telegram outbound / callback DB coverage 可視化:約 99.86%。
|
||||
- callback / DB replayability:約 99.0%。
|
||||
- MCP / 自建 MCP 可視化:約 95.1%。
|
||||
- Sentry / SigNoz source correlation:約 94.5%。
|
||||
- Ansible / PlayBook 可視化:約 92.6%。
|
||||
- KM governance:約 84.6%。
|
||||
- AI Provider lane visibility:約 92.2%。
|
||||
- 完整 AI 自動化管理產品化:約 98.1%。
|
||||
|
||||
Reference in New Issue
Block a user