diff --git a/apps/api/src/services/awooop_truth_chain_service.py b/apps/api/src/services/awooop_truth_chain_service.py index 738b21aa..a084469a 100644 --- a/apps/api/src/services/awooop_truth_chain_service.py +++ b/apps/api/src/services/awooop_truth_chain_service.py @@ -7,6 +7,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth. from __future__ import annotations +import asyncio import json from datetime import UTC, date, datetime, timedelta from decimal import Decimal @@ -24,6 +25,7 @@ logger = structlog.get_logger(__name__) _MAX_ROWS = 100 _JSON_TEXT_FIELDS = {"gate_result", "source_envelope"} +_QUALITY_SUMMARY_CONCURRENCY = 8 def _clean(value: Any) -> Any: @@ -1477,17 +1479,25 @@ async def fetch_automation_quality_summary( }, ) - records: list[dict[str, Any]] = [] - for incident in incidents: + semaphore = asyncio.Semaphore(_QUALITY_SUMMARY_CONCURRENCY) + + async def _quality_record(incident: dict[str, Any]) -> dict[str, Any] | None: incident_id = str(incident.get("incident_id") or "") if not incident_id: - continue - truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id) - records.append({ + return None + async with semaphore: + truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id) + return { "incident": truth_chain.get("incident") or incident, "truth_status": truth_chain.get("truth_status") or {}, "automation_quality": truth_chain.get("automation_quality") or {}, - }) + } + + records = [ + record + for record in await asyncio.gather(*(_quality_record(incident) for incident in incidents)) + if record is not None + ] summary = summarize_automation_quality_records( project_id=project_id, diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index d28808e4..2f8eb3c4 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,34 @@ +## 2026-05-18 | T47 Truth-chain quality summary bounded concurrency + +**背景**:T46 前端導入 production claim 後,實測發現 `/api/v1/platform/truth-chain/quality/summary?limit=100` 超過 15 秒未回應,`limit=30` 約 7.3 秒才回應。這會讓 Operator Console 第一屏無法穩定顯示「能不能宣稱完整 AI 自動修復」,也會讓 Telegram / 前端 truth-chain 可見性打折。 + +**根因**: +- `fetch_automation_quality_summary()` 先查最近 incidents。 +- 接著逐筆 incident 串行呼叫完整 `fetch_truth_chain()`。 +- 每筆 truth-chain 都會聚合 incident / approval / evidence / MCP / automation / KM / timeline / outbound mirror;`limit=100` 等於 100 次完整聚合串行執行。 + +**修正**: +- `fetch_automation_quality_summary()` 改為 bounded concurrency。 +- 最多同時 8 筆 incident truth-chain 聚合,避免無限制打爆 DB,也避免 100 筆完全串行。 +- 不改 scoring、不改 production claim 判讀、不改 truth-chain schema。 + +**local verification**: +- `DATABASE_URL='sqlite+aiosqlite:///:memory:' /Users/ogt/awoooi/apps/api/.venv/bin/python -m pytest apps/api/tests/test_awooop_truth_chain_service.py -q`:24 passed。 +- `/Users/ogt/awoooi/apps/api/.venv/bin/ruff check --select E9,F821,F401 apps/api/src/services/awooop_truth_chain_service.py apps/api/tests/test_awooop_truth_chain_service.py`:pass。 +- `/Users/ogt/awoooi/apps/api/.venv/bin/python -m py_compile apps/api/src/services/awooop_truth_chain_service.py`:pass。 +- `git diff --check`:pass。 + +**目前整體進度**: +- Alertmanager 低風險自動修復主線:約 98%。 +- 完整 AI 自動化管理產品化:約 99%。 +- 前端 AI 自動化管理介面產品化:約 93%。 +- 告警詳情/歷史/主卡/前端 deep-link 可追溯:約 99%。 +- Truth-chain quality summary 即時可用性:約 85%(待 production 驗證 `limit=100`)。 +- Telegram approval / reject callback:約 97%。 +- AwoooP MCP 使用可見性:約 90%。 +- Token hygiene:約 65%。 +- CI/CD secret masking hygiene:約 60%。 + ## 2026-05-18 | T46 AwoooP Work Items 顯示 production claim / T44 / T45 **背景**:統帥要求已完成與正在推進的工作要能在前端頁面同步呈現,否則 Telegram、Run、Approval、MCP、PlayBook、KM 與修復狀態仍像分散黑盒。T45 已修 Telegram 詳情/歷史 callback 400 非致命化,T44 已修 repo 可控 CI secret env/with 泄漏面,但 `/awooop/work-items` 仍把 Telegram callback 顯示為推進中,且第一屏沒有直接回答「目前能不能宣稱完整 AI 自動修復」。