fix(awooop): parallelize quality summary truth-chain fetch
This commit is contained in:
@@ -7,6 +7,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
@@ -24,6 +25,7 @@ logger = structlog.get_logger(__name__)
|
||||
|
||||
_MAX_ROWS = 100
|
||||
_JSON_TEXT_FIELDS = {"gate_result", "source_envelope"}
|
||||
_QUALITY_SUMMARY_CONCURRENCY = 8
|
||||
|
||||
|
||||
def _clean(value: Any) -> Any:
|
||||
@@ -1477,17 +1479,25 @@ async def fetch_automation_quality_summary(
|
||||
},
|
||||
)
|
||||
|
||||
records: list[dict[str, Any]] = []
|
||||
for incident in incidents:
|
||||
semaphore = asyncio.Semaphore(_QUALITY_SUMMARY_CONCURRENCY)
|
||||
|
||||
async def _quality_record(incident: dict[str, Any]) -> dict[str, Any] | None:
|
||||
incident_id = str(incident.get("incident_id") or "")
|
||||
if not incident_id:
|
||||
continue
|
||||
truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id)
|
||||
records.append({
|
||||
return None
|
||||
async with semaphore:
|
||||
truth_chain = await fetch_truth_chain(source_id=incident_id, project_id=project_id)
|
||||
return {
|
||||
"incident": truth_chain.get("incident") or incident,
|
||||
"truth_status": truth_chain.get("truth_status") or {},
|
||||
"automation_quality": truth_chain.get("automation_quality") or {},
|
||||
})
|
||||
}
|
||||
|
||||
records = [
|
||||
record
|
||||
for record in await asyncio.gather(*(_quality_record(incident) for incident in incidents))
|
||||
if record is not None
|
||||
]
|
||||
|
||||
summary = summarize_automation_quality_records(
|
||||
project_id=project_id,
|
||||
|
||||
@@ -1,3 +1,34 @@
|
||||
## 2026-05-18 | T47 Truth-chain quality summary bounded concurrency
|
||||
|
||||
**背景**:T46 前端導入 production claim 後,實測發現 `/api/v1/platform/truth-chain/quality/summary?limit=100` 超過 15 秒未回應,`limit=30` 約 7.3 秒才回應。這會讓 Operator Console 第一屏無法穩定顯示「能不能宣稱完整 AI 自動修復」,也會讓 Telegram / 前端 truth-chain 可見性打折。
|
||||
|
||||
**根因**:
|
||||
- `fetch_automation_quality_summary()` 先查最近 incidents。
|
||||
- 接著逐筆 incident 串行呼叫完整 `fetch_truth_chain()`。
|
||||
- 每筆 truth-chain 都會聚合 incident / approval / evidence / MCP / automation / KM / timeline / outbound mirror;`limit=100` 等於 100 次完整聚合串行執行。
|
||||
|
||||
**修正**:
|
||||
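- `fetch_automation_quality_summary()` 改為 bounded concurrency:以 `asyncio.gather()` 同時排程各 incident 的 `fetch_truth_chain()`,並用 `asyncio.Semaphore` 限制同時執行的數量;缺少 `incident_id` 的 incident 仍會被略過。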
|
||||
- 最多同時 8 筆 incident truth-chain 聚合(上限由模組常數 `_QUALITY_SUMMARY_CONCURRENCY = 8` 控制),避免無限制打爆 DB,也避免 100 筆完全串行。
|
||||
- 不改 scoring、不改 production claim 判讀、不改 truth-chain schema。
|
||||
|
||||
**local verification**:
|
||||
- `DATABASE_URL='sqlite+aiosqlite:///:memory:' /Users/ogt/awoooi/apps/api/.venv/bin/python -m pytest apps/api/tests/test_awooop_truth_chain_service.py -q`:24 passed。
|
||||
- `/Users/ogt/awoooi/apps/api/.venv/bin/ruff check --select E9,F821,F401 apps/api/src/services/awooop_truth_chain_service.py apps/api/tests/test_awooop_truth_chain_service.py`:pass。
|
||||
- `/Users/ogt/awoooi/apps/api/.venv/bin/python -m py_compile apps/api/src/services/awooop_truth_chain_service.py`:pass。
|
||||
- `git diff --check`:pass。
|
||||
|
||||
**目前整體進度**:
|
||||
- Alertmanager 低風險自動修復主線:約 98%。
|
||||
- 完整 AI 自動化管理產品化:約 99%。
|
||||
- 前端 AI 自動化管理介面產品化:約 93%。
|
||||
- 告警詳情/歷史/主卡/前端 deep-link 可追溯:約 99%。
|
||||
- Truth-chain quality summary 即時可用性:約 85%(待 production 驗證 `limit=100`)。
|
||||
- Telegram approval / reject callback:約 97%。
|
||||
- AwoooP MCP 使用可見性:約 90%。
|
||||
- Token hygiene:約 65%。
|
||||
- CI/CD secret masking hygiene:約 60%。
|
||||
|
||||
## 2026-05-18 | T46 AwoooP Work Items 顯示 production claim / T44 / T45
|
||||
|
||||
**背景**:統帥要求已完成與正在推進的工作要能在前端頁面同步呈現,否則 Telegram、Run、Approval、MCP、PlayBook、KM 與修復狀態仍像分散黑盒。T45 已修 Telegram 詳情/歷史 callback 400 非致命化,T44 已修 repo 可控 CI secret env/with 泄漏面,但 `/awooop/work-items` 仍把 Telegram callback 顯示為推進中,且第一屏沒有直接回答「目前能不能宣稱完整 AI 自動修復」。
|
||||
|
||||
Reference in New Issue
Block a user