Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
1. aiderw: session_end 補 model+cwd (AI Router feedback loop 修通) 2. repository: model_stats_since SQL 改 COALESCE(session_end, session_start) model 3. aider_event_service: classify_severity 移除 error_count 觸發告警(防假陽性) 4. worker: run_aider_event_processor_loop 包 proc.start() try/except(防靜默崩潰) 2026-04-20 @ Asia/Taipei
136 lines
4.6 KiB
Python
136 lines
4.6 KiB
Python
# aider_event_service | 2026-04-20 @ Asia/Taipei
"""Classify aider events and convert them into awoooi signal_data for IncidentService.

Design principles:
- No separate dedup — the existing IncidentService.create_incident_from_signal
  already applies a 3-minute fingerprint debounce.
- No pattern extraction — Task A8 ai_router aggregates directly from
  aider_event_repository.
- Mostly pure functions; side effects (creating incidents) are managed by the
  caller (the A7 processor job).
- Redis stream push (Task A6): bridges to the Router layer.
"""
|
||
from __future__ import annotations
|
||
import os
|
||
from typing import Any
|
||
from src.models.aider import AiderEventIn, AiderBatchIn
|
||
from src.utils.secret_redactor import redact
|
||
|
||
|
||
# ---- 分類 ----
|
||
|
||
def classify_severity(ev: AiderEventIn) -> str | None:
    """Map an aider event to an awoooi Signal severity string.

    Per the original note, the returned string is later converted to P0-P3
    by ``_parse_severity`` (not visible in this module).

    Mapping:
        error                                        -> "warning" (P2)
        silent_timeout                               -> "info"    (P3)
        session_end + nonzero exit + error_count>=3  -> "high"    (P1)
        session_end + nonzero exit                   -> "warning" (P2)
        anything else                                -> None

    Args:
        ev: Event whose ``type`` and ``payload`` are inspected.

    Returns:
        The severity string, or None when no incident should be created.
    """
    kind = ev.type
    if kind == "error":
        return "warning"
    if kind == "silent_timeout":
        return "info"
    if kind != "session_end":
        return None

    # A session that exited cleanly never raises an incident, whatever its
    # error_count says.
    exit_code = ev.payload.get("exit_code", 0)
    if exit_code == 0:
        return None

    err_count = ev.payload.get("error_count", 0)
    # The key may be present with a null or non-numeric value; comparing that
    # with ``>=`` would raise TypeError, so treat it as "no errors counted".
    if not isinstance(err_count, (int, float)):
        err_count = 0
    return "high" if err_count >= 3 else "warning"
|
||
|
||
|
||
def should_create_incident(ev: AiderEventIn) -> bool:
    """Return True when ``classify_severity`` assigns the event a severity."""
    severity = classify_severity(ev)
    return severity is not None
|
||
|
||
|
||
# ---- signal_data 構造 ----
|
||
|
||
def build_signal_data(ev: AiderEventIn) -> dict[str, Any] | None:
    """Build the dict passed to IncidentService.create_incident_from_signal.

    Events that ``classify_severity`` maps to no severity (session_start and
    the like) yield None.

    Fingerprint design: 'aider:{session_id}:{type}'. Per the original design
    note this lets the existing 3-minute debounce do its job, e.g. five
    consecutive errors within 60s of one session create a single incident.
    """
    severity = classify_severity(ev)
    if severity is None:
        return None

    # Every payload field is read from the redacted copy, never from the raw
    # payload.
    payload = redact(ev.payload)
    model_name = payload.get("model") or "unknown"
    repo_name = _repo_basename(payload.get("cwd") or "")

    alert_name = {
        "error": "AiderError",
        "silent_timeout": "AiderSilentTimeout",
        "session_end": "AiderSessionFailure",
    }.get(ev.type, "AiderEvent")

    labels = {
        "session_id": ev.session_id,
        "host": ev.host,
        "repo": repo_name,
        "model": model_name,
        "event_type": ev.type,
    }
    annotations = {
        "summary": f"[aider/{ev.type}] {repo_name} ({model_name})",
        "description": _compact_desc(ev.type, payload),
    }

    signal = {
        "alert_name": alert_name,
        "severity": severity,
        # aider is none of prometheus/signoz/alertmanager/telegram
        "source": "manual",
        "fingerprint": f"aider:{ev.session_id}:{ev.type}",
        "target": repo_name or "unknown",
        "labels": labels,
        "annotations": annotations,
    }
    return signal
|
||
|
||
|
||
def _repo_basename(cwd: str) -> str:
    """Return the last path component of *cwd*, e.g. /Users/ogt/awoooi -> awoooi.

    Trailing slashes are ignored; a falsy *cwd* yields an empty string.
    """
    if not cwd:
        return ""
    trimmed = cwd.rstrip("/")
    # rpartition leaves the whole string in the last slot when no "/" remains.
    _, _, tail = trimmed.rpartition("/")
    return tail
|
||
|
||
|
||
def _compact_desc(event_type: str, payload: dict) -> str:
    """Condense a payload into a description of at most 200 characters.

    The payload is expected to be redacted already (the caller in this module
    passes the output of ``redact``).

    Args:
        event_type: Event type; "error", "silent_timeout" and "session_end"
            get a dedicated format, anything else falls back to
            ``str(payload)``.
        payload: Event payload; missing keys fall back to defaults.

    Returns:
        The description, never longer than 200 characters.
    """
    max_len = 200
    if event_type == "error":
        kind = payload.get("kind", "unknown")
        text = f"kind={kind} message={_clip_text(payload.get('message'), 150)}"
    elif event_type == "silent_timeout":
        tail = _clip_text(payload.get("last_output_tail"), 50)
        text = f"idle_sec={payload.get('idle_sec')} tail={tail}"
    elif event_type == "session_end":
        text = (f"exit_code={payload.get('exit_code')} "
                f"errors={payload.get('error_count',0)} "
                f"duration={payload.get('duration_sec',0)}s "
                f"tokens={payload.get('tokens_sent',0)}+{payload.get('tokens_received',0)}")
    else:
        text = str(payload)
    # Enforce the documented cap on every branch; an oversized "kind" could
    # otherwise push the error description past it.
    return text[:max_len]


def _clip_text(value: object, limit: int) -> str:
    """Render *value* as text cut to *limit* characters; None becomes ''."""
    # Slicing the raw value directly would raise TypeError for None or
    # non-string values, so convert to text first.
    text = "" if value is None else str(value)
    return text[:limit]
|
||
|
||
|
||
# ---- Redis stream 推送 (Task A6) ----
|
||
|
||
async def push_aider_batch_to_stream(batch: AiderBatchIn) -> list[str]:
    """Push every event of *batch* to a Redis stream and return the entry IDs.

    The stream key comes from the AIDER_EVENTS_STREAM_KEY environment
    variable, defaulting to "signals:aider:events". Events are added one at a
    time in batch order; each entry carries a single "payload" field holding
    the event's ``model_dump_json()`` output.
    """
    # Imported at call time, as in the original.
    from src.core.redis_client import get_redis

    key = os.environ.get("AIDER_EVENTS_STREAM_KEY", "signals:aider:events")
    client = get_redis()

    # Awaiting inside the comprehension keeps the adds strictly sequential.
    return [
        _to_str(await client.xadd(key, {"payload": event.model_dump_json()}))
        for event in batch.events
    ]
|
||
|
||
|
||
def _to_str(x) -> str:
    """Return *x* as str, decoding it first when it is a bytes object."""
    if isinstance(x, bytes):
        return x.decode()
    return str(x)
|