216 lines
7.9 KiB
Python
216 lines
7.9 KiB
Python
# aider_event_processor_job | 2026-04-20 @ Asia/Taipei
|
||
"""aider-watch 事件處理器
|
||
|
||
消費 Redis stream `signals:aider:events` (由 /api/v1/aider/events webhook 推入):
|
||
1. 解析 AiderEventIn
|
||
2. 若應該觸發 incident → build_signal_data → IncidentEngine.process_signal
|
||
3. 寫入 aider_events 表(含 incident_id 關聯)
|
||
4. XACK
|
||
|
||
仿 workers/signal_worker.py,但簡化(無 Active Sweeper)。
|
||
|
||
@author Claude Code
|
||
@date 2026-04-20
|
||
"""
|
||
from __future__ import annotations
|
||
import asyncio
|
||
import os
|
||
import socket
|
||
from typing import Any
|
||
|
||
import structlog
|
||
|
||
from src.core.redis_client import get_redis, get_worker_redis, init_worker_redis_pool
|
||
from src.db.base import get_db_context
|
||
from src.models.aider import AiderEventIn
|
||
from src.repositories.aider_event_repository import AiderEventRepository
|
||
from src.services.aider_event_service import build_signal_data, should_create_incident
|
||
from src.services.incident_engine import get_incident_engine
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
# =========================================================================
|
||
# Constants
|
||
# =========================================================================
|
||
|
||
STREAM_KEY_DEFAULT = "signals:aider:events"
|
||
CONSUMER_GROUP = "aider_processors"
|
||
CONSUMER_NAME = f"aider_{socket.gethostname()}"
|
||
BATCH_SIZE = 10
|
||
BLOCK_MS = 5000
|
||
GRACEFUL_SHUTDOWN_TIMEOUT_S = 30
|
||
|
||
|
||
def _stream_key() -> str:
|
||
"""取得 Redis stream key,可覆蓋環境變數。"""
|
||
return os.environ.get("AIDER_EVENTS_STREAM_KEY", STREAM_KEY_DEFAULT)
|
||
|
||
|
||
# =========================================================================
|
||
# Processor
|
||
# =========================================================================
|
||
|
||
class AiderEventProcessor:
|
||
"""Redis Streams consumer for aider events."""
|
||
|
||
def __init__(self) -> None:
|
||
self._running = False
|
||
self._task: asyncio.Task | None = None
|
||
|
||
async def _ensure_consumer_group(self) -> None:
|
||
"""確保 Consumer Group 存在。"""
|
||
r = get_redis()
|
||
try:
|
||
await r.xgroup_create(_stream_key(), CONSUMER_GROUP,
|
||
id="0", mkstream=True)
|
||
logger.info("aider_consumer_group_created",
|
||
stream=_stream_key(), group=CONSUMER_GROUP)
|
||
except Exception as e:
|
||
if "BUSYGROUP" in str(e):
|
||
logger.debug("aider_consumer_group_exists", group=CONSUMER_GROUP)
|
||
else:
|
||
raise
|
||
|
||
async def start(self) -> None:
|
||
"""啟動消費循環。"""
|
||
if self._running:
|
||
logger.warning("aider_processor_already_running")
|
||
return
|
||
await init_worker_redis_pool() # 確保 worker 專屬長連線池就緒(冪等)
|
||
await self._ensure_consumer_group()
|
||
self._running = True
|
||
self._task = asyncio.create_task(self._consume_loop())
|
||
logger.info("aider_processor_started",
|
||
stream=_stream_key(), group=CONSUMER_GROUP, consumer=CONSUMER_NAME)
|
||
|
||
async def stop(self) -> None:
|
||
"""優雅關閉。"""
|
||
if not self._running:
|
||
return
|
||
self._running = False
|
||
if self._task:
|
||
try:
|
||
await asyncio.wait_for(self._task, timeout=GRACEFUL_SHUTDOWN_TIMEOUT_S)
|
||
except TimeoutError:
|
||
self._task.cancel()
|
||
except asyncio.CancelledError:
|
||
pass
|
||
logger.info("aider_processor_stopped")
|
||
|
||
async def _consume_loop(self) -> None:
|
||
"""主消費循環。"""
|
||
r = get_worker_redis()
|
||
stream_key = _stream_key()
|
||
while self._running:
|
||
try:
|
||
messages = await r.xreadgroup(
|
||
groupname=CONSUMER_GROUP, consumername=CONSUMER_NAME,
|
||
streams={stream_key: ">"},
|
||
count=BATCH_SIZE, block=BLOCK_MS,
|
||
)
|
||
if not messages:
|
||
continue
|
||
for _stream_name, entries in messages:
|
||
for message_id, data in entries:
|
||
await self._process_one(stream_key, message_id, data)
|
||
except asyncio.CancelledError:
|
||
break
|
||
except Exception as e:
|
||
logger.exception("aider_processor_loop_error", error=str(e))
|
||
await asyncio.sleep(1.0)
|
||
|
||
async def _process_one(
|
||
self, stream_key: str, msg_id: Any, data: dict, _session_factory=None
|
||
) -> None:
|
||
"""處理單筆 message:parse → (maybe) incident → DB write → ACK。
|
||
_session_factory: 可注入測試用 factory;production 預設使用 get_db_context() 設定 RLS context。
|
||
"""
|
||
try:
|
||
raw = data.get(b"payload") or data.get("payload")
|
||
if isinstance(raw, bytes):
|
||
raw = raw.decode()
|
||
ev = AiderEventIn.model_validate_json(raw)
|
||
except Exception:
|
||
logger.exception("aider_processor_parse_failed", msg_id=msg_id)
|
||
# Parse 失敗不重試(bad payload),直接 ACK 避免卡 pending
|
||
await self._ack(stream_key, msg_id)
|
||
return
|
||
|
||
incident_id: str | None = None
|
||
if should_create_incident(ev):
|
||
try:
|
||
sig_data = build_signal_data(ev)
|
||
if sig_data is not None:
|
||
engine = get_incident_engine()
|
||
incident = await engine.process_signal(sig_data)
|
||
if incident:
|
||
incident_id = getattr(incident, "incident_id", None)
|
||
except Exception:
|
||
logger.exception("aider_processor_incident_failed",
|
||
session_id=ev.session_id, type=ev.type)
|
||
# 不中斷 — 即使 incident 失敗,event 仍要持久化
|
||
|
||
try:
|
||
if _session_factory is None:
|
||
async with get_db_context() as session:
|
||
repo = AiderEventRepository(session)
|
||
await repo.insert(
|
||
session_id=ev.session_id, ts=ev.ts, type_=ev.type,
|
||
host=ev.host, payload=ev.payload, incident_id=incident_id,
|
||
)
|
||
else:
|
||
session_factory = _session_factory
|
||
async with session_factory() as session:
|
||
repo = AiderEventRepository(session)
|
||
await repo.insert(
|
||
session_id=ev.session_id, ts=ev.ts, type_=ev.type,
|
||
host=ev.host, payload=ev.payload, incident_id=incident_id,
|
||
)
|
||
await session.commit()
|
||
except Exception:
|
||
logger.exception("aider_processor_db_write_failed",
|
||
session_id=ev.session_id)
|
||
# DB 失敗就不 ACK,靠 Redis pending list 保留重試
|
||
return
|
||
|
||
await self._ack(stream_key, msg_id)
|
||
|
||
async def _ack(self, stream_key: str, msg_id: Any) -> None:
|
||
"""確認訊息已處理。"""
|
||
try:
|
||
r = get_worker_redis()
|
||
await r.xack(stream_key, CONSUMER_GROUP, msg_id)
|
||
except Exception:
|
||
logger.exception("aider_processor_ack_failed", msg_id=msg_id)
|
||
|
||
|
||
# =========================================================================
|
||
# Lifespan entry (called from main.py in Task A9)
|
||
# =========================================================================
|
||
|
||
_instance: AiderEventProcessor | None = None
|
||
|
||
|
||
def get_aider_event_processor() -> AiderEventProcessor:
|
||
"""取得 AiderEventProcessor singleton。"""
|
||
global _instance
|
||
if _instance is None:
|
||
_instance = AiderEventProcessor()
|
||
return _instance
|
||
|
||
|
||
async def run_aider_event_processor_loop() -> None:
|
||
"""main.py lifespan 呼叫此函式。"""
|
||
proc = get_aider_event_processor()
|
||
try:
|
||
await proc.start()
|
||
except Exception:
|
||
logger.exception("aider_processor_start_failed")
|
||
return
|
||
try:
|
||
while proc._running:
|
||
await asyncio.sleep(60)
|
||
except asyncio.CancelledError:
|
||
await proc.stop()
|
||
raise
|