Files
awoooi/apps/api/src/workers/platform_worker.py
Your Name 8629ac709b
Some checks failed
run-migration / migrate (push) Failing after 59s
Code Review / ai-code-review (push) Successful in 1m8s
Type Sync Check / check-type-sync (push) Successful in 2m27s
feat(awooop): Phase 1-8 完整實作 — AwoooP Agent Platform 六平面架構
## Phase 1-3: Control Plane + Contract System
- awooop_phase1_control_plane_2026-05-04.sql: 12 張核心表 + RLS
- awooop_phase1_batch1_rls_2026-05-04.sql: 全部 FORCE RLS + GRANT
- packages/awooop-contracts/: 六合約 JSON Schema + golden fixtures
- src/models/awooop_contracts.py: Pydantic v2 contract models(extra=forbid)
- src/repositories/contract_repository.py: contract lifecycle(draft→published→active)
- src/services/contract_service.py: HMAC publish sig + Redis multi-sig activate
- src/services/schema_validator.py: LLM output validator(retry×3, E-SCHEMA-001)

## Phase 2: Tenant Isolation
- awooop_phase2_budget_ledger_2026-05-04.sql: budget_ledger + RLS
- src/services/budget_service.py: Token Budget Hard Kill 三層防線
- src/core/context.py: PROJECT_ID ContextVar(31 background loop 自動繼承)
- src/db/base.py + models.py: project_id 欄位 + RLS set_config 注入
- src/hermes/nl_gateway.py: project_id Redis key 前綴(Phase A 雙寫)
- src/services/anomaly_counter.py: per-project 改造(Phase A fallback)

## Phase 4: Platform Shell in Shadow Mode
- awooop_phase4_run_state_2026-05-04.sql: run_state + step_journal + idempotency
- src/services/run_state_machine.py: 8-state FSM + SKIP LOCKED + stale reaper
- src/services/platform_runtime.py: UUID v7 + W3C trace_id + shadow_execute
- src/services/audit_sink.py: PII/secret redaction 9 patterns
- src/api/v1/platform/runs.py: POST/GET /v1/platform/runs(Router→Service 架構)
- src/workers/platform_worker.py: SKIP LOCKED worker + heartbeat + reaper loop
- src/main.py: platform router + lifespan worker start/stop

## Phase 5: MCP Gateway 五閘門
- awooop_phase5_mcp_gateway_2026-05-04.sql: 4 表 + RLS
- src/plugins/mcp/gateway.py: McpGateway(Gate 1~5, E-MCP-GATE-001~009)
- src/plugins/mcp/redaction_middleware.py: 雙層 redaction + 16K 截斷
- src/plugins/mcp/registry.py: __provider name mangling(ADR-116)
- src/plugins/mcp/credential_resolver.py: k8s secret ref 解析
- tests/test_mcp_credential_isolation.py: 10 個迴歸測試(secret leak 防再現)

## Phase 6-8: EwoooC + Channel Hub + Approval Token
- awooop_phase6_ewoooc_onboarding_2026-05-04.sql: ewoooc tenant + 4 read-only MCP tools
- awooop_phase7_channel_hub_2026-05-04.sql: conversation_event + outbound_message
- src/services/provider_proxy.py: ProviderProxy + PlatformEnvelope(ADR-115)
- src/services/channel_hub.py: Telegram inbound mirror + Progressive Feedback(30s)
- src/services/awooop_approval_token.py: HS256 + jti NX replay 防護 + suggest mode

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-04 19:31:53 +08:00

197 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Platform Worker
================
AwoooP Phase 4: SKIP LOCKED worker + stale run reaper (ADR-114)
2026-05-04 ogt + Claude Sonnet 4.6
功能:
1. Worker Loop: 以 SKIP LOCKED 從 awooop_run_state 取 PENDING run 並執行
2. Stale Run Reaper: 每 60 秒掃描 lease 過期的 RUNNING run
3. Shadow Mode Enforcer: 所有 Phase 4 run 強制 is_shadow=True
Worker 設計:
- 啟動時以 asyncio.create_task 掛入 main.py lifespan
- 多個 worker 安全並行(SKIP LOCKED 保證每 run 只被一個 worker 取得)
- Heartbeat 每 15 秒更新 lease(防 stale reaper 誤殺)
- 優雅停機:收到 stop signal 後完成當前 run 再退出
與 legacy 的關係:
- 完全獨立,不碰任何既有 signal_worker.py / aider_event_processor.py
- 只處理 awooop_run_state 表(legacy signal 不寫入此表)
"""
from __future__ import annotations
import asyncio
from datetime import datetime, timezone
import structlog
from src.services.platform_runtime import shadow_execute
from src.services.run_state_machine import (
HEARTBEAT_INTERVAL_SECONDS,
STALE_REAPER_INTERVAL_SECONDS,
acquire_pending_run,
heartbeat,
reap_stale_runs,
)
logger = structlog.get_logger(__name__)
# Phase 4 always processes the "awoooi" tenant (Phase 6+ is planned to switch
# to multi-tenant scanning).
_DEFAULT_PROJECT_ID = "awoooi"
_WORKER_POLL_INTERVAL_SECONDS = 5  # wait between polls when no run is pending
_WORKER_CONCURRENCY = 2  # maximum number of runs executed in parallel
class PlatformWorker:
    """
    Platform worker: SKIP LOCKED acquisition + shadow execution + stale reaper.

    Usage (inside the main.py lifespan):
        worker = PlatformWorker()
        asyncio.create_task(worker.run_loop())
        asyncio.create_task(worker.reaper_loop())

    Call ``stop()`` to request a graceful shutdown of both loops.
    """

    def __init__(self, project_id: str = _DEFAULT_PROJECT_ID) -> None:
        """Create a worker bound to a single project/tenant.

        Args:
            project_id: Tenant whose runs this worker acquires and reaps.
        """
        self.project_id = project_id
        self._stop_event = asyncio.Event()
        # String run ids currently executing; used to cap concurrency.
        self._active_runs: set[str] = set()
        # Strong references to in-flight run tasks. The event loop keeps only
        # weak references to tasks, so an unreferenced task could otherwise be
        # garbage-collected before it finishes.
        self._run_tasks: set[asyncio.Task[None]] = set()

    def stop(self) -> None:
        """Signal both loops to shut down gracefully."""
        self._stop_event.set()

    async def _sleep_or_stop(self, seconds: float) -> bool:
        """Sleep for up to ``seconds``, waking early when stop is requested.

        Returns:
            True if a stop has been requested, False otherwise.
        """
        try:
            await asyncio.wait_for(self._stop_event.wait(), timeout=seconds)
        except asyncio.TimeoutError:
            pass
        return self._stop_event.is_set()

    async def run_loop(self) -> None:
        """
        Main worker loop.

        1. Acquire one pending run via ``acquire_pending_run``.
        2. Execute it in its own task (``shadow_execute`` plus a heartbeat).
        3. When nothing is pending, wait ``_WORKER_POLL_INTERVAL_SECONDS``
           before polling again.

        On a graceful stop the loop waits for in-flight runs to finish before
        returning. If the loop task itself is cancelled it exits immediately
        without waiting.
        """
        logger.info("platform_worker_started", project_id=self.project_id)
        cancelled = False
        while not self._stop_event.is_set():
            try:
                # Enforce the concurrency cap.
                if len(self._active_runs) >= _WORKER_CONCURRENCY:
                    await self._sleep_or_stop(1)
                    continue
                run = await acquire_pending_run(self.project_id)
                if run is None:
                    await self._sleep_or_stop(_WORKER_POLL_INTERVAL_SECONDS)
                    continue
                run_id_str = str(run.run_id)
                self._active_runs.add(run_id_str)
                # Each run gets its own task so the loop is never blocked.
                task = asyncio.create_task(
                    self._execute_with_heartbeat(run),
                    name=f"platform_run_{run_id_str[:8]}",
                )
                # Keep the task alive until it completes.
                self._run_tasks.add(task)
                task.add_done_callback(self._run_tasks.discard)
            except asyncio.CancelledError:
                cancelled = True
                break
            except Exception as exc:
                logger.exception("platform_worker_loop_error", error=str(exc))
                await self._sleep_or_stop(_WORKER_POLL_INTERVAL_SECONDS)
        if not cancelled and self._run_tasks:
            # Graceful stop: let in-flight runs finish. asyncio.wait does not
            # cancel the awaited tasks if this coroutine is cancelled.
            try:
                await asyncio.wait(list(self._run_tasks))
            except asyncio.CancelledError:
                pass
        logger.info("platform_worker_stopped", project_id=self.project_id)

    async def _execute_with_heartbeat(self, run: object) -> None:
        """
        Run ``shadow_execute`` while a heartbeat task keeps refreshing the lease.

        Errors are logged and swallowed. The heartbeat task is cancelled and
        the run's concurrency slot is released on every exit path.

        Args:
            run: The acquired run; must be an ``AwoooPRunState`` instance.
        """
        from src.db.awooop_models import AwoooPRunState

        heartbeat_task: asyncio.Task[None] | None = None
        # Computed before the try block so the finally clause can always
        # release the slot that run_loop registered under this id.
        run_id_str = str(getattr(run, "run_id", None))
        try:
            # Explicit check instead of `assert`, which is stripped under -O.
            if not isinstance(run, AwoooPRunState):
                raise TypeError(
                    f"expected AwoooPRunState, got {type(run).__name__}"
                )
            # Heartbeat task: periodically refreshes the lease.
            heartbeat_task = asyncio.create_task(
                self._heartbeat_loop(run.run_id, self.project_id),
                name=f"heartbeat_{run_id_str[:8]}",
            )
            await shadow_execute(run)
        except Exception as exc:
            logger.exception(
                "platform_run_execution_error",
                run_id=run_id_str,
                error=str(exc),
            )
        finally:
            if heartbeat_task is not None:
                heartbeat_task.cancel()
            self._active_runs.discard(run_id_str)

    async def _heartbeat_loop(self, run_id: object, project_id: str) -> None:
        """Refresh the lease every HEARTBEAT_INTERVAL_SECONDS until cancelled.

        A failed heartbeat is logged as a warning and retried on the next tick.
        """
        while True:
            await asyncio.sleep(HEARTBEAT_INTERVAL_SECONDS)
            try:
                await heartbeat(run_id, project_id)  # type: ignore[arg-type]
            except Exception as exc:
                logger.warning(
                    "platform_heartbeat_failed",
                    run_id=str(run_id),
                    error=str(exc),
                )

    async def reaper_loop(self) -> None:
        """
        Stale-run reaper: every STALE_REAPER_INTERVAL_SECONDS, call
        ``reap_stale_runs`` for this project and log any reaped runs.

        Per the original notes (the logic lives in ``reap_stale_runs``, which
        is not visible here):
            lease < NOW() and attempt <  max -> PENDING (retry)
            lease < NOW() and attempt >= max -> FAILED (E-RUN-002)

        The interval wait ends early when a stop is requested, so shutdown
        does not have to sit out a full interval.
        """
        logger.info("stale_run_reaper_started", project_id=self.project_id)
        while not self._stop_event.is_set():
            try:
                if await self._sleep_or_stop(STALE_REAPER_INTERVAL_SECONDS):
                    break
                reaped = await reap_stale_runs(self.project_id)
                if reaped:
                    logger.info(
                        "stale_run_reaper_cycle",
                        project_id=self.project_id,
                        reaped=reaped,
                        ts=datetime.now(timezone.utc).isoformat(),
                    )
            except asyncio.CancelledError:
                break
            except Exception as exc:
                logger.exception("stale_run_reaper_error", error=str(exc))
        logger.info("stale_run_reaper_stopped", project_id=self.project_id)
# ─────────────────────────────────────────────────────────────────────────────
# Singleton (used when attaching the worker to the application lifespan)
# ─────────────────────────────────────────────────────────────────────────────
_platform_worker: PlatformWorker | None = None
def get_platform_worker() -> PlatformWorker:
    """Return the module-wide PlatformWorker, creating it on first use."""
    global _platform_worker
    worker = _platform_worker
    if worker is None:
        # First call: build the instance with defaults and cache it.
        worker = _platform_worker = PlatformWorker()
    return worker
# Strong references to the background loop tasks. The event loop keeps only
# weak references to tasks, so a task nobody references may be
# garbage-collected before it finishes.
_platform_worker_tasks: set[asyncio.Task[None]] = set()


async def start_platform_worker() -> None:
    """Start the worker loop and the reaper loop as background tasks.

    Intended to be called from the main.py lifespan startup. Each task is held
    in a module-level set until it completes.
    """
    worker = get_platform_worker()
    loops = (
        (worker.run_loop, "platform_worker_run_loop"),
        (worker.reaper_loop, "platform_worker_reaper_loop"),
    )
    for loop_fn, task_name in loops:
        task = asyncio.create_task(loop_fn(), name=task_name)
        _platform_worker_tasks.add(task)
        # Drop the reference once the task is done so the set cannot grow.
        task.add_done_callback(_platform_worker_tasks.discard)
    logger.info("platform_worker_tasks_started")
async def stop_platform_worker() -> None:
    """Request a stop on the shared worker; call from the main.py lifespan shutdown."""
    get_platform_worker().stop()
    logger.info("platform_worker_stop_requested")