379 lines
15 KiB
Python
379 lines
15 KiB
Python
"""AwoooP Token Budget Hard Kill Service
|
||
=======================================
|
||
ADR-120: 三層 Hard Kill 防護架構
|
||
2026-05-04 ogt + Claude Sonnet 4.6(Phase 2.6)
|
||
|
||
防線:
|
||
1. Pre-call check(呼叫前)— Layer 1 Tenant + Layer 2 Platform + Layer 3 Emergency Kill
|
||
2. Post-call accounting(呼叫後)— 寫 budget_ledger + 更新 Redis cache
|
||
3. 告警閾值通知(80% / 95% Telegram 告警)
|
||
|
||
注意:Layer 0 Run budget 需要 awooop_run_state(Phase 3 SAGA 實作後補加)
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import time
|
||
from decimal import Decimal
|
||
|
||
import structlog
|
||
|
||
from src.core.config import settings
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 告警閾值(ADR-120 D4)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
BUDGET_ALERT_THRESHOLDS = {
|
||
"warn": Decimal("0.80"),
|
||
"critical": Decimal("0.95"),
|
||
"hard_kill": Decimal("1.00"),
|
||
}
|
||
|
||
# Redis key 前綴
|
||
_EMERGENCY_KILL_KEY = "platform:budget:emergency_kill"
|
||
_TENANT_BUDGET_KEY_PREFIX = "budget:tenant:" # {project_id}:daily_used_usd
|
||
_PLATFORM_BUDGET_KEY = "budget:platform:daily_used_usd"
|
||
_BUDGET_CACHE_TTL = 300 # 5 分鐘,每次寫入後 refresh
|
||
|
||
|
||
class BudgetExhaustedError(Exception):
|
||
"""LLM call 被 hard kill 攔截"""
|
||
|
||
def __init__(self, error_code: str, message: str) -> None:
|
||
self.error_code = error_code
|
||
super().__init__(f"[{error_code}] {message}")
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 費用計算(按模型定價估算)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
# USD per 1M tokens(in + out)
|
||
_COST_PER_MILLION_TOKENS: dict[str, tuple[float, float]] = {
|
||
# (prompt_per_M, completion_per_M)
|
||
"claude-opus-4-7": (15.0, 75.0),
|
||
"claude-sonnet-4-6": (3.0, 15.0),
|
||
"claude-haiku-4-5": (0.8, 4.0),
|
||
"gpt-4o": (5.0, 15.0),
|
||
"gpt-4o-mini": (0.15, 0.6),
|
||
"gemini-2.0-flash": (0.075, 0.3),
|
||
"deepseek-r1:14b": (0.0, 0.0), # local Ollama — 無費用
|
||
"qwen3:8b": (0.0, 0.0), # local Ollama — 無費用
|
||
}
|
||
_DEFAULT_COST_PER_M = (3.0, 15.0) # fallback → claude-sonnet
|
||
|
||
|
||
def estimate_cost(
|
||
prompt_tokens: int,
|
||
completion_tokens: int,
|
||
model: str,
|
||
) -> Decimal:
|
||
"""估算一次 LLM call 的費用(USD)"""
|
||
prompt_rate, completion_rate = _COST_PER_MILLION_TOKENS.get(
|
||
model, _DEFAULT_COST_PER_M
|
||
)
|
||
cost = (prompt_tokens / 1_000_000 * prompt_rate +
|
||
completion_tokens / 1_000_000 * completion_rate)
|
||
return Decimal(str(round(cost, 6)))
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Pre-call Budget Check(ADR-120 D2 防線 1)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
async def check_budget_before_llm_call(
|
||
project_id: str,
|
||
model: str,
|
||
estimated_prompt_tokens: int = 4000,
|
||
*,
|
||
agent_id: str | None = None,
|
||
) -> None:
|
||
"""
|
||
LLM call 前的三層 budget check。
|
||
|
||
超出任一層預算 → 拋出 BudgetExhaustedError,阻止 API call。
|
||
Redis 不可用時 fail-open(不阻擋呼叫,但記 warning)。
|
||
|
||
Args:
|
||
project_id: 租戶 ID
|
||
model: 模型名稱(用於費用估算)
|
||
estimated_prompt_tokens: 預估 prompt token 數(保守估計 × 1.5 已含在外)
|
||
"""
|
||
# Layer 3:Emergency Kill Switch(最優先)
|
||
await check_emergency_kill()
|
||
|
||
# Local Ollama 模型無費用,跳過 Layer 1/2
|
||
if model in {"deepseek-r1:14b", "qwen3:8b"} or model.startswith("ollama/"):
|
||
return
|
||
|
||
estimated_cost = estimate_cost(estimated_prompt_tokens, 0, model)
|
||
|
||
# Layer 2:Tenant Budget
|
||
await _check_tenant_budget(project_id, estimated_cost)
|
||
|
||
# Layer 1:Platform Budget
|
||
await _check_platform_budget(estimated_cost)
|
||
|
||
|
||
async def check_emergency_kill() -> None:
|
||
"""Layer 3: Emergency Kill Switch — Redis key platform:budget:emergency_kill"""
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
if await redis.exists(_EMERGENCY_KILL_KEY):
|
||
raise BudgetExhaustedError(
|
||
"E-BUDGET-004",
|
||
"Emergency kill switch activated — contact platform admin",
|
||
)
|
||
except BudgetExhaustedError:
|
||
raise
|
||
except Exception as exc:
|
||
logger.warning("budget_emergency_kill_check_failed", error=str(exc))
|
||
|
||
|
||
async def _check_tenant_budget(project_id: str, estimated_cost: Decimal) -> None:
|
||
"""Layer 2: Tenant Budget(Redis 快取 + awooop_projects.budget_limit_usd)"""
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
|
||
# 讀取 Tenant 每日已用金額
|
||
cache_key = f"{_TENANT_BUDGET_KEY_PREFIX}{project_id}"
|
||
used_raw = await redis.get(cache_key)
|
||
used_usd = Decimal(used_raw.decode() if isinstance(used_raw, bytes) else used_raw or "0")
|
||
|
||
# 讀取 Tenant 預算上限(從 awooop_projects 表)
|
||
limit_usd = await _get_tenant_budget_limit(project_id)
|
||
if limit_usd is None:
|
||
return # 無上限 → 放行
|
||
|
||
if used_usd + estimated_cost > limit_usd:
|
||
raise BudgetExhaustedError(
|
||
"E-BUDGET-002",
|
||
f"Tenant {project_id} budget exhausted: "
|
||
f"used ${used_usd:.4f} / ${limit_usd:.4f}",
|
||
)
|
||
|
||
# 告警閾值
|
||
usage_pct = (used_usd + estimated_cost) / limit_usd
|
||
if usage_pct >= BUDGET_ALERT_THRESHOLDS["critical"]:
|
||
logger.warning(
|
||
"budget_tenant_critical",
|
||
project_id=project_id,
|
||
usage_pct=float(usage_pct),
|
||
used_usd=float(used_usd),
|
||
limit_usd=float(limit_usd),
|
||
)
|
||
elif usage_pct >= BUDGET_ALERT_THRESHOLDS["warn"]:
|
||
logger.warning(
|
||
"budget_tenant_warn",
|
||
project_id=project_id,
|
||
usage_pct=float(usage_pct),
|
||
used_usd=float(used_usd),
|
||
limit_usd=float(limit_usd),
|
||
)
|
||
|
||
except BudgetExhaustedError:
|
||
raise
|
||
except Exception as exc:
|
||
logger.warning("budget_tenant_check_failed", project_id=project_id, error=str(exc))
|
||
|
||
|
||
async def _check_platform_budget(estimated_cost: Decimal) -> None:
|
||
"""Layer 1: Platform Budget(config 靜態上限 + Redis 累計)"""
|
||
platform_limit = getattr(settings, "PLATFORM_DAILY_BUDGET_USD", None)
|
||
if not platform_limit:
|
||
return # 未設定 → 放行
|
||
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
used_raw = await redis.get(_PLATFORM_BUDGET_KEY)
|
||
used_usd = Decimal(used_raw.decode() if isinstance(used_raw, bytes) else used_raw or "0")
|
||
limit_usd = Decimal(str(platform_limit))
|
||
|
||
if used_usd + estimated_cost > limit_usd:
|
||
raise BudgetExhaustedError(
|
||
"E-BUDGET-003",
|
||
f"Platform budget exhausted: used ${used_usd:.4f} / ${limit_usd:.4f} — "
|
||
"all LLM calls suspended",
|
||
)
|
||
except BudgetExhaustedError:
|
||
raise
|
||
except Exception as exc:
|
||
logger.warning("budget_platform_check_failed", error=str(exc))
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Post-call Accounting(ADR-120 D2 防線 2)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
async def record_token_usage(
|
||
*,
|
||
project_id: str,
|
||
model: str,
|
||
provider: str,
|
||
prompt_tokens: int,
|
||
completion_tokens: int,
|
||
agent_id: str | None = None,
|
||
run_id: str | None = None,
|
||
) -> Decimal:
|
||
"""
|
||
LLM call 完成後記帳。
|
||
|
||
1. 計算實際費用
|
||
2. INSERT budget_ledger
|
||
3. 更新 Redis budget cache(async,不阻擋回傳)
|
||
4. 觸發告警閾值通知
|
||
|
||
Returns:
|
||
actual_cost_usd
|
||
"""
|
||
import asyncio
|
||
from uuid import UUID
|
||
|
||
actual_cost = estimate_cost(prompt_tokens, completion_tokens, model)
|
||
|
||
# 寫入 budget_ledger(非阻擋)
|
||
asyncio.create_task(
|
||
_write_budget_ledger(
|
||
project_id=project_id,
|
||
agent_id=agent_id,
|
||
run_id=UUID(run_id) if run_id else None,
|
||
model=model,
|
||
provider=provider,
|
||
prompt_tokens=prompt_tokens,
|
||
completion_tokens=completion_tokens,
|
||
cost_usd=actual_cost,
|
||
),
|
||
name="budget_ledger_write",
|
||
)
|
||
|
||
# 更新 Redis cache(非阻擋)
|
||
asyncio.create_task(
|
||
_update_budget_cache(project_id, actual_cost),
|
||
name="budget_cache_update",
|
||
)
|
||
|
||
logger.info(
|
||
"token_usage_recorded",
|
||
project_id=project_id,
|
||
model=model,
|
||
prompt_tokens=prompt_tokens,
|
||
completion_tokens=completion_tokens,
|
||
cost_usd=float(actual_cost),
|
||
)
|
||
return actual_cost
|
||
|
||
|
||
async def _write_budget_ledger(
|
||
*,
|
||
project_id: str,
|
||
agent_id: str | None,
|
||
run_id, # UUID | None
|
||
model: str,
|
||
provider: str,
|
||
prompt_tokens: int,
|
||
completion_tokens: int,
|
||
cost_usd: Decimal,
|
||
) -> None:
|
||
"""INSERT budget_ledger(leWOOOgo: DB 寫入在 Service 層,非 Router)"""
|
||
try:
|
||
from sqlalchemy import text
|
||
from src.db.base import get_db_context
|
||
async with get_db_context(project_id) as db:
|
||
await db.execute(
|
||
text("""
|
||
INSERT INTO budget_ledger
|
||
(project_id, agent_id, run_id, model, provider,
|
||
prompt_tokens, completion_tokens, cost_usd)
|
||
VALUES
|
||
(:project_id, :agent_id, :run_id, :model, :provider,
|
||
:prompt_tokens, :completion_tokens, :cost_usd)
|
||
"""),
|
||
{
|
||
"project_id": project_id,
|
||
"agent_id": agent_id,
|
||
"run_id": run_id,
|
||
"model": model,
|
||
"provider": provider,
|
||
"prompt_tokens": prompt_tokens,
|
||
"completion_tokens": completion_tokens,
|
||
"cost_usd": cost_usd,
|
||
},
|
||
)
|
||
except Exception as exc:
|
||
logger.warning("budget_ledger_write_failed", project_id=project_id, error=str(exc))
|
||
|
||
|
||
async def _update_budget_cache(project_id: str, cost: Decimal) -> None:
|
||
"""用 Redis INCRBYFLOAT 更新 Tenant + Platform daily budget cache"""
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
cost_f = float(cost)
|
||
|
||
# Tenant daily budget
|
||
tenant_key = f"{_TENANT_BUDGET_KEY_PREFIX}{project_id}"
|
||
await redis.incrbyfloat(tenant_key, cost_f)
|
||
await redis.expire(tenant_key, 86400) # 24h TTL(每日重置)
|
||
|
||
# Platform daily budget
|
||
await redis.incrbyfloat(_PLATFORM_BUDGET_KEY, cost_f)
|
||
await redis.expire(_PLATFORM_BUDGET_KEY, 86400)
|
||
|
||
except Exception as exc:
|
||
logger.warning("budget_cache_update_failed", project_id=project_id, error=str(exc))
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Helper:從 DB 讀取 Tenant budget limit
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
async def _get_tenant_budget_limit(project_id: str) -> Decimal | None:
|
||
"""從 awooop_projects.budget_limit_usd 讀取 Tenant 每日上限(允許 NULL = 無上限)"""
|
||
try:
|
||
from sqlalchemy import text
|
||
from src.db.base import get_db_context
|
||
async with get_db_context(project_id) as db:
|
||
row = await db.execute(
|
||
text("SELECT budget_limit_usd FROM awooop_projects WHERE project_id = :pid"),
|
||
{"pid": project_id},
|
||
)
|
||
result = row.scalar_one_or_none()
|
||
return Decimal(str(result)) if result is not None else None
|
||
except Exception as exc:
|
||
logger.warning("get_tenant_budget_limit_failed", project_id=project_id, error=str(exc))
|
||
return None
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Emergency Kill Switch 管理(Admin 工具)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
async def activate_emergency_kill(reason: str = "") -> None:
|
||
"""啟動緊急停機 — SET platform:budget:emergency_kill"""
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
await redis.set(_EMERGENCY_KILL_KEY, reason or "activated", ex=86400 * 7)
|
||
logger.warning("budget_emergency_kill_activated", reason=reason)
|
||
|
||
|
||
async def deactivate_emergency_kill() -> None:
|
||
"""解除緊急停機"""
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
await redis.delete(_EMERGENCY_KILL_KEY)
|
||
logger.info("budget_emergency_kill_deactivated")
|
||
|
||
|
||
async def is_emergency_kill_active() -> bool:
|
||
"""查詢緊急停機狀態"""
|
||
try:
|
||
from src.core.redis_client import get_redis
|
||
redis = get_redis()
|
||
return bool(await redis.exists(_EMERGENCY_KILL_KEY))
|
||
except Exception:
|
||
return False
|