Files
awoooi/apps/api/src/services/budget_service.py
Your Name 7d92f0acd7
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m49s
CD Pipeline / post-deploy-checks (push) Successful in 1m25s
chore(rls): stage projects canary path
2026-05-12 21:25:24 +08:00

379 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""AwoooP Token Budget Hard Kill Service
=======================================
ADR-120: 三層 Hard Kill 防護架構
2026-05-04 ogt + Claude Sonnet 4.6Phase 2.6
防線:
1. Pre-call check呼叫前— Layer 1 Tenant + Layer 2 Platform + Layer 3 Emergency Kill
2. Post-call accounting呼叫後— 寫 budget_ledger + 更新 Redis cache
3. 告警閾值通知80% / 95% Telegram 告警)
注意Layer 0 Run budget 需要 awooop_run_statePhase 3 SAGA 實作後補加)
"""
from __future__ import annotations
import time
from decimal import Decimal
import structlog
from src.core.config import settings
logger = structlog.get_logger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# 告警閾值ADR-120 D4
# ─────────────────────────────────────────────────────────────────────────────
BUDGET_ALERT_THRESHOLDS = {
"warn": Decimal("0.80"),
"critical": Decimal("0.95"),
"hard_kill": Decimal("1.00"),
}
# Redis key 前綴
_EMERGENCY_KILL_KEY = "platform:budget:emergency_kill"
_TENANT_BUDGET_KEY_PREFIX = "budget:tenant:" # {project_id}:daily_used_usd
_PLATFORM_BUDGET_KEY = "budget:platform:daily_used_usd"
_BUDGET_CACHE_TTL = 300 # 5 分鐘,每次寫入後 refresh
class BudgetExhaustedError(Exception):
"""LLM call 被 hard kill 攔截"""
def __init__(self, error_code: str, message: str) -> None:
self.error_code = error_code
super().__init__(f"[{error_code}] {message}")
# ─────────────────────────────────────────────────────────────────────────────
# 費用計算(按模型定價估算)
# ─────────────────────────────────────────────────────────────────────────────
# USD per 1M tokensin + out
_COST_PER_MILLION_TOKENS: dict[str, tuple[float, float]] = {
# (prompt_per_M, completion_per_M)
"claude-opus-4-7": (15.0, 75.0),
"claude-sonnet-4-6": (3.0, 15.0),
"claude-haiku-4-5": (0.8, 4.0),
"gpt-4o": (5.0, 15.0),
"gpt-4o-mini": (0.15, 0.6),
"gemini-2.0-flash": (0.075, 0.3),
"deepseek-r1:14b": (0.0, 0.0), # local Ollama — 無費用
"qwen3:8b": (0.0, 0.0), # local Ollama — 無費用
}
_DEFAULT_COST_PER_M = (3.0, 15.0) # fallback → claude-sonnet
def estimate_cost(
prompt_tokens: int,
completion_tokens: int,
model: str,
) -> Decimal:
"""估算一次 LLM call 的費用USD"""
prompt_rate, completion_rate = _COST_PER_MILLION_TOKENS.get(
model, _DEFAULT_COST_PER_M
)
cost = (prompt_tokens / 1_000_000 * prompt_rate +
completion_tokens / 1_000_000 * completion_rate)
return Decimal(str(round(cost, 6)))
# ─────────────────────────────────────────────────────────────────────────────
# Pre-call Budget CheckADR-120 D2 防線 1
# ─────────────────────────────────────────────────────────────────────────────
async def check_budget_before_llm_call(
project_id: str,
model: str,
estimated_prompt_tokens: int = 4000,
*,
agent_id: str | None = None,
) -> None:
"""
LLM call 前的三層 budget check。
超出任一層預算 → 拋出 BudgetExhaustedError阻止 API call。
Redis 不可用時 fail-open不阻擋呼叫但記 warning
Args:
project_id: 租戶 ID
model: 模型名稱(用於費用估算)
estimated_prompt_tokens: 預估 prompt token 數(保守估計 × 1.5 已含在外)
"""
# Layer 3Emergency Kill Switch最優先
await check_emergency_kill()
# Local Ollama 模型無費用,跳過 Layer 1/2
if model in {"deepseek-r1:14b", "qwen3:8b"} or model.startswith("ollama/"):
return
estimated_cost = estimate_cost(estimated_prompt_tokens, 0, model)
# Layer 2Tenant Budget
await _check_tenant_budget(project_id, estimated_cost)
# Layer 1Platform Budget
await _check_platform_budget(estimated_cost)
async def check_emergency_kill() -> None:
"""Layer 3: Emergency Kill Switch — Redis key platform:budget:emergency_kill"""
try:
from src.core.redis_client import get_redis
redis = get_redis()
if await redis.exists(_EMERGENCY_KILL_KEY):
raise BudgetExhaustedError(
"E-BUDGET-004",
"Emergency kill switch activated — contact platform admin",
)
except BudgetExhaustedError:
raise
except Exception as exc:
logger.warning("budget_emergency_kill_check_failed", error=str(exc))
async def _check_tenant_budget(project_id: str, estimated_cost: Decimal) -> None:
"""Layer 2: Tenant BudgetRedis 快取 + awooop_projects.budget_limit_usd"""
try:
from src.core.redis_client import get_redis
redis = get_redis()
# 讀取 Tenant 每日已用金額
cache_key = f"{_TENANT_BUDGET_KEY_PREFIX}{project_id}"
used_raw = await redis.get(cache_key)
used_usd = Decimal(used_raw.decode() if isinstance(used_raw, bytes) else used_raw or "0")
# 讀取 Tenant 預算上限(從 awooop_projects 表)
limit_usd = await _get_tenant_budget_limit(project_id)
if limit_usd is None:
return # 無上限 → 放行
if used_usd + estimated_cost > limit_usd:
raise BudgetExhaustedError(
"E-BUDGET-002",
f"Tenant {project_id} budget exhausted: "
f"used ${used_usd:.4f} / ${limit_usd:.4f}",
)
# 告警閾值
usage_pct = (used_usd + estimated_cost) / limit_usd
if usage_pct >= BUDGET_ALERT_THRESHOLDS["critical"]:
logger.warning(
"budget_tenant_critical",
project_id=project_id,
usage_pct=float(usage_pct),
used_usd=float(used_usd),
limit_usd=float(limit_usd),
)
elif usage_pct >= BUDGET_ALERT_THRESHOLDS["warn"]:
logger.warning(
"budget_tenant_warn",
project_id=project_id,
usage_pct=float(usage_pct),
used_usd=float(used_usd),
limit_usd=float(limit_usd),
)
except BudgetExhaustedError:
raise
except Exception as exc:
logger.warning("budget_tenant_check_failed", project_id=project_id, error=str(exc))
async def _check_platform_budget(estimated_cost: Decimal) -> None:
"""Layer 1: Platform Budgetconfig 靜態上限 + Redis 累計)"""
platform_limit = getattr(settings, "PLATFORM_DAILY_BUDGET_USD", None)
if not platform_limit:
return # 未設定 → 放行
try:
from src.core.redis_client import get_redis
redis = get_redis()
used_raw = await redis.get(_PLATFORM_BUDGET_KEY)
used_usd = Decimal(used_raw.decode() if isinstance(used_raw, bytes) else used_raw or "0")
limit_usd = Decimal(str(platform_limit))
if used_usd + estimated_cost > limit_usd:
raise BudgetExhaustedError(
"E-BUDGET-003",
f"Platform budget exhausted: used ${used_usd:.4f} / ${limit_usd:.4f}"
"all LLM calls suspended",
)
except BudgetExhaustedError:
raise
except Exception as exc:
logger.warning("budget_platform_check_failed", error=str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# Post-call AccountingADR-120 D2 防線 2
# ─────────────────────────────────────────────────────────────────────────────
async def record_token_usage(
*,
project_id: str,
model: str,
provider: str,
prompt_tokens: int,
completion_tokens: int,
agent_id: str | None = None,
run_id: str | None = None,
) -> Decimal:
"""
LLM call 完成後記帳。
1. 計算實際費用
2. INSERT budget_ledger
3. 更新 Redis budget cacheasync不阻擋回傳
4. 觸發告警閾值通知
Returns:
actual_cost_usd
"""
import asyncio
from uuid import UUID
actual_cost = estimate_cost(prompt_tokens, completion_tokens, model)
# 寫入 budget_ledger非阻擋
asyncio.create_task(
_write_budget_ledger(
project_id=project_id,
agent_id=agent_id,
run_id=UUID(run_id) if run_id else None,
model=model,
provider=provider,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
cost_usd=actual_cost,
),
name="budget_ledger_write",
)
# 更新 Redis cache非阻擋
asyncio.create_task(
_update_budget_cache(project_id, actual_cost),
name="budget_cache_update",
)
logger.info(
"token_usage_recorded",
project_id=project_id,
model=model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
cost_usd=float(actual_cost),
)
return actual_cost
async def _write_budget_ledger(
*,
project_id: str,
agent_id: str | None,
run_id, # UUID | None
model: str,
provider: str,
prompt_tokens: int,
completion_tokens: int,
cost_usd: Decimal,
) -> None:
"""INSERT budget_ledgerleWOOOgo: DB 寫入在 Service 層,非 Router"""
try:
from sqlalchemy import text
from src.db.base import get_db_context
async with get_db_context(project_id) as db:
await db.execute(
text("""
INSERT INTO budget_ledger
(project_id, agent_id, run_id, model, provider,
prompt_tokens, completion_tokens, cost_usd)
VALUES
(:project_id, :agent_id, :run_id, :model, :provider,
:prompt_tokens, :completion_tokens, :cost_usd)
"""),
{
"project_id": project_id,
"agent_id": agent_id,
"run_id": run_id,
"model": model,
"provider": provider,
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"cost_usd": cost_usd,
},
)
except Exception as exc:
logger.warning("budget_ledger_write_failed", project_id=project_id, error=str(exc))
async def _update_budget_cache(project_id: str, cost: Decimal) -> None:
"""用 Redis INCRBYFLOAT 更新 Tenant + Platform daily budget cache"""
try:
from src.core.redis_client import get_redis
redis = get_redis()
cost_f = float(cost)
# Tenant daily budget
tenant_key = f"{_TENANT_BUDGET_KEY_PREFIX}{project_id}"
await redis.incrbyfloat(tenant_key, cost_f)
await redis.expire(tenant_key, 86400) # 24h TTL每日重置
# Platform daily budget
await redis.incrbyfloat(_PLATFORM_BUDGET_KEY, cost_f)
await redis.expire(_PLATFORM_BUDGET_KEY, 86400)
except Exception as exc:
logger.warning("budget_cache_update_failed", project_id=project_id, error=str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# Helper從 DB 讀取 Tenant budget limit
# ─────────────────────────────────────────────────────────────────────────────
async def _get_tenant_budget_limit(project_id: str) -> Decimal | None:
"""從 awooop_projects.budget_limit_usd 讀取 Tenant 每日上限(允許 NULL = 無上限)"""
try:
from sqlalchemy import text
from src.db.base import get_db_context
async with get_db_context(project_id) as db:
row = await db.execute(
text("SELECT budget_limit_usd FROM awooop_projects WHERE project_id = :pid"),
{"pid": project_id},
)
result = row.scalar_one_or_none()
return Decimal(str(result)) if result is not None else None
except Exception as exc:
logger.warning("get_tenant_budget_limit_failed", project_id=project_id, error=str(exc))
return None
# ─────────────────────────────────────────────────────────────────────────────
# Emergency Kill Switch 管理Admin 工具)
# ─────────────────────────────────────────────────────────────────────────────
async def activate_emergency_kill(reason: str = "") -> None:
"""啟動緊急停機 — SET platform:budget:emergency_kill"""
from src.core.redis_client import get_redis
redis = get_redis()
await redis.set(_EMERGENCY_KILL_KEY, reason or "activated", ex=86400 * 7)
logger.warning("budget_emergency_kill_activated", reason=reason)
async def deactivate_emergency_kill() -> None:
"""解除緊急停機"""
from src.core.redis_client import get_redis
redis = get_redis()
await redis.delete(_EMERGENCY_KILL_KEY)
logger.info("budget_emergency_kill_deactivated")
async def is_emergency_kill_active() -> bool:
"""查詢緊急停機狀態"""
try:
from src.core.redis_client import get_redis
redis = get_redis()
return bool(await redis.exists(_EMERGENCY_KILL_KEY))
except Exception:
return False