676 lines
22 KiB
Python
676 lines
22 KiB
Python
"""
|
||
Token Counter Service - Phase 13.3 #88 AI Token Dashboard
|
||
=========================================================
|
||
Token 用量監控,整合 SigNoz OTEL Metrics + Langfuse
|
||
|
||
功能:
|
||
- 記錄每次 LLM 呼叫的 input/output tokens
|
||
- 按 provider 分類統計
|
||
- 成本估算 (Gemini/Claude 有成本,Ollama 免費)
|
||
- 每日/每月 Token 預算監控
|
||
- 超標時通知切換到本地模型
|
||
|
||
SigNoz 指標:
|
||
- llm.tokens.input (Counter) - 輸入 Token 數
|
||
- llm.tokens.output (Counter) - 輸出 Token 數
|
||
- llm.cost.usd (Counter) - 累計成本
|
||
- llm.latency.ms (Histogram) - 延遲分佈
|
||
- llm.requests.total (Counter) - 總請求數
|
||
- llm.requests.failed (Counter) - 失敗請求數
|
||
|
||
版本: v1.0
|
||
建立: 2026-03-26 14:30 (台北時區)
|
||
建立者: Claude Code
|
||
最後修改: 2026-03-26 14:30 (台北時區)
|
||
修改者: Claude Code
|
||
|
||
變更紀錄:
|
||
| 版本 | 日期 | 執行者 | 變更內容 |
|
||
|------|------|--------|----------|
|
||
| v1.0 | 2026-03-26 | Claude Code | Phase 13.3 #88 初始實作 |
|
||
"""
|
||
|
||
import time
|
||
from dataclasses import dataclass, field
|
||
from datetime import UTC, datetime
|
||
from typing import Protocol
|
||
|
||
import structlog
|
||
from opentelemetry import metrics
|
||
from opentelemetry.metrics import Counter, Histogram, Meter
|
||
|
||
from src.core.config import settings
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# =============================================================================
|
||
# Constants - Cost Per 1K Tokens (USD)
|
||
# =============================================================================
|
||
|
||
# Cost table (from models.json), USD per 1K tokens, keyed by lowercase provider.
COST_PER_1K_TOKENS = {
    "ollama": 0.0,  # local model, free
    "gemini": 0.001,  # Gemini 1.5 Flash
    "claude": 0.005,  # Claude Haiku 4.5 conservative output-side estimate
}

# Budget thresholds (from models.json monitoring.alerts)
DAILY_COST_THRESHOLD_USD = 5.0
MONTHLY_COST_THRESHOLD_USD = 10.0
DAILY_TOKEN_BUDGET = {
    "gemini": 100_000,  # 100K tokens per day
    "claude": 50_000,  # 50K tokens per day
}
MONTHLY_TOKEN_BUDGET = {
    "gemini": 2_000_000,  # 2M tokens per month
    "claude": 500_000,  # 500K tokens per month
}
ALERT_THRESHOLD_PERCENT = 70  # early-warning level, in percent of budget
|
||
|
||
|
||
# =============================================================================
|
||
# Data Classes
|
||
# =============================================================================
|
||
|
||
|
||
@dataclass
|
||
class TokenUsage:
|
||
"""單次 LLM 呼叫的 Token 使用量"""
|
||
|
||
input_tokens: int
|
||
output_tokens: int
|
||
total_tokens: int = field(init=False)
|
||
provider: str
|
||
model: str
|
||
latency_ms: float = 0.0
|
||
success: bool = True
|
||
error_message: str | None = None
|
||
timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
||
|
||
def __post_init__(self):
|
||
self.total_tokens = self.input_tokens + self.output_tokens
|
||
|
||
@property
|
||
def estimated_cost_usd(self) -> float:
|
||
"""估算成本 (USD)"""
|
||
cost_per_1k = COST_PER_1K_TOKENS.get(self.provider.lower(), 0.0)
|
||
return (self.total_tokens / 1000) * cost_per_1k
|
||
|
||
|
||
@dataclass
|
||
class ProviderStats:
|
||
"""Provider 統計"""
|
||
|
||
provider: str
|
||
total_input_tokens: int = 0
|
||
total_output_tokens: int = 0
|
||
total_requests: int = 0
|
||
failed_requests: int = 0
|
||
total_latency_ms: float = 0.0
|
||
total_cost_usd: float = 0.0
|
||
period_start: datetime = field(default_factory=lambda: datetime.now(UTC))
|
||
|
||
@property
|
||
def total_tokens(self) -> int:
|
||
return self.total_input_tokens + self.total_output_tokens
|
||
|
||
@property
|
||
def success_rate(self) -> float:
|
||
if self.total_requests == 0:
|
||
return 100.0
|
||
return ((self.total_requests - self.failed_requests) / self.total_requests) * 100
|
||
|
||
@property
|
||
def avg_latency_ms(self) -> float:
|
||
if self.total_requests == 0:
|
||
return 0.0
|
||
return self.total_latency_ms / self.total_requests
|
||
|
||
|
||
@dataclass
class BudgetStatus:
    """Snapshot of one provider's token budget consumption."""

    provider: str
    # Daily figures
    daily_tokens_used: int
    daily_tokens_budget: int
    daily_cost_usd: float
    # Monthly figures
    monthly_tokens_used: int
    monthly_tokens_budget: int
    monthly_cost_usd: float
    # Evaluation result
    is_over_budget: bool = False
    alert_triggered: bool = False
    recommendation: str = ""
|
||
|
||
|
||
# =============================================================================
|
||
# Interface (Protocol for DI)
|
||
# =============================================================================
|
||
|
||
|
||
class ITokenCounter(Protocol):
    """Structural interface of a token counter (used for dependency injection)."""

    def record_usage(self, usage: TokenUsage) -> None:
        """Record the token usage of one LLM call."""
        ...

    def get_provider_stats(self, provider: str) -> ProviderStats:
        """Return the statistics of the given provider."""
        ...

    def get_budget_status(self, provider: str) -> BudgetStatus:
        """Return the budget status of the given provider."""
        ...

    def should_fallback_to_local(self, provider: str) -> tuple[bool, str]:
        """Tell whether calls should fall back to the local model, with a reason."""
        ...
|
||
|
||
|
||
# =============================================================================
|
||
# Token Counter Implementation
|
||
# =============================================================================
|
||
|
||
|
||
class TokenCounter:
    """
    Token counter backed by in-memory statistics and OpenTelemetry metrics.

    Keeps per-provider statistics (all-time, daily, monthly) in memory and,
    when OTEL metrics are enabled, emits the same measurements through the
    OpenTelemetry Metrics API (intended for SigNoz).

    NOTE(review): the original description also mentions Langfuse trace
    integration, but this class contains no Langfuse call - confirm where
    that integration lives.

    Usage:
        counter = get_token_counter()
        counter.record_usage(TokenUsage(
            input_tokens=500,
            output_tokens=200,
            provider="ollama",
            model="qwen2.5:7b-instruct",
            latency_ms=1500,
        ))
    """

    # Providers always listed by get_all_stats_summary(), even without usage.
    _DEFAULT_SUMMARY_PROVIDERS = ("ollama", "gemini", "claude")

    def __init__(self):
        """Create empty statistics and set up OTEL instruments when enabled."""
        # All-time, per-day and per-month statistics keyed by lowercase provider.
        self._provider_stats: dict[str, ProviderStats] = {}
        self._daily_stats: dict[str, ProviderStats] = {}
        self._monthly_stats: dict[str, ProviderStats] = {}
        # UTC start of the day / month the periodic statistics belong to.
        self._last_daily_reset: datetime = datetime.now(UTC).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        self._last_monthly_reset: datetime = datetime.now(UTC).replace(
            day=1, hour=0, minute=0, second=0, microsecond=0
        )

        # OTEL instruments; they stay None when metrics are disabled or
        # initialisation fails, and every emit site checks for that.
        self._meter: Meter | None = None
        self._input_tokens_counter: Counter | None = None
        self._output_tokens_counter: Counter | None = None
        self._cost_counter: Counter | None = None
        self._latency_histogram: Histogram | None = None
        self._request_counter: Counter | None = None
        self._failed_counter: Counter | None = None

        self._init_metrics()

    def _init_metrics(self) -> None:
        """Create the OTEL meter and instruments.

        Does nothing when ``settings.OTEL_ENABLED`` is falsy or
        ``settings.MOCK_MODE`` is truthy. Any exception raised while creating
        the instruments is logged as a warning and swallowed, so metrics
        remain best-effort and never break the caller.
        """
        if not settings.OTEL_ENABLED or settings.MOCK_MODE:
            logger.info("otel_metrics_disabled", reason="OTEL_ENABLED=false or MOCK_MODE=true")
            return

        try:
            # Obtain a meter from the globally configured MeterProvider.
            self._meter = metrics.get_meter(
                name="awoooi.llm",
                version=settings.VERSION,
            )

            # Counters
            self._input_tokens_counter = self._meter.create_counter(
                name="llm.tokens.input",
                description="LLM input tokens count",
                unit="tokens",
            )

            self._output_tokens_counter = self._meter.create_counter(
                name="llm.tokens.output",
                description="LLM output tokens count",
                unit="tokens",
            )

            self._cost_counter = self._meter.create_counter(
                name="llm.cost.usd",
                description="Estimated LLM cost in USD",
                unit="USD",
            )

            self._request_counter = self._meter.create_counter(
                name="llm.requests.total",
                description="Total LLM requests",
                unit="requests",
            )

            self._failed_counter = self._meter.create_counter(
                name="llm.requests.failed",
                description="Failed LLM requests",
                unit="requests",
            )

            # Histogram (latency distribution)
            self._latency_histogram = self._meter.create_histogram(
                name="llm.latency.ms",
                description="LLM request latency in milliseconds",
                unit="ms",
            )

            logger.info(
                "otel_llm_metrics_initialized",
                meter_name="awoooi.llm",
            )

        except Exception as e:
            # Deliberate best-effort: a metrics failure must not stop the service.
            logger.warning(
                "otel_metrics_init_failed",
                error=str(e),
            )

    @staticmethod
    def _usage_percent(used: int, budget: int) -> float:
        """Return ``used`` as a percentage of ``budget``; 0 when no budget is set."""
        return (used / budget * 100) if budget > 0 else 0

    def _reset_if_needed(self) -> None:
        """Drop the daily / monthly statistics once a new UTC day / month began."""
        now = datetime.now(UTC)

        # Daily reset
        today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
        if today_start > self._last_daily_reset:
            logger.info(
                "daily_stats_reset",
                previous_date=self._last_daily_reset.isoformat(),
            )
            self._daily_stats = {}
            self._last_daily_reset = today_start

        # Monthly reset
        month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        if month_start > self._last_monthly_reset:
            logger.info(
                "monthly_stats_reset",
                previous_month=self._last_monthly_reset.isoformat(),
            )
            self._monthly_stats = {}
            self._last_monthly_reset = month_start

    def _get_or_create_stats(
        self, provider: str, stats_dict: dict[str, ProviderStats]
    ) -> ProviderStats:
        """Return the stats entry for ``provider``, inserting an empty one if absent."""
        if provider not in stats_dict:
            stats_dict[provider] = ProviderStats(provider=provider)
        return stats_dict[provider]

    def record_usage(self, usage: TokenUsage) -> None:
        """
        Record the token usage of one LLM call.

        Updates, in order:
        1. the in-memory statistics (all-time / daily / monthly),
        2. the OTEL instruments, when they were initialised,
        3. a structured ``token_usage_recorded`` log entry,
        and finally evaluates the provider's budget alerts.

        Args:
            usage: The usage record; its provider name is lowercased before
                being used as statistics key and metric attribute.
        """
        self._reset_if_needed()

        provider = usage.provider.lower()
        cost_usd = usage.estimated_cost_usd
        attributes = {
            "provider": provider,
            "model": usage.model,
            "environment": settings.ENVIRONMENT,
        }

        # Update in-memory statistics.
        for stats_dict in (self._provider_stats, self._daily_stats, self._monthly_stats):
            stats = self._get_or_create_stats(provider, stats_dict)
            stats.total_input_tokens += usage.input_tokens
            stats.total_output_tokens += usage.output_tokens
            stats.total_requests += 1
            stats.total_latency_ms += usage.latency_ms
            stats.total_cost_usd += cost_usd
            if not usage.success:
                stats.failed_requests += 1

        # Emit OTEL metrics.
        if self._input_tokens_counter is not None:
            self._input_tokens_counter.add(usage.input_tokens, attributes)

        if self._output_tokens_counter is not None:
            self._output_tokens_counter.add(usage.output_tokens, attributes)

        if self._cost_counter is not None and cost_usd > 0:
            # The cost is added directly as a float USD amount.
            self._cost_counter.add(cost_usd, attributes)

        if self._request_counter is not None:
            self._request_counter.add(1, attributes)

        if not usage.success and self._failed_counter is not None:
            self._failed_counter.add(1, attributes)

        # A latency of 0 is the "not measured" default and is not recorded.
        if self._latency_histogram is not None and usage.latency_ms > 0:
            self._latency_histogram.record(usage.latency_ms, attributes)

        # Structured log entry.
        logger.info(
            "token_usage_recorded",
            provider=provider,
            model=usage.model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            total_tokens=usage.total_tokens,
            latency_ms=round(usage.latency_ms, 2),
            cost_usd=round(cost_usd, 6),
            success=usage.success,
        )

        # Budget alerts.
        self._check_budget_alert(provider)

    def _check_budget_alert(self, provider: str) -> None:
        """Log a warning at the alert threshold and an error when over budget."""
        status = self.get_budget_status(provider)

        if status.alert_triggered:
            logger.warning(
                "llm_budget_alert",
                provider=provider,
                daily_usage_percent=round(
                    self._usage_percent(status.daily_tokens_used, status.daily_tokens_budget),
                    1,
                ),
                monthly_usage_percent=round(
                    self._usage_percent(status.monthly_tokens_used, status.monthly_tokens_budget),
                    1,
                ),
                recommendation=status.recommendation,
            )

        if status.is_over_budget:
            logger.error(
                "llm_budget_exceeded",
                provider=provider,
                daily_tokens_used=status.daily_tokens_used,
                monthly_tokens_used=status.monthly_tokens_used,
                recommendation=status.recommendation,
            )

    def get_provider_stats(self, provider: str) -> ProviderStats:
        """Return the all-time statistics of ``provider`` (case-insensitive)."""
        return self._get_or_create_stats(provider.lower(), self._provider_stats)

    def get_daily_stats(self, provider: str) -> ProviderStats:
        """Return the current-day statistics of ``provider`` (case-insensitive)."""
        self._reset_if_needed()
        return self._get_or_create_stats(provider.lower(), self._daily_stats)

    def get_monthly_stats(self, provider: str) -> ProviderStats:
        """Return the current-month statistics of ``provider`` (case-insensitive)."""
        self._reset_if_needed()
        return self._get_or_create_stats(provider.lower(), self._monthly_stats)

    def get_budget_status(self, provider: str) -> BudgetStatus:
        """Evaluate the token budget of ``provider``.

        Providers without an entry in ``DAILY_TOKEN_BUDGET`` /
        ``MONTHLY_TOKEN_BUDGET`` have a budget of 0, which is treated as
        "no budget": their usage percentage is 0 and no alert is raised.

        Returns:
            A ``BudgetStatus`` whose ``alert_triggered`` is set once daily or
            monthly usage reaches ``ALERT_THRESHOLD_PERCENT`` and whose
            ``is_over_budget`` is set once either reaches 100%.
        """
        self._reset_if_needed()
        provider = provider.lower()

        daily_stats = self.get_daily_stats(provider)
        monthly_stats = self.get_monthly_stats(provider)

        daily_budget = DAILY_TOKEN_BUDGET.get(provider, 0)
        monthly_budget = MONTHLY_TOKEN_BUDGET.get(provider, 0)

        # Usage ratios.
        daily_usage_percent = self._usage_percent(daily_stats.total_tokens, daily_budget)
        monthly_usage_percent = self._usage_percent(monthly_stats.total_tokens, monthly_budget)

        # Alert state.
        alert_triggered = (
            daily_usage_percent >= ALERT_THRESHOLD_PERCENT
            or monthly_usage_percent >= ALERT_THRESHOLD_PERCENT
        )
        is_over_budget = daily_usage_percent >= 100 or monthly_usage_percent >= 100

        # Recommendation text.
        recommendation = ""
        if is_over_budget:
            recommendation = "建議切換到本地模型 (Ollama) 以節省成本"
        elif alert_triggered:
            recommendation = f"接近預算上限 ({max(daily_usage_percent, monthly_usage_percent):.1f}%),考慮減少 {provider} 呼叫"

        return BudgetStatus(
            provider=provider,
            daily_tokens_used=daily_stats.total_tokens,
            daily_tokens_budget=daily_budget,
            daily_cost_usd=daily_stats.total_cost_usd,
            monthly_tokens_used=monthly_stats.total_tokens,
            monthly_tokens_budget=monthly_budget,
            monthly_cost_usd=monthly_stats.total_cost_usd,
            is_over_budget=is_over_budget,
            alert_triggered=alert_triggered,
            recommendation=recommendation,
        )

    def should_fallback_to_local(self, provider: str) -> tuple[bool, str]:
        """
        Tell whether calls to ``provider`` should fall back to the local model.

        Returns:
            (should_fallback, reason). Fallback is only requested when the
            budget is exceeded; at the alert threshold the reason carries the
            warning but ``should_fallback`` stays False.
        """
        if provider.lower() == "ollama":
            return False, "Already using local model"

        status = self.get_budget_status(provider)

        if status.is_over_budget:
            return True, f"Budget exceeded for {provider}: {status.recommendation}"

        if status.alert_triggered:
            # Above the alert threshold a fallback is optional, not forced.
            return False, f"Near budget threshold for {provider}: {status.recommendation}"

        return False, "Budget OK"

    def get_all_stats_summary(self) -> dict:
        """Return a summary of daily / monthly usage and budget per provider.

        The summary always contains the default providers and additionally
        every provider that has an entry in the current daily or monthly
        statistics, so usage recorded under other provider names is not
        silently left out. The ``total`` section sums the *daily* figures of
        all listed providers.
        """
        self._reset_if_needed()

        summary = {
            "timestamp": datetime.now(UTC).isoformat(),
            "providers": {},
            "total": {
                "input_tokens": 0,
                "output_tokens": 0,
                "cost_usd": 0.0,
                "requests": 0,
            },
        }

        # Snapshot the names first: the getters below insert empty entries
        # into the stats dicts, which must not happen while iterating them.
        # dict.fromkeys de-duplicates while keeping the defaults in front.
        providers = list(
            dict.fromkeys(
                [
                    *self._DEFAULT_SUMMARY_PROVIDERS,
                    *self._daily_stats,
                    *self._monthly_stats,
                ]
            )
        )

        for provider in providers:
            daily = self.get_daily_stats(provider)
            monthly = self.get_monthly_stats(provider)
            budget = self.get_budget_status(provider)

            summary["providers"][provider] = {
                "daily": {
                    "input_tokens": daily.total_input_tokens,
                    "output_tokens": daily.total_output_tokens,
                    "total_tokens": daily.total_tokens,
                    "cost_usd": round(daily.total_cost_usd, 4),
                    "requests": daily.total_requests,
                    "success_rate": round(daily.success_rate, 1),
                    "avg_latency_ms": round(daily.avg_latency_ms, 1),
                },
                "monthly": {
                    "input_tokens": monthly.total_input_tokens,
                    "output_tokens": monthly.total_output_tokens,
                    "total_tokens": monthly.total_tokens,
                    "cost_usd": round(monthly.total_cost_usd, 4),
                    "requests": monthly.total_requests,
                },
                "budget": {
                    "daily_budget": budget.daily_tokens_budget,
                    "daily_usage_percent": round(
                        self._usage_percent(budget.daily_tokens_used, budget.daily_tokens_budget),
                        1,
                    ),
                    "monthly_budget": budget.monthly_tokens_budget,
                    "monthly_usage_percent": round(
                        self._usage_percent(
                            budget.monthly_tokens_used, budget.monthly_tokens_budget
                        ),
                        1,
                    ),
                    "is_over_budget": budget.is_over_budget,
                    "alert_triggered": budget.alert_triggered,
                },
            }

            # Accumulate the totals (daily figures).
            summary["total"]["input_tokens"] += daily.total_input_tokens
            summary["total"]["output_tokens"] += daily.total_output_tokens
            summary["total"]["cost_usd"] += daily.total_cost_usd
            summary["total"]["requests"] += daily.total_requests

        summary["total"]["cost_usd"] = round(summary["total"]["cost_usd"], 4)

        return summary
|
||
|
||
|
||
# =============================================================================
|
||
# Helper: Usage Tracker Context Manager
|
||
# =============================================================================
|
||
|
||
|
||
class UsageTracker:
    """
    Context manager that times an LLM call and records its token usage.

    Works both as a synchronous (``with``) and an asynchronous
    (``async with``) context manager. On exit it measures the elapsed time,
    builds a ``TokenUsage`` from the values set on the tracker and passes it
    to the shared token counter. Exceptions raised inside the block mark the
    usage as failed and are re-raised (never suppressed).

    Usage:
        async with UsageTracker("ollama", "qwen2.5:7b-instruct") as tracker:
            result = await call_llm(prompt)
            tracker.set_tokens(input_tokens=500, output_tokens=200)
    """

    def __init__(self, provider: str, model: str):
        """Prepare a tracker for one call to ``model`` of ``provider``."""
        self.provider = provider
        self.model = model
        self.start_time: float = 0
        self.input_tokens: int = 0
        self.output_tokens: int = 0
        self.success: bool = True
        self.error_message: str | None = None
        self._counter = get_token_counter()

    def __enter__(self):
        """Start the timer and return the tracker itself."""
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the timer and record the usage; exceptions keep propagating."""
        latency_ms = (time.perf_counter() - self.start_time) * 1000

        if exc_type is not None:
            self.success = False
            # Some exceptions (e.g. a bare TimeoutError()) stringify to "";
            # fall back to the class name so a failure never has a blank message.
            self.error_message = str(exc_val) or exc_type.__name__

        usage = TokenUsage(
            input_tokens=self.input_tokens,
            output_tokens=self.output_tokens,
            provider=self.provider,
            model=self.model,
            latency_ms=latency_ms,
            success=self.success,
            error_message=self.error_message,
        )

        self._counter.record_usage(usage)

    async def __aenter__(self):
        """Async entry point; delegates to ``__enter__``."""
        return self.__enter__()

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async exit point; delegates to ``__exit__``."""
        return self.__exit__(exc_type, exc_val, exc_tb)

    def set_tokens(self, input_tokens: int, output_tokens: int) -> None:
        """Set the token counts that will be recorded on exit."""
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens

    def mark_failed(self, error_message: str) -> None:
        """Mark the call as failed without raising an exception."""
        self.success = False
        self.error_message = error_message
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
# Lazily created process-wide instance; see get_token_counter().
_token_counter: TokenCounter | None = None


def get_token_counter() -> TokenCounter:
    """Return the shared TokenCounter, creating it on first use."""
    global _token_counter
    instance = _token_counter
    if instance is None:
        instance = _token_counter = TokenCounter()
    return instance
|
||
|
||
|
||
def reset_token_counter() -> None:
    """Discard the shared TokenCounter instance (intended for tests)."""
    global _token_counter
    _token_counter = None
|
||
|
||
|
||
# =============================================================================
|
||
# Convenience Functions
|
||
# =============================================================================
|
||
|
||
|
||
def record_token_usage(
    provider: str,
    model: str,
    input_tokens: int,
    output_tokens: int,
    latency_ms: float = 0.0,
    success: bool = True,
    error_message: str | None = None,
) -> None:
    """Convenience wrapper: build a TokenUsage and record it on the shared counter."""
    record = TokenUsage(
        provider=provider,
        model=model,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        latency_ms=latency_ms,
        success=success,
        error_message=error_message,
    )
    counter = get_token_counter()
    counter.record_usage(record)
|
||
|
||
|
||
def should_use_local_model(provider: str) -> tuple[bool, str]:
    """Convenience wrapper: ask the shared counter whether to use the local model."""
    counter = get_token_counter()
    return counter.should_fallback_to_local(provider)
|