feat(api): Phase 15.2 Redis Trace Context 傳遞

實現 Redis Streams 跨服務追蹤零斷鏈:
- telemetry.py: 新增 get_trace_context() + restore_trace_context()
- webhooks.py: Producer 注入 _trace_id, _span_id 到 Redis
- signal_worker.py: Consumer 還原 Trace Context 建立子 Span

架構: API → Redis Streams → Worker 完整追蹤鏈
格式: W3C Trace Context (traceparent)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-03-26 00:40:20 +08:00
parent 1ac8965a7a
commit 0d31ccb911
3 changed files with 217 additions and 40 deletions

View File

@@ -219,3 +219,121 @@ def get_current_trace_id() -> str | None:
return None
return format(ctx.trace_id, '032x')
def get_current_span_id() -> str | None:
    """
    Return the ID of the currently active span.

    Returns:
        The span ID rendered as a zero-padded, 16-character lowercase hex
        string, or None when there is no current span or its span context
        is missing or not valid.
    """
    active_span = trace.get_current_span()
    span_context = None if active_span is None else active_span.get_span_context()
    # Only a present, valid span context carries a usable span ID.
    if span_context is not None and span_context.is_valid:
        return f"{span_context.span_id:016x}"
    return None
# =============================================================================
# Phase 15.2: Redis Trace Context Propagation
# =============================================================================
def get_trace_context() -> dict[str, str] | None:
    """
    Capture the current trace context for injection into Redis Streams.

    Phase 15.2: addresses the trace being broken across Redis Streams.

    Returns:
        A dict with the keys ``trace_id`` and ``span_id`` (hex strings), or
        None when there is no current trace ID. ``span_id`` is an empty
        string when no span ID is available.

    Usage (writing to Redis):
        payload = {**signal.dict(), "_trace_context": get_trace_context()}
        await redis.xadd("stream:signals", payload)

    NOTE(review): the example above passes a nested dict (or None) as a
    stream field value; the producer presumably has to serialise or flatten
    it before ``xadd`` -- confirm against the actual producer code.
    """
    current_trace_id, current_span_id = get_current_trace_id(), get_current_span_id()
    if current_trace_id:
        return {
            "trace_id": current_trace_id,
            "span_id": current_span_id if current_span_id else "",
        }
    # Without a trace ID there is nothing worth propagating.
    return None
def restore_trace_context(trace_context: dict[str, str] | None):
    """
    Rebuild a trace context taken from a Redis message and open a span under it.

    Phase 15.2: worker-side context reconstruction.

    Args:
        trace_context: The ``_trace_context`` mapping read from the Redis
            message, holding ``trace_id`` and ``span_id`` as hex strings,
            or None when the message carried no trace context.

    Returns:
        A context manager that yields a ``worker_process`` span of kind
        CONSUMER:

        * When a valid parent could be rebuilt, the span is started under
          it and tagged ``trace.restored`` / ``trace.parent_trace_id``.
        * When ``trace_context`` is empty or has no ``trace_id``, a plain
          new span is started.
        * When the supplied IDs cannot be turned into a valid parent, a
          warning is logged and a new span tagged ``trace.restore_failed``
          is started.

        Exceptions raised inside the caller's ``with`` body propagate
        unchanged.

    Usage (reading from Redis):
        message = await redis.xreadgroup(...)
        trace_ctx = message.get("_trace_context")
        with restore_trace_context(trace_ctx) as span:
            # Work done here runs under a span that inherits the
            # original trace_id.
            pass
    """
    from contextlib import contextmanager

    from opentelemetry.trace import SpanKind, get_current_span
    from opentelemetry.trace.propagation.tracecontext import (
        TraceContextTextMapPropagator,
    )

    tracer = get_tracer("awoooi.worker")

    @contextmanager
    def _context_manager():
        parent_ctx = None
        failure = None

        if trace_context and trace_context.get("trace_id"):
            # Keep the try body limited to the restore step itself.
            try:
                # An empty/missing span_id yields a malformed header, which
                # the validity check below reports as a restore failure.
                span_id = trace_context.get("span_id") or ""
                # Build a W3C Trace Context carrier (version 00, sampled).
                carrier = {
                    "traceparent": f"00-{trace_context['trace_id']}-{span_id}-01"
                }
                extracted = TraceContextTextMapPropagator().extract(carrier=carrier)
                # The propagator ignores malformed headers without raising,
                # so confirm that a valid remote parent was really produced.
                if get_current_span(extracted).get_span_context().is_valid:
                    parent_ctx = extracted
                else:
                    failure = "invalid trace_id or span_id in trace context"
            except Exception as e:
                failure = str(e)

            if failure is not None:
                _logger.warning(
                    f"Trace context restore failed: {failure}, creating new span"
                )

        # The yield must stay outside the try/except above: an exception
        # raised in the caller's with-body is thrown into this generator at
        # the yield, and catching it there would mislabel it as a restore
        # failure and yield a second time, which contextlib reports as
        # RuntimeError instead of the caller's own exception.
        with tracer.start_as_current_span(
            "worker_process",
            context=parent_ctx,
            kind=SpanKind.CONSUMER,
        ) as span:
            if parent_ctx is not None:
                span.set_attribute("trace.restored", True)
                span.set_attribute(
                    "trace.parent_trace_id", trace_context["trace_id"]
                )
            elif failure is not None:
                span.set_attribute("trace.restore_failed", failure)
            yield span

    return _context_manager()