## Phase 1-3: Control Plane + Contract System
- awooop_phase1_control_plane_2026-05-04.sql: 12 張核心表 + RLS
- awooop_phase1_batch1_rls_2026-05-04.sql: 全部 FORCE RLS + GRANT
- packages/awooop-contracts/: 六合約 JSON Schema + golden fixtures
- src/models/awooop_contracts.py: Pydantic v2 contract models(extra=forbid)
- src/repositories/contract_repository.py: contract lifecycle(draft→published→active)
- src/services/contract_service.py: HMAC publish sig + Redis multi-sig activate
- src/services/schema_validator.py: LLM output validator(retry×3, E-SCHEMA-001)

## Phase 2: Tenant Isolation
- awooop_phase2_budget_ledger_2026-05-04.sql: budget_ledger + RLS
- src/services/budget_service.py: Token Budget Hard Kill 三層防線
- src/core/context.py: PROJECT_ID ContextVar(31 background loop 自動繼承)
- src/db/base.py + models.py: project_id 欄位 + RLS set_config 注入
- src/hermes/nl_gateway.py: project_id Redis key 前綴(Phase A 雙寫)
- src/services/anomaly_counter.py: per-project 改造(Phase A fallback)

## Phase 4: Platform Shell in Shadow Mode
- awooop_phase4_run_state_2026-05-04.sql: run_state + step_journal + idempotency
- src/services/run_state_machine.py: 8-state FSM + SKIP LOCKED + stale reaper
- src/services/platform_runtime.py: UUID v7 + W3C trace_id + shadow_execute
- src/services/audit_sink.py: PII/secret redaction 9 patterns
- src/api/v1/platform/runs.py: POST/GET /v1/platform/runs(Router→Service 架構)
- src/workers/platform_worker.py: SKIP LOCKED worker + heartbeat + reaper loop
- src/main.py: platform router + lifespan worker start/stop

## Phase 5: MCP Gateway 五閘門
- awooop_phase5_mcp_gateway_2026-05-04.sql: 4 表 + RLS
- src/plugins/mcp/gateway.py: McpGateway(Gate 1~5, E-MCP-GATE-001~009)
- src/plugins/mcp/redaction_middleware.py: 雙層 redaction + 16K 截斷
- src/plugins/mcp/registry.py: __provider name mangling(ADR-116)
- src/plugins/mcp/credential_resolver.py: k8s secret ref 解析
- tests/test_mcp_credential_isolation.py: 10 個迴歸測試(secret leak 防再現)

## Phase 6-8: EwoooC + Channel Hub + Approval Token
- awooop_phase6_ewoooc_onboarding_2026-05-04.sql: ewoooc tenant + 4 read-only MCP tools
- awooop_phase7_channel_hub_2026-05-04.sql: conversation_event + outbound_message
- src/services/provider_proxy.py: ProviderProxy + PlatformEnvelope(ADR-115)
- src/services/channel_hub.py: Telegram inbound mirror + Progressive Feedback(30s)
- src/services/awooop_approval_token.py: HS256 + jti NX replay 防護 + suggest mode

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
228 lines
8.8 KiB
Python
228 lines
8.8 KiB
Python
"""
Audit Sink with PII/Secret Redaction
======================================
AwoooP Phase 4.4: sanitization pipeline applied before audit-log writes (ADR-116)
2026-05-04 ogt + Claude Sonnet 4.6

Design principles:
- The audit log never records raw LLM input/output; only a hash + schema validation result
- PII / secret patterns are hard-blocked (callers cannot bypass them)
- Block list: GCP IPs, PostgreSQL passwords, Telegram tokens, SSH keys, Bearer tokens, etc.
- Redacted values cannot be recovered (they are replaced with [REDACTED:<type>])
- All audit writes go through this sink (other services must not INSERT into audit_logs directly)

Usage:
    from src.services.audit_sink import write_audit

    await write_audit(
        project_id="awoooi",
        action="run.completed",
        resource_type="run",
        resource_id=str(run_id),
        details={"trace_id": trace_id, "cost_usd": 0.012},
    )
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import hashlib
|
||
import json
|
||
import re
|
||
from typing import Any
|
||
|
||
import structlog
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Redaction patterns(ADR-116 P1-08)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
# Each entry is (compiled_regex, replacement_tag).  A match is rewritten to
# "[REDACTED:<tag>]"; entries are applied in list order.
_REDACTION_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Telegram bot token: 8-12 digits, a colon, then 32-64 URL-safe characters.
    (re.compile(r"\d{8,12}:[A-Za-z0-9_-]{32,64}"), "TELEGRAM_TOKEN"),

    # PostgreSQL connection string (optionally with the +asyncpg driver):
    # scheme, user, password and host up to the first "/" or whitespace.
    (re.compile(r"postgresql(?:\+asyncpg)?://[^:]+:[^@]+@[^/\s]+"), "PG_DSN"),

    # Generic "password=..." / "pwd: ..." assignments in URLs or config text.
    (re.compile(r"(?i)(?:password|passwd|pwd)\s*[:=]\s*\S+"), "PASSWORD"),

    # Bearer / Authorization header value ("bearer <value>" or "token <value>").
    (re.compile(r"(?i)(?:bearer|token)\s+[A-Za-z0-9\-._~+/]+=*"), "BEARER_TOKEN"),

    # API key assignments (api_key / api-key / apikey followed by ':' or '='
    # and at least 20 key characters), e.g. AWS / GCP / NVIDIA keys.
    (re.compile(r"(?i)(?:api[_-]?key|apikey)\s*[:=]\s*[A-Za-z0-9\-._]{20,}"), "API_KEY"),

    # Private (RFC 1918) IPv4 ranges; ADR-116 forbids GCP internal IPs in logs.
    (re.compile(r"\b10\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"), "INTERNAL_IP"),
    (re.compile(r"\b172\.(?:1[6-9]|2\d|3[0-1])\.\d{1,3}\.\d{1,3}\b"), "INTERNAL_IP"),
    (re.compile(r"\b192\.168\.\d{1,3}\.\d{1,3}\b"), "INTERNAL_IP"),

    # PEM private key block.  The algorithm prefix is optional so that PKCS#8
    # keys ("-----BEGIN PRIVATE KEY-----", the form used by GCP service-account
    # JSON) are caught as well as RSA / EC / DSA / OPENSSH / ENCRYPTED keys.
    # The tag name is kept as SSH_PRIVATE_KEY for compatibility with existing
    # consumers of the redaction marker.
    (
        re.compile(
            r"-----BEGIN (?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED) )?PRIVATE KEY-----"
            r"[\s\S]*?"
            r"-----END [A-Z ]*PRIVATE KEY-----"
        ),
        "SSH_PRIVATE_KEY",
    ),

    # JWT: three base64url segments separated by '.', first one starting "eyJ".
    (re.compile(r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "JWT_TOKEN"),

    # Exactly 64 lowercase hex characters (e.g. an HMAC key / session token).
    # NOTE(review): the earlier comment said ">= 32 digits", but the pattern
    # has always required exactly 64.  It is left at 64 on purpose: widening it
    # to 32+ would also redact 32-hex identifiers such as W3C trace ids.
    (re.compile(r"\b[0-9a-f]{64}\b"), "HEX_SECRET_64"),
]
|
||
|
||
# Field-name deny list: when a dict key (lower-cased) is one of these, its
# value is replaced outright and no pattern scanning is performed on it.
_BLOCKED_FIELD_NAMES = frozenset(
    (
        "password",
        "passwd",
        "pwd",
        "secret",
        "token",
        "api_key",
        "apikey",
        "private_key",
        "private_key_pem",
        "bot_token",
        "telegram_token",
        "hmac_key",
        "jwt",
        "authorization",
        "cookie",
        "session",
    )
)
|
||
|
||
# Field names that carry raw LLM input/output; only a hash of the value is
# recorded for these keys.
_LLM_RAW_FIELDS = frozenset(
    (
        "raw_input",
        "raw_output",
        "llm_input",
        "llm_output",
        "prompt",
        "completion",
        "system_prompt",
    )
)
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Sanitization pipeline
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
def _redact_string(value: str) -> str:
    """Return ``value`` with every redaction pattern applied, in list order.

    Each match of a pattern in ``_REDACTION_PATTERNS`` is replaced by the
    marker ``[REDACTED:<tag>]`` for that pattern's tag.
    """
    redacted = value
    for matcher, label in _REDACTION_PATTERNS:
        placeholder = f"[REDACTED:{label}]"
        redacted = matcher.sub(placeholder, redacted)
    return redacted
|
||
|
||
|
||
def sanitize(details: dict[str, Any]) -> dict[str, Any]:
    """
    Recursively process the ``details`` dict, applying every redaction rule.

    Rule priority:
    1. key in _BLOCKED_FIELD_NAMES -> value replaced with [REDACTED:BLOCKED_FIELD]
    2. key in _LLM_RAW_FIELDS -> value replaced with a [LLM_RAW_HASH:...] marker
       holding the first 16 hex characters of its sha256 (hash only)
    3. string value -> pattern redaction
    4. nested dict/list -> processed recursively

    Returns a new structure produced by ``_sanitize_value`` starting at depth 0.
    """
    sanitized = _sanitize_value(details, 0)
    return sanitized
|
||
|
||
|
||
def _sanitize_value(value: Any, depth: int = 0) -> Any:
    """Sanitize one value of arbitrary type, recursing into containers.

    Args:
        value: The value to sanitize.
        depth: Current nesting depth; anything deeper than 10 levels is
            replaced by ``"[REDACTED:MAX_DEPTH]"`` instead of being walked.

    Returns:
        - dict: a new dict whose entries went through ``_sanitize_dict_entry``
        - list / tuple: a new list / tuple with every item sanitized
        - str: the string after pattern redaction
        - anything else: returned unchanged
    """
    if depth > 10:
        return "[REDACTED:MAX_DEPTH]"

    if isinstance(value, dict):
        return {k: _sanitize_dict_entry(k, v, depth) for k, v in value.items()}
    if isinstance(value, list):
        return [_sanitize_value(item, depth + 1) for item in value]
    if isinstance(value, tuple):
        # json.dumps serialises tuples as arrays, so their contents reach the
        # audit log just like list items; without this branch any secret held
        # in a tuple would be written unredacted.
        return tuple(_sanitize_value(item, depth + 1) for item in value)
    if isinstance(value, str):
        return _redact_string(value)
    return value
|
||
|
||
|
||
def _sanitize_dict_entry(key: Any, value: Any, depth: int) -> Any:
    """Sanitize the value stored under ``key`` in a dict.

    Args:
        key: The dict key.  Normally a string; other key types are compared
            by their ``str()`` form so an int/UUID key cannot crash the
            pipeline.
        value: The value stored under ``key``.
        depth: Nesting depth of the enclosing dict.

    Returns:
        ``"[REDACTED:BLOCKED_FIELD]"`` for deny-listed keys, a
        ``"[LLM_RAW_HASH:<16 hex>]"`` marker for raw LLM fields, otherwise
        the recursively sanitized value.
    """
    # str() first: calling .lower() on a non-string key would raise and make
    # the caller drop the whole audit record.
    key_lower = str(key).lower()

    if key_lower in _BLOCKED_FIELD_NAMES:
        return "[REDACTED:BLOCKED_FIELD]"

    if key_lower in _LLM_RAW_FIELDS:
        # Record only a truncated sha256, never the raw content.
        if isinstance(value, str):
            raw_str = value
        else:
            # default=str keeps hashing from failing on values json cannot
            # encode natively; only the digest is kept, so the stringified
            # form never reaches the log.
            raw_str = json.dumps(value, ensure_ascii=False, default=str)
        digest = hashlib.sha256(raw_str.encode()).hexdigest()[:16]
        return f"[LLM_RAW_HASH:{digest}]"

    return _sanitize_value(value, depth + 1)
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Audit write
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
# Strong references to in-flight audit write tasks.  The event loop only keeps
# weak references to tasks, so a task whose result is discarded can be
# garbage-collected before it finishes; entries remove themselves on completion.
_PENDING_AUDIT_TASKS: set[Any] = set()


async def write_audit(
    *,
    project_id: str,
    action: str,
    resource_type: str,
    resource_id: str,
    details: dict[str, Any] | None = None,
    run_id: str | None = None,
    trace_id: str | None = None,
) -> None:
    """
    Single entry point for audit-log writes (all Phase 4+ services must use it).

    The write is scheduled as a background task and this coroutine returns
    without waiting for it.  The task (``_write_audit_impl``) then:

    1. sanitizes ``details`` (PII / secret redaction)
    2. attaches ``run_id`` / ``trace_id`` (observability)
    3. INSERTs the row into ``audit_logs``

    Args:
        project_id: Project the audit row belongs to.
        action: Action name, e.g. ``"run.completed"``.
        resource_type: Type of the affected resource.
        resource_id: Identifier of the affected resource.
        details: Optional extra payload.  It is sanitized later, inside the
            background task, so callers should not mutate it after this call.
        run_id: Optional run identifier stored alongside the details.
        trace_id: Optional trace identifier stored alongside the details.
    """
    import asyncio

    task = asyncio.create_task(
        _write_audit_impl(
            project_id=project_id,
            action=action,
            resource_type=resource_type,
            resource_id=resource_id,
            details=details,
            run_id=run_id,
            trace_id=trace_id,
        ),
        name="audit_sink_write",
    )
    # Hold the task until it is done so it cannot disappear mid-write.
    _PENDING_AUDIT_TASKS.add(task)
    task.add_done_callback(_PENDING_AUDIT_TASKS.discard)
|
||
|
||
|
||
async def _write_audit_impl(
    *,
    project_id: str,
    action: str,
    resource_type: str,
    resource_id: str,
    details: dict[str, Any] | None,
    run_id: str | None,
    trace_id: str | None,
) -> None:
    """Sanitize ``details`` and insert one row into ``audit_logs``.

    Best-effort: any exception is caught and logged as a warning
    (``audit_sink_write_failed``) instead of being raised.

    Args:
        project_id: Project id; stored in the row and passed to
            ``get_db_context``.
        action: Action name stored in the row.
        resource_type: Resource type stored in the row.
        resource_id: Resource identifier stored in the row.
        details: Raw details payload; ``None`` is treated as an empty dict.
        run_id: If truthy, stored under ``_run_id`` in the details JSON.
        trace_id: If truthy, stored under ``_trace_id`` in the details JSON.
    """
    try:
        from sqlalchemy import text as sa_text
        from src.db.base import get_db_context

        clean_details: dict[str, Any] = sanitize(details or {})
        # Correlation ids are added after sanitization, so they are stored
        # verbatim rather than passed through the redaction patterns.
        if run_id:
            clean_details["_run_id"] = run_id
        if trace_id:
            clean_details["_trace_id"] = trace_id

        async with get_db_context(project_id) as db:
            await db.execute(
                # CAST(... AS jsonb) rather than ":details::jsonb": a "::" cast
                # written directly after a bind name collides with text()'s
                # ":name" bind-parameter syntax and is not parsed reliably.
                sa_text("""
                    INSERT INTO audit_logs
                        (project_id, action, resource_type, resource_id, details)
                    VALUES
                        (:project_id, :action, :resource_type, :resource_id,
                         CAST(:details AS jsonb))
                """),
                {
                    "project_id": project_id,
                    "action": action,
                    "resource_type": resource_type,
                    "resource_id": resource_id,
                    "details": json.dumps(clean_details),
                },
            )
    except Exception as exc:
        # Deliberate best-effort boundary: this runs as a background task, so
        # a failure is reported through the logger instead of propagating.
        logger.warning(
            "audit_sink_write_failed",
            action=action,
            resource_id=resource_id,
            error=str(exc),
        )
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Convenience:可在測試中驗證 sanitization 結果
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
def sanitize_for_test(details: dict[str, Any]) -> dict[str, Any]:
    """Synchronous pass-through to ``sanitize``, exposed for use in tests."""
    cleaned = sanitize(details)
    return cleaned
|