fix(mcp): persist blocked gateway audit rows
Some checks failed
Code Review / ai-code-review (push) Successful in 16s
run-migration / migrate (push) Failing after 9s
CD Pipeline / tests (push) Successful in 1m8s
CD Pipeline / build-and-deploy (push) Successful in 3m59s
CD Pipeline / post-deploy-checks (push) Successful in 1m46s

This commit is contained in:
Your Name
2026-05-06 16:21:43 +08:00
parent 1e68d45659
commit 7ed8c95409
5 changed files with 108 additions and 9 deletions

View File

@@ -0,0 +1,14 @@
-- AwoooP Phase 5b: MCP Gateway blocked-call audit coverage
-- Date: 2026-05-06
-- Maintainer: Codex
--
-- Blocked calls from Gate 1 / Gate 2 / unknown tools can happen before the
-- tool registry row has been fetched. Those security decisions must still be
-- written to the audit log, so tool_id is allowed to be NULL, while tool_name
-- stays required as the tracing clue for unknown tools and early gate blocks.
BEGIN;
ALTER TABLE awooop_mcp_gateway_audit
ALTER COLUMN tool_id DROP NOT NULL;
COMMIT;

View File

@@ -10,7 +10,7 @@ from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from typing import Any
from uuid import UUID, uuid4
from uuid import UUID
from sqlalchemy import (
Boolean,
@@ -577,8 +577,8 @@ class AwoooPMcpGatewayAudit(Base):
run_id: Mapped[UUID | None] = mapped_column(nullable=True)
trace_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
agent_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
tool_id: Mapped[UUID] = mapped_column(
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=False
tool_id: Mapped[UUID | None] = mapped_column(
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=True
)
tool_name: Mapped[str] = mapped_column(String(128), nullable=False)
credential_ref: Mapped[str | None] = mapped_column(String(256), nullable=True)

View File

@@ -39,7 +39,7 @@ import hashlib
import json
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from datetime import UTC, datetime
from typing import Any
from uuid import UUID
@@ -47,6 +47,7 @@ import structlog
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from src.core.redis_client import get_redis
from src.db.awooop_models import (
AwoooPActiveRevision,
AwoooPMcpGatewayAudit,
@@ -54,7 +55,6 @@ from src.db.awooop_models import (
AwoooPMcpToolRegistry,
AwoooPProject,
)
from src.core.redis_client import get_redis
from src.plugins.mcp.interfaces import MCPToolResult
from src.plugins.mcp.registry import get_provider_registry
@@ -278,7 +278,7 @@ class McpGateway:
self, ctx: GatewayContext, gate_result: GateCheckResult
) -> tuple[AwoooPMcpToolRegistry, AwoooPMcpGrant]:
"""Gate 3tool 在白名單 + grant 有效(未到期、未撤銷)"""
now = datetime.now(timezone.utc)
now = datetime.now(UTC)
# 查 tool registry
tool_result = await self._db.execute(
@@ -458,9 +458,8 @@ class McpGateway:
latency_ms=latency_ms,
)
if tool_row is not None:
self._db.add(audit)
await self._db.flush()
self._db.add(audit)
await self._db.flush()
except Exception as exc:
logger.warning(
"mcp_gateway_audit_write_failed",

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
import uuid
import pytest
from src.plugins.mcp.gateway import GateCheckResult, GatewayContext, McpGateway
class FakeDb:
    """In-memory stand-in for the DB session handed to the gateway in tests.

    It records every object passed to ``add`` and counts ``flush`` calls, so a
    test can assert on what the code under test tried to persist.
    """

    def __init__(self) -> None:
        self.flush_count = 0
        self.added: list[object] = []

    def add(self, item: object) -> None:
        """Remember *item* in call order instead of staging it anywhere."""
        self.added += [item]

    async def flush(self) -> None:
        """Count the flush call; nothing is written."""
        self.flush_count = self.flush_count + 1
@pytest.mark.asyncio
async def test_write_audit_persists_blocked_gate_without_tool_row() -> None:
    """A blocked call with no tool registry row still adds and flushes an audit row."""
    fake_session = FakeDb()
    expected_run_id = uuid.uuid4()
    gateway = McpGateway(fake_session)
    blocked_ctx = GatewayContext(
        project_id="awoooi",
        agent_id="openclaw-sre",
        tool_name="missing_tool",
        run_id=expected_run_id,
        trace_id="trace-audit-gap",
    )

    # tool_row=None models a block decided before any registry row was available.
    await gateway._write_audit(
        ctx=blocked_ctx,
        tool_row=None,
        parameters={"namespace": "awoooi-prod"},
        result=None,
        gate_result=GateCheckResult(),
        result_status="blocked",
        block_gate=1,
        block_reason="E-MCP-GATE-001: project blocked",
        latency_ms=12,
    )

    # Exactly one row must have been staged and flushed.
    assert fake_session.flush_count == 1
    assert len(fake_session.added) == 1

    (audit,) = fake_session.added
    assert audit.project_id == "awoooi"
    assert audit.run_id == expected_run_id
    # No registry row means no tool_id; tool_name is the remaining trace handle.
    assert audit.tool_id is None
    assert audit.tool_name == "missing_tool"
    assert audit.result_status == "blocked"
    assert audit.block_gate == 1

View File

@@ -3744,3 +3744,36 @@ Sentry consumers reset 後狀態
- `DockerContainerRestartSpike` 使用 15 分鐘窗口,已發生的 restart spike 會在 Prometheus 窗口過去後退火;若短時間仍看到舊訊息,優先查 live `ALERTS{alertname="DockerContainerRestartSpike"}` 是否已歸零。
- Alertmanager 本身不支援「webhook send failed 後再 fallback receiver」語義,因此 direct Telegram 只能以明確的 API/AlertChain 健康告警作為 emergency gate。
---
## 2026-05-06(台北)— MCP Gateway blocked audit 缺口修補
**觸發**:AwoooP / AI 自動化飛輪整合審查指出 MCP Gateway Gate 1 / Gate 2 / 未註冊工具被攔截時,可能因尚未取得 `tool_id` 而沒有落 `awooop_mcp_gateway_audit`,造成安全決策不可追溯。
### 已修正
| 範圍 | 結果 |
|------|------|
| ORM | `AwoooPMcpGatewayAudit.tool_id` 改為可空,保留 `tool_name` 作為未知工具或早期 gate blocked call 的稽核線索 |
| DB migration | 新增 `awooop_phase5b_mcp_gateway_audit_nullable_tool_2026-05-06.sql`,對既有表執行 `ALTER COLUMN tool_id DROP NOT NULL` |
| Gateway audit | `_write_audit()` 不再只於 `tool_row is not None` 時 add/flush;blocked call 一律嘗試落 audit |
| 回歸測試 | 新增 `test_mcp_gateway_audit.py`,驗證沒有 `tool_row` 的 Gate blocked call 仍會寫入 audit row |
### 驗證
```text
pytest apps/api/tests/test_mcp_gateway_audit.py apps/api/tests/test_mcp_gateway_gate5.py apps/api/tests/test_mcp_credential_isolation.py apps/api/tests/test_mcp_tool_registry.py -q
# 43 passed
py_compile apps/api/src/plugins/mcp/gateway.py apps/api/src/db/awooop_models.py apps/api/tests/test_mcp_gateway_audit.py
# 通過
ruff check apps/api/src/plugins/mcp/gateway.py apps/api/src/db/awooop_models.py apps/api/tests/test_mcp_gateway_audit.py
# All checks passed
```
### 後續
- 部署後必須確認 DB migration 有被套用,否則 production 仍會因 `tool_id NOT NULL` 擋住 Gate 1 / Gate 2 blocked audit。
- 下一步繼續收斂 direct provider / legacy MCP caller,讓 MCP Gateway 成為真正 choke point。