fix(telegram): sanitize failover alert errors
This commit is contained in:
@@ -12,7 +12,8 @@ from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
@@ -23,6 +24,7 @@ RECOVERY_DEDUP_TTL_SEC = 3600 # 1h — GCP 健康閃爍時 1 小時內不重複
|
||||
QUOTA_DEDUP_TTL_SEC = 86400 # 24h(每日 quota 告警只發一次)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
# Matches Telegram Bot API URLs so the token segment that follows "bot" can be
# masked. Covers both method calls (api.telegram.org/bot<token>/...) and file
# downloads (api.telegram.org/file/bot<token>/...). Group 1 keeps everything up
# to and including "bot"; the token itself (up to the next "/" or whitespace)
# is left outside the group so a substitution can drop it.
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/(?:file/)?bot)[^/\s]+")
|
||||
|
||||
|
||||
class FailoverAlerter:
|
||||
@@ -157,7 +159,7 @@ class FailoverAlerter:
|
||||
if impact_lines:
|
||||
sections.append(f"\n*影響*\n{impact_lines}")
|
||||
if remediation_lines or next_action_line:
|
||||
sections.append(f"\n*修復方向*")
|
||||
sections.append("\n*修復方向*")
|
||||
if remediation_lines:
|
||||
sections.append(remediation_lines)
|
||||
if next_action_line:
|
||||
@@ -285,8 +287,8 @@ class FailoverAlerter:
|
||||
2026-04-25 P1.5 by Claude Engineer-D — 告警失敗不能阻斷主流程
|
||||
"""
|
||||
try:
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
from src.core.config import get_settings
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
settings = get_settings()
|
||||
chat_id = getattr(settings, "SRE_GROUP_CHAT_ID", None) or getattr(settings, "OPENCLAW_TG_CHAT_ID", None)
|
||||
@@ -299,7 +301,13 @@ class FailoverAlerter:
|
||||
logger.info("telegram_failover_alert_sent", message_len=len(message))
|
||||
except Exception as e:
|
||||
# 不 raise — 告警失敗不該阻斷主流程(鐵律)
|
||||
logger.exception("telegram_failover_send_failed", error=str(e))
|
||||
# 2026-05-06 Codex: Telegram/httpx exception 字串可能包含 bot token URL,
|
||||
# 禁止用 logger.exception 輸出 chained traceback。
|
||||
logger.warning(
|
||||
"telegram_failover_send_failed",
|
||||
error=_sanitize_telegram_error(str(e)),
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@@ -319,6 +327,11 @@ def _escape_md(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def _sanitize_telegram_error(text: str) -> str:
    """Mask the bot token in any Telegram Bot URL contained in ``text``.

    Every substring matched by ``_TELEGRAM_BOT_URL_RE`` is rewritten so that
    the captured URL prefix is kept and the token part is replaced with the
    literal ``<redacted>``. Text without a match is returned unchanged.

    Args:
        text: Error/exception text that may embed a Telegram Bot API URL.

    Returns:
        The same text with the token portion of each matched URL masked, so
        it is safe to write to logs.
    """
    redacted = _TELEGRAM_BOT_URL_RE.sub(
        lambda match: match.group(1) + "<redacted>",
        text,
    )
    return redacted
|
||||
|
||||
|
||||
def _as_dict(value: Any) -> dict[str, Any]:
    """Return ``value`` itself when it is a dict, otherwise a new empty dict.

    Lets callers treat optional or malformed payload sections uniformly as
    mappings without checking the type first.
    """
    if not isinstance(value, dict):
        return {}
    return value
|
||||
|
||||
@@ -334,7 +347,7 @@ def _lines_from_dict(data: dict[str, Any], max_items: int = 20, compact: bool =
|
||||
rows.append(f"{_escape_md(str(k))}:{_escape_md(str(data.get(k)))}")
|
||||
idx += 1
|
||||
if compact and len(rows) >= max_items:
|
||||
rows.append("...(更多欄位略)")
|
||||
rows.append(_escape_md("...(更多欄位略)"))
|
||||
return "\n".join(f" {line}" for line in rows)
|
||||
|
||||
|
||||
@@ -342,7 +355,7 @@ def _lines_from_list(value: Any) -> str:
|
||||
if not isinstance(value, list):
|
||||
return ""
|
||||
return "\n".join(
|
||||
f" {idx + 1}. {_escape_md(str(item))}"
|
||||
f" {_escape_md(str(idx + 1))}\\. {_escape_md(str(item))}"
|
||||
for idx, item in enumerate(value)
|
||||
)
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ SOUL.md 鐵律 (4.1 Telegram 訊息壓縮原則):
|
||||
import asyncio
|
||||
import html
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
|
||||
@@ -58,6 +59,12 @@ POLLING_LEADER_RENEW = 20 # seconds - 每 20s 續約
|
||||
POLLING_LEADER_WATCH = 30 # seconds - 非 Leader Pod 每 30s 嘗試接管
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
# Matches Telegram Bot API URLs so the token segment that follows "bot" can be
# masked. Covers both method calls (api.telegram.org/bot<token>/...) and file
# downloads (api.telegram.org/file/bot<token>/...). Group 1 keeps everything up
# to and including "bot"; the token (up to the next "/" or whitespace) is
# deliberately left outside the group so the substitution drops it.
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/(?:file/)?bot)[^/\s]+")


def _sanitize_telegram_error(text: str) -> str:
    """Mask the bot token in any Telegram Bot URL contained in ``text``.

    Exception strings raised by the HTTP client can embed the full request
    URL, which includes the bot token. This keeps the URL prefix and replaces
    the token part with the literal ``<redacted>`` so the result can be
    written to logs and traces.

    Args:
        text: Error/exception text that may embed a Telegram Bot API URL.

    Returns:
        ``text`` with the token of every matched URL masked; unchanged when
        no Telegram Bot URL is present.
    """
    return _TELEGRAM_BOT_URL_RE.sub(r"\1<redacted>", text)
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: B3 — LLM 動態 Telegram 按鈕 Feature Flag
|
||||
# true → 優先使用 ActionPlan.recommended_actions 動態生成按鈕
|
||||
@@ -1468,20 +1475,28 @@ class TelegramGateway:
|
||||
except httpx.HTTPStatusError as e:
|
||||
span.set_attribute("telegram.http_status", e.response.status_code)
|
||||
span.set_status(trace.Status(trace.StatusCode.ERROR))
|
||||
span.record_exception(e)
|
||||
span.record_exception(
|
||||
TelegramGatewayError(f"HTTP error: {e.response.status_code}")
|
||||
)
|
||||
logger.error("telegram_api_error", method=method, status=e.response.status_code,
|
||||
response_body=e.response.text[:500])
|
||||
raise TelegramGatewayError(f"HTTP error: {e.response.status_code}") from e
|
||||
raise TelegramGatewayError(f"HTTP error: {e.response.status_code}") from None
|
||||
|
||||
except TelegramGatewayError:
|
||||
# 已處理的錯誤,直接拋出
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
safe_error = _sanitize_telegram_error(str(e))
|
||||
span.set_status(trace.Status(trace.StatusCode.ERROR))
|
||||
span.record_exception(e)
|
||||
logger.error("telegram_request_failed", method=method, error=str(e))
|
||||
raise TelegramGatewayError(str(e)) from e
|
||||
span.record_exception(TelegramGatewayError(safe_error))
|
||||
logger.error(
|
||||
"telegram_request_failed",
|
||||
method=method,
|
||||
error=safe_error,
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
raise TelegramGatewayError(safe_error) from None
|
||||
|
||||
async def _build_inline_keyboard(
|
||||
self,
|
||||
|
||||
@@ -19,6 +19,8 @@ from src.services.failover_alerter import (
|
||||
DEDUP_TTL_SEC,
|
||||
QUOTA_DEDUP_TTL_SEC,
|
||||
FailoverAlerter,
|
||||
_lines_from_list,
|
||||
_sanitize_telegram_error,
|
||||
configure_alerter,
|
||||
get_failover_alerter,
|
||||
reset_failover_alerter,
|
||||
@@ -231,3 +233,19 @@ async def test_memory_dedup_max_size_gc():
|
||||
|
||||
# GC 後:999 個 stale entry 被清除,只剩 fresh:key + trigger:gc:key
|
||||
assert len(alerter._memory_dedup) <= 3 # fresh + trigger + 可能有邊界差1
|
||||
|
||||
|
||||
def test_lines_from_list_escapes_markdown_v2_numbered_periods() -> None:
    """The numbered-list renderer must escape the ordinal period and hyphens."""
    output = _lines_from_list(["修復 node-exporter-110"])

    # Both the "1." ordinal and the hyphens inside the item need a backslash
    # escape in the rendered text.
    for expected_fragment in ("1\\.", "node\\-exporter\\-110"):
        assert expected_fragment in output
|
||||
|
||||
|
||||
def test_sanitize_telegram_error_redacts_bot_token_url() -> None:
    """The sanitizer must drop the whole token and keep the rest of the text."""
    raw = "HTTP error for https://api.telegram.org/bot123456:SECRET/sendMessage"

    sanitized = _sanitize_telegram_error(raw)

    # Neither half of the "<bot_id>:<secret>" token may survive.
    assert "SECRET" not in sanitized
    assert "123456" not in sanitized
    assert "bot<redacted>" in sanitized
    # Pin the exact output so over- or under-redaction is caught as well.
    assert sanitized == (
        "HTTP error for https://api.telegram.org/bot<redacted>/sendMessage"
    )
|
||||
|
||||
12
apps/api/tests/test_telegram_gateway_error_sanitizer.py
Normal file
12
apps/api/tests/test_telegram_gateway_error_sanitizer.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from src.services.telegram_gateway import _sanitize_telegram_error
|
||||
|
||||
|
||||
def test_telegram_gateway_sanitizes_bot_token_url() -> None:
    """The gateway sanitizer must drop the whole token and keep the rest."""
    raw = "Client error for https://api.telegram.org/bot123456:SECRET/sendMessage"

    sanitized = _sanitize_telegram_error(raw)

    # Neither half of the "<bot_id>:<secret>" token may survive.
    assert "SECRET" not in sanitized
    assert "123456" not in sanitized
    assert "bot<redacted>" in sanitized
    # Pin the exact output so over- or under-redaction is caught as well.
    assert sanitized == (
        "Client error for https://api.telegram.org/bot<redacted>/sendMessage"
    )
|
||||
@@ -3825,3 +3825,35 @@ ruff check apps/api/tests/test_approval_execution_mcp_audit.py
|
||||
|
||||
- 本次是「先補 durable audit + legacy 標記」,不是直接硬切 MCP Gateway enforcement;原因是 AwoooP project / agent / grant contract 尚未覆蓋所有 legacy 修復路徑,硬切會中斷現有 approved execution。
|
||||
- 下一步應將 `decision_manager.py`、`pre_decision_investigator.py`、`post_execution_verifier.py`、`callback_dispatcher.py` 的 direct MCP caller 逐步套同一種可追蹤 wrapper,最後再切到 `McpGateway.call()` enforcement。
|
||||
|
||||
---
|
||||
|
||||
## 2026-05-06(台北)— Telegram failover 告警 400 與 token log 外洩修補
|
||||
|
||||
**觸發**:production API log 顯示 `telegram_failover_send_failed`,Telegram `sendMessage` 回 400;同時 chained traceback 內含 Telegram Bot URL,會把 token 形式的敏感資訊寫入 log / trace。
|
||||
|
||||
### 已修正
|
||||
|
||||
| 範圍 | 結果 |
|
||||
|------|------|
|
||||
| `failover_alerter.py` | 失敗時不再使用 `logger.exception()` 輸出 chained traceback,改記錄已遮蔽的錯誤文字與錯誤類型 |
|
||||
| MarkdownV2 | `_lines_from_list()` 將編號句點輸出為已跳脫的 `1\.`(Python 原始碼中寫作 `\\.`),並補上 compact 省略文字的 MarkdownV2 escape,避免治理告警清單觸發 Telegram parse 400 |
|
||||
| `telegram_gateway.py` | HTTPStatusError 不再 `raise ... from e`,OTel span 也只記 sanitized gateway error,避免 httpx exception 字串帶出 Bot URL |
|
||||
| 測試 | 新增 Telegram error sanitizer 與 MarkdownV2 編號 escape 回歸測試 |
|
||||
|
||||
### 驗證
|
||||
|
||||
```text
|
||||
pytest apps/api/tests/test_failover_alerter.py apps/api/tests/test_telegram_gateway_error_sanitizer.py apps/api/tests/test_heartbeat_dedup_p0_4.py -q
|
||||
# 17 passed
|
||||
|
||||
py_compile apps/api/src/services/failover_alerter.py apps/api/src/services/telegram_gateway.py apps/api/tests/test_failover_alerter.py apps/api/tests/test_telegram_gateway_error_sanitizer.py
|
||||
# 通過
|
||||
|
||||
ruff check apps/api/src/services/failover_alerter.py apps/api/tests/test_failover_alerter.py apps/api/tests/test_telegram_gateway_error_sanitizer.py
|
||||
# All checks passed
|
||||
```
|
||||
|
||||
### 注意
|
||||
|
||||
- `telegram_gateway.py` 全檔仍有大量既有 ruff 債,本次只針對 token 外洩與 MarkdownV2 400 風險做最小安全修補,避免在 6000+ 行 gateway 巨檔混入無關機械改動。
|
||||
|
||||
Reference in New Issue
Block a user