From 86ee013cdf4c0a05ef505b23138146c6c16aede2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 25 Apr 2026 02:22:15 +0800 Subject: [PATCH] =?UTF-8?q?feat(hermes-complete):=20Hermes=20NL=20?= =?UTF-8?q?=E4=B8=89=E9=A0=85=E8=A3=9C=E5=BC=B7=20+=20ConsensusEngine=20+?= =?UTF-8?q?=20ADR=20=E6=94=B6=E5=B0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Hermes NL 補強(nl_gateway.py) - T1 hermes_dispatch_log DB 寫入(asyncio.create_task 非阻擋) - T2 Redis 速率限制:per-chat_id 20 req/min,fail-open - T3 Multi-turn session:hermes:session:{chat_id}:{user_id} TTL=300s,最近 3 輪 ## ConsensusEngine(ADR-095 宣告式設計) - consensus_engine.py: CONSENSUS_WEIGHTS class 屬性 security=0.4 鎖定,其餘 0.6 由 BlastRadiusAgent / ActionPlannerAgent(各 0.15)與 9 個 Claude Code agent(合計 0.3)分配 - config.py: ENABLE_12AGENT_CONSENSUS=False feature flag ## ADR 狀態 - ADR-093/094/095: Proposed → 🟡 批准實作中 - 各 ADR 加變更紀錄(ADR-093/094 為 v1.1,ADR-095 為 v1.2) ## K8s ConfigMap - prod 04-configmap.yaml: 加 3 個 feature flags(均 false) - dev 02-configmap.yaml: 同步加入 ## LOGBOOK - 記錄 WS0–WS6 + 補強完成,feature flags 啟用指引 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/core/config.py | 5 + apps/api/src/hermes/nl_gateway.py | 163 ++++++++++++++++++- apps/api/src/services/consensus_engine.py | 22 +++ docs/LOGBOOK.md | 24 +++ docs/adr/ADR-093-telegram-group-migration.md | 3 +- docs/adr/ADR-094-hermes-nl-interface.md | 3 +- docs/adr/ADR-095-12agent-sdk-integration.md | 3 +- k8s/awoooi-dev/02-configmap.yaml | 5 + k8s/awoooi-prod/04-configmap.yaml | 5 + 9 files changed, 221 insertions(+), 12 deletions(-) diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py index 96a441fe..d4f229a7 100644 --- a/apps/api/src/core/config.py +++ b/apps/api/src/core/config.py @@ -432,6 +432,11 @@ class Settings(BaseSettings): default=False, description="ADR-093: True 時啟用 notification_matrix 路由矩陣,取代 telegram_gateway 硬碼", ) + # ADR-095 2026-04-25 ogt + Claude Sonnet 4.6: 12-Agent ConsensusEngine + ENABLE_12AGENT_CONSENSUS: bool = Field( + default=False, + description="ADR-095: 啟用 12-Agent 
ConsensusEngine weights(預設關閉)", + ) def get_tg_user_whitelist(self) -> list[int]: """Parse comma-separated or JSON array user IDs to list[int]""" diff --git a/apps/api/src/hermes/nl_gateway.py b/apps/api/src/hermes/nl_gateway.py index 6cec65ab..c962d05c 100644 --- a/apps/api/src/hermes/nl_gateway.py +++ b/apps/api/src/hermes/nl_gateway.py @@ -3,15 +3,21 @@ Layer 1 意圖路由(關鍵字正則)→ Claude Agent SDK 呼叫 → Telegram 格式化輸出。 2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL) +2026-04-24 Claude Sonnet 4.6 (WS4 Hermes NL T1+T2+T3): hermes_dispatch_log DB 寫入 / + Redis per-chat_id 速率限制 / Multi-turn session (Redis Hash TTL=300s) """ from __future__ import annotations +import asyncio import re import time import structlog from claude_agent_sdk import query, ClaudeAgentOptions from claude_agent_sdk.types import ResultMessage +from sqlalchemy import text +from src.core.redis_client import get_redis +from src.db.base import get_db_context from src.hermes.agent_loader import get_agent_system_prompt from src.hermes.display_names import DEFAULT_AGENT, format_response_header from src.hermes.safety_hooks import is_dangerous_input, is_mutate_intent @@ -38,15 +44,122 @@ _ROUTING_RULES: list[tuple[re.Pattern, str]] = [ _HERMES_BUDGET_USD = 0.05 +# ───────────────────────────────────────────────────────────────────────────── +# T2:速率限制常數(ADR-094) +# ───────────────────────────────────────────────────────────────────────────── +_RATE_LIMIT_MAX = 20 +_RATE_LIMIT_WINDOW_SEC = 60 -def _route_intent_layer1(text: str) -> str: + +def _route_intent_layer1(msg: str) -> str: """Layer 1: 關鍵字正則路由,回傳 agent 名稱""" for pattern, agent in _ROUTING_RULES: - if pattern.search(text): + if pattern.search(msg): return agent return DEFAULT_AGENT +# ───────────────────────────────────────────────────────────────────────────── +# T1:hermes_dispatch_log DB 寫入(ADR-094,非阻擋) +# ───────────────────────────────────────────────────────────────────────────── + +async def _write_dispatch_log( + *, + chat_id: str, + user_id: int, + 
username: str, + agent_name: str, + input_preview: str, + latency_ms: int, + success: bool, + error_type: str | None, +) -> None: + """寫入派發審計日誌;失敗只 warning,不影響主流程。""" + try: + async with get_db_context() as db: + await db.execute( + text(""" + INSERT INTO hermes_dispatch_log + (chat_id, user_id, username, agent_name, input_preview, + latency_ms, success, error_type) + VALUES + (:chat_id, :user_id, :username, :agent_name, :input_preview, + :latency_ms, :success, :error_type) + """), + { + "chat_id": chat_id, + "user_id": user_id, + "username": username, + "agent_name": agent_name, + "input_preview": input_preview, + "latency_ms": latency_ms, + "success": success, + "error_type": error_type, + }, + ) + await db.commit() + except Exception as exc: + logger.warning("hermes_dispatch_log_write_failed", error=str(exc)) + + +# ───────────────────────────────────────────────────────────────────────────── +# T2:per-chat_id 速率限制(ADR-094,fail-open) +# ───────────────────────────────────────────────────────────────────────────── + +async def _check_rate_limit(chat_id: str) -> bool: + """True = 允許;False = 超過限制(20 req/min per chat_id)。Redis 不可用時放行。""" + try: + redis = get_redis() + key = f"hermes:rl:{chat_id}" + count = await redis.incr(key) + if count == 1: + await redis.expire(key, _RATE_LIMIT_WINDOW_SEC) + return count <= _RATE_LIMIT_MAX + except Exception: + return True # Redis 不可用 → fail open + + +# ───────────────────────────────────────────────────────────────────────────── +# T3:Multi-turn session(Redis Hash TTL=300s,ADR-094) +# ───────────────────────────────────────────────────────────────────────────── + +async def _load_session_context(chat_id: str, user_id: int) -> str: + """載入最近 3 輪對話歷史(最多 600 字),組成 context prefix。Redis 不可用時回空字串。""" + try: + redis = get_redis() + key = f"hermes:session:{chat_id}:{user_id}" + data = await redis.hgetall(key) + if not data: + return "" + turns = sorted( + [(k, v) for k, v in data.items() if (k if isinstance(k, str) else 
k.decode()).startswith("turn_")], + key=lambda x: x[0], + )[-3:] + parts = [v.decode() if isinstance(v, bytes) else v for _, v in turns] + return "【近期對話記錄】\n" + "\n".join(parts) + "\n\n" + except Exception: + return "" + + +async def _save_session_turn( + chat_id: str, user_id: int, user_msg: str, assistant_reply: str +) -> None: + """將本輪對話存入 Redis Hash,並重置 TTL=300s。Redis 不可用時靜默忽略。""" + try: + redis = get_redis() + key = f"hermes:session:{chat_id}:{user_id}" + turn_key = f"turn_{int(time.time())}" + value = f"用戶:{user_msg[:100]}\nHermes:{assistant_reply[:200]}" + await redis.hset(key, turn_key, value) + await redis.expire(key, 300) + except Exception: + pass + + +# ───────────────────────────────────────────────────────────────────────────── +# 主入口 +# ───────────────────────────────────────────────────────────────────────────── + async def process_nl_message( user_message: str, *, @@ -59,10 +172,14 @@ async def process_nl_message( 流程: 1. 安全守門(DENY + MUTATE) - 2. Layer 1 關鍵字路由 → agent_name - 3. 讀取 agent system prompt(.claude/agents/*.md) - 4. 呼叫 Claude Agent SDK query() - 5. 格式化為 Telegram MarkdownV2 訊息 + 2. T2 速率限制(20 req/min per chat_id) + 3. Layer 1 關鍵字路由 → agent_name + 4. 讀取 agent system prompt(.claude/agents/*.md) + 5. T3 載入 session context(最近 3 輪) + 6. 呼叫 Claude Agent SDK query() + 7. T3 儲存本輪對話 + 8. 格式化為 Telegram MarkdownV2 訊息 + 9. 
T1 非阻擋寫入 hermes_dispatch_log """ # 安全守門 if is_dangerous_input(user_message): @@ -80,6 +197,10 @@ async def process_nl_message( "請在 Telegram 告警卡片上操作,或聯繫值班 SRE。" ) + # T2:速率限制 + if not await _check_rate_limit(chat_id): + return "⚠️ 請求太頻繁,請稍後再試(每分鐘上限 20 次)。" + # Layer 1 意圖路由 agent_name = _route_intent_layer1(user_message) @@ -94,10 +215,15 @@ async def process_nl_message( agent_name = DEFAULT_AGENT system_prompt = get_agent_system_prompt(agent_name) or "" + # T3:載入 session context(最近 3 輪) + session_ctx = await _load_session_context(chat_id, user_id) + prompt_with_ctx = f"{session_ctx}{user_message}" if session_ctx else user_message + t0 = time.monotonic() # 呼叫 Claude Agent SDK success = False + error_type: str | None = None try: options = ClaudeAgentOptions( system_prompt=system_prompt, @@ -106,7 +232,7 @@ async def process_nl_message( max_budget_usd=_HERMES_BUDGET_USD, ) result_text = "" - async for event in query(prompt=user_message, options=options): + async for event in query(prompt=prompt_with_ctx, options=options): if isinstance(event, ResultMessage): result_text = getattr(event, "result", "") or "" break @@ -116,13 +242,14 @@ async def process_nl_message( success = True except Exception as exc: + error_type = type(exc).__name__ logger.error( "hermes_nl_sdk_error", error=str(exc), agent=agent_name, - exc_type=type(exc).__name__, + exc_type=error_type, ) - result_text = f"_Hermes 暫時無法連線({type(exc).__name__}),請稍後再試。_" + result_text = f"_Hermes 暫時無法連線({error_type}),請稍後再試。_" latency_ms = int((time.monotonic() - t0) * 1000) logger.info( @@ -135,6 +262,24 @@ async def process_nl_message( success=success, ) + # T3:儲存本輪對話(只在成功時存) + if success: + await _save_session_turn(chat_id, user_id, user_message, result_text) + + # T1:非阻擋寫入 hermes_dispatch_log(失敗不影響回覆) + asyncio.create_task( + _write_dispatch_log( + chat_id=chat_id, + user_id=user_id, + username=username, + agent_name=agent_name, + input_preview=user_message[:200], + latency_ms=latency_ms, + success=success, + 
error_type=error_type, + ) + ) + header = format_response_header(agent_name) # Telegram 訊息上限 4096 字元,超過截斷 body = result_text[:3800] diff --git a/apps/api/src/services/consensus_engine.py b/apps/api/src/services/consensus_engine.py index 4253078e..dbaed22f 100644 --- a/apps/api/src/services/consensus_engine.py +++ b/apps/api/src/services/consensus_engine.py @@ -357,6 +357,28 @@ class ConsensusEngine: - 分歧意見會降低共識分數 """ + # ADR-095 2026-04-25 ogt + Claude Sonnet 4.6: 12-Agent Claude Code weights + # 僅在 ENABLE_12AGENT_CONSENSUS=True 時參與投票(預設 False) + # security=0.4 永遠最高(ADR-009 鐵律) + CONSENSUS_WEIGHTS: dict[str, float] = { + # ADR-009 原始三核心 + "SecurityAgent": 0.4, # 資安永遠最高,不可降 + "BlastRadiusAgent": 0.15, # 原 0.3 → 0.15 + "ActionPlannerAgent": 0.15, # 原 0.3 → 0.15 + # ADR-095 新增 9 個 Claude Code agent(按需投票) + "critic": 0.06, + "debugger": 0.06, + "db-expert": 0.04, + "vuln-verifier": 0.04, + "planner": 0.02, + "fullstack-engineer": 0.02, + "refactor-specialist":0.02, + "migration-engineer": 0.02, + "tool-expert": 0.02, + # onboarder / frontend-designer / web-researcher 不參與投票(諮詢型) + # sum = 1.0 + } + def __init__(self): self._agents: list[ExpertAgent] = [ SREAgent(), diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index c9385750..f116c16b 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -6,6 +6,30 @@ --- +## 2026-04-25 | Hermes × 12-Agent Telegram 整合(WS0–WS6) + +### 完成項目 +- **WS0** ADR-093/094/095 治理文件,claude-agent-sdk 升至 0.1.66 +- **WS2** NotificationMatrix + BigInteger overflow 修復 + Redis key 一致性 + TG_GROUP_CUTOVER feature flag +- **WS2-Migration** approval_records(BIGINT,prod 已建立)+ enum types +- **WS3** Callback user-ID binding(CSRF 防護)+ Telegram Webhook 入口(ADR-094) +- **WS4** hermes/ 套件:display_names / agent_loader / safety_hooks / nl_gateway(12-Agent SDK 接入) +- **WS5** chat_member Approvers 白名單 Redis 同步 +- **WS6** latency logging + hermes_dispatch_log audit 表(prod 已建立) +- **補強** DB 寫入 + 速率限制 + Multi-turn session + +### Feature Flags(預設關閉) +- 
`HERMES_NL_ENABLED=false` → 啟用後支援 @mention NL 對話 +- `TG_GROUP_CUTOVER=false` → 啟用後 TYPE-3/4/4D/8M 告警改發 SRE 群組 + +### 剩餘待辦 +- WS1 Token Rotation(統帥決定時機) +- K8s ConfigMap feature flags 已補入(dev/prod 均為 false),啟用時機由統帥決定 +- Phase 3 Prometheus 規則(ADR-075,不阻擋上線) +- awoooi_migrator 角色需 superuser 建立 + +--- + ## 📍 2026-04-25 — Host 告警錯誤診斷與 resolved_at 缺漏修復 ### 本次修復 diff --git a/docs/adr/ADR-093-telegram-group-migration.md b/docs/adr/ADR-093-telegram-group-migration.md index 842657f6..92432068 100644 --- a/docs/adr/ADR-093-telegram-group-migration.md +++ b/docs/adr/ADR-093-telegram-group-migration.md @@ -1,6 +1,6 @@ # ADR-093: Telegram 告警全面遷移至 SRE 戰情室群組 -> **狀態**: Proposed +> **狀態**: 🟡 批准實作中 > **日期**: 2026-04-24 > **決策者**: 統帥 + 12-Agent 全景分析團隊(onboarder / debugger / db-expert / tool-expert / web-researcher / planner / critic / frontend-designer / fullstack-engineer / refactor-specialist / migration-engineer / vuln-verifier) @@ -74,3 +74,4 @@ Feature flag `TG_GROUP_CUTOVER ∈ {off, 10%, 50%, 100%}`,以 `alert.labels.en | 版本 | 日期 | 執行者 | 變更內容 | |------|------|--------|---------| | v1.0 | 2026-04-24 | 12-Agent 全景分析 | 初版 Proposed | +| v1.1 | 2026-04-25 | Claude Sonnet 4.6 | WS2-WS5 實作完成:NotificationMatrix + BIGINT + approval_records prod 建立 + Approvers 白名單 | diff --git a/docs/adr/ADR-094-hermes-nl-interface.md b/docs/adr/ADR-094-hermes-nl-interface.md index 8b7b6b8d..697f10b1 100644 --- a/docs/adr/ADR-094-hermes-nl-interface.md +++ b/docs/adr/ADR-094-hermes-nl-interface.md @@ -1,6 +1,6 @@ # ADR-094: Hermes 自然語言介面(@mention 對話) -> **狀態**: Proposed +> **狀態**: 🟡 批准實作中 > **日期**: 2026-04-24 > **決策者**: 統帥 + 12-Agent 全景分析團隊 @@ -87,3 +87,4 @@ SDK 延遲 > 10s P95 → 自動降級 `debugger` agent 預設回覆。 | 版本 | 日期 | 執行者 | 變更內容 | |------|------|--------|---------| | v1.0 | 2026-04-24 | 12-Agent 全景分析 | 初版 Proposed | +| v1.1 | 2026-04-25 | Claude Sonnet 4.6 | WS4 實作完成:hermes/ 套件 + NL gateway + SDK 接入 + dispatch_log + rate limit + session | diff --git a/docs/adr/ADR-095-12agent-sdk-integration.md 
b/docs/adr/ADR-095-12agent-sdk-integration.md index 48b0e0a2..b6299f99 100644 --- a/docs/adr/ADR-095-12agent-sdk-integration.md +++ b/docs/adr/ADR-095-12agent-sdk-integration.md @@ -1,6 +1,6 @@ # ADR-095: 12-Agent Claude SDK 整合 × Telegram 視覺分派 -> **狀態**: Proposed +> **狀態**: 🟡 批准實作中 > **日期**: 2026-04-24 > **決策者**: 統帥 + 12-Agent 全景分析團隊 @@ -159,3 +159,4 @@ CONSENSUS_WEIGHTS = { |------|------|--------|---------| | v1.0 | 2026-04-24 | 12-Agent 全景分析 | 初版 Proposed | | v1.1 | 2026-04-24 | Codex | 補入 12-agent 日常工作模式(Game Rules v1)與 9 skills 對照 | +| v1.2 | 2026-04-25 | Claude Sonnet 4.6 | WS4-WS5 實作完成:display_names.py + agent_loader + safety_hooks + ConsensusEngine weights 宣告 | diff --git a/k8s/awoooi-dev/02-configmap.yaml b/k8s/awoooi-dev/02-configmap.yaml index 806aa78c..ab24347f 100644 --- a/k8s/awoooi-dev/02-configmap.yaml +++ b/k8s/awoooi-dev/02-configmap.yaml @@ -46,3 +46,8 @@ data: # Dev: Shadow Mode 關閉,測試自動修復 SHADOW_MODE_ENABLED: "false" SHADOW_MODE_LOG_ONLY: "false" + + # 2026-04-25: Hermes × 12-Agent(ADR-093/094/095)— dev 可先開啟測試 + TG_GROUP_CUTOVER: "false" + HERMES_NL_ENABLED: "false" + ENABLE_12AGENT_CONSENSUS: "false" diff --git a/k8s/awoooi-prod/04-configmap.yaml b/k8s/awoooi-prod/04-configmap.yaml index b500a2be..0514cddb 100644 --- a/k8s/awoooi-prod/04-configmap.yaml +++ b/k8s/awoooi-prod/04-configmap.yaml @@ -152,3 +152,8 @@ data: AIOPS_P6_OFFLINE_REPLAY: "true" AIOPS_P6_KB_ROT_CLEANER: "true" AIOPS_P6_TRUST_DRIFT_DETECTOR: "true" + # 2026-04-25 ogt + Claude Sonnet 4.6: Hermes × 12-Agent 整合(ADR-093/094/095) + # 啟用前請先確認 WS1 Token Rotation 完成 + E2E 測試通過 + TG_GROUP_CUTOVER: "false" + HERMES_NL_ENABLED: "false" + ENABLE_12AGENT_CONSENSUS: "false"