diff --git a/apps/api/src/api/v1/agents.py b/apps/api/src/api/v1/agents.py index a0a5d208..a7363e7a 100644 --- a/apps/api/src/api/v1/agents.py +++ b/apps/api/src/api/v1/agents.py @@ -49,6 +49,9 @@ from src.services.ai_agent_automation_backlog_snapshot import ( from src.services.ai_agent_automation_inventory_snapshot import ( load_latest_ai_agent_automation_inventory_snapshot, ) +from src.services.ai_agent_communication_learning_contract import ( + load_latest_ai_agent_communication_learning_contract, +) from src.services.ai_agent_deployment_layout import ( load_latest_ai_agent_deployment_layout, ) @@ -524,6 +527,33 @@ async def get_agent_deployment_layout() -> dict[str, Any]: ) from exc +@router.get( + "/agent-communication-learning-contract", + response_model=dict[str, Any], + summary="取得 AI Agent 主動溝通與學習契約", + description=( + "讀取最新已提交的 OpenClaw / Hermes / NemoTron 主動溝通、學習、記錄、MCP 與 RAG 契約;" + "此端點不啟動 worker、不建立 DB migration、不送 Telegram、不安裝 SDK、不呼叫付費服務、" + "不修改生產路由或主機。" + ), +) +async def get_agent_communication_learning_contract() -> dict[str, Any]: + """Return the latest read-only AI Agent communication learning contract.""" + try: + return await asyncio.to_thread(load_latest_ai_agent_communication_learning_contract) + except FileNotFoundError as exc: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(exc), + ) from exc + except (json.JSONDecodeError, ValueError) as exc: + logger.error("ai_agent_communication_learning_contract_invalid", error=str(exc)) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="AI Agent 主動溝通與學習契約無效", + ) from exc + + @router.get( "/runtime-surface-inventory", response_model=dict[str, Any], diff --git a/apps/api/src/services/ai_agent_communication_learning_contract.py b/apps/api/src/services/ai_agent_communication_learning_contract.py new file mode 100644 index 00000000..4f1017db --- /dev/null +++ b/apps/api/src/services/ai_agent_communication_learning_contract.py @@ -0,0 +1,146 @@ +""" +AI Agent communication and learning contract snapshot. + +Loads the latest committed, read-only contract for OpenClaw, Hermes, and +NemoTron proactive communication, learning, recording, MCP, RAG, and +intelligence service boundaries. This module never starts workers, writes +database migrations, sends Telegram messages, installs SDKs, calls paid +providers, or changes production routes. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from src.services.snapshot_paths import default_evaluations_dir + +_DEFAULT_EVALUATIONS_DIR = default_evaluations_dir(Path(__file__)) +_SNAPSHOT_PATTERN = "ai_agent_communication_learning_contract_*.json" +_SCHEMA_VERSION = "ai_agent_communication_learning_contract_v1" + + +def load_latest_ai_agent_communication_learning_contract( + evaluations_dir: Path | None = None, +) -> dict[str, Any]: + """Load the newest committed AI Agent communication learning contract.""" + directory = evaluations_dir or _DEFAULT_EVALUATIONS_DIR + candidates = sorted(directory.glob(_SNAPSHOT_PATTERN)) + if not candidates: + raise FileNotFoundError( + f"no AI Agent communication learning contract snapshots found in {directory}" + ) + + latest = candidates[-1] + with latest.open(encoding="utf-8") as handle: + payload = json.load(handle) + + if not isinstance(payload, dict): + raise ValueError(f"{latest}: expected JSON object") + _require_schema(payload, _SCHEMA_VERSION, str(latest)) + _require_read_only_contract(payload, str(latest)) + _require_rollup_consistency(payload, str(latest)) + _require_agent_boundaries(payload, str(latest)) + _require_frontend_redaction(payload, str(latest)) + return payload + + +def _require_schema(payload: dict[str, Any], expected: str, label: str) -> None: + actual = payload.get("schema_version") + if actual != expected: + raise ValueError(f"{label}: expected schema_version={expected}, got {actual!r}") + + +def _require_read_only_contract(payload: dict[str, Any], label: str) -> None: + program_status = payload.get("program_status") or {} + if program_status.get("read_only_mode") is not True: + raise ValueError(f"{label}: program_status.read_only_mode must be true") + if program_status.get("runtime_authority") != "contract_only_no_runtime_worker": + raise ValueError(f"{label}: runtime_authority must stay contract_only_no_runtime_worker") + + boundaries = payload.get("approval_boundaries") or {} + blocked_flags = { + "runtime_worker_allowed", + "db_migration_allowed", + "telegram_direct_send_allowed", + "paid_external_service_allowed", + "secret_plaintext_allowed", + "autonomous_host_mutation_allowed", + "production_route_change_allowed", + "sdk_installation_allowed", + } + allowed = sorted(flag for flag in blocked_flags if boundaries.get(flag) is not False) + if allowed: + raise ValueError(f"{label}: approval boundaries must remain false: {allowed}") + + +def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None: + rollups = payload.get("rollups") or {} + + expected_counts = { + "agent_lane_count": len(payload.get("agent_lanes") or []), + "mcp_stack_count": len(payload.get("mcp_stack") or []), + "rag_layer_count": len(payload.get("rag_memory_stack") or []), + "learning_loop_count": len(payload.get("learning_loops") or []), + "intelligence_service_count": len(payload.get("intelligence_services") or []), + "rollout_task_count": len(payload.get("rollout_tasks") or []), + } + mismatched = { + key: {"expected": expected, "actual": rollups.get(key)} + for key, expected in expected_counts.items() + if rollups.get(key) != expected + } + if mismatched: + raise ValueError(f"{label}: rollup counts must match payload sections: {mismatched}") + + rollout_tasks = payload.get("rollout_tasks") or [] + blocked_task_ids = sorted( + task.get("task_id") + for task in rollout_tasks + if task.get("status") in {"planned", "blocked"} + and ( + "approval" in str(task.get("next_gate", "")).lower() + or "gate" in str(task.get("next_gate", "")).lower() + ) + ) + if sorted(rollups.get("blocked_task_ids") or []) != blocked_task_ids: + raise ValueError(f"{label}: rollups.blocked_task_ids must match gated rollout tasks") + + optional_service_ids = sorted( + service.get("id") + for service in payload.get("intelligence_services") or [] + if service.get("status") in {"optional_candidate", "deferred_candidate"} + ) + if sorted(rollups.get("optional_service_ids") or []) != optional_service_ids: + raise ValueError(f"{label}: rollups.optional_service_ids must match optional services") + + +def _require_agent_boundaries(payload: dict[str, Any], label: str) -> None: + lanes = payload.get("agent_lanes") or [] + lane_ids = {lane.get("agent_id") for lane in lanes} + required_lanes = {"openclaw", "hermes", "nemotron"} + if not required_lanes.issubset(lane_ids): + raise ValueError(f"{label}: missing required agent lanes: {sorted(required_lanes - lane_ids)}") + + unsafe_lanes = [ + lane.get("agent_id") + for lane in lanes + if not lane.get("blocked_actions") + or "secret_plaintext_read" not in set(lane.get("blocked_actions") or []) + ] + if unsafe_lanes: + raise ValueError(f"{label}: agent lanes must block secret plaintext read: {unsafe_lanes}") + + nemotron = next((lane for lane in lanes if lane.get("agent_id") == "nemotron"), {}) + nemotron_blocked = set(nemotron.get("blocked_actions") or []) + if "production_route_change" not in nemotron_blocked: + raise ValueError(f"{label}: Nemotron must remain blocked from production route changes") + + +def _require_frontend_redaction(payload: dict[str, Any], label: str) -> None: + redaction = ((payload.get("communication_plane") or {}).get("frontend_redaction") or {}) + if redaction.get("operator_conversation_display_allowed") is not False: + raise ValueError(f"{label}: operator conversation display must stay false") + if redaction.get("agent_private_reasoning_display_allowed") is not False: + raise ValueError(f"{label}: agent private reasoning display must stay false") diff --git a/apps/api/tests/test_ai_agent_communication_learning_contract.py b/apps/api/tests/test_ai_agent_communication_learning_contract.py new file mode 100644 index 00000000..3435a909 --- /dev/null +++ b/apps/api/tests/test_ai_agent_communication_learning_contract.py @@ -0,0 +1,190 @@ +from __future__ import annotations + +import json + +import pytest + +from src.services.ai_agent_communication_learning_contract import ( + load_latest_ai_agent_communication_learning_contract, +) + + +def test_load_latest_ai_agent_communication_learning_contract_reads_committed_snapshot(): + data = load_latest_ai_agent_communication_learning_contract() + + assert data["schema_version"] == "ai_agent_communication_learning_contract_v1" + assert data["program_status"]["overall_completion_percent"] == 35 + assert data["program_status"]["read_only_mode"] is True + assert data["program_status"]["runtime_authority"] == "contract_only_no_runtime_worker" + assert data["communication_plane"]["message_bus"] == "Redis Streams" + assert data["communication_plane"]["stream_key_pattern"] == "aiops:agent:{session_id}" + assert data["communication_plane"]["frontend_redaction"]["operator_conversation_display_allowed"] is False + assert data["approval_boundaries"]["runtime_worker_allowed"] is False + assert data["approval_boundaries"]["db_migration_allowed"] is False + assert data["approval_boundaries"]["telegram_direct_send_allowed"] is False + assert data["approval_boundaries"]["production_route_change_allowed"] is False + assert data["rollups"]["agent_lane_count"] == 3 + assert data["rollups"]["mcp_stack_count"] == len(data["mcp_stack"]) == 9 + assert data["rollups"]["rag_layer_count"] == len(data["rag_memory_stack"]) == 3 + assert data["rollups"]["learning_loop_count"] == len(data["learning_loops"]) == 5 + assert data["rollups"]["intelligence_service_count"] == len(data["intelligence_services"]) == 7 + assert data["rollups"]["optional_service_ids"] == ["langfuse", "phoenix", "qdrant", "milvus"] + assert {lane["agent_id"] for lane in data["agent_lanes"]} == { + "openclaw", + "hermes", + "nemotron", + } + + +def test_load_latest_ai_agent_communication_learning_contract_rejects_runtime_worker(tmp_path): + snapshot = _snapshot() + snapshot["approval_boundaries"]["runtime_worker_allowed"] = True + (tmp_path / "ai_agent_communication_learning_contract_2026-06-11.json").write_text( + json.dumps(snapshot), + encoding="utf-8", + ) + + with pytest.raises(ValueError, match="approval boundaries"): + load_latest_ai_agent_communication_learning_contract(tmp_path) + + +def test_load_latest_ai_agent_communication_learning_contract_rejects_rollup_mismatch(tmp_path): + snapshot = _snapshot() + snapshot["rollups"]["mcp_stack_count"] = 99 + (tmp_path / "ai_agent_communication_learning_contract_2026-06-11.json").write_text( + json.dumps(snapshot), + encoding="utf-8", + ) + + with pytest.raises(ValueError, match="rollup counts"): + load_latest_ai_agent_communication_learning_contract(tmp_path) + + +def test_load_latest_ai_agent_communication_learning_contract_rejects_nemotron_route_change(tmp_path): + snapshot = _snapshot() + snapshot["agent_lanes"][2]["blocked_actions"] = ["secret_plaintext_read"] + (tmp_path / "ai_agent_communication_learning_contract_2026-06-11.json").write_text( + json.dumps(snapshot), + encoding="utf-8", + ) + + with pytest.raises(ValueError, match="Nemotron"): + load_latest_ai_agent_communication_learning_contract(tmp_path) + + +def _snapshot() -> dict: + return { + "schema_version": "ai_agent_communication_learning_contract_v1", + "generated_at": "2026-06-11T20:40:00+08:00", + "program_status": { + "overall_completion_percent": 35, + "current_priority": "P2", + "current_task_id": "P2-401A", + "next_task_id": "P2-401B", + "read_only_mode": True, + "runtime_authority": "contract_only_no_runtime_worker", + }, + "communication_plane": { + "message_bus": "Redis Streams", + "stream_key_pattern": "aiops:agent:{session_id}", + "session_table": "agent_sessions", + "event_tables": ["timeline_events"], + "turn_types": ["observe", "propose", "challenge", "review", "decide"], + "frontend_redaction": { + "operator_conversation_display_allowed": False, + "agent_private_reasoning_display_allowed": False, + }, + }, + "agent_lanes": [ + { + "agent_id": "openclaw", + "display_name": "OpenClaw", + "primary_role": "生產仲裁", + "initiates": ["incident arbitration"], + "responds_to": ["Hermes evidence dossier"], + "writes_to": ["agent_sessions"], + "blocked_actions": ["secret_plaintext_read", "self_approval"], + }, + { + "agent_id": "hermes", + "display_name": "Hermes", + "primary_role": "治理", + "initiates": ["knowledge review"], + "responds_to": ["OpenClaw evidence request"], + "writes_to": ["knowledge_entries"], + "blocked_actions": ["secret_plaintext_read", "production_write"], + }, + { + "agent_id": "nemotron", + "display_name": "NemoTron", + "primary_role": "離線評估", + "initiates": ["replay scoring"], + "responds_to": ["OpenClaw offline evaluation request"], + "writes_to": ["agent_replay_results"], + "blocked_actions": ["secret_plaintext_read", "production_route_change"], + }, + ], + "mcp_stack": [_capability("mcp_gateway", "MCP Gateway", "openclaw", "existing")], + "rag_memory_stack": [ + _capability("hot_session_memory", "Hot", "openclaw", "contract_defined"), + _capability("warm_rag_memory", "Warm", "hermes", "existing"), + _capability("cold_replay_archive", "Cold", "nemotron", "planned"), + ], + "learning_loops": [_capability("incident_outcome_learning", "Incident", "openclaw", "planned")], + "intelligence_services": [ + _capability("postgres_pgvector", "PostgreSQL + pgvector", "hermes", "preferred_default"), + _capability("langfuse", "Langfuse", "hermes", "optional_candidate"), + ], + "rollout_tasks": [ + { + "task_id": "P2-401A", + "priority": "P2", + "status": "ready_for_review", + "completion_percent": 100, + "owner_agent": "Hermes + OpenClaw", + "summary": "契約。", + "next_gate": "本地測試", + }, + { + "task_id": "P2-401B", + "priority": "P2", + "status": "planned", + "completion_percent": 0, + "owner_agent": "OpenClaw", + "summary": "Migration。", + "next_gate": "DB migration approval gate", + }, + ], + "approval_boundaries": { + "runtime_worker_allowed": False, + "db_migration_allowed": False, + "telegram_direct_send_allowed": False, + "paid_external_service_allowed": False, + "secret_plaintext_allowed": False, + "autonomous_host_mutation_allowed": False, + "production_route_change_allowed": False, + "sdk_installation_allowed": False, + }, + "rollups": { + "agent_lane_count": 3, + "mcp_stack_count": 1, + "rag_layer_count": 3, + "learning_loop_count": 1, + "intelligence_service_count": 2, + "rollout_task_count": 2, + "blocked_task_ids": ["P2-401B"], + "optional_service_ids": ["langfuse"], + }, + } + + +def _capability(id_: str, display_name: str, owner: str, status: str) -> dict: + return { + "id": id_, + "display_name": display_name, + "primary_owner": owner, + "purpose": "測試用途。", + "storage_or_service": "test", + "status": status, + "approval_gate": "read_only_allowed", + } diff --git a/apps/api/tests/test_ai_agent_communication_learning_contract_api.py b/apps/api/tests/test_ai_agent_communication_learning_contract_api.py new file mode 100644 index 00000000..565383f4 --- /dev/null +++ b/apps/api/tests/test_ai_agent_communication_learning_contract_api.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from src.api.v1.agents import router + + +def test_ai_agent_communication_learning_contract_endpoint_returns_committed_snapshot(): + app = FastAPI() + app.include_router(router, prefix="/api/v1") + client = TestClient(app) + + response = client.get("/api/v1/agents/agent-communication-learning-contract") + + assert response.status_code == 200 + data = response.json() + assert data["schema_version"] == "ai_agent_communication_learning_contract_v1" + assert data["program_status"]["overall_completion_percent"] == 35 + assert data["program_status"]["current_task_id"] == "P2-401A" + assert data["program_status"]["next_task_id"] == "P2-401B" + assert data["program_status"]["read_only_mode"] is True + assert data["communication_plane"]["message_bus"] == "Redis Streams" + assert data["communication_plane"]["frontend_redaction"]["operator_conversation_display_allowed"] is False + assert data["approval_boundaries"]["runtime_worker_allowed"] is False + assert data["approval_boundaries"]["db_migration_allowed"] is False + assert data["approval_boundaries"]["telegram_direct_send_allowed"] is False + assert data["rollups"]["agent_lane_count"] == 3 + assert data["rollups"]["mcp_stack_count"] == 9 + assert data["rollups"]["rag_layer_count"] == 3 + assert data["rollups"]["learning_loop_count"] == 5 + assert data["rollups"]["intelligence_service_count"] == 7 + assert any(lane["agent_id"] == "openclaw" for lane in data["agent_lanes"]) + assert any(lane["agent_id"] == "hermes" for lane in data["agent_lanes"]) + assert any(lane["agent_id"] == "nemotron" for lane in data["agent_lanes"]) diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index aaa24aeb..0be6fcad 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -29,6 +29,24 @@ - Nginx `nginx -t`、reload、restart、DNS 修改、TLS renew、ArgoCD sync、kubectl、SSH 主機修改、workflow 修改、runner 啟用、secret rotation、active scan、agent-bounty runtime、payout / withdrawal、deploy 或 runtime execution:全部未執行。 - IwoooS 整體仍維持 `64%`;active runtime gate 仍為 `0`;owner response received / accepted 仍為 `0 / false`。 +## 2026-06-11|OpenClaw / Hermes / NemoTron 主動溝通與學習契約第一波 + +**背景**:統帥要求讓所有 AI Agent 能互相主動溝通、主動學習並記錄,並釐清需要哪些 MCP、RAG、工具與服務堆疊來累積智慧成長。本波依 MASTER 規則先建立只讀契約,不啟動 runtime worker、不做 DB migration、不發 Telegram、不安裝 SDK、不呼叫付費服務。 + +**完成內容:** +- 新增 `docs/schemas/ai_agent_communication_learning_contract_v1.schema.json`,定義 OpenClaw / Hermes / NemoTron 主動溝通、學習、記錄、MCP、RAG、intelligence service、rollout task 與 approval boundary。 +- 新增 `docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json`,完成度 `35%`;Hot Session Memory、Warm RAG Memory、Cold Replay Archive、9 類 MCP、5 條 learning loop 與 7 個 intelligence services 已列入契約。 +- 新增 `apps/api/src/services/ai_agent_communication_learning_contract.py`,強制驗證 runtime worker、DB migration、Telegram direct send、paid service、secret plaintext、host mutation、production route、SDK installation 皆維持 false。 +- 新增 `GET /api/v1/agents/agent-communication-learning-contract` 只讀端點;端點只回傳 committed snapshot,不碰 DB/Redis、不呼叫外部服務。 +- 更新 MASTER §3.2.1b / §3.4.3 / §5 / §8,把 Agent 主動溝通資料面、Hot/Warm/Cold 記憶層、MCP Gateway、PostgreSQL + pgvector、OpenTelemetry、Langfuse / Phoenix、Qdrant / Milvus 採用順序納入權威藍圖。 +- 更新 `docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md`,新增 P2-401A 完成、P2-401B~E 後續優先順序與 35% 完成度。 +- 新增 API / service tests,覆蓋只讀邊界、rollup consistency、NemoTron 禁止 production route、前端 redaction。 + +**完成度與邊界:** +- 主動溝通與學習契約:`100%`(P2-401A)。 +- 三 Agent 主動溝通與學習架構整體:`35%`。 +- runtime worker / AgentSession migration / Redis consumer group / Telegram E2E 實發 / NemoTron paid smoke / SDK 安裝 / production route / host mutation:全部仍 `0%`,需後續 gate。 + ## 2026-06-11|IwoooS Nginx 只讀漂移偵測器 repo-only 第一波 **背景**:接續高價值配置控管清冊,P0 下一步是先把 Nginx 這個最容易被手動改動的公開入口配置做成可重跑的只讀漂移偵測框架。使用者已要求 Nginx 必須有資安機制控管;本階段仍不 SSH、不讀 live、不 reload、不修改主機。 diff --git a/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md b/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md index 7691e240..998cb8ab 100644 --- a/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md +++ b/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md @@ -12,12 +12,15 @@ | Nemotron 實際整合應用 | 30% | 完整回放前仍被關卡擋下 | `blocked_needs_evidence`,下一關是 `refresh_source_evidence_then_5_record_smoke_only` | | 工具 / 服務 / 套件 AI 自動化 | 92% | P0 已完成;P1 服務 / runtime / 監控 / provider / service health / 備份 / DR / 套件與供應鏈只讀基線已完成;P1-007 失敗限定通知合約與前端 redaction 合約已完成;下一主線是 P2-004 依賴 / 供應鏈漂移監控 | 狀態分類、盤點 schema、權限矩陣、靜態盤點種子、只讀 API、UI 骨架、驗證、自動化待辦 schema / 快照 / API / 分組 UI、Backup / DR 目標盤點、準備度矩陣、備份通知政策、Backup / DR 證據 UI、復原演練批准包模板、異地 / escrow 準備度狀態、任務批准邊界、確定性進度彙總、Python 套件 / 供應鏈只讀基線、JS pnpm/npm 只讀基線、Docker build surface 只讀基線、CVE / license / drift 嚴重度政策、定期依賴漂移與外部資料來源檢查設計、依賴升級批准包模板、runtime_surface_inventory_v1 schema / snapshot / API / UI、gitea_workflow_runner_health_v1 schema / snapshot / API / UI、observability_contract_matrix_v1 schema / snapshot / API / UI、ai_provider_route_matrix_v1 schema / snapshot / API / UI、service_health_gap_matrix_v1 schema / snapshot / API / UI、service health evidence cards UI、service_health_failure_notification_policy_v1 schema / snapshot / API / UI 已完成 | | OpenClaw / Hermes / NemoTron 佈建布局 | 45% | P1-401 / P1-402 已完成;仍是只讀 layout 與治理頁顯示,不是 runtime deploy | `ai_agent_deployment_layout_v1` schema、`ai_agent_deployment_layout_2026-06-11.json`、`GET /api/v1/agents/agent-deployment-layout`、治理頁自動化盤點 UI、`AI_AGENT_DEPLOYMENT_LAYOUT_2026-06-11.md` | +| OpenClaw / Hermes / NemoTron 主動溝通與學習契約 | 35% | P2-401A 已完成只讀 contract;runtime worker、DB migration、Telegram 實發、SDK / 付費服務仍未開 gate | `ai_agent_communication_learning_contract_v1` schema、`ai_agent_communication_learning_contract_2026-06-11.json`、`GET /api/v1/agents/agent-communication-learning-contract`、MASTER §3.2.1b / §3.4.3 | | 本工作清單與分析報告 | 100% | 已完成 | 本 MD 文件 | AI Agent 自動化工作包目前完成度:**92%**。本工作清單文件本身完成度:**100%**。 三 Agent 佈建布局目前完成度:**45%**。第一波已完成只讀 schema / snapshot / API / 測試 / 報告,第二波已接入治理頁自動化盤點 UI;正式 runtime 佈署、Telegram E2E 發送與 AgentSession 工作流仍需逐項 gate。 +三 Agent 主動溝通與學習契約目前完成度:**35%**。已完成只讀 schema / snapshot / API / 測試與 MASTER 同步;下一步依優先順序推 `P2-401B` AgentSession / Redis Streams migration 與 worker gate,但在批准前仍不得啟動 runtime loop。 + 完成度計算模型: ```text @@ -68,6 +71,7 @@ AI Agent 自動化工作包目前完成度:**92%**。本工作清單文件本 | 前端治理 UI | 已完成:P1-402,治理頁自動化盤點 tab 已顯示佈建布局、主責 Agent、Telegram policy、批准/阻擋狀態 | | Telegram 三 Agent lane E2E | 待辦:P1-403,必須沿用 Gateway / ADR-035,不允許 Agent 直接發送 | | AgentSession / Redis Streams runtime loop | 待辦:P2-401,需 migration / worker gate | +| 主動溝通與學習契約 | 已完成:P2-401A,`ai_agent_communication_learning_contract_v1` + `GET /api/v1/agents/agent-communication-learning-contract` | | NemoTron 3 Ultra smoke | 待辦:P3-401,需 source refresh + cost/data approval | ## 4. 工作流總覽 @@ -938,6 +942,11 @@ UI: | ID | 狀態 | % | 負責 Agent | 任務 | 產出 | 關卡 | |---|---|---:|---|---|---|---| +| P2-401A | 完成 | 100 | Hermes + OpenClaw | 定義 OpenClaw / Hermes / NemoTron 主動溝通、學習、記錄、MCP、RAG 與 redaction 契約 | `ai_agent_communication_learning_contract_v1` / snapshot / 只讀 API / MASTER 同步 | 只讀;不啟動 worker、不 DB migration、不發 Telegram、不安裝 SDK | +| P2-401B | 待辦 | 0 | OpenClaw | AgentSession / Redis Streams migration 與 consumer group runtime gate | migration proposal + rollback plan | DB migration approval | +| P2-401C | 待辦 | 0 | Hermes | MCP Gateway audit matrix:K8s / Prometheus / SigNoz / Sentry / Gitea / Backup / Package / Telegram | MCP 權限與 audit matrix | no-secret read-only smoke | +| P2-401D | 待辦 | 0 | Hermes | RAG Hot / Warm / Cold memory ingestion、dedupe、freshness、redaction policy | RAG 記憶治理提案 | schema migration + owner review | +| P2-401E | 待辦 | 0 | Nemotron | sanitized replay scorer 與 5-record smoke 設計 | NemoTron replay smoke 批准包 | cost / data approval | | P2-101 | 待辦 | 0 | OpenClaw | 定義操作類別權限模型 | 操作政策 schema | HITL 關卡 | | P2-102 | 待辦 | 0 | OpenClaw | 所有候選操作都要有 dry-run 證據 | dry-run 合約 | 不直接 apply | | P2-103 | 待辦 | 0 | Hermes | 把任務結果接回 KM / LOGBOOK / 稽核軌跡 | 證據寫入器 | 不洩漏 secret | diff --git a/docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json b/docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json new file mode 100644 index 00000000..6cb1164a --- /dev/null +++ b/docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json @@ -0,0 +1,534 @@ +{ + "schema_version": "ai_agent_communication_learning_contract_v1", + "generated_at": "2026-06-11T20:40:00+08:00", + "program_status": { + "overall_completion_percent": 35, + "current_priority": "P2", + "current_task_id": "P2-401A", + "next_task_id": "P2-401B", + "read_only_mode": true, + "runtime_authority": "contract_only_no_runtime_worker", + "status_note": "本快照只定義 OpenClaw / Hermes / NemoTron 主動溝通、主動學習、記錄與工具堆疊契約;尚未建立 worker、migration、Telegram 實發或生產 route。" + }, + "external_source_evidence": [ + { + "id": "mcp_official_intro", + "name": "Model Context Protocol", + "url": "https://modelcontextprotocol.io/docs/getting-started/intro", + "decision_use": "採用 MCP 作為 Agent 對外部系統取證與工具呼叫的標準邊界。" + }, + { + "id": "redis_streams_docs", + "name": "Redis Streams", + "url": "https://redis.io/docs/latest/develop/data-types/streams/", + "decision_use": "採用 Redis Streams 作為 append-only 協作訊息匯流與 replay 來源。" + }, + { + "id": "pgvector_docs", + "name": "pgvector", + "url": "https://github.com/pgvector/pgvector", + "decision_use": "預設沿用 PostgreSQL + pgvector 做 RAG 記憶,不先新增專用 vector DB。" + }, + { + "id": "opentelemetry_docs", + "name": "OpenTelemetry", + "url": "https://opentelemetry.io/docs/", + "decision_use": "把 traces / metrics / logs 當作 Agent 決策可追溯性的共通 telemetry 格式。" + }, + { + "id": "langfuse_docs", + "name": "Langfuse", + "url": "https://langfuse.com/docs", + "decision_use": "列為後續 LLM trace / eval / prompt 管理候選,不在本波安裝。" + }, + { + "id": "phoenix_docs", + "name": "Arize Phoenix", + "url": "https://arize.com/docs/phoenix", + "decision_use": "列為後續 OpenTelemetry 相容 Agent trace / eval 候選,不在本波安裝。" + }, + { + "id": "qdrant_docs", + "name": "Qdrant", + "url": "https://qdrant.tech/documentation/", + "decision_use": "列為 pgvector 達到量級或隔離瓶頸後的專用 vector DB 候選。" + }, + { + "id": "milvus_docs", + "name": "Milvus", + "url": "https://milvus.io/docs/quickstart.md", + "decision_use": "列為大量高維向量與獨立 RAG 服務需求出現後的專用 vector DB 候選。" + } + ], + "communication_plane": { + "message_bus": "Redis Streams", + "stream_key_pattern": "aiops:agent:{session_id}", + "session_table": "agent_sessions", + "event_tables": [ + "timeline_events", + "audit_logs", + "alert_operation_log", + "playbook_trust_history", + "learning_failure_log" + ], + "turn_types": [ + "observe", + "propose", + "challenge", + "review", + "decide", + "verify", + "learn" + ], + "message_contract": { + "required_fields": [ + "session_id", + "turn_id", + "parent_turn_id", + "agent_id", + "turn_type", + "evidence_refs", + "confidence", + "risk_level", + "redaction_level", + "created_at" + ], + "private_reasoning_policy": "不得寫入前端可讀 snapshot;只保存必要 decision envelope、evidence refs、摘要與稽核欄位。", + "handoff_policy": "Agent 不互相直接呼叫函式;全部透過 stream + AgentSession + OpenClaw/HITL 關卡轉交。" + }, + "frontend_redaction": { + "operator_conversation_display_allowed": false, + "agent_private_reasoning_display_allowed": false, + "display_policy": "治理頁只顯示狀態、證據摘要、角色、風險、下一 gate;不得顯示工作視窗對話、prompt、session private context 或原始 agent chain-of-thought。" + } + }, + "agent_lanes": [ + { + "agent_id": "openclaw", + "display_name": "OpenClaw", + "primary_role": "生產仲裁者、HITL 守門者、風險 owner", + "initiates": [ + "incident arbitration", + "risk challenge", + "approval package review", + "post-verification learning" + ], + "responds_to": [ + "Hermes evidence dossier", + "NemoTron replay score", + "Telegram action-required callback", + "Alertmanager / Sentry / SigNoz evidence" + ], + "writes_to": [ + "agent_sessions", + "timeline_events", + "audit_logs", + "playbook_trust_history" + ], + "blocked_actions": [ + "self_approval", + "secret_plaintext_read", + "production_write_without_human_gate", + "telegram_direct_send_without_gateway" + ], + "growth_scope": [ + "風險分類", + "修復策略選擇", + "HITL 判定品質", + "Playbook trust" + ] + }, + { + "agent_id": "hermes", + "display_name": "Hermes", + "primary_role": "治理、知識、文件、供應鏈與降噪 steward", + "initiates": [ + "knowledge freshness review", + "dependency drift dossier", + "market watch digest", + "runbook gap report" + ], + "responds_to": [ + "OpenClaw evidence request", + "NemoTron comparison request", + "scheduled market / dependency / KM review" + ], + "writes_to": [ + "knowledge_entries", + "docs", + "agent_sessions", + "timeline_events" + ], + "blocked_actions": [ + "production_write", + "runtime_route_change", + "telegram_direct_send", + "secret_plaintext_read" + ], + "growth_scope": [ + "KM 去重與壓縮", + "RAG 命中品質", + "runbook 新鮮度", + "告警噪音分類" + ] + }, + { + "agent_id": "nemotron", + "display_name": "NemoTron", + "primary_role": "離線模型評估、replay scorer、工具能力比較者", + "initiates": [ + "sanitized replay scoring", + "model comparison", + "tool-call contract validation" + ], + "responds_to": [ + "OpenClaw offline evaluation request", + "Hermes source refresh request", + "market watch candidate update" + ], + "writes_to": [ + "agent_replay_results", + "agent_market_scorecards", + "agent_sessions", + "timeline_events" + ], + "blocked_actions": [ + "production_route_change", + "shadow_or_canary_without_gate", + "paid_api_call_without_approval", + "secret_plaintext_read", + "unsanitized_data_ingestion" + ], + "growth_scope": [ + "offline replay quality", + "模型版本比較", + "工具呼叫穩定度", + "JSON / schema 合約遵守率" + ] + } + ], + "mcp_stack": [ + { + "id": "mcp_gateway", + "display_name": "MCP Gateway / AwoooP Tool Boundary", + "primary_owner": "openclaw", + "purpose": "所有 Agent 外部工具呼叫的入口,要求 audit、RBAC、rate limit、redaction 與 tool schema。", + "storage_or_service": "apps/api MCP providers + audit_logs", + "status": "partially_existing_needs_unified_audit", + "approval_gate": "read_only_allowed" + }, + { + "id": "k8s_readonly_mcp", + "display_name": "Kubernetes 只讀 MCP", + "primary_owner": "openclaw", + "purpose": "讀 pod、event、rollout、node、namespace 與 resource 狀態;不 apply、不 scale、不 delete。", + "storage_or_service": "K8s API / existing provider", + "status": "existing_or_planned_read_only", + "approval_gate": "runtime_write_blocked" + }, + { + "id": "prometheus_alertmanager_mcp", + "display_name": "Prometheus / Alertmanager MCP", + "primary_owner": "openclaw", + "purpose": "查詢 metrics、alert history、silence 狀態與告警 fingerprint;不得修改 route、receiver 或 silence。", + "storage_or_service": "Prometheus / Alertmanager", + "status": "existing_read_path_needs_contract", + "approval_gate": "alert_rule_write_blocked" + }, + { + "id": "signoz_sentry_mcp", + "display_name": "SigNoz / Sentry MCP", + "primary_owner": "hermes", + "purpose": "串接 trace、log、error issue、release 與 regression evidence,供 OpenClaw 仲裁。", + "storage_or_service": "SigNoz / Sentry", + "status": "read_path_needs_freshness_slo", + "approval_gate": "read_only_allowed" + }, + { + "id": "gitea_argocd_mcp", + "display_name": "Gitea / ArgoCD MCP", + "primary_owner": "hermes", + "purpose": "關聯 commit、workflow、deploy、sync、rollback evidence;不自動 merge、push、sync 或 rollback。", + "storage_or_service": "Gitea Actions / ArgoCD", + "status": "partially_existing", + "approval_gate": "write_requires_human_gate" + }, + { + "id": "backup_dr_mcp", + "display_name": "Backup / DR MCP", + "primary_owner": "openclaw", + "purpose": "讀取備份新鮮度、完整性、restore drill readiness;不執行 restore、不讀 secret。", + "storage_or_service": "backup manifests / runbooks / readiness snapshots", + "status": "snapshot_existing_runtime_probe_blocked", + "approval_gate": "restore_requires_explicit_approval" + }, + { + "id": "package_security_mcp", + "display_name": "Package / SBOM / CVE MCP", + "primary_owner": "hermes", + "purpose": "追蹤 Python、pnpm、Docker image、license、CVE 與 drift;只產升級批准包。", + "storage_or_service": "package inventories / vulnerability feeds", + "status": "snapshot_existing_external_feed_needs_gate", + "approval_gate": "dependency_upgrade_approval_required" + }, + { + "id": "telegram_gateway_mcp", + "display_name": "Telegram Gateway MCP", + "primary_owner": "openclaw", + "purpose": "只在 action-required、failure-only、approval callback 場景送訊;所有訊息需 redaction 與 ADR-035 E2E。", + "storage_or_service": "Telegram Gateway / alert_operation_log", + "status": "policy_existing_e2e_pending", + "approval_gate": "telegram_direct_send_blocked" + }, + { + "id": "agent_market_watch_mcp", + "display_name": "Agent Market Watch MCP", + "primary_owner": "hermes", + "purpose": "定期讀官方 primary source、release notes、scorecard 與 replay readiness,評估新 Agent 是否納入。", + "storage_or_service": "agent-market-watch registry / Gitea workflow summary", + "status": "scheduled_watch_existing_no_auto_promotion", + "approval_gate": "market_integration_review_required" + } + ], + "rag_memory_stack": [ + { + "id": "hot_session_memory", + "display_name": "Hot Session Memory", + "primary_owner": "openclaw", + "purpose": "保存當前 incident、最近 MCP evidence、當輪 Agent turns、批准狀態與驗證結果。", + "storage_or_service": "Redis Streams + agent_sessions + timeline_events", + "status": "contract_defined_runtime_worker_pending", + "approval_gate": "db_migration_and_worker_gate_required", + "retention_policy": "短期高新鮮度;完成後摘要與 evidence refs 入 warm memory。" + }, + { + "id": "warm_rag_memory", + "display_name": "Warm RAG Memory", + "primary_owner": "hermes", + "purpose": "保存 KM、runbook、ADR、LOGBOOK、Playbook、scorecard、歷史 incident 摘要與治理結果。", + "storage_or_service": "PostgreSQL + pgvector + knowledge_entries + playbooks", + "status": "existing_foundation_needs_quality_gate", + "approval_gate": "read_only_allowed", + "retention_policy": "30 天未命中轉 dormant;相似度高的記憶由 Hermes 草稿合併,owner review 後寫入。" + }, + { + "id": "cold_replay_archive", + "display_name": "Cold Replay Archive", + "primary_owner": "nemotron", + "purpose": "保存 sanitized replay fixture、model comparison、fine-tune JSONL、舊 log 壓縮摘要與 ground truth。", + "storage_or_service": "MinIO / S3-compatible archive + docs/evaluations", + "status": "planned_cost_and_redaction_gate_required", + "approval_gate": "cost_data_and_redaction_approval_required", + "retention_policy": "只保存已脫敏 evidence;不得保存 secret、PII、工作視窗對話或 private reasoning。" + } + ], + "learning_loops": [ + { + "id": "incident_outcome_learning", + "display_name": "Incident Outcome Learning", + "primary_owner": "openclaw", + "purpose": "把修復結果、驗證差異、誤修、回滾與 HITL 結果回寫 Playbook trust。", + "storage_or_service": "playbook_trust_history / learning_service", + "status": "existing_concept_runtime_quality_gap", + "approval_gate": "runtime_worker_gate_required", + "metric": "trust_update_success_rate >= 99%" + }, + { + "id": "rag_quality_learning", + "display_name": "RAG Quality Learning", + "primary_owner": "hermes", + "purpose": "監測 RAG 命中是否引用錯誤、過期、低信任或缺證據資料,產生 KM 修正草稿。", + "storage_or_service": "knowledge_entries / embedding index / LOGBOOK refs", + "status": "planned", + "approval_gate": "owner_review_required", + "metric": "rag_stale_hit_rate 持續下降" + }, + { + "id": "agent_debate_learning", + "display_name": "Agent Debate Learning", + "primary_owner": "openclaw", + "purpose": "記錄 Reviewer / Critic / NemoTron 與 OpenClaw 分歧,避免多 Agent 互相附和。", + "storage_or_service": "agent_sessions / disagreement_score", + "status": "contract_defined", + "approval_gate": "db_migration_required", + "metric": "critical incidents disagreement coverage >= 95%" + }, + { + "id": "market_watch_learning", + "display_name": "Market Watch Learning", + "primary_owner": "hermes", + "purpose": "定期追蹤主流 Agent 框架與模型版本,轉成 integration review queue。", + "storage_or_service": "agent_market_governance_snapshot / scorecards", + "status": "existing_weekly_watch_no_auto_promotion", + "approval_gate": "replay_shadow_canary_gate_required", + "metric": "candidate_refresh_age_days <= 30" + }, + { + "id": "nemotron_replay_learning", + "display_name": "NemoTron Replay Learning", + "primary_owner": "nemotron", + "purpose": "以 sanitized replay 比較 NemoTron / OpenClaw / 其他候選 Agent 的 RCA、工具呼叫、schema、風險攔截。", + "storage_or_service": "agent_replay_results / agent_market_scorecards", + "status": "blocked_by_cost_data_and_smoke_gate", + "approval_gate": "cost_data_approval_required", + "metric": "5-record smoke pass 後才進 50-record replay" + } + ], + "intelligence_services": [ + { + "id": "postgres_pgvector", + "display_name": "PostgreSQL + pgvector", + "primary_owner": "hermes", + "purpose": "AWOOOI 預設 RAG 記憶與語意檢索層,沿用既有 Postgres 治理與備份。", + "storage_or_service": "PostgreSQL extension", + "status": "preferred_default", + "approval_gate": "schema_change_requires_migration_gate" + }, + { + "id": "redis_streams", + "display_name": "Redis Streams", + "primary_owner": "openclaw", + "purpose": "append-only multi-agent turn bus、replay、consumer group 與 backpressure。", + "storage_or_service": "Redis", + "status": "preferred_default", + "approval_gate": "worker_enablement_required" + }, + { + "id": "opentelemetry", + "display_name": "OpenTelemetry", + "primary_owner": "hermes", + "purpose": "統一 traces、metrics、logs 的 context correlation,作為 Agent 可追溯決策證據。", + "storage_or_service": "OTel Collector / SigNoz", + "status": "recommended_existing_stack_alignment", + "approval_gate": "collector_config_change_requires_review" + }, + { + "id": "langfuse", + "display_name": "Langfuse", + "primary_owner": "hermes", + "purpose": "LLM trace、eval、prompt 管理與人工 annotation queue 候選。", + "storage_or_service": "Self-hosted or managed service", + "status": "optional_candidate", + "approval_gate": "new_service_and_secret_approval_required" + }, + { + "id": "phoenix", + "display_name": "Arize Phoenix", + "primary_owner": "hermes", + "purpose": "OpenTelemetry 相容 LLM / RAG / tool trace 與 eval 候選。", + "storage_or_service": "Self-hosted service", + "status": "optional_candidate", + "approval_gate": "new_service_and_secret_approval_required" + }, + { + "id": "qdrant", + "display_name": "Qdrant", + "primary_owner": "hermes", + "purpose": "當 pgvector 遇到隔離、filter、量級或 latency 瓶頸時的專用 vector DB 候選。", + "storage_or_service": "Dedicated vector database", + "status": "deferred_candidate", + "approval_gate": "architecture_review_required" + }, + { + "id": "milvus", + "display_name": "Milvus", + "primary_owner": "hermes", + "purpose": "大量高維向量與獨立 RAG 平台候選。", + "storage_or_service": "Dedicated vector database", + "status": "deferred_candidate", + "approval_gate": "architecture_review_required" + } + ], + "rollout_tasks": [ + { + "task_id": "P2-401A", + "priority": "P2", + "status": "done", + "completion_percent": 100, + "owner_agent": "Hermes + OpenClaw", + "summary": "建立主動溝通 / 學習 / 記錄只讀契約、schema、snapshot、API 與文件同步。", + "next_gate": "正式部署驗證" + }, + { + "task_id": "P2-401B", + "priority": "P2", + "status": "planned", + "completion_percent": 0, + "owner_agent": "OpenClaw", + "summary": "設計 AgentSession / Redis Streams migration 與 consumer group runtime gate。", + "next_gate": "DB migration approval + rollback plan" + }, + { + "task_id": "P2-401C", + "priority": "P2", + "status": "planned", + "completion_percent": 0, + "owner_agent": "Hermes", + "summary": "建立 MCP Gateway audit matrix,把 K8s / Prometheus / SigNoz / Sentry / Gitea / Backup / Package / Telegram 工具統一權限化。", + "next_gate": "no-secret read-only smoke" + }, + { + "task_id": "P2-401D", + "priority": "P2", + "status": "planned", + "completion_percent": 0, + "owner_agent": "Hermes", + "summary": "建立 RAG hot / warm / cold memory ingestion、dedupe、freshness 與 redaction policy。", + "next_gate": "schema migration approval + owner review workflow" + }, + { + "task_id": "P2-401E", + "priority": "P2", + "status": "planned", + "completion_percent": 0, + "owner_agent": "NemoTron", + "summary": "定義 sanitized replay scorer 與 5-record smoke,禁止 production / shadow / canary。", + "next_gate": "cost/data approval" + }, + { + "task_id": "P1-403", + "priority": "P1", + "status": "planned", + "completion_percent": 0, + "owner_agent": "OpenClaw", + "summary": "Telegram Gateway lane E2E:action-required / failure-only / approval callback,禁止直接送 Bot。", + "next_gate": "ADR-035 E2E approval" + } + ], + "approval_boundaries": { + "runtime_worker_allowed": false, + "db_migration_allowed": false, + "telegram_direct_send_allowed": false, + "paid_external_service_allowed": false, + "secret_plaintext_allowed": false, + "autonomous_host_mutation_allowed": false, + "production_route_change_allowed": false, + "sdk_installation_allowed": false + }, + "rollups": { + "agent_lane_count": 3, + "mcp_stack_count": 9, + "rag_layer_count": 3, + "learning_loop_count": 5, + "intelligence_service_count": 7, + "rollout_task_count": 6, + "blocked_task_ids": [ + "P2-401B", + "P2-401D", + "P2-401E", + "P1-403" + ], + "optional_service_ids": [ + "langfuse", + "phoenix", + "qdrant", + "milvus" + ], + "required_existing_first_ids": [ + "mcp_gateway", + "postgres_pgvector", + "redis_streams", + "opentelemetry" + ] + } +} diff --git a/docs/schemas/ai_agent_communication_learning_contract_v1.schema.json b/docs/schemas/ai_agent_communication_learning_contract_v1.schema.json new file mode 100644 index 00000000..372d7e30 --- /dev/null +++ b/docs/schemas/ai_agent_communication_learning_contract_v1.schema.json @@ -0,0 +1,375 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://awoooi.wooo.work/schemas/ai_agent_communication_learning_contract_v1.schema.json", + "title": "AI Agent Communication Learning Contract v1", + "type": "object", + "required": [ + "schema_version", + "generated_at", + "program_status", + "communication_plane", + "agent_lanes", + "mcp_stack", + "rag_memory_stack", + "learning_loops", + "intelligence_services", + "rollout_tasks", + "approval_boundaries", + "rollups" + ], + "properties": { + "schema_version": { + "const": "ai_agent_communication_learning_contract_v1" + }, + "generated_at": { + "type": "string" + }, + "program_status": { + "type": "object", + "required": [ + "overall_completion_percent", + "current_priority", + "current_task_id", + "next_task_id", + "read_only_mode", + "runtime_authority" + ], + "properties": { + "overall_completion_percent": { + "type": "integer", + "minimum": 0, + "maximum": 100 + }, + "current_priority": { + "type": "string" + }, + "current_task_id": { + "type": "string" + }, + "next_task_id": { + "type": "string" + }, + "read_only_mode": { + "const": true + }, + "runtime_authority": { + "const": "contract_only_no_runtime_worker" + } + }, + "additionalProperties": true + }, + "communication_plane": { + "type": "object", + "required": [ + "message_bus", + "stream_key_pattern", + "session_table", + "event_tables", + "turn_types", + "frontend_redaction" + ], + "properties": { + "message_bus": { + "const": "Redis Streams" + }, + "stream_key_pattern": { + "type": "string" + }, + "session_table": { + "type": "string" + }, + "event_tables": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "turn_types": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 5 + }, + "frontend_redaction": { + "type": "object", + "required": [ + "operator_conversation_display_allowed", + "agent_private_reasoning_display_allowed" + ], + "properties": { + "operator_conversation_display_allowed": { + "const": false + }, + "agent_private_reasoning_display_allowed": { + "const": false + } + }, + "additionalProperties": true + } + }, + "additionalProperties": true + }, + "agent_lanes": { + "type": "array", + "items": { + "$ref": "#/$defs/agent_lane" + }, + "minItems": 3 + }, + "mcp_stack": { + "type": "array", + "items": { + "$ref": "#/$defs/capability" + }, + "minItems": 1 + }, + "rag_memory_stack": { + "type": "array", + "items": { + "$ref": "#/$defs/capability" + }, + "minItems": 3 + }, + "learning_loops": { + "type": "array", + "items": { + "$ref": "#/$defs/capability" + }, + "minItems": 1 + }, + "intelligence_services": { + "type": "array", + "items": { + "$ref": "#/$defs/capability" + }, + "minItems": 1 + }, + "rollout_tasks": { + "type": "array", + "items": { + "$ref": "#/$defs/rollout_task" + }, + "minItems": 1 + }, + "approval_boundaries": { + "type": "object", + "required": [ + "runtime_worker_allowed", + "db_migration_allowed", + "telegram_direct_send_allowed", + "paid_external_service_allowed", + "secret_plaintext_allowed", + "autonomous_host_mutation_allowed", + "production_route_change_allowed", + "sdk_installation_allowed" + ], + "properties": { + "runtime_worker_allowed": { + "const": false + }, + "db_migration_allowed": { + "const": false + }, + "telegram_direct_send_allowed": { + "const": false + }, + "paid_external_service_allowed": { + "const": false + }, + "secret_plaintext_allowed": { + "const": false + }, + "autonomous_host_mutation_allowed": { + "const": false + }, + "production_route_change_allowed": { + "const": false + }, + "sdk_installation_allowed": { + "const": false + } + }, + "additionalProperties": true + }, + "rollups": { + "type": "object", + "required": [ + "agent_lane_count", + "mcp_stack_count", + "rag_layer_count", + "learning_loop_count", + "intelligence_service_count", + "rollout_task_count", + "blocked_task_ids", + "optional_service_ids" + ], + "properties": { + "agent_lane_count": { + "type": "integer", + "minimum": 0 + }, + "mcp_stack_count": { + "type": "integer", + "minimum": 0 + }, + "rag_layer_count": { + "type": "integer", + "minimum": 0 + }, + "learning_loop_count": { + "type": "integer", + "minimum": 0 + }, + "intelligence_service_count": { + "type": "integer", + "minimum": 0 + }, + "rollout_task_count": { + "type": "integer", + "minimum": 0 + }, + "blocked_task_ids": { + "type": "array", + "items": { + "type": "string" + } + }, + "optional_service_ids": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": true + } + }, + "$defs": { + "agent_lane": { + "type": "object", + "required": [ + "agent_id", + "display_name", + "primary_role", + "initiates", + "responds_to", + "writes_to", + "blocked_actions" + ], + "properties": { + "agent_id": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "primary_role": { + "type": "string" + }, + "initiates": { + "type": "array", + "items": { + "type": "string" + } + }, + "responds_to": { + "type": "array", + "items": { + "type": "string" + } + }, + "writes_to": { + "type": "array", + "items": { + "type": "string" + } + }, + "blocked_actions": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + } + }, + "additionalProperties": true + }, + "capability": { + "type": "object", + "required": [ + "id", + "display_name", + "primary_owner", + "purpose", + "storage_or_service", + "status", + "approval_gate" + ], + "properties": { + "id": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "primary_owner": { + "type": "string" + }, + "purpose": { + "type": "string" + }, + "storage_or_service": { + "type": "string" + }, + "status": { + "type": "string" + }, + "approval_gate": { + "type": "string" + } + }, + "additionalProperties": true + }, + "rollout_task": { + "type": "object", + "required": [ + "task_id", + "priority", + "status", + "completion_percent", + "owner_agent", + "summary", + "next_gate" + ], + "properties": { + "task_id": { + "type": "string" + }, + "priority": { + "type": "string" + }, + "status": { + "type": "string" + }, + "completion_percent": { + "type": "integer", + "minimum": 0, + "maximum": 100 + }, + "owner_agent": { + "type": "string" + }, + "summary": { + "type": "string" + }, + "next_gate": { + "type": "string" + } + }, + "additionalProperties": true + } + }, + "additionalProperties": true +} diff --git a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md index 107285fa..1dcd056b 100644 --- a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md +++ b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md @@ -594,6 +594,47 @@ OpenClaw incumbent 多 Agent 互判、接手、協作是穩定度解法的一部分,但不是全部。AWOOOI 正確方向是 `Coordinator + Diagnostician + Solver + Tool Specialist + Critic`,外面套 deterministic contract / hidden-label grading / HITL / promotion gate。Agent 可以互相挑戰,但不能互相自行批准上線。 +#### 3.2.1b 2026-06-11 OpenClaw / Hermes / NemoTron 主動溝通契約 + +**核心裁決**:三個 Agent 要主動溝通,但不能變成三套互相私下授權的黑箱。所有互叫、接手、批判、回寫都必須通過同一條可 replay 的資料面: + +```text +Alert / Sentry / SigNoz / Gitea / Market Watch / Operator + → Hermes 建立 evidence dossier + → OpenClaw 開 AgentSession 並仲裁風險 + → NemoTron 只在 sanitized replay / smoke / model comparison lane 參與 + → Reviewer / Critic challenge + → OpenClaw 產 decision envelope + → Telegram Gateway 只送 action-required / failure-only / approval callback + → Timeline / KM / Playbook trust / replay dataset 回寫 +``` + +**資料面:** + +| 層 | 既定方案 | 目的 | 禁止事項 | +|---|---|---|---| +| 協作訊息 | `Redis Streams`,key:`aiops:agent:{session_id}` | append-only turns、consumer group、replay、backpressure | Agent 直接互相呼叫函式、私下改 prompt | +| 稽核會話 | `agent_sessions` | 保存 session、agent、turn、risk、confidence、latency、evidence refs | 保存工作視窗對話、private reasoning、secret payload | +| 事實軌跡 | `timeline_events` / `audit_logs` / `alert_operation_log` | 追溯每次決策來源與 Telegram callback | 只留 LLM 結論、不留證據 | +| 前端呈現 | Governance / AwoooP redacted summary | 只顯示狀態、角色、證據摘要、風險、下一 gate | 顯示 operator conversation、prompt、session private context、chain-of-thought | + +**角色主動權:** + +| Agent | 可主動發起 | 必須回應 | 可寫入 | 仍禁止 | +|---|---|---|---|---| +| OpenClaw | incident arbitration、risk challenge、approval review、post-verification learning | Hermes dossier、NemoTron replay score、Telegram callback | `agent_sessions`、`timeline_events`、`audit_logs`、`playbook_trust_history` | 自我批准、未授權 production write、Telegram direct send、讀 secret plaintext | +| Hermes | KM freshness、dependency drift、market watch、runbook gap | OpenClaw evidence request、NemoTron comparison request | `knowledge_entries`、docs、`agent_sessions`、`timeline_events` | 直接改生產、直接發 Telegram、越權做生產仲裁 | +| NemoTron | sanitized replay scoring、model comparison、tool contract validation | OpenClaw offline evaluation request、Hermes source refresh request | `agent_replay_results`、`agent_market_scorecards`、`agent_sessions`、`timeline_events` | production route、shadow/canary、未批准 paid API、未脫敏資料 | + +**本波已建立的可執行契約(只讀,不授權 runtime):** + +| 檔案 / API | 用途 | +|---|---| +| `docs/schemas/ai_agent_communication_learning_contract_v1.schema.json` | 主動溝通、學習、MCP、RAG、服務堆疊與 approval boundary 契約 | +| `docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json` | 2026-06-11 committed snapshot;完成度 `35%`,runtime worker / DB migration / Telegram direct send 全部 false | +| `apps/api/src/services/ai_agent_communication_learning_contract.py` | 只讀 loader;強制驗證 runtime / migration / Telegram / SDK / route 權限都未開 | +| `GET /api/v1/agents/agent-communication-learning-contract` | 治理 API;只回傳 committed contract,不啟動 worker、不碰 DB/Redis、不呼叫外部服務 | + #### 3.2.2 核心缺口與災難場景 | 場景 | 現況 | 有 D2 協作後 | @@ -827,6 +868,21 @@ OpenClaw incumbent - 比對三向:今日決策 vs 當時決策 vs 事後 ground truth - 一致率 drop > 10% → 觸發 AI 能力衰退告警(連動 D6) +**2026-06-11 智慧成長資料層補強** + +| 記憶層 | 預設技術 | Owner | 使用場景 | 目前關卡 | +|---|---|---|---|---| +| Hot Session Memory | Redis Streams + `agent_sessions` + `timeline_events` | OpenClaw | 當前 incident、當輪 Agent turns、MCP evidence、批准狀態 | P2-401B migration / worker gate | +| Warm RAG Memory | PostgreSQL + pgvector + `knowledge_entries` + Playbooks | Hermes | KM、ADR、LOGBOOK、runbook、scorecard、歷史事件摘要 | 先沿用既有 Postgres;品質 gate 待補 | +| Cold Replay Archive | MinIO / S3-compatible archive + sanitized JSONL | NemoTron | replay fixture、model comparison、fine-tune dataset、ground truth | cost / data / redaction approval required | + +**工具堆疊裁決:** + +- MCP 必須是 Agent 對外部系統取證與工具呼叫的統一邊界;K8s、Prometheus / Alertmanager、SigNoz / Sentry、Gitea / ArgoCD、Backup / DR、Package / SBOM、Telegram Gateway、Agent Market Watch 都要走 MCP Gateway / AwoooP tool boundary 與 audit。 +- RAG 預設不新增獨立 vector DB;先沿用 PostgreSQL + pgvector,除非量級、隔離、filter 或 latency 證據證明需要 Qdrant / Milvus。 +- LLM observability 可評估 Langfuse 或 Arize Phoenix;本階段只列 optional candidate,不裝服務、不加 secret、不改 runtime。 +- OpenTelemetry 作為 traces / metrics / logs 的共通語言,讓 Agent 決策能追溯到 telemetry context,而不是只相信 RAG 文字。 + #### 3.4.4 韌性、資安、效能 **韌性** @@ -1210,6 +1266,7 @@ OpenClaw incumbent | Agent Orchestrator | ✦`services/agent_orchestrator.py` | Redis Streams 訊息匯流;熔斷(單 Agent > 5s 降級);AgentSession 存 DB | L4×D2 | | Agent Session 表 | DB migration | 新增 `agent_sessions` 表(session_id / agent_role / input_hash / output / latency)| L7×D2 | | 決策路由 | `services/decision_manager.py` | 新路徑:收到 EvidenceSnapshot → 送 Orchestrator → 等 Coordinator 結果 | L4×D2 | +| 主動溝通與學習契約 | `docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json` + `GET /api/v1/agents/agent-communication-learning-contract` | 先固定 OpenClaw / Hermes / NemoTron 主動溝通、MCP、RAG、學習與 redaction 邊界;不啟動 runtime worker | L4×D2 / L7×D4 | **退出條件(量化)** @@ -1569,6 +1626,12 @@ Phase 6 完成後 --- +### 2026-06-11 20:40 (台北) — §3.2 / §3.4 / §5 — 補 OpenClaw / Hermes / NemoTron 主動溝通與學習契約 — 回應統帥要求讓 Agent 可主動溝通、主動學習、記錄且不洩漏工作視窗對話 + +- 新增 §3.2.1b,定義三 Agent 主動溝通資料面、角色主動權、前端 redaction 與 committed contract/API。 +- 新增 §3.4.3 智慧成長資料層補強,明確 Hot / Warm / Cold memory、MCP Gateway、PostgreSQL + pgvector、OpenTelemetry、Langfuse / Phoenix、Qdrant / Milvus 的採用順序。 +- Phase 2 核心改造項加入 `ai_agent_communication_learning_contract_v1` 與 `GET /api/v1/agents/agent-communication-learning-contract`,本波只讀、完成度 35%,未授權 worker / migration / Telegram / SDK / production route。 + ### 2026-04-15 (台北) — 全檔 — 建立 v2 骨架,§0/§1 完成 — 統帥批准「單 MASTER + 4 道閘門」結構 - 從 v1(plans/2026-04-15-MASTER-ai-autonomous-flywheel.md)繼承核心發現