Files
awoooi/docs/evaluations/ai_agent_communication_learning_contract_2026-06-11.json
2026-06-11 19:07:08 +08:00

535 lines
20 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"schema_version": "ai_agent_communication_learning_contract_v1",
"generated_at": "2026-06-11T20:40:00+08:00",
"program_status": {
"overall_completion_percent": 35,
"current_priority": "P2",
"current_task_id": "P2-401A",
"next_task_id": "P2-401B",
"read_only_mode": true,
"runtime_authority": "contract_only_no_runtime_worker",
"status_note": "本快照只定義 OpenClaw / Hermes / NemoTron 主動溝通、主動學習、記錄與工具堆疊契約;尚未建立 worker、migration、Telegram 實發或生產 route。"
},
"external_source_evidence": [
{
"id": "mcp_official_intro",
"name": "Model Context Protocol",
"url": "https://modelcontextprotocol.io/docs/getting-started/intro",
"decision_use": "採用 MCP 作為 Agent 對外部系統取證與工具呼叫的標準邊界。"
},
{
"id": "redis_streams_docs",
"name": "Redis Streams",
"url": "https://redis.io/docs/latest/develop/data-types/streams/",
"decision_use": "採用 Redis Streams 作為 append-only 協作訊息匯流與 replay 來源。"
},
{
"id": "pgvector_docs",
"name": "pgvector",
"url": "https://github.com/pgvector/pgvector",
"decision_use": "預設沿用 PostgreSQL + pgvector 做 RAG 記憶,不先新增專用 vector DB。"
},
{
"id": "opentelemetry_docs",
"name": "OpenTelemetry",
"url": "https://opentelemetry.io/docs/",
"decision_use": "把 traces / metrics / logs 當作 Agent 決策可追溯性的共通 telemetry 格式。"
},
{
"id": "langfuse_docs",
"name": "Langfuse",
"url": "https://langfuse.com/docs",
"decision_use": "列為後續 LLM trace / eval / 提示詞管理候選,不在本波安裝。"
},
{
"id": "phoenix_docs",
"name": "Arize Phoenix",
"url": "https://arize.com/docs/phoenix",
"decision_use": "列為後續 OpenTelemetry 相容 Agent trace / eval 候選,不在本波安裝。"
},
{
"id": "qdrant_docs",
"name": "Qdrant",
"url": "https://qdrant.tech/documentation/",
"decision_use": "列為 pgvector 達到量級或隔離瓶頸後的專用 vector DB 候選。"
},
{
"id": "milvus_docs",
"name": "Milvus",
"url": "https://milvus.io/docs/quickstart.md",
"decision_use": "列為大量高維向量與獨立 RAG 服務需求出現後的專用 vector DB 候選。"
}
],
"communication_plane": {
"message_bus": "Redis Streams",
"stream_key_pattern": "aiops:agent:{session_id}",
"session_table": "agent_sessions",
"event_tables": [
"timeline_events",
"audit_logs",
"alert_operation_log",
"playbook_trust_history",
"learning_failure_log"
],
"turn_types": [
"observe",
"propose",
"challenge",
"review",
"decide",
"verify",
"learn"
],
"message_contract": {
"required_fields": [
"session_id",
"turn_id",
"parent_turn_id",
"agent_id",
"turn_type",
"evidence_refs",
"confidence",
"risk_level",
"redaction_level",
"created_at"
],
"private_reasoning_policy": "不得寫入前端可讀 snapshot;只保存必要 decision envelope、evidence refs、摘要與稽核欄位。",
"handoff_policy": "Agent 不互相直接呼叫函式;全部透過 stream + AgentSession + OpenClaw/HITL 關卡轉交。"
},
"frontend_redaction": {
"operator_conversation_display_allowed": false,
"agent_private_reasoning_display_allowed": false,
"display_policy": "治理頁只顯示狀態、證據摘要、角色、風險、下一 gate;不得顯示工作視窗對話、提示詞、會話私有上下文或原始 agent 推理鏈。"
}
},
"agent_lanes": [
{
"agent_id": "openclaw",
"display_name": "OpenClaw",
"primary_role": "生產仲裁者、HITL 守門者、風險 owner",
"initiates": [
"incident arbitration",
"risk challenge",
"approval package review",
"post-verification learning"
],
"responds_to": [
"Hermes evidence dossier",
"NemoTron replay score",
"Telegram action-required callback",
"Alertmanager / Sentry / SigNoz evidence"
],
"writes_to": [
"agent_sessions",
"timeline_events",
"audit_logs",
"playbook_trust_history"
],
"blocked_actions": [
"self_approval",
"secret_plaintext_read",
"production_write_without_human_gate",
"telegram_direct_send_without_gateway"
],
"growth_scope": [
"風險分類",
"修復策略選擇",
"HITL 判定品質",
"Playbook trust"
]
},
{
"agent_id": "hermes",
"display_name": "Hermes",
"primary_role": "治理、知識、文件、供應鏈與降噪 steward",
"initiates": [
"knowledge freshness review",
"dependency drift dossier",
"market watch digest",
"runbook gap report"
],
"responds_to": [
"OpenClaw evidence request",
"NemoTron comparison request",
"scheduled market / dependency / KM review"
],
"writes_to": [
"knowledge_entries",
"docs",
"agent_sessions",
"timeline_events"
],
"blocked_actions": [
"production_write",
"runtime_route_change",
"telegram_direct_send",
"secret_plaintext_read"
],
"growth_scope": [
"KM 去重與壓縮",
"RAG 命中品質",
"runbook 新鮮度",
"告警噪音分類"
]
},
{
"agent_id": "nemotron",
"display_name": "NemoTron",
"primary_role": "離線模型評估、replay scorer、工具能力比較者",
"initiates": [
"sanitized replay scoring",
"model comparison",
"tool-call contract validation"
],
"responds_to": [
"OpenClaw offline evaluation request",
"Hermes source refresh request",
"market watch candidate update"
],
"writes_to": [
"agent_replay_results",
"agent_market_scorecards",
"agent_sessions",
"timeline_events"
],
"blocked_actions": [
"production_route_change",
"shadow_or_canary_without_gate",
"paid_api_call_without_approval",
"secret_plaintext_read",
"unsanitized_data_ingestion"
],
"growth_scope": [
"offline replay quality",
"模型版本比較",
"工具呼叫穩定度",
"JSON / schema 合約遵守率"
]
}
],
"mcp_stack": [
{
"id": "mcp_gateway",
"display_name": "MCP Gateway / AwoooP Tool Boundary",
"primary_owner": "openclaw",
"purpose": "所有 Agent 外部工具呼叫的入口,要求 audit、RBAC、rate limit、redaction 與 tool schema。",
"storage_or_service": "apps/api MCP providers + audit_logs",
"status": "partially_existing_needs_unified_audit",
"approval_gate": "read_only_allowed"
},
{
"id": "k8s_readonly_mcp",
"display_name": "Kubernetes 只讀 MCP",
"primary_owner": "openclaw",
"purpose": "讀 pod、event、rollout、node、namespace 與 resource 狀態;不 apply、不 scale、不 delete。",
"storage_or_service": "K8s API / existing provider",
"status": "existing_or_planned_read_only",
"approval_gate": "runtime_write_blocked"
},
{
"id": "prometheus_alertmanager_mcp",
"display_name": "Prometheus / Alertmanager MCP",
"primary_owner": "openclaw",
"purpose": "查詢 metrics、alert history、silence 狀態與告警 fingerprint;不得修改 route、receiver 或 silence。",
"storage_or_service": "Prometheus / Alertmanager",
"status": "existing_read_path_needs_contract",
"approval_gate": "alert_rule_write_blocked"
},
{
"id": "signoz_sentry_mcp",
"display_name": "SigNoz / Sentry MCP",
"primary_owner": "hermes",
"purpose": "串接 trace、log、error issue、release 與 regression evidence,供 OpenClaw 仲裁。",
"storage_or_service": "SigNoz / Sentry",
"status": "read_path_needs_freshness_slo",
"approval_gate": "read_only_allowed"
},
{
"id": "gitea_argocd_mcp",
"display_name": "Gitea / ArgoCD MCP",
"primary_owner": "hermes",
"purpose": "關聯 commit、workflow、deploy、sync、rollback evidence;不自動 merge、push、sync 或 rollback。",
"storage_or_service": "Gitea Actions / ArgoCD",
"status": "partially_existing",
"approval_gate": "write_requires_human_gate"
},
{
"id": "backup_dr_mcp",
"display_name": "Backup / DR MCP",
"primary_owner": "openclaw",
"purpose": "讀取備份新鮮度、完整性、restore drill readiness;不執行 restore、不讀 secret。",
"storage_or_service": "backup manifests / runbooks / readiness snapshots",
"status": "snapshot_existing_runtime_probe_blocked",
"approval_gate": "restore_requires_explicit_approval"
},
{
"id": "package_security_mcp",
"display_name": "Package / SBOM / CVE MCP",
"primary_owner": "hermes",
"purpose": "追蹤 Python、pnpm、Docker image、license、CVE 與 drift;只產升級批准包。",
"storage_or_service": "package inventories / vulnerability feeds",
"status": "snapshot_existing_external_feed_needs_gate",
"approval_gate": "dependency_upgrade_approval_required"
},
{
"id": "telegram_gateway_mcp",
"display_name": "Telegram Gateway MCP",
"primary_owner": "openclaw",
"purpose": "只在 action-required、failure-only、approval callback 場景送訊;所有訊息需 redaction 與 ADR-035 E2E。",
"storage_or_service": "Telegram Gateway / alert_operation_log",
"status": "policy_existing_e2e_pending",
"approval_gate": "telegram_direct_send_blocked"
},
{
"id": "agent_market_watch_mcp",
"display_name": "Agent Market Watch MCP",
"primary_owner": "hermes",
"purpose": "定期讀官方 primary source、release notes、scorecard 與 replay readiness,評估新 Agent 是否納入。",
"storage_or_service": "agent-market-watch registry / Gitea workflow summary",
"status": "scheduled_watch_existing_no_auto_promotion",
"approval_gate": "market_integration_review_required"
}
],
"rag_memory_stack": [
{
"id": "hot_session_memory",
"display_name": "Hot Session Memory",
"primary_owner": "openclaw",
"purpose": "保存當前 incident、最近 MCP evidence、當輪 Agent turns、批准狀態與驗證結果。",
"storage_or_service": "Redis Streams + agent_sessions + timeline_events",
"status": "contract_defined_runtime_worker_pending",
"approval_gate": "db_migration_and_worker_gate_required",
"retention_policy": "短期高新鮮度;完成後摘要與 evidence refs 入 warm memory。"
},
{
"id": "warm_rag_memory",
"display_name": "Warm RAG Memory",
"primary_owner": "hermes",
"purpose": "保存 KM、runbook、ADR、LOGBOOK、Playbook、scorecard、歷史 incident 摘要與治理結果。",
"storage_or_service": "PostgreSQL + pgvector + knowledge_entries + playbooks",
"status": "existing_foundation_needs_quality_gate",
"approval_gate": "read_only_allowed",
"retention_policy": "30 天未命中轉 dormant;相似度高的記憶由 Hermes 草稿合併,owner review 後寫入。"
},
{
"id": "cold_replay_archive",
"display_name": "Cold Replay Archive",
"primary_owner": "nemotron",
"purpose": "保存 sanitized replay fixture、model comparison、fine-tune JSONL、舊 log 壓縮摘要與 ground truth。",
"storage_or_service": "MinIO / S3-compatible archive + docs/evaluations",
"status": "planned_cost_and_redaction_gate_required",
"approval_gate": "cost_data_and_redaction_approval_required",
"retention_policy": "只保存已脫敏 evidence;不得保存 secret、PII、工作視窗對話或私有推理。"
}
],
"learning_loops": [
{
"id": "incident_outcome_learning",
"display_name": "Incident Outcome Learning",
"primary_owner": "openclaw",
"purpose": "把修復結果、驗證差異、誤修、回滾與 HITL 結果回寫 Playbook trust。",
"storage_or_service": "playbook_trust_history / learning_service",
"status": "existing_concept_runtime_quality_gap",
"approval_gate": "runtime_worker_gate_required",
"metric": "trust_update_success_rate >= 99%"
},
{
"id": "rag_quality_learning",
"display_name": "RAG Quality Learning",
"primary_owner": "hermes",
"purpose": "監測 RAG 命中是否引用錯誤、過期、低信任或缺證據資料,產生 KM 修正草稿。",
"storage_or_service": "knowledge_entries / embedding index / LOGBOOK refs",
"status": "planned",
"approval_gate": "owner_review_required",
"metric": "rag_stale_hit_rate 持續下降"
},
{
"id": "agent_debate_learning",
"display_name": "Agent Debate Learning",
"primary_owner": "openclaw",
"purpose": "記錄 Reviewer / Critic / NemoTron 與 OpenClaw 分歧,避免多 Agent 互相附和。",
"storage_or_service": "agent_sessions / disagreement_score",
"status": "contract_defined",
"approval_gate": "db_migration_required",
"metric": "critical incidents disagreement coverage >= 95%"
},
{
"id": "market_watch_learning",
"display_name": "Market Watch Learning",
"primary_owner": "hermes",
"purpose": "定期追蹤主流 Agent 框架與模型版本,轉成 integration review queue。",
"storage_or_service": "agent_market_governance_snapshot / scorecards",
"status": "existing_weekly_watch_no_auto_promotion",
"approval_gate": "replay_shadow_canary_gate_required",
"metric": "candidate_refresh_age_days <= 30"
},
{
"id": "nemotron_replay_learning",
"display_name": "NemoTron Replay Learning",
"primary_owner": "nemotron",
"purpose": "以 sanitized replay 比較 NemoTron / OpenClaw / 其他候選 Agent 的 RCA、工具呼叫、schema、風險攔截。",
"storage_or_service": "agent_replay_results / agent_market_scorecards",
"status": "blocked_by_cost_data_and_smoke_gate",
"approval_gate": "cost_data_approval_required",
"metric": "5-record smoke pass 後才進 50-record replay"
}
],
"intelligence_services": [
{
"id": "postgres_pgvector",
"display_name": "PostgreSQL + pgvector",
"primary_owner": "hermes",
"purpose": "AWOOOI 預設 RAG 記憶與語意檢索層,沿用既有 Postgres 治理與備份。",
"storage_or_service": "PostgreSQL extension",
"status": "preferred_default",
"approval_gate": "schema_change_requires_migration_gate"
},
{
"id": "redis_streams",
"display_name": "Redis Streams",
"primary_owner": "openclaw",
"purpose": "append-only multi-agent turn bus、replay、consumer group 與 backpressure。",
"storage_or_service": "Redis",
"status": "preferred_default",
"approval_gate": "worker_enablement_required"
},
{
"id": "opentelemetry",
"display_name": "OpenTelemetry",
"primary_owner": "hermes",
"purpose": "統一 traces、metrics、logs 的 context correlation,作為 Agent 可追溯決策證據。",
"storage_or_service": "OTel Collector / SigNoz",
"status": "recommended_existing_stack_alignment",
"approval_gate": "collector_config_change_requires_review"
},
{
"id": "langfuse",
"display_name": "Langfuse",
"primary_owner": "hermes",
"purpose": "LLM trace、eval、prompt 管理與人工 annotation queue 候選。",
"storage_or_service": "Self-hosted or managed service",
"status": "optional_candidate",
"approval_gate": "new_service_and_secret_approval_required"
},
{
"id": "phoenix",
"display_name": "Arize Phoenix",
"primary_owner": "hermes",
"purpose": "OpenTelemetry 相容 LLM / RAG / tool trace 與 eval 候選。",
"storage_or_service": "Self-hosted service",
"status": "optional_candidate",
"approval_gate": "new_service_and_secret_approval_required"
},
{
"id": "qdrant",
"display_name": "Qdrant",
"primary_owner": "hermes",
"purpose": "當 pgvector 遇到隔離、filter、量級或 latency 瓶頸時的專用 vector DB 候選。",
"storage_or_service": "Dedicated vector database",
"status": "deferred_candidate",
"approval_gate": "architecture_review_required"
},
{
"id": "milvus",
"display_name": "Milvus",
"primary_owner": "hermes",
"purpose": "大量高維向量與獨立 RAG 平台候選。",
"storage_or_service": "Dedicated vector database",
"status": "deferred_candidate",
"approval_gate": "architecture_review_required"
}
],
"rollout_tasks": [
{
"task_id": "P2-401A",
"priority": "P2",
"status": "done",
"completion_percent": 100,
"owner_agent": "Hermes + OpenClaw",
"summary": "建立主動溝通 / 學習 / 記錄只讀契約、schema、snapshot、API 與文件同步。",
"next_gate": "正式部署驗證"
},
{
"task_id": "P2-401B",
"priority": "P2",
"status": "planned",
"completion_percent": 0,
"owner_agent": "OpenClaw",
"summary": "設計 AgentSession / Redis Streams migration 與 consumer group runtime gate。",
"next_gate": "DB migration approval + rollback plan"
},
{
"task_id": "P2-401C",
"priority": "P2",
"status": "planned",
"completion_percent": 0,
"owner_agent": "Hermes",
"summary": "建立 MCP Gateway audit matrix,把 K8s / Prometheus / SigNoz / Sentry / Gitea / Backup / Package / Telegram 工具統一權限化。",
"next_gate": "no-secret read-only smoke"
},
{
"task_id": "P2-401D",
"priority": "P2",
"status": "planned",
"completion_percent": 0,
"owner_agent": "Hermes",
"summary": "建立 RAG hot / warm / cold memory ingestion、dedupe、freshness 與 redaction policy。",
"next_gate": "schema migration approval + owner review workflow"
},
{
"task_id": "P2-401E",
"priority": "P2",
"status": "planned",
"completion_percent": 0,
"owner_agent": "NemoTron",
"summary": "定義 sanitized replay scorer 與 5-record smoke;禁止 production / shadow / canary。",
"next_gate": "cost/data approval"
},
{
"task_id": "P1-403",
"priority": "P1",
"status": "planned",
"completion_percent": 0,
"owner_agent": "OpenClaw",
"summary": "Telegram Gateway lane E2E:action-required / failure-only / approval callback;禁止直接送 Bot。",
"next_gate": "ADR-035 E2E approval"
}
],
"approval_boundaries": {
"runtime_worker_allowed": false,
"db_migration_allowed": false,
"telegram_direct_send_allowed": false,
"paid_external_service_allowed": false,
"secret_plaintext_allowed": false,
"autonomous_host_mutation_allowed": false,
"production_route_change_allowed": false,
"sdk_installation_allowed": false
},
"rollups": {
"agent_lane_count": 3,
"mcp_stack_count": 9,
"rag_layer_count": 3,
"learning_loop_count": 5,
"intelligence_service_count": 7,
"rollout_task_count": 6,
"blocked_task_ids": [
"P2-401B",
"P2-401D",
"P2-401E",
"P1-403"
],
"optional_service_ids": [
"langfuse",
"phoenix",
"qdrant",
"milvus"
],
"required_existing_first_ids": [
"mcp_gateway",
"postgres_pgvector",
"redis_streams",
"opentelemetry"
]
}
}