Files
awoooi/docs/evaluations/ai_agent_interaction_learning_proof_2026-06-11.json
Your Name b17a28c293
Some checks failed
CD Pipeline / tests (push) Successful in 1m35s
Code Review / ai-code-review (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Successful in 6m19s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s
feat(governance): 新增報表 runtime 啟動前閘門
2026-06-12 11:34:21 +08:00

445 lines
21 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"schema_version": "ai_agent_interaction_learning_proof_v1",
"generated_at": "2026-06-11T23:20:00+08:00",
"program_status": {
"overall_completion_percent": 100,
"current_priority": "P2",
"current_task_id": "P2-403J",
"next_task_id": "P2-403K",
"read_only_mode": true,
"runtime_authority": "proof_surface_only_no_live_worker",
"status_note": "P2-403J 已把報表真相、全 0 週報可處置異常、日週月契約、Telegram 收斂、每個 Agent 工作量、圖表化報告與高/中/低風險自動化政策接入live AgentSession、message、handoff、learning write、Telegram receipt、報告實發、自動優化、verifier execution 與 route change 仍全部為 0。"
},
"live_truth": {
"runtime_loop_enabled": false,
"live_agent_session_readback_enabled": false,
"redis_consumer_group_enabled": false,
"telegram_send_enabled": false,
"learning_writeback_enabled": false,
"active_live_agent_sessions": 0,
"live_agent_messages_24h": 0,
"live_handoffs_24h": 0,
"live_learning_writes_24h": 0,
"telegram_digest_receipts_24h": 0,
"truth_note": "目前能證明的是合約、面板與證據需求已建立;還不能宣稱 OpenClaw / Hermes / NemoTron runtime 正在互傳訊息或自主學習。"
},
"proof_ladder": [
{
"level_id": "contract_surface",
"display_name": "協作與學習契約可見",
"status": "contract_ready",
"completion_percent": 100,
"operator_meaning": "你可以看到三個 Agent 如何分工、用哪些 MCP/RAG/記憶層互相交接。",
"source_of_truth": "ai_agent_communication_learning_contract_v1",
"next_gate": "保持只讀,等待 live read model 接線。"
},
{
"level_id": "proof_dashboard",
"display_name": "互動證據面板可見",
"status": "proof_surface_ready",
"completion_percent": 100,
"operator_meaning": "治理頁會顯示哪些證據真的有資料、哪些仍是 0 與原因。",
"source_of_truth": "ai_agent_interaction_learning_proof_v1",
"next_gate": "P2-403C Redis dry-run gate 已接入;下一步是 learning writeback approval package。"
},
{
"level_id": "live_read_model",
"display_name": "Live AgentSession 讀回",
"status": "contract_ready",
"completion_percent": 100,
"operator_meaning": "已定義 AgentSession 安全欄位、Redis event envelope、worker gate、回滾與無寫入 smoke尚未讀 live DB/Redis。",
"source_of_truth": "ai_agent_live_read_model_gate_v1",
"next_gate": "P2-403C Redis dry-run gate 已接入治理頁。"
},
{
"level_id": "message_bus_receipts",
"display_name": "訊息匯流收據",
"status": "contract_ready",
"completion_percent": 100,
"operator_meaning": "已定義 fixture-only Redis dry-run、consumer group plan、ack / dead-letter / replay idempotency尚未連 production Redis。",
"source_of_truth": "ai_agent_redis_dry_run_gate_v1",
"next_gate": "P2-403D learning writeback approval package。"
},
{
"level_id": "handoff_receipts",
"display_name": "互相接手紀錄",
"status": "contract_ready",
"completion_percent": 100,
"operator_meaning": "已定義 OpenClaw / Hermes / NemoTron handoff envelope、必填欄位、idempotency key 與 redacted evidence refs尚未啟動 runtime worker。",
"source_of_truth": "ai_agent_redis_dry_run_gate_v1",
"next_gate": "P2-403D learning writeback approval package。"
},
{
"level_id": "runtime_verifier_evidence_review",
"display_name": "Runtime verifier evidence review",
"status": "contract_ready",
"completion_percent": 100,
"operator_meaning": "已定義 runtime verifier implementation 前必看的 readback plan、rollback template、failure receipt redaction 與 NemoTron replay fixture尚未實作或執行 verifier。",
"source_of_truth": "ai_agent_runtime_verifier_evidence_review_v1",
"next_gate": "P2-403J 成長趨勢週報與 operator feedback applied 指標。"
},
{
"level_id": "reporting_and_risk_automation_review",
"display_name": "日週月報與風險自動化 review",
"status": "contract_ready",
"completion_percent": 100,
"operator_meaning": "已定義 AI Agent 日報、週報、月報、每個 Agent 工作量、圖表化報告、AI 分析建議與高/中/低風險自動化政策;尚未排程實發或啟動中低風險 runtime worker。",
"source_of_truth": "ai_agent_report_automation_review_v1",
"next_gate": "P2-403K SRE 戰情室路由收斂後,接 P2-403L 報表 runtime 啟動前閘門。"
},
{
"level_id": "learning_growth",
"display_name": "學習回寫與成長曲線",
"status": "approval_required",
"completion_percent": 0,
"operator_meaning": "啟用後你會看到知識庫新增、playbook trust 變化、replay 分數改善與錯誤類型下降。",
"source_of_truth": "knowledge_entries + playbook_trust_history + replay_results + finetune_exporter",
"next_gate": "P2-403F owner-approved learning dry-run 已接入;實際 KM / PlayBook trust / timeline / replay score writeback 仍需 owner approval 與 runtime write gate。"
},
{
"level_id": "telegram_receipts",
"display_name": "Telegram 摘要收據",
"status": "approval_required",
"completion_percent": 0,
"operator_meaning": "啟用後 Telegram 只推送需處置、需批准或失敗摘要,治理頁保留已排隊/已送達/已確認收據。",
"source_of_truth": "telegram_gateway_queue + alert_operation_log",
"next_gate": "P2-403E Telegram receipt approval package 已接入;實際 queue write、send、delivered、ack 與 retry 仍需 owner approval。"
}
],
"agent_lanes": [
{
"agent_id": "openclaw",
"display_name": "OpenClaw",
"primary_role": "仲裁者、風險 owner、HITL 守門者",
"current_visible_state": "contract_visible_live_runtime_pending",
"visible_signals": [
"arbitration_request_count",
"challenge_count",
"approval_gate_decision",
"post_verification_learning_receipt"
],
"growth_metric": "錯誤批准率下降、人工覆核退回率下降、風險分類穩定度上升",
"what_operator_will_feel": "你會看到 OpenClaw 不是只給結論,而是保留它要求補證、拒收、批准或升級的每一步摘要。"
},
{
"agent_id": "hermes",
"display_name": "Hermes",
"primary_role": "治理文件、證據打包、變更草案與知識整理",
"current_visible_state": "contract_visible_live_runtime_pending",
"visible_signals": [
"evidence_dossier_created",
"runbook_update_proposed",
"knowledge_entry_written",
"telegram_digest_drafted"
],
"growth_metric": "證據包完整率上升、缺欄位率下降、runbook reuse 次數上升",
"what_operator_will_feel": "你會看到 Hermes 把雜訊整理成可審查證據包,並留下哪些知識被沉澱。"
},
{
"agent_id": "nemotron",
"display_name": "NemoTron",
"primary_role": "離線 replay、模型候選評分、策略壓力測試",
"current_visible_state": "contract_visible_live_runtime_pending",
"visible_signals": [
"replay_job_scored",
"candidate_model_compared",
"failure_pattern_labeled",
"promotion_gate_recommendation"
],
"growth_metric": "replay 通過率上升、失敗模式覆蓋率上升、候選模型誤判率下降",
"what_operator_will_feel": "你會看到 NemoTron 用離線分數證明自己是否值得被納入某個任務,而不是直接進生產替換。"
}
],
"proof_signals": [
{
"signal_id": "agent_heartbeat",
"display_name": "Agent 心跳",
"category": "heartbeat",
"source_of_truth": "agent_sessions.last_seen_at",
"visible_surface": "governance_agent_proof_panel",
"current_state": "live_pending",
"operator_interpretation": "有心跳才代表 runtime worker 真正在回報。",
"next_gate": "P2-403B"
},
{
"signal_id": "agent_message_created",
"display_name": "Agent 發話收據",
"category": "message",
"source_of_truth": "Redis Streams XADD + agent_sessions.turn_id",
"visible_surface": "governance_agent_proof_panel",
"current_state": "live_pending",
"operator_interpretation": "只能顯示摘要與 event id未脫敏內容與內部草稿不得進前端。",
"next_gate": "P2-403C"
},
{
"signal_id": "agent_message_consumed",
"display_name": "Agent 收話收據",
"category": "message",
"source_of_truth": "Redis Streams consumer group ack",
"visible_surface": "governance_agent_proof_panel",
"current_state": "live_pending",
"operator_interpretation": "有 ack 才能證明不是單向寫入,而是對方 Agent 已接手處理。",
"next_gate": "P2-403C"
},
{
"signal_id": "handoff_accepted",
"display_name": "接手成功",
"category": "handoff",
"source_of_truth": "agent_sessions.parent_turn_id + timeline_events",
"visible_surface": "AwoooP timeline",
"current_state": "live_pending",
"operator_interpretation": "你會看到誰把任務交給誰、被接受或拒收,以及下一步。",
"next_gate": "P2-403C"
},
{
"signal_id": "critic_challenge",
"display_name": "互相挑戰",
"category": "quality_gate",
"source_of_truth": "agent_sessions.turn_type=challenge",
"visible_surface": "AwoooP timeline",
"current_state": "contract_defined",
"operator_interpretation": "多 Agent 穩定度不是互相附和,而是可記錄的挑戰、拒收與補證。",
"next_gate": "P2-403C"
},
{
"signal_id": "learning_write_receipt",
"display_name": "學習回寫收據",
"category": "learning",
"source_of_truth": "knowledge_entries + playbook_trust_history + learning_failure_log",
"visible_surface": "governance_agent_proof_panel",
"current_state": "live_pending",
"operator_interpretation": "只有寫入知識或 trust history才算真正把經驗沉澱。",
"next_gate": "P2-403F"
},
{
"signal_id": "growth_delta",
"display_name": "成長趨勢",
"category": "growth",
"source_of_truth": "replay_results + evaluation scorecard",
"visible_surface": "governance_agent_proof_panel",
"current_state": "live_pending",
"operator_interpretation": "用 replay 分數、拒收率、誤判率與補證率看 Agent 是否真的變好。",
"next_gate": "P2-403F"
},
{
"signal_id": "telegram_digest_queued",
"display_name": "Telegram 摘要排隊",
"category": "telegram",
"source_of_truth": "telegram_gateway_queue",
"visible_surface": "Telegram Bot + governance receipts",
"current_state": "live_pending",
"operator_interpretation": "先證明排隊與脫敏,再證明送達;成功噪音不即時推播。",
"next_gate": "P2-403E"
},
{
"signal_id": "operator_feedback_applied",
"display_name": "人工回饋採納",
"category": "learning",
"source_of_truth": "approval_records + learning_service feedback",
"visible_surface": "governance_agent_proof_panel",
"current_state": "contract_defined",
"operator_interpretation": "你按下接受、拒絕或要求補證後Agent 要能把結果回寫成後續策略。",
"next_gate": "P2-403F"
},
{
"signal_id": "runtime_verifier_review_packet",
"display_name": "Runtime verifier review package",
"category": "verifier",
"source_of_truth": "ai_agent_runtime_verifier_evidence_review_v1",
"visible_surface": "governance_agent_proof_panel",
"current_state": "contract_defined",
"operator_interpretation": "你會看到 verifier implementation 前需要哪些脫敏證據、誰審查、哪些 runtime action 仍被阻擋。",
"next_gate": "P2-403J"
},
{
"signal_id": "daily_weekly_monthly_agent_reports",
"display_name": "日週月報與 Agent 工作量圖表",
"category": "reporting",
"source_of_truth": "ai_agent_report_automation_review_v1",
"visible_surface": "governance_agent_report_panel",
"current_state": "contract_defined",
"operator_interpretation": "你會看到每個 Agent 做了哪些工作、工作量多少、風險分級、AI 建議與哪些項目需要你批准。",
"next_gate": "P2-403K"
}
],
"operator_surfaces": [
{
"surface_id": "governance_agent_proof_panel",
"display_name": "治理頁 Agent 證據面板",
"route_or_channel": "/zh-TW/governance?tab=automation-inventory",
"operator_feel": "一眼看到目前是真的 live、只有合約或被哪個 gate 阻擋。",
"redaction_policy": "只顯示狀態、證據 id、來源表、摘要與下一步不顯示對話逐字稿。",
"current_state": "implemented_read_only"
},
{
"surface_id": "aiops_timeline",
"display_name": "AwoooP Timeline",
"route_or_channel": "/zh-TW/aiops/timeline",
"operator_feel": "事件會串成觀察、提案、挑戰、審查、決策、驗證、學習的時間線。",
"redaction_policy": "顯示脫敏 event envelope不顯示未核准提示內容或內部草稿。",
"current_state": "existing_surface_needs_agent_read_model"
},
{
"surface_id": "telegram_action_digest",
"display_name": "Telegram Bot 處置摘要",
"route_or_channel": "Telegram gateway",
"operator_feel": "只有需處置、需批准、失敗或風險升級時被打擾。",
"redaction_policy": "只送摘要、風險、批准連結與證據 id不送機密值或完整對話。",
"current_state": "policy_defined_send_blocked"
},
{
"surface_id": "learning_growth_cards",
"display_name": "學習成長卡",
"route_or_channel": "governance proof panel",
"operator_feel": "能看到某個 Agent 的 replay 分數是否上升、失敗類型是否下降。",
"redaction_policy": "顯示聚合指標與趨勢,不顯示訓練原文。",
"current_state": "planned"
},
{
"surface_id": "audit_evidence_drawer",
"display_name": "稽核證據抽屜",
"route_or_channel": "governance proof panel",
"operator_feel": "點開後只看來源表、事件 id、驗證狀態與下一 gate。",
"redaction_policy": "所有 payload 需脫敏,敏感值僅顯示欄位名稱。",
"current_state": "planned"
}
],
"runtime_gates": [
{
"gate_id": "agent_session_read_model_gate",
"display_name": "AgentSession 唯讀 read model",
"status": "ready",
"required_before_green": "P2-403B 已提供既有表確認、safe selected fields、查詢索引、回滾計畫與無寫入 smokelive DB query 仍未批准。",
"next_task_id": "P2-403C"
},
{
"gate_id": "redis_stream_consumer_gate",
"display_name": "Redis Streams consumer group dry-run",
"status": "approval_required",
"required_before_green": "P2-403C 已完成 fixture-only dry-run、handoff envelope、ack / dead-letter / replay 契約;建立 production consumer group 前仍需人工批准。",
"next_task_id": "P2-403D"
},
{
"gate_id": "learning_writeback_gate",
"display_name": "Learning writeback 批准包",
"status": "approval_required",
"required_before_green": "必須定義可寫表、資料保留、錯誤補償、人工回饋回滾。",
"next_task_id": "P2-403G",
"next_gate": "P2-403G runtime write gate review 已接入;實際 writeback 仍需雙重批准、dry-run hash 與 post-write verifier。"
},
{
"gate_id": "telegram_receipt_gate",
"display_name": "Telegram 摘要與收據 gate",
"status": "approval_required",
"required_before_green": "必須完成 gateway dry-run、token 注入、E2E 送達、成功降噪與 failure fallback。",
"next_task_id": "P2-403E",
"next_gate": "P2-403E Telegram receipt approval package 已接入;實際 receipt write 仍需 owner approval。"
},
{
"gate_id": "runtime_verifier_evidence_review_gate",
"display_name": "Runtime verifier evidence implementation review",
"status": "approval_required",
"required_before_green": "P2-403I 已完成只讀 evidence review package實作或執行 verifier 前仍需 owner approval、redaction review、rollback owner 與 sanitized replay gate。",
"next_task_id": "P2-403J",
"next_gate": "下一步只做成長趨勢與 operator feedback applied 指標,不啟動 live verifier。"
},
{
"gate_id": "report_automation_runtime_guard",
"display_name": "報表讀後分析與中低風險自動化 guard",
"status": "approval_required",
"required_before_green": "P2-403J 已完成日週月報、工作量圖表與風險政策;啟動自動處理前仍需 scheduler guard、Telegram receipt、runtime verifier 與 rollback/no-op evidence。",
"next_task_id": "P2-403K",
"next_gate": "下一步建立中低風險自動處理 runtime guard不碰高風險自動執行。"
},
{
"gate_id": "frontend_redaction_gate",
"display_name": "前端脫敏與內容紅線",
"status": "blocked",
"required_before_green": "任何面板只能顯示脫敏摘要、狀態、來源與核准欄位,未脫敏內容不得進前端。",
"next_task_id": "P2-403C"
}
],
"learning_memory_stack": [
{
"layer_id": "hot_agent_session_memory",
"display_name": "Hot AgentSession Memory",
"storage_or_service": "agent_sessions + Redis Streams",
"visible_growth_signal": "心跳、turn count、handoff accepted、challenge count",
"current_state": "read_model_gate_defined_live_read_pending"
},
{
"layer_id": "warm_knowledge_memory",
"display_name": "Warm Knowledge Memory",
"storage_or_service": "knowledge_entries + playbook_trust_history",
"visible_growth_signal": "知識新增數、trust delta、人工拒收率",
"current_state": "service_existing_writeback_gate_pending"
},
{
"layer_id": "cold_replay_memory",
"display_name": "Cold Replay Memory",
"storage_or_service": "agent_replay_results + finetune_exporter",
"visible_growth_signal": "replay 分數、失敗分類、候選模型比較結果",
"current_state": "offline_adapter_existing_surface_pending"
},
{
"layer_id": "market_model_memory",
"display_name": "Market Model Memory",
"storage_or_service": "agent_market_scorecard + governance snapshot",
"visible_growth_signal": "候選 Agent 市場分數、晉升/降級建議、取代評估證據",
"current_state": "read_only_governance_existing"
}
],
"telegram_receipt_contract": {
"direct_send_allowed": false,
"gateway_queue_write_allowed": false,
"receipt_visible_to_operator": true,
"allowed_future_notification_classes": [
"action_required",
"approval_required",
"failure",
"risk_escalation",
"daily_digest"
],
"success_policy": "成功摘要預設不即時推播,只在日報或被追問時顯示。",
"redaction_policy": "Telegram 僅允許脫敏摘要、證據 id、風險、批准連結與下一步不允許機密值或完整對話。"
},
"frontend_redaction": {
"operator_conversation_display_allowed": false,
"agent_private_reasoning_display_allowed": false,
"raw_prompt_display_allowed": false,
"display_policy": "前端只顯示 Agent 狀態、證據摘要、來源、gate、聚合指標與下一步未公開上下文、提示內容、未脫敏細節與機密值一律不進前端。"
},
"approval_boundaries": {
"runtime_worker_allowed": false,
"db_migration_allowed": false,
"redis_consumer_group_allowed": false,
"telegram_direct_send_allowed": false,
"conversation_transcript_display_allowed": false,
"agent_private_reasoning_display_allowed": false,
"secret_plaintext_allowed": false,
"autonomous_self_modify_allowed": false
},
"rollups": {
"proof_level_count": 9,
"contract_ready_level_count": 7,
"live_pending_level_ids": [],
"signal_count": 11,
"live_signal_count": 0,
"operator_surface_count": 5,
"runtime_gate_count": 7,
"blocked_gate_ids": [
"frontend_redaction_gate",
"learning_writeback_gate",
"report_automation_runtime_guard",
"redis_stream_consumer_gate",
"runtime_verifier_evidence_review_gate",
"telegram_receipt_gate"
],
"active_live_agent_sessions": 0,
"live_agent_messages_24h": 0,
"live_handoffs_24h": 0,
"live_learning_writes_24h": 0,
"telegram_digest_receipts_24h": 0
}
}