From 5de4b3f36b293d223ca22f644b5bb131f6a115e6 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 14 Jun 2026 14:03:15 +0800 Subject: [PATCH] =?UTF-8?q?feat(governance):=20=E6=96=B0=E5=A2=9E=2012-Age?= =?UTF-8?q?nt=20War=20Room=20=E8=AE=80=E5=9B=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/api/src/api/v1/agents.py | 33 ++ .../services/ai_agent_12_agent_war_room.py | 203 +++++++++++ .../tests/test_ai_agent_12_agent_war_room.py | 100 +++++ .../test_ai_agent_12_agent_war_room_api.py | 36 ++ apps/web/messages/en.json | 38 ++ apps/web/messages/zh-TW.json | 38 ++ .../tabs/automation-inventory-tab.tsx | 166 ++++++++- apps/web/src/lib/api-client.ts | 119 ++++++ docs/LOGBOOK.md | 54 +++ .../AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md | 188 ++++++++++ ...AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md | 8 +- ...ai_agent_12_agent_war_room_2026-06-14.json | 341 ++++++++++++++++++ .../ai_agent_12_agent_war_room_v1.schema.json | 254 +++++++++++++ 13 files changed, 1574 insertions(+), 4 deletions(-) create mode 100644 apps/api/src/services/ai_agent_12_agent_war_room.py create mode 100644 apps/api/tests/test_ai_agent_12_agent_war_room.py create mode 100644 apps/api/tests/test_ai_agent_12_agent_war_room_api.py create mode 100644 docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md create mode 100644 docs/evaluations/ai_agent_12_agent_war_room_2026-06-14.json create mode 100644 docs/schemas/ai_agent_12_agent_war_room_v1.schema.json diff --git a/apps/api/src/api/v1/agents.py b/apps/api/src/api/v1/agents.py index 3ff67233..4bab293d 100644 --- a/apps/api/src/api/v1/agents.py +++ b/apps/api/src/api/v1/agents.py @@ -82,6 +82,9 @@ from src.services.ai_agent_learning_writeback_approval_package import ( from src.services.ai_agent_live_read_model_gate import ( load_latest_ai_agent_live_read_model_gate, ) +from src.services.ai_agent_12_agent_war_room import ( + load_latest_ai_agent_12_agent_war_room, +) from 
src.services.ai_agent_matched_playbook_learning_gap import ( load_latest_ai_agent_matched_playbook_learning_gap, ) @@ -718,6 +721,36 @@ async def get_agent_deployment_layout() -> dict[str, Any]: ) from exc +@router.get( + "/agent-12-agent-war-room", + response_model=dict[str, Any], + summary="取得 AI Agent 12-Agent War Room 快照", + description=( + "讀取最新已提交的 12-Agent War Room 只讀快照;" + "此端點只呈現 OpenClaw、Hermes、NemoTron、SRE、Security、DevOps、Data/DR、" + "Supply Chain、Product/UI、QA、Market Scout、Telegram Ops 的分工、工作量、阻擋項與批准邊界," + "不開 runtime writer、不送 Telegram、不呼叫 Bot API、不安裝 SDK、不呼叫付費 API、" + "不讀 secret、不執行 production write。" + ), +) +async def get_agent_12_agent_war_room() -> dict[str, Any]: + """回傳最新 12-Agent War Room 只讀快照。""" + try: + payload = await asyncio.to_thread(load_latest_ai_agent_12_agent_war_room) + return redact_public_lan_topology(payload) + except FileNotFoundError as exc: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(exc), + ) from exc + except (json.JSONDecodeError, ValueError) as exc: + logger.error("ai_agent_12_agent_war_room_invalid", error=str(exc)) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="AI Agent 12-Agent War Room 快照無效", + ) from exc + + @router.get( "/agent-communication-learning-contract", response_model=dict[str, Any], diff --git a/apps/api/src/services/ai_agent_12_agent_war_room.py b/apps/api/src/services/ai_agent_12_agent_war_room.py new file mode 100644 index 00000000..ad3cd143 --- /dev/null +++ b/apps/api/src/services/ai_agent_12_agent_war_room.py @@ -0,0 +1,203 @@ +""" +AI Agent 12-Agent War Room 快照。 + +讀取最新已提交的 War Room 只讀回報,把 12 位邏輯 Agent 的分工、 +工作量、報告合約、市場觀測合約與 Telegram 邊界產品化;本模組不開 +runtime writer、不送 Telegram、不呼叫 Bot API、不安裝 SDK、不呼叫付費 +API、不讀 secret、不寫 production,也不執行破壞性操作。 +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from src.services.snapshot_paths import default_evaluations_dir + 
+_DEFAULT_EVALUATIONS_DIR = default_evaluations_dir(Path(__file__)) +_SNAPSHOT_PATTERN = "ai_agent_12_agent_war_room_*.json" +_SCHEMA_VERSION = "ai_agent_12_agent_war_room_v1" +_RUNTIME_AUTHORITY = "12_agent_war_room_read_only_no_live_write" +_EXPECTED_AGENT_IDS = { + "agent_01_openclaw_arbiter", + "agent_02_hermes_rag", + "agent_03_nemotron_replay", + "agent_04_sre_sentinel", + "agent_05_security_sentinel", + "agent_06_devops_commander", + "agent_07_data_dr_guardian", + "agent_08_supply_chain_scout", + "agent_09_product_ui_curator", + "agent_10_qa_verifier", + "agent_11_market_scout", + "agent_12_telegram_ops_liaison", +} +_ZERO_FIELDS = { + "live_write_count", + "telegram_send_count", + "bot_api_call_count", + "production_write_count", + "paid_api_call_count", + "sdk_install_count", + "secret_read_count", + "destructive_operation_count", +} +_FORBIDDEN_PUBLIC_TERMS = { + "work_window_transcript", + "chain-of-thought", + "source_thread_id", + "browser_context", + "telegram_token", + "authorization header", +} + + +def load_latest_ai_agent_12_agent_war_room( + evaluations_dir: Path | None = None, +) -> dict[str, Any]: + """讀取最新已提交的 12-Agent War Room 只讀快照。""" + directory = evaluations_dir or _DEFAULT_EVALUATIONS_DIR + candidates = sorted(directory.glob(_SNAPSHOT_PATTERN)) + if not candidates: + raise FileNotFoundError(f"no AI Agent 12-Agent War Room snapshots found in {directory}") + + latest = candidates[-1] + with latest.open(encoding="utf-8") as handle: + payload = json.load(handle) + + if not isinstance(payload, dict): + raise ValueError(f"{latest}: expected JSON object") + + label = str(latest) + _require_schema(payload, label) + _require_agent_roles(payload, label) + _require_rollups(payload, label) + _require_contracts(payload, label) + _require_no_forbidden_public_terms(payload, label) + return payload + + +def _require_schema(payload: dict[str, Any], label: str) -> None: + if payload.get("schema_version") != _SCHEMA_VERSION: + raise ValueError(f"{label}: 
expected schema_version={_SCHEMA_VERSION}") + + status = payload.get("program_status") or {} + expected = { + "current_priority": "P1", + "current_task_id": "P2-142", + "next_task_id": "P2-143", + "read_only_mode": True, + "runtime_authority": _RUNTIME_AUTHORITY, + "overall_completion_percent": 72, + } + mismatches = _mismatches(status, expected) + if mismatches: + raise ValueError(f"{label}: program_status mismatch: {mismatches}") + if not status.get("status_note"): + raise ValueError(f"{label}: program_status.status_note is required") + + +def _require_agent_roles(payload: dict[str, Any], label: str) -> None: + roles = payload.get("agent_roles") or [] + if len(roles) != 12: + raise ValueError(f"{label}: expected exactly 12 agent roles") + + role_ids = {str(role.get("agent_id")) for role in roles} + if role_ids != _EXPECTED_AGENT_IDS: + missing = sorted(_EXPECTED_AGENT_IDS - role_ids) + extra = sorted(role_ids - _EXPECTED_AGENT_IDS) + raise ValueError(f"{label}: agent ids mismatch missing={missing} extra={extra}") + + for role in roles: + role_id = role.get("agent_id") + if role.get("review_status") != "read_only_review_completed": + raise ValueError(f"{label}: {role_id} must remain read_only_review_completed") + for field in ("live_write_count", "telegram_send_count", "bot_api_call_count"): + if role.get(field) != 0: + raise ValueError(f"{label}: {role_id}.{field} must remain zero") + for field in ("display_name", "war_room_role", "next_action"): + if not role.get(field): + raise ValueError(f"{label}: {role_id}.{field} is required") + if not isinstance(role.get("work_units"), int) or role["work_units"] <= 0: + raise ValueError(f"{label}: {role_id}.work_units must be positive") + + +def _require_rollups(payload: dict[str, Any], label: str) -> None: + roles = payload.get("agent_roles") or [] + rollups = payload.get("rollups") or {} + expected = { + "agent_role_count": len(roles), + "read_only_review_completed_count": sum( + 1 for role in roles if 
role.get("review_status") == "read_only_review_completed" + ), + "subagent_batch_limit": 6, + "subagent_batch_count": 2, + "approval_required_total": sum(int(role.get("approval_required_count") or 0) for role in roles), + "blocker_total": sum(int(role.get("blocker_count") or 0) for role in roles), + "total_work_units": sum(int(role.get("work_units") or 0) for role in roles), + "total_evidence_items": sum(int(role.get("evidence_items") or 0) for role in roles), + } + mismatches = _mismatches(rollups, expected) + if mismatches: + raise ValueError(f"{label}: rollups mismatch: {mismatches}") + + for field in _ZERO_FIELDS: + if rollups.get(field) != 0: + raise ValueError(f"{label}: rollups.{field} must remain zero") + + +def _require_contracts(payload: dict[str, Any], label: str) -> None: + coordination = payload.get("coordination_model") or {} + if coordination.get("logical_agent_count") != 12: + raise ValueError(f"{label}: coordination_model.logical_agent_count must be 12") + if coordination.get("subagent_batch_limit") != 6: + raise ValueError(f"{label}: coordination_model.subagent_batch_limit must be 6") + if coordination.get("arbiter") != "openclaw": + raise ValueError(f"{label}: coordination_model.arbiter must remain openclaw") + + telegram = payload.get("telegram_contract") or {} + for field in ("direct_send_allowed", "bot_api_call_allowed", "success_immediate_send_allowed"): + if telegram.get(field) is not False: + raise ValueError(f"{label}: telegram_contract.{field} must remain false") + for field in ("dedup_required", "receipt_required"): + if telegram.get(field) is not True: + raise ValueError(f"{label}: telegram_contract.{field} must remain true") + + redaction = payload.get("display_redaction_contract") or {} + expected_redaction = { + "redaction_required": True, + "conversation_transcript_display_allowed": False, + "raw_prompt_display_allowed": False, + "private_reasoning_display_allowed": False, + "secret_value_display_allowed": False, + 
"raw_runtime_payload_display_allowed": False, + } + mismatches = _mismatches(redaction, expected_redaction) + if mismatches: + raise ValueError(f"{label}: display_redaction_contract mismatch: {mismatches}") + + reporting = payload.get("reporting_contract") or {} + for cadence in ("daily", "weekly", "monthly"): + if (reporting.get(cadence) or {}).get("required") is not True: + raise ValueError(f"{label}: reporting_contract.{cadence}.required must be true") + + market = payload.get("market_watch_contract") or {} + candidates = market.get("p0_refresh_candidates") or [] + if len(candidates) < 5: + raise ValueError(f"{label}: market_watch_contract.p0_refresh_candidates must include at least 5 entries") + + +def _require_no_forbidden_public_terms(payload: dict[str, Any], label: str) -> None: + public_text = json.dumps(payload, ensure_ascii=False).lower() + leaked = sorted(term for term in _FORBIDDEN_PUBLIC_TERMS if term.lower() in public_text) + if leaked: + raise ValueError(f"{label}: forbidden public terms leaked: {leaked}") + + +def _mismatches(payload: dict[str, Any], expected: dict[str, Any]) -> dict[str, dict[str, Any]]: + return { + key: {"expected": expected_value, "actual": payload.get(key)} + for key, expected_value in expected.items() + if payload.get(key) != expected_value + } diff --git a/apps/api/tests/test_ai_agent_12_agent_war_room.py b/apps/api/tests/test_ai_agent_12_agent_war_room.py new file mode 100644 index 00000000..1c1eeb39 --- /dev/null +++ b/apps/api/tests/test_ai_agent_12_agent_war_room.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import copy +import json +import os +from pathlib import Path + +import pytest + +os.environ.setdefault("DATABASE_URL", "postgresql+asyncpg://test:test@localhost/test") + +from src.services.ai_agent_12_agent_war_room import ( + load_latest_ai_agent_12_agent_war_room, +) + + +def test_load_latest_ai_agent_12_agent_war_room_snapshot() -> None: + snapshot = load_latest_ai_agent_12_agent_war_room() + + 
assert snapshot["schema_version"] == "ai_agent_12_agent_war_room_v1" + assert snapshot["program_status"]["current_task_id"] == "P2-142" + assert snapshot["program_status"]["next_task_id"] == "P2-143" + assert snapshot["program_status"]["overall_completion_percent"] == 72 + assert snapshot["program_status"]["runtime_authority"] == "12_agent_war_room_read_only_no_live_write" + assert len(snapshot["agent_roles"]) == 12 + + rollups = snapshot["rollups"] + assert rollups["agent_role_count"] == 12 + assert rollups["read_only_review_completed_count"] == 12 + assert rollups["subagent_batch_limit"] == 6 + assert rollups["subagent_batch_count"] == 2 + assert rollups["total_work_units"] == 82 + assert rollups["total_evidence_items"] == 84 + assert rollups["approval_required_total"] == 61 + assert rollups["blocker_total"] == 54 + assert rollups["live_write_count"] == 0 + assert rollups["telegram_send_count"] == 0 + assert rollups["bot_api_call_count"] == 0 + assert rollups["production_write_count"] == 0 + assert rollups["paid_api_call_count"] == 0 + assert rollups["sdk_install_count"] == 0 + + +def test_war_room_contract_keeps_reports_market_and_telegram_read_only() -> None: + snapshot = load_latest_ai_agent_12_agent_war_room() + + assert snapshot["coordination_model"]["arbiter"] == "openclaw" + assert snapshot["reporting_contract"]["daily"]["required"] is True + assert snapshot["reporting_contract"]["weekly"]["required"] is True + assert snapshot["reporting_contract"]["monthly"]["required"] is True + assert len(snapshot["market_watch_contract"]["p0_refresh_candidates"]) == 5 + assert snapshot["telegram_contract"]["direct_send_allowed"] is False + assert snapshot["telegram_contract"]["bot_api_call_allowed"] is False + assert snapshot["telegram_contract"]["dedup_required"] is True + assert snapshot["telegram_contract"]["receipt_required"] is True + assert snapshot["display_redaction_contract"]["conversation_transcript_display_allowed"] is False + assert 
snapshot["display_redaction_contract"]["secret_value_display_allowed"] is False + + +def test_rejects_missing_agent_role(tmp_path: Path) -> None: + snapshot = copy.deepcopy(load_latest_ai_agent_12_agent_war_room()) + snapshot["agent_roles"] = snapshot["agent_roles"][:-1] + snapshot["rollups"]["agent_role_count"] = 11 + snapshot["rollups"]["read_only_review_completed_count"] = 11 + _write_snapshot(tmp_path, snapshot) + + with pytest.raises(ValueError, match="expected exactly 12 agent roles"): + load_latest_ai_agent_12_agent_war_room(tmp_path) + + +def test_rejects_role_live_write_drift(tmp_path: Path) -> None: + snapshot = copy.deepcopy(load_latest_ai_agent_12_agent_war_room()) + snapshot["agent_roles"][0]["live_write_count"] = 1 + _write_snapshot(tmp_path, snapshot) + + with pytest.raises(ValueError, match="live_write_count must remain zero"): + load_latest_ai_agent_12_agent_war_room(tmp_path) + + +def test_rejects_telegram_direct_send_drift(tmp_path: Path) -> None: + snapshot = copy.deepcopy(load_latest_ai_agent_12_agent_war_room()) + snapshot["telegram_contract"]["direct_send_allowed"] = True + _write_snapshot(tmp_path, snapshot) + + with pytest.raises(ValueError, match="direct_send_allowed must remain false"): + load_latest_ai_agent_12_agent_war_room(tmp_path) + + +def test_rejects_forbidden_public_terms(tmp_path: Path) -> None: + snapshot = copy.deepcopy(load_latest_ai_agent_12_agent_war_room()) + snapshot["agent_roles"][0]["next_action"] = "blocked source_thread_id leak" + _write_snapshot(tmp_path, snapshot) + + with pytest.raises(ValueError, match="forbidden public terms leaked"): + load_latest_ai_agent_12_agent_war_room(tmp_path) + + +def _write_snapshot(directory: Path, payload: dict) -> None: + path = directory / "ai_agent_12_agent_war_room_2099-01-01.json" + path.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8") diff --git a/apps/api/tests/test_ai_agent_12_agent_war_room_api.py b/apps/api/tests/test_ai_agent_12_agent_war_room_api.py 
new file mode 100644 index 00000000..6427fa43 --- /dev/null +++ b/apps/api/tests/test_ai_agent_12_agent_war_room_api.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import os + +from fastapi.testclient import TestClient + +os.environ.setdefault("DATABASE_URL", "postgresql+asyncpg://test:test@localhost/test") + +from src.main import app + + +def test_ai_agent_12_agent_war_room_endpoint() -> None: + client = TestClient(app) + + response = client.get("/api/v1/agents/agent-12-agent-war-room") + + assert response.status_code == 200 + payload = response.json() + assert payload["schema_version"] == "ai_agent_12_agent_war_room_v1" + assert payload["program_status"]["current_task_id"] == "P2-142" + assert payload["program_status"]["next_task_id"] == "P2-143" + assert payload["program_status"]["overall_completion_percent"] == 72 + assert payload["program_status"]["runtime_authority"] == "12_agent_war_room_read_only_no_live_write" + assert payload["rollups"]["agent_role_count"] == 12 + assert payload["rollups"]["read_only_review_completed_count"] == 12 + assert payload["rollups"]["subagent_batch_limit"] == 6 + assert payload["rollups"]["subagent_batch_count"] == 2 + assert payload["rollups"]["total_work_units"] == 82 + assert payload["rollups"]["approval_required_total"] == 61 + assert payload["rollups"]["blocker_total"] == 54 + assert payload["rollups"]["live_write_count"] == 0 + assert payload["rollups"]["telegram_send_count"] == 0 + assert payload["rollups"]["bot_api_call_count"] == 0 + assert payload["rollups"]["production_write_count"] == 0 + assert payload["telegram_contract"]["direct_send_allowed"] is False + assert payload["display_redaction_contract"]["conversation_transcript_display_allowed"] is False diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 96d6b995..1c4a65ec 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -6293,6 +6293,44 @@ "telegramSendAllowed": "Telegram 發送允許={value}" } }, + "warRoom": { + 
"title": "12-Agent War Room 作戰室", + "source": "產生 {generated};目前 {current};下一步 {next}", + "runtime": "runtime={value}", + "coordinationTitle": "協作模型", + "telegramTitle": "Telegram / 報告邊界", + "rolesTitle": "12 位 Agent 工作狀態", + "metrics": { + "overall": "產品化進度", + "reviews": "只讀回饋", + "batches": "分批上限", + "workUnits": "工作量", + "approvals": "需批准", + "blockers": "阻擋項", + "market": "市場候選", + "liveWrites": "live / send / write" + }, + "labels": { + "arbiter": "仲裁={value}", + "memory": "記憶={value}", + "replay": "回放={value}", + "reports": "報告 cadence={value}", + "directSend": "Telegram 直送={value}", + "botApi": "Bot API={value}", + "dedup": "去重={value}", + "redaction": "脫敏鎖定={value}", + "workUnits": "工作={value}", + "approvals": "批准={value}", + "blockers": "阻擋={value}", + "liveWrites": "live={value}", + "telegramSends": "TG={value}" + }, + "report": { + "daily": "日報", + "weekly": "週報", + "monthly": "月報" + } + }, "resultCaptureReleaseVerifierOwnerReviewPacket": { "title": "P2-137 釋出驗證器負責人審查包", "source": "產生 {generated};目前 {current};下一步 {next}", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 96d6b995..1c4a65ec 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -6293,6 +6293,44 @@ "telegramSendAllowed": "Telegram 發送允許={value}" } }, + "warRoom": { + "title": "12-Agent War Room 作戰室", + "source": "產生 {generated};目前 {current};下一步 {next}", + "runtime": "runtime={value}", + "coordinationTitle": "協作模型", + "telegramTitle": "Telegram / 報告邊界", + "rolesTitle": "12 位 Agent 工作狀態", + "metrics": { + "overall": "產品化進度", + "reviews": "只讀回饋", + "batches": "分批上限", + "workUnits": "工作量", + "approvals": "需批准", + "blockers": "阻擋項", + "market": "市場候選", + "liveWrites": "live / send / write" + }, + "labels": { + "arbiter": "仲裁={value}", + "memory": "記憶={value}", + "replay": "回放={value}", + "reports": "報告 cadence={value}", + "directSend": "Telegram 直送={value}", + "botApi": "Bot API={value}", + "dedup": "去重={value}", + "redaction": 
"脫敏鎖定={value}", + "workUnits": "工作={value}", + "approvals": "批准={value}", + "blockers": "阻擋={value}", + "liveWrites": "live={value}", + "telegramSends": "TG={value}" + }, + "report": { + "daily": "日報", + "weekly": "週報", + "monthly": "月報" + } + }, "resultCaptureReleaseVerifierOwnerReviewPacket": { "title": "P2-137 釋出驗證器負責人審查包", "source": "產生 {generated};目前 {current};下一步 {next}", diff --git a/apps/web/src/app/[locale]/governance/tabs/automation-inventory-tab.tsx b/apps/web/src/app/[locale]/governance/tabs/automation-inventory-tab.tsx index b5058200..cf44c616 100644 --- a/apps/web/src/app/[locale]/governance/tabs/automation-inventory-tab.tsx +++ b/apps/web/src/app/[locale]/governance/tabs/automation-inventory-tab.tsx @@ -39,6 +39,7 @@ import { StatusOrb } from '@/components/ui/status-orb' import { AgentActivityConstellation } from '@/components/governance/agent-activity-constellation' import { apiClient, + type AiAgent12AgentWarRoomSnapshot, type AiAgentCandidateOperationDryRunEvidenceSnapshot, type AiAgentCriticReviewerResultCaptureSnapshot, type AiAgentDeploymentLayoutSnapshot, @@ -457,6 +458,7 @@ export function AutomationInventoryTab() { const [observabilityMatrix, setObservabilityMatrix] = useState(null) const [providerRouteMatrix, setProviderRouteMatrix] = useState(null) const [deploymentLayout, setDeploymentLayout] = useState(null) + const [warRoom, setWarRoom] = useState(null) const [proactiveOperations, setProactiveOperations] = useState(null) const [interactionLearningProof, setInteractionLearningProof] = useState(null) const [liveReadModelGate, setLiveReadModelGate] = useState(null) @@ -535,6 +537,7 @@ export function AutomationInventoryTab() { apiClient.getObservabilityContractMatrix(), apiClient.getAiProviderRouteMatrix(), apiClient.getAiAgentDeploymentLayout(), + apiClient.getAiAgent12AgentWarRoom(), apiClient.getAiAgentProactiveOperationsContract(), apiClient.getAiAgentInteractionLearningProof(), apiClient.getAiAgentLiveReadModelGate(), @@ -612,6 +615,7 
@@ export function AutomationInventoryTab() { observabilityMatrixResult, providerRouteMatrixResult, deploymentLayoutResult, + warRoomResult, proactiveOperationsResult, interactionLearningProofResult, liveReadModelGateResult, @@ -686,6 +690,7 @@ export function AutomationInventoryTab() { setObservabilityMatrix(observabilityMatrixResult.status === 'fulfilled' ? observabilityMatrixResult.value : null) setProviderRouteMatrix(providerRouteMatrixResult.status === 'fulfilled' ? providerRouteMatrixResult.value : null) setDeploymentLayout(deploymentLayoutResult.status === 'fulfilled' ? deploymentLayoutResult.value : null) + setWarRoom(warRoomResult.status === 'fulfilled' ? warRoomResult.value : null) setProactiveOperations(proactiveOperationsResult.status === 'fulfilled' ? proactiveOperationsResult.value : null) setInteractionLearningProof(interactionLearningProofResult.status === 'fulfilled' ? interactionLearningProofResult.value : null) setLiveReadModelGate(liveReadModelGateResult.status === 'fulfilled' ? liveReadModelGateResult.value : null) @@ -758,6 +763,7 @@ export function AutomationInventoryTab() { observabilityMatrixResult, providerRouteMatrixResult, deploymentLayoutResult, + warRoomResult, proactiveOperationsResult, interactionLearningProofResult, liveReadModelGateResult, @@ -883,6 +889,19 @@ export function AutomationInventoryTab() { .slice(0, 12) }, [deploymentLayout]) + const visibleWarRoomRoles = useMemo(() => { + if (!warRoom) return [] + const riskPriority = { critical: 0, high: 1, medium: 2, low: 3 } as Record + return [...warRoom.agent_roles] + .sort((a, b) => { + const riskLeft = riskPriority[a.risk_tier] ?? 4 + const riskRight = riskPriority[b.risk_tier] ?? 
4 + if (riskLeft !== riskRight) return riskLeft - riskRight + return a.agent_id.localeCompare(b.agent_id) + }) + .slice(0, 12) + }, [warRoom]) + const visibleDelegableCapabilities = useMemo(() => { if (!proactiveOperations) return [] const riskPriority = { critical: 0, high: 1, medium: 2, low: 3 } as Record @@ -2056,7 +2075,7 @@ export function AutomationInventoryTab() { ) } - if (error || !snapshot || !backlog || !backupTargets || !backupReadiness || !backupPolicy || !offsiteEscrow || !giteaHealth || !observabilityMatrix || !providerRouteMatrix || !deploymentLayout || !proactiveOperations || !interactionLearningProof || !liveReadModelGate || !redisDryRunGate || !learningWritebackPackage || !telegramReceiptPackage || !ownerApprovedLearningDryRun || !runtimeWriteGateReview || !postWriteVerifierPackage || !runtimeVerifierEvidenceReview || !reportAutomationReview || !reportStatusBoard || !reportRuntimeReadiness || !reportRuntimeDryRun || !reportRuntimeFixtureReadback || !runtimeWorkerShadowGate || !operationPermissionModel || !candidateOperationDryRunEvidence || !taskResultAuditTrail || !matchedPlaybookLearningGap || !criticReviewerResultCapture || !ownerApprovedResultCaptureDryRun || !ownerApprovedResultCaptureReadback || !runtimeReadbackApprovalPackage || !runtimeReadbackImplementationReview || !reportLiveDeliveryApprovalPackage || !runtimeReadbackFixtureApproval || !runtimeReadbackPromotionGate || !ownerApprovedFixturePromotionGate || !canonicalRuntimeReadbackOwnerAcceptance || !failureReceiptNoSendReplay || !reviewerQueueNoWriteReadback || !resultCaptureNoWriteReadback || !resultCapturePromotionApprovalGate || !ownerApprovedResultCapturePromotionDryRun || !resultCaptureWriteGateReview || !resultCaptureWriterImplementationReview || !resultCaptureWriterDryRunFixture || !resultCaptureWriterDryRunReadback || !resultCaptureOwnerPromotionReview || !resultCaptureOwnerApprovedExecutionRehearsal || !resultCaptureOwnerAcceptanceMaintenanceGate || 
!resultCaptureOwnerAcceptanceReadbackPreflightHold || !resultCaptureOwnerApprovedPreflightReleasePackage || !resultCaptureOwnerApprovedReleaseReadinessReadback || !resultCaptureOwnerReleaseApprovalGate || !resultCapturePostReleaseVerifierRollbackGate || !resultCaptureFinalReleaseCandidateReadback || !resultCaptureReleaseAuthorizationHold || !resultCaptureReleaseAuthorizationReadbackGate || !resultCaptureReleaseVerifierPreflightGate || !resultCaptureReleaseVerifierOwnerReviewPacket || !resultCaptureReleaseDecisionHold || !resultCaptureReleaseDecisionReadback || !resultCaptureReleaseDecisionNextHandoff || !resultCaptureReleaseDecisionInputPrep || !reportTruthActionabilityReview || !ownerDryRunPackage || !hostStatefulInventory || !serviceHealthGapMatrix || !serviceHealthNotificationPolicy) { + if (error || !snapshot || !backlog || !backupTargets || !backupReadiness || !backupPolicy || !offsiteEscrow || !giteaHealth || !observabilityMatrix || !providerRouteMatrix || !deploymentLayout || !warRoom || !proactiveOperations || !interactionLearningProof || !liveReadModelGate || !redisDryRunGate || !learningWritebackPackage || !telegramReceiptPackage || !ownerApprovedLearningDryRun || !runtimeWriteGateReview || !postWriteVerifierPackage || !runtimeVerifierEvidenceReview || !reportAutomationReview || !reportStatusBoard || !reportRuntimeReadiness || !reportRuntimeDryRun || !reportRuntimeFixtureReadback || !runtimeWorkerShadowGate || !operationPermissionModel || !candidateOperationDryRunEvidence || !taskResultAuditTrail || !matchedPlaybookLearningGap || !criticReviewerResultCapture || !ownerApprovedResultCaptureDryRun || !ownerApprovedResultCaptureReadback || !runtimeReadbackApprovalPackage || !runtimeReadbackImplementationReview || !reportLiveDeliveryApprovalPackage || !runtimeReadbackFixtureApproval || !runtimeReadbackPromotionGate || !ownerApprovedFixturePromotionGate || !canonicalRuntimeReadbackOwnerAcceptance || !failureReceiptNoSendReplay || !reviewerQueueNoWriteReadback 
|| !resultCaptureNoWriteReadback || !resultCapturePromotionApprovalGate || !ownerApprovedResultCapturePromotionDryRun || !resultCaptureWriteGateReview || !resultCaptureWriterImplementationReview || !resultCaptureWriterDryRunFixture || !resultCaptureWriterDryRunReadback || !resultCaptureOwnerPromotionReview || !resultCaptureOwnerApprovedExecutionRehearsal || !resultCaptureOwnerAcceptanceMaintenanceGate || !resultCaptureOwnerAcceptanceReadbackPreflightHold || !resultCaptureOwnerApprovedPreflightReleasePackage || !resultCaptureOwnerApprovedReleaseReadinessReadback || !resultCaptureOwnerReleaseApprovalGate || !resultCapturePostReleaseVerifierRollbackGate || !resultCaptureFinalReleaseCandidateReadback || !resultCaptureReleaseAuthorizationHold || !resultCaptureReleaseAuthorizationReadbackGate || !resultCaptureReleaseVerifierPreflightGate || !resultCaptureReleaseVerifierOwnerReviewPacket || !resultCaptureReleaseDecisionHold || !resultCaptureReleaseDecisionReadback || !resultCaptureReleaseDecisionNextHandoff || !resultCaptureReleaseDecisionInputPrep || !reportTruthActionabilityReview || !ownerDryRunPackage || !hostStatefulInventory || !serviceHealthGapMatrix || !serviceHealthNotificationPolicy) { return (
@@ -3519,6 +3538,34 @@ export function AutomationInventoryTab() { serviceHealthNotificationPolicy.display_redaction_contract.conversation_transcript_display_allowed === false && serviceHealthNotificationPolicy.display_redaction_contract.redaction_required === true ) + const warRoomOverall = warRoom.program_status.overall_completion_percent + const warRoomCompletedReviews = warRoom.rollups.read_only_review_completed_count + const warRoomRoleCount = warRoom.rollups.agent_role_count + const warRoomBatchCount = warRoom.rollups.subagent_batch_count + const warRoomBatchLimit = warRoom.rollups.subagent_batch_limit + const warRoomWorkUnits = warRoom.rollups.total_work_units + const warRoomApprovals = warRoom.rollups.approval_required_total + const warRoomBlockers = warRoom.rollups.blocker_total + const warRoomMarketCandidates = warRoom.rollups.market_refresh_candidate_count + const warRoomReportCadences = ( + warRoom.rollups.daily_report_required_count + + warRoom.rollups.weekly_report_required_count + + warRoom.rollups.monthly_report_required_count + ) + const warRoomLiveWrites = ( + warRoom.rollups.live_write_count + + warRoom.rollups.telegram_send_count + + warRoom.rollups.bot_api_call_count + + warRoom.rollups.production_write_count + + warRoom.rollups.paid_api_call_count + + warRoom.rollups.sdk_install_count + + warRoom.rollups.secret_read_count + + warRoom.rollups.destructive_operation_count + ) + const warRoomRedactionLocked = ( + warRoom.display_redaction_contract.conversation_transcript_display_allowed === false + && warRoom.display_redaction_contract.redaction_required === true + ) const backlogProgressPercent = backlog.progress_summary.overall_percent const explicitApprovalItemCount = backlog.item_approval_boundary_rollup.items_requiring_explicit_approval.length const taskBoundaryCount = snapshot.task_approval_boundary_rollup.total_tasks @@ -3788,6 +3835,123 @@ export function AutomationInventoryTab() { ]} /> + +
+
+
+ + + {t('warRoom.title')} + +
+
+ + +
+
+ +

+ {warRoom.program_status.status_note} +

+ +
+ } /> + } /> + } /> + } /> + } /> + } /> + } /> + } /> +
+ +
+
+ {t('warRoom.coordinationTitle')} +

+ {warRoom.coordination_model.summary} +

+
+ + + + +
+
+ +
+ {t('warRoom.telegramTitle')} +

+ {warRoom.telegram_contract.summary} +

+
+ + + + +
+
+
+ + {t('warRoom.rolesTitle')} +
+ {visibleWarRoomRoles.map(role => { + const roleTone = role.risk_tier === 'critical' ? 'danger' : role.risk_tier === 'high' ? 'warn' : 'neutral' + const roleColor = toneColor(roleTone) + return ( +
+
+ + {role.display_name} + + +
+ + {role.war_room_role} + +
+ + + + + + +
+
+ ) + })} +
+ +
+ + + +
+
+
+
diff --git a/apps/web/src/lib/api-client.ts b/apps/web/src/lib/api-client.ts index 43dfc722..44028894 100644 --- a/apps/web/src/lib/api-client.ts +++ b/apps/web/src/lib/api-client.ts @@ -329,6 +329,11 @@ export const apiClient = { return handleResponse(res) }, + async getAiAgent12AgentWarRoom() { + const res = await fetch(`${API_BASE_URL}/agents/agent-12-agent-war-room`) + return handleResponse(res) + }, + async getAiAgentProactiveOperationsContract() { const res = await fetch(`${API_BASE_URL}/agents/agent-proactive-operations-contract`) return handleResponse(res) @@ -1284,6 +1289,120 @@ export interface AiAgentDeploymentLayoutSnapshot { > } +export interface AiAgent12AgentWarRoomSnapshot { + schema_version: 'ai_agent_12_agent_war_room_v1' + generated_at: string + program_status: { + overall_completion_percent: number + current_priority: 'P0' | 'P1' | 'P2' | 'P3' + current_task_id: 'P2-142' + next_task_id: 'P2-143' + read_only_mode: true + runtime_authority: '12_agent_war_room_read_only_no_live_write' + status_note: string + } + source_refs: string[] + coordination_model: { + logical_agent_count: number + subagent_batch_limit: number + subagent_batch_count: number + coordination_mode: string + arbiter: string + memory_owner: string + replay_owner: string + gateway_owner: string + summary: string + } + agent_roles: Array<{ + agent_id: string + display_name: string + primary_agent: string + war_room_role: string + review_status: string + risk_tier: 'low' | 'medium' | 'high' | 'critical' + work_units: number + evidence_items: number + blocker_count: number + approval_required_count: number + live_write_count: number + telegram_send_count: number + bot_api_call_count: number + next_action: string + }> + workload_summary: { + total_work_units: number + total_evidence_items: number + status_report_visible: boolean + chart_ready: boolean + daily_report_required: boolean + weekly_report_required: boolean + monthly_report_required: boolean + standard_agent_metrics: string[] 
+ } + risk_policy: { + low_risk_auto_handle: string + medium_risk_auto_handle: string + high_risk_requires_approval: string + openclaw_gate_required: boolean + qa_verifier_required: boolean + security_gate_required: boolean + } + reporting_contract: Record<'daily' | 'weekly' | 'monthly', { + required: boolean + delivery_mode: string + sections: string[] + }> + market_watch_contract: { + cadence: string + auto_collect_allowed: string[] + approval_required: string[] + p0_refresh_candidates: string[] + candidate_package_required_fields: string[] + } + telegram_contract: { + direct_send_allowed: boolean + bot_api_call_allowed: boolean + success_immediate_send_allowed: boolean + action_required_digest_allowed_after_approval: boolean + failure_only_escalation: boolean + dedup_required: boolean + receipt_required: boolean + forbidden_actions: string[] + summary: string + } + display_redaction_contract: { + redaction_required: boolean + conversation_transcript_display_allowed: boolean + raw_prompt_display_allowed: boolean + private_reasoning_display_allowed: boolean + secret_value_display_allowed: boolean + raw_runtime_payload_display_allowed: boolean + frontend_display_policy: string + } + rollups: { + agent_role_count: number + read_only_review_completed_count: number + subagent_batch_limit: number + subagent_batch_count: number + approval_required_total: number + blocker_total: number + total_work_units: number + total_evidence_items: number + daily_report_required_count: number + weekly_report_required_count: number + monthly_report_required_count: number + market_refresh_candidate_count: number + live_write_count: number + telegram_send_count: number + bot_api_call_count: number + production_write_count: number + paid_api_call_count: number + sdk_install_count: number + secret_read_count: number + destructive_operation_count: number + } +} + export interface AiAgentProactiveOperationsContractSnapshot { schema_version: 'ai_agent_proactive_operations_contract_v1' 
generated_at: string diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index a98418cb..58887b09 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,32 @@ +## 2026-06-14|P2-142 12-Agent War Room 本地完成 + +**背景**:統帥批准以 12 位 Agent 一起推進工作;本輪把 12 個邏輯工位與兩批次只讀審查產品化,讓治理頁能讀回每位 Agent 的工作量、風險層級、阻擋項、需批准數、報告 cadence、Telegram 邊界與 redaction 狀態。此段不代表 runtime writer、Telegram send、Bot API、production write、SDK 安裝或付費 API 已開啟。 + +**完成項目**: +- 新增 `ai_agent_12_agent_war_room_v1` schema、committed snapshot、loader guard、API endpoint `GET /api/v1/agents/agent-12-agent-war-room`、API/service tests 與治理頁 12-Agent War Room 區塊。 +- 12 個邏輯工位已全部收斂:OpenClaw、Hermes、NemoTron、SRE、Security、DevOps、Data/DR、Supply Chain、Product/UI、QA、Market Scout、Telegram Ops。 +- War Room snapshot 固定 12 份 read-only review、總工作量 `82`、evidence `84`、需批准 `61`、阻擋項 `54`、市場 refresh candidate `5`、日週月報 cadence `3`。 +- Governance automation inventory 頁新增 12-Agent War Room 卡片,顯示產品化進度 `72%`、只讀回饋 `12/12`、分批上限 `2×6`、live / send / write `0`,且不顯示本工作視窗對話內容。 +- 更新 `docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md` 與 `docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md`。 + +**本地驗證**: +- JSON parse:P2-142 schema / snapshot、`zh-TW.json`、`en.json` 通過。 +- Python 編譯:P2-142 loader、`agents.py` 通過。 +- API/service pytest:P2-142 `7 passed`。 +- i18n key / placeholder / war-room mirror:`keys_zh=11894`、`keys_en=11894`、missing `0 / 0`、placeholder diff `0`、War Room 鏡像差異 `0`。 +- Web typecheck:`pnpm --filter @awoooi/web typecheck` 通過。 +- Web production build:`NEXT_PUBLIC_API_URL=https://awoooi.wooo.work NEXT_PRIVATE_BUILD_WORKER_COUNT=1 SENTRY_SUPPRESS_GLOBAL_ERROR_HANDLER_FILE_WARNING=1 pnpm --filter @awoooi/web build` 通過,`/zh-TW/governance` 進 build summary。 +- Guard:`SOURCE_CONTROL_OWNER_RESPONSE_GUARD_OK`、`SECURITY_MIRROR_PROGRESS_GUARD_OK`、`DOC_SECRET_SANITY_OK scanned_files=805`、`git diff --check` 通過。 +- 公開 payload 目標掃描:`批准!繼續`、`In app browser`、`My request for Codex`、`source_thread_id` 命中 `0`;role count `12`、completion `72`。 + +**安全邊界**: +- P2-142 仍是 
read-only War Room readback;不得把 12-Agent 可見、UI 進度、測試通過或報告 cadence 解讀成 runtime 授權。 +- runtime writer、Telegram send、Bot API call、Gateway queue write、reviewer queue write、production write、paid API call、SDK install、secret read、host update、kubectl / ArgoCD / Nginx / DB / restore / destructive operation 仍全部維持 `0 / false`。 + +**下一步**: +- 推送 feature commit,等待 Gitea CD;deploy marker 生效後執行 production API readback 與 desktop / mobile Browser smoke。 +- 正式驗證通過後,`P2-143` 承接 report receipt / 月報 / Agent 工作量 runtime data model,仍不得直接開啟 writer 或 Telegram 實發。 + ## 2026-06-14|P2-141 S4.9 owner 欄位補強正式驗證完成 **背景**:P2-141 release decision input prep 基線已由 `ee5bf500` / deploy marker `306657fd` 正式驗證;S4.9 owner 欄位補強 commit `77515bbe fix(governance): 補齊 P2-141 S4.9 owner 欄位` 已由 deploy marker `a1ad68b9 chore(cd): deploy 77515bb [skip ci]` 部署到正式站。本段只補齊 owner release 決策輸入,不新增第二套入口、不開 runtime gate、不寫 reviewer / Gateway queue、不送 Telegram、不呼叫 Bot API、不寫 result capture、learning、PlayBook trust 或 production target。 @@ -338,6 +367,31 @@ - 可以宣稱 P2-136 / AI Agent 活動正式 deploy 後 recovery readback 無服務回歸;所有 public route / DB parity / backup core / K3s placement / host failed-unit gate 均維持可用。 - 不能宣稱 full cold-start green、ArgoCD Healthy 或 DR complete;下一個真正完成 gate 仍是下一次官方 03:00 `km-vectorize` 成功更新 `lastSuccessfulTime`,以及 5 個 credential escrow non-secret evidence marker。 +## 2026-06-14|12-Agent War Room 編組啟動 + +**背景**:統帥詢問是否應安排 12 位 Agent 一起推進工作。本輪建立 12 個邏輯工位與分批派工規則,避免多 Agent 直接搶 production writer 或 Telegram send。 + +**完成內容**: +- 新增並更新 `docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md`,定義 OpenClaw、Hermes、NemoTron、SRE、Security、DevOps、Data/DR、Supply Chain、Product/UI、QA、Market Scout、Telegram Ops 十二個工位。 +- Codex sub-agent 工具層已確認同時執行上限約 6 位;本輪採分批接力,12 位邏輯 Agent 的只讀審查已全部回收並彙整。 +- 新增 `ai_agent_12_agent_war_room_v1` schema、committed snapshot、loader guard、API endpoint `GET /api/v1/agents/agent-12-agent-war-room`、API/service tests、治理頁 12-Agent War Room 區塊與 i18n 文案。 +- War Room snapshot 固定 12 個 Agent role、12 份 read-only 
review、總工作量 `82`、evidence `84`、需批准 `61`、阻擋項 `54`、市場 refresh candidate `5`、日週月報 cadence `3`,且 live write / Telegram send / Bot API / production write / paid API / SDK install / secret read / destructive operation 全部 `0`。 +- 更新 `docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md`,將「12-Agent War Room 編組」完成度更新為 `72%`。 + +**關鍵裁決**: +- OpenClaw 仍是 production baseline 與最終仲裁者;其他 Agent 的輸出只能作為 evidence / expert suggestion。 +- Hermes 定位為記憶 / RAG 治理官,不是第二套修復決策核心;不得保存 raw token、完整 callback payload 或工作視窗內容。 +- NemoTron 定位為離線專家 / Agent Fabric 與模型評測器;可做 sanitized smoke、replay 評分、模型比較,但不得自行進 full replay、shadow/canary、production routing 或 paid API。 +- Security 定位為 `vuln-verifier` / Security Gatekeeper;IwoooS 仍是只讀 evidence + owner response gate + redaction / public bundle governance,不得因 UI 或 guard pass 開 runtime。 +- SRE / DevOps / Data/DR / Supply Chain / Product/UI / QA / Market / Telegram 已補齊對應 gate:service green 不等於 DR complete、Gitea CD 不等於 runtime/security acceptance、Market Scout 只能產候選批准包、Telegram Ops 只做低噪音 digest / receipt,不取代 runtime 授權。 + +**安全邊界**: +- 本輪只做文件、schema、snapshot、API、治理頁與只讀 sub-agent 審查;不改 runtime、不開 writer、不發 Telegram、不呼叫 Bot API、不讀 secret、不做 kubectl / ArgoCD / host / Nginx / firewall 操作。 + +**下一步**: +- 執行本地 pytest / typecheck / build / guard;推送 Gitea CD 後,做 production API readback 與 desktop / mobile Browser smoke。 +- P2-143 承接 report receipt / 月報 / Agent 工作量 runtime data model;runtime writer、Gateway queue、Telegram send、Bot API、production write 仍不得開啟。 + ## 2026-06-14|AI Agent 活動動畫本地完成 **背景**:統帥要求在相關治理頁加入 AI Agent 動畫,讓使用者能直覺看見 OpenClaw、Hermes、NemoTron 正在分工、溝通與產生治理證據;同時不得把工作視窗對話內容顯示到前端頁面。 diff --git a/docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md b/docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md new file mode 100644 index 00000000..710652cc --- /dev/null +++ b/docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md @@ -0,0 +1,188 @@ +# AI Agent 12-Agent War Room 編組與推進規則 + +> 日期:2026-06-14(台北時間) +> 文件定位:12 位 AI Agent 的協作分工、輸入輸出、批准邊界與推進節奏。 +> 事實邊界:本文件是治理編組與推進規則,不代表 
runtime writer、Telegram send、Bot API、付費 API、SDK 安裝、host update、kubectl apply、ArgoCD sync、rollback、Nginx reload 或任何 production write 已開啟。 + +## 1. 結論 + +要安排 12 位 Agent 一起推進工作,但必須用「分工 + 仲裁 + 證據 + 批准」的方式,而不是 12 位同時直接操作 production。 + +第一階段採用 **12 個邏輯工位、6 位 Codex sub-agent 分批執行**: + +- 產品治理上顯示 12 位 Agent 工位。 +- Codex 工具層若同時執行上限為 6 位,則採第一批 6 位、第二批 6 位接力。 +- 所有 Agent 先做只讀分析、互評、批准包、報告與驗證。 +- 中低風險可在未來由 policy 自動處理並回報;高風險仍必須由統帥批准。 +- OpenClaw 仍是仲裁 / HITL gate;Hermes 負責記憶、RAG、報告與知識寫回;NemoTron 負責離線回放、執行建議、模型/工具比較與優化候選。 + +## 2. 12 位 Agent 工位 + +| # | 工位 | 主責 Agent | 核心任務 | 主要輸入 | 主要輸出 | 自動化邊界 | +|---:|---|---|---|---|---|---| +| 1 | 仲裁與風險裁決 | OpenClaw | 高風險 gate、HITL、替換/升級仲裁、禁止事項判斷 | 市場證據、回放結果、runtime gate、批准包 | 仲裁決議、阻擋理由、下一關卡 | 不被其他 Agent 取代;不得無證據放行 | +| 2 | 記憶與 RAG | Hermes | LOGBOOK、KM、RAG chunk、報告與學習寫回設計 | 任務結果、驗證證據、報表、runbook | 記憶摘要、RAG 索引計畫、日週月報 | 不寫 secret、不存原始工作視窗內容 | +| 3 | 回放與執行專家 | NemoTron | fixture 回放、模型比較、prompt/工具優化候選 | smoke fixture、scorecard、市場版本 | 回放評分、優化建議、候選批准包 | 不直接切 production route、不呼叫付費 API | +| 4 | SRE 與觀測性 | SRE Sentinel | service health、SLO、降噪、冷啟動、runner / workflow 健康 | health API、Prometheus、SigNoz、cold-start、Gitea Actions | 健康摘要、告警降噪提案、SLO 報告 | 不重啟服務、不建立 silence、不修改 alert rule | +| 5 | 安全與 IwoooS | Security Sentinel | secret redaction、public bundle leak、source-control owner response、IwoooS gates | source scan、guard 結果、public route、security docs | 安全阻擋、脫敏提案、owner request | 不讀 secret value、不執行 active scan、不修改 firewall | +| 6 | DevOps / CD / K8s | DevOps Commander | Gitea CD、ArgoCD、K8s manifest、deploy marker、rollback 包 | git refs、CD run、kustomization、ArgoCD readback | deploy readback、rollback plan、release gate | 不 kubectl apply、不 ArgoCD sync、不 force push | +| 7 | Data / Backup / DR | Data DR Guardian | backup freshness、offsite escrow、restore drill、cold-start recovery | backup-status、DR scorecard、runbooks、escrow evidence | DR readiness、restore drill 批准包 | 不做 restore、不讀 credential、不寫 escrow marker | +| 8 | Supply Chain / 版本更新 | Supply Chain Scout | 
Python / JS / Docker / OS / AI Agent 版本、CVE、license、registry freshness | lockfiles、image tag、market watch、OSV / Trivy / Syft 計畫 | 升級批准包、版本漂移報告 | 不安裝 SDK、不升級套件、不拉新 image | +| 9 | Product / UI / 可視化 | Product UI Curator | governance UI、Agent 動畫、工作狀態可視化、mobile overflow | snapshot API、i18n、browser smoke、設計規則 | UI patch、視覺驗證、截圖證據 | 不顯示工作視窗對話、不做無關 redesign | +| 10 | QA / Verifier | QA Verifier | pytest、typecheck、build、browser smoke、console / overflow / forbidden text | 測試命令、production URL、DOM 檢查 | 驗證報告、阻擋項、截圖證據 | 不改 production、不用 mock 宣稱完成 | +| 11 | Market Scout | Agent Market Scout | 主流 AI Agent / SDK / 模型版本監測、benchmark 與新候選發現 | 官方 release、primary sources、scorecard | 市場觀察、候選清單、integration review | 不自動替換、不批准 shadow/canary | +| 12 | Telegram / 報告作戰室 | Telegram Ops Liaison | 日報/週報/月報、action-required digest、告警路由、回執模型 | Agent 工作量、風險、審核狀態、Gateway policy | 報告摘要、Telegram digest 草案、回執 gate | 不直接 Bot API send、不成功洗版 | + +## 3. 工作流 + +```text +1. Market Scout / Supply Chain Scout 定期發現版本與候選。 +2. Security Sentinel / SRE Sentinel / Data DR Guardian 產出只讀風險證據。 +3. NemoTron 針對低風險 fixture 做離線回放與優化建議。 +4. Hermes 把證據寫成可追溯報告、RAG chunk 與 LOGBOOK 摘要。 +5. QA Verifier 做測試、browser smoke 與 forbidden text 檢查。 +6. OpenClaw 對高風險項目仲裁:放行、要求補證據、阻擋或升級統帥審核。 +7. DevOps Commander 只在批准後產 deploy / rollback / release gate。 +8. Telegram Ops Liaison 只發 action-required digest;高風險等待統帥批准。 +``` + +## 4. 風險分層 + +| 風險層級 | 例子 | 12-Agent 處理方式 | +|---|---|---| +| 低風險 | 文件同步、只讀報告、UI 顯示、測試報告 | Agent 可自動完成,完成後回報 | +| 中風險 | 套件升級 PR 草案、告警降噪提案、非 production config 建議 | Agent 可產 patch / PR 草案;套用前至少 QA + OpenClaw gate | +| 高風險 | production write、kubectl apply、ArgoCD sync、Telegram 實發、付費 API、SDK 安裝、rollback、host update | 必須統帥批准;OpenClaw 仲裁;QA / SRE / Security / DevOps 交叉簽核 | + +## 5. 
Telegram 對接 + +Telegram 不應該讓 12 位 Agent 各自直接發送。統一走 `Telegram Ops Liaison` + Gateway policy: + +- 只發 action-required、failure、approval-required、release decision、DR blocker。 +- 成功訊息預設不洗版,除非是統帥指定的 release closeout。 +- 高風險訊息必須包含:風險層級、負責 Agent、證據連結、禁止事項、需要統帥批准的明確選項。 +- 中低風險自動處理後,只發彙總,不直接刷頻。 + +## 6. RAG / MCP / 智慧成長 + +| 類別 | 用途 | 首批用途 | +|---|---|---| +| RAG:LOGBOOK chunks | 累積事件、決策、驗證與阻擋理由 | Hermes 建立每日摘要與向量索引候選 | +| RAG:Runbook / ADR chunks | 讓 Agent 先查規則再提案 | OpenClaw / DevOps / Security 查 gate | +| RAG:Incident / alert traces | 把故障調查變成可重用 playbook | SRE + Hermes 產降噪與修復建議 | +| MCP:Git / Gitea | 讀 refs、PR、workflow、deploy marker | DevOps / QA 只讀驗證 | +| MCP:Observability | 讀 metrics、trace、alert 狀態 | SRE 只讀 | +| MCP:Package / registry scanner | 讀 CVE、license、版本新鮮度 | Supply Chain 只讀 | +| MCP:Telegram Gateway | 發送批准後 digest / receipt | Telegram Ops 受控發送 | +| MCP:Browser | 驗證 UI、DOM、截圖、console | QA / Product UI | + +## 7. 派工狀態 + +| 批次 | Agent | 狀態 | 說明 | +|---|---|---|---| +| 第一批 | OpenClaw 仲裁 | 已完成只讀審查 | 仲裁、HITL、高風險 gate、action permission matrix | +| 第一批 | Hermes 記憶/RAG | 已完成只讀審查 | 記憶、RAG、報告、KM / owner review 草稿 | +| 第一批 | NemoTron 回放執行 | 已完成只讀審查 | 離線 smoke、replay、模型比較、候選批准包 | +| 第一批 | SRE / 觀測性 | 已完成只讀審查 | service health、SLO、Alertmanager、runner、cold-start | +| 第一批 | Security / IwoooS | 已完成只讀審查 | redaction、public bundle、owner response、runtime gate | +| 第一批 | DevOps / CD / K8s | 已完成只讀審查 | Gitea CD、ArgoCD、K8s、NetworkPolicy、migration gate | +| 第二批 | Data / Backup / DR | 已完成只讀審查 | backup、offsite、credential escrow、restore drill | +| 第二批 | Supply Chain / 版本更新 | 已完成只讀審查 | 套件、CVE、Docker、AI Agent 市場、升級批准包 | +| 第二批 | Product / UI | 已完成只讀審查 | governance UI、Agent 動畫、redaction、mobile smoke | +| 第二批 | QA / Verifier | 已完成只讀審查 | pytest、typecheck、build、production smoke、forbidden text | +| 第二批 | Market Scout | 已完成只讀審查 | 主流 AI Agent、市場 benchmark、候選 refresh | +| 第二批 | Telegram Ops | 已完成只讀審查 | 日報/週報/月報、report receipt、低噪音通知 | + +## 8. 下一步 + +1.
完成 `ai_agent_12_agent_war_room_v1` schema / snapshot / API,讓治理頁可讀取 12 工位狀態。 +2. 在 `/zh-TW/governance?tab=automation-inventory` 新增 12-Agent War Room 可視化區塊。 +3. 將日報 / 週報 / 月報補上每位 Agent 的工作量、阻擋項、風險層級與自動處理量。 +4. 把高風險批准流程與 Telegram action-required digest 接到同一個審核模型。 +5. 後續 P2-143 承接 report receipt / 月報 / Agent 工作量 runtime data model;runtime writer、Telegram send、Bot API、production write 仍待 gate。 + +## 9. 12 位 Agent 只讀審查回饋 + +### 9.1 OpenClaw 仲裁審查 + +OpenClaw 工位必須保留為 production baseline 與最終仲裁者。其他 Agent 可以用 market-mainstream 證據、AWOOOI replay、shadow / canary 實測挑戰既有分工,但不能直接替換、降級或繞過 OpenClaw。OpenClaw 也應維護 action permission matrix,負責把 read-only、prepare-only、需 OpenClaw 仲裁、需人工/費用/依賴批准、blocked 分清楚。 + +OpenClaw 的輸入是 Hermes 的證據彙整、NemoTron 的 sanitized offline 評分、SRE / Security / DevOps / QA 的驗證結果;輸出是 gate verdict、blocked reason、next action 與是否需要統帥批准。禁止把 UI 可見、snapshot 進度、AwoooP approval、smoke pass 或 projection run 當成 runtime 授權。 + +### 9.2 Hermes 記憶/RAG 審查 + +Hermes 工位應定位為記憶 / RAG 治理官,不是第二套修復決策核心。OpenClaw 仍主導診斷、分類、風險、PlayBook / KM / RAG / MCP 融合與「能不能自動化修復」;Hermes 主導 channel delivery、Operator UI、Telegram / AwoooP / 前端階段呈現、callback 狀態、delivery audit,以及 `knowledge_degradation` 的 KM 草稿反查與 owner review。 + +Hermes 負責維護 RAG 來源分級、index 健康、top-k 命中品質、stale / dormant / deprecated 排除;把 governance event 轉成 KM 草稿、owner review queue、去重 / 封存 dry-run plan;管理日報 / 週報 / 月報的知識健康、RAG 命中率、KM 新增、stale ratio、redaction 結果。禁止直接批准高影響 KM、直接改 PlayBook trust、把 UI 可見或 smoke pass 說成 runtime/security acceptance、保存 raw token / callback payload / 工作視窗內容。 + +### 9.3 NemoTron 回放審查 + +NemoTron 目前可信定位是「離線專家 / Agent Fabric 與模型評測器」,不是生產仲裁者。它可以做 sanitized smoke、replay 評分、模型比較、輸出合約檢查、prompt / output contract 優化、HITL policy injection 檢查與 latency budget 分析。 + +目前既有證據顯示 NemoTron 仍被 full replay、shadow/canary、production route gate 擋住;它的結果只能成為 OpenClaw 仲裁輸入。另有一個需後續 DevOps / Security 釐清的靜態漂移:`04-configmap.yaml` 與 `06-deployment-api.yaml` 對 `ENABLE_NEMOTRON_COLLABORATION` 的宣告不一致;此項應列入後續只讀 drift gate,不得直接 patch production。 + +### 9.4 
Security / IwoooS 審查 + +Security 工位應定位為 `vuln-verifier` / Security Gatekeeper,不是 runtime operator。IwoooS 仍是只讀 evidence、owner response gate、redaction、public bundle governance 階段;不能因 UI、AwoooP approval、文件 ready、guard pass 就打開執行權限。 + +Security Gate 至少包含五層:只讀 evidence gate、owner response envelope gate、public bundle gate、source-control gate、runtime gate。Telegram 只能送脫敏狀態與 action-required digest;不得送 raw secret、token、partial token、private URL、未脫敏截圖,也不得用 Telegram button 直接執行危險操作。 + +### 9.5 SRE / 觀測性審查 + +SRE 工位定位為可靠性與觀測契約 owner,不是任意修復 executor。它負責把 service green、DR blocked、governance debt 分開呈現;service health、cold-start、backup、runner、Alertmanager、Prometheus、Grafana、SigNoz、OTEL 都是它的觀測面。 + +降噪只能產生 proposal,不能直接改 Prometheus rule、Alertmanager route、silence、reload 或送 Telegram 測試。Telegram policy 必須 failure-only / action-required,成功 health 預設安靜。 + +### 9.6 DevOps / CD / K8s 審查 + +DevOps 工位擁有 Gitea CD、ArgoCD、K8s manifest、runner health、workflow gate 與部署驗證鏈的設計審查。它不能把 Gitea workflow success、UI 可見或 smoke pass 當成 runtime / security acceptance。 + +目前 P0 / P1 風險包含:`02-network-policy.yaml` 排除在 Kustomize 外但 CD apply 證據不足、migration workflow 屬高風險、HPA/VPA/restore cron 是否納入 GitOps contract 待釐清、`NEXT_PUBLIC_*` 內網 IP 形態需確認是否進 bundle、ArgoCD Degraded 不可被 rollout 成功掩蓋。 + +### 9.7 Data / Backup / DR 審查 + +Data / DR 工位是備份、異地、escrow、restore drill 與 cold-start gate 的守門人,不是 restore executor。核心 backup/offsite/cold-start 偏綠,但 DR 不能宣告完成,因為 credential escrow 仍缺 5 個 non-secret evidence marker。 + +它每日要報告 backup freshness、offsite marker、restore drill、escrow missing count、cold-start scorecard 與 alert visibility;禁止自行 restore、寫 escrow marker、讀 credential、變更 schedule 或把單一 backup success 當 aggregate green。 + +### 9.8 Supply Chain / 版本更新審查 + +Supply Chain 工位負責 repo-only manifest、lockfile、Docker surface、CVE/license/drift policy 與 upgrade approval package。它不是自動更新器;外部 CVE / registry / license lookup、套件升級、lockfile write、image pull / rebuild / push、SDK 安裝、paid API 都需要批准。 + +目前 P0 / P1 缺口包含:agent-market workflow runner 
label 需符合 self-hosted 鐵律、Dockerfile / package snapshot 與實際 worktree 漂移要重查、`:latest` 只能是 cache-only 不得 deploy、Kali / Harbor 紅燈只能進批准包。 + +### 9.9 Product / UI 審查 + +Product / UI 工位負責把治理狀態做成可理解、可驗證、不可誤導的只讀介面。動畫只能表達狀態語意,不應成為裝飾,也不能使用官方 logo 或把 UI 可見包裝成 runtime authorization。 + +每次治理 UI 變更都要做 i18n、desktop / mobile smoke、console error、HTTP error、horizontal overflow、dangerous action、forbidden content scan;不得顯示工作視窗對話、prompt、session、browser context、secret 或 token。 + +### 9.10 QA / Verifier 審查 + +QA 工位是獨立驗證閘門,對每輪交付產出 `PASS`、`CONDITIONAL PASS` 或 `BLOCKED`,並附命令、route、viewport、commit 或 deploy marker。它不替代 DevOps 部署,也不替代 Security 授權。 + +前端可見變更至少要 typecheck / build / production desktop + mobile smoke;後端/API/告警鏈至少要 pytest、API response readback、alert-chain 或對應 E2E。部署宣稱必須有 Gitea / ArgoCD / Pod image / API health / production DOM 證據。 + +### 9.11 Market Scout 審查 + +Market Scout 工位是市場雷達與候選批准包 owner,不是整合 executor。它負責追蹤 AI Agent / 模型 / 框架版本、官方來源、benchmark 摘要、版本差異、風險與批准包。 + +OpenClaw 不再免戰,但不能被市場聲量直接換掉;新版本或新 Agent 必須經 source refresh、market watch report、integration review、discovery classification、promotion review、governance snapshot、OpenClaw 仲裁與 HITL。NemoTron 3 Ultra 應先走 source refresh + 5-record smoke 批准包,不是直接 full replay 或 production。 + +### 9.12 Telegram Ops 審查 + +Telegram Ops 工位負責 Telegram Gateway、Channel Hub、日報/週報/月報、report receipt、去重、脫敏與 action-required digest。它不是決策者,也不是執行者。 + +低風險結果進 UI 與日報摘要;中風險若涉及設定、外部服務、成本或不確定狀態就升級;高風險必須建立 approval record,由 OpenClaw / 審核者裁決。Telegram 卡片只能作審核入口與結果通知,不能取代 runtime authorization。 + +## 10. 
完成度 + +```text +進度:72%。 +目前優先級:P1。 +目前任務:將 12-Agent War Room 從文件編組產品化為 schema / snapshot / API / governance UI。 +狀態變更:12 位 Agent 只讀審查全部回收並彙整;已建立 `ai_agent_12_agent_war_room_v1` schema / committed snapshot / API / tests / governance UI 區塊。 +證據:`docs/schemas/ai_agent_12_agent_war_room_v1.schema.json`、`docs/evaluations/ai_agent_12_agent_war_room_2026-06-14.json`、`GET /api/v1/agents/agent-12-agent-war-room`、治理頁 12-Agent War Room 區塊、12 位 Agent 只讀回饋。 +阻擋:runtime writer、Telegram send、Bot API、production write、SDK 安裝、付費 API、shadow/canary、host update、DB migration、restore 仍未批准。 +下一步:本地驗證、推送 Gitea CD、production API readback 與 desktop / mobile Browser smoke;P2-143 承接 report receipt / 月報 / Agent 工作量 runtime data model。 +``` diff --git a/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md b/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md index 1504ff90..10b5b290 100644 --- a/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md +++ b/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md @@ -14,6 +14,7 @@ | OpenClaw / Hermes / NemoTron 佈建布局 | 45% | P1-401 / P1-402 已完成;仍是只讀 layout 與治理頁顯示,不是 runtime deploy | `ai_agent_deployment_layout_v1` schema、`ai_agent_deployment_layout_2026-06-11.json`、`GET /api/v1/agents/agent-deployment-layout`、治理頁自動化盤點 UI、`AI_AGENT_DEPLOYMENT_LAYOUT_2026-06-11.md` | | OpenClaw / Hermes / NemoTron 主動溝通、學習與成長證據 | 100% | P2-401A 到 P2-141 已完成只讀證據面、runtime / report / result-capture gates、no-write readback、promotion review、writer implementation review、writer dry-run fixture、writer dry-run readback、owner promotion execution gate、owner-approved execution rehearsal、owner acceptance / maintenance window gate、owner acceptance readback / preflight hold、owner-approved preflight release package、owner-approved release readiness readback、owner release approval gate、post-release verifier / rollback gate、final release candidate readback、release authorization hold / readback gate、release verifier preflight / owner review packet、release decision hold / readback、release decision 
next handoff 與 release decision input prep;P2-141 基線與 S4.9 owner release packet 補強皆已正式驗證,固定 5 個 decision input packet、18 個 missing input field、6 個 blocked input transition 與 5 個 operator action,並把 P2-140 下一關交接轉成 owner / verifier / rollback / maintenance / live-apply 五類決策輸入準備包;owner release packet 已補 S4.9 owner role / team、decision、decision reason、affected scope、redacted evidence refs、followup owner。runtime worker、DB migration、production Redis consumer group、canonical runtime readback、live query、runtime score、result capture write、Telegram 實發、delivery receipt E2E、live report delivery、reviewer queue write、Gateway queue write、AI analysis runtime、中低風險 auto worker、KM / LOGBOOK / audit DB / timeline / PlayBook trust 寫入、SDK / 付費服務仍未開 gate | `ai_agent_result_capture_release_decision_input_prep_v1`、`GET /api/v1/agents/agent-result-capture-release-decision-input-prep`、feature commit `ee5bf500`、deploy marker `306657fd`、Gitea code-review `2954` / CD `2953` success、P2-141 基線正式 API readback、desktop / mobile smoke;S4.9 補強 commit `77515bbe`、deploy marker `a1ad68b9`、Gitea code-review `2956` / CD `2955` success、正式 API readback missing input field `18`、owner 六欄位、desktop / mobile smoke、in-app browser smoke;本地 API/service regression `15 passed`、JSON parse、Python compile、i18n key mirror `11864`、Web typecheck、guard 與 doc secret sanity 通過;治理頁 P2-141 區塊、禁用外露字串 `0`、水平溢位 `0`、內容區危險控制 `0`、console error `0`、HTTP 4xx/5xx `0`;MASTER §3.2.1b / §3.2.1d / §3.4.3 | | AI Agent 主動營運委派與版本生命週期 | 100% | P2-402A / P2-402B / P2-402C / P2-402D / P2-402E / P2-402F / P2-402G 已完成;已建立 repo-only 版本新鮮度快照、工具採用批准包、Telegram action-required digest policy、Gitea PR 草案 lane、host / K3s / stateful 版本只讀盤點、API 與 governance UI。定期排程、外部版本查詢、工具安裝、CI 變更、套件升級、主機更新、container pull、實際 PR creation、auto merge、Telegram 實發、SSH、kubectl、重啟仍未開 gate | 
`ai_agent_proactive_operations_contract_v1`、`ai_agent_version_freshness_snapshot_v1`、`ai_agent_tool_adoption_approval_package_v1`、`ai_agent_telegram_action_required_digest_policy_v1`、`ai_agent_gitea_pr_draft_lane_v1`、`ai_agent_host_stateful_version_inventory_v1`、`GET /api/v1/agents/agent-proactive-operations-contract`、`GET /api/v1/agents/agent-version-freshness-snapshot`、`GET /api/v1/agents/agent-tool-adoption-approval-package`、`GET /api/v1/agents/agent-telegram-action-required-digest-policy`、`GET /api/v1/agents/agent-gitea-pr-draft-lane`、`GET /api/v1/agents/agent-host-stateful-version-inventory`、`/zh-TW/governance?tab=automation-inventory`、MASTER §3.2.1c | +| 12-Agent War Room 編組 | 72% | 12 個邏輯工位與分批派工規則已建立;OpenClaw / Hermes / NemoTron / SRE / Security / DevOps / Data/DR / Supply Chain / Product/UI / QA / Market / Telegram 共 12 份只讀審查已回收;已建立 schema / committed snapshot / API / tests / governance UI 區塊;runtime writer、Telegram send、Bot API、production write 仍未批准 | `ai_agent_12_agent_war_room_v1`、`docs/evaluations/ai_agent_12_agent_war_room_2026-06-14.json`、`GET /api/v1/agents/agent-12-agent-war-room`、`/zh-TW/governance?tab=automation-inventory`、12 份 Codex sub-agent 只讀回饋 | | 本工作清單與分析報告 | 100% | 已完成 | 本 MD 文件 | ### 2026-06-14 08:44 狀態同步 @@ -129,7 +130,7 @@ - P2-141 固定 5 個 decision input packet、18 個 missing input field、6 個 blocked input transition、5 個 operator action、需批准 `12`、阻擋 `12`、正式寫入 / 發送 `0`;owner release packet 已補齊 S4.9 owner role / team、decision、decision reason、affected scope、redacted evidence refs、followup owner。 - 本地證據:P2-141 + P2-140 API/service regression `15 passed`、JSON parse、Python compile、i18n key mirror `11864`、Web typecheck、guard 與 doc secret sanity 通過。 - 邊界仍維持:owner release authorized、owner review approved、owner decision approved、verifier decision approved、maintenance window approved、release decision passed、release authorization granted / passed、rollback release、live apply release、writer apply、execution apply、receipt write、reviewer queue write、Gateway 
queue write、Telegram send、Bot API、report receipt、result capture write、learning write、PlayBook trust write、production write、secret read、destructive operation 全部 `0 / false`。 -- P2-141 推送與正式驗證後才可交給 `P2-142`;仍不得直接開啟 result capture writer、learning writer、PlayBook trust writer、reviewer queue write、Gateway queue write、Telegram send、Bot API call 或 production write。 +- P2-141 後已由 `P2-142` 12-Agent War Room 承接本地產品化;仍不得直接開啟 result capture writer、learning writer、PlayBook trust writer、reviewer queue write、Gateway queue write、Telegram send、Bot API call 或 production write。 ### 2026-06-14 13:32 狀態同步 @@ -162,9 +163,9 @@ AI Agent 自動化工作包目前完成度:**99.8%**。本工作清單文件 三 Agent 佈建布局目前完成度:**45%**。第一波已完成只讀 schema / snapshot / API / 測試 / 報告,第二波已接入治理頁自動化盤點 UI;正式 runtime 佈署、Telegram E2E 發送與 AgentSession 工作流仍需逐項 gate。 -三 Agent 主動溝通、學習與成長證據目前完成度:**100%**。P2-403A 到 P2-141 已把互動證據、報表治理、runtime readback、reviewer / result capture / writer gates、no-write readback、promotion review、writer implementation review、writer dry-run fixture、writer dry-run readback、owner promotion execution gate、owner-approved execution rehearsal、owner acceptance / maintenance window gate、owner acceptance readback / preflight hold、owner-approved preflight release package、owner-approved release readiness readback、owner release approval gate、post-release verifier / rollback gate、final release candidate readback、release authorization hold、release authorization readback gate、release verifier preflight gate、release verifier owner review packet、release decision hold、release decision readback、release decision next handoff 與 release decision input prep 固定成可驗證證據。P2-141 基線已由 feature commit `ee5bf500` 與 deploy marker `306657fd` 正式驗證,S4.9 owner 欄位補強已由 commit `77515bbe` 與 deploy marker `a1ad68b9` 正式驗證;P2-140 已由 feature commit `2fe31c91` 與 deploy marker `40741425` 正式驗證,並在後續遮罩 deploy markers `0ae1a25d` 與 `a6b2d187` 後完成 production recheck;P2-139 已由 feature commit `d41b1a38` 與 deploy marker `df867bd6` 正式驗證。目前 live AgentSession、Agent 
message、handoff、canonical runtime readback、live query、runtime score、result capture write、learning write、Telegram receipt、Gateway queue write、reviewer queue write、runtime verifier execution、live report delivery、AI analysis runtime、中低風險 auto worker、Telegram 實發、shadow worker live、delivery receipt E2E、KM / LOGBOOK / audit DB / timeline / PlayBook trust runtime 寫入仍全部為 `0`。真正下一步是由 `P2-142` 承接下一個只讀關卡;不得開啟 runtime writer、Gateway queue、Telegram send、Bot API 或 production write。 +三 Agent 主動溝通、學習與成長證據目前完成度:**100%**。P2-403A 到 P2-142 已把互動證據、報表治理、runtime readback、reviewer / result capture / writer gates、no-write readback、promotion review、writer implementation review、writer dry-run fixture、writer dry-run readback、owner promotion execution gate、owner-approved execution rehearsal、owner acceptance / maintenance window gate、owner acceptance readback / preflight hold、owner-approved preflight release package、owner-approved release readiness readback、owner release approval gate、post-release verifier / rollback gate、final release candidate readback、release authorization hold、release authorization readback gate、release verifier preflight gate、release verifier owner review packet、release decision hold、release decision readback、release decision next handoff、release decision input prep 與 12-Agent War Room 固定成可驗證證據。P2-141 基線已由 feature commit `ee5bf500` 與 deploy marker `306657fd` 正式驗證,S4.9 owner 欄位補強已由 commit `77515bbe` 與 deploy marker `a1ad68b9` 正式驗證;P2-142 已本地完成,正式驗證後才可進入 P2-143。目前 live AgentSession、Agent message、handoff、canonical runtime readback、live query、runtime score、result capture write、learning write、Telegram receipt、Gateway queue write、reviewer queue write、runtime verifier execution、live report delivery、AI analysis runtime、中低風險 auto worker、Telegram 實發、shadow worker live、delivery receipt E2E、KM / LOGBOOK / audit DB / timeline / PlayBook trust runtime 寫入仍全部為 `0`。真正下一步是完成 P2-142 正式部署驗證,再由 `P2-143` 承接只讀關卡;不得開啟 runtime writer、Gateway queue、Telegram send、Bot API 或 production write。 -AI 
Agent 主動營運委派與版本生命週期目前完成度:**100%**。已完成 12 類版本 domain、24 類可委派能力、5 種 cadence、8 類 MCP、4 類 RAG memory、只讀 API、`P2-402B` repo-only daily version freshness snapshot、`P2-402C` Renovate / OSV-Scanner / Trivy / Syft / Grype 工具採用批准包、`P2-402D` Telegram action-required digest policy、`P2-402E` Gitea PR 草案 lane、`P2-402F` host OS / K3s / stateful services 版本只讀盤點,以及 `P2-402G` governance UI 顯示可委派能力;`P2-403A` 到 `P2-141` 已補互動、學習證據面、live read model gate、Redis dry-run gate、learning writeback approval package、Telegram receipt approval package、owner-approved learning dry-run preview、runtime write gate review、post-write verifier package、runtime verifier evidence review、報表真相、TG 戰情室收斂、日週月報、Agent 工作量、圖表化報告、風險自動化政策、報表 runtime 啟動前閘門、no-write dry-run 證據包、fixture/readback/verifier dry-run 證據包、shadow/no-write execution gate、操作類別權限模型、13 類候選操作 dry-run 證據、任務結果稽核軌跡、matched PlayBook 學習缺口、critic / reviewer result capture、owner-approved result capture dry-run / readback、result capture writer dry-run fixture、writer dry-run readback、owner promotion execution gate、owner-approved execution rehearsal、owner acceptance / maintenance window gate、owner acceptance readback / preflight hold、owner-approved preflight release package、owner-approved release readiness readback、owner release approval gate、post-release verifier / rollback gate、final release candidate readback、release authorization hold、release authorization readback gate、release verifier preflight gate、release verifier owner review packet、release decision hold、release decision readback、next handoff readback 與 decision input prep。下一步是 P2-142;外部 registry / package source / host probe / SSH / kubectl / 工具安裝 / CI 變更 / 實際 PR creation / Telegram 實發與 learning write 仍需 gate。 +AI Agent 主動營運委派與版本生命週期目前完成度:**100%**。已完成 12 類版本 domain、24 類可委派能力、5 種 cadence、8 類 MCP、4 類 RAG memory、只讀 API、`P2-402B` repo-only daily version freshness snapshot、`P2-402C` Renovate / OSV-Scanner / Trivy / Syft / Grype 工具採用批准包、`P2-402D` Telegram action-required digest policy、`P2-402E` Gitea PR 
草案 lane、`P2-402F` host OS / K3s / stateful services 版本只讀盤點,以及 `P2-402G` governance UI 顯示可委派能力;`P2-403A` 到 `P2-142` 已補互動、學習證據面、live read model gate、Redis dry-run gate、learning writeback approval package、Telegram receipt approval package、owner-approved learning dry-run preview、runtime write gate review、post-write verifier package、runtime verifier evidence review、報表真相、TG 戰情室收斂、日週月報、Agent 工作量、圖表化報告、風險自動化政策、報表 runtime 啟動前閘門、no-write dry-run 證據包、fixture/readback/verifier dry-run 證據包、shadow/no-write execution gate、操作類別權限模型、13 類候選操作 dry-run 證據、任務結果稽核軌跡、matched PlayBook 學習缺口、critic / reviewer result capture、owner-approved result capture dry-run / readback、result capture writer dry-run fixture、writer dry-run readback、owner promotion execution gate、owner-approved execution rehearsal、owner acceptance / maintenance window gate、owner acceptance readback / preflight hold、owner-approved preflight release package、owner-approved release readiness readback、owner release approval gate、post-release verifier / rollback gate、final release candidate readback、release authorization hold、release authorization readback gate、release verifier preflight gate、release verifier owner review packet、release decision hold、release decision readback、next handoff readback、decision input prep 與 12-Agent War Room。下一步是 P2-142 正式驗證後進 P2-143;外部 registry / package source / host probe / SSH / kubectl / 工具安裝 / CI 變更 / 實際 PR creation / Telegram 實發與 learning write 仍需 gate。 完成度計算模型: @@ -1156,6 +1157,7 @@ UI: | P2-139 | 完成 | 100 | OpenClaw + Hermes + NemoTron | release decision readback | `ai_agent_result_capture_release_decision_readback_v1` / schema / snapshot / 只讀 API / governance UI;feature commit `d41b1a38`、deploy marker `df867bd6` 已正式驗證;承接 P2-138,只讀回 5 個 release decision readback、5 個 owner decision readback、5 個 verifier decision readback、5 個 rollback decision readback、5 個 maintenance window decision readback、5 個 live-apply decision readback、6 個 blocked readback transition、5 個 operator action;需批准 `12`、阻擋 
`12`;runtime authority 固定 `result_capture_release_decision_readback_only_no_live_write`;owner release authorized / owner review approved / owner decision approved / verifier decision approved / maintenance window approved / release decision passed / release authorization granted / passed / rollback release / live apply release / writer apply / execution apply / receipt write / reviewer queue / Gateway / Telegram / Bot API / result capture / learning / PlayBook trust / production write / secret read / destructive operation 全為 `0` | 本地 P2-139 API/service pytest `7 passed`、JSON parse、Python compile、i18n key mirror `11809` 通過;production API readback、desktop / mobile smoke、水平溢位 `0`、P2-139 卡片危險控制 `0`、console error `0`、HTTP 4xx/5xx `0`;不得把 P2-139 當 runtime gate,已可由 P2-140 承接 | | P2-140 | 完成 | 100 | OpenClaw + Hermes + NemoTron | release decision next handoff readback | `ai_agent_result_capture_release_decision_next_handoff_v1` / schema / snapshot / 只讀 API / governance UI;feature commit `2fe31c91`、deploy marker `40741425` 已正式驗證,後續 deploy markers `0ae1a25d` 與 `a6b2d187` 已重驗;承接 P2-139,只讀回下一關交接並隔離 P2-139 自我迴圈;固定 5 個 next-gate handoff、1 個 stale operator action containment、6 個 blocked handoff transition、5 個 operator action;需批准 `12`、阻擋 `12`;runtime authority 固定 `result_capture_release_decision_next_handoff_only_no_live_write`;owner release authorized / owner review approved / owner decision approved / verifier decision approved / maintenance window approved / release decision passed / release authorization granted / passed / rollback release / live apply release / writer apply / execution apply / receipt write / reviewer queue / Gateway / Telegram / Bot API / result capture / learning / PlayBook trust / production write / secret read / destructive operation 全為 `0` | 本地 P2-140 + P2-139 API/service regression `14 passed`、JSON parse、Python compile、i18n key mirror `11837`、guard 與 doc secret sanity 通過;production API readback、desktop / mobile smoke、in-app browser smoke、水平溢位 
`0`、內容區危險控制 `0`、console error `0`、HTTP 4xx/5xx `0`;不得把 P2-140 當 runtime gate,已可由 P2-141 承接 | | P2-141 | 完成 | 100 | OpenClaw + Hermes + NemoTron | release decision input prep | `ai_agent_result_capture_release_decision_input_prep_v1` / schema / snapshot / 只讀 API / governance UI;feature commit `ee5bf500`、deploy marker `306657fd` 已完成 P2-141 基線正式驗證;S4.9 補強 commit `77515bbe`、deploy marker `a1ad68b9` 已完成正式驗證;承接 P2-140 next handoff,整理 5 個 decision input packet、18 個 missing input field、6 個 blocked input transition、5 個 operator action;owner release packet 已補齊 S4.9 owner role / team、decision、decision reason、affected scope、redacted evidence refs、followup owner;需批准 `12`、阻擋 `12`;runtime authority 固定 `result_capture_release_decision_input_prep_only_no_live_write`;owner release authorized / owner review approved / owner decision approved / verifier decision approved / maintenance window approved / release decision passed / release authorization granted / passed / rollback release / live apply release / writer apply / execution apply / receipt write / reviewer queue / Gateway / Telegram / Bot API / result capture / learning / PlayBook trust / production write / secret read / destructive operation 全為 `0` | P2-141 基線 production API readback、desktop / mobile smoke、水平溢位 `0`、P2-141 卡片危險控制 `0`、console error `0`、HTTP 4xx/5xx `0`;S4.9 補強 production API readback、owner 六欄位、缺欄位 `18`、desktop / mobile smoke、in-app browser smoke、禁用外露值 `0`、水平溢位 `0`、危險按鈕 `0`、console error `0`、HTTP 4xx/5xx `0`;本地 P2-141 + P2-140 API/service regression `15 passed`、JSON parse、Python compile、i18n key mirror `11864`、Web typecheck、guard 與 doc secret sanity 通過;不得把 input prep 當 runtime gate,已可由 P2-142 承接 | +| P2-142 | 本地完成 | 72 | OpenClaw + Hermes + NemoTron + SRE + Security + DevOps + Data/DR + Supply Chain + Product/UI + QA + Market + Telegram | 12-Agent War Room 編組 | `ai_agent_12_agent_war_room_v1` / schema / snapshot / 只讀 API / governance UI;12 個邏輯工位、12 份 read-only review、總工作量 `82`、evidence `84`、需批准 `61`、阻擋項 `54`、市場 
refresh candidate `5`、日週月報 cadence `3`;runtime authority 固定 `12_agent_war_room_read_only_no_live_write`;live write / Telegram send / Bot API / production write / paid API / SDK install / secret read / destructive operation 全為 `0` | 本地 P2-142 API/service pytest `7 passed`、JSON parse、Python compile、i18n key mirror `11894`、Web typecheck、Web production build、guard、doc secret sanity、公開 payload 目標掃描通過;正式部署與 production desktop / mobile smoke 待 Gitea CD 完成後補驗,不得把 War Room 可見當 runtime gate,正式驗證後才可由 P2-143 承接 | ### P3 - 候選 Agent 擴展 diff --git a/docs/evaluations/ai_agent_12_agent_war_room_2026-06-14.json b/docs/evaluations/ai_agent_12_agent_war_room_2026-06-14.json new file mode 100644 index 00000000..17831666 --- /dev/null +++ b/docs/evaluations/ai_agent_12_agent_war_room_2026-06-14.json @@ -0,0 +1,341 @@ +{ + "schema_version": "ai_agent_12_agent_war_room_v1", + "generated_at": "2026-06-14T14:20:00+08:00", + "program_status": { + "current_priority": "P1", + "current_task_id": "P2-142", + "next_task_id": "P2-143", + "overall_completion_percent": 72, + "read_only_mode": true, + "runtime_authority": "12_agent_war_room_read_only_no_live_write", + "status_note": "12 個邏輯工位與 12 份只讀審查已收斂;本 snapshot 只把協作狀態產品化,不開 runtime writer、Telegram send、Bot API、SDK 安裝、付費 API 或 production write。" + }, + "source_refs": [ + "docs/ai/AI_AGENT_12_AGENT_WAR_ROOM_2026-06-14.md", + "docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md", + "docs/HARD_RULES.md", + "docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md", + "docs/12-agent-game-rules.md", + "https://build.nvidia.com/nvidia/nemotron-3-ultra-550b-a55b/modelcard", + "https://github.com/openai/openai-agents-python/releases", + "https://github.com/langchain-ai/langgraph/releases", + "https://adk.dev/2.0/" + ], + "coordination_model": { + "logical_agent_count": 12, + "subagent_batch_limit": 6, + "subagent_batch_count": 2, + "coordination_mode": "read_only_evidence_then_gate", + "arbiter": "openclaw", + "memory_owner": "hermes", + 
"replay_owner": "nemotron", + "gateway_owner": "telegram_ops_liaison", + "summary": "12 個工位分批收集證據;OpenClaw 只做仲裁與 HITL gate,Hermes 做記憶與 RAG,NemoTron 做離線回放與候選評估,Telegram Ops 做低噪音報告與 receipt。" + }, + "agent_roles": [ + { + "agent_id": "agent_01_openclaw_arbiter", + "display_name": "OpenClaw 仲裁者", + "primary_agent": "OpenClaw", + "war_room_role": "仲裁與風險裁決", + "review_status": "read_only_review_completed", + "risk_tier": "critical", + "work_units": 7, + "evidence_items": 6, + "blocker_count": 4, + "approval_required_count": 5, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "維護 action permission matrix,並把其他 Agent 的證據轉成 gate verdict。" + }, + { + "agent_id": "agent_02_hermes_rag", + "display_name": "Hermes 記憶/RAG", + "primary_agent": "Hermes", + "war_room_role": "記憶、RAG、報告與知識治理", + "review_status": "read_only_review_completed", + "risk_tier": "high", + "work_units": 6, + "evidence_items": 6, + "blocker_count": 3, + "approval_required_count": 4, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "把 War Room 結果轉成 RAG chunk、報告摘要與 owner review 草稿,但不保存原始工作視窗內容。" + }, + { + "agent_id": "agent_03_nemotron_replay", + "display_name": "NemoTron 回放專家", + "primary_agent": "NemoTron", + "war_room_role": "離線回放、模型比較與執行建議", + "review_status": "read_only_review_completed", + "risk_tier": "high", + "work_units": 6, + "evidence_items": 5, + "blocker_count": 5, + "approval_required_count": 6, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "先走 source refresh 與 5-record smoke 批准包,不進 full replay、shadow/canary 或 production route。" + }, + { + "agent_id": "agent_04_sre_sentinel", + "display_name": "SRE Sentinel", + "primary_agent": "SRE", + "war_room_role": "可靠性、SLO、告警與觀測契約", + "review_status": "read_only_review_completed", + "risk_tier": "high", + "work_units": 8, + "evidence_items": 8, + "blocker_count": 5, + "approval_required_count": 4, + 
"live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "把 service green、DR blocked、governance debt 分開報告;降噪只產 proposal,不 reload 或 silence。" + }, + { + "agent_id": "agent_05_security_sentinel", + "display_name": "Security Sentinel", + "primary_agent": "Security", + "war_room_role": "IwoooS、redaction、source-control 與 runtime gate", + "review_status": "read_only_review_completed", + "risk_tier": "critical", + "work_units": 7, + "evidence_items": 7, + "blocker_count": 5, + "approval_required_count": 6, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "維持只讀 evidence、owner response envelope、public bundle、source-control、runtime 五層 gate。" + }, + { + "agent_id": "agent_06_devops_commander", + "display_name": "DevOps Commander", + "primary_agent": "DevOps", + "war_room_role": "Gitea CD、ArgoCD、K8s 與 release gate", + "review_status": "read_only_review_completed", + "risk_tier": "critical", + "work_units": 8, + "evidence_items": 9, + "blocker_count": 6, + "approval_required_count": 7, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "把 NetworkPolicy、migration、runner、NEXT_PUBLIC 與 ArgoCD Degraded 漂移列入批准包。" + }, + { + "agent_id": "agent_07_data_dr_guardian", + "display_name": "Data / DR Guardian", + "primary_agent": "Data DR", + "war_room_role": "備份、異地、escrow、restore drill 與 cold-start gate", + "review_status": "read_only_review_completed", + "risk_tier": "critical", + "work_units": 7, + "evidence_items": 7, + "blocker_count": 5, + "approval_required_count": 6, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "核心備份偏綠但 DR 不得宣告完成;先補 5 個 non-secret escrow evidence marker。" + }, + { + "agent_id": "agent_08_supply_chain_scout", + "display_name": "Supply Chain Scout", + "primary_agent": "Supply Chain", + "war_room_role": "套件、Docker、CVE、license、版本漂移與升級批准包", + "review_status": "read_only_review_completed", + 
"risk_tier": "high", + "work_units": 7, + "evidence_items": 7, + "blocker_count": 5, + "approval_required_count": 7, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "只讀檢查 manifest / lockfile / Docker surface;外部 CVE、套件升級與 image pull 需批准。" + }, + { + "agent_id": "agent_09_product_ui_curator", + "display_name": "Product UI Curator", + "primary_agent": "Product UI", + "war_room_role": "治理 UI、動畫、redaction 與 mobile smoke", + "review_status": "read_only_review_completed", + "risk_tier": "medium", + "work_units": 6, + "evidence_items": 6, + "blocker_count": 3, + "approval_required_count": 2, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "把 12-Agent 狀態做成可掃描、mobile 不爆版、不可誤讀為授權的只讀 UI。" + }, + { + "agent_id": "agent_10_qa_verifier", + "display_name": "QA Verifier", + "primary_agent": "QA", + "war_room_role": "pytest、typecheck、build、production smoke、console、overflow 與禁用文字掃描", + "review_status": "read_only_review_completed", + "risk_tier": "high", + "work_units": 7, + "evidence_items": 8, + "blocker_count": 4, + "approval_required_count": 3, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "每輪交付輸出 PASS / CONDITIONAL PASS / BLOCKED,並附命令、route、viewport、commit 或 deploy marker。" + }, + { + "agent_id": "agent_11_market_scout", + "display_name": "Agent Market Scout", + "primary_agent": "Market Scout", + "war_room_role": "AI Agent 市場雷達與候選批准包", + "review_status": "read_only_review_completed", + "risk_tier": "high", + "work_units": 6, + "evidence_items": 8, + "blocker_count": 4, + "approval_required_count": 6, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "刷新 Agent Market snapshot,將新版本與候選轉成 OpenClaw 可仲裁、NemoTron 可回放的批准包。" + }, + { + "agent_id": "agent_12_telegram_ops_liaison", + "display_name": "Telegram Ops Liaison", + "primary_agent": "Telegram Ops", + "war_room_role": 
"日報、週報、月報、receipt、低噪音通知與審核卡片", + "review_status": "read_only_review_completed", + "risk_tier": "high", + "work_units": 7, + "evidence_items": 7, + "blocker_count": 5, + "approval_required_count": 5, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "next_action": "建立統一 report receipt;低風險進摘要,高風險只產審核入口,不替代 runtime authorization。" + } + ], + "workload_summary": { + "total_work_units": 82, + "total_evidence_items": 84, + "status_report_visible": true, + "chart_ready": true, + "daily_report_required": true, + "weekly_report_required": true, + "monthly_report_required": true, + "standard_agent_metrics": [ + "agent_id", + "agent_role", + "task_count", + "operation_count", + "success_count", + "failed_count", + "auto_resolved_count", + "human_review_count", + "approval_required_count", + "mcp_call_count", + "mcp_blocked_count", + "avg_latency_ms", + "tokens_in", + "tokens_out", + "estimated_cost", + "telegram_sent_count", + "dedup_suppressed_count", + "receipt_missing_count", + "km_entries_created", + "playbook_updates", + "recommendations_created" + ] + }, + "risk_policy": { + "low_risk_auto_handle": "文件、只讀報告、UI 顯示、測試報告可自動處理並摘要回報。", + "medium_risk_auto_handle": "read-only、已授權、可回滾、Gateway scope 通過者可自動處理;設定、外部服務、成本或狀態不確定則升級審核。", + "high_risk_requires_approval": "production write、kubectl apply、ArgoCD sync、Telegram 實發、付費 API、SDK 安裝、host update、DB migration、restore、rollback、NetworkPolicy、workflow 修改均需統帥批准。", + "openclaw_gate_required": true, + "qa_verifier_required": true, + "security_gate_required": true + }, + "reporting_contract": { + "daily": { + "required": true, + "delivery_mode": "summary_only_unless_failed_or_blocked", + "sections": ["總告警數", "自動處理量", "人工審核量", "KM 新增", "PlayBook 更新", "成本與 token", "Telegram receipt"] + }, + "weekly": { + "required": true, + "delivery_mode": "operator_reviewed_digest", + "sections": ["Agent 工作量", "成功率", "部署數", "critical alert", "resolved rate", "K3s / pod / HPA", "AI 成本"] + }, + "monthly": { + "required": 
true, + "delivery_mode": "governance_chart_and_approval_backlog", + "sections": ["每 Agent 任務量", "自動化節省時間", "高風險案例", "市場評估", "成本趨勢", "RAG / KM 成長", "Telegram 送達率"] + } + }, + "market_watch_contract": { + "cadence": "每週一 09:00 台北時間;重大版本、新框架或高訊號 benchmark 觸發臨時複核。", + "auto_collect_allowed": ["官方 release metadata", "版本號", "content hash", "source failure", "公開 benchmark 摘要", "既有內部 replay / smoke 結果"], + "approval_required": ["SDK 安裝", "付費 API / NIM 呼叫", "外部帳號登入", "full replay", "shadow / canary", "production route", "provider switch", "lockfile write", "workflow 修改", "Telegram 實發"], + "p0_refresh_candidates": [ + "Nemotron 3 Ultra 550B-A55B", + "OpenAI Agents SDK 最新 release", + "LangGraph 1.2.5", + "Google ADK 2.0", + "Microsoft Agent Framework 1.0" + ], + "candidate_package_required_fields": ["官方來源", "版本差異", "建議角色", "market score", "AWOOOI replay/smoke 計畫", "成本與資料邊界", "SDK/依賴影響", "Telegram/告警策略", "rollback", "測試清單", "OpenClaw 仲裁", "HITL 到期時間"] + }, + "telegram_contract": { + "direct_send_allowed": false, + "bot_api_call_allowed": false, + "success_immediate_send_allowed": false, + "action_required_digest_allowed_after_approval": true, + "failure_only_escalation": true, + "dedup_required": true, + "receipt_required": true, + "forbidden_actions": ["logOut", "close", "setWebhook", "deleteWebhook", "direct_bot_api_send", "unsafe_callback_execution"], + "summary": "Agent 12 統一處理 Telegram Gateway、Channel Hub、report receipt、去重、脫敏與 action-required digest;高風險 Telegram 卡片只可作審核入口,不可取代 runtime authorization。" + }, + "display_redaction_contract": { + "redaction_required": true, + "conversation_transcript_display_allowed": false, + "raw_prompt_display_allowed": false, + "private_reasoning_display_allowed": false, + "secret_value_display_allowed": false, + "raw_runtime_payload_display_allowed": false, + "frontend_display_policy": "前端只顯示角色、狀態、工作量、阻擋項、批准邊界與脫敏 evidence refs;禁止顯示工作視窗逐字稿、prompt、session、browser context、secret、token 或 callback 原文。" + }, + "rollups": { + "agent_role_count": 
12, + "read_only_review_completed_count": 12, + "subagent_batch_limit": 6, + "subagent_batch_count": 2, + "approval_required_total": 61, + "blocker_total": 54, + "total_work_units": 82, + "total_evidence_items": 84, + "daily_report_required_count": 1, + "weekly_report_required_count": 1, + "monthly_report_required_count": 1, + "market_refresh_candidate_count": 5, + "live_write_count": 0, + "telegram_send_count": 0, + "bot_api_call_count": 0, + "production_write_count": 0, + "paid_api_call_count": 0, + "sdk_install_count": 0, + "secret_read_count": 0, + "destructive_operation_count": 0 + } +} diff --git a/docs/schemas/ai_agent_12_agent_war_room_v1.schema.json b/docs/schemas/ai_agent_12_agent_war_room_v1.schema.json new file mode 100644 index 00000000..c8579f5d --- /dev/null +++ b/docs/schemas/ai_agent_12_agent_war_room_v1.schema.json @@ -0,0 +1,254 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://awoooi.wooo.work/schemas/ai_agent_12_agent_war_room_v1.schema.json", + "title": "AI Agent 12-Agent War Room Snapshot", + "type": "object", + "required": [ + "schema_version", + "generated_at", + "program_status", + "source_refs", + "coordination_model", + "agent_roles", + "workload_summary", + "risk_policy", + "reporting_contract", + "market_watch_contract", + "telegram_contract", + "display_redaction_contract", + "rollups" + ], + "properties": { + "schema_version": { + "const": "ai_agent_12_agent_war_room_v1" + }, + "generated_at": { + "type": "string", + "format": "date-time" + }, + "program_status": { + "type": "object", + "required": [ + "current_priority", + "current_task_id", + "next_task_id", + "overall_completion_percent", + "read_only_mode", + "runtime_authority", + "status_note" + ], + "properties": { + "current_priority": { + "enum": ["P0", "P1", "P2", "P3"] + }, + "current_task_id": { + "const": "P2-142" + }, + "next_task_id": { + "const": "P2-143" + }, + "overall_completion_percent": { + "type": "integer", + "minimum": 0, + 
"maximum": 100 + }, + "read_only_mode": { + "const": true + }, + "runtime_authority": { + "const": "12_agent_war_room_read_only_no_live_write" + }, + "status_note": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": true + }, + "source_refs": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "coordination_model": { + "type": "object" + }, + "agent_roles": { + "type": "array", + "minItems": 12, + "maxItems": 12, + "items": { + "type": "object", + "required": [ + "agent_id", + "display_name", + "primary_agent", + "war_room_role", + "review_status", + "risk_tier", + "work_units", + "blocker_count", + "approval_required_count", + "live_write_count", + "telegram_send_count", + "bot_api_call_count", + "next_action" + ], + "properties": { + "agent_id": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "primary_agent": { + "type": "string" + }, + "war_room_role": { + "type": "string" + }, + "review_status": { + "enum": ["read_only_review_completed", "productized_readback_ready", "blocked_by_gate"] + }, + "risk_tier": { + "enum": ["low", "medium", "high", "critical"] + }, + "work_units": { + "type": "integer", + "minimum": 0 + }, + "blocker_count": { + "type": "integer", + "minimum": 0 + }, + "approval_required_count": { + "type": "integer", + "minimum": 0 + }, + "live_write_count": { + "const": 0 + }, + "telegram_send_count": { + "const": 0 + }, + "bot_api_call_count": { + "const": 0 + }, + "next_action": { + "type": "string" + } + }, + "additionalProperties": true + } + }, + "workload_summary": { + "type": "object" + }, + "risk_policy": { + "type": "object" + }, + "reporting_contract": { + "type": "object" + }, + "market_watch_contract": { + "type": "object" + }, + "telegram_contract": { + "type": "object" + }, + "display_redaction_contract": { + "type": "object", + "required": [ + "redaction_required", + "conversation_transcript_display_allowed", + "raw_prompt_display_allowed", + 
"private_reasoning_display_allowed", + "secret_value_display_allowed", + "frontend_display_policy" + ], + "properties": { + "redaction_required": { + "const": true + }, + "conversation_transcript_display_allowed": { + "const": false + }, + "raw_prompt_display_allowed": { + "const": false + }, + "private_reasoning_display_allowed": { + "const": false + }, + "secret_value_display_allowed": { + "const": false + }, + "frontend_display_policy": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": true + }, + "rollups": { + "type": "object", + "required": [ + "agent_role_count", + "read_only_review_completed_count", + "subagent_batch_limit", + "subagent_batch_count", + "approval_required_total", + "blocker_total", + "live_write_count", + "telegram_send_count", + "bot_api_call_count", + "production_write_count", + "paid_api_call_count", + "sdk_install_count" + ], + "properties": { + "agent_role_count": { + "const": 12 + }, + "read_only_review_completed_count": { + "const": 12 + }, + "subagent_batch_limit": { + "const": 6 + }, + "subagent_batch_count": { + "const": 2 + }, + "approval_required_total": { + "type": "integer", + "minimum": 0 + }, + "blocker_total": { + "type": "integer", + "minimum": 0 + }, + "live_write_count": { + "const": 0 + }, + "telegram_send_count": { + "const": 0 + }, + "bot_api_call_count": { + "const": 0 + }, + "production_write_count": { + "const": 0 + }, + "paid_api_call_count": { + "const": 0 + }, + "sdk_install_count": { + "const": 0 + } + }, + "additionalProperties": true + } + }, + "additionalProperties": false +}