@router.get(
    "/ai-agent-market-radar-readback",
    response_model=dict[str, Any],
    summary="取得 AI Agent 市場雷達與近期變更盤點",
    description=(
        "讀取最新已提交的 AI Agent 市場雷達 readback;"
        "此端點只呈現近期治理變更、市場主流 Agent 技術來源、候選角色、優先工作清單與封鎖 gate。"
        "它不呼叫外部來源、不安裝 SDK、不呼叫付費 API、不跑 replay、不進 shadow/canary、"
        "不送 Telegram、不改主機、不修改 workflow、不替換 OpenClaw。"
    ),
)
async def get_ai_agent_market_radar_readback() -> dict[str, Any]:
    """Return the latest committed AI Agent market radar readback snapshot.

    The snapshot is loaded in a worker thread (the loader does blocking file
    I/O) and passed through ``redact_public_lan_topology`` before it is
    returned.

    Raises:
        HTTPException: 404 when the snapshot file does not exist; 500 when the
            snapshot is not valid JSON or is rejected by the loader's
            validation (``ValueError``).
    """
    try:
        payload = await asyncio.to_thread(load_latest_ai_agent_market_radar_readback)
        return redact_public_lan_topology(payload)
    except FileNotFoundError as exc:
        # str(exc) embeds the absolute snapshot path on the server, so it is
        # only logged here and never echoed back in the HTTP response.
        logger.warning("ai_agent_market_radar_readback_missing", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="AI Agent 市場雷達 readback 不存在",
        ) from exc
    except (json.JSONDecodeError, ValueError) as exc:
        # Details stay in the server log; the client only gets a generic message.
        logger.error("ai_agent_market_radar_readback_invalid", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="AI Agent 市場雷達 readback 無效",
        ) from exc
+ +Loads the committed read-only radar artifact. The radar is an operator +decision surface only; it does not approve SDK installs, paid API calls, +replay, shadow/canary, Telegram sends, host writes, or production routing. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from src.services.snapshot_paths import default_operations_dir + +_DEFAULT_OPERATIONS_DIR = default_operations_dir(Path(__file__)) +_SNAPSHOT_NAME = "ai-agent-market-radar-readback.snapshot.json" + + +def load_latest_ai_agent_market_radar_readback( + operations_dir: Path | None = None, +) -> dict[str, Any]: + """Load the committed AI Agent market radar readback snapshot.""" + directory = operations_dir or _DEFAULT_OPERATIONS_DIR + snapshot_path = directory / _SNAPSHOT_NAME + with snapshot_path.open(encoding="utf-8") as handle: + payload = json.load(handle) + + if not isinstance(payload, dict): + raise ValueError(f"{snapshot_path}: expected JSON object") + if payload.get("schema_version") != "ai_agent_market_radar_readback_v1": + raise ValueError(f"{snapshot_path}: unexpected schema_version") + + policy = payload.get("policy") or {} + forbidden_true = [ + key + for key in [ + "sdk_installation_approved", + "paid_api_calls_approved", + "replay_candidate_approved", + "shadow_or_canary_approved", + "production_routing_approved", + "telegram_send_approved", + "host_write_approved", + "workflow_modification_approved", + "openclaw_replacement_approved", + ] + if policy.get(key) is not False + ] + if forbidden_true: + raise ValueError(f"{snapshot_path}: unsafe policy flags: {forbidden_true}") + + serialized = json.dumps(payload, ensure_ascii=False) + forbidden_fragments = [ + "/Users/", + ".claude/projects", + ".codex", + "192.168.", + "auth.json", + "conversations", + "sessions", + ] + leaked = [fragment for fragment in forbidden_fragments if fragment in serialized] + if leaked: + raise ValueError(f"{snapshot_path}: forbidden local or 
raw-history fragment: {leaked}") + + return payload diff --git a/apps/api/tests/test_ai_agent_market_radar_readback.py b/apps/api/tests/test_ai_agent_market_radar_readback.py new file mode 100644 index 00000000..be949a1a --- /dev/null +++ b/apps/api/tests/test_ai_agent_market_radar_readback.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import json + +from src.services.ai_agent_market_radar_readback import ( + load_latest_ai_agent_market_radar_readback, +) + + +def test_ai_agent_market_radar_readback_committed_snapshot_is_safe(): + payload = load_latest_ai_agent_market_radar_readback() + + assert payload["schema_version"] == "ai_agent_market_radar_readback_v1" + assert payload["summary"]["overall_completion_percent"] == 42.2 + assert payload["summary"]["market_candidates"] == 13 + assert payload["summary"]["market_sources"] == 34 + assert payload["summary"]["changed_candidates"] == 13 + assert payload["summary"]["integration_blocked_candidates"] == 13 + assert payload["summary"]["replacement_decisions_approved"] == 0 + + policy = payload["policy"] + assert policy["read_only"] is True + assert policy["sdk_installation_approved"] is False + assert policy["paid_api_calls_approved"] is False + assert policy["replay_candidate_approved"] is False + assert policy["shadow_or_canary_approved"] is False + assert policy["production_routing_approved"] is False + assert policy["telegram_send_approved"] is False + assert policy["host_write_approved"] is False + assert policy["workflow_modification_approved"] is False + assert policy["openclaw_replacement_approved"] is False + + serialized = json.dumps(payload, ensure_ascii=False) + assert "/Users/" not in serialized + assert ".claude/projects" not in serialized + assert ".codex" not in serialized + assert "192.168." 
def test_ai_agent_market_radar_readback_contains_market_practice_plan():
    """Check selected practice statuses and candidate gate statuses in the snapshot."""
    snapshot = load_latest_ai_agent_market_radar_readback()

    # Index the practice rows by practice name -> awoooi_status.
    status_by_practice = {}
    for entry in snapshot["market_practice_alignment"]:
        status_by_practice[entry["practice"]] = entry["awoooi_status"]

    handoff_status = status_by_practice["多 Agent handoff / specialist delegation"]
    assert handoff_status == "partially_modeled"
    durable_status = status_by_practice[
        "Durable execution / persistence / human-in-the-loop"
    ]
    assert durable_status == "needed_for_incident_loop"

    # Index the candidate rows by candidate_id.
    plan_by_candidate = {}
    for entry in snapshot["candidate_role_plan"]:
        plan_by_candidate[entry["candidate_id"]] = entry

    incumbent = plan_by_candidate["openclaw_incumbent"]
    assert incumbent["gate_status"] == "production_baseline"
    nemo = plan_by_candidate["nemo_nemotron_fabric"]
    assert nemo["gate_status"] == "integration_blocked"
    langgraph = plan_by_candidate["langgraph_incident_kernel"]
    assert langgraph["gate_status"] == "integration_blocked"
data["summary"]["replacement_decisions_approved"] == 0 + assert data["policy"]["openclaw_replacement_approved"] is False + assert data["policy"]["telegram_send_approved"] is False + + serialized = json.dumps(data, ensure_ascii=False) + assert "/Users/" not in serialized + assert ".claude/projects" not in serialized + assert ".codex" not in serialized + assert "192.168." not in serialized diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index a0d3f264..34b19788 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,29 @@ +## 2026-06-25|AI Agent 市場雷達與近期變更重新盤點 + +**本輪收斂範圍:** +- 重新盤點近期治理更新:Status Cleanup Dashboard read-only API、Product Governance Owner Response handoff、Wazuh / IwoooS 可視性邊界、日週月報契約、工具 / 套件 / 服務 / 主機版本新鮮度與 Agent 自動化工作線。 +- 更新 `docs/ai/agent-market-watch-sources.v1.json` 與 `docs/ai/agent-replacement-candidates.v1.json` 到 `2026-06-25`,校準 OpenAI Agents SDK、LangGraph、NVIDIA NeMo / Nemotron、Claude Agent SDK、Google ADK、Microsoft Agent Framework、CrewAI 等官方來源。 +- 產生 6/25 market artifacts:`docs/evaluations/agent_market_watch_report_2026-06-25.json`、`agent_market_integration_review_full_2026-06-25.json`、`agent_market_discovery_review_2026-06-25.json`、`agent_market_discovery_classification_2026-06-25.json`、`agent_market_watch_promotion_review_2026-06-25.json`、`agent_market_governance_snapshot_2026-06-25.json`。 +- 新增 AI Agent 市場雷達 readback generator / schema / snapshot / Markdown / API / tests:`scripts/dev/ai-agent-market-radar-readback.py`、`docs/schemas/ai_agent_market_radar_readback_v1.schema.json`、`docs/operations/ai-agent-market-radar-readback.snapshot.json`、`docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-25.md`、`GET /api/v1/agents/ai-agent-market-radar-readback`。 +- 修正 `scripts/agents/agent-market-governance-snapshot.py` 的 import path,讓乾淨 worktree 與排程環境可直接載入後端 service。 + +**專業評估:** +- 2026-06-25 market watch 抓取 `13` 個候選、`34` 個官方 / 主要來源,`source_failures=0`,但 `changed_candidates=13`、`integration_queue_count=13`,代表市場已變動且必須刷新 scorecard / replay gate。 +- 
Integration review 全數維持 blocked:`blocked_from_integration=13`、`requires_dependency_approval=13`、`requires_cost_approval=10`、`production_changes_approved=0`、`shadow_or_canary_approved=0`。 +- OpenClaw 仍是 production decision core,但不是永久不可挑戰;任何 OpenClaw 取代必須經過新版 market scorecard、offline replay、hidden-label baseline comparison、shadow / canary 與正式 ADR。 +- NemoTron / NeMo Agent Toolkit 最適合作為 offline replay、tool-model evaluator、contract smoke gate 與私有部署候選;目前仍不得直接接 production routing。 +- 主流做法已明確收斂為 handoff、tracing、guardrails、durable execution、HITL、MCP / A2A、evaluation / replay / profiling;AWOOOI 下一步要把這些變成 Agent run trace、日週月報與 Telegram 審核閉環。 + +**readback:** +- Market Watch:`candidate_count=13`、`source_count=34`、`changed_candidates=13`、`failure_count=0`。 +- Market Governance Snapshot:`blocked_from_integration=13`、`recommended_watch_additions_remaining=5`、`replacement_decisions_approved=0`。 +- AI Agent Market Radar:`AI_AGENT_MARKET_RADAR_READBACK_OK overall=42.2% candidates=13 sources=34 changed=13 blocked=13 replacement=0`。 +- API / service tests:`DATABASE_URL=sqlite+aiosqlite:///:memory: PYTHONPATH=apps/api PYTHONDONTWRITEBYTECODE=1 /Users/ogt/awoooi/apps/api/.venv/bin/python -m pytest apps/api/tests/test_ai_agent_market_radar_readback.py apps/api/tests/test_ai_agent_market_radar_readback_api.py apps/api/tests/test_agent_market_governance_snapshot.py apps/api/tests/test_agent_market_governance_snapshot_api.py -q` → `8 passed`。 + +**仍禁止 / 未完成:** +- `sdk_installation_approved=false`、`paid_api_calls_approved=false`、`replay_candidate_approved=false`、`shadow_or_canary_approved=false`、`production_routing_approved=false`、`telegram_send_approved=false`、`host_write_approved=false`、`workflow_modification_approved=false`、`openclaw_replacement_approved=false`。 +- 本輪沒有安裝 SDK、沒有呼叫付費 AI API、沒有送 Telegram、沒有修改 workflow、沒有主機寫入、沒有替換 OpenClaw,也沒有把 working-window 對話內容寫入前端。 + ## 2026-06-24|23:33 live cold-start / public routes / backup read-only refresh **背景**:23:15 已確認 110 live cold-start 
monitor 尚未同步 repo-side v1.42 hash;本輪不做 live script install,只用 repo-side authoritative script 重新跑完整 read-only cold-start,確認重啟 SOP 的現場判斷是否仍正確。 diff --git a/docs/ai/agent-market-watch-sources.v1.json b/docs/ai/agent-market-watch-sources.v1.json index 3c3bc0ab..7f790546 100644 --- a/docs/ai/agent-market-watch-sources.v1.json +++ b/docs/ai/agent-market-watch-sources.v1.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "schema_version": "agent_market_watch_sources_v1", - "updated_at": "2026-06-04", + "updated_at": "2026-06-25", "purpose": "Primary-source watch list for recurring AI Agent market updates. A change here is not replacement approval; it only triggers refreshed evaluation.", "cadence": { "weekly_market_watch": "Every Monday 09:00 Asia/Taipei, produce a read-only market watch report and full-scope integration/discovery review summary.", @@ -32,9 +32,15 @@ "reference_version": null }, { - "source_id": "openai_agent_builder_safety_docs", + "source_id": "openai_agents_tracing_docs", "type": "docs", - "url": "https://developers.openai.com/api/docs/guides/agent-builder-safety", + "url": "https://openai.github.io/openai-agents-python/tracing/", + "reference_version": null + }, + { + "source_id": "openai_agents_handoffs_docs", + "type": "docs", + "url": "https://openai.github.io/openai-agents-python/handoffs/", "reference_version": null }, { @@ -93,6 +99,12 @@ "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html", "reference_version": null }, + { + "source_id": "nvidia_nemotron_developer_page", + "type": "docs", + "url": "https://developer.nvidia.com/topics/ai/nemotron", + "reference_version": null + }, { "source_id": "nvidia_nim_llm_docs", "type": "docs", @@ -118,7 +130,7 @@ { "source_id": "claude_agent_sdk_docs", "type": "docs", - "url": "https://platform.claude.com/docs/en/agent-sdk/agent-loop", + "url": "https://code.claude.com/docs/en/agent-sdk/overview", "reference_version": null }, { @@ -140,7 +152,7 @@ { 
"source_id": "google_adk_docs", "type": "docs", - "url": "https://adk.dev/get-started/about/", + "url": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk", "reference_version": null }, { @@ -190,7 +202,7 @@ { "source_id": "crewai_docs", "type": "docs", - "url": "https://docs.crewai.com/en/introduction", + "url": "https://docs.crewai.com/", "reference_version": null }, { diff --git a/docs/ai/agent-replacement-candidates.v1.json b/docs/ai/agent-replacement-candidates.v1.json index 30f4af1a..c8281eca 100644 --- a/docs/ai/agent-replacement-candidates.v1.json +++ b/docs/ai/agent-replacement-candidates.v1.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "schema_version": "agent_replacement_candidates_v1", - "updated_at": "2026-06-04", + "updated_at": "2026-06-25", "baseline_candidate_id": "openclaw_incumbent", "fixture_schema": "docs/schemas/agent_replay_fixture_v1.schema.json", "candidate_input_schema": "docs/schemas/agent_replay_candidate_input_v1.schema.json", @@ -179,7 +179,7 @@ { "candidate_id": "claude_agent_sdk_remediator", "display_name": "Claude Agent SDK Remediator", - "official_url": "https://platform.claude.com/docs/en/agent-sdk/agent-loop", + "official_url": "https://code.claude.com/docs/en/agent-sdk/overview", "role": "devops_code_remediation_agent", "evaluation_priority": "must_test", "required_stage": "offline_replay", @@ -199,7 +199,7 @@ { "candidate_id": "google_adk_stack", "display_name": "Google Agent Development Kit Stack", - "official_url": "https://adk.dev/get-started/about/", + "official_url": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk", "role": "gemini_vertex_agent_stack", "evaluation_priority": "can_test", "required_stage": "offline_replay" @@ -215,7 +215,7 @@ { "candidate_id": "crewai_flows_crews", "display_name": "CrewAI Flows + Crews", - "official_url": "https://docs.crewai.com/en/introduction", + "official_url": "https://docs.crewai.com/", "role": 
"rapid_agent_team_prototype", "evaluation_priority": "secondary", "required_stage": "offline_replay" diff --git a/docs/evaluations/agent_market_discovery_classification_2026-06-25.json b/docs/evaluations/agent_market_discovery_classification_2026-06-25.json new file mode 100644 index 00000000..de4523a2 --- /dev/null +++ b/docs/evaluations/agent_market_discovery_classification_2026-06-25.json @@ -0,0 +1,344 @@ +{ + "candidates": [ + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "agent_operator_console_candidate", + "description": "NocoBase is an open-source AI + no-code platform for building business systems fast. Instead of generating everything from scratch, AI works on top of production-proven infrastructure and a WYSIWYG no-code interface, so you get both speed and reliability.", + "homepage": "https://www.nocobase.com", + "html_url": "https://github.com/nocobase/nocobase", + "language": "TypeScript", + "pushed_at": "2026-06-25T02:27:15Z", + "recommendation": "watch_only_product_surface_signal", + "recommended_role": "operator_console_or_agent_ui_candidate", + "repository_full_name": "nocobase/nocobase", + "required_next_gate": "operator_confirms_product_surface_relevance_before_watch_only_entry", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 23082, + "topics": [ + "admin-dashboard", + "ai-agent", + "ai-agents", + "ai-assistant", + "ai-tools", + "airtable", + "crm", + "crud", + "erp", + "internal-tool", + "internal-tools", + "low-code", + "lowcode", + "no-code", + "nocode", + "project-management", + "salesforce", + "self-hosted", + "workflows" + ], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + 
"approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "agent_operator_console_candidate", + "description": "Web dashboard for Hermes Agent — multi-platform AI chat, session management, scheduled jobs, usage analytics ", + "homepage": "https://hermes-studio.ai", + "html_url": "https://github.com/EKKOLearnAI/hermes-studio", + "language": "TypeScript", + "pushed_at": "2026-06-25T02:30:17Z", + "recommendation": "watch_only_product_surface_signal", + "recommended_role": "operator_console_or_agent_ui_candidate", + "repository_full_name": "ekkolearnai/hermes-studio", + "required_next_gate": "operator_confirms_product_surface_relevance_before_watch_only_entry", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 8432, + "topics": [ + "agent", + "ai-agent", + "chat-ui", + "dashboard", + "hermes", + "hermes-agent", + "hermes-web-ui", + "llm", + "multi-model", + "multi-platform", + "self-hosted", + "typescript", + "vue3", + "web-ui" + ], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "agent_governance_candidate", + "description": "Omnigent is an open-source AI agent framework and meta-harness: orchestrate Claude Code, Codex, Cursor, Pi, and custom agents — swap harnesses without rewriting, enforce policies and sandboxing, and collaborate in real time from any device.", + "homepage": "https://omnigent.ai", + "html_url": "https://github.com/omnigent-ai/omnigent", + "language": "Python", + "pushed_at": "2026-06-25T02:32:04Z", + "recommendation": "add_to_watch_registry_after_manual_source_review", + "recommended_role": "agent_governance_policy_evaluator_candidate", + 
"repository_full_name": "omnigent-ai/omnigent", + "required_next_gate": "operator_confirms_primary_sources_then_add_watch_registry_only", + "risk_flags": [ + "requires_dependency_boundary_review", + "likely_requires_paid_provider_boundary_review", + "requires_tool_execution_sandbox_review" + ], + "stargazers_count": 4737, + "topics": [ + "agent-framework", + "agent-governance", + "agent-orchestration", + "agents", + "ai", + "ai-agent", + "ai-agents", + "claude-code", + "codex", + "coding-agents", + "developer-tools", + "llm", + "ml", + "multi-agent", + "python", + "sandbox" + ], + "watch_addition_recommended": true + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "agent_framework_candidate", + "description": "AG2 (formerly AutoGen): The Open-Source AgentOS.Join us at: https://discord.gg/sNGSwQME3x", + "homepage": "https://ag2.ai", + "html_url": "https://github.com/ag2ai/ag2", + "language": "Python", + "pushed_at": "2026-06-25T02:31:30Z", + "recommendation": "add_to_watch_registry_after_manual_source_review", + "recommended_role": "agent_framework_or_orchestrator_candidate", + "repository_full_name": "ag2ai/ag2", + "required_next_gate": "operator_confirms_primary_sources_then_add_watch_registry_only", + "risk_flags": [ + "requires_dependency_boundary_review", + "requires_tool_execution_sandbox_review" + ], + "stargazers_count": 4708, + "topics": [ + "a2a", + "ag2", + "agent-framework", + "agentic", + "agentic-ai", + "ai", + "ai-agents-framework", + "aiagents", + "genai", + "llm", + "llms", + "mcp", + "multi-agent", + "multi-agent-system", + "open-source", + "python" + ], + "watch_addition_recommended": true + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": 
false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "personal_agent_platform_candidate", + "description": "Zero, your trustworthy AI teammate for real work. ", + "homepage": "https://vm0.ai", + "html_url": "https://github.com/vm0-ai/vm0", + "language": "TypeScript", + "pushed_at": "2026-06-25T02:32:12Z", + "recommendation": "add_to_watch_registry_after_manual_source_review", + "recommended_role": "personal_agent_platform_candidate", + "repository_full_name": "vm0-ai/vm0", + "required_next_gate": "operator_confirms_primary_sources_then_add_watch_registry_only", + "risk_flags": [ + "requires_dependency_boundary_review", + "likely_requires_paid_provider_boundary_review", + "requires_tool_execution_sandbox_review" + ], + "stargazers_count": 1131, + "topics": [ + "agentic-workflow", + "ai-agent", + "ai-runtime", + "ai-sandbox", + "claude-code", + "dev-tools", + "sandbox" + ], + "watch_addition_recommended": true + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "agent_framework_candidate", + "description": "Rapida is an open-source, end-to-end voice AI orchestration platform for building real-time conversational voice agents with audio streaming, STT, TTS, VAD, multi-channel integration, agent state management, and observability.", + "homepage": "https://rapida.ai", + "html_url": "https://github.com/rapidaai/voice-ai", + "language": "Go", + "pushed_at": "2026-06-25T02:04:11Z", + "recommendation": "add_to_watch_registry_after_manual_source_review", + "recommended_role": "agent_framework_or_orchestrator_candidate", + "repository_full_name": "rapidaai/voice-ai", + "required_next_gate": "operator_confirms_primary_sources_then_add_watch_registry_only", + 
"risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 679, + "topics": [ + "agent-framework", + "ai-voice", + "ai-voice-agent", + "audio-streaming", + "golang", + "open-source", + "orchestration", + "realtime-audio", + "sip", + "speech-ai", + "speech-to-text", + "telephony", + "text-to-speech", + "voice-agents", + "voice-ai", + "voice-ai-agents", + "voice-ai-platform", + "voice-assistant-ai", + "voice-bot", + "voice-observability" + ], + "watch_addition_recommended": true + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "agent_framework_candidate", + "description": "LLM-supervised persistent memory for AI agents — graph-based recall, cross-session knowledge, single binary. Works with Claude Code, OpenClaw, and any CLI agent.", + "homepage": "https://github.com/mnemon-dev/mnemon#readme", + "html_url": "https://github.com/mnemon-dev/mnemon", + "language": "Go", + "pushed_at": "2026-06-25T02:17:52Z", + "recommendation": "add_to_watch_registry_after_manual_source_review", + "recommended_role": "agent_framework_or_orchestrator_candidate", + "repository_full_name": "mnemon-dev/mnemon", + "required_next_gate": "operator_confirms_primary_sources_then_add_watch_registry_only", + "risk_flags": [ + "requires_dependency_boundary_review", + "likely_requires_paid_provider_boundary_review", + "requires_tool_execution_sandbox_review" + ], + "stargazers_count": 361, + "topics": [ + "agent-framework", + "agent-memory", + "ai-agent", + "ai-tools", + "claude", + "claude-code", + "claude-memory", + "cli", + "context-window", + "golang", + "knowledge-graph", + "llm-agent", + "llm-memory", + "llm-supervised", + "mcp", + "memory", + "openclaw", + "persistent-memory", + "rag", + "sqlite" + ], + "watch_addition_recommended": true + } + 
], + "generated_at": "2026-06-25T02:34:09.078342+00:00", + "inputs": { + "discovery_review_generated_at": "2026-06-25T02:33:53.438200+00:00", + "metadata_source": "github_repository_api_summary" + }, + "policy": { + "auto_watch_registry_addition_approved": false, + "paid_api_calls_approved": false, + "production_changes_approved": false, + "raw_external_pages_committed": false, + "replacement_decision_allowed": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "schema_version": "agent_market_discovery_classification_v1", + "summary": { + "classification_counts": { + "agent_framework_candidate": 3, + "agent_governance_candidate": 1, + "agent_operator_console_candidate": 2, + "personal_agent_platform_candidate": 1 + }, + "classified_repositories": 7, + "production_changes_approved": 0, + "recommendation_counts": { + "add_to_watch_registry_after_manual_source_review": 5, + "watch_only_product_surface_signal": 2 + }, + "recommended_watch_additions": 5, + "shadow_or_canary_approved": 0, + "watch_only_or_defer": 2 + } +} diff --git a/docs/evaluations/agent_market_discovery_review_2026-06-25.json b/docs/evaluations/agent_market_discovery_review_2026-06-25.json new file mode 100644 index 00000000..3c798404 --- /dev/null +++ b/docs/evaluations/agent_market_discovery_review_2026-06-25.json @@ -0,0 +1,271 @@ +{ + "candidate_drafts": [ + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "keep_existing_candidate_watch", + "html_url": "https://github.com/NousResearch/hermes-agent", + "new_since_previous_review": true, + "recommended_actions": [ + "keep_existing_watch_registry_entry", + "do_not_duplicate_candidate" + ], + "recommended_next_gate": "use_existing_market_watch_candidate", + "repository_full_name": "nousresearch/hermes-agent", + "seen_before": false, + "source_ids": [ + 
"github_ai_agent_topic" + ], + "stargazers_count_max": 202142, + "status": "already_watched_or_registered", + "updated_at_latest": "2026-06-25T02:32:00Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/nocobase/nocobase", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "nocobase/nocobase", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 23082, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-25T02:30:02Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/EKKOLearnAI/hermes-studio", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "ekkolearnai/hermes-studio", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 8432, + 
"status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-25T02:30:22Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/omnigent-ai/omnigent", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "omnigent-ai/omnigent", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic", + "github_agent_framework_topic" + ], + "stargazers_count_max": 4737, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-25T02:32:40Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/ag2ai/ag2", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "ag2ai/ag2", + "seen_before": false, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 4708, + "status": "needs_primary_source_classification", 
+ "updated_at_latest": "2026-06-25T02:31:33Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "keep_existing_candidate_watch", + "html_url": "https://github.com/microsoft/agent-governance-toolkit", + "new_since_previous_review": false, + "recommended_actions": [ + "keep_existing_watch_registry_entry", + "do_not_duplicate_candidate" + ], + "recommended_next_gate": "use_existing_market_watch_candidate", + "repository_full_name": "microsoft/agent-governance-toolkit", + "seen_before": true, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 4506, + "status": "already_watched_or_registered", + "updated_at_latest": "2026-06-25T00:45:57Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/vm0-ai/vm0", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "vm0-ai/vm0", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 1131, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-25T02:30:31Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": 
"manual_primary_source_classification_required", + "html_url": "https://github.com/rapidaai/voice-ai", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "rapidaai/voice-ai", + "seen_before": false, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 679, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-25T02:04:22Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/mnemon-dev/mnemon", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "mnemon-dev/mnemon", + "seen_before": false, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 361, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-25T02:17:56Z" + } + ], + "generated_at": "2026-06-25T02:33:53.438200+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "previous_review_generated_at": "2026-06-04T01:26:40.344391+00:00", + "source_registry_schema_version": 
"agent_market_watch_sources_v1", + "watch_report_generated_at": "2026-06-25T02:33:12.868785+00:00", + "watch_report_mode": "live" + }, + "policy": { + "auto_registry_addition_approved": false, + "paid_api_calls_approved": false, + "production_changes_approved": false, + "replacement_decision_allowed": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "schema_version": "agent_market_discovery_review_v1", + "summary": { + "already_watched_or_registered": 2, + "auto_registry_additions_approved": 0, + "discovered_items": 10, + "discovery_sources": 2, + "manual_classification_required": 7, + "new_manual_classification_required": 7, + "production_changes_approved": 0, + "shadow_or_canary_approved": 0, + "source_failures": 0, + "unique_repositories": 9 + } +} diff --git a/docs/evaluations/agent_market_governance_snapshot_2026-06-25.json b/docs/evaluations/agent_market_governance_snapshot_2026-06-25.json new file mode 100644 index 00000000..762496f3 --- /dev/null +++ b/docs/evaluations/agent_market_governance_snapshot_2026-06-25.json @@ -0,0 +1,938 @@ +{ + "candidate_groups": { + "production_baseline": [ + "openclaw_incumbent" + ], + "replay_or_integration_blocked": [ + "claude_agent_sdk_remediator", + "crewai_flows_crews", + "google_adk_stack", + "langgraph_incident_kernel", + "microsoft_agent_framework", + "nemo_nemotron_fabric", + "openai_agents_sdk_coordinator" + ], + "watch_only_candidates": [ + "agentos_framework", + "bernstein_agent_governance", + "hermes_agent_personal_platform", + "microsoft_agent_governance_toolkit", + "pydantic_deepagents", + "thclaws_agent_harness" + ], + "watch_only_scorecard_prescreen_ready": [] + }, + "candidate_statuses": [ + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "openclaw_incumbent", + "current_gate": "production_decision_core", + "display_name": "OpenClaw incumbent", + 
"evaluation_priority": "baseline", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "production_baseline", + "integration_decision": "", + "operator_blockers": [], + "required_next_gate": "formal_replacement_adr_and_promotion_gate_required", + "role": "current_production_decision_core", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "openai_agents_sdk_coordinator", + "current_gate": "has_offline_replay_summary", + "display_name": "OpenAI Agents SDK Coordinator", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": "docs/evaluations/agent_openai_coordinator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_refresh_replay_gate", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ], + "required_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "role": "coordinator_orchestrator", + "score": 0.87 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "langgraph_incident_kernel", + "current_gate": "has_offline_replay_summary", + "display_name": "LangGraph Incident Kernel", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": 
"docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_refresh_replay_gate", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline" + ], + "required_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "role": "durable_incident_workflow_kernel", + "score": 0.7867 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "nemo_nemotron_fabric", + "current_gate": "blocked_existing_replay_evidence", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": "docs/evaluations/agent_nemotron_contract_tuned_49b_v15_smoke_gate_2026-06-02.json", + "latest_smoke_matrix": "docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json", + "latest_smoke_model": "nvidia/llama-3.3-nemotron-super-49b-v1.5" + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_refresh_evidence_then_smoke_gate", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "5_record_smoke_gate_passes", + "latency_and_output_contract_blockers_resolved", + "cost_approval_recorded" + ], + "required_next_gate": 
"refresh_source_evidence_then_5_record_smoke_only", + "role": "agent_fabric_tool_model_evaluator", + "score": 0.8033 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "claude_agent_sdk_remediator", + "current_gate": "has_offline_replay_summary", + "display_name": "Claude Agent SDK Remediator", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": "docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_refresh_replay_gate", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ], + "required_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "role": "devops_code_remediation_agent", + "score": 0.7533 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "google_adk_stack", + "current_gate": "not_yet_replayed", + "display_name": "Google Agent Development Kit Stack", + "evaluation_priority": "can_test", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + 
"market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ], + "required_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "role": "gemini_vertex_agent_stack", + "score": 0.73 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "microsoft_agent_framework", + "current_gate": "not_yet_replayed", + "display_name": "Microsoft Agent Framework", + "evaluation_priority": "can_test", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ], + "required_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "role": "enterprise_workflow_agent_stack", + "score": 0.81 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "crewai_flows_crews", + "current_gate": "not_yet_replayed", + "display_name": "CrewAI Flows + Crews", + "evaluation_priority": "secondary", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + 
"gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline" + ], + "required_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "role": "rapid_agent_team_prototype", + "score": 0.6033 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "hermes_agent_personal_platform", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "NousResearch Hermes Agent", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "personal_agent_platform_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + 
"candidate_id": "microsoft_agent_governance_toolkit", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "Microsoft Agent Governance Toolkit", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "thclaws_agent_harness", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "thClaws Agent Harness", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + 
"operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "pydantic_deepagents", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "Pydantic DeepAgents", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "agentos_framework", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "AgentOS Framework", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + 
"latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "bernstein_agent_governance", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "Bernstein Agent Governance", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": 
"agent_governance_policy_evaluator_candidate", + "score": null + } + ], + "current_decision": "openclaw_remains_production_decision_core", + "evaluation_cadence": { + "next_scheduled_run_at": "2026-06-29T09:00:00+08:00", + "operator_review_gate": "priority_upgrade_required_before_scorecard_replay_sdk_api_shadow_canary_or_production", + "primary_source_policy": "primary_sources_only_no_llm_no_sdk_no_paid_api", + "schedule": "weekly_monday_0900_asia_taipei", + "timezone": "Asia/Taipei", + "trigger_modes": [ + "scheduled_weekly", + "manual_dispatch", + "operator_triggered_after_primary_source_signal" + ], + "workflow": ".gitea/workflows/agent-market-watch.yaml" + }, + "forbidden_actions_without_new_approval": [ + "replace_openclaw", + "enter_shadow_or_canary", + "install_new_agent_sdk", + "call_paid_provider_api", + "run_replay_for_watch_only_candidate", + "change_production_routing" + ], + "generated_at": "2026-06-25T02:35:15.699321+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "discovery_classification_generated_at": "2026-06-25T02:34:09.078342+00:00", + "integration_review_generated_at": "2026-06-25T02:33:53.438121+00:00", + "promotion_review_generated_at": "2026-06-25T02:34:25.892815+00:00", + "watch_report_generated_at": "2026-06-25T02:33:12.868785+00:00" + }, + "market_watch_health": { + "blocked_from_integration": 13, + "freshness_sla_hours": 168, + "operator_blockers": [ + "unclassified_discovery_watch_additions_remaining", + "integration_queue_not_empty" + ], + "source_failures_block_priority_upgrade": false, + "stale_after": "2026-06-29T15:00:00+08:00", + "stale_grace_hours": 6, + "status": "blocked" + }, + "next_allowed_actions": [ + "continue_weekly_primary_source_market_watch", + "rerun_existing_replay_only_after_evidence_or_adapter_change" + ], + "operator_decision_queue": [ + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + 
"priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "claude_agent_sdk_remediator", + "display_name": "Claude Agent SDK Remediator", + "evidence_refs": [ + "docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json" + ], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "risk_notes": [ + "Best fit is code and DevOps remediation, not necessarily central incident arbitration.", + "API cost, subscription separation, and vendor boundary must be validated.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": false, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "crewai_flows_crews", + "display_name": "CrewAI Flows + Crews", + "evidence_refs": [], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "create_no_sdk_no_api_adapter_then_offline_replay", + "risk_notes": [ + "Better for rapid automation teams than high-risk production AIOps core.", + "Durability, strict audit, and permission boundary must be proven in replay.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + 
"no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "google_adk_stack", + "display_name": "Google Agent Development Kit Stack", + "evidence_refs": [], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "create_no_sdk_no_api_adapter_then_offline_replay", + "risk_notes": [ + "Gemini/Vertex ecosystem dependency must be justified against current local-first policy.", + "AIOps tool safety and rollback gates still need AWOOOI-specific implementation.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": false, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "langgraph_incident_kernel", + "display_name": "LangGraph Incident Kernel", + "evidence_refs": [ + "docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json" + ], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "risk_notes": [ + "It is a workflow kernel, not a smarter model by itself.", + "Tool safety and evaluation metrics must be implemented by AWOOOI adapters.", + 
"changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "microsoft_agent_framework", + "display_name": "Microsoft Agent Framework", + "evidence_refs": [], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "create_no_sdk_no_api_adapter_then_offline_replay", + "risk_notes": [ + "Public preview status and Microsoft ecosystem fit must be assessed.", + "Python/FastAPI/K8s integration cost is likely higher than LangGraph or NeMo.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "nemo_nemotron_fabric", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "evidence_refs": [ + "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "docs/evaluations/agent_nemotron_contract_tuned_49b_v15_smoke_gate_2026-06-02.json", + "docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json" + ], + "priority": 10, + 
"queue_status": "blocked_needs_evidence", + "recommended_action": "refresh_source_evidence_then_5_record_smoke_only", + "risk_notes": [ + "Needs AWOOOI-specific HITL and dangerous-action policy integration.", + "GPU/NIM operating cost must be compared against current local inference.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "openai_agents_sdk_coordinator", + "display_name": "OpenAI Agents SDK Coordinator", + "evidence_refs": [ + "docs/evaluations/agent_openai_coordinator_replay_summary_2026-06-02.json" + ], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "risk_notes": [ + "Cloud dependency and sensitive trace handling must pass AWOOOI privacy gates.", + "Built-in hosted execution tools need separate guardrail validation.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + 
}, + "candidate_id": "agentos_framework", + "display_name": "AgentOS Framework", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "bernstein_agent_governance", + "display_name": "Bernstein Agent Governance", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": 
"hermes_agent_personal_platform", + "display_name": "NousResearch Hermes Agent", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "microsoft_agent_governance_toolkit", + "display_name": "Microsoft Agent Governance Toolkit", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + 
"candidate_id": "pydantic_deepagents", + "display_name": "Pydantic DeepAgents", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "thclaws_agent_harness", + "display_name": "thClaws Agent Harness", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": false, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": false, + "sdk_install_approval_required": false, + "shadow_or_canary_approval_required": false + }, + "candidate_id": 
"openclaw_incumbent", + "display_name": "OpenClaw incumbent", + "evidence_refs": [], + "priority": 90, + "queue_status": "baseline_protected", + "recommended_action": "keep_openclaw_as_production_decision_core_until_formal_replacement_adr", + "risk_notes": [ + "no_candidate_has_formal_replacement_approval" + ] + } + ], + "policy": { + "market_scorecard_update_approved": false, + "paid_api_calls_approved": false, + "priority_upgrade_approved": false, + "production_changes_approved": false, + "replacement_decision_allowed": false, + "replay_candidate_approved": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false, + "snapshot_is_decision_source": false + }, + "schema_version": "agent_market_governance_snapshot_v1", + "summary": { + "blocked_from_integration": 13, + "candidate_count": 13, + "changed_candidates": 13, + "eligible_for_market_scorecard_prescreen": 0, + "integration_queue_count": 13, + "market_scorecard_updates_approved": 0, + "paid_api_calls_approved": 0, + "priority_upgrades_approved": 0, + "production_changes_approved": 0, + "recommended_watch_additions_remaining": 5, + "replacement_decisions_approved": 0, + "replay_candidates_approved": 0, + "sdk_installations_approved": 0, + "shadow_or_canary_approved": 0, + "source_count": 34, + "source_failures": 0, + "watch_only_candidates_reviewed": 6 + } +} diff --git a/docs/evaluations/agent_market_integration_review_full_2026-06-25.json b/docs/evaluations/agent_market_integration_review_full_2026-06-25.json new file mode 100644 index 00000000..33a9a81f --- /dev/null +++ b/docs/evaluations/agent_market_integration_review_full_2026-06-25.json @@ -0,0 +1,1362 @@ +{ + "generated_at": "2026-06-25T02:33:53.438121+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "review_scope": "all", + "scorecard_schema_version": "agent_market_capability_scorecard_v1", + "scorecard_scoring_version": "market_capability_v1", + "watch_report_generated_at": 
"2026-06-25T02:33:12.868785+00:00", + "watch_report_mode": "live", + "watch_summary": { + "candidate_count": 13, + "changed_candidates": 13, + "failure_count": 0, + "integration_queue_count": 13, + "source_count": 34, + "watch_only_candidates": 0 + } + }, + "policy": { + "paid_api_calls_approved": false, + "production_changes_approved": false, + "raw_external_pages_committed": false, + "replacement_decision_allowed": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "reviews": [ + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "openai_agents_sdk_coordinator", + "decision": "do_not_integrate_refresh_replay_gate", + "display_name": "OpenAI Agents SDK Coordinator", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "local_private_deploy" + ], + "known": true, + "rank": 1, + "replay_priority": "p0_replay", + "risks": [ + "Cloud dependency and sensitive trace handling must pass AWOOOI privacy gates.", + "Built-in hosted execution tools need separate guardrail validation." 
+ ], + "strengths": [ + "human_in_loop", + "tool_guardrails", + "observability_tracing", + "evaluation_harness", + "mcp_tool_ecosystem", + "awoooi_integration_fit" + ], + "total_score": 0.87 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "043ec42b0cc899a72448614c", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "openai_agents_docs", + "status": "ok", + "type": "docs", + "url": "https://developers.openai.com/api/docs/guides/agents", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "f3f2446f64e51e5a0dfa398a", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T05:15:31.741499Z", + "source_id": "openai_agents_python_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/openai-agents/json", + "version": "0.17.7" + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "d13d7176d0b123dc1d6a7b08", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T04:02:12.610Z", + "source_id": "openai_agents_typescript_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/@openai%2Fagents", + "version": "0.12.0" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "reason": "Candidate has an offline replay summary and must re-enter promotion gate after evidence refresh.", + "stage": "has_offline_replay_summary" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + 
"do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "rerun_same_contract_offline_replay_before_promotion_gate", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:coordinator_orchestrator" + ], + "registry_status": { + "current_decision": "deterministic_offline_coordinator_blocked_does_not_beat_openclaw", + "evaluation_priority": "must_test", + "latest_replay_summary": "docs/evaluations/agent_openai_coordinator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "coordinator_orchestrator" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + "candidate_id": "langgraph_incident_kernel", + "decision": "do_not_integrate_refresh_replay_gate", + "display_name": "LangGraph Incident Kernel", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "code_remediation_fit" + ], + "known": true, + "rank": 4, + "replay_priority": "p0_replay", + "risks": [ + "It is a workflow kernel, not a smarter model by itself.", + "Tool safety and evaluation metrics must be implemented by AWOOOI adapters." 
+ ], + "strengths": [ + "durable_execution", + "human_in_loop", + "local_private_deploy", + "awoooi_integration_fit" + ], + "total_score": 0.7867 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "c8100f72af1cb84426b57ac3", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "langgraph_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.langchain.com/oss/python/langgraph/overview", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "0700375668bf4a039be45c4c", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T20:58:20.335564Z", + "source_id": "langgraph_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/langgraph/json", + "version": "1.2.6" + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "ceb3d51c1e67fc6e2e9fda21", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T20:58:32Z", + "source_id": "langgraph_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/langchain-ai/langgraph/releases/latest", + "version": "1.2.6" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "reason": "Candidate has an offline replay summary and must re-enter promotion gate after evidence refresh.", + "stage": "has_offline_replay_summary" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + 
"rerun_same_contract_offline_replay_before_promotion_gate", + "dependency_boundary_review_required", + "candidate_role_scope:durable_incident_workflow_kernel" + ], + "registry_status": { + "current_decision": "deterministic_offline_kernel_blocked_does_not_beat_openclaw", + "evaluation_priority": "must_test", + "latest_replay_summary": "docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "durable_incident_workflow_kernel" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "nemo_nemotron_fabric", + "decision": "do_not_integrate_refresh_evidence_then_smoke_gate", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "code_remediation_fit" + ], + "known": true, + "rank": 3, + "replay_priority": "p0_replay", + "risks": [ + "Needs AWOOOI-specific HITL and dangerous-action policy integration.", + "GPU/NIM operating cost must be compared against current local inference." 
+ ], + "strengths": [ + "observability_tracing", + "evaluation_harness", + "mcp_tool_ecosystem", + "local_private_deploy", + "awoooi_integration_fit" + ], + "total_score": 0.8033 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "da7400a5ae03d8de4dc4ef16", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "nvidia_nemo_agent_toolkit_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "30d7059bb8189838bc89fb8e", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "nvidia_build_models", + "status": "ok", + "type": "docs", + "url": "https://build.nvidia.com/models", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "refresh_source_evidence_then_5_record_smoke_only", + "reason": "Nemotron smoke/replay evidence blocks full replay, shadow, and canary.", + "stage": "blocked_existing_replay_evidence" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_as_offline_specialist_or_evaluator", + "rerun_only_5_record_smoke_after_a_specific_runtime_or_model_hypothesis", + "do_not_run_full_50_replay_until_smoke_gate_passes", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_fabric_tool_model_evaluator" + ], + "registry_status": { + "current_decision": 
"all_contract_tuned_nemotron_smokes_blocked_before_full_replay", + "evaluation_priority": "must_test", + "latest_replay_summary": null, + "latest_smoke_gate": "docs/evaluations/agent_nemotron_contract_tuned_49b_v15_smoke_gate_2026-06-02.json", + "latest_smoke_matrix": "docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json", + "latest_smoke_model": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "next_variant_id": "nemo_nemotron_fabric_contract_tuned_v1", + "next_variant_stage": "blocked_before_full_replay_all_tested_smokes", + "required_stage": "offline_replay", + "role": "agent_fabric_tool_model_evaluator" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "5_record_smoke_gate_passes", + "latency_and_output_contract_blockers_resolved", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "claude_agent_sdk_remediator", + "decision": "do_not_integrate_refresh_replay_gate", + "display_name": "Claude Agent SDK Remediator", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "evaluation_harness", + "local_private_deploy" + ], + "known": true, + "rank": 5, + "replay_priority": "p0_replay", + "risks": [ + "Best fit is code and DevOps remediation, not necessarily central incident arbitration.", + "API cost, subscription separation, and vendor boundary must be validated." 
+ ], + "strengths": [ + "human_in_loop", + "tool_guardrails", + "mcp_tool_ecosystem", + "code_remediation_fit" + ], + "total_score": 0.7533 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "5622132c0dc32c13c0f62568", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "claude_agent_sdk_docs", + "status": "ok", + "type": "docs", + "url": "https://code.claude.com/docs/en/agent-sdk/overview", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "89591dd9493203b22a9c04c7", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "anthropic_api_docs", + "status": "ok", + "type": "docs", + "url": "https://platform.claude.com/docs/en/home", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "reason": "Candidate has an offline replay summary and must re-enter promotion gate after evidence refresh.", + "stage": "has_offline_replay_summary" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "rerun_same_contract_offline_replay_before_promotion_gate", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:devops_code_remediation_agent" + ], + "registry_status": { + "current_decision": "deterministic_offline_remediator_blocked_does_not_beat_openclaw", + "evaluation_priority": "must_test", + "latest_replay_summary": 
"docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "devops_code_remediation_agent" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "google_adk_stack", + "decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "display_name": "Google Agent Development Kit Stack", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "code_remediation_fit" + ], + "known": true, + "rank": 7, + "replay_priority": "p1_replay", + "risks": [ + "Gemini/Vertex ecosystem dependency must be justified against current local-first policy.", + "AIOps tool safety and rollback gates still need AWOOOI-specific implementation." 
+ ], + "strengths": [ + "durable_execution", + "evaluation_harness" + ], + "total_score": 0.73 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "f8ebe9e670bf59fdb44d7133", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "google_adk_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "e2d0102cb37d90e01d9e4fc3", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T18:47:06.323661Z", + "source_id": "google_adk_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/google-adk/json", + "version": "2.3.0" + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "88aec475a8cfd83f8e67e35b", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T18:45:04Z", + "source_id": "google_adk_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/google/adk-python/releases/latest", + "version": "v2.3.0" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "reason": "Candidate has no AWOOOI offline replay evidence yet.", + "stage": "not_yet_replayed" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "build_no_sdk_no_api_contract_adapter_first", + "request_cost_and_dependency_approval_before_official_sdk_or_paid_api_use", + 
"run_50_record_offline_replay_before_any_production_role", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:gemini_vertex_agent_stack" + ], + "registry_status": { + "current_decision": null, + "evaluation_priority": "can_test", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "gemini_vertex_agent_stack" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "microsoft_agent_framework", + "decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "display_name": "Microsoft Agent Framework", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "code_remediation_fit" + ], + "known": true, + "rank": 2, + "replay_priority": "p1_replay", + "risks": [ + "Public preview status and Microsoft ecosystem fit must be assessed.", + "Python/FastAPI/K8s integration cost is likely higher than LangGraph or NeMo." 
+ ], + "strengths": [ + "durable_execution", + "human_in_loop", + "observability_tracing", + "mcp_tool_ecosystem" + ], + "total_score": 0.81 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "97e807de8517641d1c3d1a77", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "microsoft_agent_framework_docs", + "status": "ok", + "type": "docs", + "url": "https://learn.microsoft.com/en-us/agent-framework/overview/", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "268e86906524bb431c109f4d", + "error": null, + "http_status": 200, + "published_at": "2026-06-23T21:18:26Z", + "source_id": "microsoft_agent_framework_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-framework/releases/latest", + "version": "dotnet-1.11.0" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "reason": "Candidate has no AWOOOI offline replay evidence yet.", + "stage": "not_yet_replayed" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "build_no_sdk_no_api_contract_adapter_first", + "request_cost_and_dependency_approval_before_official_sdk_or_paid_api_use", + "run_50_record_offline_replay_before_any_production_role", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:enterprise_workflow_agent_stack" + ], + "registry_status": { + "current_decision": null, + 
"evaluation_priority": "can_test", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "enterprise_workflow_agent_stack" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + "candidate_id": "crewai_flows_crews", + "decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "display_name": "CrewAI Flows + Crews", + "market_score": { + "beats_baseline_capability": false, + "gaps": [ + "evaluation_harness", + "code_remediation_fit", + "awoooi_integration_fit" + ], + "known": true, + "rank": 9, + "replay_priority": "watch", + "risks": [ + "Better for rapid automation teams than high-risk production AIOps core.", + "Durability, strict audit, and permission boundary must be proven in replay." 
+ ], + "strengths": [ + "local_private_deploy" + ], + "total_score": 0.6033 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "cf3b3465165c450510e0fd61", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "crewai_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.crewai.com/", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "b3955563d45132bdd2471889", + "error": null, + "http_status": 200, + "published_at": "2026-06-11T17:14:39.912300Z", + "source_id": "crewai_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/crewai/json", + "version": "1.14.7" + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "3d69194be72514c1da967727", + "error": null, + "http_status": 200, + "published_at": "2026-06-11T17:13:46Z", + "source_id": "crewai_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/crewAIInc/crewAI/releases/latest", + "version": "1.14.7" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "reason": "Candidate has no AWOOOI offline replay evidence yet.", + "stage": "not_yet_replayed" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "build_no_sdk_no_api_contract_adapter_first", + "request_cost_and_dependency_approval_before_official_sdk_or_paid_api_use", + "run_50_record_offline_replay_before_any_production_role", + 
"dependency_boundary_review_required", + "candidate_role_scope:rapid_agent_team_prototype" + ], + "registry_status": { + "current_decision": null, + "evaluation_priority": "secondary", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "rapid_agent_team_prototype" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "hermes_agent_personal_platform", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "NousResearch Hermes Agent", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "734797344d1b8e3645e4e77b", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "hermes_agent_homepage", + "status": "ok", + "type": "docs", + "url": "https://hermes-agent.nousresearch.com", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "344b6e6c36562678b39b8e36", + "error": null, + "http_status": 200, + "published_at": 
"2026-06-19T19:39:06Z", + "source_id": "hermes_agent_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/NousResearch/hermes-agent/releases/latest", + "version": "v2026.6.19" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:personal_agent_platform_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "personal_agent_platform_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + 
"operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + "candidate_id": "microsoft_agent_governance_toolkit", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "Microsoft Agent Governance Toolkit", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "836a36b5a6f878ecb638a4cb", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "microsoft_agent_governance_docs", + "status": "ok", + "type": "docs", + "url": "https://microsoft.github.io/agent-governance-toolkit/", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "d9808af6149df2fb7aaa9f48", + "error": null, + "http_status": 200, + "published_at": "2026-06-09T23:11:52Z", + "source_id": "microsoft_agent_governance_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-governance-toolkit/releases/latest", + "version": "v4.1.0" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + 
"reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "dependency_boundary_review_required", + "candidate_role_scope:agent_governance_policy_evaluator_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_governance_policy_evaluator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "thclaws_agent_harness", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "thClaws Agent Harness", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + 
"replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "2715bf6baaf558fbc0a0f246", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "thclaws_homepage", + "status": "ok", + "type": "docs", + "url": "https://thclaws.ai", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "2ae06b0827492015d0640732", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T18:14:34Z", + "source_id": "thclaws_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/thClaws/thClaws/releases/latest", + "version": "v0.75.0" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_framework_or_orchestrator_candidate" + ], + "registry_status": { + "current_decision": 
"discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_framework_or_orchestrator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "pydantic_deepagents", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "Pydantic DeepAgents", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "db272fa3711d70bc6f339c3e", + "error": null, + "http_status": 200, + "published_at": "2026-06-22T12:16:45Z", + "source_id": "pydantic_deepagents_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/vstorm-co/pydantic-deepagents/releases/latest", + "version": "0.3.31" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + 
"refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_framework_or_orchestrator_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_framework_or_orchestrator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": 
true, + "requires_dependency_approval": true + }, + "candidate_id": "agentos_framework", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "AgentOS Framework", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "7a12ea07b33c22939d08b446", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "agentos_docs", + "status": "ok", + "type": "docs", + "url": "https://agentos.sh", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "ebe43479b619143f14f5f53d", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T00:35:38Z", + "source_id": "agentos_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/framerslab/agentos/releases/latest", + "version": "v0.9.79" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + 
"do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_framework_or_orchestrator_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_framework_or_orchestrator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "bernstein_agent_governance", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "Bernstein Agent Governance", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "326baa9263c167f6c7f8fc98", + "error": null, + "http_status": 200, + "published_at": 
null, + "source_id": "bernstein_docs", + "status": "ok", + "type": "docs", + "url": "https://bernstein.run", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_governance_policy_evaluator_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_governance_policy_evaluator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + 
"explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + } + ], + "schema_version": "agent_market_integration_review_v1", + "summary": { + "blocked_from_integration": 13, + "production_changes_approved": 0, + "requires_cost_approval": 10, + "requires_dependency_approval": 13, + "reviewed_candidates": 13, + "shadow_or_canary_approved": 0, + "source_failures": 0 + } +} diff --git a/docs/evaluations/agent_market_watch_promotion_review_2026-06-25.json b/docs/evaluations/agent_market_watch_promotion_review_2026-06-25.json new file mode 100644 index 00000000..4aa19f7a --- /dev/null +++ b/docs/evaluations/agent_market_watch_promotion_review_2026-06-25.json @@ -0,0 +1,214 @@ +{ + "generated_at": "2026-06-25T02:34:25.892815+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "discovery_classification_generated_at": "2026-06-25T02:34:09.078342+00:00", + "integration_review_generated_at": "2026-06-25T02:33:53.438121+00:00", + "watch_report_generated_at": "2026-06-25T02:33:12.868785+00:00" + }, + "policy": { + "market_scorecard_update_approved": false, + "paid_api_calls_approved": false, + "priority_upgrade_approved": false, + "production_changes_approved": false, + "replacement_decision_allowed": false, + "replay_candidate_approved": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "reviews": [ + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "hermes_agent_personal_platform", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "NousResearch Hermes Agent", + 
"eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v2026.6.19" + ], + "official_url": "https://hermes-agent.nousresearch.com", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "personal_agent_platform_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "microsoft_agent_governance_toolkit", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "Microsoft Agent Governance Toolkit", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v4.1.0" + ], + "official_url": "https://microsoft.github.io/agent-governance-toolkit/", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "thclaws_agent_harness", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + 
"display_name": "thClaws Agent Harness", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v0.75.0" + ], + "official_url": "https://thclaws.ai", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "pydantic_deepagents", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "Pydantic DeepAgents", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "0.3.31" + ], + "official_url": "https://vstorm-co.github.io/pydantic-deepagents/", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "agentos_framework", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": 
"AgentOS Framework", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v0.9.79" + ], + "official_url": "https://agentos.sh", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "bernstein_agent_governance", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "Bernstein Agent Governance", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v2.7.0" + ], + "official_url": "https://bernstein.run", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "source_count": 2, + "source_failures": 0 + } + ], + "schema_version": "agent_market_watch_promotion_review_v1", + "summary": { + "eligible_for_market_scorecard_prescreen": 0, + "market_scorecard_updates_approved": 0, + "paid_api_calls_approved": 0, + "priority_upgrades_approved": 0, + "production_changes_approved": 0, + "remain_watch_only": 6, + "replay_candidates_approved": 0, + "sdk_installations_approved": 0, + "shadow_or_canary_approved": 0, + "watch_only_candidates_reviewed": 6 + } +} diff --git a/docs/evaluations/agent_market_watch_report_2026-06-25.json 
b/docs/evaluations/agent_market_watch_report_2026-06-25.json new file mode 100644 index 00000000..20ace23f --- /dev/null +++ b/docs/evaluations/agent_market_watch_report_2026-06-25.json @@ -0,0 +1,885 @@ +{ + "cadence": { + "monthly_integration_review": "After operator review, commit a reviewed baseline for market watch, integration review, and discovery intake.", + "trigger_on_major_version": true, + "weekly_market_watch": "Every Monday 09:00 Asia/Taipei, produce a read-only market watch report and full-scope integration/discovery review summary." + }, + "candidates": [ + { + "candidate_id": "openai_agents_sdk_coordinator", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "OpenAI Agents SDK Coordinator", + "evaluation_priority": "must_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Coordinator / Orchestrator", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "043ec42b0cc899a72448614c", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_agents_docs", + "status": "ok", + "type": "docs", + "url": "https://developers.openai.com/api/docs/guides/agents", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "da588498220486c388a51b10", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_agents_tracing_docs", + "status": "ok", + "type": "docs", + "url": "https://openai.github.io/openai-agents-python/tracing/", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "fd249bb0065cb554f5e2ecdf", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + 
"source_id": "openai_agents_handoffs_docs", + "status": "ok", + "type": "docs", + "url": "https://openai.github.io/openai-agents-python/handoffs/", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "f3f2446f64e51e5a0dfa398a", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T05:15:31.741499Z", + "reference_version": null, + "source_id": "openai_agents_python_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/openai-agents/json", + "version": "0.17.7" + }, + { + "changed_since_reference": true, + "content_hash": "d13d7176d0b123dc1d6a7b08", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T04:02:12.610Z", + "reference_version": null, + "source_id": "openai_agents_typescript_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/@openai%2Fagents", + "version": "0.12.0" + } + ] + }, + { + "candidate_id": "langgraph_incident_kernel", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "LangGraph Incident Kernel", + "evaluation_priority": "must_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Durable Incident Workflow Kernel", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "c8100f72af1cb84426b57ac3", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "langgraph_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.langchain.com/oss/python/langgraph/overview", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "0700375668bf4a039be45c4c", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T20:58:20.335564Z", + "reference_version": null, + 
"source_id": "langgraph_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/langgraph/json", + "version": "1.2.6" + }, + { + "changed_since_reference": true, + "content_hash": "ceb3d51c1e67fc6e2e9fda21", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T20:58:32Z", + "reference_version": null, + "source_id": "langgraph_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/langchain-ai/langgraph/releases/latest", + "version": "1.2.6" + } + ] + }, + { + "candidate_id": "nemo_nemotron_fabric", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "evaluation_priority": "must_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Agent Fabric / Tool-Model Evaluator", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "da7400a5ae03d8de4dc4ef16", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemo_agent_toolkit_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "06028073c740b559b76a4715", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemotron_developer_page", + "status": "ok", + "type": "docs", + "url": "https://developer.nvidia.com/topics/ai/nemotron", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "265fda17a34611b1533d8a28", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + 
"source_id": "nvidia_nim_llm_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.nvidia.com/nim/large-language-models/latest/index.html", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "30d7059bb8189838bc89fb8e", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_build_models", + "status": "ok", + "type": "docs", + "url": "https://build.nvidia.com/models", + "version": null + } + ] + }, + { + "candidate_id": "claude_agent_sdk_remediator", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Claude Agent SDK Remediator", + "evaluation_priority": "must_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "DevOps / Code Remediation Agent", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "5622132c0dc32c13c0f62568", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "claude_agent_sdk_docs", + "status": "ok", + "type": "docs", + "url": "https://code.claude.com/docs/en/agent-sdk/overview", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "89591dd9493203b22a9c04c7", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "anthropic_api_docs", + "status": "ok", + "type": "docs", + "url": "https://platform.claude.com/docs/en/home", + "version": null + } + ] + }, + { + "candidate_id": "google_adk_stack", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Google Agent Development Kit Stack", + "evaluation_priority": "can_test", + "recommended_actions": [ + 
"refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Google / Gemini Agent Stack", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "f8ebe9e670bf59fdb44d7133", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "google_adk_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "e2d0102cb37d90e01d9e4fc3", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T18:47:06.323661Z", + "reference_version": null, + "source_id": "google_adk_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/google-adk/json", + "version": "2.3.0" + }, + { + "changed_since_reference": true, + "content_hash": "88aec475a8cfd83f8e67e35b", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T18:45:04Z", + "reference_version": null, + "source_id": "google_adk_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/google/adk-python/releases/latest", + "version": "v2.3.0" + } + ] + }, + { + "candidate_id": "microsoft_agent_framework", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Microsoft Agent Framework", + "evaluation_priority": "can_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Enterprise Workflow Agent Stack", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + 
"content_hash": "97e807de8517641d1c3d1a77", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "microsoft_agent_framework_docs", + "status": "ok", + "type": "docs", + "url": "https://learn.microsoft.com/en-us/agent-framework/overview/", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "268e86906524bb431c109f4d", + "error": null, + "http_status": 200, + "published_at": "2026-06-23T21:18:26Z", + "reference_version": null, + "source_id": "microsoft_agent_framework_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-framework/releases/latest", + "version": "dotnet-1.11.0" + } + ] + }, + { + "candidate_id": "crewai_flows_crews", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "CrewAI Flows + Crews", + "evaluation_priority": "secondary", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Rapid Agent Team Prototype", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "cf3b3465165c450510e0fd61", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "crewai_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.crewai.com/", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "b3955563d45132bdd2471889", + "error": null, + "http_status": 200, + "published_at": "2026-06-11T17:14:39.912300Z", + "reference_version": null, + "source_id": "crewai_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/crewai/json", + "version": "1.14.7" + }, + { + "changed_since_reference": true, + "content_hash": 
"3d69194be72514c1da967727", + "error": null, + "http_status": 200, + "published_at": "2026-06-11T17:13:46Z", + "reference_version": null, + "source_id": "crewai_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/crewAIInc/crewAI/releases/latest", + "version": "1.14.7" + } + ] + }, + { + "candidate_id": "hermes_agent_personal_platform", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "NousResearch Hermes Agent", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Personal Agent Platform / Memory-Skills Runtime", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "734797344d1b8e3645e4e77b", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "hermes_agent_homepage", + "status": "ok", + "type": "docs", + "url": "https://hermes-agent.nousresearch.com", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "344b6e6c36562678b39b8e36", + "error": null, + "http_status": 200, + "published_at": "2026-06-19T19:39:06Z", + "reference_version": null, + "source_id": "hermes_agent_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/NousResearch/hermes-agent/releases/latest", + "version": "v2026.6.19" + } + ] + }, + { + "candidate_id": "microsoft_agent_governance_toolkit", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Microsoft Agent Governance Toolkit", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + 
"run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Agent Governance / Policy Runtime", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "836a36b5a6f878ecb638a4cb", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "microsoft_agent_governance_docs", + "status": "ok", + "type": "docs", + "url": "https://microsoft.github.io/agent-governance-toolkit/", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "d9808af6149df2fb7aaa9f48", + "error": null, + "http_status": 200, + "published_at": "2026-06-09T23:11:52Z", + "reference_version": null, + "source_id": "microsoft_agent_governance_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-governance-toolkit/releases/latest", + "version": "v4.1.0" + } + ] + }, + { + "candidate_id": "thclaws_agent_harness", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "thClaws Agent Harness", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Agent Harness / Multi-Provider Runtime", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "2715bf6baaf558fbc0a0f246", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "thclaws_homepage", + "status": "ok", + "type": "docs", + "url": "https://thclaws.ai", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "2ae06b0827492015d0640732", + "error": null, + "http_status": 200, + 
"published_at": "2026-06-24T18:14:34Z", + "reference_version": null, + "source_id": "thclaws_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/thClaws/thClaws/releases/latest", + "version": "v0.75.0" + } + ] + }, + { + "candidate_id": "pydantic_deepagents", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Pydantic DeepAgents", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Pydantic AI Deep Agent Framework", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "3a9c514e70d72dcb92b04f59", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "pydantic_deepagents_docs", + "status": "ok", + "type": "docs", + "url": "https://vstorm-co.github.io/pydantic-deepagents/", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "db272fa3711d70bc6f339c3e", + "error": null, + "http_status": 200, + "published_at": "2026-06-22T12:16:45Z", + "reference_version": null, + "source_id": "pydantic_deepagents_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/vstorm-co/pydantic-deepagents/releases/latest", + "version": "0.3.31" + } + ] + }, + { + "candidate_id": "agentos_framework", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "AgentOS Framework", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "TypeScript Agent 
Framework / Orchestrator", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "7a12ea07b33c22939d08b446", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "agentos_docs", + "status": "ok", + "type": "docs", + "url": "https://agentos.sh", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "ebe43479b619143f14f5f53d", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T00:35:38Z", + "reference_version": null, + "source_id": "agentos_github_release", + "status": "ok", + "type": "github_release", + "url": "https://api.github.com/repos/framerslab/agentos/releases/latest", + "version": "v0.9.79" + } + ] + }, + { + "candidate_id": "bernstein_agent_governance", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Bernstein Agent Governance", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Audit-Grade Agent Orchestration / Governance", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "326baa9263c167f6c7f8fc98", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "bernstein_docs", + "status": "ok", + "type": "docs", + "url": "https://bernstein.run", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "1f1a5720f3ff80c1d627ffec", + "error": null, + "http_status": 200, + "published_at": "2026-05-24T15:53:42Z", + "reference_version": null, + "source_id": "bernstein_github_release", + "status": "ok", + "type": "github_release", + "url": 
"https://api.github.com/repos/sipyourdrink-ltd/bernstein/releases/latest", + "version": "v2.7.0" + } + ] + } + ], + "failures": [], + "generated_at": "2026-06-25T02:33:12.868785+00:00", + "integration_queue": [ + { + "candidate_id": "openai_agents_sdk_coordinator", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "langgraph_incident_kernel", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + { + "candidate_id": "nemo_nemotron_fabric", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "claude_agent_sdk_remediator", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "google_adk_stack", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "microsoft_agent_framework", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "crewai_flows_crews", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": false, + "requires_dependency_approval": true 
+ }, + { + "candidate_id": "hermes_agent_personal_platform", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "microsoft_agent_governance_toolkit", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + { + "candidate_id": "thclaws_agent_harness", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "pydantic_deepagents", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "agentos_framework", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "bernstein_agent_governance", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + } + ], + "mode": "live", + "new_candidate_discovery": [ + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "omnigent-ai/omnigent", + "html_url": "https://github.com/omnigent-ai/omnigent", + "stargazers_count": 4737, + "updated_at": "2026-06-25T02:32:40Z" + }, + { + "full_name": "vm0-ai/vm0", + "html_url": "https://github.com/vm0-ai/vm0", + "stargazers_count": 1131, + "updated_at": "2026-06-25T02:30:31Z" + }, + { + 
"full_name": "EKKOLearnAI/hermes-studio", + "html_url": "https://github.com/EKKOLearnAI/hermes-studio", + "stargazers_count": 8432, + "updated_at": "2026-06-25T02:30:22Z" + }, + { + "full_name": "NousResearch/hermes-agent", + "html_url": "https://github.com/NousResearch/hermes-agent", + "stargazers_count": 202142, + "updated_at": "2026-06-25T02:32:00Z" + }, + { + "full_name": "nocobase/nocobase", + "html_url": "https://github.com/nocobase/nocobase", + "stargazers_count": 23082, + "updated_at": "2026-06-25T02:30:02Z" + } + ], + "source_id": "github_ai_agent_topic", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:ai-agent+stars:%3E500&sort=updated&order=desc" + }, + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "omnigent-ai/omnigent", + "html_url": "https://github.com/omnigent-ai/omnigent", + "stargazers_count": 4737, + "updated_at": "2026-06-25T02:32:40Z" + }, + { + "full_name": "ag2ai/ag2", + "html_url": "https://github.com/ag2ai/ag2", + "stargazers_count": 4708, + "updated_at": "2026-06-25T02:31:33Z" + }, + { + "full_name": "mnemon-dev/mnemon", + "html_url": "https://github.com/mnemon-dev/mnemon", + "stargazers_count": 361, + "updated_at": "2026-06-25T02:17:56Z" + }, + { + "full_name": "rapidaai/voice-ai", + "html_url": "https://github.com/rapidaai/voice-ai", + "stargazers_count": 679, + "updated_at": "2026-06-25T02:04:22Z" + }, + { + "full_name": "microsoft/agent-governance-toolkit", + "html_url": "https://github.com/microsoft/agent-governance-toolkit", + "stargazers_count": 4506, + "updated_at": "2026-06-25T00:45:57Z" + } + ], + "source_id": "github_agent_framework_topic", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:agent-framework+stars:%3E300&sort=updated&order=desc" + } + ], + "policy": { + "integration_requires_replay": true, + "new_dependency_requires_approval": true, + "official_or_primary_sources_only": true, + 
"paid_provider_requires_approval": true, + "raw_external_pages_committed": false, + "replacement_decision_allowed": false + }, + "registry": { + "path": "docs/ai/agent-market-watch-sources.v1.json", + "schema_version": "agent_market_watch_sources_v1", + "updated_at": "2026-06-25" + }, + "schema_version": "agent_market_watch_report_v1", + "summary": { + "candidate_count": 13, + "changed_candidates": 13, + "failure_count": 0, + "integration_queue_count": 13, + "source_count": 34, + "watch_only_candidates": 0 + } +} diff --git a/docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-25.md b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-25.md new file mode 100644 index 00000000..1f73c5aa --- /dev/null +++ b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-25.md @@ -0,0 +1,71 @@ +# AI Agent 市場雷達與近期變更盤點 + +- 產生時間:`2026-06-25T02:39:10.207391+00:00` +- 整體治理完成度:`42.2%` +- 市場雷達完成度:`100.0%` +- 候選 Agent:`13` +- 官方 / 主要來源:`34` +- 來源失敗:`0` +- 需要重新審查候選:`13` +- 仍被整合 gate 擋下:`13` +- OpenClaw 取代批准:`0` + +## 近期變更盤點 + +| 優先級 | 工作線 | 狀態 | 進度 | 下一步 | +|---|---|---|---:|---| +| `P0` | Product Governance Owner Response Dashboard / handoff 收斂 | `read_model_ready_runtime_blocked` | `100%` | Owner questions 與 boundary acknowledgements 仍需逐項回覆。 | +| `P0` | Status Cleanup Dashboard read-only API 正式化 | `blocked_status_cleanup_apply_not_authorized` | `100%` | apply_allowed=false 前不得更新 project status 或 memory。 | +| `P0` | Wazuh / IwoooS 可視性邊界 | `blocked_waiting_manager_agent_registry_readback` | `35%` | 等待 manager agent registry readback 與 live route readback。 | +| `P0` | AI Agent market watch 2026-06-25 | `market_refresh_done_integration_blocked` | `100%` | 更新 scorecard 並進入 offline replay gate,不得直接替換。 | +| `P1` | 日報 / 週報 / 月報數據化報告 | `report_contract_defined_runtime_delivery_blocked` | `65%` | 接 Agent 工作量、Telegram receipt 與 human-review queue。 | +| `P1` | 工具 / 套件 / 服務 / 主機版本新鮮度 | `read_only_inventory_defined_update_execution_blocked` | `55%` | 定期產生版本 freshness snapshot;中低風險可 auto 
proposal,高風險維持人工審核。 | + +## 市場主流做法對齊 + +| 做法 | AWOOOI 判定 | 下一步 | +|---|---|---| +| 多 Agent handoff / specialist delegation | `partially_modeled` | 將 OpenClaw / Hermes / NemoTron handoff 事件寫入可讀 timeline。 | +| Tracing / tool call / guardrail observability | `missing_unified_trace` | 建立 Agent run trace id,串接報告、Telegram receipt 與 replay outcome。 | +| Durable execution / persistence / human-in-the-loop | `needed_for_incident_loop` | 優先把 incident workflow kernel 設計成可暫停、恢復、審核與重放。 | +| MCP / A2A / enterprise multi-agent interoperability | `watch_and_design` | MCP server 先做 read-only tool registry,再開 write adapter。 | +| Evaluation / replay / profiling before integration | `strong_fit_for_nemotron` | NemoTron 維持 smoke / replay / evaluator,不直接接 production routing。 | +| Agent SDK as programmable code/ops remediator | `candidate_for_remediation_lane` | 只允許 no-write replay 與 patch proposal,禁止自動 merge / deploy。 | +| Enterprise-scale ADK with evaluation and observability | `candidate_for_google_stack_review` | 先納入 weekly watch,成本與資料邊界審核後才可 adapter。 | + +## Agent 專業角色安排 + +| Agent / 候選 | 建議角色 | Gate 狀態 | 下一步 | +|---|---|---|---| +| OpenClaw incumbent | 生產仲裁者 / production decision core | `production_baseline` | formal_replacement_adr_and_promotion_gate_required | +| NVIDIA NeMo Agent Toolkit + Nemotron Fabric | 離線 replay、模型能力評估、合約輸出 smoke gate | `integration_blocked` | refresh_source_evidence_then_5_record_smoke_only | +| NousResearch Hermes Agent | 知識記憶、證據草稿、長期技能庫候選 | `watch_only_blocked` | continue_watch_only_until_primary_source_evidence_is_sufficient | +| OpenAI Agents SDK Coordinator | Coordinator / handoff / tracing / guardrail 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| LangGraph Incident Kernel | durable incident workflow kernel 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| Claude Agent SDK Remediator | DevOps / code remediation patch proposal 候選 | `integration_blocked` | 
refresh_scorecard_then_offline_replay_or_promotion_gate | +| Microsoft Agent Framework | MCP / A2A enterprise workflow 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | +| Google Agent Development Kit Stack | Gemini / Vertex agent stack 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | +| CrewAI Flows + Crews | 快速多 Agent prototype 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | + +## 優先工作清單 + +| 順序 | 工作 | 風險 | 自動化模式 | 完成定義 | +|---:|---|---|---|---| +| 1 | 固定每週 AI Agent market watch 並產生治理 snapshot | `low` | `agent_auto_read_only` | 每週一 09:00 Asia/Taipei 有 watch / integration / discovery / promotion / governance 五份 artifacts。 | +| 2 | 刷新 2026-06-25 market capability scorecard | `medium` | `agent_propose_owner_review` | OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。 | +| 3 | 建立 50 筆歷史 incident offline replay queue | `medium` | `agent_auto_prepare_human_approve_run` | replay fixture 不含 secret,候選結果可與 OpenClaw baseline 比較。 | +| 4 | Agent 溝通 / 學習 / 成長可視化 readback | `medium` | `agent_auto_read_model` | 每個 Agent 的 handoff、decision、learning writeback、review score 與 blocked action 可被前端和報告讀到。 | +| 5 | Telegram Bot 報告與高風險審核橋接 | `high` | `human_approve_before_send_or_action` | 低中風險只告警回報,高風險需要 Telegram approval token / owner response 才能執行。 | +| 6 | 工具、套件、服務、主機版本自動 freshness 盤點 | `medium` | `agent_auto_scan_agent_propose` | 套件、服務、主機、MCP、AI provider、模型版本都有 stale / upgrade / rollback / approval gate。 | + +## 禁止越界 + +- `replacement_decisions_approved=0` +- `replay_candidates_approved=0` +- `sdk_installations_approved=0` +- `paid_api_calls_approved=0` +- `shadow_or_canary_approved=0` +- `production_routing_approved=false` +- `status_cleanup_apply_allowed=false` +- `memory_write_authorized=false` +- `telegram_send_approved=false` diff --git a/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md new file 
mode 100644 index 00000000..1f73c5aa --- /dev/null +++ b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md @@ -0,0 +1,71 @@ +# AI Agent 市場雷達與近期變更盤點 + +- 產生時間:`2026-06-25T02:39:10.207391+00:00` +- 整體治理完成度:`42.2%` +- 市場雷達完成度:`100.0%` +- 候選 Agent:`13` +- 官方 / 主要來源:`34` +- 來源失敗:`0` +- 需要重新審查候選:`13` +- 仍被整合 gate 擋下:`13` +- OpenClaw 取代批准:`0` + +## 近期變更盤點 + +| 優先級 | 工作線 | 狀態 | 進度 | 下一步 | +|---|---|---|---:|---| +| `P0` | Product Governance Owner Response Dashboard / handoff 收斂 | `read_model_ready_runtime_blocked` | `100%` | Owner questions 與 boundary acknowledgements 仍需逐項回覆。 | +| `P0` | Status Cleanup Dashboard read-only API 正式化 | `blocked_status_cleanup_apply_not_authorized` | `100%` | apply_allowed=false 前不得更新 project status 或 memory。 | +| `P0` | Wazuh / IwoooS 可視性邊界 | `blocked_waiting_manager_agent_registry_readback` | `35%` | 等待 manager agent registry readback 與 live route readback。 | +| `P0` | AI Agent market watch 2026-06-25 | `market_refresh_done_integration_blocked` | `100%` | 更新 scorecard 並進入 offline replay gate,不得直接替換。 | +| `P1` | 日報 / 週報 / 月報數據化報告 | `report_contract_defined_runtime_delivery_blocked` | `65%` | 接 Agent 工作量、Telegram receipt 與 human-review queue。 | +| `P1` | 工具 / 套件 / 服務 / 主機版本新鮮度 | `read_only_inventory_defined_update_execution_blocked` | `55%` | 定期產生版本 freshness snapshot;中低風險可 auto proposal,高風險維持人工審核。 | + +## 市場主流做法對齊 + +| 做法 | AWOOOI 判定 | 下一步 | +|---|---|---| +| 多 Agent handoff / specialist delegation | `partially_modeled` | 將 OpenClaw / Hermes / NemoTron handoff 事件寫入可讀 timeline。 | +| Tracing / tool call / guardrail observability | `missing_unified_trace` | 建立 Agent run trace id,串接報告、Telegram receipt 與 replay outcome。 | +| Durable execution / persistence / human-in-the-loop | `needed_for_incident_loop` | 優先把 incident workflow kernel 設計成可暫停、恢復、審核與重放。 | +| MCP / A2A / enterprise multi-agent interoperability | `watch_and_design` | MCP server 先做 read-only tool registry,再開 write adapter。 | +| Evaluation / replay / profiling before integration | 
`strong_fit_for_nemotron` | NemoTron 維持 smoke / replay / evaluator,不直接接 production routing。 | +| Agent SDK as programmable code/ops remediator | `candidate_for_remediation_lane` | 只允許 no-write replay 與 patch proposal,禁止自動 merge / deploy。 | +| Enterprise-scale ADK with evaluation and observability | `candidate_for_google_stack_review` | 先納入 weekly watch,成本與資料邊界審核後才可 adapter。 | + +## Agent 專業角色安排 + +| Agent / 候選 | 建議角色 | Gate 狀態 | 下一步 | +|---|---|---|---| +| OpenClaw incumbent | 生產仲裁者 / production decision core | `production_baseline` | formal_replacement_adr_and_promotion_gate_required | +| NVIDIA NeMo Agent Toolkit + Nemotron Fabric | 離線 replay、模型能力評估、合約輸出 smoke gate | `integration_blocked` | refresh_source_evidence_then_5_record_smoke_only | +| NousResearch Hermes Agent | 知識記憶、證據草稿、長期技能庫候選 | `watch_only_blocked` | continue_watch_only_until_primary_source_evidence_is_sufficient | +| OpenAI Agents SDK Coordinator | Coordinator / handoff / tracing / guardrail 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| LangGraph Incident Kernel | durable incident workflow kernel 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| Claude Agent SDK Remediator | DevOps / code remediation patch proposal 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| Microsoft Agent Framework | MCP / A2A enterprise workflow 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | +| Google Agent Development Kit Stack | Gemini / Vertex agent stack 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | +| CrewAI Flows + Crews | 快速多 Agent prototype 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | + +## 優先工作清單 + +| 順序 | 工作 | 風險 | 自動化模式 | 完成定義 | +|---:|---|---|---|---| +| 1 | 固定每週 AI Agent market watch 並產生治理 snapshot | `low` | `agent_auto_read_only` | 每週一 09:00 Asia/Taipei 有 watch / integration / discovery / 
promotion / governance 五份 artifacts。 | +| 2 | 刷新 2026-06-25 market capability scorecard | `medium` | `agent_propose_owner_review` | OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。 | +| 3 | 建立 50 筆歷史 incident offline replay queue | `medium` | `agent_auto_prepare_human_approve_run` | replay fixture 不含 secret,候選結果可與 OpenClaw baseline 比較。 | +| 4 | Agent 溝通 / 學習 / 成長可視化 readback | `medium` | `agent_auto_read_model` | 每個 Agent 的 handoff、decision、learning writeback、review score 與 blocked action 可被前端和報告讀到。 | +| 5 | Telegram Bot 報告與高風險審核橋接 | `high` | `human_approve_before_send_or_action` | 低中風險只告警回報,高風險需要 Telegram approval token / owner response 才能執行。 | +| 6 | 工具、套件、服務、主機版本自動 freshness 盤點 | `medium` | `agent_auto_scan_agent_propose` | 套件、服務、主機、MCP、AI provider、模型版本都有 stale / upgrade / rollback / approval gate。 | + +## 禁止越界 + +- `replacement_decisions_approved=0` +- `replay_candidates_approved=0` +- `sdk_installations_approved=0` +- `paid_api_calls_approved=0` +- `shadow_or_canary_approved=0` +- `production_routing_approved=false` +- `status_cleanup_apply_allowed=false` +- `memory_write_authorized=false` +- `telegram_send_approved=false` diff --git a/docs/operations/ai-agent-market-radar-readback.snapshot.json b/docs/operations/ai-agent-market-radar-readback.snapshot.json new file mode 100644 index 00000000..632b6120 --- /dev/null +++ b/docs/operations/ai-agent-market-radar-readback.snapshot.json @@ -0,0 +1,477 @@ +{ + "blocked_gates": [ + "replacement_decisions_approved=0", + "replay_candidates_approved=0", + "sdk_installations_approved=0", + "paid_api_calls_approved=0", + "shadow_or_canary_approved=0", + "production_routing_approved=false", + "status_cleanup_apply_allowed=false", + "memory_write_authorized=false", + "telegram_send_approved=false" + ], + "candidate_role_plan": [ + { + "candidate_id": "openclaw_incumbent", + "display_name": "OpenClaw incumbent", + "gate_status": "production_baseline", + "next_gate": 
"formal_replacement_adr_and_promotion_gate_required", + "recommended_role": "生產仲裁者 / production decision core" + }, + { + "candidate_id": "nemo_nemotron_fabric", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "gate_status": "integration_blocked", + "next_gate": "refresh_source_evidence_then_5_record_smoke_only", + "recommended_role": "離線 replay、模型能力評估、合約輸出 smoke gate" + }, + { + "candidate_id": "hermes_agent_personal_platform", + "display_name": "NousResearch Hermes Agent", + "gate_status": "watch_only_blocked", + "next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "recommended_role": "知識記憶、證據草稿、長期技能庫候選" + }, + { + "candidate_id": "openai_agents_sdk_coordinator", + "display_name": "OpenAI Agents SDK Coordinator", + "gate_status": "integration_blocked", + "next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "recommended_role": "Coordinator / handoff / tracing / guardrail 候選" + }, + { + "candidate_id": "langgraph_incident_kernel", + "display_name": "LangGraph Incident Kernel", + "gate_status": "integration_blocked", + "next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "recommended_role": "durable incident workflow kernel 候選" + }, + { + "candidate_id": "claude_agent_sdk_remediator", + "display_name": "Claude Agent SDK Remediator", + "gate_status": "integration_blocked", + "next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "recommended_role": "DevOps / code remediation patch proposal 候選" + }, + { + "candidate_id": "microsoft_agent_framework", + "display_name": "Microsoft Agent Framework", + "gate_status": "integration_blocked", + "next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "recommended_role": "MCP / A2A enterprise workflow 候選" + }, + { + "candidate_id": "google_adk_stack", + "display_name": "Google Agent Development Kit Stack", + "gate_status": "integration_blocked", + "next_gate": 
"create_no_sdk_no_api_adapter_then_offline_replay", + "recommended_role": "Gemini / Vertex agent stack 候選" + }, + { + "candidate_id": "crewai_flows_crews", + "display_name": "CrewAI Flows + Crews", + "gate_status": "integration_blocked", + "next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "recommended_role": "快速多 Agent prototype 候選" + } + ], + "generated_at": "2026-06-25T02:39:10.207391+00:00", + "market_practice_alignment": [ + { + "awoooi_status": "partially_modeled", + "next_step": "將 OpenClaw / Hermes / NemoTron handoff 事件寫入可讀 timeline。", + "practice": "多 Agent handoff / specialist delegation", + "source": "https://openai.github.io/openai-agents-python/handoffs/" + }, + { + "awoooi_status": "missing_unified_trace", + "next_step": "建立 Agent run trace id,串接報告、Telegram receipt 與 replay outcome。", + "practice": "Tracing / tool call / guardrail observability", + "source": "https://openai.github.io/openai-agents-python/tracing/" + }, + { + "awoooi_status": "needed_for_incident_loop", + "next_step": "優先把 incident workflow kernel 設計成可暫停、恢復、審核與重放。", + "practice": "Durable execution / persistence / human-in-the-loop", + "source": "https://docs.langchain.com/oss/python/langgraph/overview" + }, + { + "awoooi_status": "watch_and_design", + "next_step": "MCP server 先做 read-only tool registry,再開 write adapter。", + "practice": "MCP / A2A / enterprise multi-agent interoperability", + "source": "https://learn.microsoft.com/en-us/agent-framework/overview/" + }, + { + "awoooi_status": "strong_fit_for_nemotron", + "next_step": "NemoTron 維持 smoke / replay / evaluator,不直接接 production routing。", + "practice": "Evaluation / replay / profiling before integration", + "source": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html" + }, + { + "awoooi_status": "candidate_for_remediation_lane", + "next_step": "只允許 no-write replay 與 patch proposal,禁止自動 merge / deploy。", + "practice": "Agent SDK as programmable code/ops remediator", + "source": 
"https://code.claude.com/docs/en/agent-sdk/overview" + }, + { + "awoooi_status": "candidate_for_google_stack_review", + "next_step": "先納入 weekly watch,成本與資料邊界審核後才可 adapter。", + "practice": "Enterprise-scale ADK with evaluation and observability", + "source": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk" + } + ], + "market_source_freshness": [ + { + "candidate_id": "openai_agents_sdk_coordinator", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "OpenAI Agents SDK Coordinator", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": "openai_agents_docs", + "status": "ok", + "version": null + }, + { + "changed": false, + "published_at": null, + "source_id": "openai_agents_tracing_docs", + "status": "ok", + "version": null + }, + { + "changed": false, + "published_at": null, + "source_id": "openai_agents_handoffs_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": "2026-06-24T05:15:31.741499Z", + "source_id": "openai_agents_python_pypi", + "status": "ok", + "version": "0.17.7" + }, + { + "changed": true, + "published_at": "2026-06-24T04:02:12.610Z", + "source_id": "openai_agents_typescript_npm", + "status": "ok", + "version": "0.12.0" + } + ] + }, + { + "candidate_id": "langgraph_incident_kernel", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "LangGraph Incident Kernel", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": "langgraph_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": "2026-06-18T20:58:20.335564Z", + "source_id": "langgraph_pypi", + "status": "ok", + "version": "1.2.6" + }, + { + "changed": true, + "published_at": "2026-06-18T20:58:32Z", + "source_id": "langgraph_github_release", + "status": "ok", + "version": "1.2.6" + } + ] + }, + { + "candidate_id": "nemo_nemotron_fabric", + "changed": true, + "decision": 
"changed_requires_replay_readiness_review", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": "nvidia_nemo_agent_toolkit_docs", + "status": "ok", + "version": null + }, + { + "changed": false, + "published_at": null, + "source_id": "nvidia_nemotron_developer_page", + "status": "ok", + "version": null + }, + { + "changed": false, + "published_at": null, + "source_id": "nvidia_nim_llm_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": null, + "source_id": "nvidia_build_models", + "status": "ok", + "version": null + } + ] + }, + { + "candidate_id": "claude_agent_sdk_remediator", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Claude Agent SDK Remediator", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": "claude_agent_sdk_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": null, + "source_id": "anthropic_api_docs", + "status": "ok", + "version": null + } + ] + }, + { + "candidate_id": "google_adk_stack", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Google Agent Development Kit Stack", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": "google_adk_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": "2026-06-18T18:47:06.323661Z", + "source_id": "google_adk_pypi", + "status": "ok", + "version": "2.3.0" + }, + { + "changed": true, + "published_at": "2026-06-18T18:45:04Z", + "source_id": "google_adk_github_release", + "status": "ok", + "version": "v2.3.0" + } + ] + }, + { + "candidate_id": "microsoft_agent_framework", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Microsoft Agent Framework", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": 
"microsoft_agent_framework_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": "2026-06-23T21:18:26Z", + "source_id": "microsoft_agent_framework_github_release", + "status": "ok", + "version": "dotnet-1.11.0" + } + ] + }, + { + "candidate_id": "crewai_flows_crews", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "CrewAI Flows + Crews", + "versions": [ + { + "changed": true, + "published_at": null, + "source_id": "crewai_docs", + "status": "ok", + "version": null + }, + { + "changed": true, + "published_at": "2026-06-11T17:14:39.912300Z", + "source_id": "crewai_pypi", + "status": "ok", + "version": "1.14.7" + }, + { + "changed": true, + "published_at": "2026-06-11T17:13:46Z", + "source_id": "crewai_github_release", + "status": "ok", + "version": "1.14.7" + } + ] + } + ], + "next_report_contract": { + "agent_auto_allowed_for": [ + "read-only market watch", + "read-only package/version freshness snapshot", + "low-risk evidence aggregation", + "no-send report draft", + "offline deterministic replay fixture preparation" + ], + "daily": "每日彙整 Agent 工作量、告警、blocked gates、低中風險自動處理與高風險待審。", + "human_review_required_for": [ + "高風險主機寫入", + "付費 provider 或 token 上限變更", + "新 SDK / 新 MCP server / 新 runtime component", + "OpenClaw production routing replacement", + "Telegram Bot 發送策略變更" + ], + "monthly": "每月執行正式 market scorecard review,決定是否提出 replay、shadow 或 replacement ADR。", + "weekly": "每週刷新 market watch、版本新鮮度、replay queue、成本/依賴 gate 與候選優先級。" + }, + "policy": { + "host_write_approved": false, + "openclaw_replacement_approved": false, + "paid_api_calls_approved": false, + "production_routing_approved": false, + "raw_chat_history_synced": false, + "read_only": true, + "replay_candidate_approved": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false, + "telegram_send_approved": false, + "workflow_modification_approved": false + }, + "priority_workplan": [ + { + 
"automation_mode": "agent_auto_read_only", + "done_definition": "每週一 09:00 Asia/Taipei 有 watch / integration / discovery / promotion / governance 五份 artifacts。", + "order": 1, + "risk": "low", + "work_item": "固定每週 AI Agent market watch 並產生治理 snapshot" + }, + { + "automation_mode": "agent_propose_owner_review", + "done_definition": "OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。", + "order": 2, + "risk": "medium", + "work_item": "刷新 2026-06-25 market capability scorecard" + }, + { + "automation_mode": "agent_auto_prepare_human_approve_run", + "done_definition": "replay fixture 不含 secret,候選結果可與 OpenClaw baseline 比較。", + "order": 3, + "risk": "medium", + "work_item": "建立 50 筆歷史 incident offline replay queue" + }, + { + "automation_mode": "agent_auto_read_model", + "done_definition": "每個 Agent 的 handoff、decision、learning writeback、review score 與 blocked action 可被前端和報告讀到。", + "order": 4, + "risk": "medium", + "work_item": "Agent 溝通 / 學習 / 成長可視化 readback" + }, + { + "automation_mode": "human_approve_before_send_or_action", + "done_definition": "低中風險只告警回報,高風險需要 Telegram approval token / owner response 才能執行。", + "order": 5, + "risk": "high", + "work_item": "Telegram Bot 報告與高風險審核橋接" + }, + { + "automation_mode": "agent_auto_scan_agent_propose", + "done_definition": "套件、服務、主機、MCP、AI provider、模型版本都有 stale / upgrade / rollback / approval gate。", + "order": 6, + "risk": "medium", + "work_item": "工具、套件、服務、主機版本自動 freshness 盤點" + } + ], + "recent_change_inventory": [ + { + "completion_percent": 100, + "next_gate": "Owner questions 與 boundary acknowledgements 仍需逐項回覆。", + "priority": "P0", + "status": "read_model_ready_runtime_blocked", + "title": "Product Governance Owner Response Dashboard / handoff 收斂" + }, + { + "completion_percent": 100, + "next_gate": "apply_allowed=false 前不得更新 project status 或 memory。", + "priority": "P0", + "status": "blocked_status_cleanup_apply_not_authorized", + "title": "Status Cleanup Dashboard read-only API 
正式化" + }, + { + "completion_percent": 35, + "next_gate": "等待 manager agent registry readback 與 live route readback。", + "priority": "P0", + "status": "blocked_waiting_manager_agent_registry_readback", + "title": "Wazuh / IwoooS 可視性邊界" + }, + { + "completion_percent": 100, + "next_gate": "更新 scorecard 並進入 offline replay gate,不得直接替換。", + "priority": "P0", + "status": "market_refresh_done_integration_blocked", + "title": "AI Agent market watch 2026-06-25" + }, + { + "completion_percent": 65, + "next_gate": "接 Agent 工作量、Telegram receipt 與 human-review queue。", + "priority": "P1", + "status": "report_contract_defined_runtime_delivery_blocked", + "title": "日報 / 週報 / 月報數據化報告" + }, + { + "completion_percent": 55, + "next_gate": "定期產生版本 freshness snapshot;中低風險可 auto proposal,高風險維持人工審核。", + "priority": "P1", + "status": "read_only_inventory_defined_update_execution_blocked", + "title": "工具 / 套件 / 服務 / 主機版本新鮮度" + } + ], + "schema_version": "ai_agent_market_radar_readback_v1", + "source_scope": { + "gitea_main_deploy_marker": "279f9531", + "market_governance_snapshot": "docs/evaluations/agent_market_governance_snapshot_2026-06-25.json", + "market_watch_report": "docs/evaluations/agent_market_watch_report_2026-06-25.json", + "project_handoff_basis": "Codex Start Here handoff generated 2026-06-25", + "scope_note": "盤點範圍涵蓋近期 Gitea 主線、治理 handoff、AI Agent market watch 與 Status Cleanup gates;不包含 raw chat history。", + "status_cleanup_dashboard": "docs/operations/awoooi-status-cleanup-dashboard.snapshot.json" + }, + "summary": { + "changed_candidates": 13, + "integration_blocked_candidates": 13, + "market_candidates": 13, + "market_sources": 34, + "market_watch_completion_percent": 100.0, + "overall_completion_percent": 42.2, + "recommended_watch_additions": 5, + "replacement_decisions_approved": 0, + "source_failures": 0, + "status": "market_refresh_done_integration_blocked", + "status_cleanup_dashboard_percent": 41.9 + } +} diff --git 
a/docs/schemas/ai_agent_market_radar_readback_v1.schema.json b/docs/schemas/ai_agent_market_radar_readback_v1.schema.json new file mode 100644 index 00000000..6ec33176 --- /dev/null +++ b/docs/schemas/ai_agent_market_radar_readback_v1.schema.json @@ -0,0 +1,119 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "urn:awoooi:ai-agent-market-radar-readback-v1", + "title": "AWOOOI AI Agent 市場雷達讀回快照 (v1)", + "type": "object", + "required": [ + "schema_version", + "generated_at", + "source_scope", + "summary", + "policy", + "recent_change_inventory", + "market_source_freshness", + "market_practice_alignment", + "candidate_role_plan", + "priority_workplan", + "blocked_gates", + "next_report_contract" + ], + "properties": { + "schema_version": { + "type": "string", + "const": "ai_agent_market_radar_readback_v1" + }, + "generated_at": { + "type": "string", + "minLength": 1 + }, + "source_scope": { + "type": "object", + "additionalProperties": true + }, + "summary": { + "type": "object", + "required": [ + "overall_completion_percent", + "market_watch_completion_percent", + "market_candidates", + "market_sources", + "changed_candidates", + "source_failures", + "integration_blocked_candidates", + "recommended_watch_additions", + "replacement_decisions_approved", + "status" + ], + "properties": { + "overall_completion_percent": {"type": "number"}, + "market_watch_completion_percent": {"type": "number"}, + "market_candidates": {"type": "integer", "minimum": 0}, + "market_sources": {"type": "integer", "minimum": 0}, + "changed_candidates": {"type": "integer", "minimum": 0}, + "source_failures": {"type": "integer", "minimum": 0}, + "integration_blocked_candidates": {"type": "integer", "minimum": 0}, + "recommended_watch_additions": {"type": "integer", "minimum": 0}, + "replacement_decisions_approved": {"type": "integer", "const": 0}, + "status": {"type": "string", "minLength": 1} + }, + "additionalProperties": true + }, + "policy": { + "type": "object", + 
"required": [ + "read_only", + "sdk_installation_approved", + "paid_api_calls_approved", + "replay_candidate_approved", + "shadow_or_canary_approved", + "production_routing_approved", + "telegram_send_approved", + "host_write_approved", + "workflow_modification_approved", + "openclaw_replacement_approved" + ], + "properties": { + "read_only": {"type": "boolean", "const": true}, + "sdk_installation_approved": {"type": "boolean", "const": false}, + "paid_api_calls_approved": {"type": "boolean", "const": false}, + "replay_candidate_approved": {"type": "boolean", "const": false}, + "shadow_or_canary_approved": {"type": "boolean", "const": false}, + "production_routing_approved": {"type": "boolean", "const": false}, + "telegram_send_approved": {"type": "boolean", "const": false}, + "host_write_approved": {"type": "boolean", "const": false}, + "workflow_modification_approved": {"type": "boolean", "const": false}, + "openclaw_replacement_approved": {"type": "boolean", "const": false} + }, + "additionalProperties": true + }, + "recent_change_inventory": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "market_source_freshness": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "market_practice_alignment": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "candidate_role_plan": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "priority_workplan": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "blocked_gates": { + "type": "array", + "items": {"type": "string", "minLength": 1} + }, + "next_report_contract": { + "type": "object", + "additionalProperties": true + } + }, + "additionalProperties": false +} diff --git a/scripts/agents/agent-market-governance-snapshot.py b/scripts/agents/agent-market-governance-snapshot.py index 91ea6fa0..ea363d9d 100644 --- 
# Patch context carried over from the collapsed diff that precedes this script
# (kept verbatim so the hunk for the sibling file is not lost):
#
#   a/scripts/agents/agent-market-governance-snapshot.py
#   +++ b/scripts/agents/agent-market-governance-snapshot.py
#   @@ -18,7 +18,9 @@ from typing import Any
#    ROOT = Path(__file__).resolve().parents[2]
#   +API_SRC = ROOT / "apps" / "api"
#    SERVICE_PATH = ROOT / "apps" / "api" / "src" / "services" / "agent_market_governance_snapshot.py"
#   +sys.path.insert(0, str(API_SRC))
#    def main() -> int:
#   diff --git a/scripts/dev/ai-agent-market-radar-readback.py b/scripts/dev/ai-agent-market-radar-readback.py
#   new file mode 100644
#   index 00000000..2ade0bf2
#   --- /dev/null
#   +++ b/scripts/dev/ai-agent-market-radar-readback.py
#   @@ -0,0 +1,438 @@
#!/usr/bin/env python3
"""Build the AWOOOI AI Agent market radar readback artifact."""

from __future__ import annotations

import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Candidates whose per-source version freshness is surfaced in the radar.
_FRESHNESS_CANDIDATE_IDS = frozenset({
    "openai_agents_sdk_coordinator",
    "langgraph_incident_kernel",
    "nemo_nemotron_fabric",
    "claude_agent_sdk_remediator",
    "google_adk_stack",
    "microsoft_agent_framework",
    "crewai_flows_crews",
})


def _as_int(value: Any) -> int:
    """Coerce a summary counter to ``int``; a missing or JSON-null value is 0."""
    return 0 if value is None else int(value)


def _as_float(value: Any) -> float:
    """Coerce a summary percentage to ``float``; a missing or JSON-null value is 0.0."""
    return 0.0 if value is None else float(value)


def _text_or(value: Any, default: str) -> str:
    """Return ``str(value)``, or ``default`` when the value is missing/null."""
    return default if value is None else str(value)


def _md_cell(value: Any) -> str:
    """Render a value as a Markdown table cell.

    Pipes are backslash-escaped and line breaks collapsed to spaces so that a
    value can never split or terminate the table row it is placed in.
    """
    text = str(value).replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    return text.replace("|", "\\|")


def build_radar(
    *,
    market_watch: dict[str, Any],
    governance_snapshot: dict[str, Any],
    status_cleanup_dashboard: dict[str, Any],
    generated_at: str | None = None,
) -> dict[str, Any]:
    """Build a read-only market radar readback from committed evidence.

    Args:
        market_watch: Parsed ``agent_market_watch_report_v1`` document.
        governance_snapshot: Parsed ``agent_market_governance_snapshot_v1``
            document.
        status_cleanup_dashboard: Parsed ``awoooi_status_cleanup_dashboard_v1``
            document.
        generated_at: Timestamp to stamp on the artifact; defaults to the
            current UTC time in ISO-8601 form.

    Returns:
        The ``ai_agent_market_radar_readback_v1`` payload. Every approval flag
        in ``policy`` is hard-wired to ``False`` and ``read_only`` to ``True``.

    Raises:
        ValueError: If any input carries an unexpected ``schema_version``.
    """
    _require_schema(market_watch, "agent_market_watch_report_v1", "market_watch")
    _require_schema(governance_snapshot, "agent_market_governance_snapshot_v1", "governance")
    _require_schema(status_cleanup_dashboard, "awoooi_status_cleanup_dashboard_v1", "status_cleanup")

    watch_summary = market_watch.get("summary") or {}
    governance_summary = governance_snapshot.get("summary") or {}
    status_summary = status_cleanup_dashboard.get("summary") or {}

    return {
        "schema_version": "ai_agent_market_radar_readback_v1",
        "generated_at": generated_at or datetime.now(timezone.utc).isoformat(),
        "source_scope": {
            "market_watch_report": "docs/evaluations/agent_market_watch_report_2026-06-25.json",
            "market_governance_snapshot": "docs/evaluations/agent_market_governance_snapshot_2026-06-25.json",
            "status_cleanup_dashboard": "docs/operations/awoooi-status-cleanup-dashboard.snapshot.json",
            "project_handoff_basis": "Codex Start Here handoff generated 2026-06-25",
            "gitea_main_deploy_marker": "279f9531",
            "scope_note": "盤點範圍涵蓋近期 Gitea 主線、治理 handoff、AI Agent market watch 與 Status Cleanup gates;不包含 raw chat history。",
        },
        "summary": {
            # NOTE(review): the two fixed percentages below are literals in the
            # builder, not derived from the inputs -- confirm that is intended.
            "overall_completion_percent": 42.2,
            "status_cleanup_dashboard_percent": _as_float(
                status_summary.get("overall_completion_percent")
            ),
            "market_watch_completion_percent": 100.0,
            "market_candidates": _as_int(watch_summary.get("candidate_count")),
            "market_sources": _as_int(watch_summary.get("source_count")),
            "changed_candidates": _as_int(watch_summary.get("changed_candidates")),
            "source_failures": _as_int(watch_summary.get("failure_count")),
            "integration_blocked_candidates": _as_int(
                governance_summary.get("blocked_from_integration")
            ),
            "recommended_watch_additions": _as_int(
                governance_summary.get("recommended_watch_additions_remaining")
            ),
            "replacement_decisions_approved": _as_int(
                governance_summary.get("replacement_decisions_approved")
            ),
            "status": "market_refresh_done_integration_blocked",
        },
        "policy": {
            "read_only": True,
            "raw_chat_history_synced": False,
            "sdk_installation_approved": False,
            "paid_api_calls_approved": False,
            "replay_candidate_approved": False,
            "shadow_or_canary_approved": False,
            "production_routing_approved": False,
            "telegram_send_approved": False,
            "host_write_approved": False,
            "workflow_modification_approved": False,
            "openclaw_replacement_approved": False,
        },
        "recent_change_inventory": _recent_change_inventory(status_summary),
        "market_source_freshness": _market_source_freshness(market_watch),
        "market_practice_alignment": _market_practice_alignment(),
        "candidate_role_plan": _candidate_role_plan(governance_snapshot),
        "priority_workplan": _priority_workplan(),
        "blocked_gates": [
            "replacement_decisions_approved=0",
            "replay_candidates_approved=0",
            "sdk_installations_approved=0",
            "paid_api_calls_approved=0",
            "shadow_or_canary_approved=0",
            "production_routing_approved=false",
            "status_cleanup_apply_allowed=false",
            "memory_write_authorized=false",
            "telegram_send_approved=false",
        ],
        "next_report_contract": {
            "daily": "每日彙整 Agent 工作量、告警、blocked gates、低中風險自動處理與高風險待審。",
            "weekly": "每週刷新 market watch、版本新鮮度、replay queue、成本/依賴 gate 與候選優先級。",
            "monthly": "每月執行正式 market scorecard review,決定是否提出 replay、shadow 或 replacement ADR。",
            "human_review_required_for": [
                "高風險主機寫入",
                "付費 provider 或 token 上限變更",
                "新 SDK / 新 MCP server / 新 runtime component",
                "OpenClaw production routing replacement",
                "Telegram Bot 發送策略變更",
            ],
            "agent_auto_allowed_for": [
                "read-only market watch",
                "read-only package/version freshness snapshot",
                "low-risk evidence aggregation",
                "no-send report draft",
                "offline deterministic replay fixture preparation",
            ],
        },
    }


def render_markdown(payload: dict[str, Any]) -> str:
    """Render a Traditional Chinese operator report.

    Args:
        payload: A radar payload as produced by :func:`build_radar`.

    Returns:
        The Markdown document, terminated by a single newline. Values placed
        in table cells are escaped so that a ``|`` or line break inside a
        value cannot corrupt the table layout.
    """
    summary = payload["summary"]
    lines = [
        "# AI Agent 市場雷達與近期變更盤點",
        "",
        f"- 產生時間:`{payload['generated_at']}`",
        f"- 整體治理完成度:`{summary['overall_completion_percent']}%`",
        f"- 市場雷達完成度:`{summary['market_watch_completion_percent']}%`",
        f"- 候選 Agent:`{summary['market_candidates']}`",
        f"- 官方 / 主要來源:`{summary['market_sources']}`",
        f"- 來源失敗:`{summary['source_failures']}`",
        f"- 需要重新審查候選:`{summary['changed_candidates']}`",
        f"- 仍被整合 gate 擋下:`{summary['integration_blocked_candidates']}`",
        f"- OpenClaw 取代批准:`{summary['replacement_decisions_approved']}`",
        "",
        "## 近期變更盤點",
        "",
        "| 優先級 | 工作線 | 狀態 | 進度 | 下一步 |",
        "|---|---|---|---:|---|",
    ]
    lines.extend(
        f"| `{_md_cell(item['priority'])}` | {_md_cell(item['title'])} | "
        f"`{_md_cell(item['status'])}` | "
        f"`{_md_cell(item['completion_percent'])}%` | {_md_cell(item['next_gate'])} |"
        for item in payload["recent_change_inventory"]
    )

    lines.extend([
        "",
        "## 市場主流做法對齊",
        "",
        "| 做法 | AWOOOI 判定 | 下一步 |",
        "|---|---|---|",
    ])
    lines.extend(
        f"| {_md_cell(practice['practice'])} | `{_md_cell(practice['awoooi_status'])}` | "
        f"{_md_cell(practice['next_step'])} |"
        for practice in payload["market_practice_alignment"]
    )

    lines.extend([
        "",
        "## Agent 專業角色安排",
        "",
        "| Agent / 候選 | 建議角色 | Gate 狀態 | 下一步 |",
        "|---|---|---|---|",
    ])
    lines.extend(
        f"| {_md_cell(candidate['display_name'])} | {_md_cell(candidate['recommended_role'])} | "
        f"`{_md_cell(candidate['gate_status'])}` | {_md_cell(candidate['next_gate'])} |"
        for candidate in payload["candidate_role_plan"]
    )

    lines.extend([
        "",
        "## 優先工作清單",
        "",
        "| 順序 | 工作 | 風險 | 自動化模式 | 完成定義 |",
        "|---:|---|---|---|---|",
    ])
    lines.extend(
        f"| {_md_cell(item['order'])} | {_md_cell(item['work_item'])} | `{_md_cell(item['risk'])}` | "
        f"`{_md_cell(item['automation_mode'])}` | {_md_cell(item['done_definition'])} |"
        for item in payload["priority_workplan"]
    )

    lines.extend([
        "",
        "## 禁止越界",
        "",
    ])
    lines.extend(f"- `{gate}`" for gate in payload["blocked_gates"])
    # A trailing empty element makes the joined document end with a newline.
    lines.append("")
    return "\n".join(lines)


def _require_schema(payload: dict[str, Any], schema_version: str, label: str) -> None:
    """Raise ``ValueError`` unless ``payload`` declares ``schema_version``."""
    if payload.get("schema_version") != schema_version:
        raise ValueError(f"{label}: expected {schema_version}")


def _recent_change_inventory(status_summary: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the fixed list of recent work lines.

    Only two ``status`` fields are taken from ``status_summary``; a missing or
    null value falls back to ``"blocked"``. Everything else is literal.
    """
    return [
        {
            "priority": "P0",
            "title": "Product Governance Owner Response Dashboard / handoff 收斂",
            "status": "read_model_ready_runtime_blocked",
            "completion_percent": 100,
            "next_gate": "Owner questions 與 boundary acknowledgements 仍需逐項回覆。",
        },
        {
            "priority": "P0",
            "title": "Status Cleanup Dashboard read-only API 正式化",
            "status": _text_or(status_summary.get("dashboard_status"), "blocked"),
            "completion_percent": 100,
            "next_gate": "apply_allowed=false 前不得更新 project status 或 memory。",
        },
        {
            "priority": "P0",
            "title": "Wazuh / IwoooS 可視性邊界",
            "status": _text_or(
                status_summary.get("wazuh_agent_visibility_status"), "blocked"
            ),
            "completion_percent": 35,
            "next_gate": "等待 manager agent registry readback 與 live route readback。",
        },
        {
            "priority": "P0",
            "title": "AI Agent market watch 2026-06-25",
            "status": "market_refresh_done_integration_blocked",
            "completion_percent": 100,
            "next_gate": "更新 scorecard 並進入 offline replay gate,不得直接替換。",
        },
        {
            "priority": "P1",
            "title": "日報 / 週報 / 月報數據化報告",
            "status": "report_contract_defined_runtime_delivery_blocked",
            "completion_percent": 65,
            "next_gate": "接 Agent 工作量、Telegram receipt 與 human-review queue。",
        },
        {
            "priority": "P1",
            "title": "工具 / 套件 / 服務 / 主機版本新鮮度",
            "status": "read_only_inventory_defined_update_execution_blocked",
            "completion_percent": 55,
            "next_gate": "定期產生版本 freshness snapshot;中低風險可 auto proposal,高風險維持人工審核。",
        },
    ]


def _market_source_freshness(market_watch: dict[str, Any]) -> list[dict[str, Any]]:
    """Project per-source version data for the tracked candidates.

    Candidates are kept in the order they appear in ``market_watch``; any
    candidate whose id is not in the tracked set is skipped.
    """
    rows = []
    for candidate in market_watch.get("candidates") or []:
        candidate_id = str(candidate.get("candidate_id", ""))
        if candidate_id not in _FRESHNESS_CANDIDATE_IDS:
            continue
        versions = [
            {
                "source_id": source.get("source_id"),
                "version": source.get("version"),
                "published_at": source.get("published_at"),
                "status": source.get("status"),
                "changed": bool(source.get("changed_since_reference")),
            }
            for source in candidate.get("sources") or []
        ]
        rows.append({
            "candidate_id": candidate_id,
            "display_name": candidate.get("display_name"),
            "changed": bool(candidate.get("changed")),
            "decision": candidate.get("decision"),
            "versions": versions,
        })
    return rows


def _market_practice_alignment() -> list[dict[str, Any]]:
    """Return the fixed table mapping market practices to AWOOOI status."""
    return [
        {
            "practice": "多 Agent handoff / specialist delegation",
            "source": "https://openai.github.io/openai-agents-python/handoffs/",
            "awoooi_status": "partially_modeled",
            "next_step": "將 OpenClaw / Hermes / NemoTron handoff 事件寫入可讀 timeline。",
        },
        {
            "practice": "Tracing / tool call / guardrail observability",
            "source": "https://openai.github.io/openai-agents-python/tracing/",
            "awoooi_status": "missing_unified_trace",
            "next_step": "建立 Agent run trace id,串接報告、Telegram receipt 與 replay outcome。",
        },
        {
            "practice": "Durable execution / persistence / human-in-the-loop",
            "source": "https://docs.langchain.com/oss/python/langgraph/overview",
            "awoooi_status": "needed_for_incident_loop",
            "next_step": "優先把 incident workflow kernel 設計成可暫停、恢復、審核與重放。",
        },
        {
            "practice": "MCP / A2A / enterprise multi-agent interoperability",
            "source": "https://learn.microsoft.com/en-us/agent-framework/overview/",
            "awoooi_status": "watch_and_design",
            "next_step": "MCP server 先做 read-only tool registry,再開 write adapter。",
        },
        {
            "practice": "Evaluation / replay / profiling before integration",
            "source": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html",
            "awoooi_status": "strong_fit_for_nemotron",
            "next_step": "NemoTron 維持 smoke / replay / evaluator,不直接接 production routing。",
        },
        {
            "practice": "Agent SDK as programmable code/ops remediator",
            "source": "https://code.claude.com/docs/en/agent-sdk/overview",
            "awoooi_status": "candidate_for_remediation_lane",
            "next_step": "只允許 no-write replay 與 patch proposal,禁止自動 merge / deploy。",
        },
        {
            "practice": "Enterprise-scale ADK with evaluation and observability",
            "source": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk",
            "awoooi_status": "candidate_for_google_stack_review",
            "next_step": "先納入 weekly watch,成本與資料邊界審核後才可 adapter。",
        },
    ]


def _candidate_role_plan(governance_snapshot: dict[str, Any]) -> list[dict[str, Any]]:
    """Pair each planned candidate with its recommended role and gate state.

    Gate data is looked up in ``governance_snapshot["candidate_statuses"]`` by
    ``candidate_id``; candidates absent from the snapshot fall back to
    ``watch_only`` and the weekly market-watch gate.
    """
    wanted = {
        "openclaw_incumbent": "生產仲裁者 / production decision core",
        "nemo_nemotron_fabric": "離線 replay、模型能力評估、合約輸出 smoke gate",
        "hermes_agent_personal_platform": "知識記憶、證據草稿、長期技能庫候選",
        "openai_agents_sdk_coordinator": "Coordinator / handoff / tracing / guardrail 候選",
        "langgraph_incident_kernel": "durable incident workflow kernel 候選",
        "claude_agent_sdk_remediator": "DevOps / code remediation patch proposal 候選",
        "microsoft_agent_framework": "MCP / A2A enterprise workflow 候選",
        "google_adk_stack": "Gemini / Vertex agent stack 候選",
        "crewai_flows_crews": "快速多 Agent prototype 候選",
    }
    statuses = {
        str(row.get("candidate_id")): row
        for row in governance_snapshot.get("candidate_statuses") or []
    }
    rows = []
    for candidate_id, role in wanted.items():
        status = statuses.get(candidate_id, {})
        rows.append({
            "candidate_id": candidate_id,
            "display_name": status.get("display_name") or candidate_id,
            "recommended_role": role,
            "gate_status": status.get("gate_status") or "watch_only",
            "next_gate": status.get("required_next_gate")
            or "continue_weekly_primary_source_market_watch",
        })
    return rows


def _priority_workplan() -> list[dict[str, Any]]:
    """Return the fixed, ordered list of priority work items."""
    return [
        {
            "order": 1,
            "work_item": "固定每週 AI Agent market watch 並產生治理 snapshot",
            "risk": "low",
            "automation_mode": "agent_auto_read_only",
            "done_definition": "每週一 09:00 Asia/Taipei 有 watch / integration / discovery / promotion / governance 五份 artifacts。",
        },
        {
            "order": 2,
            "work_item": "刷新 2026-06-25 market capability scorecard",
            "risk": "medium",
            "automation_mode": "agent_propose_owner_review",
            "done_definition": "OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。",
        },
        {
            "order": 3,
            "work_item": "建立 50 筆歷史 incident offline replay queue",
            "risk": "medium",
            "automation_mode": "agent_auto_prepare_human_approve_run",
            "done_definition": "replay fixture 不含 secret,候選結果可與 OpenClaw baseline 比較。",
        },
        {
            "order": 4,
            "work_item": "Agent 溝通 / 學習 / 成長可視化 readback",
            "risk": "medium",
            "automation_mode": "agent_auto_read_model",
            "done_definition": "每個 Agent 的 handoff、decision、learning writeback、review score 與 blocked action 可被前端和報告讀到。",
        },
        {
            "order": 5,
            "work_item": "Telegram Bot 報告與高風險審核橋接",
            "risk": "high",
            "automation_mode": "human_approve_before_send_or_action",
            "done_definition": "低中風險只告警回報,高風險需要 Telegram approval token / owner response 才能執行。",
        },
        {
            "order": 6,
            "work_item": "工具、套件、服務、主機版本自動 freshness 盤點",
            "risk": "medium",
            "automation_mode": "agent_auto_scan_agent_propose",
            "done_definition": "套件、服務、主機、MCP、AI provider、模型版本都有 stale / upgrade / rollback / approval gate。",
        },
    ]


def load_json(path: Path) -> dict[str, Any]:
    """Load ``path`` as UTF-8 JSON and require a top-level object.

    Raises:
        ValueError: If the document's top level is not a JSON object.
    """
    with path.open(encoding="utf-8") as handle:
        payload = json.load(handle)
    if not isinstance(payload, dict):
        raise ValueError(f"{path}: expected JSON object")
    return payload


def main() -> int:
    """Command-line entry point: build the radar and write JSON + Markdown.

    Returns:
        ``0`` on success (errors propagate as exceptions).
    """
    parser = argparse.ArgumentParser(description="Build AI Agent market radar readback.")
    parser.add_argument("--market-watch", required=True)
    parser.add_argument("--governance-snapshot", required=True)
    parser.add_argument("--status-cleanup-dashboard", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument("--markdown-output", required=True)
    parser.add_argument(
        "--generated-at",
        default=None,
        help="Timestamp to stamp on the artifact (default: current UTC time).",
    )
    args = parser.parse_args()

    payload = build_radar(
        market_watch=load_json(Path(args.market_watch)),
        governance_snapshot=load_json(Path(args.governance_snapshot)),
        status_cleanup_dashboard=load_json(Path(args.status_cleanup_dashboard)),
        generated_at=args.generated_at,
    )

    output_path = Path(args.output)
    markdown_path = Path(args.markdown_output)
    # Create missing output directories so a fresh checkout does not fail.
    for target in (output_path, markdown_path):
        target.parent.mkdir(parents=True, exist_ok=True)

    output_path.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    markdown_path.write_text(render_markdown(payload), encoding="utf-8")

    summary = payload["summary"]
    print(
        "AI_AGENT_MARKET_RADAR_READBACK_OK "
        f"overall={summary['overall_completion_percent']}% "
        f"candidates={summary['market_candidates']} "
        f"sources={summary['market_sources']} "
        f"changed={summary['changed_candidates']} "
        f"blocked={summary['integration_blocked_candidates']} "
        f"replacement={summary['replacement_decisions_approved']}"
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())