diff --git a/apps/api/src/services/agent_market_watch.py b/apps/api/src/services/agent_market_watch.py index ec94e16c..7705ffd5 100644 --- a/apps/api/src/services/agent_market_watch.py +++ b/apps/api/src/services/agent_market_watch.py @@ -19,7 +19,7 @@ from dataclasses import dataclass from datetime import datetime, timezone from typing import Any from urllib.error import HTTPError, URLError -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse from urllib.request import Request, urlopen FetchSource = Callable[[str, int], "FetchedSource"] @@ -234,9 +234,24 @@ def _evaluate_source( } fetched = fetcher(url, timeout_seconds) + previous = previous_sources.get((candidate_id, source_id), {}) + if _is_github_rate_limited(url, fetched) and previous: + return { + "source_id": source_id, + "type": source_type, + "url": url, + "status": "carried_forward_rate_limited", + "http_status": fetched.http_status, + "version": previous.get("version"), + "published_at": previous.get("published_at"), + "content_hash": previous.get("content_hash"), + "changed_since_reference": False, + "reference_version": reference_version, + "error": None, + "carried_forward_from_previous": True, + } parsed = _parse_source(source_type, fetched.body) if fetched.body else {} content_hash = _content_hash(fetched.body, source_type) if fetched.body else None - previous = previous_sources.get((candidate_id, source_id), {}) version = parsed.get("version") published_at = parsed.get("published_at") changed = _changed_since_reference( @@ -260,6 +275,16 @@ def _evaluate_source( } +def _is_github_rate_limited(url: str, fetched: FetchedSource) -> bool: + if fetched.status != "error" or fetched.http_status != 403: + return False + host = urlparse(url).netloc.lower() + if host != "api.github.com": + return False + body = fetched.body.decode("utf-8", errors="ignore").lower() + return "rate limit" in body or "api rate limit exceeded" in body + + def _parse_source(source_type: str, body: bytes) -> dict[str, str | None]: if source_type == "pypi": payload = _loads_json(body) diff --git a/apps/api/tests/test_agent_market_watch.py b/apps/api/tests/test_agent_market_watch.py index 2b9330a8..6b7a1858 100644 --- a/apps/api/tests/test_agent_market_watch.py +++ b/apps/api/tests/test_agent_market_watch.py @@ -333,3 +333,64 @@ def test_versioned_source_ignores_metadata_hash_noise_when_version_is_unchanged( assert report["summary"]["changed_candidates"] == 0 assert report["candidates"][0]["sources"][0]["version"] == "1.2.3" assert report["candidates"][0]["sources"][0]["changed_since_reference"] is False + + +def test_github_rate_limit_carries_forward_previous_source_without_failure(): + registry = { + "schema_version": "agent_market_watch_sources_v1", + "policy": {"replacement_decision_allowed": False}, + "candidates": [ + { + "candidate_id": "rate_limited_candidate", + "display_name": "Rate Limited Candidate", + "sources": [ + { + "source_id": "github_release", + "type": "github_release", + "url": "https://api.github.com/repos/example/project/releases/latest", + } + ], + } + ], + } + previous_report = { + "candidates": [ + { + "candidate_id": "rate_limited_candidate", + "sources": [ + { + "source_id": "github_release", + "version": "v1.2.3", + "published_at": "2026-06-25T00:00:00Z", + "content_hash": "previous-hash", + } + ], + } + ] + } + + def fetcher(_url: str, _timeout: int) -> FetchedSource: + return FetchedSource( + status="error", + http_status=403, + body=b'{"message":"API rate limit exceeded"}', + error="http_403", + ) + + report = run_agent_market_watch( + registry, + registry_path="registry.json", + mode="live", + previous_report=previous_report, + fetcher=fetcher, + generated_at="2026-06-26T00:00:00+00:00", + ) + + source = report["candidates"][0]["sources"][0] + assert report["summary"]["failure_count"] == 0 + assert report["summary"]["changed_candidates"] == 0 + assert source["status"] == "carried_forward_rate_limited" + assert source["version"] == "v1.2.3" + assert source["changed_since_reference"] is False + assert source["error"] is None + assert source["carried_forward_from_previous"] is True diff --git a/apps/api/tests/test_ai_agent_market_radar_readback.py b/apps/api/tests/test_ai_agent_market_radar_readback.py index cd99511b..85a29c95 100644 --- a/apps/api/tests/test_ai_agent_market_radar_readback.py +++ b/apps/api/tests/test_ai_agent_market_radar_readback.py @@ -13,12 +13,13 @@ def test_ai_agent_market_radar_readback_committed_snapshot_is_safe(): assert payload["schema_version"] == "ai_agent_market_radar_readback_v1" assert payload["summary"]["overall_completion_percent"] == 42.2 assert payload["summary"]["market_candidates"] == 13 - assert payload["summary"]["market_sources"] == 34 - assert payload["summary"]["changed_candidates"] == 13 - assert payload["summary"]["integration_blocked_candidates"] == 13 + assert payload["summary"]["market_sources"] == 36 + assert payload["summary"]["changed_candidates"] == 5 + assert payload["summary"]["integration_blocked_candidates"] == 5 + assert payload["summary"]["source_failures"] == 0 assert payload["summary"]["replacement_decisions_approved"] == 0 assert "gitea_main_deploy_marker" not in payload["source_scope"] - assert payload["source_scope"]["gitea_main_evidence_basis_commit"] == "279f9531" + assert payload["source_scope"]["gitea_main_evidence_basis_commit"] == "61cf5024" policy = payload["policy"] assert policy["read_only"] is True @@ -58,4 +59,4 @@ def test_ai_agent_market_radar_readback_contains_market_practice_plan(): } assert candidates["openclaw_incumbent"]["gate_status"] == "production_baseline" assert candidates["nemo_nemotron_fabric"]["gate_status"] == "integration_blocked" - assert candidates["langgraph_incident_kernel"]["gate_status"] == "integration_blocked" + assert candidates["langgraph_incident_kernel"]["gate_status"] == "registered_no_review" diff --git a/apps/api/tests/test_ai_agent_market_radar_readback_api.py b/apps/api/tests/test_ai_agent_market_radar_readback_api.py index 291d308f..a3173844 100644 --- a/apps/api/tests/test_ai_agent_market_radar_readback_api.py +++ b/apps/api/tests/test_ai_agent_market_radar_readback_api.py @@ -20,9 +20,10 @@ def test_ai_agent_market_radar_readback_endpoint_returns_committed_snapshot(): assert data["schema_version"] == "ai_agent_market_radar_readback_v1" assert data["summary"]["overall_completion_percent"] == 42.2 assert data["summary"]["market_candidates"] == 13 - assert data["summary"]["market_sources"] == 34 - assert data["summary"]["changed_candidates"] == 13 - assert data["summary"]["integration_blocked_candidates"] == 13 + assert data["summary"]["market_sources"] == 36 + assert data["summary"]["changed_candidates"] == 5 + assert data["summary"]["integration_blocked_candidates"] == 5 + assert data["summary"]["source_failures"] == 0 assert data["summary"]["replacement_decisions_approved"] == 0 assert data["policy"]["openclaw_replacement_approved"] is False assert data["policy"]["telegram_send_approved"] is False diff --git a/apps/api/tests/test_ai_technology_radar_readback.py b/apps/api/tests/test_ai_technology_radar_readback.py index 145eb1f5..8ca8bfef 100644 --- a/apps/api/tests/test_ai_technology_radar_readback.py +++ b/apps/api/tests/test_ai_technology_radar_readback.py @@ -13,15 +13,17 @@ def test_ai_technology_radar_readback_committed_snapshot_is_safe(): assert payload["schema_version"] == "ai_technology_radar_readback_v1" assert payload["summary"]["overall_completion_percent"] == 42.2 assert payload["summary"]["ai_technology_radar_completion_percent"] == 100.0 - assert payload["summary"]["technology_count"] == 20 + assert payload["summary"]["technology_count"] == 21 assert payload["summary"]["technology_area_count"] == 6 - assert payload["summary"]["source_count"] == 47 + assert payload["summary"]["source_count"] == 52 assert payload["summary"]["source_failures"] == 0 - assert payload["summary"]["high_priority_count"] == 14 + assert payload["summary"]["changed_technologies"] == 5 + assert payload["summary"]["review_queue_count"] == 5 + assert payload["summary"]["high_priority_count"] == 15 assert payload["summary"]["rolling_update_status"] == ( "near_real_time_watch_ready_integration_gated" ) - assert payload["source_scope"]["gitea_main_evidence_basis_commit"] == "683428bd" + assert payload["source_scope"]["gitea_main_evidence_basis_commit"] == "61cf5024" policy = payload["policy"] assert policy["read_only"] is True @@ -59,3 +61,22 @@ def test_ai_technology_radar_readback_contains_roles_and_cadence(): assert payload["report_contract"]["api_endpoint"] == ( "/api/v1/agents/ai-technology-radar-readback" ) + + +def test_ai_technology_radar_readback_contains_market_source_alignment(): + payload = load_latest_ai_technology_radar_readback() + + queue = {row["technology_id"]: row for row in payload["high_priority_review_queue"]} + assert "modelcontextprotocol_sdk" in queue + assert "a2a_protocol" in queue + assert queue["modelcontextprotocol_sdk"]["gate_status"] == ( + "scorecard_required_before_integration" + ) + + alignment_sources = {row["source"] for row in payload["primary_source_alignment"]} + assert "https://modelcontextprotocol.io/specification/2025-06-18" in alignment_sources + assert "https://a2a-protocol.org/latest/" in alignment_sources + assert ( + "https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/" + in alignment_sources + ) diff --git a/apps/api/tests/test_ai_technology_radar_readback_api.py b/apps/api/tests/test_ai_technology_radar_readback_api.py index ddc343b5..aff2d19a 100644 --- a/apps/api/tests/test_ai_technology_radar_readback_api.py +++ b/apps/api/tests/test_ai_technology_radar_readback_api.py @@ -20,9 +20,12 @@ def test_ai_technology_radar_readback_endpoint_returns_committed_snapshot(): assert data["schema_version"] == "ai_technology_radar_readback_v1" assert data["summary"]["overall_completion_percent"] == 42.2 assert data["summary"]["ai_technology_radar_completion_percent"] == 100.0 - assert data["summary"]["technology_count"] == 20 - assert data["summary"]["source_count"] == 47 + assert data["summary"]["technology_count"] == 21 + assert data["summary"]["source_count"] == 52 + assert data["summary"]["changed_technologies"] == 5 + assert data["summary"]["review_queue_count"] == 5 assert data["summary"]["source_failures"] == 0 + assert len(data["primary_source_alignment"]) == 6 assert data["policy"]["read_only"] is True assert data["policy"]["sdk_installation_approved"] is False assert data["policy"]["telegram_send_approved"] is False diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 7fb74c24..617e7ade 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -3317,9 +3317,21 @@ "nearRealTime": "近即時監控", "reviewQueue": "審核佇列", "reviewQueueValue": "待審 {queue} / 高優先 {high} / 變更 {changed}", + "highPriorityTitle": "高優先市場審核佇列", + "emptyReviewQueue": "目前沒有高優先審核項目", + "primarySourceTitle": "官方 Primary Source 對齊", "domainsTitle": "市場技術領域", "rollingTitle": "日週月報與滾動更新控制", "rolesTitle": "專業 Agent 分工", + "reviewLabels": { + "nextGate": "下一個 Gate", + "reviewNeeds": "審核需求", + "cost": "成本", + "dependency": "依賴", + "security": "資安", + "awoooiGate": "AWOOOI Gate", + "agentAssignment": "Agent 分工" + }, "metrics": { "progress": "雷達完成度", "technologies": "監控技術", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 7fb74c24..617e7ade 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -3317,9 +3317,21 @@ "nearRealTime": "近即時監控", "reviewQueue": "審核佇列", "reviewQueueValue": "待審 {queue} / 高優先 {high} / 變更 {changed}", + "highPriorityTitle": "高優先市場審核佇列", + "emptyReviewQueue": "目前沒有高優先審核項目", + "primarySourceTitle": "官方 Primary Source 對齊", "domainsTitle": "市場技術領域", "rollingTitle": "日週月報與滾動更新控制", "rolesTitle": "專業 Agent 分工", + "reviewLabels": { + "nextGate": "下一個 Gate", + "reviewNeeds": "審核需求", + "cost": "成本", + "dependency": "依賴", + "security": "資安", + "awoooiGate": "AWOOOI Gate", + "agentAssignment": "Agent 分工" + }, "metrics": { "progress": "雷達完成度", "technologies": "監控技術", diff --git a/apps/web/src/app/[locale]/governance/tabs/agent-market-tab.tsx b/apps/web/src/app/[locale]/governance/tabs/agent-market-tab.tsx index d9aad99e..bd927742 100644 --- a/apps/web/src/app/[locale]/governance/tabs/agent-market-tab.tsx +++ b/apps/web/src/app/[locale]/governance/tabs/agent-market-tab.tsx @@ -59,6 +59,15 @@ type ProfessionalJudgmentPreset = { question: string escalation: string } +type TechnologyReviewLabels = { + nextGate: string + reviewNeeds: string + cost: string + dependency: string + security: string + awoooiGate: string + agentAssignment: string +} function formatDateTime(value: string): string { const date = new Date(value) @@ -88,6 +97,14 @@ function formatTechnologyArea(value: string): string { return TECHNOLOGY_AREA_LABELS[value] ?? value.replace(/_/g, ' ') } +function formatSourceHost(value: string): string { + try { + return new URL(value).hostname.replace(/^www\./, '') + } catch { + return value + } +} + function normalizeAgentName(value: string): string { const normalized = value.toLowerCase() if (normalized.includes('openclaw')) return 'openclaw' @@ -396,6 +413,84 @@ function ProfessionalAgentRoleCard({ role }: { role: AiTechnologyRadarReadback[' ) } +function HighPriorityReviewCard({ + item, + labels, +}: { + item: AiTechnologyRadarReadback['high_priority_review_queue'][number] + labels: TechnologyReviewLabels +}) { + return ( +
+
+
+ + {item.display_name} + +
+ + +
+
+ +
+ + {item.next_gate} + + +
+ + + +
+
+
+ ) +} + +function PrimarySourceAlignmentCard({ + item, + labels, +}: { + item: AiTechnologyRadarReadback['primary_source_alignment'][number] + labels: TechnologyReviewLabels +}) { + return ( +
+
+ + {item.practice} + + +
+ + + + + {item.agent_assignment} + +
+ ) +} + function ReportCadenceCard({ cadence }: { cadence: AiTechnologyReportCadenceReadback['report_cadences'][number] }) { return (
+ +
+ + + {t('technologyRadar.highPriorityTitle')} + +
+
+ {technologyRadar.high_priority_review_queue.length > 0 ? ( + technologyRadar.high_priority_review_queue.map(item => ( + + )) + ) : ( + + + + )} +
+ +
+ + + {t('technologyRadar.primarySourceTitle')} + +
+
+ {technologyRadar.primary_source_alignment.map(item => ( + + ))} +
@@ -2059,6 +2209,8 @@ export function AgentMarketTab() { .agent-market-kpi-grid, .agent-market-radar-metrics-grid, .agent-market-radar-contract-grid, + .agent-market-radar-review-grid, + .agent-market-radar-source-grid, .agent-market-radar-domain-grid, .agent-market-radar-rolling-grid, .agent-market-radar-role-grid, diff --git a/apps/web/src/lib/api-client.ts b/apps/web/src/lib/api-client.ts index ab6f0025..b1481a19 100644 --- a/apps/web/src/lib/api-client.ts +++ b/apps/web/src/lib/api-client.ts @@ -1134,7 +1134,17 @@ export interface AiTechnologyRadarReadback { changed_count: number representative_technologies: string[] }> - high_priority_review_queue: Array> + high_priority_review_queue: Array<{ + technology_id: string + display_name: string + technology_area: string + evaluation_priority: string + gate_status: string + next_gate: string + requires_cost_approval: boolean + requires_dependency_approval: boolean + requires_security_review: boolean + }> professional_agent_roles: Array<{ agent: string professional_role: string @@ -1148,6 +1158,12 @@ export interface AiTechnologyRadarReadback { output: string gate: string }> + primary_source_alignment: Array<{ + practice: string + source: string + awoooi_gate: string + agent_assignment: string + }> integration_candidates: Array<{ technology_id: string display_name: string diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index c077817d..3835d46e 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -29,6 +29,30 @@ - active runtime gate 仍為 `0`,runtime execution / Ansible apply / service restart / Telegram success send / KM writeback / PlayBook trust writeback 仍全部 `false / 0`。 **邊界**:本段只新增候選升級合約、Telegram 顯示與 callback 回傳;不 SSH、不重啟服務、不執行 Ansible apply、不發 Telegram 測試、不寫 KM、不改 PlayBook trust、不讀 secret、不開 runtime gate。 +## 2026-06-26|P2-412 市場主流 AI Agent / AI 技術定期評估本地完成 + +**背景**:使用者要求調整 OpenClaw / NemoTron 評估規則,不再以身份保護任一 Agent,而是用市場主流 AI Agent、AI 技術、版本與 official primary-source 證據說話;同時要求產品持續監控新的 AI 技術、新版本、新整合與運用方式,並能在治理頁看見 AI Agent 的專業判斷。 + +**完成**: +- 更新 `docs/ai/agent-market-watch-sources.v1.json`:日期 `2026-06-26`,新增 NVIDIA NeMo Agent Toolkit PyPI、Nemotron 3 Ultra 官方文章與繁中 cadence / purpose。 +- 更新 `docs/ai/ai-technology-watch-sources.v1.json`:日期 `2026-06-26`,納入 NVIDIA Nemotron / NeMo、MCP 官方 spec / roadmap、A2A 官方文件、OpenTelemetry GenAI semantic conventions;OpenAI 仍以官方 Agents SDK docs、PyPI、npm 作為自動 watchlist。 +- 新增 GitHub API rate-limit carry-forward:若 GitHub `403 rate limit` 且上一份報告已有成功資料,沿用上一份 version / hash,標記 `carried_forward_rate_limited`,不把短期限流算成來源失敗。 +- 產生 2026-06-26 Agent market artifacts:`agent_market_watch_report`、`agent_market_integration_review_full`、`agent_market_discovery_review`、`agent_market_discovery_classification`、`agent_market_watch_promotion_review`、`agent_market_governance_snapshot`。 +- 產生 2026-06-26 AI technology artifacts:`ai_technology_watch_report`、`ai-agent-market-radar-readback.snapshot.json`、`ai-technology-radar-readback.snapshot.json`、兩份繁中 Markdown readback。 +- `agent-market` 前端新增「高優先市場審核佇列」與「官方 Primary Source 對齊」區塊,顯示 OpenAI / NVIDIA / LangGraph / MCP / A2A / OpenTelemetry GenAI 的 AWOOOI Gate 與 Agent 分工。 + +**本地數據**: +- Agent market watch:候選 `13`、來源 `36`、changed `5`、integration queue `5`、source failure `0`。 +- Agent market governance:blocked from integration `5`、replacement decisions approved `0`、sdk installations approved `0`、paid API approved `0`、production changes approved `0`。 +- AI technology watch:技術 `21`、來源 `52`、changed `5`、review queue `5`、high priority `15`、source failure `0`。 +- AI technology radar readback:整體治理完成度仍 `42.2%`;AI 技術雷達完成度 `100%`;官方 primary-source alignment `6`。 + +**邊界**: +- 本輪只做 read-only watch、source hash / version comparison、governance snapshot、Markdown、API readback 與前端可視化。 +- SDK install、paid API、provider switch、Telegram live send、Bot API、host write、kubectl、production routing、OpenClaw replacement 全部維持 `0 / false`。 +- OpenAI 官網文章在自動 watcher 內回 `403`,未放入自動 watchlist;仍作為人工驗證的 primary-source alignment 參考。 + +**下一步**:執行 regression / typecheck / i18n mirror / sensitive scan / browser smoke,通過後提交、推送 Gitea main,等待正式 CD,再做 production API 與 `/zh-TW/governance?tab=agent-market` desktop / mobile readback。 ## 2026-06-26|AI Agent 自動化成熟度與接管缺口正式上線:從 Sensor 到 Learning 的 Gate 一眼可見 diff --git a/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md b/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md index d67e6626..886207e1 100644 --- a/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md +++ b/docs/ai/AI_AGENT_AUTOMATION_WORKLIST_2026-06-04.md @@ -13,14 +13,14 @@ | 項目 | 目前完成度 | 本次判讀 | 下一個有效動作 | |---|---:|---|---| | 本工作清單細化 | 100% | 已把所有工作流拆成 P0 / P1 / P2 / P3 | 同步 LOGBOOK 與 MASTER §8 | -| AgentOps 治理與可觀測基礎 | 92% | 已有 schema / snapshot / API / UI / gate;P2-407 已正式把日報 / 週報 / 月報、P2-406B receipt owner review、P2-004 drift monitor 與 P2-403J 報表真相串成 no-write 分析草稿;P2-408 已正式部署中 / 低風險白名單、dry-run verifier、rollback proof、audit reason 與 production readback;P2-409 已正式完成高風險 Owner Review Queue production API / governance UI / desktop smoke;P2-410 已正式完成 action audit ledger production API / governance UI / desktop + mobile smoke;P2-411 已本地完成 owner acceptance / handoff event / RAG proposal no-write 基線 | 推送 P2-411 正式讀回;補 Runs mobile smoke;Observability / Tenants / Knowledge Base 接同一資產沉澱總帳 | +| AgentOps 治理與可觀測基礎 | 93% | 已有 schema / snapshot / API / UI / gate;P2-407 已正式把日報 / 週報 / 月報、P2-406B receipt owner review、P2-004 drift monitor 與 P2-403J 報表真相串成 no-write 分析草稿;P2-408 已正式部署中 / 低風險白名單、dry-run verifier、rollback proof、audit reason 與 production readback;P2-409 已正式完成高風險 Owner Review Queue production API / governance UI / desktop smoke;P2-410 已正式完成 action audit ledger production API / governance UI / desktop + mobile smoke;P2-411 已正式完成 owner acceptance / handoff event / RAG proposal no-write 基線;P2-412 已本地刷新市場主流 AI Agent / AI 技術 primary-source 雷達 | 推送 P2-412 正式讀回;補 Runs mobile smoke;Observability / Tenants / Knowledge Base 接同一資產沉澱總帳 | | OpenClaw / Hermes / NemoTron 佈建布局 | 47% | 目前是只讀 layout、治理頁可視化與 P2-411 event bus 接手協議,不是主機上 live agent worker | 建立 runtime agent registry 與 AgentSession ledger | | Agent 主動溝通 / 接手 / 學習 | 設計證據 100%,runtime 38% | 互動證據、War Room、readback gate 與 P2-411 no-write handoff event / RAG proposal 基線已齊;Event Bus publish、RAG writeback、PlayBook trust 寫入仍未開 | 先完成 P2-411 正式讀回,再做 Owner-approved writeback | | 日報 / 週報 / 月報 | 可視化 100%,no-write 分析 100%,實發 0% | 報表批准包、P2-406B owner review、P2-407 no-write analyst 與治理頁已可見;Telegram 實發、receipt production write、AI analysis live runtime 仍為 0 | P2-408 白名單與 P2-406F no-send scheduler | | Telegram Bot / TG 群組 | 契約 54%,實發 0% | no-send preview、dry-run、owner review gate、P2-406B receipt readback owner review、Telegram egress inventory / owner request draft、P2-409 高風險 queue、P2-410 no-send / no-new-bypass audit template 與 P2-411 Telegram 出口驗收 lane 已串起;live send / Bot API / Gateway queue 未批准 | P2-406D no-send envelope ledger、P2-406E failure-only digest route;收到合格 owner approval 後才評估 P2-406C one-message canary | | 中低風險自動化 | 66% | P2-408 已正式部署 6 筆候選白名單、5 個 dry-run verifier、5 個 rollback proof、6 個 audit reason 與 3 類高風險分流;P2-409 已把 high / critical 與 medium live execution 風險接到 Owner Review Queue;P2-410 已補 low / medium audit event;P2-411 已本地建立中低風險 worker 範圍驗收 lane;實際 auto worker 仍未開 | P2-411 正式讀回;之後才評估 dry-run auto worker | | 高風險審核 | 87% | P2-409 已正式完成 7 個 high / critical queue item、7 份 approval packet、8 條 rejection guard、7 份 reviewer checklist;P2-410 已補 high-risk pause、critical rejection 與 owner queue audit event;P2-411 已本地建立 high / critical owner acceptance lanes;owner accepted 仍為 0 | P2-411 正式讀回與 P2-412 fixture-only rehearsal | -| 市場主流 Agent 追蹤 | 55% | 已有市場治理頁與 weekly watch;需擴充成固定外部來源評分與回放 | P2-412 週期性 market watch + scorecard | +| 市場主流 Agent 追蹤 | 74% | 已有市場治理頁、weekly watch 與 2026-06-26 P2-412 primary-source refresh;Agent 市場側 13 候選 / 36 來源 / 5 changed / source failure 0;AI 技術側 21 技術 / 52 來源 / 5 changed / source failure 0;OpenAI / NVIDIA / LangGraph / MCP / A2A / OpenTelemetry GenAI 已納入主流實務對齊 | P2-412 正式部署與 production readback;下一步才進 P2-413 版本生命週期 | | 版本生命週期自動化 | 45% | repo-only snapshot 與採用批准包已完成;安裝、升級、PR creation、host update 仍未開 | P2-413 版本情報與 no-write upgrade proposal | 目前最重要的事實邊界: @@ -69,8 +69,8 @@ | 8 | P2-408 | P0 | 中 / 低風險自動處理白名單 | OpenClaw + SRE | 正式驗證完成 | `ai_agent_low_medium_risk_whitelist_v1` schema / snapshot / API / tests / governance UI 已正式部署;feature commit `b36f4b97`、deploy marker `cd1c4407`、Gitea code-review `#3209`、CD `#3208` success;production API 回 current `P2-408`、next `P2-409`、completion `100`;6 筆候選、3 low、3 medium、5 個 dry-run verifier、5 個 rollback proof、6 個 audit reason、3 類 high-risk redirect、3 個 owner gate、27 個 blocked runtime action;desktop / mobile smoke 可見 OpenClaw / Hermes / NemoTron、AwoooI SRE 戰情室與 `live total 0`;auto worker / Telegram / Gateway / Bot API / production write / secret read / paid API / host write / kubectl 仍為 0 | | 9 | P2-409 | P0 | 高風險 Owner Review Queue | OpenClaw | 正式驗證完成 | `ai_agent_high_risk_owner_review_queue_v1` schema / snapshot / API / tests / governance UI 已正式部署;deploy marker `38e60192`;production API 回 current `P2-409`、next `P2-410`、completion `100`;7 個 high / critical queue item、7 份 approval packet、8 條 rejection guard、7 份 reviewer checklist、42 個 blocked runtime action;owner response accepted、live execution、Gateway queue、Telegram send、Bot API、receipt production write、production write、secret read、paid API、host write、kubectl、destructive operation 全部 `0` | | 10 | P2-410 | P0 | Agent action audit ledger | Hermes + Security | 正式驗證完成 | `ai_agent_action_audit_ledger_v1` schema / snapshot / API / tests / governance UI 已正式部署;API deploy marker `38e60192`、UI deploy marker `7a9e1cfd`;production API 回 current `P2-410`、next `P2-411`、completion `100`;desktop / mobile browser smoke 可見 P2-410、行動審計事件、Verifier receipt gates、`live write total 0`;7 個 source readback、8 個 audit event template、4 個 low / medium event、3 個 high-risk event、1 個 critical event、2 個 report gap event、2 個 Telegram event、5 個 verifier receipt gate、48 個 required audit field、23 個 blocked runtime action;audit DB / timeline / KM / PlayBook trust / Gateway / Telegram / Bot API / production write 全部 `0` | -| 11 | P2-411 | P1 | Owner acceptance / Agent Event Bus / RAG proposal no-write 基線 | OpenClaw + Hermes + NemoTron | 本地驗證完成,待正式讀回 | `ai_agent_action_owner_acceptance_event_bus_v1` schema / snapshot / API / tests / governance UI 已完成;6 條 owner acceptance lane、6 個 handoff event template、4 個 RAG memory proposal、6 個 verifier gate、38 個 required owner field、16 個 blocked runtime action;owner response received / accepted、event bus publish、KM / PlayBook trust write、Gateway queue、Telegram send、Bot API、worker dispatch、production write 全部 `0` | -| 12 | P2-412 | P1 | 市場主流 AI Agent 定期評估 | Market + OpenClaw | 待辦 | 每週 primary-source watch、候選入池、scorecard、替換 gate | +| 11 | P2-411 | P1 | Owner acceptance / Agent Event Bus / RAG proposal no-write 基線 | OpenClaw + Hermes + NemoTron | 正式驗證完成 | `ai_agent_action_owner_acceptance_event_bus_v1` schema / snapshot / API / tests / governance UI 已正式讀回;6 條 owner acceptance lane、6 個 handoff event template、4 個 RAG memory proposal、6 個 verifier gate、38 個 required owner field、16 個 blocked runtime action;owner response received / accepted、event bus publish、KM / PlayBook trust write、Gateway queue、Telegram send、Bot API、worker dispatch、production write 全部 `0` | +| 12 | P2-412 | P1 | 市場主流 AI Agent 定期評估 | Market + OpenClaw | 本地驗證完成,待正式讀回 | 2026-06-26 已刷新 Agent market watch / integration review / discovery / promotion / governance snapshot / AI technology watch / readback snapshot;Agent 市場側 13 候選 / 36 來源 / 5 changed / 5 blocked / source failure 0;AI 技術側 21 技術 / 52 來源 / 5 changed / 5 review queue / source failure 0;新增官方 primary-source 對齊:OpenAI Agents SDK、NVIDIA Nemotron / NeMo、LangGraph、MCP、A2A、OpenTelemetry GenAI;所有 SDK install / paid API / provider switch / Telegram live send / host write / OpenClaw replacement 仍為 `0 / false` | | 13 | P2-413 | P1 | AI Agent / 套件 / 工具 / 服務 / 主機版本生命週期 | DevOps + NemoTron | 待辦 | 版本 inventory、release diff、升級建議、PR 草稿 lane、rollback plan | | 14 | P2-414 | P1 | MCP tool registry / capability attestation | Security + DevOps | 待辦 | 工具能力、風險、scope、owner、consent、blocked actions 可讀 | | 15 | P2-415 | P1 | RAG / KM / PlayBook 成長閉環 | Hermes + OpenClaw | 待辦 | memory type、retention、trust update proposal、negative reinforcement | @@ -159,7 +159,8 @@ | P2-408 中 / 低風險自動處理白名單 | 100%(正式驗證完成) | 已把 P2-407 no-write 分析建議轉成中 / 低風險候選白名單、dry-run verifier、rollback proof、audit reason 與高風險分流,並完成 production API / desktop / mobile browser smoke;下一步是 P2-409 Owner Review Queue | `ai_agent_low_medium_risk_whitelist_v1` schema、`docs/evaluations/ai_agent_low_medium_risk_whitelist_2026-06-18.json`、`GET /api/v1/agents/agent-low-medium-risk-whitelist`、governance `automation-inventory` P2-408 卡片;feature commit `b36f4b97`、deploy marker `cd1c4407`、Gitea code-review `#3209` / CD `#3208` success;6 筆 whitelist candidate、3 low、3 medium、5 個 dry-run verifier、5 個 rollback proof、6 個 audit reason、3 類 high-risk redirect、3 個 owner gate、27 個 blocked runtime action;本地 API/service regression `12 passed`;production API assert PASS;desktop `1280x720` / mobile `390x844` 可見 P2-408、OpenClaw / Hermes / NemoTron、AwoooI SRE 戰情室、`live total 0`;console error `0`、水平溢位 `0`、工作視窗片語命中 `0`;web typecheck 因本 worktree 缺 `apps/web/node_modules` / `tsc` 未執行;auto worker / low risk execution / medium risk execution / Telegram send / Gateway queue / Bot API / receipt production write / production write / secret read / paid API / host write / kubectl / destructive operation 全部 `0 / false` | | P2-409 高風險 Owner Review Queue | 100%(正式驗證完成) | 已把 high / critical、Telegram / Gateway / Bot API、host / kubectl、secret / paid provider、report source gap work item write 與 OpenClaw 角色調整全部固定為 paused owner review,並完成 production API / governance UI / desktop smoke;下一步由 P2-410 audit ledger 與 P2-411 Event Bus 承接 | `ai_agent_high_risk_owner_review_queue_v1` schema、`docs/evaluations/ai_agent_high_risk_owner_review_queue_2026-06-19.json`、`GET /api/v1/agents/agent-high-risk-owner-review-queue`;deploy marker `38e60192`;7 個 source readback、7 個 queue item、5 個 high、2 個 critical、7 份 approval packet、8 條 rejection guard、7 份 reviewer checklist、42 個 blocked runtime action;本地 API/service regression `13 passed`;production API readback `HTTP 200`;owner response accepted / live execution / Gateway / Telegram / Bot API / production write / secret read / paid API / host write / kubectl / destructive operation 全部 `0 / false` | | P2-410 AI Agent action audit ledger | 100%(正式驗證完成) | 已把 AI Agent 分類、拒收、no-send preview、high-risk pause、critical rejection 與 result route blocked 固定成 immutable audit event template 與 verifier receipt gate;production API 已讀回,治理頁 action audit projection 已正式驗證;下一步是 P2-411 Event Bus | `ai_agent_action_audit_ledger_v1` schema、`docs/evaluations/ai_agent_action_audit_ledger_2026-06-19.json`、`GET /api/v1/agents/agent-action-audit-ledger`;API deploy marker `38e60192`、UI deploy marker `7a9e1cfd`;7 個 source readback、8 個 audit event template、4 個 low / medium event、3 個 high-risk event、1 個 critical event、2 個 report gap event、2 個 Telegram event、5 個 verifier receipt gate、48 個 required audit field、23 個 blocked runtime action;本地 API/service regression `11 passed`,P2-409 + P2-410 regression `24 passed`;production API readback `HTTP 200`;desktop / mobile governance smoke 可見 P2-410、行動審計事件、Verifier receipt gates、`live write total 0`;console error `0`、水平溢位 `false`、工作視窗片語 `0`;audit DB / timeline / KM / PlayBook trust / Gateway / Telegram / Bot API / production write 全部 `0 / false` | -| P2-411 Owner acceptance / Agent Event Bus / RAG proposal no-write 基線 | 100%(本地驗證完成,待正式讀回) | 已把 P2-409 高風險 queue、P2-410 action audit ledger、12-Agent War Room 與 communication learning contract 收斂成 owner acceptance lane、handoff event template、RAG memory proposal 與 verifier gate;治理頁可看到 AI Agent 互相接手、學習提案與阻擋原因,但 event bus publish / RAG write / Telegram send 尚未開 | `ai_agent_action_owner_acceptance_event_bus_v1` schema、`docs/evaluations/ai_agent_action_owner_acceptance_event_bus_2026-06-19.json`、`GET /api/v1/agents/agent-action-owner-acceptance-event-bus`;6 條 owner acceptance lane、6 個 handoff event template、4 個 RAG memory proposal、6 個 verifier gate、38 個 required owner field、16 個 blocked runtime action;本地 P2-409 + P2-410 + P2-411 regression `35 passed`、web typecheck、JSON parse、Python compile、i18n parity、source-control owner response guard、security mirror progress guard、IWOOOS config control guard、doc secret sanity、`git diff --check` 通過;owner response received / accepted、external response ingested、event bus publish、audit DB / timeline / KM / PlayBook trust write、Gateway queue、Telegram send、Bot API、worker dispatch、receipt / production write、secret、paid API、host、kubectl、destructive 全部 `0 / false` | +| P2-411 Owner acceptance / Agent Event Bus / RAG proposal no-write 基線 | 100%(正式驗證完成) | 已把 P2-409 高風險 queue、P2-410 action audit ledger、12-Agent War Room 與 communication learning contract 收斂成 owner acceptance lane、handoff event template、RAG memory proposal 與 verifier gate;正式 production API / governance UI 已讀回;event bus publish / RAG write / Telegram send 尚未開 | `ai_agent_action_owner_acceptance_event_bus_v1` schema、`docs/evaluations/ai_agent_action_owner_acceptance_event_bus_2026-06-19.json`、`GET /api/v1/agents/agent-action-owner-acceptance-event-bus`;6 條 owner acceptance lane、6 個 handoff event template、4 個 RAG memory proposal、6 個 verifier gate、38 個 required owner field、16 個 blocked runtime action;owner response received / accepted、external response ingested、event bus publish、audit DB / timeline / KM / PlayBook trust write、Gateway queue、Telegram send、Bot API、worker dispatch、receipt / production write、secret、paid API、host、kubectl、destructive 全部 `0 / false` | +| P2-412 市場主流 AI Agent 定期評估 | 100%(本地驗證完成,待正式讀回) | 已把「用市場主流評估數據說話」落成 read-only pipeline:AI Agent market watch、integration review、discovery review / classification、promotion review、governance snapshot、AI technology watch、market radar readback 與 technology radar readback;治理頁 `agent-market` 新增高優先審核佇列與官方 primary-source 對齊,可看到 OpenAI / NVIDIA / LangGraph / MCP / A2A / OpenTelemetry GenAI 對 AWOOOI 的專業分工與 Gate | `docs/evaluations/agent_market_watch_report_2026-06-26.json`:13 候選 / 36 來源 / 5 changed / failure 0;`docs/evaluations/agent_market_governance_snapshot_2026-06-26.json`:5 blocked / replacement approved 0;`docs/evaluations/ai_technology_watch_report_2026-06-26.json`:21 技術 / 52 來源 / 5 changed / 5 review queue / failure 0;`docs/operations/ai-agent-market-radar-readback.snapshot.json` 與 `docs/operations/ai-technology-radar-readback.snapshot.json` 已更新;SDK install、paid API、provider switch、Telegram live send、Bot API、host write、production routing、OpenClaw replacement 全部 `0 / false` | | Owner response 預檢與拒收邊界 | 100% | P2-143 已完成正式部署與 production readback;承接 P2-141 input prep 與 P2-142 War Room,只建立 owner / verifier / rollback / maintenance / live-apply 五類外部回覆的 intake 預檢、必填欄位與拒收規則;正式 owner response 尚未收到、未接受、未寫入 | `ai_agent_result_capture_release_decision_owner_response_preflight_v1`、`GET /api/v1/agents/agent-result-capture-release-decision-owner-response-preflight`、feature commit `755b0a8d`、deploy marker `667d6329`、Gitea code-review `2961` / CD `2960` success、5 個 response intake lane、18 個 required owner field、6 個 validation check、6 個 rejection guard、5 個 operator action;owner response received / accepted / redacted payload / reviewer queue / Gateway / Telegram / Bot API / production write / secret read / destructive operation 全為 `0` | | Owner response 回讀狀態 | 100% | P2-144 已完成正式部署與 production readback;承接 P2-143 preflight,只讀回五類外部回覆仍未收到、未接受、未拒絕、未保存 | `ai_agent_result_capture_release_decision_owner_response_readback_v1`、`GET /api/v1/agents/agent-result-capture-release-decision-owner-response-readback`、feature commit `8795f100`、deploy marker `ac938037`、Gitea code-review `2965` / CD `2964` success、5 個 response readback lane、18 個 required owner field、6 個 readback validation check、6 個 readback rejection guard、5 個 operator action、waiting external response `5`、no external response received `5`;owner response received / accepted / redacted payload / reviewer queue / Gateway / Telegram / Bot API / production write / secret read / destructive operation 全為 `0` | | 本工作清單與分析報告 | 100% | 已完成 | 本 MD 文件 | diff --git a/docs/ai/agent-market-watch-sources.v1.json b/docs/ai/agent-market-watch-sources.v1.json index 7f790546..8180dfd3 100644 --- a/docs/ai/agent-market-watch-sources.v1.json +++ b/docs/ai/agent-market-watch-sources.v1.json @@ -1,11 +1,11 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "schema_version": "agent_market_watch_sources_v1", - "updated_at": "2026-06-25", - "purpose": "Primary-source watch list for recurring AI Agent market updates. A change here is not replacement approval; it only triggers refreshed evaluation.", + "updated_at": "2026-06-26", + "purpose": "AI Agent 市場定期評估的 primary-source watchlist;來源變更只會觸發重新評估,不代表取代或整合已批准。", "cadence": { - "weekly_market_watch": "Every Monday 09:00 Asia/Taipei, produce a read-only market watch report and full-scope integration/discovery review summary.", - "monthly_integration_review": "After operator review, commit a reviewed baseline for market watch, integration review, and discovery intake.", + "weekly_market_watch": "每週一 09:00 Asia/Taipei 產生只讀市場觀測報告與整合 / 探索 review 摘要。", + "monthly_integration_review": "經 operator review 後,提交 market watch、integration review 與 discovery intake 的 reviewed baseline。", "trigger_on_major_version": true }, "policy": { @@ -99,12 +99,24 @@ "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html", "reference_version": null }, + { + "source_id": "nvidia_nemo_agent_toolkit_pypi", + "type": "pypi", + "url": "https://pypi.org/pypi/nvidia-nat/json", + "reference_version": null + }, { "source_id": "nvidia_nemotron_developer_page", "type": "docs", "url": "https://developer.nvidia.com/topics/ai/nemotron", "reference_version": null }, + { + "source_id": "nvidia_nemotron_3_ultra_blog", + "type": "docs", + "url": "https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/", + "reference_version": null + }, { "source_id": "nvidia_nim_llm_docs", "type": "docs", diff --git a/docs/ai/ai-technology-watch-sources.v1.json b/docs/ai/ai-technology-watch-sources.v1.json index 16ffb1c8..87ad5f9e 100644 --- a/docs/ai/ai-technology-watch-sources.v1.json +++ b/docs/ai/ai-technology-watch-sources.v1.json @@ -1,6 +1,6 @@ { "schema_version": "ai_technology_watch_sources_v1", - "updated_at": "2026-06-25", + "updated_at": "2026-06-26", "cadence": { "near_real_time_watch": "每 6 小時執行一次只讀 primary-source 檢查,偵測主流 AI 技術版本、文件與 release 變更。", "daily_triage": "每日彙整變更技術,依商業適用性、依賴風險、成本風險與資安風險分組。", @@ -89,6 +89,16 @@ "source_id": "nvidia_nemo_agent_toolkit_docs", "type": "docs", "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html" + }, + { + "source_id": "nvidia_nemo_agent_toolkit_pypi", + "type": "pypi", + "url": "https://pypi.org/pypi/nvidia-nat/json" + }, + { + "source_id": "nvidia_nemotron_3_ultra_blog", + "type": "docs", + "url": "https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/" } ] }, @@ -225,6 +235,16 @@ "source_id": "mcp_typescript_sdk_docs", "type": "docs", "url": "https://github.com/modelcontextprotocol/typescript-sdk" + }, + { + "source_id": "mcp_specification_2025_06_18", + "type": "docs", + "url": "https://modelcontextprotocol.io/specification/2025-06-18" + }, + { + "source_id": "mcp_roadmap", + "type": "docs", + "url": "https://modelcontextprotocol.io/development/roadmap" } ] }, @@ -252,7 +272,25 @@ { "source_id": "a2a_protocol_docs", "type": "docs", - "url": "https://github.com/a2aproject/A2A" + "url": "https://a2a-protocol.org/latest/" + } + ] + }, + { + "candidate_id": "opentelemetry_genai_semconv", + "display_name": "OpenTelemetry GenAI Semantic Conventions", + "technology_area": "evaluation_and_observability", + "integration_surface": "agent_llm_trace_semantic_conventions", + "awoooi_role": "Agent / LLM / MCP trace 欄位標準與日週月報可觀測基礎", + "evaluation_priority": "p1", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "source_id": "otel_genai_attributes", + "type": "docs", + "url": "https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/" } ] }, diff --git a/docs/evaluations/agent_market_discovery_classification_2026-06-26.json b/docs/evaluations/agent_market_discovery_classification_2026-06-26.json new file mode 100644 index 00000000..07ddf943 --- /dev/null +++ b/docs/evaluations/agent_market_discovery_classification_2026-06-26.json @@ -0,0 +1,266 @@ +{ + "candidates": [ + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/CopilotKit/CopilotKit", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "copilotkit/copilotkit", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 35522, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/trycua/cua", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "trycua/cua", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 19004, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/eigent-ai/eigent", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "eigent-ai/eigent", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 14394, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/omnigent-ai/omnigent", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "omnigent-ai/omnigent", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 4924, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/ag2ai/ag2", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "ag2ai/ag2", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 4710, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/xbtlin/ai-berkshire", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "xbtlin/ai-berkshire", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 2172, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/trpc-group/trpc-agent-go", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "trpc-group/trpc-agent-go", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 1408, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/vm0-ai/vm0", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "vm0-ai/vm0", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 1131, + "topics": [], + "watch_addition_recommended": false + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "approved_for_watch_registry_addition": false + }, + "archived": false, + "classification": "needs_manual_research", + "description": null, + "homepage": null, + "html_url": "https://github.com/Human-Agent-Society/CORAL", + "language": null, + "pushed_at": null, + "recommendation": "manual_research_before_watch_registry", + "recommended_role": "manual_research_required", + "repository_full_name": "human-agent-society/coral", + "required_next_gate": "manual_research_no_registry_change", + "risk_flags": [ + "requires_dependency_boundary_review" + ], + "stargazers_count": 754, + "topics": [], + "watch_addition_recommended": false + } + ], + "generated_at": "2026-06-26T03:41:46.640326+00:00", + "inputs": { + "discovery_review_generated_at": "2026-06-26T03:41:38.325050+00:00", + "metadata_source": "github_repository_api_summary" + }, + "policy": { + "auto_watch_registry_addition_approved": false, + "paid_api_calls_approved": false, + "production_changes_approved": false, + "raw_external_pages_committed": false, + "replacement_decision_allowed": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "schema_version": "agent_market_discovery_classification_v1", + "summary": { + "classification_counts": { + "needs_manual_research": 9 + }, + "classified_repositories": 9, + "production_changes_approved": 0, + "recommendation_counts": { + "manual_research_before_watch_registry": 9 + }, + "recommended_watch_additions": 0, + "shadow_or_canary_approved": 0, + "watch_only_or_defer": 9 + } +} diff --git a/docs/evaluations/agent_market_discovery_review_2026-06-26.json b/docs/evaluations/agent_market_discovery_review_2026-06-26.json new file mode 100644 index 00000000..a591337b --- /dev/null +++ b/docs/evaluations/agent_market_discovery_review_2026-06-26.json @@ -0,0 +1,277 @@ +{ + "candidate_drafts": [ + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/CopilotKit/CopilotKit", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "copilotkit/copilotkit", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 35522, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:36:59Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/trycua/cua", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "trycua/cua", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 19004, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:40:04Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/eigent-ai/eigent", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "eigent-ai/eigent", + "seen_before": false, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 14394, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:38:22Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/omnigent-ai/omnigent", + "new_since_previous_review": false, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "omnigent-ai/omnigent", + "seen_before": true, + "source_ids": [ + "github_ai_agent_topic", + "github_agent_framework_topic" + ], + "stargazers_count_max": 4924, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:37:54Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/ag2ai/ag2", + "new_since_previous_review": false, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "ag2ai/ag2", + "seen_before": true, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 4710, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:29:07Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/xbtlin/ai-berkshire", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "xbtlin/ai-berkshire", + "seen_before": false, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 2172, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:41:15Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/trpc-group/trpc-agent-go", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "trpc-group/trpc-agent-go", + "seen_before": false, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 1408, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T01:55:30Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/vm0-ai/vm0", + "new_since_previous_review": false, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "vm0-ai/vm0", + "seen_before": true, + "source_ids": [ + "github_ai_agent_topic" + ], + "stargazers_count_max": 1131, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:32:54Z" + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_registry_addition": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false + }, + "decision": "manual_primary_source_classification_required", + "html_url": "https://github.com/Human-Agent-Society/CORAL", + "new_since_previous_review": true, + "recommended_actions": [ + "verify_official_or_primary_sources", + "classify_role_against_awoooi_agent_taxonomy", + "add_to_watch_registry_only_after_manual_review", + "do_not_install_sdk_or_call_provider", + "do_not_enter_replacement_replay_before_market_scorecard" + ], + "recommended_next_gate": "classify_official_sources_then_update_watch_registry", + "repository_full_name": "human-agent-society/coral", + "seen_before": false, + "source_ids": [ + "github_agent_framework_topic" + ], + "stargazers_count_max": 754, + "status": "needs_primary_source_classification", + "updated_at_latest": "2026-06-26T03:05:29Z" + } + ], + "generated_at": "2026-06-26T03:41:38.325050+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "previous_review_generated_at": "2026-06-25T02:33:53.438200+00:00", + "source_registry_schema_version": "agent_market_watch_sources_v1", + "watch_report_generated_at": "2026-06-26T03:41:17.266599+00:00", + "watch_report_mode": "live" + }, + "policy": { + "auto_registry_addition_approved": false, + "paid_api_calls_approved": false, + "production_changes_approved": false, + "replacement_decision_allowed": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "schema_version": "agent_market_discovery_review_v1", + "summary": { + "already_watched_or_registered": 0, + "auto_registry_additions_approved": 0, + "discovered_items": 10, + "discovery_sources": 2, + "manual_classification_required": 9, + "new_manual_classification_required": 6, + "production_changes_approved": 0, + "shadow_or_canary_approved": 0, + "source_failures": 0, + "unique_repositories": 9 + } +} diff --git a/docs/evaluations/agent_market_governance_snapshot_2026-06-26.json b/docs/evaluations/agent_market_governance_snapshot_2026-06-26.json new file mode 100644 index 00000000..93b4ab9a --- /dev/null +++ b/docs/evaluations/agent_market_governance_snapshot_2026-06-26.json @@ -0,0 +1,833 @@ +{ + "candidate_groups": { + "production_baseline": [ + "openclaw_incumbent" + ], + "replay_or_integration_blocked": [ + "claude_agent_sdk_remediator", + "crewai_flows_crews", + "langgraph_incident_kernel", + "nemo_nemotron_fabric", + "openai_agents_sdk_coordinator" + ], + "watch_only_candidates": [ + "agentos_framework", + "bernstein_agent_governance", + "hermes_agent_personal_platform", + "microsoft_agent_governance_toolkit", + "pydantic_deepagents", + "thclaws_agent_harness" + ], + "watch_only_scorecard_prescreen_ready": [] + }, + "candidate_statuses": [ + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "openclaw_incumbent", + "current_gate": "production_decision_core", + "display_name": "OpenClaw incumbent", + "evaluation_priority": "baseline", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "production_baseline", + "integration_decision": "", + "operator_blockers": [], + "required_next_gate": "formal_replacement_adr_and_promotion_gate_required", + "role": "current_production_decision_core", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "openai_agents_sdk_coordinator", + "current_gate": "offline_replay", + "display_name": "OpenAI Agents SDK Coordinator", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": "docs/evaluations/agent_openai_coordinator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "registered_no_review", + "integration_decision": "deterministic_offline_coordinator_blocked_does_not_beat_openclaw", + "operator_blockers": [], + "required_next_gate": "continue_weekly_primary_source_market_watch", + "role": "coordinator_orchestrator", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "langgraph_incident_kernel", + "current_gate": "offline_replay", + "display_name": "LangGraph Incident Kernel", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": "docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "registered_no_review", + "integration_decision": "deterministic_offline_kernel_blocked_does_not_beat_openclaw", + "operator_blockers": [], + "required_next_gate": "continue_weekly_primary_source_market_watch", + "role": "durable_incident_workflow_kernel", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "nemo_nemotron_fabric", + "current_gate": "blocked_existing_replay_evidence", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": "docs/evaluations/agent_nemotron_contract_tuned_49b_v15_smoke_gate_2026-06-02.json", + "latest_smoke_matrix": "docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json", + "latest_smoke_model": "nvidia/llama-3.3-nemotron-super-49b-v1.5" + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_refresh_evidence_then_smoke_gate", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "5_record_smoke_gate_passes", + "latency_and_output_contract_blockers_resolved", + "cost_approval_recorded" + ], + "required_next_gate": "refresh_source_evidence_then_5_record_smoke_only", + "role": "agent_fabric_tool_model_evaluator", + "score": 0.8033 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "claude_agent_sdk_remediator", + "current_gate": "has_offline_replay_summary", + "display_name": "Claude Agent SDK Remediator", + "evaluation_priority": "must_test", + "evidence": { + "latest_replay_summary": "docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_refresh_replay_gate", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ], + "required_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "role": "devops_code_remediation_agent", + "score": 0.7533 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "google_adk_stack", + "current_gate": "offline_replay", + "display_name": "Google Agent Development Kit Stack", + "evaluation_priority": "can_test", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "registered_no_review", + "integration_decision": "", + "operator_blockers": [], + "required_next_gate": "continue_weekly_primary_source_market_watch", + "role": "gemini_vertex_agent_stack", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "microsoft_agent_framework", + "current_gate": "offline_replay", + "display_name": "Microsoft Agent Framework", + "evaluation_priority": "can_test", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "registered_no_review", + "integration_decision": "", + "operator_blockers": [], + "required_next_gate": "continue_weekly_primary_source_market_watch", + "role": "enterprise_workflow_agent_stack", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "crewai_flows_crews", + "current_gate": "not_yet_replayed", + "display_name": "CrewAI Flows + Crews", + "evaluation_priority": "secondary", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "integration_blocked", + "integration_decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "operator_blockers": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline" + ], + "required_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "role": "rapid_agent_team_prototype", + "score": 0.6033 + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "hermes_agent_personal_platform", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "NousResearch Hermes Agent", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "remain_watch_only_until_evidence_gap_resolved", + "operator_blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "personal_agent_platform_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "microsoft_agent_governance_toolkit", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "Microsoft Agent Governance Toolkit", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "remain_watch_only_until_evidence_gap_resolved", + "operator_blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "thclaws_agent_harness", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "thClaws Agent Harness", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "pydantic_deepagents", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "Pydantic DeepAgents", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "remain_watch_only_until_evidence_gap_resolved", + "operator_blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "agentos_framework", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "AgentOS Framework", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "remain_watch_only_until_evidence_gap_resolved", + "operator_blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "score": null + }, + { + "approvals": { + "paid_api": false, + "production_routing": false, + "replay": false, + "sdk_install": false, + "shadow_or_canary": false + }, + "candidate_id": "bernstein_agent_governance", + "current_gate": "watch_only_primary_source_monitoring", + "display_name": "Bernstein Agent Governance", + "evaluation_priority": "watch_only", + "evidence": { + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null + }, + "gate_status": "watch_only_blocked", + "integration_decision": "do_not_integrate_watch_only_primary_source_monitoring", + "operator_blockers": [ + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ], + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "score": null + } + ], + "current_decision": "openclaw_remains_production_decision_core", + "evaluation_cadence": { + "next_scheduled_run_at": "2026-06-29T09:00:00+08:00", + "operator_review_gate": "priority_upgrade_required_before_scorecard_replay_sdk_api_shadow_canary_or_production", + "primary_source_policy": "primary_sources_only_no_llm_no_sdk_no_paid_api", + "schedule": "weekly_monday_0900_asia_taipei", + "timezone": "Asia/Taipei", + "trigger_modes": [ + "scheduled_weekly", + "manual_dispatch", + "operator_triggered_after_primary_source_signal" + ], + "workflow": ".gitea/workflows/agent-market-watch.yaml" + }, + "forbidden_actions_without_new_approval": [ + "replace_openclaw", + "enter_shadow_or_canary", + "install_new_agent_sdk", + "call_paid_provider_api", + "run_replay_for_watch_only_candidate", + "change_production_routing" + ], + "generated_at": "2026-06-26T03:42:28.649759+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "discovery_classification_generated_at": "2026-06-26T03:41:46.640326+00:00", + "integration_review_generated_at": "2026-06-26T03:41:38.289424+00:00", + "promotion_review_generated_at": "2026-06-26T03:41:56.349330+00:00", + "watch_report_generated_at": "2026-06-26T03:41:17.266599+00:00" + }, + "market_watch_health": { + "blocked_from_integration": 5, + "freshness_sla_hours": 168, + "operator_blockers": [ + "integration_queue_not_empty" + ], + "source_failures_block_priority_upgrade": false, + "stale_after": "2026-06-29T15:00:00+08:00", + "stale_grace_hours": 6, + "status": "blocked" + }, + "next_allowed_actions": [ + "continue_weekly_primary_source_market_watch", + "rerun_existing_replay_only_after_evidence_or_adapter_change" + ], + "operator_decision_queue": [ + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "claude_agent_sdk_remediator", + "display_name": "Claude Agent SDK Remediator", + "evidence_refs": [ + "docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json" + ], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "risk_notes": [ + "Best fit is code and DevOps remediation, not necessarily central incident arbitration.", + "API cost, subscription separation, and vendor boundary must be validated.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": false, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "crewai_flows_crews", + "display_name": "CrewAI Flows + Crews", + "evidence_refs": [], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "create_no_sdk_no_api_adapter_then_offline_replay", + "risk_notes": [ + "Better for rapid automation teams than high-risk production AIOps core.", + "Durability, strict audit, and permission boundary must be proven in replay.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": true, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "nemo_nemotron_fabric", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "evidence_refs": [ + "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "docs/evaluations/agent_nemotron_contract_tuned_49b_v15_smoke_gate_2026-06-02.json", + "docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json" + ], + "priority": 10, + "queue_status": "blocked_needs_evidence", + "recommended_action": "refresh_source_evidence_then_5_record_smoke_only", + "risk_notes": [ + "Needs AWOOOI-specific HITL and dangerous-action policy integration.", + "GPU/NIM operating cost must be compared against current local inference.", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "agentos_framework", + "display_name": "AgentOS Framework", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "bernstein_agent_governance", + "display_name": "Bernstein Agent Governance", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "hermes_agent_personal_platform", + "display_name": "NousResearch Hermes Agent", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "microsoft_agent_governance_toolkit", + "display_name": "Microsoft Agent Governance Toolkit", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "pydantic_deepagents", + "display_name": "Pydantic DeepAgents", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": true, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "thclaws_agent_harness", + "display_name": "thClaws Agent Harness", + "evidence_refs": [], + "priority": 40, + "queue_status": "watch_only_blocked", + "recommended_action": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "risk_notes": [ + "candidate missing from current market scorecard", + "discovery_classification_must_recommend_watch_addition", + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval" + ] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "google_adk_stack", + "display_name": "Google Agent Development Kit Stack", + "evidence_refs": [], + "priority": 60, + "queue_status": "registered_no_review", + "recommended_action": "continue_weekly_primary_source_market_watch", + "risk_notes": [] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "langgraph_incident_kernel", + "display_name": "LangGraph Incident Kernel", + "evidence_refs": [ + "docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json" + ], + "priority": 60, + "queue_status": "registered_no_review", + "recommended_action": "continue_weekly_primary_source_market_watch", + "risk_notes": [] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "microsoft_agent_framework", + "display_name": "Microsoft Agent Framework", + "evidence_refs": [], + "priority": 60, + "queue_status": "registered_no_review", + "recommended_action": "continue_weekly_primary_source_market_watch", + "risk_notes": [] + }, + { + "approval_boundary": { + "market_scorecard_update_required": true, + "paid_api_approval_required": false, + "priority_upgrade_required": true, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": true, + "sdk_install_approval_required": true, + "shadow_or_canary_approval_required": true + }, + "candidate_id": "openai_agents_sdk_coordinator", + "display_name": "OpenAI Agents SDK Coordinator", + "evidence_refs": [ + "docs/evaluations/agent_openai_coordinator_replay_summary_2026-06-02.json" + ], + "priority": 60, + "queue_status": "registered_no_review", + "recommended_action": "continue_weekly_primary_source_market_watch", + "risk_notes": [] + }, + { + "approval_boundary": { + "market_scorecard_update_required": false, + "paid_api_approval_required": false, + "priority_upgrade_required": false, + "production_routing_approval_required": true, + "replacement_adr_required": true, + "replay_approval_required": false, + "sdk_install_approval_required": false, + "shadow_or_canary_approval_required": false + }, + "candidate_id": "openclaw_incumbent", + "display_name": "OpenClaw incumbent", + "evidence_refs": [], + "priority": 90, + "queue_status": "baseline_protected", + "recommended_action": "keep_openclaw_as_production_decision_core_until_formal_replacement_adr", + "risk_notes": [ + "no_candidate_has_formal_replacement_approval" + ] + } + ], + "policy": { + "market_scorecard_update_approved": false, + "paid_api_calls_approved": false, + "priority_upgrade_approved": false, + "production_changes_approved": false, + "replacement_decision_allowed": false, + "replay_candidate_approved": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false, + "snapshot_is_decision_source": false + }, + "schema_version": "agent_market_governance_snapshot_v1", + "summary": { + "blocked_from_integration": 5, + "candidate_count": 13, + "changed_candidates": 5, + "eligible_for_market_scorecard_prescreen": 0, + "integration_queue_count": 5, + "market_scorecard_updates_approved": 0, + "paid_api_calls_approved": 0, + "priority_upgrades_approved": 0, + "production_changes_approved": 0, + "recommended_watch_additions_remaining": 0, + "replacement_decisions_approved": 0, + "replay_candidates_approved": 0, + "sdk_installations_approved": 0, + "shadow_or_canary_approved": 0, + "source_count": 36, + "source_failures": 0, + "watch_only_candidates_reviewed": 6 + } +} diff --git a/docs/evaluations/agent_market_integration_review_full_2026-06-26.json b/docs/evaluations/agent_market_integration_review_full_2026-06-26.json new file mode 100644 index 00000000..d6a3be36 --- /dev/null +++ b/docs/evaluations/agent_market_integration_review_full_2026-06-26.json @@ -0,0 +1,494 @@ +{ + "generated_at": "2026-06-26T03:41:38.289424+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "review_scope": "actionable", + "scorecard_schema_version": "agent_market_capability_scorecard_v1", + "scorecard_scoring_version": "market_capability_v1", + "watch_report_generated_at": "2026-06-26T03:41:17.266599+00:00", + "watch_report_mode": "live", + "watch_summary": { + "candidate_count": 13, + "changed_candidates": 5, + "failure_count": 0, + "integration_queue_count": 5, + "source_count": 36, + "watch_only_candidates": 8 + } + }, + "policy": { + "paid_api_calls_approved": false, + "production_changes_approved": false, + "raw_external_pages_committed": false, + "replacement_decision_allowed": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "reviews": [ + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "nemo_nemotron_fabric", + "decision": "do_not_integrate_refresh_evidence_then_smoke_gate", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "code_remediation_fit" + ], + "known": true, + "rank": 3, + "replay_priority": "p0_replay", + "risks": [ + "Needs AWOOOI-specific HITL and dangerous-action policy integration.", + "GPU/NIM operating cost must be compared against current local inference." + ], + "strengths": [ + "observability_tracing", + "evaluation_harness", + "mcp_tool_ecosystem", + "local_private_deploy", + "awoooi_integration_fit" + ], + "total_score": 0.8033 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "3571e46b979eebf11e910d67", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "nvidia_build_models", + "status": "ok", + "type": "docs", + "url": "https://build.nvidia.com/models", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "refresh_source_evidence_then_5_record_smoke_only", + "reason": "Nemotron smoke/replay evidence blocks full replay, shadow, and canary.", + "stage": "blocked_existing_replay_evidence" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_as_offline_specialist_or_evaluator", + "rerun_only_5_record_smoke_after_a_specific_runtime_or_model_hypothesis", + "do_not_run_full_50_replay_until_smoke_gate_passes", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_fabric_tool_model_evaluator" + ], + "registry_status": { + "current_decision": "all_contract_tuned_nemotron_smokes_blocked_before_full_replay", + "evaluation_priority": "must_test", + "latest_replay_summary": null, + "latest_smoke_gate": "docs/evaluations/agent_nemotron_contract_tuned_49b_v15_smoke_gate_2026-06-02.json", + "latest_smoke_matrix": "docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json", + "latest_smoke_model": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "next_variant_id": "nemo_nemotron_fabric_contract_tuned_v1", + "next_variant_stage": "blocked_before_full_replay_all_tested_smokes", + "required_stage": "offline_replay", + "role": "agent_fabric_tool_model_evaluator" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "5_record_smoke_gate_passes", + "latency_and_output_contract_blockers_resolved", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "claude_agent_sdk_remediator", + "decision": "do_not_integrate_refresh_replay_gate", + "display_name": "Claude Agent SDK Remediator", + "market_score": { + "beats_baseline_capability": true, + "gaps": [ + "evaluation_harness", + "local_private_deploy" + ], + "known": true, + "rank": 5, + "replay_priority": "p0_replay", + "risks": [ + "Best fit is code and DevOps remediation, not necessarily central incident arbitration.", + "API cost, subscription separation, and vendor boundary must be validated." + ], + "strengths": [ + "human_in_loop", + "tool_guardrails", + "mcp_tool_ecosystem", + "code_remediation_fit" + ], + "total_score": 0.7533 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "c0b71a85954bc91e38dbda8b", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "anthropic_api_docs", + "status": "ok", + "type": "docs", + "url": "https://platform.claude.com/docs/en/home", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "reason": "Candidate has an offline replay summary and must re-enter promotion gate after evidence refresh.", + "stage": "has_offline_replay_summary" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "rerun_same_contract_offline_replay_before_promotion_gate", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:devops_code_remediation_agent" + ], + "registry_status": { + "current_decision": "deterministic_offline_remediator_blocked_does_not_beat_openclaw", + "evaluation_priority": "must_test", + "latest_replay_summary": "docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json", + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "devops_code_remediation_agent" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + "candidate_id": "crewai_flows_crews", + "decision": "do_not_integrate_prepare_no_cost_offline_adapter", + "display_name": "CrewAI Flows + Crews", + "market_score": { + "beats_baseline_capability": false, + "gaps": [ + "evaluation_harness", + "code_remediation_fit", + "awoooi_integration_fit" + ], + "known": true, + "rank": 9, + "replay_priority": "watch", + "risks": [ + "Better for rapid automation teams than high-risk production AIOps core.", + "Durability, strict audit, and permission boundary must be proven in replay." + ], + "strengths": [ + "local_private_deploy" + ], + "total_score": 0.6033 + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "c971a64519884f8e7f2589a2", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "crewai_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.crewai.com/", + "version": null + }, + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "a6181721375c7b51eef2d85a", + "error": null, + "http_status": 200, + "published_at": "2026-06-25T23:18:27.665972Z", + "source_id": "crewai_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/crewai/json", + "version": "1.15.0" + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "reason": "Candidate has no AWOOOI offline replay evidence yet.", + "stage": "not_yet_replayed" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "build_no_sdk_no_api_contract_adapter_first", + "request_cost_and_dependency_approval_before_official_sdk_or_paid_api_use", + "run_50_record_offline_replay_before_any_production_role", + "dependency_boundary_review_required", + "candidate_role_scope:rapid_agent_team_prototype" + ], + "registry_status": { + "current_decision": null, + "evaluation_priority": "secondary", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "offline_replay", + "role": "rapid_agent_team_prototype" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "offline_adapter_contract_valid", + "50_record_hidden_label_replay_beats_openclaw_baseline" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "thclaws_agent_harness", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "thClaws Agent Harness", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "3f7b32d3c041cdac97f05515", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "thclaws_homepage", + "status": "ok", + "type": "docs", + "url": "https://thclaws.ai", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_framework_or_orchestrator_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_framework_or_orchestrator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + }, + { + "approval_boundary": { + "approved_for_paid_api_calls": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + "candidate_id": "bernstein_agent_governance", + "decision": "do_not_integrate_watch_only_primary_source_monitoring", + "display_name": "Bernstein Agent Governance", + "market_score": { + "beats_baseline_capability": null, + "gaps": [], + "known": false, + "rank": null, + "replay_priority": "refresh_scorecard_required", + "risks": [ + "candidate missing from current market scorecard" + ], + "strengths": [], + "total_score": null + }, + "market_watch": { + "changed_sources": [ + { + "change_basis": "version_or_content_hash_changed", + "content_hash": "18f58a58b30119f0f01cbcf1", + "error": null, + "http_status": 200, + "published_at": null, + "source_id": "bernstein_docs", + "status": "ok", + "type": "docs", + "url": "https://bernstein.run", + "version": null + } + ], + "decision": "changed_requires_replay_readiness_review", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ] + }, + "readiness": { + "allowed_next_gate": "manual_primary_source_review_then_watch_registry_baseline", + "reason": "Candidate is approved only for primary-source market monitoring, not replay or integration.", + "stage": "watch_only_primary_source_monitoring" + }, + "recommendations": [ + "refresh_market_capability_evidence_from_changed_primary_sources", + "do_not_replace_openclaw_from_market_watch_signal", + "do_not_enter_shadow_or_canary_without_offline_replay_promotion_gate", + "keep_candidate_in_watch_registry_only", + "do_not_build_replay_adapter_until_operator_promotes_candidate_priority", + "refresh_watch_baseline_after_primary_source_review", + "cost_boundary_review_required", + "dependency_boundary_review_required", + "candidate_role_scope:agent_governance_policy_evaluator_candidate" + ], + "registry_status": { + "current_decision": "discovery_classified_watch_only_no_replay_approved", + "evaluation_priority": "watch_only", + "latest_replay_summary": null, + "latest_smoke_gate": null, + "latest_smoke_matrix": null, + "latest_smoke_model": null, + "next_variant_id": null, + "next_variant_stage": null, + "required_stage": "watch_only_primary_source_monitoring", + "role": "agent_governance_policy_evaluator_candidate" + }, + "unblock_conditions": [ + "changed_sources_reviewed_by_operator", + "market_scorecard_refreshed_if_primary_sources_changed_semantically", + "no_sdk_install_without_dependency_approval", + "no_paid_provider_use_without_cost_and_data_boundary_approval", + "operator_confirms_primary_sources", + "watch_registry_baseline_refreshed", + "explicit_priority_upgrade_before_replay", + "cost_approval_recorded" + ] + } + ], + "schema_version": "agent_market_integration_review_v1", + "summary": { + "blocked_from_integration": 5, + "production_changes_approved": 0, + "requires_cost_approval": 4, + "requires_dependency_approval": 5, + "reviewed_candidates": 5, + "shadow_or_canary_approved": 0, + "source_failures": 0 + } +} diff --git a/docs/evaluations/agent_market_watch_promotion_review_2026-06-26.json b/docs/evaluations/agent_market_watch_promotion_review_2026-06-26.json new file mode 100644 index 00000000..5f0ae936 --- /dev/null +++ b/docs/evaluations/agent_market_watch_promotion_review_2026-06-26.json @@ -0,0 +1,218 @@ +{ + "generated_at": "2026-06-26T03:41:56.349330+00:00", + "inputs": { + "candidate_registry_schema_version": "agent_replacement_candidates_v1", + "discovery_classification_generated_at": "2026-06-26T03:41:46.640326+00:00", + "integration_review_generated_at": "2026-06-26T03:41:38.289424+00:00", + "watch_report_generated_at": "2026-06-26T03:41:17.266599+00:00" + }, + "policy": { + "market_scorecard_update_approved": false, + "paid_api_calls_approved": false, + "priority_upgrade_approved": false, + "production_changes_approved": false, + "replacement_decision_allowed": false, + "replay_candidate_approved": false, + "sdk_installation_approved": false, + "shadow_or_canary_approved": false + }, + "reviews": [ + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "hermes_agent_personal_platform", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "NousResearch Hermes Agent", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "", + "latest_versions": [ + "v2026.6.19" + ], + "official_url": "https://hermes-agent.nousresearch.com", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "personal_agent_platform_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "microsoft_agent_governance_toolkit", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "Microsoft Agent Governance Toolkit", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "", + "latest_versions": [ + "v4.1.0" + ], + "official_url": "https://microsoft.github.io/agent-governance-toolkit/", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "thclaws_agent_harness", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "thClaws Agent Harness", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v0.75.0" + ], + "official_url": "https://thclaws.ai", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "pydantic_deepagents", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "Pydantic DeepAgents", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "", + "latest_versions": [ + "0.3.31" + ], + "official_url": "https://vstorm-co.github.io/pydantic-deepagents/", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "integration_review_must_confirm_watch_only_stage", + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "agentos_framework", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "AgentOS Framework", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "", + "latest_versions": [ + "v0.9.79" + ], + "official_url": "https://agentos.sh", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_framework_or_orchestrator_candidate", + "source_count": 2, + "source_failures": 0 + }, + { + "approved_for_paid_api_calls": false, + "approved_for_replay": false, + "approved_for_sdk_install": false, + "approved_for_shadow_or_canary": false, + "blockers": [ + "discovery_classification_must_recommend_watch_addition" + ], + "candidate_id": "bernstein_agent_governance", + "classification": { + "classification": null, + "recommendation": null, + "repository_full_name": null, + "risk_flags": [], + "watch_addition_recommended": false + }, + "decision": "remain_watch_only_until_evidence_gap_resolved", + "display_name": "Bernstein Agent Governance", + "eligible_for_market_scorecard_prescreen": false, + "integration_stage": "watch_only_primary_source_monitoring", + "latest_versions": [ + "v2.7.0" + ], + "official_url": "https://bernstein.run", + "release_version_observed": true, + "required_next_gate": "continue_watch_only_until_primary_source_evidence_is_sufficient", + "role": "agent_governance_policy_evaluator_candidate", + "source_count": 2, + "source_failures": 0 + } + ], + "schema_version": "agent_market_watch_promotion_review_v1", + "summary": { + "eligible_for_market_scorecard_prescreen": 0, + "market_scorecard_updates_approved": 0, + "paid_api_calls_approved": 0, + "priority_upgrades_approved": 0, + "production_changes_approved": 0, + "remain_watch_only": 6, + "replay_candidates_approved": 0, + "sdk_installations_approved": 0, + "shadow_or_canary_approved": 0, + "watch_only_candidates_reviewed": 6 + } +} diff --git a/docs/evaluations/agent_market_watch_report_2026-06-26.json b/docs/evaluations/agent_market_watch_report_2026-06-26.json new file mode 100644 index 00000000..8fc6d514 --- /dev/null +++ b/docs/evaluations/agent_market_watch_report_2026-06-26.json @@ -0,0 +1,841 @@ +{ + "cadence": { + "monthly_integration_review": "經 operator review 後,提交 market watch、integration review 與 discovery intake 的 reviewed baseline。", + "trigger_on_major_version": true, + "weekly_market_watch": "每週一 09:00 Asia/Taipei 產生只讀市場觀測報告與整合 / 探索 review 摘要。" + }, + "candidates": [ + { + "candidate_id": "openai_agents_sdk_coordinator", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "OpenAI Agents SDK Coordinator", + "evaluation_priority": "must_test", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Coordinator / Orchestrator", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "043ec42b0cc899a72448614c", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_agents_docs", + "status": "ok", + "type": "docs", + "url": "https://developers.openai.com/api/docs/guides/agents", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "da588498220486c388a51b10", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_agents_tracing_docs", + "status": "ok", + "type": "docs", + "url": "https://openai.github.io/openai-agents-python/tracing/", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "fd249bb0065cb554f5e2ecdf", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_agents_handoffs_docs", + "status": "ok", + "type": "docs", + "url": "https://openai.github.io/openai-agents-python/handoffs/", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "f3f2446f64e51e5a0dfa398a", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T05:15:31.741499Z", + "reference_version": null, + "source_id": "openai_agents_python_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/openai-agents/json", + "version": "0.17.7" + }, + { + "changed_since_reference": false, + "content_hash": "d13d7176d0b123dc1d6a7b08", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T04:02:12.610Z", + "reference_version": null, + "source_id": "openai_agents_typescript_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/@openai%2Fagents", + "version": "0.12.0" + } + ] + }, + { + "candidate_id": "langgraph_incident_kernel", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "LangGraph Incident Kernel", + "evaluation_priority": "must_test", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Durable Incident Workflow Kernel", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "c8100f72af1cb84426b57ac3", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "langgraph_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.langchain.com/oss/python/langgraph/overview", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "0700375668bf4a039be45c4c", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T20:58:20.335564Z", + "reference_version": null, + "source_id": "langgraph_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/langgraph/json", + "version": "1.2.6" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "ceb3d51c1e67fc6e2e9fda21", + "error": null, + "http_status": 403, + "published_at": "2026-06-18T20:58:32Z", + "reference_version": null, + "source_id": "langgraph_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/langchain-ai/langgraph/releases/latest", + "version": "1.2.6" + } + ] + }, + { + "candidate_id": "nemo_nemotron_fabric", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", + "evaluation_priority": "must_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Agent Fabric / Tool-Model Evaluator", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "da7400a5ae03d8de4dc4ef16", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemo_agent_toolkit_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "0fc74e93f4cd06e57d2d4ccc", + "error": null, + "http_status": 200, + "published_at": "2026-06-17T00:25:55.043213Z", + "reference_version": null, + "source_id": "nvidia_nemo_agent_toolkit_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/nvidia-nat/json", + "version": "1.8.0" + }, + { + "changed_since_reference": false, + "content_hash": "06028073c740b559b76a4715", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemotron_developer_page", + "status": "ok", + "type": "docs", + "url": "https://developer.nvidia.com/topics/ai/nemotron", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "caec0c63b700883cddc94e35", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemotron_3_ultra_blog", + "status": "ok", + "type": "docs", + "url": "https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "265fda17a34611b1533d8a28", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nim_llm_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.nvidia.com/nim/large-language-models/latest/index.html", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "3571e46b979eebf11e910d67", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_build_models", + "status": "ok", + "type": "docs", + "url": "https://build.nvidia.com/models", + "version": null + } + ] + }, + { + "candidate_id": "claude_agent_sdk_remediator", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Claude Agent SDK Remediator", + "evaluation_priority": "must_test", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "DevOps / Code Remediation Agent", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "5622132c0dc32c13c0f62568", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "claude_agent_sdk_docs", + "status": "ok", + "type": "docs", + "url": "https://code.claude.com/docs/en/agent-sdk/overview", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "c0b71a85954bc91e38dbda8b", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "anthropic_api_docs", + "status": "ok", + "type": "docs", + "url": "https://platform.claude.com/docs/en/home", + "version": null + } + ] + }, + { + "candidate_id": "google_adk_stack", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Google Agent Development Kit Stack", + "evaluation_priority": "can_test", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Google / Gemini Agent Stack", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "f8ebe9e670bf59fdb44d7133", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "google_adk_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "e2d0102cb37d90e01d9e4fc3", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T18:47:06.323661Z", + "reference_version": null, + "source_id": "google_adk_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/google-adk/json", + "version": "2.3.0" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "88aec475a8cfd83f8e67e35b", + "error": null, + "http_status": 403, + "published_at": "2026-06-18T18:45:04Z", + "reference_version": null, + "source_id": "google_adk_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/google/adk-python/releases/latest", + "version": "v2.3.0" + } + ] + }, + { + "candidate_id": "microsoft_agent_framework", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Microsoft Agent Framework", + "evaluation_priority": "can_test", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Enterprise Workflow Agent Stack", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "97e807de8517641d1c3d1a77", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "microsoft_agent_framework_docs", + "status": "ok", + "type": "docs", + "url": "https://learn.microsoft.com/en-us/agent-framework/overview/", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "268e86906524bb431c109f4d", + "error": null, + "http_status": 403, + "published_at": "2026-06-23T21:18:26Z", + "reference_version": null, + "source_id": "microsoft_agent_framework_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-framework/releases/latest", + "version": "dotnet-1.11.0" + } + ] + }, + { + "candidate_id": "crewai_flows_crews", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "CrewAI Flows + Crews", + "evaluation_priority": "secondary", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Rapid Agent Team Prototype", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "c971a64519884f8e7f2589a2", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "crewai_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.crewai.com/", + "version": null + }, + { + "changed_since_reference": true, + "content_hash": "a6181721375c7b51eef2d85a", + "error": null, + "http_status": 200, + "published_at": "2026-06-25T23:18:27.665972Z", + "reference_version": null, + "source_id": "crewai_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/crewai/json", + "version": "1.15.0" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "3d69194be72514c1da967727", + "error": null, + "http_status": 403, + "published_at": "2026-06-11T17:13:46Z", + "reference_version": null, + "source_id": "crewai_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/crewAIInc/crewAI/releases/latest", + "version": "1.14.7" + } + ] + }, + { + "candidate_id": "hermes_agent_personal_platform", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "NousResearch Hermes Agent", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Personal Agent Platform / Memory-Skills Runtime", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "734797344d1b8e3645e4e77b", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "hermes_agent_homepage", + "status": "ok", + "type": "docs", + "url": "https://hermes-agent.nousresearch.com", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "344b6e6c36562678b39b8e36", + "error": null, + "http_status": 403, + "published_at": "2026-06-19T19:39:06Z", + "reference_version": null, + "source_id": "hermes_agent_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/NousResearch/hermes-agent/releases/latest", + "version": "v2026.6.19" + } + ] + }, + { + "candidate_id": "microsoft_agent_governance_toolkit", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Microsoft Agent Governance Toolkit", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Agent Governance / Policy Runtime", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "836a36b5a6f878ecb638a4cb", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "microsoft_agent_governance_docs", + "status": "ok", + "type": "docs", + "url": "https://microsoft.github.io/agent-governance-toolkit/", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "d9808af6149df2fb7aaa9f48", + "error": null, + "http_status": 403, + "published_at": "2026-06-09T23:11:52Z", + "reference_version": null, + "source_id": "microsoft_agent_governance_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-governance-toolkit/releases/latest", + "version": "v4.1.0" + } + ] + }, + { + "candidate_id": "thclaws_agent_harness", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "thClaws Agent Harness", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Agent Harness / Multi-Provider Runtime", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "3f7b32d3c041cdac97f05515", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "thclaws_homepage", + "status": "ok", + "type": "docs", + "url": "https://thclaws.ai", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "2ae06b0827492015d0640732", + "error": null, + "http_status": 403, + "published_at": "2026-06-24T18:14:34Z", + "reference_version": null, + "source_id": "thclaws_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/thClaws/thClaws/releases/latest", + "version": "v0.75.0" + } + ] + }, + { + "candidate_id": "pydantic_deepagents", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Pydantic DeepAgents", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "Pydantic AI Deep Agent Framework", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "3a9c514e70d72dcb92b04f59", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "pydantic_deepagents_docs", + "status": "ok", + "type": "docs", + "url": "https://vstorm-co.github.io/pydantic-deepagents/", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "db272fa3711d70bc6f339c3e", + "error": null, + "http_status": 403, + "published_at": "2026-06-22T12:16:45Z", + "reference_version": null, + "source_id": "pydantic_deepagents_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/vstorm-co/pydantic-deepagents/releases/latest", + "version": "0.3.31" + } + ] + }, + { + "candidate_id": "agentos_framework", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "AgentOS Framework", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "keep_current_integration_status" + ], + "recommended_role": "TypeScript Agent Framework / Orchestrator", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "7a12ea07b33c22939d08b446", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "agentos_docs", + "status": "ok", + "type": "docs", + "url": "https://agentos.sh", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "ebe43479b619143f14f5f53d", + "error": null, + "http_status": 403, + "published_at": "2026-06-24T00:35:38Z", + "reference_version": null, + "source_id": "agentos_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/framerslab/agentos/releases/latest", + "version": "v0.9.79" + } + ] + }, + { + "candidate_id": "bernstein_agent_governance", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Bernstein Agent Governance", + "evaluation_priority": "watch_only", + "recommended_actions": [ + "refresh_market_capability_evidence", + "refresh_or_create_no_cost_adapter", + "run_offline_replay_before_shadow", + "do_not_promote_without_promotion_gate" + ], + "recommended_role": "Audit-Grade Agent Orchestration / Governance", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "18f58a58b30119f0f01cbcf1", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "bernstein_docs", + "status": "ok", + "type": "docs", + "url": "https://bernstein.run", + "version": null + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "1f1a5720f3ff80c1d627ffec", + "error": null, + "http_status": 403, + "published_at": "2026-05-24T15:53:42Z", + "reference_version": null, + "source_id": "bernstein_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/sipyourdrink-ltd/bernstein/releases/latest", + "version": "v2.7.0" + } + ] + } + ], + "failures": [], + "generated_at": "2026-06-26T03:41:17.266599+00:00", + "integration_queue": [ + { + "candidate_id": "nemo_nemotron_fabric", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "claude_agent_sdk_remediator", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "crewai_flows_crews", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": false, + "requires_dependency_approval": true + }, + { + "candidate_id": "thclaws_agent_harness", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + }, + { + "candidate_id": "bernstein_agent_governance", + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "refresh_market_scorecard_then_offline_replay", + "requires_cost_approval": true, + "requires_dependency_approval": true + } + ], + "mode": "live", + "new_candidate_discovery": [ + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "xbtlin/ai-berkshire", + "html_url": "https://github.com/xbtlin/ai-berkshire", + "stargazers_count": 2172, + "updated_at": "2026-06-26T03:41:15Z" + }, + { + "full_name": "trycua/cua", + "html_url": "https://github.com/trycua/cua", + "stargazers_count": 19004, + "updated_at": "2026-06-26T03:40:04Z" + }, + { + "full_name": "CopilotKit/CopilotKit", + "html_url": "https://github.com/CopilotKit/CopilotKit", + "stargazers_count": 35522, + "updated_at": "2026-06-26T03:36:59Z" + }, + { + "full_name": "omnigent-ai/omnigent", + "html_url": "https://github.com/omnigent-ai/omnigent", + "stargazers_count": 4924, + "updated_at": "2026-06-26T03:37:54Z" + }, + { + "full_name": "vm0-ai/vm0", + "html_url": "https://github.com/vm0-ai/vm0", + "stargazers_count": 1131, + "updated_at": "2026-06-26T03:32:54Z" + } + ], + "source_id": "github_ai_agent_topic", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:ai-agent+stars:%3E500&sort=updated&order=desc" + }, + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "omnigent-ai/omnigent", + "html_url": "https://github.com/omnigent-ai/omnigent", + "stargazers_count": 4924, + "updated_at": "2026-06-26T03:37:54Z" + }, + { + "full_name": "ag2ai/ag2", + "html_url": "https://github.com/ag2ai/ag2", + "stargazers_count": 4710, + "updated_at": "2026-06-26T03:29:07Z" + }, + { + "full_name": "Human-Agent-Society/CORAL", + "html_url": "https://github.com/Human-Agent-Society/CORAL", + "stargazers_count": 754, + "updated_at": "2026-06-26T03:05:29Z" + }, + { + "full_name": "eigent-ai/eigent", + "html_url": "https://github.com/eigent-ai/eigent", + "stargazers_count": 14394, + "updated_at": "2026-06-26T03:38:22Z" + }, + { + "full_name": "trpc-group/trpc-agent-go", + "html_url": "https://github.com/trpc-group/trpc-agent-go", + "stargazers_count": 1408, + "updated_at": "2026-06-26T01:55:30Z" + } + ], + "source_id": "github_agent_framework_topic", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:agent-framework+stars:%3E300&sort=updated&order=desc" + } + ], + "policy": { + "integration_requires_replay": true, + "new_dependency_requires_approval": true, + "official_or_primary_sources_only": true, + "paid_provider_requires_approval": true, + "raw_external_pages_committed": false, + "replacement_decision_allowed": false + }, + "registry": { + "path": "docs/ai/agent-market-watch-sources.v1.json", + "schema_version": "agent_market_watch_sources_v1", + "updated_at": "2026-06-26" + }, + "schema_version": "agent_market_watch_report_v1", + "summary": { + "candidate_count": 13, + "changed_candidates": 5, + "failure_count": 0, + "integration_queue_count": 5, + "source_count": 36, + "watch_only_candidates": 8 + } +} diff --git a/docs/evaluations/ai_technology_watch_report_2026-06-26.json b/docs/evaluations/ai_technology_watch_report_2026-06-26.json new file mode 100644 index 00000000..60aa4459 --- /dev/null +++ b/docs/evaluations/ai_technology_watch_report_2026-06-26.json @@ -0,0 +1,1297 @@ +{ + "cadence": { + "daily_triage": "每日彙整變更技術,依商業適用性、依賴風險、成本風險與資安風險分組。", + "monthly_strategy_review": "每月策略檢討,決定技術應納入 roadmap、維持 watch-only 或從監控清單移除。", + "near_real_time_watch": "每 6 小時執行一次只讀 primary-source 檢查,偵測主流 AI 技術版本、文件與 release 變更。", + "weekly_scorecard": "每週刷新技術 scorecard,判斷是否值得進入 sandbox、offline replay 或 adapter design。" + }, + "failures": [], + "generated_at": "2026-06-26T03:41:25.394846+00:00", + "mode": "live", + "new_technology_discovery": [ + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "trycua/cua", + "html_url": "https://github.com/trycua/cua", + "stargazers_count": 19004, + "updated_at": "2026-06-26T03:40:04Z" + }, + { + "full_name": "CopilotKit/CopilotKit", + "html_url": "https://github.com/CopilotKit/CopilotKit", + "stargazers_count": 35522, + "updated_at": "2026-06-26T03:36:59Z" + }, + { + "full_name": "TencentCloud/TencentDB-Agent-Memory", + "html_url": "https://github.com/TencentCloud/TencentDB-Agent-Memory", + "stargazers_count": 6176, + "updated_at": "2026-06-26T03:32:53Z" + }, + { + "full_name": "iOfficeAI/AionUi", + "html_url": "https://github.com/iOfficeAI/AionUi", + "stargazers_count": 28874, + "updated_at": "2026-06-26T03:30:05Z" + }, + { + "full_name": "nocobase/nocobase", + "html_url": "https://github.com/nocobase/nocobase", + "stargazers_count": 23100, + "updated_at": "2026-06-26T03:26:16Z" + } + ], + "source_id": "github_ai_agent_discovery", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:ai-agent+stars:%3E5000&sort=updated&order=desc" + }, + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "xbtlin/ai-berkshire", + "html_url": "https://github.com/xbtlin/ai-berkshire", + "stargazers_count": 2172, + "updated_at": "2026-06-26T03:41:15Z" + }, + { + "full_name": "lobehub/lobehub", + "html_url": "https://github.com/lobehub/lobehub", + "stargazers_count": 79107, + "updated_at": "2026-06-26T03:40:14Z" + }, + { + "full_name": "thetahealth/mirobody", + "html_url": "https://github.com/thetahealth/mirobody", + "stargazers_count": 1048, + "updated_at": "2026-06-26T03:37:56Z" + }, + { + "full_name": "langgenius/dify", + "html_url": "https://github.com/langgenius/dify", + "stargazers_count": 146593, + "updated_at": "2026-06-26T03:36:49Z" + }, + { + "full_name": "diegosouzapw/OmniRoute", + "html_url": "https://github.com/diegosouzapw/OmniRoute", + "stargazers_count": 6915, + "updated_at": "2026-06-26T03:40:54Z" + } + ], + "source_id": "github_mcp_discovery", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:mcp+stars:%3E1000&sort=updated&order=desc" + }, + { + "error": null, + "http_status": 200, + "items": [ + { + "full_name": "elizaOS/eliza", + "html_url": "https://github.com/elizaOS/eliza", + "stargazers_count": 18646, + "updated_at": "2026-06-26T03:39:29Z" + }, + { + "full_name": "infiniflow/ragflow", + "html_url": "https://github.com/infiniflow/ragflow", + "stargazers_count": 83643, + "updated_at": "2026-06-26T03:37:56Z" + }, + { + "full_name": "langgenius/dify", + "html_url": "https://github.com/langgenius/dify", + "stargazers_count": 146593, + "updated_at": "2026-06-26T03:36:49Z" + }, + { + "full_name": "ModelEngine-Group/nexent", + "html_url": "https://github.com/ModelEngine-Group/nexent", + "stargazers_count": 5312, + "updated_at": "2026-06-26T03:38:30Z" + }, + { + "full_name": "1Panel-dev/MaxKB", + "html_url": "https://github.com/1Panel-dev/MaxKB", + "stargazers_count": 21510, + "updated_at": "2026-06-26T03:30:57Z" + } + ], + "source_id": "github_rag_discovery", + "status": "ok", + "type": "github_search", + "url": "https://api.github.com/search/repositories?q=topic:rag+stars:%3E3000&sort=updated&order=desc" + } + ], + "policy": { + "host_write_approved": false, + "model_provider_switch_approved": false, + "paid_api_calls_approved": false, + "production_routing_approved": false, + "raw_chat_history_synced": false, + "read_only": true, + "sdk_installation_approved": false, + "telegram_send_approved": false, + "workflow_modification_approved": false + }, + "registry": { + "path": "docs/ai/ai-technology-watch-sources.v1.json", + "schema_version": "ai_technology_watch_sources_v1", + "updated_at": "2026-06-26" + }, + "review_queue": [ + { + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "scorecard_then_sandbox_or_replay_plan", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "agent_frameworks", + "technology_id": "crewai_flows" + }, + { + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "scorecard_then_sandbox_or_replay_plan", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "mcp_and_a2a", + "technology_id": "modelcontextprotocol_sdk" + }, + { + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "scorecard_then_sandbox_or_replay_plan", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "mcp_and_a2a", + "technology_id": "a2a_protocol" + }, + { + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "scorecard_then_sandbox_or_replay_plan", + "requires_cost_approval": true, + "requires_dependency_approval": false, + "requires_security_review": true, + "technology_area": "model_providers", + "technology_id": "anthropic_claude_platform" + }, + { + "reason": "primary_source_version_or_content_changed", + "required_next_gate": "scorecard_then_sandbox_or_replay_plan", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "evaluation_and_observability", + "technology_id": "langfuse_observability" + } + ], + "schema_version": "ai_technology_watch_report_v1", + "summary": { + "changed_technologies": 5, + "high_priority_count": 15, + "review_queue_count": 5, + "source_count": 52, + "source_failure_count": 0, + "technology_area_count": 6, + "technology_count": 21, + "watch_only_technologies": 16 + }, + "technologies": [ + { + "awoooi_role": "協調者、handoff、tool tracing、guardrail 候選", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "OpenAI Agents SDK", + "evaluation_priority": "p0", + "integration_surface": "agent_handoff_tracing_guardrails", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "f3f2446f64e51e5a0dfa398a", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T05:15:31.741499Z", + "reference_version": "0.17.7", + "source_id": "openai_agents_python_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/openai-agents/json", + "version": "0.17.7" + }, + { + "changed_since_reference": false, + "content_hash": "d13d7176d0b123dc1d6a7b08", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T04:02:12.610Z", + "reference_version": "0.12.0", + "source_id": "openai_agents_typescript_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/%40openai%2Fagents", + "version": "0.12.0" + }, + { + "changed_since_reference": false, + "content_hash": "043ec42b0cc899a72448614c", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_agents_docs", + "status": "ok", + "type": "docs", + "url": "https://developers.openai.com/api/docs/guides/agents", + "version": null + } + ], + "technology_area": "agent_frameworks", + "technology_id": "openai_agents_sdk" + }, + { + "awoooi_role": "NemoTron replay / evaluator / synthetic data gate", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "NVIDIA Nemotron + NeMo Agent Toolkit", + "evaluation_priority": "p0", + "integration_surface": "offline_replay_evaluator_smoke_gate", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "06028073c740b559b76a4715", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemotron_developer_page", + "status": "ok", + "type": "docs", + "url": "https://developer.nvidia.com/topics/ai/nemotron", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "da7400a5ae03d8de4dc4ef16", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemo_agent_toolkit_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.nvidia.com/nemo/agent-toolkit/latest/index.html", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "0fc74e93f4cd06e57d2d4ccc", + "error": null, + "http_status": 200, + "published_at": "2026-06-17T00:25:55.043213Z", + "reference_version": null, + "source_id": "nvidia_nemo_agent_toolkit_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/nvidia-nat/json", + "version": "1.8.0" + }, + { + "changed_since_reference": false, + "content_hash": "caec0c63b700883cddc94e35", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "nvidia_nemotron_3_ultra_blog", + "status": "ok", + "type": "docs", + "url": "https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/", + "version": null + } + ], + "technology_area": "agent_frameworks", + "technology_id": "nvidia_nemotron_nemo" + }, + { + "awoooi_role": "事件處理與可恢復 workflow kernel 候選", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "LangGraph", + "evaluation_priority": "p0", + "integration_surface": "durable_workflow_human_in_loop", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "0700375668bf4a039be45c4c", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T20:58:20.335564Z", + "reference_version": "1.2.6", + "source_id": "langgraph_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/langgraph/json", + "version": "1.2.6" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "ceb3d51c1e67fc6e2e9fda21", + "error": null, + "http_status": 403, + "published_at": "2026-06-18T20:58:32Z", + "reference_version": "1.2.6", + "source_id": "langgraph_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/langchain-ai/langgraph/releases/latest", + "version": "1.2.6" + }, + { + "changed_since_reference": false, + "content_hash": "c8100f72af1cb84426b57ac3", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "langgraph_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.langchain.com/oss/python/langgraph/overview", + "version": null + } + ], + "technology_area": "agent_frameworks", + "technology_id": "langgraph_runtime" + }, + { + "awoooi_role": "Gemini/Vertex agent stack watch-only 候選", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Google Agent Development Kit", + "evaluation_priority": "p1", + "integration_surface": "gemini_enterprise_agent_stack", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "e2d0102cb37d90e01d9e4fc3", + "error": null, + "http_status": 200, + "published_at": "2026-06-18T18:47:06.323661Z", + "reference_version": "2.3.0", + "source_id": "google_adk_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/google-adk/json", + "version": "2.3.0" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "88aec475a8cfd83f8e67e35b", + "error": null, + "http_status": 403, + "published_at": "2026-06-18T18:45:04Z", + "reference_version": "v2.3.0", + "source_id": "google_adk_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/google/adk-python/releases/latest", + "version": "v2.3.0" + }, + { + "changed_since_reference": false, + "content_hash": "f8ebe9e670bf59fdb44d7133", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "google_adk_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.cloud.google.com/gemini-enterprise-agent-platform/build/adk", + "version": null + } + ], + "technology_area": "agent_frameworks", + "technology_id": "google_adk_stack" + }, + { + "awoooi_role": "MCP/A2A enterprise workflow watch-only 候選", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Microsoft Agent Framework", + "evaluation_priority": "p1", + "integration_surface": "enterprise_mcp_a2a_workflow", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "268e86906524bb431c109f4d", + "error": null, + "http_status": 403, + "published_at": "2026-06-23T21:18:26Z", + "reference_version": "dotnet-1.11.0", + "source_id": "microsoft_agent_framework_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/microsoft/agent-framework/releases/latest", + "version": "dotnet-1.11.0" + }, + { + "changed_since_reference": false, + "content_hash": "97e807de8517641d1c3d1a77", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "microsoft_agent_framework_docs", + "status": "ok", + "type": "docs", + "url": "https://learn.microsoft.com/en-us/agent-framework/overview/", + "version": null + } + ], + "technology_area": "agent_frameworks", + "technology_id": "microsoft_agent_framework" + }, + { + "awoooi_role": "快速 prototype / non-production 評估候選", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "CrewAI Flows + Crews", + "evaluation_priority": "p2", + "integration_surface": "multi_agent_prototype", + "recommended_actions": [ + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "a6181721375c7b51eef2d85a", + "error": null, + "http_status": 200, + "published_at": "2026-06-25T23:18:27.665972Z", + "reference_version": "1.14.7", + "source_id": "crewai_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/crewai/json", + "version": "1.15.0" + }, + { + "changed_since_reference": true, + "content_hash": "c971a64519884f8e7f2589a2", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "crewai_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.crewai.com/", + "version": null + } + ], + "technology_area": "agent_frameworks", + "technology_id": "crewai_flows" + }, + { + "awoooi_role": "read-only tool registry / MCP adapter 候選", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Model Context Protocol SDK", + "evaluation_priority": "p0", + "integration_surface": "tool_registry_interoperability", + "recommended_actions": [ + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "ca0fb3725519e6517209f0f0", + "error": null, + "http_status": 200, + "published_at": "2026-03-30T16:50:42.718Z", + "reference_version": null, + "source_id": "mcp_typescript_sdk_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/%40modelcontextprotocol%2Fsdk", + "version": "1.29.0" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "910d982e612e4ea69f6dddb6", + "error": null, + "http_status": 403, + "published_at": "2026-03-30T16:49:30Z", + "reference_version": null, + "source_id": "mcp_typescript_sdk_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/modelcontextprotocol/typescript-sdk/releases/latest", + "version": "v1.29.0" + }, + { + "changed_since_reference": true, + "content_hash": "b9beab05256cdc514ed829b9", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "mcp_typescript_sdk_docs", + "status": "ok", + "type": "docs", + "url": "https://github.com/modelcontextprotocol/typescript-sdk", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "82ed729c20434d686fd7bdf5", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "mcp_specification_2025_06_18", + "status": "ok", + "type": "docs", + "url": "https://modelcontextprotocol.io/specification/2025-06-18", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "8a4c54a7f22814e3d66d19f8", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "mcp_roadmap", + "status": "ok", + "type": "docs", + "url": "https://modelcontextprotocol.io/development/roadmap", + "version": null + } + ], + "technology_area": "mcp_and_a2a", + "technology_id": "modelcontextprotocol_sdk" + }, + { + "awoooi_role": "跨 Agent 溝通協定 watch-only 候選", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Agent2Agent Protocol", + "evaluation_priority": "p1", + "integration_surface": "agent_to_agent_interop", + "recommended_actions": [ + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "fe0f870c8c568c6597f38079", + "error": null, + "http_status": 403, + "published_at": "2026-05-28T11:34:36Z", + "reference_version": null, + "source_id": "a2a_protocol_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/a2aproject/A2A/releases/latest", + "version": "v1.0.1" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "39cd1b8a6283966240e0a778", + "error": null, + "http_status": 403, + "published_at": "2026-05-29T09:34:03Z", + "reference_version": null, + "source_id": "a2a_python_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/a2aproject/a2a-python/releases/latest", + "version": "v1.1.0" + }, + { + "changed_since_reference": true, + "content_hash": "cf782dfff3a2ac78e34f3bf7", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "a2a_protocol_docs", + "status": "ok", + "type": "docs", + "url": "https://a2a-protocol.org/latest/", + "version": null + } + ], + "technology_area": "mcp_and_a2a", + "technology_id": "a2a_protocol" + }, + { + "awoooi_role": "Agent / LLM / MCP trace 欄位標準與日週月報可觀測基礎", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "OpenTelemetry GenAI Semantic Conventions", + "evaluation_priority": "p1", + "integration_surface": "agent_llm_trace_semantic_conventions", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "9898498d440cb0834857a588", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "otel_genai_attributes", + "status": "ok", + "type": "docs", + "url": "https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/", + "version": null + } + ], + "technology_area": "evaluation_and_observability", + "technology_id": "opentelemetry_genai_semconv" + }, + { + "awoooi_role": "模型能力、成本與 routing scorecard 來源", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "OpenAI Model Platform", + "evaluation_priority": "p0", + "integration_surface": "model_capability_cost_routing", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": false, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "a0e7044dcc1a3c9bdde9b3c4", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "openai_models_docs", + "status": "ok", + "type": "docs", + "url": "https://platform.openai.com/docs/models", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "3a739b3b7cff0338db14af8d", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T20:55:58.882276Z", + "reference_version": null, + "source_id": "openai_python_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/openai/json", + "version": "2.44.0" + }, + { + "changed_since_reference": false, + "content_hash": "0c062fa5557606da1df42726", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T20:36:37.856Z", + "reference_version": null, + "source_id": "openai_node_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/openai", + "version": "6.45.0" + } + ], + "technology_area": "model_providers", + "technology_id": "openai_model_platform" + }, + { + "awoooi_role": "Claude model / coding agent / remediation watch source", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Anthropic Claude Platform", + "evaluation_priority": "p0", + "integration_surface": "model_capability_cost_routing", + "recommended_actions": [ + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" + ], + "requires_cost_approval": true, + "requires_dependency_approval": false, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "c095436fb98455ac96b5cd7a", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "anthropic_models_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.anthropic.com/en/docs/about-claude/models/overview", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "22069549b95a4e6e9b3efcd2", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T18:50:15.985Z", + "reference_version": null, + "source_id": "anthropic_sdk_npm", + "status": "ok", + "type": "npm", + "url": "https://registry.npmjs.org/%40anthropic-ai%2Fsdk", + "version": "0.106.0" + }, + { + "changed_since_reference": false, + "content_hash": "5622132c0dc32c13c0f62568", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "claude_agent_sdk_docs", + "status": "ok", + "type": "docs", + "url": "https://code.claude.com/docs/en/agent-sdk/overview", + "version": null + } + ], + "technology_area": "model_providers", + "technology_id": "anthropic_claude_platform" + }, + { + "awoooi_role": "Gemini model capability / cost watch source", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Google Gemini Platform", + "evaluation_priority": "p1", + "integration_surface": "model_capability_cost_routing", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": false, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "60db43f0591d1789c13ba419", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "gemini_models_docs", + "status": "ok", + "type": "docs", + "url": "https://ai.google.dev/gemini-api/docs/models", + "version": null + }, + { + "changed_since_reference": false, + "content_hash": "2e5896a3f7ebb9c5e5d6b168", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T01:33:16.296219Z", + "reference_version": null, + "source_id": "google_genai_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/google-genai/json", + "version": "2.10.0" + } + ], + "technology_area": "model_providers", + "technology_id": "google_gemini_platform" + }, + { + "awoooi_role": "RAG ingestion / indexing / connector watch source", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "LlamaIndex", + "evaluation_priority": "p1", + "integration_surface": "rag_indexing_connectors", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "d23e23bd7fd9440b5b11f8b1", + "error": null, + "http_status": 200, + "published_at": "2026-06-24T19:35:52.833783Z", + "reference_version": null, + "source_id": "llama_index_core_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/llama-index-core/json", + "version": "0.14.23" + }, + { + "changed_since_reference": false, + "content_hash": "07a4311e0c56bef5eff90470", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "llama_index_docs", + "status": "ok", + "type": "docs", + "url": "https://developers.llamaindex.ai/python/framework/", + "version": null + } + ], + "technology_area": "rag_and_vector", + "technology_id": "llamaindex_rag" + }, + { + "awoooi_role": "LLM app integration connector watch source", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "LangChain", + "evaluation_priority": "p2", + "integration_surface": "llm_app_runtime_connectors", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "a48c3b206d034c7b0b90786e", + "error": null, + "http_status": 200, + "published_at": "2026-06-22T23:00:31.619946Z", + "reference_version": null, + "source_id": "langchain_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/langchain/json", + "version": "1.3.11" + }, + { + "changed_since_reference": false, + "content_hash": "57dbf4a9e69eafc6d490c181", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "langchain_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.langchain.com/", + "version": null + } + ], + "technology_area": "rag_and_vector", + "technology_id": "langchain_runtime" + }, + { + "awoooi_role": "現有 Postgres/pgvector 能力與版本 freshness 來源", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "pgvector", + "evaluation_priority": "p1", + "integration_surface": "postgres_vector_index", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "36549870c2aa94c481c5f376", + "error": null, + "http_status": 403, + "published_at": null, + "reference_version": null, + "source_id": "pgvector_github_tags", + "status": "carried_forward_rate_limited", + "type": "github_tags", + "url": "https://api.github.com/repos/pgvector/pgvector/tags", + "version": "v0.8.3" + }, + { + "changed_since_reference": false, + "content_hash": "61b9903cc26a5fc7111cbc99", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "pgvector_docs", + "status": "ok", + "type": "docs", + "url": "https://github.com/pgvector/pgvector", + "version": null + } + ], + "technology_area": "rag_and_vector", + "technology_id": "pgvector_vector_store" + }, + { + "awoooi_role": "專用 vector DB 候選,只能 sandbox 評估", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Qdrant", + "evaluation_priority": "p2", + "integration_surface": "dedicated_vector_database", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "fbb71fdce3f7457bd10eb978", + "error": null, + "http_status": 200, + "published_at": "2026-05-11T14:12:36.998394Z", + "reference_version": null, + "source_id": "qdrant_client_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/qdrant-client/json", + "version": "1.18.0" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "0f0682ba8a040a64057c03c1", + "error": null, + "http_status": 403, + "published_at": "2026-06-04T06:50:36Z", + "reference_version": null, + "source_id": "qdrant_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/qdrant/qdrant/releases/latest", + "version": "v1.18.2" + } + ], + "technology_area": "rag_and_vector", + "technology_id": "qdrant_vector_store" + }, + { + "awoooi_role": "本機 / sandbox vector store 候選", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "ChromaDB", + "evaluation_priority": "p3", + "integration_surface": "local_vector_database", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "06e0ac7ae5aa6f22125c0dcb", + "error": null, + "http_status": 200, + "published_at": "2026-05-05T05:54:48.906852Z", + "reference_version": null, + "source_id": "chromadb_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/chromadb/json", + "version": "1.5.9" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "dbd2cd85ad1f7c822a080e46", + "error": null, + "http_status": 403, + "published_at": "2026-05-05T05:55:40Z", + "reference_version": null, + "source_id": "chromadb_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/chroma-core/chroma/releases/latest", + "version": "1.5.9" + } + ], + "technology_area": "rag_and_vector", + "technology_id": "chromadb_vector_store" + }, + { + "awoooi_role": "RAG / LLM app evaluation metrics 候選", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Ragas", + "evaluation_priority": "p1", + "integration_surface": "rag_eval_metrics", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "8c0a0fea2e6b423ae05b5d40", + "error": null, + "http_status": 200, + "published_at": "2026-01-13T17:47:59.200116Z", + "reference_version": null, + "source_id": "ragas_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/ragas/json", + "version": "0.4.3" + }, + { + "changed_since_reference": false, + "content_hash": "adc0098fc150daf81a599005", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "ragas_docs", + "status": "ok", + "type": "docs", + "url": "https://docs.ragas.io/en/stable/", + "version": null + } + ], + "technology_area": "evaluation_and_observability", + "technology_id": "ragas_eval" + }, + { + "awoooi_role": "LLM trace / prompt / eval observability 候選", + "changed": true, + "decision": "changed_requires_replay_readiness_review", + "display_name": "Langfuse", + "evaluation_priority": "p1", + "integration_surface": "llm_observability_tracing", + "recommended_actions": [ + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": true, + "content_hash": "f5f21a93660cb58a9210111f", + "error": null, + "http_status": 200, + "published_at": "2026-06-25T11:57:10.497914Z", + "reference_version": null, + "source_id": "langfuse_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/langfuse/json", + "version": "4.12.0" + }, + { + "changed_since_reference": false, + "content_hash": "d90e1c8e226fa4ce2b45c22b", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "langfuse_docs", + "status": "ok", + "type": "docs", + "url": "https://langfuse.com/docs", + "version": null + } + ], + "technology_area": "evaluation_and_observability", + "technology_id": "langfuse_observability" + }, + { + "awoooi_role": "自託管模型 serving 能力與版本 freshness 來源", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "Hugging Face Text Generation Inference", + "evaluation_priority": "p2", + "integration_surface": "self_hosted_model_serving", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "d6ea0096e24f7e441961e35a", + "error": null, + "http_status": 403, + "published_at": "2025-12-19T14:35:25Z", + "reference_version": null, + "source_id": "tgi_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/huggingface/text-generation-inference/releases/latest", + "version": "v3.3.7" + }, + { + "changed_since_reference": false, + "content_hash": "c42a7871f12bcff3648aba61", + "error": null, + "http_status": 200, + "published_at": null, + "reference_version": null, + "source_id": "tgi_docs", + "status": "ok", + "type": "docs", + "url": "https://huggingface.co/docs/text-generation-inference/index", + "version": null + } + ], + "technology_area": "model_serving", + "technology_id": "huggingface_tgi" + }, + { + "awoooi_role": "自託管 LLM inference 候選,需 GPU/成本/安全 gate", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "vLLM", + "evaluation_priority": "p2", + "integration_surface": "self_hosted_llm_inference", + "recommended_actions": [ + "keep_watch_only_status" + ], + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "sources": [ + { + "changed_since_reference": false, + "content_hash": "df159e475abea07f118301ee", + "error": null, + "http_status": 200, + "published_at": "2026-06-15T05:11:49.394364Z", + "reference_version": null, + "source_id": "vllm_pypi", + "status": "ok", + "type": "pypi", + "url": "https://pypi.org/pypi/vllm/json", + "version": "0.23.0" + }, + { + "carried_forward_from_previous": true, + "changed_since_reference": false, + "content_hash": "1fee977f40c7bef04a3bb2f9", + "error": null, + "http_status": 403, + "published_at": "2026-06-15T05:27:20Z", + "reference_version": null, + "source_id": "vllm_github_release", + "status": "carried_forward_rate_limited", + "type": "github_release", + "url": "https://api.github.com/repos/vllm-project/vllm/releases/latest", + "version": "v0.23.0" + } + ], + "technology_area": "model_serving", + "technology_id": "vllm_serving" + } + ], + "technology_area_counts": { + "agent_frameworks": 6, + "evaluation_and_observability": 3, + "mcp_and_a2a": 2, + "model_providers": 3, + "model_serving": 2, + "rag_and_vector": 5 + } +} diff --git a/docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-26.md b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-26.md new file mode 100644 index 00000000..1248553f --- /dev/null +++ b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK-2026-06-26.md @@ -0,0 +1,71 @@ +# AI Agent 市場雷達與近期變更盤點 + +- 產生時間:`2026-06-26T03:43:01.458349+00:00` +- 整體治理完成度:`42.2%` +- 市場雷達完成度:`100.0%` +- 候選 Agent:`13` +- 官方 / 主要來源:`36` +- 來源失敗:`0` +- 需要重新審查候選:`5` +- 仍被整合 gate 擋下:`5` +- OpenClaw 取代批准:`0` + +## 近期變更盤點 + +| 優先級 | 工作線 | 狀態 | 進度 | 下一步 | +|---|---|---|---:|---| +| `P0` | Product Governance Owner Response Dashboard / handoff 收斂 | `read_model_ready_runtime_blocked` | `100%` | Owner questions 與 boundary acknowledgements 仍需逐項回覆。 | +| `P0` | Status Cleanup Dashboard read-only API 正式化 | `blocked_status_cleanup_apply_not_authorized` | `100%` | apply_allowed=false 前不得更新 project status 或 memory。 | +| `P0` | Wazuh / IwoooS 可視性邊界 | `blocked_waiting_manager_agent_registry_readback` | `35%` | 等待 manager agent registry readback 與 live route readback。 | +| `P0` | AI Agent market watch 最新 primary-source refresh | `market_refresh_done_integration_blocked` | `100%` | 更新 scorecard 並進入 offline replay gate,不得直接替換。 | +| `P1` | 日報 / 週報 / 月報數據化報告 | `report_contract_defined_runtime_delivery_blocked` | `65%` | 接 Agent 工作量、Telegram receipt 與 human-review queue。 | +| `P1` | 工具 / 套件 / 服務 / 主機版本新鮮度 | `read_only_inventory_defined_update_execution_blocked` | `55%` | 定期產生版本 freshness snapshot;中低風險可 auto proposal,高風險維持人工審核。 | + +## 市場主流做法對齊 + +| 做法 | AWOOOI 判定 | 下一步 | +|---|---|---| +| 多 Agent handoff / specialist delegation | `partially_modeled` | 將 OpenClaw / Hermes / NemoTron handoff 事件寫入可讀 timeline。 | +| Tracing / tool call / guardrail observability | `missing_unified_trace` | 建立 Agent run trace id,串接報告、Telegram receipt 與 replay outcome。 | +| Durable execution / persistence / human-in-the-loop | `needed_for_incident_loop` | 優先把 incident workflow kernel 設計成可暫停、恢復、審核與重放。 | +| MCP / A2A / enterprise multi-agent interoperability | `watch_and_design` | MCP server 先做 read-only tool registry,再開 write adapter。 | +| Evaluation / replay / profiling before integration | `strong_fit_for_nemotron` | NemoTron 維持 smoke / replay / evaluator,不直接接 production routing。 | +| Agent SDK as programmable code/ops remediator | `candidate_for_remediation_lane` | 只允許 no-write replay 與 patch proposal,禁止自動 merge / deploy。 | +| Enterprise-scale ADK with evaluation and observability | `candidate_for_google_stack_review` | 先納入 weekly watch,成本與資料邊界審核後才可 adapter。 | + +## Agent 專業角色安排 + +| Agent / 候選 | 建議角色 | Gate 狀態 | 下一步 | +|---|---|---|---| +| OpenClaw incumbent | 生產仲裁者 / production decision core | `production_baseline` | formal_replacement_adr_and_promotion_gate_required | +| NVIDIA NeMo Agent Toolkit + Nemotron Fabric | 離線 replay、模型能力評估、合約輸出 smoke gate | `integration_blocked` | refresh_source_evidence_then_5_record_smoke_only | +| NousResearch Hermes Agent | 知識記憶、證據草稿、長期技能庫候選 | `watch_only_blocked` | continue_watch_only_until_primary_source_evidence_is_sufficient | +| OpenAI Agents SDK Coordinator | Coordinator / handoff / tracing / guardrail 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | +| LangGraph Incident Kernel | durable incident workflow kernel 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | +| Claude Agent SDK Remediator | DevOps / code remediation patch proposal 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| Microsoft Agent Framework | MCP / A2A enterprise workflow 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | +| Google Agent Development Kit Stack | Gemini / Vertex agent stack 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | +| CrewAI Flows + Crews | 快速多 Agent prototype 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | + +## 優先工作清單 + +| 順序 | 工作 | 風險 | 自動化模式 | 完成定義 | +|---:|---|---|---|---| +| 1 | 固定每週 AI Agent market watch 並產生治理 snapshot | `low` | `agent_auto_read_only` | 每週一 09:00 Asia/Taipei 有 watch / integration / discovery / promotion / governance 五份 artifacts。 | +| 2 | 刷新 market capability scorecard | `medium` | `agent_propose_owner_review` | OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。 | +| 3 | 建立 50 筆歷史 incident offline replay queue | `medium` | `agent_auto_prepare_human_approve_run` | replay fixture 不含 secret,候選結果可與 OpenClaw baseline 比較。 | +| 4 | Agent 溝通 / 學習 / 成長可視化 readback | `medium` | `agent_auto_read_model` | 每個 Agent 的 handoff、decision、learning writeback、review score 與 blocked action 可被前端和報告讀到。 | +| 5 | Telegram Bot 報告與高風險審核橋接 | `high` | `human_approve_before_send_or_action` | 低中風險只告警回報,高風險需要 Telegram approval token / owner response 才能執行。 | +| 6 | 工具、套件、服務、主機版本自動 freshness 盤點 | `medium` | `agent_auto_scan_agent_propose` | 套件、服務、主機、MCP、AI provider、模型版本都有 stale / upgrade / rollback / approval gate。 | + +## 禁止越界 + +- `replacement_decisions_approved=0` +- `replay_candidates_approved=0` +- `sdk_installations_approved=0` +- `paid_api_calls_approved=0` +- `shadow_or_canary_approved=0` +- `production_routing_approved=false` +- `status_cleanup_apply_allowed=false` +- `memory_write_authorized=false` +- `telegram_send_approved=false` diff --git a/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md index b7a0734a..1248553f 100644 --- a/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md +++ b/docs/operations/AI-AGENT-MARKET-RADAR-READBACK.md @@ -1,13 +1,13 @@ # AI Agent 市場雷達與近期變更盤點 -- 產生時間:`2026-06-25T02:52:39.092430+00:00` +- 產生時間:`2026-06-26T03:43:01.458349+00:00` - 整體治理完成度:`42.2%` - 市場雷達完成度:`100.0%` - 候選 Agent:`13` -- 官方 / 主要來源:`34` +- 官方 / 主要來源:`36` - 來源失敗:`0` -- 需要重新審查候選:`13` -- 仍被整合 gate 擋下:`13` +- 需要重新審查候選:`5` +- 仍被整合 gate 擋下:`5` - OpenClaw 取代批准:`0` ## 近期變更盤點 @@ -17,7 +17,7 @@ | `P0` | Product Governance Owner Response Dashboard / handoff 收斂 | `read_model_ready_runtime_blocked` | `100%` | Owner questions 與 boundary acknowledgements 仍需逐項回覆。 | | `P0` | Status Cleanup Dashboard read-only API 正式化 | `blocked_status_cleanup_apply_not_authorized` | `100%` | apply_allowed=false 前不得更新 project status 或 memory。 | | `P0` | Wazuh / IwoooS 可視性邊界 | `blocked_waiting_manager_agent_registry_readback` | `35%` | 等待 manager agent registry readback 與 live route readback。 | -| `P0` | AI Agent market watch 2026-06-25 | `market_refresh_done_integration_blocked` | `100%` | 更新 scorecard 並進入 offline replay gate,不得直接替換。 | +| `P0` | AI Agent market watch 最新 primary-source refresh | `market_refresh_done_integration_blocked` | `100%` | 更新 scorecard 並進入 offline replay gate,不得直接替換。 | | `P1` | 日報 / 週報 / 月報數據化報告 | `report_contract_defined_runtime_delivery_blocked` | `65%` | 接 Agent 工作量、Telegram receipt 與 human-review queue。 | | `P1` | 工具 / 套件 / 服務 / 主機版本新鮮度 | `read_only_inventory_defined_update_execution_blocked` | `55%` | 定期產生版本 freshness snapshot;中低風險可 auto proposal,高風險維持人工審核。 | @@ -40,11 +40,11 @@ | OpenClaw incumbent | 生產仲裁者 / production decision core | `production_baseline` | formal_replacement_adr_and_promotion_gate_required | | NVIDIA NeMo Agent Toolkit + Nemotron Fabric | 離線 replay、模型能力評估、合約輸出 smoke gate | `integration_blocked` | refresh_source_evidence_then_5_record_smoke_only | | NousResearch Hermes Agent | 知識記憶、證據草稿、長期技能庫候選 | `watch_only_blocked` | continue_watch_only_until_primary_source_evidence_is_sufficient | -| OpenAI Agents SDK Coordinator | Coordinator / handoff / tracing / guardrail 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | -| LangGraph Incident Kernel | durable incident workflow kernel 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | +| OpenAI Agents SDK Coordinator | Coordinator / handoff / tracing / guardrail 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | +| LangGraph Incident Kernel | durable incident workflow kernel 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | | Claude Agent SDK Remediator | DevOps / code remediation patch proposal 候選 | `integration_blocked` | refresh_scorecard_then_offline_replay_or_promotion_gate | -| Microsoft Agent Framework | MCP / A2A enterprise workflow 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | -| Google Agent Development Kit Stack | Gemini / Vertex agent stack 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | +| Microsoft Agent Framework | MCP / A2A enterprise workflow 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | +| Google Agent Development Kit Stack | Gemini / Vertex agent stack 候選 | `registered_no_review` | continue_weekly_primary_source_market_watch | | CrewAI Flows + Crews | 快速多 Agent prototype 候選 | `integration_blocked` | create_no_sdk_no_api_adapter_then_offline_replay | ## 優先工作清單 @@ -52,7 +52,7 @@ | 順序 | 工作 | 風險 | 自動化模式 | 完成定義 | |---:|---|---|---|---| | 1 | 固定每週 AI Agent market watch 並產生治理 snapshot | `low` | `agent_auto_read_only` | 每週一 09:00 Asia/Taipei 有 watch / integration / discovery / promotion / governance 五份 artifacts。 | -| 2 | 刷新 2026-06-25 market capability scorecard | `medium` | `agent_propose_owner_review` | OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。 | +| 2 | 刷新 market capability scorecard | `medium` | `agent_propose_owner_review` | OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。 | | 3 | 建立 50 筆歷史 incident offline replay queue | `medium` | `agent_auto_prepare_human_approve_run` | replay fixture 不含 secret,候選結果可與 OpenClaw baseline 比較。 | | 4 | Agent 溝通 / 學習 / 成長可視化 readback | `medium` | `agent_auto_read_model` | 每個 Agent 的 handoff、decision、learning writeback、review score 與 blocked action 可被前端和報告讀到。 | | 5 | Telegram Bot 報告與高風險審核橋接 | `high` | `human_approve_before_send_or_action` | 低中風險只告警回報,高風險需要 Telegram approval token / owner response 才能執行。 | diff --git a/docs/operations/AI-TECHNOLOGY-RADAR-READBACK-2026-06-26.md b/docs/operations/AI-TECHNOLOGY-RADAR-READBACK-2026-06-26.md new file mode 100644 index 00000000..46bb5ce6 --- /dev/null +++ b/docs/operations/AI-TECHNOLOGY-RADAR-READBACK-2026-06-26.md @@ -0,0 +1,85 @@ +# AI 技術雷達與滾動更新讀回 + +- 產生時間:`2026-06-26T03:43:13.171222+00:00` +- 整體治理完成度:`42.2%` +- AI 技術雷達來源成功率:`100.0%` +- 監控技術項目:`21` +- 技術領域:`6` +- 官方 / primary sources:`52` +- 來源失敗:`0` +- 需要審核變更:`5` +- 高優先級項目:`15` +- 滾動更新狀態:`near_real_time_watch_ready_integration_gated` + +## 技術領域覆蓋 + +| 技術領域 | 技術數 | 高優先級 | 需要審核 | 代表技術 | +|---|---:|---:|---:|---| +| `agent_frameworks` | `6` | `5` | `1` | OpenAI Agents SDK, NVIDIA Nemotron + NeMo Agent Toolkit, LangGraph, Google Agent Development Kit | +| `evaluation_and_observability` | `3` | `3` | `1` | OpenTelemetry GenAI Semantic Conventions, Ragas, Langfuse | +| `mcp_and_a2a` | `2` | `2` | `2` | Model Context Protocol SDK, Agent2Agent Protocol | +| `model_providers` | `3` | `3` | `1` | OpenAI Model Platform, Anthropic Claude Platform, Google Gemini Platform | +| `model_serving` | `2` | `0` | `0` | Hugging Face Text Generation Inference, vLLM | +| `rag_and_vector` | `5` | `2` | `0` | LlamaIndex, LangChain, pgvector, Qdrant | + +## 高優先級審核佇列 + +| 技術 | 領域 | 優先級 | Gate | 下一步 | +|---|---|---|---|---| +| Model Context Protocol SDK | `mcp_and_a2a` | `p0` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | +| Agent2Agent Protocol | `mcp_and_a2a` | `p1` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | +| Anthropic Claude Platform | `model_providers` | `p0` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | +| Langfuse | `evaluation_and_observability` | `p1` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | + +## Agent 專業分工 + +| Agent | 專業角色 | 自動化範圍 | 需要審核的邊界 | +|---|---|---|---| +| OpenClaw | 生產決策仲裁者、風險分級與最後 policy guard | 維持現有 production baseline、讀取 replay / shadow 評分、拒絕無證據替換 | 任何取代、降級、生產路由切換都必須通過 replay / shadow / canary 與人工批准。 | +| NemoTron | 離線回放評估者、模型能力比較、合約輸出 smoke gate | 只讀 request pack、比對候選輸出、產生 replay scorecard 草稿 | 不得自行呼叫外部 NIM/API、不得讀 labels 作答、不得進生產路由。 | +| Hermes | 知識管理、RAG 整理、報告草稿與長期技能庫維護 | 整理 primary source 摘要、建立 no-send 日週月報、準備人審包 | 不得同步 raw chat history、不得保存 secret、不得直接發 Telegram live report。 | +| MarketRadar | AI 技術市場雷達、版本監控、來源失敗偵測 | 每 6 小時只讀 primary sources、產生 freshness / review queue | 不得自動新增 SDK、不得自動修改 provider route 或 workflow 行為。 | +| Critic / Reviewer | 獨立審核、反例檢查、整合風險評分 | 檢查政策旗標、來源可靠性、成本與資安風險 | 只能輸出 blocked / candidate / owner_review,不得直接執行寫入。 | + +## 滾動更新控制 + +| 節奏 | Agent 可自動做什麼 | 輸出 | Gate | +|---|---|---|---| +| 每 6 小時 | 讀取官方文件、PyPI、npm、GitHub release、primary source hash。 | AI 技術 watch report、來源失敗清單、review queue。 | `read_only_only` | +| 每日 | 依 business applicability、成本、依賴、資安、AWOOOI fit 分類。 | 日報摘要與中低風險自動處理建議。 | `no_send_report_until_delivery_gate` | +| 每週 | 刷新 scorecard,決定 sandbox / replay / adapter design 優先級。 | 週報、優先序、候選整合審查包。 | `scorecard_required_before_replay` | +| 每月 | 彙整趨勢,提出 roadmap / watch-only / retire 建議。 | 月報與策略審核包。 | `human_review_for_strategy_or_production_change` | + +## 主流實務來源證據 + +| 實務 | 官方來源 | AWOOOI Gate | Agent 分工 | +|---|---|---|---| +| OpenAI Agents SDK:專家協作、tool execution、approvals、state 由產品掌控 | https://developers.openai.com/api/docs/guides/agents | `sandbox_orchestration_no_write` | OpenClaw 負責 policy guard;MarketRadar 追版本;Hermes 產審核包。 | +| NVIDIA Nemotron 3 Ultra / NeMo:長任務 Agent、profiling、evaluation、MCP / A2A 互通 | https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/ | `nemotron_replay_evaluator_only` | NemoTron 只做離線 replay / evaluator / smoke gate,不接 production routing。 | +| LangGraph:durable execution、human-in-the-loop、stateful workflow runtime | https://docs.langchain.com/oss/python/langgraph/overview | `incident_workflow_kernel_replay_first` | OpenClaw 仲裁狀態轉移;Hermes 記錄 replay 證據與交接原因。 | +| MCP:標準化 agent-to-tool / resource / prompt 連接,且需明確 user consent | https://modelcontextprotocol.io/specification/2025-06-18 | `read_only_tool_registry_before_write_adapter` | MarketRadar 監控 SDK / spec;Critic 檢查資料權限與 tool safety。 | +| A2A:跨框架 Agent 溝通、委派與互通;MCP 處理工具、A2A 處理 Agent 對 Agent | https://a2a-protocol.org/latest/ | `agent_to_agent_interop_watch_only` | OpenClaw 設定協作邊界;Hermes 彙整 handoff 記錄;NemoTron 比對輸出。 | +| OpenTelemetry GenAI:Agent / LLM / MCP trace 語意慣例,支援可觀測與稽核 | https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/ | `trace_semconv_mapping_before_runtime_export` | Critic 定義稽核欄位;MarketRadar 追語意規範版本;Hermes 產日週月報。 | + +## 優先工作清單 + +| 順序 | 工作 | 優先級 | 自動化模式 | 完成定義 | +|---:|---|---|---|---| +| 1 | AI 技術雷達 primary source 監控產品化 | `P0` | `agent_auto_read_only` | API、snapshot、Markdown、schema、測試與 production readback 都能顯示技術領域、來源與 Gate。 | +| 2 | 近即時版本 / release / docs 變更偵測 | `P0` | `agent_auto_schedule_read_only` | 每 6 小時可跑 watch;失敗來源會進日報,不會自動整合。 | +| 3 | OpenClaw / Hermes / NemoTron / MarketRadar 專業分工與成長紀錄 | `P0` | `agent_auto_read_model_human_review_for_write` | 每個 Agent 的角色、輸出、學習寫回與限制都能被前端讀回。 | +| 4 | AI 技術 scorecard 與 sandbox / replay 優先級 | `P1` | `agent_propose_owner_review` | 高優先級變更先進 scorecard,再進 no-cost/no-write sandbox 或 replay 計畫。 | +| 5 | Telegram Bot 報告與高風險審核橋接 | `P1` | `blocked_until_telegram_send_gate` | 低中風險只告警回報;高風險需 owner approval 後才可發送或執行。 | +| 6 | 新 AI 技術探索與 watchlist 擴充 | `P2` | `agent_auto_discover_human_classify` | GitHub topic / package registry / 官方 blog 可提出候選,但加入正式 watchlist 前需審核。 | + +## 仍被 Gate 擋下 + +- `sdk_installation_approved=false` +- `paid_api_calls_approved=false` +- `production_routing_approved=false` +- `telegram_send_approved=false` +- `model_provider_switch_approved=false` +- `host_write_approved=false` +- `openclaw_replacement_approved=false` +- `replay_shadow_canary_gate_required=true` +- `cost_and_data_boundary_review_required=true` diff --git a/docs/operations/AI-TECHNOLOGY-RADAR-READBACK.md b/docs/operations/AI-TECHNOLOGY-RADAR-READBACK.md index f81e6ac7..46bb5ce6 100644 --- a/docs/operations/AI-TECHNOLOGY-RADAR-READBACK.md +++ b/docs/operations/AI-TECHNOLOGY-RADAR-READBACK.md @@ -1,24 +1,24 @@ # AI 技術雷達與滾動更新讀回 -- 產生時間:`2026-06-25T03:56:51.751955+00:00` +- 產生時間:`2026-06-26T03:43:13.171222+00:00` - 整體治理完成度:`42.2%` - AI 技術雷達來源成功率:`100.0%` -- 監控技術項目:`20` +- 監控技術項目:`21` - 技術領域:`6` -- 官方 / primary sources:`47` +- 官方 / primary sources:`52` - 來源失敗:`0` -- 需要審核變更:`0` -- 高優先級項目:`14` +- 需要審核變更:`5` +- 高優先級項目:`15` - 滾動更新狀態:`near_real_time_watch_ready_integration_gated` ## 技術領域覆蓋 | 技術領域 | 技術數 | 高優先級 | 需要審核 | 代表技術 | |---|---:|---:|---:|---| -| `agent_frameworks` | `6` | `5` | `0` | OpenAI Agents SDK, NVIDIA Nemotron + NeMo Agent Toolkit, LangGraph, Google Agent Development Kit | -| `evaluation_and_observability` | `2` | `2` | `0` | Ragas, Langfuse | -| `mcp_and_a2a` | `2` | `2` | `0` | Model Context Protocol SDK, Agent2Agent Protocol | -| `model_providers` | `3` | `3` | `0` | OpenAI Model Platform, Anthropic Claude Platform, Google Gemini Platform | +| `agent_frameworks` | `6` | `5` | `1` | OpenAI Agents SDK, NVIDIA Nemotron + NeMo Agent Toolkit, LangGraph, Google Agent Development Kit | +| `evaluation_and_observability` | `3` | `3` | `1` | OpenTelemetry GenAI Semantic Conventions, Ragas, Langfuse | +| `mcp_and_a2a` | `2` | `2` | `2` | Model Context Protocol SDK, Agent2Agent Protocol | +| `model_providers` | `3` | `3` | `1` | OpenAI Model Platform, Anthropic Claude Platform, Google Gemini Platform | | `model_serving` | `2` | `0` | `0` | Hugging Face Text Generation Inference, vLLM | | `rag_and_vector` | `5` | `2` | `0` | LlamaIndex, LangChain, pgvector, Qdrant | @@ -26,6 +26,10 @@ | 技術 | 領域 | 優先級 | Gate | 下一步 | |---|---|---|---|---| +| Model Context Protocol SDK | `mcp_and_a2a` | `p0` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | +| Agent2Agent Protocol | `mcp_and_a2a` | `p1` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | +| Anthropic Claude Platform | `model_providers` | `p0` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | +| Langfuse | `evaluation_and_observability` | `p1` | `scorecard_required_before_integration` | 刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。 | ## Agent 專業分工 @@ -46,6 +50,17 @@ | 每週 | 刷新 scorecard,決定 sandbox / replay / adapter design 優先級。 | 週報、優先序、候選整合審查包。 | `scorecard_required_before_replay` | | 每月 | 彙整趨勢,提出 roadmap / watch-only / retire 建議。 | 月報與策略審核包。 | `human_review_for_strategy_or_production_change` | +## 主流實務來源證據 + +| 實務 | 官方來源 | AWOOOI Gate | Agent 分工 | +|---|---|---|---| +| OpenAI Agents SDK:專家協作、tool execution、approvals、state 由產品掌控 | https://developers.openai.com/api/docs/guides/agents | `sandbox_orchestration_no_write` | OpenClaw 負責 policy guard;MarketRadar 追版本;Hermes 產審核包。 | +| NVIDIA Nemotron 3 Ultra / NeMo:長任務 Agent、profiling、evaluation、MCP / A2A 互通 | https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/ | `nemotron_replay_evaluator_only` | NemoTron 只做離線 replay / evaluator / smoke gate,不接 production routing。 | +| LangGraph:durable execution、human-in-the-loop、stateful workflow runtime | https://docs.langchain.com/oss/python/langgraph/overview | `incident_workflow_kernel_replay_first` | OpenClaw 仲裁狀態轉移;Hermes 記錄 replay 證據與交接原因。 | +| MCP:標準化 agent-to-tool / resource / prompt 連接,且需明確 user consent | https://modelcontextprotocol.io/specification/2025-06-18 | `read_only_tool_registry_before_write_adapter` | MarketRadar 監控 SDK / spec;Critic 檢查資料權限與 tool safety。 | +| A2A:跨框架 Agent 溝通、委派與互通;MCP 處理工具、A2A 處理 Agent 對 Agent | https://a2a-protocol.org/latest/ | `agent_to_agent_interop_watch_only` | OpenClaw 設定協作邊界;Hermes 彙整 handoff 記錄;NemoTron 比對輸出。 | +| OpenTelemetry GenAI:Agent / LLM / MCP trace 語意慣例,支援可觀測與稽核 | https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/ | `trace_semconv_mapping_before_runtime_export` | Critic 定義稽核欄位;MarketRadar 追語意規範版本;Hermes 產日週月報。 | + ## 優先工作清單 | 順序 | 工作 | 優先級 | 自動化模式 | 完成定義 | diff --git a/docs/operations/ai-agent-market-radar-readback.snapshot.json b/docs/operations/ai-agent-market-radar-readback.snapshot.json index c3a18212..2ccbd793 100644 --- a/docs/operations/ai-agent-market-radar-readback.snapshot.json +++ b/docs/operations/ai-agent-market-radar-readback.snapshot.json @@ -35,15 +35,15 @@ { "candidate_id": "openai_agents_sdk_coordinator", "display_name": "OpenAI Agents SDK Coordinator", - "gate_status": "integration_blocked", - "next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "gate_status": "registered_no_review", + "next_gate": "continue_weekly_primary_source_market_watch", "recommended_role": "Coordinator / handoff / tracing / guardrail 候選" }, { "candidate_id": "langgraph_incident_kernel", "display_name": "LangGraph Incident Kernel", - "gate_status": "integration_blocked", - "next_gate": "refresh_scorecard_then_offline_replay_or_promotion_gate", + "gate_status": "registered_no_review", + "next_gate": "continue_weekly_primary_source_market_watch", "recommended_role": "durable incident workflow kernel 候選" }, { @@ -56,15 +56,15 @@ { "candidate_id": "microsoft_agent_framework", "display_name": "Microsoft Agent Framework", - "gate_status": "integration_blocked", - "next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "gate_status": "registered_no_review", + "next_gate": "continue_weekly_primary_source_market_watch", "recommended_role": "MCP / A2A enterprise workflow 候選" }, { "candidate_id": "google_adk_stack", "display_name": "Google Agent Development Kit Stack", - "gate_status": "integration_blocked", - "next_gate": "create_no_sdk_no_api_adapter_then_offline_replay", + "gate_status": "registered_no_review", + "next_gate": "continue_weekly_primary_source_market_watch", "recommended_role": "Gemini / Vertex agent stack 候選" }, { @@ -75,7 +75,7 @@ "recommended_role": "快速多 Agent prototype 候選" } ], - "generated_at": "2026-06-25T02:52:39.092430+00:00", + "generated_at": "2026-06-26T03:43:01.458349+00:00", "market_practice_alignment": [ { "awoooi_status": "partially_modeled", @@ -123,12 +123,12 @@ "market_source_freshness": [ { "candidate_id": "openai_agents_sdk_coordinator", - "changed": true, - "decision": "changed_requires_replay_readiness_review", + "changed": false, + "decision": "watch_only_no_change", "display_name": "OpenAI Agents SDK Coordinator", "versions": [ { - "changed": true, + "changed": false, "published_at": null, "source_id": "openai_agents_docs", "status": "ok", @@ -149,14 +149,14 @@ "version": null }, { - "changed": true, + "changed": false, "published_at": "2026-06-24T05:15:31.741499Z", "source_id": "openai_agents_python_pypi", "status": "ok", "version": "0.17.7" }, { - "changed": true, + "changed": false, "published_at": "2026-06-24T04:02:12.610Z", "source_id": "openai_agents_typescript_npm", "status": "ok", @@ -166,29 +166,29 @@ }, { "candidate_id": "langgraph_incident_kernel", - "changed": true, - "decision": "changed_requires_replay_readiness_review", + "changed": false, + "decision": "watch_only_no_change", "display_name": "LangGraph Incident Kernel", "versions": [ { - "changed": true, + "changed": false, "published_at": null, "source_id": "langgraph_docs", "status": "ok", "version": null }, { - "changed": true, + "changed": false, "published_at": "2026-06-18T20:58:20.335564Z", "source_id": "langgraph_pypi", "status": "ok", "version": "1.2.6" }, { - "changed": true, + "changed": false, "published_at": "2026-06-18T20:58:32Z", "source_id": "langgraph_github_release", - "status": "ok", + "status": "carried_forward_rate_limited", "version": "1.2.6" } ] @@ -200,12 +200,19 @@ "display_name": "NVIDIA NeMo Agent Toolkit + Nemotron Fabric", "versions": [ { - "changed": true, + "changed": false, "published_at": null, "source_id": "nvidia_nemo_agent_toolkit_docs", "status": "ok", "version": null }, + { + "changed": false, + "published_at": "2026-06-17T00:25:55.043213Z", + "source_id": "nvidia_nemo_agent_toolkit_pypi", + "status": "ok", + "version": "1.8.0" + }, { "changed": false, "published_at": null, @@ -213,6 +220,13 @@ "status": "ok", "version": null }, + { + "changed": false, + "published_at": null, + "source_id": "nvidia_nemotron_3_ultra_blog", + "status": "ok", + "version": null + }, { "changed": false, "published_at": null, @@ -236,7 +250,7 @@ "display_name": "Claude Agent SDK Remediator", "versions": [ { - "changed": true, + "changed": false, "published_at": null, "source_id": "claude_agent_sdk_docs", "status": "ok", @@ -253,51 +267,51 @@ }, { "candidate_id": "google_adk_stack", - "changed": true, - "decision": "changed_requires_replay_readiness_review", + "changed": false, + "decision": "watch_only_no_change", "display_name": "Google Agent Development Kit Stack", "versions": [ { - "changed": true, + "changed": false, "published_at": null, "source_id": "google_adk_docs", "status": "ok", "version": null }, { - "changed": true, + "changed": false, "published_at": "2026-06-18T18:47:06.323661Z", "source_id": "google_adk_pypi", "status": "ok", "version": "2.3.0" }, { - "changed": true, + "changed": false, "published_at": "2026-06-18T18:45:04Z", "source_id": "google_adk_github_release", - "status": "ok", + "status": "carried_forward_rate_limited", "version": "v2.3.0" } ] }, { "candidate_id": "microsoft_agent_framework", - "changed": true, - "decision": "changed_requires_replay_readiness_review", + "changed": false, + "decision": "watch_only_no_change", "display_name": "Microsoft Agent Framework", "versions": [ { - "changed": true, + "changed": false, "published_at": null, "source_id": "microsoft_agent_framework_docs", "status": "ok", "version": null }, { - "changed": true, + "changed": false, "published_at": "2026-06-23T21:18:26Z", "source_id": "microsoft_agent_framework_github_release", - "status": "ok", + "status": "carried_forward_rate_limited", "version": "dotnet-1.11.0" } ] @@ -317,16 +331,16 @@ }, { "changed": true, - "published_at": "2026-06-11T17:14:39.912300Z", + "published_at": "2026-06-25T23:18:27.665972Z", "source_id": "crewai_pypi", "status": "ok", - "version": "1.14.7" + "version": "1.15.0" }, { - "changed": true, + "changed": false, "published_at": "2026-06-11T17:13:46Z", "source_id": "crewai_github_release", - "status": "ok", + "status": "carried_forward_rate_limited", "version": "1.14.7" } ] @@ -377,7 +391,7 @@ "done_definition": "OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。", "order": 2, "risk": "medium", - "work_item": "刷新 2026-06-25 market capability scorecard" + "work_item": "刷新 market capability scorecard" }, { "automation_mode": "agent_auto_prepare_human_approve_run", @@ -435,7 +449,7 @@ "next_gate": "更新 scorecard 並進入 offline replay gate,不得直接替換。", "priority": "P0", "status": "market_refresh_done_integration_blocked", - "title": "AI Agent market watch 2026-06-25" + "title": "AI Agent market watch 最新 primary-source refresh" }, { "completion_percent": 65, @@ -454,21 +468,21 @@ ], "schema_version": "ai_agent_market_radar_readback_v1", "source_scope": { - "gitea_main_evidence_basis_commit": "279f9531", - "market_governance_snapshot": "docs/evaluations/agent_market_governance_snapshot_2026-06-25.json", - "market_watch_report": "docs/evaluations/agent_market_watch_report_2026-06-25.json", - "project_handoff_basis": "Codex Start Here handoff generated 2026-06-25", + "gitea_main_evidence_basis_commit": "61cf5024", + "market_governance_snapshot": "docs/evaluations/agent_market_governance_snapshot_2026-06-26.json", + "market_watch_report": "docs/evaluations/agent_market_watch_report_2026-06-26.json", + "project_handoff_basis": "Codex Start Here handoff + P2-412 primary-source refresh", "scope_note": "盤點範圍涵蓋近期 Gitea 主線、治理 handoff、AI Agent market watch 與 Status Cleanup gates;不包含 raw chat history。", "status_cleanup_dashboard": "docs/operations/awoooi-status-cleanup-dashboard.snapshot.json" }, "summary": { - "changed_candidates": 13, - "integration_blocked_candidates": 13, + "changed_candidates": 5, + "integration_blocked_candidates": 5, "market_candidates": 13, - "market_sources": 34, + "market_sources": 36, "market_watch_completion_percent": 100.0, "overall_completion_percent": 42.2, - "recommended_watch_additions": 5, + "recommended_watch_additions": 0, "replacement_decisions_approved": 0, "source_failures": 0, "status": "market_refresh_done_integration_blocked", diff --git a/docs/operations/ai-technology-radar-readback.snapshot.json b/docs/operations/ai-technology-radar-readback.snapshot.json index 78ea7bda..efb355a7 100644 --- a/docs/operations/ai-technology-radar-readback.snapshot.json +++ b/docs/operations/ai-technology-radar-readback.snapshot.json @@ -10,8 +10,53 @@ "replay_shadow_canary_gate_required=true", "cost_and_data_boundary_review_required=true" ], - "generated_at": "2026-06-25T03:56:51.751955+00:00", - "high_priority_review_queue": [], + "generated_at": "2026-06-26T03:43:13.171222+00:00", + "high_priority_review_queue": [ + { + "display_name": "Model Context Protocol SDK", + "evaluation_priority": "p0", + "gate_status": "scorecard_required_before_integration", + "next_gate": "刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "mcp_and_a2a", + "technology_id": "modelcontextprotocol_sdk" + }, + { + "display_name": "Agent2Agent Protocol", + "evaluation_priority": "p1", + "gate_status": "scorecard_required_before_integration", + "next_gate": "刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。", + "requires_cost_approval": false, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "mcp_and_a2a", + "technology_id": "a2a_protocol" + }, + { + "display_name": "Anthropic Claude Platform", + "evaluation_priority": "p0", + "gate_status": "scorecard_required_before_integration", + "next_gate": "刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。", + "requires_cost_approval": true, + "requires_dependency_approval": false, + "requires_security_review": true, + "technology_area": "model_providers", + "technology_id": "anthropic_claude_platform" + }, + { + "display_name": "Langfuse", + "evaluation_priority": "p1", + "gate_status": "scorecard_required_before_integration", + "next_gate": "刷新 scorecard,若涉及 SDK/API/route/Telegram/host write 則送人工審核。", + "requires_cost_approval": true, + "requires_dependency_approval": true, + "requires_security_review": true, + "technology_area": "evaluation_and_observability", + "technology_id": "langfuse_observability" + } + ], "integration_candidates": [ { "awoooi_role": "協調者、handoff、tool tracing、guardrail 候選", @@ -75,40 +120,61 @@ }, { "awoooi_role": "快速 prototype / non-production 評估候選", - "changed": false, - "decision": "watch_only_no_change", + "changed": true, + "decision": "changed_requires_replay_readiness_review", "display_name": "CrewAI Flows + Crews", "integration_surface": "multi_agent_prototype", "recommended_actions": [ - "keep_watch_only_status" + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" ], "technology_area": "agent_frameworks", "technology_id": "crewai_flows" }, { "awoooi_role": "read-only tool registry / MCP adapter 候選", - "changed": false, - "decision": "watch_only_no_change", + "changed": true, + "decision": "changed_requires_replay_readiness_review", "display_name": "Model Context Protocol SDK", "integration_surface": "tool_registry_interoperability", "recommended_actions": [ - "keep_watch_only_status" + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" ], "technology_area": "mcp_and_a2a", "technology_id": "modelcontextprotocol_sdk" }, { "awoooi_role": "跨 Agent 溝通協定 watch-only 候選", - "changed": false, - "decision": "watch_only_no_change", + "changed": true, + "decision": "changed_requires_replay_readiness_review", "display_name": "Agent2Agent Protocol", "integration_surface": "agent_to_agent_interop", "recommended_actions": [ - "keep_watch_only_status" + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" ], "technology_area": "mcp_and_a2a", "technology_id": "a2a_protocol" }, + { + "awoooi_role": "Agent / LLM / MCP trace 欄位標準與日週月報可觀測基礎", + "changed": false, + "decision": "watch_only_no_change", + "display_name": "OpenTelemetry GenAI Semantic Conventions", + "integration_surface": "agent_llm_trace_semantic_conventions", + "recommended_actions": [ + "keep_watch_only_status" + ], + "technology_area": "evaluation_and_observability", + "technology_id": "opentelemetry_genai_semconv" + }, { "awoooi_role": "模型能力、成本與 routing scorecard 來源", "changed": false, @@ -123,12 +189,15 @@ }, { "awoooi_role": "Claude model / coding agent / remediation watch source", - "changed": false, - "decision": "watch_only_no_change", + "changed": true, + "decision": "changed_requires_replay_readiness_review", "display_name": "Anthropic Claude Platform", "integration_surface": "model_capability_cost_routing", "recommended_actions": [ - "keep_watch_only_status" + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" ], "technology_area": "model_providers", "technology_id": "anthropic_claude_platform" @@ -219,12 +288,15 @@ }, { "awoooi_role": "LLM trace / prompt / eval observability 候選", - "changed": false, - "decision": "watch_only_no_change", + "changed": true, + "decision": "changed_requires_replay_readiness_review", "display_name": "Langfuse", "integration_surface": "llm_observability_tracing", "recommended_actions": [ - "keep_watch_only_status" + "refresh_ai_technology_scorecard", + "classify_business_applicability", + "prepare_no_install_integration_note", + "route_high_risk_items_to_human_review" ], "technology_area": "evaluation_and_observability", "technology_id": "langfuse_observability" @@ -265,6 +337,44 @@ "sdk_installation_approved": false, "telegram_send_approved": false }, + "primary_source_alignment": [ + { + "agent_assignment": "OpenClaw 負責 policy guard;MarketRadar 追版本;Hermes 產審核包。", + "awoooi_gate": "sandbox_orchestration_no_write", + "practice": "OpenAI Agents SDK:專家協作、tool execution、approvals、state 由產品掌控", + "source": "https://developers.openai.com/api/docs/guides/agents" + }, + { + "agent_assignment": "NemoTron 只做離線 replay / evaluator / smoke gate,不接 production routing。", + "awoooi_gate": "nemotron_replay_evaluator_only", + "practice": "NVIDIA Nemotron 3 Ultra / NeMo:長任務 Agent、profiling、evaluation、MCP / A2A 互通", + "source": "https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/" + }, + { + "agent_assignment": "OpenClaw 仲裁狀態轉移;Hermes 記錄 replay 證據與交接原因。", + "awoooi_gate": "incident_workflow_kernel_replay_first", + "practice": "LangGraph:durable execution、human-in-the-loop、stateful workflow runtime", + "source": "https://docs.langchain.com/oss/python/langgraph/overview" + }, + { + "agent_assignment": "MarketRadar 監控 SDK / spec;Critic 檢查資料權限與 tool safety。", + "awoooi_gate": "read_only_tool_registry_before_write_adapter", + "practice": "MCP:標準化 agent-to-tool / resource / prompt 連接,且需明確 user consent", + "source": "https://modelcontextprotocol.io/specification/2025-06-18" + }, + { + "agent_assignment": "OpenClaw 設定協作邊界;Hermes 彙整 handoff 記錄;NemoTron 比對輸出。", + "awoooi_gate": "agent_to_agent_interop_watch_only", + "practice": "A2A:跨框架 Agent 溝通、委派與互通;MCP 處理工具、A2A 處理 Agent 對 Agent", + "source": "https://a2a-protocol.org/latest/" + }, + { + "agent_assignment": "Critic 定義稽核欄位;MarketRadar 追語意規範版本;Hermes 產日週月報。", + "awoooi_gate": "trace_semconv_mapping_before_runtime_export", + "practice": "OpenTelemetry GenAI:Agent / LLM / MCP trace 語意慣例,支援可觀測與稽核", + "source": "https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/" + } + ], "priority_workplan": [ { "automation_mode": "agent_auto_read_only", @@ -401,26 +511,26 @@ "schema_version": "ai_technology_radar_readback_v1", "source_scope": { "agent_market_radar_readback": "docs/operations/ai-agent-market-radar-readback.snapshot.json", - "gitea_main_evidence_basis_commit": "683428bd", + "gitea_main_evidence_basis_commit": "61cf5024", "scope_note": "本讀回只整合已提交的只讀來源監控、AI Agent 市場雷達與治理 gate;不包含 raw chat history、secret、session 或本機工作視窗內容。", "technology_source_registry": "docs/ai/ai-technology-watch-sources.v1.json", "technology_watch_report": "docs/evaluations/ai_technology_watch_report_2026-06-25.json" }, "summary": { "ai_technology_radar_completion_percent": 100.0, - "changed_technologies": 0, - "high_priority_count": 14, + "changed_technologies": 5, + "high_priority_count": 15, "overall_completion_percent": 42.2, - "review_queue_count": 0, + "review_queue_count": 5, "rolling_update_status": "near_real_time_watch_ready_integration_gated", - "source_count": 47, + "source_count": 52, "source_failures": 0, "technology_area_count": 6, - "technology_count": 20 + "technology_count": 21 }, "technology_area_counts": { "agent_frameworks": 6, - "evaluation_and_observability": 2, + "evaluation_and_observability": 3, "mcp_and_a2a": 2, "model_providers": 3, "model_serving": 2, @@ -428,7 +538,7 @@ }, "technology_domains": [ { - "changed_count": 0, + "changed_count": 1, "high_priority_count": 5, "representative_technologies": [ "OpenAI Agents SDK", @@ -440,17 +550,18 @@ "technology_count": 6 }, { - "changed_count": 0, - "high_priority_count": 2, + "changed_count": 1, + "high_priority_count": 3, "representative_technologies": [ + "OpenTelemetry GenAI Semantic Conventions", "Ragas", "Langfuse" ], "technology_area": "evaluation_and_observability", - "technology_count": 2 + "technology_count": 3 }, { - "changed_count": 0, + "changed_count": 2, "high_priority_count": 2, "representative_technologies": [ "Model Context Protocol SDK", @@ -460,7 +571,7 @@ "technology_count": 2 }, { - "changed_count": 0, + "changed_count": 1, "high_priority_count": 3, "representative_technologies": [ "OpenAI Model Platform", diff --git a/scripts/dev/ai-agent-market-radar-readback.py b/scripts/dev/ai-agent-market-radar-readback.py index f40e637e..1628534c 100644 --- a/scripts/dev/ai-agent-market-radar-readback.py +++ b/scripts/dev/ai-agent-market-radar-readback.py @@ -15,6 +15,10 @@ def build_radar( market_watch: dict[str, Any], governance_snapshot: dict[str, Any], status_cleanup_dashboard: dict[str, Any], + market_watch_path: str, + governance_snapshot_path: str, + status_cleanup_dashboard_path: str, + evidence_commit: str, generated_at: str | None = None, ) -> dict[str, Any]: """Build a read-only market radar readback from committed evidence.""" @@ -30,11 +34,11 @@ def build_radar( "schema_version": "ai_agent_market_radar_readback_v1", "generated_at": generated_at or datetime.now(timezone.utc).isoformat(), "source_scope": { - "market_watch_report": "docs/evaluations/agent_market_watch_report_2026-06-25.json", - "market_governance_snapshot": "docs/evaluations/agent_market_governance_snapshot_2026-06-25.json", - "status_cleanup_dashboard": "docs/operations/awoooi-status-cleanup-dashboard.snapshot.json", - "project_handoff_basis": "Codex Start Here handoff generated 2026-06-25", - "gitea_main_evidence_basis_commit": "279f9531", + "market_watch_report": market_watch_path, + "market_governance_snapshot": governance_snapshot_path, + "status_cleanup_dashboard": status_cleanup_dashboard_path, + "project_handoff_basis": "Codex Start Here handoff + P2-412 primary-source refresh", + "gitea_main_evidence_basis_commit": evidence_commit, "scope_note": "盤點範圍涵蓋近期 Gitea 主線、治理 handoff、AI Agent market watch 與 Status Cleanup gates;不包含 raw chat history。", }, "summary": { @@ -213,7 +217,7 @@ def _recent_change_inventory(status_summary: dict[str, Any]) -> list[dict[str, A }, { "priority": "P0", - "title": "AI Agent market watch 2026-06-25", + "title": "AI Agent market watch 最新 primary-source refresh", "status": "market_refresh_done_integration_blocked", "completion_percent": 100, "next_gate": "更新 scorecard 並進入 offline replay gate,不得直接替換。", @@ -358,7 +362,7 @@ def _priority_workplan() -> list[dict[str, Any]]: }, { "order": 2, - "work_item": "刷新 2026-06-25 market capability scorecard", + "work_item": "刷新 market capability scorecard", "risk": "medium", "automation_mode": "agent_propose_owner_review", "done_definition": "OpenAI / LangGraph / NeMo-Nemotron / Claude / Microsoft / Google / CrewAI 均有新版官方來源與分數差異。", @@ -407,6 +411,7 @@ def main() -> int: parser.add_argument("--market-watch", required=True) parser.add_argument("--governance-snapshot", required=True) parser.add_argument("--status-cleanup-dashboard", required=True) + parser.add_argument("--evidence-commit", required=True) parser.add_argument("--output", required=True) parser.add_argument("--markdown-output", required=True) args = parser.parse_args() @@ -415,6 +420,10 @@ def main() -> int: market_watch=load_json(Path(args.market_watch)), governance_snapshot=load_json(Path(args.governance_snapshot)), status_cleanup_dashboard=load_json(Path(args.status_cleanup_dashboard)), + market_watch_path=args.market_watch, + governance_snapshot_path=args.governance_snapshot, + status_cleanup_dashboard_path=args.status_cleanup_dashboard, + evidence_commit=args.evidence_commit, ) Path(args.output).write_text( json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n", diff --git a/scripts/dev/ai-technology-radar-readback.py b/scripts/dev/ai-technology-radar-readback.py index d2361b1b..3d395225 100644 --- a/scripts/dev/ai-technology-radar-readback.py +++ b/scripts/dev/ai-technology-radar-readback.py @@ -66,6 +66,7 @@ def build_radar( "high_priority_review_queue": _high_priority_review_queue(technology_watch), "professional_agent_roles": _professional_agent_roles(), "rolling_update_controls": _rolling_update_controls(technology_watch), + "primary_source_alignment": _primary_source_alignment(), "integration_candidates": _integration_candidates(technology_watch), "priority_workplan": _priority_workplan(), "blocked_gates": [ @@ -177,6 +178,19 @@ def render_markdown(payload: dict[str, Any]) -> str: f"{control['output']} | `{control['gate']}` |" ) + lines.extend([ + "", + "## 主流實務來源證據", + "", + "| 實務 | 官方來源 | AWOOOI Gate | Agent 分工 |", + "|---|---|---|---|", + ]) + for item in payload["primary_source_alignment"]: + lines.append( + f"| {item['practice']} | {item['source']} | `{item['awoooi_gate']}` | " + f"{item['agent_assignment']} |" + ) + lines.extend([ "", "## 優先工作清單", @@ -325,6 +339,47 @@ def _rolling_update_controls(report: dict[str, Any]) -> list[dict[str, Any]]: ] +def _primary_source_alignment() -> list[dict[str, str]]: + return [ + { + "practice": "OpenAI Agents SDK:專家協作、tool execution、approvals、state 由產品掌控", + "source": "https://developers.openai.com/api/docs/guides/agents", + "awoooi_gate": "sandbox_orchestration_no_write", + "agent_assignment": "OpenClaw 負責 policy guard;MarketRadar 追版本;Hermes 產審核包。", + }, + { + "practice": "NVIDIA Nemotron 3 Ultra / NeMo:長任務 Agent、profiling、evaluation、MCP / A2A 互通", + "source": "https://developer.nvidia.com/blog/nvidia-nemotron-3-ultra-powers-faster-more-efficient-reasoning-for-long-running-agents/", + "awoooi_gate": "nemotron_replay_evaluator_only", + "agent_assignment": "NemoTron 只做離線 replay / evaluator / smoke gate,不接 production routing。", + }, + { + "practice": "LangGraph:durable execution、human-in-the-loop、stateful workflow runtime", + "source": "https://docs.langchain.com/oss/python/langgraph/overview", + "awoooi_gate": "incident_workflow_kernel_replay_first", + "agent_assignment": "OpenClaw 仲裁狀態轉移;Hermes 記錄 replay 證據與交接原因。", + }, + { + "practice": "MCP:標準化 agent-to-tool / resource / prompt 連接,且需明確 user consent", + "source": "https://modelcontextprotocol.io/specification/2025-06-18", + "awoooi_gate": "read_only_tool_registry_before_write_adapter", + "agent_assignment": "MarketRadar 監控 SDK / spec;Critic 檢查資料權限與 tool safety。", + }, + { + "practice": "A2A:跨框架 Agent 溝通、委派與互通;MCP 處理工具、A2A 處理 Agent 對 Agent", + "source": "https://a2a-protocol.org/latest/", + "awoooi_gate": "agent_to_agent_interop_watch_only", + "agent_assignment": "OpenClaw 設定協作邊界;Hermes 彙整 handoff 記錄;NemoTron 比對輸出。", + }, + { + "practice": "OpenTelemetry GenAI:Agent / LLM / MCP trace 語意慣例,支援可觀測與稽核", + "source": "https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/", + "awoooi_gate": "trace_semconv_mapping_before_runtime_export", + "agent_assignment": "Critic 定義稽核欄位;MarketRadar 追語意規範版本;Hermes 產日週月報。", + }, + ] + + def _integration_candidates(report: dict[str, Any]) -> list[dict[str, Any]]: return [ {