diff --git a/.gitea/workflows/run-migration.yml b/.gitea/workflows/run-migration.yml index 452ccde3..bc12b58d 100644 --- a/.gitea/workflows/run-migration.yml +++ b/.gitea/workflows/run-migration.yml @@ -57,7 +57,7 @@ jobs: - name: Identify new migrations id: diff run: | - ALL_NEW_FILES=$(git diff --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true) + ALL_NEW_FILES=$(git diff --no-renames --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true) NEW_FILES=$(echo "$ALL_NEW_FILES" | grep -Ev '(_down|rollback)\.sql$' || true) SKIPPED_ROLLBACK_FILES=$(echo "$ALL_NEW_FILES" | grep -E '(_down|rollback)\.sql$' || true) echo "new_files<> $GITHUB_OUTPUT @@ -102,7 +102,7 @@ jobs: echo "=== Applying: $file ===" migration_err="$(mktemp)" if ! apply_migration "$PGURL_PSQL" "$file" 2>"$migration_err"; then - if grep -q "must be owner of table" "$migration_err"; then + if grep -Eq "(must be owner of table|permission denied for table)" "$migration_err"; then if [ -z "$OWNER_PGURL_PSQL" ]; then cat "$migration_err" >&2 echo "::error::migration requires table owner but DATABASE_URL secret is not set" diff --git a/apps/api/migrations/awooop_awoooi_mcp_gateway_project_shadow_2026-05-13.sql b/apps/api/migrations/awooop_awoooi_mcp_gateway_project_shadow_2026-05-13.sql new file mode 100644 index 00000000..10f28d48 --- /dev/null +++ b/apps/api/migrations/awooop_awoooi_mcp_gateway_project_shadow_2026-05-13.sql @@ -0,0 +1,25 @@ +-- ============================================================================= +-- AwoooP / AWOOOI MCP Gateway Shadow Onboarding +-- 2026-05-13 Codex + ogt +-- +-- 背景: +-- AWOOOI 已完成 read-only MCP tool registry / grants seed,但 project 本身仍停在 +-- legacy_awoooi_default,會被 MCP Gateway Gate 1 正確攔截。 +-- +-- 邊界: +-- 只把 AWOOOI 租戶升到 shadow,讓既有 Gate 1 生效。 +-- write/admin tool 仍未授權;自動修復/破壞性動作不因本 migration 開放。 +-- ============================================================================= + +BEGIN; + +SELECT set_config('app.project_id', 'awoooi', FALSE); + +UPDATE awooop_projects +SET + migration_mode = 'shadow', + updated_at = NOW() +WHERE project_id = 'awoooi' + AND migration_mode = 'legacy_awoooi_default'; + +COMMIT; diff --git a/apps/api/migrations/awooop_awoooi_mcp_gateway_project_shadow_2026-05-13_down.sql b/apps/api/migrations/awooop_awoooi_mcp_gateway_project_shadow_2026-05-13_down.sql new file mode 100644 index 00000000..d972b967 --- /dev/null +++ b/apps/api/migrations/awooop_awoooi_mcp_gateway_project_shadow_2026-05-13_down.sql @@ -0,0 +1,20 @@ +-- ============================================================================= +-- Rollback: AwoooP / AWOOOI MCP Gateway Shadow Onboarding +-- 2026-05-13 Codex + ogt +-- +-- 只回退仍停在 shadow 的 AWOOOI;若已由人工/後續 migration 推進到 canary/active, +-- 不自動降級。 +-- ============================================================================= + +BEGIN; + +SELECT set_config('app.project_id', 'awoooi', FALSE); + +UPDATE awooop_projects +SET + migration_mode = 'legacy_awoooi_default', + updated_at = NOW() +WHERE project_id = 'awoooi' + AND migration_mode = 'shadow'; + +COMMIT; diff --git a/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v3_2026-05-13.sql b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v3_2026-05-13.sql new file mode 100644 index 00000000..5622e6ef --- /dev/null +++ b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v3_2026-05-13.sql @@ -0,0 +1,211 @@ +-- T7: awoooi read-only MCP Gateway seed +-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3,產生 first-class audit。 +-- 邊界:只授權 read scope;不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。 + +SELECT set_config('app.project_id', 'awoooi', FALSE); + +WITH agent_seed(agent_id, display_name) AS ( + VALUES + ('pre_decision_investigator', 'Pre-decision Investigator'), + ('post_execution_verifier', 'Post-execution Verifier') +), +agent_body AS ( + SELECT + agent_id, + jsonb_build_object( + 'schema_version', 'awooop_agent_contract_v1', + 'agent_id', agent_id, + 'display_name', display_name, + 'project_id', 'awoooi', + 'purpose', 'Read-only MCP sensing through AwoooP Gateway', + 'allowed_scopes', jsonb_build_array('read'), + 'forbidden_scopes', jsonb_build_array('write', 'admin'), + 'stage', 't7_mcp_gateway_read_sense' + ) AS body_json + FROM agent_seed +), +inserted_revision AS ( + INSERT INTO awooop_contract_revisions ( + project_id, + contract_family, + contract_id, + version_major, + version_minor, + lifecycle_status, + body_json, + body_hash, + body_schema_version, + publisher_id, + published_at + ) + SELECT + 'awoooi', + 'agent', + agent_id, + 1, + 0, + 'active', + body_json, + encode(digest(body_json::text, 'sha256'), 'hex'), + 'v1.0', + 'migration:t7_mcp_gateway_read_seed', + NOW() + FROM agent_body + ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor) + DO NOTHING + RETURNING revision_id, project_id, contract_family, contract_id +), +chosen_revision AS ( + SELECT revision_id, project_id, contract_family, contract_id + FROM inserted_revision + UNION ALL + SELECT revision_id, project_id, contract_family, contract_id + FROM awooop_contract_revisions + WHERE project_id = 'awoooi' + AND contract_family = 'agent' + AND contract_id IN (SELECT agent_id FROM agent_seed) + AND version_major = 1 + AND version_minor = 0 + AND lifecycle_status = 'active' +), +upsert_pointer AS ( + INSERT INTO awooop_active_revisions ( + project_id, + contract_family, + contract_id, + active_revision_id, + updated_at + ) + SELECT DISTINCT ON (project_id, contract_family, contract_id) + project_id, + contract_family, + contract_id, + revision_id, + NOW() + FROM chosen_revision + ORDER BY project_id, contract_family, contract_id, revision_id + ON CONFLICT (project_id, contract_family, contract_id) + DO UPDATE SET + active_revision_id = EXCLUDED.active_revision_id, + updated_at = NOW() + RETURNING contract_id +) +SELECT 'active_agent_contracts', count(*) FROM upsert_pointer; + +WITH read_tools(tool_name, description) AS ( + VALUES + ('k8s_get_pod_logs', 'Kubernetes pod logs read'), + ('k8s_get_events', 'Kubernetes events read'), + ('k8s_describe_pod', 'Kubernetes pod describe read'), + ('k8s_get_hpa_status', 'Kubernetes HPA status read'), + ('k8s_get_node_conditions', 'Kubernetes node conditions read'), + ('ssh_diagnose', 'SSH host diagnosis read'), + ('ssh_get_top_processes', 'SSH top processes read'), + ('ssh_get_disk_usage', 'SSH disk usage read'), + ('ssh_get_memory_info', 'SSH memory info read'), + ('ssh_get_container_logs', 'SSH container logs read'), + ('ssh_get_container_status', 'SSH container status read'), + ('ssh_get_service_status', 'SSH service status read'), + ('ssh_check_port', 'SSH port check read'), + ('ssh_get_nginx_error_log', 'SSH nginx error log read'), + ('ssh_get_swap_info', 'SSH swap info read'), + ('prometheus_query', 'Prometheus instant query read'), + ('prometheus_query_range', 'Prometheus range query read'), + ('prometheus_get_alert_history', 'Prometheus alert history read'), + ('gold_metrics', 'SigNoz gold metrics read'), + ('trace_url', 'SigNoz trace URL read'), + ('system_metrics', 'SigNoz system metrics read'), + ('query_logs', 'SigNoz logs read'), + ('error_logs_summary', 'SigNoz error logs summary read'), + ('list_approvals', 'Approval records read'), + ('get_approval', 'Approval detail read'), + ('list_incidents', 'Incident records read'), + ('list_timeline', 'Timeline records read'), + ('read_file', 'Filesystem allowlisted file read'), + ('list_directory', 'Filesystem allowlisted directory read'), + ('search_in_file', 'Filesystem allowlisted file search'), + ('list_dashboards', 'Grafana dashboards read'), + ('get_dashboard', 'Grafana dashboard read'), + ('get_panel_data', 'Grafana panel data read'), + ('generate_dashboard_url', 'Grafana dashboard URL read'), + ('search_runbook', 'Runbook semantic search read'), + ('get_index_stats', 'Runbook index stats read'), + ('argocd_list_apps', 'ArgoCD apps read'), + ('argocd_get_app_status', 'ArgoCD app status read'), + ('argocd_get_sync_history', 'ArgoCD sync history read'), + ('sentry_list_issues', 'Sentry issues read'), + ('sentry_get_issue', 'Sentry issue detail read'), + ('sentry_search_issues', 'Sentry issue search read') +), +upsert_tools AS ( + INSERT INTO awooop_mcp_tool_registry ( + project_id, + tool_name, + tool_type, + description, + allowed_scopes, + environment_tags, + is_active, + updated_at + ) + SELECT + 'awoooi', + tool_name, + 'mcp_server', + description, + '["read"]'::jsonb, + '{"env": "prod"}'::jsonb, + TRUE, + NOW() + FROM read_tools + ON CONFLICT (project_id, tool_name) + DO UPDATE SET + description = EXCLUDED.description, + allowed_scopes = EXCLUDED.allowed_scopes, + environment_tags = EXCLUDED.environment_tags, + is_active = TRUE, + updated_at = NOW() + RETURNING tool_id +), +grant_agents(agent_id) AS ( + VALUES + ('pre_decision_investigator'), + ('post_execution_verifier') +), +upsert_grants AS ( + INSERT INTO awooop_mcp_grants ( + project_id, + agent_id, + tool_id, + granted_by, + granted_scopes, + expires_at, + is_revoked, + revoked_at, + revoked_by + ) + SELECT + 'awoooi', + grant_agents.agent_id, + upsert_tools.tool_id, + 'migration:t7_mcp_gateway_read_seed', + '["read"]'::jsonb, + NULL, + FALSE, + NULL, + NULL + FROM upsert_tools + CROSS JOIN grant_agents + ON CONFLICT (project_id, agent_id, tool_id) + DO UPDATE SET + granted_scopes = EXCLUDED.granted_scopes, + expires_at = NULL, + is_revoked = FALSE, + revoked_at = NULL, + revoked_by = NULL + RETURNING grant_id +) +SELECT + 'awoooi_read_tools', + (SELECT count(*) FROM upsert_tools) AS tool_rows, + (SELECT count(*) FROM upsert_grants) AS grant_rows; diff --git a/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v3_2026-05-13_down.sql b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v3_2026-05-13_down.sql new file mode 100644 index 00000000..2e9b1306 --- /dev/null +++ b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v3_2026-05-13_down.sql @@ -0,0 +1,77 @@ +-- Rollback for T7 awoooi read-only MCP Gateway seed. +-- Contract revisions are append-only; rollback revokes grants and deactivates the seeded read tools. + +SELECT set_config('app.project_id', 'awoooi', FALSE); + +UPDATE awooop_mcp_grants +SET + is_revoked = TRUE, + revoked_at = NOW(), + revoked_by = 'rollback:t7_mcp_gateway_read_seed' +WHERE project_id = 'awoooi' + AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier') + AND granted_by = 'migration:t7_mcp_gateway_read_seed' + AND is_revoked = FALSE; + +UPDATE awooop_mcp_tool_registry +SET + is_active = FALSE, + updated_at = NOW() +WHERE project_id = 'awoooi' + AND tool_name IN ( + 'k8s_get_pod_logs', + 'k8s_get_events', + 'k8s_describe_pod', + 'k8s_get_hpa_status', + 'k8s_get_node_conditions', + 'ssh_diagnose', + 'ssh_get_top_processes', + 'ssh_get_disk_usage', + 'ssh_get_memory_info', + 'ssh_get_container_logs', + 'ssh_get_container_status', + 'ssh_get_service_status', + 'ssh_check_port', + 'ssh_get_nginx_error_log', + 'ssh_get_swap_info', + 'prometheus_query', + 'prometheus_query_range', + 'prometheus_get_alert_history', + 'gold_metrics', + 'trace_url', + 'system_metrics', + 'query_logs', + 'error_logs_summary', + 'list_approvals', + 'get_approval', + 'list_incidents', + 'list_timeline', + 'read_file', + 'list_directory', + 'search_in_file', + 'list_dashboards', + 'get_dashboard', + 'get_panel_data', + 'generate_dashboard_url', + 'search_runbook', + 'get_index_stats', + 'argocd_list_apps', + 'argocd_get_app_status', + 'argocd_get_sync_history', + 'sentry_list_issues', + 'sentry_get_issue', + 'sentry_search_issues' + ); + +DELETE FROM awooop_active_revisions +WHERE project_id = 'awoooi' + AND contract_family = 'agent' + AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier'); + +UPDATE awooop_contract_revisions +SET lifecycle_status = 'revoked' +WHERE project_id = 'awoooi' + AND contract_family = 'agent' + AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier') + AND publisher_id = 'migration:t7_mcp_gateway_read_seed' + AND lifecycle_status = 'active'; diff --git a/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v4_2026-05-13.sql b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v4_2026-05-13.sql new file mode 100644 index 00000000..f98b89d7 --- /dev/null +++ b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v4_2026-05-13.sql @@ -0,0 +1,213 @@ +-- T7: awoooi read-only MCP Gateway seed +-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3,產生 first-class audit。 +-- 邊界:只授權 read scope;不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。 + +SELECT set_config('app.project_id', 'awoooi', FALSE); + +WITH agent_seed(agent_id, display_name) AS ( + VALUES + ('pre_decision_investigator', 'Pre-decision Investigator'), + ('post_execution_verifier', 'Post-execution Verifier') +), +agent_body AS ( + SELECT + agent_id, + jsonb_build_object( + 'schema_version', 'awooop_agent_contract_v1', + 'agent_id', agent_id, + 'display_name', display_name, + 'project_id', 'awoooi', + 'purpose', 'Read-only MCP sensing through AwoooP Gateway', + 'allowed_scopes', jsonb_build_array('read'), + 'forbidden_scopes', jsonb_build_array('write', 'admin'), + 'stage', 't7_mcp_gateway_read_sense' + ) AS body_json + FROM agent_seed +), +inserted_revision AS ( + INSERT INTO awooop_contract_revisions ( + project_id, + contract_family, + contract_id, + version_major, + version_minor, + lifecycle_status, + body_json, + body_hash, + body_schema_version, + publisher_id, + published_at + ) + SELECT + 'awoooi', + 'agent', + agent_id, + 1, + 0, + 'active', + body_json, + encode(digest(body_json::text, 'sha256'), 'hex'), + 'v1.0', + 'migration:t7_mcp_gateway_read_seed', + NOW() + FROM agent_body + ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor) + DO NOTHING + RETURNING revision_id, project_id, contract_family, contract_id +), +chosen_revision AS ( + SELECT revision_id, project_id, contract_family, contract_id + FROM inserted_revision + UNION ALL + SELECT revision_id, project_id, contract_family, contract_id + FROM awooop_contract_revisions + WHERE project_id = 'awoooi' + AND contract_family = 'agent' + AND contract_id IN (SELECT agent_id FROM agent_seed) + AND version_major = 1 + AND version_minor = 0 + AND lifecycle_status = 'active' +), +upsert_pointer AS ( + INSERT INTO awooop_active_revisions ( + project_id, + contract_family, + contract_id, + active_revision_id, + updated_at + ) + SELECT DISTINCT ON (project_id, contract_family, contract_id) + project_id, + contract_family, + contract_id, + revision_id, + NOW() + FROM chosen_revision + ORDER BY project_id, contract_family, contract_id, revision_id + ON CONFLICT (project_id, contract_family, contract_id) + DO UPDATE SET + active_revision_id = EXCLUDED.active_revision_id, + updated_at = NOW() + RETURNING contract_id +) +SELECT 'active_agent_contracts', count(*) FROM upsert_pointer; + +WITH read_tools(tool_name, description) AS ( + VALUES + ('k8s_get_pod_logs', 'Kubernetes pod logs read'), + ('k8s_get_events', 'Kubernetes events read'), + ('k8s_describe_pod', 'Kubernetes pod describe read'), + ('k8s_get_hpa_status', 'Kubernetes HPA status read'), + ('k8s_get_node_conditions', 'Kubernetes node conditions read'), + ('ssh_diagnose', 'SSH host diagnosis read'), + ('ssh_get_top_processes', 'SSH top processes read'), + ('ssh_get_disk_usage', 'SSH disk usage read'), + ('ssh_get_memory_info', 'SSH memory info read'), + ('ssh_get_container_logs', 'SSH container logs read'), + ('ssh_get_container_status', 'SSH container status read'), + ('ssh_get_service_status', 'SSH service status read'), + ('ssh_check_port', 'SSH port check read'), + ('ssh_get_nginx_error_log', 'SSH nginx error log read'), + ('ssh_get_swap_info', 'SSH swap info read'), + ('prometheus_query', 'Prometheus instant query read'), + ('prometheus_query_range', 'Prometheus range query read'), + ('prometheus_get_alert_history', 'Prometheus alert history read'), + ('gold_metrics', 'SigNoz gold metrics read'), + ('trace_url', 'SigNoz trace URL read'), + ('system_metrics', 'SigNoz system metrics read'), + ('query_logs', 'SigNoz logs read'), + ('error_logs_summary', 'SigNoz error logs summary read'), + ('list_approvals', 'Approval records read'), + ('get_approval', 'Approval detail read'), + ('list_incidents', 'Incident records read'), + ('list_timeline', 'Timeline records read'), + ('read_file', 'Filesystem allowlisted file read'), + ('list_directory', 'Filesystem allowlisted directory read'), + ('search_in_file', 'Filesystem allowlisted file search'), + ('list_dashboards', 'Grafana dashboards read'), + ('get_dashboard', 'Grafana dashboard read'), + ('get_panel_data', 'Grafana panel data read'), + ('generate_dashboard_url', 'Grafana dashboard URL read'), + ('search_runbook', 'Runbook semantic search read'), + ('get_index_stats', 'Runbook index stats read'), + ('argocd_list_apps', 'ArgoCD apps read'), + ('argocd_get_app_status', 'ArgoCD app status read'), + ('argocd_get_sync_history', 'ArgoCD sync history read'), + ('sentry_list_issues', 'Sentry issues read'), + ('sentry_get_issue', 'Sentry issue detail read'), + ('sentry_search_issues', 'Sentry issue search read') +), +upsert_tools AS ( + INSERT INTO awooop_mcp_tool_registry ( + project_id, + tool_name, + tool_type, + description, + allowed_scopes, + environment_tags, + is_active, + updated_at + ) + SELECT + 'awoooi', + tool_name, + 'mcp_server', + description, + '["read"]'::jsonb, + '{"env": "prod"}'::jsonb, + TRUE, + NOW() + FROM read_tools + ON CONFLICT (project_id, tool_name) + DO UPDATE SET + description = EXCLUDED.description, + allowed_scopes = EXCLUDED.allowed_scopes, + environment_tags = EXCLUDED.environment_tags, + is_active = TRUE, + updated_at = NOW() + RETURNING tool_id +), +grant_agents(agent_id) AS ( + VALUES + ('pre_decision_investigator'), + ('post_execution_verifier') +), +upsert_grants AS ( + INSERT INTO awooop_mcp_grants ( + project_id, + agent_id, + tool_id, + granted_by, + granted_scopes, + expires_at, + is_revoked, + revoked_at, + revoked_by + ) + SELECT + 'awoooi', + grant_agents.agent_id, + upsert_tools.tool_id, + 'migration:t7_mcp_gateway_read_seed', + '["read"]'::jsonb, + NULL, + FALSE, + NULL, + NULL + FROM upsert_tools + CROSS JOIN grant_agents + ON CONFLICT (project_id, agent_id, tool_id) + DO UPDATE SET + granted_scopes = EXCLUDED.granted_scopes, + expires_at = NULL, + is_revoked = FALSE, + revoked_at = NULL, + revoked_by = NULL + RETURNING grant_id +) +SELECT + 'awoooi_read_tools', + (SELECT count(*) FROM upsert_tools) AS tool_rows, + (SELECT count(*) FROM upsert_grants) AS grant_rows; + +-- v4 exists only to retrigger run-migration after Gitea skipped the v2->v3 rename-only push. diff --git a/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v4_2026-05-13_down.sql b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v4_2026-05-13_down.sql new file mode 100644 index 00000000..0e33d372 --- /dev/null +++ b/apps/api/migrations/awooop_awoooi_mcp_read_gateway_seed_v4_2026-05-13_down.sql @@ -0,0 +1,79 @@ +-- Rollback for T7 awoooi read-only MCP Gateway seed. +-- Contract revisions are append-only; rollback revokes grants and deactivates the seeded read tools. + +SELECT set_config('app.project_id', 'awoooi', FALSE); + +UPDATE awooop_mcp_grants +SET + is_revoked = TRUE, + revoked_at = NOW(), + revoked_by = 'rollback:t7_mcp_gateway_read_seed' +WHERE project_id = 'awoooi' + AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier') + AND granted_by = 'migration:t7_mcp_gateway_read_seed' + AND is_revoked = FALSE; + +UPDATE awooop_mcp_tool_registry +SET + is_active = FALSE, + updated_at = NOW() +WHERE project_id = 'awoooi' + AND tool_name IN ( + 'k8s_get_pod_logs', + 'k8s_get_events', + 'k8s_describe_pod', + 'k8s_get_hpa_status', + 'k8s_get_node_conditions', + 'ssh_diagnose', + 'ssh_get_top_processes', + 'ssh_get_disk_usage', + 'ssh_get_memory_info', + 'ssh_get_container_logs', + 'ssh_get_container_status', + 'ssh_get_service_status', + 'ssh_check_port', + 'ssh_get_nginx_error_log', + 'ssh_get_swap_info', + 'prometheus_query', + 'prometheus_query_range', + 'prometheus_get_alert_history', + 'gold_metrics', + 'trace_url', + 'system_metrics', + 'query_logs', + 'error_logs_summary', + 'list_approvals', + 'get_approval', + 'list_incidents', + 'list_timeline', + 'read_file', + 'list_directory', + 'search_in_file', + 'list_dashboards', + 'get_dashboard', + 'get_panel_data', + 'generate_dashboard_url', + 'search_runbook', + 'get_index_stats', + 'argocd_list_apps', + 'argocd_get_app_status', + 'argocd_get_sync_history', + 'sentry_list_issues', + 'sentry_get_issue', + 'sentry_search_issues' + ); + +DELETE FROM awooop_active_revisions +WHERE project_id = 'awoooi' + AND contract_family = 'agent' + AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier'); + +UPDATE awooop_contract_revisions +SET lifecycle_status = 'revoked' +WHERE project_id = 'awoooi' + AND contract_family = 'agent' + AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier') + AND publisher_id = 'migration:t7_mcp_gateway_read_seed' + AND lifecycle_status = 'active'; + +-- v4 rollback companion for the retrigger migration. diff --git a/apps/api/src/plugins/mcp/gateway.py b/apps/api/src/plugins/mcp/gateway.py index 24f4c7a6..e82ad578 100644 --- a/apps/api/src/plugins/mcp/gateway.py +++ b/apps/api/src/plugins/mcp/gateway.py @@ -388,10 +388,7 @@ class McpGateway: parameters: dict[str, Any], ) -> MCPToolResult: """呼叫底層 MCP provider 執行工具""" - registry = get_provider_registry() - provider = registry.get(ctx.tool_name) or registry.get( - tool_row.tool_name if tool_row else ctx.tool_name - ) + provider = await self._resolve_provider(ctx, tool_row) # 找不到 provider → 回傳 shadow no-op if provider is None: @@ -407,15 +404,57 @@ class McpGateway: ) audit_params = dict(parameters) + existing_audit = ( + parameters.get("_mcp_audit") + if isinstance(parameters, dict) and isinstance(parameters.get("_mcp_audit"), dict) + else {} + ) audit_params["_mcp_audit"] = { "project_id": ctx.project_id, "agent_id": ctx.agent_id, "run_id": str(ctx.run_id) if ctx.run_id else None, "trace_id": ctx.trace_id, + "incident_id": existing_audit.get("incident_id") or ctx.trace_id, + "session_id": existing_audit.get("session_id"), + "flywheel_node": existing_audit.get("flywheel_node"), + "agent_role": existing_audit.get("agent_role") or ctx.agent_id, "gateway_path": "awooop_mcp_gateway", } return await provider.execute(ctx.tool_name, audit_params) + async def _resolve_provider( + self, + ctx: GatewayContext, + tool_row: AwoooPMcpToolRegistry | None, + ): + """Find the provider that owns ctx.tool_name. + + ProviderRegistry is keyed by provider name (`kubernetes`, `ssh_host`, ...), + while GatewayContext intentionally uses the governed tool name + (`kubectl_get`, `ssh_diagnose`, ...). Scan provider tool manifests as the + compatibility bridge until registry exposes a first-class tool index. + """ + registry = get_provider_registry() + direct = registry.get(ctx.tool_name) + if direct is not None: + return direct + + lookup_name = tool_row.tool_name if tool_row else ctx.tool_name + for provider in registry.all(): + try: + tools = await provider.list_tools() + except Exception as exc: + logger.debug( + "mcp_gateway_provider_manifest_skipped", + provider=getattr(provider, "name", None), + tool_name=lookup_name, + error=str(exc), + ) + continue + if any(tool.name == lookup_name for tool in tools): + return provider + return None + # ── Audit log ───────────────────────────────────────────────────────────── async def _write_audit( @@ -443,6 +482,15 @@ class McpGateway: json.dumps(result.output, sort_keys=True, default=str).encode() ).hexdigest() + gate_payload = { + **gate_result.as_dict(), + "schema_version": "awooop_mcp_gateway_audit_v1", + "gateway_path": "awooop_mcp_gateway", + "policy_enforced": True, + "is_shadow": ctx.is_shadow, + "required_scope": ctx.required_scope, + } + audit = AwoooPMcpGatewayAudit( project_id=ctx.project_id, run_id=ctx.run_id, @@ -452,7 +500,7 @@ class McpGateway: tool_name=ctx.tool_name, input_hash=input_hash, output_hash=output_hash, - gate_result=gate_result.as_dict(), + gate_result=gate_payload, result_status=result_status, block_gate=block_gate, block_reason=block_reason, diff --git a/apps/api/src/services/pre_decision_investigator.py b/apps/api/src/services/pre_decision_investigator.py index fc5ccf44..839ae1f0 100644 --- a/apps/api/src/services/pre_decision_investigator.py +++ b/apps/api/src/services/pre_decision_investigator.py @@ -32,6 +32,9 @@ from typing import TYPE_CHECKING, Any import structlog +from src.db.base import get_db_context +from src.plugins.mcp.gateway import GatewayContext, McpGateway +from src.plugins.mcp.registry import AuditedMCPToolProvider from src.services.evidence_snapshot import EvidenceSnapshot from src.services.mcp_audit_context import with_mcp_audit_context from src.services.mcp_tool_registry import RegisteredTool, SensorDimension, get_mcp_tool_registry @@ -332,7 +335,7 @@ class PreDecisionInvestigator: agent_role="pre_decision_investigator", ) result = await asyncio.wait_for( - reg.provider.execute(tool_name, audited_params), + self._execute_tool(reg, tool_name, audited_params, snapshot.incident_id), timeout=MCP_TOOL_TIMEOUT_SEC, ) @@ -376,6 +379,34 @@ class PreDecisionInvestigator: except Exception: pass + async def _execute_tool( + self, + reg: RegisteredTool, + tool_name: str, + audited_params: dict[str, Any], + incident_id: str, + ): + """Route production audited providers through AwoooP MCP Gateway. + + Tests and manual injections can still pass a raw provider; production + registry entries are `AuditedMCPToolProvider` and must now leave a + first-class gateway audit row instead of only a legacy bridge row. + """ + if not isinstance(reg.provider, AuditedMCPToolProvider): + return await reg.provider.execute(tool_name, audited_params) + + async with get_db_context("awoooi") as db: + ctx = GatewayContext( + project_id="awoooi", + agent_id="pre_decision_investigator", + tool_name=tool_name, + trace_id=incident_id, + is_shadow=True, + environment={"env": "prod"}, + required_scope="read", + ) + return await McpGateway(db).call(ctx, audited_params) + async def _log_mcp_call_to_timeline( snapshot_incident_id: str | None, diff --git a/apps/api/tests/test_mcp_gateway_audit.py b/apps/api/tests/test_mcp_gateway_audit.py index 11b27150..bc4c5b60 100644 --- a/apps/api/tests/test_mcp_gateway_audit.py +++ b/apps/api/tests/test_mcp_gateway_audit.py @@ -1,10 +1,13 @@ from __future__ import annotations import uuid +from types import SimpleNamespace import pytest +from src.plugins.mcp import gateway as gateway_module from src.plugins.mcp.gateway import GateCheckResult, GatewayContext, McpGateway +from src.plugins.mcp.interfaces import MCPTool, MCPToolProvider, MCPToolResult class FakeDb: @@ -51,3 +54,72 @@ async def test_write_audit_persists_blocked_gate_without_tool_row() -> None: assert audit.tool_name == "missing_tool" assert audit.result_status == "blocked" assert audit.block_gate == 1 + assert audit.gate_result["schema_version"] == "awooop_mcp_gateway_audit_v1" + assert audit.gate_result["gateway_path"] == "awooop_mcp_gateway" + assert audit.gate_result["policy_enforced"] is True + + +class FakeProvider(MCPToolProvider): + def __init__(self) -> None: + self.calls: list[tuple[str, dict]] = [] + + @property + def name(self) -> str: + return "kubernetes" + + async def list_tools(self) -> list[MCPTool]: + return [MCPTool(name="kubectl_get", description="", input_schema={})] + + async def execute(self, tool_name: str, parameters: dict) -> MCPToolResult: + self.calls.append((tool_name, parameters)) + return MCPToolResult(success=True, execution_id="provider-ok", output={"ok": True}) + + +class FakeProviderRegistry: + def __init__(self, provider: FakeProvider) -> None: + self.provider = provider + + def get(self, _name: str): + return None + + def all(self) -> list[FakeProvider]: + return [self.provider] + + +@pytest.mark.asyncio +async def test_execute_tool_resolves_provider_by_tool_manifest( + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider = FakeProvider() + monkeypatch.setattr( + gateway_module, + "get_provider_registry", + lambda: FakeProviderRegistry(provider), + ) + + result = await McpGateway(FakeDb())._execute_tool( + GatewayContext( + project_id="awoooi", + agent_id="pre_decision_investigator", + tool_name="kubectl_get", + trace_id="INC-GW", + ), + SimpleNamespace(tool_name="kubectl_get"), + { + "namespace": "awoooi-prod", + "_mcp_audit": { + "incident_id": "INC-GW", + "session_id": "incident:INC-GW:pre_decision", + "flywheel_node": "sense", + "agent_role": "pre_decision_investigator", + }, + }, + ) + + assert result.success is True + assert provider.calls + tool_name, parameters = provider.calls[0] + assert tool_name == "kubectl_get" + assert parameters["_mcp_audit"]["gateway_path"] == "awooop_mcp_gateway" + assert parameters["_mcp_audit"]["incident_id"] == "INC-GW" + assert parameters["_mcp_audit"]["flywheel_node"] == "sense" diff --git a/apps/api/tests/test_pre_decision_investigator.py b/apps/api/tests/test_pre_decision_investigator.py index 831a8672..9ad3347b 100644 --- a/apps/api/tests/test_pre_decision_investigator.py +++ b/apps/api/tests/test_pre_decision_investigator.py @@ -20,12 +20,14 @@ ADR-081: Phase 1 決策前情報調查員 from __future__ import annotations import asyncio +from typing import Any import pytest from src.plugins.mcp.interfaces import MCPTool, MCPToolProvider, MCPToolResult +from src.plugins.mcp.registry import AuditedMCPToolProvider +from src.services import pre_decision_investigator as pdi_module from src.services.evidence_snapshot import EvidenceSnapshot from src.services.mcp_tool_registry import ( - MCPToolRegistry, RegisteredTool, SensorDimension, ) @@ -103,6 +105,14 @@ class _CaptureProvider(_SuccessProvider): return await super().execute(tool_name, parameters) +class _DbContext: + async def __aenter__(self) -> object: + return object() + + async def __aexit__(self, *_args: object) -> None: + return None + + def _stub_incident( alertname: str = "KubePodCrashLooping", namespace: str = "awoooi-prod", @@ -296,6 +306,46 @@ class TestCollectOne: assert audit_context["flywheel_node"] == "sense" assert audit_context["agent_role"] == "pre_decision_investigator" + @pytest.mark.asyncio + async def test_collect_one_routes_audited_provider_through_gateway( + self, + monkeypatch: pytest.MonkeyPatch, + ): + investigator = PreDecisionInvestigator() + snap = EvidenceSnapshot(incident_id="INC-GATEWAY") + provider = _CaptureProvider() + reg = _reg( + "kubectl_describe", + AuditedMCPToolProvider(provider), + SensorDimension.D1_K8S_STATE, + ) + calls: list[dict[str, Any]] = [] + + class FakeGateway: + def __init__(self, db: object) -> None: + self.db = db + + async def call(self, ctx, parameters: dict[str, Any]) -> MCPToolResult: + calls.append({"ctx": ctx, "parameters": parameters, "db": self.db}) + return MCPToolResult(success=True, execution_id="gw", output={"status": "Running"}) + + monkeypatch.setattr(pdi_module, "get_db_context", lambda _project_id: _DbContext()) + monkeypatch.setattr(pdi_module, "McpGateway", FakeGateway) + + await investigator._collect_one(snap, reg, {"namespace": "prod"}) + + assert snap.mcp_health["kubectl_describe"] is True + assert snap.k8s_state is not None + assert provider.seen_parameters is None + assert calls + ctx = calls[0]["ctx"] + assert ctx.project_id == "awoooi" + assert ctx.agent_id == "pre_decision_investigator" + assert ctx.tool_name == "kubectl_describe" + assert ctx.trace_id == "INC-GATEWAY" + assert ctx.required_scope == "read" + assert calls[0]["parameters"]["_mcp_audit"]["incident_id"] == "INC-GATEWAY" + @pytest.mark.asyncio async def test_failed_tool_marks_health_false(self): investigator = PreDecisionInvestigator() diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 05446d86..f66bbcdd 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -96,6 +96,54 @@ - `git diff --check` 通過。 - PR diff added lines 未命中本輪敏感 token / credential pattern。 +## 2026-05-13 | T6 Incident timeline / Telegram detail reconciliation visibility 已推版 + +**背景**:T5 已把 incident / approval / execution / evidence 的矛盾整理成 `incident_reconciliation_v1`,但 operator 仍需要在既有 incident timeline 與 Telegram「詳情」入口看到同一個真相鏈狀態,不能只靠另外查 truth-chain API。 + +**修正**: +- `awooop_truth_chain_service.py` 將 incident reconciliation builder 公開為 `build_incident_reconciliation()`,讓其他查詢面共用同一份判定邏輯。 +- `incident_timeline_service.py`: + - 在 incident timeline response 追加頂層 `reconciliation`。 + - 當 reconciliation 顯示 `blocked` / `degraded` 時,把同一份資料放進 `safe.events[]`,事件來源標示為 `truth_chain` / `truth_chain_reconciliation`。 + - 不修改 incident、approval、execution、timeline_events;只做 read-only compose。 +- `api/v1/incidents.py` 更新 timeline response schema。 +- `telegram_gateway.py` 的 incident detail 追加「真相鏈狀態」區塊,顯示 `consistency_status`、`operator_next_state` 與前 4 個 mismatch code。 +- 邊界:只調整詳情/查詢顯示,不改主告警卡、按鈕 callback、nonce、approval execution 或自動修復行為。 + +**驗證與推版**: +- Local: + - `py_compile`:pass。 + - `ruff --select F,E9`:pass。 + - `pytest tests/test_incident_timeline_service.py tests/test_awooop_truth_chain_service.py tests/test_phase25_drift_detection.py tests/test_drift_interpreter_ollama_first.py tests/test_platform_router_order.py tests/test_awooop_operator_auth.py -q`:43 passed。 + - `git diff --check`:pass。 +- Gitea: + - `af9798a6 feat(awooop): surface reconciliation in incident timeline` 已推 `gitea main`。 + - Code Review run `1945`:success。 + - CD run `1944`:success。 + - Deploy marker:`c01012d7 chore(cd): deploy af9798a [skip ci]`。 +- Production: + - API/Web/Worker image 均為 `af9798a62e85e3876b471d7c9c4339dd78fb6aa4`。 + - K3s rollout status:API/Web/Worker success。 + - Health:host-local NodePort `127.0.0.1:32334` healthy / mock_mode=false,PostgreSQL / Redis / OpenClaw / SignOz 皆 up。 + - Incident timeline smoke `INC-20260512-B6C589`: + - `GET /api/v1/incidents/INC-20260512-B6C589/timeline` → 200。 + - `reconciliation.schema_version=incident_reconciliation_v1`。 + - `consistency_status=blocked`。 + - `operator_next_state=manual_required`。 + - `safe.events[]` 內有 `actor=truth_chain_reconciliation`、`title=Lifecycle reconciliation: blocked`。 + - mismatch codes:`incident_open_after_approval_resolved`、`approval_approved_without_execution_record`、`approval_no_action_without_execution`、`evidence_all_sensors_failed`。 + +**整體進度**: +- Wave 0:MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。 +- T0:Truth-chain read-only API 完成、部署、production smoke 完成。 +- T1:Channel Event hardening 完成、部署、production smoke 完成。 +- T2:legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成;first-class Gateway enforced path 仍待後續 wave。 +- T3:Ansible audit contract + decision candidate dry-run audit 完成、部署、production smoke 完成。 +- T4:Config Drift stable fingerprint / repeat-state / Telegram stage visibility 完成、部署、production smoke 完成。 +- T5:Incident / Approval / Execution reconciliation 完成、部署、production smoke 完成。 +- T6:Incident timeline / Telegram detail reconciliation visibility 完成、部署、production smoke 完成。 +- 仍未完成:first-class MCP Gateway enforcement、Ansible 真正 check-mode executor / diff / apply / rollback、Operator Console 前端完整呈現、root cause 修復 execution / incident closure 矛盾。 + ## 2026-05-13 | T5 Incident / Approval / Execution reconciliation 已推版 **背景**:B6C589 類 incident 會出現狀態矛盾:Telegram 顯示需要審批 / 處理,DB 裡 approval 已 `APPROVED` 且 action 是 `NO_ACTION`,但 incident 仍 `INVESTIGATING`,automation execution / verification 又沒有成功紀錄。Operator 不能再靠人工猜測「AI 到底修了沒」。 diff --git a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md index f83d53c9..e341943a 100644 --- a/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md +++ b/docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md @@ -365,10 +365,11 @@ source_event_received | T3 Ansible declarative executor | Ansible 成為 executor candidate,支援 check mode、diff、apply、rollback、artifact link | 每次 AI 修復決策能說明 Ansible 是否可用、是否 dry-run、為何選/不選 | | T4 Drift fingerprint FSM | Drift 不再每小時新增不可辨識卡;同 fingerprint 更新 repeat_count / status / PR / adopt / rollback / ignore | `awoooi-prod HIGH=1 MEDIUM=30` 同組 12h 只更新同一 truth chain | | T5 Incident status reconciliation | approval、incident、evidence、execution、timeline 狀態一致;NO_ACTION 必須是明確 manual_required 或 resolved(noop) | B6C589 類事件不得再出現 incident investigating + approval approved/resolved + no execution/verification 的矛盾 | +| T6 Incident timeline / Telegram detail visibility | T5 reconciliation 必須出現在既有 incident timeline 與 Telegram 詳情入口,不只藏在 truth-chain API | B6C589 類事件在 timeline / 詳情中直接顯示 `blocked`、`manual_required` 與 mismatch codes | **T0 first implementation(2026-05-12 22:50 台北)**:新增 read-only `GET /api/v1/platform/truth-chain/{source_id}`,由 Operator Console auth 保護,聚合 incident / drift / approval / evidence / legacy MCP / AwoooP MCP Gateway / automation_operation_log / KM / timeline / outbound mirror。此 endpoint 只揭露現況與缺口,不改任何 incident、approval、execution 或 Telegram state。 -**當前紅線**:在 T0-T2 未完成前,任何「中低風險告警已有 AI 自動修復」都只能逐案查證,不能全域宣稱。Telegram 卡片必須誠實顯示 degraded / failed / pending_human,而不是只顯示 AI 研判摘要。 +**當前紅線**:T0-T6 已補上第一批查詢/詳情可觀測性,但 T2 仍不是 first-class MCP Gateway enforcement,T3 仍不是 Ansible check-mode / apply executor,T6 也只把 reconciliation 推進詳情層。任何「中低風險告警已有完整 AI 自動修復」仍必須逐案查證,不能全域宣稱。 **T1 first implementation(2026-05-12 23:20 台北)**:開始補 `awooop_outbound_message` 的真相鏈欄位:`content_redacted`、`redaction_version`、`source_envelope`。設計邊界是只保存 redacted rendered card 與 source metadata 摘要;raw Telegram payload、完整 callback data、未遮蔽 token 不入庫。production DB migration 已預套用,API app role 在 `app.project_id=awoooi` 下可讀 outbound rows(`total=312`),代表 T1 的 RLS visibility 紅燈已先驗證可見;新欄位需等 T1 API image 上線後才會產生非空資料。 @@ -1973,6 +1974,21 @@ Phase 6 完成後 - Health:K3s rollout success;host-local NodePort health 200 / `mock_mode=false`。本機直連 `192.168.0.120:32334` 當下 timeout,需另查 workstation-to-node path;cluster 內與 host-local API healthy。 - 邊界:T5 只讓矛盾狀態可見;下一段仍需把 reconciliation 結果回推 Telegram / Operator Console UI,並處理 root cause(execution / incident closure)。 +**T6 Incident timeline / Telegram detail reconciliation visibility production verified(2026-05-13 台北)**: +- `af9798a6 feat(awooop): surface reconciliation in incident timeline` 已推 Gitea main,Code Review run `1945` success,CD run `1944` success。 +- Deploy marker:`c01012d7 chore(cd): deploy af9798a [skip ci]`。 +- `incident_timeline_service` 共用 `build_incident_reconciliation()`,timeline response 追加頂層 `reconciliation`,並在 `safe.events[]` 加入 `truth_chain_reconciliation` event。 +- `telegram_gateway` incident detail 追加「真相鏈狀態」區塊,顯示 `consistency_status`、`operator_next_state` 與 mismatch codes。 +- Production `INC-20260512-B6C589` timeline smoke: + - `GET /api/v1/incidents/INC-20260512-B6C589/timeline` → 200 + - `reconciliation.schema_version=incident_reconciliation_v1` + - `consistency_status=blocked` + - `operator_next_state=manual_required` + - `safe.events[]` 有 `actor=truth_chain_reconciliation` / `title=Lifecycle reconciliation: blocked` + - mismatch codes:`incident_open_after_approval_resolved`、`approval_approved_without_execution_record`、`approval_no_action_without_execution`、`evidence_all_sensors_failed` +- Production API/Web/Worker image 均為 `af9798a62e85e3876b471d7c9c4339dd78fb6aa4`,K3s rollout success,host-local health healthy / `mock_mode=false`。 +- 邊界:T6 是 read-only 顯示層收斂,不修改主告警卡、Telegram button callback、approval execution,也尚未修復 execution / incident closure root cause。 + --- ### 2026-04-20 晚 (台北) — C1-C4 全流程串接 — Playbook 鏈路保護(commit de2d34d) diff --git a/k8s/awoooi-prod/kustomization.yaml b/k8s/awoooi-prod/kustomization.yaml index 93bd80d2..7297e855 100644 --- a/k8s/awoooi-prod/kustomization.yaml +++ b/k8s/awoooi-prod/kustomization.yaml @@ -40,7 +40,7 @@ resources: images: - name: 192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER newName: 192.168.0.110:5000/awoooi/api - newTag: af9798a62e85e3876b471d7c9c4339dd78fb6aa4 + newTag: 0b707495a11d66180f95981cb22eab562dfbd515 - name: 192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER newName: 192.168.0.110:5000/awoooi/web - newTag: af9798a62e85e3876b471d7c9c4339dd78fb6aa4 + newTag: 0b707495a11d66180f95981cb22eab562dfbd515