"""Current AI Agent autonomous runtime control plane. This read model is the current directive layer. Historical P2 snapshots can still describe earlier no-send / no-live states, but this payload states what the product should enforce now: low, medium, and high risk routes may proceed through controlled automation when allowlist, check-mode, verifier, rollback, KM, and Telegram receipts are present. """ from __future__ import annotations from collections.abc import Iterable, Mapping from datetime import UTC, datetime from typing import Any from sqlalchemy import text from src.core.config import settings from src.core.logging import get_logger from src.db.base import get_db_context from src.services.report_generation_service import ( DAILY_REPORT_HOUR_TAIPEI, MONTHLY_REPORT_DAY_TAIPEI, MONTHLY_REPORT_HOUR_TAIPEI, WEEKLY_REPORT_HOUR_TAIPEI, WEEKLY_REPORT_WEEKDAY_TAIPEI, ) _SCHEMA_VERSION = "ai_agent_autonomous_runtime_control_v1" _RUNTIME_AUTHORITY = "current_owner_directive_controlled_ai_automation" _DEPLOY_READBACK_MARKER = "p2_416_d1n_autonomous_runtime_control_prod_readback_v2" _DEPLOY_ATTEMPT_NOTE = "cd_internal_control_plane_readback_retry_20260628_2" _LIVE_READBACK_SCHEMA_VERSION = "ai_agent_autonomous_runtime_receipt_readback_v1" _DEFAULT_PROJECT_ID = "awoooi" _DEFAULT_LOOKBACK_HOURS = 24 # CD cancel-stale-cd no-op triggers must not change runtime payloads. _EXECUTOR_OPERATION_TYPES = ( "ansible_candidate_matched", "ansible_check_mode_executed", "ansible_apply_executed", "ansible_learning_writeback_recorded", "ansible_rollback_executed", "ansible_execution_skipped", ) logger = get_logger(__name__) def _allowed_risk_levels() -> list[str]: raw = str(settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_ALLOWED_RISK_LEVELS or "") return sorted({item.strip().lower() for item in raw.split(",") if item.strip()}) def _utc_iso(value: Any) -> str | None: if value is None: return None if isinstance(value, datetime): if value.tzinfo is None: value = value.replace(tzinfo=UTC) return value.astimezone(UTC).isoformat() return str(value) def _row_mapping(row: Mapping[str, Any] | Any) -> dict[str, Any]: if isinstance(row, Mapping): return dict(row) mapping = getattr(row, "_mapping", None) if mapping is not None: return dict(mapping) return dict(row) def _int_value(value: Any) -> int: try: return int(value or 0) except (TypeError, ValueError): return 0 def _sanitize_latest_rows( rows: Iterable[Mapping[str, Any] | Any], *, allowed_keys: tuple[str, ...], time_keys: tuple[str, ...] = ("created_at", "collected_at", "queued_at", "sent_at"), limit: int = 5, ) -> list[dict[str, Any]]: clean_rows: list[dict[str, Any]] = [] for row in rows: item = _row_mapping(row) clean: dict[str, Any] = {} for key in allowed_keys: if key not in item: continue value = item.get(key) clean[key] = _utc_iso(value) if key in time_keys else value clean_rows.append(clean) if len(clean_rows) >= limit: break return clean_rows def _operation_counts( rows: Iterable[Mapping[str, Any] | Any], ) -> dict[str, dict[str, Any]]: counts = { operation_type: { "total": 0, "recent": 0, "by_status": {}, } for operation_type in _EXECUTOR_OPERATION_TYPES } for row in rows: item = _row_mapping(row) operation_type = str(item.get("operation_type") or "unknown") status = str(item.get("status") or "unknown") bucket = counts.setdefault( operation_type, { "total": 0, "recent": 0, "by_status": {}, }, ) total = _int_value(item.get("total")) recent = _int_value(item.get("recent")) bucket["total"] += total bucket["recent"] += recent bucket["by_status"][status] = bucket["by_status"].get(status, 0) + total return counts def _status_counts( rows: Iterable[Mapping[str, Any] | Any], *, status_key: str, ) -> dict[str, Any]: by_status: dict[str, int] = {} total = 0 recent = 0 for row in rows: item = _row_mapping(row) status = str(item.get(status_key) or "unknown") row_total = _int_value(item.get("total")) by_status[status] = by_status.get(status, 0) + row_total total += row_total recent += _int_value(item.get("recent")) return { "total": total, "recent": recent, "by_status": by_status, } def _trace_stage( *, stage_id: str, display_name: str, source_tables: list[str], total: int, recent: int, required_for_closed_loop: bool, feeds_learning: bool, public_safe: bool = True, next_action_if_missing: str | None = None, ) -> dict[str, Any]: present = total > 0 return { "stage_id": stage_id, "display_name": display_name, "source_tables": source_tables, "recorded": present, "record_quality": "recorded" if present else "missing", "total": max(0, total), "recent": max(0, recent), "required_for_closed_loop": required_for_closed_loop, "feeds_learning": feeds_learning, "public_safe": public_safe, "next_action_if_missing": None if present else next_action_if_missing, } def _trace_total(summary: Mapping[str, Any] | None, *operation_types: str) -> int: if not isinstance(summary, Mapping): return 0 if not operation_types: return _int_value(summary.get("total")) return sum( _int_value((summary.get(operation_type) or {}).get("total")) for operation_type in operation_types ) def _trace_recent(summary: Mapping[str, Any] | None, *operation_types: str) -> int: if not isinstance(summary, Mapping): return 0 if not operation_types: return _int_value(summary.get("recent")) return sum( _int_value((summary.get(operation_type) or {}).get("recent")) for operation_type in operation_types ) def _build_trace_ledger( *, operation_summary: Mapping[str, Any], auto_repair_summary: Mapping[str, Any], verifier_summary: Mapping[str, Any], km_summary: Mapping[str, Any], telegram_summary: Mapping[str, Any], mcp_gateway_summary: Mapping[str, Any], legacy_mcp_summary: Mapping[str, Any], service_log_summary: Mapping[str, Any], executor_log_summary: Mapping[str, Any], timeline_summary: Mapping[str, Any], playbook_trust_summary: Mapping[str, Any], latest_flow_closure: Mapping[str, Any], loop_ledger: Mapping[str, Any], ) -> dict[str, Any]: """Build the full public-safe AI automation trace ledger.""" mcp_total = _trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary) mcp_recent = _trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary) stages = [ _trace_stage( stage_id="mcp_context", display_name="MCP sensor / tool context", source_tables=["awooop_mcp_gateway_audit", "mcp_audit_log"], total=mcp_total, recent=mcp_recent, required_for_closed_loop=False, feeds_learning=True, next_action_if_missing="record_mcp_gateway_or_legacy_mcp_audit_for_every_ai_decision", ), _trace_stage( stage_id="service_log_evidence", display_name="Sanitized service / package log evidence", source_tables=["incident_evidence.recent_logs", "incident_evidence.evidence_summary"], total=_trace_total(service_log_summary), recent=_trace_recent(service_log_summary), required_for_closed_loop=False, feeds_learning=True, next_action_if_missing="collect_sanitized_service_log_evidence_before_ai_decision", ), _trace_stage( stage_id="candidate", display_name="AI candidate / playbook match", source_tables=["automation_operation_log"], total=_trace_total(operation_summary, "ansible_candidate_matched"), recent=_trace_recent(operation_summary, "ansible_candidate_matched"), required_for_closed_loop=True, feeds_learning=True, next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook", ), _trace_stage( stage_id="check_mode", display_name="No-write check-mode / dry-run", source_tables=["automation_operation_log"], total=_trace_total(operation_summary, "ansible_check_mode_executed"), recent=_trace_recent(operation_summary, "ansible_check_mode_executed"), required_for_closed_loop=True, feeds_learning=True, next_action_if_missing="ansible_check_mode_worker_claims_candidate", ), _trace_stage( stage_id="executor_log_projection", display_name="Executor stdout / stderr / dry-run projection", source_tables=[ "automation_operation_log.output", "automation_operation_log.error", "automation_operation_log.stderr_feed_back", "automation_operation_log.dry_run_result", ], total=_trace_total(executor_log_summary), recent=_trace_recent(executor_log_summary), required_for_closed_loop=False, feeds_learning=True, next_action_if_missing="persist_sanitized_executor_log_projection_for_failed_or_applied_actions", ), _trace_stage( stage_id="controlled_apply", display_name="Controlled apply execution", source_tables=["automation_operation_log"], total=_trace_total(operation_summary, "ansible_apply_executed"), recent=_trace_recent(operation_summary, "ansible_apply_executed"), required_for_closed_loop=True, feeds_learning=True, next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success", ), _trace_stage( stage_id="auto_repair_execution_receipt", display_name="Auto-repair execution receipt", source_tables=["auto_repair_executions"], total=_trace_total(auto_repair_summary), recent=_trace_recent(auto_repair_summary), required_for_closed_loop=True, feeds_learning=True, next_action_if_missing="receipt_backfill_records_auto_repair_execution", ), _trace_stage( stage_id="post_apply_verifier", display_name="Post-apply verifier evidence", source_tables=["incident_evidence"], total=_trace_total(verifier_summary), recent=_trace_recent(verifier_summary), required_for_closed_loop=True, feeds_learning=True, next_action_if_missing="post_apply_verifier_writes_incident_evidence", ), _trace_stage( stage_id="rag_km_learning", display_name="RAG / KM / PlayBook learning writeback", source_tables=["knowledge_entries"], total=_trace_total(km_summary), recent=_trace_recent(km_summary), required_for_closed_loop=True, feeds_learning=True, next_action_if_missing="hermes_writes_km_playbook_trust_candidate", ), _trace_stage( stage_id="playbook_trust", display_name="PlayBook trust / success-failure learning", source_tables=[ "playbooks.trust_score", "playbooks.success_count", "playbooks.failure_count", "playbooks.review_required", ], total=_trace_total(playbook_trust_summary), recent=_trace_recent(playbook_trust_summary), required_for_closed_loop=False, feeds_learning=True, next_action_if_missing="write_playbook_trust_delta_after_verified_execution", ), _trace_stage( stage_id="timeline_projection", display_name="Operator timeline projection", source_tables=["timeline_events"], total=_trace_total(timeline_summary), recent=_trace_recent(timeline_summary), required_for_closed_loop=False, feeds_learning=True, next_action_if_missing="project_ai_runtime_stage_to_timeline_events", ), _trace_stage( stage_id="telegram_receipt", display_name="Telegram Gateway receipt", source_tables=["awooop_outbound_message"], total=_trace_total(telegram_summary), recent=_trace_recent(telegram_summary), required_for_closed_loop=True, feeds_learning=False, next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt", ), ] required = [stage for stage in stages if stage["required_for_closed_loop"]] missing_required = [ str(stage["stage_id"]) for stage in required if stage["recorded"] is not True ] recorded_count = sum(1 for stage in stages if stage["recorded"] is True) return { "schema_version": "ai_agent_autonomous_trace_ledger_v1", "purpose": ( "把 AI 自動化每個節點的 public-safe receipt 收斂成同一份 ledger;" "這些紀錄是後續 RAG、KM、PlayBook trust 與報告學習的依據。" ), "latest_flow_closed": latest_flow_closure.get("closed") is True, "latest_loop_closed": loop_ledger.get("closed") is True, "stage_count": len(stages), "recorded_stage_count": recorded_count, "required_stage_count": len(required), "missing_required_stage_ids": missing_required, "learning_source_stage_ids": [ str(stage["stage_id"]) for stage in stages if stage["feeds_learning"] is True ], "public_safety": { "reads_raw_sessions": False, "stores_secret_values": False, "stores_unredacted_telegram_payload": False, "stores_internal_reasoning": False, }, "stages": stages, } def _build_log_integration_taxonomy( *, operation_summary: Mapping[str, Any], auto_repair_summary: Mapping[str, Any], verifier_summary: Mapping[str, Any], km_summary: Mapping[str, Any], telegram_summary: Mapping[str, Any], mcp_gateway_summary: Mapping[str, Any], legacy_mcp_summary: Mapping[str, Any], service_log_summary: Mapping[str, Any], executor_log_summary: Mapping[str, Any], timeline_summary: Mapping[str, Any], playbook_trust_summary: Mapping[str, Any], ) -> dict[str, Any]: """Expose how logs are normalized, labeled, grouped, and fed to agents.""" operation_total = sum(_trace_total(operation_summary, item) for item in _EXECUTOR_OPERATION_TYPES) operation_recent = sum(_trace_recent(operation_summary, item) for item in _EXECUTOR_OPERATION_TYPES) source_families = [ { "source_family_id": "mcp_gateway_tool_calls", "source_tables": ["awooop_mcp_gateway_audit"], "normalized_event_schema": "ToolCallEvidence", "label_dimensions": ["project", "run", "trace", "agent", "tool", "policy_gate"], "total": _trace_total(mcp_gateway_summary), "recent": _trace_recent(mcp_gateway_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "hash_only_no_raw_input_output", "next_action_if_empty": "route_first_class_tools_through_awooop_mcp_gateway", }, { "source_family_id": "legacy_mcp_tool_calls", "source_tables": ["mcp_audit_log"], "normalized_event_schema": "LegacyToolCallEvidence", "label_dimensions": ["incident", "session_ref", "flywheel_node", "agent", "tool"], "total": _trace_total(legacy_mcp_summary), "recent": _trace_recent(legacy_mcp_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "bridge_to_gateway_hash_or_redacted_summary", "next_action_if_empty": "keep_legacy_bridge_until_all_callers_use_gateway", }, { "source_family_id": "service_package_logs", "source_tables": [ "incident_evidence.recent_logs", "incident_evidence.evidence_summary", "incident_evidence.anomaly_context", ], "normalized_event_schema": "ServiceLogEvidence", "label_dimensions": ["project", "product", "website", "service", "package", "incident"], "total": _trace_total(service_log_summary), "recent": _trace_recent(service_log_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "sanitized_summary_only", "next_action_if_empty": "collect_sanitized_service_package_logs_before_decision", }, { "source_family_id": "executor_operation_logs", "source_tables": ["automation_operation_log"], "normalized_event_schema": "ExecutorOperationEvidence", "label_dimensions": [ "project", "service", "package", "tool", "incident", "operation", "playbook", "risk", ], "total": max(operation_total, _trace_total(executor_log_summary)), "recent": max(operation_recent, _trace_recent(executor_log_summary)), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "stdout_stderr_tail_or_structured_result_only", "next_action_if_empty": "persist_executor_operation_log_for_candidate_check_apply", }, { "source_family_id": "auto_repair_receipts", "source_tables": ["auto_repair_executions"], "normalized_event_schema": "RepairExecutionReceipt", "label_dimensions": ["incident", "service", "playbook", "risk", "result"], "total": _trace_total(auto_repair_summary), "recent": _trace_recent(auto_repair_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "execution_step_refs_not_raw_secrets", "next_action_if_empty": "write_auto_repair_execution_receipt_after_apply", }, { "source_family_id": "post_apply_verifier", "source_tables": ["incident_evidence.post_execution_state"], "normalized_event_schema": "VerifierEvidence", "label_dimensions": ["incident", "operation", "playbook", "service", "result"], "total": _trace_total(verifier_summary), "recent": _trace_recent(verifier_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "post_state_summary_redacted_refs_only", "next_action_if_empty": "run_post_apply_verifier_for_each_apply", }, { "source_family_id": "rag_km_entries", "source_tables": ["knowledge_entries"], "normalized_event_schema": "KnowledgeWritebackEvidence", "label_dimensions": ["project", "incident", "playbook", "path_type", "status"], "total": _trace_total(km_summary), "recent": _trace_recent(km_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "curated_summary_and_refs_only", "next_action_if_empty": "write_km_entry_after_verifier", }, { "source_family_id": "playbook_trust_signals", "source_tables": ["playbooks"], "normalized_event_schema": "PlayBookTrustSignal", "label_dimensions": ["project", "playbook", "status", "trust_band", "review_required"], "total": _trace_total(playbook_trust_summary), "recent": _trace_recent(playbook_trust_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "aggregate_trust_counters_only", "next_action_if_empty": "write_trust_delta_after_verified_execution", }, { "source_family_id": "operator_timeline_projection", "source_tables": ["timeline_events"], "normalized_event_schema": "OperatorTimelineEvent", "label_dimensions": ["incident", "event_type", "status", "actor", "actor_role"], "total": _trace_total(timeline_summary), "recent": _trace_recent(timeline_summary), "feeds_learning": True, "public_safe": True, "raw_payload_policy": "short_public_safe_status_projection", "next_action_if_empty": "project_ai_runtime_stage_to_timeline_events", }, { "source_family_id": "telegram_delivery_receipts", "source_tables": ["awooop_outbound_message"], "normalized_event_schema": "NotificationReceipt", "label_dimensions": ["project", "channel", "incident", "action", "send_status"], "total": _trace_total(telegram_summary), "recent": _trace_recent(telegram_summary), "feeds_learning": False, "public_safe": True, "raw_payload_policy": "provider_message_ref_no_unredacted_payload", "next_action_if_empty": "send_controlled_apply_result_via_gateway", }, ] label_dimensions = sorted( { str(dimension) for source in source_families for dimension in source["label_dimensions"] } ) active_source_count = sum(1 for source in source_families if _int_value(source["total"]) > 0) return { "schema_version": "ai_agent_log_integration_taxonomy_v1", "purpose": ( "將專案、產品、網站、服務、套件、工具與通知來源的 log " "統一轉成可貼標、可分群、可回放、可餵 RAG/KM/PlayBook 的 evidence。" ), "normalized_event_flow": [ "collect_source_log_or_receipt", "redact_and_hash_sensitive_fields", "assign_labels", "correlate_incident_operation_playbook", "write_trace_ledger", "retrieve_similar_context_via_rag", "select_or_repair_playbook", "run_check_mode_then_controlled_apply", "verify_and_write_learning_back", ], "label_dimensions": label_dimensions, "required_label_dimensions": [ "project", "source_family", "incident", "operation", "service", "tool", "playbook", ], "source_families": source_families, "rollups": { "source_family_count": len(source_families), "active_source_family_count": active_source_count, "inactive_source_family_count": len(source_families) - active_source_count, "label_dimension_count": len(label_dimensions), "classified_event_total": sum(_int_value(source["total"]) for source in source_families), "recent_classified_event_total": sum(_int_value(source["recent"]) for source in source_families), "learning_source_family_count": sum( 1 for source in source_families if source["feeds_learning"] is True ), }, "public_safety": { "raw_secret_collection_allowed": False, "raw_session_collection_allowed": False, "unredacted_payload_storage_allowed": False, "internal_reasoning_storage_allowed": False, }, } def _decision_wiring_stage( *, stage_id: str, display_name: str, evidence_sources: list[str], total: int, recent: int, required_for_decision_wiring: bool, feeds_next_stage: str, next_action_if_missing: str, ) -> dict[str, Any]: present = total > 0 return { "stage_id": stage_id, "display_name": display_name, "evidence_sources": evidence_sources, "present": present, "total": max(0, total), "recent": max(0, recent), "required_for_decision_wiring": required_for_decision_wiring, "feeds_next_stage": feeds_next_stage, "next_action_if_missing": None if present else next_action_if_missing, } def _build_agent_decision_wiring( *, operation_summary: Mapping[str, Any], verifier_summary: Mapping[str, Any], km_summary: Mapping[str, Any], mcp_gateway_summary: Mapping[str, Any], legacy_mcp_summary: Mapping[str, Any], service_log_summary: Mapping[str, Any], timeline_summary: Mapping[str, Any], playbook_trust_summary: Mapping[str, Any], log_integration_taxonomy: Mapping[str, Any], loop_ledger: Mapping[str, Any], latest_flow_closure: Mapping[str, Any], ) -> dict[str, Any]: """Summarize live evidence-to-execution wiring for the AI Agent path.""" taxonomy_rollups = log_integration_taxonomy.get("rollups") if not isinstance(taxonomy_rollups, Mapping): taxonomy_rollups = {} source_family_count = _int_value(taxonomy_rollups.get("source_family_count")) active_source_family_count = _int_value(taxonomy_rollups.get("active_source_family_count")) all_sources_active = source_family_count > 0 and active_source_family_count == source_family_count evidence_total = ( _trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary) + _trace_total(service_log_summary) + _trace_total(timeline_summary) ) evidence_recent = ( _trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary) + _trace_recent(service_log_summary) + _trace_recent(timeline_summary) ) rag_context_total = _trace_total(km_summary) + _trace_total(playbook_trust_summary) rag_context_recent = _trace_recent(km_summary) + _trace_recent(playbook_trust_summary) candidate_total = _trace_total(operation_summary, "ansible_candidate_matched") candidate_recent = _trace_recent(operation_summary, "ansible_candidate_matched") check_mode_total = _trace_total(operation_summary, "ansible_check_mode_executed") check_mode_recent = _trace_recent(operation_summary, "ansible_check_mode_executed") apply_total = _trace_total(operation_summary, "ansible_apply_executed") apply_recent = _trace_recent(operation_summary, "ansible_apply_executed") rollback_total = _trace_total(operation_summary, "ansible_rollback_executed") verifier_total = _trace_total(verifier_summary) verifier_recent = _trace_recent(verifier_summary) stages = [ _decision_wiring_stage( stage_id="labeled_evidence_sources", display_name="Labeled log / MCP / timeline evidence available", evidence_sources=["log_integration_taxonomy", "mcp", "service_logs", "timeline_events"], total=evidence_total if all_sources_active else 0, recent=evidence_recent, required_for_decision_wiring=True, feeds_next_stage="rag_context_retrieval", next_action_if_missing="keep_p1a_source_family_ingestion_active_until_10_of_10", ), _decision_wiring_stage( stage_id="rag_context_retrieval", display_name="RAG / KM / PlayBook trust context available", evidence_sources=["knowledge_entries", "playbooks"], total=rag_context_total, recent=rag_context_recent, required_for_decision_wiring=True, feeds_next_stage="playbook_candidate_selection", next_action_if_missing="retrieve_similar_km_entries_and_playbook_trust_before_candidate", ), _decision_wiring_stage( stage_id="playbook_candidate_selection", display_name="Allowlisted PlayBook candidate selected", evidence_sources=["automation_operation_log:ansible_candidate_matched"], total=candidate_total, recent=candidate_recent, required_for_decision_wiring=True, feeds_next_stage="check_mode_dry_run", next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook", ), _decision_wiring_stage( stage_id="check_mode_dry_run", display_name="Check-mode / dry-run receipt recorded", evidence_sources=["automation_operation_log:ansible_check_mode_executed"], total=check_mode_total, recent=check_mode_recent, required_for_decision_wiring=True, feeds_next_stage="controlled_apply_boundary", next_action_if_missing="ansible_check_mode_worker_claims_candidate", ), _decision_wiring_stage( stage_id="controlled_apply_boundary", display_name="Controlled apply / rollback boundary recorded", evidence_sources=[ "automation_operation_log:ansible_apply_executed", "automation_operation_log:ansible_rollback_executed", ], total=apply_total + rollback_total, recent=apply_recent, required_for_decision_wiring=True, feeds_next_stage="post_apply_verifier", next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success", ), _decision_wiring_stage( stage_id="post_apply_verifier", display_name="Post-apply verifier receipt recorded", evidence_sources=["incident_evidence"], total=verifier_total, recent=verifier_recent, required_for_decision_wiring=True, feeds_next_stage="learning_writeback", next_action_if_missing="post_apply_verifier_writes_incident_evidence", ), ] missing_required = [ str(stage["stage_id"]) for stage in stages if stage["required_for_decision_wiring"] is True and stage["present"] is not True ] present_required_count = sum( 1 for stage in stages if stage["required_for_decision_wiring"] is True and stage["present"] is True ) required_count = sum(1 for stage in stages if stage["required_for_decision_wiring"] is True) closed_loop_observed = bool( loop_ledger.get("closed") is True or latest_flow_closure.get("closed") is True ) return { "schema_version": "ai_agent_decision_wiring_readback_v1", "status": "completed" if not missing_required else "in_progress", "stages": stages, "missing_required_stage_ids": missing_required, "runtime_switches": { "candidate_backfill_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CANDIDATE_BACKFILL_WORKER), "check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER), "controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY), "allowed_risk_levels": _allowed_risk_levels(), }, "closed_loop_observed": closed_loop_observed, "public_safety": { "stores_raw_logs": False, "stores_secret_values": False, "executes_on_read": False, "critical_break_glass_still_required": True, }, "rollups": { "stage_count": len(stages), "required_stage_count": required_count, "required_stage_present_count": present_required_count, "required_stage_missing_count": len(missing_required), "evidence_event_total": evidence_total, "rag_context_total": rag_context_total, "candidate_total": candidate_total, "check_mode_total": check_mode_total, "controlled_apply_total": apply_total, "rollback_total": rollback_total, "verifier_total": verifier_total, }, } def _learning_loop_stage( *, stage_id: str, display_name: str, evidence_sources: list[str], total: int, recent: int, required_for_learning_loop: bool, writes_runtime_state: bool, next_action_if_missing: str, ) -> dict[str, Any]: present = total > 0 return { "stage_id": stage_id, "display_name": display_name, "evidence_sources": evidence_sources, "present": present, "total": max(0, total), "recent": max(0, recent), "required_for_learning_loop": required_for_learning_loop, "writes_runtime_state": writes_runtime_state, "next_action_if_missing": None if present else next_action_if_missing, } def _build_learning_loop_readback( *, operation_summary: Mapping[str, Any], verifier_summary: Mapping[str, Any], km_summary: Mapping[str, Any], playbook_trust_summary: Mapping[str, Any], log_integration_taxonomy: Mapping[str, Any], agent_decision_wiring: Mapping[str, Any], latest_flow_closure: Mapping[str, Any], latest_failure_classification: Mapping[str, Any], controlled_retry_package: Mapping[str, Any], loop_ledger: Mapping[str, Any], ) -> dict[str, Any]: """Expose the verified execution to KM/PlayBook learning loop.""" taxonomy_rollups = log_integration_taxonomy.get("rollups") if not isinstance(taxonomy_rollups, Mapping): taxonomy_rollups = {} learning_source_family_count = _int_value( taxonomy_rollups.get("learning_source_family_count") ) classified_event_total = _int_value(taxonomy_rollups.get("classified_event_total")) recent_classified_event_total = _int_value( taxonomy_rollups.get("recent_classified_event_total") ) verifier_total = _trace_total(verifier_summary) verifier_recent = _trace_recent(verifier_summary) km_total = _trace_total(km_summary) km_recent = _trace_recent(km_summary) learning_writeback_total = _trace_total( operation_summary, "ansible_learning_writeback_recorded", ) learning_writeback_recent = _trace_recent( operation_summary, "ansible_learning_writeback_recorded", ) trust_total = _trace_total(playbook_trust_summary) trust_recent = _trace_recent(playbook_trust_summary) repair_feedback_ready = bool( latest_failure_classification.get("classification") not in {"", "no_controlled_apply_observed"} and controlled_retry_package.get("schema_version") == "ai_agent_controlled_retry_package_v1" ) next_decision_ready = bool( agent_decision_wiring.get("status") == "completed" and loop_ledger.get("closed") is True ) stages = [ _learning_loop_stage( stage_id="verified_execution_outcome", display_name="Verified execution outcome available", evidence_sources=["incident_evidence.post_execution_state"], total=verifier_total if latest_flow_closure.get("has_post_apply_verifier") is True else 0, recent=verifier_recent, required_for_learning_loop=True, writes_runtime_state=True, next_action_if_missing="run_post_apply_verifier_and_attach_apply_op_id", ), _learning_loop_stage( stage_id="km_learning_writeback", display_name="KM learning writeback recorded", evidence_sources=["knowledge_entries"], total=km_total if latest_flow_closure.get("has_km_writeback") is True else 0, recent=km_recent, required_for_learning_loop=True, writes_runtime_state=True, next_action_if_missing="write_verified_execution_summary_to_km", ), _learning_loop_stage( stage_id="learning_repair_record", display_name="Learning repository repair result recorded", evidence_sources=[ "automation_operation_log:ansible_learning_writeback_recorded", "learning_repository", ], total=learning_writeback_total, recent=learning_writeback_recent, required_for_learning_loop=True, writes_runtime_state=True, next_action_if_missing="record_learning_repair_result_after_verifier", ), _learning_loop_stage( stage_id="playbook_trust_delta", display_name="PlayBook trust signal available", evidence_sources=["playbooks"], total=trust_total, recent=trust_recent, required_for_learning_loop=True, writes_runtime_state=True, next_action_if_missing="write_playbook_trust_delta_after_verifier", ), _learning_loop_stage( stage_id="similar_case_context", display_name="Similar-case context sources active", evidence_sources=["log_integration_taxonomy", "knowledge_entries", "playbooks"], total=classified_event_total if learning_source_family_count > 0 else 0, recent=recent_classified_event_total, required_for_learning_loop=True, writes_runtime_state=False, next_action_if_missing="activate_learning_source_families_for_similar_case_retrieval", ), _learning_loop_stage( stage_id="repair_candidate_feedback", display_name="Repair or no-repair feedback classified", evidence_sources=["latest_failure_classification", "controlled_retry_package"], total=1 if repair_feedback_ready else 0, recent=1 if repair_feedback_ready else 0, required_for_learning_loop=True, writes_runtime_state=False, next_action_if_missing="classify_latest_apply_result_and_prepare_retry_package", ), _learning_loop_stage( stage_id="next_decision_context", display_name="Next decision can consume learned context", evidence_sources=["agent_decision_wiring", "autonomous_execution_loop_ledger"], total=1 if next_decision_ready else 0, recent=1 if next_decision_ready else 0, required_for_learning_loop=True, writes_runtime_state=False, next_action_if_missing="complete_decision_wiring_and_execution_loop_before_learning_release", ), ] missing_required = [ str(stage["stage_id"]) for stage in stages if stage["required_for_learning_loop"] is True and stage["present"] is not True ] present_required_count = sum( 1 for stage in stages if stage["required_for_learning_loop"] is True and stage["present"] is True ) required_count = sum(1 for stage in stages if stage["required_for_learning_loop"] is True) return { "schema_version": "ai_agent_learning_loop_readback_v1", "status": "completed" if not missing_required else "in_progress", "stages": stages, "missing_required_stage_ids": missing_required, "public_safety": { "stores_raw_logs": False, "stores_secret_values": False, "stores_unredacted_telegram_payload": False, "executes_on_read": False, "critical_break_glass_still_required": True, }, "rollups": { "stage_count": len(stages), "required_stage_count": required_count, "required_stage_present_count": present_required_count, "required_stage_missing_count": len(missing_required), "verified_execution_total": verifier_total, "km_writeback_total": km_total, "learning_writeback_total": learning_writeback_total, "learning_writeback_recent": learning_writeback_recent, "playbook_trust_total": trust_total, "learning_source_family_count": learning_source_family_count, "similar_case_source_total": classified_event_total, "repair_feedback_ready_count": 1 if repair_feedback_ready else 0, "next_decision_ready_count": 1 if next_decision_ready else 0, }, } def _build_work_item_progress( *, trace_ledger: Mapping[str, Any], log_integration_taxonomy: Mapping[str, Any], agent_decision_wiring: Mapping[str, Any], learning_loop: Mapping[str, Any], db_read_status: str, ) -> dict[str, Any]: """Build ordered work items that the UI and agent can keep advancing.""" taxonomy_rollups = log_integration_taxonomy.get("rollups") if not isinstance(taxonomy_rollups, Mapping): taxonomy_rollups = {} source_families = log_integration_taxonomy.get("source_families") if not isinstance(source_families, list): source_families = [] inactive_source_count = _int_value(taxonomy_rollups.get("inactive_source_family_count")) missing_required = trace_ledger.get("missing_required_stage_ids") if not isinstance(missing_required, list): missing_required = [] decision_rollups = agent_decision_wiring.get("rollups") if not isinstance(decision_rollups, Mapping): decision_rollups = {} decision_wiring_missing = _int_value(decision_rollups.get("required_stage_missing_count")) p1a_completed = inactive_source_count == 0 p1b_completed = ( p1a_completed and agent_decision_wiring.get("schema_version") == "ai_agent_decision_wiring_readback_v1" and decision_wiring_missing == 0 ) learning_rollups = learning_loop.get("rollups") if not isinstance(learning_rollups, Mapping): learning_rollups = {} learning_loop_missing = _int_value(learning_rollups.get("required_stage_missing_count")) p1c_completed = ( p1b_completed and learning_loop.get("schema_version") == "ai_agent_learning_loop_readback_v1" and learning_loop_missing == 0 ) deployed_readback_complete = ( db_read_status == "ok" and trace_ledger.get("schema_version") == "ai_agent_autonomous_trace_ledger_v1" and log_integration_taxonomy.get("schema_version") == "ai_agent_log_integration_taxonomy_v1" ) ordered_items = [ { "work_item_id": "P0-A-runtime-truth", "priority": "P0-A", "title": "Controlled apply runtime truth readback", "status": "completed", "exit_criteria": "production API reports db_read_status=ok and live executor receipts", }, { "work_item_id": "P0-B-trace-ledger", "priority": "P0-B", "title": "Trace ledger for MCP/log/executor/verifier/KM/PlayBook/Telegram", "status": "completed" if not missing_required else "in_progress", "exit_criteria": "trace_ledger exposes required closed-loop stages and missing_required_stage_ids", }, { "work_item_id": "P0-C-log-taxonomy", "priority": "P0-C", "title": "Project/product/site/service/package/tool log taxonomy", "status": "completed", "exit_criteria": "log_integration_taxonomy lists source families, labels, and public-safety policy", }, { "work_item_id": "P0-D-ui-visibility", "priority": "P0-D", "title": "AwoooP UI shows automation loop and log integration progress", "status": "completed", "exit_criteria": "AwoooP, Approvals, Runs, and Work Items show trace/log taxonomy panel", }, { "work_item_id": "P0-E-verification-deploy", "priority": "P0-E", "title": "Focused verification and production deploy marker readback", "status": "completed" if deployed_readback_complete else "in_progress", "exit_criteria": "deploy marker includes this code and production API exposes trace_ledger/log_integration_taxonomy", "blocker": None if deployed_readback_complete else "waiting_for_successful_gitea_cd_deploy_marker", }, { "work_item_id": "P1-A-ingestion-coverage", "priority": "P1-A", "title": "Collector and sanitizer coverage for all source families", "status": "completed" if p1a_completed else "in_progress", "exit_criteria": "all source families have active sanitized classified events", "remaining_source_family_count": inactive_source_count, }, { "work_item_id": "P1-B-agent-decision-wiring", "priority": "P1-B", "title": "RAG retrieval to PlayBook select/repair/check-mode/apply/verifier", "status": "completed" if p1b_completed else "in_progress" if p1a_completed else "pending", "exit_criteria": "AI Agent consumes labeled evidence and emits target selector, dry-run, apply, verifier, rollback", "remaining_decision_wiring_stage_count": decision_wiring_missing, }, { "work_item_id": "P1-C-learning-loop", "priority": "P1-C", "title": "KM / PlayBook trust learning loop", "status": "completed" if p1c_completed else "in_progress" if p1b_completed else "pending", "exit_criteria": "verified execution updates KM entries, trust delta, similar-case clusters, and repair candidates", "remaining_learning_loop_stage_count": learning_loop_missing, }, { "work_item_id": "P1-D-alert-noise-reduction", "priority": "P1-D", "title": "Alert grouping and AI controlled workflow routing", "status": "pending", "exit_criteria": "repeated alerts are clustered, deduped, routed to controlled automation, and no longer default to manual handling", }, { "work_item_id": "P2-A-ui-ux-productization", "priority": "P2-A", "title": "Professional product UI replacing text-heavy surfaces", "status": "pending", "exit_criteria": "AI automation status is shown as dense dashboard controls, filters, counters, and action rails", }, { "work_item_id": "P2-B-multi-product-expansion", "priority": "P2-B", "title": "Reuse taxonomy across AWOOOI products/projects", "status": "pending", "exit_criteria": "StockPlatform, VibeWork, MOMO, AwoooGo, and other products report the same log taxonomy contract", }, ] source_family_items = [] for source in source_families: if not isinstance(source, Mapping): continue total = _int_value(source.get("total")) source_family_items.append({ "work_item_id": f"P1-A-source-{source.get('source_family_id')}", "priority": "P1-A", "source_family_id": source.get("source_family_id"), "title": f"Ingest and label {source.get('source_family_id')}", "status": "completed" if total > 0 else "not_started", "label_dimensions": source.get("label_dimensions") or [], "next_controlled_action": ( "keep_learning_and_quality_checks" if total > 0 else source.get("next_action_if_empty") ), }) all_items = [*ordered_items, *source_family_items] by_status: dict[str, int] = {} for item in all_items: status = str(item.get("status") or "unknown") by_status[status] = by_status.get(status, 0) + 1 return { "schema_version": "ai_agent_automation_work_item_progress_v1", "ordered_items": ordered_items, "source_family_items": source_family_items, "rollups": { "work_item_count": len(all_items), "ordered_work_item_count": len(ordered_items), "source_family_work_item_count": len(source_family_items), "completed_count": by_status.get("completed", 0), "in_progress_count": by_status.get("in_progress", 0), "pending_count": by_status.get("pending", 0), "blocked_count": by_status.get("blocked", 0), "not_started_count": by_status.get("not_started", 0), "by_status": by_status, }, } def _first_operation( rows: Iterable[Mapping[str, Any]], operation_type: str, ) -> dict[str, Any] | None: for row in rows: if str(row.get("operation_type") or "") == operation_type: return dict(row) return None def _operation_by_id( rows: Iterable[Mapping[str, Any]], op_id: Any, ) -> dict[str, Any] | None: needle = str(op_id or "") if not needle: return None for row in rows: if str(row.get("op_id") or "") == needle: return dict(row) return None def _stage_status(row: Mapping[str, Any] | None, *, fallback_status: str | None = None) -> str: if row is None: return fallback_status or "missing" return str(row.get("status") or row.get("result_status") or fallback_status or "present") def _loop_stage( *, stage_id: str, receipt_source: str, present: bool, status: str, ref_id: str | None, writes_runtime_state: bool, next_action_if_missing: str, ) -> dict[str, Any]: return { "stage_id": stage_id, "receipt_source": receipt_source, "present": present, "status": status, "ref_id": ref_id, "writes_runtime_state": writes_runtime_state, "next_action_if_missing": None if present else next_action_if_missing, } def _autonomous_execution_loop_ledger( *, project_id: str, operation_latest_rows: Iterable[Mapping[str, Any] | Any], verifier_latest_rows: Iterable[Mapping[str, Any] | Any], km_latest_rows: Iterable[Mapping[str, Any] | Any], telegram_latest_rows: Iterable[Mapping[str, Any] | Any], auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any], latest_flow_closure: Mapping[str, Any], latest_failure_classification: Mapping[str, Any], controlled_retry_package: Mapping[str, Any], ) -> dict[str, Any]: """Build the operation-id ledger that proves whether the runtime loop closed.""" operation_rows = [_row_mapping(row) for row in operation_latest_rows] verifier_rows = [_row_mapping(row) for row in verifier_latest_rows] km_rows = [_row_mapping(row) for row in km_latest_rows] telegram_rows = [_row_mapping(row) for row in telegram_latest_rows] auto_repair_rows = [_row_mapping(row) for row in auto_repair_latest_rows] latest_apply = _first_operation(operation_rows, "ansible_apply_executed") latest_check = None latest_candidate = None if latest_apply is not None: latest_check = _operation_by_id( operation_rows, latest_apply.get("check_mode_op_id") or latest_apply.get("parent_op_id"), ) else: latest_check = _first_operation(operation_rows, "ansible_check_mode_executed") source_candidate_op_id = None if latest_check is not None: source_candidate_op_id = latest_check.get("parent_op_id") or latest_check.get("source_candidate_op_id") if latest_apply is not None and not source_candidate_op_id: source_candidate_op_id = latest_apply.get("source_candidate_op_id") latest_candidate = _operation_by_id(operation_rows, source_candidate_op_id) if latest_candidate is None and latest_apply is None and latest_check is None: latest_candidate = _first_operation(operation_rows, "ansible_candidate_matched") anchor = latest_apply or latest_check or latest_candidate or {} apply_op_id = str((latest_apply or {}).get("op_id") or "") check_mode_op_id = str( (latest_check or {}).get("op_id") or (latest_apply or {}).get("check_mode_op_id") or (latest_apply or {}).get("parent_op_id") or "" ) candidate_op_id = str( (latest_candidate or {}).get("op_id") or source_candidate_op_id or "" ) incident_id = str(anchor.get("incident_id") or "") catalog_id = str(anchor.get("catalog_id") or "") playbook_path = str(anchor.get("playbook_path") or "") verifier = next( ( row for row in verifier_rows if apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id ), None, ) km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else "" km = next( ( row for row in km_rows if ( km_path_type and str(row.get("path_type") or "") == km_path_type ) or ( incident_id and str(row.get("related_incident_id") or "") == incident_id ) ), None, ) telegram = next( ( row for row in telegram_rows if str(row.get("send_status") or "") == "sent" and str(row.get("action") or "") == "controlled_apply_result" and ( not incident_id or str(row.get("incident_id") or "") == incident_id ) ), None, ) auto_repair = next( ( row for row in auto_repair_rows if apply_op_id and apply_op_id in str(row.get("executed_steps_text") or row.get("executed_steps") or "") ), None, ) candidate_present = bool(latest_candidate or candidate_op_id) check_present = bool(latest_check or check_mode_op_id) apply_present = latest_apply is not None auto_repair_present = auto_repair is not None verifier_present = verifier is not None km_present = km is not None telegram_present = telegram is not None stages = [ _loop_stage( stage_id="candidate", receipt_source="automation_operation_log:ansible_candidate_matched", present=candidate_present, status=_stage_status(latest_candidate, fallback_status="inferred_from_check_mode") if candidate_present else "missing", ref_id=candidate_op_id or None, writes_runtime_state=False, next_action_if_missing="candidate_backfill_worker_enqueue_allowlisted_playbook", ), _loop_stage( stage_id="check_mode", receipt_source="automation_operation_log:ansible_check_mode_executed", present=check_present, status=_stage_status(latest_check, fallback_status="inferred_from_apply_parent") if check_present else "missing", ref_id=check_mode_op_id or None, writes_runtime_state=False, next_action_if_missing="ansible_check_mode_worker_claims_candidate", ), _loop_stage( stage_id="controlled_apply", receipt_source="automation_operation_log:ansible_apply_executed", present=apply_present, status=_stage_status(latest_apply), ref_id=apply_op_id or None, writes_runtime_state=True, next_action_if_missing="controlled_apply_worker_waits_for_check_mode_success", ), _loop_stage( stage_id="auto_repair_execution_receipt", receipt_source="auto_repair_executions:ansible_controlled_apply", present=auto_repair_present, status=str((auto_repair or {}).get("result_status") or "missing"), ref_id=str((auto_repair or {}).get("id") or "") or None, writes_runtime_state=True, next_action_if_missing="receipt_backfill_records_auto_repair_execution", ), _loop_stage( stage_id="post_apply_verifier", receipt_source="incident_evidence.post_execution_state", present=verifier_present, status=str((verifier or {}).get("verification_result") or "missing"), ref_id=str((verifier or {}).get("id") or "") or None, writes_runtime_state=True, next_action_if_missing="post_apply_verifier_writes_incident_evidence", ), _loop_stage( stage_id="km_playbook_writeback", receipt_source="knowledge_entries:ansible_apply_receipt", present=km_present, status=str((km or {}).get("status") or "missing"), ref_id=str((km or {}).get("id") or "") or None, writes_runtime_state=True, next_action_if_missing="hermes_writes_km_playbook_trust_candidate", ), _loop_stage( stage_id="telegram_receipt", receipt_source="awooop_outbound_message:controlled_apply_result", present=telegram_present, status=str((telegram or {}).get("send_status") or "missing"), ref_id=str((telegram or {}).get("message_id") or "") or None, writes_runtime_state=True, next_action_if_missing="live_apply_gateway_sends_controlled_apply_result_receipt", ), ] missing_stage_ids = [ str(stage["stage_id"]) for stage in stages if stage["present"] is not True ] closed = bool( apply_op_id and auto_repair_present and latest_flow_closure.get("closed") is True ) classification = str(latest_failure_classification.get("classification") or "") if not candidate_present and not check_present and not apply_present: execution_state = "waiting_for_candidate" next_executor_action = "candidate_backfill_worker_waits_for_matching_incident" elif not apply_present: execution_state = "executor_in_progress_or_waiting" next_executor_action = "continue_candidate_to_check_mode_to_apply" elif closed and classification == "latest_controlled_apply_closed_success": execution_state = "closed_success" next_executor_action = "keep_receipt_chain_closed" elif closed: execution_state = "closed_failed_apply_repair_ready" next_executor_action = str( controlled_retry_package.get("next_ai_action") or "run_no_write_check_mode_replay" ) elif "telegram_receipt" in missing_stage_ids: execution_state = "open_waiting_for_live_gateway_receipt" next_executor_action = "do_not_fake_send_backfill_wait_for_live_apply_gateway" else: execution_state = "open_missing_internal_receipts" next_executor_action = "backfill_missing_auto_repair_verifier_km_receipts" return { "schema_version": "ai_agent_autonomous_execution_loop_ledger_v1", "project_id": project_id, "operation_id": apply_op_id or check_mode_op_id or candidate_op_id or None, "root_candidate_op_id": candidate_op_id or None, "check_mode_op_id": check_mode_op_id or None, "apply_op_id": apply_op_id or None, "incident_id": incident_id or None, "catalog_id": catalog_id or None, "playbook_path": playbook_path or None, "execution_state": execution_state, "closed": closed, "missing_stage_ids": missing_stage_ids, "next_executor_action": next_executor_action, "stages": stages, "safety_contract": { "writes_on_read": False, "backfill_may_write_auto_repair_verifier_km": True, "backfill_may_send_telegram": False, "live_apply_may_send_telegram_gateway_receipt": True, "reads_raw_sessions": False, "reads_secret_values": False, }, } def _latest_flow_closure( *, operation_latest_rows: Iterable[Mapping[str, Any] | Any], verifier_latest_rows: Iterable[Mapping[str, Any] | Any], km_latest_rows: Iterable[Mapping[str, Any] | Any], telegram_latest_rows: Iterable[Mapping[str, Any] | Any], ) -> dict[str, Any]: operation_rows = [_row_mapping(row) for row in operation_latest_rows] verifier_rows = [_row_mapping(row) for row in verifier_latest_rows] km_rows = [_row_mapping(row) for row in km_latest_rows] telegram_rows = [_row_mapping(row) for row in telegram_latest_rows] latest_apply = next( ( row for row in operation_rows if str(row.get("operation_type") or "") == "ansible_apply_executed" ), None, ) if latest_apply is None: return { "apply_op_id": None, "incident_id": None, "has_post_apply_verifier": False, "has_km_writeback": False, "has_telegram_receipt": False, "closed": False, "missing": [ "ansible_apply_executed", "post_apply_verifier", "km_writeback", "telegram_receipt", ], } apply_op_id = str(latest_apply.get("op_id") or "") incident_id = str(latest_apply.get("incident_id") or "") km_path_type = f"ansible_apply_receipt:{apply_op_id[:8]}" if apply_op_id else "" has_verifier = any( str(row.get("apply_op_id") or "") == apply_op_id for row in verifier_rows ) has_km = any( str(row.get("path_type") or "") == km_path_type or ( incident_id and str(row.get("related_incident_id") or "") == incident_id ) for row in km_rows ) has_telegram = any( str(row.get("send_status") or "") == "sent" and str(row.get("action") or "") == "controlled_apply_result" and ( not incident_id or str(row.get("incident_id") or "") == incident_id ) for row in telegram_rows ) missing = [ name for name, present in ( ("post_apply_verifier", has_verifier), ("km_writeback", has_km), ("telegram_receipt", has_telegram), ) if not present ] return { "apply_op_id": apply_op_id or None, "incident_id": incident_id or None, "has_post_apply_verifier": has_verifier, "has_km_writeback": has_km, "has_telegram_receipt": has_telegram, "closed": not missing, "missing": missing, } def _latest_failure_classification( *, operation_latest_rows: Iterable[Mapping[str, Any] | Any], verifier_latest_rows: Iterable[Mapping[str, Any] | Any], latest_flow_closure: Mapping[str, Any], ) -> dict[str, Any]: """Classify the newest controlled apply outcome without exposing command output.""" operation_rows = [_row_mapping(row) for row in operation_latest_rows] verifier_rows = [_row_mapping(row) for row in verifier_latest_rows] latest_apply = next( ( row for row in operation_rows if str(row.get("operation_type") or "") == "ansible_apply_executed" ), None, ) if latest_apply is None: return { "schema_version": "ai_agent_executor_failure_classification_v1", "classification": "no_controlled_apply_observed", "action": "wait_for_controlled_apply_receipt", "target_selector": {}, "evidence": { "latest_flow_closed": False, "output_tail_in_readback": False, "unredacted_output_required": False, }, } apply_op_id = str(latest_apply.get("op_id") or "") incident_id = str(latest_apply.get("incident_id") or "") returncode = _int_value(latest_apply.get("returncode")) verifier = next( ( row for row in verifier_rows if apply_op_id and str(row.get("apply_op_id") or "") == apply_op_id ), {}, ) verification_result = str(verifier.get("verification_result") or "").lower() latest_flow_closed = latest_flow_closure.get("closed") is True if returncode == 0 and verification_result in {"success", ""} and latest_flow_closed: classification = "latest_controlled_apply_closed_success" action = "keep_receipt_chain_closed" elif returncode == 0: classification = "controlled_apply_success_receipt_gap" action = "backfill_missing_verifier_km_or_telegram_receipt" elif latest_flow_closed: classification = "closed_failed_apply_requires_ai_repair" action = "queue_check_mode_replay_and_playbook_repair_candidate" else: classification = "failed_apply_receipt_gap_requires_backfill_then_repair" action = "backfill_missing_receipts_then_queue_repair_candidate" return { "schema_version": "ai_agent_executor_failure_classification_v1", "classification": classification, "action": action, "target_selector": { "incident_id": incident_id or None, "apply_op_id": apply_op_id or None, "parent_op_id": latest_apply.get("parent_op_id"), "catalog_id": latest_apply.get("catalog_id"), "playbook_path": latest_apply.get("playbook_path"), "execution_mode": latest_apply.get("execution_mode"), }, "evidence": { "operation_status": latest_apply.get("status"), "returncode": latest_apply.get("returncode"), "verification_result": verification_result or None, "latest_flow_closed": latest_flow_closed, "has_post_apply_verifier": latest_flow_closure.get("has_post_apply_verifier") is True, "has_km_writeback": latest_flow_closure.get("has_km_writeback") is True, "has_telegram_receipt": latest_flow_closure.get("has_telegram_receipt") is True, "output_tail_in_readback": False, "unredacted_output_required": False, }, "safe_next_steps": [ "run_no_write_check_mode_replay", "extract_sanitized_failed_task_summary", "write_km_playbook_repair_candidate", "retry_controlled_apply_only_after_check_mode_passes", ], } def _controlled_retry_package(classification: Mapping[str, Any]) -> dict[str, Any]: """Build the next no-write repair package from the public failure classification.""" target_selector = classification.get("target_selector") if not isinstance(target_selector, Mapping): target_selector = {} apply_op_id = str(target_selector.get("apply_op_id") or "") repair_required = classification.get("classification") in { "closed_failed_apply_requires_ai_repair", "failed_apply_receipt_gap_requires_backfill_then_repair", } return { "schema_version": "ai_agent_controlled_retry_package_v1", "package_id": ( f"ansible_retry:{apply_op_id[:8]}" if repair_required and apply_op_id else None ), "status": ( "ready_for_no_write_check_mode_replay" if repair_required else "not_required_for_latest_apply" ), "target_selector": dict(target_selector), "source_of_truth": { "catalog_id": target_selector.get("catalog_id"), "playbook_path": target_selector.get("playbook_path"), "source_diff_required_before_retry": True, "failed_task_summary_required": True, }, "preflight": { "no_write_check_mode_replay_required": repair_required, "reuse_parent_check_mode_op_id": target_selector.get("parent_op_id"), "unredacted_output_required": False, "secret_value_collection_allowed": False, }, "apply_gate": { "controlled_apply_retry_allowed_now": False, "opens_legacy_runner": False, "requires_check_mode_success_before_apply": repair_required, }, "rollback": { "rollback_candidate_required": repair_required, "destructive_rollback_allowed": False, "rollback_plan_source": "playbook_repair_candidate_after_failed_task_summary", }, "post_apply": { "post_apply_verifier_required": repair_required, "km_playbook_trust_writeback_required": repair_required, "telegram_receipt_required": repair_required, }, "next_ai_action": ( "run_no_write_check_mode_replay" if repair_required else "keep_latest_apply_receipts" ), } def classify_deploy_control_plane_observation( *, run_status: str, is_latest_deploy_intent: bool, active_task_container_count: int, production_marker_hit: bool, latest_flow_closed: bool, runner_capacity_ok: bool, runner_forbidden_label_count: int, ) -> dict[str, Any]: """Classify CD/run noise into an internal PlayBook decision.""" normalized_status = str(run_status or "unknown").strip().lower() has_active_task = active_task_container_count > 0 runner_lane_safe = runner_capacity_ok and runner_forbidden_label_count == 0 production_truth_ok = production_marker_hit and latest_flow_closed if not is_latest_deploy_intent: classification = "superseded_run_skip" action = "skip_cd_work_and_attach_to_superseded_intent" elif production_truth_ok and normalized_status == "success": classification = "deploy_succeeded_marker_hit" action = "close_deploy_intent_and_write_receipts" elif normalized_status == "running" and has_active_task and runner_lane_safe: classification = "running_with_controlled_task" action = "continue_observing_without_restarting_runner" elif normalized_status == "running" and not has_active_task and production_truth_ok: classification = "running_no_container_stale_ui" action = "treat_gitea_spinner_as_stale_and_keep_production_truth" elif normalized_status == "failure" and production_truth_ok: classification = "failed_run_superseded_by_marker_hit" action = "record_non_blocking_failure_and_keep_current_marker" elif normalized_status == "failure": classification = "real_failure_requires_playbook_repair" action = "open_cd_repair_playbook_with_target_selector_and_verifier" elif not runner_lane_safe: classification = "runner_lane_guardrail_violation" action = "fail_closed_runner_lane_and_open_repair_playbook" else: classification = "waiting_for_controlled_observation" action = "wait_for_mcp_observation_or_deploy_intent_update" return { "schema_version": "ai_agent_deploy_control_plane_decision_v1", "classification": classification, "action": action, "inputs": { "run_status": normalized_status, "is_latest_deploy_intent": is_latest_deploy_intent, "active_task_container_count": max(0, active_task_container_count), "production_marker_hit": production_marker_hit, "latest_flow_closed": latest_flow_closed, "runner_capacity_ok": runner_capacity_ok, "runner_forbidden_label_count": max(0, runner_forbidden_label_count), }, "internal_writeback": { "mcp_event_type": "deploy_run_observation", "rag_context_required": True, "km_writeback_required": True, "playbook_route_required": True, "log_projection_required": True, "telegram_receipt_required": classification in { "deploy_succeeded_marker_hit", "real_failure_requires_playbook_repair", "runner_lane_guardrail_violation", }, }, "safety_boundary": { "reads_raw_sessions": False, "reads_secret_values": False, "opens_legacy_runner": False, "uses_force_push": False, "writes_runtime_state": classification in { "deploy_succeeded_marker_hit", "real_failure_requires_playbook_repair", "runner_lane_guardrail_violation", }, }, } def _control_plane_integration() -> dict[str, Any]: classifier_examples = [ classify_deploy_control_plane_observation( run_status="success", is_latest_deploy_intent=True, active_task_container_count=0, production_marker_hit=True, latest_flow_closed=True, runner_capacity_ok=True, runner_forbidden_label_count=0, ), classify_deploy_control_plane_observation( run_status="running", is_latest_deploy_intent=True, active_task_container_count=0, production_marker_hit=True, latest_flow_closed=True, runner_capacity_ok=True, runner_forbidden_label_count=0, ), classify_deploy_control_plane_observation( run_status="failure", is_latest_deploy_intent=True, active_task_container_count=0, production_marker_hit=False, latest_flow_closed=False, runner_capacity_ok=True, runner_forbidden_label_count=0, ), ] return { "schema_version": "ai_agent_autonomous_runtime_internal_loop_v1", "status": "mcp_rag_km_playbook_log_control_loop_declared", "purpose": ( "把 Gitea run、runner lane、production marker、browser smoke 與 executor receipt " "先收斂成內部事件,再由 PlayBook decision 推進或跳過。" ), "mcp_sensors": [ { "sensor_id": "gitea_actions_run_observer", "normalized_event": "RunObservation", "raw_secret_access_allowed": False, }, { "sensor_id": "controlled_runner_lane_observer", "normalized_event": "RunnerLaneState", "raw_runner_token_access_allowed": False, }, { "sensor_id": "production_marker_observer", "normalized_event": "ProductionTruthSnapshot", "raw_session_access_allowed": False, }, { "sensor_id": "browser_smoke_observer", "normalized_event": "FrontendTruthSnapshot", "raw_conversation_access_allowed": False, }, ], "rag_context_queries": [ "runner_pressure_buildkit_stockplatform_collision", "controlled_cd_lane_capacity_label_guardrails", "autonomous_runtime_marker_receipt_contract", ], "playbook_decision_classes": [ "deploy_succeeded_marker_hit", "running_with_controlled_task", "running_no_container_stale_ui", "superseded_run_skip", "failed_run_superseded_by_marker_hit", "real_failure_requires_playbook_repair", "runner_lane_guardrail_violation", ], "km_writeback_contract": { "knowledge_entry_path_type": "deploy_control_plane_decision:", "required_refs": [ "deploy_intent_id", "target_sha", "gitea_run_id", "production_marker", "latest_flow_closure", "runner_lane_state", ], "stores_raw_logs": False, "stores_secret_values": False, }, "log_projection_contract": { "timeline_event_type": "ai_agent_deploy_control_plane_decision", "logbook_projection": "summary_only_after_verifier", "raw_html_or_long_log_allowed": False, }, "classifier_examples": classifier_examples, "rollups": { "mcp_sensor_count": 4, "rag_context_query_count": 3, "playbook_decision_class_count": 7, "classifier_example_count": len(classifier_examples), }, } def build_runtime_receipt_readback_from_rows( *, project_id: str = _DEFAULT_PROJECT_ID, lookback_hours: int = _DEFAULT_LOOKBACK_HOURS, db_read_status: str = "ok", operation_count_rows: Iterable[Mapping[str, Any] | Any] = (), operation_latest_rows: Iterable[Mapping[str, Any] | Any] = (), auto_repair_count_rows: Iterable[Mapping[str, Any] | Any] = (), auto_repair_latest_rows: Iterable[Mapping[str, Any] | Any] = (), verifier_count_rows: Iterable[Mapping[str, Any] | Any] = (), verifier_latest_rows: Iterable[Mapping[str, Any] | Any] = (), km_count_rows: Iterable[Mapping[str, Any] | Any] = (), km_latest_rows: Iterable[Mapping[str, Any] | Any] = (), telegram_count_rows: Iterable[Mapping[str, Any] | Any] = (), telegram_latest_rows: Iterable[Mapping[str, Any] | Any] = (), mcp_gateway_count_rows: Iterable[Mapping[str, Any] | Any] = (), legacy_mcp_count_rows: Iterable[Mapping[str, Any] | Any] = (), service_log_count_rows: Iterable[Mapping[str, Any] | Any] = (), executor_log_count_rows: Iterable[Mapping[str, Any] | Any] = (), timeline_count_rows: Iterable[Mapping[str, Any] | Any] = (), playbook_trust_count_rows: Iterable[Mapping[str, Any] | Any] = (), error_type: str | None = None, ) -> dict[str, Any]: """Build the live executor receipt readback from already-fetched rows.""" operation_latest = list(operation_latest_rows) auto_repair_latest = list(auto_repair_latest_rows) verifier_latest = list(verifier_latest_rows) km_latest = list(km_latest_rows) telegram_latest = list(telegram_latest_rows) operation_summary = _operation_counts(operation_count_rows) auto_repair_summary = _status_counts( auto_repair_count_rows, status_key="result_status", ) verifier_summary = _status_counts( verifier_count_rows, status_key="verification_result", ) km_summary = _status_counts(km_count_rows, status_key="status") telegram_summary = _status_counts(telegram_count_rows, status_key="send_status") mcp_gateway_summary = _status_counts(mcp_gateway_count_rows, status_key="status") legacy_mcp_summary = _status_counts(legacy_mcp_count_rows, status_key="status") service_log_summary = _status_counts(service_log_count_rows, status_key="status") executor_log_summary = _status_counts(executor_log_count_rows, status_key="status") timeline_summary = _status_counts(timeline_count_rows, status_key="status") playbook_trust_summary = _status_counts(playbook_trust_count_rows, status_key="status") latest_closure = _latest_flow_closure( operation_latest_rows=operation_latest, verifier_latest_rows=verifier_latest, km_latest_rows=km_latest, telegram_latest_rows=telegram_latest, ) latest_failure = _latest_failure_classification( operation_latest_rows=operation_latest, verifier_latest_rows=verifier_latest, latest_flow_closure=latest_closure, ) retry_package = _controlled_retry_package(latest_failure) loop_ledger = _autonomous_execution_loop_ledger( project_id=project_id, operation_latest_rows=operation_latest, verifier_latest_rows=verifier_latest, km_latest_rows=km_latest, telegram_latest_rows=telegram_latest, auto_repair_latest_rows=auto_repair_latest, latest_flow_closure=latest_closure, latest_failure_classification=latest_failure, controlled_retry_package=retry_package, ) trace_ledger = _build_trace_ledger( operation_summary=operation_summary, auto_repair_summary=auto_repair_summary, verifier_summary=verifier_summary, km_summary=km_summary, telegram_summary=telegram_summary, mcp_gateway_summary=mcp_gateway_summary, legacy_mcp_summary=legacy_mcp_summary, service_log_summary=service_log_summary, executor_log_summary=executor_log_summary, timeline_summary=timeline_summary, playbook_trust_summary=playbook_trust_summary, latest_flow_closure=latest_closure, loop_ledger=loop_ledger, ) log_integration_taxonomy = _build_log_integration_taxonomy( operation_summary=operation_summary, auto_repair_summary=auto_repair_summary, verifier_summary=verifier_summary, km_summary=km_summary, telegram_summary=telegram_summary, mcp_gateway_summary=mcp_gateway_summary, legacy_mcp_summary=legacy_mcp_summary, service_log_summary=service_log_summary, executor_log_summary=executor_log_summary, timeline_summary=timeline_summary, playbook_trust_summary=playbook_trust_summary, ) agent_decision_wiring = _build_agent_decision_wiring( operation_summary=operation_summary, verifier_summary=verifier_summary, km_summary=km_summary, mcp_gateway_summary=mcp_gateway_summary, legacy_mcp_summary=legacy_mcp_summary, service_log_summary=service_log_summary, timeline_summary=timeline_summary, playbook_trust_summary=playbook_trust_summary, log_integration_taxonomy=log_integration_taxonomy, loop_ledger=loop_ledger, latest_flow_closure=latest_closure, ) learning_loop = _build_learning_loop_readback( operation_summary=operation_summary, verifier_summary=verifier_summary, km_summary=km_summary, playbook_trust_summary=playbook_trust_summary, log_integration_taxonomy=log_integration_taxonomy, agent_decision_wiring=agent_decision_wiring, latest_flow_closure=latest_closure, latest_failure_classification=latest_failure, controlled_retry_package=retry_package, loop_ledger=loop_ledger, ) work_item_progress = _build_work_item_progress( trace_ledger=trace_ledger, log_integration_taxonomy=log_integration_taxonomy, agent_decision_wiring=agent_decision_wiring, learning_loop=learning_loop, db_read_status=db_read_status, ) apply_summary = operation_summary.get("ansible_apply_executed") or {} readback = { "schema_version": _LIVE_READBACK_SCHEMA_VERSION, "project_id": project_id, "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)), "db_read_status": db_read_status, "writes_on_read": False, "ansible_operations": { "counts": operation_summary, "latest": _sanitize_latest_rows( operation_latest, allowed_keys=( "op_id", "parent_op_id", "operation_type", "status", "actor", "incident_id", "catalog_id", "playbook_path", "execution_mode", "source_candidate_op_id", "check_mode_op_id", "risk_level", "controlled_apply_allowed", "returncode", "duration_ms", "created_at", ), ), }, "auto_repair_execution_receipt": { **auto_repair_summary, "latest": _sanitize_latest_rows( auto_repair_latest, allowed_keys=( "id", "incident_id", "catalog_id", "playbook_name", "result_status", "triggered_by", "risk_level", "execution_time_ms", "created_at", ), ), }, "ansible_apply_executed": { "total": _int_value(apply_summary.get("total")), "recent": _int_value(apply_summary.get("recent")), "by_status": apply_summary.get("by_status") or {}, }, "post_apply_verifier": { **verifier_summary, "latest": _sanitize_latest_rows( verifier_latest, allowed_keys=( "id", "incident_id", "matched_playbook_id", "verification_result", "apply_op_id", "catalog_id", "playbook_path", "returncode", "collected_at", ), ), }, "km_writeback": { **km_summary, "latest": _sanitize_latest_rows( km_latest, allowed_keys=( "id", "title", "related_incident_id", "related_playbook_id", "path_type", "status", "created_by", "created_at", ), ), }, "telegram_receipt": { **telegram_summary, "latest": _sanitize_latest_rows( telegram_latest, allowed_keys=( "message_id", "run_id", "message_type", "send_status", "provider_message_id", "incident_id", "action", "queued_at", "sent_at", ), ), }, "mcp_context": { "gateway": mcp_gateway_summary, "legacy": legacy_mcp_summary, "total": _trace_total(mcp_gateway_summary) + _trace_total(legacy_mcp_summary), "recent": _trace_recent(mcp_gateway_summary) + _trace_recent(legacy_mcp_summary), }, "service_log_evidence": service_log_summary, "executor_log_projection": executor_log_summary, "timeline_projection": timeline_summary, "playbook_trust": playbook_trust_summary, "latest_flow_closure": latest_closure, "latest_failure_classification": latest_failure, "controlled_retry_package": retry_package, "autonomous_execution_loop_ledger": loop_ledger, "trace_ledger": trace_ledger, "log_integration_taxonomy": log_integration_taxonomy, "agent_decision_wiring": agent_decision_wiring, "learning_loop": learning_loop, "work_item_progress": work_item_progress, } if error_type: readback["error"] = { "type": error_type, "message": "runtime receipt DB read failed; see API logs", } return readback def _attach_runtime_receipt_readback( payload: dict[str, Any], readback: dict[str, Any], ) -> dict[str, Any]: payload["runtime_receipt_readback"] = readback rollups = payload.setdefault("rollups", {}) rollups.update({ "live_ansible_apply_executed_count": _int_value( readback.get("ansible_apply_executed", {}).get("total") ), "live_auto_repair_execution_receipt_count": _int_value( readback.get("auto_repair_execution_receipt", {}).get("total") ), "live_post_apply_verifier_count": _int_value( readback.get("post_apply_verifier", {}).get("total") ), "live_km_writeback_count": _int_value( readback.get("km_writeback", {}).get("total") ), "live_telegram_receipt_count": _int_value( readback.get("telegram_receipt", {}).get("total") ), "live_executor_latest_flow_closed_count": ( 1 if (readback.get("latest_flow_closure") or {}).get("closed") is True else 0 ), "live_autonomous_execution_loop_closed_count": ( 1 if (readback.get("autonomous_execution_loop_ledger") or {}).get("closed") is True else 0 ), "live_executor_latest_apply_repair_required_count": ( 1 if ( (readback.get("latest_failure_classification") or {}).get("classification") in { "closed_failed_apply_requires_ai_repair", "failed_apply_receipt_gap_requires_backfill_then_repair", } ) else 0 ), "live_executor_retry_package_ready_count": ( 1 if (readback.get("controlled_retry_package") or {}).get("status") == "ready_for_no_write_check_mode_replay" else 0 ), "live_mcp_context_count": _int_value(readback.get("mcp_context", {}).get("total")), "live_service_log_evidence_count": _int_value( readback.get("service_log_evidence", {}).get("total") ), "live_executor_log_projection_count": _int_value( readback.get("executor_log_projection", {}).get("total") ), "live_timeline_projection_count": _int_value( readback.get("timeline_projection", {}).get("total") ), "live_playbook_trust_signal_count": _int_value( readback.get("playbook_trust", {}).get("total") ), "live_trace_recorded_stage_count": _int_value( readback.get("trace_ledger", {}).get("recorded_stage_count") ), "live_trace_required_missing_count": len( (readback.get("trace_ledger") or {}).get("missing_required_stage_ids") or [] ), "live_log_source_family_count": _int_value( ((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get( "source_family_count" ) ), "live_log_active_source_family_count": _int_value( ((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get( "active_source_family_count" ) ), "live_log_label_dimension_count": _int_value( ((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get( "label_dimension_count" ) ), "live_log_classified_event_total": _int_value( ((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get( "classified_event_total" ) ), "live_log_recent_classified_event_total": _int_value( ((readback.get("log_integration_taxonomy") or {}).get("rollups") or {}).get( "recent_classified_event_total" ) ), "live_agent_decision_wiring_stage_count": _int_value( ((readback.get("agent_decision_wiring") or {}).get("rollups") or {}).get( "stage_count" ) ), "live_agent_decision_wiring_required_present_count": _int_value( ((readback.get("agent_decision_wiring") or {}).get("rollups") or {}).get( "required_stage_present_count" ) ), "live_agent_decision_wiring_required_missing_count": _int_value( ((readback.get("agent_decision_wiring") or {}).get("rollups") or {}).get( "required_stage_missing_count" ) ), "live_agent_decision_wiring_complete_count": ( 1 if (readback.get("agent_decision_wiring") or {}).get("status") == "completed" else 0 ), "live_learning_loop_stage_count": _int_value( ((readback.get("learning_loop") or {}).get("rollups") or {}).get( "stage_count" ) ), "live_learning_loop_required_present_count": _int_value( ((readback.get("learning_loop") or {}).get("rollups") or {}).get( "required_stage_present_count" ) ), "live_learning_loop_required_missing_count": _int_value( ((readback.get("learning_loop") or {}).get("rollups") or {}).get( "required_stage_missing_count" ) ), "live_learning_loop_complete_count": ( 1 if (readback.get("learning_loop") or {}).get("status") == "completed" else 0 ), "live_learning_loop_similar_case_source_count": _int_value( ((readback.get("learning_loop") or {}).get("rollups") or {}).get( "similar_case_source_total" ) ), "live_work_item_count": _int_value( ((readback.get("work_item_progress") or {}).get("rollups") or {}).get( "work_item_count" ) ), "live_work_item_completed_count": _int_value( ((readback.get("work_item_progress") or {}).get("rollups") or {}).get( "completed_count" ) ), "live_work_item_in_progress_count": _int_value( ((readback.get("work_item_progress") or {}).get("rollups") or {}).get( "in_progress_count" ) ), "live_work_item_pending_count": _int_value( ((readback.get("work_item_progress") or {}).get("rollups") or {}).get( "pending_count" ) ), "live_work_item_blocked_count": _int_value( ((readback.get("work_item_progress") or {}).get("rollups") or {}).get( "blocked_count" ) ), }) return payload def build_ai_agent_autonomous_runtime_control() -> dict[str, Any]: """Build the current AI Agent autonomy control-plane readback.""" allowed_risks = _allowed_risk_levels() report_cadences = [ { "cadence": "daily", "display_name": "日報", "schedule": f"每日 {DAILY_REPORT_HOUR_TAIPEI:02d}:00 台北時間", "worker": "report_generation_service.run_daily_report_loop", "telegram_gateway_delivery_enabled": True, "direct_bot_api_allowed": False, "receipt_source": "daily_report_sent log + Telegram Gateway result", }, { "cadence": "weekly", "display_name": "週報", "schedule": ( f"每週五 {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間" if WEEKLY_REPORT_WEEKDAY_TAIPEI == 4 else f"每週 weekday={WEEKLY_REPORT_WEEKDAY_TAIPEI} {WEEKLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間" ), "worker": "report_generation_service.run_weekly_report_loop", "telegram_gateway_delivery_enabled": True, "direct_bot_api_allowed": False, "receipt_source": "weekly_report_sent log + Telegram Gateway result", }, { "cadence": "monthly", "display_name": "月報", "schedule": f"每月 {MONTHLY_REPORT_DAY_TAIPEI} 日 {MONTHLY_REPORT_HOUR_TAIPEI:02d}:00 台北時間", "worker": "report_generation_service.run_monthly_report_loop", "telegram_gateway_delivery_enabled": True, "direct_bot_api_allowed": False, "receipt_source": "monthly_report_sent log + Telegram Gateway result", }, ] executor_receipts = [ { "operation_type": "ansible_candidate_matched", "owner_agent": "Hermes", "purpose": "把修復候選寫入 executor 可認領佇列", "writes_runtime_state": False, }, { "operation_type": "ansible_check_mode_executed", "owner_agent": "AwoooP Ansible check-mode worker", "purpose": "執行 ansible-playbook --check --diff 並留下乾跑收據", "writes_runtime_state": False, }, { "operation_type": "ansible_apply_executed", "owner_agent": "AwoooP controlled apply worker", "purpose": "check-mode 通過後,對 allowlisted low / medium / high PlayBook 受控 apply", "writes_runtime_state": True, }, { "operation_type": "incident_evidence.post_execution_state", "owner_agent": "post_apply_verifier", "purpose": "apply 後寫入 verifier 結果與 post-execution evidence", "writes_runtime_state": True, }, { "operation_type": "knowledge_entries", "owner_agent": "Hermes", "purpose": "把已驗證執行沉澱成 KM / PlayBook trust 候選", "writes_runtime_state": True, }, ] hard_blockers = [ "secret_token_private_key_cookie_session_auth_header_cleartext", "drop_truncate_restore_prune_destructive_database_operation", "reboot_node_drain_irreversible_firewall_or_host_lockout", "credentialed_exploit_or_external_active_scan", "new_paid_provider_cost_ceiling_or_provider_switch_without_replay_shadow_canary", "force_push_delete_repo_refs_or_visibility_change", "critical_or_break_glass_route_without_explicit_break_glass_contract", ] legacy_overrides = [ { "legacy_area": "report_status_board_no_live_send", "current_effect": "overridden", "new_behavior": "日報 / 週報 / 月報透過 Telegram Gateway 排程派送", }, { "legacy_area": "report_live_delivery_owner_review_required", "current_effect": "overridden", "new_behavior": "報告派送走低/中/高風險自動化政策;critical 才 break-glass", }, { "legacy_area": "high_risk_owner_review_queue", "current_effect": "overridden_for_high_non_critical", "new_behavior": "high 風險允許 controlled apply;critical / hard blocker 仍不自動", }, { "legacy_area": "telegram_no_send_preview_only", "current_effect": "overridden", "new_behavior": "用 Telegram Gateway 實送報告與 actionable receipt;不直接暴露 Bot API", }, ] control_plane_integration = _control_plane_integration() payload = { "schema_version": _SCHEMA_VERSION, "generated_at": datetime.now(UTC).isoformat(), "program_status": { "current_task_id": "P2-416-D1N", "status": "current_directive_control_plane_active", "runtime_authority": _RUNTIME_AUTHORITY, "deploy_readback_marker": _DEPLOY_READBACK_MARKER, "deploy_attempt_note": _DEPLOY_ATTEMPT_NOTE, "legacy_no_send_no_live_rules_overridden": True, "implementation_completion_percent": 88, "status_note": ( "目前有效規則:low / medium / high 風險由 AI Agent 在 allowlist、" "Ansible check-mode、verifier、rollback、KM 與 Telegram receipt 下受控自動處理。" ), }, "current_policy": { "low_risk_controlled_apply_allowed": "low" in allowed_risks, "medium_risk_controlled_apply_allowed": "medium" in allowed_risks, "high_risk_controlled_apply_allowed": "high" in allowed_risks, "critical_break_glass_required": True, "owner_review_required_for_low_medium_high": False, "direct_bot_api_allowed": False, "telegram_gateway_required": True, "post_apply_verifier_required": True, "km_learning_writeback_required": True, }, "runtime_switches": { "ansible_check_mode_worker_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CHECK_MODE_WORKER), "ansible_controlled_apply_enabled": bool(settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY), "ansible_controlled_apply_allowed_risk_levels": allowed_risks, "ansible_check_mode_interval_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_INTERVAL_SECONDS, "ansible_check_mode_batch_limit": settings.AWOOOP_ANSIBLE_CHECK_MODE_BATCH_LIMIT, "ansible_check_mode_timeout_seconds": settings.AWOOOP_ANSIBLE_CHECK_MODE_TIMEOUT_SECONDS, "ansible_controlled_apply_timeout_seconds": settings.AWOOOP_ANSIBLE_CONTROLLED_APPLY_TIMEOUT_SECONDS, }, "agent_roles": [ { "agent_id": "openclaw", "role": "仲裁 / hard blocker / replay-shadow-canary gate", "current_job": "只阻擋真正 critical 與 hard blocker,不再用身份保護舊架構", }, { "agent_id": "hermes", "role": "報告 / Telegram digest / KM 與 PlayBook trust writeback", "current_job": "日週月報、收據摘要與 verifier 後學習沉澱", }, { "agent_id": "nemotron", "role": "市場技術雷達 / no-write replay / challenger scorecard", "current_job": "用市場與回放數據挑戰 OpenClaw / provider / Agent 組合", }, { "agent_id": "awooop_ansible_worker", "role": "executor", "current_job": "candidate → check-mode → controlled apply → verifier → KM", }, { "agent_id": "telegram_ops", "role": "Telegram Gateway receipt", "current_job": "群組報告、actionable receipt、失敗告警;不展示敏感值或未脫敏資料", }, ], "report_delivery": { "status": "telegram_gateway_delivery_enabled", "cadences": report_cadences, }, "controlled_executor": { "status": "check_mode_then_apply_enabled" if settings.ENABLE_AWOOOP_ANSIBLE_CONTROLLED_APPLY else "check_mode_only_by_config", "operation_receipts": executor_receipts, "required_flow": [ "allowlisted_candidate", "ansible_check_mode_success", "controlled_apply", "post_apply_verifier", "auto_repair_execution_receipt", "km_learning_writeback", "telegram_receipt_or_alert", ], }, "control_plane_integration": control_plane_integration, "legacy_policy_overrides": legacy_overrides, "hard_blockers": hard_blockers, "visibility_contract": { "frontend_displays_runtime_truth": True, "work_window_transcript_display_allowed": False, "prompt_body_display_allowed": False, "internal_reasoning_display_allowed": False, "sensitive_value_display_allowed": False, "telegram_unredacted_payload_display_allowed": False, "lan_topology_redaction_required": True, }, "rollups": { "automated_risk_tier_count": sum(1 for risk in ("low", "medium", "high") if risk in allowed_risks), "hard_blocker_count": len(hard_blockers), "report_cadence_enabled_count": len(report_cadences), "telegram_gateway_delivery_enabled_count": sum( 1 for item in report_cadences if item["telegram_gateway_delivery_enabled"] ), "direct_bot_api_allowed_count": 0, "controlled_executor_operation_receipt_count": len(executor_receipts), "runtime_write_receipt_type_count": sum( 1 for item in executor_receipts if item["writes_runtime_state"] ), "legacy_policy_overridden_count": len(legacy_overrides), "mcp_sensor_count": control_plane_integration["rollups"]["mcp_sensor_count"], "rag_context_query_count": control_plane_integration["rollups"]["rag_context_query_count"], "playbook_decision_class_count": control_plane_integration["rollups"]["playbook_decision_class_count"], "deploy_control_classifier_example_count": control_plane_integration["rollups"]["classifier_example_count"], }, } _attach_runtime_receipt_readback( payload, build_runtime_receipt_readback_from_rows( project_id=_DEFAULT_PROJECT_ID, db_read_status="not_queried", ), ) _validate_payload(payload) return payload async def load_ai_agent_autonomous_runtime_receipt_readback( *, project_id: str = _DEFAULT_PROJECT_ID, lookback_hours: int = _DEFAULT_LOOKBACK_HOURS, limit: int = 20, ) -> dict[str, Any]: """Read live executor receipts without sending messages or mutating runtime state.""" params = { "project_id": project_id, "lookback_hours": max(1, int(lookback_hours or _DEFAULT_LOOKBACK_HOURS)), "limit": max(1, int(limit or 20)), } try: async with get_db_context(project_id) as db: await db.execute(text("SET LOCAL statement_timeout = '5000ms'")) async def _safe_aux_rows( query_name: str, sql: str, fallback_sql: str | None = None, ) -> list[Mapping[str, Any]]: try: return (await db.execute(text(sql), params)).mappings().all() except Exception as exc: # pragma: no cover - depends on live schema drift logger.warning( "ai_agent_autonomous_runtime_trace_aux_read_failed", project_id=project_id, query_name=query_name, error_type=type(exc).__name__, ) if fallback_sql: try: return (await db.execute(text(fallback_sql), params)).mappings().all() except Exception as fallback_exc: # pragma: no cover - live schema drift logger.warning( "ai_agent_autonomous_runtime_trace_aux_fallback_failed", project_id=project_id, query_name=query_name, error_type=type(fallback_exc).__name__, ) return [] operation_counts = ( await db.execute(text(_RUNTIME_OPERATION_COUNTS_SQL), params) ).mappings().all() operation_latest = ( await db.execute(text(_RUNTIME_OPERATION_LATEST_SQL), params) ).mappings().all() auto_repair_counts = ( await db.execute(text(_RUNTIME_AUTO_REPAIR_COUNTS_SQL), params) ).mappings().all() auto_repair_latest = ( await db.execute(text(_RUNTIME_AUTO_REPAIR_LATEST_SQL), params) ).mappings().all() verifier_counts = ( await db.execute(text(_RUNTIME_VERIFIER_COUNTS_SQL), params) ).mappings().all() verifier_latest = ( await db.execute(text(_RUNTIME_VERIFIER_LATEST_SQL), params) ).mappings().all() km_counts = ( await db.execute(text(_RUNTIME_KM_COUNTS_SQL), params) ).mappings().all() km_latest = ( await db.execute(text(_RUNTIME_KM_LATEST_SQL), params) ).mappings().all() telegram_counts = ( await db.execute(text(_RUNTIME_TELEGRAM_COUNTS_SQL), params) ).mappings().all() telegram_latest = ( await db.execute(text(_RUNTIME_TELEGRAM_LATEST_SQL), params) ).mappings().all() mcp_gateway_counts = await _safe_aux_rows( "mcp_gateway_counts", _RUNTIME_MCP_GATEWAY_COUNTS_SQL, ) legacy_mcp_counts = await _safe_aux_rows( "legacy_mcp_counts", _RUNTIME_LEGACY_MCP_COUNTS_SQL, ) service_log_counts = await _safe_aux_rows( "service_log_counts", _RUNTIME_SERVICE_LOG_COUNTS_SQL, ) executor_log_counts = await _safe_aux_rows( "executor_log_counts", _RUNTIME_EXECUTOR_LOG_COUNTS_SQL, ) timeline_counts = await _safe_aux_rows( "timeline_counts", _RUNTIME_TIMELINE_COUNTS_SQL, _RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL, ) playbook_trust_counts = await _safe_aux_rows( "playbook_trust_counts", _RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL, _RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL, ) except Exception as exc: logger.warning( "ai_agent_autonomous_runtime_receipt_readback_failed", project_id=project_id, error_type=type(exc).__name__, ) return build_runtime_receipt_readback_from_rows( project_id=project_id, lookback_hours=params["lookback_hours"], db_read_status="unavailable", error_type=type(exc).__name__, ) return build_runtime_receipt_readback_from_rows( project_id=project_id, lookback_hours=params["lookback_hours"], db_read_status="ok", operation_count_rows=operation_counts, operation_latest_rows=operation_latest, auto_repair_count_rows=auto_repair_counts, auto_repair_latest_rows=auto_repair_latest, verifier_count_rows=verifier_counts, verifier_latest_rows=verifier_latest, km_count_rows=km_counts, km_latest_rows=km_latest, telegram_count_rows=telegram_counts, telegram_latest_rows=telegram_latest, mcp_gateway_count_rows=mcp_gateway_counts, legacy_mcp_count_rows=legacy_mcp_counts, service_log_count_rows=service_log_counts, executor_log_count_rows=executor_log_counts, timeline_count_rows=timeline_counts, playbook_trust_count_rows=playbook_trust_counts, ) async def build_ai_agent_autonomous_runtime_control_with_live_readback( *, project_id: str = _DEFAULT_PROJECT_ID, lookback_hours: int = _DEFAULT_LOOKBACK_HOURS, ) -> dict[str, Any]: """Build the control plane and attach live DB receipt readback.""" payload = build_ai_agent_autonomous_runtime_control() readback = await load_ai_agent_autonomous_runtime_receipt_readback( project_id=project_id, lookback_hours=lookback_hours, ) _attach_runtime_receipt_readback(payload, readback) _validate_payload(payload) return payload _RUNTIME_OPERATION_COUNTS_SQL = """ SELECT operation_type, status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM automation_operation_log WHERE operation_type IN ( 'ansible_candidate_matched', 'ansible_check_mode_executed', 'ansible_apply_executed', 'ansible_learning_writeback_recorded', 'ansible_rollback_executed', 'ansible_execution_skipped' ) GROUP BY operation_type, status ORDER BY operation_type, status """ _RUNTIME_OPERATION_LATEST_SQL = """ SELECT op_id::text AS op_id, parent_op_id::text AS parent_op_id, operation_type, status, actor, coalesce(incident_id::text, input ->> 'incident_id') AS incident_id, input ->> 'catalog_id' AS catalog_id, coalesce(input ->> 'apply_playbook_path', input ->> 'playbook_path') AS playbook_path, input ->> 'execution_mode' AS execution_mode, input ->> 'source_candidate_op_id' AS source_candidate_op_id, input ->> 'check_mode_op_id' AS check_mode_op_id, input ->> 'risk_level' AS risk_level, input ->> 'controlled_apply_allowed' AS controlled_apply_allowed, coalesce(output ->> 'returncode', dry_run_result ->> 'returncode') AS returncode, duration_ms, created_at FROM automation_operation_log WHERE operation_type IN ( 'ansible_candidate_matched', 'ansible_check_mode_executed', 'ansible_apply_executed', 'ansible_learning_writeback_recorded', 'ansible_rollback_executed', 'ansible_execution_skipped' ) ORDER BY created_at DESC LIMIT :limit """ _RUNTIME_AUTO_REPAIR_COUNTS_SQL = """ SELECT CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM auto_repair_executions WHERE triggered_by = 'ansible_controlled_apply' GROUP BY CASE WHEN success THEN 'success' ELSE 'failed' END ORDER BY result_status """ _RUNTIME_AUTO_REPAIR_LATEST_SQL = """ SELECT id, incident_id, playbook_id AS catalog_id, playbook_name, CASE WHEN success THEN 'success' ELSE 'failed' END AS result_status, executed_steps::text AS executed_steps_text, triggered_by, risk_level, execution_time_ms, created_at FROM auto_repair_executions WHERE triggered_by = 'ansible_controlled_apply' ORDER BY created_at DESC LIMIT :limit """ _RUNTIME_VERIFIER_COUNTS_SQL = """ SELECT coalesce(verification_result, 'missing') AS verification_result, count(*) AS total, count(*) FILTER ( WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM incident_evidence WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL GROUP BY coalesce(verification_result, 'missing') ORDER BY verification_result """ _RUNTIME_VERIFIER_LATEST_SQL = """ SELECT id, incident_id, matched_playbook_id, coalesce(verification_result, 'missing') AS verification_result, post_execution_state ->> 'apply_op_id' AS apply_op_id, post_execution_state ->> 'catalog_id' AS catalog_id, post_execution_state ->> 'playbook_path' AS playbook_path, post_execution_state ->> 'returncode' AS returncode, collected_at FROM incident_evidence WHERE post_execution_state ->> 'apply_op_id' IS NOT NULL ORDER BY collected_at DESC LIMIT :limit """ _RUNTIME_KM_COUNTS_SQL = """ SELECT status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM knowledge_entries WHERE project_id = :project_id AND ( path_type LIKE 'ansible_apply_receipt:%' OR tags::text LIKE '%ansible_controlled_apply%' ) GROUP BY status ORDER BY status """ _RUNTIME_KM_LATEST_SQL = """ SELECT id, title, related_incident_id, related_playbook_id, path_type, status, created_by, created_at FROM knowledge_entries WHERE project_id = :project_id AND ( path_type LIKE 'ansible_apply_receipt:%' OR tags::text LIKE '%ansible_controlled_apply%' ) ORDER BY created_at DESC LIMIT :limit """ _RUNTIME_TELEGRAM_COUNTS_SQL = """ SELECT send_status, count(*) AS total, count(*) FILTER ( WHERE queued_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM awooop_outbound_message WHERE project_id = :project_id AND channel_type = 'telegram' AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result' GROUP BY send_status ORDER BY send_status """ _RUNTIME_TELEGRAM_LATEST_SQL = """ SELECT message_id::text AS message_id, run_id::text AS run_id, message_type, send_status, provider_message_id, source_envelope #>> '{callback_reply,incident_id}' AS incident_id, source_envelope #>> '{callback_reply,action}' AS action, queued_at, sent_at FROM awooop_outbound_message WHERE project_id = :project_id AND channel_type = 'telegram' AND source_envelope #>> '{callback_reply,action}' = 'controlled_apply_result' ORDER BY queued_at DESC LIMIT :limit """ _RUNTIME_MCP_GATEWAY_COUNTS_SQL = """ SELECT coalesce(result_status, 'unknown') AS status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM awooop_mcp_gateway_audit WHERE project_id = :project_id GROUP BY coalesce(result_status, 'unknown') ORDER BY status """ _RUNTIME_LEGACY_MCP_COUNTS_SQL = """ SELECT CASE WHEN success IS TRUE THEN 'success' WHEN success IS FALSE THEN 'failed' ELSE 'unknown' END AS status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM mcp_audit_log GROUP BY CASE WHEN success IS TRUE THEN 'success' WHEN success IS FALSE THEN 'failed' ELSE 'unknown' END ORDER BY status """ _RUNTIME_SERVICE_LOG_COUNTS_SQL = """ SELECT 'sanitized_recent_logs' AS status, count(*) AS total, count(*) FILTER ( WHERE collected_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM incident_evidence WHERE recent_logs IS NOT NULL OR evidence_summary IS NOT NULL OR mcp_health IS NOT NULL OR anomaly_context IS NOT NULL """ _RUNTIME_EXECUTOR_LOG_COUNTS_SQL = """ SELECT coalesce(status, 'unknown') AS status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM automation_operation_log WHERE operation_type IN ( 'ansible_candidate_matched', 'ansible_check_mode_executed', 'ansible_apply_executed', 'ansible_rollback_executed', 'ansible_execution_skipped' ) AND ( output IS NOT NULL OR error IS NOT NULL OR stderr_feed_back IS NOT NULL OR dry_run_result IS NOT NULL ) GROUP BY coalesce(status, 'unknown') ORDER BY status """ _RUNTIME_TIMELINE_COUNTS_SQL = """ SELECT coalesce(status, 'unknown') AS status, count(*) AS total, count(*) FILTER ( WHERE created_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM timeline_events WHERE event_type IS NOT NULL OR actor IS NOT NULL OR actor_role IS NOT NULL GROUP BY coalesce(status, 'unknown') ORDER BY status """ _RUNTIME_TIMELINE_COUNTS_FALLBACK_SQL = """ SELECT 'timeline_event' AS status, count(*) AS total, 0 AS recent FROM timeline_events """ _RUNTIME_PLAYBOOK_TRUST_COUNTS_SQL = """ SELECT CASE WHEN review_required IS TRUE THEN 'review_required' WHEN trust_score >= 0.8 THEN 'high_trust' WHEN trust_score < 0.3 THEN 'low_trust' WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active' ELSE 'seeded_not_used' END AS status, count(*) AS total, count(*) FILTER ( WHERE updated_at >= NOW() - (:lookback_hours * INTERVAL '1 hour') ) AS recent FROM playbooks GROUP BY CASE WHEN review_required IS TRUE THEN 'review_required' WHEN trust_score >= 0.8 THEN 'high_trust' WHEN trust_score < 0.3 THEN 'low_trust' WHEN success_count > 0 OR failure_count > 0 THEN 'learning_active' ELSE 'seeded_not_used' END ORDER BY status """ _RUNTIME_PLAYBOOK_TRUST_COUNTS_FALLBACK_SQL = """ SELECT 'cataloged' AS status, count(*) AS total, 0 AS recent FROM playbooks """ def _validate_payload(payload: dict[str, Any]) -> None: if payload.get("schema_version") != _SCHEMA_VERSION: raise ValueError(f"schema_version must be {_SCHEMA_VERSION}") status = payload.get("program_status") or {} if status.get("runtime_authority") != _RUNTIME_AUTHORITY: raise ValueError(f"runtime_authority must be {_RUNTIME_AUTHORITY}") if status.get("deploy_readback_marker") != _DEPLOY_READBACK_MARKER: raise ValueError(f"deploy_readback_marker must be {_DEPLOY_READBACK_MARKER}") if status.get("deploy_attempt_note") != _DEPLOY_ATTEMPT_NOTE: raise ValueError(f"deploy_attempt_note must be {_DEPLOY_ATTEMPT_NOTE}") policy = payload.get("current_policy") or {} for key in ( "low_risk_controlled_apply_allowed", "medium_risk_controlled_apply_allowed", "high_risk_controlled_apply_allowed", "telegram_gateway_required", "post_apply_verifier_required", "km_learning_writeback_required", ): if policy.get(key) is not True: raise ValueError(f"current_policy.{key} must be true") if policy.get("owner_review_required_for_low_medium_high") is not False: raise ValueError("owner_review_required_for_low_medium_high must be false") if policy.get("direct_bot_api_allowed") is not False: raise ValueError("direct_bot_api_allowed must be false") visibility = payload.get("visibility_contract") or {} for key in ( "work_window_transcript_display_allowed", "prompt_body_display_allowed", "internal_reasoning_display_allowed", "sensitive_value_display_allowed", "telegram_unredacted_payload_display_allowed", ): if visibility.get(key) is not False: raise ValueError(f"visibility_contract.{key} must remain false")