chore(security): sync with gitea main before ref detail diff [skip ci]
# Conflicts: # docs/LOGBOOK.md
This commit is contained in:
@@ -133,13 +133,15 @@ jobs:
|
||||
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
FILES_JSON=$(echo "${{ steps.diff.outputs.new_files }}" | jq -Rn '[inputs | select(length > 0)]')
|
||||
SUMMARY_JSON=$(jq -cn \
|
||||
--arg commit_sha "${{ github.sha }}" \
|
||||
--argjson files "$FILES_JSON" \
|
||||
'{type: "ci_migration", commit_sha: $commit_sha, files: $files}')
|
||||
SUMMARY_JSON_SQL=${SUMMARY_JSON//\'/\'\'}
|
||||
|
||||
seed_audit() {
|
||||
local url="$1"
|
||||
psql "$url" \
|
||||
-v ON_ERROR_STOP=1 \
|
||||
-v commit_sha="${{ github.sha }}" \
|
||||
-v files_json="$FILES_JSON" <<'SQL'
|
||||
psql "$url" -v ON_ERROR_STOP=1 <<SQL
|
||||
INSERT INTO asset_discovery_run (
|
||||
run_id, triggered_by, scope, scan_depth, status,
|
||||
started_at, ended_at, tools_used, summary
|
||||
@@ -151,12 +153,8 @@ jobs:
|
||||
'success',
|
||||
NOW(),
|
||||
NOW(),
|
||||
'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb,
|
||||
jsonb_build_object(
|
||||
'type', 'ci_migration',
|
||||
'commit_sha', :'commit_sha',
|
||||
'files', :'files_json'::jsonb
|
||||
)
|
||||
'{"psql": 1, "gitea_ci": 1}'::jsonb,
|
||||
'${SUMMARY_JSON_SQL}'::jsonb
|
||||
);
|
||||
SQL
|
||||
}
|
||||
|
||||
36
apps/api/migrations/adr090d_ansible_operation_types.sql
Normal file
36
apps/api/migrations/adr090d_ansible_operation_types.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- ADR-090-D: automation_operation_log.operation_type adds Ansible executor audit states
|
||||
-- Created: 2026-05-12 Taipei
|
||||
--
|
||||
-- Purpose:
|
||||
-- T3 Ansible declarative executor visibility. These operation types allow
|
||||
-- the AI automation truth chain to record that Ansible was matched,
|
||||
-- check-mode executed, applied, rolled back, or explicitly skipped.
|
||||
--
|
||||
-- Safety:
|
||||
-- This migration only expands the CHECK allowlist. It does not execute
|
||||
-- Ansible, change approval behavior, or create auto-remediation rows.
|
||||
|
||||
-- Swap the operation_type allowlist in ONE ALTER TABLE statement so that the
-- drop and the re-add are atomic. With two separate statements, a failing
-- ADD CONSTRAINT (for example, an existing row outside the list) would leave
-- the table with no CHECK at all when the runner executes in autocommit mode.
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid,
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted',
        -- ADR-090-D additions: Ansible executor audit states.
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_rollback_executed',
        'ansible_execution_skipped'
    ));

COMMENT ON CONSTRAINT automation_operation_log_type_valid ON automation_operation_log IS
    'ADR-090-D: allow first-class Ansible executor audit states for AwoooP truth-chain visibility.';
|
||||
19
apps/api/migrations/adr090d_ansible_operation_types_down.sql
Normal file
19
apps/api/migrations/adr090d_ansible_operation_types_down.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- ADR-090-D rollback: remove Ansible executor audit states from operation_type allowlist.
|
||||
-- Only apply after confirming no automation_operation_log rows use ansible_* operation types.
|
||||
|
||||
-- Restore the pre-ADR-090-D allowlist in ONE ALTER TABLE statement. If any
-- row still uses an ansible_* operation type, the ADD CONSTRAINT validation
-- fails and, because the drop is part of the same statement, the current
-- constraint stays in place instead of being silently removed.
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid,
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted'
    ));
|
||||
@@ -167,6 +167,31 @@ class DriftReportRepository:
|
||||
{"report_id": report_id, "narrative": narrative},
|
||||
)
|
||||
|
||||
async def get_repeat_state(self, report: DriftReport) -> dict:
    """Compute the repeat state of ``report`` from recent reports in its namespace.

    Loads at most 200 ``drift_reports`` rows for ``report.namespace`` created
    within the last 24 hours (newest ``scanned_at`` first) and hands them,
    together with ``report``, to ``build_drift_repeat_state``; that function's
    result is returned unchanged.
    """
    from src.services.drift_repeat_state import build_drift_repeat_state

    recent_reports_query = text("""
        SELECT
            report_id,
            namespace,
            status,
            scanned_at,
            created_at,
            items
        FROM drift_reports
        WHERE namespace = :namespace
          AND created_at > now() - interval '24 hours'
        ORDER BY scanned_at DESC
        LIMIT 200
    """)
    bind_params = {"namespace": report.namespace}

    async with get_db_context() as db:
        result = await db.execute(recent_reports_query, bind_params)
        # Convert each row mapping into a plain dict for the builder.
        rows = [dict(mapping) for mapping in result.mappings().all()]
    return build_drift_repeat_state(report, rows)
|
||||
|
||||
|
||||
_drift_repo: DriftReportRepository | None = None
|
||||
|
||||
|
||||
433
apps/api/src/services/awooop_ansible_audit_service.py
Normal file
433
apps/api/src/services/awooop_ansible_audit_service.py
Normal file
@@ -0,0 +1,433 @@
|
||||
"""AwoooP Ansible audit helpers.
|
||||
|
||||
This module is intentionally non-executing. It exposes the Ansible audit
|
||||
contract and repo-known playbook catalog so the truth chain can say whether
|
||||
Ansible was actually considered or executed, without pretending that catalog
|
||||
hints are runtime remediation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
# operation_type values that mark an automation_operation_log row as an
# Ansible executor audit record (see _is_ansible_operation).
ANSIBLE_OPERATION_TYPES = frozenset(
    (
        "ansible_candidate_matched",
        "ansible_check_mode_executed",
        "ansible_apply_executed",
        "ansible_rollback_executed",
        "ansible_execution_skipped",
    )
)
|
||||
|
||||
# Static catalog of repo-known Ansible playbooks. Entries are only used to
# produce keyword hints (_catalog_hints): each "keywords" value is tested as a
# substring of the lower-cased, flattened incident/drift text. The "keywords"
# list itself is not copied into the public hint output. Every entry here has
# auto_apply_enabled=False and approval_required=True.
_CATALOG: tuple[dict[str, Any], ...] = (
    {
        "catalog_id": "ansible:110-devops",
        "playbook_path": "infra/ansible/playbooks/110-devops.yml",
        "inventory_hosts": ["host_110"],
        "domains": ["swap", "harbor", "sentry", "gitea", "langfuse", "bitan", "runner", "keepalived", "nginx"],
        "keywords": [
            "110",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "swap",
            "harbor",
            "sentry",
            "gitea",
            "langfuse",
            "bitan",
            "runner",
            "github-runner",
            "keepalived",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:188-ai-web",
        "playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
        "inventory_hosts": ["host_188"],
        "domains": ["docker", "momo_backup", "signoz", "minio", "litellm", "n8n", "open_webui", "nginx"],
        "keywords": [
            "188",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "momo",
            "backup",
            "postgresql",
            "pg_backup",
            "signoz",
            "minio",
            "litellm",
            "n8n",
            "open-webui",
            "openwebui",
            "docker-registry",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:nginx-sync",
        "playbook_path": "infra/ansible/playbooks/nginx-sync.yml",
        "inventory_hosts": ["host_110", "host_188"],
        "domains": ["nginx", "proxy", "ollama_proxy", "tls"],
        "keywords": ["nginx", "proxy", "ollama", "gcp", "tls", "cert", "502", "upstream"],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        # The only entry without check-mode support and with "high" risk.
        "catalog_id": "ansible:restore-password-auth",
        "playbook_path": "infra/ansible/playbooks/restore-password-auth.yml",
        "inventory_hosts": ["host_110", "host_120", "host_121", "host_188"],
        "domains": ["ssh", "password_auth"],
        "keywords": ["ssh", "passwordauthentication", "password auth", "login", "auth"],
        "supports_check_mode": False,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "high",
    },
)
|
||||
|
||||
|
||||
def _get(row: dict[str, Any], key: str) -> Any:
|
||||
return row.get(key)
|
||||
|
||||
|
||||
def _tags(row: dict[str, Any]) -> list[str]:
    """Return the row's ``tags`` as a list of lower-cased strings.

    A list value is lower-cased element by element. A string value is split
    on commas, with surrounding whitespace stripped and empty parts dropped.
    Any other value (including a missing key) gives an empty list.
    """
    raw = _get(row, "tags")
    if isinstance(raw, list):
        return [str(tag).lower() for tag in raw]
    if not isinstance(raw, str):
        return []
    normalized: list[str] = []
    for chunk in raw.split(","):
        if chunk.strip():
            normalized.append(chunk.strip().lower())
    return normalized
|
||||
|
||||
|
||||
def _first_present(row: dict[str, Any], keys: tuple[str, ...]) -> Any:
    """Return the first value among ``keys`` that is neither ``None`` nor ``""``.

    Keys are tried in the given order; ``None`` is returned when none of them
    holds a usable value.
    """
    looked_up = (_get(row, name) for name in keys)
    return next((found for found in looked_up if found not in (None, "")), None)
|
||||
|
||||
|
||||
def _is_ansible_operation(row: dict[str, Any]) -> bool:
    """Decide whether an automation_operation_log row is Ansible-related.

    A row qualifies when any of these holds, checked in order:
    its ``operation_type`` is in ``ANSIBLE_OPERATION_TYPES``; its tags contain
    ``"ansible"``; the first non-empty executor/backend column equals
    ``"ansible"``; or the first non-empty playbook path column either contains
    ``"infra/ansible/"`` or ends in ``.yml`` while containing ``"ansible"``.
    """
    kind = str(_get(row, "operation_type") or "").lower()
    if kind in ANSIBLE_OPERATION_TYPES:
        return True

    if "ansible" in _tags(row):
        return True

    backend_columns = (
        "input_executor",
        "input_execution_backend",
        "output_executor",
        "output_execution_backend",
    )
    backend = str(_first_present(row, backend_columns) or "").lower()
    if backend == "ansible":
        return True

    path_columns = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    path = str(_first_present(row, path_columns) or "").lower()
    if "infra/ansible/" in path:
        return True
    # Same precedence as the original `a or b and c` expression.
    return path.endswith(".yml") and "ansible" in path
|
||||
|
||||
|
||||
def _ansible_record(row: dict[str, Any]) -> dict[str, Any]:
    """Project an automation_operation_log row onto the Ansible audit record shape.

    Plain columns are copied as-is. The playbook id/path, check mode and
    not-used reason are taken from the first non-empty ``input_*`` /
    ``output_*`` column via ``_first_present``.
    """
    path_columns = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    # Build in the same key order as the public record contract.
    record: dict[str, Any] = {
        column: _get(row, column)
        for column in ("op_id", "operation_type", "status", "actor")
    }
    record["playbook_id"] = _first_present(row, ("input_playbook_id", "output_playbook_id"))
    record["playbook_path"] = _first_present(row, path_columns)
    record["check_mode"] = _first_present(row, ("input_check_mode", "output_check_mode"))
    record["not_used_reason"] = _first_present(
        row, ("input_not_used_reason", "output_not_used_reason")
    )
    for column in ("dry_run_result", "error", "duration_ms", "tags", "created_at"):
        record[column] = _get(row, column)
    return record
|
||||
|
||||
|
||||
def _flatten_text(value: Any, pieces: list[str], remaining: int = 80) -> int:
|
||||
if remaining <= 0 or value is None:
|
||||
return remaining
|
||||
if isinstance(value, dict):
|
||||
for key, item in value.items():
|
||||
remaining = _flatten_text(key, pieces, remaining)
|
||||
remaining = _flatten_text(item, pieces, remaining)
|
||||
if remaining <= 0:
|
||||
break
|
||||
return remaining
|
||||
if isinstance(value, list):
|
||||
for item in value:
|
||||
remaining = _flatten_text(item, pieces, remaining)
|
||||
if remaining <= 0:
|
||||
break
|
||||
return remaining
|
||||
pieces.append(str(value).lower())
|
||||
return remaining - 1
|
||||
|
||||
|
||||
def _source_haystack(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> str:
    """Join the flattened, lower-cased text of ``incident`` and ``drift`` with spaces.

    Each source is flattened independently with ``_flatten_text``'s default
    budget; incident text comes first.
    """
    fragments: list[str] = []
    for source in (incident, drift):
        _flatten_text(source, fragments)
    return " ".join(fragments)
|
||||
|
||||
|
||||
def _catalog_hints(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> dict[str, Any]:
    """Match the static playbook catalog against incident/drift text.

    A catalog entry becomes a candidate when at least one of its keywords is a
    substring of the flattened source text. Candidates carry the entry's
    public fields plus ``match_score`` (number of matched keywords) and
    ``matched_keywords``, ordered by descending score and then ``catalog_id``.
    Entries without any match are listed by id in ``unmatched_catalog_ids``.
    """
    exposed_fields = frozenset({
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "domains",
        "supports_check_mode",
        "auto_apply_enabled",
        "approval_required",
        "risk_level",
    })
    haystack = _source_haystack(incident, drift)

    hits: list[dict[str, Any]] = []
    misses: list[str] = []
    for entry in _CATALOG:
        found = [word for word in entry["keywords"] if word in haystack]
        if not found:
            misses.append(entry["catalog_id"])
            continue
        hit = {field: content for field, content in entry.items() if field in exposed_fields}
        hit["match_score"] = len(found)
        hit["matched_keywords"] = found
        hits.append(hit)

    ranked = sorted(hits, key=lambda hit: (-int(hit["match_score"]), str(hit["catalog_id"])))
    return {
        "match_mode": "static_catalog_keyword_hint_v1",
        "decision_effect": "none",
        "available_count": len(_CATALOG),
        "candidates": ranked,
        "unmatched_catalog_ids": misses,
    }
|
||||
|
||||
|
||||
def build_ansible_truth(
    automation_ops: list[dict[str, Any]],
    *,
    incident: dict[str, Any] | None,
    drift: dict[str, Any] | None,
) -> dict[str, Any]:
    """Assemble the truth-chain ``ansible`` section.

    Args:
        automation_ops: automation_operation_log rows linked to the source.
        incident: Incident payload used for catalog keyword hints, if any.
        drift: Drift report payload used for catalog keyword hints, if any.

    Returns:
        A dict with the Ansible-related audit ``records``, whether Ansible was
        ``considered`` (any record exists), the static ``audit_contract``, the
        ``candidate_catalog`` hints, and a ``not_used_reason`` that is ``None``
        when records exist.
    """
    records = [
        _ansible_record(op_row)
        for op_row in automation_ops
        if _is_ansible_operation(op_row)
    ]

    if records:
        not_used_reason = None
    else:
        not_used_reason = (
            "no automation_operation_log row with Ansible operation type, tag, or executor backend for this source"
        )

    audit_contract = {
        "schema_version": "ansible_executor_audit_v1",
        "operation_types": sorted(ANSIBLE_OPERATION_TYPES),
        "required_audit_fields": [
            "operation_type",
            "status",
            "actor",
            "input.executor",
            "input.playbook_path",
            "input.check_mode",
            "output.not_used_reason",
            "dry_run_result",
        ],
        "default_execution_mode": "catalog/dry-run audit only until approval execution is explicitly wired",
    }

    return {
        "considered": bool(records),
        "records": records,
        "audit_contract": audit_contract,
        "candidate_catalog": _catalog_hints(incident, drift),
        "not_used_reason": not_used_reason,
    }
|
||||
|
||||
|
||||
def _incident_public_dict(incident: Any) -> dict[str, Any]:
|
||||
if incident is None:
|
||||
return {}
|
||||
if isinstance(incident, dict):
|
||||
return incident
|
||||
severity = getattr(incident, "severity", None)
|
||||
signals_payload: list[dict[str, Any]] = []
|
||||
for signal in getattr(incident, "signals", None) or []:
|
||||
signals_payload.append({
|
||||
"alert_name": getattr(signal, "alert_name", None),
|
||||
"labels": getattr(signal, "labels", None) or {},
|
||||
"annotations": getattr(signal, "annotations", None) or {},
|
||||
})
|
||||
return {
|
||||
"incident_id": getattr(incident, "incident_id", None),
|
||||
"project_id": getattr(incident, "project_id", None),
|
||||
"alertname": getattr(incident, "alertname", None),
|
||||
"alert_category": getattr(incident, "alert_category", None),
|
||||
"notification_type": getattr(incident, "notification_type", None),
|
||||
"severity": getattr(severity, "value", severity),
|
||||
"affected_services": getattr(incident, "affected_services", None) or [],
|
||||
"signals": signals_payload,
|
||||
}
|
||||
|
||||
|
||||
def build_ansible_decision_audit_payload(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> dict[str, Any] | None:
    """Build the automation_operation_log payload for an Ansible candidate match.

    Args:
        incident: Incident object or dict; normalised via ``_incident_public_dict``.
        proposal_data: Decision proposal; ``source``, ``risk_level`` and
            ``action`` / ``kubectl_command`` are read from it.
        decision_path: Label of the decision branch being audited.
        not_used_reason: Why Ansible was not executed for this decision.

    Returns:
        ``None`` when the catalog yields no candidate for the incident;
        otherwise a dict with ``operation_type``, ``status``, ``input``,
        ``output``, ``dry_run_result`` and ``tags``.
    """
    incident_payload = _incident_public_dict(incident)
    hints = _catalog_hints(incident_payload, None)
    candidates = hints.get("candidates") or []
    if not candidates:
        return None

    candidate_fields = (
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "risk_level",
        "match_score",
        "matched_keywords",
    )
    # Only the five best-ranked candidates are recorded.
    executor_candidates = [
        {field: candidate[field] for field in candidate_fields}
        for candidate in candidates[:5]
    ]

    action_text = proposal_data.get("action") or proposal_data.get("kubectl_command") or ""

    input_payload = {
        "incident_id": str(incident_payload.get("incident_id") or ""),
        "executor": "ansible",
        "execution_backend": "ansible",
        "decision_path": decision_path,
        "check_mode": True,
        "apply_enabled": False,
        "approval_required": True,
        "candidate_catalog_schema": hints["match_mode"],
        "executor_candidates": executor_candidates,
        "proposal_source": proposal_data.get("source", ""),
        "proposal_risk_level": proposal_data.get("risk_level", ""),
        "proposal_action_preview": str(action_text)[:240],
    }
    output_payload = {
        "not_used_reason": not_used_reason,
        "decision_effect": "audit_only",
        "next_required_step": "wire approval_execution to Ansible check-mode before apply",
    }
    dry_run_result = {
        "check_mode_executed": False,
        "candidate_count": len(candidates),
        "reason": not_used_reason,
    }
    return {
        "operation_type": "ansible_candidate_matched",
        "status": "dry_run",
        "input": input_payload,
        "output": output_payload,
        "dry_run_result": dry_run_result,
        "tags": ["ansible", "decision", "candidate", "check_mode_pending"],
    }
|
||||
|
||||
|
||||
async def record_ansible_decision_audit(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> bool:
    """Write a best-effort Ansible candidate audit row for one decision.

    Args:
        incident: Incident object or dict the decision belongs to.
        proposal_data: Decision proposal forwarded to the payload builder.
        decision_path: Label of the decision branch being audited.
        not_used_reason: Why Ansible was not executed for this decision.

    Returns:
        ``True`` only when a new row was inserted. ``False`` when the catalog
        has no candidate, when an ``ansible_candidate_matched`` row already
        exists for the incident, or when any exception occurred (the error is
        logged as a warning and never propagated).
    """
    payload = build_ansible_decision_audit_payload(
        incident=incident,
        proposal_data=proposal_data,
        decision_path=decision_path,
        not_used_reason=not_used_reason,
    )
    if payload is None:
        return False

    incident_id = payload["input"]["incident_id"]
    # The payload builder accepts dict incidents as well as attribute-style
    # objects, so resolve project_id both ways. getattr() alone always
    # returned None for a dict and silently fell back to the default project.
    if isinstance(incident, dict):
        raw_project_id = incident.get("project_id")
    else:
        raw_project_id = getattr(incident, "project_id", None)
    project_id = raw_project_id or "awoooi"

    try:
        async with get_db_context(str(project_id)) as db:
            # De-duplicate: at most one candidate row per incident.
            # NOTE(review): check-then-insert is not atomic; concurrent
            # callers could both insert unless a unique index prevents it.
            existing = await db.execute(
                text("""
                    SELECT op_id
                    FROM automation_operation_log
                    WHERE operation_type = 'ansible_candidate_matched'
                      AND input ->> 'incident_id' = :incident_id
                      AND input ->> 'executor' = 'ansible'
                    LIMIT 1
                """),
                {"incident_id": incident_id},
            )
            if existing.scalar() is not None:
                return False
            await db.execute(
                text("""
                    INSERT INTO automation_operation_log (
                        operation_type, actor, status,
                        input, output, dry_run_result, tags
                    ) VALUES (
                        :operation_type,
                        'decision_manager',
                        :status,
                        CAST(:input AS jsonb),
                        CAST(:output AS jsonb),
                        CAST(:dry_run_result AS jsonb),
                        :tags
                    )
                """),
                {
                    "operation_type": payload["operation_type"],
                    "status": payload["status"],
                    "input": json.dumps(payload["input"], ensure_ascii=False),
                    "output": json.dumps(payload["output"], ensure_ascii=False),
                    "dry_run_result": json.dumps(payload["dry_run_result"], ensure_ascii=False),
                    "tags": payload["tags"],
                },
            )
        return True
    except Exception as exc:
        # Best-effort audit: never let a failed write break the decision flow.
        logger.warning(
            "ansible_decision_audit_write_failed",
            incident_id=incident_id,
            error=str(exc),
        )
        return False
|
||||
@@ -7,6 +7,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
@@ -16,10 +17,13 @@ import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.services.awooop_ansible_audit_service import build_ansible_truth
|
||||
from src.services.drift_repeat_state import build_drift_repeat_state
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
_MAX_ROWS = 100
|
||||
_JSON_TEXT_FIELDS = {"gate_result", "source_envelope"}
|
||||
|
||||
|
||||
def _clean(value: Any) -> Any:
|
||||
@@ -38,7 +42,15 @@ def _clean(value: Any) -> Any:
|
||||
|
||||
|
||||
def _clean_row(row: Any) -> dict[str, Any]:
    """Convert a result row to a dict of cleaned values.

    String values of the columns named in ``_JSON_TEXT_FIELDS`` are parsed as
    JSON first; a string that is not valid JSON is kept as-is. Every value is
    then passed through ``_clean``.

    The stale one-line ``return {key: _clean(value) ...}`` that preceded this
    body has been removed: it returned before the JSON decoding could run.
    """
    cleaned: dict[str, Any] = {}
    for key, value in dict(row).items():
        if key in _JSON_TEXT_FIELDS and isinstance(value, str):
            try:
                value = json.loads(value)
            except json.JSONDecodeError:
                # Not JSON after all: fall through with the raw string.
                pass
        cleaned[key] = _clean(value)
    return cleaned
|
||||
|
||||
|
||||
async def _fetch_all(db: Any, sql: str, params: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
@@ -85,6 +97,127 @@ def _operation_ids(automation_ops: list[dict[str, Any]]) -> list[str]:
|
||||
return [str(row["op_id"]) for row in automation_ops if row.get("op_id")]
|
||||
|
||||
|
||||
def _build_reconciliation(
|
||||
*,
|
||||
incident: dict[str, Any] | None,
|
||||
approvals: list[dict[str, Any]],
|
||||
evidence_rows: list[dict[str, Any]],
|
||||
automation_ops: list[dict[str, Any]],
|
||||
timeline_events: list[dict[str, Any]],
|
||||
) -> dict[str, Any]:
|
||||
"""Build a read-only consistency report across incident lifecycle tables."""
|
||||
if incident is None:
|
||||
return {
|
||||
"schema_version": "incident_reconciliation_v1",
|
||||
"applicable": False,
|
||||
"consistency_status": "not_applicable",
|
||||
"operator_next_state": "not_applicable",
|
||||
"facts": {},
|
||||
"mismatches": [],
|
||||
}
|
||||
|
||||
incident_status = str(incident.get("status") or "unknown").upper()
|
||||
incident_closed = incident_status in {"RESOLVED", "CLOSED"}
|
||||
latest_approval = approvals[0] if approvals else None
|
||||
approval_status = str((latest_approval or {}).get("status") or "none").upper()
|
||||
approval_action = str((latest_approval or {}).get("action") or "")
|
||||
approval_resolved = bool((latest_approval or {}).get("resolved_at"))
|
||||
attempted = sum(int(row.get("sensors_attempted") or 0) for row in evidence_rows)
|
||||
succeeded = sum(int(row.get("sensors_succeeded") or 0) for row in evidence_rows)
|
||||
executed_ops = [
|
||||
row
|
||||
for row in automation_ops
|
||||
if str(row.get("status") or "").lower()
|
||||
in {"success", "completed", "executed"}
|
||||
]
|
||||
mismatches: list[dict[str, Any]] = []
|
||||
|
||||
def add(code: str, severity: str, message: str) -> None:
|
||||
mismatches.append({
|
||||
"code": code,
|
||||
"severity": severity,
|
||||
"message": message,
|
||||
})
|
||||
|
||||
if (
|
||||
latest_approval
|
||||
and not incident_closed
|
||||
and (approval_resolved or approval_status in {"APPROVED", "REJECTED"})
|
||||
):
|
||||
add(
|
||||
"incident_open_after_approval_resolved",
|
||||
"high",
|
||||
"Approval reached a terminal state while the incident is still open.",
|
||||
)
|
||||
|
||||
if approval_status == "APPROVED" and not automation_ops:
|
||||
add(
|
||||
"approval_approved_without_execution_record",
|
||||
"high",
|
||||
"Approval is approved but automation_operation_log has no linked execution record.",
|
||||
)
|
||||
|
||||
if (
|
||||
approval_status == "APPROVED"
|
||||
and "NO_ACTION" in approval_action.upper()
|
||||
and not executed_ops
|
||||
):
|
||||
add(
|
||||
"approval_no_action_without_execution",
|
||||
"high",
|
||||
"Approval resolved to NO_ACTION and no executor produced a successful operation.",
|
||||
)
|
||||
|
||||
if attempted > 0 and succeeded == 0:
|
||||
add(
|
||||
"evidence_all_sensors_failed",
|
||||
"medium",
|
||||
"Evidence collection attempted sensors but none succeeded.",
|
||||
)
|
||||
|
||||
if latest_approval and not timeline_events:
|
||||
add(
|
||||
"timeline_missing_for_approval",
|
||||
"medium",
|
||||
"Approval exists but timeline_events has no linked lifecycle entries.",
|
||||
)
|
||||
|
||||
high_count = sum(1 for row in mismatches if row["severity"] == "high")
|
||||
medium_count = sum(1 for row in mismatches if row["severity"] == "medium")
|
||||
if high_count:
|
||||
consistency_status = "blocked"
|
||||
operator_next_state = "manual_required"
|
||||
elif medium_count:
|
||||
consistency_status = "degraded"
|
||||
operator_next_state = "investigate"
|
||||
else:
|
||||
consistency_status = "consistent"
|
||||
operator_next_state = "continue"
|
||||
|
||||
return {
|
||||
"schema_version": "incident_reconciliation_v1",
|
||||
"applicable": True,
|
||||
"consistency_status": consistency_status,
|
||||
"operator_next_state": operator_next_state,
|
||||
"facts": {
|
||||
"incident_id": incident.get("incident_id"),
|
||||
"incident_status": incident_status,
|
||||
"incident_closed": incident_closed,
|
||||
"latest_approval_id": (latest_approval or {}).get("id"),
|
||||
"latest_approval_status": approval_status,
|
||||
"latest_approval_action": approval_action,
|
||||
"approval_resolved": approval_resolved,
|
||||
"evidence_records": len(evidence_rows),
|
||||
"sensors_attempted": attempted,
|
||||
"sensors_succeeded": succeeded,
|
||||
"automation_operation_records": len(automation_ops),
|
||||
"executed_operation_records": len(executed_ops),
|
||||
"timeline_events": len(timeline_events),
|
||||
},
|
||||
"mismatches": mismatches,
|
||||
}
|
||||
|
||||
|
||||
def _truth_status(
|
||||
*,
|
||||
incident: dict[str, Any] | None,
|
||||
@@ -255,6 +388,7 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
created_at,
|
||||
resolved_at,
|
||||
interpretation,
|
||||
items,
|
||||
narrative_text
|
||||
FROM drift_reports
|
||||
WHERE report_id = :source_id
|
||||
@@ -411,15 +545,30 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
error,
|
||||
duration_ms,
|
||||
tags,
|
||||
input ->> 'executor' AS input_executor,
|
||||
input ->> 'execution_backend' AS input_execution_backend,
|
||||
input ->> 'playbook_id' AS input_playbook_id,
|
||||
input ->> 'playbook_path' AS input_playbook_path,
|
||||
input ->> 'ansible_playbook_path' AS input_ansible_playbook_path,
|
||||
input ->> 'check_mode' AS input_check_mode,
|
||||
input ->> 'not_used_reason' AS input_not_used_reason,
|
||||
output ->> 'executor' AS output_executor,
|
||||
output ->> 'execution_backend' AS output_execution_backend,
|
||||
output ->> 'playbook_id' AS output_playbook_id,
|
||||
output ->> 'playbook_path' AS output_playbook_path,
|
||||
output ->> 'ansible_playbook_path' AS output_ansible_playbook_path,
|
||||
output ->> 'check_mode' AS output_check_mode,
|
||||
output ->> 'not_used_reason' AS output_not_used_reason,
|
||||
created_at
|
||||
FROM automation_operation_log
|
||||
WHERE coalesce(input::text, '') LIKE :needle
|
||||
WHERE incident_id::text = :incident_id
|
||||
OR coalesce(input::text, '') LIKE :needle
|
||||
OR coalesce(output::text, '') LIKE :needle
|
||||
OR coalesce(array_to_string(tags, ','), '') LIKE :needle
|
||||
ORDER BY created_at DESC
|
||||
LIMIT :limit
|
||||
""",
|
||||
{"needle": f"%{incident_id}%", "limit": _MAX_ROWS},
|
||||
{"incident_id": incident_id, "needle": f"%{incident_id}%", "limit": _MAX_ROWS},
|
||||
)
|
||||
km_entries = await _fetch_all(
|
||||
db,
|
||||
@@ -447,55 +596,27 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
"reports": [],
|
||||
}
|
||||
if drift is not None:
|
||||
repeat_summary = await _fetch_one(
|
||||
recent_drift_reports = await _fetch_all(
|
||||
db,
|
||||
"""
|
||||
SELECT
|
||||
count(*) AS occurrences_12h,
|
||||
min(scanned_at) AS first_scanned_at,
|
||||
max(scanned_at) AS last_scanned_at
|
||||
report_id,
|
||||
namespace,
|
||||
status,
|
||||
scanned_at,
|
||||
created_at,
|
||||
items,
|
||||
interpretation,
|
||||
narrative_text
|
||||
FROM drift_reports
|
||||
WHERE created_at > now() - interval '12 hours'
|
||||
WHERE created_at > now() - interval '24 hours'
|
||||
AND namespace = :namespace
|
||||
AND status = :status
|
||||
AND high_count = :high_count
|
||||
AND medium_count = :medium_count
|
||||
AND info_count = :info_count
|
||||
""",
|
||||
{
|
||||
"namespace": drift["namespace"],
|
||||
"status": drift["status"],
|
||||
"high_count": drift["high_count"],
|
||||
"medium_count": drift["medium_count"],
|
||||
"info_count": drift["info_count"],
|
||||
},
|
||||
)
|
||||
repeat_reports = await _fetch_all(
|
||||
db,
|
||||
"""
|
||||
SELECT report_id, scanned_at, created_at, status, interpretation, narrative_text
|
||||
FROM drift_reports
|
||||
WHERE created_at > now() - interval '12 hours'
|
||||
AND namespace = :namespace
|
||||
AND status = :status
|
||||
AND high_count = :high_count
|
||||
AND medium_count = :medium_count
|
||||
AND info_count = :info_count
|
||||
ORDER BY scanned_at DESC
|
||||
LIMIT 20
|
||||
LIMIT 200
|
||||
""",
|
||||
{
|
||||
"namespace": drift["namespace"],
|
||||
"status": drift["status"],
|
||||
"high_count": drift["high_count"],
|
||||
"medium_count": drift["medium_count"],
|
||||
"info_count": drift["info_count"],
|
||||
},
|
||||
{"namespace": drift["namespace"]},
|
||||
)
|
||||
drift_repeats = {
|
||||
**(repeat_summary or {}),
|
||||
"reports": repeat_reports,
|
||||
}
|
||||
drift_repeats = build_drift_repeat_state(drift, recent_drift_reports)
|
||||
|
||||
gateway_mcp_rows = await _fetch_all(
|
||||
db,
|
||||
@@ -507,6 +628,7 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
trace_id,
|
||||
agent_id,
|
||||
tool_name,
|
||||
gate_result,
|
||||
result_status,
|
||||
block_gate,
|
||||
block_reason,
|
||||
@@ -572,6 +694,13 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
legacy_mcp_total=legacy_mcp_summary["total"],
|
||||
outbound_visible_total=len(outbound_rows),
|
||||
)
|
||||
reconciliation = _build_reconciliation(
|
||||
incident=incident,
|
||||
approvals=approvals,
|
||||
evidence_rows=evidence_rows,
|
||||
automation_ops=automation_ops,
|
||||
timeline_events=timeline_events,
|
||||
)
|
||||
|
||||
evidence_totals = {
|
||||
"records": len(evidence_rows),
|
||||
@@ -615,12 +744,9 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
|
||||
},
|
||||
"execution": {
|
||||
"automation_operation_log": automation_ops,
|
||||
"ansible": {
|
||||
"considered": False,
|
||||
"records": [],
|
||||
"not_used_reason": "no first-class Ansible executor audit record in current truth chain",
|
||||
},
|
||||
"ansible": build_ansible_truth(automation_ops, incident=incident, drift=drift),
|
||||
},
|
||||
"reconciliation": reconciliation,
|
||||
"learning": {
|
||||
"knowledge_entries": km_entries,
|
||||
},
|
||||
|
||||
@@ -1790,6 +1790,25 @@ class DecisionManager:
|
||||
token.proposal_data["auto_approve_reason"] = auto_decision.reason_detail
|
||||
await self._save_token(token)
|
||||
|
||||
try:
|
||||
from src.services.awooop_ansible_audit_service import (
|
||||
record_ansible_decision_audit as _record_ansible_decision_audit,
|
||||
)
|
||||
|
||||
_fire_and_forget(
|
||||
_record_ansible_decision_audit(
|
||||
incident=incident,
|
||||
proposal_data=token.proposal_data,
|
||||
decision_path="auto_execute",
|
||||
not_used_reason=(
|
||||
"auto_execute selected existing executor path; "
|
||||
"Ansible check-mode is not wired yet"
|
||||
),
|
||||
)
|
||||
)
|
||||
except Exception as _ansible_audit_err:
|
||||
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
|
||||
|
||||
# 觸發自動執行 (非阻塞)
|
||||
_fire_and_forget(
|
||||
self._auto_execute(incident, token)
|
||||
@@ -1813,6 +1832,24 @@ class DecisionManager:
|
||||
),
|
||||
)
|
||||
)
|
||||
try:
|
||||
from src.services.awooop_ansible_audit_service import (
|
||||
record_ansible_decision_audit as _record_ansible_decision_audit,
|
||||
)
|
||||
|
||||
_fire_and_forget(
|
||||
_record_ansible_decision_audit(
|
||||
incident=incident,
|
||||
proposal_data=token.proposal_data,
|
||||
decision_path="manual_approval",
|
||||
not_used_reason=(
|
||||
"manual approval required; Ansible check-mode "
|
||||
"is not wired to approval execution yet"
|
||||
),
|
||||
)
|
||||
)
|
||||
except Exception as _ansible_audit_err:
|
||||
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
|
||||
_fire_and_forget(
|
||||
_push_decision_to_telegram(incident, token.proposal_data)
|
||||
)
|
||||
|
||||
@@ -148,7 +148,13 @@ class DriftNarratorService:
|
||||
# 2026-04-18 B 方案: LLM 同時產 narrative + 結構化 items(取代 str()[:30])
|
||||
# 2026-04-20 P0.2: 追加 recommendation(action/confidence/reason)
|
||||
narrative, items, recommendation = await self._generate_narrative_and_items(report, interpretation)
|
||||
await self._send_telegram(report, narrative, items, recommendation)
|
||||
repeat_state = None
|
||||
try:
|
||||
from src.repositories.drift_repository import get_drift_repository
|
||||
repeat_state = await get_drift_repository().get_repeat_state(report)
|
||||
except Exception as e:
|
||||
logger.warning("drift_repeat_state_lookup_failed", report_id=report.report_id, error=str(e))
|
||||
await self._send_telegram(report, narrative, items, recommendation, repeat_state)
|
||||
|
||||
# 寫入 DB narrative_text (Phase 30 ADR-067)
|
||||
try:
|
||||
@@ -643,6 +649,7 @@ class DriftNarratorService:
|
||||
narrative: str,
|
||||
items: list[dict],
|
||||
recommendation: dict | None = None,
|
||||
repeat_state: dict | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
推送 TYPE-4D Config Drift 卡片(ADR-075)+ B 方案智能摘要
|
||||
@@ -654,7 +661,7 @@ class DriftNarratorService:
|
||||
"""
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
diff_summary = self._render_telegram_body(report, narrative, items, recommendation)
|
||||
diff_summary = self._render_telegram_body(report, narrative, items, recommendation, repeat_state)
|
||||
|
||||
try:
|
||||
tg = get_telegram_gateway()
|
||||
@@ -711,6 +718,7 @@ class DriftNarratorService:
|
||||
narrative: str,
|
||||
items: list[dict],
|
||||
recommendation: dict | None = None,
|
||||
repeat_state: dict | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
組裝 Telegram 卡片 body(B 方案格式 + P0.2 AI 推薦)
|
||||
@@ -741,6 +749,10 @@ class DriftNarratorService:
|
||||
}.get(_act, _act)
|
||||
lines.append(f"🎯 AI 建議:{_emoji_action} ({int(_conf * 100)}%) — {_reason}\n")
|
||||
|
||||
repeat_line = self._render_repeat_state(repeat_state)
|
||||
if repeat_line:
|
||||
lines.append(f"{repeat_line}\n")
|
||||
|
||||
lines.append(f"🤖 AI 研判\n{narrative}\n")
|
||||
|
||||
# 用非 trivial + 非白名單 的實際可操作數顯示
|
||||
@@ -761,6 +773,23 @@ class DriftNarratorService:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _render_repeat_state(self, repeat_state: dict | None) -> str:
|
||||
"""Render operator-visible repeat/stage metadata for Telegram."""
|
||||
if not repeat_state:
|
||||
return ""
|
||||
fingerprint = str(repeat_state.get("fingerprint") or "unknown")
|
||||
occurrences = int(repeat_state.get("occurrences_12h") or 0)
|
||||
window_hours = int(repeat_state.get("window_hours") or 12)
|
||||
stage = str(repeat_state.get("operator_stage") or "unknown")
|
||||
if occurrences <= 1:
|
||||
repeat_text = f"{window_hours}h 內首次出現"
|
||||
else:
|
||||
repeat_text = f"{window_hours}h 內第 {occurrences} 次同指紋"
|
||||
return (
|
||||
"流程: drift_scanned → ai_analyzed → "
|
||||
f"{stage}\n重複: {repeat_text}\n指紋: {fingerprint}"
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Singleton
|
||||
|
||||
180
apps/api/src/services/drift_repeat_state.py
Normal file
180
apps/api/src/services/drift_repeat_state.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Stable repeat identity for Config Drift reports.
|
||||
|
||||
The drift scanner emits a fresh ``report_id`` for every run. Operators need a
|
||||
stable identity that answers whether two reports describe the same drift, not
|
||||
just whether they have the same HIGH/MEDIUM/INFO counts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Version tag reported in every repeat-state payload.
SCHEMA_VERSION = "drift_repeat_state_v1"
# Version tag mixed into the hashed payload, so changing it changes every
# fingerprint.
FINGERPRINT_VERSION = "drift_fingerprint_v1"


def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a dict, or as an attribute for any other object."""
    if isinstance(obj, dict):
        return obj.get(key, default)
    return getattr(obj, key, default)


def _enum_value(value: Any) -> Any:
    """Return ``value.value`` when that attribute exists, else ``value``."""
    return getattr(value, "value", value)


def _jsonable(value: Any) -> Any:
    """Recursively convert ``value`` into JSON-friendly primitives.

    Dict keys are stringified, lists and tuples both become lists, datetimes
    become ISO-8601 strings, and objects exposing ``.value`` are unwrapped.
    Anything else is returned unchanged.
    """
    value = _enum_value(value)
    if isinstance(value, dict):
        return {str(k): _jsonable(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_jsonable(v) for v in value]
    if isinstance(value, datetime):
        return value.isoformat()
    return value


def _canonical_json(value: Any) -> str:
    """Serialize ``value`` to compact JSON with sorted keys.

    Sorted keys and fixed separators make the output independent of dict
    insertion order, which is what makes it usable as a hash input and as a
    sort key. Values ``json`` cannot encode fall back to ``str``.
    """
    return json.dumps(
        _jsonable(value),
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )


def _to_naive_utc(parsed: datetime) -> datetime:
    """Convert an aware datetime to naive UTC; return naive input unchanged."""
    if parsed.tzinfo is not None:
        return parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed


def _parse_datetime(value: Any) -> datetime | None:
    """Parse a datetime or ISO-8601 string into a naive datetime.

    Aware values are converted to UTC and stripped of ``tzinfo``; naive values
    are returned as-is. Returns ``None`` for ``None``, unparseable strings and
    any other type.
    """
    if value is None:
        return None
    if isinstance(value, datetime):
        return _to_naive_utc(value)
    if isinstance(value, str):
        try:
            # A trailing "Z" is rewritten to an explicit offset before parsing.
            return _to_naive_utc(datetime.fromisoformat(value.replace("Z", "+00:00")))
        except ValueError:
            return None
    return None


def _iso(value: Any) -> str | None:
    """Return the normalized ISO-8601 string for ``value``, or ``None``."""
    parsed = _parse_datetime(value)
    return parsed.isoformat() if parsed else None


def drift_item_identity(item: Any) -> dict[str, Any]:
    """Return the stable fields that define one drift item."""
    return {
        "resource_kind": str(_get(item, "resource_kind", "")),
        "resource_name": str(_get(item, "resource_name", "")),
        "namespace": str(_get(item, "namespace", "")),
        "field_path": str(_get(item, "field_path", "")),
        "drift_level": str(_enum_value(_get(item, "drift_level", ""))),
        "git_value": _jsonable(_get(item, "git_value")),
        "actual_value": _jsonable(_get(item, "actual_value")),
        "is_allowlisted": bool(_get(item, "is_allowlisted", False)),
    }


def build_drift_fingerprint(namespace: str, items: list[Any]) -> str:
    """Build a deterministic fingerprint from namespace + sorted drift items.

    Returns:
        ``"dfp_"`` followed by the first 16 hex characters of the SHA-256 of
        the canonical JSON payload.
    """
    identities = [drift_item_identity(item) for item in items]
    # Sorting by canonical JSON makes the result independent of item order.
    identities.sort(key=_canonical_json)
    payload = {
        "version": FINGERPRINT_VERSION,
        "namespace": namespace,
        "items": identities,
    }
    digest = hashlib.sha256(_canonical_json(payload).encode("utf-8")).hexdigest()
    return f"dfp_{digest[:16]}"


def _report_identity(report: Any) -> dict[str, Any]:
    """Extract id, namespace, status, timestamps and fingerprint of a report."""
    items = _get(report, "items", []) or []
    namespace = str(_get(report, "namespace", ""))
    return {
        "report_id": _get(report, "report_id"),
        "namespace": namespace,
        # A missing/None status becomes "" (not the literal string "None") so
        # the "unknown" fallback in build_drift_repeat_state can apply.
        "status": str(_enum_value(_get(report, "status")) or ""),
        "scanned_at": _get(report, "scanned_at"),
        "created_at": _get(report, "created_at"),
        "fingerprint": build_drift_fingerprint(namespace, list(items)),
    }


def _identity_time(identity: dict[str, Any]) -> datetime | None:
    """Return the parsed ``scanned_at`` of an identity, else ``created_at``."""
    return _parse_datetime(identity.get("scanned_at")) or _parse_datetime(
        identity.get("created_at")
    )


def build_drift_repeat_state(
    report: Any,
    recent_reports: list[Any],
    *,
    window_hours: int = 12,
    max_reports: int = 20,
) -> dict[str, Any]:
    """Summarize repeat state for one drift report using stable fingerprints.

    Args:
        report: The current drift report (dict or attribute-style object).
        recent_reports: Candidate earlier reports to compare against.
        window_hours: Size of the look-back window, measured back from the
            current report's timestamp.
        max_reports: Maximum number of matching reports listed under
            ``"reports"``; zero or a negative value lists none.

    Returns:
        A dict with the schema version, the fingerprint, the number of
        matching reports in the window (under the fixed key
        ``"occurrences_12h"``), first/last timestamps, the operator stage and
        the newest-first list of matching reports.
    """
    current = _report_identity(report)
    # All parsed timestamps are naive UTC, so the "now" fallback must be naive
    # UTC as well; naive local time would skew the cutoff by the UTC offset.
    current_time = _identity_time(current) or datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff = current_time - timedelta(hours=window_hours)

    # Keyed by report_id so the current report is not double-counted when it
    # also appears in recent_reports.
    by_id: dict[str, dict[str, Any]] = {}
    for candidate in [report, *recent_reports]:
        identity = _report_identity(candidate)
        report_id = str(identity.get("report_id") or "")
        if not report_id:
            continue
        candidate_time = _identity_time(identity)
        # Candidates without a usable timestamp are kept, not filtered out.
        if candidate_time is not None and candidate_time < cutoff:
            continue
        if identity["fingerprint"] != current["fingerprint"]:
            continue
        by_id[report_id] = identity

    # Oldest first; entries without a timestamp sort to the front.
    matches = sorted(
        by_id.values(),
        key=lambda row: _identity_time(row) or datetime.min,
    )
    first = matches[0] if matches else current
    last = matches[-1] if matches else current
    status = current.get("status") or "unknown"
    operator_stage = "pending_human" if status == "pending" else str(status)

    # matches[-0:] would return the whole list, so guard non-positive limits.
    listed = matches[-max_reports:] if max_reports > 0 else []

    return {
        "schema_version": SCHEMA_VERSION,
        "fingerprint": current["fingerprint"],
        "matching_strategy": "namespace_and_stable_items_v1",
        "window_hours": window_hours,
        "occurrences_12h": len(matches),
        "first_scanned_at": _iso(first.get("scanned_at") or first.get("created_at")),
        "last_scanned_at": _iso(last.get("scanned_at") or last.get("created_at")),
        "operator_stage": operator_stage,
        "reports": [
            {
                "report_id": row.get("report_id"),
                "scanned_at": _iso(row.get("scanned_at")),
                "created_at": _iso(row.get("created_at")),
                "status": row.get("status"),
            }
            for row in reversed(listed)
        ],
    }
|
||||
@@ -104,6 +104,11 @@ _AUTOMATION_STAGE_MAP = {
|
||||
"capacity_recommendation": "investigator",
|
||||
"quota_enforced": "safe",
|
||||
"notification_formatted": "safe",
|
||||
"ansible_candidate_matched": "ai_router",
|
||||
"ansible_check_mode_executed": "executor",
|
||||
"ansible_apply_executed": "executor",
|
||||
"ansible_rollback_executed": "executor",
|
||||
"ansible_execution_skipped": "safe",
|
||||
}
|
||||
_AUTOMATION_STATUS_MAP = {
|
||||
"pending": "pending",
|
||||
|
||||
@@ -1,6 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from src.services.awooop_truth_chain_service import _truth_status
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from types import SimpleNamespace
|
||||
|
||||
from src.services.awooop_ansible_audit_service import (
|
||||
build_ansible_decision_audit_payload,
|
||||
build_ansible_truth,
|
||||
)
|
||||
from src.services.awooop_truth_chain_service import (
|
||||
_build_reconciliation,
|
||||
_clean_row,
|
||||
_truth_status,
|
||||
)
|
||||
from src.services.drift_repeat_state import (
|
||||
build_drift_fingerprint,
|
||||
build_drift_repeat_state,
|
||||
)
|
||||
|
||||
|
||||
def test_clean_row_parses_json_text_fields_for_gateway_visibility() -> None:
    """``gate_result`` and ``source_envelope`` JSON text is decoded; ``plain_text`` is not."""
    row = {
        "gate_result": '{"schema_version":"legacy_mcp_bridge_v1","policy_enforced":false}',
        "source_envelope": '{"adapter":"legacy_telegram_gateway"}',
        "plain_text": '{"not":"parsed"}',
    }

    cleaned = _clean_row(row)

    assert cleaned["gate_result"]["schema_version"] == "legacy_mcp_bridge_v1"
    assert cleaned["gate_result"]["policy_enforced"] is False
    assert cleaned["source_envelope"]["adapter"] == "legacy_telegram_gateway"
    # A JSON-looking string under another key must be left as text.
    assert cleaned["plain_text"] == '{"not":"parsed"}'
|
||||
|
||||
|
||||
def test_truth_status_marks_no_action_approval_as_manual_required() -> None:
|
||||
@@ -46,3 +76,217 @@ def test_truth_status_marks_repeated_pending_drift_as_human_needed() -> None:
|
||||
assert status["needs_human"] is True
|
||||
assert "drift_report_pending_without_resolution" in status["blockers"]
|
||||
assert "drift_ai_confidence_zero" in status["blockers"]
|
||||
|
||||
|
||||
def _drift_item(
|
||||
*,
|
||||
resource_name: str = "awoooi-api",
|
||||
field_path: str = "spec.template.spec.containers[0].image",
|
||||
actual_value: str = "api:hotfix",
|
||||
) -> dict:
|
||||
return {
|
||||
"resource_kind": "Deployment",
|
||||
"resource_name": resource_name,
|
||||
"namespace": "awoooi-prod",
|
||||
"field_path": field_path,
|
||||
"git_value": "api:main",
|
||||
"actual_value": actual_value,
|
||||
"drift_level": "high",
|
||||
"is_allowlisted": False,
|
||||
}
|
||||
|
||||
|
||||
def test_drift_fingerprint_is_stable_across_item_order() -> None:
    """Item order must not affect the fingerprint; item content must."""
    item_a = _drift_item(resource_name="awoooi-api")
    item_b = _drift_item(
        resource_name="awoooi-worker",
        field_path="spec.template.spec.serviceAccountName",
        actual_value="awoooi-executor",
    )

    first = build_drift_fingerprint("awoooi-prod", [item_a, item_b])
    second = build_drift_fingerprint("awoooi-prod", [item_b, item_a])
    changed = build_drift_fingerprint(
        "awoooi-prod",
        [item_a, {**item_b, "actual_value": "different-service-account"}],
    )

    assert first == second
    assert first.startswith("dfp_")
    assert first != changed
|
||||
|
||||
|
||||
def test_drift_repeat_state_counts_matching_fingerprint_only() -> None:
    """Only in-window reports sharing the current fingerprint are counted."""
    now = datetime(2026, 5, 13, 1, 0, tzinfo=timezone.utc)
    report = {
        "report_id": "drift-now",
        "namespace": "awoooi-prod",
        "status": "pending",
        "scanned_at": now,
        "created_at": now,
        "items": [_drift_item()],
    }
    recent = [
        # Same items, one hour earlier: counted.
        {
            **report,
            "report_id": "drift-prev",
            "scanned_at": now - timedelta(hours=1),
            "created_at": now - timedelta(hours=1),
        },
        # Different actual value, so a different fingerprint: not counted.
        {
            **report,
            "report_id": "drift-different",
            "scanned_at": now - timedelta(hours=2),
            "created_at": now - timedelta(hours=2),
            "items": [_drift_item(actual_value="api:other")],
        },
        # Same items but outside the default 12h window: not counted.
        {
            **report,
            "report_id": "drift-old",
            "scanned_at": now - timedelta(hours=13),
            "created_at": now - timedelta(hours=13),
        },
    ]

    repeat_state = build_drift_repeat_state(report, recent)

    assert repeat_state["schema_version"] == "drift_repeat_state_v1"
    assert repeat_state["fingerprint"].startswith("dfp_")
    assert repeat_state["matching_strategy"] == "namespace_and_stable_items_v1"
    assert repeat_state["occurrences_12h"] == 2
    assert repeat_state["operator_stage"] == "pending_human"
    assert [row["report_id"] for row in repeat_state["reports"]] == [
        "drift-now",
        "drift-prev",
    ]
|
||||
|
||||
|
||||
def test_reconciliation_blocks_open_incident_after_no_action_approval() -> None:
    """An approved NO_ACTION on a still-open incident with no execution is blocked."""
    reconciliation = _build_reconciliation(
        incident={"incident_id": "INC-1", "status": "INVESTIGATING"},
        approvals=[
            {
                "id": "approval-1",
                "status": "APPROVED",
                "action": "未知操作 | NO_ACTION",
                "resolved_at": "2026-05-13T01:00:00+00:00",
            }
        ],
        evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 0}],
        automation_ops=[],
        timeline_events=[],
    )

    codes = {row["code"] for row in reconciliation["mismatches"]}
    assert reconciliation["schema_version"] == "incident_reconciliation_v1"
    assert reconciliation["consistency_status"] == "blocked"
    assert reconciliation["operator_next_state"] == "manual_required"
    assert reconciliation["facts"]["incident_closed"] is False
    assert reconciliation["facts"]["automation_operation_records"] == 0
    assert "incident_open_after_approval_resolved" in codes
    assert "approval_approved_without_execution_record" in codes
    assert "approval_no_action_without_execution" in codes
    assert "evidence_all_sensors_failed" in codes
    assert "timeline_missing_for_approval" in codes
|
||||
|
||||
|
||||
def test_reconciliation_marks_consistent_resolved_execution() -> None:
    """A resolved incident with approval, execution and timeline has no mismatches."""
    reconciliation = _build_reconciliation(
        incident={"incident_id": "INC-2", "status": "RESOLVED"},
        approvals=[
            {
                "id": "approval-2",
                "status": "APPROVED",
                "action": "restart service",
                "resolved_at": "2026-05-13T01:00:00+00:00",
            }
        ],
        evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 7}],
        automation_ops=[{"status": "success"}],
        timeline_events=[{"event_type": "executor", "status": "success"}],
    )

    assert reconciliation["consistency_status"] == "consistent"
    assert reconciliation["operator_next_state"] == "continue"
    assert reconciliation["mismatches"] == []
|
||||
|
||||
|
||||
def test_ansible_truth_surfaces_audited_check_mode_record() -> None:
    """An ``ansible_check_mode_executed`` op is surfaced as a considered record."""
    truth = build_ansible_truth(
        [
            {
                "op_id": "op-ansible-1",
                "operation_type": "ansible_check_mode_executed",
                "status": "dry_run",
                "actor": "platform_operator",
                "input_playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
                "input_check_mode": "true",
                "dry_run_result": {"changed": 1},
                "tags": ["ansible", "check_mode"],
                "created_at": "2026-05-12T22:00:00+08:00",
            }
        ],
        incident={"incident_id": "INC-1", "alertname": "momo pg_backup failed on 188"},
        drift=None,
    )

    assert truth["considered"] is True
    assert truth["not_used_reason"] is None
    assert truth["records"][0]["playbook_path"] == "infra/ansible/playbooks/188-ai-web.yml"
    assert truth["records"][0]["check_mode"] == "true"
    assert truth["records"][0]["dry_run_result"] == {"changed": 1}
    assert "ansible_check_mode_executed" in truth["audit_contract"]["operation_types"]
    # The catalog stays a hint: no decision effect and no auto-apply.
    assert truth["candidate_catalog"]["decision_effect"] == "none"
    assert truth["candidate_catalog"]["candidates"][0]["catalog_id"] == "ansible:188-ai-web"
    assert truth["candidate_catalog"]["candidates"][0]["auto_apply_enabled"] is False
|
||||
|
||||
|
||||
def test_ansible_truth_keeps_catalog_hint_separate_from_runtime_use() -> None:
    """With no automation ops, a catalog match alone must not count as considered."""
    truth = build_ansible_truth(
        [],
        incident={"incident_id": "INC-2", "alertname": "nginx 502 upstream timeout"},
        drift=None,
    )

    assert truth["considered"] is False
    assert truth["records"] == []
    assert truth["not_used_reason"].startswith("no automation_operation_log row")
    assert truth["candidate_catalog"]["candidates"][0]["catalog_id"] == "ansible:nginx-sync"
    assert truth["candidate_catalog"]["candidates"][0]["approval_required"] is True
    assert truth["candidate_catalog"]["decision_effect"] == "none"
|
||||
|
||||
|
||||
def test_ansible_decision_audit_payload_is_dry_run_only() -> None:
    """The decision audit payload is a dry-run, audit-only candidate record."""
    incident = SimpleNamespace(
        incident_id="INC-DOCKER",
        project_id="awoooi",
        alert_category="infrastructure",
        notification_type="TYPE-3",
        severity=SimpleNamespace(value="P3"),
        affected_services=["bitan-pharmacy-bitan-1"],
        signals=[
            SimpleNamespace(
                alert_name="DockerContainerUnhealthy",
                labels={"alertname": "DockerContainerUnhealthy", "container": "bitan-pharmacy-bitan-1"},
                annotations={},
            )
        ],
    )

    payload = build_ansible_decision_audit_payload(
        incident=incident,
        proposal_data={"source": "expert_system", "risk_level": "low", "action": "NO_ACTION"},
        decision_path="manual_approval",
        not_used_reason="manual approval required; Ansible check-mode is not wired yet",
    )

    assert payload is not None
    assert payload["operation_type"] == "ansible_candidate_matched"
    assert payload["status"] == "dry_run"
    assert payload["input"]["executor"] == "ansible"
    assert payload["input"]["check_mode"] is True
    assert payload["input"]["apply_enabled"] is False
    assert payload["input"]["approval_required"] is True
    assert payload["input"]["executor_candidates"]
    assert payload["output"]["decision_effect"] == "audit_only"
    assert payload["dry_run_result"]["check_mode_executed"] is False
|
||||
|
||||
347
docs/LOGBOOK.md
347
docs/LOGBOOK.md
@@ -1,98 +1,251 @@
|
||||
## 2026-05-12 | Source Control Draft Reconcile Plan 草案
|
||||
## 2026-05-13 | T4 Config Drift fingerprint repeat-state 已推版
|
||||
|
||||
**背景**:統帥批准繼續推進後,本輪先同步最新 `gitea/main`,納入另一個 AwoooP Session 的 `legacy mcp audit -> gateway timeline` 進度,避免雙 Session 分歧。同步後繼續沿用低摩擦原則,只針對 refs-blocked repo 產生草案,不執行同步。
|
||||
**背景**:Config Drift Telegram 卡片只顯示單次 `report_id` 與 HIGH/MEDIUM/INFO 計數,Operator 無法判斷是否同一漂移一直重複、已跑到哪個流程階段、是否需要人工。舊 truth-chain repeat 只用 namespace/status/counts 分組,會把「剛好同計數但 items 不同」誤認為同一漂移。
|
||||
|
||||
**本次交付**:
|
||||
- 新增 `scripts/security/source-control-reconcile-plan.py`,只讀既有 redacted snapshot,不呼叫遠端 Git,不 fetch、不 push、不改 remote。
|
||||
- 新增 `docs/schemas/source_control_reconcile_plan_v1.schema.json`。
|
||||
- 產出 `docs/security/source-control-reconcile-plan.snapshot.json` 與 `docs/security/SOURCE-CONTROL-RECONCILE-PLAN.md`。
|
||||
- Draft plan 涵蓋 3 個 refs-blocked mapped repos:`wooo/awoooi`、`wooo/clawbot-v5`、`wooo/wooo-aiops`。
|
||||
- 更新 `SECURITY-SUPPLY-CHAIN-CONTRACT-MANIFEST`,contract count 從 13 增至 14,新增 `source_control_reconcile_plan_v1`。
|
||||
- 更新 `SECURITY-SUPPLY-CHAIN-PROGRESS` 與 `AWOOOP-MIRROR-ONLY-CONSUMPTION-CHECKLIST`,讓 AwoooP 可 mirror draft plan 但不得執行 refs sync。
|
||||
**修正**:
|
||||
- 新增 `drift_repeat_state.py`:
|
||||
- 以 namespace + sorted drift items 建立 stable fingerprint。
|
||||
- fingerprint 只看 drift 的實際 identity,不看 report_id / 掃描時間。
|
||||
- repeat-state schema:`drift_repeat_state_v1`。
|
||||
- `awooop_truth_chain_service`:
|
||||
- drift report 查詢納入 `items`。
|
||||
- repeat-state 改用 stable fingerprint,比對 24h 內候選並回傳 12h repeat window。
|
||||
- 回傳 `fingerprint`、`matching_strategy=namespace_and_stable_items_v1`、`operator_stage`、matching reports。
|
||||
- `drift_narrator_service`:
|
||||
- Telegram drift card body 會追加:
|
||||
- `流程: drift_scanned → ai_analyzed → pending_human`
|
||||
- `重複: 12h 內第 N 次同指紋`
|
||||
- `指紋: dfp_xxxxx`
|
||||
- 這仍只揭露真相鏈狀態,不自動採納 / 回滾 / 忽略。
|
||||
|
||||
**邊界**:
|
||||
- Plan 狀態為 `draft_blocked`;authenticated / admin_export server-side inventory 尚未完成前,不可執行。
|
||||
- 未 push refs、未 force push、未刪 refs、未建立 GitHub repo、未改 visibility、未切 GitHub primary、未部署。
|
||||
- 人工批准未來也必須單一 repo 生效,不得批次套用到所有 repo。
|
||||
**驗證與推版**:
|
||||
- Local:
|
||||
- `py_compile`:pass。
|
||||
- `ruff --select F,E9`:pass。
|
||||
- `pytest tests/test_awooop_truth_chain_service.py tests/test_phase25_drift_detection.py tests/test_drift_interpreter_ollama_first.py tests/test_platform_router_order.py tests/test_awooop_operator_auth.py -q`:37 passed。
|
||||
- `git diff --check`:pass。
|
||||
- Gitea:
|
||||
- `5b348774 feat(awooop): expose drift repeat fingerprint` 已推 `gitea main`。
|
||||
- Code Review run `1938`:success。
|
||||
- CD run `1937`:success。
|
||||
- Deploy marker:`3d38039b chore(cd): deploy 5b34877 [skip ci]`。
|
||||
- Production:
|
||||
- API/Web/Worker image 均為 `5b34877429c16c42f0f894eb4d7f0484711fde9b`。
|
||||
- K3s rollout status:API/Web/Worker success。
|
||||
- `/api/v1/health`:healthy,mock_mode=false。
|
||||
- Truth-chain smoke `7f858956`:
|
||||
- `source_type=drift_report`
|
||||
- `current_stage=dedup_or_repeat_updated`
|
||||
- `stage_status=pending`
|
||||
- `needs_human=true`
|
||||
- `repeat_schema=drift_repeat_state_v1`
|
||||
- `fingerprint=dfp_02dc625b64784b24`
|
||||
- `matching_strategy=namespace_and_stable_items_v1`
|
||||
- `operator_stage=pending_human`
|
||||
- `repeat_12h=2`
|
||||
- `outbound_visible=2`
|
||||
- Production narrator render smoke:
|
||||
- `流程: drift_scanned → ai_analyzed → pending_human | 重複: 12h 內第 2 次同指紋 | 指紋: dfp_smoke1234`
|
||||
|
||||
**驗證**:
|
||||
- `source-control-reconcile-plan.py` 產生 3 plans。
|
||||
- JSON / schema / snapshot parse 通過。
|
||||
- `scripts/security/*.py` 可編譯。
|
||||
- `git diff --check` 通過。
|
||||
- PR diff added lines 未命中本輪敏感 token / credential pattern。
|
||||
**重要校正**:
|
||||
- 舊 count-based repeat 會把 `7f858956` 算成 12 次。
|
||||
- 新 stable fingerprint 顯示同一 items fingerprint 12h 內是 2 次;這代表之前的 12 次是「同計數重複候選」,不是已證明同一漂移。
|
||||
|
||||
## 2026-05-12 | Source Control Approval Board 低摩擦決策隊列
|
||||
**整體進度**:
|
||||
- Wave 0:MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。
|
||||
- T0:Truth-chain read-only API 完成、部署、production smoke 完成。
|
||||
- T1:Channel Event hardening 完成、部署、production smoke 完成。
|
||||
- T2:legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成;first-class Gateway enforced path 仍待後續 wave。
|
||||
- T3:Ansible audit contract + decision candidate dry-run audit 完成、部署、production smoke 完成。
|
||||
- T4:Config Drift stable fingerprint / repeat-state / Telegram stage visibility 完成、部署、production smoke 完成。
|
||||
- 仍未完成:T5 incident / approval / execution reconciliation、Ansible 真正 check-mode executor / diff / apply / rollback、first-class MCP Gateway enforcement。
|
||||
|
||||
**背景**:統帥批准繼續推進後,下一步原本是 Gitea authenticated read-only inventory;但目前 `GITEA_READONLY_TOKEN` 未提供。本輪因此不使用可 push 的既有 Gitea remote credential 代替 read-only token,避免把 inventory 與寫入權限憑證混在一起。
|
||||
## 2026-05-13 | T3 Ansible decision candidate audit 已推版
|
||||
|
||||
**本次交付**:
|
||||
- 新增 `scripts/security/source-control-approval-board.py`,只讀既有 redacted snapshot,不呼叫 Gitea/GitHub API,不需要 token。
|
||||
- 新增 `docs/schemas/source_control_approval_board_v1.schema.json`。
|
||||
- 產出 `docs/security/source-control-approval-board.snapshot.json` 與 `docs/security/SOURCE-CONTROL-APPROVAL-BOARD.md`。
|
||||
- Board 彙整 8 個 target,其中 7 個為 pending approval:`awoooi`、`clawbot-v5`、`wooo-aiops`、`wooo-infra-config`、`ewoooc`、`bitan-pharmacy`、`tsenyang-website`;`nexu-io/open-design` 維持 scope review。
|
||||
- 更新 `SECURITY-SUPPLY-CHAIN-CONTRACT-MANIFEST`,contract count 從 12 增至 13,新增 `source_control_approval_board_v1`。
|
||||
- 更新 `SECURITY-SUPPLY-CHAIN-PROGRESS` 與 `AWOOOP-MIRROR-ONLY-CONSUMPTION-CHECKLIST`,讓 AwoooP 可 mirror board 但不得執行 board item。
|
||||
**背景**:T3 第一段只讓 truth-chain 看得到 Ansible audit contract 與 repo playbook catalog;但 AI decision path 還不會留下「曾考慮 Ansible、但尚未進 check-mode/apply」的 first-class record。這會讓 Telegram / Operator Console 仍看不出 Ansible 是否真的被 AI 修復鏈評估過。
|
||||
|
||||
**修正**:
|
||||
- `awooop_ansible_audit_service.py` 新增 decision candidate audit payload / writer。
|
||||
- `decision_manager` 在 auto-execute / manual-approval 分支排程 best-effort `ansible_candidate_matched` audit write。
|
||||
- Audit row 明確是 dry-run / audit-only:
|
||||
- `status=dry_run`
|
||||
- `input.executor=ansible`
|
||||
- `input.check_mode=true`
|
||||
- `input.apply_enabled=false`
|
||||
- `input.approval_required=true`
|
||||
- `output.decision_effect=audit_only`
|
||||
- Docker/container 類 incident 也會命中 188 / 110 Ansible catalog hints;未來新 decision 可在 truth-chain 顯示「有候選、尚未執行 check-mode」。
|
||||
|
||||
**驗證與推版**:
|
||||
- Local:
|
||||
- `py_compile`:pass。
|
||||
- `ruff --select F,E9`:pass。
|
||||
- `pytest apps/api/tests/test_awooop_truth_chain_service.py apps/api/tests/test_platform_router_order.py apps/api/tests/test_awooop_operator_auth.py -q`:14 passed。
|
||||
- Tier 3 adjacent tests:133 passed, 1 existing RuntimeWarning。
|
||||
- `git diff --check`:pass。
|
||||
- Gitea:
|
||||
- `3799e0db feat(awooop): audit ansible decision candidates` 已推 `gitea main`。
|
||||
- Code Review run `1936`:success。
|
||||
- CD run `1935`:success。
|
||||
- Deploy marker:`90b9ddb7 chore(cd): deploy 3799e0d [skip ci]`。
|
||||
- Production:
|
||||
- API/Web/Worker image 均為 `192.168.0.110:5000/awoooi/*:3799e0db0d30f29fdc251197634d2fca4c2c67fd`。
|
||||
- K3s rollout status:API/Web/Worker success。
|
||||
- `/api/v1/health`:healthy,mock_mode=false。
|
||||
- Pure function smoke(API pod):DockerContainerUnhealthy 事件可產生 `ansible_candidate_matched` payload,`candidate_count=2`,`check_mode_executed=false`。
|
||||
- Truth-chain smoke `INC-20260512-B6C589`:
|
||||
- `source_type=incident`
|
||||
- `current_stage=manual_required`
|
||||
- `stage_status=blocked`
|
||||
- `needs_human=true`
|
||||
- `execution.ansible.audit_contract.schema_version=ansible_executor_audit_v1`
|
||||
- `ansible_candidates=2`
|
||||
- `mcp_gateway_total=8`
|
||||
- Truth-chain smoke `7f858956`:
|
||||
- `source_type=drift_report`
|
||||
- `current_stage=dedup_or_repeat_updated`
|
||||
- `stage_status=pending`
|
||||
- `needs_human=true`
|
||||
- `repeat_12h=12`
|
||||
- `outbound_visible=2`
|
||||
|
||||
**整體進度**:
|
||||
- Wave 0:MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。
|
||||
- T0:Truth-chain read-only API 完成、部署、production smoke 完成。
|
||||
- T1:Channel Event hardening 完成、部署、production smoke 完成。
|
||||
- T2:legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成;first-class Gateway enforced path 仍待後續 wave。
|
||||
- T3:Ansible audit contract + decision candidate dry-run audit 完成、部署、production smoke 完成。
|
||||
- 仍未完成:Ansible 真正 check-mode executor、diff artifact、apply / rollback audit、T4 drift fingerprint FSM、T5 incident / approval / execution reconciliation、first-class MCP Gateway enforcement。
|
||||
|
||||
## 2026-05-12 | Security Supply Chain PR #117 累積紀錄
|
||||
|
||||
**背景**:統帥批准 Kali `192.168.0.112`、開發主機 `192.168.0.111` / `192.168.0.168`、Code Review -> Codex、Gitea -> GitHub 長期遷移納入同一個資安工作項目;同時要求初期不要把資安等級一次拉太高,避免產品、架構與流程變得過度複雜。本支線以乾淨 worktree 建立 PR `#117`,並持續與另一個 AwoooP Session 的 `gitea/main` 同步。
|
||||
|
||||
**累積交付**:
|
||||
- 建立 docs-only / contracts-first Security Supply Chain scaffold,涵蓋 Kali、Code Review、Codex、Gitea、GitHub 與 AwoooP mirror-only handoff。
|
||||
- 產出 Gitea/GitHub refs diff、Gitea public-only inventory、local remote inventory、GitHub target probe、canonical lineage、110 refs probe、repo-by-repo approval package 與 contract manifest snapshot。
|
||||
- 建立 `SOURCE-CONTROL-APPROVAL-BOARD.md`,彙整 8 個 target,其中 7 個為 pending approval;authenticated inventory gate 仍為 `blocked`。
|
||||
- 建立 `SOURCE-CONTROL-RECONCILE-PLAN.md`,涵蓋 `awoooi`、`clawbot-v5`、`wooo-aiops` 三個 refs-blocked mapped repos;狀態仍為 `draft_blocked`。
|
||||
- Contract manifest 已收斂到 14 個主要 contract,可供 AwoooP mirror / read-only policy / approval candidate 消費,但不得作 execution router。
|
||||
|
||||
**邊界**:
|
||||
- 未使用 Gitea write-capable remote credential 做 authenticated inventory。
|
||||
- 未建立 GitHub repo、未改 visibility、未同步 refs、未切 GitHub primary、未部署。
|
||||
- authenticated inventory gate 仍為 `blocked`,等待 read-only token 或 redacted admin export。
|
||||
- AwoooP 可 mirror board / plan / policy,但不得執行 board item 或新增高風險 action button。
|
||||
|
||||
**驗證**:
|
||||
- `source-control-approval-board.py` 產生 8 items,pending approval 7。
|
||||
- JSON / schema / snapshot parse 通過。
|
||||
- `scripts/security/*.py` 可編譯。
|
||||
- `git diff --check` 通過。
|
||||
- PR diff added lines 未命中本輪敏感 token / credential pattern。
|
||||
|
||||
## 2026-05-12 | Security Supply Chain PR #117 與 AwoooP 主線同步
|
||||
## 2026-05-12 | T3 Ansible audit surface 第一段
|
||||
|
||||
**背景**:Security Supply Chain docs-only 分支完成首次推版後,另一個 AwoooP Session 已將 `feat(awooop): harden outbound truth chain mirror` 與 deploy marker 推入 `gitea/main`。為避免雙 Session 推進互相衝突,本輪先把最新 `gitea/main` 合入資安分支,再建立 review-only PR。
|
||||
**背景**:Telegram / truth-chain live audit 顯示 Ansible 目前仍只是 repo/主機部署工具,沒有出現在 AI 自動化修復鏈路的 first-class audit record;Operator 無法知道「是否被考慮、是否 dry-run、為何沒用」。
|
||||
|
||||
**本次同步**:
|
||||
- 資安分支 `codex/security-supply-chain-contracts-20260512` 已合入最新 `gitea/main`,merge commit 為 `dc540cba`。
|
||||
- 已建立 Gitea PR `#117`:`http://192.168.0.110:3001/wooo/awoooi/pulls/117`。
|
||||
- PR 維持 review-only / docs-first / contracts-first;未合併、未部署、未切 GitHub primary。
|
||||
- AwoooP 主線 runtime / migration / k8s 變更由 `gitea/main` 保留,本資安分支的 PR diff 仍只呈現資安文件、schema、snapshot 與 read-only tooling。
|
||||
**修正**:
|
||||
- 新增 migration `adr090d_ansible_operation_types.sql`,擴充 `automation_operation_log.operation_type`:
|
||||
- `ansible_candidate_matched`
|
||||
- `ansible_check_mode_executed`
|
||||
- `ansible_apply_executed`
|
||||
- `ansible_rollback_executed`
|
||||
- `ansible_execution_skipped`
|
||||
- 新增 rollback migration `adr090d_ansible_operation_types_down.sql`;`run-migration.yml` 會跳過 `_down.sql`。
|
||||
- 新增 `awooop_ansible_audit_service.py`:
|
||||
- 讀取 automation ops 中的 Ansible operation type/tag/backend。
|
||||
- 暴露 repo 既有 playbook catalog hint。
|
||||
- 明確標示 `decision_effect=none`,避免把候選 playbook 當成已執行。
|
||||
- truth-chain `execution.ansible` 現在會顯示:
|
||||
- `considered` 是否有真實 Ansible audit record。
|
||||
- `records`、`audit_contract`、`candidate_catalog`、`not_used_reason`。
|
||||
- `incident_timeline_service` 補 Ansible operation type → stage mapping。
|
||||
|
||||
**驗證**:
|
||||
- `gitea/main` 已是資安分支祖先。
|
||||
- `python3 -m py_compile scripts/security/*.py` 通過。
|
||||
- `git diff --check gitea/main...HEAD` 通過。
|
||||
- JSON / schema / snapshot parse 通過,`security_supply_chain_contract_manifest_v1` 12 個 contracts path check 通過。
|
||||
- PR diff added lines 未命中本輪敏感 token / credential pattern。
|
||||
- `py_compile`:Ansible audit service / truth-chain / incident timeline / truth-chain tests 通過。
|
||||
- `ruff --select F,E9`:All checks passed。
|
||||
- `pytest apps/api/tests/test_awooop_truth_chain_service.py apps/api/tests/test_platform_router_order.py apps/api/tests/test_awooop_operator_auth.py -q`:13 passed。
|
||||
- `ruby YAML.load_file(".gitea/workflows/run-migration.yml")`:ok。
|
||||
- `git diff --check`:ok。
|
||||
|
||||
**下一步**:
|
||||
- 等 PR review 後再決定是否合併;不得直接切 GitHub primary 或啟動 refs sync。
|
||||
- Gitea read-only inventory approval 未批准前,private/internal server-side repo list 保持 blocked。
|
||||
- 下一階段仍維持低摩擦 observe-first,不做 runtime blocking。
|
||||
**整體進度**:
|
||||
- Wave 0:MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。
|
||||
- T0:Truth-chain read-only API 完成、部署、production smoke 完成。
|
||||
- T1:Channel Event hardening 完成、部署、production smoke 完成。
|
||||
- T2:legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成;first-class Gateway enforced path 仍待後續 wave。
|
||||
- T3:Ansible first-class audit contract / truth-chain 可見性完成、已部署;尚未把 approval execution path 寫入 Ansible dry-run/check-mode。
|
||||
- 下一步:T3 第二段接 decision / approval execution 的 Ansible check-mode audit row,仍不直接 apply。
|
||||
|
||||
## 2026-05-12 | Security Supply Chain docs-only contract manifest
|
||||
**production push 追加**:
|
||||
- Gitea `run-migration` run `1933` 顯示 migration 本體已成功:
|
||||
- `adr090d_ansible_operation_types.sql` 以 owner fallback 套用成功。
|
||||
- 但 audit seed 仍失敗,這次不是 `:'commit_sha'`,而是 tools JSON literal 在 unquoted heredoc 下仍保留反斜線:
|
||||
- `'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb`
|
||||
- PostgreSQL 回 `invalid input syntax for type json`。
|
||||
- 已修 `.gitea/workflows/run-migration.yml`:tools JSON 改為 `'{"psql": 1, "gitea_ci": 1}'::jsonb`。
|
||||
- 已補 production `asset_discovery_run` repair audit row:
|
||||
- `triggered_by=codex:gitea-migration-audit-repair`
|
||||
- `summary.type=ci_migration_manual_repair`
|
||||
- `summary.commit_sha=ca80972dc73cb647f8fab3bf9439784c4b8eef7b`
|
||||
- Production DB constraint 驗證:`automation_operation_log_type_valid` 已包含全部 `ansible_*` operation types。
|
||||
- CD 部署:
|
||||
- `07000dae chore(cd): deploy ca80972 [skip ci]`
|
||||
- API/Web/Worker image 均為 `ca80972dc73cb647f8fab3bf9439784c4b8eef7b`
|
||||
- rollout success。
|
||||
- Truth-chain smoke(B6C589):
|
||||
- `truth_status=manual_required/blocked`
|
||||
- `mcp_gateway_total=8`
|
||||
- `execution.ansible.considered=false`
|
||||
- `execution.ansible.records=0`
|
||||
- `not_used_reason=no automation_operation_log row with Ansible operation type, tag, or executor backend for this source`
|
||||
- `audit_contract.schema_version=ansible_executor_audit_v1`
|
||||
- Caveat:下一個 migration push 仍需 live 驗證 `run-migration` audit seed 是否完全通過;本輪 workflow 修正後沒有新的 migration 觸發可重跑。
|
||||
|
||||
**背景**:統帥批准 Kali `192.168.0.112`、開發主機 `192.168.0.111` / `192.168.0.168`、Code Review -> Codex、Gitea -> GitHub 長期遷移納入同一個資安工作項目;同時要求初期不要把資安等級一次拉太高,避免產品、架構與流程變得過度複雜。
|
||||
**T3 第二段本地實作**:
|
||||
- `awooop_ansible_audit_service.py` 新增 decision audit payload/writer:
|
||||
- 只有 static catalog 有候選 playbook 時才寫 `automation_operation_log`。
|
||||
- operation_type=`ansible_candidate_matched`。
|
||||
- status=`dry_run`。
|
||||
- `input.executor=ansible`、`check_mode=true`、`apply_enabled=false`、`approval_required=true`。
|
||||
- `output.decision_effect=audit_only`。
|
||||
- `decision_manager` 在 auto-execute / manual-approval 分支都排程 best-effort audit write:
|
||||
- 不改 executor。
|
||||
- 不跑 Ansible。
|
||||
- 不阻塞決策和 Telegram。
|
||||
- Docker/container 類 incident 也會命中 Ansible catalog hint,讓 B6C589 這類事件後續新 decision 能留下 Ansible candidate audit row。
|
||||
- 本地驗證:
|
||||
- `py_compile`:pass。
|
||||
- `ruff --select F,E9`:pass。
|
||||
- `pytest test_awooop_truth_chain_service.py test_platform_router_order.py test_awooop_operator_auth.py -q`:14 passed。
|
||||
- `git diff --check`:pass。
|
||||
- 待推版與 production smoke。
|
||||
|
||||
**本次交付**:
|
||||
- 建立 Kali / Code Review / GitHub / Gitea / Codex / AwoooP 的 docs-only security supply chain scaffold。
|
||||
- 建立 `security_finding_v1`、`coding_task_v1`、`source_control_migration_event_v1`、`gitea_repo_inventory_v1`、`local_git_remote_inventory_v1`、`github_target_probe_v1`、`github_target_decision_v1`、`github_target_repo_approval_package_v1`、`local_repo_canonical_probe_v1`、`git_remote_refs_probe_v1`、`approval_required_event_v1`、`security_rollout_policy_v1`、`security_supply_chain_contract_manifest_v1` schema 草案。
|
||||
- 產出 Gitea/GitHub refs diff、Gitea public-only inventory、local remote inventory、GitHub target probe、canonical lineage、110 refs probe、repo-by-repo approval package 與 contract manifest snapshot。
|
||||
- 明確採低摩擦 `observe-first` / `mirror_only`:LOW / MEDIUM observation 先 observe / warn;只有 read-only token、repo creation、visibility change、refs sync、secret、deploy、primary switch 等高風險動作才進 approval。
|
||||
## 2026-05-12 | run-migration audit seed 再修正
|
||||
|
||||
**邊界**:
|
||||
- 本輪只做文件、schema、read-only scripts 與 redacted snapshots。
|
||||
- 未建立 repo、未修改 visibility、未同步 refs、未切 GitHub primary、未部署、未碰 runtime enforcement。
|
||||
- AwoooP 只可 mirror / read-only policy / approval candidate,不可把 manifest 當 execution router。
|
||||
**背景**:Gitea `run-migration` 在 `Seed asset_discovery_run (audit)` 再次失敗:
|
||||
|
||||
```text
|
||||
ERROR: syntax error at or near ":"
|
||||
LINE 16: 'commit_sha', :'commit_sha',
|
||||
```
|
||||
|
||||
**修正**:
|
||||
- `.gitea/workflows/run-migration.yml` 不再依賴 `psql` 的 `:'commit_sha'` / `:'files_json'` 變數展開。
|
||||
- 改由 `jq` 先產生完整 `summary` JSON,再以 shell-safe SQL literal 寫入 `asset_discovery_run.summary`。
|
||||
- 保留 owner connection fallback,只修 audit seed,不改 migration apply 流程。
|
||||
|
||||
**驗證**:
|
||||
- JSON / schema parse 通過。
|
||||
- `scripts/security/*.py` 可編譯。
|
||||
- `git diff --check` 通過。
|
||||
- 新增 / 修改內容未命中本輪敏感 token / credential pattern。
|
||||
- `ruby -e 'require "yaml"; YAML.load_file(".gitea/workflows/run-migration.yml")'`:yaml ok。
|
||||
- 抽出 `Seed asset_discovery_run (audit)` step 後 `bash -n`:通過。
|
||||
- mock `psql` 實跑該 step:rendered SQL 已無 `:'...'` psql 變數,並包含 `commit_sha` / `files` JSON。
|
||||
- `git diff --check`:通過。
|
||||
|
||||
**下一步**:
|
||||
- 等 Gitea read-only inventory approval 被批准後,補 private/internal server-side repo list。
|
||||
- 逐 repo 取得 owner / visibility / canonical 決策。
|
||||
- 對 refs blocked repos 產生 reconcile plan;GitHub primary 仍保持 blocked。
|
||||
**整體進度**:
|
||||
- Wave 0:MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。
|
||||
- Truth-chain T0:read-only truth-chain API 完成、部署、production smoke 完成。
|
||||
- T1:Channel Event hardening 完成、部署、production smoke 完成。
|
||||
- T2:legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成;first-class MCP Gateway enforced path 仍待後續 wave。
|
||||
- 本次:CI migration audit seed 紅燈修正完成,待推 Gitea main 觀察下一次 `run-migration`。
|
||||
- 下一步:回到 T3 Ansible declarative executor 盤點與 first-class audit surface。
|
||||
|
||||
## 2026-05-12 | Truth-chain T0 read-only API 第一版
|
||||
|
||||
@@ -6601,3 +6754,67 @@ gateway_audit_total=0 last_15m=0 bridge_total=0
|
||||
|
||||
- 因此目前只能宣稱「T2 bridge 寫入能力已部署並經 rollback smoke 驗證」。
|
||||
- 尚不能宣稱「所有 MCP / 自建 MCP 都已完全經 AwoooP Gateway 強制治理」;下一段要讓下一個真實 incident / MCP 呼叫自然產生 durable bridge row,或把高頻 caller 改成 first-class `McpGateway`。
|
||||
|
||||
**T2 backfill / truth-chain visibility 追加**:
|
||||
|
||||
- 新增 `scripts/ops/awooop-mcp-gateway-bridge-backfill-24h.sql`:
|
||||
- 將最近 24h 真實 `mcp_audit_log` 鏡像到 `awooop_mcp_gateway_audit`。
|
||||
- 以 `gate_result.legacy_audit_id` 做 idempotency key。
|
||||
- bridge row 保留 `policy_enforced=false` 與 `not_used_reason`,避免誤判為五閘門已 enforcement。
|
||||
- production 已執行 backfill:
|
||||
|
||||
```text
|
||||
inserted_bridge_rows=1160
|
||||
gateway_total=1310 bridge_total=1310 last_24h=1276
|
||||
B6C589_gateway_rows=8 failed=8 success=0
|
||||
```
|
||||
|
||||
- truth-chain API 追加 `gate_result` 欄位,並把 JSONB text 解析回物件,讓 UI 能顯示 bridge reason。
|
||||
|
||||
```text
|
||||
py_compile:
|
||||
apps/api/src/services/awooop_truth_chain_service.py
|
||||
apps/api/tests/test_awooop_truth_chain_service.py
|
||||
# OK
|
||||
|
||||
ruff F,E9:
|
||||
# All checks passed
|
||||
|
||||
pytest:
|
||||
apps/api/tests/test_awooop_truth_chain_service.py
|
||||
apps/api/tests/test_platform_router_order.py
|
||||
apps/api/tests/test_awooop_operator_auth.py
|
||||
# 11 passed
|
||||
```
|
||||
|
||||
**效果**:
|
||||
|
||||
- `INC-20260512-B6C589` truth-chain 現在不再是 `awooop_mcp_gateway_audit_empty`。
|
||||
- 仍顯示 `manual_required/blocked`,因為 8 個 SSH MCP 都失敗,approval/incident 狀態仍矛盾;這是 T5 要處理,不能用 T2 粉飾成自動修復完成。
|
||||
|
||||
**production deploy / endpoint smoke 追加(完成)**:
|
||||
|
||||
```text
|
||||
Gitea:
|
||||
1928 CD Pipeline b4d367ee -> success
|
||||
1929 Code Review b4d367ee -> success
|
||||
|
||||
K8s image:
|
||||
awoooi-api 192.168.0.110:5000/awoooi/api:b4d367eeb463eccda5aec8aa9c90f19897dbd634
|
||||
awoooi-worker 192.168.0.110:5000/awoooi/api:b4d367eeb463eccda5aec8aa9c90f19897dbd634
|
||||
awoooi-web 192.168.0.110:5000/awoooi/web:b4d367eeb463eccda5aec8aa9c90f19897dbd634
|
||||
|
||||
health:
|
||||
http://192.168.0.125:32334/api/v1/health -> 200 healthy
|
||||
|
||||
Truth-chain:
|
||||
GET /api/v1/platform/truth-chain/INC-20260512-B6C589?project_id=awoooi -> 200
|
||||
stage=manual_required status=blocked needs_human=True
|
||||
blockers=all_evidence_sensors_failed,
|
||||
approval_resolved_no_action_without_execution,
|
||||
incident_still_investigating_after_approval
|
||||
gateway_total=8 legacy_total=8
|
||||
first_gateway_tool=legacy:ssh_host:ssh_get_nginx_error_log result=failed
|
||||
gate_schema=legacy_mcp_bridge_v1 policy_enforced=False
|
||||
not_used_reason=legacy direct provider path; bridge audit only
|
||||
```
|
||||
|
||||
@@ -1883,12 +1883,83 @@ Phase 6 完成後
|
||||
- T2 bridge image `94d006ea` 已部署,CD run `1921` success,health 200。
|
||||
- rollback smoke 證明 `record_mcp_call()` 在同一 transaction 內會同時寫 legacy `mcp_audit_log` 與 `awooop_mcp_gateway_audit` bridge row,且 bridge row 標示 `policy_enforced=false` / `not_used_reason=legacy direct provider path; bridge audit only`;rollback 後兩邊皆未污染 production。
|
||||
- 部署後短觀察窗內沒有自然新 legacy MCP call(`legacy_mcp_15m=0`),所以 live `awooop_mcp_gateway_audit` total 仍是 0。T2 bridge capability 已上線,但 T2 全退出條件仍需下一個真實 MCP 呼叫產生 durable row,或把高頻 caller 改成 first-class Gateway path。
|
||||
- 已執行最近 24h 真實 legacy MCP backfill:`inserted_bridge_rows=1160`,目前 `awooop_mcp_gateway_audit gateway_total=1310 / bridge_total=1310 / last_24h=1276`。`INC-20260512-B6C589` 現在 gateway side 可見 8 筆 MCP,8 failed / 0 success;truth-chain blocker 移除 `awooop_mcp_gateway_audit_empty`,但仍是 `manual_required/blocked`,因為 evidence sensors 全失敗、NO_ACTION approval 無 execution、incident 仍 investigating。
|
||||
- truth-chain API 追加回傳 `gate_result`,讓 Operator Console 可直接顯示 `policy_enforced=false` 與 `not_used_reason`,避免把 bridge row 誤認為 first-class Gateway enforcement。
|
||||
- `b4d367ee` 已部署,CD run `1928` success。B6C589 endpoint smoke:`gateway_total=8 / legacy_total=8`,第一筆 gateway row 顯示 `gate_schema=legacy_mcp_bridge_v1`、`policy_enforced=False`、`not_used_reason=legacy direct provider path; bridge audit only`;truth status 仍是 `manual_required/blocked`。
|
||||
|
||||
**仍未宣稱完成**:
|
||||
- 這只是 legacy bridge,不是把所有呼叫強制改經 AwoooP Gateway;T2 後續仍要把新 MCP caller 收斂到 first-class Gateway path。
|
||||
|
||||
---
|
||||
|
||||
### 2026-05-12 晚 (台北) — T3 Ansible declarative executor audit surface 第一段
|
||||
|
||||
**範圍**:
|
||||
- `automation_operation_log.operation_type` CHECK 追加 Ansible executor audit states:
|
||||
`ansible_candidate_matched` / `ansible_check_mode_executed` /
|
||||
`ansible_apply_executed` / `ansible_rollback_executed` /
|
||||
`ansible_execution_skipped`。
|
||||
- 新增 `awooop_ansible_audit_service.py`,把 repo 既有 Ansible playbook catalog 以
|
||||
read-only 方式暴露給 truth-chain。
|
||||
- truth-chain `execution.ansible` 改為顯示:
|
||||
- 是否真的有 `automation_operation_log` Ansible audit record。
|
||||
- audit contract / required fields。
|
||||
- static catalog keyword hints,且 `decision_effect=none`,避免把候選 playbook 誤判成已自動修復。
|
||||
- `incident_timeline_service` 加入 Ansible operation type stage mapping。
|
||||
|
||||
**已驗證**:
|
||||
- 本地 `py_compile` / `ruff F,E9` / `git diff --check` 通過。
|
||||
- `test_awooop_truth_chain_service.py`、router order、operator auth 共 13 passed。
|
||||
- `run-migration.yml` YAML parse 通過;新增 `_down.sql` 會被既有 workflow skip 規則排除。
|
||||
|
||||
**仍未宣稱完成**:
|
||||
- 這不是 Ansible 自動修復執行器接線;目前只建立 first-class audit contract 與 truth-chain 可見性。
|
||||
- 下一段需把 decision / approval execution path 在「只 dry-run/check-mode」下寫入上述 operation types,再談 apply。
|
||||
|
||||
**production 追加**:
|
||||
- Gitea `run-migration` run `1933`:`adr090d_ansible_operation_types.sql` 已成功套用,含 owner fallback。
|
||||
- 同 run 的 `Seed asset_discovery_run (audit)` 仍失敗;新根因是 unquoted heredoc 下 tools JSON literal 還寫成 `'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb`,PostgreSQL 視為非法 JSON。
|
||||
- 後續修正:workflow tools JSON literal 改成 `'{"psql": 1, "gitea_ci": 1}'::jsonb`。
|
||||
- 已補 production `asset_discovery_run` repair audit row(`ci_migration_manual_repair` / `commit_sha=ca80972dc73cb647f8fab3bf9439784c4b8eef7b`)。
|
||||
- Production DB constraint 已確認包含全部 `ansible_*` operation types。
|
||||
- CD 已部署 `ca80972d` image,deploy marker `07000dae`;API/Web/Worker rollout success。
|
||||
- B6C589 truth-chain smoke:`manual_required/blocked`、`mcp_gateway_total=8`、`execution.ansible.considered=false`、`records=0`、`not_used_reason` 清楚顯示沒有 Ansible audit record。
|
||||
- 下一個 migration push 仍需驗證 `run-migration` audit seed live gate,因本輪 workflow 修正後未再新增 migration 觸發重跑。
|
||||
|
||||
**T3 第二段本地追加**:
|
||||
- `decision_manager` 在 auto-execute / manual-approval 分支新增 best-effort Ansible candidate audit write。
|
||||
- 僅在 catalog 有候選 playbook 時寫 `automation_operation_log`:
|
||||
`operation_type=ansible_candidate_matched`、`status=dry_run`、
|
||||
`input.check_mode=true`、`input.apply_enabled=false`、
|
||||
`output.decision_effect=audit_only`。
|
||||
- 這仍不是 Ansible 執行器;它只讓 truth-chain 能看到 AI decision path 曾考慮 Ansible candidate,以及為何未進入 check-mode/apply。
|
||||
- 本地 `py_compile` / `ruff F,E9` / 14 個 truth-chain/operator/router tests 通過;待推版和 production smoke。
|
||||
|
||||
**T3 第二段 production verified(2026-05-13 台北)**:
|
||||
- `3799e0db feat(awooop): audit ansible decision candidates` 已推 Gitea main,Code Review run `1936` success,CD run `1935` success。
|
||||
- Deploy marker:`90b9ddb7 chore(cd): deploy 3799e0d [skip ci]`。
|
||||
- Production API/Web/Worker image 均為 `3799e0db0d30f29fdc251197634d2fca4c2c67fd`,K3s rollout success,health 200 / `mock_mode=false`。
|
||||
- API pod pure smoke:DockerContainerUnhealthy 事件可產生 `ansible_candidate_matched` audit payload,`candidate_count=2`,`check_mode_executed=false`。
|
||||
- Truth-chain smoke:
|
||||
- `INC-20260512-B6C589` → `manual_required/blocked`,`mcp_gateway_total=8`,`execution.ansible.audit_contract=ansible_executor_audit_v1`,`ansible_candidates=2`。
|
||||
- `7f858956` → `dedup_or_repeat_updated/pending`,`repeat_12h=12`,`outbound_visible=2`。
|
||||
- 邊界:仍未執行 Ansible check-mode / apply / rollback;T3 目前完成的是 first-class candidate audit,而不是修復執行器。
|
||||
|
||||
**T4 Config Drift fingerprint repeat-state production verified(2026-05-13 台北)**:
|
||||
- `5b348774 feat(awooop): expose drift repeat fingerprint` 已推 Gitea main,Code Review run `1938` success,CD run `1937` success。
|
||||
- Deploy marker:`3d38039b chore(cd): deploy 5b34877 [skip ci]`。
|
||||
- 新增 `drift_repeat_state_v1`:以 namespace + sorted drift items 建 stable fingerprint,不再只靠 HIGH/MEDIUM/INFO counts。
|
||||
- Truth-chain drift repeat-state 現在回傳 `fingerprint`、`matching_strategy=namespace_and_stable_items_v1`、`operator_stage`、matching reports。
|
||||
- Telegram drift narrator 會在 card body 補:
|
||||
- `流程: drift_scanned → ai_analyzed → pending_human`
|
||||
- `重複: 12h 內第 N 次同指紋`
|
||||
- `指紋: dfp_xxxxx`
|
||||
- Production `7f858956` smoke:`repeat_schema=drift_repeat_state_v1`、`fingerprint=dfp_02dc625b64784b24`、`operator_stage=pending_human`、`repeat_12h=2`、`outbound_visible=2`。
|
||||
- 重要校正:舊 count-based repeat 看到 12 次,新 stable item fingerprint 證實同一漂移 fingerprint 只有 2 次;12 次只能稱為同計數候選,不能稱為同一漂移。
|
||||
- 邊界:T4 只補可觀測與重複判定,不做 auto-adopt / rollback / ignore。
|
||||
|
||||
---
|
||||
|
||||
### 2026-04-20 晚 (台北) — C1-C4 全流程串接 — Playbook 鏈路保護(commit de2d34d)
|
||||
|
||||
**觸發**:統帥全景盤查 AI 自動化節點後,發現 Playbook 自動修復鏈路有 3 個結構性斷點。
|
||||
|
||||
@@ -40,7 +40,7 @@ resources:
|
||||
images:
|
||||
- name: 192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER
|
||||
newName: 192.168.0.110:5000/awoooi/api
|
||||
newTag: 94d006eac88fd65f6efca817eb392a103ec10d3f
|
||||
newTag: 5b34877429c16c42f0f894eb4d7f0484711fde9b
|
||||
- name: 192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER
|
||||
newName: 192.168.0.110:5000/awoooi/web
|
||||
newTag: 94d006eac88fd65f6efca817eb392a103ec10d3f
|
||||
newTag: 5b34877429c16c42f0f894eb4d7f0484711fde9b
|
||||
|
||||
69
scripts/ops/awooop-mcp-gateway-bridge-backfill-24h.sql
Normal file
69
scripts/ops/awooop-mcp-gateway-bridge-backfill-24h.sql
Normal file
@@ -0,0 +1,69 @@
|
||||
-- AwoooP T2 MCP Gateway bridge backfill (24h)
|
||||
-- 2026-05-12 Codex + ogt
|
||||
--
|
||||
-- Purpose:
|
||||
-- Mirror real legacy mcp_audit_log rows into awooop_mcp_gateway_audit so
|
||||
-- truth-chain can show MCP usage for recent incidents while first-class
|
||||
-- Gateway migration continues. These rows are explicitly marked as bridge
|
||||
-- records and policy_enforced=false; they are not proof of five-gate
|
||||
-- Gateway enforcement.
|
||||
--
|
||||
-- Idempotency:
|
||||
-- gate_result.legacy_audit_id stores the mcp_audit_log.id source key.
|
||||
-- Re-running this SQL will only insert missing rows.
|
||||
|
||||
-- Mirror the last 24 hours of legacy mcp_audit_log rows into
-- awooop_mcp_gateway_audit as bridge records, then report how many rows
-- were added by this run.
WITH unbridged AS (
    -- Legacy rows inside the 24h window that have no bridge row yet.
    -- gate_result.legacy_audit_id carries the mcp_audit_log.id source key,
    -- so a re-run only selects rows that are still missing.
    SELECT
        legacy.id,
        legacy.incident_id,
        legacy.session_id,
        legacy.agent_role,
        legacy.mcp_server,
        legacy.tool_name,
        legacy.input_params,
        legacy.output_result,
        legacy.flywheel_node,
        legacy.success,
        legacy.error_message,
        legacy.duration_ms,
        legacy.created_at
    FROM mcp_audit_log AS legacy
    WHERE legacy.created_at > NOW() - INTERVAL '24 hours'
        AND NOT EXISTS (
            SELECT 1
            FROM awooop_mcp_gateway_audit AS bridge
            WHERE bridge.gate_result->>'legacy_audit_id' = CAST(legacy.id AS text)
                AND bridge.gate_result->>'schema_version' = 'legacy_mcp_bridge_v1'
                AND bridge.project_id = 'awoooi'
        )
),
inserted AS (
    INSERT INTO awooop_mcp_gateway_audit (
        project_id,
        run_id,
        trace_id,
        agent_id,
        tool_name,
        input_hash,
        output_hash,
        gate_result,
        result_status,
        block_gate,
        block_reason,
        latency_ms,
        created_at
    )
    SELECT
        -- project_id: fixed project key, also used by the idempotency filter.
        'awoooi',
        -- run_id: always written as NULL for bridge rows.
        CAST(NULL AS uuid),
        -- trace_id: incident id when present, otherwise the session id.
        LEFT(COALESCE(u.incident_id, u.session_id), 128),
        -- agent_id: falls back to a fixed label when the role is missing.
        LEFT(COALESCE(u.agent_role, 'legacy-mcp-provider'), 128),
        -- tool_name: prefixed so bridge rows are recognisable by name.
        LEFT('legacy:' || u.mcp_server || ':' || u.tool_name, 128),
        -- input_hash: SHA-256 hex of the input text; a NULL input hashes
        -- the literal string 'null'.
        encode(digest(COALESCE(CAST(u.input_params AS text), 'null'), 'sha256'), 'hex'),
        -- output_hash: SHA-256 hex of the output text, NULL when no output.
        CASE
            WHEN u.output_result IS NOT NULL
                THEN encode(digest(CAST(u.output_result AS text), 'sha256'), 'hex')
        END,
        -- gate_result: marks the row as a bridge record (policy_enforced is
        -- false) and stores the legacy source key used for idempotency.
        jsonb_build_object(
            'schema_version', 'legacy_mcp_bridge_v1',
            'gateway_path', 'legacy_backfill',
            'policy_enforced', false,
            'not_used_reason', 'legacy direct provider path; bridge audit only',
            'legacy_audit_id', CAST(u.id AS text),
            'legacy_mcp_server', u.mcp_server,
            'legacy_tool_name', u.tool_name,
            'flywheel_node', u.flywheel_node
        ),
        -- result_status: anything other than success = TRUE counts as failed.
        CASE WHEN u.success IS NOT TRUE THEN 'failed' ELSE 'success' END,
        -- block_gate: always written as NULL for bridge rows.
        CAST(NULL AS smallint),
        -- block_reason: truncated error text, only for non-successful calls.
        CASE WHEN u.success IS NOT TRUE THEN LEFT(u.error_message, 256) END,
        -- latency_ms and created_at are copied from the legacy row.
        u.duration_ms,
        u.created_at
    FROM unbridged AS u
    RETURNING call_id
)
SELECT COUNT(*) AS inserted_bridge_rows
FROM inserted;
|
||||
Reference in New Issue
Block a user