chore(security): sync with gitea main before ref detail diff [skip ci]

# Conflicts:
#	docs/LOGBOOK.md
This commit is contained in:
Your Name
2026-05-13 09:09:09 +08:00
15 changed files with 1618 additions and 129 deletions

View File

@@ -133,13 +133,15 @@ jobs:
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
FILES_JSON=$(echo "${{ steps.diff.outputs.new_files }}" | jq -Rn '[inputs | select(length > 0)]')
SUMMARY_JSON=$(jq -cn \
--arg commit_sha "${{ github.sha }}" \
--argjson files "$FILES_JSON" \
'{type: "ci_migration", commit_sha: $commit_sha, files: $files}')
SUMMARY_JSON_SQL=${SUMMARY_JSON//\'/\'\'}
seed_audit() {
local url="$1"
psql "$url" \
-v ON_ERROR_STOP=1 \
-v commit_sha="${{ github.sha }}" \
-v files_json="$FILES_JSON" <<'SQL'
psql "$url" -v ON_ERROR_STOP=1 <<SQL
INSERT INTO asset_discovery_run (
run_id, triggered_by, scope, scan_depth, status,
started_at, ended_at, tools_used, summary
@@ -151,12 +153,8 @@ jobs:
'success',
NOW(),
NOW(),
'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb,
jsonb_build_object(
'type', 'ci_migration',
'commit_sha', :'commit_sha',
'files', :'files_json'::jsonb
)
'{"psql": 1, "gitea_ci": 1}'::jsonb,
'${SUMMARY_JSON_SQL}'::jsonb
);
SQL
}

View File

@@ -0,0 +1,36 @@
-- ADR-090-D: automation_operation_log.operation_type adds Ansible executor audit states
-- Created: 2026-05-12 Taipei
--
-- Purpose:
-- T3 Ansible declarative executor visibility. These operation types allow
-- the AI automation truth chain to record that Ansible was matched,
-- check-mode executed, applied, rolled back, or explicitly skipped.
--
-- Safety:
-- This migration only expands the CHECK allowlist. It does not execute
-- Ansible, change approval behavior, or create auto-remediation rows.

-- Step 1: drop the current allowlist. IF EXISTS keeps the statement from
-- failing when the constraint is absent (e.g. on a re-run).
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid;

-- Step 2: re-create the allowlist under the same name. The last five entries
-- (ansible_*) are the values added by this migration.
ALTER TABLE automation_operation_log
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted',
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_rollback_executed',
        'ansible_execution_skipped'
    ));

-- Step 3: record the rationale on the constraint itself.
COMMENT ON CONSTRAINT automation_operation_log_type_valid ON automation_operation_log IS
    'ADR-090-D: allow first-class Ansible executor audit states for AwoooP truth-chain visibility.';

View File

@@ -0,0 +1,19 @@
-- ADR-090-D rollback: remove Ansible executor audit states from operation_type allowlist.
-- Only apply after confirming no automation_operation_log rows use ansible_* operation types.
-- NOTE(review): the ADD CONSTRAINT below is not declared NOT VALID, so it is
-- expected to be checked against existing rows and to fail while ansible_*
-- rows remain — confirm against the PostgreSQL version in use.

-- Step 1: drop the expanded allowlist (no error if it is already absent).
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid;

-- Step 2: restore the allowlist without the five ansible_* values.
ALTER TABLE automation_operation_log
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted'
    ));

View File

@@ -167,6 +167,31 @@ class DriftReportRepository:
{"report_id": report_id, "narrative": narrative},
)
async def get_repeat_state(self, report: DriftReport) -> dict:
    """Compute the repeat state of ``report`` from recent same-namespace rows.

    Reads at most 200 ``drift_reports`` rows for the report's namespace that
    were created within the last 24 hours (newest scan first) and passes
    them, together with ``report``, to ``build_drift_repeat_state``.
    """
    # Function-scope import, as in the original block.
    from src.services.drift_repeat_state import build_drift_repeat_state

    recent_reports_sql = text("""
        SELECT
            report_id,
            namespace,
            status,
            scanned_at,
            created_at,
            items
        FROM drift_reports
        WHERE namespace = :namespace
            AND created_at > now() - interval '24 hours'
        ORDER BY scanned_at DESC
        LIMIT 200
    """)
    bind_params = {"namespace": report.namespace}

    async with get_db_context() as db:
        fetched = await db.execute(recent_reports_sql, bind_params)
        # Materialise plain dicts so the builder does not depend on the
        # driver's row-mapping type.
        recent_rows = [dict(mapping) for mapping in fetched.mappings().all()]
    return build_drift_repeat_state(report, recent_rows)
_drift_repo: DriftReportRepository | None = None

View File

@@ -0,0 +1,433 @@
"""AwoooP Ansible audit helpers.
This module is intentionally non-executing. It exposes the Ansible audit
contract and repo-known playbook catalog so the truth chain can say whether
Ansible was actually considered or executed, without pretending that catalog
hints are runtime remediation.
"""
from __future__ import annotations
import json
from typing import Any
import structlog
from sqlalchemy import text
from src.db.base import get_db_context
logger = structlog.get_logger(__name__)
# operation_type values that identify a row as an Ansible executor audit
# record. _is_ansible_operation tests membership in this set, and
# build_ansible_truth publishes the sorted values in its audit contract.
ANSIBLE_OPERATION_TYPES = frozenset({
    "ansible_candidate_matched",
    "ansible_check_mode_executed",
    "ansible_apply_executed",
    "ansible_rollback_executed",
    "ansible_execution_skipped",
})
# Static catalog of repo-known playbooks used for keyword hints only.
# _catalog_hints matches each entry's "keywords" as plain substrings of the
# lower-cased incident/drift text; the "keywords" list itself is not included
# in the public candidate payload. Every entry here has auto_apply_enabled
# False and approval_required True.
_CATALOG: tuple[dict[str, Any], ...] = (
    {
        "catalog_id": "ansible:110-devops",
        "playbook_path": "infra/ansible/playbooks/110-devops.yml",
        "inventory_hosts": ["host_110"],
        "domains": ["swap", "harbor", "sentry", "gitea", "langfuse", "bitan", "runner", "keepalived", "nginx"],
        "keywords": [
            "110",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "swap",
            "harbor",
            "sentry",
            "gitea",
            "langfuse",
            "bitan",
            "runner",
            "github-runner",
            "keepalived",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:188-ai-web",
        "playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
        "inventory_hosts": ["host_188"],
        "domains": ["docker", "momo_backup", "signoz", "minio", "litellm", "n8n", "open_webui", "nginx"],
        "keywords": [
            "188",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "momo",
            "backup",
            "postgresql",
            "pg_backup",
            "signoz",
            "minio",
            "litellm",
            "n8n",
            "open-webui",
            "openwebui",
            "docker-registry",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:nginx-sync",
        "playbook_path": "infra/ansible/playbooks/nginx-sync.yml",
        "inventory_hosts": ["host_110", "host_188"],
        "domains": ["nginx", "proxy", "ollama_proxy", "tls"],
        "keywords": ["nginx", "proxy", "ollama", "gcp", "tls", "cert", "502", "upstream"],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        # The only entry without check-mode support and with risk_level "high".
        "catalog_id": "ansible:restore-password-auth",
        "playbook_path": "infra/ansible/playbooks/restore-password-auth.yml",
        "inventory_hosts": ["host_110", "host_120", "host_121", "host_188"],
        "domains": ["ssh", "password_auth"],
        "keywords": ["ssh", "passwordauthentication", "password auth", "login", "auth"],
        "supports_check_mode": False,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "high",
    },
)
def _get(row: dict[str, Any], key: str) -> Any:
return row.get(key)
def _tags(row: dict[str, Any]) -> list[str]:
raw = _get(row, "tags")
if isinstance(raw, list):
return [str(item).lower() for item in raw]
if isinstance(raw, str):
return [part.strip().lower() for part in raw.split(",") if part.strip()]
return []
def _first_present(row: dict[str, Any], keys: tuple[str, ...]) -> Any:
for key in keys:
value = _get(row, key)
if value not in (None, ""):
return value
return None
def _is_ansible_operation(row: dict[str, Any]) -> bool:
    """Decide whether an automation_operation_log row is Ansible-related.

    A row qualifies when any of the following holds:

    * its ``operation_type`` is one of ``ANSIBLE_OPERATION_TYPES``;
    * its tags contain ``"ansible"``;
    * any executor / execution-backend field equals ``"ansible"``;
    * any playbook-path field points below ``infra/ansible/`` or is a
      ``.yml`` / ``.yaml`` file whose path mentions ``ansible``.

    Every executor and playbook-path field is inspected. Looking only at the
    first non-empty one would miss rows such as ``input_executor="kubectl"``
    combined with ``output_execution_backend="ansible"``.
    """
    operation_type = str(_get(row, "operation_type") or "").lower()
    if operation_type in ANSIBLE_OPERATION_TYPES:
        return True
    if "ansible" in _tags(row):
        return True

    executor_keys = (
        "input_executor",
        "input_execution_backend",
        "output_executor",
        "output_execution_backend",
    )
    if any(
        str(_get(row, key) or "").strip().lower() == "ansible"
        for key in executor_keys
    ):
        return True

    playbook_path_keys = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    for key in playbook_path_keys:
        playbook_path = str(_get(row, key) or "").strip().lower()
        if "infra/ansible/" in playbook_path:
            return True
        # Explicit grouping: a YAML file only counts when the path also
        # mentions ansible.
        if playbook_path.endswith((".yml", ".yaml")) and "ansible" in playbook_path:
            return True
    return False
def _ansible_record(row: dict[str, Any]) -> dict[str, Any]:
    """Project an automation_operation_log row onto the Ansible record shape.

    Plain columns are copied as-is; playbook id/path, check mode and
    not-used reason are taken from the first non-empty input/output variant.
    """
    playbook_path_keys = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )
    leading_columns = ("op_id", "operation_type", "status", "actor")
    trailing_columns = ("dry_run_result", "error", "duration_ms", "tags", "created_at")

    # Built in three steps so the key order matches the published shape.
    record: dict[str, Any] = {column: _get(row, column) for column in leading_columns}
    record["playbook_id"] = _first_present(row, ("input_playbook_id", "output_playbook_id"))
    record["playbook_path"] = _first_present(row, playbook_path_keys)
    record["check_mode"] = _first_present(row, ("input_check_mode", "output_check_mode"))
    record["not_used_reason"] = _first_present(
        row, ("input_not_used_reason", "output_not_used_reason")
    )
    record.update({column: _get(row, column) for column in trailing_columns})
    return record
def _flatten_text(value: Any, pieces: list[str], remaining: int = 80) -> int:
if remaining <= 0 or value is None:
return remaining
if isinstance(value, dict):
for key, item in value.items():
remaining = _flatten_text(key, pieces, remaining)
remaining = _flatten_text(item, pieces, remaining)
if remaining <= 0:
break
return remaining
if isinstance(value, list):
for item in value:
remaining = _flatten_text(item, pieces, remaining)
if remaining <= 0:
break
return remaining
pieces.append(str(value).lower())
return remaining - 1
def _source_haystack(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> str:
    """Join the flattened, lower-cased text of incident and drift with spaces.

    Each source is flattened with its own default budget.
    """
    tokens: list[str] = []
    for source in (incident, drift):
        _flatten_text(source, tokens)
    return " ".join(tokens)
def _catalog_hints(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> dict[str, Any]:
    """Match catalog keywords against the incident/drift text.

    A catalog entry becomes a candidate when at least one of its keywords
    occurs as a substring of the flattened source text. Candidates carry the
    entry's public fields plus ``match_score`` (number of matched keywords)
    and ``matched_keywords``; they are ordered by descending score, then by
    ``catalog_id``. Entries without a match are listed by id.
    """
    exposed_fields = frozenset({
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "domains",
        "supports_check_mode",
        "auto_apply_enabled",
        "approval_required",
        "risk_level",
    })
    text_blob = _source_haystack(incident, drift)

    hits: list[dict[str, Any]] = []
    misses: list[str] = []
    for entry in _CATALOG:
        found = [word for word in entry["keywords"] if word in text_blob]
        if not found:
            misses.append(entry["catalog_id"])
            continue
        summary = {name: val for name, val in entry.items() if name in exposed_fields}
        summary["match_score"] = len(found)
        summary["matched_keywords"] = found
        hits.append(summary)

    ranked = sorted(
        hits,
        key=lambda hit: (-int(hit["match_score"]), str(hit["catalog_id"])),
    )
    return {
        "match_mode": "static_catalog_keyword_hint_v1",
        "decision_effect": "none",
        "available_count": len(_CATALOG),
        "candidates": ranked,
        "unmatched_catalog_ids": misses,
    }
def build_ansible_truth(
    automation_ops: list[dict[str, Any]],
    *,
    incident: dict[str, Any] | None,
    drift: dict[str, Any] | None,
) -> dict[str, Any]:
    """Assemble the Ansible section of the truth chain.

    ``records`` holds the audited rows recognised as Ansible operations and
    ``considered`` is true when there is at least one. ``candidate_catalog``
    holds keyword hints derived from ``incident`` and ``drift``.
    ``not_used_reason`` is filled only when no audited row was found.
    """
    ansible_rows = list(map(_ansible_record, filter(_is_ansible_operation, automation_ops)))

    if ansible_rows:
        reason = None
    else:
        reason = "no automation_operation_log row with Ansible operation type, tag, or executor backend for this source"

    contract = {
        "schema_version": "ansible_executor_audit_v1",
        "operation_types": sorted(ANSIBLE_OPERATION_TYPES),
        "required_audit_fields": [
            "operation_type",
            "status",
            "actor",
            "input.executor",
            "input.playbook_path",
            "input.check_mode",
            "output.not_used_reason",
            "dry_run_result",
        ],
        "default_execution_mode": "catalog/dry-run audit only until approval execution is explicitly wired",
    }
    return {
        "considered": bool(ansible_rows),
        "records": ansible_rows,
        "audit_contract": contract,
        "candidate_catalog": _catalog_hints(incident, drift),
        "not_used_reason": reason,
    }
def _incident_public_dict(incident: Any) -> dict[str, Any]:
if incident is None:
return {}
if isinstance(incident, dict):
return incident
severity = getattr(incident, "severity", None)
signals_payload: list[dict[str, Any]] = []
for signal in getattr(incident, "signals", None) or []:
signals_payload.append({
"alert_name": getattr(signal, "alert_name", None),
"labels": getattr(signal, "labels", None) or {},
"annotations": getattr(signal, "annotations", None) or {},
})
return {
"incident_id": getattr(incident, "incident_id", None),
"project_id": getattr(incident, "project_id", None),
"alertname": getattr(incident, "alertname", None),
"alert_category": getattr(incident, "alert_category", None),
"notification_type": getattr(incident, "notification_type", None),
"severity": getattr(severity, "value", severity),
"affected_services": getattr(incident, "affected_services", None) or [],
"signals": signals_payload,
}
def build_ansible_decision_audit_payload(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> dict[str, Any] | None:
    """Build the audit-row payload for one decision, or ``None``.

    Returns ``None`` when the catalog has no keyword candidates for the
    incident. Otherwise returns an ``ansible_candidate_matched`` payload with
    status ``dry_run`` that lists at most five candidates and a preview of
    the proposal (at most 240 characters).
    """
    incident_view = _incident_public_dict(incident)
    hints = _catalog_hints(incident_view, None)
    matched = hints.get("candidates") or []
    if not matched:
        return None

    candidate_fields = (
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "risk_level",
        "match_score",
        "matched_keywords",
    )
    top_candidates = [
        {field: entry[field] for field in candidate_fields}
        for entry in matched[:5]
    ]
    preview_source = (
        proposal_data.get("action")
        or proposal_data.get("kubectl_command")
        or ""
    )

    request_side = {
        "incident_id": str(incident_view.get("incident_id") or ""),
        "executor": "ansible",
        "execution_backend": "ansible",
        "decision_path": decision_path,
        "check_mode": True,
        "apply_enabled": False,
        "approval_required": True,
        "candidate_catalog_schema": hints["match_mode"],
        "executor_candidates": top_candidates,
        "proposal_source": proposal_data.get("source", ""),
        "proposal_risk_level": proposal_data.get("risk_level", ""),
        "proposal_action_preview": str(preview_source)[:240],
    }
    result_side = {
        "not_used_reason": not_used_reason,
        "decision_effect": "audit_only",
        "next_required_step": "wire approval_execution to Ansible check-mode before apply",
    }
    dry_run_summary = {
        "check_mode_executed": False,
        "candidate_count": len(matched),
        "reason": not_used_reason,
    }
    return {
        "operation_type": "ansible_candidate_matched",
        "status": "dry_run",
        "input": request_side,
        "output": result_side,
        "dry_run_result": dry_run_summary,
        "tags": ["ansible", "decision", "candidate", "check_mode_pending"],
    }
async def record_ansible_decision_audit(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> bool:
    """Best-effort write of one Ansible candidate audit row.

    Returns ``True`` only when a new row was inserted. Returns ``False`` when
    there is no candidate payload, when a matching
    ``ansible_candidate_matched`` row already exists for the incident, or
    when the write fails (the failure is logged as a warning, not raised).
    """
    audit = build_ansible_decision_audit_payload(
        incident=incident,
        proposal_data=proposal_data,
        decision_path=decision_path,
        not_used_reason=not_used_reason,
    )
    if audit is None:
        return False

    incident_id = audit["input"]["incident_id"]
    project_id = getattr(incident, "project_id", None) or "awoooi"
    try:
        dedupe_sql = text("""
            SELECT op_id
            FROM automation_operation_log
            WHERE operation_type = 'ansible_candidate_matched'
                AND input ->> 'incident_id' = :incident_id
                AND input ->> 'executor' = 'ansible'
            LIMIT 1
        """)
        insert_sql = text("""
            INSERT INTO automation_operation_log (
                operation_type, actor, status,
                input, output, dry_run_result, tags
            ) VALUES (
                :operation_type,
                'decision_manager',
                :status,
                CAST(:input AS jsonb),
                CAST(:output AS jsonb),
                CAST(:dry_run_result AS jsonb),
                :tags
            )
        """)
        async with get_db_context(str(project_id)) as db:
            # Check-then-insert: skip when this incident already has a row.
            duplicate = await db.execute(dedupe_sql, {"incident_id": incident_id})
            if duplicate.scalar() is not None:
                return False

            insert_params = {
                "operation_type": audit["operation_type"],
                "status": audit["status"],
                "input": json.dumps(audit["input"], ensure_ascii=False),
                "output": json.dumps(audit["output"], ensure_ascii=False),
                "dry_run_result": json.dumps(audit["dry_run_result"], ensure_ascii=False),
                "tags": audit["tags"],
            }
            await db.execute(insert_sql, insert_params)
        return True
    except Exception as exc:
        # Deliberately broad: the audit row must never break the decision flow.
        logger.warning(
            "ansible_decision_audit_write_failed",
            incident_id=incident_id,
            error=str(exc),
        )
        return False

View File

@@ -7,6 +7,7 @@ Telegram cards can be audited without guessing which subsystem owns the truth.
from __future__ import annotations
import json
from datetime import date, datetime
from decimal import Decimal
from typing import Any
@@ -16,10 +17,13 @@ import structlog
from sqlalchemy import text
from src.db.base import get_db_context
from src.services.awooop_ansible_audit_service import build_ansible_truth
from src.services.drift_repeat_state import build_drift_repeat_state
logger = structlog.get_logger(__name__)
_MAX_ROWS = 100
_JSON_TEXT_FIELDS = {"gate_result", "source_envelope"}
def _clean(value: Any) -> Any:
@@ -38,7 +42,15 @@ def _clean(value: Any) -> Any:
def _clean_row(row: Any) -> dict[str, Any]:
return {key: _clean(value) for key, value in dict(row).items()}
cleaned: dict[str, Any] = {}
for key, value in dict(row).items():
if key in _JSON_TEXT_FIELDS and isinstance(value, str):
try:
value = json.loads(value)
except json.JSONDecodeError:
pass
cleaned[key] = _clean(value)
return cleaned
async def _fetch_all(db: Any, sql: str, params: dict[str, Any]) -> list[dict[str, Any]]:
@@ -85,6 +97,127 @@ def _operation_ids(automation_ops: list[dict[str, Any]]) -> list[str]:
return [str(row["op_id"]) for row in automation_ops if row.get("op_id")]
def _build_reconciliation(
*,
incident: dict[str, Any] | None,
approvals: list[dict[str, Any]],
evidence_rows: list[dict[str, Any]],
automation_ops: list[dict[str, Any]],
timeline_events: list[dict[str, Any]],
) -> dict[str, Any]:
"""Build a read-only consistency report across incident lifecycle tables."""
if incident is None:
return {
"schema_version": "incident_reconciliation_v1",
"applicable": False,
"consistency_status": "not_applicable",
"operator_next_state": "not_applicable",
"facts": {},
"mismatches": [],
}
incident_status = str(incident.get("status") or "unknown").upper()
incident_closed = incident_status in {"RESOLVED", "CLOSED"}
latest_approval = approvals[0] if approvals else None
approval_status = str((latest_approval or {}).get("status") or "none").upper()
approval_action = str((latest_approval or {}).get("action") or "")
approval_resolved = bool((latest_approval or {}).get("resolved_at"))
attempted = sum(int(row.get("sensors_attempted") or 0) for row in evidence_rows)
succeeded = sum(int(row.get("sensors_succeeded") or 0) for row in evidence_rows)
executed_ops = [
row
for row in automation_ops
if str(row.get("status") or "").lower()
in {"success", "completed", "executed"}
]
mismatches: list[dict[str, Any]] = []
def add(code: str, severity: str, message: str) -> None:
mismatches.append({
"code": code,
"severity": severity,
"message": message,
})
if (
latest_approval
and not incident_closed
and (approval_resolved or approval_status in {"APPROVED", "REJECTED"})
):
add(
"incident_open_after_approval_resolved",
"high",
"Approval reached a terminal state while the incident is still open.",
)
if approval_status == "APPROVED" and not automation_ops:
add(
"approval_approved_without_execution_record",
"high",
"Approval is approved but automation_operation_log has no linked execution record.",
)
if (
approval_status == "APPROVED"
and "NO_ACTION" in approval_action.upper()
and not executed_ops
):
add(
"approval_no_action_without_execution",
"high",
"Approval resolved to NO_ACTION and no executor produced a successful operation.",
)
if attempted > 0 and succeeded == 0:
add(
"evidence_all_sensors_failed",
"medium",
"Evidence collection attempted sensors but none succeeded.",
)
if latest_approval and not timeline_events:
add(
"timeline_missing_for_approval",
"medium",
"Approval exists but timeline_events has no linked lifecycle entries.",
)
high_count = sum(1 for row in mismatches if row["severity"] == "high")
medium_count = sum(1 for row in mismatches if row["severity"] == "medium")
if high_count:
consistency_status = "blocked"
operator_next_state = "manual_required"
elif medium_count:
consistency_status = "degraded"
operator_next_state = "investigate"
else:
consistency_status = "consistent"
operator_next_state = "continue"
return {
"schema_version": "incident_reconciliation_v1",
"applicable": True,
"consistency_status": consistency_status,
"operator_next_state": operator_next_state,
"facts": {
"incident_id": incident.get("incident_id"),
"incident_status": incident_status,
"incident_closed": incident_closed,
"latest_approval_id": (latest_approval or {}).get("id"),
"latest_approval_status": approval_status,
"latest_approval_action": approval_action,
"approval_resolved": approval_resolved,
"evidence_records": len(evidence_rows),
"sensors_attempted": attempted,
"sensors_succeeded": succeeded,
"automation_operation_records": len(automation_ops),
"executed_operation_records": len(executed_ops),
"timeline_events": len(timeline_events),
},
"mismatches": mismatches,
}
def _truth_status(
*,
incident: dict[str, Any] | None,
@@ -255,6 +388,7 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
created_at,
resolved_at,
interpretation,
items,
narrative_text
FROM drift_reports
WHERE report_id = :source_id
@@ -411,15 +545,30 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
error,
duration_ms,
tags,
input ->> 'executor' AS input_executor,
input ->> 'execution_backend' AS input_execution_backend,
input ->> 'playbook_id' AS input_playbook_id,
input ->> 'playbook_path' AS input_playbook_path,
input ->> 'ansible_playbook_path' AS input_ansible_playbook_path,
input ->> 'check_mode' AS input_check_mode,
input ->> 'not_used_reason' AS input_not_used_reason,
output ->> 'executor' AS output_executor,
output ->> 'execution_backend' AS output_execution_backend,
output ->> 'playbook_id' AS output_playbook_id,
output ->> 'playbook_path' AS output_playbook_path,
output ->> 'ansible_playbook_path' AS output_ansible_playbook_path,
output ->> 'check_mode' AS output_check_mode,
output ->> 'not_used_reason' AS output_not_used_reason,
created_at
FROM automation_operation_log
WHERE coalesce(input::text, '') LIKE :needle
WHERE incident_id::text = :incident_id
OR coalesce(input::text, '') LIKE :needle
OR coalesce(output::text, '') LIKE :needle
OR coalesce(array_to_string(tags, ','), '') LIKE :needle
ORDER BY created_at DESC
LIMIT :limit
""",
{"needle": f"%{incident_id}%", "limit": _MAX_ROWS},
{"incident_id": incident_id, "needle": f"%{incident_id}%", "limit": _MAX_ROWS},
)
km_entries = await _fetch_all(
db,
@@ -447,55 +596,27 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
"reports": [],
}
if drift is not None:
repeat_summary = await _fetch_one(
recent_drift_reports = await _fetch_all(
db,
"""
SELECT
count(*) AS occurrences_12h,
min(scanned_at) AS first_scanned_at,
max(scanned_at) AS last_scanned_at
report_id,
namespace,
status,
scanned_at,
created_at,
items,
interpretation,
narrative_text
FROM drift_reports
WHERE created_at > now() - interval '12 hours'
WHERE created_at > now() - interval '24 hours'
AND namespace = :namespace
AND status = :status
AND high_count = :high_count
AND medium_count = :medium_count
AND info_count = :info_count
""",
{
"namespace": drift["namespace"],
"status": drift["status"],
"high_count": drift["high_count"],
"medium_count": drift["medium_count"],
"info_count": drift["info_count"],
},
)
repeat_reports = await _fetch_all(
db,
"""
SELECT report_id, scanned_at, created_at, status, interpretation, narrative_text
FROM drift_reports
WHERE created_at > now() - interval '12 hours'
AND namespace = :namespace
AND status = :status
AND high_count = :high_count
AND medium_count = :medium_count
AND info_count = :info_count
ORDER BY scanned_at DESC
LIMIT 20
LIMIT 200
""",
{
"namespace": drift["namespace"],
"status": drift["status"],
"high_count": drift["high_count"],
"medium_count": drift["medium_count"],
"info_count": drift["info_count"],
},
{"namespace": drift["namespace"]},
)
drift_repeats = {
**(repeat_summary or {}),
"reports": repeat_reports,
}
drift_repeats = build_drift_repeat_state(drift, recent_drift_reports)
gateway_mcp_rows = await _fetch_all(
db,
@@ -507,6 +628,7 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
trace_id,
agent_id,
tool_name,
gate_result,
result_status,
block_gate,
block_reason,
@@ -572,6 +694,13 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
legacy_mcp_total=legacy_mcp_summary["total"],
outbound_visible_total=len(outbound_rows),
)
reconciliation = _build_reconciliation(
incident=incident,
approvals=approvals,
evidence_rows=evidence_rows,
automation_ops=automation_ops,
timeline_events=timeline_events,
)
evidence_totals = {
"records": len(evidence_rows),
@@ -615,12 +744,9 @@ async def fetch_truth_chain(source_id: str, project_id: str = "awoooi") -> dict[
},
"execution": {
"automation_operation_log": automation_ops,
"ansible": {
"considered": False,
"records": [],
"not_used_reason": "no first-class Ansible executor audit record in current truth chain",
},
"ansible": build_ansible_truth(automation_ops, incident=incident, drift=drift),
},
"reconciliation": reconciliation,
"learning": {
"knowledge_entries": km_entries,
},

View File

@@ -1790,6 +1790,25 @@ class DecisionManager:
token.proposal_data["auto_approve_reason"] = auto_decision.reason_detail
await self._save_token(token)
try:
from src.services.awooop_ansible_audit_service import (
record_ansible_decision_audit as _record_ansible_decision_audit,
)
_fire_and_forget(
_record_ansible_decision_audit(
incident=incident,
proposal_data=token.proposal_data,
decision_path="auto_execute",
not_used_reason=(
"auto_execute selected existing executor path; "
"Ansible check-mode is not wired yet"
),
)
)
except Exception as _ansible_audit_err:
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
# 觸發自動執行 (非阻塞)
_fire_and_forget(
self._auto_execute(incident, token)
@@ -1813,6 +1832,24 @@ class DecisionManager:
),
)
)
try:
from src.services.awooop_ansible_audit_service import (
record_ansible_decision_audit as _record_ansible_decision_audit,
)
_fire_and_forget(
_record_ansible_decision_audit(
incident=incident,
proposal_data=token.proposal_data,
decision_path="manual_approval",
not_used_reason=(
"manual approval required; Ansible check-mode "
"is not wired to approval execution yet"
),
)
)
except Exception as _ansible_audit_err:
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
_fire_and_forget(
_push_decision_to_telegram(incident, token.proposal_data)
)

View File

@@ -148,7 +148,13 @@ class DriftNarratorService:
# 2026-04-18 B 方案: LLM 同時產 narrative + 結構化 items(取代 str()[:30])
# 2026-04-20 P0.2: 追加 recommendation(action/confidence/reason)
narrative, items, recommendation = await self._generate_narrative_and_items(report, interpretation)
await self._send_telegram(report, narrative, items, recommendation)
repeat_state = None
try:
from src.repositories.drift_repository import get_drift_repository
repeat_state = await get_drift_repository().get_repeat_state(report)
except Exception as e:
logger.warning("drift_repeat_state_lookup_failed", report_id=report.report_id, error=str(e))
await self._send_telegram(report, narrative, items, recommendation, repeat_state)
# 寫入 DB narrative_text (Phase 30 ADR-067)
try:
@@ -643,6 +649,7 @@ class DriftNarratorService:
narrative: str,
items: list[dict],
recommendation: dict | None = None,
repeat_state: dict | None = None,
) -> None:
"""
推送 TYPE-4D Config Drift 卡片(ADR-075+ B 方案智能摘要)
@@ -654,7 +661,7 @@ class DriftNarratorService:
"""
from src.services.telegram_gateway import get_telegram_gateway
diff_summary = self._render_telegram_body(report, narrative, items, recommendation)
diff_summary = self._render_telegram_body(report, narrative, items, recommendation, repeat_state)
try:
tg = get_telegram_gateway()
@@ -711,6 +718,7 @@ class DriftNarratorService:
narrative: str,
items: list[dict],
recommendation: dict | None = None,
repeat_state: dict | None = None,
) -> str:
"""
組裝 Telegram 卡片 body(B 方案格式 + P0.2 AI 推薦)
@@ -741,6 +749,10 @@ class DriftNarratorService:
}.get(_act, _act)
lines.append(f"🎯 AI 建議:{_emoji_action} ({int(_conf * 100)}%) — {_reason}\n")
repeat_line = self._render_repeat_state(repeat_state)
if repeat_line:
lines.append(f"{repeat_line}\n")
lines.append(f"🤖 AI 研判\n{narrative}\n")
# 用非 trivial + 非白名單 的實際可操作數顯示
@@ -761,6 +773,23 @@ class DriftNarratorService:
return "\n".join(lines)
def _render_repeat_state(self, repeat_state: dict | None) -> str:
"""Render operator-visible repeat/stage metadata for Telegram."""
if not repeat_state:
return ""
fingerprint = str(repeat_state.get("fingerprint") or "unknown")
occurrences = int(repeat_state.get("occurrences_12h") or 0)
window_hours = int(repeat_state.get("window_hours") or 12)
stage = str(repeat_state.get("operator_stage") or "unknown")
if occurrences <= 1:
repeat_text = f"{window_hours}h 內首次出現"
else:
repeat_text = f"{window_hours}h 內第 {occurrences} 次同指紋"
return (
"流程: drift_scanned → ai_analyzed → "
f"{stage}\n重複: {repeat_text}\n指紋: {fingerprint}"
)
# ============================================================
# Singleton

View File

@@ -0,0 +1,180 @@
"""Stable repeat identity for Config Drift reports.
The drift scanner emits a fresh ``report_id`` for every run. Operators need a
stable identity that answers whether two reports describe the same drift, not
just whether they have the same HIGH/MEDIUM/INFO counts.
"""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timedelta, timezone
from typing import Any
# Version tag emitted as "schema_version" in the repeat-state payload.
SCHEMA_VERSION = "drift_repeat_state_v1"
# Version tag hashed into every fingerprint by build_drift_fingerprint, so
# changing it changes all fingerprints.
FINGERPRINT_VERSION = "drift_fingerprint_v1"
def _get(obj: Any, key: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
def _enum_value(value: Any) -> Any:
return getattr(value, "value", value)
def _jsonable(value: Any) -> Any:
value = _enum_value(value)
if isinstance(value, dict):
return {str(k): _jsonable(v) for k, v in value.items()}
if isinstance(value, list):
return [_jsonable(v) for v in value]
if isinstance(value, tuple):
return [_jsonable(v) for v in value]
if isinstance(value, datetime):
return value.isoformat()
return value
def _canonical_json(value: Any) -> str:
    """Serialise ``value`` to compact JSON with sorted keys.

    Non-ASCII characters are kept as-is, and objects ``json`` cannot encode
    are rendered with ``str``.
    """
    prepared = _jsonable(value)
    return json.dumps(
        prepared,
        default=str,
        separators=(",", ":"),
        sort_keys=True,
        ensure_ascii=False,
    )
def _parse_datetime(value: Any) -> datetime | None:
if value is None:
return None
if isinstance(value, datetime):
parsed = value
if parsed.tzinfo is not None:
return parsed.astimezone(timezone.utc).replace(tzinfo=None)
return parsed
if isinstance(value, str):
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
if parsed.tzinfo is not None:
return parsed.astimezone(timezone.utc).replace(tzinfo=None)
return parsed
except ValueError:
return None
return None
def _iso(value: Any) -> str | None:
    """Return the ISO-8601 form of ``value``, or ``None`` if it cannot be parsed."""
    moment = _parse_datetime(value)
    if moment is None:
        return None
    return moment.isoformat()
def drift_item_identity(item: Any) -> dict[str, Any]:
    """Extract the fields that identify one drift item.

    ``item`` may be a dict or an object. Resource kind/name, namespace, field
    path and drift level are stringified; the git and actual values are made
    JSON-friendly; the allowlist flag is coerced to ``bool``.
    """
    text_fields = ("resource_kind", "resource_name", "namespace", "field_path")
    identity: dict[str, Any] = {name: str(_get(item, name, "")) for name in text_fields}
    identity["drift_level"] = str(_enum_value(_get(item, "drift_level", "")))
    identity["git_value"] = _jsonable(_get(item, "git_value"))
    identity["actual_value"] = _jsonable(_get(item, "actual_value"))
    identity["is_allowlisted"] = bool(_get(item, "is_allowlisted", False))
    return identity
def build_drift_fingerprint(namespace: str, items: list[Any]) -> str:
    """Hash the namespace and the drift items into a ``dfp_``-prefixed id.

    Item identities are sorted by their canonical JSON before hashing, so the
    fingerprint does not depend on the order of ``items``. The result is the
    prefix plus the first 16 hex characters of a SHA-256 digest.
    """
    ordered = sorted(map(drift_item_identity, items), key=_canonical_json)
    envelope = {
        "version": FINGERPRINT_VERSION,
        "namespace": namespace,
        "items": ordered,
    }
    encoded = _canonical_json(envelope).encode("utf-8")
    return f"dfp_{hashlib.sha256(encoded).hexdigest()[:16]}"
def _report_identity(report: Any) -> dict[str, Any]:
    """Summarise a report (dict or object) as ids, timestamps and fingerprint.

    A missing or empty ``items`` value is treated as an empty list; the
    status is unwrapped from its enum form and stringified.
    """
    report_namespace = str(_get(report, "namespace", ""))
    report_items = list(_get(report, "items", []) or [])
    return {
        "report_id": _get(report, "report_id"),
        "namespace": report_namespace,
        "status": str(_enum_value(_get(report, "status", ""))),
        "scanned_at": _get(report, "scanned_at"),
        "created_at": _get(report, "created_at"),
        "fingerprint": build_drift_fingerprint(report_namespace, report_items),
    }
def build_drift_repeat_state(
    report: Any,
    recent_reports: list[Any],
    *,
    window_hours: int = 12,
    max_reports: int = 20,
) -> dict[str, Any]:
    """Summarize repeat state for one drift report using stable fingerprints.

    A candidate counts as a repeat of *report* when it has a report id, its
    fingerprint equals the current report's fingerprint, and its timestamp
    (``scanned_at``, falling back to ``created_at``) is not older than
    ``window_hours`` before the current report's timestamp. Candidates with no
    parseable timestamp are kept. Reports are de-duplicated by report id.

    Args:
        report: The drift report being described.
        recent_reports: Other reports to compare against; ``None`` is treated
            as an empty list.
        window_hours: Size of the look-back window in hours.
        max_reports: Maximum number of matching reports listed under
            ``"reports"`` (newest first). Values ``<= 0`` list none.

    Returns:
        A dict with the schema version, fingerprint, matching strategy,
        window size, occurrence count, first/last timestamps, operator stage,
        and the newest matching reports. The count key is always named
        ``occurrences_12h``, whatever ``window_hours`` is.
    """

    def _observed_at(identity: dict[str, Any]) -> datetime | None:
        # Prefer the scan time; fall back to the row creation time.
        return _parse_datetime(identity.get("scanned_at")) or _parse_datetime(
            identity.get("created_at")
        )

    current = _report_identity(report)
    # _parse_datetime normalizes aware timestamps to naive UTC, so the fallback
    # clock must be naive UTC as well. A naive local ``datetime.now()`` would
    # shift the cutoff by the host's UTC offset.
    current_time = _observed_at(current) or datetime.now(timezone.utc).replace(
        tzinfo=None
    )
    cutoff = current_time - timedelta(hours=window_hours)

    # Reuse the identity already computed for the current report instead of
    # fingerprinting it a second time.
    identities = [current]
    identities.extend(
        _report_identity(candidate) for candidate in recent_reports or []
    )

    by_id: dict[str, dict[str, Any]] = {}
    for identity in identities:
        report_id = str(identity.get("report_id") or "")
        if not report_id:
            continue
        if identity["fingerprint"] != current["fingerprint"]:
            continue
        observed = _observed_at(identity)
        if observed is not None and observed < cutoff:
            continue
        # Later entries with the same id replace earlier ones.
        by_id[report_id] = identity

    matches = sorted(
        by_id.values(),
        key=lambda row: _observed_at(row) or datetime.min,
    )
    first = matches[0] if matches else current
    last = matches[-1] if matches else current

    status = current.get("status") or "unknown"
    operator_stage = "pending_human" if status == "pending" else str(status)

    # ``matches[-0:]`` is the whole list, so a non-positive limit needs an
    # explicit empty result.
    listed = matches[-max_reports:] if max_reports > 0 else []

    return {
        "schema_version": SCHEMA_VERSION,
        "fingerprint": current["fingerprint"],
        "matching_strategy": "namespace_and_stable_items_v1",
        "window_hours": window_hours,
        "occurrences_12h": len(matches),
        "first_scanned_at": _iso(first.get("scanned_at") or first.get("created_at")),
        "last_scanned_at": _iso(last.get("scanned_at") or last.get("created_at")),
        "operator_stage": operator_stage,
        "reports": [
            {
                "report_id": row.get("report_id"),
                "scanned_at": _iso(row.get("scanned_at")),
                "created_at": _iso(row.get("created_at")),
                "status": row.get("status"),
            }
            for row in reversed(listed)
        ],
    }

View File

@@ -104,6 +104,11 @@ _AUTOMATION_STAGE_MAP = {
"capacity_recommendation": "investigator",
"quota_enforced": "safe",
"notification_formatted": "safe",
"ansible_candidate_matched": "ai_router",
"ansible_check_mode_executed": "executor",
"ansible_apply_executed": "executor",
"ansible_rollback_executed": "executor",
"ansible_execution_skipped": "safe",
}
_AUTOMATION_STATUS_MAP = {
"pending": "pending",

View File

@@ -1,6 +1,36 @@
from __future__ import annotations
from src.services.awooop_truth_chain_service import _truth_status
from datetime import datetime, timedelta, timezone
from types import SimpleNamespace
from src.services.awooop_ansible_audit_service import (
build_ansible_decision_audit_payload,
build_ansible_truth,
)
from src.services.awooop_truth_chain_service import (
_build_reconciliation,
_clean_row,
_truth_status,
)
from src.services.drift_repeat_state import (
build_drift_fingerprint,
build_drift_repeat_state,
)
def test_clean_row_parses_json_text_fields_for_gateway_visibility() -> None:
    """``gate_result``/``source_envelope`` JSON text is decoded; ``plain_text`` stays a string."""
    raw_row = {
        "gate_result": '{"schema_version":"legacy_mcp_bridge_v1","policy_enforced":false}',
        "source_envelope": '{"adapter":"legacy_telegram_gateway"}',
        "plain_text": '{"not":"parsed"}',
    }

    result = _clean_row(raw_row)
    gate = result["gate_result"]

    assert gate["schema_version"] == "legacy_mcp_bridge_v1"
    assert gate["policy_enforced"] is False
    assert result["source_envelope"]["adapter"] == "legacy_telegram_gateway"
    assert result["plain_text"] == '{"not":"parsed"}'
def test_truth_status_marks_no_action_approval_as_manual_required() -> None:
@@ -46,3 +76,217 @@ def test_truth_status_marks_repeated_pending_drift_as_human_needed() -> None:
assert status["needs_human"] is True
assert "drift_report_pending_without_resolution" in status["blockers"]
assert "drift_ai_confidence_zero" in status["blockers"]
def _drift_item(
    *,
    resource_name: str = "awoooi-api",
    field_path: str = "spec.template.spec.containers[0].image",
    actual_value: str = "api:hotfix",
) -> dict:
    """Build a high-level Deployment drift item fixture.

    Only the resource name, field path, and actual value can be overridden;
    every other field is fixed.
    """
    return dict(
        resource_kind="Deployment",
        resource_name=resource_name,
        namespace="awoooi-prod",
        field_path=field_path,
        git_value="api:main",
        actual_value=actual_value,
        drift_level="high",
        is_allowlisted=False,
    )
def test_drift_fingerprint_is_stable_across_item_order() -> None:
    """Item order must not change the fingerprint; item content must."""
    api_item = _drift_item(resource_name="awoooi-api")
    worker_item = _drift_item(
        resource_name="awoooi-worker",
        field_path="spec.template.spec.serviceAccountName",
        actual_value="awoooi-executor",
    )
    mutated_worker = {**worker_item, "actual_value": "different-service-account"}

    forward = build_drift_fingerprint("awoooi-prod", [api_item, worker_item])
    backward = build_drift_fingerprint("awoooi-prod", [worker_item, api_item])
    mutated = build_drift_fingerprint("awoooi-prod", [api_item, mutated_worker])

    assert forward == backward
    assert forward.startswith("dfp_")
    assert forward != mutated
def test_drift_repeat_state_counts_matching_fingerprint_only() -> None:
    """Only same-fingerprint reports inside the 12h window are counted."""
    scan_time = datetime(2026, 5, 13, 1, 0, tzinfo=timezone.utc)
    current_report = {
        "report_id": "drift-now",
        "namespace": "awoooi-prod",
        "status": "pending",
        "scanned_at": scan_time,
        "created_at": scan_time,
        "items": [_drift_item()],
    }

    def _earlier(report_id: str, hours_ago: int, **overrides) -> dict:
        # Copy of the current report, shifted back in time.
        stamp = scan_time - timedelta(hours=hours_ago)
        return {
            **current_report,
            "report_id": report_id,
            "scanned_at": stamp,
            "created_at": stamp,
            **overrides,
        }

    history = [
        _earlier("drift-prev", 1),
        _earlier("drift-different", 2, items=[_drift_item(actual_value="api:other")]),
        _earlier("drift-old", 13),
    ]

    state = build_drift_repeat_state(current_report, history)

    assert state["schema_version"] == "drift_repeat_state_v1"
    assert state["fingerprint"].startswith("dfp_")
    assert state["matching_strategy"] == "namespace_and_stable_items_v1"
    assert state["occurrences_12h"] == 2
    assert state["operator_stage"] == "pending_human"
    listed_ids = [entry["report_id"] for entry in state["reports"]]
    assert listed_ids == [
        "drift-now",
        "drift-prev",
    ]
def test_reconciliation_blocks_open_incident_after_no_action_approval() -> None:
    """An approved NO_ACTION on a still-open incident must reconcile as blocked."""
    approved_no_action = {
        "id": "approval-1",
        "status": "APPROVED",
        "action": "未知操作 | NO_ACTION",
        "resolved_at": "2026-05-13T01:00:00+00:00",
    }

    result = _build_reconciliation(
        incident={"incident_id": "INC-1", "status": "INVESTIGATING"},
        approvals=[approved_no_action],
        evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 0}],
        automation_ops=[],
        timeline_events=[],
    )
    codes = {mismatch["code"] for mismatch in result["mismatches"]}
    facts = result["facts"]

    assert result["schema_version"] == "incident_reconciliation_v1"
    assert result["consistency_status"] == "blocked"
    assert result["operator_next_state"] == "manual_required"
    assert facts["incident_closed"] is False
    assert facts["automation_operation_records"] == 0
    for expected_code in (
        "incident_open_after_approval_resolved",
        "approval_approved_without_execution_record",
        "approval_no_action_without_execution",
        "evidence_all_sensors_failed",
        "timeline_missing_for_approval",
    ):
        assert expected_code in codes
def test_reconciliation_marks_consistent_resolved_execution() -> None:
    """A resolved incident with approval, execution, and timeline has no mismatches."""
    restart_approval = {
        "id": "approval-2",
        "status": "APPROVED",
        "action": "restart service",
        "resolved_at": "2026-05-13T01:00:00+00:00",
    }

    result = _build_reconciliation(
        incident={"incident_id": "INC-2", "status": "RESOLVED"},
        approvals=[restart_approval],
        evidence_rows=[{"sensors_attempted": 8, "sensors_succeeded": 7}],
        automation_ops=[{"status": "success"}],
        timeline_events=[{"event_type": "executor", "status": "success"}],
    )

    assert result["consistency_status"] == "consistent"
    assert result["operator_next_state"] == "continue"
    assert result["mismatches"] == []
def test_ansible_truth_surfaces_audited_check_mode_record() -> None:
    """An audited check-mode operation row is surfaced as an Ansible record."""
    check_mode_op = {
        "op_id": "op-ansible-1",
        "operation_type": "ansible_check_mode_executed",
        "status": "dry_run",
        "actor": "platform_operator",
        "input_playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
        "input_check_mode": "true",
        "dry_run_result": {"changed": 1},
        "tags": ["ansible", "check_mode"],
        "created_at": "2026-05-12T22:00:00+08:00",
    }

    truth = build_ansible_truth(
        [check_mode_op],
        incident={"incident_id": "INC-1", "alertname": "momo pg_backup failed on 188"},
        drift=None,
    )
    record = truth["records"][0]
    catalog = truth["candidate_catalog"]
    top_candidate = catalog["candidates"][0]

    assert truth["considered"] is True
    assert truth["not_used_reason"] is None
    assert record["playbook_path"] == "infra/ansible/playbooks/188-ai-web.yml"
    assert record["check_mode"] == "true"
    assert record["dry_run_result"] == {"changed": 1}
    assert "ansible_check_mode_executed" in truth["audit_contract"]["operation_types"]
    assert catalog["decision_effect"] == "none"
    assert top_candidate["catalog_id"] == "ansible:188-ai-web"
    assert top_candidate["auto_apply_enabled"] is False
def test_ansible_truth_keeps_catalog_hint_separate_from_runtime_use() -> None:
    """A catalog hint alone must not mark Ansible as considered."""
    nginx_incident = {"incident_id": "INC-2", "alertname": "nginx 502 upstream timeout"}

    truth = build_ansible_truth([], incident=nginx_incident, drift=None)
    catalog = truth["candidate_catalog"]
    top_candidate = catalog["candidates"][0]

    assert truth["considered"] is False
    assert truth["records"] == []
    assert truth["not_used_reason"].startswith("no automation_operation_log row")
    assert top_candidate["catalog_id"] == "ansible:nginx-sync"
    assert top_candidate["approval_required"] is True
    assert catalog["decision_effect"] == "none"
def test_ansible_decision_audit_payload_is_dry_run_only() -> None:
    """The decision audit payload is a dry-run, audit-only candidate record."""
    unhealthy_signal = SimpleNamespace(
        alert_name="DockerContainerUnhealthy",
        labels={"alertname": "DockerContainerUnhealthy", "container": "bitan-pharmacy-bitan-1"},
        annotations={},
    )
    docker_incident = SimpleNamespace(
        incident_id="INC-DOCKER",
        project_id="awoooi",
        alert_category="infrastructure",
        notification_type="TYPE-3",
        severity=SimpleNamespace(value="P3"),
        affected_services=["bitan-pharmacy-bitan-1"],
        signals=[unhealthy_signal],
    )
    proposal = {"source": "expert_system", "risk_level": "low", "action": "NO_ACTION"}

    payload = build_ansible_decision_audit_payload(
        incident=docker_incident,
        proposal_data=proposal,
        decision_path="manual_approval",
        not_used_reason="manual approval required; Ansible check-mode is not wired yet",
    )

    assert payload is not None
    assert payload["operation_type"] == "ansible_candidate_matched"
    assert payload["status"] == "dry_run"

    audit_input = payload["input"]
    assert audit_input["executor"] == "ansible"
    assert audit_input["check_mode"] is True
    assert audit_input["apply_enabled"] is False
    assert audit_input["approval_required"] is True
    assert audit_input["executor_candidates"]

    assert payload["output"]["decision_effect"] == "audit_only"
    assert payload["dry_run_result"]["check_mode_executed"] is False

View File

@@ -1,98 +1,251 @@
## 2026-05-12 | Source Control Draft Reconcile Plan 草案
## 2026-05-13 | T4 Config Drift fingerprint repeat-state 已推版
**背景**統帥批准繼續推進後,本輪先同步最新 `gitea/main`,納入另一個 AwoooP Session 的 `legacy mcp audit -> gateway timeline` 進度,避免雙 Session 分歧。同步後繼續沿用低摩擦原則,只針對 refs-blocked repo 產生草案,不執行同步
**背景**：Config Drift Telegram 卡片只顯示單次 `report_id` 與 HIGH/MEDIUM/INFO 計數，Operator 無法判斷是否同一漂移一直重複、已跑到哪個流程階段、是否需要人工。舊 truth-chain repeat 只用 namespace/status/counts 分組，會把「剛好同計數但 items 不同」誤認為同一漂移。
**本次交付**
- 新增 `scripts/security/source-control-reconcile-plan.py`,只讀既有 redacted snapshot不呼叫遠端 Git不 fetch、不 push、不改 remote。
- 新增 `docs/schemas/source_control_reconcile_plan_v1.schema.json`
- 產出 `docs/security/source-control-reconcile-plan.snapshot.json``docs/security/SOURCE-CONTROL-RECONCILE-PLAN.md`
- Draft plan 涵蓋 3 個 refs-blocked mapped repos`wooo/awoooi``wooo/clawbot-v5``wooo/wooo-aiops`
- 更新 `SECURITY-SUPPLY-CHAIN-CONTRACT-MANIFEST`contract count 從 13 增至 14新增 `source_control_reconcile_plan_v1`
- 更新 `SECURITY-SUPPLY-CHAIN-PROGRESS``AWOOOP-MIRROR-ONLY-CONSUMPTION-CHECKLIST`,讓 AwoooP 可 mirror draft plan 但不得執行 refs sync
**修正**
- 新增 `drift_repeat_state.py`
- 以 namespace + sorted drift items 建立 stable fingerprint
- fingerprint 只看 drift 的實際 identity不看 report_id / 掃描時間
- repeat-state schema`drift_repeat_state_v1`
- `awooop_truth_chain_service`
- drift report 查詢納入 `items`
- repeat-state 改用 stable fingerprint，比對 24h 內候選，並回傳 12h repeat window。
- 回傳 `fingerprint``matching_strategy=namespace_and_stable_items_v1``operator_stage`、matching reports。
- `drift_narrator_service`
- Telegram drift card body 會追加:
- `流程: drift_scanned → ai_analyzed → pending_human`
- `重複: 12h 內第 N 次同指紋`
- `指紋: dfp_xxxxx`
- 這仍只揭露真相鏈狀態,不自動採納 / 回滾 / 忽略。
**邊界**
- Plan 狀態為 `draft_blocked`authenticated / admin_export server-side inventory 尚未完成前,不可執行。
- 未 push refs、未 force push、未刪 refs、未建立 GitHub repo、未改 visibility、未切 GitHub primary、未部署
- 人工批准未來也必須單一 repo 生效,不得批次套用到所有 repo
**驗證與推版**
- Local
- `py_compile`pass
- `ruff --select F,E9`pass
- `pytest tests/test_awooop_truth_chain_service.py tests/test_phase25_drift_detection.py tests/test_drift_interpreter_ollama_first.py tests/test_platform_router_order.py tests/test_awooop_operator_auth.py -q`37 passed。
- `git diff --check`pass。
- Gitea
- `5b348774 feat(awooop): expose drift repeat fingerprint` 已推 `gitea main`
- Code Review run `1938`success。
- CD run `1937`success。
- Deploy marker`3d38039b chore(cd): deploy 5b34877 [skip ci]`
- Production
- API/Web/Worker image 均為 `5b34877429c16c42f0f894eb4d7f0484711fde9b`
- K3s rollout statusAPI/Web/Worker success。
- `/api/v1/health`healthymock_mode=false。
- Truth-chain smoke `7f858956`
- `source_type=drift_report`
- `current_stage=dedup_or_repeat_updated`
- `stage_status=pending`
- `needs_human=true`
- `repeat_schema=drift_repeat_state_v1`
- `fingerprint=dfp_02dc625b64784b24`
- `matching_strategy=namespace_and_stable_items_v1`
- `operator_stage=pending_human`
- `repeat_12h=2`
- `outbound_visible=2`
- Production narrator render smoke
- `流程: drift_scanned → ai_analyzed → pending_human | 重複: 12h 內第 2 次同指紋 | 指紋: dfp_smoke1234`
**驗證**
- `source-control-reconcile-plan.py` 產生 3 plans
- JSON / schema / snapshot parse 通過
- `scripts/security/*.py` 可編譯。
- `git diff --check` 通過。
- PR diff added lines 未命中本輪敏感 token / credential pattern。
**重要校正**
- count-based repeat 會把 `7f858956` 算成 12 次
- 新 stable fingerprint 顯示同一 items fingerprint 12h 內是 2 次;這代表之前的 12 次是「同計數重複候選」,不是已證明同一漂移
## 2026-05-12 | Source Control Approval Board 低摩擦決策隊列
**整體進度**:
- Wave 0MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。
- T0Truth-chain read-only API 完成、部署、production smoke 完成。
- T1Channel Event hardening 完成、部署、production smoke 完成。
- T2legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成first-class Gateway enforced path 仍待後續 wave。
- T3Ansible audit contract + decision candidate dry-run audit 完成、部署、production smoke 完成。
- T4Config Drift stable fingerprint / repeat-state / Telegram stage visibility 完成、部署、production smoke 完成。
- 仍未完成T5 incident / approval / execution reconciliation、Ansible 真正 check-mode executor / diff / apply / rollback、first-class MCP Gateway enforcement。
**背景**:統帥批准繼續推進後,下一步原本是 Gitea authenticated read-only inventory但目前 `GITEA_READONLY_TOKEN` 未提供。本輪因此不使用可 push 的既有 Gitea remote credential 代替 read-only token避免把 inventory 與寫入權限憑證混在一起。
## 2026-05-13 | T3 Ansible decision candidate audit 已推版
**本次交付**
- 新增 `scripts/security/source-control-approval-board.py`,只讀既有 redacted snapshot不呼叫 Gitea/GitHub API不需要 token。
- 新增 `docs/schemas/source_control_approval_board_v1.schema.json`
- 產出 `docs/security/source-control-approval-board.snapshot.json``docs/security/SOURCE-CONTROL-APPROVAL-BOARD.md`
- Board 彙整 8 個 target其中 7 個為 pending approval`awoooi``clawbot-v5``wooo-aiops``wooo-infra-config``ewoooc``bitan-pharmacy``tsenyang-website``nexu-io/open-design` 維持 scope review
- 更新 `SECURITY-SUPPLY-CHAIN-CONTRACT-MANIFEST`contract count 從 12 增至 13新增 `source_control_approval_board_v1`
- 更新 `SECURITY-SUPPLY-CHAIN-PROGRESS``AWOOOP-MIRROR-ONLY-CONSUMPTION-CHECKLIST`,讓 AwoooP 可 mirror board 但不得執行 board item。
**背景**T3 第一段只讓 truth-chain 看得到 Ansible audit contract 與 repo playbook catalog但 AI decision path 還不會留下「曾考慮 Ansible、但尚未進 check-mode/apply」的 first-class record。這會讓 Telegram / Operator Console 仍看不出 Ansible 是否真的被 AI 修復鏈評估過。
**修正**
- `awooop_ansible_audit_service.py` 新增 decision candidate audit payload / writer
- `decision_manager` 在 auto-execute / manual-approval 分支排程 best-effort `ansible_candidate_matched` audit write
- Audit row 明確是 dry-run / audit-only
- `status=dry_run`
- `input.executor=ansible`
- `input.check_mode=true`
- `input.apply_enabled=false`
- `input.approval_required=true`
- `output.decision_effect=audit_only`
- Docker/container 類 incident 也會命中 188 / 110 Ansible catalog hints未來新 decision 可在 truth-chain 顯示「有候選、尚未執行 check-mode」。
**驗證與推版**
- Local
- `py_compile`pass。
- `ruff --select F,E9`pass。
- `pytest apps/api/tests/test_awooop_truth_chain_service.py apps/api/tests/test_platform_router_order.py apps/api/tests/test_awooop_operator_auth.py -q`14 passed。
- Tier 3 adjacent tests133 passed, 1 existing RuntimeWarning。
- `git diff --check`pass。
- Gitea
- `3799e0db feat(awooop): audit ansible decision candidates` 已推 `gitea main`
- Code Review run `1936`success。
- CD run `1935`success。
- Deploy marker`90b9ddb7 chore(cd): deploy 3799e0d [skip ci]`
- Production
- API/Web/Worker image 均為 `192.168.0.110:5000/awoooi/*:3799e0db0d30f29fdc251197634d2fca4c2c67fd`
- K3s rollout statusAPI/Web/Worker success。
- `/api/v1/health`healthymock_mode=false。
- Pure function smokeAPI podDockerContainerUnhealthy 事件可產生 `ansible_candidate_matched` payload`candidate_count=2``check_mode_executed=false`
- Truth-chain smoke `INC-20260512-B6C589`
- `source_type=incident`
- `current_stage=manual_required`
- `stage_status=blocked`
- `needs_human=true`
- `execution.ansible.audit_contract.schema_version=ansible_executor_audit_v1`
- `ansible_candidates=2`
- `mcp_gateway_total=8`
- Truth-chain smoke `7f858956`
- `source_type=drift_report`
- `current_stage=dedup_or_repeat_updated`
- `stage_status=pending`
- `needs_human=true`
- `repeat_12h=12`
- `outbound_visible=2`
**整體進度**
- Wave 0MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版。
- T0Truth-chain read-only API 完成、部署、production smoke 完成。
- T1Channel Event hardening 完成、部署、production smoke 完成。
- T2legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成first-class Gateway enforced path 仍待後續 wave。
- T3Ansible audit contract + decision candidate dry-run audit 完成、部署、production smoke 完成。
- 仍未完成Ansible 真正 check-mode executor、diff artifact、apply / rollback audit、T4 drift fingerprint FSM、T5 incident / approval / execution reconciliation、first-class MCP Gateway enforcement。
## 2026-05-12 | Security Supply Chain PR #117 累積紀錄
**背景**:統帥批准 Kali `192.168.0.112`、開發主機 `192.168.0.111` / `192.168.0.168`、Code Review -> Codex、Gitea -> GitHub 長期遷移納入同一個資安工作項目;同時要求初期不要把資安等級一次拉太高,避免產品、架構與流程變得過度複雜。本支線以乾淨 worktree 建立 PR `#117`,並持續與另一個 AwoooP Session 的 `gitea/main` 同步。
**累積交付**
- 建立 docs-only / contracts-first Security Supply Chain scaffold涵蓋 Kali、Code Review、Codex、Gitea、GitHub 與 AwoooP mirror-only handoff。
- 產出 Gitea/GitHub refs diff、Gitea public-only inventory、local remote inventory、GitHub target probe、canonical lineage、110 refs probe、repo-by-repo approval package 與 contract manifest snapshot。
- 建立 `SOURCE-CONTROL-APPROVAL-BOARD.md`,彙整 8 個 target其中 7 個為 pending approvalauthenticated inventory gate 仍為 `blocked`
- 建立 `SOURCE-CONTROL-RECONCILE-PLAN.md`,涵蓋 `awoooi``clawbot-v5``wooo-aiops` 三個 refs-blocked mapped repos狀態仍為 `draft_blocked`
- Contract manifest 已收斂到 14 個主要 contract可供 AwoooP mirror / read-only policy / approval candidate 消費,但不得作 execution router。
**邊界**
- 未使用 Gitea write-capable remote credential 做 authenticated inventory。
- 未建立 GitHub repo、未改 visibility、未同步 refs、未切 GitHub primary、未部署。
- authenticated inventory gate 仍為 `blocked`,等待 read-only token 或 redacted admin export
- AwoooP 可 mirror board / plan / policy但不得執行 board item 或新增高風險 action button
**驗證**
- `source-control-approval-board.py` 產生 8 itemspending approval 7。
- JSON / schema / snapshot parse 通過。
- `scripts/security/*.py` 可編譯。
- `git diff --check` 通過。
- PR diff added lines 未命中本輪敏感 token / credential pattern。
## 2026-05-12 | Security Supply Chain PR #117 與 AwoooP 主線同步
## 2026-05-12 | T3 Ansible audit surface 第一段
**背景**Security Supply Chain docs-only 分支完成首次推版後,另一個 AwoooP Session 已將 `feat(awooop): harden outbound truth chain mirror` 與 deploy marker 推入 `gitea/main`。為避免雙 Session 推進互相衝突,本輪先把最新 `gitea/main` 合入資安分支,再建立 review-only PR
**背景**Telegram / truth-chain live audit 顯示 Ansible 目前仍只是 repo/主機部署工具,沒有出現在 AI 自動化修復鏈路的 first-class audit recordOperator 無法知道「是否被考慮、是否 dry-run、為何沒用」
**本次同步**
- 資安分支 `codex/security-supply-chain-contracts-20260512` 已合入最新 `gitea/main`merge commit 為 `dc540cba`
- 已建立 Gitea PR `#117``http://192.168.0.110:3001/wooo/awoooi/pulls/117`
- PR 維持 review-only / docs-first / contracts-first未合併、未部署、未切 GitHub primary。
- AwoooP 主線 runtime / migration / k8s 變更由 `gitea/main` 保留,本資安分支的 PR diff 仍只呈現資安文件、schema、snapshot 與 read-only tooling。
**修正**
- 新增 migration `adr090d_ansible_operation_types.sql`,擴充 `automation_operation_log.operation_type`
- `ansible_candidate_matched`
- `ansible_check_mode_executed`
- `ansible_apply_executed`
- `ansible_rollback_executed`
- `ansible_execution_skipped`
- 新增 rollback migration `adr090d_ansible_operation_types_down.sql``run-migration.yml` 會跳過 `_down.sql`
- 新增 `awooop_ansible_audit_service.py`
- 讀取 automation ops 中的 Ansible operation type/tag/backend。
- 暴露 repo 既有 playbook catalog hint。
- 明確標示 `decision_effect=none`,避免把候選 playbook 當成已執行。
- truth-chain `execution.ansible` 現在會顯示:
- `considered` 是否有真實 Ansible audit record。
- `records``audit_contract``candidate_catalog``not_used_reason`
- `incident_timeline_service` 補 Ansible operation type → stage mapping。
**驗證**
- `gitea/main` 已是資安分支祖先
- `python3 -m py_compile scripts/security/*.py` 通過
- `git diff --check gitea/main...HEAD` 通過
- JSON / schema / snapshot parse 通過,`security_supply_chain_contract_manifest_v1` 12 個 contracts path check 通過
- PR diff added lines 未命中本輪敏感 token / credential pattern
- `py_compile`Ansible audit service / truth-chain / incident timeline / truth-chain tests 通過
- `ruff --select F,E9`All checks passed
- `pytest apps/api/tests/test_awooop_truth_chain_service.py apps/api/tests/test_platform_router_order.py apps/api/tests/test_awooop_operator_auth.py -q`13 passed
- `ruby YAML.load_file(".gitea/workflows/run-migration.yml")`ok
- `git diff --check`ok
**下一步**
- 等 PR review 後再決定是否合併;不得直接切 GitHub primary 或啟動 refs sync
- Gitea read-only inventory approval 未批准前private/internal server-side repo list 保持 blocked
- 下一階段仍維持低摩擦 observe-first不做 runtime blocking
**整體進度**
- Wave 0MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版
- T0Truth-chain read-only API 完成、部署、production smoke 完成
- T1Channel Event hardening 完成、部署、production smoke 完成
- T2legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成first-class Gateway enforced path 仍待後續 wave。
- T3Ansible first-class audit contract / truth-chain 可見性完成、已部署;尚未把 approval execution path 寫入 Ansible dry-run/check-mode。
- 下一步T3 第二段接 decision / approval execution 的 Ansible check-mode audit row仍不直接 apply。
## 2026-05-12 | Security Supply Chain docs-only contract manifest
**production push 追加**
- Gitea `run-migration` run `1933` 顯示 migration 本體已成功:
- `adr090d_ansible_operation_types.sql` 以 owner fallback 套用成功。
- 但 audit seed 仍失敗,這次不是 `:'commit_sha'`,而是 tools JSON literal 在 unquoted heredoc 下仍保留反斜線:
- `'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb`
- PostgreSQL 回 `invalid input syntax for type json`
- 已修 `.gitea/workflows/run-migration.yml`tools JSON 改為 `'{"psql": 1, "gitea_ci": 1}'::jsonb`
- 已補 production `asset_discovery_run` repair audit row
- `triggered_by=codex:gitea-migration-audit-repair`
- `summary.type=ci_migration_manual_repair`
- `summary.commit_sha=ca80972dc73cb647f8fab3bf9439784c4b8eef7b`
- Production DB constraint 驗證:`automation_operation_log_type_valid` 已包含全部 `ansible_*` operation types。
- CD 部署:
- `07000dae chore(cd): deploy ca80972 [skip ci]`
- API/Web/Worker image 均為 `ca80972dc73cb647f8fab3bf9439784c4b8eef7b`
- rollout success。
- Truth-chain smokeB6C589
- `truth_status=manual_required/blocked`
- `mcp_gateway_total=8`
- `execution.ansible.considered=false`
- `execution.ansible.records=0`
- `not_used_reason=no automation_operation_log row with Ansible operation type, tag, or executor backend for this source`
- `audit_contract.schema_version=ansible_executor_audit_v1`
- Caveat：下一個 migration push 仍需 live 驗證 `run-migration` audit seed 是否完全通過；本輪 workflow 修正後沒有新的 migration 觸發可重跑。
**背景**:統帥批准 Kali `192.168.0.112`、開發主機 `192.168.0.111` / `192.168.0.168`、Code Review -> Codex、Gitea -> GitHub 長期遷移納入同一個資安工作項目;同時要求初期不要把資安等級一次拉太高,避免產品、架構與流程變得過度複雜。
**T3 第二段本地實作**
- `awooop_ansible_audit_service.py` 新增 decision audit payload/writer
- 只有 static catalog 有候選 playbook 時才寫 `automation_operation_log`
- operation_type=`ansible_candidate_matched`
- status=`dry_run`
- `input.executor=ansible``check_mode=true``apply_enabled=false``approval_required=true`
- `output.decision_effect=audit_only`
- `decision_manager` 在 auto-execute / manual-approval 分支都排程 best-effort audit write
- 不改 executor。
- 不跑 Ansible。
- 不阻塞決策和 Telegram。
- Docker/container 類 incident 也會命中 Ansible catalog hint讓 B6C589 這類事件後續新 decision 能留下 Ansible candidate audit row。
- 本地驗證:
- `py_compile`pass。
- `ruff --select F,E9`pass。
- `pytest test_awooop_truth_chain_service.py test_platform_router_order.py test_awooop_operator_auth.py -q`14 passed。
- `git diff --check`pass。
- 待推版與 production smoke。
**本次交付**
- 建立 Kali / Code Review / GitHub / Gitea / Codex / AwoooP 的 docs-only security supply chain scaffold。
- 建立 `security_finding_v1``coding_task_v1``source_control_migration_event_v1``gitea_repo_inventory_v1``local_git_remote_inventory_v1``github_target_probe_v1``github_target_decision_v1``github_target_repo_approval_package_v1``local_repo_canonical_probe_v1``git_remote_refs_probe_v1``approval_required_event_v1``security_rollout_policy_v1``security_supply_chain_contract_manifest_v1` schema 草案。
- 產出 Gitea/GitHub refs diff、Gitea public-only inventory、local remote inventory、GitHub target probe、canonical lineage、110 refs probe、repo-by-repo approval package 與 contract manifest snapshot。
- 明確採低摩擦 `observe-first` / `mirror_only`LOW / MEDIUM observation 先 observe / warn只有 read-only token、repo creation、visibility change、refs sync、secret、deploy、primary switch 等高風險動作才進 approval。
## 2026-05-12 | run-migration audit seed 再修正
**邊界**
- 本輪只做文件、schema、read-only scripts 與 redacted snapshots。
- 未建立 repo、未修改 visibility、未同步 refs、未切 GitHub primary、未部署、未碰 runtime enforcement。
- AwoooP 只可 mirror / read-only policy / approval candidate不可把 manifest 當 execution router。
**背景**Gitea `run-migration``Seed asset_discovery_run (audit)` 再次失敗:
```text
ERROR: syntax error at or near ":"
LINE 16: 'commit_sha', :'commit_sha',
```
**修正**
- `.gitea/workflows/run-migration.yml` 不再依賴 `psql``:'commit_sha'` / `:'files_json'` 變數展開。
- 改由 `jq` 先產生完整 `summary` JSON再以 shell-safe SQL literal 寫入 `asset_discovery_run.summary`
- 保留 owner connection fallback只修 audit seed不改 migration apply 流程。
**驗證**
- JSON / schema parse 通過
- `scripts/security/*.py` 可編譯
- `git diff --check` 通過
- 新增 / 修改內容未命中本輪敏感 token / credential pattern
- `ruby -e 'require "yaml"; YAML.load_file(".gitea/workflows/run-migration.yml")'`yaml ok
- 抽出 `Seed asset_discovery_run (audit)` step 後 `bash -n`:通過
- mock `psql` 實跑該 steprendered SQL 已無 `:'...'` psql 變數,並包含 `commit_sha` / `files` JSON
- `git diff --check`:通過
**下一步**
- 等 Gitea read-only inventory approval 被批准後,補 private/internal server-side repo list
- 逐 repo 取得 owner / visibility / canonical 決策
- 對 refs blocked repos 產生 reconcile planGitHub primary 仍保持 blocked
**整體進度**
- Wave 0MOMO PostgreSQL backup → AwoooP 失敗通知接線完成並已推版
- Truth-chain T0read-only truth-chain API 完成、部署、production smoke 完成
- T1Channel Event hardening 完成、部署、production smoke 完成
- T2legacy MCP audit bridge / backfill / truth-chain visibility 完成、部署、production smoke 完成first-class MCP Gateway enforced path 仍待後續 wave。
- 本次CI migration audit seed 紅燈修正完成,待推 Gitea main 觀察下一次 `run-migration`
- 下一步:回到 T3 Ansible declarative executor 盤點與 first-class audit surface。
## 2026-05-12 | Truth-chain T0 read-only API 第一版
@@ -6601,3 +6754,67 @@ gateway_audit_total=0 last_15m=0 bridge_total=0
- 因此目前只能宣稱「T2 bridge 寫入能力已部署並經 rollback smoke 驗證」。
- 尚不能宣稱「所有 MCP / 自建 MCP 都已完全經 AwoooP Gateway 強制治理」;下一段要讓下一個真實 incident / MCP 呼叫自然產生 durable bridge row或把高頻 caller 改成 first-class `McpGateway`
**T2 backfill / truth-chain visibility 追加**
- 新增 `scripts/ops/awooop-mcp-gateway-bridge-backfill-24h.sql`
- 將最近 24h 真實 `mcp_audit_log` 鏡像到 `awooop_mcp_gateway_audit`
-`gate_result.legacy_audit_id` 做 idempotency key。
- bridge row 保留 `policy_enforced=false``not_used_reason`,避免誤判為五閘門已 enforcement。
- production 已執行 backfill
```text
inserted_bridge_rows=1160
gateway_total=1310 bridge_total=1310 last_24h=1276
B6C589_gateway_rows=8 failed=8 success=0
```
- truth-chain API 追加 `gate_result` 欄位,並把 JSONB text 解析回物件,讓 UI 能顯示 bridge reason。
```text
py_compile:
apps/api/src/services/awooop_truth_chain_service.py
apps/api/tests/test_awooop_truth_chain_service.py
# OK
ruff F,E9:
# All checks passed
pytest:
apps/api/tests/test_awooop_truth_chain_service.py
apps/api/tests/test_platform_router_order.py
apps/api/tests/test_awooop_operator_auth.py
# 11 passed
```
**效果**
- `INC-20260512-B6C589` truth-chain 現在不再是 `awooop_mcp_gateway_audit_empty`
- 仍顯示 `manual_required/blocked`,因為 8 個 SSH MCP 都失敗approval/incident 狀態仍矛盾;這是 T5 要處理,不能用 T2 粉飾成自動修復完成。
**production deploy / endpoint smoke 追加(完成)**
```text
Gitea:
1928 CD Pipeline b4d367ee -> success
1929 Code Review b4d367ee -> success
K8s image:
awoooi-api 192.168.0.110:5000/awoooi/api:b4d367eeb463eccda5aec8aa9c90f19897dbd634
awoooi-worker 192.168.0.110:5000/awoooi/api:b4d367eeb463eccda5aec8aa9c90f19897dbd634
awoooi-web 192.168.0.110:5000/awoooi/web:b4d367eeb463eccda5aec8aa9c90f19897dbd634
health:
http://192.168.0.125:32334/api/v1/health -> 200 healthy
Truth-chain:
GET /api/v1/platform/truth-chain/INC-20260512-B6C589?project_id=awoooi -> 200
stage=manual_required status=blocked needs_human=True
blockers=all_evidence_sensors_failed,
approval_resolved_no_action_without_execution,
incident_still_investigating_after_approval
gateway_total=8 legacy_total=8
first_gateway_tool=legacy:ssh_host:ssh_get_nginx_error_log result=failed
gate_schema=legacy_mcp_bridge_v1 policy_enforced=False
not_used_reason=legacy direct provider path; bridge audit only
```

View File

@@ -1883,12 +1883,83 @@ Phase 6 完成後
- T2 bridge image `94d006ea` 已部署,CD run `1921` success,health 200。
- rollback smoke 證明 `record_mcp_call()` 在同一 transaction 內會同時寫 legacy `mcp_audit_log` 與 `awooop_mcp_gateway_audit` bridge row,且 bridge row 標示 `policy_enforced=false` / `not_used_reason=legacy direct provider path; bridge audit only`;rollback 後兩邊皆未污染 production。
- 部署後短觀察窗內沒有自然新 legacy MCP call(`legacy_mcp_15m=0`),所以 live `awooop_mcp_gateway_audit` total 仍是 0。T2 bridge capability 已上線,但 T2 全退出條件仍需下一個真實 MCP 呼叫產生 durable row,或把高頻 caller 改成 first-class Gateway path。
- 已執行最近 24h 真實 legacy MCP backfill:`inserted_bridge_rows=1160`,目前 `awooop_mcp_gateway_audit gateway_total=1310 / bridge_total=1310 / last_24h=1276`。`INC-20260512-B6C589` 現在 gateway side 可見 8 筆 MCP(8 failed / 0 success);truth-chain blocker 移除 `awooop_mcp_gateway_audit_empty`,但仍是 `manual_required/blocked`,因為 evidence sensors 全失敗、NO_ACTION approval 無 execution、incident 仍 investigating。
- truth-chain API 追加回傳 `gate_result`,讓 Operator Console 可直接顯示 `policy_enforced=false` 與 `not_used_reason`,避免把 bridge row 誤認為 first-class Gateway enforcement。
- `b4d367ee` 已部署,CD run `1928` success。B6C589 endpoint smoke:`gateway_total=8 / legacy_total=8`,第一筆 gateway row 顯示 `gate_schema=legacy_mcp_bridge_v1`、`policy_enforced=False`、`not_used_reason=legacy direct provider path; bridge audit only`;truth status 仍是 `manual_required/blocked`。
**仍未宣稱完成**
- 這只是 legacy bridge,不是把所有呼叫強制改經 AwoooP Gateway;T2 後續仍要把新 MCP caller 收斂到 first-class Gateway path。
---
### 2026-05-12 晚 (台北) — T3 Ansible declarative executor audit surface 第一段
**範圍**
- `automation_operation_log.operation_type` CHECK 追加 Ansible executor audit states
`ansible_candidate_matched` / `ansible_check_mode_executed` /
`ansible_apply_executed` / `ansible_rollback_executed` /
`ansible_execution_skipped`
- 新增 `awooop_ansible_audit_service.py`,把 repo 既有 Ansible playbook catalog 以
read-only 方式暴露給 truth-chain。
- truth-chain `execution.ansible` 改為顯示:
- 是否真的有 `automation_operation_log` Ansible audit record。
- audit contract / required fields。
- static catalog keyword hints,`decision_effect=none`,避免把候選 playbook 誤判成已自動修復。
- `incident_timeline_service` 加入 Ansible operation type stage mapping。
**已驗證**
- 本地 `py_compile` / `ruff F,E9` / `git diff --check` 通過。
- `test_awooop_truth_chain_service.py`、router order、operator auth 共 13 passed。
- `run-migration.yml` YAML parse 通過;新增 `_down.sql` 會被既有 workflow skip 規則排除。
**仍未宣稱完成**
- 這不是 Ansible 自動修復執行器接線;目前只建立 first-class audit contract 與 truth-chain 可見性。
- 下一段需把 decision / approval execution path 在「只 dry-run/check-mode」下寫入上述 operation types,再談 apply。
**production 追加**
- Gitea `run-migration` run `1933`:`adr090d_ansible_operation_types.sql` 已成功套用,含 owner fallback。
- 同 run 的 `Seed asset_discovery_run (audit)` 仍失敗;新根因是 unquoted heredoc 下 tools JSON literal 還寫成 `'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb`,PostgreSQL 視為非法 JSON。
- 後續修正:workflow tools JSON literal 改成 `'{"psql": 1, "gitea_ci": 1}'::jsonb`。
- 已補 production `asset_discovery_run` repair audit row(`ci_migration_manual_repair` / `commit_sha=ca80972dc73cb647f8fab3bf9439784c4b8eef7b`)。
- Production DB constraint 已確認包含全部 `ansible_*` operation types。
- CD 已部署 `ca80972d` image,deploy marker `07000dae`,API/Web/Worker rollout success。
- B6C589 truth-chain smoke:`manual_required/blocked`、`mcp_gateway_total=8`、`execution.ansible.considered=false`、`records=0`、not_used_reason 清楚顯示沒有 Ansible audit record。
- 下一個 migration push 仍需驗證 `run-migration` audit seed live gate,因本輪 workflow 修正後未再新增 migration 觸發重跑。
**T3 第二段本地追加**
- `decision_manager` 在 auto-execute / manual-approval 分支新增 best-effort Ansible candidate audit write。
- 僅在 catalog 有候選 playbook 時寫 `automation_operation_log`:
`operation_type=ansible_candidate_matched`、`status=dry_run`、
`input.check_mode=true`、`input.apply_enabled=false`、
`output.decision_effect=audit_only`。
- 這仍不是 Ansible 執行器;它只讓 truth-chain 能看到 AI decision path 曾考慮 Ansible candidate,以及為何未進入 check-mode/apply。
- 本地 `py_compile` / `ruff F,E9` / 14 個 truth-chain/operator/router tests 通過;待推版和 production smoke。
**T3 第二段 production verified(2026-05-13 台北)**
- `3799e0db feat(awooop): audit ansible decision candidates` 已推 Gitea main,Code Review run `1936` success,CD run `1935` success。
- Deploy marker:`90b9ddb7 chore(cd): deploy 3799e0d [skip ci]`。
- Production API/Web/Worker image 均為 `3799e0db0d30f29fdc251197634d2fca4c2c67fd`,K3s rollout success,health 200 / `mock_mode=false`。
- API pod pure smoke:DockerContainerUnhealthy 事件可產生 `ansible_candidate_matched` audit payload,`candidate_count=2`、`check_mode_executed=false`。
- Truth-chain smoke:
- `INC-20260512-B6C589`:`manual_required/blocked`、`mcp_gateway_total=8`、`execution.ansible.audit_contract=ansible_executor_audit_v1`、`ansible_candidates=2`。
- `7f858956`:`dedup_or_repeat_updated/pending`、`repeat_12h=12`、`outbound_visible=2`。
- 邊界:仍未執行 Ansible check-mode / apply / rollback;T3 目前完成的是 first-class candidate audit,而不是修復執行器。
**T4 Config Drift fingerprint repeat-state production verified(2026-05-13 台北)**
- `5b348774 feat(awooop): expose drift repeat fingerprint` 已推 Gitea main,Code Review run `1938` success,CD run `1937` success。
- Deploy marker:`3d38039b chore(cd): deploy 5b34877 [skip ci]`。
- 新增 `drift_repeat_state_v1`:以 namespace + sorted drift items 建 stable fingerprint,不再只靠 HIGH/MEDIUM/INFO counts。
- Truth-chain drift repeat-state 現在回傳 `fingerprint`、`matching_strategy=namespace_and_stable_items_v1`、`operator_stage`、matching reports。
- Telegram drift narrator 會在 card body 補:
- `流程: drift_scanned → ai_analyzed → pending_human`
- `重複: 12h 內第 N 次同指紋`
- `指紋: dfp_xxxxx`
- Production `7f858956` smoke:`repeat_schema=drift_repeat_state_v1`、`fingerprint=dfp_02dc625b64784b24`、`operator_stage=pending_human`、`repeat_12h=2`、`outbound_visible=2`。
- 重要校正:舊 count-based repeat 看到 12 次,新 stable item fingerprint 證實同一漂移 fingerprint 只有 2 次;12 次只能稱為同計數候選,不能稱為同一漂移。
- 邊界:T4 只補可觀測與重複判定,不做 auto-adopt / rollback / ignore。
---
### 2026-04-20 晚 (台北) — C1-C4 全流程串接 — Playbook 鏈路保護(commit de2d34d)
**觸發**:統帥全景盤查 AI 自動化節點後,發現 Playbook 自動修復鏈路有 3 個結構性斷點。

View File

@@ -40,7 +40,7 @@ resources:
images:
- name: 192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER
newName: 192.168.0.110:5000/awoooi/api
newTag: 94d006eac88fd65f6efca817eb392a103ec10d3f
newTag: 5b34877429c16c42f0f894eb4d7f0484711fde9b
- name: 192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER
newName: 192.168.0.110:5000/awoooi/web
newTag: 94d006eac88fd65f6efca817eb392a103ec10d3f
newTag: 5b34877429c16c42f0f894eb4d7f0484711fde9b

View File

@@ -0,0 +1,69 @@
-- AwoooP T2 MCP Gateway bridge backfill (24h)
-- 2026-05-12 Codex + ogt
--
-- Purpose:
-- Mirror real legacy mcp_audit_log rows into awooop_mcp_gateway_audit so
-- truth-chain can show MCP usage for recent incidents while first-class
-- Gateway migration continues. These rows are explicitly marked as bridge
-- records and policy_enforced=false; they are not proof of five-gate
-- Gateway enforcement.
--
-- Idempotency:
-- gate_result.legacy_audit_id stores the mcp_audit_log.id source key.
-- Re-running this SQL will only insert missing rows.
-- Backfill statement: mirror the last 24 hours of mcp_audit_log rows into
-- awooop_mcp_gateway_audit as bridge rows, then report how many rows this
-- run inserted (output column: inserted_bridge_rows).
WITH pending_legacy AS (
    -- Legacy rows inside the 24h window that have no bridge row yet.
    -- A row counts as already mirrored when an awoooi gateway row carries the
    -- bridge schema_version and the same legacy_audit_id in gate_result.
    SELECT legacy.*
    FROM mcp_audit_log AS legacy
    WHERE legacy.created_at > NOW() - INTERVAL '24 hours'
      AND NOT EXISTS (
          SELECT 1
          FROM awooop_mcp_gateway_audit AS bridge
          WHERE bridge.project_id = 'awoooi'
            AND bridge.gate_result->>'schema_version' = 'legacy_mcp_bridge_v1'
            AND bridge.gate_result->>'legacy_audit_id' = legacy.id::text
      )
),
inserted AS (
    INSERT INTO awooop_mcp_gateway_audit (
        project_id, run_id, trace_id, agent_id, tool_name,
        input_hash, output_hash,
        gate_result,
        result_status, block_gate, block_reason,
        latency_ms, created_at
    )
    SELECT
        'awoooi'                                                            AS project_id,
        CAST(NULL AS uuid)                                                  AS run_id,
        -- Prefer the incident id as trace id; fall back to the session id.
        LEFT(COALESCE(legacy.incident_id, legacy.session_id), 128)          AS trace_id,
        LEFT(COALESCE(legacy.agent_role, 'legacy-mcp-provider'), 128)       AS agent_id,
        LEFT('legacy:' || legacy.mcp_server || ':' || legacy.tool_name, 128) AS tool_name,
        -- NOTE(review): digest() is the pgcrypto function; confirm the
        -- extension is installed in the target database before running.
        encode(digest(COALESCE(legacy.input_params::text, 'null'), 'sha256'), 'hex') AS input_hash,
        CASE
            WHEN legacy.output_result IS NULL THEN NULL
            ELSE encode(digest(legacy.output_result::text, 'sha256'), 'hex')
        END                                                                 AS output_hash,
        -- Bridge marker payload: policy_enforced=false flags these rows as
        -- mirrored legacy calls, and legacy_audit_id is the idempotency key
        -- checked by pending_legacy above.
        jsonb_build_object(
            'schema_version', 'legacy_mcp_bridge_v1',
            'gateway_path', 'legacy_backfill',
            'policy_enforced', false,
            'not_used_reason', 'legacy direct provider path; bridge audit only',
            'legacy_audit_id', legacy.id::text,
            'legacy_mcp_server', legacy.mcp_server,
            'legacy_tool_name', legacy.tool_name,
            'flywheel_node', legacy.flywheel_node
        )                                                                   AS gate_result,
        -- Anything other than an explicit TRUE (including NULL) is a failure.
        CASE WHEN legacy.success IS NOT TRUE THEN 'failed' ELSE 'success' END AS result_status,
        CAST(NULL AS smallint)                                              AS block_gate,
        -- Only failed calls carry a reason; successful ones stay NULL.
        CASE WHEN legacy.success IS NOT TRUE THEN LEFT(legacy.error_message, 256) END AS block_reason,
        legacy.duration_ms                                                  AS latency_ms,
        legacy.created_at
    FROM pending_legacy AS legacy
    RETURNING call_id
)
SELECT COUNT(*) AS inserted_bridge_rows
FROM inserted;