fix(awooop): cooldown ansible check-mode transport blockers
Some checks failed
CD Pipeline / tests (push) Successful in 1m25s
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / build-and-deploy (push) Successful in 4m56s
CD Pipeline / post-deploy-checks (push) Has been cancelled

This commit is contained in:
Your Name
2026-05-31 13:08:39 +08:00
parent 46cc56c3ce
commit e45e52e526
5 changed files with 143 additions and 1 deletions

View File

@@ -639,6 +639,15 @@ class Settings(BaseSettings):
le=900,
description="Delay before the check-mode worker first tick after API startup.",
)
# Cooldown window, in seconds, applied after a transport-level check-mode
# blocker is observed. Defaults to six hours and is validated to lie between
# five minutes and twenty-four hours.
AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_COOLDOWN_SECONDS: int = Field(
    default=6 * 60 * 60,
    ge=5 * 60,
    le=24 * 60 * 60,
    description=(
        "Cooldown after transport-level check-mode blockers such as "
        "forced-command repair SSH denial."
    ),
)
# ==========================================================================
# 統帥鐵律:禁止 SQLite (AWOOOI 憲法)

View File

@@ -31,6 +31,7 @@ _SAFE_HOST_RE = re.compile(r"^[A-Za-z0-9_.-]+$")
_PLAYBOOK_PREFIX = Path("infra/ansible/playbooks")
_STDOUT_LIMIT = 20_000
_STDERR_LIMIT = 12_000
# Blocker identifier reported by detect_ansible_transport_blockers when the
# scanned text contains the "REPAIR_DENIED:invalid_command" marker.
FORCED_COMMAND_BLOCKER = "ansible_repair_ssh_forced_command_denies_ansible_bootstrap"
@dataclass(frozen=True)
@@ -80,6 +81,14 @@ def _json_loads(value: Any) -> dict[str, Any]:
return {}
def detect_ansible_transport_blockers(*values: Any) -> list[str]:
    """Return transport-blocker identifiers found in the given values.

    Each value is stringified (falsy values become the empty string) and the
    pieces are joined with single spaces before being searched, so a marker
    split across two values is not matched.

    Args:
        *values: Arbitrary objects (error text, stdout/stderr tails, ...).

    Returns:
        ``[FORCED_COMMAND_BLOCKER]`` when the forced-command denial marker is
        present, otherwise an empty list.
    """
    haystack = " ".join(str(value) if value else "" for value in values)
    if "REPAIR_DENIED:invalid_command" not in haystack:
        return []
    return [FORCED_COMMAND_BLOCKER]
def _playbook_roots(module_path: Path | None = None) -> list[Path]:
resolved_module_path = (module_path or Path(__file__)).resolve()
return [
@@ -387,6 +396,44 @@ async def claim_pending_check_modes(
return claims
async def recent_ansible_transport_blockers(
    *,
    project_id: str = "awoooi",
    cooldown_seconds: int | None = None,
) -> list[str]:
    """Return transport blockers observed from recent failed check-mode rows.

    Looks at the 20 most recent ``automation_operation_log`` rows with
    ``operation_type = 'ansible_check_mode_executed'`` and
    ``status = 'failed'`` created inside the cooldown window, and runs
    ``detect_ansible_transport_blockers`` over their ``output``,
    ``dry_run_result``, ``error`` and ``stderr_feed_back`` columns.

    Args:
        project_id: Project whose database context is opened.
        cooldown_seconds: Look-back window in seconds. ``None`` falls back to
            ``settings.AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_COOLDOWN_SECONDS``.
            Any value below 60 (including 0 and negatives) is raised to 60.

    Returns:
        Sorted, de-duplicated blocker identifiers; empty when none were seen.
    """
    # Test against None instead of truthiness: with ``or`` an explicit 0 was
    # silently replaced by the configured default while -1 was clamped to the
    # 60-second floor. Now every explicit value is clamped the same way.
    cooldown = (
        settings.AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_COOLDOWN_SECONDS
        if cooldown_seconds is None
        else cooldown_seconds
    )
    window = f"{max(60, cooldown)} seconds"

    async with get_db_context(project_id) as db:
        result = await db.execute(
            text("""
                SELECT
                    coalesce(output::text, '') AS output_text,
                    coalesce(dry_run_result::text, '') AS dry_run_text,
                    coalesce(error, '') AS error_text,
                    coalesce(stderr_feed_back, '') AS stderr_text
                FROM automation_operation_log
                WHERE operation_type = 'ansible_check_mode_executed'
                  AND status = 'failed'
                  AND created_at >= NOW() - CAST(:cooldown AS interval)
                ORDER BY created_at DESC
                LIMIT 20
            """),
            {"cooldown": window},
        )
        # Materialize the rows while the database context is still open.
        rows = result.mappings().all()

    blockers: set[str] = set()
    for row in rows:
        blockers.update(
            detect_ansible_transport_blockers(
                row.get("output_text"),
                row.get("dry_run_text"),
                row.get("error_text"),
                row.get("stderr_text"),
            )
        )
    return sorted(blockers)
async def _insert_skipped_candidate(
db: Any,
*,
@@ -512,6 +559,10 @@ async def run_pending_check_modes_once(
if blockers:
logger.warning("ansible_check_mode_runtime_blocked", blockers=blockers)
return {"claimed": 0, "completed": 0, "failed": 0, "blockers": blockers}
transport_blockers = await recent_ansible_transport_blockers(project_id=project_id)
if transport_blockers:
logger.warning("ansible_check_mode_transport_blocked", blockers=transport_blockers)
return {"claimed": 0, "completed": 0, "failed": 0, "blockers": transport_blockers}
claims = await claim_pending_check_modes(project_id=project_id, limit=limit)
completed = 0

View File

@@ -21,6 +21,7 @@ import structlog
from sqlalchemy import text
from src.db.base import get_db_context
from src.services.awooop_ansible_check_mode_service import detect_ansible_transport_blockers
from src.services.awooop_ansible_audit_service import build_ansible_truth
from src.services.drift_repeat_state import build_drift_repeat_state
@@ -722,6 +723,28 @@ def _execution_backend_summary(records: list[dict[str, Any]]) -> dict[str, Any]:
return summary
def _ansible_observed_runtime_blockers(records: list[dict[str, Any]]) -> list[str]:
blockers: set[str] = set()
for record in records:
execution = record.get("execution") if isinstance(record.get("execution"), dict) else {}
ansible = execution.get("ansible") if isinstance(execution.get("ansible"), dict) else {}
ansible_records = ansible.get("records") if isinstance(ansible.get("records"), list) else []
for row in ansible_records:
if not isinstance(row, dict):
continue
if str(row.get("operation_type") or "") != "ansible_check_mode_executed":
continue
dry_run_result = row.get("dry_run_result") if isinstance(row.get("dry_run_result"), dict) else {}
blockers.update(
detect_ansible_transport_blockers(
row.get("error"),
dry_run_result.get("stdout_tail"),
dry_run_result.get("stderr_tail"),
)
)
return sorted(blockers)
def _ansible_playbook_roots(module_path: Path | None = None) -> list[Path]:
resolved_module_path = (module_path or Path(__file__)).resolve()
return [
@@ -875,6 +898,15 @@ def summarize_automation_quality_records(
key=lambda row: (-int(row["total"]), str(row["gate"])),
)
ansible_runtime = _ansible_runtime_readiness()
observed_ansible_blockers = _ansible_observed_runtime_blockers(records)
if observed_ansible_blockers:
ansible_runtime["observed_transport_blockers"] = observed_ansible_blockers
ansible_runtime["blockers"] = sorted(
set(ansible_runtime.get("blockers") or []) | set(observed_ansible_blockers)
)
ansible_runtime["can_run_check_mode"] = False
return {
"schema_version": "automation_quality_summary_v1",
"project_id": project_id,
@@ -888,7 +920,7 @@ def summarize_automation_quality_records(
"by_verdict": by_verdict,
"gate_failures": failing_gates,
"execution_backend_summary": _execution_backend_summary(records),
"ansible_runtime": _ansible_runtime_readiness(),
"ansible_runtime": ansible_runtime,
"examples": examples[:25],
"production_claim": {
"can_claim_full_auto_repair": evaluated_total > 0 and verified_total == evaluated_total,

View File

@@ -12,6 +12,7 @@ from src.services.awooop_ansible_audit_service import (
from src.services.awooop_ansible_check_mode_service import (
build_ansible_check_mode_claim_input,
build_ansible_check_mode_command,
detect_ansible_transport_blockers,
)
from src.services.awooop_truth_chain_service import (
_ansible_playbook_roots,
@@ -976,6 +977,14 @@ def test_ansible_check_mode_command_uses_check_diff_and_repair_ssh(tmp_path: Pat
assert "apply" not in " ".join(spec.command)
def test_ansible_transport_blocker_detects_repair_forced_command_denial() -> None:
    """A forced-command denial marker in the text yields the transport blocker."""
    ssh_failure = "fatal: host unreachable REPAIR_DENIED:invalid_command"
    expected = ["ansible_repair_ssh_forced_command_denies_ansible_bootstrap"]

    assert detect_ansible_transport_blockers(ssh_failure) == expected
def test_execution_backend_summary_subtracts_completed_check_mode_parent() -> None:
summary = _execution_backend_summary([
{
@@ -1006,3 +1015,42 @@ def test_execution_backend_summary_subtracts_completed_check_mode_parent() -> No
assert summary["ansible_check_mode_total"] == 1
assert summary["ansible_pending_check_mode_total"] == 0
def test_quality_summary_marks_forced_command_denial_as_runtime_blocker() -> None:
    """A failed check-mode row carrying the denial marker blocks check mode."""
    denied_check_mode_row = {
        "op_id": "check-1",
        "operation_type": "ansible_check_mode_executed",
        "status": "failed",
        "dry_run_result": {
            "stdout_tail": "REPAIR_DENIED:invalid_command",
        },
    }
    record = {
        "incident": {"incident_id": "INC-1", "alertname": "DockerContainerUnhealthy"},
        "truth_status": {},
        "automation_quality": {"applicable": True, "score": 50, "verdict": "observed"},
        "execution": {
            "automation_operation_log": [],
            "auto_repair_executions": [],
            "ansible": {
                "considered": True,
                "candidate_catalog": {"candidates": [{"catalog_id": "ansible:110-devops"}]},
                "records": [denied_check_mode_row],
            },
        },
    }

    summary = summarize_automation_quality_records(
        project_id="awoooi",
        window_hours=24,
        limit=20,
        records=[record],
    )

    ansible_runtime = summary["ansible_runtime"]
    assert ansible_runtime["can_run_check_mode"] is False
    assert (
        "ansible_repair_ssh_forced_command_denies_ansible_bootstrap"
        in ansible_runtime["blockers"]
    )

View File

@@ -111,6 +111,8 @@ spec:
value: "180"
- name: AWOOOP_ANSIBLE_CHECK_MODE_STARTUP_SLEEP_SECONDS
value: "120"
- name: AWOOOP_ANSIBLE_CHECK_MODE_TRANSPORT_COOLDOWN_SECONDS
value: "21600"
# 2026-04-05 Claude Code: Sprint 3 — 掛載 SSH key 供 HostRepairAgent 使用
volumeMounts:
- name: repair-ssh-key