# awoooi/scripts/ops/awooop-rls-manual-script-audit.py

#!/usr/bin/env python3
"""Static review inventory for manual DB scripts before AwoooP RLS enablement.

This is intentionally not a runtime gate. It separates:
- BLOCKED: secrets or hardcoded connection strings in scripts.
- REVIEW: manual/operator scripts that need a migration role or explicit review.
- PASS: scripts that already set app.project_id or use get_db_context().
"""

from __future__ import annotations

import argparse
import re
from dataclasses import dataclass
from pathlib import Path


# Repository root: this file lives two directories below it (scripts/ops/).
ROOT = Path(__file__).resolve().parents[2]

# Directories that are walked recursively for candidate scripts.
SCAN_ROOTS = (
    ROOT / "apps/api/scripts",
    ROOT / "scripts",
)

# ROOT-relative POSIX paths excluded from the scan. These audit scripts embed
# the very patterns they search for and would otherwise flag themselves.
SKIP_PATHS = {
    "scripts/ops/awooop-rls-access-audit.py",
    "scripts/ops/awooop-rls-manual-script-audit.py",
}

# File extensions (case-sensitive, including the dot) that count as scripts.
SCRIPT_SUFFIXES = {".py", ".sh", ".sql"}

# PostgreSQL connection URI carrying inline "user:password@" credentials.
# Both URI scheme spellings ("postgresql://" and "postgres://") are accepted,
# with an optional "+driver" suffix (e.g. +asyncpg, +psycopg2), so that a
# credential is not missed merely because a different scheme/driver is used.
HARDCODED_DB_URL_RE = re.compile(
    r"postgres(?:ql)?(?:\+\w+)?://[^:'\"\s/]+:[^@'\"\s]+@",
    re.IGNORECASE,
)

# Tokens that indicate a script talks to the database directly.
# "\w*DATABASE_URL\w*" matches any identifier containing DATABASE_URL
# (e.g. DEV_DATABASE_URL); a plain "\bDATABASE_URL\b" would miss those because
# "_" is a word character and therefore not a word boundary.
DIRECT_DB_RE = re.compile(
    r"\b(asyncpg\.connect|create_async_engine|psql|pg_dump|\w*DATABASE_URL\w*|PGPASSWORD)\b"
)

# Evidence that a script establishes the per-project context: setting
# app.project_id via set_config()/SET [LOCAL], or calling get_db_context().
PROJECT_CONTEXT_RE = re.compile(
    r"set_config\('app\.project_id'|SET\s+(?:LOCAL\s+)?app\.project_id|get_db_context\(",
    re.IGNORECASE,
)

# DDL / policy statements that mark a script as a migration-style operation.
MIGRATION_HINT_RE = re.compile(
    r"\b(ALTER\s+TABLE|CREATE\s+TABLE|CREATE\s+INDEX|CREATE\s+EXTENSION|DROP\s+POLICY|ENABLE\s+ROW\s+LEVEL\s+SECURITY)\b",
    re.IGNORECASE,
)

# Table names treated as tenant-scoped; a script mentioning any of them as a
# whole word is considered to touch tenant data.
TENANT_TABLES = (
    "incidents",
    "knowledge_entries",
    "playbooks",
    "audit_logs",
    "budget_ledger",
    "approval_records",
    "notification_outcomes",
    "rag_chunks",
    "playbook_embeddings",
    "awooop_projects",
    "awooop_contract_revisions",
    "awooop_run_state",
    "awooop_mcp_tool_registry",
    "awooop_mcp_grants",
    "awooop_mcp_credential_refs",
    "awooop_mcp_gateway_audit",
    "awooop_conversation_event",
    "awooop_outbound_message",
)

# Known operator scripts mapped to the fixed REVIEW reason reported for them.
# Keys are ROOT-relative POSIX paths.
OPERATOR_REVIEW_PATHS = {
    "apps/api/scripts/awooop_phase1_batch1_backfill.py":
        "RLS/project_id bootstrap backfill; run only with migration/operator role.",
    "apps/api/scripts/run_migration.py":
        "DDL migration script; run only with migration/operator role.",
    "scripts/ops/awooop_rls_preflight.py":
        "Read-only preflight that probes app.project_id inside the API pod.",
    "scripts/ops/awooop-rls-role-bootstrap.sql":
        "Role bootstrap SQL; must be reviewed and run by postgres/CREATEROLE operator.",
    "scripts/sync_dev_db.py":
        "Dev DB schema sync; use DEV_DATABASE_URL and run only against non-production DB.",
}


@dataclass(frozen=True)
class Finding:
    """A single audit result for one scanned script.

    The dataclass is frozen, so instances are immutable and hashable and
    compare equal field by field.
    """

    # Classification label; this file emits "BLOCKED", "REVIEW" or "PASS".
    severity: str
    # Script location as reported to the user (ROOT-relative POSIX path).
    path: str
    # Human-readable explanation printed beneath the finding.
    reason: str


def rel(path: Path) -> str:
    """Return *path* expressed relative to ROOT, using forward slashes.

    Raises:
        ValueError: if *path* is not located under ROOT (propagated from
            ``Path.relative_to``).
    """
    relative = path.relative_to(ROOT)
    return relative.as_posix()


def iter_script_paths() -> list[Path]:
    """Collect every script file to audit.

    Walks each existing directory in SCAN_ROOTS recursively and keeps regular
    files whose suffix is in SCRIPT_SUFFIXES and whose ROOT-relative path is
    not listed in SKIP_PATHS.

    Returns:
        The matching paths, de-duplicated and sorted.
    """
    found: set[Path] = set()
    for scan_root in SCAN_ROOTS:
        if scan_root.exists():
            for candidate in scan_root.rglob("*"):
                # Cheap filesystem/suffix checks first; rel() only runs for
                # files that already look like scripts.
                is_script = candidate.is_file() and candidate.suffix in SCRIPT_SUFFIXES
                if is_script and rel(candidate) not in SKIP_PATHS:
                    found.add(candidate)
    return sorted(found)


def classify(path: Path) -> list[Finding]:
    """Classify one script for RLS readiness.

    Args:
        path: Script file located under ROOT.

    Returns:
        Zero, one or two findings:

        * an optional ``BLOCKED`` finding when a line contains a PostgreSQL
          URL with inline credentials (lines containing the placeholder
          markers ``<password>`` or ``:password@`` are ignored), followed by
        * at most one ``REVIEW``/``PASS`` finding describing how the script
          accesses the database. Scripts that are neither listed in
          OPERATOR_REVIEW_PATHS nor contain a direct-DB token get none.
    """
    # Undecodable bytes are replaced so a stray binary blob cannot abort the audit.
    text = path.read_text(encoding="utf-8", errors="replace")
    path_rel = rel(path)
    findings: list[Finding] = []

    hardcoded_db_url = any(
        HARDCODED_DB_URL_RE.search(line)
        for line in text.splitlines()
        if "<password>" not in line and ":password@" not in line
    )
    if hardcoded_db_url:
        findings.append(
            Finding(
                "BLOCKED",
                path_rel,
                "hardcoded PostgreSQL URL with inline credentials; move to environment/secret store.",
            )
        )

    # Explicitly listed operator scripts are always reported for review, even
    # when they contain no direct-DB token (e.g. a pure SQL bootstrap file).
    # Checking this before the direct-DB early return keeps them from being
    # silently dropped from the report.
    operator_reason = OPERATOR_REVIEW_PATHS.get(path_rel)
    if operator_reason is not None:
        findings.append(Finding("REVIEW", path_rel, operator_reason))
        return findings

    if not DIRECT_DB_RE.search(text):
        return findings

    touches_tenant_table = any(re.search(rf"\b{re.escape(table)}\b", text) for table in TENANT_TABLES)
    has_project_context = PROJECT_CONTEXT_RE.search(text) is not None

    if touches_tenant_table and not has_project_context:
        findings.append(
            Finding(
                "REVIEW",
                path_rel,
                "direct DB access touches tenant tables without app.project_id; add project context or use operator role.",
            )
        )
    elif touches_tenant_table:
        findings.append(Finding("PASS", path_rel, "tenant table access sets app.project_id or uses get_db_context."))
    elif MIGRATION_HINT_RE.search(text):
        findings.append(Finding("REVIEW", path_rel, "DDL/operator script; verify role and maintenance window before use."))
    else:
        findings.append(Finding("PASS", path_rel, "no tenant table access detected in direct DB usage."))

    return findings


def main() -> int:
    """Run the audit from the command line and print a report.

    Flags:
        --show-pass: also list PASS findings.
        --strict-review: return a non-zero status when REVIEW findings exist.

    Returns:
        2 if any BLOCKED finding exists; otherwise 1 if REVIEW findings exist
        and ``--strict-review`` was given; otherwise 0.
    """
    cli = argparse.ArgumentParser(description="Audit manual scripts for AwoooP RLS readiness.")
    cli.add_argument("--show-pass", action="store_true", help="Print PASS findings.")
    cli.add_argument("--strict-review", action="store_true", help="Exit non-zero when REVIEW findings exist.")
    args = cli.parse_args()

    # Bucket findings by severity, preserving discovery order within a bucket.
    buckets: dict[str, list[Finding]] = {"BLOCKED": [], "REVIEW": [], "PASS": []}
    for script in iter_script_paths():
        for finding in classify(script):
            if finding.severity in buckets:
                buckets[finding.severity].append(finding)

    blocked = buckets["BLOCKED"]
    review = buckets["REVIEW"]
    passed = buckets["PASS"]

    summary = f"BLOCKED={len(blocked)} REVIEW={len(review)} PASS={len(passed)}"
    print("AwoooP RLS manual script audit: " + summary)

    # BLOCKED first, then REVIEW; PASS entries are appended only on request.
    reported = blocked + review
    if args.show_pass:
        reported = reported + passed
    for entry in reported:
        print(f"{entry.severity} {entry.path}")
        print(f"  reason: {entry.reason}")

    if blocked:
        return 2
    return 1 if (review and args.strict_review) else 0


# Script entry point: run the audit and use main()'s return value as the
# process exit status (0, 1 or 2).
if __name__ == "__main__":
    raise SystemExit(main())