#!/usr/bin/env python3
"""Static review inventory for manual DB scripts before AwoooP RLS enablement.

This is intentionally not a runtime gate. It separates:
- BLOCKED: secrets or hardcoded connection strings in scripts.
- REVIEW: manual/operator scripts that need a migration role or explicit review.
- PASS: scripts that already set app.project_id or use get_db_context().

Exit codes: 2 when any BLOCKED finding exists, 1 when REVIEW findings exist and
``--strict-review`` was given, otherwise 0.
"""

from __future__ import annotations

import argparse
import re
from dataclasses import dataclass
from pathlib import Path

# This script is expected to live two directories below the repository root.
ROOT = Path(__file__).resolve().parents[2]

# Directories that are walked recursively for candidate scripts.
SCAN_ROOTS = (
    ROOT / "apps/api/scripts",
    ROOT / "scripts",
)

# Repo-relative paths that are never scanned (the audit tools themselves).
SKIP_PATHS = {
    "scripts/ops/awooop-rls-access-audit.py",
    "scripts/ops/awooop-rls-manual-script-audit.py",
}

SCRIPT_SUFFIXES = {".py", ".sh", ".sql"}

# A PostgreSQL URL that carries "user:secret@" inline.
HARDCODED_DB_URL_RE = re.compile(
    r"postgresql(?:\+asyncpg)?://[^:'\"\s/]+:[^@'\"\s]+@",
    re.IGNORECASE,
)

# Substrings that mark a line as documentation/example rather than a real
# credential; lines containing any of them are not treated as hardcoded URLs.
PLACEHOLDER_MARKERS = (":password@",)

# Signals that a script talks to the database directly.
DIRECT_DB_RE = re.compile(
    r"\b(asyncpg\.connect|create_async_engine|psql\b|pg_dump\b|DATABASE_URL|PGPASSWORD)\b"
)

# Signals that a script establishes the per-project context.
PROJECT_CONTEXT_RE = re.compile(
    r"set_config\('app\.project_id'|SET\s+(?:LOCAL\s+)?app\.project_id|get_db_context\(",
    re.IGNORECASE,
)

# Signals that a script performs DDL / policy changes.
MIGRATION_HINT_RE = re.compile(
    r"\b(ALTER\s+TABLE|CREATE\s+TABLE|CREATE\s+INDEX|CREATE\s+EXTENSION|DROP\s+POLICY|ENABLE\s+ROW\s+LEVEL\s+SECURITY)\b",
    re.IGNORECASE,
)

TENANT_TABLES = (
    "incidents",
    "knowledge_entries",
    "playbooks",
    "audit_logs",
    "budget_ledger",
    "approval_records",
    "notification_outcomes",
    "rag_chunks",
    "playbook_embeddings",
    "awooop_projects",
    "awooop_contract_revisions",
    "awooop_run_state",
    "awooop_mcp_tool_registry",
    "awooop_mcp_grants",
    "awooop_mcp_credential_refs",
    "awooop_mcp_gateway_audit",
    "awooop_conversation_event",
    "awooop_outbound_message",
)

# One compiled alternation over TENANT_TABLES (whole-word, case-sensitive), so
# the per-file check is a single search instead of one regex per table.
TENANT_TABLE_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(table) for table in TENANT_TABLES) + r")\b"
)

# Scripts with a known, documented operator purpose; the value is the reason
# reported with their REVIEW finding.
OPERATOR_REVIEW_PATHS = {
    "apps/api/scripts/awooop_phase1_batch1_backfill.py": "RLS/project_id bootstrap backfill; run only with migration/operator role.",
    "apps/api/scripts/run_migration.py": "DDL migration script; run only with migration/operator role.",
    "scripts/ops/awooop_rls_preflight.py": "Read-only preflight that probes app.project_id inside the API pod.",
    "scripts/ops/awooop-rls-role-bootstrap.sql": "Role bootstrap SQL; must be reviewed and run by postgres/CREATEROLE operator.",
    "scripts/sync_dev_db.py": "Dev DB schema sync; use DEV_DATABASE_URL and run only against non-production DB.",
}


@dataclass(frozen=True)
class Finding:
    """One audit result for one script."""

    severity: str  # "BLOCKED", "REVIEW" or "PASS"
    path: str  # repo-relative POSIX path of the script
    reason: str  # human-readable explanation printed under the path


def rel(path: Path) -> str:
    """Return *path* relative to ROOT as a POSIX-style string.

    Raises:
        ValueError: if *path* is not located under ROOT.
    """
    return path.relative_to(ROOT).as_posix()


def iter_script_paths() -> list[Path]:
    """Collect every script under SCAN_ROOTS that should be audited.

    Missing scan roots are ignored. Only regular files whose suffix is in
    SCRIPT_SUFFIXES and whose repo-relative path is not in SKIP_PATHS are
    returned, de-duplicated and sorted.
    """
    paths: list[Path] = []
    for root in SCAN_ROOTS:
        if not root.exists():
            continue
        for path in root.rglob("*"):
            if not path.is_file() or path.suffix not in SCRIPT_SUFFIXES:
                continue
            if rel(path) in SKIP_PATHS:
                continue
            paths.append(path)
    return sorted(set(paths))


def has_hardcoded_db_url(text: str) -> bool:
    """Return True if any line of *text* embeds credentials in a PostgreSQL URL.

    Lines containing one of PLACEHOLDER_MARKERS are ignored. The check must
    never test for the empty string: ``"" in line`` is true for every line and
    would silently disable the detection.
    """
    for line in text.splitlines():
        if any(marker in line for marker in PLACEHOLDER_MARKERS):
            continue
        if HARDCODED_DB_URL_RE.search(line):
            return True
    return False


def classify_text(path_rel: str, text: str) -> list[Finding]:
    """Classify script content that has already been read.

    Args:
        path_rel: repo-relative POSIX path, used in findings and for the
            OPERATOR_REVIEW_PATHS lookup.
        text: full content of the script.

    Returns:
        Zero, one or two findings: an optional BLOCKED finding for inline
        credentials, followed by at most one REVIEW/PASS finding when the
        script uses the database directly.
    """
    findings: list[Finding] = []

    if has_hardcoded_db_url(text):
        findings.append(
            Finding(
                "BLOCKED",
                path_rel,
                "hardcoded PostgreSQL URL with inline credentials; move to environment/secret store.",
            )
        )

    # Scripts without any direct DB usage get no REVIEW/PASS entry at all.
    if not DIRECT_DB_RE.search(text):
        return findings

    touches_tenant_table = TENANT_TABLE_RE.search(text) is not None
    has_project_context = PROJECT_CONTEXT_RE.search(text) is not None

    # Order matters: a known operator script is reported with its documented
    # reason regardless of what its content looks like.
    if path_rel in OPERATOR_REVIEW_PATHS:
        findings.append(Finding("REVIEW", path_rel, OPERATOR_REVIEW_PATHS[path_rel]))
    elif touches_tenant_table and not has_project_context:
        findings.append(
            Finding(
                "REVIEW",
                path_rel,
                "direct DB access touches tenant tables without app.project_id; add project context or use operator role.",
            )
        )
    elif touches_tenant_table and has_project_context:
        findings.append(Finding("PASS", path_rel, "tenant table access sets app.project_id or uses get_db_context."))
    elif MIGRATION_HINT_RE.search(text):
        findings.append(Finding("REVIEW", path_rel, "DDL/operator script; verify role and maintenance window before use."))
    else:
        findings.append(Finding("PASS", path_rel, "no tenant table access detected in direct DB usage."))
    return findings


def classify(path: Path) -> list[Finding]:
    """Read the script at *path* and classify it.

    The file is decoded as UTF-8 with undecodable bytes replaced, so binary
    noise in a script cannot abort the audit.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    return classify_text(rel(path), text)


def _print_findings(items: list[Finding]) -> None:
    """Print each finding as a severity/path line followed by its reason."""
    for item in items:
        print(f"{item.severity} {item.path}")
        print(f" reason: {item.reason}")


def main() -> int:
    """Run the audit, print the report and return the process exit code."""
    parser = argparse.ArgumentParser(description="Audit manual scripts for AwoooP RLS readiness.")
    parser.add_argument("--show-pass", action="store_true", help="Print PASS findings.")
    parser.add_argument("--strict-review", action="store_true", help="Exit non-zero when REVIEW findings exist.")
    args = parser.parse_args()

    findings: list[Finding] = []
    for path in iter_script_paths():
        findings.extend(classify(path))

    blocked = [f for f in findings if f.severity == "BLOCKED"]
    review = [f for f in findings if f.severity == "REVIEW"]
    passed = [f for f in findings if f.severity == "PASS"]

    print(
        "AwoooP RLS manual script audit: "
        f"BLOCKED={len(blocked)} REVIEW={len(review)} PASS={len(passed)}"
    )
    _print_findings(blocked + review)
    if args.show_pass:
        _print_findings(passed)

    # BLOCKED always fails; REVIEW fails only when the caller opted in.
    if blocked:
        return 2
    if review and args.strict_review:
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())