#!/usr/bin/env python3
"""Static RLS access-path audit for AWOOOI API runtime code.

The goal is narrow: find production runtime DB access that may bypass
get_db()/get_db_context() and therefore miss SET LOCAL app.project_id. It is
intentionally conservative and read-only.

The scan is purely textual (line-by-line regex matching), so matches inside
comments or string literals are reported as well.

Exit status:
    0  no BLOCKED findings
    1  the source tree to audit does not exist (nothing could be checked)
    2  at least one BLOCKED finding
"""

from __future__ import annotations

import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path

# Repository root, derived from this script's location (two levels up from
# the directory that contains it), and the API source tree that is audited.
ROOT = Path(__file__).resolve().parents[2]
SRC_ROOT = ROOT / "apps/api/src"


@dataclass(frozen=True)
class Finding:
    """One source line that matched a detection rule."""

    severity: str  # "ALLOW" or "BLOCKED", as returned by classify()
    path: Path  # file path relative to the repository root
    line: int  # 1-based line number of the match
    rule: str  # name of the entry in RULES that matched
    text: str  # the matching source line, stripped of surrounding whitespace
    reason: str  # allow-list justification, or the generic blocked message


@dataclass(frozen=True)
class AllowRule:
    """An allow-list entry: a known, reviewed match that is acceptable."""

    path: str  # POSIX path relative to the repository root
    rule: str  # name of the RULES entry this exemption applies to
    text_pattern: re.Pattern[str]  # must match the raw source line
    reason: str  # why this particular occurrence is acceptable


# Detection rules: (rule name, pattern searched for in every source line).
RULES: list[tuple[str, re.Pattern[str]]] = [
    ("session_factory", re.compile(r"\bget_session_factory\s*\(")),
    ("create_async_engine", re.compile(r"\bcreate_async_engine\s*\(")),
    ("asyncpg_connect", re.compile(r"\basyncpg\.connect\s*\(")),
    ("settings_database_url", re.compile(r"\bsettings\.DATABASE_URL\b")),
    (
        "env_database_url",
        re.compile(
            r"os\.environ(?:\.get)?\([\"']DATABASE_URL[\"']"
            r"|os\.environ\[[\"']DATABASE_URL[\"']"
            # os.getenv() is the other common way to read the variable and
            # must not slip past the audit.
            r"|os\.getenv\([\"']DATABASE_URL[\"']"
        ),
    ),
]

# Reviewed exemptions. A finding is allowed only when the file path, the rule
# name and the line pattern all match one of these entries.
ALLOW_RULES: tuple[AllowRule, ...] = (
    AllowRule(
        "apps/api/src/db/base.py",
        "settings_database_url",
        re.compile(r"\bdatabase_url\s*=\s*settings\.DATABASE_URL\b"),
        "DB engine owner reads DATABASE_URL and sets RLS context in get_db/get_db_context.",
    ),
    AllowRule(
        "apps/api/src/db/base.py",
        "create_async_engine",
        re.compile(r"\b_engine\s*=\s*create_async_engine\("),
        "DB engine owner creates the shared async engine.",
    ),
    AllowRule(
        "apps/api/src/db/base.py",
        "session_factory",
        re.compile(r"\bdef\s+get_session_factory\("),
        "Factory definition, not a call-site bypass.",
    ),
    AllowRule(
        "apps/api/src/db/base.py",
        "session_factory",
        re.compile(r"\bfactory\s*=\s*get_session_factory\(\)"),
        "get_db/get_db_context wrap factory and set app.project_id.",
    ),
    AllowRule(
        "apps/api/src/routes/health.py",
        "settings_database_url",
        re.compile(r"\bdb_url\s*=\s*settings\.DATABASE_URL\.replace\("),
        "Health check parses DATABASE_URL for SELECT 1 only.",
    ),
    AllowRule(
        "apps/api/src/routes/health.py",
        "asyncpg_connect",
        re.compile(r"\basyncpg\.connect\(db_url\)"),
        "Health check raw asyncpg SELECT 1 does not read tenant tables.",
    ),
    AllowRule(
        "apps/api/src/main.py",
        "settings_database_url",
        re.compile(r"\bdb_url\s*=\s*settings\.DATABASE_URL\b"),
        "Startup logs sanitized DB host suffix after init_db.",
    ),
    AllowRule(
        "apps/api/src/workers/signal_worker.py",
        "settings_database_url",
        re.compile(r"\bdatabase_url=settings\.DATABASE_URL\.split\(\"@\"\)\[-1\]"),
        "Structured log uses redacted DATABASE_URL suffix only.",
    ),
    AllowRule(
        "apps/api/src/services/incident_approval_service.py",
        "session_factory",
        re.compile(r"\bsession_factory=get_session_factory\(\),"),
        "IncidentApprovalService injects UnitOfWork; UnitOfWork now sets app.project_id.",
    ),
)


def classify(
    path: Path,
    rule: str,
    line_text: str,
    root: Path | None = None,
) -> tuple[str, str]:
    """Decide whether a rule match is allow-listed.

    Args:
        path: File containing the match; must be located under ``root``.
        rule: Name of the RULES entry that matched.
        line_text: The raw (unstripped) source line that matched.
        root: Directory that allow-list paths are relative to. Defaults to
            the module-level ``ROOT``.

    Returns:
        ``("ALLOW", reason)`` when an ALLOW_RULES entry covers the match,
        otherwise ``("BLOCKED", generic_reason)``.

    Raises:
        ValueError: If ``path`` is not located under ``root``.
    """
    base = ROOT if root is None else root
    rel = path.relative_to(base).as_posix()
    for allow in ALLOW_RULES:
        if allow.path == rel and allow.rule == rule and allow.text_pattern.search(line_text):
            return "ALLOW", allow.reason
    return (
        "BLOCKED",
        "Runtime DB access must set app.project_id through get_db/get_db_context or UnitOfWork.",
    )


def scan(src_root: Path | None = None, root: Path | None = None) -> list[Finding]:
    """Scan every ``*.py`` file below the source tree for rule matches.

    Args:
        src_root: Directory to walk recursively. Defaults to ``SRC_ROOT``.
        root: Directory that reported paths and allow-list paths are relative
            to. Defaults to ``ROOT``.

    Returns:
        One Finding per (line, rule) match, ordered by file path, then line
        number, then rule order in RULES.
    """
    tree = SRC_ROOT if src_root is None else src_root
    base = ROOT if root is None else root

    findings: list[Finding] = []
    for path in sorted(tree.rglob("*.py")):
        # A directory may legitimately be named "something.py"; skip it
        # rather than crash while trying to read it.
        if not path.is_file():
            continue
        # Always decode as UTF-8 and substitute undecodable bytes, so results
        # do not depend on the platform's locale encoding.
        lines = path.read_text(encoding="utf-8", errors="replace").splitlines()
        rel_path = path.relative_to(base)
        for idx, line in enumerate(lines, start=1):
            for rule, pattern in RULES:
                if not pattern.search(line):
                    continue
                severity, reason = classify(path, rule, line, root=base)
                findings.append(
                    Finding(
                        severity=severity,
                        path=rel_path,
                        line=idx,
                        rule=rule,
                        text=line.strip(),
                        reason=reason,
                    )
                )
    return findings


def _print_findings(items: list[Finding]) -> None:
    """Print each finding as a location line followed by its reason."""
    for item in items:
        print(f"{item.severity} {item.path}:{item.line} [{item.rule}] {item.text}")
        print(f" reason: {item.reason}")


def main(argv: list[str] | None = None) -> int:
    """Run the audit and print a report.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` (argparse's behaviour when given ``None``).

    Returns:
        Process exit status: 0 when nothing is blocked, 2 when at least one
        finding is BLOCKED, 1 when the source tree does not exist.
    """
    parser = argparse.ArgumentParser(description="Audit API runtime DB access paths for RLS readiness.")
    parser.add_argument("--show-allowed", action="store_true", help="Print allowed findings too.")
    args = parser.parse_args(argv)

    # Without this guard a missing tree yields zero findings and the audit
    # would report success without having inspected anything.
    if not SRC_ROOT.is_dir():
        print(f"AwoooP RLS access audit: source root not found: {SRC_ROOT}", file=sys.stderr)
        return 1

    findings = scan()
    blocked = [item for item in findings if item.severity == "BLOCKED"]
    allowed = [item for item in findings if item.severity == "ALLOW"]

    print(f"AwoooP RLS access audit: BLOCKED={len(blocked)} ALLOW={len(allowed)}")
    _print_findings(blocked)
    if args.show_allowed:
        _print_findings(allowed)
    return 2 if blocked else 0


if __name__ == "__main__":
    raise SystemExit(main())