From 8c4dc7a5a855bc0c64c009ac8d65eb2916f26834 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 12 May 2026 20:23:22 +0800 Subject: [PATCH] =?UTF-8?q?chore(rls):=20=E6=96=B0=E5=A2=9E=20manual=20scr?= =?UTF-8?q?ipt=20gate=20=E8=88=87=20canary=20wave1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../scripts/awooop_phase1_batch1_backfill.py | 2 +- apps/api/scripts/reembed_bge_m3.py | 2 + apps/api/scripts/run_migration.py | 2 +- docs/LOGBOOK.md | 47 +++++ docs/runbooks/AWOOOP-RLS-CANARY-WAVE1.md | 78 +++++++ docs/runbooks/AWOOOP-RLS-MANUAL-SCRIPTS.md | 55 +++++ scripts/backfill_km_from_approvals.py | 4 + scripts/batch_vectorize_km.py | 12 +- scripts/bootstrap_prod.sh | 4 +- scripts/cold_start_playbooks.py | 2 + ...rls-canary-wave1-empty-tables-rollback.sql | 34 ++++ .../awooop-rls-canary-wave1-empty-tables.sql | 128 ++++++++++++ scripts/ops/awooop-rls-manual-script-audit.py | 191 ++++++++++++++++++ scripts/sync_dev_db.py | 7 +- .../verify/verify_telegram_dedup_b3a0f0d7.sh | 3 + 15 files changed, 562 insertions(+), 9 deletions(-) create mode 100644 docs/runbooks/AWOOOP-RLS-CANARY-WAVE1.md create mode 100644 docs/runbooks/AWOOOP-RLS-MANUAL-SCRIPTS.md create mode 100644 scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql create mode 100644 scripts/ops/awooop-rls-canary-wave1-empty-tables.sql create mode 100755 scripts/ops/awooop-rls-manual-script-audit.py diff --git a/apps/api/scripts/awooop_phase1_batch1_backfill.py b/apps/api/scripts/awooop_phase1_batch1_backfill.py index 9cc13f29..8863c9e1 100644 --- a/apps/api/scripts/awooop_phase1_batch1_backfill.py +++ b/apps/api/scripts/awooop_phase1_batch1_backfill.py @@ -9,7 +9,7 @@ AwoooP Phase 1 Batch 1 回填腳本 awooop_phase1_batch1_rls_2026-05-04.sql Step A(ADD COLUMN nullable)已執行 執行方式: - export DATABASE_URL="postgresql+asyncpg://awoooi:@192.168.0.188:5432/awoooi_prod" + 從 secret manager / operator vault 設定 DATABASE_URL,禁止在指令或檔案中寫入 URL。 cd apps/api && python 
scripts/awooop_phase1_batch1_backfill.py 2026-05-04 ogt + Claude Sonnet 4.6(ADR-118 Batch 1 C-3 修正) diff --git a/apps/api/scripts/reembed_bge_m3.py b/apps/api/scripts/reembed_bge_m3.py index 15d7688d..ae3ce589 100644 --- a/apps/api/scripts/reembed_bge_m3.py +++ b/apps/api/scripts/reembed_bge_m3.py @@ -37,6 +37,7 @@ logging = structlog.get_logger(__name__) OLLAMA_URL = os.getenv("OLLAMA_URL", "http://34.143.170.20:11434") EMBEDDING_MODEL = "bge-m3:latest" EXPECTED_DIM = 1024 +PROJECT_ID = os.getenv("AWOOOP_PROJECT_ID", "awoooi") async def embed_text(client: httpx.AsyncClient, text: str) -> list[float]: @@ -162,6 +163,7 @@ async def main(dry_run: bool, batch_size: int) -> None: conn = await asyncpg.connect(database_url) try: + await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID) # 統計待嵌入筆數 rag_null = await conn.fetchval("SELECT COUNT(*) FROM rag_chunks WHERE embedding IS NULL") pb_null = await conn.fetchval("SELECT COUNT(*) FROM playbook_embeddings WHERE embedding IS NULL") diff --git a/apps/api/scripts/run_migration.py b/apps/api/scripts/run_migration.py index 5314eb6e..4d16edee 100644 --- a/apps/api/scripts/run_migration.py +++ b/apps/api/scripts/run_migration.py @@ -15,7 +15,7 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import create_async_engine # 2026-04-22 ogt: 移除硬碼 changeme,改為讀取環境變數(強制要求設定)。 -# 執行前: export DATABASE_URL="postgresql+asyncpg://awoooi:@192.168.0.188:5432/awoooi_prod" +# 執行前: 從 secret manager / operator vault 設定 DATABASE_URL,禁止在指令或檔案中寫入 URL。 DATABASE_URL = os.environ["DATABASE_URL"] MIGRATION_SQLS = [ diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 2134d451..8309bb75 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,50 @@ +## 2026-05-12 | RLS Manual Script Gate 與 Canary Wave1 套件 + +**背景**:API runtime DB access path 已收斂後,下一個風險是人工腳本在 RLS fail-closed 後直接用 `DATABASE_URL` 讀寫 tenant tables;同時需要第一批低風險 RLS policy 套件,但不可直接熱開高流量表。 + +**manual scripts 收斂**: +- 新增 
`scripts/ops/awooop-rls-manual-script-audit.py`: + - 掃描 `apps/api/scripts/` 與 top-level `scripts/` 中的直接 DB access、硬編碼 PostgreSQL URL、tenant table access。 + - `BLOCKED` 表示 secrets/inline credential 類問題;`REVIEW` 表示 migration/operator path;`PASS` 表示已設 project context 或非 tenant DB 操作。 +- 移除/避免腳本中的 inline DB URL: + - `scripts/sync_dev_db.py` 改讀 `DEV_DATABASE_URL`,不再含硬編碼 dev DB URL。 + - `scripts/bootstrap_prod.sh` 產生 Secret 時不再提供 `DATABASE_URL` / `REDIS_URL` fallback。 + - `apps/api/scripts/run_migration.py`、`apps/api/scripts/awooop_phase1_batch1_backfill.py` 文件範例不再寫出 PostgreSQL URL。 +- 補上 direct `asyncpg` 腳本的 session-level `app.project_id`: + - `apps/api/scripts/reembed_bge_m3.py` + - `scripts/backfill_km_from_approvals.py` + - `scripts/batch_vectorize_km.py` + - `scripts/cold_start_playbooks.py` + - `scripts/verify/verify_telegram_dedup_b3a0f0d7.sh` +- 新增 `docs/runbooks/AWOOOP-RLS-MANUAL-SCRIPTS.md` 記錄 operator rule 與現況。 + +**Canary Wave1 套件**: +- 新增 apply / rollback SQL: + - `scripts/ops/awooop-rls-canary-wave1-empty-tables.sql` + - `scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql` +- 新增 `docs/runbooks/AWOOOP-RLS-CANARY-WAVE1.md`。 +- Wave1 只納入 live preflight 顯示 `total_rows=0` 的表: + - `awooop_contract_revisions` + - `awooop_conversation_event` + - `awooop_mcp_credential_refs` + - `awooop_mcp_gateway_audit` + - `awooop_mcp_grants` + - `budget_ledger` +- SQL 內建防呆:target 不存在、缺 `project_id`、有 NULL project_id、或 row count 已非 0 都會 abort;policy 為 fail-closed,無 NULL / 空字串 bypass。 + +**驗證**: +- `python3 scripts/ops/awooop-rls-manual-script-audit.py --show-pass` → `BLOCKED=0 REVIEW=5 PASS=13`。 +- `python3 scripts/ops/awooop-rls-access-audit.py` → `BLOCKED=0 ALLOW=10`。 +- `python3 -m py_compile` 對修改過的 Python 腳本與 audit script → passed。 +- `bash -n scripts/bootstrap_prod.sh scripts/verify/verify_telegram_dedup_b3a0f0d7.sh` → passed。 +- `rg` 檢查 scripts 中 inline PostgreSQL credential URL → no matches。 +- `scripts/ops/awooop-rls-preflight.sh --exact-counts` → 仍為 `PASS=7 
WARN=0 BLOCKED=1`;六張 wave1 canary 表仍為 `total_rows=0 null_project_id_rows=0`。 +- 本輪未執行 production RLS apply;只產出 staged apply / rollback 套件。 + +**下一步**: +- 人工 review `AWOOOP-RLS-CANARY-WAVE1.md`,確認維護窗口與 operator role。 +- 若批准 production apply,先重跑三個 gate:runtime access audit、manual script audit、RLS preflight exact counts;再執行 wave1 SQL,隨後 health + preflight 驗證。 + ## 2026-05-12 | RLS Access Path Audit 收斂 **背景**:RLS role bootstrap 已完成後,下一個 gate 是確認 API runtime DB access 都會設定 `app.project_id`;否則一旦 fail-closed policy 上線,直接 session factory 入口會讀不到資料或寫入失敗。 diff --git a/docs/runbooks/AWOOOP-RLS-CANARY-WAVE1.md b/docs/runbooks/AWOOOP-RLS-CANARY-WAVE1.md new file mode 100644 index 00000000..7d59ce88 --- /dev/null +++ b/docs/runbooks/AWOOOP-RLS-CANARY-WAVE1.md @@ -0,0 +1,78 @@ +# AwoooP RLS Canary Wave 1 + +This wave is a staged apply package only. It enables fail-closed RLS on tables +that were empty in the latest production preflight. + +Apply script: + +```bash +scripts/ops/awooop-rls-canary-wave1-empty-tables.sql +``` + +Rollback script: + +```bash +scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql +``` + +## Scope + +Wave 1 targets only tables that had `total_rows=0` and +`null_project_id_rows=0` on 2026-05-12: + +- `awooop_contract_revisions` +- `awooop_conversation_event` +- `awooop_mcp_credential_refs` +- `awooop_mcp_gateway_audit` +- `awooop_mcp_grants` +- `budget_ledger` + +The SQL aborts if any target now has rows, is missing, lacks `project_id`, or +contains NULL `project_id`. + +## Pre-Apply Gate + +Run these before any production apply: + +```bash +python3 scripts/ops/awooop-rls-access-audit.py +python3 scripts/ops/awooop-rls-manual-script-audit.py +bash scripts/ops/awooop-rls-preflight.sh --exact-counts +``` + +Expected before wave1 apply: + +- Runtime access audit: `BLOCKED=0`. +- Manual script audit: `BLOCKED=0`; review items acknowledged. +- RLS preflight: still blocked only because policies are not enabled. 
+- Exact counts for the six wave1 target tables remain `total_rows=0`. + +## Apply + +Run with a migration/operator role in a reviewed maintenance window: + +```bash +psql "$DATABASE_URL" -v ON_ERROR_STOP=1 \ + -f scripts/ops/awooop-rls-canary-wave1-empty-tables.sql +``` + +Then rerun: + +```bash +bash scripts/ops/awooop-rls-preflight.sh --exact-counts +curl -fsS https://awoooi.wooo.work/api/v1/health +``` + +The global preflight will still be blocked until later waves cover the remaining +tables. The six wave1 tables should show `rls=true`, `force=true`, and +`policies=1`, with no fail-open policy expression. + +## Rollback + +```bash +psql "$DATABASE_URL" -v ON_ERROR_STOP=1 \ + -f scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql +``` + +Rollback disables RLS and removes only the wave1 policies on the six canary +tables. It does not modify data. diff --git a/docs/runbooks/AWOOOP-RLS-MANUAL-SCRIPTS.md b/docs/runbooks/AWOOOP-RLS-MANUAL-SCRIPTS.md new file mode 100644 index 00000000..8c5269d2 --- /dev/null +++ b/docs/runbooks/AWOOOP-RLS-MANUAL-SCRIPTS.md @@ -0,0 +1,55 @@ +# AwoooP RLS Manual Script Review + +Manual scripts are not API runtime, but they can still break after fail-closed +RLS if they connect directly with `DATABASE_URL` and do not set +`app.project_id`. + +Run: + +```bash +python3 scripts/ops/awooop-rls-manual-script-audit.py --show-pass +``` + +Use strict mode for CI or pre-apply review: + +```bash +python3 scripts/ops/awooop-rls-manual-script-audit.py --strict-review +``` + +## 2026-05-12 Result + +```text +AwoooP RLS manual script audit: BLOCKED=0 REVIEW=5 PASS=13 +``` + +`BLOCKED=0` means no hardcoded PostgreSQL URL with inline credentials was found +in the scanned manual scripts. + +`REVIEW=5` are intentional operator paths: + +- `apps/api/scripts/awooop_phase1_batch1_backfill.py`: RLS/project_id bootstrap + backfill; use migration/operator role. 
+- `apps/api/scripts/run_migration.py`: DDL migration script; use + migration/operator role. +- `scripts/ops/awooop-rls-role-bootstrap.sql`: role bootstrap; requires + postgres/CREATEROLE operator. +- `scripts/ops/awooop_rls_preflight.py`: read-only preflight inside API pod. +- `scripts/sync_dev_db.py`: dev schema sync; requires `DEV_DATABASE_URL` and + must not target production. + +The common tenant-data manual scripts now set `app.project_id` or use +`get_db_context()`. + +## Operator Rule + +- Direct tenant table access must set `app.project_id` on every connection, or + use an approved migration/operator role with BYPASSRLS. +- Direct `asyncpg` scripts should use session-level context: + +```python +await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", project_id) +``` + +- SQLAlchemy runtime/service code should use `get_db_context()` or `get_db()`. +- Do not add PostgreSQL URLs with inline credentials to scripts, comments, or + examples. diff --git a/scripts/backfill_km_from_approvals.py b/scripts/backfill_km_from_approvals.py index a935ec91..d38a0e2c 100644 --- a/scripts/backfill_km_from_approvals.py +++ b/scripts/backfill_km_from_approvals.py @@ -17,6 +17,9 @@ import sys import asyncpg +PROJECT_ID = os.environ.get("AWOOOP_PROJECT_ID", "awoooi") + + async def backfill(): db_url = os.environ.get("DATABASE_URL", "").replace("postgresql+asyncpg://", "postgresql://") if not db_url: @@ -24,6 +27,7 @@ async def backfill(): sys.exit(1) conn = await asyncpg.connect(db_url) + await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID) # 取得所有有意義的 approval records (已批准/執行) approvals = await conn.fetch(""" diff --git a/scripts/batch_vectorize_km.py b/scripts/batch_vectorize_km.py index 77694e70..890ae8e7 100644 --- a/scripts/batch_vectorize_km.py +++ b/scripts/batch_vectorize_km.py @@ -22,15 +22,19 @@ VIA_API = "--via-api" in sys.argv DRY_RUN = "--dry-run" in sys.argv DATABASE_URL = os.environ.get("DATABASE_URL", 
"").replace("postgresql+asyncpg://", "postgresql://") API_BASE = os.environ.get("API_BASE", "http://localhost:8000") +PROJECT_ID = os.environ.get("AWOOOP_PROJECT_ID", "awoooi") async def check_status(): """確認需要向量化的數量""" conn = await asyncpg.connect(DATABASE_URL) - total = await conn.fetchval("SELECT count(*) FROM knowledge_entries") - null_emb = await conn.fetchval("SELECT count(*) FROM knowledge_entries WHERE embedding IS NULL") - await conn.close() - return total, null_emb + try: + await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID) + total = await conn.fetchval("SELECT count(*) FROM knowledge_entries") + null_emb = await conn.fetchval("SELECT count(*) FROM knowledge_entries WHERE embedding IS NULL") + return total, null_emb + finally: + await conn.close() async def via_api(): diff --git a/scripts/bootstrap_prod.sh b/scripts/bootstrap_prod.sh index 603a77e5..08e56c15 100755 --- a/scripts/bootstrap_prod.sh +++ b/scripts/bootstrap_prod.sh @@ -111,10 +111,10 @@ metadata: type: Opaque stringData: # 資料庫 - DATABASE_URL: "${DATABASE_URL:-postgresql+asyncpg://awoooi:changeme@192.168.0.188:5432/awoooi_prod}" + DATABASE_URL: "${DATABASE_URL}" # Redis - REDIS_URL: "${REDIS_URL:-redis://192.168.0.188:6380/10}" + REDIS_URL: "${REDIS_URL}" # AI 服務 GEMINI_API_KEY: "${GEMINI_API_KEY:-}" diff --git a/scripts/cold_start_playbooks.py b/scripts/cold_start_playbooks.py index 02641cd0..ce89e2d4 100644 --- a/scripts/cold_start_playbooks.py +++ b/scripts/cold_start_playbooks.py @@ -22,6 +22,7 @@ from datetime import datetime, timezone DRY_RUN = "--dry-run" in sys.argv DATABASE_URL = os.environ.get("DATABASE_URL", "").replace("postgresql+asyncpg://", "postgresql://") +PROJECT_ID = os.environ.get("AWOOOP_PROJECT_ID", "awoooi") if not DATABASE_URL: print("ERROR: DATABASE_URL 未設定") @@ -243,6 +244,7 @@ PLAYBOOK_TEMPLATES = [ async def main(): conn = await asyncpg.connect(DATABASE_URL) + await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", 
PROJECT_ID) # 確認當前 playbooks 數量 current = await conn.fetchval("SELECT count(*) FROM playbooks") diff --git a/scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql b/scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql new file mode 100644 index 00000000..86abb579 --- /dev/null +++ b/scripts/ops/awooop-rls-canary-wave1-empty-tables-rollback.sql @@ -0,0 +1,34 @@ +-- Rollback for AwoooP RLS Canary Wave 1. +-- This only removes the wave1 policies and disables RLS on the canary tables. +-- It intentionally does not touch data. + +BEGIN; + +SET LOCAL lock_timeout = '5s'; +SET LOCAL statement_timeout = '30s'; + +DROP POLICY IF EXISTS awooop_contract_revisions_tenant ON awooop_contract_revisions; +ALTER TABLE awooop_contract_revisions NO FORCE ROW LEVEL SECURITY; +ALTER TABLE awooop_contract_revisions DISABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS awooop_conversation_event_tenant ON awooop_conversation_event; +ALTER TABLE awooop_conversation_event NO FORCE ROW LEVEL SECURITY; +ALTER TABLE awooop_conversation_event DISABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS awooop_mcp_credential_refs_tenant ON awooop_mcp_credential_refs; +ALTER TABLE awooop_mcp_credential_refs NO FORCE ROW LEVEL SECURITY; +ALTER TABLE awooop_mcp_credential_refs DISABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS awooop_mcp_gateway_audit_tenant ON awooop_mcp_gateway_audit; +ALTER TABLE awooop_mcp_gateway_audit NO FORCE ROW LEVEL SECURITY; +ALTER TABLE awooop_mcp_gateway_audit DISABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS awooop_mcp_grants_tenant ON awooop_mcp_grants; +ALTER TABLE awooop_mcp_grants NO FORCE ROW LEVEL SECURITY; +ALTER TABLE awooop_mcp_grants DISABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS budget_ledger_tenant ON budget_ledger; +ALTER TABLE budget_ledger NO FORCE ROW LEVEL SECURITY; +ALTER TABLE budget_ledger DISABLE ROW LEVEL SECURITY; + +COMMIT; diff --git a/scripts/ops/awooop-rls-canary-wave1-empty-tables.sql 
b/scripts/ops/awooop-rls-canary-wave1-empty-tables.sql new file mode 100644 index 00000000..312d8975 --- /dev/null +++ b/scripts/ops/awooop-rls-canary-wave1-empty-tables.sql @@ -0,0 +1,128 @@ +-- AwoooP RLS Canary Wave 1: empty/low-risk tables only +-- Date: 2026-05-12 +-- +-- Scope: +-- These tables had exact production row_count=0 in the latest preflight: +-- - awooop_contract_revisions +-- - awooop_conversation_event +-- - awooop_mcp_credential_refs +-- - awooop_mcp_gateway_audit +-- - awooop_mcp_grants +-- - budget_ledger +-- +-- Safety: +-- - fail-closed policy only; no NULL/empty-string app.project_id bypass. +-- - aborts if any target is missing project_id, has NULL project_id, or has rows. +-- - run with a migration/operator role, not through the production app role. +-- - do not run until scripts/ops/awooop-rls-access-audit.py and +-- scripts/ops/awooop-rls-manual-script-audit.py are green. + +BEGIN; + +SET LOCAL lock_timeout = '5s'; +SET LOCAL statement_timeout = '30s'; + +CREATE TEMP TABLE _awooop_rls_wave1_targets ( + table_name text PRIMARY KEY +) ON COMMIT DROP; + +INSERT INTO _awooop_rls_wave1_targets (table_name) +VALUES + ('awooop_contract_revisions'), + ('awooop_conversation_event'), + ('awooop_mcp_credential_refs'), + ('awooop_mcp_gateway_audit'), + ('awooop_mcp_grants'), + ('budget_ledger'); + +DO $$ +DECLARE + target_table text; + target_oid regclass; + total_rows bigint; + null_project_rows bigint; +BEGIN + FOR target_table IN SELECT table_name FROM _awooop_rls_wave1_targets ORDER BY table_name LOOP + SELECT to_regclass(format('public.%I', target_table)) INTO target_oid; + IF target_oid IS NULL THEN + RAISE EXCEPTION 'RLS canary target table does not exist: %', target_table; + END IF; + + IF NOT EXISTS ( + SELECT 1 + FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = target_table + AND column_name = 'project_id' + ) THEN + RAISE EXCEPTION 'RLS canary target missing project_id: %', target_table; + END IF; + + 
EXECUTE format( + 'SELECT COUNT(*), COUNT(*) FILTER (WHERE project_id IS NULL) FROM %I', + target_table + ) + INTO total_rows, null_project_rows; + + IF null_project_rows <> 0 THEN + RAISE EXCEPTION 'RLS canary target has NULL project_id rows: %, nulls=%', + target_table, null_project_rows; + END IF; + + IF total_rows <> 0 THEN + RAISE EXCEPTION 'RLS canary wave1 only accepts empty tables: %, rows=%', + target_table, total_rows; + END IF; + END LOOP; +END +$$; + +ALTER TABLE awooop_contract_revisions ENABLE ROW LEVEL SECURITY; +ALTER TABLE awooop_contract_revisions FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS awooop_contract_revisions_tenant ON awooop_contract_revisions; +CREATE POLICY awooop_contract_revisions_tenant ON awooop_contract_revisions + FOR ALL TO awooop_app + USING (project_id = current_setting('app.project_id', TRUE)) + WITH CHECK (project_id = current_setting('app.project_id', TRUE)); + +ALTER TABLE awooop_conversation_event ENABLE ROW LEVEL SECURITY; +ALTER TABLE awooop_conversation_event FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS awooop_conversation_event_tenant ON awooop_conversation_event; +CREATE POLICY awooop_conversation_event_tenant ON awooop_conversation_event + FOR ALL TO awooop_app + USING (project_id = current_setting('app.project_id', TRUE)) + WITH CHECK (project_id = current_setting('app.project_id', TRUE)); + +ALTER TABLE awooop_mcp_credential_refs ENABLE ROW LEVEL SECURITY; +ALTER TABLE awooop_mcp_credential_refs FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS awooop_mcp_credential_refs_tenant ON awooop_mcp_credential_refs; +CREATE POLICY awooop_mcp_credential_refs_tenant ON awooop_mcp_credential_refs + FOR ALL TO awooop_app + USING (project_id = current_setting('app.project_id', TRUE)) + WITH CHECK (project_id = current_setting('app.project_id', TRUE)); + +ALTER TABLE awooop_mcp_gateway_audit ENABLE ROW LEVEL SECURITY; +ALTER TABLE awooop_mcp_gateway_audit FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS 
awooop_mcp_gateway_audit_tenant ON awooop_mcp_gateway_audit; +CREATE POLICY awooop_mcp_gateway_audit_tenant ON awooop_mcp_gateway_audit + FOR ALL TO awooop_app + USING (project_id = current_setting('app.project_id', TRUE)) + WITH CHECK (project_id = current_setting('app.project_id', TRUE)); + +ALTER TABLE awooop_mcp_grants ENABLE ROW LEVEL SECURITY; +ALTER TABLE awooop_mcp_grants FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS awooop_mcp_grants_tenant ON awooop_mcp_grants; +CREATE POLICY awooop_mcp_grants_tenant ON awooop_mcp_grants + FOR ALL TO awooop_app + USING (project_id = current_setting('app.project_id', TRUE)) + WITH CHECK (project_id = current_setting('app.project_id', TRUE)); + +ALTER TABLE budget_ledger ENABLE ROW LEVEL SECURITY; +ALTER TABLE budget_ledger FORCE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS budget_ledger_tenant ON budget_ledger; +CREATE POLICY budget_ledger_tenant ON budget_ledger + FOR ALL TO awooop_app + USING (project_id = current_setting('app.project_id', TRUE)) + WITH CHECK (project_id = current_setting('app.project_id', TRUE)); + +COMMIT; diff --git a/scripts/ops/awooop-rls-manual-script-audit.py b/scripts/ops/awooop-rls-manual-script-audit.py new file mode 100755 index 00000000..41507433 --- /dev/null +++ b/scripts/ops/awooop-rls-manual-script-audit.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +"""Static review inventory for manual DB scripts before AwoooP RLS enablement. + +This is intentionally not a runtime gate. It separates: +- BLOCKED: secrets or hardcoded connection strings in scripts. +- REVIEW: manual/operator scripts that need a migration role or explicit review. +- PASS: scripts that already set app.project_id or use get_db_context(). 
+""" + +from __future__ import annotations + +import argparse +import re +from dataclasses import dataclass +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[2] +SCAN_ROOTS = ( + ROOT / "apps/api/scripts", + ROOT / "scripts", +) + +SKIP_PATHS = { + "scripts/ops/awooop-rls-access-audit.py", + "scripts/ops/awooop-rls-manual-script-audit.py", +} + +SCRIPT_SUFFIXES = {".py", ".sh", ".sql"} + +HARDCODED_DB_URL_RE = re.compile( + r"postgresql(?:\+asyncpg)?://[^:'\"\s/]+:[^@'\"\s]+@", + re.IGNORECASE, +) +DIRECT_DB_RE = re.compile( + r"\b(asyncpg\.connect|create_async_engine|psql\b|pg_dump\b|DATABASE_URL|PGPASSWORD)\b" +) +PROJECT_CONTEXT_RE = re.compile( + r"set_config\('app\.project_id'|SET\s+(?:LOCAL\s+)?app\.project_id|get_db_context\(", + re.IGNORECASE, +) +MIGRATION_HINT_RE = re.compile( + r"\b(ALTER\s+TABLE|CREATE\s+TABLE|CREATE\s+INDEX|CREATE\s+EXTENSION|DROP\s+POLICY|ENABLE\s+ROW\s+LEVEL\s+SECURITY)\b", + re.IGNORECASE, +) + +TENANT_TABLES = ( + "incidents", + "knowledge_entries", + "playbooks", + "audit_logs", + "budget_ledger", + "approval_records", + "notification_outcomes", + "rag_chunks", + "playbook_embeddings", + "awooop_projects", + "awooop_contract_revisions", + "awooop_run_state", + "awooop_mcp_tool_registry", + "awooop_mcp_grants", + "awooop_mcp_credential_refs", + "awooop_mcp_gateway_audit", + "awooop_conversation_event", + "awooop_outbound_message", +) + +OPERATOR_REVIEW_PATHS = { + "apps/api/scripts/awooop_phase1_batch1_backfill.py": + "RLS/project_id bootstrap backfill; run only with migration/operator role.", + "apps/api/scripts/run_migration.py": + "DDL migration script; run only with migration/operator role.", + "scripts/ops/awooop_rls_preflight.py": + "Read-only preflight that probes app.project_id inside the API pod.", + "scripts/ops/awooop-rls-role-bootstrap.sql": + "Role bootstrap SQL; must be reviewed and run by postgres/CREATEROLE operator.", + "scripts/sync_dev_db.py": + "Dev DB schema sync; use DEV_DATABASE_URL and 
run only against non-production DB.", +} + + +@dataclass(frozen=True) +class Finding: + severity: str + path: str + reason: str + + +def rel(path: Path) -> str: + return path.relative_to(ROOT).as_posix() + + +def iter_script_paths() -> list[Path]: + paths: list[Path] = [] + for root in SCAN_ROOTS: + if not root.exists(): + continue + for path in root.rglob("*"): + if not path.is_file() or path.suffix not in SCRIPT_SUFFIXES: + continue + if rel(path) in SKIP_PATHS: + continue + paths.append(path) + return sorted(set(paths)) + + +def classify(path: Path) -> list[Finding]: + text = path.read_text(encoding="utf-8", errors="replace") + path_rel = rel(path) + findings: list[Finding] = [] + + hardcoded_db_url = False + for line in text.splitlines(): + if "<password>" in line or ":password@" in line: + continue + if HARDCODED_DB_URL_RE.search(line): + hardcoded_db_url = True + break + + if hardcoded_db_url: + findings.append( + Finding( + "BLOCKED", + path_rel, + "hardcoded PostgreSQL URL with inline credentials; move to environment/secret store.", + ) + ) + + if not DIRECT_DB_RE.search(text): + return findings + + touches_tenant_table = any(re.search(rf"\b{re.escape(table)}\b", text) for table in TENANT_TABLES) + has_project_context = PROJECT_CONTEXT_RE.search(text) is not None + + if path_rel in OPERATOR_REVIEW_PATHS: + findings.append(Finding("REVIEW", path_rel, OPERATOR_REVIEW_PATHS[path_rel])) + elif touches_tenant_table and not has_project_context: + findings.append( + Finding( + "REVIEW", + path_rel, + "direct DB access touches tenant tables without app.project_id; add project context or use operator role.", + ) + ) + elif touches_tenant_table and has_project_context: + findings.append(Finding("PASS", path_rel, "tenant table access sets app.project_id or uses get_db_context.")) + elif MIGRATION_HINT_RE.search(text): + findings.append(Finding("REVIEW", path_rel, "DDL/operator script; verify role and maintenance window before use.")) + else: + findings.append(Finding("PASS", 
path_rel, "no tenant table access detected in direct DB usage.")) + + return findings + + +def main() -> int: + parser = argparse.ArgumentParser(description="Audit manual scripts for AwoooP RLS readiness.") + parser.add_argument("--show-pass", action="store_true", help="Print PASS findings.") + parser.add_argument("--strict-review", action="store_true", help="Exit non-zero when REVIEW findings exist.") + args = parser.parse_args() + + findings: list[Finding] = [] + for path in iter_script_paths(): + findings.extend(classify(path)) + + blocked = [f for f in findings if f.severity == "BLOCKED"] + review = [f for f in findings if f.severity == "REVIEW"] + passed = [f for f in findings if f.severity == "PASS"] + + print( + "AwoooP RLS manual script audit: " + f"BLOCKED={len(blocked)} REVIEW={len(review)} PASS={len(passed)}" + ) + + for item in blocked + review: + print(f"{item.severity} {item.path}") + print(f" reason: {item.reason}") + + if args.show_pass: + for item in passed: + print(f"{item.severity} {item.path}") + print(f" reason: {item.reason}") + + if blocked: + return 2 + if review and args.strict_review: + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/sync_dev_db.py b/scripts/sync_dev_db.py index 5c25f950..39e3857e 100644 --- a/scripts/sync_dev_db.py +++ b/scripts/sync_dev_db.py @@ -1,10 +1,15 @@ #!/usr/bin/env python3 """同步 dev DB — 補齊 prod 有但 dev 沒有的表""" import asyncio +import os +import sys from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy import text -DEV_URL = "postgresql+asyncpg://awoooi:awoooi_prod_2026@192.168.0.188:5432/awoooi_dev" +DEV_URL = os.environ.get("DEV_DATABASE_URL") +if not DEV_URL: + print("ERROR: DEV_DATABASE_URL not set", file=sys.stderr) + sys.exit(1) MIGRATIONS = [ ("auto_repair_executions", """ diff --git a/scripts/verify/verify_telegram_dedup_b3a0f0d7.sh b/scripts/verify/verify_telegram_dedup_b3a0f0d7.sh index 99b03a40..1543e686 100755 --- 
a/scripts/verify/verify_telegram_dedup_b3a0f0d7.sh +++ b/scripts/verify/verify_telegram_dedup_b3a0f0d7.sh @@ -19,6 +19,7 @@ sudo kubectl exec -n awoooi-prod "$POD" -- python -c " import asyncio, os, asyncpg async def q(): conn = await asyncpg.connect(os.environ['DATABASE_URL']) + await conn.execute(\"SELECT set_config('app.project_id', 'awoooi', FALSE)\") rows = await conn.fetch(\"\"\" SELECT COALESCE(i.title, 'unknown') AS alertname, @@ -44,6 +45,7 @@ sudo kubectl exec -n awoooi-prod "$POD" -- python -c " import asyncio, os, asyncpg async def q(): conn = await asyncpg.connect(os.environ['DATABASE_URL']) + await conn.execute(\"SELECT set_config('app.project_id', 'awoooi', FALSE)\") rows = await conn.fetch(\"\"\" SELECT COALESCE(i.title, 'unknown') AS alertname, @@ -67,6 +69,7 @@ sudo kubectl exec -n awoooi-prod "$POD" -- python -c " import asyncio, os, asyncpg async def q(): conn = await asyncpg.connect(os.environ['DATABASE_URL']) + await conn.execute(\"SELECT set_config('app.project_id', 'awoooi', FALSE)\") rows = await conn.fetch(\"\"\" SELECT i.id, i.title, COUNT(t.id) AS total_24h, MAX(t.created_at) AS last_sent,