# =============================================================================
# PostgreSQL Exporter custom queries
# =============================================================================
# Owner: DevOps Commander
# Version: v1.0
# Date: 2026-03-29
# ADR: ADR-037 Phase B
#
# Purpose: extend the default metrics to monitor AWOOOI-specific needs
# =============================================================================

# ==========================================================================
# Connection pool monitoring
# ==========================================================================
# Counts sessions per (database, state). Rows without a database name are
# filtered out by the WHERE clause.
pg_stat_activity_count:
  query: |
    SELECT
      datname,
      state,
      count(*) AS count
    FROM pg_stat_activity
    WHERE datname IS NOT NULL
    GROUP BY datname, state
  metrics:
    - datname:
        usage: "LABEL"
        description: "Database name"
    - state:
        usage: "LABEL"
        description: "Connection state (active, idle, idle in transaction)"
    - count:
        usage: "GAUGE"
        description: "Number of connections in this state"

# ==========================================================================
# Slow query monitoring (> 1 second)
# ==========================================================================
# Counts active sessions per (database, user) whose query_start is more than
# one second in the past. Statements that start with "SELECT pg_" or mention
# "pg_stat" anywhere are excluded from the count.
pg_slow_queries:
  query: |
    SELECT
      datname,
      usename,
      count(*) AS slow_query_count
    FROM pg_stat_activity
    WHERE state = 'active'
      AND query_start < now() - interval '1 second'
      AND query NOT LIKE 'SELECT pg_%'
      AND query NOT LIKE '%pg_stat%'
    GROUP BY datname, usename
  metrics:
    - datname:
        usage: "LABEL"
        description: "Database name"
    - usename:
        usage: "LABEL"
        description: "User name"
    - slow_query_count:
        usage: "GAUGE"
        description: "Number of slow queries (> 1s)"

# ==========================================================================
# Lock wait monitoring
# ==========================================================================
# Counts lock requests that have not been granted, per (database, lock mode).
# The LEFT JOIN keeps locks whose database OID has no pg_database match;
# those are reported under the label value 'unknown'.
pg_locks_waiting:
  query: |
    SELECT
      COALESCE(d.datname, 'unknown') AS datname,
      l.mode,
      count(*) AS waiting_count
    FROM pg_locks l
    LEFT JOIN pg_database d ON l.database = d.oid
    WHERE NOT l.granted
    GROUP BY d.datname, l.mode
  metrics:
    - datname:
        usage: "LABEL"
        description: "Database name"
    - mode:
        usage: "LABEL"
        description: "Lock mode"
    - waiting_count:
        usage: "GAUGE"
        description: "Number of locks waiting"

# ==========================================================================
# Table bloat estimate (dead tuples)
# ==========================================================================
# Reports the 20 user tables with the most dead tuples, considering only
# tables with more than 1000 live tuples. dead_tuple_ratio is dead tuples as
# a percentage of live tuples (not of total tuples), rounded to 2 decimals.
pg_stat_user_tables_bloat:
  query: |
    SELECT
      schemaname,
      relname,
      n_dead_tup,
      n_live_tup,
      CASE
        WHEN n_live_tup > 0
          THEN round(100.0 * n_dead_tup / n_live_tup, 2)
        ELSE 0
      END AS dead_tuple_ratio
    FROM pg_stat_user_tables
    WHERE n_live_tup > 1000
    ORDER BY n_dead_tup DESC
    LIMIT 20
  metrics:
    - schemaname:
        usage: "LABEL"
        description: "Schema name"
    - relname:
        usage: "LABEL"
        description: "Table name"
    - n_dead_tup:
        usage: "GAUGE"
        description: "Dead tuples count"
    - n_live_tup:
        usage: "GAUGE"
        description: "Live tuples count"
    - dead_tuple_ratio:
        usage: "GAUGE"
        description: "Dead tuple percentage"

# ==========================================================================
# Database size
# ==========================================================================
# Size of every database except the two template databases.
pg_database_size_bytes:
  query: |
    SELECT
      datname,
      pg_database_size(datname) AS size_bytes
    FROM pg_database
    WHERE datname NOT IN ('template0', 'template1')
  metrics:
    - datname:
        usage: "LABEL"
        description: "Database name"
    - size_bytes:
        usage: "GAUGE"
        description: "Database size in bytes"

# ==========================================================================
# Transaction statistics (AWOOOI-specific)
# ==========================================================================
# Commit/rollback counters for the 'awoooi' and 'awoooi_prod' databases only.
# rollback_ratio is computed from the cumulative counters, so it is the
# rollback percentage since the statistics were last reset, not a recent rate.
pg_stat_database_transactions:
  query: |
    SELECT
      datname,
      xact_commit,
      xact_rollback,
      CASE
        WHEN xact_commit + xact_rollback > 0
          THEN round(100.0 * xact_rollback / (xact_commit + xact_rollback), 2)
        ELSE 0
      END AS rollback_ratio
    FROM pg_stat_database
    WHERE datname = 'awoooi'
       OR datname = 'awoooi_prod'
  metrics:
    - datname:
        usage: "LABEL"
        description: "Database name"
    - xact_commit:
        usage: "COUNTER"
        description: "Committed transactions"
    - xact_rollback:
        usage: "COUNTER"
        description: "Rolled back transactions"
    - rollback_ratio:
        usage: "GAUGE"
        description: "Rollback percentage"

# ==========================================================================
# Replication lag (if replicas exist)
# ==========================================================================
# One row per streaming standby, with its replay lag in seconds.
#
# The lag is taken from the replay_lag interval column of pg_stat_replication
# (available since PostgreSQL 10, the same release that introduced sent_lsn).
# An LSN is a WAL byte position and cannot be cast to a timestamp; the earlier
# "now() - sent_lsn::...::timestamp" expression raised a cast error as soon as
# a streaming replica was attached. replay_lag is NULL when the standby is
# fully caught up and idle, so it is coalesced to 0.
#
# NOTE(review): `master: true` is understood to restrict this query to the
# primary / first configured server - confirm against the exporter version
# in use.
pg_replication_lag:
  query: |
    SELECT
      COALESCE(client_addr::text, 'local') AS client_addr,
      application_name,
      COALESCE(EXTRACT(EPOCH FROM replay_lag), 0) AS lag_seconds
    FROM pg_stat_replication
    WHERE state = 'streaming'
  master: true
  metrics:
    - client_addr:
        usage: "LABEL"
        description: "Replica address"
    - application_name:
        usage: "LABEL"
        description: "Application name"
    - lag_seconds:
        usage: "GAUGE"
        description: "Replication lag in seconds"

# ==========================================================================
# Longest running query
# ==========================================================================
# Returns the single active session with the oldest query_start and how long
# it has been running. Statements that start with "SELECT pg_" are excluded.
pg_longest_query_seconds:
  query: |
    SELECT
      datname,
      usename,
      EXTRACT(EPOCH FROM (now() - query_start)) AS duration_seconds
    FROM pg_stat_activity
    WHERE state = 'active'
      AND query NOT LIKE 'SELECT pg_%'
      AND query_start IS NOT NULL
    ORDER BY query_start
    LIMIT 1
  metrics:
    - datname:
        usage: "LABEL"
        description: "Database name"
    - usename:
        usage: "LABEL"
        description: "User name"
    - duration_seconds:
        usage: "GAUGE"
        description: "Duration of longest running query in seconds"