PostgreSQL — Practical
Diagnosis
Section titled “Diagnosis”-- active queriesSELECT pid, now()-xact_start AS dur, state, queryFROM pg_stat_activity WHERE state <> 'idle'ORDER BY dur DESC;
-- blocking pairsSELECT a.pid blocked, a.query blocked_q, b.pid blocking, b.query blocking_qFROM pg_stat_activity aJOIN pg_locks bl ON bl.pid=a.pid AND NOT bl.grantedJOIN pg_locks bg ON bg.locktype=bl.locktype AND bg.relation=bl.relation AND bg.grantedJOIN pg_stat_activity b ON b.pid=bg.pid;
-- top by total timeSELECT calls, total_exec_time::int total_ms, mean_exec_time::int mean_ms, substring(query,1,100) qFROM pg_stat_statements ORDER BY total_exec_time DESC LIMIT 10;
-- table sizesSELECT relname, pg_size_pretty(pg_total_relation_size(c.oid)) totalFROM pg_class c WHERE relkind='r'ORDER BY pg_total_relation_size(c.oid) DESC LIMIT 20;
-- unused indexesSELECT relname, indexrelname, idx_scanFROM pg_stat_user_indexes WHERE idx_scan=0ORDER BY pg_relation_size(indexrelid) DESC;
-- bloat estimateSELECT relname, n_dead_tup, round(n_dead_tup::numeric/nullif(n_live_tup,0)*100,1) dead_pctFROM pg_stat_user_tables ORDER BY n_dead_tup DESC LIMIT 20;EXPLAIN
Section titled “EXPLAIN”EXPLAIN (ANALYZE, BUFFERS) SELECT ...;Watch: Seq Scan on big tables, large actual rows vs estimated, Rows Removed by Filter, Nested Loop over many rows.
Upsert
Section titled “Upsert”INSERT INTO users (email, name) VALUES ('a@b','A')ON CONFLICT (email) DO UPDATESET name=EXCLUDED.name, updated_at=now()RETURNING *;Keyset pagination
Section titled “Keyset pagination”SELECT id, created_at, title FROM postsWHERE (created_at, id) < ($1, $2)ORDER BY created_at DESC, id DESCLIMIT 50;Window functions
Section titled “Window functions”-- top 3 per userSELECT * FROM ( SELECT *, row_number() OVER (PARTITION BY user_id ORDER BY total DESC) rn FROM orders) t WHERE rn <= 3;
-- running totalSELECT date, amount, sum(amount) OVER (ORDER BY date) running FROM payments;Recursive CTE
Section titled “Recursive CTE”WITH RECURSIVE tree AS ( SELECT id, parent_id, name, 0 depth FROM categories WHERE parent_id IS NULL UNION ALL SELECT c.id, c.parent_id, c.name, t.depth+1 FROM categories c JOIN tree t ON c.parent_id=t.id) SELECT * FROM tree;Concurrent index
Section titled “Concurrent index”CREATE INDEX CONCURRENTLY idx_users_email ON users (email);DROP INDEX CONCURRENTLY idx_old;CREATE INDEX idx_data ON events USING gin (data);CREATE INDEX idx_data_op ON events USING gin (data jsonb_path_ops); -- @> only
SELECT * FROM events WHERE data @> '{"type":"click"}';SELECT data->>'user_id' FROM events;Partition by range
Section titled “Partition by range”CREATE TABLE events (id bigint, ts timestamptz, payload jsonb)PARTITION BY RANGE (ts);
CREATE TABLE events_2026_05 PARTITION OF eventsFOR VALUES FROM ('2026-05-01') TO ('2026-06-01');Queue worker (skip-locked)
Section titled “Queue worker (skip-locked)”WITH next AS ( SELECT id FROM jobs WHERE status='pending' ORDER BY created_at FOR UPDATE SKIP LOCKED LIMIT 1)UPDATE jobs SET status='running'WHERE id = (SELECT id FROM next) RETURNING *;Online migration (NOT NULL on huge table)
Section titled “Online migration (NOT NULL on huge table)”ALTER TABLE big_t ADD COLUMN status text; -- 1-- 2: backfill in batchesUPDATE big_t SET status='ok' WHERE id BETWEEN 1 AND 1000;-- 3ALTER TABLE big_t ALTER COLUMN status SET NOT NULL;Avoid ADD COLUMN ... NOT NULL DEFAULT 'x' on huge tables (rewrites). PG 11+ optimized for constant defaults.
Settings
Section titled “Settings”shared_buffers = 25% of RAMeffective_cache_size = 50-75% of RAMwork_mem = 16-64MBmaintenance_work_mem = 1GBrandom_page_cost = 1.1 # SSDBackup
Section titled “Backup”pg_dump -Fc dbname > db.dumppg_restore -d dbname db.dumppg_dump -Fd -j 4 -f dump_dir dbname # parallelpg_basebackup -D /backup -Ft -z -X stream # physicalUseful extensions
Section titled “Useful extensions”pg_stat_statements (always), pg_partman, pg_repack, postgis, pgvector, hypopg, pgaudit, timescaledb.
psql, DBeaver, pgcli. Pooler: PgBouncer. Migrations: Flyway, Alembic, knex, Atlas.
Deep — indexing patterns
Section titled “Deep — indexing patterns”-- Multicolumn: tenant first because every query filters on itCREATE INDEX orders_tenant_status_created ON orders (tenant_id, status, created_at DESC);
-- Partial index: only "open" orders, much smallerCREATE INDEX orders_open ON orders (created_at DESC) WHERE status = 'open';
-- Expression index: case-insensitive lookupCREATE INDEX users_email_lower ON users (LOWER(email));
-- Covering index: enables index-only scan returning totalCREATE INDEX orders_tenant_total ON orders (tenant_id, created_at) INCLUDE (total_cents);
-- BRIN for a 5-billion-row append-only event tableCREATE INDEX events_ts_brin ON events USING BRIN (ts);
-- Reading EXPLAINEXPLAIN (ANALYZE, BUFFERS) SELECT id FROM orders WHERE tenant_id = 42 AND status='open' ORDER BY created_at DESC LIMIT 20;-- Look for: Index Scan / Index Only Scan, Buffers: shared hit=...,-- and that Rows Removed by Filter is small.Deep — slow query diagnosis helpers
Section titled “Deep — slow query diagnosis helpers”-- Find the worst aggregate offendersSELECT query, calls, total_exec_time, mean_exec_time, rows, shared_blks_read, shared_blks_hitFROM pg_stat_statementsORDER BY total_exec_time DESC LIMIT 20;
-- Enable plan logging for slow queriesLOAD 'auto_explain';SET auto_explain.log_min_duration = '500ms';SET auto_explain.log_analyze = on;SET auto_explain.log_buffers = on;
-- Anti-pattern: OFFSET pagination on 50M rows (each page rescans)SELECT id, ts FROM events ORDER BY ts DESC OFFSET 1000000 LIMIT 20;-- Fix: keyset pagination (constant time per page)SELECT id, ts FROM eventsWHERE (ts, id) < ($1, $2)ORDER BY ts DESC, id DESCLIMIT 20;
-- Anti-pattern: ORDER BY random()SELECT * FROM products ORDER BY random() LIMIT 1; -- O(N) sort-- Fix on indexed PK with mostly contiguous IDs:SELECT * FROM productsWHERE id >= (random() * (SELECT max(id) FROM products))::bigintORDER BY id LIMIT 1;Deep — JSONB catalog example
Section titled “Deep — JSONB catalog example”CREATE TABLE products ( id bigserial PRIMARY KEY, sku text UNIQUE NOT NULL, price numeric(12,2) NOT NULL, attrs jsonb NOT NULL DEFAULT '{}'::jsonb);
-- GIN index supporting @> and key-existenceCREATE INDEX products_attrs_gin ON products USING GIN (attrs);
-- Smaller, faster index for containment-only workloadsCREATE INDEX products_attrs_path_gin ON products USING GIN (attrs jsonb_path_ops);
-- Containment query (uses GIN)SELECT id, sku FROM products WHERE attrs @> '{"color":"red","size":"L"}';
-- jsonpath querySELECT id FROM products WHERE attrs @@ '$.tags[*] == "clearance"';
-- Expression index for a hot nested keyCREATE INDEX products_brand ON products ((attrs->>'brand'));Deep — SERIALIZABLE retry pattern
Section titled “Deep — SERIALIZABLE retry pattern”DO $$DECLARE attempts int := 0;BEGIN LOOP BEGIN START TRANSACTION ISOLATION LEVEL SERIALIZABLE; UPDATE accounts SET bal = bal - 100 WHERE id = 'A'; UPDATE accounts SET bal = bal + 100 WHERE id = 'B'; COMMIT; EXIT; EXCEPTION WHEN serialization_failure THEN attempts := attempts + 1; IF attempts > 3 THEN RAISE; END IF; ROLLBACK; END; END LOOP;END $$;Deep — partitioning + logical replication
Section titled “Deep — partitioning + logical replication”-- Postgres declarative partitioning by month, then indexCREATE TABLE events (id bigserial, ts timestamptz NOT NULL, payload jsonb) PARTITION BY RANGE (ts);
CREATE TABLE events_2026_05 PARTITION OF events FOR VALUES FROM ('2026-05-01') TO ('2026-06-01');
CREATE INDEX ON events (ts); -- propagates to all (future) partitions
-- Logical replicationCREATE PUBLICATION orders_pub FOR TABLE orders; -- on primaryCREATE SUBSCRIPTION orders_sub -- on subscriber CONNECTION 'host=primary dbname=app user=repl' PUBLICATION orders_pub;Deep — schema anti-pattern fixes
Section titled “Deep — schema anti-pattern fixes”-- Anti-pattern: JaywalkingALTER TABLE posts ADD COLUMN tags text; -- "red,green,blue" ❌
-- Fix: junction tableCREATE TABLE tags ( id serial PRIMARY KEY, name text UNIQUE NOT NULL);CREATE TABLE post_tags ( post_id bigint REFERENCES posts(id) ON DELETE CASCADE, tag_id int REFERENCES tags(id), PRIMARY KEY (post_id, tag_id));CREATE INDEX ON post_tags (tag_id);
-- Postgres-idiomatic alternative for small unjoined listsALTER TABLE posts ADD COLUMN tags text[] DEFAULT '{}';CREATE INDEX posts_tags_gin ON posts USING GIN (tags);SELECT * FROM posts WHERE tags @> ARRAY['featured'];
-- Closure table for hierarchies (Karwin's preferred pattern)CREATE TABLE org (id int PRIMARY KEY, name text);CREATE TABLE org_paths ( ancestor int NOT NULL REFERENCES org(id), descendant int NOT NULL REFERENCES org(id), depth int NOT NULL, PRIMARY KEY (ancestor, descendant));-- "All descendants of node 5"SELECT o.* FROM org o JOIN org_paths p ON p.descendant = o.idWHERE p.ancestor = 5 AND p.depth > 0;
-- Safe online migrationALTER TABLE users ADD COLUMN email_verified_at timestamptz; -- instant in PG 11+CREATE INDEX CONCURRENTLY users_email_lower ON users (LOWER(email));Deep — polyglot persistence (Redis + Timescale)
Section titled “Deep — polyglot persistence (Redis + Timescale)”Redis use cases pair naturally with Postgres as the system of record:
// Redis: rate limiter (token bucket) with atomic Luaconst luaScript = ` local key = KEYS[1] local capacity = tonumber(ARGV[1]) local refillPerSec = tonumber(ARGV[2]) local now = tonumber(ARGV[3]) local bucket = redis.call('HMGET', key, 'tokens', 'ts') local tokens = tonumber(bucket[1]) or capacity local ts = tonumber(bucket[2]) or now local delta = math.max(0, now - ts) * refillPerSec tokens = math.min(capacity, tokens + delta) if tokens < 1 then return 0 end redis.call('HMSET', key, 'tokens', tokens - 1, 'ts', now) redis.call('EXPIRE', key, 60) return 1`;const allowed = await redis.eval(luaScript, 1, `rl:${userId}`, 60, 1, Date.now()/1000);
// Redis sorted set: leaderboardawait redis.zadd('lb:season-1', score, userId);const top10 = await redis.zrevrange('lb:season-1', 0, 9, 'WITHSCORES');
// Redis HyperLogLog: unique visitors (12 KB, ~0.81% error)await redis.pfadd('uv:2026-05-17', userIdA, userIdB);const uniques = await redis.pfcount('uv:2026-05-17');-- TimescaleDB hypertable: stays inside PostgresCREATE TABLE metrics ( ts timestamptz NOT NULL, device_id bigint NOT NULL, value double precision);SELECT create_hypertable('metrics', 'ts', chunk_time_interval => INTERVAL '1 day');CREATE INDEX ON metrics (device_id, ts DESC);
-- Continuous aggregate (incremental materialized view)CREATE MATERIALIZED VIEW metrics_1hWITH (timescaledb.continuous) ASSELECT time_bucket('1 hour', ts) AS hour, device_id, avg(value)FROM metrics GROUP BY hour, device_id;Deep — Bloom index + HLL
Section titled “Deep — Bloom index + HLL”-- Postgres bloom-index extension for multi-column equalityCREATE EXTENSION bloom;CREATE INDEX events_bloom ON events USING bloom (tenant_id, source, type, severity) WITH (length=80, col1=2, col2=2, col3=4, col4=2);-- Now WHERE source='x' AND severity='y' can probe the bloom index// Redis HyperLogLog vs naïve SET for unique countingawait redis.sadd('uv:naive:2026-05-17', userId); // O(N) memoryawait redis.scard('uv:naive:2026-05-17'); // exact
await redis.pfadd('uv:hll:2026-05-17', userId); // ~12 KB totalawait redis.pfcount('uv:hll:2026-05-17'); // ~0.81% error
// Merge a week of HLLs into one countawait redis.pfmerge('uv:hll:week-20', 'uv:hll:2026-05-11', /* ... */ 'uv:hll:2026-05-17');