PostgreSQL — Practical

Diagnosis

-- active queries
SELECT pid, now()-xact_start AS dur, state, query
FROM pg_stat_activity WHERE state <> 'idle'
ORDER BY dur DESC;

-- blocking pairs
SELECT a.pid blocked, a.query blocked_q, b.pid blocking, b.query blocking_q
FROM pg_stat_activity a
JOIN pg_locks bl ON bl.pid=a.pid AND NOT bl.granted
JOIN pg_locks bg ON bg.locktype=bl.locktype AND bg.relation=bl.relation AND bg.granted
JOIN pg_stat_activity b ON b.pid=bg.pid;

-- top by total time
SELECT calls, total_exec_time::int total_ms, mean_exec_time::int mean_ms,
       substring(query,1,100) q
FROM pg_stat_statements ORDER BY total_exec_time DESC LIMIT 10;

-- table sizes
SELECT relname, pg_size_pretty(pg_total_relation_size(c.oid)) total
FROM pg_class c WHERE relkind='r'
ORDER BY pg_total_relation_size(c.oid) DESC LIMIT 20;

-- unused indexes
SELECT relname, indexrelname, idx_scan
FROM pg_stat_user_indexes WHERE idx_scan=0
ORDER BY pg_relation_size(indexrelid) DESC;

-- bloat estimate
SELECT relname, n_dead_tup, round(n_dead_tup::numeric/nullif(n_live_tup,0)*100,1) dead_pct
FROM pg_stat_user_tables ORDER BY n_dead_tup DESC LIMIT 20;

EXPLAIN

EXPLAIN (ANALYZE, BUFFERS) SELECT ...;

Watch: Seq Scan on big tables, large actual rows vs estimated, Rows Removed by Filter, Nested Loop over many rows.

Upsert

INSERT INTO users (email, name) VALUES ('a@b','A')
ON CONFLICT (email) DO UPDATE
SET name=EXCLUDED.name, updated_at=now()
RETURNING *;

Keyset pagination

SELECT id, created_at, title FROM posts
WHERE (created_at, id) < ($1, $2)
ORDER BY created_at DESC, id DESC
LIMIT 50;

Window functions

-- top 3 per user
SELECT * FROM (
  SELECT *, row_number() OVER (PARTITION BY user_id ORDER BY total DESC) rn
  FROM orders
) t WHERE rn <= 3;

-- running total
SELECT date, amount, sum(amount) OVER (ORDER BY date) running FROM payments;

Recursive CTE

WITH RECURSIVE tree AS (
  SELECT id, parent_id, name, 0 depth FROM categories WHERE parent_id IS NULL
  UNION ALL
  SELECT c.id, c.parent_id, c.name, t.depth+1
  FROM categories c JOIN tree t ON c.parent_id=t.id
) SELECT * FROM tree;

Concurrent index

CREATE INDEX CONCURRENTLY idx_users_email ON users (email);
DROP INDEX CONCURRENTLY idx_old;

JSONB

CREATE INDEX idx_data ON events USING gin (data);
CREATE INDEX idx_data_op ON events USING gin (data jsonb_path_ops);  -- @> only

SELECT * FROM events WHERE data @> '{"type":"click"}';
SELECT data->>'user_id' FROM events;

Partition by range

CREATE TABLE events (id bigint, ts timestamptz, payload jsonb)
PARTITION BY RANGE (ts);

CREATE TABLE events_2026_05 PARTITION OF events
FOR VALUES FROM ('2026-05-01') TO ('2026-06-01');

Queue worker (skip-locked)

WITH next AS (
  SELECT id FROM jobs WHERE status='pending'
  ORDER BY created_at FOR UPDATE SKIP LOCKED LIMIT 1
)
UPDATE jobs SET status='running'
WHERE id = (SELECT id FROM next) RETURNING *;

Online migration (NOT NULL on huge table)

ALTER TABLE big_t ADD COLUMN status text;        -- 1
-- 2: backfill in batches
UPDATE big_t SET status='ok' WHERE id BETWEEN 1 AND 1000;
-- 3
ALTER TABLE big_t ALTER COLUMN status SET NOT NULL;

Avoid ADD COLUMN ... NOT NULL DEFAULT 'x' on huge tables (rewrites). PG 11+ optimized for constant defaults.

Settings

shared_buffers = 25% of RAM
effective_cache_size = 50-75% of RAM
work_mem = 16-64MB
maintenance_work_mem = 1GB
random_page_cost = 1.1     # SSD

Backup

pg_dump -Fc dbname > db.dump
pg_restore -d dbname db.dump
pg_dump -Fd -j 4 -f dump_dir dbname           # parallel
pg_basebackup -D /backup -Ft -z -X stream     # physical

Useful extensions

pg_stat_statements (always), pg_partman, pg_repack, postgis, pgvector, hypopg, pgaudit, timescaledb.

Tools

psql, DBeaver, pgcli. Pooler: PgBouncer. Migrations: Flyway, Alembic, knex, Atlas.

Deep — indexing patterns

-- Multicolumn: tenant first because every query filters on it
CREATE INDEX orders_tenant_status_created
  ON orders (tenant_id, status, created_at DESC);

-- Partial index: only "open" orders, much smaller
CREATE INDEX orders_open
  ON orders (created_at DESC)
  WHERE status = 'open';

-- Expression index: case-insensitive lookup
CREATE INDEX users_email_lower ON users (LOWER(email));

-- Covering index: enables index-only scan returning total
CREATE INDEX orders_tenant_total
  ON orders (tenant_id, created_at) INCLUDE (total_cents);

-- BRIN for a 5-billion-row append-only event table
CREATE INDEX events_ts_brin ON events USING BRIN (ts);

-- Reading EXPLAIN
EXPLAIN (ANALYZE, BUFFERS) SELECT id FROM orders
  WHERE tenant_id = 42 AND status='open' ORDER BY created_at DESC LIMIT 20;
-- Look for: Index Scan / Index Only Scan, Buffers: shared hit=...,
-- and that Rows Removed by Filter is small.

Deep — slow query diagnosis helpers

-- Find the worst aggregate offenders
SELECT query, calls, total_exec_time, mean_exec_time,
       rows, shared_blks_read, shared_blks_hit
FROM pg_stat_statements
ORDER BY total_exec_time DESC LIMIT 20;

-- Enable plan logging for slow queries
LOAD 'auto_explain';
SET auto_explain.log_min_duration = '500ms';
SET auto_explain.log_analyze = on;
SET auto_explain.log_buffers = on;

-- Anti-pattern: OFFSET pagination on 50M rows (each page rescans)
SELECT id, ts FROM events ORDER BY ts DESC OFFSET 1000000 LIMIT 20;
-- Fix: keyset pagination (constant time per page)
SELECT id, ts FROM events
WHERE (ts, id) < ($1, $2)
ORDER BY ts DESC, id DESC
LIMIT 20;

-- Anti-pattern: ORDER BY random()
SELECT * FROM products ORDER BY random() LIMIT 1;          -- O(N) sort
-- Fix on indexed PK with mostly contiguous IDs:
SELECT * FROM products
WHERE id >= (random() * (SELECT max(id) FROM products))::bigint
ORDER BY id LIMIT 1;

Deep — JSONB catalog example

CREATE TABLE products (
  id    bigserial PRIMARY KEY,
  sku   text UNIQUE NOT NULL,
  price numeric(12,2) NOT NULL,
  attrs jsonb NOT NULL DEFAULT '{}'::jsonb
);

-- GIN index supporting @> and key-existence
CREATE INDEX products_attrs_gin ON products USING GIN (attrs);

-- Smaller, faster index for containment-only workloads
CREATE INDEX products_attrs_path_gin ON products USING GIN (attrs jsonb_path_ops);

-- Containment query (uses GIN)
SELECT id, sku FROM products WHERE attrs @> '{"color":"red","size":"L"}';

-- jsonpath query
SELECT id FROM products WHERE attrs @@ '$.tags[*] == "clearance"';

-- Expression index for a hot nested key
CREATE INDEX products_brand ON products ((attrs->>'brand'));

Deep — SERIALIZABLE retry pattern

DO $$
DECLARE attempts int := 0;
BEGIN
  LOOP
    BEGIN
      START TRANSACTION ISOLATION LEVEL SERIALIZABLE;
      UPDATE accounts SET bal = bal - 100 WHERE id = 'A';
      UPDATE accounts SET bal = bal + 100 WHERE id = 'B';
      COMMIT;
      EXIT;
    EXCEPTION WHEN serialization_failure THEN
      attempts := attempts + 1;
      IF attempts > 3 THEN RAISE; END IF;
      ROLLBACK;
    END;
  END LOOP;
END $$;

Deep — partitioning + logical replication

-- Postgres declarative partitioning by month, then index
CREATE TABLE events (id bigserial, ts timestamptz NOT NULL, payload jsonb)
  PARTITION BY RANGE (ts);

CREATE TABLE events_2026_05 PARTITION OF events
  FOR VALUES FROM ('2026-05-01') TO ('2026-06-01');

CREATE INDEX ON events (ts);  -- propagates to all (future) partitions

-- Logical replication
CREATE PUBLICATION orders_pub FOR TABLE orders;            -- on primary
CREATE SUBSCRIPTION orders_sub                              -- on subscriber
  CONNECTION 'host=primary dbname=app user=repl'
  PUBLICATION orders_pub;

Deep — schema anti-pattern fixes

-- Anti-pattern: Jaywalking
ALTER TABLE posts ADD COLUMN tags text;   -- "red,green,blue"  ❌

-- Fix: junction table
CREATE TABLE tags (
  id   serial PRIMARY KEY,
  name text UNIQUE NOT NULL
);
CREATE TABLE post_tags (
  post_id bigint REFERENCES posts(id) ON DELETE CASCADE,
  tag_id  int    REFERENCES tags(id),
  PRIMARY KEY (post_id, tag_id)
);
CREATE INDEX ON post_tags (tag_id);

-- Postgres-idiomatic alternative for small unjoined lists
ALTER TABLE posts ADD COLUMN tags text[] DEFAULT '{}';
CREATE INDEX posts_tags_gin ON posts USING GIN (tags);
SELECT * FROM posts WHERE tags @> ARRAY['featured'];

-- Closure table for hierarchies (Karwin's preferred pattern)
CREATE TABLE org (id int PRIMARY KEY, name text);
CREATE TABLE org_paths (
  ancestor   int NOT NULL REFERENCES org(id),
  descendant int NOT NULL REFERENCES org(id),
  depth      int NOT NULL,
  PRIMARY KEY (ancestor, descendant)
);
-- "All descendants of node 5"
SELECT o.* FROM org o JOIN org_paths p ON p.descendant = o.id
WHERE p.ancestor = 5 AND p.depth > 0;

-- Safe online migration
ALTER TABLE users ADD COLUMN email_verified_at timestamptz;       -- instant in PG 11+
CREATE INDEX CONCURRENTLY users_email_lower ON users (LOWER(email));

Deep — polyglot persistence (Redis + Timescale)

Redis use cases pair naturally with Postgres as the system of record:

// Redis: rate limiter (token bucket) with atomic Lua
const luaScript = `
  local key = KEYS[1]
  local capacity = tonumber(ARGV[1])
  local refillPerSec = tonumber(ARGV[2])
  local now = tonumber(ARGV[3])
  local bucket = redis.call('HMGET', key, 'tokens', 'ts')
  local tokens = tonumber(bucket[1]) or capacity
  local ts = tonumber(bucket[2]) or now
  local delta = math.max(0, now - ts) * refillPerSec
  tokens = math.min(capacity, tokens + delta)
  if tokens < 1 then return 0 end
  redis.call('HMSET', key, 'tokens', tokens - 1, 'ts', now)
  redis.call('EXPIRE', key, 60)
  return 1
`;
const allowed = await redis.eval(luaScript, 1, `rl:${userId}`, 60, 1, Date.now()/1000);

// Redis sorted set: leaderboard
await redis.zadd('lb:season-1', score, userId);
const top10 = await redis.zrevrange('lb:season-1', 0, 9, 'WITHSCORES');

// Redis HyperLogLog: unique visitors (12 KB, ~0.81% error)
await redis.pfadd('uv:2026-05-17', userIdA, userIdB);
const uniques = await redis.pfcount('uv:2026-05-17');

-- TimescaleDB hypertable: stays inside Postgres
CREATE TABLE metrics (
  ts        timestamptz NOT NULL,
  device_id bigint NOT NULL,
  value     double precision
);
SELECT create_hypertable('metrics', 'ts', chunk_time_interval => INTERVAL '1 day');
CREATE INDEX ON metrics (device_id, ts DESC);

-- Continuous aggregate (incremental materialized view)
CREATE MATERIALIZED VIEW metrics_1h
WITH (timescaledb.continuous) AS
SELECT time_bucket('1 hour', ts) AS hour, device_id, avg(value)
FROM metrics GROUP BY hour, device_id;

Deep — Bloom index + HLL

-- Postgres bloom-index extension for multi-column equality
CREATE EXTENSION bloom;
CREATE INDEX events_bloom
  ON events USING bloom (tenant_id, source, type, severity)
  WITH (length=80, col1=2, col2=2, col3=4, col4=2);
-- Now WHERE source='x' AND severity='y' can probe the bloom index

// Redis HyperLogLog vs naïve SET for unique counting
await redis.sadd('uv:naive:2026-05-17', userId);          // O(N) memory
await redis.scard('uv:naive:2026-05-17');                 // exact

await redis.pfadd('uv:hll:2026-05-17', userId);           // ~12 KB total
await redis.pfcount('uv:hll:2026-05-17');                 // ~0.81% error

// Merge a week of HLLs into one count
await redis.pfmerge('uv:hll:week-20', 'uv:hll:2026-05-11', /* ... */ 'uv:hll:2026-05-17');