SQL mistakes that kill performance and how to fix them
Understanding common SQL anti-patterns helps you write efficient queries from the start. These patterns appear repeatedly in slow-performing applications.
-- Anti-pattern: SELECT * fetches every column
SELECT * FROM orders WHERE customer_id = 123; -- [!code error]
-- Why it's bad:
-- 1. Transfers unnecessary data
-- 2. Cannot use index-only scans
-- 3. Breaks if schema changes
-- 4. Includes large TEXT/BLOB columns
-- Fix: name only the columns you need
SELECT
    id,
    total,
    status,
    created_at
FROM orders
WHERE customer_id = 123;

-- With covering index, enables index-only scan
CREATE INDEX idx_orders_customer
    ON orders (customer_id)
    INCLUDE (id, total, status, created_at);
# Application code:customers = query("SELECT * FROM customers LIMIT 100")for customer in customers: -- [!code error] orders = query(f""" -- [!code error] SELECT * FROM orders -- [!code error] WHERE customer_id = {customer.id} -- [!code error] """) -- [!code error] # Process orders...# Results in 101 queries:# 1 for customers + 100 for orders
-- Solution 1: JOIN
-- NOTE: limit the *customers* (in a derived table), not the joined result.
-- A bare "LIMIT 100" after the join would cap the number of order ROWS,
-- which is not equivalent to "100 customers with all their orders".
SELECT c.*, o.*
FROM (SELECT * FROM customers LIMIT 100) c
LEFT JOIN orders o ON c.id = o.customer_id;

-- Solution 2: Batch fetch
SELECT * FROM orders
WHERE customer_id IN (1, 2, 3, ... 100);

-- Solution 3: Subquery
SELECT * FROM orders
WHERE customer_id IN (
    SELECT id FROM customers LIMIT 100
);
-- PostgreSQL: Find repeated queries
-- mean_exec_time requires pg_stat_statements 1.8+ (PostgreSQL 13+);
-- on older versions the column is named mean_time.
SELECT
    query,
    calls,
    mean_exec_time
FROM pg_stat_statements
WHERE query LIKE '%customer_id = $1%'
ORDER BY calls DESC;
-- Look for:
-- - High call counts with similar query patterns
-- - Many queries with different literal values
-- - Sequential query execution patterns

-- MySQL: Enable general log temporarily
SET GLOBAL general_log = 'ON';
-- Check mysql.general_log table
-- Anti-pattern: a function wrapped around the column defeats the index
SELECT * FROM users WHERE LOWER(email) = 'user@example.com'; -- [!code error]
SELECT * FROM orders WHERE YEAR(created_at) = 2024; -- [!code error]
SELECT * FROM products WHERE CONCAT(category, '-', name) = 'Electronics-Phone'; -- [!code error]
-- Solution 1: Store normalized data
-- Intentional full-table UPDATE (no WHERE): one-time normalization pass.
UPDATE users SET email = LOWER(email);
SELECT * FROM users WHERE email = 'user@example.com';

-- Solution 2: Expression index
CREATE INDEX idx_email_lower ON users (LOWER(email));
SELECT * FROM users WHERE LOWER(email) = 'user@example.com';

-- Solution 3: Range instead of function (half-open interval)
SELECT * FROM orders
WHERE created_at >= '2024-01-01'
  AND created_at < '2025-01-01';

-- Solution 4: Generated column
-- A generated column requires an explicit data type (MySQL and PostgreSQL).
-- PostgreSQL note: use the || operator instead of CONCAT() here, since the
-- generation expression must be immutable.
ALTER TABLE products
    ADD COLUMN full_key VARCHAR(255)
    GENERATED ALWAYS AS (CONCAT(category, '-', name)) STORED;
CREATE INDEX idx_full_key ON products (full_key);
-- OR on different columns prevents optimal index use
SELECT *
FROM products
WHERE category_id = 5 OR supplier_id = 10; -- [!code warning]
-- Database might:
-- 1. Do full table scan
-- 2. Do bitmap scans and combine (better)
-- 3. Use neither index efficiently
-- Solution 1: UNION ALL (often faster)
-- The second branch excludes rows already returned by the first.
-- NULL-safe: a bare "category_id <> 5" would silently drop rows where
-- category_id IS NULL (three-valued logic), making this NOT equivalent
-- to the original OR query.
SELECT * FROM products WHERE category_id = 5
UNION ALL
SELECT * FROM products
WHERE supplier_id = 10
  AND (category_id <> 5 OR category_id IS NULL);

-- Solution 2: UNION (deduplicates)
SELECT * FROM products WHERE category_id = 5
UNION
SELECT * FROM products WHERE supplier_id = 10;

-- Solution 3: Composite index (if pattern is common)
CREATE INDEX idx_category_supplier ON products (category_id, supplier_id);
-- ✅ OR on same column uses index
SELECT *
FROM products
WHERE category_id = 5 OR category_id = 10;
-- Equivalent to: IN (5, 10)

-- ✅ OR with bitmap scans (PostgreSQL)
-- Database can combine multiple index scans
EXPLAIN ANALYZE
SELECT *
FROM products
WHERE category_id = 5 OR supplier_id = 10;
-- Look for: BitmapOr with multiple Bitmap Index Scans
-- String compared to integer (index on customer_id INTEGER)
SELECT * FROM orders WHERE customer_id = '123'; -- [!code error]
-- Database may cast every row's customer_id to string

-- Different string types
SELECT * FROM users WHERE phone = 12345; -- [!code error]
-- phone is VARCHAR

-- Date/timestamp mismatch
SELECT * FROM orders WHERE created_at = '2024-01-01'; -- [!code error]
-- created_at is TIMESTAMP, '2024-01-01' is DATE
-- Match types exactly
SELECT * FROM orders WHERE customer_id = 123;

-- Explicit casting
SELECT * FROM users WHERE phone = '12345';

-- Proper date handling (half-open timestamp range)
SELECT * FROM orders
WHERE created_at >= '2024-01-01 00:00:00'::timestamp
  AND created_at < '2024-01-02 00:00:00'::timestamp;

-- Or use date range with expression index
SELECT * FROM orders WHERE created_at::date = '2024-01-01';
-- Check column types
\d+ table_name

-- PostgreSQL: Look for implicit casts in EXPLAIN
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = '123';
-- May show: Filter: ((customer_id)::text = '123'::text)
-- Or check for seq scan where index expected
-- No index on frequently filtered column
SELECT * FROM logs WHERE event_type = 'error'; -- [!code error]
-- Full table scan on millions of rows

-- Wrong column order in composite index
-- Index: (status, customer_id)
SELECT * FROM orders WHERE customer_id = 123; -- [!code warning]
-- Cannot efficiently use index (leftmost prefix rule)

-- Too many indexes
-- Every INSERT/UPDATE must maintain all indexes
-- More indexes = slower writes
-- Index on filter columns
CREATE INDEX idx_logs_event_type ON logs (event_type);

-- Correct column order (query-driven)
-- For: WHERE customer_id = ? AND status = ?
CREATE INDEX idx_orders ON orders (customer_id, status);

-- Cover common queries
CREATE INDEX idx_orders_covering
    ON orders (customer_id)
    INCLUDE (total, status);

-- Partial index for subset
CREATE INDEX idx_active_orders
    ON orders (customer_id)
    WHERE status = 'active';
-- PostgreSQL: Find missing indexes
-- NOTE: "table" is a reserved keyword; an unquoted alias "AS table" is a
-- syntax error in PostgreSQL, so alias as table_name instead.
-- NULLIF guards against division by zero when seq_scan is 0.
SELECT
    relname AS table_name,
    seq_scan,
    seq_tup_read,
    idx_scan,
    seq_tup_read / NULLIF(seq_scan, 0) AS avg_rows_per_scan
FROM pg_stat_user_tables
WHERE seq_scan > 0
ORDER BY seq_tup_read DESC;
-- High seq_tup_read with low idx_scan = missing index

-- MySQL: Analyze table
SHOW INDEX FROM table_name;
EXPLAIN SELECT ...; -- Look for full table scans

-- SQL Server: Missing index DMVs
SELECT * FROM sys.dm_db_missing_index_details;
-- OFFSET-based pagination degrades with page number
SELECT *
FROM products
ORDER BY created_at DESC
LIMIT 20 OFFSET 100000; -- [!code error]
-- Database must:
-- 1. Find 100,020 rows
-- 2. Sort them all
-- 3. Skip 100,000
-- 4. Return 20
-- Page 1: Fast
-- Page 5000: Very slow
-- Cursor-based (keyset) pagination

-- First page
SELECT *
FROM products
ORDER BY created_at DESC, id DESC
LIMIT 20;

-- Next page (use last row's values)
-- Row-value comparison seeks directly to the cursor position.
SELECT *
FROM products
WHERE (created_at, id) < ('2024-01-15 10:00:00', 12345)
ORDER BY created_at DESC, id DESC
LIMIT 20;

-- With index: (created_at DESC, id DESC)
-- Always O(1) regardless of page number
CREATE INDEX idx_products_cursor ON products (created_at DESC, id DESC);
-- ✅ Indexed columns for sorting
CREATE INDEX idx_products_date_id ON products (created_at DESC, id DESC);

-- ✅ Limit maximum offset
SELECT *
FROM products
ORDER BY created_at DESC
LIMIT 20 OFFSET LEAST(1000, $offset); -- Cap at 50 pages

-- ✅ Use estimated counts for UI
SELECT reltuples::bigint AS estimate
FROM pg_class
WHERE relname = 'products';
-- Faster than COUNT(*) for display purposes