Subqueries
Master nested queries, correlated subqueries, and scalar subqueries
Subqueries (nested queries) are queries embedded within other queries. They allow you to build complex logic by breaking it into smaller, manageable pieces.
Subquery Flow
Types of Subqueries
Scalar Subquery
Returns a single value (one row, one column).
-- Get products above average price
SELECT name, price
FROM products
WHERE price > (
SELECT AVG(price) FROM products
);
-- Use in SELECT clause
SELECT
name,
price,
price - (SELECT AVG(price) FROM products) AS diff_from_avg
FROM products;
-- Use in CASE expression
SELECT
name,
price,
CASE
WHEN price > (SELECT AVG(price) FROM products) THEN 'Above Average'
ELSE 'Below Average'
END AS price_category
FROM products;Scalar Subquery Rules
If a scalar subquery returns more than one row or column, you'll get an error.
Row Subquery
Returns a single row with multiple columns.
-- Compare entire row (PostgreSQL)
SELECT * FROM products
WHERE (category_id, price) = (
SELECT category_id, MAX(price)
FROM products
GROUP BY category_id
LIMIT 1
);
-- Row comparison with IN
SELECT * FROM orders
WHERE (customer_id, product_id) IN (
SELECT customer_id, favorite_product_id
FROM customer_preferences
);Table Subquery
Returns multiple rows and columns (a derived table).
-- Subquery in FROM clause (derived table)
SELECT
category_stats.category_id,
category_stats.avg_price,
p.name AS most_expensive
FROM (
SELECT
category_id,
AVG(price) AS avg_price,
MAX(price) AS max_price
FROM products
GROUP BY category_id
) AS category_stats
JOIN products p
ON p.category_id = category_stats.category_id
AND p.price = category_stats.max_price;
-- Must have an alias
SELECT * FROM (
SELECT customer_id, COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
) AS customer_orders -- Alias required
WHERE order_count > 5;Subquery Operators
EXISTS and NOT EXISTS
-- Customers with at least one order
SELECT c.id, c.name
FROM customers c
WHERE EXISTS (
SELECT 1
FROM orders o
WHERE o.customer_id = c.id
);
-- Customers with NO orders
SELECT c.id, c.name
FROM customers c
WHERE NOT EXISTS (
SELECT 1
FROM orders o
WHERE o.customer_id = c.id
);
-- Products never ordered
SELECT p.id, p.name
FROM products p
WHERE NOT EXISTS (
SELECT 1
FROM order_items oi
WHERE oi.product_id = p.id
);EXISTS Performance Tip
EXISTS stops searching as soon as it finds one matching row.
Use SELECT 1 or SELECT * - the column list doesn't matter.
EXISTS is often faster than IN for large subqueries.
IN and NOT IN
-- Orders from USA customers
SELECT * FROM orders
WHERE customer_id IN (
SELECT id FROM customers WHERE country = 'USA'
);
-- Products not in any order
SELECT * FROM products
WHERE id NOT IN (
SELECT DISTINCT product_id
FROM order_items
WHERE product_id IS NOT NULL -- Important!
);NOT IN and NULL Trap
If the subquery returns any NULL values, NOT IN returns no results!
-- This returns NOTHING if any product_id is NULL:
SELECT * FROM products
WHERE id NOT IN (SELECT product_id FROM order_items);
-- Safe alternatives:
-- 1. Exclude NULLs explicitly
WHERE id NOT IN (SELECT product_id FROM order_items WHERE product_id IS NOT NULL);
-- 2. Use NOT EXISTS instead
WHERE NOT EXISTS (SELECT 1 FROM order_items WHERE product_id = products.id);ALL and ANY/SOME
-- Price greater than ALL categories' average (i.e., the maximum average)
SELECT * FROM products
WHERE price > ALL (
SELECT AVG(price) FROM products GROUP BY category_id
);
-- Price greater than ANY category's average (i.e., at least one)
SELECT * FROM products
WHERE price > ANY (
SELECT AVG(price) FROM products GROUP BY category_id
);
-- ANY is equivalent to SOME
SELECT * FROM products
WHERE price > SOME (
SELECT AVG(price) FROM products GROUP BY category_id
);| Operator | Description | Equivalent |
|---|---|---|
> ALL | Greater than maximum | > (SELECT MAX(...)) |
< ALL | Less than minimum | < (SELECT MIN(...)) |
= ALL | Equal to all (all same value) | - |
> ANY | Greater than minimum | > (SELECT MIN(...)) |
< ANY | Less than maximum | < (SELECT MAX(...)) |
= ANY | Equal to any | IN (...) |
Subquery Locations
-- Scalar subquery in SELECT
SELECT
p.name,
p.price,
(SELECT AVG(price) FROM products) AS avg_price,
(SELECT COUNT(*) FROM order_items oi WHERE oi.product_id = p.id) AS times_ordered
FROM products p;
-- Correlated subquery in SELECT
SELECT
c.name,
(SELECT COUNT(*) FROM orders o WHERE o.customer_id = c.id) AS order_count,
(SELECT SUM(total) FROM orders o WHERE o.customer_id = c.id) AS total_spent
FROM customers c;-- Derived table
SELECT
t.category_id,
t.total_products,
t.avg_price
FROM (
SELECT
category_id,
COUNT(*) AS total_products,
AVG(price) AS avg_price
FROM products
GROUP BY category_id
) AS t
WHERE t.avg_price > 100;
-- Multiple derived tables
SELECT
o.month,
o.order_count,
p.product_count
FROM (
SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS order_count
FROM orders
GROUP BY 1
) AS o
JOIN (
SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS product_count
FROM products
GROUP BY 1
) AS p ON o.month = p.month;-- Scalar in WHERE
SELECT * FROM orders
WHERE total > (SELECT AVG(total) FROM orders);
-- IN list in WHERE
SELECT * FROM orders
WHERE customer_id IN (SELECT id FROM customers WHERE status = 'vip');
-- EXISTS in WHERE
SELECT * FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.id AND o.total > 1000
);-- Subquery in HAVING
SELECT
category_id,
AVG(price) AS avg_price
FROM products
GROUP BY category_id
HAVING AVG(price) > (
SELECT AVG(price) FROM products -- Overall average
);
-- Categories with more products than average
SELECT
category_id,
COUNT(*) AS product_count
FROM products
GROUP BY category_id
HAVING COUNT(*) > (
SELECT AVG(cnt)
FROM (
SELECT COUNT(*) AS cnt
FROM products
GROUP BY category_id
) AS counts
);-- INSERT from subquery
INSERT INTO order_archive (id, customer_id, total, created_at)
SELECT id, customer_id, total, created_at
FROM orders
WHERE created_at < '2023-01-01';
-- UPDATE with subquery
UPDATE products p
SET price = price * 1.1
WHERE category_id IN (
SELECT id FROM categories WHERE name = 'Electronics'
);
-- UPDATE with correlated subquery
UPDATE products p
SET category_avg = (
SELECT AVG(price)
FROM products p2
WHERE p2.category_id = p.category_id
);
-- DELETE with subquery
DELETE FROM orders
WHERE customer_id IN (
SELECT id FROM customers WHERE status = 'deleted'
);Subqueries vs CTEs
CTE vs Subquery
CTEs (Common Table Expressions) are often cleaner than subqueries. See the CTEs page for detailed comparison.
-- Nested subqueries (hard to read)
SELECT * FROM (
SELECT customer_id, SUM(total) AS total_spent
FROM (
SELECT * FROM orders WHERE status = 'completed'
) AS completed_orders
GROUP BY customer_id
) AS customer_totals
WHERE total_spent > 1000;
-- Same logic with CTE (clearer)
WITH completed_orders AS (
SELECT * FROM orders WHERE status = 'completed'
),
customer_totals AS (
SELECT customer_id, SUM(total) AS total_spent
FROM completed_orders
GROUP BY customer_id
)
SELECT * FROM customer_totals WHERE total_spent > 1000;Performance Patterns
Best Practices
Use CTEs for Readability
Replace deeply nested subqueries with CTEs.
-- ❌ Hard to read
SELECT * FROM (SELECT * FROM (SELECT ... ) t1 WHERE ...) t2;
-- ✅ Clear and maintainable
WITH step1 AS (...),
step2 AS (SELECT * FROM step1 WHERE ...)
SELECT * FROM step2;Avoid NOT IN with Nullable Columns
Use NOT EXISTS or explicitly exclude NULLs.
-- ❌ May return nothing if NULLs present
WHERE id NOT IN (SELECT nullable_column FROM ...)
-- ✅ Safe alternatives
WHERE NOT EXISTS (SELECT 1 FROM ... WHERE id = outer.id)
WHERE id NOT IN (SELECT column FROM ... WHERE column IS NOT NULL)Consider EXISTS Over IN for Large Datasets
EXISTS can short-circuit and use indexes effectively.
Rewrite Correlated Subqueries When Possible
JOINs and window functions are often more efficient.