Appearance
查询进阶
本章将介绍PostgreSQL的高级查询功能,包括子查询、CTE(公共表表达式)、窗口函数等强大的查询技术。
子查询
子查询是嵌套在其他查询中的查询,可以出现在SELECT、FROM、WHERE等子句中。
标量子查询
标量子查询返回单个值,可以用在需要单个值的地方:
sql
-- 创建示例表
CREATE TABLE departments (
dept_id SERIAL PRIMARY KEY,
dept_name VARCHAR(50) NOT NULL,
budget NUMERIC(12, 2)
);
CREATE TABLE employees (
emp_id SERIAL PRIMARY KEY,
emp_name VARCHAR(50) NOT NULL,
dept_id INTEGER REFERENCES departments(dept_id),
salary NUMERIC(10, 2),
hire_date DATE
);
INSERT INTO departments (dept_name, budget) VALUES
('技术部', 1000000),
('销售部', 800000),
('人事部', 500000);
INSERT INTO employees (emp_name, dept_id, salary, hire_date) VALUES
('张三', 1, 15000, '2022-01-15'),
('李四', 1, 18000, '2021-06-20'),
('王五', 2, 12000, '2023-03-10'),
('赵六', 2, 14000, '2022-08-25'),
('钱七', 3, 10000, '2023-01-01');
-- 查询薪水高于平均薪水的员工
SELECT emp_name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
-- 查询部门预算最高的部门名称
SELECT dept_name
FROM departments
WHERE budget = (SELECT MAX(budget) FROM departments);
-- 在SELECT中使用标量子查询
SELECT
emp_name,
salary,
(SELECT AVG(salary) FROM employees) AS avg_salary,
salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees;列子查询
列子查询返回一列多行,常与IN、ANY、ALL等运算符配合使用:
sql
-- 使用IN
SELECT emp_name, salary
FROM employees
WHERE dept_id IN (SELECT dept_id FROM departments WHERE budget > 600000);
-- 使用NOT IN
SELECT emp_name
FROM employees
WHERE dept_id NOT IN (SELECT dept_id FROM departments WHERE dept_name = '人事部');
-- 使用ANY(任意一个满足条件)
SELECT emp_name, salary
FROM employees
WHERE salary > ANY (SELECT salary FROM employees WHERE dept_id = 1);
-- 薪水高于技术部任意一个员工即可
-- 使用ALL(所有都要满足条件)
SELECT emp_name, salary
FROM employees
WHERE salary > ALL (SELECT salary FROM employees WHERE dept_id = 2);
-- 薪水高于销售部所有员工
-- 使用EXISTS(存在性检查)
SELECT dept_name
FROM departments d
WHERE EXISTS (
SELECT 1 FROM employees e
WHERE e.dept_id = d.dept_id
AND e.salary > 15000
);
-- 查询有员工薪水超过15000的部门
-- 使用NOT EXISTS
SELECT dept_name
FROM departments d
WHERE NOT EXISTS (
SELECT 1 FROM employees e
WHERE e.dept_id = d.dept_id
);
-- 查询没有员工的部门行子查询
行子查询返回一行多列:
sql
-- 查询与某个员工同部门同薪水的员工
SELECT emp_name, dept_id, salary
FROM employees
WHERE (dept_id, salary) = (
SELECT dept_id, salary
FROM employees
WHERE emp_name = '张三'
);
-- 使用IN进行多列匹配
SELECT emp_name, dept_id, salary
FROM employees
WHERE (dept_id, salary) IN (
SELECT dept_id, MAX(salary)
FROM employees
GROUP BY dept_id
);
-- 查询各部门薪水最高的员工FROM子句中的子查询
子查询可以用作临时表:
sql
-- 统计各部门的平均薪水等级
SELECT
t.dept_name,
t.avg_salary,
CASE
WHEN t.avg_salary >= 15000 THEN '高'
WHEN t.avg_salary >= 12000 THEN '中'
ELSE '低'
END AS salary_level
FROM (
SELECT
d.dept_name,
AVG(e.salary) AS avg_salary
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id
GROUP BY d.dept_name
) t;
-- 使用LATERAL关键字(允许子查询引用外部查询的列)
SELECT
e.emp_name,
e.salary,
t.avg_dept_salary,
e.salary - t.avg_dept_salary AS diff
FROM employees e,
LATERAL (
SELECT AVG(salary) AS avg_dept_salary
FROM employees
WHERE dept_id = e.dept_id
) t;公共表表达式(CTE)
CTE(Common Table Expression)是一种临时的结果集,可以简化复杂查询。
基本CTE
sql
-- 使用WITH定义CTE
WITH dept_stats AS (
SELECT
dept_id,
COUNT(*) AS emp_count,
AVG(salary) AS avg_salary,
MAX(salary) AS max_salary,
MIN(salary) AS min_salary
FROM employees
GROUP BY dept_id
)
SELECT
d.dept_name,
ds.emp_count,
ROUND(ds.avg_salary, 2) AS avg_salary,
ds.max_salary,
ds.min_salary
FROM dept_stats ds
JOIN departments d ON ds.dept_id = d.dept_id
ORDER BY ds.avg_salary DESC;
-- 多个CTE
WITH
high_salary AS (
SELECT * FROM employees WHERE salary > 15000
),
low_salary AS (
SELECT * FROM employees WHERE salary <= 12000
)
SELECT '高薪' AS category, COUNT(*) AS count FROM high_salary
UNION ALL
SELECT '低薪', COUNT(*) FROM low_salary;递归CTE
递归CTE可以处理层次结构数据:
sql
-- 创建员工层级表
CREATE TABLE emp_hierarchy (
emp_id INTEGER PRIMARY KEY,
emp_name VARCHAR(50),
manager_id INTEGER REFERENCES emp_hierarchy(emp_id)
);
INSERT INTO emp_hierarchy VALUES
(1, 'CEO', NULL),
(2, '技术总监', 1),
(3, '销售总监', 1),
(4, '技术经理', 2),
(5, '销售经理', 3),
(6, '工程师A', 4),
(7, '工程师B', 4),
(8, '销售员A', 5);
-- 递归查询员工层级
WITH RECURSIVE emp_tree AS (
-- 基础查询:顶级员工(没有经理)
SELECT
emp_id,
emp_name,
manager_id,
1 AS level,
emp_name::TEXT AS path
FROM emp_hierarchy
WHERE manager_id IS NULL
UNION ALL
-- 递归查询:下级员工
SELECT
e.emp_id,
e.emp_name,
e.manager_id,
t.level + 1,
t.path || ' > ' || e.emp_name
FROM emp_hierarchy e
JOIN emp_tree t ON e.manager_id = t.emp_id
)
SELECT
level,
REPEAT(' ', level - 1) || emp_name AS org_chart,
path
FROM emp_tree
ORDER BY path;
-- 计算每个员工的下属数量
WITH RECURSIVE emp_subordinates AS (
SELECT emp_id, emp_id AS subordinate_id
FROM emp_hierarchy
UNION ALL
SELECT e.emp_id, s.subordinate_id
FROM emp_hierarchy e
JOIN emp_subordinates s ON e.manager_id = s.emp_id
)
SELECT
e.emp_name,
COUNT(s.subordinate_id) - 1 AS subordinate_count
FROM emp_hierarchy e
LEFT JOIN emp_subordinates s ON e.emp_id = s.emp_id
GROUP BY e.emp_id, e.emp_name
ORDER BY subordinate_count DESC;CTE与数据修改
sql
-- 使用CTE进行UPDATE并返回结果
WITH updated AS (
UPDATE employees
SET salary = salary * 1.1
WHERE dept_id = 1
RETURNING *
)
SELECT * FROM updated;
-- 使用CTE进行DELETE并插入历史表
CREATE TABLE salary_history (
id SERIAL PRIMARY KEY,
emp_name VARCHAR(50),
old_salary NUMERIC(10, 2),
change_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
WITH deleted AS (
DELETE FROM employees
WHERE salary < 11000
RETURNING *
)
INSERT INTO salary_history (emp_name, old_salary)
SELECT emp_name, salary FROM deleted;窗口函数
窗口函数可以在不减少行数的情况下进行聚合计算,是PostgreSQL强大的分析功能。
基本窗口函数
sql
-- ROW_NUMBER:行号
SELECT
emp_name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num
FROM employees;
-- RANK:排名(有并列,跳过后续名次)
SELECT
emp_name,
salary,
RANK() OVER (ORDER BY salary DESC) AS rank
FROM employees;
-- DENSE_RANK:密集排名(有并列,不跳过名次)
SELECT
emp_name,
salary,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;
-- 对比三种排名函数
SELECT
emp_name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
RANK() OVER (ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;分区窗口函数
sql
-- 按部门分区计算排名
SELECT
e.emp_name,
d.dept_name,
e.salary,
RANK() OVER (PARTITION BY e.dept_id ORDER BY e.salary DESC) AS dept_rank
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;
-- 各部门内的薪水百分比
SELECT
e.emp_name,
d.dept_name,
e.salary,
ROUND(
100.0 * e.salary / SUM(e.salary) OVER (PARTITION BY e.dept_id),
2
) AS salary_percent
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;
-- 各部门最高薪水
SELECT
emp_name,
dept_id,
salary,
MAX(salary) OVER (PARTITION BY dept_id) AS dept_max_salary
FROM employees;聚合窗口函数
sql
-- 累计求和
SELECT
emp_name,
hire_date,
salary,
SUM(salary) OVER (ORDER BY hire_date) AS running_total
FROM employees;
-- 移动平均
SELECT
emp_name,
hire_date,
salary,
ROUND(
AVG(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
),
2
) AS moving_avg
FROM employees;
-- 各种聚合窗口函数
SELECT
emp_name,
dept_id,
salary,
COUNT(*) OVER (PARTITION BY dept_id) AS dept_count,
SUM(salary) OVER (PARTITION BY dept_id) AS dept_total,
AVG(salary) OVER (PARTITION BY dept_id) AS dept_avg,
MIN(salary) OVER (PARTITION BY dept_id) AS dept_min,
MAX(salary) OVER (PARTITION BY dept_id) AS dept_max
FROM employees;窗口帧定义
sql
-- 窗口帧:定义窗口函数作用的数据范围
SELECT
emp_name,
hire_date,
salary,
-- 从开始到当前行
SUM(salary) OVER (
ORDER BY hire_date
ROWS UNBOUNDED PRECEDING
) AS cumulative_sum,
-- 当前行及前2行
AVG(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS avg_3_rows,
-- 当前行及后2行
AVG(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING
) AS avg_next_3,
-- 前后各1行
AVG(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS avg_surrounding
FROM employees;取值窗口函数
sql
-- 添加更多数据用于演示
INSERT INTO employees (emp_name, dept_id, salary, hire_date) VALUES
('孙八', 1, 16000, '2022-03-15'),
('周九', 1, 17000, '2022-06-01');
-- LAG:获取前N行的值
SELECT
emp_name,
hire_date,
salary,
LAG(salary) OVER (ORDER BY hire_date) AS prev_salary,
LAG(salary, 2) OVER (ORDER BY hire_date) AS prev_2_salary,
LAG(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_salary_default
FROM employees;
-- LEAD:获取后N行的值
SELECT
emp_name,
hire_date,
salary,
LEAD(salary) OVER (ORDER BY hire_date) AS next_salary,
LEAD(salary, 2) OVER (ORDER BY hire_date) AS next_2_salary
FROM employees;
-- FIRST_VALUE:获取窗口第一行的值
SELECT
emp_name,
dept_id,
salary,
FIRST_VALUE(salary) OVER (PARTITION BY dept_id ORDER BY salary DESC) AS dept_highest
FROM employees;
-- LAST_VALUE:获取窗口最后一行的值(注意窗口帧定义)
SELECT
emp_name,
dept_id,
salary,
LAST_VALUE(salary) OVER (
PARTITION BY dept_id
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS dept_lowest
FROM employees;
-- NTH_VALUE:获取窗口第N行的值
SELECT
emp_name,
salary,
NTH_VALUE(salary, 2) OVER (ORDER BY hire_date) AS second_salary
FROM employees;NTILE函数
sql
-- 将数据分成N组
SELECT
emp_name,
salary,
NTILE(4) OVER (ORDER BY salary DESC) AS quartile -- 分成4组
FROM employees;
-- 按部门分组后分成两组
SELECT
emp_name,
d.dept_name,
e.salary,
NTILE(2) OVER (PARTITION BY e.dept_id ORDER BY e.salary DESC) AS half
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;集合操作
集合操作用于合并多个查询的结果。
UNION
sql
-- UNION:合并结果并去重
SELECT emp_name FROM employees WHERE dept_id = 1
UNION
SELECT emp_name FROM employees WHERE salary > 13000;
-- UNION ALL:合并结果不去重(性能更好)
SELECT emp_name FROM employees WHERE dept_id = 1
UNION ALL
SELECT emp_name FROM employees WHERE salary > 13000;INTERSECT
sql
-- INTERSECT:返回两个查询的交集
SELECT emp_name FROM employees WHERE dept_id = 1
INTERSECT
SELECT emp_name FROM employees WHERE salary > 14000;
-- INTERSECT ALL:保留重复行
SELECT emp_name FROM employees WHERE dept_id = 1
INTERSECT ALL
SELECT emp_name FROM employees WHERE salary > 14000;EXCEPT
sql
-- EXCEPT:返回第一个查询有但第二个查询没有的结果
SELECT emp_name FROM employees WHERE dept_id = 1
EXCEPT
SELECT emp_name FROM employees WHERE salary > 16000;
-- EXCEPT ALL:保留重复行
SELECT emp_name FROM employees WHERE dept_id = 1
EXCEPT ALL
SELECT emp_name FROM employees WHERE salary > 16000;CASE表达式
CASE表达式用于条件逻辑:
sql
-- 简单CASE表达式
SELECT
emp_name,
dept_id,
CASE dept_id
WHEN 1 THEN '技术部'
WHEN 2 THEN '销售部'
WHEN 3 THEN '人事部'
ELSE '其他'
END AS department
FROM employees;
-- 搜索CASE表达式
SELECT
emp_name,
salary,
CASE
WHEN salary >= 18000 THEN '高薪'
WHEN salary >= 14000 THEN '中薪'
WHEN salary >= 10000 THEN '低薪'
ELSE '待定'
END AS salary_level
FROM employees;
-- 在聚合中使用CASE
SELECT
COUNT(CASE WHEN salary >= 15000 THEN 1 END) AS high_count,
COUNT(CASE WHEN salary < 15000 THEN 1 END) AS low_count
FROM employees;
-- CASE在UPDATE中使用
UPDATE employees
SET salary = CASE
WHEN salary < 12000 THEN salary * 1.2
WHEN salary < 15000 THEN salary * 1.1
ELSE salary * 1.05
END;NULL处理
sql
-- COALESCE:返回第一个非NULL值
SELECT
emp_name,
COALESCE(salary, 0) AS salary_or_zero
FROM employees;
-- NULLIF:如果两个值相等则返回NULL
SELECT
emp_name,
salary,
NULLIF(salary, 0) AS salary_non_zero -- 如果salary是0则返回NULL
FROM employees;
-- GREATEST:返回最大值(忽略NULL)
SELECT GREATEST(10, 20, NULL, 30); -- 返回30
-- LEAST:返回最小值(忽略NULL)
SELECT LEAST(10, 20, NULL, 5); -- 返回5
-- 使用COALESCE处理除法
SELECT
emp_name,
salary,
COALESCE(salary / NULLIF(salary, 0), 0) AS ratio -- 避免除以0
FROM employees;查询优化提示
使用EXPLAIN分析查询
sql
-- 查看查询计划
EXPLAIN SELECT * FROM employees WHERE salary > 15000;
-- 查看详细查询计划(包括实际执行统计)
EXPLAIN ANALYZE SELECT * FROM employees WHERE salary > 15000;
-- 查看更详细的信息
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT * FROM employees WHERE salary > 15000;常见优化技巧
sql
-- 1. 只选择需要的列
-- 不好的写法
SELECT * FROM employees WHERE dept_id = 1;
-- 好的写法
SELECT emp_name, salary FROM employees WHERE dept_id = 1;
-- 2. 使用LIMIT限制结果
SELECT * FROM employees ORDER BY salary DESC LIMIT 10;
-- 3. 避免在WHERE中对列使用函数(会导致索引失效)
-- 不好的写法
SELECT * FROM employees WHERE UPPER(emp_name) = '张三';
-- 好的写法
SELECT * FROM employees WHERE emp_name = '张三';
-- 4. 使用EXISTS代替IN处理大数据集
-- 对于大数据集,EXISTS通常更高效
SELECT * FROM employees e
WHERE EXISTS (
SELECT 1 FROM departments d
WHERE d.dept_id = e.dept_id
AND d.budget > 600000
);小结
本章介绍了PostgreSQL的高级查询功能:
| 功能 | 说明 |
|---|---|
| 子查询 | 嵌套查询,支持标量、列、行子查询 |
| CTE | 公共表表达式,简化复杂查询 |
| 递归CTE | 处理层次结构数据 |
| 窗口函数 | 不减少行数的聚合计算 |
| 集合操作 | UNION、INTERSECT、EXCEPT |
| CASE表达式 | 条件逻辑处理 |
| NULL处理 | COALESCE、NULLIF等函数 |
下一章我们将学习多表查询(JOIN操作)。
