Appearance
多表查询
在实际应用中,数据通常分散在多个相关联的表中。JOIN操作是SQL中最重要的功能之一,用于将多个表的数据组合在一起查询。本章将详细介绍PostgreSQL中的各种JOIN操作。
准备示例数据
sql
-- 创建部门表
CREATE TABLE departments (
dept_id SERIAL PRIMARY KEY,
dept_name VARCHAR(50) NOT NULL,
location VARCHAR(100)
);
-- 创建员工表
CREATE TABLE employees (
emp_id SERIAL PRIMARY KEY,
emp_name VARCHAR(50) NOT NULL,
dept_id INTEGER REFERENCES departments(dept_id),
manager_id INTEGER REFERENCES employees(emp_id),
salary NUMERIC(10, 2),
hire_date DATE
);
-- 创建项目表
CREATE TABLE projects (
project_id SERIAL PRIMARY KEY,
project_name VARCHAR(100) NOT NULL,
dept_id INTEGER REFERENCES departments(dept_id),
start_date DATE,
end_date DATE
);
-- 创建员工项目关联表(多对多关系)
CREATE TABLE employee_projects (
emp_id INTEGER REFERENCES employees(emp_id),
project_id INTEGER REFERENCES projects(project_id),
role VARCHAR(50),
hours_allocated INTEGER,
PRIMARY KEY (emp_id, project_id)
);
-- 插入测试数据
INSERT INTO departments (dept_name, location) VALUES
('技术部', '北京'),
('销售部', '上海'),
('人事部', '广州'),
('财务部', NULL); -- 故意不设置location
INSERT INTO employees (emp_name, dept_id, manager_id, salary, hire_date) VALUES
('张三', 1, NULL, 20000, '2020-01-15'), -- 技术部,无上级
('李四', 1, 1, 18000, '2021-03-20'), -- 技术部,上级是张三
('王五', 1, 1, 16000, '2022-06-10'), -- 技术部,上级是张三
('赵六', 2, NULL, 15000, '2021-01-01'), -- 销售部,无上级
('钱七', 2, 4, 12000, '2022-08-15'), -- 销售部,上级是赵六
('孙八', 3, NULL, 14000, '2021-05-20'), -- 人事部,无上级
('周九', NULL, NULL, 10000, '2023-01-01'); -- 无部门
INSERT INTO projects (project_name, dept_id, start_date, end_date) VALUES
('电商平台开发', 1, '2023-01-01', '2023-12-31'),
('客户管理系统', 1, '2023-03-01', '2023-09-30'),
('销售数据分析', 2, '2023-02-01', NULL), -- 进行中的项目
('员工培训计划', 3, '2023-04-01', '2023-06-30');
INSERT INTO employee_projects (emp_id, project_id, role, hours_allocated) VALUES
(1, 1, '项目经理', 200),
(2, 1, '开发工程师', 300),
(3, 1, '开发工程师', 250),
(2, 2, '技术负责人', 150),
(3, 2, '开发工程师', 200),
(4, 3, '项目经理', 180),
(5, 3, '分析师', 220),
(6, 4, '协调员', 100);内连接(INNER JOIN)
内连接只返回两个表中匹配的行,是最常用的连接类型。
基本语法
sql
-- 内连接的基本语法
SELECT 列名
FROM 表1
INNER JOIN 表2 ON 表1.列 = 表2.列;
-- INNER可以省略
SELECT 列名
FROM 表1
JOIN 表2 ON 表1.列 = 表2.列;示例
sql
-- 查询员工及其所属部门
SELECT
e.emp_name,
e.salary,
d.dept_name,
d.location
FROM employees e
INNER JOIN departments d ON e.dept_id = d.dept_id;
-- 使用表别名简化
SELECT
e.emp_name AS 员工姓名,
d.dept_name AS 部门名称
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id
ORDER BY d.dept_name, e.emp_name;
-- 多表内连接
SELECT
e.emp_name,
d.dept_name,
p.project_name,
ep.role
FROM employees e
INNER JOIN departments d ON e.dept_id = d.dept_id
INNER JOIN employee_projects ep ON e.emp_id = ep.emp_id
INNER JOIN projects p ON ep.project_id = p.project_id
ORDER BY d.dept_name, e.emp_name;
-- 使用USING简化(当连接列名相同时)
SELECT
e.emp_name,
d.dept_name
FROM employees e
INNER JOIN departments d USING (dept_id);
-- 多条件连接
SELECT
e.emp_name,
d.dept_name,
p.project_name
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id
JOIN projects p ON d.dept_id = p.dept_id
AND p.start_date <= e.hire_date; -- 额外条件:员工入职时项目已开始左外连接(LEFT JOIN)
左外连接返回左表的所有行,右表没有匹配的则返回NULL。
sql
-- 查询所有员工及其部门(包括没有部门的员工)
SELECT
e.emp_name,
e.salary,
d.dept_name
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
ORDER BY e.emp_name;
-- 只查询没有部门的员工
SELECT
e.emp_name,
e.salary
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
WHERE d.dept_id IS NULL; -- 右表为NULL表示没有匹配
-- 查询所有部门及其员工数量(包括没有员工的部门)
SELECT
d.dept_name,
COUNT(e.emp_id) AS employee_count
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.dept_id
GROUP BY d.dept_id, d.dept_name
ORDER BY employee_count DESC;
-- 多表左连接
SELECT
e.emp_name,
d.dept_name,
p.project_name
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
LEFT JOIN employee_projects ep ON e.emp_id = ep.emp_id
LEFT JOIN projects p ON ep.project_id = p.project_id
ORDER BY e.emp_name;右外连接(RIGHT JOIN)
右外连接返回右表的所有行,左表没有匹配的则返回NULL。
sql
-- 查询所有部门及其员工(包括没有员工的部门)
SELECT
d.dept_name,
e.emp_name,
e.salary
FROM employees e
RIGHT JOIN departments d ON e.dept_id = d.dept_id
ORDER BY d.dept_name;
-- 右连接可以改写为左连接(推荐使用左连接,更直观)
SELECT
d.dept_name,
e.emp_name,
e.salary
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.dept_id
ORDER BY d.dept_name;
-- 查询没有员工的部门
SELECT
d.dept_name,
d.location
FROM employees e
RIGHT JOIN departments d ON e.dept_id = d.dept_id
WHERE e.emp_id IS NULL;全外连接(FULL JOIN)
全外连接返回两个表的所有行,没有匹配的则返回NULL。
sql
-- 查询所有员工和所有部门(包括没有部门的员工和没有员工的部门)
SELECT
e.emp_name,
d.dept_name
FROM employees e
FULL JOIN departments d ON e.dept_id = d.dept_id
ORDER BY d.dept_name NULLS LAST, e.emp_name;
-- 找出没有匹配的记录(员工无部门或部门无员工)
SELECT
e.emp_name,
d.dept_name,
CASE
WHEN e.emp_id IS NULL THEN '部门无员工'
WHEN d.dept_id IS NULL THEN '员工无部门'
ELSE '正常'
END AS status
FROM employees e
FULL JOIN departments d ON e.dept_id = d.dept_id
WHERE e.emp_id IS NULL OR d.dept_id IS NULL;
-- 使用COALESCE处理NULL值
SELECT
COALESCE(e.emp_name, '(无员工)') AS 员工,
COALESCE(d.dept_name, '(无部门)') AS 部门
FROM employees e
FULL JOIN departments d ON e.dept_id = d.dept_id;交叉连接(CROSS JOIN)
交叉连接返回两个表的笛卡尔积,即左表每行与右表每行组合。
sql
-- 交叉连接:每个员工与每个部门的组合
SELECT
e.emp_name,
d.dept_name
FROM employees e
CROSS JOIN departments d
ORDER BY e.emp_name, d.dept_name;
-- 使用场景:生成所有可能的组合
-- 例如:生成每个员工每月的考勤记录模板
CREATE TABLE months (
month_id INTEGER PRIMARY KEY,
month_name VARCHAR(20)
);
INSERT INTO months VALUES
(1, '一月'), (2, '二月'), (3, '三月');
SELECT
e.emp_name,
m.month_name
FROM employees e
CROSS JOIN months m
ORDER BY e.emp_name, m.month_id;
-- 交叉连接也可以用逗号语法
SELECT e.emp_name, d.dept_name
FROM employees e, departments d;自连接
自连接是表与自身的连接,常用于处理层次结构数据。
sql
-- 查询员工及其上级
SELECT
e.emp_name AS 员工,
m.emp_name AS 上级
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.emp_id
ORDER BY e.emp_name;
-- 查询有下属的员工及其下属数量
SELECT
m.emp_name AS 上级,
COUNT(e.emp_id) AS 下属数量
FROM employees m
LEFT JOIN employees e ON e.manager_id = m.emp_id
GROUP BY m.emp_id, m.emp_name
HAVING COUNT(e.emp_id) > 0
ORDER BY 下属数量 DESC;
-- 查询同一部门的员工对(排除自己和自己)
SELECT
e1.emp_name AS 员工1,
e2.emp_name AS 员工2,
d.dept_name
FROM employees e1
JOIN employees e2 ON e1.dept_id = e2.dept_id AND e1.emp_id < e2.emp_id
JOIN departments d ON e1.dept_id = d.dept_id
ORDER BY d.dept_name, e1.emp_name;
-- 多级上级查询
SELECT
e.emp_name AS 员工,
m1.emp_name AS 直属上级,
m2.emp_name AS 更上级
FROM employees e
LEFT JOIN employees m1 ON e.manager_id = m1.emp_id
LEFT JOIN employees m2 ON m1.manager_id = m2.emp_id
ORDER BY e.emp_name;自然连接(NATURAL JOIN)
自然连接自动基于同名列进行连接。
sql
-- 自然连接:自动匹配同名列
SELECT emp_name, dept_name
FROM employees
NATURAL JOIN departments;
-- 自然连接可能产生意外结果,建议显式指定连接条件
-- 更安全的写法
SELECT e.emp_name, d.dept_name
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;复杂JOIN示例
统计各部门项目情况
sql
-- 统计各部门的项目数量、参与员工数量、总工时
SELECT
d.dept_name,
COUNT(DISTINCT p.project_id) AS project_count,
COUNT(DISTINCT ep.emp_id) AS participant_count,
COALESCE(SUM(ep.hours_allocated), 0) AS total_hours
FROM departments d
LEFT JOIN projects p ON d.dept_id = p.dept_id
LEFT JOIN employee_projects ep ON p.project_id = ep.project_id
GROUP BY d.dept_id, d.dept_name
ORDER BY project_count DESC;员工项目详情
sql
-- 查询每个员工参与的项目详情
SELECT
e.emp_name,
d.dept_name,
p.project_name,
ep.role,
ep.hours_allocated,
CASE
WHEN p.end_date IS NULL THEN '进行中'
WHEN p.end_date < CURRENT_DATE THEN '已结束'
ELSE '进行中'
END AS project_status
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
LEFT JOIN employee_projects ep ON e.emp_id = ep.emp_id
LEFT JOIN projects p ON ep.project_id = p.project_id
ORDER BY e.emp_name, p.project_name;部门薪资分析
sql
-- 各部门薪资统计,与公司整体对比
SELECT
d.dept_name,
COUNT(e.emp_id) AS employee_count,
ROUND(AVG(e.salary), 2) AS avg_salary,
ROUND(MAX(e.salary), 2) AS max_salary,
ROUND(MIN(e.salary), 2) AS min_salary,
ROUND(AVG(e.salary) - (SELECT AVG(salary) FROM employees), 2) AS diff_from_company_avg
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.dept_id
GROUP BY d.dept_id, d.dept_name
ORDER BY avg_salary DESC;项目资源分配
sql
-- 查询每个项目的资源分配情况
SELECT
p.project_name,
d.dept_name AS 负责部门,
COUNT(ep.emp_id) AS 参与人数,
SUM(ep.hours_allocated) AS 总工时,
STRING_AGG(e.emp_name || '(' || ep.role || ')', ', ') AS 参与人员
FROM projects p
JOIN departments d ON p.dept_id = d.dept_id
LEFT JOIN employee_projects ep ON p.project_id = ep.project_id
LEFT JOIN employees e ON ep.emp_id = e.emp_id
GROUP BY p.project_id, p.project_name, d.dept_name
ORDER BY p.project_name;JOIN与性能优化
使用EXPLAIN分析JOIN
sql
-- 查看JOIN的执行计划
EXPLAIN ANALYZE
SELECT e.emp_name, d.dept_name
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;
-- 查看更详细的执行计划
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT e.emp_name, d.dept_name
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;JOIN优化建议
sql
-- 1. 确保连接列有索引
CREATE INDEX idx_employees_dept_id ON employees(dept_id);
CREATE INDEX idx_employee_projects_emp_id ON employee_projects(emp_id);
CREATE INDEX idx_employee_projects_project_id ON employee_projects(project_id);
-- 2. 小表驱动大表(PostgreSQL优化器通常会自动处理)
-- 3. 只选择需要的列
SELECT e.emp_name, d.dept_name -- 只选需要的列
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id;
-- 4. 使用适当的JOIN类型
-- 如果确定一定有匹配,使用INNER JOIN
-- 如果需要保留所有左表记录,使用LEFT JOIN
-- 5. 避免在JOIN条件中使用函数
-- 不好的写法(会导致索引失效)
SELECT e.emp_name, d.dept_name
FROM employees e
JOIN departments d ON LOWER(d.dept_name) = '技术部';
-- 好的写法
SELECT e.emp_name, d.dept_name
FROM employees e
JOIN departments d ON e.dept_id = d.dept_id
WHERE d.dept_name = '技术部';JOIN类型总结
| JOIN类型 | 说明 | 使用场景 |
|---|---|---|
| INNER JOIN | 只返回匹配的行 | 只需要有关联的数据 |
| LEFT JOIN | 返回左表所有行 | 需要保留左表所有数据 |
| RIGHT JOIN | 返回右表所有行 | 需要保留右表所有数据(可改用LEFT JOIN) |
| FULL JOIN | 返回两表所有行 | 需要两表所有数据 |
| CROSS JOIN | 笛卡尔积 | 生成所有组合 |
| SELF JOIN | 表自连接 | 层次结构数据 |
| NATURAL JOIN | 自动匹配同名列 | 列名规范统一时(慎用) |
小结
本章介绍了PostgreSQL中各种JOIN操作:
- INNER JOIN:最常用的连接,只返回匹配的行
- LEFT/RIGHT/FULL JOIN:外连接,保留未匹配的行
- CROSS JOIN:生成笛卡尔积
- SELF JOIN:处理层次结构数据
- 多表JOIN:连接多个表获取综合信息
掌握JOIN操作是数据库查询的核心技能,下一章我们将学习索引的创建和使用。
