Skip to content

性能优化

性能优化是数据库管理的重要环节。本章将介绍PostgreSQL的性能优化技术,包括查询优化、索引优化、配置优化和监控分析。

查询优化

使用EXPLAIN分析查询

sql
-- 创建示例表
CREATE TABLE orders (
    order_id SERIAL PRIMARY KEY,
    customer_id INTEGER,
    order_date DATE,
    total_amount NUMERIC(10, 2),
    status VARCHAR(20)
);

CREATE TABLE order_items (
    item_id SERIAL PRIMARY KEY,
    order_id INTEGER REFERENCES orders(order_id),
    product_name VARCHAR(100),
    quantity INTEGER,
    price NUMERIC(10, 2)
);

-- 插入测试数据
INSERT INTO orders (customer_id, order_date, total_amount, status)
SELECT 
    (RANDOM() * 100)::INTEGER,
    CURRENT_DATE - (RANDOM() * 365)::INTEGER,
    (RANDOM() * 10000)::NUMERIC(10, 2),
    CASE (RANDOM() * 3)::INTEGER
        WHEN 0 THEN 'pending'
        WHEN 1 THEN 'processing'
        WHEN 2 THEN 'shipped'
        ELSE 'delivered'
    END
FROM generate_series(1, 100000);

INSERT INTO order_items (order_id, product_name, quantity, price)
SELECT 
    (RANDOM() * 100000)::INTEGER + 1,
    'Product ' || (RANDOM() * 100)::INTEGER,
    (RANDOM() * 10)::INTEGER + 1,
    (RANDOM() * 1000)::NUMERIC(10, 2)
FROM generate_series(1, 500000);

-- 查看查询计划
EXPLAIN SELECT * FROM orders WHERE customer_id = 50;

-- 查看实际执行统计
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 50;

-- 查看详细统计
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) 
SELECT * FROM orders WHERE customer_id = 50;

-- JSON格式输出
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) 
SELECT * FROM orders WHERE customer_id = 50;

EXPLAIN输出解读

sql
-- 查询计划示例
EXPLAIN ANALYZE
SELECT o.order_id, o.total_amount, COUNT(oi.item_id) AS item_count
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
WHERE o.status = 'shipped'
GROUP BY o.order_id, o.total_amount
ORDER BY o.total_amount DESC
LIMIT 10;

/*
输出解读:
1. Seq Scan(顺序扫描):全表扫描,通常需要优化
2. Index Scan(索引扫描):使用索引,性能较好
3. Index Only Scan(仅索引扫描):只访问索引,性能最好
4. Bitmap Index Scan(位图索引扫描):多条件组合时使用
5. Hash Join(哈希连接):适合大表连接
6. Nested Loop(嵌套循环):适合小表连接
7. Merge Join(合并连接):适合已排序的数据

关键指标:
- cost:估计的执行成本(启动成本..总成本)
- rows:估计返回的行数
- width:每行的平均字节数
- actual time:实际执行时间(毫秒)
- loops:执行次数
- Buffers:缓冲区使用情况
*/

常见查询优化

sql
-- 1. 创建索引优化查询
-- 创建索引前
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 50;
-- 结果:Seq Scan

-- 创建索引
CREATE INDEX idx_orders_customer_id ON orders(customer_id);

-- 创建索引后
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 50;
-- 结果:Index Scan

-- 2. 复合索引优化多条件查询
-- 查询特定状态的订单
EXPLAIN ANALYZE 
SELECT * FROM orders 
WHERE customer_id = 50 AND status = 'shipped';

-- 创建复合索引
CREATE INDEX idx_orders_customer_status ON orders(customer_id, status);

-- 3. 覆盖索引避免回表
-- 只查询索引列
SELECT customer_id, status FROM orders WHERE customer_id = 50;

-- 创建覆盖索引
CREATE INDEX idx_orders_covering ON orders(customer_id) INCLUDE (status, total_amount);

-- 4. 使用LIMIT优化
-- 只需要前N条记录时使用LIMIT
SELECT * FROM orders ORDER BY order_date DESC LIMIT 10;

-- 5. 避免SELECT *
-- 不好的做法
SELECT * FROM orders WHERE customer_id = 50;

-- 好的做法:只选择需要的列
SELECT order_id, order_date, total_amount 
FROM orders 
WHERE customer_id = 50;

-- 6. 使用EXISTS代替IN
-- 不好的做法
SELECT * FROM orders o
WHERE o.customer_id IN (SELECT customer_id FROM customers WHERE status = 'active');

-- 好的做法
SELECT * FROM orders o
WHERE EXISTS (SELECT 1 FROM customers c WHERE c.customer_id = o.customer_id AND c.status = 'active');

-- 7. 使用JOIN代替子查询
-- 子查询
SELECT * FROM orders 
WHERE customer_id IN (SELECT customer_id FROM customers WHERE status = 'active');

-- JOIN(通常更高效)
SELECT DISTINCT o.* 
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE c.status = 'active';

-- 8. 批量操作优化
-- 不好的做法:循环插入
-- INSERT INTO orders (...) VALUES (...); -- 执行多次

-- 好的做法:批量插入
INSERT INTO orders (customer_id, order_date, total_amount, status) VALUES
    (1, '2024-01-01', 100, 'pending'),
    (2, '2024-01-02', 200, 'pending'),
    (3, '2024-01-03', 300, 'pending');

-- 使用COPY更快
COPY orders (customer_id, order_date, total_amount, status) FROM STDIN WITH (FORMAT csv);

索引优化

索引使用分析

sql
-- 查看索引使用统计
SELECT 
    schemaname,
    relname AS table_name,
    indexrelname AS index_name,
    idx_scan AS index_scans,
    idx_tup_read AS tuples_read,
    idx_tup_fetch AS tuples_fetched,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

-- 查找未使用的索引
SELECT 
    schemaname || '.' || relname AS table,
    indexrelname AS index,
    pg_size_pretty(pg_relation_size(indexrelid)) AS size,
    idx_scan AS scans
FROM pg_stat_user_indexes
WHERE idx_scan = 0
    AND indexrelname NOT LIKE '%_pkey'
ORDER BY pg_relation_size(indexrelid) DESC;

-- 查看索引定义
SELECT 
    indexname,
    indexdef
FROM pg_indexes
WHERE tablename = 'orders';

索引优化建议

sql
-- 1. 选择性高的列适合索引
-- 计算选择性
SELECT 
    COUNT(DISTINCT status) AS distinct_values,
    COUNT(*) AS total_rows,
    ROUND(COUNT(DISTINCT status)::FLOAT / COUNT(*) * 100, 2) AS selectivity_percent
FROM orders;

-- 2. 复合索引列顺序
-- 将等值查询的列放在前面
CREATE INDEX idx_orders_status_customer ON orders(status, customer_id);
-- 适用于:WHERE status = 'shipped' AND customer_id = 50

-- 3. 部分索引减少索引大小
CREATE INDEX idx_orders_active ON orders(customer_id) 
WHERE status IN ('pending', 'processing');

-- 4. 表达式索引
CREATE INDEX idx_orders_date_year ON orders(EXTRACT(YEAR FROM order_date));

-- 5. 并发创建索引(不阻塞写入)
CREATE INDEX CONCURRENTLY idx_orders_date ON orders(order_date);

-- 6. 重建索引
REINDEX INDEX idx_orders_customer_id;
REINDEX TABLE orders;

配置优化

内存配置

sql
-- 查看当前配置
SHOW ALL;
SHOW shared_buffers;
SHOW effective_cache_size;
SHOW work_mem;
SHOW maintenance_work_mem;

-- 关键内存参数

-- shared_buffers:共享缓冲区大小
-- 建议:系统内存的25%
-- shared_buffers = '4GB'

-- effective_cache_size:有效缓存大小
-- 建议:系统内存的50-75%
-- effective_cache_size = '12GB'

-- work_mem:每个操作的内存
-- 建议:根据并发连接数调整
-- work_mem = '64MB'

-- maintenance_work_mem:维护操作内存
-- 建议:用于VACUUM、CREATE INDEX等
-- maintenance_work_mem = '512MB'

-- huge_pages:大页内存
-- 建议:大内存系统启用
-- huge_pages = 'on'

-- 修改配置
ALTER SYSTEM SET shared_buffers = '4GB';
ALTER SYSTEM SET effective_cache_size = '12GB';
ALTER SYSTEM SET work_mem = '64MB';

-- 需要重启生效
-- systemctl restart postgresql

连接配置

sql
-- max_connections:最大连接数
-- 建议:根据应用需求设置
SHOW max_connections;
ALTER SYSTEM SET max_connections = 200;

-- 连接池推荐使用PgBouncer

-- superuser_reserved_connections:超级用户保留连接
ALTER SYSTEM SET superuser_reserved_connections = 5;

WAL配置

sql
-- wal_buffers:WAL缓冲区
ALTER SYSTEM SET wal_buffers = '64MB';

-- checkpoint_completion_target:检查点完成目标
-- 建议:0.8-0.9
ALTER SYSTEM SET checkpoint_completion_target = 0.9;

-- max_wal_size:最大WAL大小
ALTER SYSTEM SET max_wal_size = '2GB';

-- min_wal_size:最小WAL大小
ALTER SYSTEM SET min_wal_size = '1GB';

查询规划器配置

sql
-- random_page_cost:随机页成本
-- SSD建议降低此值
ALTER SYSTEM SET random_page_cost = 1.1;  -- 默认4.0

-- effective_io_concurrency:有效IO并发
-- SSD可以设置更高
ALTER SYSTEM SET effective_io_concurrency = 200;

-- default_statistics_target:统计信息目标
-- 更高的值提供更准确的统计,但分析更慢
ALTER SYSTEM SET default_statistics_target = 200;

-- 对特定列设置更高的统计目标
ALTER TABLE orders ALTER COLUMN customer_id SET STATISTICS 500;
ANALYZE orders;

VACUUM和ANALYZE

VACUUM操作

sql
-- VACUUM:清理死元组,回收空间
VACUUM orders;

-- VACUUM FULL:完全清理,回收空间给操作系统(会锁表)
VACUUM FULL orders;

-- VACUUM ANALYZE:清理并更新统计信息
VACUUM ANALYZE orders;

-- 查看表的死元组数量
SELECT 
    schemaname,
    relname,
    n_live_tup AS live_tuples,
    n_dead_tup AS dead_tuples,
    ROUND(n_dead_tup::FLOAT / NULLIF(n_live_tup + n_dead_tup, 0) * 100, 2) AS dead_ratio
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;

-- 查看是否需要VACUUM
SELECT 
    relname,
    last_vacuum,
    last_autovacuum,
    vacuum_count,
    autovacuum_count
FROM pg_stat_user_tables
WHERE schemaname = 'public';

自动清理配置

sql
-- 查看自动清理配置
SHOW autovacuum;
SHOW autovacuum_vacuum_threshold;
SHOW autovacuum_vacuum_scale_factor;

-- 配置自动清理
ALTER SYSTEM SET autovacuum = on;
ALTER SYSTEM SET autovacuum_vacuum_threshold = 50;
ALTER SYSTEM SET autovacuum_vacuum_scale_factor = 0.1;

-- 对特定表设置
ALTER TABLE orders SET (
    autovacuum_vacuum_scale_factor = 0.05,
    autovacuum_vacuum_threshold = 100
);

ANALYZE操作

sql
-- ANALYZE:更新统计信息
ANALYZE orders;

-- 分析特定列
ANALYZE orders (customer_id, status);

-- 查看表的统计信息
SELECT 
    attname,
    n_distinct,
    most_common_vals,
    most_common_freqs
FROM pg_stats
WHERE tablename = 'orders';

-- 查看最后分析时间
SELECT 
    relname,
    last_analyze,
    last_autoanalyze
FROM pg_stat_user_tables
WHERE schemaname = 'public';

监控与分析

活动监控

sql
-- 查看当前活动查询
SELECT 
    pid,
    usename,
    application_name,
    state,
    query_start,
    EXTRACT(EPOCH FROM (now() - query_start)) AS duration_seconds,
    query
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY query_start;

-- 查看等待事件
SELECT 
    pid,
    wait_event_type,
    wait_event,
    query
FROM pg_stat_activity
WHERE wait_event IS NOT NULL;

-- 终止长时间运行的查询
-- SELECT pg_cancel_backend(pid);
-- SELECT pg_terminate_backend(pid);

-- 查看锁等待
SELECT 
    blocked_locks.pid AS blocked_pid,
    blocked_activity.usename AS blocked_user,
    blocking_locks.pid AS blocking_pid,
    blocking_activity.usename AS blocking_user,
    blocked_activity.query AS blocked_query,
    blocking_activity.query AS blocking_query
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
    AND blocking_locks.relation = blocked_locks.relation
    AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;

性能统计

sql
-- 启用pg_stat_statements扩展
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- 查看最慢的查询
SELECT 
    query,
    calls,
    ROUND(total_exec_time::NUMERIC, 2) AS total_time_ms,
    ROUND(mean_exec_time::NUMERIC, 2) AS mean_time_ms,
    ROUND((100 * total_exec_time / SUM(total_exec_time) OVER ())::NUMERIC, 2) AS percentage
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;

-- 查看执行次数最多的查询
SELECT 
    query,
    calls,
    ROUND(mean_exec_time::NUMERIC, 2) AS mean_time_ms
FROM pg_stat_statements
ORDER BY calls DESC
LIMIT 10;

-- 重置统计
SELECT pg_stat_statements_reset();

表大小监控

sql
-- 查看表大小
SELECT 
    schemaname,
    relname AS table_name,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    pg_size_pretty(pg_relation_size(relid)) AS table_size,
    pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 20;

-- 查看索引大小
SELECT 
    schemaname,
    relname AS table_name,
    indexrelname AS index_name,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC
LIMIT 20;

-- 查看数据库大小
SELECT 
    datname,
    pg_size_pretty(pg_database_size(datname)) AS size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;

性能优化清单

查询优化

  • [ ] 使用EXPLAIN ANALYZE分析慢查询
  • [ ] 为WHERE、JOIN、ORDER BY列创建索引
  • [ ] 避免SELECT *
  • [ ] 使用LIMIT限制结果
  • [ ] 使用适当的JOIN类型
  • [ ] 避免在WHERE中使用函数

索引优化

  • [ ] 定期检查未使用的索引
  • [ ] 使用复合索引优化多条件查询
  • [ ] 考虑使用部分索引
  • [ ] 使用INCLUDE创建覆盖索引

配置优化

  • [ ] 根据系统内存调整shared_buffers
  • [ ] 设置合理的work_mem
  • [ ] SSD系统降低random_page_cost
  • [ ] 配置适当的连接数

维护任务

  • [ ] 定期执行VACUUM ANALYZE
  • [ ] 监控死元组数量
  • [ ] 定期重建碎片化严重的表
  • [ ] 更新统计信息

小结

本章介绍了PostgreSQL性能优化的主要内容:

内容说明
EXPLAIN分析查询执行计划
索引优化创建合适的索引,删除无用索引
配置优化内存、连接、WAL等参数调优
VACUUM清理死元组,回收空间
ANALYZE更新统计信息
监控活动查询、性能统计、表大小监控

性能优化是一个持续的过程,需要根据实际情况不断调整和优化。至此,PostgreSQL教程全部完成!