应急性能优化方法
# 应急性能优化方法
在数据库运维过程中,经常会遇到突发的性能问题,需要快速响应和处理。本文将介绍一系列应急性能优化的方法和技巧,帮助在紧急情况下快速恢复系统性能。
# 1. 应急性能优化概述
# 1.1 应急场景识别
应急性能优化通常发生在以下场景:
-- 1. 突发的高负载
-- 2. 查询响应时间急剧增加
-- 3. 系统资源耗尽
-- 4. 连接数激增
-- 5. 磁盘空间不足
-- 6. 内存不足
-- 识别性能问题的常用命令
SHOW PROCESSLIST;
SHOW STATUS LIKE 'Threads_connected';
SHOW STATUS LIKE 'Created_tmp_tables';
SHOW STATUS LIKE 'Handler_read_rnd_next';
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
# 1.2 应急优化原则
-- 应急优化的核心原则:
-- 1. 快速诊断问题根源
-- 2. 最小化对业务的影响
-- 3. 立即见效的优化措施
-- 4. 为长期优化争取时间
-- 5. 记录优化过程和结果
-- 紧急响应流程:
-- 1. 问题识别和确认
-- 2. 快速诊断和分析
-- 3. 实施应急优化措施
-- 4. 验证优化效果
-- 5. 记录和总结
1
2
3
4
5
6
7
8
9
10
11
12
13
2
3
4
5
6
7
8
9
10
11
12
13
# 2. 快速诊断工具
# 2.1 实时监控命令
-- 实时性能监控
-- 1. 查看活跃连接
SHOW PROCESSLIST;
-- 2. 查看系统状态
SHOW STATUS LIKE 'Threads_connected';
SHOW STATUS LIKE 'Threads_running';
SHOW STATUS LIKE 'Aborted_connects';
-- 3. 查看查询缓存状态
SHOW STATUS LIKE 'Qcache%';
-- 4. 查看慢查询
SHOW VARIABLES LIKE 'slow_query_log';
SHOW VARIABLES LIKE 'long_query_time';
SHOW STATUS LIKE 'Slow_queries';
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 2.2 性能瓶颈识别
-- 识别性能瓶颈的常用指标
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME IN (
'Threads_connected',
'Threads_running',
'Created_tmp_tables',
'Created_tmp_disk_tables',
'Handler_read_rnd_next',
'Handler_read_first',
'Handler_read_key',
'Handler_read_next',
'Innodb_buffer_pool_reads',
'Innodb_buffer_pool_wait_free'
);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 2.3 SQL性能分析
-- 快速分析慢查询
-- 1. 查看当前慢查询
SHOW VARIABLES LIKE 'slow_query_log_file';
-- 2. 使用EXPLAIN分析查询
EXPLAIN SELECT * FROM table_name WHERE condition;
-- 3. 查看执行计划
EXPLAIN FORMAT=JSON SELECT * FROM table_name WHERE condition;
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 3. 应急优化措施
# 3.1 连接优化
-- 1. 限制连接数
SET GLOBAL max_connections = 500;
-- 2. 优化连接超时设置
SET GLOBAL wait_timeout = 28800;
SET GLOBAL interactive_timeout = 28800;
-- 3. 查看连接状态
SHOW STATUS LIKE 'Max_used_connections';
SHOW STATUS LIKE 'Connections';
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
# 3.2 内存优化
-- 1. 调整缓冲区大小
SET GLOBAL tmp_table_size = 256*1024*1024; -- 256MB
SET GLOBAL max_heap_table_size = 256*1024*1024; -- 256MB
-- 2. 优化InnoDB缓冲池
SET GLOBAL innodb_buffer_pool_size = 2G; -- 根据服务器内存调整
-- 3. 查看内存使用情况
SHOW STATUS LIKE 'Tmp_tables';
SHOW STATUS LIKE 'Created_tmp_disk_tables';
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
# 3.3 索引优化
-- 1. 快速添加索引(应急情况)
-- 注意:在生产环境添加索引需要谨慎
CREATE INDEX idx_emergency ON table_name(column_name);
-- 2. 重建索引
ALTER TABLE table_name ENGINE=InnoDB;
-- 3. 查看索引使用情况
SELECT
TABLE_SCHEMA,
TABLE_NAME,
INDEX_NAME,
CARDINALITY
FROM information_schema.STATISTICS
WHERE TABLE_SCHEMA NOT IN ('information_schema', 'mysql', 'performance_schema')
ORDER BY CARDINALITY DESC
LIMIT 10;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 4. 查询优化应急措施
# 4.1 慢查询优化
-- 1. 临时禁用慢查询日志(避免日志文件过大)
SET GLOBAL slow_query_log = 'OFF';
-- 2. 调整慢查询阈值
SET GLOBAL long_query_time = 10;
-- 3. 查看慢查询统计
SHOW STATUS LIKE 'Slow_queries';
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 4.2 查询重写应急方案
-- 1. 临时添加LIMIT限制
SELECT * FROM large_table WHERE condition LIMIT 1000;
-- 2. 使用临时表缓存中间结果
CREATE TEMPORARY TABLE temp_results AS
SELECT * FROM large_table WHERE condition;
-- 3. 分批处理大数据
SELECT * FROM large_table WHERE condition LIMIT 1000 OFFSET 0;
SELECT * FROM large_table WHERE condition LIMIT 1000 OFFSET 1000;
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
# 4.3 查询缓存优化
-- 1. 临时启用查询缓存
SET GLOBAL query_cache_type = ON;
SET GLOBAL query_cache_size = 128*1024*1024; -- 128MB
-- 2. 查看缓存状态
SHOW STATUS LIKE 'Qcache%';
-- 3. 清空查询缓存
FLUSH QUERY CACHE;
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 5. 系统资源优化
# 5.1 磁盘I/O优化
-- 1. 优化I/O设置
SET GLOBAL innodb_io_capacity = 2000;
SET GLOBAL innodb_io_capacity_max = 4000;
-- 2. 查看I/O状态
SHOW STATUS LIKE 'Innodb_os_log%';
SHOW STATUS LIKE 'Innodb_buffer_pool%';
1
2
3
4
5
6
7
2
3
4
5
6
7
# 5.2 内存管理
-- 1. 监控内存使用
SHOW STATUS LIKE 'Memory%';
-- 2. 调整内存相关参数
SET GLOBAL key_buffer_size = 256*1024*1024; -- 256MB
SET GLOBAL innodb_log_file_size = 256*1024*1024; -- 256MB
-- 3. 查看内存使用情况
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME LIKE '%Memory%';
1
2
3
4
5
6
7
8
9
10
11
12
13
2
3
4
5
6
7
8
9
10
11
12
13
# 5.3 进程管理
-- 1. 终止慢查询(谨慎使用)
-- KILL QUERY thread_id;
-- 2. 查看长时间运行的查询
SELECT
ID,
USER,
HOST,
DB,
COMMAND,
TIME,
STATE,
INFO
FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE TIME > 30 AND COMMAND != 'Sleep';
-- 3. 终止阻塞查询
SELECT
r.trx_id waiting_trx_id,
r.trx_mysql_thread_id waiting_thread,
r.trx_query waiting_query,
b.trx_id blocking_trx_id,
b.trx_mysql_thread_id blocking_thread,
b.trx_query blocking_query
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx b ON b.trx_id = w.blocking_trx_id
INNER JOIN information_schema.innodb_trx r ON r.trx_id = w.requesting_trx_id;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# 6. 应急优化脚本
# 6.1 快速诊断脚本
-- 创建应急诊断存储过程
DELIMITER //
CREATE PROCEDURE emergency_diagnosis()
BEGIN
-- 1. 连接状态检查
SELECT 'Connection Status' as check_section;
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME IN (
'Threads_connected',
'Max_used_connections',
'Connections',
'Aborted_connects'
);
-- 2. 内存使用情况
SELECT 'Memory Usage' as check_section;
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME IN (
'Created_tmp_tables',
'Created_tmp_disk_tables',
'Tmp_tables',
'Tmp_disk_tables'
);
-- 3. 查询性能指标
SELECT 'Query Performance' as check_section;
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME IN (
'Slow_queries',
'Handler_read_rnd_next',
'Innodb_buffer_pool_reads'
);
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# 6.2 应急优化脚本
-- 创建应急优化存储过程
DELIMITER //
CREATE PROCEDURE emergency_optimization()
BEGIN
-- 1. 临时调整参数
SET GLOBAL tmp_table_size = 256*1024*1024;
SET GLOBAL max_heap_table_size = 256*1024*1024;
SET GLOBAL innodb_io_capacity = 2000;
-- 2. 检查优化结果
SELECT 'Emergency Optimization Applied' as result;
-- 3. 记录优化操作
INSERT INTO optimization_log (operation, timestamp, notes)
VALUES ('Emergency Optimization', NOW(), 'Applied temporary settings for performance recovery');
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 7. 应急处理流程
# 7.1 问题识别流程
-- 应急处理标准流程:
-- 1. 确认问题现象
-- 2. 收集相关指标
-- 3. 分析问题原因
-- 4. 实施应急措施
-- 5. 验证优化效果
-- 6. 记录处理过程
-- 问题确认示例
SELECT
NOW() as current_time,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Threads_connected') as connections,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slow_queries') as slow_queries,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads') as buffer_pool_reads;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
2
3
4
5
6
7
8
9
10
11
12
13
14
# 7.2 优化效果验证
-- 验证优化效果的脚本
DELIMITER //
CREATE PROCEDURE verify_optimization()
BEGIN
DECLARE before_connections INT;
DECLARE after_connections INT;
DECLARE before_slow INT;
DECLARE after_slow INT;
-- 记录优化前状态
SELECT VARIABLE_VALUE INTO before_connections
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
SELECT VARIABLE_VALUE INTO before_slow
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries';
-- 执行测试查询
SELECT COUNT(*) FROM large_table WHERE condition;
-- 记录优化后状态
SELECT VARIABLE_VALUE INTO after_connections
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
SELECT VARIABLE_VALUE INTO after_slow
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries';
-- 输出对比结果
SELECT
before_connections as connections_before,
after_connections as connections_after,
before_slow as slow_queries_before,
after_slow as slow_queries_after;
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# 8. 预防性措施
# 8.1 建立监控体系
-- 创建性能监控事件
CREATE EVENT performance_monitor
ON SCHEDULE EVERY 5 MINUTE
DO
BEGIN
INSERT INTO performance_metrics (
timestamp,
connections,
slow_queries,
tmp_tables,
buffer_pool_reads
) VALUES (
NOW(),
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Threads_connected'),
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Slow_queries'),
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Created_tmp_tables'),
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads')
);
END;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# 8.2 建立预警机制
-- 创建性能预警存储过程
DELIMITER //
CREATE PROCEDURE check_performance_alerts()
BEGIN
DECLARE connection_usage DECIMAL(5,2);
DECLARE slow_query_count INT;
-- 检查连接使用率
SELECT
ROUND((VARIABLE_VALUE / (SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_VARIABLES
WHERE VARIABLE_NAME = 'max_connections')) * 100, 2)
INTO connection_usage
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
-- 检查慢查询数
SELECT VARIABLE_VALUE INTO slow_query_count
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries';
-- 设置预警阈值
IF connection_usage > 80 THEN
INSERT INTO performance_alerts (alert_type, message, severity)
VALUES ('HIGH_CONNECTION_USAGE',
CONCAT('Connection usage: ', connection_usage, '%'),
'WARNING');
END IF;
IF slow_query_count > 100 THEN
INSERT INTO performance_alerts (alert_type, message, severity)
VALUES ('HIGH_SLOW_QUERIES',
CONCAT('Slow queries count: ', slow_query_count),
'WARNING');
END IF;
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# 9. 总结
应急性能优化是数据库运维的重要技能,需要在快速响应和系统稳定性之间找到平衡。通过掌握以下关键技能,可以有效应对突发性能问题:
- 快速诊断能力:熟练使用各种监控命令和工具
- 应急优化手段:掌握常用的优化参数和方法
- 风险控制意识:在紧急情况下保持谨慎操作
- 记录总结习惯:及时记录优化过程和结果
- 预防机制建设:建立完善的监控和预警体系
在实际应用中,应该结合具体的业务场景和系统特点,灵活运用这些应急优化方法,确保在关键时刻能够快速有效地恢复系统性能。
上次更新: 3/4/2026