数据库健康检查
# 数据库健康检查
数据库健康检查是保障系统稳定运行的重要环节。通过定期的健康检查,可以及时发现和解决潜在问题,预防系统故障。
# 1. 健康检查的重要性
# 1.1 预防性维护
数据库健康检查能够帮助我们:
- 及早发现性能瓶颈
- 预防数据丢失风险
- 识别配置问题
- 确保系统稳定运行
# 1.2 系统可靠性
通过定期健康检查,可以:
- 保证数据一致性
- 确保服务可用性
- 提高系统响应速度
- 降低故障发生概率
# 2. 健康检查的主要内容
# 2.1 系统资源监控
-- 检查内存使用情况
SHOW STATUS LIKE 'Bytes_received';
SHOW STATUS LIKE 'Bytes_sent';
SHOW STATUS LIKE 'Threads_connected';
-- 检查磁盘空间
SHOW VARIABLES LIKE 'datadir';
-- 需要手动检查磁盘空间使用情况
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 2.2 连接状态检查
-- 查看连接状态
SHOW PROCESSLIST;
SHOW STATUS LIKE 'Threads_connected';
SHOW STATUS LIKE 'Max_used_connections';
-- 检查连接池使用情况
SHOW STATUS LIKE 'Aborted_connects';
SHOW STATUS LIKE 'Connections';
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 2.3 性能指标监控
-- 检查慢查询
SHOW VARIABLES LIKE 'slow_query_log';
SHOW VARIABLES LIKE 'long_query_time';
-- 查看慢查询统计
SHOW STATUS LIKE 'Slow_queries';
-- 检查查询缓存(MySQL 8.0已移除)
SHOW STATUS LIKE 'Qcache%';
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 3. 常见健康检查项
# 3.1 索引健康检查
-- 检查表的索引使用情况
SELECT
TABLE_SCHEMA,
TABLE_NAME,
INDEX_NAME,
CARDINALITY,
INDEX_TYPE
FROM information_schema.STATISTICS
WHERE TABLE_SCHEMA NOT IN ('information_schema', 'mysql', 'performance_schema')
ORDER BY TABLE_SCHEMA, TABLE_NAME;
-- 检查未使用的索引
SELECT
t.TABLE_SCHEMA,
t.TABLE_NAME,
i.INDEX_NAME,
i.CARDINALITY
FROM information_schema.TABLES t
JOIN information_schema.STATISTICS i ON t.TABLE_SCHEMA = i.TABLE_SCHEMA
AND t.TABLE_NAME = i.TABLE_NAME
WHERE t.TABLE_SCHEMA NOT IN ('information_schema', 'mysql', 'performance_schema')
AND i.CARDINALITY = 0;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 3.2 表结构健康检查
-- 检查表的完整性
CHECK TABLE table_name;
-- 检查表的状态
SHOW TABLE STATUS LIKE 'table_name';
-- 检查表的碎片情况
SELECT
TABLE_SCHEMA,
TABLE_NAME,
DATA_LENGTH,
INDEX_LENGTH,
DATA_FREE
FROM information_schema.TABLES
WHERE TABLE_SCHEMA NOT IN ('information_schema', 'mysql', 'performance_schema')
AND DATA_FREE > 0;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 3.3 日志文件检查
-- 查看二进制日志状态
SHOW MASTER STATUS;
SHOW BINARY LOGS;
-- 查看错误日志位置
SHOW VARIABLES LIKE 'log_error';
-- 检查慢查询日志
SHOW VARIABLES LIKE 'slow_query_log_file';
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 4. 自动化健康检查脚本
# 4.1 基础健康检查脚本
-- 创建健康检查存储过程
DELIMITER //
CREATE PROCEDURE health_check()
BEGIN
DECLARE connection_count INT DEFAULT 0;
DECLARE slow_query_count INT DEFAULT 0;
DECLARE error_count INT DEFAULT 0;
-- 检查连接数
SELECT VARIABLE_VALUE INTO connection_count
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
-- 检查慢查询数
SELECT VARIABLE_VALUE INTO slow_query_count
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries';
-- 检查连接错误数
SELECT VARIABLE_VALUE INTO error_count
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Aborted_connects';
-- 输出检查结果
SELECT
'Health Check Results' as check_type,
connection_count as connections,
slow_query_count as slow_queries,
error_count as connection_errors;
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# 4.2 完整健康检查脚本
-- 创建综合健康检查脚本
DELIMITER //
CREATE PROCEDURE comprehensive_health_check()
BEGIN
-- 检查系统状态
SELECT 'System Status' as check_section;
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME IN (
'Threads_connected',
'Max_used_connections',
'Slow_queries',
'Aborted_connects',
'Opened_tables',
'Created_tmp_tables'
);
-- 检查关键变量
SELECT 'System Variables' as check_section;
SELECT
VARIABLE_NAME,
VARIABLE_VALUE
FROM information_schema.GLOBAL_VARIABLES
WHERE VARIABLE_NAME IN (
'innodb_buffer_pool_size',
'max_connections',
'tmp_table_size',
'max_heap_table_size'
);
-- 检查表空间使用情况
SELECT 'Table Space Usage' as check_section;
SELECT
TABLE_SCHEMA,
COUNT(*) as table_count,
SUM(DATA_LENGTH + INDEX_LENGTH) as total_size
FROM information_schema.TABLES
WHERE TABLE_SCHEMA NOT IN ('information_schema', 'mysql', 'performance_schema')
GROUP BY TABLE_SCHEMA
ORDER BY total_size DESC
LIMIT 10;
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# 5. 健康检查指标阈值
# 5.1 连接数阈值
-- 连接数监控
SELECT
VARIABLE_VALUE as current_connections,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_VARIABLES
WHERE VARIABLE_NAME = 'max_connections') as max_connections,
ROUND((VARIABLE_VALUE / (SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_VARIABLES
WHERE VARIABLE_NAME = 'max_connections')) * 100, 2) as usage_percent
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 5.2 性能指标监控
-- 性能指标监控
SELECT
'Slow Queries' as metric,
VARIABLE_VALUE as value
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries'
UNION ALL
SELECT
'Aborted Connections' as metric,
VARIABLE_VALUE as value
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Aborted_connects'
UNION ALL
SELECT
'Created Temp Tables' as metric,
VARIABLE_VALUE as value
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Created_tmp_tables';
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 6. 健康检查告警机制
# 6.1 创建告警表
-- 创建健康检查告警表
CREATE TABLE health_alerts (
id INT AUTO_INCREMENT PRIMARY KEY,
alert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
alert_type VARCHAR(50),
alert_message TEXT,
severity ENUM('INFO', 'WARNING', 'ERROR') DEFAULT 'INFO'
);
-- 插入告警记录
INSERT INTO health_alerts (alert_type, alert_message, severity)
VALUES ('CONNECTION_LIMIT', 'Connection usage approaching threshold', 'WARNING');
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
# 6.2 告警检查存储过程
-- 告警检查存储过程
DELIMITER //
CREATE PROCEDURE check_health_alerts()
BEGIN
DECLARE connection_usage DECIMAL(5,2);
DECLARE slow_query_count INT;
-- 检查连接使用率
SELECT
ROUND((VARIABLE_VALUE / (SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_VARIABLES
WHERE VARIABLE_NAME = 'max_connections')) * 100, 2)
INTO connection_usage
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
-- 检查慢查询数
SELECT VARIABLE_VALUE INTO slow_query_count
FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries';
-- 设置告警阈值
IF connection_usage > 80 THEN
INSERT INTO health_alerts (alert_type, alert_message, severity)
VALUES ('HIGH_CONNECTION_USAGE',
CONCAT('Connection usage: ', connection_usage, '%'),
'WARNING');
END IF;
IF slow_query_count > 100 THEN
INSERT INTO health_alerts (alert_type, alert_message, severity)
VALUES ('HIGH_SLOW_QUERIES',
CONCAT('Slow queries count: ', slow_query_count),
'WARNING');
END IF;
END //
DELIMITER ;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# 7. 健康检查最佳实践
# 7.1 定期检查计划
-- 创建定期健康检查事件
CREATE EVENT regular_health_check
ON SCHEDULE EVERY 10 MINUTE
DO
BEGIN
CALL comprehensive_health_check();
CALL check_health_alerts();
END;
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# 7.2 健康检查报告生成
-- 生成健康检查报告
SELECT
'Database Health Report' as report_title,
NOW() as report_time,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected') as current_connections,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Slow_queries') as slow_queries,
(SELECT VARIABLE_VALUE FROM information_schema.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Created_tmp_tables') as temp_tables_created;
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
# 8. 总结
数据库健康检查是保障系统稳定运行的重要手段。通过建立完善的健康检查机制,可以:
- 提前发现问题
- 预防系统故障
- 优化系统性能
- 提高运维效率
建议建立定期的健康检查制度,结合自动化监控工具,确保数据库系统的稳定性和可靠性。
上次更新: 3/4/2026