利用performance_schema进行故障诊断(mysql金字塔法则读书笔记)

instruments:生产者,用于采集mysql中各种操作产生的事件信息,对应配置表中的配置项,我们可以称之为采集配置项。
consumers:消费者,对应的消费者用于存储来自instruments采集的数据,对应配置表的配置项,我们可以称之为消费存储配置项。
启用所有等待事件的instruments:
-- Make performance_schema the default schema so its setup tables can be
-- referenced without a schema prefix.
use performance_schema;

-- Switch on collection and timing for every wait-event instrument.
update setup_instruments
   set enabled = 'yes',
       timed   = 'yes'
 where name like 'wait/%';

-- Display the wait instruments to confirm the new settings.
select *
  from setup_instruments
 where name like 'wait/%';

启用等待事件的consumers
-- Switch on every consumer whose name contains "wait".
-- Assumes the default schema is performance_schema.
update setup_consumers
   set enabled = 'yes'
 where name like '%wait%';

-- Display those consumers to confirm the new settings.
select *
  from setup_consumers
 where name like '%wait%';

实时统计当前等待事件:
-- Aggregate the current wait events per event name and operation, ignoring idle.
-- "create or replace" keeps this re-runnable, and the source table is
-- schema-qualified so the view does not depend on the session's default schema.
create or replace view sys.test_waits as
select sum(timer_wait)      as timer_wait,
       sum(number_of_bytes) as number_of_bytes,
       event_name,
       operation
  from performance_schema.events_waits_current
 where event_name != 'idle'
 group by event_name, operation;

-- Report aggregated waits of at least one millisecond, slowest first.
-- timer_wait is in picoseconds (1 ms = 1000000000 ps). Filtering on the raw
-- number is exact, whereas matching the formatted text against 'ns|us' would
-- still let picosecond-range values ("... ps") through.
select sys.format_time(timer_wait)       as wait_time,
       sys.format_bytes(number_of_bytes) as bytes,
       event_name,
       operation
  from sys.test_waits
 where timer_wait >= 1000000000
 order by timer_wait desc;

-- Per-thread detail of the current non-idle wait events, longest wait first.
-- Assumes the default schema is performance_schema.
select thread_id,
       event_name,
       sys.format_time(timer_wait),
       index_name,
       nesting_event_type,
       operation,
       number_of_bytes
  from events_waits_current
 where event_name != 'idle'
 order by timer_wait desc;

找出谁持有全局读锁:
通过performance_schema.metadata_locks 表来排查谁持有全局读锁。全局读锁在该表中通常记录为同一个会话的两把显式(EXPLICIT)锁:object_type分别为global和commit,lock_type都为shared;被阻塞的会话则表现为 LOCK_TYPE 为 INTENTION_EXCLUSIVE(意向排他锁)、LOCK_STATUS 为 PENDING 的记录。
-- List the metadata locks owned by every session except the current one.
-- \G is itself the statement terminator; a ";" after it is sent as an empty
-- statement and the mysql client reports "ERROR: No query specified".
select *
  from performance_schema.metadata_locks
 where owner_thread_id != sys.ps_thread_id(connection_id())\G

上面查到的 OWNER_THREAD_ID 是内部线程id,可以查询threads表找出对应的会话id(PROCESSLIST_ID):

-- Look up the threads rows for the given internal thread ids.
-- 31 and 30 are the example ids used in these notes; substitute your own.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.threads
 where thread_id in (31, 30)\G


*************************** 1. row ***************************

          OBJECT_TYPE: GLOBAL

        OBJECT_SCHEMA: NULL

          OBJECT_NAME: NULL

OBJECT_INSTANCE_BEGIN: 140514110025712

            LOCK_TYPE: SHARED

        LOCK_DURATION: EXPLICIT

          LOCK_STATUS: GRANTED

               SOURCE: lock.cc:1110

      OWNER_THREAD_ID: 31 持有锁的内部线程id为31

       OWNER_EVENT_ID: 43

*************************** 2. row ***************************

          OBJECT_TYPE: COMMIT

        OBJECT_SCHEMA: NULL

          OBJECT_NAME: NULL

OBJECT_INSTANCE_BEGIN: 140514110303360

            LOCK_TYPE: SHARED

        LOCK_DURATION: EXPLICIT

          LOCK_STATUS: GRANTED

               SOURCE: lock.cc:1194

      OWNER_THREAD_ID: 31  持有锁的内部线程id为31

       OWNER_EVENT_ID: 97

*************************** 3. row ***************************

          OBJECT_TYPE: GLOBAL

        OBJECT_SCHEMA: NULL

          OBJECT_NAME: NULL

OBJECT_INSTANCE_BEGIN: 140514110036384

            LOCK_TYPE: INTENTION_EXCLUSIVE  意向排他锁

        LOCK_DURATION: STATEMENT

          LOCK_STATUS: PENDING  状态为pending,表示正在等待被授权

               SOURCE: sql_base.cc:3190

      OWNER_THREAD_ID: 30 被阻塞

       OWNER_EVENT_ID: 2507

3 rows in set (0.04 sec)



找出谁持有MDL锁:
一个shared_write锁处于granted状态,其余是shared_upgradable、exclusive锁,其中shared_upgradable处于granted状态,exclusive处于pending状态
-- List the metadata locks owned by every session except the current one.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.metadata_locks
 where owner_thread_id != sys.ps_thread_id(connection_id())\G
通过information_schema.innodb_trx表确认源阻塞线程是否存在一个没有提交的事务:
-- Show the rows of information_schema.innodb_trx, one per InnoDB transaction.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from information_schema.innodb_trx\G

通过performance_schema.events_statements_current来查询某个线程正在执行或者最后一次执行完成的语句事件信息:
-- Show the events_statements_current row(s) for one thread.
-- 11 is the example thread id used in these notes; substitute your own.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.events_statements_current
 where thread_id = 11\G

找出谁持有表级锁:
找出持有表 READ EXTERNAL表级锁的内部线程id
-- List the table handles whose owner_thread_id is non-zero.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.table_handles
 where owner_thread_id != 0\G
找出线程正在执行什么sql:
-- Show the events_statements_current row(s) for one thread.
-- 30 is the example thread id used in these notes; substitute your own.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.events_statements_current
 where thread_id = 30\G
找出processlist_id来进行kill

-- Show the threads row for internal thread 30 (example id) to read its
-- processlist_id.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.threads
 where thread_id = 30\G


找出谁持有行级锁:

mysql8

-- MySQL 8: show all rows of performance_schema.data_locks.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from performance_schema.data_locks\G

mysql 5.7

-- MySQL 5.7: show all rows of the sys.innodb_lock_waits view.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from sys.innodb_lock_waits\G


查询最近的topsql语句:

-- Ten slowest statements in the per-thread statement history, excluding those
-- run with performance_schema as the default schema.
-- current_schema is NULL when a statement ran with no default schema; a bare
-- "!=" comparison would drop those rows, so NULL is admitted explicitly.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select thread_id,
       event_name,
       source,
       sys.format_time(timer_wait) as exec_time,
       sys.format_time(lock_time)  as lock_time,
       sql_text,
       current_schema,
       message_text,
       rows_affected,
       rows_sent,
       rows_examined
  from performance_schema.events_statements_history
 where current_schema is null
    or current_schema != 'performance_schema'
 order by timer_wait desc
 limit 10\G


统计后sql:

-- Ten most frequently executed statement digests (schema-bound only), with
-- their total/min/avg/max latency, lock time and row counters.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select schema_name,
       digest_text,
       count_star,
       sys.format_time(sum_timer_wait) as sum_time,
       sys.format_time(min_timer_wait) as min_time,
       sys.format_time(avg_timer_wait) as avg_time,
       sys.format_time(max_timer_wait) as max_time,
       sys.format_time(sum_lock_time)  as sum_lock_time,
       sum_rows_affected,
       sum_rows_sent,
       sum_rows_examined
  from performance_schema.events_statements_summary_by_digest
 where schema_name is not null
 order by count_star desc
 limit 10\G


查看最近执行失败的sql语句:

-- All three queries end with \G only: a ";" after \G is sent as an empty
-- statement and the mysql client reports "ERROR: No query specified".

-- Statements in the recent history that failed with error 1064 (parse error).
select thread_id,
       event_name,
       source,
       sys.format_time(timer_wait) as exec_time,
       sys.format_time(lock_time)  as lock_time,
       sql_text,
       current_schema,
       message_text,
       rows_affected,
       rows_sent,
       rows_examined,
       mysql_errno
  from performance_schema.events_statements_history
 where mysql_errno = 1064\G

-- Statements in the recent history that raised any error.
select thread_id,
       event_name,
       source,
       sys.format_time(timer_wait) as exec_time,
       sys.format_time(lock_time)  as lock_time,
       sql_text,
       current_schema,
       message_text,
       rows_affected,
       rows_sent,
       rows_examined,
       mysql_errno,
       errors
  from performance_schema.events_statements_history
 where errors > 0\G

-- Statement digests that have accumulated at least one error.
select schema_name,
       digest_text,
       count_star,
       sys.format_time(sum_timer_wait) as sum_time,
       sys.format_time(min_timer_wait) as min_time,
       sys.format_time(avg_timer_wait) as avg_time,
       sys.format_time(max_timer_wait) as max_time,
       sys.format_time(sum_lock_time)  as sum_lock_time,
       sum_errors,
       first_seen,
       last_seen
  from performance_schema.events_statements_summary_by_digest
 where sum_errors != 0\G

查看sql语句执行阶段和进度信息:


-- Make performance_schema the default schema for the setup tables below.
use performance_schema;

-- Switch on collection and timing for every stage-event instrument.
update setup_instruments
   set enabled = 'yes',
       timed   = 'yes'
 where name like 'stage/%';

-- Switch on every consumer whose name contains "stage".
update setup_consumers
   set enabled = 'yes'
 where name like '%stage%';

查看语句执行全过程以及每个过程时间开销等。
-- Stage events from the long history, with formatted duration and the
-- work_completed / work_estimated progress counters.
select thread_id,
       event_name,
       source,
       sys.format_time(timer_wait) as exec_time,
       work_completed,
       work_estimated
  from performance_schema.events_stages_history_long;

查看sql语句执行进度信息:
-- Show every sys.session row except the one for the current connection.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
select *
  from sys.session
 where conn_id != connection_id()\G

查询最近的事务执行信息:
-- Make performance_schema the default schema for the setup tables below.
use performance_schema;

-- Switch on the transaction instrument(s) and every consumer whose name
-- contains "transaction".
update setup_instruments
   set enabled = 'yes',
       timed   = 'yes'
 where name like 'transaction%';

update setup_consumers
   set enabled = 'yes'
 where name like '%transaction%';

-- Both queries end with \G only: a ";" after \G is sent as an empty statement
-- and the mysql client reports "ERROR: No query specified".

-- Transaction events from the "current" table.
select thread_id,
       event_name,
       state,
       trx_id,
       gtid,
       source,
       timer_wait,
       access_mode,
       isolation_level,
       autocommit,
       nesting_event_id,
       nesting_event_type
  from performance_schema.events_transactions_current\G

-- Transaction events from the long history table.
select thread_id,
       event_name,
       state,
       trx_id,
       gtid,
       source,
       timer_wait,
       access_mode,
       isolation_level,
       autocommit,
       nesting_event_id,
       nesting_event_type
  from performance_schema.events_transactions_history_long\G

查询多线程复制报错详情:
-- Overall replication status.
-- NOTE: SHOW SLAVE STATUS is deprecated as of MySQL 8.0.22 in favor of
-- SHOW REPLICA STATUS; it is kept here so the command also works on 5.7.
-- End with \G only: a ";" after \G yields "ERROR: No query specified".
show slave status\G

-- Applier workers that have recorded an error message.
select *
  from performance_schema.replication_applier_status_by_worker
 where last_error_message != ''\G







请使用浏览器的分享功能分享到微信等