[20210915]探究mutex的值 6.txt
--//昨天重新设计试验,验证如下:
--//至此可以知道dump library cache,mutex的值应该表示mutex结构体的第0-3,第8-11,第12-15,第20-23字节.分别对应ref count,gets,sleep.???.
--//同样道理mutex结构体24字节里面的信息(各占4个字节)分别表示ref_count,sid,gets,sleeps,mutex标识(这里对应bucket值,sql语句可能对应hash_value),???.
--//最后第20-23,我还是不知道表示什么?
Bucket: #=102650 Mutex=0x80528f40(0, 19, 0, 6)
--//我当时测试中猜测第3个数字表示sleeps的次数还是十分牵强的,我开始猜测这里是sleeps次数的主要原因是转储后这个数字变化较快,如果静态查看没有变化,
--//仅仅知道上面的第3个数字 对应 mutex结构体的第12-15字节.
SYS@book> oradebug poke 0x0000000080528f40 4 0x000001ab
BEFORE: [080528F40, 080528F44) = 00000100
AFTER: [080528F40, 080528F44) = 000001AB
SYS@book> oradebug peek 0x80528f40 24
[080528F40, 080528F58) = 000001AB 0000002C 00000062 000486F7 000190FA 00000000
--//0x000190FA = 102650,对应Bucket.
--//0x000486F7 = 296695
SYS@book> oradebug dump library_cache 8;
Statement processed.
--//检查转储文件内容.
Bucket: #=102650 Mutex=0x80528f40(1ab, 98, 296695, 0)
--//oracle这里很奇葩,mutex的值第1个数字竟然是16进制的.
--//296695 = 0x486f7,与前面oradebug peek 0x80528f40 24看到的一致.
LibraryHandle: Address=0x7d3c9600 Hash=5ab90fa LockMode=N PinMode=0 LoadLockMode=0 Status=VALD
ObjectName: Name=select * from dept where deptno=20
....
--//测试看看Mutex=0x80528f40(0, 19, 0, 6)中第3个数字的变化,验证自己的猜测.
1.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
SYS@book> alter system set session_cached_cursors=0 scope=spfile;
System altered.
--//重启略,设置目的主要保证每次都是软解析,这样都会访问library cache mutex.
--//session 1:
SCOTT@book> @ spid
SID SERIAL# PROCESS SERVER SPID PID P_SERIAL# C50
---------- ---------- ------------------------ --------- ------ ------- ---------- --------------------------------------------------
295 5 9539 DEDICATED 9540 21 3 alter system kill session '295,5' immediate;
--//sid=295 = 0x127.
SCOTT@book> select * from dept where deptno=20;
DEPTNO DNAME LOC
---------- -------------- -------------
20 RESEARCH DALLAS
--//执行5次以上。
SCOTT@book> @ hash
HASH_VALUE SQL_ID CHILD_NUMBER HASH_HEX
---------- ------------- ------------ ---------
95129850 80baj2c2ur47u 0 5ab90fa
--//95129850%131072 = 102650,确定library cache mutex 地址如下,通过转储library cache,过程略.
oradebug setmypid
oradebug dump library_cache 10;
--//检查转储,搜索Bucket: #=102650,发现如下:
Bucket: #=102650 Mutex=0x80528f40(0, 6, 0, 6)
2.建立测试脚本:
$ cat mutex_sleeps.sh
#! /bin/bash
# test mutix sleep change
#
# argv1=mutex_address
# argv2=poke mutex_address1 value
# argv3=poke mutex_address2 value
# argv4=sleep seconds
#
vdate=$(date '+%Y%m%d%H%M%S')
echo $vdate
offset4=`printf "%x" $(( 0x${1} + 0x4 )) `
#echo $offset4
sqlplus -s -l / as sysdba <
oradebug poke 0x${1} 4 0x${2}
oradebug poke 0x${offset4} 4 0x${3}
oradebug peek 0x${1} 24
quit
EOF
sqlplus -s -l scott/book <<<"select * from dept where deptno=20;" &
strace -Ttt -f -p $! -o /tmp/mutex_${vdate}.txt -e select,sched_yield,semtimedop,getrusage &
sqlplus -s -l / as sysdba <
$(seq ${4} | xargs -IQ echo -e "oradebug peek 0x${1} 24\nhost sleep 1\n")
oradebug poke 0x${1} 8 0x0000000000000000
oradebug peek 0x${1} 24
quit
EOF
--//简单说明:开始设置值非0,执行sqlplus语句放在后台,这时会挂起,通过$!获得前面sqlplus的进程号,通过strace监测,等待一定时间后,
--//修改设置值为0,sqlplus正常执行后退出,strace跟踪也停止.通过前后sleep的差值就可以知道次数,同时看看strace跟踪的调用次数
--//是否基本一致.
3.测试:
$ . mutex_sleeps.sh 80528f40 00000001 00000000 10 | grep "080528F40,"
[2021-09-15 08:40:58] BEFORE: [080528F40, 080528F44) = 00000000
[2021-09-15 08:40:58] AFTER: [080528F40, 080528F44) = 00000001
[2021-09-15 08:40:58] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
Process 15948 attached - interrupt to quit
Process 15967 attached
[2021-09-15 08:40:58] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:40:59] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:00] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:01] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:02] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:03] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:04] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:05] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:06] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:07] [080528F40, 080528F58) = 00000001 00000000 00000067 0007A031 000190FA 00000000
[2021-09-15 08:41:08] BEFORE: [080528F40, 080528F48) = 00000001 00000000
[2021-09-15 08:41:08] AFTER: [080528F40, 080528F48) = 00000000 00000000
[2021-09-15 08:41:08] [080528F40, 080528F58) = 00000000 00000000 00000068 00081DE7 000190FA 00000000
Process 15948 detached
Process 15967 detached
--//注意看peek输出的第4列,没有变化,仅仅在解除阻塞后,才更新这列.
--//81DE7-7A031 = 32182
--//81DE7 = 531943 7A031 = 499761 531943-499761 =32182
--//我测试好几次大约都是在3YXXX Y=2,3,4,5都有.
$ awk '{print $3}' /tmp/mutex_20210915084058.txt| uniq -c| head -16
1 sched_yield()
500 getrusage(RUSAGE_SELF,
99 sched_yield()
1 select(0,
99 sched_yield()
1 select(0,
99 sched_yield()
1 select(0,
99 sched_yield()
1 select(0,
99 sched_yield()
1 select(0,
99 sched_yield()
1 select(0,
99 sched_yield()
1 select(0,
--//这个以前测试过,不过有点奇怪的是使用sched_yield,select调用模式.调用99次sched_yield,调用1次select.这种模式非常耗费CPU资源.
$ top -p 16734 -d 1
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
16734 oracle 20 0 854m 20m 17m t 26.9 0.0 1:50.03 oracle
--//CPU使用在30%上下.
$ grep "sched_yield()" /tmp/mutex_20210915084058.txt |wc -l
96742
--//96742 与 32182 差距也太大了,
--//96742/32182 = 3.00609036107140637623 ,有3倍的差距.
$ grep "select(" /tmp/mutex_20210915084058.txt |wc -l
977
--// 96742/977 = 99.01944728761514841351.调用sched_yield,select比例接近99:1.
$ grep sched_yield /tmp/mutex_20210915084058.txt | awk '{print $NF}' | sed 's/^/;s/>$//' | paste -sd+| bc -l
2.038630
$ grep select /tmp/mutex_20210915084058.txt | awk '{print $NF}' | sed 's/^/;s/>$//' | paste -sd+| bc -l
1.055319
$ awk '{print $3}' /tmp/mutex_20210915084058.txt|uniq -c | egrep "getrusage"
500 getrusage(RUSAGE_SELF,
2 getrusage(RUSAGE_SELF,
2 getrusage(RUSAGE_SELF,
2 getrusage(RUSAGE_SELF,
2 getrusage(RUSAGE_SELF,
2 getrusage(RUSAGE_SELF,
15 getrusage(RUSAGE_SELF,
$ grep "getrusage" /tmp/mutex_20210915084058.txt
....
15967 08:40:58.459998 getrusage(RUSAGE_SELF, {ru_utime={0, 29995}, ru_stime={0, 23996}, ...}) = 0 <0.000023>
15967 08:40:58.460145 getrusage(RUSAGE_SELF, {ru_utime={0, 29995}, ru_stime={0, 23996}, ...}) = 0 <0.000023>
15967 08:41:00.038747 getrusage(RUSAGE_SELF, {ru_utime={0, 364944}, ru_stime={0, 193970}, ...}) = 0 <0.000026>
15967 08:41:00.038869 getrusage(RUSAGE_SELF, {ru_utime={0, 364944}, ru_stime={0, 193970}, ...}) = 0 <0.000025>
15967 08:41:02.038991 getrusage(RUSAGE_SELF, {ru_utime={0, 774882}, ru_stime={0, 413937}, ...}) = 0 <0.000025>
15967 08:41:02.039101 getrusage(RUSAGE_SELF, {ru_utime={0, 774882}, ru_stime={0, 413937}, ...}) = 0 <0.000024>
15967 08:41:04.040235 getrusage(RUSAGE_SELF, {ru_utime={1, 191818}, ru_stime={0, 636903}, ...}) = 0 <0.000026>
15967 08:41:04.040346 getrusage(RUSAGE_SELF, {ru_utime={1, 191818}, ru_stime={0, 636903}, ...}) = 0 <0.000027>
15967 08:41:06.040303 getrusage(RUSAGE_SELF, {ru_utime={1, 578759}, ru_stime={0, 769882}, ...}) = 0 <0.000019>
15967 08:41:06.040398 getrusage(RUSAGE_SELF, {ru_utime={1, 578759}, ru_stime={0, 769882}, ...}) = 0 <0.000018>
15967 08:41:08.040524 getrusage(RUSAGE_SELF, {ru_utime={1, 976699}, ru_stime={0, 938857}, ...}) = 0 <0.000019>
15967 08:41:08.040617 getrusage(RUSAGE_SELF, {ru_utime={1, 976699}, ru_stime={0, 938857}, ...}) = 0 <0.000018>
15967 08:41:08.466725 getrusage(RUSAGE_SELF, {ru_utime={2, 56687}, ru_stime={0, 977851}, ...}) = 0 <0.000020>
15967 08:41:08.466857 getrusage(RUSAGE_SELF, {ru_utime={2, 56687}, ru_stime={0, 977851}, ...}) = 0 <0.000030>
...
--//还可以发现一个规律中间间隔2秒每次调用getrusage2次.与下面的测试一样.
4.继续测试:
$ . mutex_sleeps.sh 80528f40 00000000 00009999 10 | grep "080528F40,"
[2021-09-15 09:05:16] BEFORE: [080528F40, 080528F44) = 00000000
[2021-09-15 09:05:16] AFTER: [080528F40, 080528F44) = 00000000
[2021-09-15 09:05:16] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
Process 16481 attached - interrupt to quit
Process 16500 attached
Process 16481 detached
Process 16500 detached
[2021-09-15 09:05:17] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:18] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:19] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:20] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:21] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:22] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:23] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:24] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:25] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:26] [080528F40, 080528F58) = 00000000 00009999 0000006C 00092E5B 000190FA 00000000
[2021-09-15 09:05:27] BEFORE: [080528F40, 080528F48) = 00000000 00009999
[2021-09-15 09:05:27] AFTER: [080528F40, 080528F48) = 00000000 00000000
[2021-09-15 09:05:27] [080528F40, 080528F58) = 00000000 00000000 0000006D 000931E4 000190FA 00000000
--//注意看peek输出的第4列,没有变化,仅仅在解除阻塞后,才更新这列.
--// 0x931E4-0x92E5B = 905
--//931E4 = 602596 92E5B = 601691 602596-601691 = 905.
$ awk '{print $3}' /tmp/mutex_20210915090516.txt|uniq -c
1 sched_yield()
500 getrusage(RUSAGE_SELF,
2 sched_yield()
141 semtimedop(43581440,
2 getrusage(RUSAGE_SELF,
182 semtimedop(43581440,
2 getrusage(RUSAGE_SELF,
178 semtimedop(43581440,
2 getrusage(RUSAGE_SELF,
181 semtimedop(43581440,
2 getrusage(RUSAGE_SELF,
182 semtimedop(43581440,
2 getrusage(RUSAGE_SELF,
39 semtimedop(43581440,
15 getrusage(RUSAGE_SELF,
--//你可以发现semtimedop,getrusage模式,基本是调用2次getrusage,182次semtimedop(每次0.010XXX秒),,实际上一次循环占用2秒.
$ grep semtimedop /tmp/mutex_20210915090516.txt|wc -l
903
--//可以发现调用semtimedop的次数与前面看到的sleeps的次数非常接近.我多次测试基本相差1-3之间.
--//而且都是grep semtimedop 计数的值要小于前面相减的值1-3之间,我觉得基本可以推断mutex结构体的第12-15字节表示sleeps计数.
$ grep semtimedop /tmp/mutex_20210915090516.txt | awk '{print $NF}' | sed 's/^/;s/>$//' | paste -sd+| bc -l
9.819210
--//10秒的监测,有差不多9.8秒在semtimedop,也就是在sleep上,这种模式消耗CPU很少.
--//至于为什么前面的模式如何计算我有点不明白了.
4.总结:
--//使用sched_yield,select调用模式,如何计数sleeps的我不是很清楚.
--//使用semtimedop调用模式基本与测试非常接近.
--//我以前做过这类测试,一致以为mutex的模式与隐含参数_mutex_wait_scheme有关,看来不一定.
--//一些情况下oracle还是使用sched_yield函数.看来以前的测试我有时间给重复做一次...
SYS@book> @ hide mutex
NAME DESCRIPTION DEFAULT_VALUE SESSION_VALUE SYSTEM_VALUE ISSES ISSYS_MOD
------------------ ----------------- ------------- ------------- ------------ ----- ---------
_mutex_spin_count Mutex spin count TRUE 255 255 FALSE IMMEDIATE
_mutex_wait_scheme Mutex wait scheme TRUE 2 2 FALSE IMMEDIATE
_mutex_wait_time Mutex wait time TRUE 1 1 FALSE IMMEDIATE
--//参考链接 http://blog.itpub.net/267265/viewspace-2640073/=>[20190402]对比_mutex_wait_scheme不同模式cpu消耗.txt
--//我当时还画了一个表格,
-------------------------------------------------------------------------------------------------------
_mutex_wait_scheme CPU使用率 描述
------------------------------------------------------------------------------------------------------
2 .3 2秒时间内,semtimedop 182次 getrusage 2次 _mutex_spin_count=255
12 2秒时间内,semtimedop 167次 getrusage 2次 _mutex_spin_count=65535
1 3 2秒时间内, select 16XX次(每次调用0.001秒),getrusage 2次
0 39.9 调用99次sched_yield,然后1次seelct(每次调用0.001秒),
补充间隔2秒调用getrusage 2次.
------------------------------------------------------------------------------------------------------