[20200319]KILL STATUS ='KILLED'的进程.txt
--//生产系统例行检查,我发现存在一些STATUS ='KILLED'的会话。
--//看看这些进程如何删除。
1.环境:
SYS@ZZZZ/SIDSID> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
SYS@ZZZZ/SIDSID> select paddr,inst_id,SID,S.SERIAL#,status,client_info ,logon_time,CREATOR_ADDR,CREATOR_SERIAL# from gv$session S where STATUS ='KILLED' order by sid;
PADDR INST_ID SID SERIAL# STATUS CLIENT_INFO LOGON_TIME CREATOR_ADDR CREATOR_SERIAL#
---------------- ---------- ---------- ---------- -------- -------------------- ------------------- ---------------- ---------------
000000134207A620 1 167 6305 KILLED aaa.bbb.c5.116 2020-03-11 20:22:26 0000001291D9E998 77
000000134207A620 1 1747 12439 KILLED AAA.BBB.C.74 2020-03-10 07:23:33 00000012A1E0DCE0 89
000000134207A620 1 3499 1473 KILLED aaa.bbb.c0.224 2020-03-13 15:40:14 00000012A1DBB418 218
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
000000134207A620 1 5055 15825 KILLED aaa.bbb.c0.224 2020-03-13 10:31:55 00000012B1D5A100 121
000000134207A620 2 6393 11793 KILLED aaa.bbb.c5.116 2020-03-11 20:24:13 00000012D1D47F68 20
000000134207A620 1 6980 3377 KILLED aaa.bbb.c0.224 2020-03-13 10:31:56 00000012B1F37978 122
000000134207A620 1 7261 10139 KILLED aaa.bbb.c5.116 2020-03-11 20:22:25 00000012A1DFC0A8 71
000000134207A620 1 7423 23319 KILLED AAA.BBB.C.74 2020-03-10 07:23:33 00000012A1E32608 215
8 rows selected.
--//出现这样的情况PADDR地址指向都已一样的,如果你使用该地址查询视图v$process无法获得spid进程号的。
SYS@ZZZZ/SIDSID> select * from v$process where addr=hextoraw('000000134207A620');
no rows selected
--//只能查询使用CREATOR_ADDR查询。
SYS@ZZZZ/SIDSID> select * from v$process where addr=hextoraw('00000012A1DBB418')
2 @ prxx
==============================
ADDR : 00000012A1DBB418
PID : 203
SPID : 102216
PNAME :
USERNAME : grid
SERIAL# : 218
TERMINAL : UNKNOWN
PROGRAM : oracle@ZZZZZZZZZZZZZZZZ
TRACEID :
TRACEFILE : /u01/app/oracle/diag/rdbms/SIDSID/SIDSID1/trace/SIDSID1_ora_102216.trc
BACKGROUND :
LATCHWAIT :
LATCHSPIN :
PGA_USED_MEM : 1927126
PGA_ALLOC_MEM : 3231990
PGA_FREEABLE_MEM : 1048576
PGA_MAX_MEM : 5329142
PL/SQL procedure successfully completed.
--//奇怪 USERNAME='grid',什么回事。
SELECT s.paddr
,s.inst_id
,s.SID
,S.SERIAL#
,s.status
,s.client_info
,s.logon_time
,s.CREATOR_ADDR
,s.CREATOR_SERIAL#
,p.spid
,p.username
FROM gv$session s, gv$process p
WHERE s.STATUS = 'KILLED'
AND p.addr = s.CREATOR_ADDR
AND ( (s.USERNAME IS NOT NULL)
AND (NVL (s.osuser, 'x') <> 'SYSTEM')
AND (s.TYPE <> 'BACKGROUND'))
AND (s.ownerid = 2147483644)
AND ( s.module <> 'TOAD background query session' OR s.module IS NULL)
AND p.inst_id = s.inst_id;
--//我查询发现都是USERNAME='grid'。加入其它条件是抄toad的查询,主要避免下面kill时删除重要进程。
# netstat -tnop 2>/dev/null | grep 102216
tcp 0 0 192.168.100.104:1521 aaa.bbb.c0.224:2549 ESTABLISHED 102216/oracleSIDSID1 keepalive (61.59/0/0)
# ps -fp 102216
UID PID PPID C STIME TTY TIME CMD
oracle 102216 1 0 Mar13 ? 00:00:00 oracleSIDSID1 (LOCAL=NO)
--//说明该进程存在。如果采用原来的方式是不行的。
SYS@ZZZZ/SIDSID> alter system disconnect session 3499,1473 immediate ;
alter system disconnect session 3499,1473 immediate
*
ERROR at line 1:
ORA-00026: missing or invalid session ID
--//知道进程号直接杀进程就ok了。
SELECT 'kill -9 ' || spid c20, inst_id
FROM (SELECT s.paddr
,s.inst_id
,s.SID
,S.SERIAL#
,s.status
,s.client_info
,s.logon_time
,s.CREATOR_ADDR
,s.CREATOR_SERIAL#
,p.spid
FROM gv$session s, gv$process p
WHERE s.STATUS = 'KILLED'
AND p.addr = s.CREATOR_ADDR
AND ( (s.USERNAME IS NOT NULL)
AND (NVL (s.osuser, 'x') <> 'SYSTEM')
AND (s.TYPE <> 'BACKGROUND'))
AND (s.ownerid = 2147483644)
AND ( s.module <> 'TOAD background query session'
OR s.module IS NULL)
AND p.inst_id = s.inst_id) order by 2;
C20 INST_ID
---------------- -------
kill -9 102216 1
kill -9 77715 1
kill -9 125001 1
kill -9 77731 1
kill -9 92612 1
kill -9 125003 1
kill -9 92620 1
kill -9 60858 2
8 rows selected.
--//注意实例号。最后1行是另外一台机器的。先尝试1个进程看看。
# kill -9 102216
# ps -fp 102216
UID PID PPID C STIME TTY TIME CMD
# netstat -tnop 2>/dev/null | grep 102216
--//执行前还是建议查看一下。
$ ps -fp 102216,77715,125001,77731,92612,125003,92620
--//然后copy and paste执行以上命令,注意服务器千万别选择错了。一般等2,3秒进程就会消失。我翻了以前笔记以前写成如下:
select spid from v$process where addr in(select creator_addr from v$session where status='KILLED');
select 'kill -9 '||spid from v$process where addr in(select creator_addr from v$session where status='KILLED');
--//感觉执行前还是给仔细看看什么进程,贸然操作还是存在一定风险。