在使用oltp压缩方式存放的表中delete操作后释放出来的空间并不会被后来的insert语句所重用,换而言之,频繁的对oltp压缩表进行delete & insert操作会占用愈来愈多的存储空间。
通过测试验证一下,测试步骤:
创建压缩表并插入记录à收集表的block使用信息àdump data blockà删除表中一半记录à收集表的block使用信息àdump data blockà重新插入被删除的记录àdump data blockà收集表的block使用信息
##创建压缩表并往里灌入10W条数据,特地设计一些内容重复的字段,否则没法压缩
drop table system.numtabc;
create table system.numtabc(id number,col2 varchar2(2),col3 varchar2(2),col4 varchar2(2)) tablespace ts_zwcs01_dat compress for oltp;
select compression,compress_for from dba_tables where table_name='NUMTABC' ;
COMPRESS COMPRESS_FOR
-------- ------------
ENABLED OLTP
declare
begin
for i in 1..100000 loop
insert into system.numtabc values(mod(i,2),'AA','BB','CC');
end loop;
commit;
end;
/
col segment_name format a40
set linesize 130
select segment_name,blocks,header_file,relative_fno,header_block,bytes from dba_segments where segment_name='NUMTABC';
SEGMENT_NAME BLOCKS HEADER_FILE RELATIVE_FNO HEADER_BLOCK BYTES
---------------------------------------- ---------- ----------- ------------ ------------ ----------
NUMTABC 256 230 230 145338 2097152
##为达到对比效果,创建了一个非压缩的表,表里的记录和压缩表完全一样
drop table system.numtabnc;
create table system.numtabnc(id number,col2 varchar2(2),col3 varchar2(2),col4 varchar2(2)) tablespace ts_zwcs01_dat;
(insert数据的步骤和前面一样)
select compression,compress_for from dba_tables where table_name='NUMTABNC' ;
COMPRESS COMPRESS_FOR
-------- ------------
DISABLED
select segment_name,blocks,header_file,relative_fno,header_block,bytes from dba_segments where segment_name='NUMTABNC';
SEGMENT_NAME BLOCKS HEADER_FILE RELATIVE_FNO HEADER_BLOCK BYTES
---------------------------------------- ---------- ----------- ------------ ------------ ----------
NUMTABNC 256 230 230 145346 2097152
##对比两张表在dba_segments中显示占用的空间完全一样
SQL> select segment_name,blocks,header_file,header_block,bytes from dba_segments where segment_name='NUMTABNC';
SEGMENT_NAME BLOCKS HEADER_FILE HEADER_BLOCK BYTES
---------------------------------------- ---------- ----------- ------------ ----------
NUMTABNC 256 230 145346 2097152
SQL> select segment_name,blocks,header_file,header_block,bytes from dba_segments where segment_name='NUMTABC';
SEGMENT_NAME BLOCKS HEADER_FILE HEADER_BLOCK BYTES
---------------------------------------- ---------- ----------- ------------ ----------
NUMTABC 256 230 145338 2097152
##但两张表在data block的使用上有所不同,dbms_space.space_usage的运行结果可以看出压缩表中有unformatted block,非full的block也比非压缩表要多
numtabc numtabnc
unformated: 62 0
<25% : 26 16
<50% : 0 1
75% : 1 0
100% : 0 0
full : 155 227
##对于非压缩的表进行压缩比的评估,得到压缩比为1.4:1
exec DBMS_COMPRESSION.GET_COMPRESSION_RATIO('TS_ZWCS01_IND','SYSTEM','NUMTABNC',null,DBMS_COMPRESSION.comp_for_oltp,:blkcnt_cmp,:blkcnt_uncmp,:row_cmp,:row_uncmp,:cmp_ratio,:comptype_str);
CMP_RATIO
----------
1.4
##验证一下1.4:1这个压缩比与实际情况的相似度:
压缩表中实际使用的blocks为256-62(unformated)=194,非压缩表中实际使用的Blocks为256,256:194=1.32,所以1.4这个值还是预估的比较准确的
##删除前dump一下230/145339块的内容到numtabc.blk.txt
Dump块的内容摘要如下:
tab 0, row 0, @0x1f89
tl: 15 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 2] c1 02
bindmp: 01 37 04 ca 41 41 ca 42 42 ca 43 43 ca c1 02
tab 0, row 1, @0x1f7b
tl: 14 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 01 37 04 ca 41 41 ca 42 42 ca 43 43 c9 80
tab 1, row 0, @0x1f76
tl: 5 fb: --H-FL-- lb: 0x1 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 2] c1 02
bindmp: 2c 01 01 04 00
tab 1, row 1, @0x1f71
tl: 5 fb: --H-FL-- lb: 0x1 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 2c 01 01 04 01
tab 1, row 2, @0x1f6c
tl: 5 fb: --H-FL-- lb: 0x1 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 2] c1 02
bindmp: 2c 01 01 04 00
##删除压缩表中的一半记录,删除前备份一下即将被删除的记录到临时表,以便后续插入时使用
create table system.numtabc_tmp tablespace ts_zwcs01_dat as select * from system.numtabc where id=1;
select count(1) from system.numtabc_tmp;
COUNT(1)
----------
50000
delete system.numtabc where id=1;
50000 rows deleted.
SQL> commit;
Commit complete.
SQL> alter system checkpoint;
System altered.
##删除后dump 230/145339块的内容,可以看出tab 1中被删除的块内容变成了--HDFL—
tab 0, row 0, @0x1f89
tl: 15 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 2] c1 02
bindmp: 00 00 04 ca 41 41 ca 42 42 ca 43 43 ca c1 02
tab 0, row 1, @0x1f7b
tl: 14 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 01 37 04 ca 41 41 ca 42 42 ca 43 43 c9 80
tab 1, row 0, @0x1f76
tl: 2 fb: --HDFL-- lb: 0x2
bindmp: 3c 02
tab 1, row 1, @0x1f71
tl: 5 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 2c 00 01 04 01
tab 1, row 2, @0x1f6c
tl: 2 fb: --HDFL-- lb: 0x2
bindmp: 3c 02
##删除前后的block space使用情况对比,发现很多原来塞满的表现在利用率都降到了50%~75%之间
numtabc删除后 numtabc删除前
unformated: 62 62
<25% : 26 26
<50% : 1 0
75% : 155 1
100% : 0 0
full : 0 155
##再重新插入被删除的这部分记录
SQL> select count(1) from system.numtabc_tmp;
COUNT(1)
----------
50000
insert into system.numtabc select * from system.numtabc_tmp;
##重新插入后表的大小未发生变化
SQL> select segment_name,blocks,header_file,relative_fno,header_block,bytes from dba_segments where segment_name='NUMTABC';
SEGMENT_NAME BLOCKS HEADER_FILE RELATIVE_FNO HEADER_BLOCK BYTES
---------------------------------------- ---------- ----------- ------------ ------------ ----------
NUMTABC 256 230 230 145338 2097152
##dump重新插入之后的145339,块的内容保存在numtabc.insert.txt中,摘录部分块信息,发现此块内容和delete前的一样,结果中仍然有--HDFL--,说明重新插入时没有重用此块中的可用空间
tab 0, row 0, @0x1f89
tl: 15 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 2] c1 02
bindmp: 00 00 04 ca 41 41 ca 42 42 ca 43 43 ca c1 02
tab 0, row 1, @0x1f7b
tl: 14 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 01 37 04 ca 41 41 ca 42 42 ca 43 43 c9 80
tab 1, row 0, @0x1f76
tl: 2 fb: --HDFL-- lb: 0x2
bindmp: 3c 02
tab 1, row 1, @0x1f71
tl: 5 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 2c 00 01 04 01
tab 1, row 2, @0x1f6c
tl: 2 fb: --HDFL-- lb: 0x2
bindmp: 3c 02
##通过执行compress_row.sh、calc_row_perblk.sh等脚本,脚本可参照附录,找出五种包含行数不同的块,
这5类块的详细信息依次包含在如下文件中(详见附录2)
numtabc_reinsert_comp_mostblk.txt
numtabc_reinsert_comp_halfblk.txt
numtabc_reinsert_comp_littleblk.txt
numtabc_reinsert_nocomp_blk.txt
numtabc_reinsert_comp_fullblk.txt
(1) 56个block,每个block包含642条记录,均为压缩,例如:555/281391,block dump的主要特征是
--Block header部分
flag=-0----X-
ntab=2
nrow=643
frre=-1
--data block部分,均为H-FL,没有D标记
tab 1, row 641, @0x1358
tl: 16 fb: --H-FL-- lb: 0x1 cc: 4
col 0: [ 2] c1 02
col 1: [ 2] 41 41
col 2: [ 2] 42 42
col 3: [ 2] 43 43
bindmp: 2c 01 04 00 ca c1 02 ca 41 41 ca 42 42 ca 43 43
(2) 75个block,每个block包含506条记录,均为压缩,例如:452/67165,block dump的主要特征是
--Block header部分
flag=-0------
ntab=2
nrow=644
frre=372 --表示row directory里首个可用的行条目序号
--row directory部分,从372个条目开始往后开始,每隔一条出现一个可用的行条目
0x312:pri[371] offs=0x1841
0x314:pri[372] sfll=374
0x316:pri[373] offs=0x1837
0x318:pri[374] sfll=376
(3) 80个block,每个block包含321条记录,均为压缩,例如:555/281460,block dump的主要特征是
--Block header 部分,虽然block中有可以重用的行,但frre并没有指向首个可用的行条目序号,而是显示为-1,这个和block clean out[C3] 机制有关
frre=-1
--Data block 部分,--HDFL—和—H-FL—间隔出现,和delete之后的情况一致,表明该block并没有在重新插入时被重复利用
tab 1, row 0, @0x1f76
tl: 2 fb: --HDFL-- lb: 0x2
bindmp: 3c 02
tab 1, row 1, @0x1f71
tl: 5 fb: --H-FL-- lb: 0x0 cc: 4
col 0: [ 2] 41 41
col 1: [ 2] 42 42
col 2: [ 2] 43 43
col 3: [ 1] 80
bindmp: 2c 00 01 04 01
(4) 1个block,这个block包含245条记录,均为压缩,例如:555/281429,block dump的主要特征同(3)
(5) 1个block,这个block包含173条记录,没有压缩,例如:555/281370,block dump的主要特征是
--Block header部分
flag=-------- 此处没有0,表示为非压缩块
ntab=1 ntab=1,表示没有符号表,块里仅有一个表的数据
nrow=173
frre=-1
##插入后的block space使用情况,可以看出插入后原本unformatted的空间被利用了
numtabc 重新插入后 numtabc删除后插入前 numtabc删除前
unformated: 0 62 62
<25% : 32 26 26
<50% : 1 1 0
75% : 155 155 1
100% : 0 0 0
full : 56 0 155
结论: 删除后插入前,full的block里有一半的行被删掉了,所以155个block从原来的full区间落到了50%~75%这个区间。 在之后重新插入的过程中,oracle会优先使用unformatted的62个block,而不是重用删除后留有空闲空间的block,对于空间使用率在50%~75%的这155个块= 75个block (每个包含506行记录)+ 80个block (每个包含321行记录),仍然保持删除后的状态。重新插入后full状态的block为56个,其数目正好等于包含642行的block个数(尚无法确认的是62个unformatted block中的56个已经变成了full状态,剩下的6个可能变成了<25%的状态,包含245条记录的block 555/281429,包含273条记录的555/281370,照理都应归到<50%这个区间,但dbms_space.space_usage结果显示该区间只有1个block)
上述测试告诉我们为了达到较理想的压缩效果,对于oltp压缩表尽量少的进行dml操作,以保持其每个blocks的空间利用率接近100%,降低sparse blocks的数量。
附录1:脚本说明
***********compress_row.sh返回每一行对应的relative_fno、block_number、压缩类型
sqlplus '/as sysdba' << EOF > compress_row_numtabc_insert.log
set pagesize 20000
select dbms_compression.get_compression_type('SYSTEM','NUMTABC',rowid) comp_type,rowid,DBMS_ROWID.ROWID_BLOCK_NUMBER(rowid) blkno,DBMS_ROWID.ROWID_RELATIVE_FNO(rowid) rfno from system.numtabc;
EOF
**********生成numtabc_blk.txt.all,包含rowid中的7-15位,即rfile+blocknum部分
cat compress_row_numtabc_insert.log | awk '$1==1' | awk '{print $2}' | cut -c 7-15|sort -u > numtabc_blk.txt.1
cat compress_row_numtabc_insert.log | awk '$1==2' | awk '{print $2}' | cut -c 7-15|sort -u > numtabc_blk.txt.2
cat numtabc_blk.txt.1 numtabc_blk.txt.2 >> numtabc_blk.txt.all
**********calc_row_perblk.sh 返回每个block包含的行数
> numtabc_rows_per_block.txt
cat numtabc_blk.txt.all| while read v_blkno
do
echo "$v_blkno"" : " `grep $v_blkno compress_row_numtabc_insert.log|awk '{print $3" "$4}' | head -n 1` `grep $v_blkno compress_row_numtabc_insert.log | wc -l` >> numtabc_rows_per_block.txt
done
附录2:block dump输出:
20140426 compress table delete insert操作空间分配附件.rar
附录3:查看表中每行的压缩类型:
SELECT CASE comp_type
WHEN 1 THEN 'No Compression'
WHEN 2 THEN 'Advanced compression level'
WHEN 4 THEN 'Hybrid Columnar Compression for Query High'
WHEN 8 THEN 'Hybrid Columnar Compression for Query Low'
WHEN 16 THEN 'Hybrid Columnar Compression for Archive High'
WHEN 32 THEN 'Hybrid Columnar Compression for Archive Low'
WHEN 64 THEN 'Compressed row'
WHEN 128 THEN 'High compression level for LOB operations'
WHEN 256 THEN 'Medium compression level for LOB operations'
WHEN 512 THEN 'Low compression level for LOB operations'
WHEN 1000 THEN 'Minimum required number of LOBs in the object for which LOB compression ratio is to be estimated'
WHEN 4096 THEN 'Basic compression level'
WHEN 5000 THEN 'Maximum number of LOBs used to compute the LOB compression ratio'
WHEN 1000000 THEN 'Minimum required number of rows in the object for which HCC ratio is to be estimated'
WHEN -1 THEN 'To indicate the use of all the rows in the object to estimate HCC ratio'
WHEN 1 THEN 'Identifies the object whose compression ratio is estimated as of type table'
ELSE 'Unknown Compression Type'
END AS Compression_type,
n as num_rows
FROM (SELECT comp_type,
Count(*) n
FROM (SELECT
dbms_compression.Get_compression_type('SCOTT', 'TABCOMP', ROWID)
AS comp_type
FROM scott.tabcomp)
GROUP BY comp_type);
附录4:查看压缩表所真正使用的blocks数量:
WITH blocks AS (
SELECT COUNT(*) usedblocks, AVG(rowcount) avgrowsperusedblock
FROM (
SELECT
DBMS_ROWID.ROWID_RELATIVE_FNO(ROWID),
DBMS_ROWID.ROWID_BLOCK_NUMBER(ROWID),
COUNT(*) rowcount
FROM &&tablename
GROUP BY
DBMS_ROWID.ROWID_RELATIVE_FNO(ROWID),
DBMS_ROWID.ROWID_BLOCK_NUMBER(ROWID)
)
)
SELECT extents, blocks allocatedblocks, usedblocks, avgrowsperusedblock
FROM blocks, user_segments
WHERE segment_name = '&tablename';