PostgreSQL DBA(61) - zedstore:想说爱你不容易

本节继续介绍zedstore,在CentOS 7.4进行”大批量”(不过1000w行)数据的测试,zedstore在执行avg时直接coredump.


testdb=# create table t_ap_heap (id1 int,id2 int,id3 int,id4 int,id5 int ,id6 int,id7 int,id8 int,id9 int);
CREATE TABLE
testdb=# 
testdb=# insert into t_ap_heap 
testdb-#   select x,x,x,x,x,x,x,x,x
testdb-#   from generate_series(1,10000000) as x;
INSERT 0 10000000
testdb=# 
testdb=# drop table if exists t_ap_zedstore;
DROP TABLE
testdb=# create table t_ap_zedstore 
testdb-#   (id1 int,id2 int,id3 int,id4 int,id5 int ,id6 int,id7 int,id8 int,id9 int) using zedstore;
CREATE TABLE
testdb=# 
testdb=# insert into t_ap_zedstore 
testdb-#   select x,x,x,x,x,x,x,x,x
testdb-#   from generate_series(1,10000000) as x;
INSERT 0 10000000
testdb=# 
testdb=# select pg_size_pretty(pg_table_size('t_ap_heap'));
 pg_size_pretty 
----------------
 651 MB
(1 row)
testdb=# select pg_size_pretty(pg_table_size('t_ap_zedstore'));
 pg_size_pretty 
----------------
 501 MB
(1 row)
testdb=# explain analyze select avg(id1) from t_ap_heap;
                                                                    QUERY PLAN                                               
-----------------------------------------------------------------------------------------------------------------------------
---------------------
 Finalize Aggregate  (cost=136417.97..136417.98 rows=1 width=32) (actual time=2432.238..2432.240 rows=1 loops=1)
   ->  Gather  (cost=136417.75..136417.96 rows=2 width=32) (actual time=2432.015..2433.781 rows=3 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         ->  Partial Aggregate  (cost=135417.75..135417.76 rows=1 width=32) (actual time=2363.185..2363.185 rows=1 loops=3)
               ->  Parallel Seq Scan on t_ap_heap  (cost=0.00..125001.00 rows=4166700 width=4) (actual time=0.348..1843.592 r
ows=3333333 loops=3)
 Planning Time: 28.360 ms
 Execution Time: 2434.173 ms
(8 rows)
testdb=# explain analyze select avg(id1) from t_ap_zedstore;
psql: WARNING:  terminating connection because of crash of another server process
DETAIL:  The postmaster has commanded this server process to roll back the current transaction and exit, because another server process exited abnormally and possibly corrupted shared memory.
HINT:  In a moment you should be able to reconnect to the database and repeat your command.
psql: server closed the connection unexpectedly
  This probably means the server terminated abnormally
  before or while processing the request.
The connection to the server was lost. Attempting reset: Failed.
!>

重新连接,执行查询:


testdb=# explain analyze select avg(id1) from t_ap_heap;
                                                                   QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------------------------
--------------------
 Finalize Aggregate  (cost=136417.97..136417.98 rows=1 width=32) (actual time=1357.266..1357.267 rows=1 loops=1)
   ->  Gather  (cost=136417.75..136417.96 rows=2 width=32) (actual time=1357.068..1362.153 rows=3 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         ->  Partial Aggregate  (cost=135417.75..135417.76 rows=1 width=32) (actual time=1346.515..1346.515 rows=1 loops=3)
               ->  Parallel Seq Scan on t_ap_heap  (cost=0.00..125001.00 rows=4166700 width=4) (actual time=0.488..830.427 ro
ws=3333333 loops=3)
 Planning Time: 0.550 ms
 Execution Time: 1362.347 ms
(8 rows)
testdb=# explain analyze select avg(id1) from t_ap_zedstore;
                                                                 QUERY PLAN                                                  
-----------------------------------------------------------------------------------------------------------------------------
---------------
 Finalize Aggregate  (cost=107843.55..107843.56 rows=1 width=32) (actual time=9.579..9.580 rows=1 loops=1)
   ->  Gather  (cost=107843.33..107843.54 rows=2 width=32) (actual time=0.467..11.620 rows=3 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         ->  Partial Aggregate  (cost=106843.33..106843.34 rows=1 width=32) (actual time=0.020..0.021 rows=1 loops=3)
               ->  Parallel Seq Scan on t_ap_zedstore  (cost=0.00..98295.87 rows=3418987 width=4) (actual time=0.007..0.008 r
ows=0 loops=3)
 Planning Time: 0.268 ms
 Execution Time: 11.835 ms
(8 rows)
testdb=#

heap vs zedstore : 1362ms vs 12ms,性能确实有大幅提升.

不过,等等


testdb=# select avg(id1) from t_ap_heap;
         avg          
----------------------
 5000000.500000000000
(1 row)
testdb=# select avg(id1) from t_ap_zedstore;
 avg 
-----
(1 row)
testdb=# select count(*) from t_ap_zedstore;
 count 
-------
     0
(1 row)
testdb=# insert into t_ap_zedstore 
testdb-#   select x,x,x,x,x,x,x,x,x
testdb-#   from generate_series(1,10000000) as x;
psql: ERROR:  too many attributes for zedstore
testdb=#

数据竟然都丢了?┌(。Д。)┐

参考资料
PostgreSQL 基于access method api的列存zedstore

请使用浏览器的分享功能分享到微信等