tag: SQL基础

概述详情与验证1、记录应用产生的所有SQL语句的日志2、vacuum系统表3、在PLPythonu异常里设置DETAIL和HINT4、xml格式化5、调试PostgreSQL和client之间的通信6、快速替换主键依赖上的索引7、从命令行传参数给script中的DO语句8、数组元素按序号访问，转成多行9、只允许目标列出现一个NULL值10、Bytea到BLOB值的转换11、只允许一列出现NULL12、在psql中覆盖app名字13、在pg_tgrm 中使用KNNsearch 找出N个最相似的值14、使用自定义转换(oracle到pg的移植)15、JSON值的创建16、数组字段的trim17、bash里头直接处理查询结果18、查询中的注释19、退出所有连接20、获取第一个未锁定的行 (用到了advisory lock)21、窗口函数中的gaps获取22、删除窗口函数查询中的重复行23、LIKE语句使用list模式24、PL/pgSQL中的cursor强制使用唯一名字25、BLOB到Bytea的转换26、URL的解码27、Emacs上的配置(用的很少)28、bytea到TEXT的转换29、年龄的计算30、master还是slave节点的判断(这个太简单了)31、动态修改记录的字段32、触发器里遍历RECORD33、强制chars使用to_char转换34、阻止不必要的char(n)到text的转换35、得到function的DDL36、Russian alphabet (Cyrillic) to ASCII (俄兄弟的字母表转换)37、to_string, to_array38、"unix timestamp" 和timestamp的转换39、使用time zone的域40、psql中非交互的输入密码41、更快的表列表42、Array of arrays43、MySQL function group_concat in PostgreSQL44、窗口函数中分组中的first45、BIGINT到IP的转换46、让文件成为patch的原始文件47、字符串到数值的转换48、timestamp中去掉微秒49、Attention on IS NULL and IS NOT NULL operators for composite types50、Faster execution of PL/pgSQL function with explicit casting51、PL函数的并行执行的保护52、使用科学计数法53、终端的配置54、表在不同schema间的移动55、使用CTE完成数的不同进制转换56、获取表定义中的缺省值57、函数Function to_bin58、SQL code for hierarchical break-down of domains（牛)59、Solution for migration to 8.3 (problem with tsearch2) (略老)60、让函数更快参考

概述

整理PostgreSQL中一些常见的SQL小技巧，绝大部分来源于网上。

详情与验证

1、记录应用产生的所有SQL语句的日志

使用libpq要使用的系统变量：PGOPTIONS，设置选项即可：

PGOPTIONS="-c log_min_duration_statement=0" pg_dump -s postgres > /dev/null

2、vacuum系统表

psql -At  -c  "select 'VACUUM ANALYZE pg_catalog.'||table_name from information_schema.tables where table_schema = 'pg_catalog' and table_type <> 'VIEW'" db_test \
 | psql -S db_test

3、在PLPythonu异常里设置DETAIL和HINT

postgres=# do $$
x = plpy.SPIError('Nazdarek'); 
x.spidata = (100, "Some detail", "some hint", None, None); 
raise x;
$$ language plpythonu;
ERROR:  plpy.SPIError: Nazdarek
DETAIL:  Some detail
HINT:  some hint
CONTEXT:  Traceback (most recent call last):
  PL/Python anonymous code block, line 4, in 
    raise x;
PL/Python anonymous code block

4、xml格式化

create or replace function xml_pretty(xml)
returns xml as $$
  select xslt_process($1,
'<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:strip-space elements="*" />
<xsl:output method="xml" indent="yes" />
<xsl:template match="node() | @*">
<xsl:copy>
<xsl:apply-templates select="node() | @*" />
xsl:copy>
xsl:template>
xsl:stylesheet>')::xml
$$ language sql immutable strict;

-- 示例：
postgres=# select xml_pretty('<root><a><b>some textb>a>root>');
      xml_pretty      
----------------------
 <root>              +
   <a>               +
     <b>some textb>+
   a>              +
 root>             +

(1 row)

5、调试PostgreSQL和client之间的通信

--用到了pgshark
unbuffer ./pgs-debug --host 172.30.34.72 -i lo --port 6432 | while read line; do echo `date +"%T.%3N"` $line; done | gzip > /mnt/ebs/pgsharklog.gz

6、快速替换主键依赖上的索引

BEGIN;
CREATE UNIQUE INDEX CONCURRENTLY tab_pkey_idx2 ON tab(id);
ALTER TABLE tab
   DROP CONSTRAINT tab_pkey CASCADE, 
   ADD CONSTRAINT tab_pkey PRIMARY KEY USING INDEX tab_pkey_idx2;
ALTER TABLE second_tab
   ADD CONSTRAINT second_tab_fkey FOREIGN KEY (tab_id) REFERENCES tab(id) NOT VALID;
COMMIT;

7、从命令行传参数给script中的DO语句

PG中的DO并不支持参数，但是我们可以通过psql和服务端的会话参数来传参。

bash-4.1$ cat test.sh
echo "
set myvars.msgcount TO :'msgcount'; 
DO \$\$ 
BEGIN 
  FOR i IN 1..current_setting('myvars.msgcount')::int LOOP 
    RAISE NOTICE 'Hello';
  END LOOP; 
END \$\$" | psql postgres -v msgcount=$1

bash-4.1$ sh test.sh 3
SET
Time: 0.341 ms
NOTICE:  Hello
NOTICE:  Hello
NOTICE:  Hello
DO
Time: 2.415 ms

8、数组元素按序号访问，转成多行

-- 9.1
CREATE OR REPLACE FUNCTION unnest_rownum(anyarray)
RETURNS TABLE (id int, element anyelement) AS $$
BEGIN
  id := 1;
  FOREACH element IN array $1
  LOOP
    RETURN NEXT;
    id := id + 1;
  END LOOP;
  RETURN;
END
$$ LANGUAGE plpgsql; 

postgres=# select * from unnest_rownum(ARRAY['A','B','C']);
 id | element
----+---------
  1 | A
  2 | B
  3 | C
(3 rows)

--9.3 
SELECT i, arraycol[i]
   FROM tab,
        LATERAL generate_subscripts(arraycol, 1) as i;

--示例
mydb=# create table test(id int, col2 int[]);
CREATE TABLE
mydb=# insert into test values(1, '{3, 4, 5, 7}'::int[]);
INSERT 0 1
mydb=# select i, col2[i] from test, LATERAL generate_subscripts(col2, 1) as i;
 i | col2
---+------
 1 |    3
 2 |    4
 3 |    5
 4 |    7
(4 rows)

9、只允许目标列出现一个NULL值

postgres=# CREATE TABLE omega(a int UNIQUE);
CREATE TABLE

postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1

postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1

postgres=# TRUNCATE omega;
TRUNCATE TABLE

postgres=# CREATE UNIQUE INDEX ON omega ((1)) WHERE a IS NULL;
CREATE INDEX

postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1

postgres=# INSERT INTO omega VALUES(NULL);
ERROR:  duplicate key value violates unique constraint "omega_expr_idx"
DETAIL:  Key ((1))=(1) already exists.

postgres=# UPDATE omega SET a = 10;
UPDATE 1

postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1
Time: 7.601 ms

10、Bytea到BLOB值的转换

CREATE OR REPLACE FUNCTION make_lo(bytea)
RETURNS oid AS $$
DECLARE
  loid oid;
  fd integer;
  bytes integer;
BEGIN
  loid := lo_creat(-1);
  fd := lo_open(loid, 131072);
  bytes := lowrite(fd, $1);
  IF (bytes != LENGTH($1)) THEN
    RAISE EXCEPTION 'Not all data copied to blob';
  END IF;
  PERFORM lo_close(fd);
  RETURN loid;
END;
$$ LANGUAGE plpgsql STRICT;

11、只允许一列出现NULL

CREATE OR REPLACE FUNCTION public.null_count(VARIADIC anyarray)
 RETURNS integer
 LANGUAGE sql
AS $function$
  SELECT sum(CASE WHEN v IS NULL THEN 1 ELSE 0 END)::int FROM unnest($1) g(v)
$function$

CREATE TABLE xxx(
  a int,
  b int,
  c int,
  CHECK (null_count(a,b,c) <= 1))

12、在psql中覆盖app名字

[pavel@localhost ~]$ PGAPPNAME=splunk psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" postgres
 application_name 
------------------
 splunk
(1 row)

[pavel@localhost ~]$ psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" "dbname=postgres application_name=splunk"
 application_name 
------------------
 splunk
(1 row)

[pavel@localhost ~]$ psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" postgresql://localhost/postgres?application_name=splunk
 application_name 
------------------
 splunk
(1 row)

13、在pg_tgrm 中使用KNNsearch 找出N个最相似的值

SELECT DISTINCT ON (village_name <-> 'Benešov') village_name, village_name<->'Benešov' 
   FROM villages 
   ORDER BY village_name <-> 'Benešov' 
   LIMIT 10;

     village_name      | ?column? 
-----------------------+----------
 Benešov               |        0
 Benešovice            | 0.416667
 Dolní Benešov         | 0.428571
 Benešov u Semil       |      0.5
 Benešov nad Černou    | 0.578947
 Benešov nad Ploučnicí | 0.636364
 Benecko               | 0.666667
 Benetice              | 0.692308
 Bečov                 | 0.727273
 Bezkov                |     0.75
(10 rows)

postgres=# EXPLAIN SELECT DISTINCT ON (village_name <-> 'Benešov') village_name, village_name<->'Benešov' 
   FROM obce 
   ORDER BY village_name <-> 'Benešov' 
   LIMIT 10;
                                           QUERY PLAN                                            
-------------------------------------------------------------------------------------------------
 Limit  (cost=0.00..2.76 rows=10 width=10)
   ->  Unique  (cost=0.00..1474.75 rows=5341 width=10)
         ->  Index Scan using villages_village_name_idx on obce  (cost=0.00..1433.14 rows=16644 width=10)
               Order By: (village_name <-> 'Benešov'::text)
(4 rows)

14、使用自定义转换(oracle到pg的移植)

Oracle doesn't support boolean data type, and developers use a varchar(1) or number(1) instead. It is a issue in PostgreSQL because this width is too less for storing a strings "true" or "false". But we can redefine a default casting (there is used a ugly hack - direct update of system tables under superuser rights). This solution is fast hack - changing to PostgreSQL's boolean is much better.

CREATE OR REPLACE FUNCTION public.boolcast_varchar(boolean)
 RETURNS character varying
 LANGUAGE sql
AS $$
  SELECT CASE WHEN $1 THEN 't' ELSE 'f' END 
$$

UPDATE pg_cast SET castfunc='boolcast_varchar'::regproc, 
                   castcontext='i' 
  WHERE castsource=16 and casttarget=1043;

CREATE TABLE test(a varchar(1));
INSERT INTO test VALUES(true);

15、JSON值的创建

select
  row_to_json(t1)
from (
  select
    'joe' as username,
    (select project from (values(1, 'prj1')) as project(project_id,
project_name)) as project
) t1;

                             row_to_json
---------------------------------------------------------------------
 {"username":"joe","project":{"project_id":1,"project_name":"prj1"}}
(1 row)

16、数组字段的trim

postgres=# select * from g;
               a                
--------------------------------
 {" ssss ssss","sss  ssss    "}
 {" ssss ssssaaaa "}
(2 rows)

postgres=# select row_number() over(), unnest(a) e
            from g;
 row_number |        e        
------------+-----------------
          1 |  ssss ssss
          1 | sss  ssss    
          2 |  ssss ssssaaaa 
(3 rows)

postgres=# select array_agg(trim(x.e)) 
              from (select row_number() over() rn, unnest(a) e
                       from g) x 
             group by rn;
         array_agg         
---------------------------
 {"ssss ssss","sss  ssss"}
 {"ssss ssssaaaa"}
(2 rows)

17、bash里头直接处理查询结果

pavel ~ $ psql postgres -A -t --field-separator=" " \
>   -c "copy (select 1,'Ahoj Svete', i 
>                from generate_series(1,3) g(i)) 
>         to stdout delimiter ' '" | \
> while read var1 var2 var3; 
> do 
>    echo "a=$var1,b=$var2,c=$var3 "; 
> done
a=1,b=Ahoj Svete,c=1 
a=1,b=Ahoj Svete,c=2 
a=1,b=Ahoj Svete,c=3 

或者 用下边这个：
psql -qAtX -c "copy (select * from t) to stdout" | while IFS=$'\t' read -r a b c; do echo -e "a=[$a] b=[$b] c=[$c]"; done

18、查询中的注释

SELECT /* my comments, that I would to see in PostgreSQL log */
       a, b, c
   FROM mytab;

19、退出所有连接

postgres=# select * from pg_stat_activity ;
 datid | datname  |  pid  | usesysid | usename  | application_name | client_addr | client_hostname | client_port |         backend_start         |         
-------+----------+-------+----------+----------+------------------+-------------+-----------------+-------------+-------------------------------+
 12894 | postgres | 21091 |       10 | postgres | psql             |             |                 |          -1 | 2012-03-06 09:16:02.466983+01 |
 12894 | postgres | 21103 |    16384 | pavel    | psql             |             |                 |          -1 | 2012-03-06 09:17:02.827352+01 |         
 12894 | postgres | 21104 |    16384 | pavel    | psql             |             |                 |          -1 | 2012-03-06 09:17:12.176979+01 |         
(3 rows)

postgres=# select pg_terminate_backend(pid) 
               from pg_stat_activity 
              where pid <> pg_backend_pid() ;
 pg_terminate_backend 
----------------------
 t
 t
(2 rows)

postgres=# select * from pg_stat_activity ;
 datid | datname  |  pid  | usesysid | usename  | application_name | client_addr | client_hostname | client_port |         backend_start         |         
-------+----------+-------+----------+----------+------------------+-------------+-----------------+-------------+-------------------------------+
 12894 | postgres | 21091 |       10 | postgres | psql             |             |                 |          -1 | 2012-03-06 09:16:02.466983+01 |
(1 row)

20、获取第一个未锁定的行 (用到了advisory lock)

postgres=# select * from queue;
 id | processed |  cmd  
----+-----------+-------
  1 | f         | task1
  2 | f         | task2
  3 | f         | task2
  4 | f         | task3
(4 rows)

-- consument1
postgres=# begin;
BEGIN
postgres=# select * from queue where not processed and pg_try_advisory_xact_lock(id) for update limit 1;
 id | processed |  cmd  
----+-----------+-------
  1 | f         | task1
(1 row)

postgres=# update queue set processed = true where id = 1;
UPDATE 1
postgres=# commit;
COMMIT

-- consument2
postgres=# begin;
BEGIN
postgres=# select * from queue where not processed and pg_try_advisory_xact_lock(id) for update limit 1;
 id | processed |  cmd  
----+-----------+-------
  2 | f         | task2
(1 row)

postgres=# update queue set processed = true where id = 2;
UPDATE 1
postgres=# commit;
COMMIT

21、窗口函数中的gaps获取

参考：[Magnus Hagander http://blog.hagander.net/archives/203-Finding-gaps-in-partitioned-sequences.html](Magnus Hagander http://blog.hagander.net/archives/203-Finding-gaps-in-partitioned-sequences.html)

SELECT * FROM (
   SELECT
      gropid,
      year,
      month,
      seq, 
      seq-lag(seq,1) OVER (PARTITION BY groupid, year, month ORDER BY seq) AS gap FROM mytable
) AS t
WHERE NOT (t.gap=1)
ORDER BY groupid, year, month, seq

22、删除窗口函数查询中的重复行

DELETE FROM tab 
  WHERE id IN (SELECT id 
                  FROM (SELECT row_number() OVER (PARTITION BY column_with_duplicate_values), id 
                           FROM tab) x 
                 WHERE x.row_number > 1);

优化版本（使用ctid)
DELETE FROM tab 
  WHERE (ctid, tableoid) = ANY(ARRAY(SELECT (ctid, tableoid) 
                  FROM (SELECT row_number() OVER (PARTITION BY column_with_duplicate_values), ctid 
                           FROM tab) x 
                 WHERE x.row_number > 1));

23、LIKE语句使用list模式

-- this is not valid
WHERE somecolumn LIKE ('%some%','%someelse%') 
-- 使用array 或正则来搞定
-- this is valid
WHERE somecolumn ~~* any(array['%some%', '%someelse']));

24、PL/pgSQL中的cursor强制使用唯一名字

PL/pgSQL cursors uses PostgreSQL SQL cursors. When we use PL/pgSQL cursor, then PL/pgSQL runtime creates SQL cursor with same name. This behave is unwanted sometimes. But when we know, so PL/pgSQL cursor is +/- text variable with SQL name, we can assign NULL to this variable. Then PL/pgSQL runtime cannot to use a predefined name and it will generate a unique name:

mycursor := null;是关键

DECLARE
  mycursor CURSOR FOR SELECT * FROM int4_tbl WHERE f1 > id;
  newid INTEGER;
  out  TEXT;
BEGIN
  out := id::text || ' ';
  mycursor := null;
  OPEN mycursor;
  ..

25、BLOB到Bytea的转换

原始参考：http://snipplr.com/view/62576/convert-oid-to-bytea/

CREATE OR REPLACE FUNCTION merge_oid(val oid)
RETURNS bytea AS $$
DECLARE 
  merged bytea; 
  arr bytea;
BEGIN
  FOR arr IN SELECT DATA 
                FROM pg_largeobject 
               WHERE loid = val 
               ORDER BY pageno 
  LOOP
    IF merged IS NULL THEN
      merged := arr;
    ELSE
      merged := merged || arr;
    END IF;
  END LOOP;
  RETURN merged;
END
$$ LANGUAGE plpgsql;

26、URL的解码

CREATE OR REPLACE FUNCTION url_decode(input text) RETURNS text
LANGUAGE plpgsql IMMUTABLE STRICT AS $$
DECLARE
 bin bytea = '';
 byte text;
BEGIN
 FOR byte IN (select (regexp_matches(input, '(%..|.)', 'g'))[1]) LOOP
   IF length(byte) = 3 THEN
     bin = bin || decode(substring(byte, 2, 2), 'hex');
   ELSE
     bin = bin || byte::bytea;
   END IF;
 END LOOP;
 RETURN convert_from(bin, 'utf8');
END
$$;
--示例:
ohs=# select url_decode('Hell%C3%B6%20World%21');
  url_decode  
──────────────
 Hellö World!
(1 row)

另一个版本：
CREATE OR REPLACE FUNCTION urldecode_arr(url text)
RETURNS text AS $$
BEGIN
  RETURN 
   (WITH str AS (SELECT CASE WHEN $1 ~ '^%[0-9a-fA-F][0-9a-fA-F]' THEN array[''] END
                                      || regexp_split_to_array ($1, '(%[0-9a-fA-F][0-9a-fA-F])+', 'i') plain,
                       ARRAY(SELECT (regexp_matches ($1, '((?:%[0-9a-fA-F][0-9a-fA-F])+)', 'gi'))[1]) encoded)
     SELECT  coalesce(string_agg(plain[i] || coalesce( convert_from(decode(replace(encoded[i], '%',''), 'hex'), 'utf8'), ''), ''), $1)
        FROM str,
             (SELECT  generate_series(1, array_upper(encoded,1) + 2) i FROM str) blah);
END
$$ LANGUAGE plpgsql IMMUTABLE STRICT;

27、Emacs上的配置(用的很少)

modify file .profile

PSQL_EDITOR=emacs;
export PSQL_EDITOR.

modify file .emacs
(add-to-list 'auto-mode-alist
            '("/psql.edit.[0-9]+\\'" . sql-mode))

28、bytea到TEXT的转换

CREATE OR REPLACE FUNCTION bytea_to_text(bytea) 
RETURNS text AS $$
SELECT convert_from($1, current_setting('server_encoding'))
$$ LANGUAGE sql;

xxx=# SELECT bytea_to_text('žluťoučký kůň se napil žluté vody'::bytea);
           bytea_to_text           
───────────────────────────────────
 žluťoučký kůň se napil žluté vody
(1 row)

29、年龄的计算

postgres=# SELECT CURRENT_DATE;
    date    
------------
 2011-09-20
(1 row)

postgres=# SELECT EXTRACT(YEAR FROM age('1972-08-20'::date));
 date_part 
-----------
        39
(1 row)

30、master还是slave节点的判断(这个太简单了)

SELECT pg_is_in_recovery();

31、动态修改记录的字段

CREATE OR REPLACE FUNCTION x.setfield2(anyelement, text, text)
 RETURNS anyelement
 LANGUAGE plpgsql
AS $function$
DECLARE 
  _name text;
  _values text[];
  _value text;
  _attnum int;
BEGIN
  FOR _name, _attnum
     IN SELECT a.attname, a.attnum
           FROM pg_catalog.pg_attribute a 
          WHERE a.attrelid = (SELECT typrelid
                                 FROM pg_type
                                WHERE oid = pg_typeof($1)::oid) 
  LOOP
    IF _name = $2 THEN
      _value := $3;
    ELSE
      EXECUTE 'SELECT (($1).' || quote_ident(_name) || ')::text' INTO _value USING $1;
    END IF;
    _values[_attnum] :=  COALESCE('"' || replace(replace(_value, '"', '""'), '''', '''''') || '"', '');
  END LOOP;
  EXECUTE 'SELECT (' || pg_typeof($1)::text || '''(' || array_to_string(_values,',') || ')'').*' INTO $1; 
  RETURN $1;
END;
$function$;

postgres=# select setfield2(mypoint '(10,)', 'b', '33');
 setfield2 
───────────
 (10,33)
(1 row)

Time: 9,480 ms

更牛叉简易快速的版本：

-- Erwin 1
CREATE OR REPLACE FUNCTION x.setfield3(anyelement, text, text)
RETURNS anyelement
AS $body$
DECLARE
 _list text;

BEGIN
_list := (
   SELECT string_agg(x.fld, ',')
   FROM   (
      SELECT CASE WHEN a.attname = $2
              THEN quote_literal($3)
              ELSE quote_ident(a.attname)
             END AS fld
      FROM   pg_catalog.pg_attribute a 
      WHERE  a.attrelid = (SELECT typrelid
                           FROM   pg_type
                           WHERE  oid = pg_typeof($1)::oid) 
      ORDER BY a.attnum
   ) x
);

EXECUTE '
SELECT ' || _list || '
FROM   (SELECT $1.*) x'
USING  $1
INTO   $1;

RETURN $1;
END;
$body$ LANGUAGE plpgsql;

更时髦的现代版本：

CREATE FUNCTION f_setfield(INOUT _comp_val anyelement, _field text, _val text)
  RETURNS anyelement AS
$func$
BEGIN

EXECUTE 'SELECT ' || array_to_string(ARRAY(
      SELECT CASE WHEN attname = _field
                THEN '$2'
                ELSE '($1).' || quote_ident(attname)
             END AS fld
      FROM   pg_catalog.pg_attribute
      WHERE  attrelid = pg_typeof(_comp_val)::text::regclass
      AND    attnum > 0
      AND    attisdropped = FALSE
      ORDER  BY attnum
      ), ',')
USING  _comp_val, _val
INTO   _comp_val;

END
$func$ LANGUAGE plpgsql;

32、触发器里遍历RECORD

CREATE OR REPLACE FUNCTION dynamic_trigger()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    ri RECORD;
    t TEXT;
BEGIN
    RAISE NOTICE E'\n    Operation: %\n    Schema: %\n    Table: %',
        TG_OP,
        TG_TABLE_SCHEMA,
        TG_TABLE_NAME;
    FOR ri IN
        SELECT ordinal_position, column_name, data_type
        FROM information_schema.columns
        WHERE
            table_schema = quote_ident(TG_TABLE_SCHEMA)
        AND table_name = quote_ident(TG_TABLE_NAME)
        ORDER BY ordinal_position
    LOOP
        EXECUTE 'SELECT ($1).' || ri.column_name || '::text' INTO t USING NEW;
        RAISE NOTICE E'Column\n    number: %\n    name: %\n    type: %\n    value: %.',
            ri.ordinal_position,
            ri.column_name,
            ri.data_type,
            t;
    END LOOP;
    RETURN NEW;
END; $$;

33、强制chars使用to_char转换

postgres=# select to_char(current_timestamp,'YYYY-MM-DDTHH24:MI:SS');
        to_char        
-----------------------
 2010-02-07THH24:38:10
(1 row)

postgres=# select to_char(current_timestamp,'YYYY-MM-DD"T"HH24:MI:SS');
       to_char       
---------------------
 2010-02-07T07:38:22
(1 row)

34、阻止不必要的char(n)到text的转换

Almost all string functions have parameters of text type. PostgreSQL ensures automatic conversion from char(n) to varchar. But there are a few cases where this behave causes problems - mainly for older applications, where char(n) type was used (there is a clean advice - don't use this type). In some legacy application is comparison between char(n) type and literal constant with spaces and it doesn't work in PostgreSQL now: 
SELECT ...
  WHERE substr(somecolumn,1,4) = 'ab  ';
It doesn't work, because substr function has text parameter - and returns text and 'ab' is not equal to 'ab '. But we can overload substr function with char(n) datatype (it is possible because varchar and char uses same binary format) - and it doesn't do unwanted conversion from char(n) to text: 

-- 解决方案

create or replace function substr(character, int, int) returns character as $$
select substr($1::cstring::text,$2,$3)
$$ language sql;

create function substr(char,int,int) returns char
 strict immutable language internal as 'text_substr' ;

postgres=# create table f(a character(5));
CREATE TABLE
postgres=# insert into f values('a'),('ab'),('abc');
INSERT 0 3
postgres=# select * from f;
  a
-------
 a
 ab
 abc
(3 rows)

postgres=# select * from f where substr(a,1,3) = 'a  ';
  a
-------
 a
(1 row)

postgres=# select * from f where substr(a,1,3) = 'ab  ';
  a
-------
 ab
(1 row)

35、得到function的DDL

postgres=# select oid from pg_proc where proname = 'fu';
  oid  
-------
 16389
(1 row)

postgres=# select pg_get_functiondef(16389);
                         pg_get_functiondef                          
---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION public.fu(anyelement)                   
  RETURNS anyelement                                                
  LANGUAGE plpgsql                                                  
 AS $function$
begin 
  raise notice '%', $1; 
  return $1; 
end; $function$+

(1 row)

36、Russian alphabet (Cyrillic) to ASCII (俄兄弟的字母表转换)

CREATE OR REPLACE FUNCTION urltranslit(text) RETURNS text as $$
SELECT
regexp_replace(
    replace(
        replace(
            replace(
                replace(
                    replace(
                        replace(
                            replace(
                                translate(
                                    lower($1),
                                    'абвгдеёзийклмнопрстуфхыэъь',
                                    'abvgdeezijklmnoprstufhye'
                                ), 'ж', 'zh'
                            ), 'ц', 'ts'
                        ), 'ч', 'ch'
                    ), 'ш', 'sh'
                ), 'щ', 'sch'
            ), 'ю', 'yu'
        ), 'я', 'ya'
    )
    ,
    '[^a-z]+',
    '-',
    'g'
)
$$ LANGUAGE SQL;

37、to_string, to_array

CREATE OR REPLACE FUNCTION to_string(anyarray, sep text, nullstr text DEFAULT '') 
RETURNS text AS $$
SELECT array_to_string(ARRAY(SELECT coalesce(v::text, $3) 
                                FROM unnest($1) g(v)),
                       $2)
$$ LANGUAGE sql;

postgres=# select to_string(array[1,2,3,4,null,5],','); 
 to_string  
------------
 1,2,3,4,,5
(1 row)

postgres=# select to_string(array[1,2,3,4,null,5],',','');
    to_string     
------------------
 1,2,3,4,,5
(1 row)

CREATE OR REPLACE FUNCTION to_array(text, sep text, nullstr text DEFAULT '') 
RETURNS text[] AS $$ 
  SELECT ARRAY(SELECT CASE 
                           WHEN v = $3 THEN NULL::text 
                           ELSE v END 
                  FROM unnest(string_to_array($1,$2)) g(v)) 
$$ LANGUAGE sql;

postgres=# select to_array('1,2,3,4,,5',',');
     to_array     
------------------
 {1,2,3,4,NULL,5}
(1 row)

postgres=# select to_array('1,2,3,4,,5',',','');
     to_array     
------------------
 {1,2,3,4,NULL,5}
(1 row)

postgres=# select to_array('1,2,3,,5',',')::int[];
    to_array    
----------------
 {1,2,3,NULL,5}
(1 row)

This issue is fixed in modern versions, that allow third parameter for NULL substitution.

postgres=# SELECT array_to_string(ARRAY[10,10,NULL,10], ',','');
 array_to_string 
─────────────────
 10,10,,10
(1 row)

38、"unix timestamp" 和timestamp的转换

CREATE OR REPLACE FUNCTION convert_timestamp_to_xtime(dt timestamp) 
RETURNS integer AS $$
  SELECT EXTRACT(EPOCH FROM $1)::integer
$$ LANGUAGE sql;

CREATE OR REPLACE FUNCTION convert_xtime_to_timestamp(xdt integer) 
RETURNS timestamp AS $$
  SELECT to_timestamp($1)::timestamp
$$ LANGUAGE sql;

39、使用time zone的域

CREATE OR REPLACE FUNCTION is_timezone( tz TEXT ) RETURNS BOOLEAN as $$
BEGIN
 PERFORM now() AT TIME ZONE tz;
 RETURN TRUE;
EXCEPTION WHEN invalid_parameter_value THEN
 RETURN FALSE;
END;
$$ language plpgsql STABLE;

CREATE DOMAIN timezone AS CITEXT
CHECK ( is_timezone( value ) );

40、psql中非交互的输入密码

PGPASSWORD或者.pgpass两种方法都可以

postgres=# CREATE USER tom;
CREATE ROLE

postgres=# ALTER USER tom PASSWORD 'tiger';
ALTER ROLE
...
postgres=> \q

[pavel@nemesis ~]$ PGPASSWORD=tiger psql postgres -U tom
Welcome to psql 8.3.8, the PostgreSQL interactive terminal.

Type:  \copyright for distribution terms
       \h for help with SQL commands
       \? for help with psql commands
       \g or terminate with semicolon to execute query
       \q to quit

postgres=> ...

41、更快的表列表

alter function pg_table_is_visible(oid) cost 10;

42、Array of arrays

postgres=# create type intarr as (f int[]);
CREATE TYPE

postgres=# select * from t;
      a      
─────────────
 {10,20,30}
 {1,2,3,4,5}
(2 rows)

postgres=# select array_agg(a) from t;
ERROR:  could not find array type for data type integer[]

postgres=# select array_agg(distinct row(a)::intarr) from t;
                array_agg                 
──────────────────────────────────────────
 {"(\"{1,2,3,4,5}\")","(\"{10,20,30}\")"}
(1 row)

postgres=# select (unnest(array_agg(distinct row(a)::intarr))::intarr).f from t;
      f      
─────────────
 {1,2,3,4,5}
 {10,20,30}
(2 rows)

43、MySQL function group_concat in PostgreSQL

postgres=# SELECT * FROM x;
 cat |  town  
-----+---------
   1 | Benešov
   1 | Tábor
   1 | Písek
   2 | Praha
   2 | Hradec
   3 | Cheb
   3 | Aš
(7 rows)

postgres=# SELECT cat, array_to_string(ARRAY(SELECT unnest(array_agg(town)) 
                                               ORDER BY 1),',') 
              FROM x 
             GROUP BY cat;
 kat |   array_to_string   
-----+---------------------
   1 | Benešov,Písek,Tábor
   3 | Aš,Cheb
   2 | Hradec,Praha
(3 rows)

44、MySQL function field in PostgreSQL

select * from pet order by field(species, 'cat', 'dog', 'bird') desc;

+----------+--------+---------+------+------------+------------+
| name     | owner  | species | sex  | birthday   | death      |
+----------+--------+---------+------+------------+------------+
| Fluffy   | Harold | cat     | f    | 1993-02-04 | NULL       | 
| Claws    | Gwen   | cat     | m    | 1994-03-17 | NULL       | 
| Buffy    | Harold | dog     | f    | 1989-05-13 | NULL       | 
| Fang     | Benny  | dog     | m    | 1990-08-27 | NULL       | 
| Bowser   | Diane  | dog     | m    | 1989-08-31 | 1995-07-29 | 
| Chirpy   | Gwen   | bird    | f    | 1998-09-11 | NULL       | 
| Whistler | Gwen   | bird    | NULL | 1997-12-09 | NULL       | 
+----------+--------+---------+------+------------+------------+
-- PG中
select * 
   from pet 
  order by species, case species when 'cat' then 1 
                                 when 'dog' then 2 
                                 when 'bird' then 3 
                                 else 0 end desc;

CREATE OR REPLACE FUNCTION field(varchar, VARIADIC text[]) 
RETURNS int AS $$
  SELECT i 
     FROM generate_subscripts($2,1) g(i) 
    WHERE $1 = $2[i] 
  UNION ALL
  SELECT 0 
  LIMIT 1 1
$$ LANGUAGE sql;

CREATE OR REPLACE FUNCTION elt(int, VARIADIC text[])
RETURNS text AS $$
  SELECT $2[$1];
$$ LANGUAGE sql;

44、窗口函数中分组中的first

postgres=# SELECT * FROM staff;
┌───────────┬───────────┬────────────┬──────────┐
│   name    │ surname   │ department │  salary  │
├───────────┴───────────┴────────────┴──────────┤
│ Pavel     │ Stehule   │          1 │ 10000.00 │
│ Zdenek    │ Stehule   │          1 │  9000.00 │
│ Vladimira │ Stehulova │          2 │  9000.00 │
└───────────────────────────────────────────────┘
(3 rows)

postgres=# SELECT * 
              FROM (SELECT *, max(salary) OVER (PARTITION BY department) 
                       FROM staff) x 
             WHERE max = salary;
┌───────────┬───────────┬────────────┬──────────┬──────────┐
│   name    │ surname   │ department │  salary  │   max    │
├───────────┴───────────┴────────────┴──────────┴──────────┤
│ Pavel     │ Stehule   │          1 │ 10000.00 │ 10000.00 │
│ Vladimira │ Stehulova │          2 │  9000.00 │  9000.00 │
└──────────────────────────────────────────────────────────┘
(2 rows)

45、BIGINT到IP的转换

create function bigint_to_inet(bigint) returns inet as $$
select (($1>>24&255)||'.'||($1>>16&255)||'.'||($1>>8&255)||'.'||($1>>0&255))::inet
$$ language sql;

46、让文件成为patch的原始文件

PostgreSQL hackers like commands cporig and difforig. cporig creates copy file with .orig extension. difforig search files with extension .orig and does cumulative diff i.e. patch. Sometimes we need mark modified files from patch. This process can be simplified with unix commands:

cat mnnotation.diff | egrep -o -e "^\*\*\*.*(sql|out|h|c|sgml|y|lex|Makefile)" | replace "*** " "" | xargs cporig

47、字符串到数值的转换

CREATE OR REPLACE FUNCTION read_int(varchar) 
RETURNS int AS $$ 
  SELECT CASE WHEN $1 ~ e'^\\d+$' THEN $1::int END; 
$$ LANGUAGE SQL IMMUTABLE STRICT;

48、timestamp中去掉微秒

postgres=# select current_timestamp;
             now              
------------------------------
 2009-05-23 20:42:21.57899+02
(1 row)

Time: 196,784 ms
postgres=# select current_timestamp::timestamp(2);
          now           
------------------------
 2009-05-23 20:42:27.74
(1 row)

Time: 51,861 ms
postgres=# select current_timestamp::timestamp(0);
         now         
---------------------
 2009-05-23 20:42:31
(1 row)

Time: 0,729 ms

49、Attention on IS NULL and IS NOT NULL operators for composite types

One may think that !(x IS NULL) = x IS NOT NULL is true in all cases. But there is an exception - composite types. When one field of a composite value is NULL and another field is NOT NULL, then result of both operators is false. IS NULL is true, only when all fields are NULL. IS NOT NULL is true, only when all fields are NOT NULL. For any case in between, then both operators return false.

CREATE OR REPLACE FUNCTION test_isnull()
RETURNS TABLE (a int, b int, isnull bool, isnotnull bool) AS $$
DECLARE r foo;
BEGIN
  isnull := r IS NULL; isnotnull := r IS NOT NULL; RETURN NEXT;
  a := NULL; b := 10; r := ROW(a, b);
  isnull := r IS NULL; isnotnull := r IS NOT NULL; RETURN NEXT;
  a := 10; b := 10; r := ROW(a, b);
  isnull := r IS NULL; isnotnull := r IS NOT NULL; RETURN NEXT;
  RETURN;
END;
$$ LANGUAGE plpgsql;
SELECT * FROM test_isnull();

 a  | b  | isnull | isnotnull
----+----+--------+-----------
    |    | t      | f
    | 10 | f      | f
 10 | 10 | f      | t
(3 rows)

50、Faster execution of PL/pgSQL function with explicit casting

create or replace function test1() 
returns int as $$
declare s int := 0; 
begin 
  for i in 1..100000 loop s := 4e3; end loop; -- numeric constant
  return s; 
end; 
$$ language plpgsql immutable;

create or replace function test2() 
returns int as $$
declare s int := 0; 
begin 
  for i in 1..100000 loop s := 4e3::int; end loop; -- int 
  return s; 
end; 
$$ language plpgsql immutable;

postgres=# select test1();
 test1 
-------
  4000
(1 row)

Time: 176,623 ms

postgres=# select test2();
 test2 
-------
  4000
(1 row)

Time: 47,673 ms

Same problem is implicit conversion in return statement:

create or replace function test1() returns int as $$begin return 4e1; end; $$ language plpgsql; 
create or replace function test2() returns int as $$begin return 4e1::int; end; $$ language plpgsql;

postgres=# select count(test1()) from generate_series(1,100000);
 count  
--------nebo v konverzi návratové hodnoty
 100000
(1 row)

Time: 682,005 ms
postgres=# select count(test2()) from generate_series(1,100000);
 count  
--------
 100000
(1 row)

Time: 528,099 ms

51、PL函数的并行执行的保护

Theoretically we should to detect an run of any function - but these information are not accessible from user interface (without C coding). We should to look to list of processed queries or we should to use advisory locks. Test based on select from pg_stat_activity view is useful only for function directly called by user. Functions called from other functions are invisible.

create or replace function long_run() 
returns void as $$
begin 
  if exists(select procpid 
               from pg_stat_activity 
              where procpid <> pg_backend_pid() 
                    and current_query like '%long_run(%') 
  then 
    raise notice 'procedure is running'; 
    return; 
  end if; 
  perform pg_sleep(10); 
end$$ 
language plpgsql volatile;
CREATE FUNCTION

Next possibility is using advisory locks http://www.postgresql.org/docs/8.3/interactive/functions-admin.html#FUNCTIONS-ADVISORY-LOCKS.

create or replace function long_run()
returns void as $$
declare foid oid:= 'long_run'::regproc::oid;
begin
  if pg_try_advisory_lock(foid::bigint) then
    perform pg_sleep(10); 
    perform pg_advisory_unlock(foid::bigint);   
  else
    raise notice 'procedure is running';
  end if;
  return;
end;
$$ language plpgsql;

52、使用科学计数法

create or replace function floating_format(double precision, int) 
  returns varchar as $$
   sprintf("%.$_[1]e", $_[0])
$$ language plperl;

create or replace function floating_format(double precision) 
  returns varchar as $$
   sprintf("%e", $_[0])
$$ language plperl;

postgres=# select floating_format(10.223);
 floating_format 
-----------------
 1.022300e+01
(1 row)

postgres=# select floating_format(10.223,2);
 floating_format 
-----------------
 1.02e+01
(1 row)

postgres=# select floating_format(10.223,4);
 floating_format 
-----------------
 1.0223e+01
(1 row)

53、终端的配置

emacs (.emacs)

(setq-default truncate-lines t)
(ansi-color-for-comint-mode-on)
(setq lazy-lock-defer-on-scrolling t)
(setq inhibit-startup-echo-area-message t)
(setq inhibit-startup-message t)
(show-paren-mode t)
(setq show-paren-style 'mixed)
(fset 'yes-or-no-p 'y-or-n-p)
(global-set-key "\M-g" 'goto-line)
(setq sql-database "postgres")  ;; name of most often used database
;; don't use pager, empty title
(setq sql-postgres-options '("-P" "pager=off" "-P" "title= ")) 
;; interesting idea is setting of start of every new command on new line
;; (setq sql-postgres-options '("-P" "pager=off" "-P" "title= " "-v" "PROMPT1=%/.%n%#\n" "-v" "PROMPT2=" "-v" "PROMPT3="))

psql

export PAGER="less -RSX"
psql ... -P pager=always

54、表在不同schema间的移动

create or replace function mvtable(src_schema varchar, dest_schema varchar, mask varchar)
returns void as $$
declare r record;
begin
  for r in
          select *
             from information_schema.tables
            where table_schema = src_schema and table_name like mask
  loop
    execute 'alter table ' || quote_ident(r.table_schema)
                   || '.' || quote_ident(r.table_name)
                   || ' set schema ' || quote_ident(dest_schema);
  end loop;
  return;
end;
$$ language plpgsql;

55、使用CTE完成数的不同进制转换

postgres=# create function to_base(num int, base int = 10) 
           returns varchar as $$
             with recursive z as (
                                  select $1 as a, null::int as b, 0 as i 
                                  union all 
                                  select a/$2, a % $2, i+1 
                                     from z 
                                  where a > 0
                                 ) 
                select array_to_string(array(select substring('0123456789abcdef' from b + 1 for 1) 
                   from z 
                  where i > 0  
                 order by i desc),'');
           $$ language sql immutable;

postgres=# select to_base(255);
 to_base 
---------
 255
(1 row)

postgres=# select to_base(255,16);
 to_base 
---------
 ff
(1 row)

postgres=# select to_base(255,8);
 to_base 
---------
 377
(1 row)

postgres=# select to_base(255,2);
 to_base  
----------
 11111111
(1 row)

56、获取表定义中的缺省值

CREATE OR REPLACE FUNCTION eval(varchar) 
RETURNS varchar AS $$
DECLARE result varchar;
BEGIN
 EXECUTE 'SELECT ' || $1 INTO result;
 RETURN result;
END;$$ LANGUAGE plpgsql STRICT;

CREATE OR REPLACE FUNCTION defaults(text, 
                                    OUT attname name, OUT type varchar, OUT default_val varchar)
RETURNS SETOF RECORD AS $$
SELECT a.attname,
       pg_catalog.format_type(a.atttypid, a.atttypmod),
       (SELECT eval(pg_catalog.pg_get_expr(d.adbin, d.adrelid))
           FROM pg_catalog.pg_attrdef d
          WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef)
   FROM pg_catalog.pg_attribute a
  WHERE a.attrelid = $1::regclass::oid AND a.attnum > 0 AND NOT a.attisdropped
  ORDER BY a.attnum
$$ LANGUAGE SQL STRICT;

57、函数Function to_bin

create or replace function to_bin(int) 
returns varchar as $$
declare r varchar; 
begin 
  execute 'select ' || $1 || '::bit(' || case when $1 = 0 then 1 else trunc(log(2, $1)) + 1 end || ')' into r; 
  return r; 
end;
$$ language plpgsql immutable strict;

create or replace function bin(varchar) 
returns varchar as $$
declare r varchar; 
begin 
  execute 'select b''' || $1 || '''::int' into r; 
  return r; 
end;
$$ language plpgsql immutable strict;

postgres=# select to_bin(5), bin('101');
 to_bin | bin 
--------+-----
 101    | 5
(1 row)

58、SQL code for hierarchical break-down of domains（牛)

postgres=# create or replace function domain_list(varchar[]) 
           returns setof varchar as $$
              select array_to_string($1,’.') 
              union all 
              select domain_list($1[2:array_upper($1,1)]) 
                 where array_upper($1,1) > 1 
           $$ language sql immutable strict;
CREATE FUNCTION
postgres=# select domain_list(array['a','b','c','d']);
domain_list
————-
a.b.c.d
b.c.d
c.d
d
(4 rows)

postgres=# create or replace function domain_list(varchar) 
           returns setof varchar as $$ 
              select domain_list(string_to_array($1, ‘.’))
           $$ language sql immutable strict;
CREATE FUNCTION
postgres=# select domain_list(’a.b.c.d’);
domain_list
————-
a.b.c.d
b.c.d
c.d
d

59、Solution for migration to 8.3 (problem with tsearch2) (略老)

PostgreSQL documentation has a section with some advice that should to help with migration of older application (based on TSearch2) to PostgreSQL 8.3 and higher (with integrated fulltext). This works well when you migrate databases individually, but it fails when you use pg_dump. As documented, you should do some pre steps - import TSearch compatible module into template1. But complete dump generates databases from template0 (you can't to change it). So you cannot load this dump, and you have to edit dump (advice - dump structure and data to two different files).

so, when you find bug:

ERROR:  type "tsvector" already exists
You have to add import of tsearch2.sql to every creation database part in dump.

DROP DATABASE brigady;
CREATE DATABASE brigady WITH TEMPLATE = template0  ENCODING = 'UTF8';
--
-- PostgreSQL database dump complete
--

\connect brigady
-- new line
\i /usr/local/pgsql/share/contrib/tsearch2.sql

SET search_path = public, pg_catalog;
...

60、让函数更快

Much faster than PL/pgSQL - especially when they are wrappers around integrated features (in this example - integrated qsort). PL/pgSQL is usually fast to interpret - I know only about two slow operations - array update and string update. It's based on the internal architecture - every change generate a new object - it isn't significant for small objects (less than 1000), but for large objects this operations needs significant time. However, we can go a different way, using integrated functionality (this way is usually the fastest in any embedded language).

PostgreSQL use internal qsort -so, I wrote qsort in PL/pgSQL too (results will be comparable):

CREATE OR REPLACE FUNCTION quicksort(l integer, r integer, a int[])
RETURNS int[] AS $$
DECLARE akt int[] = a;
  i integer := l; j integer := r; x integer = akt[(l+r) / 2]; 
  w integer;
BEGIN
  LOOP
    WHILE akt[i] < x LOOP i := i + 1; END LOOP;
    WHILE x < akt[j] loop j := j - 1; END LOOP;
    IF i <= j THEN
      w := akt[i];
      akt[i] := akt[j]; akt[j] := w;
      i := i + 1; j := j - 1;
    END IF;
    EXIT WHEN i > j;
  END LOOP;
  IF l < j THEN akt := quicksort(l,j,akt); END IF;
  IF i < r then akt := quicksort(i,r,akt); END IF;
  RETURN akt;
END;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;

postgres=# SELECT array_upper(quicksort(1,10000,array_agg(a)),1) FROM test;
 array_upper
-------------
       10000
(1 row)
Time: 5918,531 ms

CREATE OR REPLACE FUNCTION sort(anyarray) 
RETURNS anyarray AS $$
  SELECT array(SELECT * FROM unnest($1) ORDER BY 1); 
$$ language sql;

postgres=# SELECT array_upper(sort(array_agg(a)),1) FROM test;
 array_upper
-------------
       10000
(1 row)
Time: 35,980 ms

参考

[1] https://postgres.cz/wiki/PostgreSQL_SQL_Tricks:
https://postgres.cz/wiki/PostgreSQL_SQL_Tricks

[2] https://postgres.cz/wiki/PostgreSQL_SQL_Tricks_III:
https://postgres.cz/wiki/PostgreSQL_SQL_Tricks_III

PostgreSQL中的SQL使用技巧汇编(第一篇)

概述