tag: SQL基础
概述详情与验证1、记录应用产生的所有SQL语句的日志2、vacuum系统表3、在PLPythonu异常里设置DETAIL和HINT4、xml格式化5、调试PostgreSQL和client之间的通信6、快速替换主键依赖上的索引7、从命令行传参数给script中的DO语句8、数组元素按序号访问,转成多行9、只允许目标列出现一个NULL值10、Bytea到BLOB值的转换11、只允许一列出现NULL12、在psql中覆盖app名字13、在pg_tgrm 中使用KNNsearch 找出N个最相似的值14、使用自定义转换(oracle到pg的移植)15、JSON值的创建16、数组字段的trim17、bash里头直接处理查询结果18、查询中的注释19、退出所有连接20、获取第一个未锁定的行 (用到了advisory lock)21、窗口函数中的gaps获取22、删除窗口函数查询中的重复行23、LIKE语句使用list模式24、PL/pgSQL中的cursor强制使用唯一名字25、BLOB到Bytea的转换26、URL的解码27、Emacs上的配置(用的很少)28、bytea到TEXT的转换29、年龄的计算30、master还是slave节点的判断(这个太简单了)31、动态修改记录的字段32、触发器里遍历RECORD33、强制chars使用to_char转换34、阻止不必要的char(n)到text的转换35、得到function的DDL36、Russian alphabet (Cyrillic) to ASCII (俄兄弟的字母表转换)37、to_string, to_array38、"unix timestamp" 和timestamp的转换39、使用time zone的域40、psql中非交互的输入密码41、更快的表列表42、Array of arrays43、MySQL function group_concat in PostgreSQL44、窗口函数中分组中的first45、BIGINT到IP的转换46、让文件成为patch的原始文件47、字符串到数值的转换48、timestamp中去掉微秒49、Attention on IS NULL and IS NOT NULL operators for composite types50、Faster execution of PL/pgSQL function with explicit casting51、PL函数的并行执行的保护52、使用科学计数法53、终端的配置54、表在不同schema间的移动55、使用CTE完成数的不同进制转换56、获取表定义中的缺省值57、函数Function to_bin58、SQL code for hierarchical break-down of domains(牛)59、Solution for migration to 8.3 (problem with tsearch2) (略老)60、让函数更快参考
概述
整理PostgreSQL中一些常见的SQL小技巧,绝大部分来源于网上。
详情与验证
1、记录应用产生的所有SQL语句的日志
使用libpq要使用的系统变量:PGOPTIONS,设置选项即可:
PGOPTIONS="-c log_min_duration_statement=0" pg_dump -s postgres > /dev/null
2、vacuum系统表
psql -At -c "select 'VACUUM ANALYZE pg_catalog.'||table_name from information_schema.tables where table_schema = 'pg_catalog' and table_type <> 'VIEW'" db_test \
| psql -S db_test
3、在PLPythonu异常里设置DETAIL和HINT
postgres=# do $$
x = plpy.SPIError('Nazdarek');
x.spidata = (100, "Some detail", "some hint", None, None);
raise x;
$$ language plpythonu;
ERROR: plpy.SPIError: Nazdarek
DETAIL: Some detail
HINT: some hint
CONTEXT: Traceback (most recent call last):
PL/Python anonymous code block, line 4, in
raise x;
PL/Python anonymous code block
4、xml格式化
create or replace function xml_pretty(xml)
returns xml as $$
select xslt_process($1,
'<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:strip-space elements="*" />
<xsl:output method="xml" indent="yes" />
<xsl:template match="node() | @*">
<xsl:copy>
<xsl:apply-templates select="node() | @*" />
xsl:copy>
xsl:template>
xsl:stylesheet>')::xml
$$ language sql immutable strict;
-- 示例:
postgres=# select xml_pretty('<root><a><b>some textb>a>root>');
xml_pretty
----------------------
<root> +
<a> +
<b>some textb>+
a> +
root> +
(1 row)
5、调试PostgreSQL和client之间的通信
--用到了pgshark
unbuffer ./pgs-debug --host 172.30.34.72 -i lo --port 6432 | while read line; do echo `date +"%T.%3N"` $line; done | gzip > /mnt/ebs/pgsharklog.gz
6、快速替换主键依赖上的索引
BEGIN;
CREATE UNIQUE INDEX CONCURRENTLY tab_pkey_idx2 ON tab(id);
ALTER TABLE tab
DROP CONSTRAINT tab_pkey CASCADE,
ADD CONSTRAINT tab_pkey PRIMARY KEY USING INDEX tab_pkey_idx2;
ALTER TABLE second_tab
ADD CONSTRAINT second_tab_fkey FOREIGN KEY (tab_id) REFERENCES tab(id) NOT VALID;
COMMIT;
7、从命令行传参数给script中的DO语句
PG中的DO并不支持参数,但是我们可以通过psql和服务端的会话参数来传参。
bash-4.1$ cat test.sh
echo "
set myvars.msgcount TO :'msgcount';
DO \$\$
BEGIN
FOR i IN 1..current_setting('myvars.msgcount')::int LOOP
RAISE NOTICE 'Hello';
END LOOP;
END \$\$" | psql postgres -v msgcount=$1
bash-4.1$ sh test.sh 3
SET
Time: 0.341 ms
NOTICE: Hello
NOTICE: Hello
NOTICE: Hello
DO
Time: 2.415 ms
8、数组元素按序号访问,转成多行
-- 9.1
CREATE OR REPLACE FUNCTION unnest_rownum(anyarray)
RETURNS TABLE (id int, element anyelement) AS $$
BEGIN
id := 1;
FOREACH element IN array $1
LOOP
RETURN NEXT;
id := id + 1;
END LOOP;
RETURN;
END
$$ LANGUAGE plpgsql;
postgres=# select * from unnest_rownum(ARRAY['A','B','C']);
id | element
----+---------
1 | A
2 | B
3 | C
(3 rows)
--9.3
SELECT i, arraycol[i]
FROM tab,
LATERAL generate_subscripts(arraycol, 1) as i;
--示例
mydb=# create table test(id int, col2 int[]);
CREATE TABLE
mydb=# insert into test values(1, '{3, 4, 5, 7}'::int[]);
INSERT 0 1
mydb=# select i, col2[i] from test, LATERAL generate_subscripts(col2, 1) as i;
i | col2
---+------
1 | 3
2 | 4
3 | 5
4 | 7
(4 rows)
9、只允许目标列出现一个NULL值
postgres=# CREATE TABLE omega(a int UNIQUE);
CREATE TABLE
postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1
postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1
postgres=# TRUNCATE omega;
TRUNCATE TABLE
postgres=# CREATE UNIQUE INDEX ON omega ((1)) WHERE a IS NULL;
CREATE INDEX
postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1
postgres=# INSERT INTO omega VALUES(NULL);
ERROR: duplicate key value violates unique constraint "omega_expr_idx"
DETAIL: Key ((1))=(1) already exists.
postgres=# UPDATE omega SET a = 10;
UPDATE 1
postgres=# INSERT INTO omega VALUES(NULL);
INSERT 0 1
Time: 7.601 ms
10、Bytea到BLOB值的转换
CREATE OR REPLACE FUNCTION make_lo(bytea)
RETURNS oid AS $$
DECLARE
loid oid;
fd integer;
bytes integer;
BEGIN
loid := lo_creat(-1);
fd := lo_open(loid, 131072);
bytes := lowrite(fd, $1);
IF (bytes != LENGTH($1)) THEN
RAISE EXCEPTION 'Not all data copied to blob';
END IF;
PERFORM lo_close(fd);
RETURN loid;
END;
$$ LANGUAGE plpgsql STRICT;
11、只允许一列出现NULL
CREATE OR REPLACE FUNCTION public.null_count(VARIADIC anyarray)
RETURNS integer
LANGUAGE sql
AS $function$
SELECT sum(CASE WHEN v IS NULL THEN 1 ELSE 0 END)::int FROM unnest($1) g(v)
$function$
CREATE TABLE xxx(
a int,
b int,
c int,
CHECK (null_count(a,b,c) <= 1))
12、在psql中覆盖app名字
[pavel@localhost ~]$ PGAPPNAME=splunk psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" postgres
application_name
------------------
splunk
(1 row)
[pavel@localhost ~]$ psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" "dbname=postgres application_name=splunk"
application_name
------------------
splunk
(1 row)
[pavel@localhost ~]$ psql -c "select application_name from pg_stat_activity where pid = pg_backend_pid();" postgresql://localhost/postgres?application_name=splunk
application_name
------------------
splunk
(1 row)
13、在pg_tgrm 中使用KNNsearch 找出N个最相似的值
SELECT DISTINCT ON (village_name <-> 'Benešov') village_name, village_name<->'Benešov'
FROM villages
ORDER BY village_name <-> 'Benešov'
LIMIT 10;
village_name | ?column?
-----------------------+----------
Benešov | 0
Benešovice | 0.416667
Dolní Benešov | 0.428571
Benešov u Semil | 0.5
Benešov nad Černou | 0.578947
Benešov nad Ploučnicí | 0.636364
Benecko | 0.666667
Benetice | 0.692308
Bečov | 0.727273
Bezkov | 0.75
(10 rows)
postgres=# EXPLAIN SELECT DISTINCT ON (village_name <-> 'Benešov') village_name, village_name<->'Benešov'
FROM obce
ORDER BY village_name <-> 'Benešov'
LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Limit (cost=0.00..2.76 rows=10 width=10)
-> Unique (cost=0.00..1474.75 rows=5341 width=10)
-> Index Scan using villages_village_name_idx on obce (cost=0.00..1433.14 rows=16644 width=10)
Order By: (village_name <-> 'Benešov'::text)
(4 rows)
14、使用自定义转换(oracle到pg的移植)
Oracle doesn't support boolean data type, and developers use a varchar(1) or number(1) instead. It is a issue in PostgreSQL because this width is too less for storing a strings "true" or "false". But we can redefine a default casting (there is used a ugly hack - direct update of system tables under superuser rights). This solution is fast hack - changing to PostgreSQL's boolean is much better.
CREATE OR REPLACE FUNCTION public.boolcast_varchar(boolean)
RETURNS character varying
LANGUAGE sql
AS $$
SELECT CASE WHEN $1 THEN 't' ELSE 'f' END
$$
UPDATE pg_cast SET castfunc='boolcast_varchar'::regproc,
castcontext='i'
WHERE castsource=16 and casttarget=1043;
CREATE TABLE test(a varchar(1));
INSERT INTO test VALUES(true);
15、JSON值的创建
select
row_to_json(t1)
from (
select
'joe' as username,
(select project from (values(1, 'prj1')) as project(project_id,
project_name)) as project
) t1;
row_to_json
---------------------------------------------------------------------
{"username":"joe","project":{"project_id":1,"project_name":"prj1"}}
(1 row)
16、数组字段的trim
postgres=# select * from g;
a
--------------------------------
{" ssss ssss","sss ssss "}
{" ssss ssssaaaa "}
(2 rows)
postgres=# select row_number() over(), unnest(a) e
from g;
row_number | e
------------+-----------------
1 | ssss ssss
1 | sss ssss
2 | ssss ssssaaaa
(3 rows)
postgres=# select array_agg(trim(x.e))
from (select row_number() over() rn, unnest(a) e
from g) x
group by rn;
array_agg
---------------------------
{"ssss ssss","sss ssss"}
{"ssss ssssaaaa"}
(2 rows)
17、bash里头直接处理查询结果
pavel ~ $ psql postgres -A -t --field-separator=" " \
> -c "copy (select 1,'Ahoj Svete', i
> from generate_series(1,3) g(i))
> to stdout delimiter ' '" | \
> while read var1 var2 var3;
> do
> echo "a=$var1,b=$var2,c=$var3 ";
> done
a=1,b=Ahoj Svete,c=1
a=1,b=Ahoj Svete,c=2
a=1,b=Ahoj Svete,c=3
或者 用下边这个:
psql -qAtX -c "copy (select * from t) to stdout" | while IFS=$'\t' read -r a b c; do echo -e "a=[$a] b=[$b] c=[$c]"; done
18、查询中的注释
SELECT /* my comments, that I would to see in PostgreSQL log */
a, b, c
FROM mytab;
19、退出所有连接
postgres=# select * from pg_stat_activity ;
datid | datname | pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start |
-------+----------+-------+----------+----------+------------------+-------------+-----------------+-------------+-------------------------------+
12894 | postgres | 21091 | 10 | postgres | psql | | | -1 | 2012-03-06 09:16:02.466983+01 |
12894 | postgres | 21103 | 16384 | pavel | psql | | | -1 | 2012-03-06 09:17:02.827352+01 |
12894 | postgres | 21104 | 16384 | pavel | psql | | | -1 | 2012-03-06 09:17:12.176979+01 |
(3 rows)
postgres=# select pg_terminate_backend(pid)
from pg_stat_activity
where pid <> pg_backend_pid() ;
pg_terminate_backend
----------------------
t
t
(2 rows)
postgres=# select * from pg_stat_activity ;
datid | datname | pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start |
-------+----------+-------+----------+----------+------------------+-------------+-----------------+-------------+-------------------------------+
12894 | postgres | 21091 | 10 | postgres | psql | | | -1 | 2012-03-06 09:16:02.466983+01 |
(1 row)
20、获取第一个未锁定的行 (用到了advisory lock)
postgres=# select * from queue;
id | processed | cmd
----+-----------+-------
1 | f | task1
2 | f | task2
3 | f | task2
4 | f | task3
(4 rows)
-- consument1
postgres=# begin;
BEGIN
postgres=# select * from queue where not processed and pg_try_advisory_xact_lock(id) for update limit 1;
id | processed | cmd
----+-----------+-------
1 | f | task1
(1 row)
postgres=# update queue set processed = true where id = 1;
UPDATE 1
postgres=# commit;
COMMIT
-- consument2
postgres=# begin;
BEGIN
postgres=# select * from queue where not processed and pg_try_advisory_xact_lock(id) for update limit 1;
id | processed | cmd
----+-----------+-------
2 | f | task2
(1 row)
postgres=# update queue set processed = true where id = 2;
UPDATE 1
postgres=# commit;
COMMIT
21、窗口函数中的gaps获取
参考:[Magnus Hagander http://blog.hagander.net/archives/203-Finding-gaps-in-partitioned-sequences.html](Magnus Hagander http://blog.hagander.net/archives/203-Finding-gaps-in-partitioned-sequences.html)
SELECT * FROM (
SELECT
gropid,
year,
month,
seq,
seq-lag(seq,1) OVER (PARTITION BY groupid, year, month ORDER BY seq) AS gap FROM mytable
) AS t
WHERE NOT (t.gap=1)
ORDER BY groupid, year, month, seq
22、删除窗口函数查询中的重复行
DELETE FROM tab
WHERE id IN (SELECT id
FROM (SELECT row_number() OVER (PARTITION BY column_with_duplicate_values), id
FROM tab) x
WHERE x.row_number > 1);
优化版本(使用ctid)
DELETE FROM tab
WHERE (ctid, tableoid) = ANY(ARRAY(SELECT (ctid, tableoid)
FROM (SELECT row_number() OVER (PARTITION BY column_with_duplicate_values), ctid
FROM tab) x
WHERE x.row_number > 1));
23、LIKE语句使用list模式
-- this is not valid
WHERE somecolumn LIKE ('%some%','%someelse%')
-- 使用array 或正则来搞定
-- this is valid
WHERE somecolumn ~~* any(array['%some%', '%someelse']));
24、PL/pgSQL中的cursor强制使用唯一名字
PL/pgSQL cursors uses PostgreSQL SQL cursors. When we use PL/pgSQL cursor, then PL/pgSQL runtime creates SQL cursor with same name. This behave is unwanted sometimes. But when we know, so PL/pgSQL cursor is +/- text variable with SQL name, we can assign NULL to this variable. Then PL/pgSQL runtime cannot to use a predefined name and it will generate a unique name:
mycursor := null;是关键
DECLARE
mycursor CURSOR FOR SELECT * FROM int4_tbl WHERE f1 > id;
newid INTEGER;
out TEXT;
BEGIN
out := id::text || ' ';
mycursor := null;
OPEN mycursor;
..
25、BLOB到Bytea的转换
原始参考:http://snipplr.com/view/62576/convert-oid-to-bytea/
CREATE OR REPLACE FUNCTION merge_oid(val oid)
RETURNS bytea AS $$
DECLARE
merged bytea;
arr bytea;
BEGIN
FOR arr IN SELECT DATA
FROM pg_largeobject
WHERE loid = val
ORDER BY pageno
LOOP
IF merged IS NULL THEN
merged := arr;
ELSE
merged := merged || arr;
END IF;
END LOOP;
RETURN merged;
END
$$ LANGUAGE plpgsql;
26、URL的解码
CREATE OR REPLACE FUNCTION url_decode(input text) RETURNS text
LANGUAGE plpgsql IMMUTABLE STRICT AS $$
DECLARE
bin bytea = '';
byte text;
BEGIN
FOR byte IN (select (regexp_matches(input, '(%..|.)', 'g'))[1]) LOOP
IF length(byte) = 3 THEN
bin = bin || decode(substring(byte, 2, 2), 'hex');
ELSE
bin = bin || byte::bytea;
END IF;
END LOOP;
RETURN convert_from(bin, 'utf8');
END
$$;
--示例:
ohs=# select url_decode('Hell%C3%B6%20World%21');
url_decode
──────────────
Hellö World!
(1 row)
另一个版本:
CREATE OR REPLACE FUNCTION urldecode_arr(url text)
RETURNS text AS $$
BEGIN
RETURN
(WITH str AS (SELECT CASE WHEN $1 ~ '^%[0-9a-fA-F][0-9a-fA-F]' THEN array[''] END
|| regexp_split_to_array ($1, '(%[0-9a-fA-F][0-9a-fA-F])+', 'i') plain,
ARRAY(SELECT (regexp_matches ($1, '((?:%[0-9a-fA-F][0-9a-fA-F])+)', 'gi'))[1]) encoded)
SELECT coalesce(string_agg(plain[i] || coalesce( convert_from(decode(replace(encoded[i], '%',''), 'hex'), 'utf8'), ''), ''), $1)
FROM str,
(SELECT generate_series(1, array_upper(encoded,1) + 2) i FROM str) blah);
END
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
27、Emacs上的配置(用的很少)
modify file .profile
PSQL_EDITOR=emacs;
export PSQL_EDITOR.
modify file .emacs
(add-to-list 'auto-mode-alist
'("/psql.edit.[0-9]+\\'" . sql-mode))
28、bytea到TEXT的转换
CREATE OR REPLACE FUNCTION bytea_to_text(bytea)
RETURNS text AS $$
SELECT convert_from($1, current_setting('server_encoding'))
$$ LANGUAGE sql;
xxx=# SELECT bytea_to_text('žluťoučký kůň se napil žluté vody'::bytea);
bytea_to_text
───────────────────────────────────
žluťoučký kůň se napil žluté vody
(1 row)
29、年龄的计算
postgres=# SELECT CURRENT_DATE;
date
------------
2011-09-20
(1 row)
postgres=# SELECT EXTRACT(YEAR FROM age('1972-08-20'::date));
date_part
-----------
39
(1 row)
30、master还是slave节点的判断(这个太简单了)
SELECT pg_is_in_recovery();
31、动态修改记录的字段
CREATE OR REPLACE FUNCTION x.setfield2(anyelement, text, text)
RETURNS anyelement
LANGUAGE plpgsql
AS $function$
DECLARE
_name text;
_values text[];
_value text;
_attnum int;
BEGIN
FOR _name, _attnum
IN SELECT a.attname, a.attnum
FROM pg_catalog.pg_attribute a
WHERE a.attrelid = (SELECT typrelid
FROM pg_type
WHERE oid = pg_typeof($1)::oid)
LOOP
IF _name = $2 THEN
_value := $3;
ELSE
EXECUTE 'SELECT (($1).' || quote_ident(_name) || ')::text' INTO _value USING $1;
END IF;
_values[_attnum] := COALESCE('"' || replace(replace(_value, '"', '""'), '''', '''''') || '"', '');
END LOOP;
EXECUTE 'SELECT (' || pg_typeof($1)::text || '''(' || array_to_string(_values,',') || ')'').*' INTO $1;
RETURN $1;
END;
$function$;
postgres=# select setfield2(mypoint '(10,)', 'b', '33');
setfield2
───────────
(10,33)
(1 row)
Time: 9,480 ms
更牛叉简易快速 的版本:
-- Erwin 1
CREATE OR REPLACE FUNCTION x.setfield3(anyelement, text, text)
RETURNS anyelement
AS $body$
DECLARE
_list text;
BEGIN
_list := (
SELECT string_agg(x.fld, ',')
FROM (
SELECT CASE WHEN a.attname = $2
THEN quote_literal($3)
ELSE quote_ident(a.attname)
END AS fld
FROM pg_catalog.pg_attribute a
WHERE a.attrelid = (SELECT typrelid
FROM pg_type
WHERE oid = pg_typeof($1)::oid)
ORDER BY a.attnum
) x
);
EXECUTE '
SELECT ' || _list || '
FROM (SELECT $1.*) x'
USING $1
INTO $1;
RETURN $1;
END;
$body$ LANGUAGE plpgsql;
更时髦的现代版本:
CREATE FUNCTION f_setfield(INOUT _comp_val anyelement, _field text, _val text)
RETURNS anyelement AS
$func$
BEGIN
EXECUTE 'SELECT ' || array_to_string(ARRAY(
SELECT CASE WHEN attname = _field
THEN '$2'
ELSE '($1).' || quote_ident(attname)
END AS fld
FROM pg_catalog.pg_attribute
WHERE attrelid = pg_typeof(_comp_val)::text::regclass
AND attnum > 0
AND attisdropped = FALSE
ORDER BY attnum
), ',')
USING _comp_val, _val
INTO _comp_val;
END
$func$ LANGUAGE plpgsql;
32、触发器里遍历RECORD
CREATE OR REPLACE FUNCTION dynamic_trigger()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
ri RECORD;
t TEXT;
BEGIN
RAISE NOTICE E'\n Operation: %\n Schema: %\n Table: %',
TG_OP,
TG_TABLE_SCHEMA,
TG_TABLE_NAME;
FOR ri IN
SELECT ordinal_position, column_name, data_type
FROM information_schema.columns
WHERE
table_schema = quote_ident(TG_TABLE_SCHEMA)
AND table_name = quote_ident(TG_TABLE_NAME)
ORDER BY ordinal_position
LOOP
EXECUTE 'SELECT ($1).' || ri.column_name || '::text' INTO t USING NEW;
RAISE NOTICE E'Column\n number: %\n name: %\n type: %\n value: %.',
ri.ordinal_position,
ri.column_name,
ri.data_type,
t;
END LOOP;
RETURN NEW;
END; $$;
33、强制chars使用to_char转换
postgres=# select to_char(current_timestamp,'YYYY-MM-DDTHH24:MI:SS');
to_char
-----------------------
2010-02-07THH24:38:10
(1 row)
postgres=# select to_char(current_timestamp,'YYYY-MM-DD"T"HH24:MI:SS');
to_char
---------------------
2010-02-07T07:38:22
(1 row)
34、阻止不必要的char(n)到text的转换
Almost all string functions have parameters of text type. PostgreSQL ensures automatic conversion from char(n) to varchar. But there are a few cases where this behave causes problems - mainly for older applications, where char(n) type was used (there is a clean advice - don't use this type). In some legacy application is comparison between char(n) type and literal constant with spaces and it doesn't work in PostgreSQL now:
SELECT ...
WHERE substr(somecolumn,1,4) = 'ab ';
It doesn't work, because substr function has text parameter - and returns text and 'ab' is not equal to 'ab '. But we can overload substr function with char(n) datatype (it is possible because varchar and char uses same binary format) - and it doesn't do unwanted conversion from char(n) to text:
-- 解决方案
create or replace function substr(character, int, int) returns character as $$
select substr($1::cstring::text,$2,$3)
$$ language sql;
create function substr(char,int,int) returns char
strict immutable language internal as 'text_substr' ;
postgres=# create table f(a character(5));
CREATE TABLE
postgres=# insert into f values('a'),('ab'),('abc');
INSERT 0 3
postgres=# select * from f;
a
-------
a
ab
abc
(3 rows)
postgres=# select * from f where substr(a,1,3) = 'a ';
a
-------
a
(1 row)
postgres=# select * from f where substr(a,1,3) = 'ab ';
a
-------
ab
(1 row)
35、得到function的DDL
postgres=# select oid from pg_proc where proname = 'fu';
oid
-------
16389
(1 row)
postgres=# select pg_get_functiondef(16389);
pg_get_functiondef
---------------------------------------------------------------------
CREATE OR REPLACE FUNCTION public.fu(anyelement)
RETURNS anyelement
LANGUAGE plpgsql
AS $function$
begin
raise notice '%', $1;
return $1;
end; $function$+
(1 row)
36、Russian alphabet (Cyrillic) to ASCII (俄兄弟的字母表转换)
CREATE OR REPLACE FUNCTION urltranslit(text) RETURNS text as $$
SELECT
regexp_replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
translate(
lower($1),
'абвгдеёзийклмнопрстуфхыэъь',
'abvgdeezijklmnoprstufhye'
), 'ж', 'zh'
), 'ц', 'ts'
), 'ч', 'ch'
), 'ш', 'sh'
), 'щ', 'sch'
), 'ю', 'yu'
), 'я', 'ya'
)
,
'[^a-z]+',
'-',
'g'
)
$$ LANGUAGE SQL;
37、to_string, to_array
CREATE OR REPLACE FUNCTION to_string(anyarray, sep text, nullstr text DEFAULT '')
RETURNS text AS $$
SELECT array_to_string(ARRAY(SELECT coalesce(v::text, $3)
FROM unnest($1) g(v)),
$2)
$$ LANGUAGE sql;
postgres=# select to_string(array[1,2,3,4,null,5],',');
to_string
------------
1,2,3,4,,5
(1 row)
postgres=# select to_string(array[1,2,3,4,null,5],',','' );
to_string
------------------
1,2,3,4,,5
(1 row)
CREATE OR REPLACE FUNCTION to_array(text, sep text, nullstr text DEFAULT '')
RETURNS text[] AS $$
SELECT ARRAY(SELECT CASE
WHEN v = $3 THEN NULL::text
ELSE v END
FROM unnest(string_to_array($1,$2)) g(v))
$$ LANGUAGE sql;
postgres=# select to_array('1,2,3,4,,5',',');
to_array
------------------
{1,2,3,4,NULL,5}
(1 row)
postgres=# select to_array('1,2,3,4,,5' ,',','' );
to_array
------------------
{1,2,3,4,NULL,5}
(1 row)
postgres=# select to_array('1,2,3,,5',',')::int[];
to_array
----------------
{1,2,3,NULL,5}
(1 row)
This issue is fixed in modern versions, that allow third parameter for NULL substitution.
postgres=# SELECT array_to_string(ARRAY[10,10,NULL,10], ',','');
array_to_string
─────────────────
10,10,,10
(1 row)
38、"unix timestamp" 和timestamp的转换
CREATE OR REPLACE FUNCTION convert_timestamp_to_xtime(dt timestamp)
RETURNS integer AS $$
SELECT EXTRACT(EPOCH FROM $1)::integer
$$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION convert_xtime_to_timestamp(xdt integer)
RETURNS timestamp AS $$
SELECT to_timestamp($1)::timestamp
$$ LANGUAGE sql;
39、使用time zone的域
CREATE OR REPLACE FUNCTION is_timezone( tz TEXT ) RETURNS BOOLEAN as $$
BEGIN
PERFORM now() AT TIME ZONE tz;
RETURN TRUE;
EXCEPTION WHEN invalid_parameter_value THEN
RETURN FALSE;
END;
$$ language plpgsql STABLE;
CREATE DOMAIN timezone AS CITEXT
CHECK ( is_timezone( value ) );
40、psql中非交互的输入密码
PGPASSWORD或者.pgpass两种方法都可以
postgres=# CREATE USER tom;
CREATE ROLE
postgres=# ALTER USER tom PASSWORD 'tiger';
ALTER ROLE
...
postgres=> \q
[pavel@nemesis ~]$ PGPASSWORD=tiger psql postgres -U tom
Welcome to psql 8.3.8, the PostgreSQL interactive terminal.
Type: \copyright for distribution terms
\h for help with SQL commands
\? for help with psql commands
\g or terminate with semicolon to execute query
\q to quit
postgres=> ...
41、更快的表列表
alter function pg_table_is_visible(oid) cost 10;
42、Array of arrays
postgres=# create type intarr as (f int[]);
CREATE TYPE
postgres=# select * from t;
a
─────────────
{10,20,30}
{1,2,3,4,5}
(2 rows)
postgres=# select array_agg(a) from t;
ERROR: could not find array type for data type integer[]
postgres=# select array_agg(distinct row(a)::intarr) from t;
array_agg
──────────────────────────────────────────
{"(\"{1,2,3,4,5}\")","(\"{10,20,30}\")"}
(1 row)
postgres=# select (unnest(array_agg(distinct row(a)::intarr))::intarr).f from t;
f
─────────────
{1,2,3,4,5}
{10,20,30}
(2 rows)
43、MySQL function group_concat in PostgreSQL
postgres=# SELECT * FROM x;
cat | town
-----+---------
1 | Benešov
1 | Tábor
1 | Písek
2 | Praha
2 | Hradec
3 | Cheb
3 | Aš
(7 rows)
postgres=# SELECT cat, array_to_string(ARRAY(SELECT unnest(array_agg(town))
ORDER BY 1),',')
FROM x
GROUP BY cat;
kat | array_to_string
-----+---------------------
1 | Benešov,Písek,Tábor
3 | Aš,Cheb
2 | Hradec,Praha
(3 rows)
44、MySQL function field in PostgreSQL
select * from pet order by field(species, 'cat', 'dog', 'bird') desc;
+----------+--------+---------+------+------------+------------+
| name | owner | species | sex | birthday | death |
+----------+--------+---------+------+------------+------------+
| Fluffy | Harold | cat | f | 1993-02-04 | NULL |
| Claws | Gwen | cat | m | 1994-03-17 | NULL |
| Buffy | Harold | dog | f | 1989-05-13 | NULL |
| Fang | Benny | dog | m | 1990-08-27 | NULL |
| Bowser | Diane | dog | m | 1989-08-31 | 1995-07-29 |
| Chirpy | Gwen | bird | f | 1998-09-11 | NULL |
| Whistler | Gwen | bird | NULL | 1997-12-09 | NULL |
+----------+--------+---------+------+------------+------------+
-- PG中
select *
from pet
order by species, case species when 'cat' then 1
when 'dog' then 2
when 'bird' then 3
else 0 end desc;
CREATE OR REPLACE FUNCTION field(varchar, VARIADIC text[])
RETURNS int AS $$
SELECT i
FROM generate_subscripts($2,1) g(i)
WHERE $1 = $2[i]
UNION ALL
SELECT 0
LIMIT 1 1
$$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION elt(int, VARIADIC text[])
RETURNS text AS $$
SELECT $2[$1];
$$ LANGUAGE sql;
44、窗口函数中分组中的first
postgres=# SELECT * FROM staff;
┌───────────┬───────────┬────────────┬──────────┐
│ name │ surname │ department │ salary │
├───────────┴───────────┴────────────┴──────────┤
│ Pavel │ Stehule │ 1 │ 10000.00 │
│ Zdenek │ Stehule │ 1 │ 9000.00 │
│ Vladimira │ Stehulova │ 2 │ 9000.00 │
└───────────────────────────────────────────────┘
(3 rows)
postgres=# SELECT *
FROM (SELECT *, max(salary) OVER (PARTITION BY department)
FROM staff) x
WHERE max = salary;
┌───────────┬───────────┬────────────┬──────────┬──────────┐
│ name │ surname │ department │ salary │ max │
├───────────┴───────────┴────────────┴──────────┴──────────┤
│ Pavel │ Stehule │ 1 │ 10000.00 │ 10000.00 │
│ Vladimira │ Stehulova │ 2 │ 9000.00 │ 9000.00 │
└──────────────────────────────────────────────────────────┘
(2 rows)
45、BIGINT到IP的转换
create function bigint_to_inet(bigint) returns inet as $$
select (($1>>24&255)||'.'||($1>>16&255)||'.'||($1>>8&255)||'.'||($1>>0&255))::inet
$$ language sql;
46、让文件成为patch的原始文件
PostgreSQL hackers like commands cporig and difforig. cporig creates copy file with .orig extension. difforig search files with extension .orig and does cumulative diff i.e. patch. Sometimes we need mark modified files from patch. This process can be simplified with unix commands:
cat mnnotation.diff | egrep -o -e "^\*\*\*.*(sql|out|h|c|sgml|y|lex|Makefile)" | replace "*** " "" | xargs cporig
47、字符串到数值的转换
CREATE OR REPLACE FUNCTION read_int(varchar)
RETURNS int AS $$
SELECT CASE WHEN $1 ~ e'^\\d+$' THEN $1::int END;
$$ LANGUAGE SQL IMMUTABLE STRICT;
48、timestamp中去掉微秒
postgres=# select current_timestamp;
now
------------------------------
2009-05-23 20:42:21.57899+02
(1 row)
Time: 196,784 ms
postgres=# select current_timestamp::timestamp(2);
now
------------------------
2009-05-23 20:42:27.74
(1 row)
Time: 51,861 ms
postgres=# select current_timestamp::timestamp(0);
now
---------------------
2009-05-23 20:42:31
(1 row)
Time: 0,729 ms
49、Attention on IS NULL and IS NOT NULL operators for composite types
One may think that !(x IS NULL) = x IS NOT NULL is true in all cases. But there is an exception - composite types. When one field of a composite value is NULL and another field is NOT NULL, then result of both operators is false. IS NULL is true, only when all fields are NULL. IS NOT NULL is true, only when all fields are NOT NULL. For any case in between, then both operators return false.
CREATE OR REPLACE FUNCTION test_isnull()
RETURNS TABLE (a int, b int, isnull bool, isnotnull bool) AS $$
DECLARE r foo;
BEGIN
isnull := r IS NULL; isnotnull := r IS NOT NULL; RETURN NEXT;
a := NULL; b := 10; r := ROW(a, b);
isnull := r IS NULL; isnotnull := r IS NOT NULL; RETURN NEXT;
a := 10; b := 10; r := ROW(a, b);
isnull := r IS NULL; isnotnull := r IS NOT NULL; RETURN NEXT;
RETURN;
END;
$$ LANGUAGE plpgsql;
SELECT * FROM test_isnull();
a | b | isnull | isnotnull
----+----+--------+-----------
| | t | f
| 10 | f | f
10 | 10 | f | t
(3 rows)
50、Faster execution of PL/pgSQL function with explicit casting
create or replace function test1()
returns int as $$
declare s int := 0;
begin
for i in 1..100000 loop s := 4e3; end loop; -- numeric constant
return s;
end;
$$ language plpgsql immutable;
create or replace function test2()
returns int as $$
declare s int := 0;
begin
for i in 1..100000 loop s := 4e3::int; end loop; -- int
return s;
end;
$$ language plpgsql immutable;
postgres=# select test1();
test1
-------
4000
(1 row)
Time: 176,623 ms
postgres=# select test2();
test2
-------
4000
(1 row)
Time: 47,673 ms
Same problem is implicit conversion in return statement:
create or replace function test1() returns int as $$begin return 4e1; end; $$ language plpgsql;
create or replace function test2() returns int as $$begin return 4e1::int; end; $$ language plpgsql;
postgres=# select count(test1()) from generate_series(1,100000);
count
--------nebo v konverzi návratové hodnoty
100000
(1 row)
Time: 682,005 ms
postgres=# select count(test2()) from generate_series(1,100000);
count
--------
100000
(1 row)
Time: 528,099 ms
51、PL函数的并行执行的保护
Theoretically we should to detect an run of any function - but these information are not accessible from user interface (without C coding). We should to look to list of processed queries or we should to use advisory locks. Test based on select from pg_stat_activity view is useful only for function directly called by user. Functions called from other functions are invisible.
create or replace function long_run()
returns void as $$
begin
if exists(select procpid
from pg_stat_activity
where procpid <> pg_backend_pid()
and current_query like '%long_run(%')
then
raise notice 'procedure is running';
return;
end if;
perform pg_sleep(10);
end$$
language plpgsql volatile;
CREATE FUNCTION
Next possibility is using advisory locks http://www.postgresql.org/docs/8.3/interactive/functions-admin.html#FUNCTIONS-ADVISORY-LOCKS.
create or replace function long_run()
returns void as $$
declare foid oid:= 'long_run'::regproc::oid;
begin
if pg_try_advisory_lock(foid::bigint) then
perform pg_sleep(10);
perform pg_advisory_unlock(foid::bigint);
else
raise notice 'procedure is running';
end if;
return;
end;
$$ language plpgsql;
52、使用科学计数法
create or replace function floating_format(double precision, int)
returns varchar as $$
sprintf("%.$_[1]e", $_[0])
$$ language plperl;
create or replace function floating_format(double precision)
returns varchar as $$
sprintf("%e", $_[0])
$$ language plperl;
postgres=# select floating_format(10.223);
floating_format
-----------------
1.022300e+01
(1 row)
postgres=# select floating_format(10.223,2);
floating_format
-----------------
1.02e+01
(1 row)
postgres=# select floating_format(10.223,4);
floating_format
-----------------
1.0223e+01
(1 row)
53、终端的配置
emacs (.emacs)
(setq-default truncate-lines t)
(ansi-color-for-comint-mode-on)
(setq lazy-lock-defer-on-scrolling t)
(setq inhibit-startup-echo-area-message t)
(setq inhibit-startup-message t)
(show-paren-mode t)
(setq show-paren-style 'mixed)
(fset 'yes-or-no-p 'y-or-n-p)
(global-set-key "\M-g" 'goto-line)
(setq sql-database "postgres") ;; name of most often used database
;; don't use pager, empty title
(setq sql-postgres-options '("-P" "pager=off" "-P" "title= "))
;; interesting idea is setting of start of every new command on new line
;; (setq sql-postgres-options '("-P" "pager=off" "-P" "title= " "-v" "PROMPT1=%/.%n%#\n" "-v" "PROMPT2=" "-v" "PROMPT3="))
psql
export PAGER="less -RSX"
psql ... -P pager=always
54、表在不同schema间的移动
create or replace function mvtable(src_schema varchar, dest_schema varchar, mask varchar)
returns void as $$
declare r record;
begin
for r in
select *
from information_schema.tables
where table_schema = src_schema and table_name like mask
loop
execute 'alter table ' || quote_ident(r.table_schema)
|| '.' || quote_ident(r.table_name)
|| ' set schema ' || quote_ident(dest_schema);
end loop;
return;
end;
$$ language plpgsql;
55、使用CTE完成数的不同进制转换
postgres=# create function to_base(num int, base int = 10)
returns varchar as $$
with recursive z as (
select $1 as a, null::int as b, 0 as i
union all
select a/$2, a % $2, i+1
from z
where a > 0
)
select array_to_string(array(select substring('0123456789abcdef' from b + 1 for 1)
from z
where i > 0
order by i desc),'');
$$ language sql immutable;
postgres=# select to_base(255);
to_base
---------
255
(1 row)
postgres=# select to_base(255,16);
to_base
---------
ff
(1 row)
postgres=# select to_base(255,8);
to_base
---------
377
(1 row)
postgres=# select to_base(255,2);
to_base
----------
11111111
(1 row)
56、获取表定义中的缺省值
CREATE OR REPLACE FUNCTION eval(varchar)
RETURNS varchar AS $$
DECLARE result varchar;
BEGIN
EXECUTE 'SELECT ' || $1 INTO result;
RETURN result;
END;$$ LANGUAGE plpgsql STRICT;
CREATE OR REPLACE FUNCTION defaults(text,
OUT attname name, OUT type varchar, OUT default_val varchar)
RETURNS SETOF RECORD AS $$
SELECT a.attname,
pg_catalog.format_type(a.atttypid, a.atttypmod),
(SELECT eval(pg_catalog.pg_get_expr(d.adbin, d.adrelid))
FROM pg_catalog.pg_attrdef d
WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef)
FROM pg_catalog.pg_attribute a
WHERE a.attrelid = $1::regclass::oid AND a.attnum > 0 AND NOT a.attisdropped
ORDER BY a.attnum
$$ LANGUAGE SQL STRICT;
57、函数Function to_bin
create or replace function to_bin(int)
returns varchar as $$
declare r varchar;
begin
execute 'select ' || $1 || '::bit(' || case when $1 = 0 then 1 else trunc(log(2, $1)) + 1 end || ')' into r;
return r;
end;
$$ language plpgsql immutable strict;
create or replace function bin(varchar)
returns varchar as $$
declare r varchar;
begin
execute 'select b''' || $1 || '''::int' into r;
return r;
end;
$$ language plpgsql immutable strict;
postgres=# select to_bin(5), bin('101');
to_bin | bin
--------+-----
101 | 5
(1 row)
58、SQL code for hierarchical break-down of domains(牛)
postgres=# create or replace function domain_list(varchar[])
returns setof varchar as $$
select array_to_string($1,’.')
union all
select domain_list($1[2:array_upper($1,1)])
where array_upper($1,1) > 1
$$ language sql immutable strict;
CREATE FUNCTION
postgres=# select domain_list(array['a','b','c','d']);
domain_list
————-
a.b.c.d
b.c.d
c.d
d
(4 rows)
postgres=# create or replace function domain_list(varchar)
returns setof varchar as $$
select domain_list(string_to_array($1, ‘.’))
$$ language sql immutable strict;
CREATE FUNCTION
postgres=# select domain_list(’a.b.c.d’);
domain_list
————-
a.b.c.d
b.c.d
c.d
d
59、Solution for migration to 8.3 (problem with tsearch2) (略老)
PostgreSQL documentation has a section with some advice that should to help with migration of older application (based on TSearch2) to PostgreSQL 8.3 and higher (with integrated fulltext). This works well when you migrate databases individually, but it fails when you use pg_dump. As documented, you should do some pre steps - import TSearch compatible module into template1. But complete dump generates databases from template0 (you can't to change it). So you cannot load this dump, and you have to edit dump (advice - dump structure and data to two different files).
so, when you find bug:
ERROR: type "tsvector" already exists
You have to add import of tsearch2.sql to every creation database part in dump.
DROP DATABASE brigady;
CREATE DATABASE brigady WITH TEMPLATE = template0 ENCODING = 'UTF8';
--
-- PostgreSQL database dump complete
--
\connect brigady
-- new line
\i /usr/local/pgsql/share/contrib/tsearch2.sql
SET search_path = public, pg_catalog;
...
60、让函数更快
Much faster than PL/pgSQL - especially when they are wrappers around integrated features (in this example - integrated qsort). PL/pgSQL is usually fast to interpret - I know only about two slow operations - array update and string update. It's based on the internal architecture - every change generate a new object - it isn't significant for small objects (less than 1000), but for large objects this operations needs significant time. However, we can go a different way, using integrated functionality (this way is usually the fastest in any embedded language).
PostgreSQL use internal qsort -so, I wrote qsort in PL/pgSQL too (results will be comparable):
CREATE OR REPLACE FUNCTION quicksort(l integer, r integer, a int[])
RETURNS int[] AS $$
DECLARE akt int[] = a;
i integer := l; j integer := r; x integer = akt[(l+r) / 2];
w integer;
BEGIN
LOOP
WHILE akt[i] < x LOOP i := i + 1; END LOOP;
WHILE x < akt[j] loop j := j - 1; END LOOP;
IF i <= j THEN
w := akt[i];
akt[i] := akt[j]; akt[j] := w;
i := i + 1; j := j - 1;
END IF;
EXIT WHEN i > j;
END LOOP;
IF l < j THEN akt := quicksort(l,j,akt); END IF;
IF i < r then akt := quicksort(i,r,akt); END IF;
RETURN akt;
END;
$$ LANGUAGE plpgsql STRICT IMMUTABLE;
postgres=# SELECT array_upper(quicksort(1,10000,array_agg(a)),1) FROM test;
array_upper
-------------
10000
(1 row)
Time: 5918,531 ms
CREATE OR REPLACE FUNCTION sort(anyarray)
RETURNS anyarray AS $$
SELECT array(SELECT * FROM unnest($1) ORDER BY 1);
$$ language sql;
postgres=# SELECT array_upper(sort(array_agg(a)),1) FROM test;
array_upper
-------------
10000
(1 row)
Time: 35,980 ms
参考
[1] https://postgres.cz/wiki/PostgreSQL_SQL_Tricks:
https://postgres.cz/wiki/PostgreSQL_SQL_Tricks
[2] https://postgres.cz/wiki/PostgreSQL_SQL_Tricks_III:
https://postgres.cz/wiki/PostgreSQL_SQL_Tricks_III