solution 1:
select count(*) from (select distinct * from table where column='') as dis
2. Check redundant tuple
soluction 1 (self join)
SELECTp.place_id, p.href,s.place_id,s.href
FROM place p
INNER JOIN place s ON p.href=s.href
WHERE p.place_id &s s.place_id
ORDER BY p.place_id, s.place_id;
3. Materialized View (http://tech.jonathangardner.net/wiki/PostgreSQL/Materialized_Views)
a. snapshot (view definition is never changed)
1) create a table to store view info
CREATE TABLE matviews(
mv_name NAME NOT NULL PRIMARY KEY,
b_name NAME NOT NULL,
last_refresh TIMESTAMP WITH TIME ZONE
);
2)create a function to insert a row in Mat_VIEWS and create a materialized view
CREATE OR REPLACE FUNCTION create_matview(NAME,NAME)
RETURNS VOID
SECURITY DEFINER
LANGUAGE plpgsql AS '
DECLARE
matview ALIAS FOR $1;
view_name ALIAS FOR $2;
entry matviews%ROWTYPE;
BEGIN
SELECT * INTO entry FROM matviews WHERE mv_name=matview;
IF FOUND THEN
RAISE EXCEPTION ''Materialized view ''''%'''' already exists.'', matview;
END IF;
EXECUTE ''REVOKE ALL ON '' || view_name || ''FROM PUBLIC'';
EXECUTE "GRANT SELECT ON '' || view_name || '' TO PUBLIC'';
EXECUTE ''CREATE TABLE '' || matview|| AS SELECT * FROM ''||view_name;
EXECUTE ''REVOKE ALL ON '' || matview || ' FROM PUBLIC'';
EXECUTE ''GRANT SELECT ON '' || matview || '' TO PUBLIC'';
INSERT INTO matviews (mv_name, b_name,last_refresh)
VALUES(matview, view_name, CURRENT_TIMESTAMP);
RETURN;
END
';
3) create a function to drop a materialized view
CREATE OR REPLACE FUNCTION drop_matview(NAME) RETURN VOID
SECURITY DEFINER
LANGUAGE plpgsql AS '
DECLARE
matview ALIAS FOR $1;
entry matviews%ROWTYPE;
BEGIN
SELECT * INTO entry FROM matview WHERE mv_name=matview;
IF NOT FOUND THEN
RAISE EXCEPTION ''Materialized view % does not exist.'',matview;
END IF;
EXECUTE ''DROP TABLE'' || matview;
DELETE FROM matviews WHERE mv_name=matview;
RETURN;
END
';
4) create a function to refresh a materialized view
CREATE OR REPLACE FUNCTION refresh_matview(NAME) RETURNS VOID
SECURITY DEFINER
LANGUAGE plpgsql AS '
DECLARE
matview ALIAS $1;
entry matviews%ROWTYP;
BEGIN
SELECT * INTO entry FROM matviews WHERE mv_name=matview;
IF NOT FOUND THEN
RAISE EXCEPTION ''Materialized view % does not exist.'',matview;
END IF;
EXECUTE '' DELETE FROM '' || matview;
EXECUTE ''INSERT INTO ''||matview || '' SELECT * FROM '' || entry.b_name;
UPDATE matviews
SET last_refresh = CURRENT_TIMESTAMP
WHERE mv_name=matview;
RETURN;
END
';
b. eager (update whenever the base view is changed)
4. delete existing duplicate rows in a table
Before execute the following statement, make sure you have enough buffers for the session (refer to cheat 5 and 6).
solution1
DELETE FROM table USING table ta
WHERE table.key=ta.key AND table.id>ta.id;
solution2
DELETE FROM table
WHERE ctid NOT IN
(SELECT MAX(s.ctid) FROM table s GROUP BY s.column_has_be_distinct);
solution3
CREATE OR REPLACE FUNCTION remove_duplicates(text,text) RETURN VOID AS $$
DECLARE
tablename ALIAS FOR $1;
duplicate_column ALIAS FRO $2;
BEGIN
EXECUTE 'CREATE TEMPORARY TABLE TABLE_DISTINCT_' ||tablename||'
AS (SELECT DISTINCT ON ('||duplicate_column||') * FROM ' ||tablename||
ORDER BY '||duplicate_column||' ASC);';
EXECUTE 'DELETE FROM ' || tablename||';';
EXECUTE 'INSERT INTO ' || tablename||' (SELECT * FROM _DISTINCT_' || tablename||');';
EXECUTE 'DROP TABLE_DISTINCT_'|| tablename||';';
RETURN;
END;
$$LANGUAGE plpgsql;
solution4
CREATE TEMPORARY TABLE t_temp ON COMMIT DROP AS
SELECT DISTINCT ON ('column_name') * FROM table;
TRUNCATE table;
INSERT INTO table SELECT * FROM t_temp;
5. find out the size of table
solution:
SELECT pg_size_pretty(pg_relation_size('tablename'));
6. set temporary buffers
solution:
SET temp_buffers =' XXXMB';
7. delete table rows which some columns in that table (a) don't exist in the other table (b).
solution:
DELETE FROM a WHERE a.column_name NOT IN (SELECT column_name FROM b);
8. Backup
Solution1:
pg_dump
EX. login db at localhost as postgres and dump database poi2 into poi.backup.gz
pg_dump -u postgres -h localhost poi2 | gzip>poi.backup.gz
Solution2:
file system level backup
Ex.
tar -cf poi2.backup.tar /usr/local/pgsql/data
PS. The database server must be shutdown before doing file system level backup and restore.
Solutioin3:
Continuous archiving
EX.
a. enable wal in postgresql.conf
wal_level=archive
archive_mode=on
archive_command='test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'
b.SELECT pg_start_backup('label',ture);
c. perform file system level backup, like tar
d. SELECT pg_stop_backup();
touch /var/lib/pgsql/backup_in_progress
psql -c "select pg_start_backup('hot_backup');"
tar -cf /var/lib/pgsql/backup.tar /var/lib/pgsql/data/
psql -c "select pg_stop_backup();"
rm /var/lib/pgsql/backup_in_progress
tar -rf /var/lib/pgsql/backup.tar /var/lib/pgsql/archive/
pg_basebackup
EX.
a. enable local replicate permission for posgres @localhost
b. enable max_wal_senders=1
c. pg_basebackup -U postgres -D /backup/data -F tar -z -l label
9. PostGIS import shape files into one table
Solution 1:
shp2pgsql -s 915 -a[-c] -g geom shapfile.shp schema.table | psql -U username -W dbname
-s: srid, -g: geometry_column_name10. Test if geometry column is valid
Solution 1:
select ST_IsValid(geom) from table;
Solution 2:
select ST_IsValidDetail(geom) from table;
Solution 3:
select ST_IsEmpty(geom) from table;
11. Test the geometry type
Solution 1:
select ST_GeometryType(geom) from table;
12. Mapping points in table a to a polygon in a table b
Solution1:
select * from table1
where st_within(geom, polygon);
13. get a boundary from a collection of geometries
Solution 1:
select st_extent(geom) from table;
14. Performance tuning:
Install pgtune (apt-get update; apt-get install pgtune) -> pgtune -i postgresql.conf -o psql.conf
create dedicate index for query (considering the order of fields, and the join condition)
Use multi-fields index
the order of fields in multi-fields index (A, B, C can be used to find AB, but not BC)
Vacuum the tables after delete, update, insert, or create index.
Order of fields and order of joint have a big impact on performance.
Explicit casting: "where x=3" v.s. "where x=cast(3 as smallint)", "abs(dest) = cast(111 as smallint)" v.s. "dest =111 or dest=-111" have huge different.
set enable_seqscan=no
Avoid nested loop, merge and sort is faster.
Use temp table
Add sequence in query to monitor the progress. select nextval('seq_progress'); select currval('seq_progress');
Enable stats_start_collector and stats_row_level to get auto vacuum
Partial index: create index index_name on table1(column_name) where column_nam like 'America%'
Specify sort_memory, temp_buffers for each connections or query.
No comments:
Post a Comment