Wednesday, May 2, 2012

PostgreSQL+PostGIS cheat sheet

1. Distinct Count
    solution 1:
    select count(*) from (select distinct * from table where column='') as dis

2. Check redundant tuple
    soluction 1 (self join)
       SELECTp.place_id, p.href,s.place_id,s.href
       FROM place p
       INNER JOIN place s ON p.href=s.href
      WHERE p.place_id &s s.place_id
      ORDER BY p.place_id, s.place_id;

3. Materialized View (http://tech.jonathangardner.net/wiki/PostgreSQL/Materialized_Views)
    a. snapshot (view definition is never changed)
        1) create a table to store view info
            CREATE TABLE matviews(
                      mv_name NAME NOT NULL PRIMARY KEY,
                      b_name NAME NOT NULL,
                      last_refresh TIMESTAMP WITH TIME ZONE
            );


        2)create a function to insert a row in Mat_VIEWS and create a materialized view
            CREATE OR REPLACE FUNCTION create_matview(NAME,NAME)
                   RETURNS VOID
                   SECURITY DEFINER
                   LANGUAGE plpgsql AS '
                         DECLARE
                                 matview ALIAS FOR $1;
                                 view_name ALIAS FOR $2;
                                 entry matviews%ROWTYPE;
                         BEGIN
                                 SELECT * INTO entry FROM matviews WHERE mv_name=matview;
                   
                                 IF FOUND THEN
                                           RAISE EXCEPTION ''Materialized view ''''%'''' already exists.'', matview;
                                 END IF;


                                 EXECUTE ''REVOKE ALL ON '' || view_name || ''FROM PUBLIC'';
                                 EXECUTE "GRANT SELECT ON '' || view_name || '' TO PUBLIC'';
                                 EXECUTE ''CREATE TABLE '' || matview|| AS SELECT * FROM ''||view_name;
                                 EXECUTE ''REVOKE ALL ON '' || matview || ' FROM PUBLIC'';
                                 EXECUTE ''GRANT SELECT ON '' || matview || '' TO PUBLIC'';


                                 INSERT INTO matviews (mv_name, b_name,last_refresh)
                                 VALUES(matview, view_name, CURRENT_TIMESTAMP);


                                 RETURN;
                         END
                  ';
         3) create a function to drop a materialized view
             CREATE OR REPLACE FUNCTION drop_matview(NAME) RETURN VOID
                   SECURITY DEFINER
                   LANGUAGE plpgsql AS '
                         DECLARE
                              matview ALIAS FOR $1;
                              entry matviews%ROWTYPE;
                         BEGIN
                              SELECT * INTO entry FROM matview WHERE mv_name=matview;
                              IF NOT FOUND THEN
                                        RAISE EXCEPTION ''Materialized view % does not exist.'',matview;
                              END IF;
                              EXECUTE ''DROP TABLE'' || matview;
                              DELETE FROM matviews WHERE mv_name=matview;
                              RETURN;
                         END
                  ';


          4) create a function to refresh a materialized view
              CREATE OR REPLACE FUNCTION refresh_matview(NAME) RETURNS VOID
                    SECURITY DEFINER
                    LANGUAGE plpgsql AS '
                         DECLARE
                               matview ALIAS $1;
                               entry matviews%ROWTYP;
                         BEGIN
                               SELECT * INTO entry FROM matviews WHERE mv_name=matview;
                               IF NOT FOUND THEN
                                         RAISE EXCEPTION ''Materialized view % does not exist.'',matview;
                               END IF;
                               EXECUTE '' DELETE FROM '' || matview;
                               EXECUTE ''INSERT INTO ''||matview || '' SELECT * FROM '' || entry.b_name;
                               UPDATE matviews
                                        SET last_refresh = CURRENT_TIMESTAMP
                                        WHERE mv_name=matview;
                               RETURN;
                         END


                   ';


   b. eager (update whenever the base view is changed)
4. delete existing duplicate rows in a table

    Before execute the following statement, make sure you have enough buffers for the session (refer to cheat 5 and 6).
 
    solution1
    DELETE FROM table USING table ta
    WHERE table.key=ta.key AND table.id>ta.id;


    solution2
    DELETE FROM table
    WHERE ctid NOT IN
    (SELECT MAX(s.ctid) FROM table s GROUP BY s.column_has_be_distinct);


    solution3
    CREATE OR REPLACE FUNCTION remove_duplicates(text,text) RETURN VOID AS $$
         DECLARE
                tablename ALIAS FOR $1;
                duplicate_column ALIAS FRO $2;
         BEGIN
                EXECUTE 'CREATE TEMPORARY TABLE TABLE_DISTINCT_' ||tablename||'
                           AS (SELECT DISTINCT ON ('||duplicate_column||') * FROM ' ||tablename||
                                  ORDER BY '||duplicate_column||' ASC);';
               EXECUTE 'DELETE FROM ' || tablename||';';
               EXECUTE 'INSERT INTO ' || tablename||' (SELECT * FROM _DISTINCT_' || tablename||');';
               EXECUTE 'DROP TABLE_DISTINCT_'|| tablename||';';
               RETURN;
         END;
   $$LANGUAGE plpgsql;


   solution4
   CREATE TEMPORARY TABLE t_temp ON COMMIT DROP AS
   SELECT DISTINCT ON ('column_name') * FROM table;
   TRUNCATE table;
   INSERT INTO table SELECT * FROM t_temp;


5. find out the size of table
    solution:
           SELECT pg_size_pretty(pg_relation_size('tablename'));

6. set temporary buffers
    solution:
          SET temp_buffers =' XXXMB';

7. delete table rows which some columns in that table (a) don't exist in the other table (b).
    solution:
          DELETE FROM a WHERE a.column_name NOT IN (SELECT column_name FROM b);

8. Backup
   Solution1:
          pg_dump
          EX.  login db at localhost as postgres and dump database poi2 into poi.backup.gz
                  pg_dump -u postgres -h localhost poi2 | gzip>poi.backup.gz


   Solution2:
         file system level backup
         Ex. 
                tar -cf poi2.backup.tar /usr/local/pgsql/data
         PS.  The database server must be shutdown before doing file system level backup and restore.


   Solutioin3:
        Continuous archiving
        EX. 
               a. enable wal in postgresql.conf
                   wal_level=archive
                   archive_mode=on
                   archive_command='test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'
               b.SELECT pg_start_backup('label',ture);
               c. perform file system level backup, like tar
               d. SELECT pg_stop_backup();
touch /var/lib/pgsql/backup_in_progress
psql -c "select pg_start_backup('hot_backup');"
tar -cf /var/lib/pgsql/backup.tar /var/lib/pgsql/data/
psql -c "select pg_stop_backup();"
rm /var/lib/pgsql/backup_in_progress
tar -rf /var/lib/pgsql/backup.tar /var/lib/pgsql/archive/

   Solution4: 
        pg_basebackup
        EX.
               a. enable local replicate permission for posgres @localhost
               b. enable max_wal_senders=1
               c. pg_basebackup -U postgres -D /backup/data -F tar -z -l label


9. PostGIS import shape files into one table
   Solution 1:
     shp2pgsql -s 915 -a[-c] -g geom shapfile.shp schema.table | psql -U username -W dbname
     -s: srid, -g: geometry_column_name


10. Test if geometry column is valid
  Solution 1:
      select ST_IsValid(geom) from table;
  Solution 2:
      select ST_IsValidDetail(geom) from table;
  Solution 3:
      select ST_IsEmpty(geom) from table;
11. Test the geometry type
  Solution 1:
      select ST_GeometryType(geom) from table;


12. Mapping points in table a to a polygon in a table b
   Solution1:
       select * from table1
       where st_within(geom, polygon);


13. get a boundary from a collection of geometries
   Solution 1:
       select st_extent(geom) from table;


14. Performance tuning:
      Install pgtune (apt-get update; apt-get install pgtune)  -> pgtune -i postgresql.conf -o psql.conf


      create dedicate index for query (considering the order of fields, and the join condition)
      Use multi-fields index
      the order of fields in multi-fields index (A, B, C can be used to find AB, but not BC)
      Vacuum the tables after delete, update, insert, or create index.
      Order of fields and order of joint have a big impact on performance.
      Explicit casting: "where x=3"   v.s. "where x=cast(3 as smallint)",  "abs(dest) = cast(111 as smallint)" v.s. "dest =111 or dest=-111"  have huge different.
      set enable_seqscan=no
      Avoid nested loop, merge and sort is faster.
      Use temp table
      Add sequence in query to monitor the progress.  select nextval('seq_progress');   select currval('seq_progress');
      Enable stats_start_collector and stats_row_level to get auto vacuum
      Partial index:  create index index_name on table1(column_name) where column_nam like 'America%'
      Specify sort_memory, temp_buffers for each connections or query.






      
      



No comments:

Post a Comment