Re: count(*) performance improvement ideas

From: PFC <lists(at)peufeu(dot)com>
To: "Stephen Denne" <Stephen(dot)Denne(at)datamail(dot)co(dot)nz>, "Tom Lane" <tgl(at)sss(dot)pgh(dot)pa(dot)us>
Cc: "Pavan Deolasee" <pavan(dot)deolasee(at)gmail(dot)com>, "Mark Mielke" <mark(at)mark(dot)mielke(dot)cc>, PostgreSQL-development <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: count(*) performance improvement ideas
Date: 2008-04-17 00:30:44
Message-ID: op.t9qbpigrcigqcu@apollo13.peufeu.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers


>> The whole thing is a bit of an abuse of what the mechanism
>> was intended
>> for, and so I'm not sure we should rejigger GUC's behavior to make it
>> more pleasant, but on the other hand if we're not ready to provide a
>> better substitute ...
>
> In my experiments with materialized views, I identified these problems
> as "minor" difficulties. Resolving them would allow further abuse ;)

Let's try this quick & dirty implementation of a local count-delta cache
using a local in-memory hashtable (ie. {}).
Writing the results to stable storage in an ON COMMIT trigger is left as
an exercise to the reader ;)
Performance isn't that bad, calling the trigger takes about 50 us.
Oldskool implementation with a table is at the end, it's about 10x slower.

Example :

INSERT INTO victim1 (key) VALUES ('one'),('two'),('two');
INSERT 0 3
Temps : 1,320 ms
test=# SELECT * FROM get_count();
key | cnt
-----+-----
two | 2
one | 1

CREATE OR REPLACE FUNCTION clear_count( )
RETURNS VOID
AS $$
GD.clear()
$$ LANGUAGE plpythonu;

CREATE OR REPLACE FUNCTION update_count( key TEXT, delta INTEGER )
RETURNS INTEGER
AS $$
if key in GD:
GD[key] += delta
else:
GD[key] = delta
return GD[key]
$$ LANGUAGE plpythonu;

CREATE TYPE count_data AS ( key TEXT, cnt INTEGER );

CREATE OR REPLACE FUNCTION get_count( )
RETURNS SETOF count_data
AS $$
return GD.iteritems()
$$ LANGUAGE plpythonu;

CREATE TABLE victim( id SERIAL PRIMARY KEY, key TEXT NOT NULL );
INSERT INTO victim (key) SELECT (random() * 300)::INTEGER::TEXT FROM
generate_series( 1,100000 );

CREATE TABLE victim1( id SERIAL PRIMARY KEY, key TEXT NOT NULL );

\timing
INSERT INTO victim1 SELECT * FROM victim;
TRUNCATE TABLE victim1;

SELECT clear_count();
INSERT INTO victim1 SELECT * FROM victim RETURNING update_count( key, 1 );
SELECT * FROM get_count();
TRUNCATE TABLE victim1;

CREATE OR REPLACE FUNCTION counter_trigger_f()
RETURNS TRIGGER
LANGUAGE plpgsql
AS
$$
DECLARE
BEGIN
IF TG_OP = 'INSERT' THEN
PERFORM update_count( NEW.key, 1 );
RETURN NEW;
ELSEIF TG_OP = 'UPDATE' THEN
-- update topic
IF NEW.key != OLD.key THEN
PERFORM update_count( OLD.key, -1 ), update_count( NEW.key, 1
);
END IF;
RETURN NEW;
ELSE -- DELETE
PERFORM update_count( OLD.key, -1 );
RETURN OLD;
END IF;
END;
$$;

CREATE TRIGGER count_trigger BEFORE INSERT OR UPDATE OR DELETE ON victim1
FOR EACH ROW EXECUTE PROCEDURE counter_trigger_f();

SELECT clear_count();
INSERT INTO victim1 SELECT * FROM victim;
SELECT * FROM get_count();

SELECT clear_count();
TRUNCATE TABLE victim1;
INSERT INTO victim1 (key) VALUES ('one'),('two'),('two');
SELECT * FROM get_count();
DELETE FROM victim1 WHERE key='two';
SELECT * FROM get_count();
UPDATE victim1 SET key='three' WHERE key='one';
SELECT * FROM get_count();
DELETE FROM victim1;
SELECT * FROM get_count();

CREATE TABLE counts( key TEXT PRIMARY KEY, total INTEGER NOT NULL DEFAULT
0 );

CREATE OR REPLACE FUNCTION table_counter_trigger_f()
RETURNS TRIGGER
LANGUAGE plpgsql
AS
$$
DECLARE
BEGIN
IF TG_OP = 'INSERT' THEN
UPDATE counts SET total=total+1 WHERE key=NEW.key;
IF NOT FOUND THEN INSERT INTO counts (key,total) VALUES
(NEW.key,1); END IF;
RETURN NEW;
ELSEIF TG_OP = 'UPDATE' THEN
-- update topic
IF NEW.key != OLD.key THEN
UPDATE counts SET total=total-1 WHERE key=OLD.key;
UPDATE counts SET total=total+1 WHERE key=NEW.key;
IF NOT FOUND THEN INSERT INTO counts (key,total) VALUES
(NEW.key,1); END IF;
END IF;
RETURN NEW;
ELSE -- DELETE
UPDATE counts SET total=total-1 WHERE key=OLD.key;
RETURN OLD;
END IF;
END;
$$;

CREATE TABLE victim2( id SERIAL PRIMARY KEY, key TEXT NOT NULL );

CREATE TRIGGER table_count_trigger BEFORE INSERT OR UPDATE OR DELETE ON
victim2 FOR EACH ROW EXECUTE PROCEDURE table_counter_trigger_f();
SELECT * FROM counts;
TRUNCATE TABLE victim2;
INSERT INTO victim2 SELECT * FROM victim;

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Stephen Denne 2008-04-17 00:33:41 Re: count(*) performance improvement ideas
Previous Message Tom Lane 2008-04-17 00:20:59 Re: count(*) performance improvement ideas