Re: Performance die when COPYing to table with bigint PK

From: Robert Ayrapetyan <robert(dot)ayrapetyan(at)comodo(dot)com>
To: Kevin Grittner <Kevin(dot)Grittner(at)wicourts(dot)gov>
Cc: Vitalii Tymchyshyn <tivv00(at)gmail(dot)com>, pgsql-performance(at)postgresql(dot)org
Subject: Re: Performance die when COPYing to table with bigint PK
Date: 2011-08-03 14:39:16
Message-ID: CAAboi9t8i_+W=c-bWCeYsc4mwYyT7+mGQ5QymtO6MLcH96YdqA@mail.gmail.com
Lists: pgsql-performance

Hi.

Timings for your test:

foo=# create table bi (big bigint not null, medium int not null);
CREATE TABLE
foo=# insert into bi with x(n) as (select generate_series(1, 1000000))
foo-# select n + 5000000000, n from x;
INSERT 0 1000000
foo=# \timing on
Timing is on.
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 211.205 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 2789.607 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 206.712 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 2959.679 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 594.584 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 3651.206 ms
foo=# create unique index bi_medium on bi (medium);
CREATE INDEX
Time: 781.407 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 42.177 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5671.883 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 139.418 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5668.894 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 204.479 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 6530.010 ms
foo=# drop index bi_medium;
DROP INDEX
Time: 212.038 ms
foo=# create unique index bi_big on bi (big);
CREATE INDEX
Time: 650.492 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 39.818 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 8093.276 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 282.165 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5988.694 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 245.859 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5702.236 ms
foo=# \timing off
Timing is off.

Now please run my test:

CREATE TABLESPACE tblsp_ix LOCATION '/foo';
CREATE SCHEMA test;
CREATE TABLE test.t
(
id_big bigint, --PRIMARY KEY USING INDEX TABLESPACE tblsp_ix,
ts timestamp NOT NULL,
ip inet,
id_medium integer NOT NULL,
id_small smallint NOT NULL,
id_smalll smallint NOT NULL
);
CREATE INDEX ix_t ON test.t
USING btree (ts, ip, id_medium, id_small) TABLESPACE tblsp_ix;

gen_data.csh
-------------cut here-----------------------------------------------------------
#!/bin/tcsh
# Generate $2 tab-separated rows of test.t data into file $1.
set f = $1
set lines_cnt = $2
rm ${f}
set id_big = -2147483648
set time_t = 1000000000
set ts = `date -r ${time_t}`
set ip = "127.0.0.1"
set id_medium = -2147483648
set id_small = 0
set echo_style = both
while ( $lines_cnt > 0 )
echo "${id_big}\t${ts}\t${ip}\t${id_medium}\t${id_small}\t${id_small}"
>> ${f}
@ id_big = ${id_big} + 1
@ time_t = ${time_t} + 1
@ id_medium = ${id_medium} + 1
@ lines_cnt = ${lines_cnt} - 1
end
exit 0
-------------cut here-----------------------------------------------------------

time ./gen_data.csh app.data 100000
9.564u 2.487s 0:12.05 99.9% 420+1113k 0+51io 0pf+0w
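As an aside, roughly the same rows could also be generated server-side with
generate_series (a sketch only; it mirrors gen_data.csh, which writes the same
ts value for every row, and it goes through INSERT rather than COPY, so it is
not a substitute for the COPY test itself):

-- Sketch: generate ~100k rows comparable to one app.data file directly in SQL.
-- Mirrors gen_data.csh (constant ts and ip); exercises the INSERT path, not COPY.
INSERT INTO test.t (id_big, ts, ip, id_medium, id_small, id_smalll)
SELECT -2147483648 + n,
       to_timestamp(1000000000)::timestamp,
       inet '127.0.0.1',
       (-2147483648 + n)::integer,
       0::smallint,
       0::smallint
FROM generate_series(0, 99999) AS s(n);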

copy_data.csh
-------------cut here-----------------------------------------------------------
#!/bin/tcsh
# COPY the data file $1 into test.t, $2 times.
set f = $1
set cnt = $2
while ( $cnt > 0 )
time psql -d foo -c "COPY test.t(id_big, ts, ip, id_medium, id_small, id_smalll) from '$f'"
@ cnt = ${cnt} - 1
end
exit 0
-------------cut here-----------------------------------------------------------

time copy_data.csh /aaa/app.data 100
...
0.000u 0.027s 0:01.55 1.2% 474+1254k 0+0io 0pf+0w
COPY 100000
...
(~1-3 sec for each of the 100 iterations, with 3-4 spikes up to 5 sec max)

CREATE INDEX ix_t_big ON test.t USING btree (id_big) TABLESPACE tblsp_ix;

time copy_data.csh /aaa/app.data 100
(the trouble begins around iteration #20):
COPY 100000
0.000u 0.005s 0:20.70 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:06.50 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:03.44 0.8% 704+514k 0+0io 0pf+0w
COPY 100000
0.007u 0.029s 0:04.55 0.4% 808+1746k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:03.60 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:02.55 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:03.03 0.9% 469+197k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:03.85 0.7% 526+1393k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:06.66 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:02.73 1.0% 526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:11.85 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.022s 0:02.56 0.7% 492+1238k 0+0io 0pf+0w
COPY 100000
0.007u 0.022s 0:02.46 0.8% 650+1328k 0+0io 0pf+0w
COPY 100000
0.006u 0.031s 0:04.71 0.6% 692+525k 0+0io 0pf+0w
COPY 100000
0.000u 0.039s 0:29.10 0.1% 526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:36.29 0.0% 538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:43.77 0.0% 526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 1:01.94 0.0% 538+1164k 0+0io 0pf+0w
COPY 100000
0.007u 0.029s 0:13.99 0.1% 808+2074k 0+0io 0pf+0w
COPY 100000
0.003u 0.005s 0:46.02 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.031s 0:45.58 0.0% 316+836k 0+0io 0pf+0w
COPY 100000
0.000u 0.038s 1:00.39 0.0% 526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:24.38 0.1% 538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:41.32 0.0% 538+1382k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:46.13 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:43.15 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:45.59 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 1:54.92 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 2:22.47 0.0% 538+1382k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 1:40.65 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.006u 0.020s 1:43.52 0.0% 650+1328k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 1:43.33 0.0% 538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 1:47.00 0.0% 526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 2:18.94 0.0% 538+1164k 0+0io 0pf+0w

From that point on every iteration took more than 1 minute, so I interrupted the test.
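
For anyone repeating this, the on-disk growth of the table and the indexes can
be checked between iterations with the standard size functions (a quick sketch):

-- Sketch: run between COPY iterations to see how big the table and indexes are.
SELECT pg_size_pretty(pg_relation_size('test.t'))        AS t_size,
       pg_size_pretty(pg_relation_size('test.ix_t'))     AS ix_t_size,
       pg_size_pretty(pg_relation_size('test.ix_t_big')) AS ix_t_big_size;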

DROP INDEX test.ix_t_big;

time copy_data.csh /aaa/app.data 100
COPY 100000
0.000u 0.005s 0:02.42 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.007u 0.029s 0:01.88 1.0% 808+2074k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:01.83 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:01.75 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:01.82 0.0% 0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:01.81 1.6% 526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:01.84 1.6% 538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:01.86 1.6% 421+1114k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:01.77 1.6% 538+1164k 0+0io 0pf+0w
...
Everything returned to the good performance state.

With more than 50 mln rows, all the timings in the test with the index on the
bigint column grow by a factor of ~20, while without that index the speed
remains constant (1-2 sec per 100k-row file) even at 200 mln rows.

P.S. I tried the same with a table of only 2 columns (bigint and int) - it did
not produce this effect, probably because total data volume is the critical factor.
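
For a rough sense of the volumes involved (my back-of-envelope assumption:
on the order of 20 bytes per b-tree leaf entry including item overhead),
the bigint index alone around the 50 mln row mark would be roughly:

-- Rough estimate only, assuming ~20 bytes per index entry incl. item overhead:
SELECT pg_size_pretty(50000000::bigint * 20) AS approx_ix_t_big_size;
-- ~ 954 MB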

On Tue, Aug 2, 2011 at 8:41 PM, Kevin Grittner
<Kevin(dot)Grittner(at)wicourts(dot)gov> wrote:
> Robert Ayrapetyan <robert(dot)ayrapetyan(at)comodo(dot)com> wrote:
>
>> So I'm still convinced - this bug relates to FreeBSD 64-bit + UFS
>> + bigint column index
>> (some of these may be superfluous, but I have no resources to
>> check on different platforms with different filesystems).
>
> Linux 64 bit XFS bigint column index only shows a slightly longer
> run time for bigint versus int here.  What timings do you get for
> the insert statements if you run the following in your environment?
>
> create table bi (big bigint not null, medium int not null);
> insert into bi with x(n) as (select generate_series(1, 1000000))
> select n + 5000000000, n from x;
> \timing on
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> create unique index bi_medium on bi (medium);
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> drop index bi_medium;
> create unique index bi_big on bi (big);
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> \timing off
> drop table bi;
>
> Here's what I get:
>
> Time: 1629.141 ms
> Time: 1638.060 ms
> Time: 1711.833 ms
>
> Time: 4151.953 ms
> Time: 4602.679 ms
> Time: 5107.259 ms
>
> Time: 4654.060 ms
> Time: 5158.157 ms
> Time: 5101.110 ms
>
> -Kevin
>

--
Ayrapetyan Robert,
Comodo Anti-Malware Data Processing Analysis and Management System (CAMDPAMS)
http://repo-qa.camdpams.odessa.office.comodo.net/mediawiki/index.php
