Re: Postgresql 7.3.3 crashing on query

Lists: pgsql-bugs
From: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
To: pgsql-bugs(at)postgresql(dot)org
Subject: Postgresql 7.3.3 crashing on query
Date: 2003-07-28 09:05:13
Message-ID: 200307281105.13456.philipp.reisner@linbit.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

HI,

Executing a crashes postgresql.

If I execute this query several times (I receive the correct result set),
and then do an explain of the same query, the backend terminates with
sig 11 !!!

Sometimes it crashes by simply executing the query (without explain).

I looks a lot like if the crash has something to do with planing/optimizing.
In case the planer survives the executor can deliver the result set.

I was also able to reproduce this crash on 7.3.2. 7.2.1 does not crash on
this query.

select doc.objid, doc.id, bpa.shortname, dot.shortname, doc.documentdate,
doc.documentid, loc_from.shortname, loc_to.shortname, doc.edittimeutc,
doc.tzo_objid, edi.lastname, doc.cal_id, tzo.tzid
from timezones tzo, documenttypes dot, bpartners bpa, documentheaders as doc
left outer join locations loc_from on loc_from.objid = doc.from_objid
left outer join locations loc_to on loc_to.objid = doc.to_objid
left outer join bppersons edi on edi.objid = doc.edi_objid
where tzo.objid = doc.tzo_objid
and dot.objid = doc.dot_objid
and bpa.objid = doc.bpa_objid
and bpa.objid in (187756)
and doc.documentdate >= '2003-07-25 00:00:00.000000000+00'
and doc.documentdate <= '2003-07-25 23:59:59.009000000+00'
order by 9 DESC

Jul 28 08:34:39 localhost postgres[20301]: [19-1] LOG: query: explain select
doc.objid, doc.id, bpa.shortname, dot.shortname, doc.documentdate,
Jul 28 08:34:39 localhost postgres[20301]: [19-2] doc.documentid,
loc_from.shortname, loc_to.shortname, doc.edittimeutc,
Jul 28 08:34:39 localhost postgres[20301]: [19-3] doc.tzo_objid,
edi.lastname, do
c.cal_id, tzo.tzid
Jul 28 08:34:39 localhost postgres[20301]: [19-4] from timezones tzo,
documenttypes dot, bpartners bpa, documentheaders as doc
Jul 28 08:34:39 localhost postgres[20301]: [19-5] left outer join
locations loc_from on loc_from.objid = doc.from_objid
Jul 28 08:34:39 localhost postgres[20301]: [19-6] left outer join
locations loc_to on loc_to.objid = doc.to_objid
Jul 28 08:34:39 localhost postgres[20301]: [19-7] left outer join
bppersons edi on edi.objid = doc.edi_objid
Jul 28 08:34:39 localhost postgres[20301]: [19-8] where tzo.objid =
doc.tzo_objid
Jul 28 08:34:39 localhost postgres[20301]: [19-9] and dot.objid =
doc.dot_objid
Jul 28 08:34:39 localhost postgres[20301]: [19-10] and bpa.objid =
doc.bpa_objid
Jul 28 08:34:39 localhost postgres[20301]: [19-11] and bpa.objid in
(187756)
Jul 28 08:34:39 localhost postgres[20301]: [19-12] and doc.documentdate
>= '2003-07-25 00:00:00.000000000+00'
Jul 28 08:34:39 localhost postgres[20301]: [19-13] and doc.documentdate
<= '2003-07-25 23:59:59.009000000+00'
Jul 28 08:34:39 localhost postgres[20301]: [19-14] order by 9 DESC;
Jul 28 08:34:39 localhost postgres[11886]: [7] LOG: server process (pid
20301) was terminated by signal 11
Jul 28 08:34:39 localhost postgres[11886]: [8] LOG: terminating any other
active server processes
Jul 28 08:34:39 localhost postgres[20302]: [9] LOG: connection received:
host=[local]
Jul 28 08:34:39 localhost postgres[20302]: [10] FATAL: The database system is
in recovery mode
Jul 28 08:34:39 localhost postgres[19663]: [13-1] WARNING: Message from
PostgreSQL backend:
Jul 28 08:34:39 localhost postgres[19663]: [13-2] ^IThe Postmaster has
informed me that some other backend
Jul 28 08:34:39 localhost postgres[19663]: [13-3] ^Idied abnormally and
possibly corrupted shared memory.
Jul 28 08:34:39 localhost postgres[19663]: [13-4] ^II have rolled back the
current transaction and am
Jul 28 08:34:39 localhost postgres[19663]: [13-5] ^Igoing to terminate your
database system connection and exit.
Jul 28 08:34:39 localhost postgres[19663]: [13-6] ^IPlease reconnect to the
database system and repeat your query.
Jul 28 08:34:39 localhost postgres[19665]: [13-1] WARNING: Message from
PostgreSQL backend:
Jul 28 08:34:39 localhost postgres[19665]: [13-2] ^IThe Postmaster has
informed me that some other backend
Jul 28 08:34:39 localhost postgres[19665]: [13-3] ^Idied abnormally and
possibly corrupted shared memory.
Jul 28 08:34:39 localhost postgres[19665]: [13-4] ^II have rolled back the
current transaction and am
Jul 28 08:34:39 localhost postgres[19665]: [13-5] ^Igoing to terminate your
database system connection and exit.
Jul 28 08:34:39 localhost postgres[19665]: [13-6] ^IPlease reconnect to the
database system and repeat your query.
Jul 28 08:34:39 localhost postgres[19667]: [13-1] WARNING: Message from
PostgreSQL backend:
Jul 28 08:34:39 localhost postgres[19667]: [13-2] ^IThe Postmaster has
informed me that some other backend
Jul 28 08:34:39 localhost postgres[19667]: [13-3] ^Idied abnormally and
possibly corrupted shared memory.
Jul 28 08:34:39 localhost postgres[19667]: [13-4] ^II have rolled back the
current transaction and am
Jul 28 08:34:39 localhost postgres[19667]: [13-5] ^Igoing to terminate your
database system connection and exit.
Jul 28 08:34:39 localhost postgres[19667]: [13-6] ^IPlease reconnect to the
database system and repeat your query.
Jul 28 08:34:39 localhost postgres[19664]: [13-1] WARNING: Message from
PostgreSQL backend:
Jul 28 08:34:39 localhost postgres[19664]: [13-2] ^IThe Postmaster has
informed me that some other backend
Jul 28 08:34:39 localhost postgres[19664]: [13-3] ^Idied abnormally and
possibly corrupted shared memory.
Jul 28 08:34:39 localhost postgres[19664]: [13-4] ^II have rolled back the
current transaction and am
Jul 28 08:34:39 localhost postgres[19664]: [13-5] ^Igoing to terminate your
database system connection and exit.
Jul 28 08:34:39 localhost postgres[19664]: [13-6] ^IPlease reconnect to the
database system and repeat your query.
Jul 28 08:34:39 localhost postgres[11886]: [9] LOG: all server processes
terminated; reinitializing shared memory and semaphores
Jul 28 08:34:39 localhost postgres[20303]: [10] LOG: database system was
interrupted at 2003-07-28 05:06:45 UTC
Jul 28 08:34:39 localhost postgres[20303]: [11] LOG: checkpoint record is at
0/FBA26428
Jul 28 08:34:39 localhost postgres[20303]: [12] LOG: redo record is at
0/FBA26428; undo record is at 0/0; shutdown FALSE
Jul 28 08:34:39 localhost postgres[20303]: [13] LOG: next transaction id:
55952; next oid: 6520640
Jul 28 08:34:39 localhost postgres[20303]: [14] LOG: database system was not
properly shut down; automatic recovery in progress
Jul 28 08:34:39 localhost postgres[20303]: [15] LOG: ReadRecord: record with
zero length at 0/FBA26468
Jul 28 08:34:39 localhost postgres[20303]: [16] LOG: redo is not required
Jul 28 08:34:41 localhost postgres[20303]: [17] LOG: database system is ready

-------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=272.62..272.62 rows=1 width=156)
Sort Key: doc.edittimeutc
-> Hash Join (cost=271.39..272.61 rows=1 width=156)
Hash Cond: ("outer".bpa_objid = "inner".objid)
-> Nested Loop (cost=266.52..267.73 rows=1 width=143)
Join Filter: ("inner".objid = "outer".dot_objid)
-> Merge Join (cost=266.52..266.57 rows=1 width=128)
Merge Cond: ("outer".objid = "inner".tzo_objid)
-> Index Scan using timezones_pkey on timezones tzo
(cost=0.00..9.72 rows=327 width=21)
-> Sort (cost=266.52..266.53 rows=1 width=107)
Sort Key: doc.tzo_objid
-> Nested Loop (cost=0.00..266.51 rows=1
width=107)
-> Nested Loop (cost=0.00..260.51 rows=1
width=90)
-> Nested Loop (cost=0.00..254.71
rows=1 width=76)
-> Seq Scan on documentheaders
doc (cost=0.00..248.91 rows=1 width=62)
Filter: ((documentdate >=
'2003-07-25'::date) AND (documentdate <= '2003-07-25'::date))
-> Index Scan using
locations_pkey on locations loc_from (cost=0.00..5.78 rows=1 width=14)
Index Cond: (loc_from.objid
= "outer".from_objid)
-> Index Scan using locations_pkey on
locations loc_to (cost=0.00..5.78 rows=1 width=14)
Index Cond: (loc_to.objid =
"outer".to_objid)
-> Index Scan using bppersons_pkey on
bppersons edi (cost=0.00..5.99 rows=1 width=17)
Index Cond: (edi.objid =
"outer".edi_objid)
-> Seq Scan on documenttypes dot (cost=0.00..1.07 rows=7
width=15)
-> Hash (cost=4.87..4.87 rows=1 width=13)
-> Index Scan using bpartners_pkey on bpartners bpa
(cost=0.00..4.87 rows=1 width=13)
Index Cond: (objid = 187756)
(26 rows)

-- Name: timezones; Type: TABLE; Schema: public; Owner: sd
--

CREATE TABLE timezones (
objid integer NOT NULL,
id integer NOT NULL,
shortname character varying(12) NOT NULL,
name character varying(100) NOT NULL,
tzid character varying(50) NOT NULL,
utcrawoffset integer NOT NULL
);

-- Name: documenttypes; Type: TABLE; Schema: public; Owner: sd
--

CREATE TABLE documenttypes (
objid integer NOT NULL,
id integer NOT NULL,
shortname character varying(12) NOT NULL,
name character varying(50),
description character varying(4000) NOT NULL,
hasfromlocation character varying(1) NOT NULL,
hastolocation character varying(1) NOT NULL,
hasquantity character varying(1) NOT NULL,
hasreservedquantity character varying(1) NOT NULL,
cancreate character varying(1) NOT NULL,
mustcalcpurchaseprice character varying(1) NOT NULL,
allownegativequantity character varying(1) NOT NULL
);

-- Name: bpartners_tmp; Type: TABLE; Schema: public; Owner: sd
--

CREATE TABLE bpartners_tmp (
objid integer,
id integer,
shortname character varying(12),
name character varying(50),
street character varying(50),
city character varying(50),
zip character varying(50),
tel character varying(50),
fax character varying(50),
email character varying(50),
"location" character varying(50),
memo character varying(2000),
sdcallflag character varying(1),
sdcallpingflag character varying(1),
sdcallmobileflag character varying(1),
sdinventoryflag character varying(1),
sdreportflag character varying(1),
sdbenchmarkflag character varying(1),
cou_objid integer,
tzo_objid integer,
logofilename character varying(50),
additionalmenulinks character varying(2000),
showonlyallowedmenuentries character varying(1),
duns character varying(50),
accountcode character varying(50),
bpa_objid integer
);

-- Name: documentheaders_tmp; Type: TABLE; Schema: public; Owner: sd
--

CREATE TABLE documentheaders_tmp (
objid integer,
id integer,
dot_objid integer,
documentdate date,
edittime timestamp with time zone,
edittimeutc timestamp with time zone,
tzo_objid integer,
edi_objid integer,
documentid character varying(50),
cal_id integer,
from_objid integer,
to_objid integer
);

-- Name: locations; Type: TABLE; Schema: public; Owner: sd
--

CREATE TABLE locations (
objid integer NOT NULL,
id integer NOT NULL,
shortname character varying(12) NOT NULL,
name character varying(50),
zip character varying(50),
city character varying(50),
street character varying(50),
cou_objid integer,
tel character varying(50),
fax character varying(50),
modemtelnr character varying(50),
description character varying(2000),
sco_objid integer,
contactfirstname character varying(50),
contactlastname character varying(50),
contactsalutation character varying(50),
contacttitle character varying(50),
moopenfromam time without time zone,
moopentoam time without time zone,
moopenfrompm time without time zone,
moopentopm time without time zone,
tuopenfromam time without time zone,
tuopentoam time without time zone,
tuopenfrompm time without time zone,
tuopentopm time without time zone,
weopenfromam time without time zone,
weopentoam time without time zone,
weopenfrompm time without time zone,
weopentopm time without time zone,
thopenfromam time without time zone,
thopentoam time without time zone,
thopenfrompm time without time zone,
thopentopm time without time zone,
fropenfromam time without time zone,
fropentoam time without time zone,
fropenfrompm time without time zone,
fropentopm time without time zone,
saopenfromam time without time zone,
saopentoam time without time zone,
saopenfrompm time without time zone,
saopentopm time without time zone,
suopenfromam time without time zone,
suopentoam time without time zone,
suopenfrompm time without time zone,
suopentopm time without time zone,
edittime timestamp with time zone NOT NULL,
edittimeutc timestamp with time zone NOT NULL,
tzo_objid integer NOT NULL,
edi_objid integer NOT NULL,
isactiv character varying(1) NOT NULL,
locationtype character varying(50)
);

-- Name: bppersons_tmp; Type: TABLE; Schema: public; Owner: sd
--

CREATE TABLE bppersons_tmp (
objid integer,
id integer,
shortname character varying(12),
firstname character varying(50),
lastname character varying(50),
title character varying(20),
salutation character varying(50),
street character varying(50),
city character varying(50),
zip character varying(50),
cou_objid integer,
tel character varying(50),
tel2 character varying(50),
mobiletel character varying(50),
fax character varying(50),
"password" character varying(50),
email character varying(50),
pgr_objid integer,
isactiv character varying(1),
"location" character varying(50),
loc_objid integer,
tzo_objid integer,
pin character varying(50),
description character varying(4000),
department character varying(50),
sign character varying(50)
);

-Philipp
--
: Dipl-Ing Philipp Reisner Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH Fax +43-1-8178292-82 :
: Schönbrunnerstr 244, 1120 Vienna, Austria http://www.linbit.com :


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-07-28 13:53:22
Message-ID: 8900.1059400402@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> If I execute this query several times (I receive the correct result set),
> and then do an explain of the same query, the backend terminates with
> sig 11 !!!

Can you get us a debugger stack trace from the crash?

Also you should specify the platform you're using in reports of this sort.

regards, tom lane


From: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-07-30 07:32:30
Message-ID: 200307300932.30470.philipp.reisner@linbit.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Am Montag, 28. Juli 2003 15:53 schrieb Tom Lane:
> Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> > If I execute this query several times (I receive the correct result set),
> > and then do an explain of the same query, the backend terminates with
> > sig 11 !!!
>
> Can you get us a debugger stack trace from the crash?
>

Ok, I attached GDB to the backend.
Attaching to program: /usr/lib/postgresql/bin/postgres, process 25380
Reading symbols from /lib/libpam.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib/libpam.so.0
Reading symbols from /usr/lib/libssl.so.0.9.6...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libssl.so.0.9.6
Reading symbols from /usr/lib/libcrypto.so.0.9.6...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libcrypto.so.0.9.6
Reading symbols from /usr/lib/libkrb5.so.17...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libkrb5.so.17
Reading symbols from /usr/lib/libz.so.1...(no debugging symbols found)...done.
Loaded symbols for /usr/lib/libz.so.1
Reading symbols from /lib/libreadline.so.4...(no debugging symbols
found)...done.
Loaded symbols for /lib/libreadline.so.4
Reading symbols from /lib/libcrypt.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib/libcrypt.so.1
Reading symbols from /lib/libresolv.so.2...(no debugging symbols
found)...done.
Loaded symbols for /lib/libresolv.so.2
Reading symbols from /lib/libnsl.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib/libnsl.so.1
Reading symbols from /lib/libdl.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/libdl.so.2
Reading symbols from /lib/libm.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/libm.so.6
Reading symbols from /lib/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /usr/lib/libcom_err.so.1...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libcom_err.so.1
Reading symbols from /usr/lib/libasn1.so.5...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libasn1.so.5
Reading symbols from /usr/lib/libroken.so.9...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libroken.so.9
Reading symbols from /lib/libncurses.so.5...(no debugging symbols
found)...done.
Loaded symbols for /lib/libncurses.so.5
Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/ld-linux.so.2
Reading symbols from /usr/lib/libdb3.so.3...(no debugging symbols
found)...done.
Loaded symbols for /usr/lib/libdb3.so.3
Reading symbols from /lib/libnss_compat.so.2...(no debugging symbols
found)...done.
Loaded symbols for /lib/libnss_compat.so.2
0x402be812 in recv () from /lib/libc.so.6
(gdb) c
Continuing.
Program received signal SIGSEGV, Segmentation fault.
0x0812f9bc in DecodeDateTime ()
(gdb) where
#0 0x0812f9bc in DecodeDateTime ()
Cannot access memory at address 0xbf003030

Looks a lot like a corrupted stack :(

> Also you should specify the platform you're using in reports of this sort.
>

Its a PIII 733MHz 256MB RAM, running Debian Linux 3.0 (Woody) an a 2.4.18
kernel.

BTW, it crashes are getting rarer on the system...

-Philipp
--
: Dipl-Ing Philipp Reisner Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH Fax +43-1-8178292-82 :
: Schönbrunnerstr 244, 1120 Vienna, Austria http://www.linbit.com :


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-07-30 14:00:20
Message-ID: 9286.1059573620@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> Program received signal SIGSEGV, Segmentation fault.
> 0x0812f9bc in DecodeDateTime ()
> (gdb) where
> #0 0x0812f9bc in DecodeDateTime ()
> Cannot access memory at address 0xbf003030

That's a fairly large routine :-(. Could I trouble you to rebuild with
debugging symbols (configure --enable-debug) so we can get a more exact
fix on the problem location? While you're at it, please add
--enable-cassert too, just in case that produces any useful info.

Alternatively: maybe you can now create a simple test case. This is
enough information to let us guess that the crash occurs while trying
to read an unusual date or timestamp input value. Do you know what's
getting fed to the database? If you can exhibit a crash with a test
case like "SELECT 'something odd'::timestamp", we could take it from
there.

regards, tom lane


From: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-07-31 08:09:50
Message-ID: 200307311009.50118.philipp.reisner@linbit.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Am Mittwoch, 30. Juli 2003 16:00 schrieb Tom Lane:
> Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> > Program received signal SIGSEGV, Segmentation fault.
> > 0x0812f9bc in DecodeDateTime ()
> > (gdb) where
> > #0 0x0812f9bc in DecodeDateTime ()
> > Cannot access memory at address 0xbf003030
>
> That's a fairly large routine :-(. Could I trouble you to rebuild with
> debugging symbols (configure --enable-debug) so we can get a more exact
> fix on the problem location? While you're at it, please add
> --enable-cassert too, just in case that produces any useful info.
>
> Alternatively: maybe you can now create a simple test case. This is
> enough information to let us guess that the crash occurs while trying
> to read an unusual date or timestamp input value. Do you know what's
> getting fed to the database? If you can exhibit a crash with a test
> case like "SELECT 'something odd'::timestamp", we could take it from
> there.
>
> regards, tom lane

Hi Tom,

(gdb) c
Continuing.

Program received signal SIGSEGV, Segmentation fault.
DecodeDateTime (field=Cannot access memory at address 0x303038
) at datetime.c:1404
1404 datetime.c: No such file or directory.
in datetime.c
(gdb) where
#0 DecodeDateTime (field=Cannot access memory at address 0x303038
) at datetime.c:1404
Cannot access memory at address 0x303030

Looks a lot like an access to freed memory to me. 0x303030 looks like
a poison value.

The way I reproduced the bug:

1) I stared up the server compiled with debug
2) It was not possible to crash the server with the in the first
interactive psql session. -- I executed the query several times.
3) I exited the first session and started a new one.
4) Executing the same query again crashed the backend immediately!

BTW, this pattern is reproducable. It never happens in the first
session. But it immediately happens in the second session.

I was not able to find simple SELECT statement to reproduce
the bug, neither selecting some fancy timestamp value nor using
some fancy timestamp value in the where clause did the trick.

-Philipp
--
: Dipl-Ing Philipp Reisner Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH Fax +43-1-8178292-82 :
: Schönbrunnerstr 244, 1120 Vienna, Austria http://www.linbit.com :


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-07-31 13:53:26
Message-ID: 1087.1059659606@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> Program received signal SIGSEGV, Segmentation fault.
> DecodeDateTime (field=Cannot access memory at address 0x303038
> ) at datetime.c:1404
> 1404 datetime.c: No such file or directory.
> in datetime.c
> (gdb) where
> #0 DecodeDateTime (field=Cannot access memory at address 0x303038
> ) at datetime.c:1404
> Cannot access memory at address 0x303030

> Looks a lot like an access to freed memory to me. 0x303030 looks like
> a poison value.

No, it looks like ASCII text. Something has clobbered the stack with
a text string. Unfortunately, that probably means we can't trust the
claimed execution address --- DecodeDateTime may just happen to live at
address 0x3030 or something like that in your executable.

Did you try a backtrace (bt)? The odds are it will fail, or give junk
results, but you should try it.

> I was not able to find simple SELECT statement to reproduce
> the bug, neither selecting some fancy timestamp value nor using
> some fancy timestamp value in the where clause did the trick.

If the pointer to DecodeDateTime is a red herring then that's not too
surprising. Unfortunately, that conclusion leaves me with zero to go
on :-(. I think you'll need to work on setting up a self-contained test
case. You should try pg_dumping the tables involved, loading them into
a fresh database, and then seeing if the bug can be reproduced there.

regards, tom lane


From: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-08-05 12:27:54
Message-ID: 200308051427.54759.philipp.reisner@linbit.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Am Donnerstag, 31. Juli 2003 15:53 schrieb Tom Lane:
> Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> > Program received signal SIGSEGV, Segmentation fault.
> > DecodeDateTime (field=Cannot access memory at address 0x303038
> > ) at datetime.c:1404
> > 1404 datetime.c: No such file or directory.
> > in datetime.c
> > (gdb) where
> > #0 DecodeDateTime (field=Cannot access memory at address 0x303038
> > ) at datetime.c:1404
> > Cannot access memory at address 0x303030
> >
> > Looks a lot like an access to freed memory to me. 0x303030 looks like
> > a poison value.
>
> No, it looks like ASCII text. Something has clobbered the stack with
> a text string. Unfortunately, that probably means we can't trust the
> claimed execution address --- DecodeDateTime may just happen to live at
> address 0x3030 or something like that in your executable.
>
> Did you try a backtrace (bt)? The odds are it will fail, or give junk
> results, but you should try it.
>
> > I was not able to find simple SELECT statement to reproduce
> > the bug, neither selecting some fancy timestamp value nor using
> > some fancy timestamp value in the where clause did the trick.
>
> If the pointer to DecodeDateTime is a red herring then that's not too
> surprising. Unfortunately, that conclusion leaves me with zero to go
> on :-(. I think you'll need to work on setting up a self-contained test
> case. You should try pg_dumping the tables involved, loading them into
> a fresh database, and then seeing if the bug can be reproduced there.
>
> regards, tom lane

Hi Tom,

We (Clifford Wolf and me) have found the bug!

the strncpy call results in fstr beeing not 0 terminated. In case
you are lucky (or not) there were some zero bytes there...

This was the case when the query worked for me. But after closing
the first interactive psql session and opening the second one
this particular page is no longer filled with zeros.

In our case the strlen() returned ~50, and the strcpy overwrote
the stack frame pointer of the current stack frame.

This small patch cures the bug for us!

--- datetime.c_o 2003-08-05 14:16:01.000000000 +0200
+++ datetime.c 2003-08-05 14:16:25.000000000 +0200
@@ -2277,6 +2277,7 @@
* integer.
*/
strncpy(fstr, (cp + 1), 7);
+ fstr[7]=0;
strcpy((fstr + strlen(fstr)), "000000");
*(fstr + 6) = '\0';
*fsec = strtol(fstr, &cp, 10);

-Philipp
--
: Dipl-Ing Philipp Reisner Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH Fax +43-1-8178292-82 :
: Schönbrunnerstr 244, 1120 Vienna, Austria http://www.linbit.com :


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-08-05 16:35:53
Message-ID: 7112.1060101353@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> We (Clifford Wolf and me) have found the bug!

Good work! The INT64_TIMESTAMP code isn't very widely used, I think,
which probably explains why this wasn't noticed before. Will commit
the patch shortly.

regards, tom lane


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com>
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgresql 7.3.3 crashing on query
Date: 2003-08-05 17:41:48
Message-ID: 9846.1060105308@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-bugs

Philipp Reisner <philipp(dot)reisner(at)linbit(dot)com> writes:
> strncpy(fstr, (cp + 1), 7);
> + fstr[7]=0;
> strcpy((fstr + strlen(fstr)), "000000");

After some looking around, it turns out there was another similar error,
plus several related places where the code was not quite right.
Attached is the full patch I applied against 7.3.4. Many thanks for
pointing out this mistake!

regards, tom lane

Index: datetime.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/datetime.c,v
retrieving revision 1.96.2.5
diff -c -r1.96.2.5 datetime.c
*** datetime.c 4 May 2003 04:30:35 -0000 1.96.2.5
--- datetime.c 5 Aug 2003 17:34:48 -0000
***************
*** 1128,1134 ****
if (*cp != '\0')
return -1;
#ifdef HAVE_INT64_TIMESTAMP
! *fsec = frac * 1000000;
#else
*fsec = frac;
#endif
--- 1128,1134 ----
if (*cp != '\0')
return -1;
#ifdef HAVE_INT64_TIMESTAMP
! *fsec = rint(frac * 1000000);
#else
*fsec = frac;
#endif
***************
*** 1158,1166 ****

tmask |= DTK_TIME_M;
#ifdef HAVE_INT64_TIMESTAMP
! dt2time((time * 86400000000), &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#else
! dt2time((time * 86400), &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#endif
}
break;
--- 1158,1168 ----

tmask |= DTK_TIME_M;
#ifdef HAVE_INT64_TIMESTAMP
! dt2time((time * 86400000000),
! &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#else
! dt2time((time * 86400),
! &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#endif
}
break;
***************
*** 1835,1843 ****
tmask = DTK_M(SECOND);
if (*cp == '.')
{
! *fsec = strtod(cp, &cp);
if (*cp != '\0')
return -1;
}
break;

--- 1837,1852 ----
tmask = DTK_M(SECOND);
if (*cp == '.')
{
! double frac;
!
! frac = strtod(cp, &cp);
if (*cp != '\0')
return -1;
+ #ifdef HAVE_INT64_TIMESTAMP
+ *fsec = rint(frac * 1000000);
+ #else
+ *fsec = frac;
+ #endif
}
break;

***************
*** 1863,1871 ****

tmask |= DTK_TIME_M;
#ifdef HAVE_INT64_TIMESTAMP
! dt2time((time * 86400000000), &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#else
! dt2time((time * 86400), &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#endif
}
break;
--- 1872,1882 ----

tmask |= DTK_TIME_M;
#ifdef HAVE_INT64_TIMESTAMP
! dt2time((time * 86400000000),
! &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#else
! dt2time((time * 86400),
! &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
#endif
}
break;
***************
*** 2268,2291 ****
*fsec = 0;
else if (*cp == '.')
{
! #ifdef HAVE_INT64_TIMESTAMP
! char fstr[MAXDATELEN + 1];

- /*
- * OK, we have at most six digits to work with. Let's
- * construct a string and then do the conversion to an
- * integer.
- */
- strncpy(fstr, (cp + 1), 7);
- strcpy((fstr + strlen(fstr)), "000000");
- *(fstr + 6) = '\0';
- *fsec = strtol(fstr, &cp, 10);
- #else
str = cp;
! *fsec = strtod(str, &cp);
! #endif
if (*cp != '\0')
return -1;
}
else
return -1;
--- 2279,2295 ----
*fsec = 0;
else if (*cp == '.')
{
! double frac;

str = cp;
! frac = strtod(str, &cp);
if (*cp != '\0')
return -1;
+ #ifdef HAVE_INT64_TIMESTAMP
+ *fsec = rint(frac * 1000000);
+ #else
+ *fsec = frac;
+ #endif
}
else
return -1;
***************
*** 2328,2333 ****
--- 2332,2339 ----

if (*cp == '.')
{
+ double frac;
+
/*
* More than two digits? Then could be a date or a run-together
* time: 2001.360 20011225 040506.789
***************
*** 2336,2344 ****
return DecodeNumberField(flen, str, (fmask | DTK_DATE_M),
tmask, tm, fsec, is2digits);

! *fsec = strtod(cp, &cp);
if (*cp != '\0')
return -1;
}
else if (*cp != '\0')
return -1;
--- 2342,2355 ----
return DecodeNumberField(flen, str, (fmask | DTK_DATE_M),
tmask, tm, fsec, is2digits);

! frac = strtod(cp, &cp);
if (*cp != '\0')
return -1;
+ #ifdef HAVE_INT64_TIMESTAMP
+ *fsec = rint(frac * 1000000);
+ #else
+ *fsec = frac;
+ #endif
}
else if (*cp != '\0')
return -1;
***************
*** 2441,2459 ****
*/
if ((cp = strchr(str, '.')) != NULL)
{
! #ifdef HAVE_INT64_TIMESTAMP
! char fstr[MAXDATELEN + 1];

! /*
! * OK, we have at most six digits to care about. Let's construct a
! * string and then do the conversion to an integer.
! */
! strcpy(fstr, (cp + 1));
! strcpy((fstr + strlen(fstr)), "000000");
! *(fstr + 6) = '\0';
! *fsec = strtol(fstr, NULL, 10);
#else
! *fsec = strtod(cp, NULL);
#endif
*cp = '\0';
len = strlen(str);
--- 2452,2464 ----
*/
if ((cp = strchr(str, '.')) != NULL)
{
! double frac;

! frac = strtod(cp, NULL);
! #ifdef HAVE_INT64_TIMESTAMP
! *fsec = rint(frac * 1000000);
#else
! *fsec = frac;
#endif
*cp = '\0';
len = strlen(str);