9.1.3 backends getting stuck in 'startup'

From: Jeff Frost <jeff(at)pgexperts(dot)com>
To: pgsql-bugs(at)postgresql(dot)org
Subject: 9.1.3 backends getting stuck in 'startup'
Date: 2012-04-27 16:07:46
Message-ID: 4F9AC452.3050102@pgexperts.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

A few times today, we've seen postgresql 9.1.3 backends on Ubuntu 11.10 x86_64
get stuck in 'startup' mode. By that I mean the set_ps_output mode. Postgres
is installed via Martin Pitt's packages.

It manifests like this:

Server has been humming along fine, then suddenly many backends get stuck in
'startup' mode and the ps output looks like so:

postgres 2425 3.5 0.3 8695020 483868 ? Ss 08:56 0:41 postgres: consprod consprod 192.168.1.150(41606) startup
postgres 2427 4.1 0.3 8695020 511756 ? Ss 08:56 0:48 postgres: consprod consprod 192.168.1.152(60876) startup
postgres 2429 3.5 0.3 8695020 497252 ? Ss 08:56 0:40 postgres: consprod consprod 192.168.1.151(41930) startup
postgres 2431 3.7 0.3 8695020 502004 ? Ss 08:56 0:43 postgres: consprod consprod 192.168.1.151(41933) startup
postgres 2435 3.8 0.3 8695020 439080 ? Ss 08:56 0:44 postgres: consprod consprod 192.168.1.157(37656) startup
postgres 2437 3.5 0.3 8695020 482304 ? Ss 08:56 0:41 postgres: consprod consprod 192.168.1.153(45071) startup
postgres 2438 3.4 0.3 8695020 506776 ? Ss 08:56 0:39 postgres: consprod consprod 192.168.1.153(45072) startup
postgres 2441 3.7 0.3 8695020 495652 ? Ss 08:56 0:43 postgres: consprod consprod 192.168.1.153(45075) startup
postgres 2442 3.7 0.3 8695020 503336 ? Ss 08:56 0:43 postgres: consprod consprod 192.168.1.153(45076) startup
postgres 2443 4.0 0.3 8695020 511760 ? Ss 08:56 0:46 postgres: consprod consprod 192.168.1.153(45077) startup
postgres 2445 3.4 0.3 8695020 478632 ? Ss 08:56 0:39 postgres: consprod consprod 192.168.1.155(54413) startup
postgres 2446 3.7 0.3 8695020 464380 ? Ss 08:56 0:43 postgres: consprod consprod 192.168.1.154(46186) startup
postgres 2448 3.8 0.3 8695024 474400 ? Ss 08:56 0:44 postgres: consprod consprod 192.168.1.154(46187) startup
postgres 2449 3.8 0.3 8695020 464932 ? Rs 08:56 0:45 postgres: consprod consprod 192.168.1.154(46216) startup
postgres 2450 3.3 0.3 8695020 515200 ? Ss 08:56 0:38 postgres: consprod consprod 192.168.1.152(60890) startup
postgres 2452 3.3 0.3 8695020 493384 ? Ss 08:56 0:38 postgres: consprod consprod 192.168.1.152(60892) startup
postgres 2453 3.7 0.3 8695020 490168 ? Ss 08:56 0:44 postgres: consprod consprod 192.168.1.152(60893) startup
postgres 2456 3.4 0.3 8695020 449348 ? Ss 08:56 0:40 postgres: consprod consprod 192.168.1.156(50829) startup

Restarting required '-m immediate'.

Stracing reveals lots of semops:

semop(589837, {{3, 1, 0}}, 1) = 0
semop(524299, {{11, 1, 0}}, 1) = 0
semop(557068, {{6, 1, 0}}, 1) = 0
semop(458761, {{2, 1, 0}}, 1) = 0
semop(557068, {{9, 1, 0}}, 1) = 0
read(8, ":\n\0\0\210\261\224\316\4\0\1\0\364\0\0 \0 \4
\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(8, ":\n\0\0\320\262\224\316\4\0\1\0\364\0\0 \0 \4
\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192

backtrace looks like this:
0x00007f7dd1d1f497 in semop () at ../sysdeps/unix/syscall-template.S:82
82 in ../sysdeps/unix/syscall-template.S
#0 0x00007f7dd1d1f497 in semop () at ../sysdeps/unix/syscall-template.S:82
#1 0x00007f7dd3d67b18 in PGSemaphoreLock (sema=0x7f7dce0e8210, interruptOK=0
'\000') at pg_sema.c:418
#2 0x00007f7dd3da67c5 in LWLockAcquire (lockid=FirstBufMappingLock,
mode=LW_SHARED) at lwlock.c:464
#3 0x00007f7dd3d91289 in BufferAlloc (foundPtr=0x7fff84d3b45e "",
strategy=0x7f7dd5f359e0, blockNum=201362, forkNum=MAIN_FORKNUM,
relpersistence=112 'p', smgr=<optimized out>) at bufmgr.c:531
#4 ReadBuffer_common (smgr=0x7f7dd5f39f00, relpersistence=112 'p',
forkNum=MAIN_FORKNUM, blockNum=201362, mode=RBM_NORMAL,
strategy=0x7f7dd5f359e0, hit=0x7fff84d3b4cf "") at bufmgr.c:325
#5 0x00007f7dd3d91ca4 in ReadBufferExtended (reln=0x7f7dd3b17540,
forkNum=MAIN_FORKNUM, blockNum=201362, mode=RBM_NORMAL, strategy=<optimized
out>) at bufmgr.c:246
#6 0x00007f7dd3be3d46 in heapgetpage (scan=0x7f7dd5f351c0, page=201362) at
heapam.c:223
#7 0x00007f7dd3be4732 in heapgettup (scan=0x7f7dd5f351c0, dir=<optimized
out>, nkeys=2, key=0x7f7dd5f358a0) at heapam.c:556
#8 0x00007f7dd3be52f5 in heap_getnext (scan=0x7f7dd5f351c0,
direction=<optimized out>) at heapam.c:1345
#9 0x00007f7dd3e66167 in RelationBuildTupleDesc (relation=0x7f7dd3b23ee8) at
relcache.c:468
#10 RelationBuildDesc (targetRelId=<optimized out>, insertIt=1 '\001') at
relcache.c:882
#11 0x00007f7dd3e66bdd in load_critical_index (indexoid=2655, heapoid=2603) at
relcache.c:3080
#12 0x00007f7dd3e6839b in RelationCacheInitializePhase3 () at relcache.c:2890
#13 0x00007f7dd3e7f931 in InitPostgres (in_dbname=<optimized out>,
dboid=2228467824, username=<optimized out>, out_dbname=0x0) at postinit.c:816
#14 0x00007f7dd3db26d8 in PostgresMain (argc=<optimized out>, argv=<optimized
out>, username=0x7f7dd5e4bc90 "consprod") at postgres.c:3650
#15 0x00007f7dd3d75923 in BackendRun (port=0x7f7dd5e96ea0) at postmaster.c:3606
#16 BackendStartup (port=0x7f7dd5e96ea0) at postmaster.c:3291
#17 ServerLoop () at postmaster.c:1455
#18 0x00007f7dd3d7637c in PostmasterMain (argc=<optimized out>,
argv=<optimized out>) at postmaster.c:1116
#19 0x00007f7dd3bce5b0 in main (argc=5, argv=0x7f7dd5e4b170) at main.c:199
Continuing.

Program received signal SIGUSR1, User defined signal 1.
0x00007f7dd1d1f497 in semop () at ../sysdeps/unix/syscall-template.S:82
82 in ../sysdeps/unix/syscall-template.S
#0 0x00007f7dd1d1f497 in semop () at ../sysdeps/unix/syscall-template.S:82
#1 0x00007f7dd3d67b18 in PGSemaphoreLock (sema=0x7f7dce0e8210, interruptOK=0
'\000') at pg_sema.c:418
#2 0x00007f7dd3da67c5 in LWLockAcquire (lockid=FirstBufMappingLock,
mode=LW_SHARED) at lwlock.c:464
#3 0x00007f7dd3d91289 in BufferAlloc (foundPtr=0x7fff84d3b45e "",
strategy=0x7f7dd5f359e0, blockNum=201362, forkNum=MAIN_FORKNUM,
relpersistence=112 'p', smgr=<optimized out>) at bufmgr.c:531
#4 ReadBuffer_common (smgr=0x7f7dd5f39f00, relpersistence=112 'p',
forkNum=MAIN_FORKNUM, blockNum=201362, mode=RBM_NORMAL,
strategy=0x7f7dd5f359e0, hit=0x7fff84d3b4cf "") at bufmgr.c:325
#5 0x00007f7dd3d91ca4 in ReadBufferExtended (reln=0x7f7dd3b17540,
forkNum=MAIN_FORKNUM, blockNum=201362, mode=RBM_NORMAL, strategy=<optimized
out>) at bufmgr.c:246
#6 0x00007f7dd3be3d46 in heapgetpage (scan=0x7f7dd5f351c0, page=201362) at
heapam.c:223
#7 0x00007f7dd3be4732 in heapgettup (scan=0x7f7dd5f351c0, dir=<optimized
out>, nkeys=2, key=0x7f7dd5f358a0) at heapam.c:556
#8 0x00007f7dd3be52f5 in heap_getnext (scan=0x7f7dd5f351c0,
direction=<optimized out>) at heapam.c:1345
#9 0x00007f7dd3e66167 in RelationBuildTupleDesc (relation=0x7f7dd3b23ee8) at
relcache.c:468
#10 RelationBuildDesc (targetRelId=<optimized out>, insertIt=1 '\001') at
relcache.c:882
#11 0x00007f7dd3e66bdd in load_critical_index (indexoid=2655, heapoid=2603) at
relcache.c:3080
#12 0x00007f7dd3e6839b in RelationCacheInitializePhase3 () at relcache.c:2890
#13 0x00007f7dd3e7f931 in InitPostgres (in_dbname=<optimized out>,
dboid=2228467824, username=<optimized out>, out_dbname=0x0) at postinit.c:816
#14 0x00007f7dd3db26d8 in PostgresMain (argc=<optimized out>, argv=<optimized
out>, username=0x7f7dd5e4bc90 "consprod") at postgres.c:3650
#15 0x00007f7dd3d75923 in BackendRun (port=0x7f7dd5e96ea0) at postmaster.c:3606
#16 BackendStartup (port=0x7f7dd5e96ea0) at postmaster.c:3291
#17 ServerLoop () at postmaster.c:1455
#18 0x00007f7dd3d7637c in PostmasterMain (argc=<optimized out>,
argv=<optimized out>) at postmaster.c:1116
#19 0x00007f7dd3bce5b0 in main (argc=5, argv=0x7f7dd5e4b170) at main.c:199

pg_config output:
BINDIR = /usr/lib/postgresql/9.1/bin
DOCDIR = /usr/share/doc/postgresql
HTMLDIR = /usr/share/doc/postgresql
INCLUDEDIR = /usr/include/postgresql
PKGINCLUDEDIR = /usr/include/postgresql
INCLUDEDIR-SERVER = /usr/include/postgresql/9.1/server
LIBDIR = /usr/lib
PKGLIBDIR = /usr/lib/postgresql/9.1/lib
LOCALEDIR = /usr/share/locale
MANDIR = /usr/share/postgresql/9.1/man
SHAREDIR = /usr/share/postgresql/9.1
SYSCONFDIR = /etc/postgresql-common
PGXS = /usr/lib/postgresql/9.1/lib/pgxs/src/makefiles/pgxs.mk
CONFIGURE = '--build=x86_64-linux-gnu' '--prefix=/usr'
'--includedir=/usr/include' '--mandir=/usr/share/man'
'--infodir=/usr/share/info' '--sysconfdir=/etc' '--localstatedir=/var'
'--libexecdir=/usr/lib/postgresql-9.1' '--disable-maintainer-mode'
'--disable-dependency-tracking' '--mandir=/usr/share/postgresql/9.1/man'
'--with-docdir=/usr/share/doc/postgresql-doc-9.1'
'--sysconfdir=/etc/postgresql-common' '--datadir=/usr/share/postgresql/9.1'
'--bindir=/usr/lib/postgresql/9.1/bin' '--includedir=/usr/include/postgresql/'
'--enable-nls' '--enable-integer-datetimes' '--enable-thread-safety'
'--enable-debug' '--disable-rpath' '--with-tcl' '--with-perl' '--with-python'
'--with-pam' '--with-krb5' '--with-gssapi' '--with-openssl' '--with-libxml'
'--with-libxslt' '--with-ldap' '--with-ossp-uuid' '--with-gnu-ld'
'--with-tclconfig=/usr/lib/tcl8.5' '--with-tkconfig=/usr/lib/tk8.5'
'--with-includes=/usr/include/tcl8.5'
'--with-system-tzdata=/usr/share/zoneinfo' '--with-pgport=5432' 'CFLAGS=-g -O2
-fPIC -fPIC' 'LDFLAGS=-Wl,-Bsymbolic-functions -Wl,--as-needed
-Wl,--as-needed' 'build_alias=x86_64-linux-gnu' 'CPPFLAGS='
CC = gcc
CPPFLAGS = -D_GNU_SOURCE -I/usr/include/libxml2 -I/usr/include/tcl8.5
CFLAGS = -g -O2 -fPIC -fPIC -Wall -Wmissing-prototypes -Wpointer-arith
-Wdeclaration-after-statement -Wendif-labels -Wformat-security
-fno-strict-aliasing -fwrapv -fexcess-precision=standard -g
CFLAGS_SL = -fpic
LDFLAGS = -Wl,-Bsymbolic-functions -Wl,--as-needed -Wl,--as-needed -Wl,--as-needed
LDFLAGS_EX =
LDFLAGS_SL =
LIBS = -lpgport -lxslt -lxml2 -lpam -lssl -lcrypto -lkrb5 -lcom_err
-lgssapi_krb5 -lz -ledit -lcrypt -ldl -lm
VERSION = PostgreSQL 9.1.3

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message kris 2012-04-27 16:11:41 BUG #6621: Constant Segmentation Faults in the Postmaster
Previous Message Tom Lane 2012-04-27 14:22:11 Re: hstore parser incorrectly handles malformed input