*** doc/src/sgml/catalogs.sgml.orig Thu Jul 17 23:32:51 2008
--- doc/src/sgml/catalogs.sgml Wed Jul 23 14:44:38 2008
***************
*** 522,527 ****
--- 522,534 ----
+ aminsertcleanup
+ regproc
+ pg_proc.oid
+ Post-INSERT cleanup function (optional)
+
+
+
amvacuumcleanup
regproc
pg_proc.oid
*** doc/src/sgml/gin.sgml.orig Tue Jul 22 18:05:24 2008
--- doc/src/sgml/gin.sgml Wed Jul 23 14:44:39 2008
***************
*** 188,196 ****
list of heap pointers (PL, posting list) if the list is small enough.
Partial match algorithm
!
GIN can support partial match> queries, in which the query
does not determine an exact match for one or more keys, but the possible
--- 188,232 ----
list of heap pointers (PL, posting list) if the list is small enough.
+
+ GIN fast update technique
+
+
+ Updating a GIN index tends to be slow because of the
+ intrinsic nature of inverted indexes: inserting or updating one heap row
+ can cause many inserts into the index (one for each key extracted
+ from the indexed value). As of
+ PostgreSQL 8.4, this problem is alleviated
+ by postponing most of the work until the next VACUUM>.
+ Newly inserted index entries are temporarily stored in an unsorted list of
+ pending entries. VACUUM> inserts all pending entries into the
+ main GIN index data structure,
+ using the same bulk insert techniques used during initial index creation.
+ This greatly improves GIN index update speed, even
+ counting the additional vacuum overhead.
+
+
+
+ The disadvantage of this approach is that searches must scan the list
+ of pending entries in addition to searching the regular index, and so
+ a large list of pending entries will slow searches significantly.
+ It's recommended to use properly-configured autovacuum with tables
+ having GIN indexes, to keep this overhead to
+ reasonable levels.
+
+
+
+ If consistently-fast search speed is more important than update speed,
+ use of pending entries can be disabled by turning off the
+ FASTUPDATE storage parameter for a
+ GIN index. See for details.
+
+
+
Partial match algorithm
!
GIN can support partial match> queries, in which the query
does not determine an exact match for one or more keys, but the possible
***************
*** 225,235 ****
Create vs insert
! In most cases, insertion into a GIN index is slow
due to the likelihood of many keys being inserted for each value.
So, for bulk insertions into a table it is advisable to drop the GIN
index and recreate it after finishing bulk insertion.
--- 261,278 ----
Create vs insert
! Insertion into a GIN index can be slow
due to the likelihood of many keys being inserted for each value.
So, for bulk insertions into a table it is advisable to drop the GIN
index and recreate it after finishing bulk insertion.
+
+
+ As of PostgreSQL 8.4, this advice is less
+ necessary since delayed indexing is used (see for details). But for very large updates
+ it may still be best to drop and recreate the index.
+
*** doc/src/sgml/ref/create_index.sgml.orig Fri Jul 11 17:06:29 2008
--- doc/src/sgml/ref/create_index.sgml Wed Jul 23 14:44:34 2008
***************
*** 264,270 ****
The WITH> clause can specify storage parameters>
for indexes. Each index method can have its own set of allowed storage
! parameters. The built-in index methods all accept a single parameter:
--- 264,270 ----
The WITH> clause can specify storage parameters>
for indexes. Each index method can have its own set of allowed storage
! parameters. All built-in index methods accept this parameter:
***************
*** 291,296 ****
--- 291,326 ----
+
+ GIN indexes accept an additional parameter:
+
+
+
+
+
+ FASTUPDATE>
+
+
+ This setting controls usage of the fast update technique described in
+ . It is a Boolean parameter:
+ ON> enables fast update, OFF> disables it.
+ (Alternative spellings of ON> and OFF> are
+ allowed as described in .) The
+ default is ON>.
+
+
+
+
+ Turning FASTUPDATE> off via ALTER INDEX> prevents
+ future insertions from going into the list of pending index entries,
+ but does not in itself flush previous entries. You might want to do a
+ VACUUM> afterward to ensure the pending list is emptied.
+
+
+
+
+
+
***************
*** 499,504 ****
--- 529,541 ----
+ To create a GIN> index with fast update turned off:
+
+ CREATE INDEX gin_idx ON documents_table (locations) WITH (fastupdate = off);
+
+
+
+
To create an index on the column code> in the table
films> and have the index reside in the tablespace
indexspace>:
*** doc/src/sgml/ref/vacuum.sgml.orig Thu Feb 7 11:58:46 2008
--- doc/src/sgml/ref/vacuum.sgml Wed Jul 23 14:44:34 2008
***************
*** 62,67 ****
--- 62,74 ----
blocks. This form is much slower and requires an exclusive lock on each
table while it is being processed.
+
+
+ For tables with GIN> indexes, VACUUM (in
+ any form) also completes any delayed index insertions, by moving pending
+ index entries to the appropriate places in the main GIN> index
+ structure. (See for more details.)
+
*** doc/src/sgml/textsearch.sgml.orig Fri May 16 12:31:01 2008
--- doc/src/sgml/textsearch.sgml Wed Jul 23 14:44:39 2008
***************
*** 3201,3207 ****
! GIN indexes are about ten times slower to update than GiST
--- 3201,3209 ----
! GIN indexes are moderately slower to update than GiST indexes, but
! about 10 times slower if fast update support is disabled
! (see for details)
*** src/backend/access/gin/Makefile.orig Tue Feb 19 12:14:41 2008
--- src/backend/access/gin/Makefile Wed Jul 23 14:37:49 2008
***************
*** 14,19 ****
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
! ginbulk.o
include $(top_srcdir)/src/backend/common.mk
--- 14,19 ----
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
! ginbulk.o ginfast.o
include $(top_srcdir)/src/backend/common.mk
*** src/backend/access/gin/ginbulk.c.orig Fri Jul 11 17:06:29 2008
--- src/backend/access/gin/ginbulk.c Wed Jul 23 14:37:49 2008
***************
*** 197,202 ****
--- 197,204 ----
if (nentry <= 0)
return;
+ Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
+
i = nentry - 1;
for (; i > 0; i >>= 1)
nbit++;
*** src/backend/access/gin/gindatapage.c.orig Wed Jun 18 22:57:38 2008
--- src/backend/access/gin/gindatapage.c Wed Jul 23 14:37:50 2008
***************
*** 43,50 ****
while (aptr - a < na && bptr - b < nb)
{
! if (compareItemPointers(aptr, bptr) > 0)
*dptr++ = *bptr++;
else
*dptr++ = *aptr++;
}
--- 43,56 ----
while (aptr - a < na && bptr - b < nb)
{
! int cmp = compareItemPointers(aptr, bptr);
! if (cmp > 0)
*dptr++ = *bptr++;
+ else if ( cmp == 0 )
+ {
+ *dptr++ = *bptr++;
+ aptr++;
+ }
else
*dptr++ = *aptr++;
}
***************
*** 630,638 ****
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
! elog(ERROR, "item pointer (%u,%d) already exists",
! ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
! ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
ginInsertValue(&(gdi->btree), gdi->stack);
--- 636,650 ----
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
! {
! /*
! * gdi->btree.items[ gdi->btree.curitem ] already exists in index
! */
! gdi->btree.curitem ++;
! LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
! freeGinBtreeStack(gdi->stack);
! continue;
! }
ginInsertValue(&(gdi->btree), gdi->stack);
*** src/backend/access/gin/ginget.c.orig Fri Jul 11 17:06:29 2008
--- src/backend/access/gin/ginget.c Wed Jul 23 14:40:39 2008
***************
*** 268,273 ****
--- 268,282 ----
Page page;
bool needUnlock = TRUE;
+ entry->buffer = InvalidBuffer;
+ entry->offset = InvalidOffsetNumber;
+ entry->list = NULL;
+ entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
+ entry->reduceResult = FALSE;
+ entry->predictNumberResult = 0;
+
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
***************
*** 285,298 ****
page = BufferGetPage(stackEntry->buffer);
entry->isFinished = TRUE;
- entry->buffer = InvalidBuffer;
- entry->offset = InvalidOffsetNumber;
- entry->list = NULL;
- entry->nlist = 0;
- entry->partialMatch = NULL;
- entry->partialMatchResult = NULL;
- entry->reduceResult = FALSE;
- entry->predictNumberResult = 0;
if ( entry->isPartialMatch )
{
--- 294,299 ----
***************
*** 350,358 ****
entry->buffer = scanBeginPostingTree(gdi);
/*
! * We keep buffer pinned because we need to prevent deletition
* page during scan. See GIN's vacuum implementation. RefCount
! * is increased to keep buffer pinned after freeGinBtreeStack() call.
*/
IncrBufferRefCount(entry->buffer);
--- 351,360 ----
entry->buffer = scanBeginPostingTree(gdi);
/*
! * We keep buffer pinned because we need to prevent deletion of
* page during scan. See GIN's vacuum implementation. RefCount
! * is increased to keep buffer pinned after freeGinBtreeStack()
! * call.
*/
IncrBufferRefCount(entry->buffer);
***************
*** 436,441 ****
--- 438,479 ----
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ /*
+ * If isScanFastInsert is still true, set up to scan the pending-insert
+ * list rather than the main index.
+ */
+ if (so->isScanFastInsert)
+ {
+ if (so->fastBuffer == InvalidBuffer)
+ {
+ Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
+ BlockNumber blkno;
+
+ LockBuffer(metabuffer, GIN_SHARE);
+ blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
+
+ /*
+ * fetch head of list before unlocking metapage.
+ * head page must be pinned to prevent deletion by vacuum process
+ */
+ if ( blkno == InvalidBlockNumber )
+ {
+ /* No pending list, so proceed with normal scan */
+ so->isScanFastInsert = FALSE;
+ UnlockReleaseBuffer( metabuffer );
+ }
+ else
+ {
+ so->fastBuffer = ReadBuffer(scan->indexRelation, blkno);
+ so->fastOffset = FirstOffsetNumber;
+ UnlockReleaseBuffer( metabuffer );
+ return;
+ }
+ }
+ else
+ return;
+ }
+
for (i = 0; i < so->nkeys; i++)
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
}
***************
*** 727,732 ****
--- 765,944 ----
}
/*
+ * Get ItemPointer of next heap row to be checked from fast insert storage.
+ * Returns false if there are no more.
+ *
+ * The fastBuffer is presumed pinned on entry, and is pinned and share-locked
+ * on success exit. On failure exit it's released.
+ */
+ static bool
+ scanGetCandidate(IndexScanDesc scan, ItemPointerData *item)
+ {
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ OffsetNumber maxoff;
+ Page page;
+ IndexTuple itup;
+
+ ItemPointerSetInvalid( item ); /* *item stays invalid when false is returned */
+ for(;;)
+ {
+ LockBuffer( so->fastBuffer, GIN_SHARE );
+ page = BufferGetPage(so->fastBuffer);
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ if ( so->fastOffset > maxoff ) /* nothing left to read on this page */
+ {
+ BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+ if ( blkno == InvalidBlockNumber )
+ {
+ /* no right sibling: drop lock and pin, and forget the buffer */
+ UnlockReleaseBuffer(so->fastBuffer);
+ so->fastBuffer=InvalidBuffer;
+
+ return false;
+ }
+ else
+ {
+ /* step right: unlock, swap the pin to the next page, relock at loop top */
+ LockBuffer( so->fastBuffer, GIN_UNLOCK );
+ so->fastBuffer=ReleaseAndReadBuffer(so->fastBuffer,
+ scan->indexRelation,
+ blkno);
+ so->fastOffset = FirstOffsetNumber;
+ }
+ }
+ else
+ {
+ /* report the heap pointer stored in the tuple at fastOffset; buffer stays share-locked */
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, so->fastOffset));
+ *item = itup->t_tid;
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static bool
+ matchDatumByEntry(GinState *ginstate, GinScanEntry entry, Datum value)
+ {
+ bool res;
+ /* Returns true if "value" satisfies the scan entry; a comparison result of zero means a match. */
+ if ( entry->isPartialMatch ) /* use the per-attribute comparePartialFn with the entry's strategy */
+ {
+ res = (
+ DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[entry->attnum-1],
+ entry->entry,
+ value,
+ UInt16GetDatum(entry->strategy))) == 0 /* convert the Datum to int32 first, then test it */
+ ) ? true : false;
+ }
+ else /* otherwise an equality test through compareEntries() */
+ {
+ res = ( compareEntries(ginstate, entry->attnum, entry->entry, value) == 0) ? true : false;
+ }
+
+ return res;
+ }
+
+ /*
+ * Fills the entryRes array of every scan key by examining each tuple in
+ * fast insert storage that belongs to heap row *item, from so->fastOffset on.
+ * Returns true if at least one datum was matched by some key's entry.
+ *
+ * The fastBuffer is presumed pinned and share-locked on entry. On exit,
+ * it's pinned but unlocked.
+ */
+ static bool
+ collectDatumForItem(IndexScanDesc scan, ItemPointerData *item)
+ {
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ OffsetNumber maxoff, attrnum;
+ Page page;
+ IndexTuple itup;
+ int i, j;
+ bool hasMatch = false;
+
+ /*
+ * Reset entryRes of every key before looking at this row
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+ memset( key->entryRes, FALSE, key->nentries );
+ }
+
+ for(;;)
+ {
+ Datum datum;
+ bool datumExtracted;
+
+ page = BufferGetPage(so->fastBuffer);
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ if ( so->fastOffset > maxoff )
+ {
+ BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+
+ if ( blkno == InvalidBlockNumber )
+ {
+ break; /* no further page: the row's tuples are exhausted */
+ }
+ else
+ {
+ /* continue on the right sibling, keeping the share lock held afterwards */
+ LockBuffer( so->fastBuffer, GIN_UNLOCK );
+ so->fastBuffer=ReleaseAndReadBuffer(so->fastBuffer,
+ scan->indexRelation,
+ blkno);
+ LockBuffer( so->fastBuffer, GIN_SHARE );
+ so->fastOffset = FirstOffsetNumber;
+ continue;
+ }
+ }
+
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, so->fastOffset));
+
+ if (!ItemPointerEquals(item, &itup->t_tid))
+ break; /* tuple belongs to another heap row; leave fastOffset on it */
+
+ attrnum = gintuple_get_attrnum(&so->ginstate, itup);
+ datum = 0;
+ datumExtracted = false; /* the datum is fetched lazily, at most once per tuple */
+
+ /*
+ * Go through all entries of all keys to find a match
+ */
+ for(i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ if (key->attnum != attrnum)
+ continue;
+
+ for(j=0; j<key->nentries; j++)
+ {
+ if (key->entryRes[j] == false) /* already-matched entries are not re-tested */
+ {
+ if (datumExtracted == false)
+ {
+ datum = gin_index_getattr(&so->ginstate, itup);
+ datumExtracted = true;
+ }
+
+ key->entryRes[j] = matchDatumByEntry(&so->ginstate,
+ key->scanEntry + j,
+ datum);
+ hasMatch |= key->entryRes[j];
+ }
+ }
+ }
+
+ so->fastOffset++;
+ }
+
+ LockBuffer( so->fastBuffer, GIN_UNLOCK );
+
+ return hasMatch;
+ }
+
+ /*
* Get heap item pointer from scan
* returns true if found
*/
***************
*** 748,753 ****
--- 960,1030 ----
*/
*recheck = false;
+ /*
+ * First of all we should check fast insert list of pages
+ */
+ if ( so->isScanFastInsert )
+ {
+ MemoryContext oldCtx;
+ bool match;
+
+ for(;;)
+ {
+ /*
+ * Get ItemPointer to next heap row to be checked
+ */
+ if (!scanGetCandidate(scan, item))
+ {
+ /*
+ * No more fast insert items, so begin regular index scan
+ */
+ so->isScanFastInsert = false;
+ startScan(scan);
+ return scanGetItem(scan, item, recheck);
+ }
+
+ /*
+ * Check entries in rows and setup entryRes array
+ */
+ if (!collectDatumForItem(scan, item))
+ continue;
+
+ /*
+ * check for consistent
+ */
+ oldCtx = MemoryContextSwitchTo(so->tempCtx);
+ *recheck = false;
+ match = true;
+
+ for (i = 0; match && i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ keyrecheck = true;
+
+ if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ],
+ PointerGetDatum(key->entryRes),
+ UInt16GetDatum(key->strategy),
+ key->query,
+ PointerGetDatum(&keyrecheck))) == false )
+ {
+ match = false;
+ }
+
+ *recheck |= keyrecheck;
+ }
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(so->tempCtx);
+
+ if ( match )
+ return true;
+ }
+ }
+
+ /*
+ * Regular scanning
+ */
ItemPointerSetMin(item);
for (i = 0; i < so->nkeys; i++)
{
*** src/backend/access/gin/gininsert.c.orig Fri Jul 11 17:06:29 2008
--- src/backend/access/gin/gininsert.c Wed Jul 23 14:37:50 2008
***************
*** 137,143 ****
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
! static void
ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem, bool isBuild)
{
--- 137,143 ----
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
! void
ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem, bool isBuild)
{
***************
*** 272,278 ****
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
! Buffer buffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
--- 272,278 ----
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
! Buffer RootBuffer, MetaBuffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
***************
*** 285,295 ****
initGinState(&buildstate.ginstate, index);
/* initialize the root page */
! buffer = GinNewBuffer(index);
START_CRIT_SECTION();
! GinInitBuffer(buffer, GIN_LEAF);
! MarkBufferDirty(buffer);
if (!index->rd_istemp)
{
--- 285,301 ----
initGinState(&buildstate.ginstate, index);
+ /* initialize the meta page */
+ MetaBuffer = GinNewBuffer(index);
+
/* initialize the root page */
! RootBuffer = GinNewBuffer(index);
!
START_CRIT_SECTION();
! GinInitMetabuffer(MetaBuffer);
! MarkBufferDirty(MetaBuffer);
! GinInitBuffer(RootBuffer, GIN_LEAF);
! MarkBufferDirty(RootBuffer);
if (!index->rd_istemp)
{
***************
*** 302,317 ****
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
- page = BufferGetPage(buffer);
-
-
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
! UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
/* build the index */
--- 308,326 ----
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+
+ page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
+ page = BufferGetPage(MetaBuffer);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
}
! UnlockReleaseBuffer(MetaBuffer);
! UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* build the index */
***************
*** 413,421 ****
initGinState(&ginstate, index);
! for(i=0; inatts;i++)
! if ( !isnull[i] )
! res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid);
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
--- 422,447 ----
initGinState(&ginstate, index);
! if ( GinGetUseFastUpdate(index) )
! {
! GinTupleCollector collector;
!
! memset(&collector, 0, sizeof(GinTupleCollector));
! for(i=0; inatts;i++)
! if ( !isnull[i] )
! res += ginHeapTupleFastCollect(index, &ginstate, &collector,
! (OffsetNumber)(i+1), values[i], ht_ctid);
!
! ginHeapTupleFastInsert(index, &collector);
! }
! else
! {
! for(i=0; inatts;i++)
! if ( !isnull[i] )
! res += ginHeapTupleInsert(index, &ginstate,
! (OffsetNumber)(i+1), values[i], ht_ctid);
!
! }
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
*** src/backend/access/gin/ginscan.c.orig Fri Jul 11 17:06:29 2008
--- src/backend/access/gin/ginscan.c Wed Jul 23 14:37:50 2008
***************
*** 159,164 ****
--- 159,167 ----
errmsg("GIN indexes do not support whole-index scans")));
so->isVoidRes = false;
+ so->isScanFastInsert = true;
+ so->fastBuffer = InvalidBuffer;
+ so->fastOffset = InvalidOffsetNumber;
for (i = 0; i < scan->numberOfKeys; i++)
{
***************
*** 234,241 ****
--- 237,250 ----
{
freeScanKeys(so->keys, so->nkeys, TRUE);
freeScanKeys(so->markPos, so->nkeys, FALSE);
+ if ( so->fastBuffer != InvalidBuffer )
+ ReleaseBuffer(so->fastBuffer);
+ if ( so->markFastBuffer != InvalidBuffer )
+ ReleaseBuffer(so->markFastBuffer);
}
+ so->isScanFastInsert = true;
+ so->fastBuffer = so->markFastBuffer = InvalidBuffer;
so->markPos = so->keys = NULL;
if (scankey && scan->numberOfKeys > 0)
***************
*** 259,264 ****
--- 268,278 ----
freeScanKeys(so->keys, so->nkeys, TRUE);
freeScanKeys(so->markPos, so->nkeys, FALSE);
+ if ( so->fastBuffer != InvalidBuffer )
+ ReleaseBuffer(so->fastBuffer);
+ if ( so->markFastBuffer != InvalidBuffer )
+ ReleaseBuffer(so->markFastBuffer);
+
MemoryContextDelete(so->tempCtx);
pfree(so);
***************
*** 308,314 ****
--- 322,338 ----
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->markPos, so->nkeys, FALSE);
+
+ if ( so->markFastBuffer != InvalidBuffer )
+ ReleaseBuffer(so->markFastBuffer);
+
+ if ( so->fastBuffer != InvalidBuffer )
+ IncrBufferRefCount(so->fastBuffer);
+ so->markFastBuffer = so->fastBuffer;
+
so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE);
+ so->markIsScanFastInsert = so->isScanFastInsert;
+ so->markFastOffset = so->fastOffset;
PG_RETURN_VOID();
}
***************
*** 320,326 ****
--- 344,360 ----
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->keys, so->nkeys, FALSE);
+
+ if ( so->fastBuffer != InvalidBuffer )
+ ReleaseBuffer(so->fastBuffer);
+
+ if ( so->markFastBuffer != InvalidBuffer )
+ IncrBufferRefCount(so->markFastBuffer);
+ so->fastBuffer = so->markFastBuffer;
+
so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE);
+ so->isScanFastInsert = so->markIsScanFastInsert;
+ so->fastOffset = so->markFastOffset;
PG_RETURN_VOID();
}
*** src/backend/access/gin/ginutil.c.orig Fri Jul 11 17:06:29 2008
--- src/backend/access/gin/ginutil.c Wed Jul 23 15:43:13 2008
***************
*** 16,25 ****
#include "access/genam.h"
#include "access/gin.h"
#include "access/reloptions.h"
! #include "catalog/pg_type.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
void
initGinState(GinState *state, Relation index)
--- 16,27 ----
#include "access/genam.h"
#include "access/gin.h"
#include "access/reloptions.h"
! #include "catalog/pg_type.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
+ #include "utils/guc.h"
+
void
initGinState(GinState *state, Relation index)
***************
*** 56,62 ****
CurrentMemoryContext);
/*
! * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
--- 58,64 ----
CurrentMemoryContext);
/*
! * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
***************
*** 87,93 ****
bool isnull;
/*
! * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
--- 89,95 ----
bool isnull;
/*
! * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
***************
*** 212,217 ****
--- 214,233 ----
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
}
+ void
+ GinInitMetabuffer(Buffer b)
+ {
+ Page metapage = BufferGetPage(b);
+ GinMetaPageData *meta;
+ /* Format the page as a GIN metapage, then record that no head/tail block exists yet. */
+ GinInitPage(metapage, GIN_META, BufferGetPageSize(b));
+ meta = GinPageGetMeta(metapage);
+
+ meta->tailFreeSize = 0;
+ meta->tail = InvalidBlockNumber;
+ meta->head = InvalidBlockNumber;
+ }
+
int
compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b)
{
***************
*** 324,335 ****
return tmppage;
}
! Datum
! ginoptions(PG_FUNCTION_ARGS)
{
! Datum reloptions = PG_GETARG_DATUM(0);
! bool validate = PG_GETARG_BOOL(1);
! bytea *result;
/*
* It's not clear that fillfactor is useful for GIN, but for the moment
--- 340,349 ----
return tmppage;
}
! static int
! parseFillfactor(char *value, bool validate)
{
! int fillfactor;
/*
* It's not clear that fillfactor is useful for GIN, but for the moment
***************
*** 338,347 ****
#define GIN_MIN_FILLFACTOR 10
#define GIN_DEFAULT_FILLFACTOR 100
! result = default_reloptions(reloptions, validate,
! GIN_MIN_FILLFACTOR,
! GIN_DEFAULT_FILLFACTOR);
! if (result)
! PG_RETURN_BYTEA_P(result);
! PG_RETURN_NULL();
}
--- 352,424 ----
#define GIN_MIN_FILLFACTOR 10
#define GIN_DEFAULT_FILLFACTOR 100
! if (value == NULL)
! return GIN_DEFAULT_FILLFACTOR;
!
! if (!parse_int(value, &fillfactor, 0, NULL))
! {
! if (validate)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("fillfactor must be an integer: \"%s\"",
! value)));
! return GIN_DEFAULT_FILLFACTOR;
! }
!
! if (fillfactor < GIN_MIN_FILLFACTOR || fillfactor > 100)
! {
! if (validate)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("fillfactor=%d is out of range (should be between %d and 100)",
! fillfactor, GIN_MIN_FILLFACTOR)));
! return GIN_DEFAULT_FILLFACTOR;
! }
!
! return fillfactor;
! }
!
! static bool
! parseFastupdate(char *value, bool validate)
! {
! bool enabled;
!
! /* A missing value, or an unparsable one when not validating, yields the default. */
! if (value != NULL)
! {
! if (parse_bool(value, &enabled))
! return enabled;
!
! if (validate)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("fastupdate=\"%s\" is not recognized",
! value)));
! }
!
! return GIN_DEFAULT_USE_FASTUPDATE;
! }
!
! Datum
! ginoptions(PG_FUNCTION_ARGS)
! {
! Datum reloptions = PG_GETARG_DATUM(0);
! bool validate = PG_GETARG_BOOL(1);
! static const char *const gin_keywords[2] = {"fillfactor", "fastupdate"};
! char *values[2]; /* values[i] pairs with gin_keywords[i] */
! GinOptions *options;
! /* NOTE(review): presumably leaves values[i] NULL when the keyword is absent - confirm in parseRelOptions */
! parseRelOptions(reloptions, 2, gin_keywords, values, validate);
!
! /* If no options, just return NULL */
! if (values[0] == NULL && values[1] == NULL)
! PG_RETURN_NULL();
! /* Build the GinOptions struct, with its length header, that is returned as bytea */
! options = (GinOptions *) palloc(sizeof(GinOptions));
! SET_VARSIZE(options, sizeof(GinOptions));
! /* each parser falls back to its default when handed a NULL value */
! options->std.fillfactor = parseFillfactor(values[0], validate);
! options->useFastUpdate = parseFastupdate(values[1], validate);
!
! PG_RETURN_BYTEA_P(options);
! }
*** src/backend/access/gin/ginxlog.c.orig Fri Jul 11 17:06:29 2008
--- src/backend/access/gin/ginxlog.c Wed Jul 23 14:37:51 2008
***************
*** 71,90 ****
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
! Buffer buffer;
Page page;
! buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
! Assert(BufferIsValid(buffer));
! page = (Page) BufferGetPage(buffer);
! GinInitBuffer(buffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
! MarkBufferDirty(buffer);
! UnlockReleaseBuffer(buffer);
}
static void
--- 71,100 ----
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
! Buffer RootBuffer, MetaBuffer;
Page page;
! MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
! Assert(BufferIsValid(MetaBuffer));
! GinInitMetabuffer(MetaBuffer);
!
! page = (Page) BufferGetPage(MetaBuffer);
! PageSetLSN(page, lsn);
! PageSetTLI(page, ThisTimeLineID);
! RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
! Assert(BufferIsValid(RootBuffer));
! page = (Page) BufferGetPage(RootBuffer);
!
! GinInitBuffer(RootBuffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
! MarkBufferDirty(MetaBuffer);
! UnlockReleaseBuffer(MetaBuffer);
! MarkBufferDirty(RootBuffer);
! UnlockReleaseBuffer(RootBuffer);
}
static void
***************
*** 433,438 ****
--- 443,603 ----
}
}
+ /*
+  * Replay an XLOG_GIN_UPDATE_META_PAGE record.
+  *
+  * The metapage contents carried by the record are copied onto the metapage
+  * when the page is older than the record.  Then, depending on the record:
+  *  - ntuples > 0: the tuples following the record header are appended to
+  *    the page metadata.tail (skipped when backup block 1 is present);
+  *  - otherwise, if prevTail is valid: the rightlink of the previous tail
+  *    page is set to newRightlink, linking in a new sublist.
+  */
+ static void
+ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+ {
+     ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record);
+     Buffer metabuffer;
+     Page metapage;
+
+     metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+     metapage = BufferGetPage(metabuffer);
+
+     /* apply the change only if the page does not already contain it */
+     if (!XLByteLE(lsn, PageGetLSN(metapage)))
+     {
+         memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+         PageSetLSN(metapage, lsn);
+         PageSetTLI(metapage, ThisTimeLineID);
+         MarkBufferDirty(metabuffer);
+     }
+
+     if ( data->ntuples > 0 )
+     {
+         /*
+          * insert into tail page
+          */
+         if (!(record->xl_info & XLR_BKP_BLOCK_1))
+         {
+             Buffer buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
+             Page page = BufferGetPage(buffer);
+
+             if ( !XLByteLE(lsn, PageGetLSN(page)))
+             {
+                 OffsetNumber l, off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+                     OffsetNumberNext(PageGetMaxOffsetNumber(page));
+                 int i, tupsize;
+                 IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+
+                 for(i=0; i<data->ntuples; i++)
+                 {
+                     tupsize = IndexTupleSize(tuples);
+
+                     l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+                     if (l == InvalidOffsetNumber)
+                         elog(ERROR, "failed to add item to index page");
+
+                     /* tuples are packed back to back after the record header */
+                     tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+
+                     /*
+                      * Advance the target offset, as the normal insertion
+                      * path does, so tuples keep their original order
+                      */
+                     off++;
+                 }
+
+                 PageSetLSN(page, lsn);
+                 PageSetTLI(page, ThisTimeLineID);
+                 MarkBufferDirty(buffer);
+             }
+             UnlockReleaseBuffer(buffer);
+         }
+     }
+     else if ( data->prevTail != InvalidBlockNumber )
+     {
+         /*
+          * New tail
+          */
+
+         Buffer buffer = XLogReadBuffer(data->node, data->prevTail, false);
+         Page page = BufferGetPage(buffer);
+
+         if ( !XLByteLE(lsn, PageGetLSN(page)))
+         {
+             GinPageGetOpaque(page)->rightlink = data->newRightlink;
+
+             PageSetLSN(page, lsn);
+             PageSetTLI(page, ThisTimeLineID);
+             MarkBufferDirty(buffer);
+         }
+         UnlockReleaseBuffer(buffer);
+     }
+
+     UnlockReleaseBuffer(metabuffer);
+ }
+
+ /*
+  * Replay an XLOG_GIN_INSERT_LISTPAGE record: re-create one page of the
+  * pending list from scratch and fill it with the tuples that follow the
+  * record header.  Nothing is done when backup block 1 is present.
+  */
+ static void
+ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+ {
+     ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record);
+     Buffer buffer;
+     Page page;
+     OffsetNumber l, off = FirstOffsetNumber;
+     int i, tupsize;
+     IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
+
+     if (record->xl_info & XLR_BKP_BLOCK_1)
+         return;
+
+     buffer = XLogReadBuffer(data->node, data->blkno, true);
+     page = BufferGetPage(buffer);
+
+     GinInitBuffer(buffer, GIN_LIST);
+     GinPageGetOpaque(page)->rightlink = data->rightlink;
+     /* a page without right sibling is flagged as holding full row(s) */
+     if ( data->rightlink == InvalidBlockNumber )
+         GinPageSetFullRow(page);
+
+     for(i=0; i<data->ntuples; i++)
+     {
+         tupsize = IndexTupleSize(tuples);
+
+         l = PageAddItem(page, (Item)tuples, tupsize, off, false, false);
+
+         if (l == InvalidOffsetNumber)
+             elog(ERROR, "failed to add item to index page");
+
+         /* tuples are packed back to back after the record header */
+         tuples = (IndexTuple)( ((char*)tuples) + tupsize );
+
+         /*
+          * Advance the target offset; re-inserting at FirstOffsetNumber
+          * each time would not preserve the order of the tuples
+          */
+         off++;
+     }
+
+     PageSetLSN(page, lsn);
+     PageSetTLI(page, ThisTimeLineID);
+     MarkBufferDirty(buffer);
+
+     UnlockReleaseBuffer(buffer);
+ }
+
+ /*
+  * Replay an XLOG_GIN_DELETE_LISTPAGE record: install the metapage contents
+  * carried by the record and flag each of the ndeleted pages listed in
+  * toDelete[] as GIN_DELETED.  Every page is touched only if it is older
+  * than the record.
+  */
+ static void
+ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+ {
+     ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record);
+     Buffer metabuffer;
+     Page metapage;
+     int i;
+
+     metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+     metapage = BufferGetPage(metabuffer);
+
+     if (!XLByteLE(lsn, PageGetLSN(metapage)))
+     {
+         memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+         PageSetLSN(metapage, lsn);
+         PageSetTLI(metapage, ThisTimeLineID);
+         MarkBufferDirty(metabuffer);
+     }
+
+     for(i=0; i<data->ndeleted; i++)
+     {
+         Buffer buffer = XLogReadBuffer(data->node,data->toDelete[i],false);
+         Page page = BufferGetPage(buffer);
+
+         if ( !XLByteLE(lsn, PageGetLSN(page)))
+         {
+             /* the flags word is overwritten, not OR-ed */
+             GinPageGetOpaque(page)->flags = GIN_DELETED;
+
+             PageSetLSN(page, lsn);
+             PageSetTLI(page, ThisTimeLineID);
+             MarkBufferDirty(buffer);
+         }
+
+         UnlockReleaseBuffer(buffer);
+     }
+     UnlockReleaseBuffer(metabuffer);
+ }
+
void
gin_redo(XLogRecPtr lsn, XLogRecord *record)
{
***************
*** 459,464 ****
--- 624,638 ----
case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ ginRedoUpdateMetapage(lsn, record);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ ginRedoInsertListPage(lsn, record);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ ginRedoDeleteListPages(lsn, record);
+ break;
default:
elog(PANIC, "gin_redo: unknown op code %u", info);
}
***************
*** 514,519 ****
--- 688,705 ----
appendStringInfo(buf, "Delete page, ");
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ appendStringInfo(buf, "Update metapage, ");
+ desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ appendStringInfo(buf, "insert new list page, ");
+ desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ appendStringInfo(buf, "Delete list page (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted);
+ desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head);
+ break;
default:
elog(PANIC, "gin_desc: unknown op code %u", info);
}
*** src/backend/access/gin/ginfast.c.orig Tue Jul 22 11:40:27 2008
--- src/backend/access/gin/ginfast.c Wed Jul 23 15:44:48 2008
***************
*** 0 ****
--- 1,728 ----
+ /*-------------------------------------------------------------------------
+ *
+ * ginfast.c
+ * Fast insert routines for the Postgres inverted index access method.
+ * Pending entries are stored in linear list of pages and vacuum
+ * will transfer them into regular structure.
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ #include "postgres.h"
+
+ #include "access/genam.h"
+ #include "access/gin.h"
+ #include "access/tuptoaster.h"
+ #include "catalog/index.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "utils/memutils.h"
+
+
+ static int32
+ writeListPage(Relation index, Buffer buffer, IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
+ {
+ Page page = BufferGetPage(buffer);
+ int i, freesize, size=0;
+ OffsetNumber l, off;
+
+ START_CRIT_SECTION();
+
+ GinInitBuffer(buffer, GIN_LIST);
+
+ off = FirstOffsetNumber;
+
+ for(i=0; irightlink = rightlink;
+ /*
+ * tail page may contain only the whole row(s) or final
+ * part of row placed on previous pages
+ */
+ if ( rightlink == InvalidBlockNumber )
+ GinPageSetFullRow(page);
+
+ freesize = PageGetFreeSpace(page);
+
+ MarkBufferDirty(buffer);
+
+ if (!index->rd_istemp)
+ {
+ XLogRecData rdata[2];
+ ginxlogInsertListPage data;
+ XLogRecPtr recptr;
+ char *ptr;
+
+ rdata[0].buffer = buffer;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char*)&data;
+ rdata[0].len = sizeof(ginxlogInsertListPage);
+ rdata[0].next = rdata+1;
+
+ rdata[1].buffer = InvalidBuffer;
+ ptr = rdata[1].data = palloc( size );
+ rdata[1].len = size;
+ rdata[1].next = NULL;
+
+ for(i=0; i 0);
+
+ /*
+ * Split tuples for pages
+ */
+ for(i=0;ihead = BufferGetBlockNumber(curBuffer);
+ }
+
+ prevBuffer = curBuffer;;
+ startTuple = i;
+ size = 0;
+ }
+
+ tupsize = IndexTupleSize(tuples[i]) + sizeof(ItemIdData);
+
+ if ( size + tupsize >= GinListPageSize )
+ {
+ i--;
+ curBuffer = InvalidBuffer;
+ }
+ else
+ {
+ size += tupsize;
+ }
+ }
+
+ /*
+ * Write last page
+ */
+ res->tail = BufferGetBlockNumber(curBuffer);
+ res->tailFreeSize = writeListPage(index, curBuffer, tuples+startTuple, ntuples-startTuple, InvalidBlockNumber);
+ }
+
+ /*
+  * Insert the tuples gathered in the collector into the pending list during
+  * normal insertion.  The function guarantees that all values of one heap
+  * tuple are stored sequentially, preserving their order.
+  *
+  * Two strategies are used:
+  *  - if the pending list is empty, or the tuples do not fit into the free
+  *    space of the tail page, a separate sublist of pages is built and then
+  *    either becomes the list or is linked after the current tail;
+  *  - otherwise the tuples are appended to the current tail page.
+  * In both cases the metapage is updated and, for non-temp indexes, a single
+  * XLOG_GIN_UPDATE_META_PAGE record is written.
+  *
+  * Does nothing when the collector holds no tuples.
+  */
+ void
+ ginHeapTupleFastInsert(Relation index, GinTupleCollector *collector)
+ {
+     Buffer metabuffer;
+     Page metapage;
+     GinMetaPageData *metadata;
+     XLogRecData rdata[2];
+     Buffer buffer = InvalidBuffer;
+     Page page = NULL;
+     ginxlogUpdateMeta data;
+
+     if ( collector->ntuples == 0 )
+         return;
+
+     metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+     LockBuffer(metabuffer, GIN_EXCLUSIVE);
+     metapage = BufferGetPage(metabuffer);
+     metadata = GinPageGetMeta(metapage);
+
+     data.node = index->rd_node;
+     data.ntuples = 0;
+     data.newRightlink = data.prevTail = InvalidBlockNumber;
+
+     rdata[0].buffer = InvalidBuffer;
+     rdata[0].data = (char *) &data;
+     rdata[0].len = sizeof(ginxlogUpdateMeta);
+     rdata[0].next = NULL;
+
+     if ( metadata->head == InvalidBlockNumber ||
+          collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize )
+     {
+         GinMetaPageData sublist;
+
+         /*
+          * We should make sublist separately and append it
+          * to keep high concurrency
+          */
+         LockBuffer(metabuffer, GIN_UNLOCK);
+         memset( &sublist, 0, sizeof(GinMetaPageData) );
+
+         makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+
+         LockBuffer(metabuffer, GIN_EXCLUSIVE);
+
+         /* the metapage was unlocked meanwhile, so look at head again */
+         if ( metadata->head == InvalidBlockNumber )
+         {
+             /*
+              * Sublist becomes main list
+              */
+             START_CRIT_SECTION();
+             memcpy(metadata, &sublist, sizeof(GinMetaPageData) );
+             memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) );
+         }
+         else
+         {
+             /*
+              * join lists
+              */
+
+             data.prevTail = metadata->tail;
+             buffer = ReadBuffer(index, metadata->tail);
+             LockBuffer(buffer, GIN_EXCLUSIVE);
+             page = BufferGetPage(buffer);
+             Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
+
+             START_CRIT_SECTION();
+
+             GinPageGetOpaque(page)->rightlink = sublist.head;
+             metadata->tail = sublist.tail;
+             metadata->tailFreeSize = sublist.tailFreeSize;
+
+             memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+             data.newRightlink = sublist.head;
+
+             MarkBufferDirty(buffer);
+         }
+     }
+     else
+     {
+         /*
+          * Insert into tail page
+          */
+
+         OffsetNumber l, off;
+         int i, tupsize;
+         char *ptr;
+
+         buffer = ReadBuffer(index, metadata->tail);
+         LockBuffer(buffer, GIN_EXCLUSIVE);
+         page = BufferGetPage(buffer);
+         off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+             OffsetNumberNext(PageGetMaxOffsetNumber(page));
+
+         /* the tuples themselves go into a second chunk of the WAL record */
+         rdata[0].next = rdata + 1;
+
+         rdata[1].buffer = buffer;
+         rdata[1].buffer_std = true;
+         ptr = rdata[1].data = (char *) palloc( collector->sumsize );
+         rdata[1].len = collector->sumsize;
+         rdata[1].next = NULL;
+
+         data.ntuples = collector->ntuples;
+
+         START_CRIT_SECTION();
+
+         for(i=0; i<collector->ntuples; i++)
+         {
+             tupsize = IndexTupleSize(collector->tuples[i]);
+             l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false);
+
+             if (l == InvalidOffsetNumber)
+                 elog(ERROR, "failed to add item to index page in \"%s\"",
+                      RelationGetRelationName(index));
+
+             /* copy the tuple into the WAL payload as well */
+             memcpy(ptr, collector->tuples[i], tupsize);
+             ptr+=tupsize;
+
+             off++;
+         }
+
+         metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData);
+         memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+         MarkBufferDirty(buffer);
+     }
+
+     /*
+      * Make real write
+      */
+
+     MarkBufferDirty(metabuffer);
+     if ( !index->rd_istemp )
+     {
+         XLogRecPtr recptr;
+
+         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+         PageSetLSN(metapage, recptr);
+         PageSetTLI(metapage, ThisTimeLineID);
+
+         if ( buffer != InvalidBuffer )
+         {
+             PageSetLSN(page, recptr);
+             PageSetTLI(page, ThisTimeLineID);
+         }
+     }
+
+     if (buffer != InvalidBuffer)
+         UnlockReleaseBuffer(buffer);
+     UnlockReleaseBuffer(metabuffer);
+
+     END_CRIT_SECTION();
+ }
+
+ /*
+  * Extract the index entries of one indexed value and append the index
+  * tuples built from them to the collector.  All entries of one heap tuple
+  * should be written at once, to guarantee a consistent state.
+  *
+  * Returns the number of entries added (0 if the value yields none).
+  */
+ uint32
+ ginHeapTupleFastCollect(Relation index, GinState *ginstate, GinTupleCollector *collector,
+                         OffsetNumber attnum, Datum value, ItemPointer item)
+ {
+     Datum *entries;
+     int32 nentries;
+     int32 idx;
+
+     entries = extractEntriesSU(ginstate, attnum, value, &nentries);
+
+     /* nothing to insert */
+     if (nentries == 0)
+         return 0;
+
+     /*
+      * First use of the collector: allocate the tuple array
+      */
+     if ( collector->tuples == NULL )
+     {
+         collector->lentuples = nentries * index->rd_att->natts;
+         collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples);
+     }
+
+     /*
+      * Double the array until the new entries fit
+      */
+     while ( collector->ntuples + nentries > collector->lentuples )
+     {
+         collector->lentuples *= 2;
+         collector->tuples = (IndexTuple*)repalloc( collector->tuples,
+                                                    sizeof(IndexTuple) * collector->lentuples);
+     }
+
+     /*
+      * Build one index tuple per entry, each pointing to the heap item
+      */
+     for (idx = 0; idx < nentries; idx++)
+     {
+         IndexTuple itup = GinFormTuple(ginstate, attnum, entries[idx], NULL, 0);
+         int32 tupsize;
+
+         itup->t_tid = *item;
+         collector->tuples[collector->ntuples + idx] = itup;
+
+         tupsize = IndexTupleSize(itup);
+         if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize)
+             elog(ERROR, "huge tuple");
+
+         collector->sumsize += tupsize;
+     }
+
+     collector->ntuples += nentries;
+
+     return nentries;
+ }
+
+ /*
+ * Deletes first pages in list before newHead page.
+ * If newHead == InvalidBlockNumber then function drops the whole list.
+ * returns true if concurrent completion process is running
+ */
+ static bool
+ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
+ IndexBulkDeleteResult *stats)
+ {
+ #define NDELETE_AT_ONCE (16)
+ Buffer buffers[NDELETE_AT_ONCE];
+ ginxlogDeleteListPages data;
+ XLogRecData rdata[1];
+ Page metapage;
+ GinMetaPageData *metadata;
+ BlockNumber blknoToDelete;
+
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+ blknoToDelete = metadata->head;
+
+ data.node = index->rd_node;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogDeleteListPages);
+ rdata[0].next = NULL;
+
+ do
+ {
+ Page page;
+ int i;
+
+ data.ndeleted = 0;
+ while( data.ndeleted < NDELETE_AT_ONCE && blknoToDelete != newHead )
+ {
+ data.toDelete[ data.ndeleted ] = blknoToDelete;
+ buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete);
+ LockBufferForCleanup( buffers[ data.ndeleted ] );
+ page = BufferGetPage( buffers[ data.ndeleted ] );
+
+ data.ndeleted++;
+ stats->pages_deleted++;
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent deletion process is detected */
+ for(i=0;irightlink;
+ }
+
+ START_CRIT_SECTION();
+
+ metadata->head = blknoToDelete;
+ if ( blknoToDelete == InvalidBlockNumber )
+ {
+ metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+ }
+ memcpy( &data.metadata, metadata, sizeof(GinMetaPageData));
+ MarkBufferDirty( metabuffer );
+
+ for(i=0; iflags = GIN_DELETED;
+ MarkBufferDirty( buffers[ i ] );
+ }
+
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ for(i=0; invalues >= datums->maxvalues)
+ {
+ datums->maxvalues *= 2;
+ datums->values = (Datum*)repalloc( datums->values, sizeof(Datum)*datums->maxvalues);
+ }
+
+ datums->values[ datums->nvalues++ ] = datum;
+ }
+
+ /*
+  * Walk the tuples of a pending-list page, starting at startoff, and feed
+  * their values into the build accumulator.  Values are batched in *da and
+  * handed over one (heap tuple, column) group at a time.
+  */
+
+ static void
+ processPendingPage(BuildAccumulator *accum, DatumArray *da, Page page, OffsetNumber startoff)
+ {
+     ItemPointerData groupTid;
+     OffsetNumber groupAttnum = 0;
+     OffsetNumber lastoff = PageGetMaxOffsetNumber(page);
+     OffsetNumber off = startoff;
+
+     Assert( lastoff >= FirstOffsetNumber );
+     ItemPointerSetInvalid(&groupTid);
+
+     while (off <= lastoff)
+     {
+         IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+         OffsetNumber tupAttnum = gintuple_get_attrnum(accum->ginstate, itup);
+         bool haveGroup = ItemPointerIsValid(&groupTid);
+
+         if ( !haveGroup ||
+              !ItemPointerEquals(&groupTid, &itup->t_tid) ||
+              tupAttnum != groupAttnum )
+         {
+             /*
+              * Several datums can be inserted per call, but only for one
+              * heap tuple and one column - so flush the finished group
+              * before starting the next one.
+              */
+             if ( haveGroup )
+             {
+                 ginInsertRecordBA(accum, &groupTid, groupAttnum, da->values, da->nvalues);
+                 da->nvalues = 0;
+             }
+             groupTid = itup->t_tid;
+             groupAttnum = tupAttnum;
+         }
+
+         addDatum(da, gin_index_getattr(accum->ginstate, itup));
+
+         off = OffsetNumberNext(off);
+     }
+
+     /* hand over whatever is left in the last group */
+     ginInsertRecordBA(accum, &groupTid, groupAttnum, da->values, da->nvalues);
+ }
+
+ /*
+  * Move tuples from the pending-list pages into the regular GIN structure.
+  * The function doesn't require special locking and can be called at any
+  * time; a concurrent run is detected through deleted list pages, in which
+  * case this run stops early.
+  *
+  * Arguments: IndexVacuumInfo *info, IndexBulkDeleteResult *stats (may be
+  * NULL).  Returns the stats struct.  When the pending list is empty the
+  * incoming stats pointer is returned unchanged (possibly NULL), so that
+  * statistics accumulated by earlier calls are not lost.
+  */
+
+ Datum
+ gininsertcleanup(PG_FUNCTION_ARGS)
+ {
+     IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+     IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+     Relation index = info->index;
+     GinState ginstate;
+     Buffer metabuffer, buffer;
+     Page metapage, page;
+     GinMetaPageData *metadata;
+     MemoryContext opCtx, oldCtx;
+     BuildAccumulator accum;
+     DatumArray datums;
+     BlockNumber blkno;
+
+     initGinState(&ginstate, index);
+
+     metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+     LockBuffer(metabuffer, GIN_SHARE);
+     metapage = BufferGetPage(metabuffer);
+     metadata = GinPageGetMeta(metapage);
+
+     if ( metadata->head == InvalidBlockNumber )
+     {
+         /* Nothing pending: hand the caller's stats back untouched */
+         UnlockReleaseBuffer(metabuffer);
+         PG_RETURN_POINTER(stats);
+     }
+
+     /* Set up all-zero stats if ginbulkdelete wasn't called */
+     if (stats == NULL)
+         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+     /*
+      * Init
+      */
+     datums.maxvalues=128;
+     datums.nvalues = 0;
+     datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+
+     ginInitBA(&accum);
+     accum.ginstate = &ginstate;
+
+     opCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                   "Gin refresh temporary context",
+                                   ALLOCSET_DEFAULT_MINSIZE,
+                                   ALLOCSET_DEFAULT_INITSIZE,
+                                   ALLOCSET_DEFAULT_MAXSIZE);
+
+     oldCtx = MemoryContextSwitchTo(opCtx);
+
+     /*
+      * Read and lock head
+      */
+     blkno = metadata->head;
+     buffer = ReadBuffer(index, blkno);
+     LockBuffer(buffer, GIN_SHARE);
+     page = BufferGetPage(buffer);
+
+     LockBuffer(metabuffer, GIN_UNLOCK);
+
+     for(;;)
+     {
+         /*
+          * reset datum's collector and read page's datums into memory
+          */
+         datums.nvalues = 0;
+
+         if ( GinPageIsDeleted(page) )
+         {
+             /* concurrent completion is running */
+             UnlockReleaseBuffer( buffer );
+             break;
+         }
+
+         processPendingPage(&accum, &datums, page, FirstOffsetNumber);
+
+         /*
+          * Is it time to flush memory to disk?
+          */
+         if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+              ( GinPageHasFullRow(page) && accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+         {
+             ItemPointerData *list;
+             uint32 nlist;
+             Datum entry;
+             OffsetNumber maxoff, attnum;
+
+             /*
+              * Unlock current page to increase performance.
+              * Changes of page will be checked later by comparing
+              * maxoff after completion of memory flush.
+              */
+             maxoff = PageGetMaxOffsetNumber(page);
+             LockBuffer(buffer, GIN_UNLOCK);
+
+             /*
+              * Moving collected data into regular structure can take
+              * significant amount of time - so, run it without locking pending
+              * list.
+              */
+             while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+                 ginEntryInsert(index, &ginstate, attnum, entry, list, nlist, FALSE);
+
+             /*
+              * Lock the whole list to remove pages
+              */
+             LockBuffer(metabuffer, GIN_EXCLUSIVE);
+             LockBuffer(buffer, GIN_SHARE);
+
+             if ( GinPageIsDeleted(page) )
+             {
+                 /* concurrent completion is running */
+                 UnlockReleaseBuffer(buffer);
+                 LockBuffer(metabuffer, GIN_UNLOCK);
+                 break;
+             }
+
+             /*
+              * While we kept the page unlocked it might have been changed -
+              * so read the additions separately.  One page is rather
+              * small, so the extra memory used isn't very big, although
+              * we have to reinitialize the accumulator.  We need to make
+              * the check only once because now both page and metapage are
+              * locked.  The insertion algorithm guarantees that inserted
+              * row(s) will not continue on the next page.
+              */
+             if ( PageGetMaxOffsetNumber(page) != maxoff )
+             {
+                 ginInitBA(&accum);
+                 datums.nvalues = 0;
+                 processPendingPage(&accum, &datums, page, maxoff+1);
+
+                 while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+                     ginEntryInsert(index, &ginstate, attnum, entry, list, nlist, FALSE);
+             }
+
+             /*
+              * Remember next page - it will become a new head
+              */
+             blkno = GinPageGetOpaque(page)->rightlink;
+             UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */
+
+             /*
+              * remove the pages already read from the pending list; at this
+              * point all of their content is in the regular structure
+              */
+             if ( shiftList(index, metabuffer, blkno, stats) )
+             {
+                 /* concurrent completion is running */
+                 LockBuffer(metabuffer, GIN_UNLOCK);
+                 break;
+             }
+
+             Assert( blkno == metadata->head );
+             LockBuffer(metabuffer, GIN_UNLOCK);
+
+             /*
+              * if we remove the whole list just exit
+              */
+             if ( blkno == InvalidBlockNumber )
+                 break;
+
+             /*
+              * reinit state
+              */
+             MemoryContextReset(opCtx);
+             ginInitBA(&accum);
+         }
+         else
+         {
+             blkno = GinPageGetOpaque(page)->rightlink;
+             UnlockReleaseBuffer(buffer);
+         }
+
+
+         /*
+          * Read next page in pending list
+          */
+         CHECK_FOR_INTERRUPTS();
+         buffer = ReadBuffer(index, blkno);
+         LockBuffer(buffer, GIN_SHARE);
+         page = BufferGetPage(buffer);
+     }
+
+     /* on every exit path the metapage is unlocked but still pinned */
+     ReleaseBuffer(metabuffer);
+     MemoryContextSwitchTo(oldCtx);
+     MemoryContextDelete(opCtx);
+
+     PG_RETURN_POINTER(stats);
+ }
*** src/backend/access/index/indexam.c.orig Wed Jun 18 22:57:40 2008
--- src/backend/access/index/indexam.c Wed Jul 23 14:37:39 2008
***************
*** 23,28 ****
--- 23,29 ----
* index_getnext - get the next tuple from a scan
* index_getbitmap - get all tuples from a scan
* index_bulk_delete - bulk deletion of index tuples
+ * index_insert_cleanup - completion of insertion to index
* index_vacuum_cleanup - post-deletion cleanup of an index
* index_getprocid - get a support procedure OID
* index_getprocinfo - get a support procedure's lookup info
***************
*** 705,710 ****
--- 706,738 ----
}
/* ----------------
+ * index_insert_cleanup - do mass insertion of index entries
+ *
+ * return value is an optional palloc'd struct of statistics
+ * ----------------
+ */
+ IndexBulkDeleteResult *
+ index_insert_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+ {
+     Relation indexRelation = info->index;
+     FmgrInfo *procedure;
+     /*
+      * aminsertcleanup is optional.  For an access method without it, hand
+      * the caller's stats back unchanged instead of NULL, so statistics
+      * accumulated by earlier bulk-delete passes are not thrown away.
+      */
+     IndexBulkDeleteResult *result = stats;
+
+     RELATION_CHECKS;
+     if (RegProcedureIsValid(indexRelation->rd_am->aminsertcleanup))
+     {
+         GET_REL_PROCEDURE(aminsertcleanup);
+
+         result = (IndexBulkDeleteResult *)
+             DatumGetPointer(FunctionCall2(procedure,
+                                           PointerGetDatum(info),
+                                           PointerGetDatum(stats)));
+     }
+
+     return result;
+ }
+
+ /* ----------------
* index_vacuum_cleanup - do post-deletion cleanup of an index
*
* return value is an optional palloc'd struct of statistics
*** src/backend/catalog/index.c.orig Tue Jun 24 11:38:03 2008
--- src/backend/catalog/index.c Wed Jul 23 14:37:18 2008
***************
*** 1918,1923 ****
--- 1918,1924 ----
false);
state.htups = state.itups = state.tups_inserted = 0;
+ (void) index_insert_cleanup(&ivinfo, NULL);
(void) index_bulk_delete(&ivinfo, NULL,
validate_index_callback, (void *) &state);
*** src/backend/commands/vacuum.c.orig Thu Jun 5 14:20:15 2008
--- src/backend/commands/vacuum.c Wed Jul 23 14:37:03 2008
***************
*** 3314,3320 ****
ivinfo.num_heap_tuples = num_tuples;
ivinfo.strategy = vac_strategy;
! stats = index_vacuum_cleanup(&ivinfo, NULL);
if (!stats)
return;
--- 3314,3323 ----
ivinfo.num_heap_tuples = num_tuples;
ivinfo.strategy = vac_strategy;
! /* Do insert completion */
! stats = index_insert_cleanup(&ivinfo, NULL);
!
! stats = index_vacuum_cleanup(&ivinfo, stats);
if (!stats)
return;
***************
*** 3380,3387 ****
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
ivinfo.strategy = vac_strategy;
/* Do bulk deletion */
! stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
/* Do post-VACUUM cleanup */
stats = index_vacuum_cleanup(&ivinfo, stats);
--- 3383,3393 ----
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
ivinfo.strategy = vac_strategy;
+ /* Do insert completion */
+ stats = index_insert_cleanup(&ivinfo, NULL);
+
/* Do bulk deletion */
! stats = index_bulk_delete(&ivinfo, stats, tid_reaped, (void *) vacpagelist);
/* Do post-VACUUM cleanup */
stats = index_vacuum_cleanup(&ivinfo, stats);
*** src/backend/commands/vacuumlazy.c.orig Sun May 11 20:02:29 2008
--- src/backend/commands/vacuumlazy.c Wed Jul 23 14:37:03 2008
***************
*** 123,128 ****
--- 123,129 ----
static void lazy_vacuum_index(Relation indrel,
IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
+ static void lazy_insert_cleanup_index(Relation indrel, IndexBulkDeleteResult **stats);
static void lazy_cleanup_index(Relation indrel,
IndexBulkDeleteResult *stats,
LVRelStats *vacrelstats);
***************
*** 302,307 ****
--- 303,318 ----
lazy_space_alloc(vacrelstats, nblocks);
+ /*
+ * Complete pending inserts for all indexes. New entries can still arrive
+ * during vacuum, so completion must also be done before every call of
+ * ambulkdelete. It must be called unconditionally, because we track only
+ * dead tuples, not the number of tuples inserted since the last vacuum.
+ * The first call might take a lot of time.
+ */
+ for (i = 0; i < nindexes; i++)
+ lazy_insert_cleanup_index(Irel[i], &indstats[i]);
+
for (blkno = 0; blkno < nblocks; blkno++)
{
Buffer buf;
***************
*** 328,333 ****
--- 339,345 ----
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
+
/* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
/* Forget the now-vacuumed tuples, and press on */
***************
*** 589,594 ****
--- 601,607 ----
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
+
/* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
vacrelstats->num_index_scans++;
***************
*** 745,750 ****
--- 758,769 ----
pg_rusage_init(&ru0);
+ /*
+ * ambulkdelete doesn't scan the list of pending (not yet inserted) tuples,
+ * so move pending tuples into the regular structure before calling it.
+ */
+ lazy_insert_cleanup_index(indrel, stats);
+
ivinfo.index = indrel;
ivinfo.vacuum_full = false;
ivinfo.message_level = elevel;
***************
*** 764,769 ****
--- 783,820 ----
}
/*
+ * lazy_insert_cleanup_index() - perform insert cleanup for one index
+ */
+
+ static void
+ lazy_insert_cleanup_index(Relation indrel, IndexBulkDeleteResult **stats)
+ {
+     IndexVacuumInfo ivinfo;
+     PGRUsage ru0;
+     IndexBulkDeleteResult *result;
+
+     /* start timing before any index work is done */
+     pg_rusage_init(&ru0);
+
+     ivinfo.index = indrel;
+     ivinfo.strategy = vac_strategy;
+     ivinfo.message_level = elevel;
+     ivinfo.vacuum_full = false;
+     ivinfo.num_heap_tuples = -1;
+
+     /* Let the access method complete its pending insertions */
+     result = index_insert_cleanup(&ivinfo, *stats);
+     *stats = result;
+
+     /* Report only when the access method handed back statistics */
+     if (result != NULL)
+     {
+         ereport(elevel,
+                 (errmsg("scanned index \"%s\" to complete insertion",
+                         RelationGetRelationName(indrel)),
+                  errdetail("%u index pending pages have been deleted.\n%s.",
+                            result->pages_deleted, pg_rusage_show(&ru0))));
+     }
+ }
+
+ /*
* lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
*/
static void
*** src/include/access/genam.h.orig Wed Jun 18 22:57:57 2008
--- src/include/access/genam.h Wed Jul 23 14:36:53 2008
***************
*** 112,117 ****
--- 112,118 ----
extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction);
extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap);
+ extern IndexBulkDeleteResult *index_insert_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats);
extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info,
IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback,
*** src/include/access/gin.h.orig Sun Jul 13 17:50:04 2008
--- src/include/access/gin.h Wed Jul 23 14:38:51 2008
***************
*** 21,26 ****
--- 21,27 ----
#include "storage/buf.h"
#include "storage/off.h"
#include "storage/relfilenode.h"
+ #include "utils/rel.h"
/*
***************
*** 52,62 ****
typedef GinPageOpaqueData *GinPageOpaque;
! #define GIN_ROOT_BLKNO (0)
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
/*
* Works on page
--- 53,86 ----
typedef GinPageOpaqueData *GinPageOpaque;
! #define GIN_METAPAGE_BLKNO (0)
! #define GIN_ROOT_BLKNO (1)
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
+ #define GIN_META (1 << 3)
+ #define GIN_LIST (1 << 4)
+ #define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
+
+ typedef struct GinMetaPageData
+ {
+ /*
+ * Pointers to head and tail of list of GIN_LIST pages. These store
+ * fast-inserted entries that haven't yet been moved into the regular
+ * GIN structure.
+ */
+ BlockNumber head;
+ BlockNumber tail;
+
+ /*
+ * Free space in bytes in the list's tail page.
+ */
+ uint32 tailFreeSize;
+ } GinMetaPageData;
+
+ #define GinPageGetMeta(p) \
+ ((GinMetaPageData *) PageGetContents(p))
/*
* Works on page
***************
*** 68,73 ****
--- 92,99 ----
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+ #define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+ #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
***************
*** 135,140 ****
--- 161,180 ----
- GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
+ /*
+ * storage type for GIN's options. Must be upward compatible with
+ * StdRdOptions, since we might call RelationGetFillFactor().
+ */
+ typedef struct GinOptions
+ {
+ StdRdOptions std; /* standard options */
+ bool useFastUpdate; /* use fast updates? */
+ } GinOptions;
+
+ #define GIN_DEFAULT_USE_FASTUPDATE true
+ #define GinGetUseFastUpdate(relation) \
+ ((relation)->rd_options ? \
+ ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK
#define GIN_SHARE BUFFER_LOCK_SHARE
***************
*** 234,245 ****
--- 274,322 ----
BlockNumber rightLink;
} ginxlogDeletePage;
+
+ #define XLOG_GIN_UPDATE_META_PAGE 0x60
+
+ typedef struct ginxlogUpdateMeta
+ {
+ RelFileNode node; /* relation file node of the index */
+ GinMetaPageData metadata; /* metapage contents to install at redo */
+ BlockNumber prevTail; /* old tail page when a sublist was appended to an existing list, else InvalidBlockNumber */
+ BlockNumber newRightlink; /* rightlink to store into prevTail at redo */
+ int32 ntuples; /* if ntuples > 0, that many tuples were appended to page
+ metadata.tail; otherwise a new sublist was inserted */
+ /* an array of the inserted tuples follows */
+ } ginxlogUpdateMeta;
+
+ #define XLOG_GIN_INSERT_LISTPAGE 0x70
+
+ typedef struct ginxlogInsertListPage
+ {
+ RelFileNode node; /* relation file node of the index */
+ BlockNumber blkno; /* list page that is initialized and filled at redo */
+ BlockNumber rightlink; /* rightlink of that page; InvalidBlockNumber makes redo set GIN_LIST_FULLROW */
+ int32 ntuples; /* number of tuples stored after this header */
+ /* an array of the inserted tuples follows */
+ } ginxlogInsertListPage;
+
+ #define XLOG_GIN_DELETE_LISTPAGE 0x80
+
+ #define NDELETE_AT_ONCE (16)
+ typedef struct ginxlogDeleteListPages
+ {
+ RelFileNode node; /* relation file node of the index */
+ GinMetaPageData metadata; /* metapage contents to install at redo */
+ int32 ndeleted; /* number of valid entries in toDelete[] */
+ BlockNumber toDelete[ NDELETE_AT_ONCE ]; /* list pages to be flagged GIN_DELETED */
+ } ginxlogDeleteListPages;
+
/* ginutil.c */
extern Datum ginoptions(PG_FUNCTION_ARGS);
extern void initGinState(GinState *state, Relation index);
extern Buffer GinNewBuffer(Relation index);
extern void GinInitBuffer(Buffer b, uint32 f);
extern void GinInitPage(Page page, uint32 f, Size pageSize);
+ extern void GinInitMetabuffer(Buffer b);
extern int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
OffsetNumber attnum_b, Datum b);
***************
*** 253,258 ****
--- 330,337 ----
/* gininsert.c */
extern Datum ginbuild(PG_FUNCTION_ARGS);
extern Datum gininsert(PG_FUNCTION_ARGS);
+ extern void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem, bool isBuild);
/* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
***************
*** 428,433 ****
--- 507,520 ----
* that nothing will be found */
GinScanKey markPos;
+
+ bool isScanFastInsert;
+ Buffer fastBuffer;
+ OffsetNumber fastOffset;
+
+ bool markIsScanFastInsert;
+ Buffer markFastBuffer;
+ OffsetNumber markFastOffset;
} GinScanOpaqueData;
typedef GinScanOpaqueData *GinScanOpaque;
***************
*** 492,495 ****
--- 579,601 ----
OffsetNumber attnum, Datum *entries, int32 nentry);
extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
+ /* ginfast.c */
+
+ typedef struct GinTupleCollector {
+ IndexTuple *tuples; /* array of collected index tuples; NULL until ginHeapTupleFastCollect allocates it */
+ uint32 ntuples; /* number of tuples currently stored in tuples[] */
+ uint32 lentuples; /* allocated length of tuples[] */
+ uint32 sumsize; /* sum of IndexTupleSize() over the stored tuples */
+ } GinTupleCollector;
+
+ extern void ginHeapTupleFastInsert(Relation index, GinTupleCollector *collector);
+ extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+ GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item);
+
+ #define GinListPageSize \
+ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+ extern Datum gininsertcleanup(PG_FUNCTION_ARGS);
+
#endif
*** src/include/catalog/pg_am.h.orig Fri Jul 11 17:06:29 2008
--- src/include/catalog/pg_am.h Wed Jul 23 14:36:37 2008
***************
*** 58,63 ****
--- 58,69 ----
regproc amrestrpos; /* "restore marked scan position" function */
regproc ambuild; /* "build new index" function */
regproc ambulkdelete; /* bulk-delete function */
+ regproc aminsertcleanup; /* post-INSERT cleanup function.
+ * An ordinary insert might store tuples somewhere
+ * outside the regular structure of the index.
+ * ambulkdelete() should not scan that storage, to
+ * avoid duplication of code. So, aminsertcleanup
+ * should be called before any call of ambulkdelete */
regproc amvacuumcleanup; /* post-VACUUM cleanup function */
regproc amcostestimate; /* estimate cost of an indexscan */
regproc amoptions; /* parse AM-specific parameters */
***************
*** 74,80 ****
* compiler constants for pg_am
* ----------------
*/
! #define Natts_pg_am 24
#define Anum_pg_am_amname 1
#define Anum_pg_am_amstrategies 2
#define Anum_pg_am_amsupport 3
--- 80,86 ----
* compiler constants for pg_am
* ----------------
*/
! #define Natts_pg_am 25
#define Anum_pg_am_amname 1
#define Anum_pg_am_amstrategies 2
#define Anum_pg_am_amsupport 3
***************
*** 96,120 ****
#define Anum_pg_am_amrestrpos 19
#define Anum_pg_am_ambuild 20
#define Anum_pg_am_ambulkdelete 21
! #define Anum_pg_am_amvacuumcleanup 22
! #define Anum_pg_am_amcostestimate 23
! #define Anum_pg_am_amoptions 24
/* ----------------
* initial contents of pg_am
* ----------------
*/
! DATA(insert OID = 403 ( btree 5 1 t t t t t t f t btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions ));
DESCR("b-tree index access method");
#define BTREE_AM_OID 403
! DATA(insert OID = 405 ( hash 1 1 f f f f f f f f hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions ));
DESCR("hash index access method");
#define HASH_AM_OID 405
! DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
! DATA(insert OID = 2742 ( gin 0 5 f f t t f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
--- 102,127 ----
#define Anum_pg_am_amrestrpos 19
#define Anum_pg_am_ambuild 20
#define Anum_pg_am_ambulkdelete 21
! #define Anum_pg_am_aminsertcleanup 22
! #define Anum_pg_am_amvacuumcleanup 23
! #define Anum_pg_am_amcostestimate 24
! #define Anum_pg_am_amoptions 25
/* ----------------
* initial contents of pg_am
* ----------------
*/
! DATA(insert OID = 403 ( btree 5 1 t t t t t t f t btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete 0 btvacuumcleanup btcostestimate btoptions ));
DESCR("b-tree index access method");
#define BTREE_AM_OID 403
! DATA(insert OID = 405 ( hash 1 1 f f f f f f f f hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete 0 hashvacuumcleanup hashcostestimate hashoptions ));
DESCR("hash index access method");
#define HASH_AM_OID 405
! DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete 0 gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
! DATA(insert OID = 2742 ( gin 0 5 f f t t f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete gininsertcleanup ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
*** src/include/catalog/pg_proc.h.orig Thu Jul 17 23:32:53 2008
--- src/include/catalog/pg_proc.h Wed Jul 23 14:36:37 2008
***************
*** 4015,4020 ****
--- 4015,4022 ----
DESCR("gin(internal)");
DATA(insert OID = 2739 ( ginbulkdelete PGNSP PGUID 12 1 0 0 f f t f v 4 2281 "2281 2281 2281 2281" _null_ _null_ _null_ ginbulkdelete _null_ _null_ _null_ ));
DESCR("gin(internal)");
+ DATA(insert OID = 2328 ( gininsertcleanup PGNSP PGUID 12 1 0 0 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ gininsertcleanup _null_ _null_ _null_ ));
+ DESCR("gin(internal)");
DATA(insert OID = 2740 ( ginvacuumcleanup PGNSP PGUID 12 1 0 0 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ ginvacuumcleanup _null_ _null_ _null_ ));
DESCR("gin(internal)");
DATA(insert OID = 2741 ( gincostestimate PGNSP PGUID 12 1 0 0 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gincostestimate _null_ _null_ _null_ ));
*** src/include/utils/rel.h.orig Wed Jun 18 22:58:00 2008
--- src/include/utils/rel.h Wed Jul 23 14:36:30 2008
***************
*** 108,113 ****
--- 108,114 ----
FmgrInfo amrestrpos;
FmgrInfo ambuild;
FmgrInfo ambulkdelete;
+ FmgrInfo aminsertcleanup;
FmgrInfo amvacuumcleanup;
FmgrInfo amcostestimate;
FmgrInfo amoptions;