diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmapattutil.c bitmap/src/backend/access/bitmap/bitmapattutil.c
--- pgsql-head/src/backend/access/bitmap/bitmapattutil.c 1970-01-01 10:00:00.000000000 +1000
+++ bitmap/src/backend/access/bitmap/bitmapattutil.c 2006-11-29 17:14:09.000000000 +1100
@@ -0,0 +1,296 @@
+/*-------------------------------------------------------------------------
+ *
+ * bitmapattutil.c
+ *    Defines the routines to maintain all distinct attribute values
+ *    which are indexed in the on-disk bitmap index.
+ *
+ *    The distinct values are kept in an auxiliary heap (the "LOV heap")
+ *    with a btree index on top of it; both live in
+ *    PG_BITMAPINDEX_NAMESPACE.
+ *
+ * Copyright (c) 2006, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *    $PostgreSQL$
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/tupdesc.h"
+#include "access/bitmap.h"
+#include "access/nbtree.h"
+#include "access/xact.h"
+#include "nodes/execnodes.h"
+#include "nodes/primnodes.h"
+#include "nodes/makefuncs.h"
+#include "catalog/dependency.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/pg_type.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_namespace.h"
+#include "access/heapam.h"
+#include "optimizer/clauses.h"
+#include "utils/syscache.h"
+#include "utils/lsyscache.h"
+#include "utils/builtins.h"
+#include "commands/defrem.h"
+#include "commands/tablecmds.h"
+
+static TupleDesc _bitmap_create_lov_heapTupleDesc(Relation rel);
+
+/*
+ * _bitmap_create_lov_heapandindex() -- create a new heap relation and
+ * a btree index for the list of values (LOV).
+ *
+ * 'rel' is the bitmap index.  The OIDs of the new heap and of its btree
+ * index are returned through *lovHeapId and *lovIndexId.  Both relations
+ * are named after the OID of 'rel' and are recorded as internally
+ * dependent on it.  A heap/index pair left behind under the same names
+ * is dropped first.
+ */
+
+void
+_bitmap_create_lov_heapandindex(Relation rel, Oid *lovHeapId, Oid *lovIndexId)
+{
+    char        lovHeapName[NAMEDATALEN];
+    char        lovIndexName[NAMEDATALEN];
+    TupleDesc   tupDesc;
+    IndexInfo  *indexInfo;
+    ObjectAddress objAddr, referenced;
+    Oid        *classObjectId;
+    Oid         heapid;
+    Oid         indid;
+    int         indattrs;
+    int         i;
+
+    /* create the new names for the new lov heap and index */
+    snprintf(lovHeapName, sizeof(lovHeapName),
+             "pg_bm_%u", RelationGetRelid(rel));
+    snprintf(lovIndexName, sizeof(lovIndexName),
+             "pg_bm_%u_index", RelationGetRelid(rel));
+
+    /*
+     * If this is happening during re-indexing, then such a heap should
+     * have existed already. Here, we delete this heap and its btree
+     * index first.
+     */
+    heapid = get_relname_relid(lovHeapName, PG_BITMAPINDEX_NAMESPACE);
+    if (OidIsValid(heapid))
+    {
+        ObjectAddress object;
+        indid = get_relname_relid(lovIndexName, PG_BITMAPINDEX_NAMESPACE);
+
+        Assert(OidIsValid(indid));
+
+        /*
+         * Remove the dependency between the LOV heap relation,
+         * the LOV index, and the parent bitmap index before
+         * we drop the lov heap and index.  The command counter is
+         * bumped before the deletions below are performed.
+         */
+        deleteDependencyRecordsFor(RelationRelationId, heapid);
+        deleteDependencyRecordsFor(RelationRelationId, indid);
+        CommandCounterIncrement();
+
+        object.classId = RelationRelationId;
+        object.objectId = indid;    /* the index is dropped before the heap */
+        object.objectSubId = 0;
+        performDeletion(&object, DROP_RESTRICT);
+
+        object.objectId = heapid;
+        performDeletion(&object, DROP_RESTRICT);
+    }
+
+    /*
+     * create a new empty heap to store all attribute values with their
+     * corresponding block number and offset in LOV.  The heap is created
+     * in the tablespace of 'rel' and with the owner of 'rel'.
+     */
+    tupDesc = _bitmap_create_lov_heapTupleDesc(rel);
+
+    *lovHeapId = heap_create_with_catalog(lovHeapName, PG_BITMAPINDEX_NAMESPACE,
+                                          rel->rd_rel->reltablespace,
+                                          InvalidOid, rel->rd_rel->relowner,
+                                          tupDesc, RELKIND_RELATION,
+                                          rel->rd_rel->relisshared, false, 1,
+                                          ONCOMMIT_NOOP, false, true);
+
+    /*
+     * We must bump the command counter to make the newly-created relation
+     * tuple visible for opening.
+     */
+    CommandCounterIncrement();
+
+    objAddr.classId = RelationRelationId;
+    objAddr.objectId = *lovHeapId;
+    objAddr.objectSubId = 0 ;
+
+    referenced.classId = RelationRelationId;
+    referenced.objectId = RelationGetRelid(rel);
+    referenced.objectSubId = 0;
+
+    recordDependencyOn(&objAddr, &referenced, DEPENDENCY_INTERNAL);
+
+    /*
+     * create a btree index on the newly-created heap.
+     * The key includes all attributes to be indexed in this bitmap index,
+     * i.e. every heap column except the two trailing ones (block number
+     * and offset number) appended by _bitmap_create_lov_heapTupleDesc().
+     */
+    indattrs = tupDesc->natts - 2;
+    indexInfo = makeNode(IndexInfo);
+    indexInfo->ii_NumIndexAttrs = indattrs;
+    indexInfo->ii_Expressions = NIL;
+    indexInfo->ii_ExpressionsState = NIL;
+    indexInfo->ii_Predicate = make_ands_implicit(NULL);
+    indexInfo->ii_PredicateState = NIL;
+    indexInfo->ii_Unique = true;
+
+    classObjectId = (Oid *) palloc(indattrs * sizeof(Oid));
+    for (i = 0; i < indattrs; i++)
+    {
+        Oid typid = tupDesc->attrs[i]->atttypid;
+
+        indexInfo->ii_KeyAttrNumbers[i] = i + 1;    /* attribute numbers are 1-based */
+        classObjectId[i] = GetDefaultOpClass(typid, BTREE_AM_OID);
+    }
+
+    *lovIndexId = index_create(*lovHeapId, lovIndexName, InvalidOid,
+                               indexInfo, BTREE_AM_OID,
+                               rel->rd_rel->reltablespace,
+                               classObjectId, 0, false, false, true,
+                               false, false);
+
+
+    objAddr.classId = RelationRelationId;
+    objAddr.objectId = *lovIndexId;
+    objAddr.objectSubId = 0 ;
+
+    recordDependencyOn(&objAddr, &referenced, DEPENDENCY_INTERNAL);
+}
+
+/*
+ * _bitmap_create_lov_heapTupleDesc() -- create the new heap tuple descriptor.
+ *
+ * The result has every attribute of the descriptor of 'rel', followed by
+ * two INT4 columns named "blockNumber" and "offsetNumber".
+ */
+
+TupleDesc
+_bitmap_create_lov_heapTupleDesc(Relation rel)
+{
+    TupleDesc   tupDesc;
+    TupleDesc   oldTupDesc;
+    AttrNumber  attno;
+    int         natts;
+
+    oldTupDesc = RelationGetDescr(rel);
+    natts = oldTupDesc->natts + 2;
+
+    tupDesc = CreateTemplateTupleDesc(natts, false);
+
+    for (attno = 1; attno <= oldTupDesc->natts; attno++)
+    {
+        /* copy the attribute to be indexed. */
+        memcpy(tupDesc->attrs[attno - 1], oldTupDesc->attrs[attno - 1],
+               ATTRIBUTE_TUPLE_SIZE);
+        tupDesc->attrs[attno - 1]->attnum = attno;
+    }
+
+    /* the block number; attno is oldTupDesc->natts + 1 after the loop */
+    TupleDescInitEntry(tupDesc, attno, "blockNumber", INT4OID, -1, 0);
+    attno++;
+
+    /* the offset number */
+    TupleDescInitEntry(tupDesc, attno, "offsetNumber", INT4OID, -1, 0);
+
+    return tupDesc;
+}
+
+/*
+ * _bitmap_open_lov_heapandindex() -- open the heap relation and the btree
+ * index for LOV.
+ *
+ * The relation OIDs are read from 'metapage' and both relations are opened
+ * with 'lockMode'.  'rel' is not used by this function.
+ */
+
+void
+_bitmap_open_lov_heapandindex(Relation rel, BMMetaPage metapage,
+                              Relation *lovHeapP, Relation *lovIndexP,
+                              LOCKMODE lockMode)
+{
+    *lovHeapP = heap_open(metapage->bm_lov_heapId, lockMode);
+    *lovIndexP = index_open(metapage->bm_lov_indexId, lockMode);
+}
+
+/*
+ * _bitmap_insert_lov() -- insert a new data into the given heap and index.
+ *
+ * 'datum' and 'nulls' describe a complete LOV heap row.  The index entry
+ * is built from all but the last two columns of that row.
+ */
+void
+_bitmap_insert_lov(Relation lovHeap, Relation lovIndex, Datum *datum,
+                   bool *nulls)
+{
+    TupleDesc   tupDesc;
+    HeapTuple   tuple;
+    bool        result;
+    Datum      *indexDatum;
+    bool       *indexNulls;
+
+    tupDesc = RelationGetDescr(lovHeap);
+
+    /* insert this tuple into the heap */
+    tuple = heap_form_tuple(tupDesc, datum, nulls);
+    simple_heap_insert(lovHeap, tuple);
+
+    /* insert a new tuple into the index */
+    indexDatum = palloc0((tupDesc->natts - 2) * sizeof(Datum));
+    indexNulls = palloc0((tupDesc->natts - 2) * sizeof(bool));
+    memcpy(indexDatum, datum, (tupDesc->natts - 2) * sizeof(Datum));
+    memcpy(indexNulls, nulls, (tupDesc->natts - 2) * sizeof(bool));
+    result = index_insert(lovIndex, indexDatum, indexNulls,
+                          &(tuple->t_self), lovHeap, true);
+
+    pfree(indexDatum);
+    pfree(indexNulls);
+    Assert(result);
+
+    heap_freetuple(tuple);
+}
+
+
+/*
+ * _bitmap_close_lov_heapandindex() -- close the heap and the index.
+ *
+ * 'lockMode' is passed to both close calls.
+ */
+void
+_bitmap_close_lov_heapandindex(Relation lovHeap, Relation lovIndex,
+                               LOCKMODE lockMode)
+{
+    heap_close(lovHeap, lockMode);
+    index_close(lovIndex, lockMode);
+}
+
+/*
+ * _bitmap_findvalue() -- find a row in a given heap using
+ * a given index that satisfies the given scan key.
+ *
+ * If this value exists, this function returns true. Otherwise,
+ * returns false.
+ *
+ * If this value exists in the heap, this function also returns
+ * the block number and the offset number that are stored in the same
+ * row with this value. This block number and the offset number
+ * are for the LOV item that points the bitmap vector for this value.
+ *
+ * Only the next tuple of 'scanDesc' is fetched; 'scanKey' itself is not
+ * read here.  The output arguments are left untouched when no tuple is
+ * found.
+ */
+bool
+_bitmap_findvalue(Relation lovHeap, Relation lovIndex,
+                  ScanKey scanKey, IndexScanDesc scanDesc,
+                  BlockNumber *lovBlock, bool *blockNull,
+                  OffsetNumber *lovOffset, bool *offsetNull)
+{
+    TupleDesc   tupDesc;
+    HeapTuple   tuple;
+    bool        found = false;
+
+    tupDesc = RelationGetDescr(lovIndex);
+
+    tuple = index_getnext(scanDesc, ForwardScanDirection);
+
+    if (tuple != NULL)
+    {
+        TupleDesc   heapTupDesc;
+        Datum       d;
+
+        found = true;
+        heapTupDesc = RelationGetDescr(lovHeap);
+
+        d = heap_getattr(tuple, tupDesc->natts + 1, heapTupDesc, blockNull);    /* first column after the index keys */
+        *lovBlock = DatumGetInt32(d);
+        d = heap_getattr(tuple, tupDesc->natts + 2, heapTupDesc, offsetNull);   /* second column after the index keys */
+        *lovOffset = DatumGetInt16(d);
+    }
+    return found;
+}
+
diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmap.c bitmap/src/backend/access/bitmap/bitmap.c
--- pgsql-head/src/backend/access/bitmap/bitmap.c 1970-01-01 10:00:00.000000000 +1000
+++ bitmap/src/backend/access/bitmap/bitmap.c 2006-11-28 20:38:59.000000000 +1100
@@ -0,0 +1,862 @@
+/*------------------------------------------------------------------------- + * + * bitmap.c + * Implementation of the Hybrid Run-Length (HRL) on-disk bitmap index. + * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + * + * NOTES + * This file contains only the public interface routines. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/bitmap.h" +#include "access/xact.h" +#include "catalog/index.h" +#include "miscadmin.h" +#include "nodes/tidbitmap.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "parser/parse_oper.h" +#include "utils/memutils.h" + +static void bmbuildCallback(Relation index, HeapTuple htup, Datum *attdata, + bool *nulls, bool tupleIsAlive, void *state); +static bool words_get_entry(BMBatchWords *words, BMIterateResult *result, + BlockNumber nextBlockNo, PagetableEntry *entry); +static IndexScanDesc copy_scan_desc(IndexScanDesc scan); +static void stream_free(void *opaque); +static bool pull_stream(void *opaque, PagetableEntry *e); +static void cleanup_pos(BMScanPosition pos); + +/* type to hide BM specific stream state */ +typedef struct BMStreamOpaque +{ + IndexScanDesc scan; + MemoryContext mcxt; +} BMStreamOpaque; + +/* + * bmbuild() -- Build a new bitmap index. + */ +Datum +bmbuild(PG_FUNCTION_ARGS) +{ + Relation heap = (Relation) PG_GETARG_POINTER(0); + Relation index = (Relation) PG_GETARG_POINTER(1); + IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); + double reltuples; + BMBuildState bmstate; + IndexBuildResult *result; + TupleDesc tupDesc; + + /* We expect this to be called exactly once. */ + if (RelationGetNumberOfBlocks(index) != 0) + ereport (ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" already contains data", + RelationGetRelationName(index)))); + + tupDesc = RelationGetDescr(index); + + /* initialize the bitmap index. 
*/ + _bitmap_init(index, true); + + /* initialize the build state. */ + _bitmap_init_buildstate(index, &bmstate); + + /* do the heap scan */ + reltuples = IndexBuildHeapScan(heap, index, indexInfo, + bmbuildCallback, (void*)&bmstate); + + /* + * fsync the relevant files to disk, unless we're building + * a temporary index + */ + if (!index->rd_istemp) + { + RelationOpenSmgr(index); + smgrimmedsync(index->rd_smgr); + + RelationOpenSmgr(bmstate.bm_lov_heap); + smgrimmedsync(bmstate.bm_lov_heap->rd_smgr); + + RelationOpenSmgr(bmstate.bm_lov_index); + smgrimmedsync(bmstate.bm_lov_index->rd_smgr); + } + /* clean up the build state */ + _bitmap_cleanup_buildstate(index, &bmstate); + + + + /* return statistics */ + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + + result->heap_tuples = reltuples; + result->index_tuples = bmstate.ituples; + + PG_RETURN_POINTER(result); +} + + +/* + * bminsert() -- insert an index tuple into a bitmap index. + */ +Datum +bminsert(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation) PG_GETARG_POINTER(0); + Datum *datum = (Datum *) PG_GETARG_POINTER(1); + bool *nulls = (bool *) PG_GETARG_POINTER(2); + ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); + + _bitmap_doinsert(rel, *ht_ctid, datum, nulls); + + PG_RETURN_BOOL(true); +} + +/* + * bmgettuple() -- return the next tuple in a scan. + */ +Datum +bmgettuple(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); + + bool res; + + /* + * If we have already begun our scan, continue in the same direction. + * Otherwise, start up the scan. + */ + if (ItemPointerIsValid(&(scan->currentItemData))) + res = _bitmap_next(scan, dir); + else + res = _bitmap_first(scan, dir); + + PG_RETURN_BOOL(res); +} + +/* + * bmgetbitmap() -- return a stream bitmap. 
+ */
+Datum
+bmgetbitmap(PG_FUNCTION_ARGS)
+{
+    /*
+     * Argument 0 is the scan descriptor.  Argument 1 is either NULL or a
+     * StreamBitmap node; in the latter case the stream built here is
+     * OR'ed into it.  Any other node type raises an ERROR.  The (possibly
+     * newly created) bitmap node is returned.
+     */
+    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+    Node       *bm = (Node *) PG_GETARG_POINTER(1);
+    IndexStream *is;
+    BMScanPosition scanPos;
+    bool        res;
+
+    /* perhaps this should be in a special context? */
+    is = (IndexStream *) palloc(sizeof(IndexStream));
+    is->type = BMS_INDEX;
+    is->free = stream_free;
+    is->nextblock = 0;
+    is->pull = pull_stream;
+    is->needfree = true;
+
+    res = _bitmap_firstbatchwords(scan, ForwardScanDirection);
+
+    /* must be set before the scan state is copied below */
+    scanPos = ((BMScanOpaque) scan->opaque)->bm_currPos;
+    scanPos->bm_result.nextTid = 1;
+
+    if (res)
+    {
+        BMStreamOpaque *so;
+
+        so = (BMStreamOpaque *) palloc(sizeof(BMStreamOpaque));
+        so->scan = copy_scan_desc(scan);
+
+        /*
+         * No dedicated memory context is created for the stream; say so
+         * explicitly instead of leaving the field uninitialised.
+         * NOTE(review): confirm no consumer outside this file expects a
+         * valid context here.
+         */
+        so->mcxt = NULL;
+        is->opaque = (void *) so;
+    }
+    else
+    {
+        /* there were no matches, so our state is "invalid" */
+        is->opaque = NULL;
+    }
+
+    if (!bm)
+    {
+        /*
+         * We must create the StreamBitmap outside of our temporary
+         * memory context. The reason is, because we glue all the
+         * related streams together, bitmap_stream_free() will
+         * descend the stream tree and free up all the nodes by
+         * killing their memory context. If we lose the StreamBitmap
+         * memory, we'll be reading invalid memory.
+         */
+        StreamBitmap *sb = makeNode(StreamBitmap);
+
+        sb->opaque = (void *) is;
+        bm = (Node *) sb;
+    }
+    else if (IsA(bm, StreamBitmap))
+    {
+        stream_add_node((StreamBitmap *) bm, (void *) is, BMS_OR);
+    }
+    else
+    {
+        elog(ERROR, "non stream bitmap");
+    }
+    PG_RETURN_POINTER(bm);
+}
+
+/*
+ * bmbeginscan() -- start a scan on the bitmap index. 
+ */ +Datum +bmbeginscan(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation) PG_GETARG_POINTER(0); + int nkeys = PG_GETARG_INT32(1); + ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + IndexScanDesc scan; + + /* get the scan */ + scan = RelationGetIndexScan(rel, nkeys, scankey); + + PG_RETURN_POINTER(scan); +} + +/* + * bmrescan() -- restart a scan on the bitmap index. + */ +Datum +bmrescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); + BMScanOpaque so = (BMScanOpaque) scan->opaque; + + /* so will be NULL if we were called via index_rescan() */ + if (so == NULL) + { + so = (BMScanOpaque) palloc(sizeof(BMScanOpaqueData)); + so->bm_currPos = NULL; + so->bm_markPos = NULL; + scan->opaque = so; + } + + if (so->bm_currPos != NULL) + { + cleanup_pos(so->bm_currPos); + so->bm_currPos = NULL; + } + + if (so->bm_markPos != NULL) + { + cleanup_pos(so->bm_markPos); + so->bm_markPos = NULL; + } + /* reset the scan key */ + if (scankey && scan->numberOfKeys > 0) + memmove(scan->keyData, scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + + ItemPointerSetInvalid(&scan->currentItemData); + + PG_RETURN_VOID(); +} + +/* + * bmendscan() -- close a scan. + */ +Datum +bmendscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + BMScanOpaque so = (BMScanOpaque) scan->opaque; + + /* free the space */ + if (so->bm_currPos != NULL) + { + /* + * release the buffers that have been stored for each related + * bitmap vector. 
+ */ + if (so->bm_currPos->nvec > 1) + _bitmap_cleanup_batchwords(so->bm_currPos->bm_batchWords); + _bitmap_cleanup_scanpos(so->bm_currPos->posvecs, + so->bm_currPos->nvec); + so->bm_currPos = NULL; + } + + if (so->bm_markPos != NULL) + { + if (so->bm_markPos->nvec > 1) + _bitmap_cleanup_batchwords(so->bm_markPos->bm_batchWords); + _bitmap_cleanup_scanpos(so->bm_markPos->posvecs, + so->bm_markPos->nvec); + so->bm_markPos = NULL; + } + + pfree(so); + + scan->opaque = NULL; + + if (ItemPointerIsValid(&(scan->currentItemData))) + ItemPointerSetInvalid(&(scan->currentItemData)); + if (ItemPointerIsValid(&(scan->currentMarkData))) + ItemPointerSetInvalid(&(scan->currentMarkData)); + + + PG_RETURN_VOID(); +} + +/* + * bmmarkpos() -- save the current scan position. + */ +Datum +bmmarkpos(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + BMScanOpaque so = (BMScanOpaque) scan->opaque; + BMVector bmScanPos; + uint32 vectorNo; + + /* free the space */ + if (ItemPointerIsValid(&(scan->currentMarkData))) + { + /* + * release the buffers that have been stored for each + * related bitmap. 
+ */ + bmScanPos = so->bm_markPos->posvecs; + + for (vectorNo=0; vectorNo < so->bm_markPos->nvec; vectorNo++) + { + if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer)) + { + ReleaseBuffer((bmScanPos[vectorNo]).bm_lovBuffer); + (bmScanPos[vectorNo]).bm_lovBuffer = InvalidBuffer; + } + } + + ItemPointerSetInvalid(&(scan->currentMarkData)); + } + + if (ItemPointerIsValid(&(scan->currentItemData))) + { + uint32 size = sizeof(BMScanPositionData); + + + /* set the mark position */ + if (so->bm_markPos == NULL) + { + so->bm_markPos = (BMScanPosition) palloc(size); + } + + bmScanPos = so->bm_currPos->posvecs; + + for (vectorNo=0; vectorNobm_currPos->nvec; + vectorNo++) + { + if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer)) + IncrBufferRefCount((bmScanPos[vectorNo]).bm_lovBuffer); + } + + memcpy(so->bm_markPos->posvecs, bmScanPos, + so->bm_currPos->nvec * + sizeof(BMVectorData)); + memcpy(so->bm_markPos, so->bm_currPos, size); + + scan->currentMarkData = scan->currentItemData; + } + + PG_RETURN_VOID(); +} + +/* + * bmrestrpos() -- restore a scan to the last saved position. 
+ */
+Datum
+bmrestrpos(PG_FUNCTION_ARGS)
+{
+    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+    BMScanOpaque so = (BMScanOpaque) scan->opaque;
+
+    BMVector    bmScanPos;
+    uint32      vectorNo;
+
+    /* free space */
+    if (ItemPointerIsValid(&(scan->currentItemData)))
+    {
+        /*
+         * Release the buffers that have been stored for each related
+         * bitmap of the current position.  The loop is bounded by the
+         * current position's own vector count: the array being walked
+         * belongs to bm_currPos, and bm_markPos may not exist at all.
+         */
+        bmScanPos = so->bm_currPos->posvecs;
+
+        for (vectorNo = 0; vectorNo < so->bm_currPos->nvec; vectorNo++)
+        {
+            if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer))
+            {
+                ReleaseBuffer((bmScanPos[vectorNo]).bm_lovBuffer);
+                (bmScanPos[vectorNo]).bm_lovBuffer = InvalidBuffer;
+            }
+        }
+
+        ItemPointerSetInvalid(&(scan->currentItemData));
+    }
+
+    if (ItemPointerIsValid(&(scan->currentMarkData)))
+    {
+        uint32      size = sizeof(BMScanPositionData);
+
+        /* set the current position */
+        if (so->bm_currPos == NULL)
+            so->bm_currPos = (BMScanPosition) palloc(size);
+
+        bmScanPos = so->bm_markPos->posvecs;
+
+        /*
+         * Take an extra pin on every buffer of the marked position.  The
+         * bound comes from bm_markPos, whose array this is; bm_currPos may
+         * have been allocated just above and hold no valid count yet.
+         */
+        for (vectorNo = 0; vectorNo < so->bm_markPos->nvec; vectorNo++)
+        {
+            if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer))
+                IncrBufferRefCount((bmScanPos[vectorNo]).bm_lovBuffer);
+        }
+
+        /*
+         * Copy the whole position, including the posvecs pointer.  The
+         * element-wise memcpy into so->bm_currPos->posvecs that used to
+         * precede this wrote through an uninitialised pointer whenever
+         * bm_currPos had just been allocated, and its target pointer was
+         * replaced by this struct copy in any case.
+         * NOTE(review): both positions share one posvecs array after this
+         * copy -- confirm that is intended.
+         */
+        memcpy(so->bm_currPos, so->bm_markPos, size);
+        scan->currentItemData = scan->currentMarkData;
+    }
+
+    PG_RETURN_VOID();
+}
+
+/*
+ * bmbulkdelete() -- bulk delete index entries
+ *
+ * Re-index is performed before retrieving the number of tuples
+ * indexed in this index. 
+ */
+Datum
+bmbulkdelete(PG_FUNCTION_ARGS)
+{
+    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+    Relation    rel = info->index;
+    IndexBulkDeleteResult* volatile result =
+        (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+    double      numTuples;
+    bool        needRebuild = false;
+    Buffer      metabuf;
+    BMMetaPage  metapage;
+
+    /* allocate stats if first time through, else re-use existing struct */
+    if (result == NULL)
+        result = (IndexBulkDeleteResult *)
+            palloc0(sizeof(IndexBulkDeleteResult));
+
+    if (!info->vacuum_full)
+    {
+        /* obtain the indicator if this index needs to be re-built. */
+        metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ);
+        metapage = (BMMetaPage) PageGetContents(BufferGetPage(metabuf));
+        needRebuild = metapage->bm_need_rebuilt;
+        _bitmap_relbuf(metabuf);
+    }
+
+    /* a full vacuum always rebuilds; otherwise only when the metapage asks */
+    if (needRebuild || info->vacuum_full)
+    {
+        reindex_index(RelationGetRelid(rel));
+        CommandCounterIncrement();
+    }
+
+    /* obtain the number of tuples from the index */
+    metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ);
+    metapage = (BMMetaPage) PageGetContents(BufferGetPage(metabuf));
+    numTuples = metapage->bm_num_tuples;
+    _bitmap_relbuf(metabuf);
+
+    /*
+     * Fill in the struct obtained above.  It must not be allocated a
+     * second time here: doing so threw away the caller's stats struct
+     * and leaked the one allocated on the first call.
+     */
+    result->num_pages = RelationGetNumberOfBlocks(rel);
+    result->num_index_tuples = numTuples;
+    result->tuples_removed = 0;
+
+    PG_RETURN_POINTER(result);
+}
+
+/*
+ * bmvacuumcleanup() -- post-vacuum cleanup.
+ *
+ * We do nothing useful here. 
+ */ +Datum +bmvacuumcleanup(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); + Relation rel = info->index; + IndexBulkDeleteResult *stats = + (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); + + if(stats == NULL) + stats = (IndexBulkDeleteResult *)palloc0(sizeof(IndexBulkDeleteResult)); + + /* update statistics */ + stats->num_pages = RelationGetNumberOfBlocks(rel); + stats->pages_deleted = 0; + stats->pages_free = 0; + /* XXX: dodgy hack to shutup index_scan() and vacuum_index() */ + stats->num_index_tuples = info->num_heap_tuples; + + PG_RETURN_POINTER(stats); +} + +/* + * Per-tuple callback from IndexBuildHeapScan + */ +static void +bmbuildCallback(Relation index, HeapTuple htup, Datum *attdata, + bool *nulls, bool tupleIsAlive, void *state) +{ + BMBuildState *bstate = (BMBuildState *) state; + + _bitmap_buildinsert(index, htup->t_self, attdata, nulls, bstate); + bstate->ituples += 1; +} + +/* + * free the memory associated with the stream + */ + +static void +stream_free(void *opaque) +{ + IndexStream *is = (IndexStream *)opaque; + BMStreamOpaque *so = (BMStreamOpaque *)is->opaque; + + /* opaque may be NULL */ + if(is->needfree && so) + { + IndexScanDesc scan = so->scan; + BMScanOpaque s = (BMScanOpaque)scan->opaque; + + is->needfree = false; + if(s->bm_currPos) + { + cleanup_pos(s->bm_currPos); + s->bm_currPos = NULL; + } + if(s->bm_markPos) + { + cleanup_pos(s->bm_markPos); + s->bm_markPos = NULL; + } + is->opaque = NULL; + } +} + +static void +cleanup_pos(BMScanPosition pos) +{ + /* + * Only cleanup bm_batchWords if we have more than one vector since + * _bitmap_cleanup_scanpos() will clean it up for the single vector + * case. 
+ */ + if (pos->nvec > 1) + _bitmap_cleanup_batchwords(pos->bm_batchWords); + _bitmap_cleanup_scanpos(pos->posvecs, pos->nvec); +} + + +/* + * pull the next block of tids from a bitmap stream + */ + +static bool +pull_stream(void *opaque, PagetableEntry *e) +{ + StreamNode *n = (StreamNode *)opaque; + bool res = false; + IndexStream *is; + BMScanPosition scanPos; + IndexScanDesc scan; + BMStreamOpaque *so; + + MemSet(e, 0, sizeof(PagetableEntry)); + + Assert(n->type == BMS_INDEX); + + is = (IndexStream *)n; + so = (BMStreamOpaque *)is->opaque; + + /* empty bitmap vector */ + if(so == NULL) + return false; + + scan = so->scan; + scanPos = ((BMScanOpaque)scan->opaque)->bm_currPos; + e->blockno = is->nextblock; + + while(true) + { + if (scanPos != NULL) + res = _bitmap_nextbatchwords(scan, ForwardScanDirection); + else + /* we should be initialised! */ + elog(ERROR, "scan position uninitialized"); + + if (words_get_entry(scanPos->bm_batchWords, &(scanPos->bm_result), + is->nextblock, e)) + { + res = true; + break; + } + else if(!res) + { + is->opaque = NULL; + res = true; + break; + } + } + is->nextblock++; + return res; +} + +/* + * Make a copy of an index scan descriptor as well as useful fields in + * the opaque structure + */ + +static IndexScanDesc +copy_scan_desc(IndexScanDesc scan) +{ + IndexScanDesc s; + BMScanOpaque so; + BMScanPosition sp; + BMScanPosition spcopy; + BMBatchWords *w; + BMVector bsp; + + /* we only need a few fields */ + s = (IndexScanDesc)palloc0(sizeof(IndexScanDescData)); + s->opaque = palloc(sizeof(BMScanOpaqueData)); + spcopy = palloc0(sizeof(BMScanPositionData)); + w = (BMBatchWords *)palloc(sizeof(BMBatchWords)); + + s->indexRelation = scan->indexRelation; + so = (BMScanOpaque)scan->opaque; + sp = so->bm_currPos; + + if(sp) + { + int vec; + + spcopy->done = sp->done; + spcopy->nvec = sp->nvec; + spcopy->bm_batchWords = w; + + /* now the batch words */ + w->maxNumOfWords = sp->bm_batchWords->maxNumOfWords; + w->nwordsread = 
sp->bm_batchWords->nwordsread; + w->nextread = sp->bm_batchWords->nextread; + w->firstTid = sp->bm_batchWords->firstTid; + w->startNo = sp->bm_batchWords->startNo; + w->nwords = sp->bm_batchWords->nwords; + + /* the actual words now */ + /* use copy */ + w->hwords = palloc0(sizeof(BM_HRL_WORD) * + BM_CALC_H_WORDS(sp->bm_batchWords->maxNumOfWords)); + w->cwords = palloc0(sizeof(BM_HRL_WORD) * + sp->bm_batchWords->maxNumOfWords); + + memcpy(w->hwords, sp->bm_batchWords->hwords, + BM_CALC_H_WORDS(sp->bm_batchWords->maxNumOfWords) * sizeof(BM_HRL_WORD)); + memcpy(w->cwords, sp->bm_batchWords->cwords, + sp->bm_batchWords->maxNumOfWords * sizeof(BM_HRL_WORD)); + + memcpy(&spcopy->bm_result, &sp->bm_result, sizeof(BMIterateResult)); + + bsp = (BMVector)palloc(sizeof(BMVectorData) * sp->nvec); + spcopy->posvecs = bsp; + if(sp->nvec == 1) + { + bsp->bm_lovBuffer = sp->posvecs->bm_lovBuffer; + bsp->bm_lovOffset = sp->posvecs->bm_lovOffset; + bsp->bm_nextBlockNo = sp->posvecs->bm_nextBlockNo; + bsp->bm_readLastWords = sp->posvecs->bm_readLastWords; + bsp->bm_batchWords = w; + } + else + { + for (vec = 0; vec < sp->nvec; vec++) + { + BMVector bmScanPos = &(bsp[vec]); + BMVector spp = &(sp->posvecs[vec]); + + bmScanPos->bm_lovBuffer = spp->bm_lovBuffer; + bmScanPos->bm_lovOffset = spp->bm_lovOffset; + bmScanPos->bm_nextBlockNo = spp->bm_nextBlockNo; + bmScanPos->bm_readLastWords = spp->bm_readLastWords; + + bmScanPos->bm_batchWords = + (BMBatchWords *) palloc0(sizeof(BMBatchWords)); + _bitmap_init_batchwords(bmScanPos->bm_batchWords, + BM_NUM_OF_HRL_WORDS_PER_PAGE, + CurrentMemoryContext); + _bitmap_copy_batchwords(spp->bm_batchWords, + bmScanPos->bm_batchWords); + + } + } + } + else + spcopy = NULL; + + ((BMScanOpaque)s->opaque)->bm_currPos = spcopy; + + return s; +} + +/* + * words_get_entry() - get the bitmap for all tuples in a given heap page. + * Returns true if we found all tids for a page, otherwise false. 
+ */ + +static bool +words_get_entry(BMBatchWords *words, BMIterateResult *result, + BlockNumber nextBlockNo, PagetableEntry *entry) +{ + tbm_bitmapword newWord; + int numHrlWords = (TBM_BITS_PER_BITMAPWORD/BM_HRL_WORD_SIZE); + int hrlWordNo = 0; + int newWordNo = 0; + uint64 firstTidLoc, lastTidLoc; + + /* compute the first and last tid location for 'nextBlockNo'. */ + /* XXX: use TID converter */ + firstTidLoc = nextBlockNo*BM_MAX_HTUP_PER_PAGE + 1; + lastTidLoc = (nextBlockNo+1)*BM_MAX_HTUP_PER_PAGE; + + Assert(result->lastScanWordNo < words->maxNumOfWords); + + /* + * XXX: We assume that BM_HRL_WORD_SIZE is not greater than + * TBM_BITS_PER_BITMAPWORD for tidbitmap. + */ + Assert(numHrlWords >= 1); + + Assert((result->nextTid-firstTidLoc)%BM_HRL_WORD_SIZE == 0); + + /* + * find the first tid location in 'words' that is equal to + * 'firstTidLoc'. + */ + while (words->nwords > 0 && result->nextTid < firstTidLoc) + { + BM_HRL_WORD word = words->cwords[result->lastScanWordNo]; + + if (IS_FILL_WORD(words->hwords, result->lastScanWordNo)) + { + uint32 fillLength; + if (word == 0) + fillLength = 1; + else + fillLength = FILL_LENGTH(word); + + if (firstTidLoc - result->nextTid >= fillLength*BM_HRL_WORD_SIZE) + { + result->nextTid += fillLength * BM_HRL_WORD_SIZE; + result->lastScanWordNo++; + words->nwords--; + } + else + { + words->cwords[result->lastScanWordNo] -= + (firstTidLoc - result->nextTid)/BM_HRL_WORD_SIZE; + result->nextTid = firstTidLoc; + } + } + else + { + result->nextTid += BM_HRL_WORD_SIZE; + result->lastScanWordNo++; + words->nwords--; + } + } + + /* + * if there are no such a bitmap in the given batch words, then + * return false. + */ + if (words->nwords == 0) + { + result->lastScanWordNo = 0; + return false; + } + + /* copy the bitmap for tuples in the given heap page. 
*/ + newWord = 0; + hrlWordNo = ((result->nextTid-firstTidLoc)/BM_HRL_WORD_SIZE)%numHrlWords; + newWordNo = (result->nextTid-firstTidLoc)/TBM_BITS_PER_BITMAPWORD; + while (words->nwords > 0 && result->nextTid < lastTidLoc) + { + BM_HRL_WORD word = words->cwords[result->lastScanWordNo]; + + if (IS_FILL_WORD(words->hwords, result->lastScanWordNo)) + { + if (GET_FILL_BIT(word) == 1) + newWord |= ((tbm_bitmapword)(LITERAL_ALL_ONE)) << + (hrlWordNo*BM_HRL_WORD_SIZE); + + words->cwords[result->lastScanWordNo]--; + if (FILL_LENGTH(words->cwords[result->lastScanWordNo]) == 0) + { + result->lastScanWordNo++; + words->nwords--; + } + } + else + { + newWord |= ((tbm_bitmapword)word) << + (hrlWordNo*BM_HRL_WORD_SIZE); + + result->lastScanWordNo++; + words->nwords--; + } + + hrlWordNo = (hrlWordNo+1)%numHrlWords; + result->nextTid += BM_HRL_WORD_SIZE; + + if (hrlWordNo%numHrlWords == 0) + { + entry->words[newWordNo] |= newWord; + newWordNo++; + + /* reset newWord */ + newWord = 0; + } + } + + if (hrlWordNo%numHrlWords != 0) + entry->words[newWordNo] |= newWord; + + if (words->nwords == 0) + { + result->lastScanWordNo = 0; + + if (result->nextTid < lastTidLoc) + return false; + } + + return true; +} diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmapinsert.c bitmap/src/backend/access/bitmap/bitmapinsert.c --- pgsql-head/src/backend/access/bitmap/bitmapinsert.c 1970-01-01 10:00:00.000000000 +1000 +++ bitmap/src/backend/access/bitmap/bitmapinsert.c 2006-12-04 16:42:10.000000000 +1100 @@ -0,0 +1,1780 @@ +/*------------------------------------------------------------------------- + * + * bitmapinsert.c + * Tuple insertion in the on-disk bitmap index. 
+ * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/tupdesc.h" +#include "access/heapam.h" +#include "access/bitmap.h" +#include "parser/parse_oper.h" +#include "utils/builtins.h" +#include "utils/datum.h" + + +/* + * The following two structures are used to buffer calls to write_tids() + * during index create -- bmbuild(). + */ + +/* + * BMTIDBuffer represents TIDs we've buffered for a given bitmap vector -- + * i.e., TIDs for a distinct value in the underlying table. We take advantage + * of the fact that since we are reading the table from beginning to end + * TIDs will be ordered. + */ + +typedef struct BMTIDBuffer +{ + int32 size; /* total number of allocated elements in tids */ + int32 ntids; /* number of items in tids */ + uint64 *tids; /* variable size */ +} BMTIDBuffer; + +/* + * BMTIDLOVBuffer represents those bitmap vectors whose LOV item would be + * stored on the specified lov_block. The array bufs stores the TIDs for + * a distinct vector (see above). The index of the array we're upto tell + * us the offset number of the LOV item on the lov_block. 
+ */ + +typedef struct BMTIDLOVBuffer +{ + BlockNumber lov_block; + BMTIDBuffer **bufs; +} BMTIDLOVBuffer; + + +static Buffer get_lastbitmappagebuf(Relation rel, BMLOVItem lovItem); +static void move_to_bitmappage(Relation rel, BMLOVItem lovItem, + Buffer *lastBufferP, uint32 *numWordsP, + bool use_wal); +static void mergewords(Relation rel, BMLOVItem lovItem, + Buffer *lastBufferP, uint32 *numWordsP, + uint64 tidNumber, bool use_wal); +static void insertsetbit(Relation rel, BMLOVItem lovItem, + Buffer *lastBufferP, uint32 *numWordsP, + uint64 tidnum, bool use_wal); +static void create_lovitem(Relation rel, Buffer metabuf, + uint64 tidnum, TupleDesc tupDesc, + Datum *attdata, bool *nulls, + Relation lovHeap, Relation lovIndex, + BlockNumber *lovBlockP, + OffsetNumber *lovOffsetP, bool use_wal); +static void write_tids(Relation rel, Buffer metabuf, + BlockNumber lovBlock, OffsetNumber lovOffset, + uint64 *tids, uint32 numTids, bool use_wal); +static void build_inserttuple(Relation rel, uint64 tidnum, + ItemPointerData ht_ctid, TupleDesc tupDesc, + Datum *attdata, bool *nulls, BMBuildState *state); +static void inserttuple(Relation rel, Buffer metabuf, + uint64 tidnum, ItemPointerData ht_ctid, + TupleDesc tupDesc, Datum* attdata, + bool *nulls, Relation lovHeap, + Relation lovIndex, ScanKey scanKey, + IndexScanDesc scanDesc, bool use_wal); +static void updatesetbit(Relation rel, BMLOVItem lovItem, + Buffer *lastBufferP, uint64 tidnum, + uint32 *numWordsP, bool use_wal); + +static void updatesetbit_inword(BM_HRL_WORD word, uint64 updateBitLoc, + BM_HRL_WORD *words, BM_HRL_WORD *headerWordP, + uint32 *numWordsP); + +static void updatesetbit_inpage(Relation rel, Buffer bitmapBuffer, + Buffer *nextBitmapBufferP, Buffer lastBuffer, + uint64 tidnum, uint64 firstTidNumber, + bool use_wal); +static uint64 getnumbits(BM_HRL_WORD *contentWords, + BM_HRL_WORD *headerWords, uint32 nwords); +static void findbitmappage(Relation rel, BMLOVItem lovItem, + Buffer lastBuffer, uint64 
tidnum,
+                           Buffer *bitmapBufferP, uint64 *firstTidNumberP);
+static void shift_header_bits(BM_HRL_WORD *words, uint32 numOfBits,
+                              uint32 maxNumOfWords, uint32 startLoc,
+                              uint32 numOfShiftingBits);
+static void insert_newwords(BM_HRL_WORD *contentWords,
+                            BM_HRL_WORD *headerWords,
+                            uint32 *numWordsP, uint32 maxNumWords,
+                            uint32 insertPos,
+                            BM_HRL_WORD *newContentWords,
+                            BM_HRL_WORD newHeaderWord,
+                            uint32 numNewWords,
+                            BM_HRL_WORD *leftContentWords,
+                            BM_HRL_WORD *leftHeaderWord,
+                            uint32 *numLeftWordsP);
+static void buf_make_space(Relation rel, Buffer metabuf,
+                           BMTidLocsBuffer *tidLocsBuffer, bool use_wal);
+#ifdef DEBUG
+static void verify_bitmappages(Relation rel, BMLOVItem lovItem);
+#endif
+
+/*
+ * get_lastbitmappagebuf() -- fetch the tail bitmap page of an LOV item.
+ *
+ * Returns InvalidBuffer when the LOV item has no bitmap page yet
+ * (bm_lov_head is InvalidBlockNumber). Otherwise the page at bm_lov_tail
+ * is returned, obtained through _bitmap_getbuf() in BM_WRITE mode.
+ */
+Buffer
+get_lastbitmappagebuf(Relation rel, BMLOVItem lovItem)
+{
+    /* No head block means the bitmap vector has no pages at all. */
+    if (lovItem->bm_lov_head == InvalidBlockNumber)
+        return InvalidBuffer;
+
+    return _bitmap_getbuf(rel, lovItem->bm_lov_tail, BM_WRITE);
+}
+
+
+/*
+ * move_to_bitmappage() -- append bm_last_compword in an LOV item
+ *     to its associated bitmap page.
+ *
+ * This function moves bm_last_compword of an LOV item into the last
+ * bitmap page, which is buffered in *lastBufferP. If *lastBufferP is
+ * not a valid buffer, or the page has no room left for the new word,
+ * a new last bitmap page is created to store the word.
+ *
+ * "*numWordsP" is incremented by 1 when the word fits into the page
+ * buffered in "*lastBufferP". When a new page has to be started,
+ * "*numWordsP" is reset and ends up as 1. The counter tells the caller
+ * how many new words were added to a bitmap page since the last
+ * time we have called WAL.
+ */
+static void
+move_to_bitmappage(Relation rel, BMLOVItem lovItem, Buffer *lastBufferP,
+                   uint32 *numWordsP, bool use_wal)
+{
+    Page            lastPage;
+    BMBitmapOpaque  bitmapPageOpaque;
+    Buffer          newBuffer;
+    BMBitmap        bitmap;
+
+    /* The bitmap vector has no page yet: create its first page. */
+    if (!BufferIsValid(*lastBufferP))
+    {
+        Assert(lovItem->bm_lov_head == InvalidBlockNumber);
+
+        *lastBufferP = _bitmap_getbuf(rel, P_NEW, BM_WRITE);
+
+        _bitmap_init_bitmappage(rel, *lastBufferP);
+        if(use_wal)
+            _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWBITMAP,
+                                *lastBufferP);
+
+        /*
+         * Head and tail are block numbers, so they must come from
+         * BufferGetBlockNumber() and never from the buffer handle itself.
+         */
+        lovItem->bm_lov_head = BufferGetBlockNumber(*lastBufferP);
+        lovItem->bm_lov_tail = lovItem->bm_lov_head;
+    }
+
+    lastPage = BufferGetPage(*lastBufferP);
+
+    bitmapPageOpaque = (BMBitmapOpaque)PageGetSpecialPointer(lastPage);
+
+    /* if there is no space in this page */
+    if (bitmapPageOpaque->bm_hrl_words_used == BM_NUM_OF_HRL_WORDS_PER_PAGE)
+    {
+        /* create a new bitmap page, and write the old page to the disk. */
+        newBuffer = _bitmap_getbuf(rel, P_NEW, BM_WRITE);
+        _bitmap_init_bitmappage(rel, newBuffer);
+
+        if(use_wal)
+            _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWBITMAP,
+                                newBuffer);
+
+        /* link the old tail page to the new one */
+        lovItem->bm_lov_tail = BufferGetBlockNumber(newBuffer);
+        bitmapPageOpaque->bm_bitmap_next = lovItem->bm_lov_tail;
+
+        if(use_wal)
+            _bitmap_log_bitmappage(rel, *lastBufferP, true, *numWordsP);
+        _bitmap_wrtbuf(*lastBufferP);
+
+        /* from here on we work on the new page; restart the word count */
+        *numWordsP = 0;
+        *lastBufferP = newBuffer;
+        lastPage = BufferGetPage(*lastBufferP);
+        bitmapPageOpaque = (BMBitmapOpaque)PageGetSpecialPointer(lastPage);
+
+    }
+
+    bitmap = (BMBitmap) PageGetContents(lastPage);
+
+    /*
+     * Header-bit values 2 and 3 mark bm_last_compword as a fill word,
+     * so flag the slot we are about to fill in the page's header words.
+     */
+    if (lovItem->bm_last_two_headerbits == 2 ||
+        lovItem->bm_last_two_headerbits == 3)
+    {
+        uint32 off = bitmapPageOpaque->bm_hrl_words_used/BM_HRL_WORD_SIZE;
+        uint32 n = bitmapPageOpaque->bm_hrl_words_used;
+
+        bitmap->hwords[off] |= WORDNO_GET_HEADER_BIT(n);
+    }
+
+    bitmap->cwords[bitmapPageOpaque->bm_hrl_words_used] =
+        lovItem->bm_last_compword;
+
+    bitmapPageOpaque->bm_last_tid_location =
+        lovItem->bm_last_tid_location;
+    bitmapPageOpaque->bm_hrl_words_used++;
+    (*numWordsP)++;
+}
+
+/*
+ * mergewords() -- merge bm_last_word into bm_last_compword
+ *     in a given LOV item.
+ *
+ * If bm_last_word and bm_last_compword can be compressed into one word
+ * (both are fill words carrying the same fill bit), then simply update
+ * the value for bm_last_compword, and reset bm_last_word.
+ *
+ * If bm_last_word and bm_last_compword can not be compressed into one word,
+ * we append bm_last_compword into the last bitmap page, set
+ * bm_last_compword to bm_last_word, and reset bm_last_word.
+ *
+ * bm_last_two_headerbits is read here as two flags: value 1 or 3 means
+ * bm_last_word is a fill word, value 2 or 3 means bm_last_compword is one.
+ *
+ * 'tidNumber' is only compared against BM_HRL_WORD_SIZE: when they are
+ * equal, bm_last_compword is not flushed to a bitmap page (presumably
+ * because the vector holds just its first word at that point and
+ * bm_last_compword is still empty -- confirm against the callers).
+ * lastBufferP, numWordsP and use_wal are handed to move_to_bitmappage().
+ */
+static void
+mergewords(Relation rel, BMLOVItem lovItem, Buffer *lastBufferP,
+           uint32 *numWordsP, uint64 tidNumber, bool use_wal)
+{
+    bool lastWordIsFill = (lovItem->bm_last_two_headerbits == 1 ||
+                           lovItem->bm_last_two_headerbits == 3);
+
+    /*
+     * If two words are both fill word, then try to increase the
+     * fill length in bm_last_compword. If this fill length exceeds
+     * the maximum fill length, then write it out to disk, and create
+     * a new word for bm_last_compword.
+     */
+    if ((lovItem->bm_last_two_headerbits == 3) &&
+        (GET_FILL_BIT(lovItem->bm_last_compword) ==
+         GET_FILL_BIT(lovItem->bm_last_word)))
+    {
+        BM_HRL_WORD lastCompWordFillLength =
+            FILL_LENGTH(lovItem->bm_last_compword);
+        BM_HRL_WORD lastWordFillLength =
+            FILL_LENGTH(lovItem->bm_last_word);
+
+        if (lastCompWordFillLength+lastWordFillLength >= MAX_FILL_LENGTH)
+        {
+            /*
+             * Top bm_last_compword up to the maximum fill length, flush
+             * it, and carry the remainder over as the new bm_last_compword.
+             */
+            lovItem->bm_last_compword +=
+                (MAX_FILL_LENGTH-lastCompWordFillLength);
+            lovItem->bm_last_word -=
+                (MAX_FILL_LENGTH-lastCompWordFillLength);
+            lovItem->bm_last_tid_location +=
+                (MAX_FILL_LENGTH-lastCompWordFillLength)*BM_HRL_WORD_SIZE;
+
+            move_to_bitmappage(rel, lovItem, lastBufferP, numWordsP,
+                               use_wal);
+
+            lovItem->bm_last_compword = lovItem->bm_last_word;
+            lovItem->bm_last_tid_location +=
+                FILL_LENGTH(lovItem->bm_last_compword)*BM_HRL_WORD_SIZE;
+        }
+        else
+        {
+            lovItem->bm_last_compword += lastWordFillLength;
+            lovItem->bm_last_tid_location +=
+                lastWordFillLength*BM_HRL_WORD_SIZE;
+        }
+        /* bm_last_compword is a fill word, bm_last_word is reset below */
+        lovItem->bm_last_two_headerbits = 2;
+    }
+    else
+    {
+        if (tidNumber != BM_HRL_WORD_SIZE)
+            move_to_bitmappage(rel, lovItem, lastBufferP, numWordsP,
+                               use_wal);
+
+        /* move the last word to the last complete word. */
+        lovItem->bm_last_compword = lovItem->bm_last_word;
+
+        if (lastWordIsFill)
+        {
+            lovItem->bm_last_two_headerbits = 2;
+            lovItem->bm_last_tid_location +=
+                FILL_LENGTH(lovItem->bm_last_compword)*BM_HRL_WORD_SIZE;
+        }
+        else
+        {
+            lovItem->bm_last_two_headerbits = 0;
+            lovItem->bm_last_tid_location += BM_HRL_WORD_SIZE;
+        }
+    }
+
+    lovItem->bm_last_word = LITERAL_ALL_ZERO;
+}
+
+/*
+ * getnumbits() -- return the number of bits included in the given
+ * bitmap words.
+ */ +static uint64 +getnumbits(BM_HRL_WORD *contentWords, BM_HRL_WORD *headerWords, uint32 nwords) +{ + uint64 nbits = 0; + uint32 i; + + for (i=0; ibm_last_tid_location; + if (lovItem->bm_last_two_headerbits == 2) + tidLocation -= (FILL_LENGTH(lovItem->bm_last_compword)* + BM_HRL_WORD_SIZE); + else + tidLocation -= BM_HRL_WORD_SIZE; + + /* + * If tidnum is in either bm_last_compword or bm_last_word, + * we simply change it. Otherwise, we have to find the right page + * that contains this bit, starting from the beginning of the + * bitmap vector. + */ + if (tidnum > lovItem->bm_last_tid_location) /* bm_last_word */ + { + insertingPos = (tidnum-1)%BM_HRL_WORD_SIZE; + lovItem->bm_last_word |= (((BM_HRL_WORD)1)< tidLocation) /* bm_last_compword */ + { + /* if this is a literal word, we simply update the bit. */ + if (lovItem->bm_last_two_headerbits == 0) + { + insertingPos = (tidnum-1)%BM_HRL_WORD_SIZE; + lovItem->bm_last_compword |= (((BM_HRL_WORD)1)<bm_last_two_headerbits == 2 && + GET_FILL_BIT(lovItem->bm_last_compword) == 0) + { + /* + * If this is a fill zero word, we need to split this word + * into two or three words, depending on the splitting position. 
+ */ + + BM_HRL_WORD newContentWords[3]; + BM_HRL_WORD newHeaderWord; + uint32 numNewWords; + uint32 newWordNo; + + updatesetbit_inword(lovItem->bm_last_compword, + tidnum - tidLocation - 1, + newContentWords, &newHeaderWord, + &numNewWords); + + /* reset lovItem->bm_last_tid_location */ + lovItem->bm_last_tid_location = tidLocation; + + for (newWordNo=0; newWordNobm_last_compword = newContentWords[newWordNo]; + if (IS_FILL_WORD(&newHeaderWord, newWordNo)) + { + lovItem->bm_last_two_headerbits = 2; + lovItem->bm_last_tid_location += + FILL_LENGTH(lovItem->bm_last_compword) * + BM_HRL_WORD_SIZE; + } + else + { + lovItem->bm_last_two_headerbits = 0; + lovItem->bm_last_tid_location += BM_HRL_WORD_SIZE; + } + + if (newWordNo != numNewWords - 1) + move_to_bitmappage(rel, lovItem, lastBufferP, + numWordsP, use_wal); + } + } + } + else + { + /* + * If tidnum is in the middle of the bitmap vector, + * we try to find the bitmap page that contains this bit, + * and update the bit. + */ + + uint64 firstTidNumber = 1; + Buffer bitmapBuffer = InvalidBuffer; + Page bitmapPage; + BMBitmapOpaque bitmapOpaque; + + Buffer nextBuffer = InvalidBuffer; + + /* find the page that contains this bit. */ + findbitmappage(rel, lovItem, *lastBufferP, tidnum, + &bitmapBuffer, &firstTidNumber); + + /* + * We may need the next page of this page when we update the bit + * in this page, because if there are enough space in the next + * page to hold the extra words generated by this update, we + * will insert them into the next page. 
+         */
+        /*
+         * NOTE(review): findbitmappage() fetches every page other than the
+         * tail with BM_READ, yet updatesetbit_inpage() below modifies the
+         * page contents. Confirm whether the lock has to be upgraded to
+         * BM_WRITE before the update.
+         */
+        bitmapPage = BufferGetPage(bitmapBuffer);
+        bitmapOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage);
+        if (BlockNumberIsValid(bitmapOpaque->bm_bitmap_next))
+        {
+            /* the tail page is already held by the caller in *lastBufferP */
+            if (bitmapOpaque->bm_bitmap_next != lovItem->bm_lov_tail)
+                nextBuffer = _bitmap_getbuf(rel, bitmapOpaque->bm_bitmap_next,
+                                            BM_WRITE);
+            else
+                nextBuffer = *lastBufferP;
+        }
+
+        updatesetbit_inpage(rel, bitmapBuffer, &nextBuffer, *lastBufferP,
+                            tidnum, firstTidNumber, use_wal);
+
+        if (bitmapBuffer == *lastBufferP &&
+            BufferIsValid(nextBuffer))
+        {
+            /*
+             * The updated page was the tail and the update spilled words
+             * into nextBuffer, which therefore becomes the new tail. WAL
+             * is written only when the caller asked for it, exactly like
+             * every other _bitmap_log_bitmappage() call in this file.
+             */
+            if(use_wal)
+                _bitmap_log_bitmappage(rel, bitmapBuffer, false,
+                                       bitmapOpaque->bm_hrl_words_used);
+            _bitmap_wrtbuf(bitmapBuffer);
+
+            *lastBufferP = nextBuffer;
+            lovItem->bm_lov_tail = BufferGetBlockNumber(nextBuffer);
+        }
+        else if (BufferIsValid(nextBuffer))
+        {
+            if(use_wal)
+                _bitmap_log_bitmappage(rel, bitmapBuffer, false,
+                                       bitmapOpaque->bm_hrl_words_used);
+
+            _bitmap_wrtbuf(bitmapBuffer);
+
+            /* the tail page stays with the caller; write any other page */
+            if (nextBuffer != *lastBufferP)
+            {
+                Page nextPage = BufferGetPage(nextBuffer);
+                BMBitmapOpaque nextBitmapOpaque =
+                    (BMBitmapOpaque)PageGetSpecialPointer(nextPage);
+
+                if(use_wal)
+                    _bitmap_log_bitmappage(rel, nextBuffer, false,
+                                           nextBitmapOpaque->bm_hrl_words_used);
+                _bitmap_wrtbuf(nextBuffer);
+            }
+        }
+    }
+}
+
+/*
+ * updatesetbit_inword() -- update the given bit to 1 in a given
+ * word.
+ *
+ * The given word will generate at most three new words, depending on
+ * the position of the given bit to be updated. Make sure that the
+ * array 'words' has the size of 3 when you call this function. All new
+ * words will be put in this array, and the final number of new words is
+ * stored in '*numWordsP'. The bit location 'updateBitLoc' is relative to
+ * the beginning of the given word, starting from 0.
+ *
+ * We assume that word is a fill zero word.
+ */
+static void
+updatesetbit_inword(BM_HRL_WORD word, uint64 updateBitLoc,
+                    BM_HRL_WORD* words, BM_HRL_WORD* headerWordP,
+                    uint32* numWordsP)
+{
+    uint64  numBits, usedNumBits;
+    uint16  insertingPos;
+
+    /*
+     * Outputs: words[0..*numWordsP-1] receive the replacement words, and
+     * *headerWordP gets one bit per output word, counted from the most
+     * significant bit, set when that output word is a fill word.
+     */
+    *numWordsP = 0;
+    *headerWordP = 0;
+
+    Assert(updateBitLoc < BM_HRL_WORD_SIZE*FILL_LENGTH(word));
+
+    numBits = FILL_LENGTH(word) * BM_HRL_WORD_SIZE;
+    usedNumBits = 0;
+
+    /* leading zero fill word covering the whole words before the bit */
+    if (updateBitLoc >= BM_HRL_WORD_SIZE)
+    {
+        words[*numWordsP] = BM_MAKE_FILL_WORD(0, updateBitLoc/BM_HRL_WORD_SIZE);
+        (*numWordsP)++;
+        *headerWordP |= (((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE-*numWordsP));
+        usedNumBits += (updateBitLoc/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE;
+    }
+
+    /* construct the literal word */
+    insertingPos = updateBitLoc - usedNumBits;
+    words[*numWordsP] = ((BM_HRL_WORD)0) | (((BM_HRL_WORD)1) << insertingPos);
+    (*numWordsP)++;
+    usedNumBits += BM_HRL_WORD_SIZE;
+
+    /* trailing zero fill word covering whatever is left of the original */
+    if (numBits > usedNumBits)
+    {
+        Assert((numBits - usedNumBits) % BM_HRL_WORD_SIZE == 0);
+
+        words[*numWordsP] = BM_MAKE_FILL_WORD(0,
+                                (numBits - usedNumBits) / BM_HRL_WORD_SIZE);
+        (*numWordsP)++;
+        /* shift a BM_HRL_WORD, not a plain int, as done for the first word */
+        *headerWordP |= (((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE - *numWordsP));
+    }
+}
+
+/*
+ * shift_header_bits() -- right-shift bits after 'startLoc' for
+ * 'numofShiftingBits' bits.
+ *
+ * These bits are stored in an array of words with the word size of
+ * BM_HRL_WORD_SIZE. This shift is done in-place. The maximum number of
+ * words in this array is given. If the shifting causes the array not to
+ * have enough space for all bits, the right-most overflow bits will be
+ * discarded. The value 'startLoc' starts with 0.
+ */
+void
+shift_header_bits(BM_HRL_WORD* words, uint32 numOfBits,
+                  uint32 maxNumOfWords, uint32 startLoc,
+                  uint32 numOfShiftingBits)
+{
+    uint32  startWordNo;    /* index of the word that contains bit 'startLoc' */
+    uint32  endWordNo;      /* index of the word that contains the last used bit */
+    uint32  wordNo;
+    uint32  numOfFinalShiftingBits; /* shift distance applied inside the start word */
+    BM_HRL_WORD tmpWord;
+
+    Assert(startLoc <= numOfBits);
+    Assert((numOfBits-1)/BM_HRL_WORD_SIZE < maxNumOfWords);
+
+    startWordNo = startLoc/BM_HRL_WORD_SIZE;
+    endWordNo = (numOfBits-1)/BM_HRL_WORD_SIZE;
+
+    for (wordNo = endWordNo; wordNo > startWordNo; wordNo--) /* walk backwards so each word is shifted before receiving its carry */
+    {
+        /*
+         * obtain the last 'numOfShiftingBits' bits in the words[wordNo],
+         * and store them in the high-end of a word. These are the bits
+         * that fall out of this word and carry into the following one.
+         */
+        tmpWord = (((BM_HRL_WORD)words[wordNo])<<
+                   (BM_HRL_WORD_SIZE-numOfShiftingBits));
+
+        /* right-shift the original word 'numOfShiftingBits' bits. */
+        words[wordNo] = (((BM_HRL_WORD)words[wordNo])>>numOfShiftingBits);
+
+        /*
+         * OR those shifted bits into the next word in the array. When
+         * there is no next word, the carried bits are dropped.
+         */
+        if (wordNo < maxNumOfWords-1)
+            words[wordNo + 1] |= tmpWord;
+
+    }
+
+    /*
+     * obtain bits after 'startLoc': shifting left and back right clears
+     * the startLoc%BM_HRL_WORD_SIZE high-order bits of the start word.
+     */
+    tmpWord = ((BM_HRL_WORD)(words[startWordNo]<<
+                (startLoc%BM_HRL_WORD_SIZE)))>>(startLoc%BM_HRL_WORD_SIZE);
+
+    words[startWordNo] = ((BM_HRL_WORD)(words[startWordNo]>>
+                (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE)))<<
+                (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE); /* keep only the bits in front of 'startLoc' */
+
+    numOfFinalShiftingBits = numOfShiftingBits;
+    if (BM_HRL_WORD_SIZE - startLoc % BM_HRL_WORD_SIZE < numOfShiftingBits)
+        numOfFinalShiftingBits = BM_HRL_WORD_SIZE - startLoc % BM_HRL_WORD_SIZE; /* cap at the number of bit positions from 'startLoc' to the end of the word */
+
+    words[startWordNo] |= (tmpWord>>numOfFinalShiftingBits); /* put the shifted tail back behind the preserved bits */
+
+    if (startWordNo < maxNumOfWords-1)
+    {
+        tmpWord = ((BM_HRL_WORD)(tmpWord << (BM_HRL_WORD_SIZE - numOfFinalShiftingBits)))>>
+                    (numOfShiftingBits - numOfFinalShiftingBits); /* bits pushed out of the start word, positioned for the next word */
+        words[startWordNo+1] |= tmpWord;
+    }
+}
+
+/*
+ * insert_newwords() -- insert some given words into an array
+ * of bitmap words.
+ *
+ * The new words will be inserted into the positions starting from
+ * 'insertPos'(>=0).
The original words from 'insertPos' will be shifted + * to the right. If the given array does not have enough space to + * hold all words, the last '(*numWordsP+numNewWords-maxNumWords)' words + * will be stored in 'leftWords', for which the caller should set + * the enough space to hold these left words. '*numWordsP' will be + * set to the final total number of words in this array. + * + * The 'numNewWords' is less than or equal to BM_HRL_WORD_SIZE. + */ +void +insert_newwords(BM_HRL_WORD *contentWords, BM_HRL_WORD *headerWords, + uint32 *numWordsP, uint32 maxNumWords, uint32 insertPos, + BM_HRL_WORD *newContentWords, BM_HRL_WORD newHeaderWord, + uint32 numNewWords, BM_HRL_WORD *leftContentWords, + BM_HRL_WORD *leftHeaderWord, uint32 *numLeftWordsP) +{ + int32 wordNo; + uint16 bitLoc; + + Assert(numNewWords <= BM_HRL_WORD_SIZE); + Assert(insertPos <= maxNumWords); + + *numLeftWordsP = 0; + + /* if there are no words in this page, we simply copy the new words. */ + if (*numWordsP == 0) + { + memcpy(contentWords, newContentWords, numNewWords*sizeof(BM_HRL_WORD)); + memcpy(headerWords, &newHeaderWord, sizeof(BM_HRL_WORD)); + *numWordsP = numNewWords; + + return; + } + + /* + * if insertPos is pointing to the position after the maximum position + * in this word, we simply copy the new words to leftContentWords. + */ + if (insertPos == maxNumWords) + { + memcpy(leftContentWords, newContentWords, + numNewWords*sizeof(BM_HRL_WORD)); + memcpy(leftHeaderWord, &newHeaderWord, sizeof(BM_HRL_WORD)); + *numLeftWordsP = numNewWords; + + return; + } + + Assert(*numWordsP > 0); + + if (*numWordsP + numNewWords > maxNumWords) + *numLeftWordsP = *numWordsP + numNewWords - maxNumWords; + *leftHeaderWord = 0; + + /* + * Walk from the last word in the array back to 'insertPos'. + * If the word no + numNewWords is greater than maxNumWords, + * we store these words in leftContentWords. 
+ */ + for (wordNo=*numWordsP-1; wordNo>=0 && wordNo>=insertPos; wordNo--) + { + if (wordNo + numNewWords >= maxNumWords) + { + leftContentWords[wordNo+numNewWords-maxNumWords] = + contentWords[wordNo]; + if (IS_FILL_WORD(headerWords, wordNo)) + { + uint32 o = (int)wordNo/BM_HRL_WORD_SIZE; + uint32 n = wordNo + numNewWords - maxNumWords; + + *leftHeaderWord |= WORDNO_GET_HEADER_BIT(n); + headerWords[o] &= ~(WORDNO_GET_HEADER_BIT(wordNo)); + } + } + else + contentWords[wordNo + numNewWords] = contentWords[wordNo]; + } + + /* insert new words */ + for (wordNo=0; wordNo= maxNumWords) + { + uint32 n = insertPos + wordNo - maxNumWords; + + leftContentWords[n] = newContentWords[wordNo]; + if (IS_FILL_WORD(&newHeaderWord, wordNo)) + *leftHeaderWord |= WORDNO_GET_HEADER_BIT(n); + } + else + contentWords[insertPos+wordNo] = newContentWords[wordNo]; + } + + /* right-shift the bits in the header words */ + shift_header_bits(headerWords, *numWordsP, + BM_NUM_OF_HEADER_WORDS, insertPos, + numNewWords); + + /* set the newWords header bits */ + for (bitLoc = insertPos; + bitLoc < insertPos + numNewWords && bitLoc < maxNumWords; + bitLoc++) + { + if (IS_FILL_WORD(&newHeaderWord, bitLoc-insertPos)) + { + uint32 off = (uint32)bitLoc/BM_HRL_WORD_SIZE; + + headerWords[off] |= WORDNO_GET_HEADER_BIT(bitLoc); + } + } + + *numWordsP += (numNewWords-*numLeftWordsP); +} + + +/* + * updatesetbit_inpage() -- update the given bit to 1 in a given + * bitmap page. + * + * The argument 'firstTidNumber' indicates the first tid location of + * the bits stored in this page. This is necessary for locating the bit + * of 'tidnum'. 
+ */ +static void +updatesetbit_inpage(Relation rel, Buffer bitmapBuffer, + Buffer* nextBitmapBufferP, Buffer lastBuffer, + uint64 tidnum, uint64 firstTidNumber, + bool use_wal) +{ + Page bitmapPage; + BMBitmapOpaque bitmapOpaque; + BMBitmap bitmap; + uint64 bitNo = 0; + uint32 wordNo; + BM_HRL_WORD word = 0; + bool found = false; + + bitmapPage = BufferGetPage(bitmapBuffer); + bitmapOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); + + bitmap = (BMBitmap) PageGetContents(bitmapPage); + bitNo = 0; + + /* Find the word that contains the bit of tidnum. */ + for (wordNo = 0; wordNo < bitmapOpaque->bm_hrl_words_used; wordNo++) + { + word = bitmap->cwords[wordNo]; + if (IS_FILL_WORD(bitmap->hwords, wordNo)) + bitNo += FILL_LENGTH(word) * BM_HRL_WORD_SIZE; + else + bitNo += BM_HRL_WORD_SIZE; + + if (firstTidNumber + bitNo - 1 >= tidnum) + { + found = true; + break; /* find the word */ + } + } + + if(!found) + elog(ERROR, "bitmap word uninitialized"); + + Assert (wordNo <= bitmapOpaque->bm_hrl_words_used); + + if (!IS_FILL_WORD(bitmap->hwords, wordNo)) + { + uint16 insertingPos = (tidnum - 1) % BM_HRL_WORD_SIZE; + + bitmap->cwords[wordNo] |= (((BM_HRL_WORD)1)<= firstTidNumber); + + /* update the bit in 'word', and generate new words */ + updatesetbit_inword(word, tidnum - firstTidNumber, + newContentWords, &newHeaderWord, + &numNewWords); + + if (numNewWords == 1) + { + uint32 off = wordNo/BM_HRL_WORD_SIZE; + + bitmap->cwords[wordNo] = newContentWords[0]; + bitmap->hwords[off] &= ~WORDNO_GET_HEADER_BIT(wordNo); + } + else + { + BM_HRL_WORD leftContentWords[3]; + BM_HRL_WORD leftHeaderWord; + uint32 numLeftWords; + Buffer newBitmapBuffer = InvalidBuffer; + Page newBitmapPage; + BMBitmapOpaque newBitmapOpaque; + BMBitmap newBitmap; + BM_HRL_WORD newLeftContentWords[3]; + BM_HRL_WORD newLeftHeaderWord; + uint32 numNewLeftWords; + uint64 oldTidLocation; + + bitmap->cwords[wordNo] = newContentWords[0]; + if (tidnum - firstTidNumber + 1 <= BM_HRL_WORD_SIZE) + { + 
uint32 off = wordNo/BM_HRL_WORD_SIZE; + + bitmap->hwords[off] &= ~WORDNO_GET_HEADER_BIT(wordNo); + } + /* ignore the first word in newContentWords. */ + newHeaderWord = newHeaderWord << 1; + insert_newwords(bitmap->cwords, + bitmap->hwords, + &(bitmapOpaque->bm_hrl_words_used), + BM_NUM_OF_HRL_WORDS_PER_PAGE, + wordNo + 1, + newContentWords + 1, newHeaderWord, + numNewWords - 1, + leftContentWords, &leftHeaderWord, + &numLeftWords); + if (numLeftWords == 0) + return; + + oldTidLocation = bitmapOpaque->bm_last_tid_location; + bitmapOpaque->bm_last_tid_location -= + getnumbits(leftContentWords, &leftHeaderWord, + numLeftWords); + + /* + * if this page does not have enough space for these new words, + * we look at the next page. If the next page has enough space for + * the left words, we insert them into the next page. Otherwise, + * we create a new page to hold these words. + */ + if (BufferIsValid(*nextBitmapBufferP)) + { + Page nextBitmapPage = BufferGetPage(*nextBitmapBufferP); + BMBitmapOpaque nextBitmapOpaque = + (BMBitmapOpaque)PageGetSpecialPointer(nextBitmapPage); + + if (nextBitmapOpaque->bm_hrl_words_used + numLeftWords <= + BM_NUM_OF_HRL_WORDS_PER_PAGE) + newBitmapBuffer = *nextBitmapBufferP; + } + + if (!BufferIsValid(newBitmapBuffer)) + { + newBitmapBuffer = + _bitmap_getbuf(rel, P_NEW, BM_WRITE); + _bitmap_init_bitmappage(rel, newBitmapBuffer); + if(use_wal) + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWBITMAP, + newBitmapBuffer); + + bitmapOpaque->bm_bitmap_next = + BufferGetBlockNumber(newBitmapBuffer); + } + + newBitmapPage = BufferGetPage(newBitmapBuffer); + newBitmapOpaque = + (BMBitmapOpaque)PageGetSpecialPointer(newBitmapPage); + + if (BufferIsValid(*nextBitmapBufferP) && + *nextBitmapBufferP != newBitmapBuffer) + { + newBitmapOpaque->bm_last_tid_location = oldTidLocation; + newBitmapOpaque->bm_bitmap_next = + BufferGetBlockNumber(*nextBitmapBufferP); + if (*nextBitmapBufferP != lastBuffer) + _bitmap_relbuf(*nextBitmapBufferP); + } + else if 
(!BufferIsValid(*nextBitmapBufferP)) + newBitmapOpaque->bm_last_tid_location = oldTidLocation; + + *nextBitmapBufferP = newBitmapBuffer; + + newBitmap = (BMBitmap)PageGetContents(newBitmapPage); + + insert_newwords(newBitmap->cwords,newBitmap->hwords, + &(newBitmapOpaque->bm_hrl_words_used), + BM_NUM_OF_HRL_WORDS_PER_PAGE, 0, + leftContentWords, leftHeaderWord, numLeftWords, + newLeftContentWords, &newLeftHeaderWord, + &numNewLeftWords); + + Assert(numNewLeftWords == 0); + } + } +} + +/* + * findbitmappage() -- find the bitmap page that contains + * the given tid location. + * + * We assume that this tid location is not in bm_last_compword or + * bm_last_word of its LOVItem. + */ +void +findbitmappage(Relation rel, BMLOVItem lovItem, + Buffer lastBuffer, uint64 tidnum, + Buffer* bitmapBufferP, uint64* firstTidNumberP) +{ + BlockNumber nextBlockNo = lovItem->bm_lov_head; + + while (BlockNumberIsValid(nextBlockNo)) + { + Page bitmapPage; + BMBitmapOpaque bitmapOpaque; + + /* + * if this is the last page, verify if tidnum is in + * this page, and set returnBuffer to *lastBufferP. Note that + * we already have the write lock on this page. + */ + if (nextBlockNo == lovItem->bm_lov_tail) + { + bitmapPage = (Page)BufferGetPage(lastBuffer); + bitmapOpaque = (BMBitmapOpaque) + PageGetSpecialPointer(bitmapPage); + Assert(bitmapOpaque->bm_last_tid_location >= tidnum); + *bitmapBufferP = lastBuffer; + break; + } + + *bitmapBufferP = _bitmap_getbuf(rel, nextBlockNo, BM_READ); + bitmapPage = BufferGetPage(*bitmapBufferP); + bitmapOpaque = (BMBitmapOpaque) + PageGetSpecialPointer(bitmapPage); + + if (bitmapOpaque->bm_last_tid_location >= tidnum) + break; /* find the page */ + + (*firstTidNumberP) = bitmapOpaque->bm_last_tid_location + 1; + nextBlockNo = bitmapOpaque->bm_bitmap_next; + + _bitmap_relbuf(*bitmapBufferP); + } +} + +#ifdef DEBUG +/* + * verify_bitmappages() -- verify if the bm_last_tid_location values + * are valid in all bitmap pages. Only used during debugging. 
+ */
+static void
+verify_bitmappages(Relation rel, BMLOVItem lovItem)
+{
+    BlockNumber nextBlockNo = lovItem->bm_lov_head;
+    uint64 tidnum = 0;      /* running count of bits seen so far */
+
+    /* walk the page chain of this bitmap vector from head to tail */
+    while (BlockNumberIsValid(nextBlockNo))
+    {
+        Page            bitmapPage;
+        BMBitmapOpaque  bitmapOpaque;
+        Buffer          bitmapBuffer;
+        uint32          wordNo;
+        BMBitmap        bitmap;
+
+        bitmapBuffer = _bitmap_getbuf(rel, nextBlockNo, BM_READ);
+        bitmapPage = BufferGetPage(bitmapBuffer);
+        bitmapOpaque = (BMBitmapOpaque)
+            PageGetSpecialPointer(bitmapPage);
+        bitmap = (BMBitmap) PageGetContents(bitmapPage);
+
+        /* a fill word stands for FILL_LENGTH words, a literal for one */
+        for (wordNo = 0; wordNo < bitmapOpaque->bm_hrl_words_used; wordNo++)
+        {
+            BM_HRL_WORD word = bitmap->cwords[wordNo];
+            if (IS_FILL_WORD(bitmap->hwords, wordNo))
+                tidnum += FILL_LENGTH(word) * BM_HRL_WORD_SIZE;
+            else
+                tidnum += BM_HRL_WORD_SIZE;
+
+        }
+
+        /* both values are 64-bit unsigned: print them with UINT64_FORMAT */
+        if (bitmapOpaque->bm_last_tid_location != tidnum)
+            elog(ERROR, "bm_last_tid_location=" UINT64_FORMAT ", tidnum=" UINT64_FORMAT,
+                 (uint64) bitmapOpaque->bm_last_tid_location, tidnum);
+
+        nextBlockNo = bitmapOpaque->bm_bitmap_next;
+
+        _bitmap_relbuf(bitmapBuffer);
+    }
+}
+
+#endif /* DEBUG */
+
+/*
+ * insertsetbit() -- insert a set bit into a bitmap.
+ */
+static void
+insertsetbit(Relation rel, BMLOVItem lovItem, Buffer *lastBufferP,
+             uint32 *numWordsP, uint64 tidnum, bool use_wal)
+{
+    int32   numOfZeros;
+    uint16  zerosNeeded, insertingPos;
+
+    /*
+     * If this is the first time to insert a set bit, then
+     * we have already inserted the first tidnum/BM_HRL_WORD_SIZE
+     * zeros.
+     */
+    if (lovItem->bm_last_setbit == 0)
+        numOfZeros = tidnum % BM_HRL_WORD_SIZE;
+    else
+    {
+        /*
+         * Usually, tidnum is greater than lovItem->bm_last_setbit.
+         * However, if this is not the case, this should be called while
+         * doing 'vacuum full' or doing insertion after 'vacuum'. In this
+         * case, we try to update this bit in the corresponding bitmap
+         * vector.
+         */
+        if (tidnum <= lovItem->bm_last_setbit)
+        {
+            /*
+             * Scan through the bitmap vector, and update the bit in
+             * tidnum.
+ */ + updatesetbit(rel, lovItem, lastBufferP, tidnum, numWordsP, use_wal); + + return; + } + numOfZeros = tidnum - lovItem->bm_last_setbit - 1; + } + + /* + * If there are some zeros between these two set bits, then + * we need to fill these zero bits into the bitmap. + */ + if (numOfZeros > 0) + { + /* try to fill bm_last_word */ + if (lovItem->bm_last_setbit == 0) + zerosNeeded = BM_HRL_WORD_SIZE; + else + zerosNeeded = BM_HRL_WORD_SIZE - + ((lovItem->bm_last_setbit-1) % BM_HRL_WORD_SIZE) - 1; + + if ((zerosNeeded != 0) && (numOfZeros >= zerosNeeded)) + { + /* merge bm_last_word into bm_last_compword */ + mergewords(rel, lovItem, lastBufferP, numWordsP, + lovItem->bm_last_setbit + zerosNeeded, use_wal); + + numOfZeros -= zerosNeeded; + } + + /* + * if the remaining zeros are more than BM_HRL_WORD_SIZE, + * we construct the last word to be a fill word, and merge it + * with bm_last_compword. + */ + if (numOfZeros >= BM_HRL_WORD_SIZE) + { + uint32 numOfTotalFillWords = numOfZeros/BM_HRL_WORD_SIZE; + uint32 loopNo=0; + + while (numOfTotalFillWords > 0) + { + BM_HRL_WORD numOfFillWords; + uint64 tid_n; + + if (numOfTotalFillWords >= MAX_FILL_LENGTH) + numOfFillWords = MAX_FILL_LENGTH; + else + numOfFillWords = numOfTotalFillWords; + + lovItem->bm_last_word = BM_MAKE_FILL_WORD(0, numOfFillWords); + lovItem->bm_last_two_headerbits |= 1; + + tid_n = (lovItem->bm_last_setbit + zerosNeeded + + loopNo * MAX_FILL_LENGTH * BM_HRL_WORD_SIZE + + numOfFillWords * BM_HRL_WORD_SIZE); + + mergewords(rel, lovItem, lastBufferP, numWordsP, tid_n, + use_wal); + loopNo++; + + numOfTotalFillWords -= numOfFillWords; + numOfZeros -= numOfFillWords * BM_HRL_WORD_SIZE; + } + } + } + + Assert((numOfZeros >= 0) && (numOfZerosbm_last_word |= (((BM_HRL_WORD)1) << insertingPos); + + lovItem->bm_last_setbit = tidnum; + + if (tidnum % BM_HRL_WORD_SIZE == 0) + { + if (lovItem->bm_last_word == LITERAL_ALL_ZERO) + { + lovItem->bm_last_word = BM_MAKE_FILL_WORD(0, 1); + lovItem->bm_last_two_headerbits 
|= 1; + } + else if (lovItem->bm_last_word == LITERAL_ALL_ONE) + { + lovItem->bm_last_word = BM_MAKE_FILL_WORD(1, 1); + lovItem->bm_last_two_headerbits |= 1; + } + + mergewords(rel, lovItem, lastBufferP, numWordsP, + tidnum, use_wal); + } +} + +/* + * create_lovitem() -- create a new LOV item. + * + * Create a new LOV item and append this item into the last LOV page. + * Each LOV item is associated with one distinct value for attributes + * to be indexed. This function also inserts this distinct value along + * with this new LOV item's block number and offsetnumber into the + * auxiliary heap and its b-tree of this bitmap index. + * + * This function returns the block number and offset number of this + * new LOV item. + * + * The caller should have an exclusive lock on metabuf. + */ +void +create_lovitem(Relation rel, Buffer metabuf, uint64 tidnum, + TupleDesc tupDesc, Datum *attdata, bool *nulls, + Relation lovHeap, Relation lovIndex, BlockNumber *lovBlockP, + OffsetNumber *lovOffsetP, bool use_wal) +{ + Page mp; + BMMetaPage metapage; + Buffer currLovBuffer; + Page currLovPage; + Datum* lovDatum; + bool* lovNulls; + OffsetNumber itemSize; + BMLOVItem lovItem; + int numOfAttrs; + + numOfAttrs = tupDesc->natts; + + /* Get the last LOV page. Meta page should be locked. */ + mp = BufferGetPage(metabuf); + metapage = (BMMetaPage) PageGetContents(mp); + *lovBlockP = metapage->bm_lov_lastpage; + + currLovBuffer = _bitmap_getbuf(rel, *lovBlockP, BM_WRITE); + currLovPage = BufferGetPage(currLovBuffer); + + lovItem = _bitmap_formitem(tidnum); + + *lovOffsetP = OffsetNumberNext(PageGetMaxOffsetNumber(currLovPage)); + itemSize = sizeof(BMLOVItemData); + + /* + * If there is no enough space in the last LOV page for + * a new item, create a new LOV page, and update the metapage. 
+ */ + if (itemSize > PageGetFreeSpace(currLovPage)) + { + Buffer newLovBuffer; + + /* create a new LOV page */ + newLovBuffer = _bitmap_getbuf(rel, P_NEW, BM_WRITE); + _bitmap_init_lovpage(rel, newLovBuffer); + + if(use_wal) + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWLOV, + newLovBuffer); + + _bitmap_relbuf(currLovBuffer); + + currLovBuffer = newLovBuffer; + currLovPage = BufferGetPage(currLovBuffer); + *lovOffsetP = OffsetNumberNext(PageGetMaxOffsetNumber(currLovPage)); + *lovBlockP = BufferGetBlockNumber(currLovBuffer); + + metapage->bm_lov_lastpage = BufferGetBlockNumber(currLovBuffer); + + if(use_wal) + _bitmap_log_metapage(rel, mp); + + _bitmap_wrtnorelbuf(metabuf); + } + + lovDatum = palloc0((numOfAttrs + 2) * sizeof(Datum)); + lovNulls = palloc0((numOfAttrs + 2) * sizeof(bool)); + memcpy(lovDatum, attdata, numOfAttrs * sizeof(Datum)); + memcpy(lovNulls, nulls, numOfAttrs * sizeof(bool)); + lovDatum[numOfAttrs] = Int32GetDatum(*lovBlockP); + lovNulls[numOfAttrs] = false; + lovDatum[numOfAttrs + 1] = Int16GetDatum(*lovOffsetP); + lovNulls[numOfAttrs + 1] = false; + + _bitmap_insert_lov(lovHeap, lovIndex, lovDatum, lovNulls); + + if (PageAddItem(currLovPage, (Item)lovItem, itemSize, *lovOffsetP, + LP_USED) == InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("failed to add LOV item to \"%s\"", + RelationGetRelationName(rel)))); + + if(use_wal) + _bitmap_log_lovitem(rel, currLovBuffer, true, *lovOffsetP, lovItem); + + _bitmap_wrtbuf(currLovBuffer); + + pfree(lovItem); + pfree(lovDatum); + pfree(lovNulls); +} + +/* + * write_tids() -- write out all tids that are stored + * in a given array. 
+ */
+static void
+write_tids(Relation rel, Buffer metabuf, BlockNumber lovBlock,
+		   OffsetNumber lovOffset, uint64 *tids, uint32 numTids, bool use_wal)
+{
+	Buffer		currLovBuffer;
+	Page		currLovPage;
+	uint32		tidNo;
+	Buffer		lastBuffer;
+	BMLOVItem	lovItem;
+	uint32		numWords;
+	Page		mp;
+	BMMetaPage	metapage;
+	uint32		numNewTids;
+	/*
+	 * Outline: lock the LOV page and find the LOV item at lovOffset, pass
+	 * each tid to insertsetbit(), write back the last bitmap page (if one
+	 * was obtained) and the LOV page, then update the tuple count on the
+	 * metapage.  The content lock on metabuf is taken and released inside
+	 * this function.
+	 * NOTE(review): presumably the caller must hold a pin, but not the
+	 * content lock, on metabuf -- confirm against all callers.
+	 */
+	Assert(BlockNumberIsValid(lovBlock) && numTids >= 1);
+
+	currLovBuffer = _bitmap_getbuf(rel, lovBlock, BM_WRITE);
+	currLovPage = BufferGetPage(currLovBuffer);
+
+	lovItem = (BMLOVItem) PageGetItem(currLovPage,
+									  PageGetItemId(currLovPage, lovOffset));
+
+	lastBuffer = get_lastbitmappagebuf(rel, lovItem);
+
+	numWords = 0;
+	numNewTids = 0;
+	/*
+	 * A tid is counted as "new" when it is greater than the item's
+	 * bm_last_setbit at the moment it is examined, i.e. before
+	 * insertsetbit() is called for that tid.
+	 */
+	for (tidNo = 0; tidNo < numTids; tidNo++)
+	{
+		uint64 tidnum = tids[tidNo];
+
+		if (lovItem->bm_last_setbit < tidnum)
+			numNewTids++;
+
+		insertsetbit(rel, lovItem, &lastBuffer, &numWords,
+					 tidnum, use_wal);
+	}
+
+	/* reset all tids in 'tids' to 0; note this clears the caller's array */
+	MemSet(tids, 0, numTids * sizeof(uint64));
+
+	/*
+	 * If there is a last bitmap page buffer, log it when use_wal is set,
+	 * then mark it dirty and release its lock and pin.
+	 */
+	if (BufferIsValid(lastBuffer))
+	{
+		if(use_wal)
+			_bitmap_log_bitmappage(rel, lastBuffer, false, numWords);
+		_bitmap_wrtbuf(lastBuffer);
+	}
+
+	if(use_wal)
+		_bitmap_log_lovitem(rel, currLovBuffer, false, lovOffset, lovItem);
+	_bitmap_wrtbuf(currLovBuffer);	/* lovItem must not be used after this */
+
+	/* update the metapage under its exclusive content lock */
+	LockBuffer(metabuf, BM_WRITE);
+	mp = BufferGetPage(metabuf);
+	metapage = (BMMetaPage)PageGetContents(mp);
+	metapage->bm_num_tuples += numNewTids;
+
+	/*
+	 * If we updated the index in the middle of the index itself then
+	 * VACUUM FULL should be a reindex of the index. If, however, we only
+	 * appended data to the index, VACUUM FULL becomes a no-op.
+	 *
+	 * "Updated in the middle" is detected as: at least one tid in this
+	 * call was not counted as new above.
+	 */
+	if (numNewTids < numTids)
+		metapage->bm_need_rebuilt = true;
+
+	if(use_wal)
+		_bitmap_log_metapage(rel, mp);
+
+	_bitmap_wrtnorelbuf(metabuf);	/* marks the page dirty; the pin is kept */
+	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+}
+
+/*
+ * When building an index we try and buffer calls to write_tids() as it will
+ * result in I/O.
+ */
+
+static void
+buf_add_tid(Relation rel, BMTidLocsBuffer *tids, uint64 tidnum,
+			BMBuildState *state, BlockNumber lov_block, OffsetNumber off)
+{
+	BMTIDBuffer	   *buf;
+	BMTIDLOVBuffer *lov_buf = NULL;
+
+	/* If we surpass maintenance_work_mem, free some space from the buffer */
+	if (tids->byte_size >= maintenance_work_mem * 1024L)
+		buf_make_space(rel, state->bm_metabuf, tids, state->use_wal);
+
+	/*
+	 * Look for an existing buffer for this LOV block.  Entries are only
+	 * ever created with lov_block <= max_lov_block, so a block beyond
+	 * max_lov_block cannot be in the list and the search is skipped.
+	 */
+	if (BlockNumberIsValid(tids->max_lov_block) &&
+		lov_block <= tids->max_lov_block)
+	{
+		ListCell   *cell;
+
+		foreach(cell, tids->lov_blocks)
+		{
+			BMTIDLOVBuffer *tmp = lfirst(cell);
+
+			if (tmp->lov_block == lov_block)
+			{
+				lov_buf = tmp;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * tids is lazily initialized.  If we do not have a buffer for this LOV
+	 * block, create one.  This also covers a block that is not beyond
+	 * max_lov_block but has no list entry; previously that case was only
+	 * guarded by an Assert and dereferenced NULL in non-assert builds.
+	 */
+	if (lov_buf == NULL)
+	{
+		lov_buf = palloc(sizeof(BMTIDLOVBuffer));
+		lov_buf->lov_block = lov_block;
+		lov_buf->bufs = palloc0(BM_MAX_LOVITEMS_PER_PAGE *
+								sizeof(BMTIDBuffer *));
+
+		if (!BlockNumberIsValid(tids->max_lov_block) ||
+			tids->max_lov_block < lov_block)
+			tids->max_lov_block = lov_block;
+
+		/*
+		 * Add the new LOV buffer to the list head. It seems reasonable that
+		 * future calls to this function will want this lov_block rather than
+		 * older lov_blocks.
+		 */
+		tids->lov_blocks = lcons(lov_buf, tids->lov_blocks);
+	}
+
+	/* offsets are 1-based; slot (off - 1) holds the buffer for the item */
+	Assert(off >= 1 && off - 1 < BM_MAX_LOVITEMS_PER_PAGE);
+
+	if (lov_buf->bufs[off - 1])
+	{
+		buf = lov_buf->bufs[off - 1];
+		buf->tids[buf->ntids++] = tidnum;
+
+		/*
+		 * Grow as soon as the array becomes full, so that the next append
+		 * always has room.
+		 */
+		if (buf->size == buf->ntids)
+		{
+			/* double the size of the buffer */
+			int32 size = buf->size;
+
+			buf->tids = repalloc(buf->tids,
+								 (buf->size + size) * sizeof(uint64));
+			buf->size += size;
+			tids->byte_size += size * sizeof(uint64);
+		}
+		return;
+	}
+
+	/* not found, create a new one */
+	buf = (BMTIDBuffer *)palloc(sizeof(BMTIDBuffer));
+#define BM_BL_BUF_SIZE 1024 /* seems like a good place to start */
+
+	buf->size = BM_BL_BUF_SIZE;
+	buf->tids = palloc(sizeof(uint64) * BM_BL_BUF_SIZE);
+	buf->tids[0] = tidnum;
+	buf->ntids = 1;
+
+	lov_buf->bufs[off - 1] = buf;
+	/* only the tid arrays are charged against maintenance_work_mem */
+	tids->byte_size += buf->size * sizeof(uint64);
+}
+
+/*
+ * Spill some data out of the buffer to free up space.
+ *
+ * Walks the per-LOV-block buffers and writes tid arrays out with
+ * write_tids() until locbuf->byte_size drops below maintenance_work_mem.
+ * Each spilled tid array is released together with its BMTIDBuffer header;
+ * the per-block slot arrays and list entries are kept for reuse.
+ */
+static void
+buf_make_space(Relation rel, Buffer metabuf, BMTidLocsBuffer *locbuf,
+			   bool use_wal)
+{
+	ListCell   *cell;
+
+	/*
+	 * Now, we could just pull the head of lov_blocks but there'd be no
+	 * guarantee that we'd free up enough space.
+	 */
+	foreach(cell, locbuf->lov_blocks)
+	{
+		int			i;
+		BMTIDLOVBuffer *lov_buf = (BMTIDLOVBuffer *)lfirst(cell);
+		BlockNumber lov_block;
+
+		lov_block = lov_buf->lov_block;
+
+		for (i = 0; i < BM_MAX_LOVITEMS_PER_PAGE; i++)
+		{
+			BMTIDBuffer *buf = (BMTIDBuffer *)lov_buf->bufs[i];
+			OffsetNumber off;
+
+			/* break if we've freed enough space */
+			if (locbuf->byte_size < maintenance_work_mem * 1024L)
+				break;
+
+			/* XXX: can we just break now? */
+			if (!buf)
+				continue;
+
+			off = i + 1;
+			write_tids(rel, metabuf, lov_block, off, buf->tids,
+					   buf->ntids, use_wal);
+			locbuf->byte_size -= buf->size * sizeof(uint64);
+			pfree(buf->tids);
+			/* free the header too; it was leaked on every spill before */
+			pfree(buf);
+			lov_buf->bufs[i] = NULL;
+		}
+		if (locbuf->byte_size < maintenance_work_mem * 1024L)
+			break;
+	}
+}
+
+/*
+ * _bitmap_write_alltids() -- write all tids in the given buffer into disk.
+ *
+ * Every buffered tid array is handed to write_tids() and then released,
+ * together with its BMTIDBuffer header and the per-LOV-block slot array.
+ * On return 'tids' is back in its initial, empty state (no list entries,
+ * byte_size 0, max_lov_block invalid) and may be reused or freed.
+ */
+void
+_bitmap_write_alltids(Relation rel, Buffer metabuf, BMTidLocsBuffer *tids,
+					  bool use_wal)
+{
+	ListCell   *cell;
+
+	foreach(cell, tids->lov_blocks)
+	{
+		int			i;
+		BMTIDLOVBuffer *lov_buf = (BMTIDLOVBuffer *)lfirst(cell);
+		BlockNumber lov_block;
+
+		lov_block = lov_buf->lov_block;
+
+		for (i = 0; i < BM_MAX_LOVITEMS_PER_PAGE; i++)
+		{
+			BMTIDBuffer *buf = (BMTIDBuffer *)lov_buf->bufs[i];
+			OffsetNumber off;
+
+			if (!buf)
+				continue;
+
+			off = i + 1;
+			write_tids(rel, metabuf, lov_block, off, buf->tids,
+					   buf->ntids, use_wal);
+			tids->byte_size -= buf->size * sizeof(uint64);
+			pfree(buf->tids);
+			/* the header was previously leaked */
+			pfree(buf);
+			lov_buf->bufs[i] = NULL;
+		}
+
+		/*
+		 * list_free_deep() below only frees the BMTIDLOVBuffer itself, so
+		 * release its slot array here.
+		 */
+		pfree(lov_buf->bufs);
+		lov_buf->bufs = NULL;
+	}
+
+	list_free_deep(tids->lov_blocks);
+	tids->lov_blocks = NIL;
+	tids->byte_size = 0;
+	/* the list is empty again, so no LOV block has a buffer any more */
+	tids->max_lov_block = InvalidBlockNumber;
+}
+
+/*
+ * build_inserttuple() -- insert a new tuple into the bitmap index
+ *	during the bitmap index construction.
+ *
+ * Each new tuple has an assigned number -- tidnum, called a
+ * tid location, which represents the bit location for this tuple in
+ * a bitmap vector. To speed up the construction, this function does not
+ * write this tid location into its bitmap vector immediately. We maintain
+ * a buffer -- BMTidLocsBuffer to keep an array of tid locations
+ * for each distinct attribute value.
+ *
+ * If this insertion causes the buffer to overflow, we write tid locations
+ * for enough distinct values to disk to accommodate this new tuple.
+ *
+ * The LOV item for the key is looked up in state->lovitem_hash; a key seen
+ * for the first time gets a new LOV item via create_lovitem().  A tuple
+ * whose attributes are all NULL always uses the first item of the first
+ * LOV page.  The metapage is exclusively locked around the lookup.
+ *
+ * NOTE(review): a key with only some attributes NULL is hashed through
+ * attdata without consulting 'nulls' -- confirm that this is safe for
+ * pass-by-reference types.
+ */
+static void
+build_inserttuple(Relation rel, uint64 tidnum,
+				  ItemPointerData ht_ctid, TupleDesc tupDesc,
+				  Datum *attdata, bool *nulls, BMBuildState *state)
+{
+	BlockNumber		lovBlock;
+	OffsetNumber	lovOffset;
+	BMTidLocsBuffer *tidLocsBuffer;
+	int				attno;
+	bool			allNulls = true;
+	Datum		   *entry;
+
+	tidLocsBuffer = state->bm_tidLocsBuffer;
+
+	/* Check if all attributes have value of NULL. */
+	for (attno = 0; attno < tupDesc->natts; attno++)
+	{
+		if (!nulls[attno])
+		{
+			allNulls = false;
+			break;
+		}
+	}
+
+	LockBuffer(state->bm_metabuf, BM_WRITE);
+
+	/*
+	 * if the inserting tuple has the value of NULL, then
+	 * the corresponding tid array is the first.
+	 */
+	if (allNulls)
+	{
+		lovBlock = BM_LOV_STARTPAGE;
+		lovOffset = 1;
+	}
+	else
+	{
+		bool found;
+		BMBuildLovData *lov;
+
+		/* look up the hash to see if we can find the lov data that way */
+		entry = (Datum *)hash_search(state->lovitem_hash,
+									 (void *)attdata,
+									 HASH_ENTER, &found);
+		if (!found)
+		{
+			/* Copy the key values in case someone modifies them */
+			for(attno = 0; attno < tupDesc->natts; attno++)
+			{
+				Form_pg_attribute at = tupDesc->attrs[attno];
+
+				entry[attno] = datumCopy(entry[attno], at->attbyval,
+										 at->attlen);
+			}
+			/*
+			 * If the inserting tuple has a new value, then we create a new
+			 * LOV item.
+			 */
+			create_lovitem(rel, state->bm_metabuf, tidnum, tupDesc, attdata,
+						   nulls, state->bm_lov_heap, state->bm_lov_index,
+						   &lovBlock, &lovOffset, state->use_wal);
+
+			/* the LOV position is stored right after the key Datums */
+			lov = (BMBuildLovData *) &(entry[tupDesc->natts]);
+			lov->lov_block = lovBlock;
+			lov->lov_off = lovOffset;
+		}
+		else
+		{
+			lov = (BMBuildLovData *) &(entry[tupDesc->natts]);
+			lovBlock = lov->lov_block;
+			lovOffset = lov->lov_off;
+		}
+	}
+
+	_bitmap_wrtnorelbuf(state->bm_metabuf);
+
+	LockBuffer(state->bm_metabuf, BUFFER_LOCK_UNLOCK);
+
+	buf_add_tid(rel, tidLocsBuffer, tidnum, state, lovBlock, lovOffset);
+}
+
+/*
+ * inserttuple() -- insert a new tuple into the bitmap index.
+ *
+ * This function finds the corresponding bitmap vector associated with
+ * the given attribute value, and inserts a set bit into this bitmap
+ * vector. Each distinct attribute value is stored as a LOV item, which
+ * is stored in a list of LOV pages.
+ *
+ * If there is no LOV item associated with the given attribute value,
+ * a new LOV item is created and appended into the last LOV page.
+ *
+ * To support the high-cardinality case for attributes to be indexed,
+ * we also maintain an auxiliary heap and a btree structure for all
+ * the distinct attribute values so that the search for the
+ * corresponding bitmap vector can be done faster. The heap
+ * contains all attributes to be indexed and 2 more attributes --
+ * the block number and the offset number of the LOV item within the
+ * LOV pages. The b-tree index is on this new heap
+ * and the key contains all attributes to be indexed.
+ *
+ * Locking: metabuf is passed in without its content lock held.  The lock
+ * is taken here only on the path that searches for (and possibly creates)
+ * a LOV item, and it is released on that same path before write_tids()
+ * is called, because write_tids() locks the metapage itself.
+ */
+static void
+inserttuple(Relation rel, Buffer metabuf,
+			uint64 tidnum, ItemPointerData ht_ctid,
+			TupleDesc tupDesc, Datum* attdata, bool *nulls,
+			Relation lovHeap, Relation lovIndex,
+			ScanKey scanKey, IndexScanDesc scanDesc, bool use_wal)
+{
+	BlockNumber		lovBlock;
+	OffsetNumber	lovOffset;
+	bool			blockNull, offsetNull;
+	bool			allNulls = true;
+	int				attno;
+
+	/* Check if the values of given attributes are all NULL. */
+	for (attno = 0; attno < tupDesc->natts; attno++)
+	{
+		if (!nulls[attno])
+		{
+			allNulls = false;
+			break;
+		}
+	}
+
+	/*
+	 * if the inserting tuple has the value NULL, then the LOV item is
+	 * the first item in the lovBuffer.  No metapage lock is needed (or
+	 * taken) on this path.
+	 */
+	if (allNulls)
+	{
+		lovBlock = BM_LOV_STARTPAGE;
+		lovOffset = 1;
+	}
+	else
+	{
+		bool res;
+
+		/*
+		 * XXX: We lock the meta page to guard against a race condition where
+		 * by a concurrent writer is inserting the same key as us and they
+		 * create_lovitem() between us calling _bitmap_findvalue() and
+		 * create_lovitem().
+		 *
+		 * The problem is, locking the metapage is pretty heavy handed
+		 * because the read routines need a read lock on it. There are a
+		 * few other things we could do instead: use a BM insert lock or
+		 * wrap the code below in a PG_TRY and try and catch the unique
+		 * constraint violation from the btree code.
+		 */
+		LockBuffer(metabuf, BM_WRITE);
+		res = _bitmap_findvalue(lovHeap, lovIndex, scanKey, scanDesc, &lovBlock,
+								&blockNull, &lovOffset, &offsetNull);
+
+		if (!res)
+		{
+			/*
+			 * Search through the lov heap and index to find the LOV item which
+			 * has the same value as the inserting tuple. If such an item is
+			 * not found, then we create a new LOV item, and insert it into the
+			 * lov heap and index.
+			 */
+			create_lovitem(rel, metabuf, tidnum, tupDesc,
+						   attdata, nulls, lovHeap, lovIndex,
+						   &lovBlock, &lovOffset, use_wal);
+		}
+
+		/*
+		 * Release the lock on the same path that acquired it.  This used to
+		 * be done unconditionally after the if/else, which also tried to
+		 * unlock the metapage for all-NULL tuples, where no lock is held.
+		 */
+		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+	}
+
+	/*
+	 * Here, we have found the block number and offset number of the
+	 * LOV item that points to the bitmap page, to which we will
+	 * append the set bit.
+	 */
+	write_tids(rel, metabuf, lovBlock, lovOffset, &tidnum, 1, use_wal);
+}
+
+/*
+ * _bitmap_buildinsert() -- insert an index tuple during index creation.
+ *
+ * Converts the heap tuple's ctid into a tid location and passes it, with
+ * the index tuple descriptor, to build_inserttuple().
+ */
+void
+_bitmap_buildinsert(Relation rel, ItemPointerData ht_ctid, Datum *attdata,
+					bool *nulls, BMBuildState *state)
+{
+	TupleDesc	tupDesc;
+	uint64		tidOffset;
+
+	Assert(ItemPointerGetOffsetNumber(&ht_ctid) <= BM_MAX_HTUP_PER_PAGE);
+
+	tidOffset = BM_IPTR_TO_INT(&ht_ctid);
+
+	tupDesc = RelationGetDescr(rel);
+
+	/* insert a new bit into the corresponding bitmap */
+	build_inserttuple(rel, tidOffset, ht_ctid,
+					  tupDesc, attdata, nulls, state);
+}
+
+/*
+ * _bitmap_doinsert() -- insert an index tuple for a given tuple.
+ */
+void
+_bitmap_doinsert(Relation rel, ItemPointerData ht_ctid,
+				 Datum *attdata, bool *nulls)
+{
+	TupleDesc	indexDesc = RelationGetDescr(rel);
+	int			nkeys = indexDesc->natts;
+	uint64		tidnum;
+	Buffer		metabuf;
+	BMMetaPage	metapage;
+	Relation	lovHeap;
+	Relation	lovIndex;
+	ScanKey		keys;
+	IndexScanDesc lovScan;
+	int			k;
+
+	/* an index without attributes has nothing to insert */
+	if (nkeys <= 0)
+		return;
+
+	Assert(ItemPointerGetOffsetNumber(&ht_ctid) <= BM_MAX_HTUP_PER_PAGE);
+	tidnum = BM_IPTR_TO_INT(&ht_ctid);
+
+	/*
+	 * Read-lock the metapage just long enough to open the LOV heap and its
+	 * index; the pin is kept until the end of this function.
+	 */
+	metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ);
+	metapage = (BMMetaPage) PageGetContents(BufferGetPage(metabuf));
+	_bitmap_open_lov_heapandindex(rel, metapage, &lovHeap, &lovIndex,
+								  RowExclusiveLock);
+	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+
+	/* one equality scan key per indexed attribute */
+	keys = (ScanKey) palloc0(nkeys * sizeof(ScanKeyData));
+
+	for (k = 0; k < nkeys; k++)
+	{
+		ScanKey		key = &keys[k];
+		RegProcedure eqproc;
+
+		eqproc = equality_oper_funcid(indexDesc->attrs[k]->atttypid);
+
+		ScanKeyEntryInitialize(key, SK_ISNULL, k + 1,
+							   BTEqualStrategyNumber, InvalidOid, eqproc, 0);
+
+		/* the flags depend on the value; the argument is always the datum */
+		key->sk_flags = nulls[k] ? SK_ISNULL : 0;
+		key->sk_argument = attdata[k];
+	}
+
+	lovScan = index_beginscan(lovHeap, lovIndex, SnapshotAny,
+							  nkeys, keys);
+
+	/* set the bit for this tuple, with WAL logging enabled */
+	inserttuple(rel, metabuf, tidnum, ht_ctid, indexDesc, attdata,
+				nulls, lovHeap, lovIndex, keys, lovScan, true);
+
+	index_endscan(lovScan);
+	_bitmap_close_lov_heapandindex(lovHeap, lovIndex, RowExclusiveLock);
+
+	ReleaseBuffer(metabuf);
+	pfree(keys);
+}
diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmappages.c bitmap/src/backend/access/bitmap/bitmappages.c
--- pgsql-head/src/backend/access/bitmap/bitmappages.c	1970-01-01 10:00:00.000000000 +1000
+++ bitmap/src/backend/access/bitmap/bitmappages.c	2006-12-04 23:32:59.000000000 +1100
@@ -0,0 +1,380 @@
+/*-------------------------------------------------------------------------
+ *
+ * bitmappages.c
+ *	  Bitmap index page management code for the bitmap index.
+ *
+ * Copyright (c) 2006, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL$
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/tupdesc.h"
+#include "access/bitmap.h"
+#include "parser/parse_oper.h"
+#include "storage/lmgr.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+/*
+ * Helper functions for hashing and matching build data. At this stage, the
+ * hash API doesn't know about complex keys like those used during index
+ * creation (the key is an array of key attributes). c.f. execGrouping.c.
+ */
+typedef struct BMBuildHashData
+{
+	int			natts;			/* number of attributes in a hash key */
+	FmgrInfo   *hash_funcs;		/* per-attribute hash function info */
+	FmgrInfo   *eq_funcs;		/* per-attribute equality function info */
+	MemoryContext tmpcxt;		/* reset and used by build_match_key() */
+	MemoryContext hash_cxt;		/* context given to the hash table (hcxt) */
+} BMBuildHashData;
+/*
+ * State of the index build in progress.  The hash callbacks below receive
+ * only the key pointers, so they read the per-attribute functions from here.
+ */
+static BMBuildHashData *cur_bmbuild = NULL;
+
+static uint32 build_hash_key(const void *key, Size keysize);
+static int build_match_key(const void *key1, const void *key2, Size keysize);
+
+/*
+ * _bitmap_getbuf() -- return the buffer for the given block number and
+ * 					   the access method.
+ *
+ * Reads block 'blkno' of 'rel' with ReadBuffer() and, unless 'access' is
+ * BM_NOLOCK, locks the buffer with LockBuffer() in mode 'access'.
+ */
+Buffer
+_bitmap_getbuf(Relation rel, BlockNumber blkno, int access)
+{
+	Buffer buf;
+
+	buf = ReadBuffer(rel, blkno);
+	if (access != BM_NOLOCK)
+		LockBuffer(buf, access);
+
+	return buf;
+}
+
+/*
+ * _bitmap_wrtbuf() -- mark a buffer page dirty and release it.
+ *
+ * Release the lock and the pin held on the buffer.  The page is only
+ * marked dirty here (MarkBufferDirty); this function does not itself
+ * write it out.
+ */
+void
+_bitmap_wrtbuf(Buffer buf)
+{
+	MarkBufferDirty(buf);
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * _bitmap_wrtnorelbuf() -- mark a buffer page dirty without releasing it.
+ *
+ * Only MarkBufferDirty() is called: whatever pin and lock the caller holds
+ * on the buffer are left untouched.
+ */
+void
+_bitmap_wrtnorelbuf(Buffer buf)
+{
+	MarkBufferDirty(buf);
+}
+
+/*
+ * _bitmap_relbuf() -- release the buffer without writing.
+ *
+ * Drops both the lock and the pin (UnlockReleaseBuffer), so the buffer
+ * must be locked when this is called.
+ */
+void
+_bitmap_relbuf(Buffer buf)
+{
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * _bitmap_init_lovpage -- initialize a new LOV page.
+ *
+ * The page is initialized with no special space, and only if it is still
+ * new (PageIsNew); an already-initialized page is left as it is.  'rel' is
+ * not used.
+ */
+void
+_bitmap_init_lovpage(Relation rel, Buffer buf)
+{
+	Page			page;
+
+	page = (Page) BufferGetPage(buf);
+
+	if(PageIsNew(page))
+		PageInit(page, BufferGetPageSize(buf), 0);
+}
+
+/*
+ * _bitmap_init_bitmappage() -- initialize a new page to store the bitmap.
+ *
+ * Note: This function requires an exclusive lock on the metapage.
+ */
+void
+_bitmap_init_bitmappage(Relation rel, Buffer buf)
+{
+	Page			page;
+	BMBitmapOpaque	opaque;
+
+	page = (Page) BufferGetPage(buf);
+
+	/* a new page gets special space sized for BMBitmapOpaqueData */
+	if(PageIsNew(page))
+		PageInit(page, BufferGetPageSize(buf), sizeof(BMBitmapOpaqueData));
+
+	/*
+	 * even though page may not be new, reset all values: no words used,
+	 * no next bitmap page, last tid location 0.
+	 */
+	opaque = (BMBitmapOpaque) PageGetSpecialPointer(page);
+	opaque->bm_hrl_words_used = 0;
+	opaque->bm_bitmap_next = InvalidBlockNumber;
+	opaque->bm_last_tid_location = 0;
+}
+
+/*
+ * _bitmap_init_buildstate() -- initialize the build state before building
+ *	a bitmap index.
+ *
+ * Fills in 'bmstate': the pinned metapage buffer, the index tuple
+ * descriptor, an empty tid buffer, the opened LOV heap and index, the hash
+ * table mapping key values to LOV items, and the use_wal flag.  It also
+ * allocates the file-level cur_bmbuild state used by the hash callbacks;
+ * _bitmap_cleanup_buildstate() releases it again.
+ *
+ * NOTE(review): cur_bmbuild is only reset by _bitmap_cleanup_buildstate();
+ * it looks as if an error raised during a build would leave it set and trip
+ * the Assert below on the next build in the same backend -- confirm.
+ */
+void
+_bitmap_init_buildstate(Relation index, BMBuildState *bmstate)
+{
+	Page			page;
+	BMMetaPage		mp;
+	HASHCTL			hash_ctl;
+	int				hash_flags;
+	int				i;
+
+
+	/* initialize the build state; the metapage is locked exclusively here */
+	bmstate->bm_metabuf = _bitmap_getbuf(index, BM_METAPAGE, BM_WRITE);
+	bmstate->bm_tupDesc = RelationGetDescr(index);
+
+	bmstate->bm_tidLocsBuffer = (BMTidLocsBuffer *)
+		palloc(sizeof(BMTidLocsBuffer));
+	bmstate->bm_tidLocsBuffer->byte_size = 0;
+	bmstate->bm_tidLocsBuffer->lov_blocks = NIL;
+	bmstate->bm_tidLocsBuffer->max_lov_block = InvalidBlockNumber;
+	page = BufferGetPage(bmstate->bm_metabuf);
+	mp = (BMMetaPage) PageGetContents(page);
+	_bitmap_open_lov_heapandindex(index, mp, &(bmstate->bm_lov_heap),
+								  &(bmstate->bm_lov_index),
+								  RowExclusiveLock);
+	/*
+	 * Mark the metapage dirty and drop its lock; the pin stays in
+	 * bmstate->bm_metabuf for the rest of the build.
+	 */
+	_bitmap_wrtnorelbuf(bmstate->bm_metabuf);
+	LockBuffer(bmstate->bm_metabuf, BUFFER_LOCK_UNLOCK);
+	/* only one build per backend can use the file-level hash state */
+	Assert(cur_bmbuild == NULL);
+
+	cur_bmbuild = (BMBuildHashData *)palloc(sizeof(BMBuildHashData));
+	cur_bmbuild->hash_funcs = (FmgrInfo *)
+		palloc(sizeof(FmgrInfo) * bmstate->bm_tupDesc->natts);
+	cur_bmbuild->eq_funcs = (FmgrInfo *)
+		palloc(sizeof(FmgrInfo) * bmstate->bm_tupDesc->natts);
+	/*
+	 * For every indexed attribute, look up the equality operator of its
+	 * type and the hash function that goes with that operator.
+	 */
+	for (i = 0; i < bmstate->bm_tupDesc->natts; i++)
+	{
+		Oid			typid = bmstate->bm_tupDesc->attrs[i]->atttypid;
+		Operator	optup;
+		Oid			eq_opr;
+		Oid			eq_function;
+		Oid			hash_function;
+
+		optup = equality_oper(typid, false);
+		eq_opr = oprid(optup);
+		eq_function = oprfuncid(optup);
+		ReleaseSysCache(optup);
+		hash_function = get_op_hash_function(eq_opr);
+		if (!OidIsValid(hash_function)) /* should not happen */
+			elog(ERROR, "could not find hash function for hash operator %u",
+				 eq_opr);
+		fmgr_info(eq_function, &cur_bmbuild->eq_funcs[i]);
+		fmgr_info(hash_function, &cur_bmbuild->hash_funcs[i]);
+	}
+
+	cur_bmbuild->natts = bmstate->bm_tupDesc->natts;
+	cur_bmbuild->tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
+												"Bitmap build temp space",
+												ALLOCSET_DEFAULT_MINSIZE,
+												ALLOCSET_DEFAULT_INITSIZE,
+												ALLOCSET_DEFAULT_MAXSIZE);
+
+	/*
+	 * setup the hash table: the key is an array of natts Datums, and each
+	 * entry has room for the key followed by a BMBuildLovData.
+	 * NOTE(review): the reason for the extra 200 bytes per entry is not
+	 * evident from this file -- confirm or remove.
+	 */
+	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(Datum) * cur_bmbuild->natts;
+	hash_ctl.entrysize = hash_ctl.keysize + sizeof(BMBuildLovData) + 200;
+	hash_ctl.hash = build_hash_key;
+	hash_ctl.match = build_match_key;
+	hash_ctl.hcxt = AllocSetContextCreate(CurrentMemoryContext,
+										  "Bitmap build hash table",
+										  ALLOCSET_DEFAULT_MINSIZE,
+										  ALLOCSET_DEFAULT_INITSIZE,
+										  ALLOCSET_DEFAULT_MAXSIZE);
+	cur_bmbuild->hash_cxt = hash_ctl.hcxt;
+
+	hash_flags = HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT;
+
+	bmstate->lovitem_hash = hash_create("Bitmap index build lov item hash",
+										100, &hash_ctl, hash_flags);
+
+	/*
+	 * We need to log index creation in WAL iff WAL archiving is enabled AND
+	 * it's not a temp index.
+	 */
+	bmstate->use_wal = XLogArchivingActive() && !index->rd_istemp;
+
+}
+
+/*
+ * _bitmap_cleanup_buildstate() -- clean up the build state after
+ *	inserting all rows in the heap into the bitmap index.
+ */
+void
+_bitmap_cleanup_buildstate(Relation index, BMBuildState *bmstate)
+{
+	/* write out remaining tids in bmstate->bm_tidLocsBuffer */
+	BMTidLocsBuffer *tidLocsBuffer = bmstate->bm_tidLocsBuffer;
+	_bitmap_write_alltids(index, bmstate->bm_metabuf, tidLocsBuffer,
+						  bmstate->use_wal);
+	/* drop the metapage pin taken by _bitmap_init_buildstate() */
+	ReleaseBuffer(bmstate->bm_metabuf);
+	_bitmap_close_lov_heapandindex(bmstate->bm_lov_heap,bmstate->bm_lov_index,
+								   RowExclusiveLock);
+	pfree(bmstate->bm_tidLocsBuffer);
+	/*
+	 * Release the hash callback state.  Deleting hash_cxt frees the context
+	 * that was handed to the LOV item hash table when it was created.
+	 */
+	MemoryContextDelete(cur_bmbuild->tmpcxt);
+	MemoryContextDelete(cur_bmbuild->hash_cxt);
+	pfree(cur_bmbuild->hash_funcs);
+	pfree(cur_bmbuild->eq_funcs);
+	pfree(cur_bmbuild);
+	cur_bmbuild = NULL;		/* allows the next build to pass its Assert */
+}
+
+/*
+ * _bitmap_init() -- initialize the bitmap index.
+ *
+ * Create the meta page, a new heap which stores the distinct values for
+ * the attributes to be indexed, a btree index on this new heap for searching
+ * those distinct values, and the first LOV page.
+ *
+ * The first LOV page receives one LOV item up front; it is the item used
+ * for the NULL value.  Raises an error if the index already has blocks, or
+ * if the item cannot be added to the page.  When use_wal is true, each new
+ * page, the LOV item and the final metapage contents are logged.
+ */
+void
+_bitmap_init(Relation rel, bool use_wal)
+{
+	BMMetaPage		metapage;
+	Buffer			metabuf;
+	Page			page;
+	Buffer			buf;
+	BMLOVItem 		lovItem;
+	OffsetNumber	newOffset;
+	Page			currLovPage;
+	OffsetNumber	o;
+
+	/* sanity check */
+	if (RelationGetNumberOfBlocks(rel) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				errmsg("cannot initialize non-empty bitmap index \"%s\"",
+				RelationGetRelationName(rel))));
+
+	/* create the metapage; it stays exclusively locked until the end */
+	metabuf = _bitmap_getbuf(rel, P_NEW, BM_WRITE);
+	page = BufferGetPage(metabuf);
+	Assert(PageIsNew(page));
+
+	/* initialize the metapage */
+	PageInit(page, BufferGetPageSize(metabuf), 0);
+	metapage = (BMMetaPage) PageGetContents(page);
+	metapage->bm_num_tuples = 0;
+	metapage->bm_need_rebuilt = false;
+
+	if(use_wal)
+		_bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWMETA, metabuf);
+
+	/*
+	 * initialize the LOV metadata; the OIDs of the new LOV heap and index
+	 * are stored directly into the metapage.
+	 */
+	_bitmap_create_lov_heapandindex(rel, &(metapage->bm_lov_heapId),
+									&(metapage->bm_lov_indexId));
+
+	/* allocate the first LOV page. */
+	buf = _bitmap_getbuf(rel, P_NEW, BM_WRITE);
+	_bitmap_init_lovpage(rel, buf);
+
+	if(use_wal)
+		_bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWLOV, buf);
+
+	currLovPage = BufferGetPage(buf);
+
+	/* set the first item to support NULL value */
+	lovItem = _bitmap_formitem(0);
+	newOffset = OffsetNumberNext(PageGetMaxOffsetNumber(currLovPage));
+
+	/*
+	 * XXX: perhaps this could be a special page, with more efficient storage
+	 * after all, we have fixed size data
+	 */
+	o = PageAddItem(currLovPage, (Item)lovItem, sizeof(BMLOVItemData),
+                    newOffset, LP_USED);
+
+	if (o == InvalidOffsetNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				errmsg("failed to add LOV item to \"%s\"",
+				RelationGetRelationName(rel))));
+
+	metapage->bm_lov_lastpage = BufferGetBlockNumber(buf);
+	if(use_wal)
+		_bitmap_log_lovitem(rel, buf, true, newOffset, lovItem);
+	pfree(lovItem);		/* the item has been copied onto the page */
+
+	_bitmap_wrtbuf(buf);
+
+	if(use_wal)
+		_bitmap_log_metapage(rel, page);
+	/* mark the metapage dirty and release its lock and pin */
+	_bitmap_wrtbuf(metabuf);
+}
+
+/*
+ * Build a hash of the key we're indexing.
+ *
+ * 'key' is an array of cur_bmbuild->natts Datums.  Each attribute is hashed
+ * with its own hash function and combined into the running value by a
+ * one-bit left rotation followed by XOR.  'keysize' is not used.
+ */
+
+static uint32
+build_hash_key(const void *key, Size keysize)
+{
+	Datum *k = (Datum *)key;
+	int i;
+	uint32 hashkey = 0;
+
+	for(i = 0; i < cur_bmbuild->natts; i++)
+	{
+		/* rotate hashkey left 1 bit at each step */
+		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+		hashkey ^= DatumGetUInt32(FunctionCall1(&cur_bmbuild->hash_funcs[i],
+												k[i]));
+	}
+	return hashkey;
+}
+
+/*
+ * Test whether key1 matches key2. Since the equality functions may leak,
+ * reset the temporary context at each call and do all equality calculation
+ * in that context.
+ *
+ * Returns 0 when every attribute compares equal and 1 at the first
+ * attribute that differs.  'keysize' is not used; the number of attributes
+ * comes from cur_bmbuild.
+ */
+static int
+build_match_key(const void *key1, const void *key2, Size keysize)
+{
+	const Datum *lhs = (const Datum *) key1;
+	const Datum *rhs = (const Datum *) key2;
+	MemoryContext saved;
+	int			differ = 0;
+	int			attno = 0;
+
+	/* throw away what the previous call left behind, then work in tmpcxt */
+	MemoryContextReset(cur_bmbuild->tmpcxt);
+	saved = MemoryContextSwitchTo(cur_bmbuild->tmpcxt);
+
+	while (!differ && attno < cur_bmbuild->natts)
+	{
+		Datum		same;
+
+		same = FunctionCall2(&cur_bmbuild->eq_funcs[attno],
+							 lhs[attno], rhs[attno]);
+		if (!DatumGetBool(same))
+			differ = 1;			/* they aren't equal */
+		attno++;
+	}
+
+	MemoryContextSwitchTo(saved);
+	return differ;
+}
+
diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmapsearch.c bitmap/src/backend/access/bitmap/bitmapsearch.c
--- pgsql-head/src/backend/access/bitmap/bitmapsearch.c	1970-01-01 10:00:00.000000000 +1000
+++ bitmap/src/backend/access/bitmap/bitmapsearch.c	2006-11-29 15:36:41.000000000 +1100
@@ -0,0 +1,516 @@
+/*-------------------------------------------------------------------------
+ *
+ * bitmapsearch.c
+ *	  Search routines for on-disk bitmap index access method.
+ *
+ *
+ * Copyright (c) 2006, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/tupdesc.h"
+#include "access/bitmap.h"
+#include "storage/lmgr.h"
+#include "parser/parse_oper.h"
+#include "utils/lsyscache.h"
+/* Position of one LOV item: the LOV page and the offset within that page. */
+typedef struct ItemPos
+{
+	BlockNumber		blockNo;
+	OffsetNumber	offset;
+} ItemPos;
+
+static void next_batch_words(IndexScanDesc scan);
+static void read_words(Relation rel, Buffer lovBuffer,
+					   OffsetNumber lovOffset, BlockNumber *nextBlockNoP,
+					   BM_HRL_WORD *headerWords, BM_HRL_WORD *words,
+					   uint32 *numOfWordsP, bool *readLastWords);
+static void init_scanpos(IndexScanDesc scan, BMVector bmScanPos,
+						 BlockNumber lovBlock, OffsetNumber lovOffset);
+
+/*
+ * _bitmap_first() -- find the first tuple that satisfies a given scan.
+ *
+ * Sets up the scan position with _bitmap_findbitmaps() and then behaves
+ * like _bitmap_next().
+ */
+bool
+_bitmap_first(IndexScanDesc scan, ScanDirection dir)
+{
+	_bitmap_findbitmaps(scan, dir);
+	return _bitmap_next(scan, dir);
+}
+
+/*
+ * _bitmap_next() -- return the next tuple that satisfies a given scan.
+ *
+ * Returns false when the scan is finished.  Otherwise the heap tid of the
+ * next match is stored in scan->xs_ctup.t_self and scan->currentItemData
+ * and true is returned.  'dir' is not consulted here.
+ */
+bool
+_bitmap_next(IndexScanDesc scan, ScanDirection dir)
+{
+	BMScanOpaque	so;
+	BMScanPosition	scanPos;
+	uint64			nextTid;
+
+	so = (BMScanOpaque) scan->opaque;
+	scanPos = so->bm_currPos;
+
+	if (scanPos->done)
+		return false;
+
+	for (;;)
+	{
+		/*
+		 * If there are no more words left from the previous scan, we
+		 * try to compute the next batch of words.
+		 */
+		if (scanPos->bm_batchWords->nwords == 0 &&
+			scanPos->bm_result.nextTidLoc >= scanPos->bm_result.numOfTids)
+		{
+			_bitmap_reset_batchwords(scanPos->bm_batchWords);
+			scanPos->bm_batchWords->firstTid = scanPos->bm_result.nextTid;
+
+			next_batch_words(scan);
+
+			_bitmap_begin_iterate(scanPos->bm_batchWords, &(scanPos->bm_result));
+		}
+
+		/* If we can not find more words, then this scan is over. */
+		if (scanPos->bm_batchWords->nwords == 0 &&
+			scanPos->bm_result.nextTidLoc >= scanPos->bm_result.numOfTids)
+			return false;
+
+		nextTid = _bitmap_findnexttid(scanPos->bm_batchWords,
+									  &(scanPos->bm_result));
+		if (nextTid == 0)	/* 0 is treated as "no tid yet": go around again */
+			continue;
+		else
+			break;
+	}
+
+	Assert((nextTid % BM_MAX_HTUP_PER_PAGE) + 1 > 0);
+
+	ItemPointerSet(&(scan->xs_ctup.t_self), BM_INT_GET_BLOCKNO(nextTid),
+				   BM_INT_GET_OFFSET(nextTid));
+	scan->currentItemData = scan->xs_ctup.t_self;
+
+	return true;
+}
+
+/*
+ * _bitmap_firstbatchwords() -- find the first batch of bitmap words
+ *  in a bitmap vector for a given scan.
+ *
+ * Sets up the scan position with _bitmap_findbitmaps() and then behaves
+ * like _bitmap_nextbatchwords().
+ */
+bool
+_bitmap_firstbatchwords(IndexScanDesc scan,
+						ScanDirection dir)
+{
+	_bitmap_findbitmaps(scan, dir);
+
+	return _bitmap_nextbatchwords(scan, dir);
+}
+
+/*
+ * _bitmap_nextbatchwords() -- find the next batch of bitmap words
+ *  in a bitmap vector for a given scan.
+ *
+ * Returns false only if the scan was already marked done on entry.
+ * NOTE(review): true is also returned when next_batch_words() has just set
+ * 'done'; presumably callers then check nwords or call again -- confirm.
+ */
+bool
+_bitmap_nextbatchwords(IndexScanDesc scan,
+					   ScanDirection dir)
+{
+	BMScanOpaque	so;
+
+	so = (BMScanOpaque) scan->opaque;
+
+	/* check if this scan is over */
+	if (so->bm_currPos->done)
+		return false;
+
+	/*
+	 * If there are some leftover words from the previous scan, simply
+	 * return them.
+	 */
+	if (so->bm_currPos->bm_batchWords->nwords > 0)
+		return true;
+
+	next_batch_words(scan);
+
+	return true;
+}
+
+/*
+ * next_batch_words() -- compute the next batch of bitmap words
+ *	from a given scan position.
+ *
+ * Every bitmap vector of the scan that has no buffered words and has not
+ * yet delivered its last words is refilled with read_words().  With a
+ * single vector its batch is used directly as the scan's batch; with
+ * several, the non-empty batches are combined by _bitmap_union() into
+ * scanPos->bm_batchWords.  scanPos->done is set when nothing is left.
+ */
+static void
+next_batch_words(IndexScanDesc scan)
+{
+	BMScanPosition			scanPos;
+	BMVector	bmScanPos;
+	int						i;
+	BMBatchWords		  **batches;
+	int						numBatches;
+
+	scanPos = ((BMScanOpaque) scan->opaque)->bm_currPos;
+	bmScanPos = scanPos->posvecs;
+
+	/* scratch array of the vectors' batches that have words to contribute */
+	batches = (BMBatchWords **)
+		palloc0(scanPos->nvec * sizeof(BMBatchWords *));
+
+	numBatches = 0;
+	/*
+	 * Obtains the next batch of words for each bitmap vector.
+	 * Ignores those bitmap vectors that contain no new words.
+	 */
+	for (i = 0; i < scanPos->nvec; i++)
+	{
+		BMBatchWords	*batchWords;
+		batchWords = bmScanPos[i].bm_batchWords;
+
+		/*
+		 * If there are no words left from previous scan, read the next
+		 * batch of words.
+		 */
+		if (bmScanPos[i].bm_batchWords->nwords == 0 &&
+			!(bmScanPos[i].bm_readLastWords))
+		{
+
+			_bitmap_reset_batchwords(batchWords);
+			read_words(scan->indexRelation,
+					   bmScanPos[i].bm_lovBuffer,
+					   bmScanPos[i].bm_lovOffset,
+					   &(bmScanPos[i].bm_nextBlockNo),
+					   batchWords->hwords,
+					   batchWords->cwords,
+					   &(batchWords->nwords),
+					   &(bmScanPos[i].bm_readLastWords));
+		}
+
+		if (bmScanPos[i].bm_batchWords->nwords > 0)
+		{
+			batches[numBatches] = batchWords;
+			numBatches++;
+		}
+	}
+
+	/*
+	 * We handle the case where only one bitmap vector contributes to
+	 * the scan separately with other cases. This is because
+	 * bmScanPos->bm_batchWords and scanPos->bm_batchWords
+	 * are the same.
+	 */
+	if (scanPos->nvec == 1)
+	{
+		if (bmScanPos->bm_batchWords->nwords == 0)
+			scanPos->done = true;
+		pfree(batches);
+		scanPos->bm_batchWords = scanPos->posvecs->bm_batchWords;
+
+		return;
+	}
+
+	/*
+	 * At least two bitmap vectors contribute to this scan; we
+	 * OR these bitmap vectors together.
+	 */
+	if (numBatches == 0)
+	{
+		scanPos->done = true;
+		pfree(batches);
+		return;
+	}
+
+	_bitmap_union(batches, numBatches, scanPos->bm_batchWords);
+	pfree(batches);
+}
+
+/*
+ * read_words() -- read one-block of bitmap words from
+ *	the bitmap page.
+ *
+ * If nextBlockNo is an invalid block number, then the two last words
+ * are stored in lovItem. Otherwise, read words from nextBlockNo.
+ */
+static void
+read_words(Relation rel, Buffer lovBuffer, OffsetNumber lovOffset,
+		   BlockNumber *nextBlockNoP, BM_HRL_WORD *headerWords,
+		   BM_HRL_WORD *words, uint32 *numOfWordsP, bool *readLastWords)
+{
+	if (BlockNumberIsValid(*nextBlockNoP))
+	{
+		Buffer bitmapBuffer = _bitmap_getbuf(rel, *nextBlockNoP, BM_READ);
+
+		Page			bitmapPage;
+		BMBitmap		bitmap;
+		BMBitmapOpaque	bo;
+
+		bitmapPage = BufferGetPage(bitmapBuffer);
+
+		bitmap = (BMBitmap) PageGetContents(bitmapPage);
+		bo = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage);
+		/*
+		 * Copy out all header words and the content words in use, and
+		 * advance *nextBlockNoP to the following bitmap page before the
+		 * buffer is released.
+		 */
+		*numOfWordsP = bo->bm_hrl_words_used;
+		memcpy(headerWords, bitmap->hwords,
+				BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD));
+		memcpy(words, bitmap->cwords, sizeof(BM_HRL_WORD) * *numOfWordsP);
+
+		*nextBlockNoP = bo->bm_bitmap_next;
+
+		_bitmap_relbuf(bitmapBuffer);
+
+		*readLastWords = false;
+
+		/*
+		 * If this is the last bitmap page and the total number of words
+		 * in this page is less than or equal to
+		 * BM_NUM_OF_HRL_WORDS_PER_PAGE - 2, we read the last two words
+		 * and append them into 'headerWords' and 'words'.
+		 */
+
+		/* XXX: this is messy, lets hide it in a define */
+		if ((!BlockNumberIsValid(*nextBlockNoP)) &&
+			(*numOfWordsP <= BM_NUM_OF_HRL_WORDS_PER_PAGE - 2))
+		{
+			BM_HRL_WORD	lastWords[2];
+			BM_HRL_WORD	lastHeaderWords;
+			BM_HRL_WORD	tmp;
+			uint32		numWords;
+			int			offs;
+			/*
+			 * *nextBlockNoP is invalid now, so this call takes the branch
+			 * that reads from the LOV item and sets *readLastWords.
+			 */
+			read_words(rel, lovBuffer, lovOffset, nextBlockNoP,
+					   &lastHeaderWords, lastWords, &numWords,
+					   readLastWords);
+			/*
+			 * NOTE(review): the LOV-item branch below returns a single word
+			 * when bm_last_compword is LITERAL_ALL_ONE, yet two words are
+			 * asserted and copied here -- confirm that case cannot occur
+			 * once a bitmap page exists.
+			 */
+			Assert(numWords == 2);
+
+			memcpy(words + *numOfWordsP, lastWords, 2 * sizeof(BM_HRL_WORD));
+			offs = *numOfWordsP / BM_HRL_WORD_SIZE;
+			/*
+			 * Merge the two header bits at the position of the first
+			 * appended word; '%' binds tighter than '>>', so the shift
+			 * count is (*numOfWordsP % BM_HRL_WORD_SIZE).
+			 */
+			tmp = lastHeaderWords >> *numOfWordsP % BM_HRL_WORD_SIZE;
+			headerWords[offs] |= tmp;
+
+			/*
+			 * When the first appended word uses the last bit of a header
+			 * word, the second header bit belongs to the next header word.
+			 */
+			if (*numOfWordsP % BM_HRL_WORD_SIZE == BM_HRL_WORD_SIZE - 1)
+			{
+				offs = (*numOfWordsP + 1)/BM_HRL_WORD_SIZE;
+				headerWords[offs] |= lastHeaderWords << 1;
+			}
+			*numOfWordsP += 2;
+		}
+	}
+	else
+	{
+		BMLOVItem	lovItem;
+		Page		lovPage;
+
+		/* the LOV buffer is only share-locked while the item is read */
+		LockBuffer(lovBuffer, BM_READ);
+
+		lovPage = BufferGetPage(lovBuffer);
+		lovItem = (BMLOVItem) PageGetItem(lovPage,
+			PageGetItemId(lovPage, lovOffset));
+
+		/*
+		 * Return both trailing words, with their two header bits moved to
+		 * the top of headerWords[0] -- unless bm_last_compword is
+		 * LITERAL_ALL_ONE, in which case only bm_last_word is returned.
+		 */
+		if (lovItem->bm_last_compword != LITERAL_ALL_ONE)
+		{
+			*numOfWordsP = 2;
+			headerWords[0] = (((BM_HRL_WORD)lovItem->bm_last_two_headerbits) <<
+							  (BM_HRL_WORD_SIZE-2));
+			words[0] = lovItem->bm_last_compword;
+			words[1] = lovItem->bm_last_word;
+		}
+		else
+		{
+			*numOfWordsP = 1;
+			headerWords[0] = (((BM_HRL_WORD)lovItem->bm_last_two_headerbits) <<
+							  (BM_HRL_WORD_SIZE-1));
+			words[0] = lovItem->bm_last_word;
+		}
+
+		LockBuffer(lovBuffer, BUFFER_LOCK_UNLOCK);
+		*readLastWords = true;
+	}
+}
+
+/*
+ * _bitmap_findbitmaps() -- find the bitmap vectors that satisfy the
+ * index predicate.
+ *
+ * The matching vectors are stored in the scan's BMScanPosition
+ * (so->bm_currPos), which is allocated here on first use.
+ */
+void
+_bitmap_findbitmaps(IndexScanDesc scan, ScanDirection dir)
+{
+    BMScanOpaque    opaque = (BMScanOpaque) scan->opaque;
+    BMScanPosition  pos;
+    Buffer          metabuf;
+    BMMetaPage      metapage;
+    bool            allNull = true;
+    int             keyNo;
+
+    /* allocate space and initialize values for so->bm_currPos */
+    if (opaque->bm_currPos == NULL)
+        opaque->bm_currPos =
+            (BMScanPosition) palloc0(sizeof(BMScanPositionData));
+
+    pos = opaque->bm_currPos;
+    pos->nvec = 0;
+    pos->done = false;
+    MemSet(&pos->bm_result, 0, sizeof(BMIterateResult));
+
+    metabuf = _bitmap_getbuf(scan->indexRelation, BM_METAPAGE, BM_READ);
+    metapage = (BMMetaPage) PageGetContents(BufferGetPage(metabuf));
+
+    /* one non-NULL key is enough to rule out the all-NULL case */
+    for (keyNo = 0; keyNo < scan->numberOfKeys; keyNo++)
+    {
+        if ((scan->keyData[keyNo].sk_flags & SK_ISNULL) == 0)
+        {
+            allNull = false;
+            break;
+        }
+    }
+
+    if (allNull)
+    {
+        /*
+         * If the values for these keys are all NULL, the bitmap vector
+         * is the first LOV item in the LOV pages.
+         */
+        pos->posvecs = (BMVector) palloc0(sizeof(BMVectorData));
+        init_scanpos(scan, pos->posvecs, BM_LOV_STARTPAGE, 1);
+        pos->nvec = 1;
+    }
+    else
+    {
+        Relation        lovHeap;
+        Relation        lovIndex;
+        ScanKey         lovKeys;
+        IndexScanDesc   lovScan;
+        List           *found = NIL;
+        ListCell       *lc;
+        BlockNumber     lovBlock;
+        OffsetNumber    lovOffset;
+        bool            blockNull;
+        bool            offsetNull;
+        int             vecNo = 0;
+
+        _bitmap_open_lov_heapandindex(scan->indexRelation, metapage,
+                                      &lovHeap, &lovIndex, AccessShareLock);
+
+        /* initialize one LOV-index scan key from each key of this scan */
+        lovKeys = palloc0(scan->numberOfKeys * sizeof(ScanKeyData));
+        for (keyNo = 0; keyNo < scan->numberOfKeys; keyNo++)
+        {
+            ScanKey     src = &scan->keyData[keyNo];
+
+            ScanKeyEntryInitialize(&lovKeys[keyNo],
+                                   src->sk_flags,
+                                   src->sk_attno,
+                                   src->sk_strategy,
+                                   src->sk_subtype,
+                                   src->sk_func.fn_oid,
+                                   src->sk_argument);
+        }
+
+        /* XXX: is SnapshotAny really the right choice? */
+        lovScan = index_beginscan(lovHeap, lovIndex, SnapshotAny,
+                                  scan->numberOfKeys, lovKeys);
+
+        /*
+         * finds all lov items for this scan through lovHeap and lovIndex.
+         */
+        for (;;)
+        {
+            ItemPos    *where;
+
+            if (!_bitmap_findvalue(lovHeap, lovIndex, lovKeys, lovScan,
+                                   &lovBlock, &blockNull, &lovOffset,
+                                   &offsetNull))
+                break;
+
+            /*
+             * We find the position for one LOV item. Append it into
+             * the list.
+             */
+            where = (ItemPos *) palloc0(sizeof(ItemPos));
+            where->blockNo = lovBlock;
+            where->offset = lovOffset;
+            found = lappend(found, where);
+
+            pos->nvec++;
+        }
+
+        /* one BMVector per LOV item found, in the order they were found */
+        pos->posvecs =
+            (BMVector) palloc0(sizeof(BMVectorData) * pos->nvec);
+        foreach(lc, found)
+        {
+            ItemPos    *where = (ItemPos *) lfirst(lc);
+
+            init_scanpos(scan, &(pos->posvecs[vecNo]),
+                         where->blockNo, where->offset);
+            vecNo++;
+        }
+
+        list_free_deep(found);
+
+        index_endscan(lovScan);
+        _bitmap_close_lov_heapandindex(lovHeap, lovIndex, AccessShareLock);
+        pfree(lovKeys);
+    }
+
+    _bitmap_relbuf(metabuf);
+
+    if (pos->nvec == 0)
+    {
+        /* nothing matched: the scan is already finished */
+        pos->done = true;
+    }
+    else if (pos->nvec == 1)
+    {
+        /*
+         * Since there is only one related bitmap vector, we have
+         * the scan position's batch words structure point directly to
+         * the vector's batch words.
+         */
+        pos->bm_batchWords = pos->posvecs->bm_batchWords;
+    }
+    else
+    {
+        /* several vectors: the scan position gets batch words of its own */
+        pos->bm_batchWords = (BMBatchWords *) palloc0(sizeof(BMBatchWords));
+        _bitmap_init_batchwords(pos->bm_batchWords,
+                                BM_NUM_OF_HRL_WORDS_PER_PAGE,
+                                CurrentMemoryContext);
+    }
+}
+
+/*
+ * init_scanpos() -- initialize a BMScanPosition for a given
+ * bitmap vector.
+ */ +static void +init_scanpos(IndexScanDesc scan, BMVector bmScanPos, BlockNumber lovBlock, + OffsetNumber lovOffset) +{ + Page lovPage; + BMLOVItem lovItem; + + bmScanPos->bm_lovOffset = lovOffset; + bmScanPos->bm_lovBuffer = _bitmap_getbuf(scan->indexRelation, lovBlock, + BM_READ); + + lovPage = BufferGetPage(bmScanPos->bm_lovBuffer); + lovItem = (BMLOVItem) PageGetItem(lovPage, + PageGetItemId(lovPage, bmScanPos->bm_lovOffset)); + + bmScanPos->bm_nextBlockNo = lovItem->bm_lov_head; + bmScanPos->bm_readLastWords = false; + bmScanPos->bm_batchWords = (BMBatchWords *) palloc0(sizeof(BMBatchWords)); + _bitmap_init_batchwords(bmScanPos->bm_batchWords, + BM_NUM_OF_HRL_WORDS_PER_PAGE, + CurrentMemoryContext); + + LockBuffer(bmScanPos->bm_lovBuffer, BUFFER_LOCK_UNLOCK); +} diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmaputil.c bitmap/src/backend/access/bitmap/bitmaputil.c --- pgsql-head/src/backend/access/bitmap/bitmaputil.c 1970-01-01 10:00:00.000000000 +1000 +++ bitmap/src/backend/access/bitmap/bitmaputil.c 2006-11-30 08:49:46.000000000 +1100 @@ -0,0 +1,731 @@ +/*------------------------------------------------------------------------- + * + * bitmaputil.c + * Utility routines for on-disk bitmap index access method. 
+ * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/bitmap.h" +#include "access/reloptions.h" + +static void _bitmap_findnextword(BMBatchWords* words, uint32 nextReadNo); +static void _bitmap_resetWord(BMBatchWords *words, uint32 prevStartNo); +static uint8 _bitmap_find_bitset(BM_HRL_WORD word, uint8 lastPos); + +/* + * _bitmap_formitem() -- construct a LOV entry + */ +BMLOVItem +_bitmap_formitem(uint64 currTidNumber) +{ + int nbytes_bmitem; + BMLOVItem bmitem; + + nbytes_bmitem = sizeof(BMLOVItemData); + + bmitem = (BMLOVItem)palloc(nbytes_bmitem); + + bmitem->bm_lov_head = bmitem->bm_lov_tail = InvalidBlockNumber; + bmitem->bm_last_setbit = 0; + + /* fill up all existing bits with 0. */ + if (currTidNumber < BM_HRL_WORD_SIZE) + { + bmitem->bm_last_compword = LITERAL_ALL_ONE; + bmitem->bm_last_word = LITERAL_ALL_ZERO; + bmitem->bm_last_two_headerbits = 0; + bmitem->bm_last_tid_location = 0; + } + else + { + uint32 numOfTotalFillWords; + BM_HRL_WORD numOfFillWords; + + numOfTotalFillWords = (currTidNumber-1)/BM_HRL_WORD_SIZE; + + numOfFillWords = (numOfTotalFillWords >= MAX_FILL_LENGTH) ? + MAX_FILL_LENGTH : numOfTotalFillWords; + + bmitem->bm_last_compword = BM_MAKE_FILL_WORD(0, numOfFillWords); + bmitem->bm_last_word = LITERAL_ALL_ZERO; + bmitem->bm_last_two_headerbits = 2; + + bmitem->bm_last_tid_location = numOfFillWords * BM_HRL_WORD_SIZE; + + /* + * If all zeros are too many to fit in one word, then + * we set bm_last_setbit so that the remaining zeros can + * be handled outside. + */ + if (numOfTotalFillWords > numOfFillWords) + bmitem->bm_last_setbit = numOfFillWords*BM_HRL_WORD_SIZE; + } + + return bmitem; +} + +/* + * _bitmap_init_batchwords() -- initialize a BMBatchWords in a given + * memory context. 
+ * + * Allocate spaces for bitmap header words and bitmap content words. + */ +void +_bitmap_init_batchwords(BMBatchWords* words, + uint32 maxNumOfWords, + MemoryContext mcxt) +{ + uint32 numOfHeaderWords; + MemoryContext oldcxt; + + words->nwordsread = 0; + words->nextread = 1; + words->startNo = 0; + words->nwords = 0; + + numOfHeaderWords = BM_CALC_H_WORDS(maxNumOfWords); + + words->maxNumOfWords = maxNumOfWords; + + /* Make sure that we have at least one page of words */ + Assert(words->maxNumOfWords >= BM_NUM_OF_HRL_WORDS_PER_PAGE); + + oldcxt = MemoryContextSwitchTo(mcxt); + words->hwords = palloc0(sizeof(BM_HRL_WORD)*numOfHeaderWords); + words->cwords = palloc0(sizeof(BM_HRL_WORD)*words->maxNumOfWords); + MemoryContextSwitchTo(oldcxt); +} + +/* + * _bitmap_copy_batchwords() -- copy a given BMBatchWords to another + * BMBatchWords. + */ +void +_bitmap_copy_batchwords(BMBatchWords* words, BMBatchWords* copyWords) +{ + uint32 numOfHeaderWords; + + copyWords->maxNumOfWords = words->maxNumOfWords; + copyWords->nwordsread = words->nwordsread; + copyWords->nextread = words->nextread; + copyWords->firstTid = words->firstTid; + copyWords->startNo = words->startNo; + copyWords->nwords = words->nwords; + + numOfHeaderWords = BM_CALC_H_WORDS(copyWords->maxNumOfWords); + + memcpy(copyWords->hwords, words->hwords, + sizeof(BM_HRL_WORD)*numOfHeaderWords); + memcpy(copyWords->cwords, words->cwords, + sizeof(BM_HRL_WORD)*copyWords->maxNumOfWords); +} + +/* + * _bitmap_reset_batchwords() -- reset the BMBatchWords for re-use. + */ +void +_bitmap_reset_batchwords(BMBatchWords *words) +{ + words->startNo = 0; + words->nwords = 0; + MemSet(words->hwords, 0, + sizeof(BM_HRL_WORD) * BM_CALC_H_WORDS(words->maxNumOfWords)); +} + +/* + * _bitmap_cleanup_batchwords() -- release spaces allocated for the BMBatchWords. 
+ */ +void _bitmap_cleanup_batchwords(BMBatchWords* words) +{ + if (words == NULL) + return; + + if (words->hwords) + pfree(words->hwords); + if (words->cwords) + pfree(words->cwords); +} + +/* + * _bitmap_cleanup_scanpos() -- releast space allocated for + * BMVector. + */ +void +_bitmap_cleanup_scanpos(BMVector bmScanPos, uint32 numBitmapVectors) +{ + uint32 keyNo; + + for (keyNo=0; keyNonextTidLoc >= result->numOfTids) + _bitmap_findnexttids(words, result, BM_BATCH_TIDS); + + /* if find more tids, then return the first one */ + if (result->nextTidLoc < result->numOfTids) + { + result->nextTidLoc++; + return (result->nextTids[result->nextTidLoc-1]); + } + + /* no more tids */ + return 0; +} + +/* + * _bitmap_findprevtid() -- find the previous tid location in an array of tids. + */ +void +_bitmap_findprevtid(BMIterateResult *result) +{ + Assert(result->nextTidLoc > 0); + result->nextTidLoc--; +} + +/* + * _bitmap_findnexttids() -- find the next set of tids from a given + * batch of bitmap words. + * + * The maximum number of tids to be found is defined in 'maxTids'. 
+ *
+ * The tids found are stored in result->nextTids[0 .. numOfTids-1];
+ * result->nextTidLoc is reset to 0. The scan resumes from
+ * result->lastScanWordNo / result->lastScanPos and consumes words from
+ * 'words' (words->nwords is decremented for each finished word, and a
+ * partly expanded 1-fill word has its length reduced in place).
+ */
+void
+_bitmap_findnexttids(BMBatchWords *words, BMIterateResult *result,
+                     uint32 maxTids)
+{
+    bool done = false;
+
+    /* start a fresh batch of output tids */
+    result->nextTidLoc = result->numOfTids = 0;
+    while (words->nwords > 0 && result->numOfTids < maxTids && !done)
+    {
+        uint8 oldScanPos = result->lastScanPos;
+        BM_HRL_WORD word = words->cwords[result->lastScanWordNo];
+
+        /* new word, zero filled */
+        if (oldScanPos == 0 &&
+            ((IS_FILL_WORD(words->hwords, result->lastScanWordNo) &&
+              GET_FILL_BIT(word) == 0) || word == 0))
+        {
+            uint32  fillLength;
+
+            /* an all-zero word counts as one uncompressed word */
+            if (word == 0)
+                fillLength = 1;
+            else
+                fillLength = FILL_LENGTH(word);
+
+            /* skip over non-matches */
+            result->nextTid += fillLength * BM_HRL_WORD_SIZE;
+            result->lastScanWordNo++;
+            words->nwords--;
+            result->lastScanPos = 0;
+            continue;
+        }
+        else if (IS_FILL_WORD(words->hwords, result->lastScanWordNo)
+                 && GET_FILL_BIT(word) == 1)
+        {
+            uint32  nfillwords = FILL_LENGTH(word);
+            uint8   bitNo;
+
+            /* expand whole words only, while a full word still fits */
+            while (result->numOfTids + BM_HRL_WORD_SIZE <= maxTids &&
+                   nfillwords > 0)
+            {
+                /* explain the fill word */
+                for (bitNo = 0; bitNo < BM_HRL_WORD_SIZE; bitNo++)
+                    result->nextTids[result->numOfTids++] = ++result->nextTid;
+
+                nfillwords--;
+                /* update fill word to reflect expansion */
+                words->cwords[result->lastScanWordNo]--;
+            }
+
+            if (nfillwords == 0)
+            {
+                result->lastScanWordNo++;
+                words->nwords--;
+                result->lastScanPos = 0;
+                continue;
+            }
+            else
+            {
+                /* output is full; the rest of the fill word stays queued */
+                done = true;
+                break;
+            }
+        }
+        else
+        {
+            /*
+             * Literal word. BM_HRL_WORD_SIZE + 1 is used as a sentinel so
+             * that the loop below is entered even when no bit of this
+             * word has been scanned yet (oldScanPos == 0).
+             */
+            if(oldScanPos == 0)
+                oldScanPos = BM_HRL_WORD_SIZE + 1;
+
+            while (oldScanPos != 0 && result->numOfTids < maxTids)
+            {
+                BM_HRL_WORD     w;
+
+                if (oldScanPos == BM_HRL_WORD_SIZE + 1)
+                    oldScanPos = 0;
+
+                w = words->cwords[result->lastScanWordNo];
+                result->lastScanPos = _bitmap_find_bitset(w, oldScanPos);
+
+                /* did we find a bit set in this word? */
+                if (result->lastScanPos != 0)
+                {
+                    result->nextTid += (result->lastScanPos - oldScanPos);
+                    result->nextTids[result->numOfTids++] = result->nextTid;
+                }
+                else
+                {
+                    /* account for the unscanned remainder of the word */
+                    result->nextTid += BM_HRL_WORD_SIZE - oldScanPos;
+                    /* start scanning a new word */
+                    words->nwords--;
+                    result->lastScanWordNo++;
+                    result->lastScanPos = 0;
+                }
+                oldScanPos = result->lastScanPos;
+            }
+        }
+    }
+}
+
+/*
+ * _bitmap_intersect() is dead code because streaming intersects
+ * PagetableEntry structures, not raw batch words. It's possible we may
+ * want to intersect batches later though -- it would definitely improve
+ * streaming of intersections.
+ */
+
+#ifdef NOT_USED
+
+/*
+ * _bitmap_intersect() -- intersect 'numBatches' bitmap words.
+ *
+ * All 'numBatches' bitmap words are HRL compressed. The result
+ * bitmap words HRL compressed, except that fill set words(1s) may
+ * be lossily compressed.
+ */
+void
+_bitmap_intersect(BMBatchWords **batches, uint32 numBatches,
+                  BMBatchWords *result)
+{
+    bool done = false;
+    uint32  *prevStartNos;
+    uint32  nextReadNo;
+    uint32  batchNo;
+
+    Assert(numBatches > 0);
+
+    /* remembers each batch's startNo so a partial round can be undone */
+    prevStartNos = (uint32 *)palloc0(numBatches * sizeof(uint32));
+    nextReadNo = batches[0]->nextread;
+
+    while (!done && result->nwords < result->maxNumOfWords)
+    {
+        BM_HRL_WORD andWord = LITERAL_ALL_ONE;
+        BM_HRL_WORD word;
+
+        bool        andWordIsLiteral = true;
+
+        /*
+         * We walk through the bitmap word in each list one by one
+         * without de-compress the bitmap words. 'nextReadNo' defines
+         * the position of the next word that should be read in an
+         * uncompressed format.
+         */
+        for (batchNo = 0; batchNo < numBatches; batchNo++)
+        {
+            uint32 offs;
+            BMBatchWords *bch = batches[batchNo];
+
+            /* skip nextReadNo - nwordsread - 1 words */
+            _bitmap_findnextword(bch, nextReadNo);
+
+            if (bch->nwords == 0)
+            {
+                done = true;
+                break;
+            }
+
+            Assert(bch->nwordsread == nextReadNo - 1);
+
+            /* Here, startNo should point to the word to be read. */
+            offs = bch->startNo;
+            word = bch->cwords[offs];
+
+            if (CUR_WORD_IS_FILL(bch) && (GET_FILL_BIT(word) == 0))
+            {
+                uint32 n;
+
+                /* a 0-fill decides the result for its whole length */
+                bch->nwordsread += FILL_LENGTH(word);
+
+                n = bch->nwordsread - nextReadNo + 1;
+                andWord = BM_MAKE_FILL_WORD(0, n);
+                andWordIsLiteral = false;
+
+                nextReadNo = bch->nwordsread + 1;
+                bch->startNo++;
+                bch->nwords--;
+                break;
+            }
+            else if (CUR_WORD_IS_FILL(bch) && (GET_FILL_BIT(word) == 1))
+            {
+                /* a 1-fill leaves andWord alone; consume one word of it */
+                bch->nwordsread++;
+
+                prevStartNos[batchNo] = bch->startNo;
+
+                if (FILL_LENGTH(word) == 1)
+                {
+                    bch->startNo++;
+                    bch->nwords--;
+                }
+                else
+                {
+                    uint32 s = bch->startNo;
+                    bch->cwords[s]--;
+                }
+                andWordIsLiteral = true;
+            }
+            else if (!CUR_WORD_IS_FILL(bch))
+            {
+                prevStartNos[batchNo] = bch->startNo;
+
+                andWord &= word;
+                bch->nwordsread++;
+                bch->startNo++;
+                bch->nwords--;
+                andWordIsLiteral = true;
+            }
+        }
+
+        /* Since there are not enough words in this attribute break this loop */
+        if (done)
+        {
+            uint32 preBatchNo;
+
+            /* reset the attributes before batchNo */
+            for (preBatchNo = 0; preBatchNo < batchNo; preBatchNo++)
+            {
+                _bitmap_resetWord(batches[preBatchNo], prevStartNos[preBatchNo]);
+            }
+            break;
+        }
+        else
+        {
+            if (!andWordIsLiteral)
+            {
+                uint32  off = result->nwords/BM_HRL_WORD_SIZE;
+                uint32  w = result->nwords;
+
+                result->hwords[off] |= WORDNO_GET_HEADER_BIT(w);
+            }
+            result->cwords[result->nwords] = andWord;
+            result->nwords++;
+        }
+
+        if (andWordIsLiteral)
+            nextReadNo++;
+
+        /*
+         * We just processed the last batch and it is now empty. The test
+         * previously read 'bch', which is declared inside the for loop
+         * above and is not in scope here, and compared batchNo with 1;
+         * it now matches the equivalent test in _bitmap_union(). The
+         * first comparison also keeps batches[batchNo] in bounds when the
+         * loop ran to completion (batchNo == numBatches).
+         */
+        if (batchNo == numBatches - 1 && batches[batchNo]->nwords == 0)
+            done = true;
+    }
+
+    /* set the nextReadNo */
+    for (batchNo = 0; batchNo < numBatches; batchNo++)
+        batches[batchNo]->nextread = nextReadNo;
+
+    pfree(prevStartNos);
+}
+
+#endif /* NOT_USED */
+
+/*
+ * _bitmap_union() -- union 'numBatches' bitmaps
+ *
+ * All bitmap words are HRL compressed. The result bitmap words are also
+ * HRL compressed, except that fill unset words may be lossily compressed.
+ *
+ * Result words are appended to 'result' until it is full or one of the
+ * inputs runs out of words. Each input's 'nextread' is updated on return.
+ */
+void
+_bitmap_union(BMBatchWords **batches, uint32 numBatches, BMBatchWords *result)
+{
+    bool        done = false;
+    uint32     *prevstarts;
+    uint32      nextReadNo;
+    uint32      batchNo;
+
+    /*
+     * numBatches is unsigned, so the former Assert(numBatches >= 0) could
+     * never fail; only the empty case needs handling.
+     */
+    if (numBatches == 0)
+        return;
+
+    /* save batch->startNo for each input bitmap vector */
+    prevstarts = (uint32 *)palloc0(numBatches * sizeof(uint32));
+
+    /*
+     * Each batch should have the same next read offset, so take
+     * the first one
+     */
+    nextReadNo = batches[0]->nextread;
+
+    while (!done && result->nwords < result->maxNumOfWords)
+    {
+        BM_HRL_WORD orWord = LITERAL_ALL_ZERO;
+        BM_HRL_WORD word;
+        bool        orWordIsLiteral = true;
+
+        for (batchNo = 0; batchNo < numBatches; batchNo++)
+        {
+            BMBatchWords *bch = batches[batchNo];
+
+            /* skip nextReadNo - nwordsread - 1 words */
+            _bitmap_findnextword(bch, nextReadNo);
+
+            if (bch->nwords == 0)
+            {
+                done = true;
+                break;
+            }
+
+            Assert(bch->nwordsread == nextReadNo - 1);
+
+            /* Here, startNo should point to the word to be read. */
+            word = bch->cwords[bch->startNo];
+
+            if (CUR_WORD_IS_FILL(bch) && GET_FILL_BIT(word) == 1)
+            {
+                /* Fill word represents matches */
+                bch->nwordsread += FILL_LENGTH(word);
+                orWord = BM_MAKE_FILL_WORD(1, bch->nwordsread - nextReadNo + 1);
+                orWordIsLiteral = false;
+
+                /* the 1-fill decides the result; skip the other inputs */
+                nextReadNo = bch->nwordsread + 1;
+                bch->startNo++;
+                bch->nwords--;
+                break;
+            }
+            else if (CUR_WORD_IS_FILL(bch) && GET_FILL_BIT(word) == 0)
+            {
+                /* Fill word represents no matches */
+
+                bch->nwordsread++;
+                prevstarts[batchNo] = bch->startNo;
+                if (FILL_LENGTH(word) == 1)
+                {
+                    bch->startNo++;
+                    bch->nwords--;
+                }
+                else
+                {
+                    /* consume one word's worth of the fill in place */
+                    bch->cwords[bch->startNo]--;
+                }
+                orWordIsLiteral = true;
+            }
+            else if (!CUR_WORD_IS_FILL(bch))
+            {
+                /* word is literal */
+                prevstarts[batchNo] = bch->startNo;
+                orWord |= word;
+                bch->nwordsread++;
+                bch->startNo++;
+                bch->nwords--;
+                orWordIsLiteral = true;
+            }
+        }
+
+        if (done)
+        {
+            uint32 i;
+
+            /* reset the attributes before batchNo */
+            for (i = 0; i < batchNo; i++)
+                _bitmap_resetWord(batches[i], prevstarts[i]);
+            break;
+        }
+        else
+        {
+            if (!orWordIsLiteral)
+            {
+                /* Word is not literal, update the result header */
+                uint32  offs = result->nwords/BM_HRL_WORD_SIZE;
+                uint32  n = result->nwords;
+                result->hwords[offs] |= WORDNO_GET_HEADER_BIT(n);
+            }
+            result->cwords[result->nwords] = orWord;
+            result->nwords++;
+        }
+
+        if (orWordIsLiteral)
+            nextReadNo++;
+
+        /*
+         * we just processed the last batch and it was empty; the first
+         * comparison keeps batches[batchNo] in bounds when the for loop
+         * ran to completion (batchNo == numBatches)
+         */
+        if (batchNo == numBatches - 1 && batches[batchNo]->nwords == 0)
+            done = true;
+    }
+
+    /* set the next word to read for all input vectors */
+    for (batchNo = 0; batchNo < numBatches; batchNo++)
+        batches[batchNo]->nextread = nextReadNo;
+
+    pfree(prevstarts);
+}
+
+/*
+ * _bitmap_findnextword() -- Find the next word whose position is
+ *                          'nextReadNo' in an uncompressed format.
+ */
+static void
+_bitmap_findnextword(BMBatchWords *words, uint32 nextReadNo)
+{
+    /*
+     * 'words->nwordsread' defines how many un-compressed words
+     * have been read in this bitmap. We read from
+     * position 'startNo', and increment 'words->nwordsread'
+     * differently based on the type of words that are read, until
+     * 'words->nwordsread' is equal to 'nextReadNo'.
+     */
+    while (words->nwords > 0 && words->nwordsread < nextReadNo - 1)
+    {
+        /* Get the current word */
+        BM_HRL_WORD word = words->cwords[words->startNo];
+
+        if (CUR_WORD_IS_FILL(words))
+        {
+            if(FILL_LENGTH(word) <= (nextReadNo - words->nwordsread - 1))
+            {
+                /* the whole fill word lies before the target position */
+                words->nwordsread += FILL_LENGTH(word);
+                words->startNo++;
+                words->nwords--;
+            }
+            else
+            {
+                /* the target falls inside this fill word: shorten it */
+                words->cwords[words->startNo] -= (nextReadNo - words->nwordsread - 1);
+                words->nwordsread = nextReadNo - 1;
+            }
+        }
+        else
+        {
+            /* a literal word stands for exactly one uncompressed word */
+            words->nwordsread++;
+            words->startNo++;
+            words->nwords--;
+        }
+    }
+}
+
+/*
+ * _bitmap_resetWord() -- Reset the read position in an BMBatchWords
+ *                       to its previous value.
+ *
+ * Reset the read position in an BMBatchWords to its previous value,
+ * which is given in 'prevStartNo'.
Based on different type of words read, + * the actual bitmap word may need to be changed. + */ +static void +_bitmap_resetWord(BMBatchWords *words, uint32 prevStartNo) +{ + if (words->startNo > prevStartNo) + { + Assert(words->startNo == prevStartNo + 1); + words->startNo = prevStartNo; + words->nwords++; + } + else + { + Assert(words->startNo == prevStartNo); + Assert(CUR_WORD_IS_FILL(words)); + words->cwords[words->startNo]++; + } + words->nwordsread--; +} + + +/* + * _bitmap_find_bitset() -- find the rightmost set bit (bit=1) in the + * given word since 'lastPos', not including 'lastPos'. + * + * The rightmost bit in the given word is considered the position 1, and + * the leftmost bit is considered the position BM_HRL_WORD_SIZE. + * + * If such set bit does not exist in this word, 0 is returned. + */ +static uint8 +_bitmap_find_bitset(BM_HRL_WORD word, uint8 lastPos) +{ + uint8 pos = lastPos + 1; + BM_HRL_WORD rightmostBitWord; + + if (pos > BM_HRL_WORD_SIZE) + return 0; + + rightmostBitWord = (((BM_HRL_WORD)1) << (pos-1)); + + while (pos <= BM_HRL_WORD_SIZE && (word & rightmostBitWord) == 0) + { + rightmostBitWord <<= 1; + pos++; + } + + if (pos > BM_HRL_WORD_SIZE) + pos = 0; + + return pos; +} + +/* + * _bitmap_begin_iterate() -- initialize the given BMIterateResult instance. + */ +void +_bitmap_begin_iterate(BMBatchWords *words, BMIterateResult *result) +{ + result->nextTid = words->firstTid; + result->lastScanPos = 0; + result->lastScanWordNo = words->startNo; + result->numOfTids = 0; + result->nextTidLoc = 0; +} + + +Datum +bmoptions(PG_FUNCTION_ARGS) +{ + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result; + + /* + * It's not clear that fillfactor is useful for on-disk bitmap index, + * but for the moment we'll accept it anyway. (It won't do anything...) 
+ */ +#define BM_MIN_FILLFACTOR 10 +#define BM_DEFAULT_FILLFACTOR 100 + + result = default_reloptions(reloptions, validate, + BM_MIN_FILLFACTOR, + BM_DEFAULT_FILLFACTOR); + if (result) + PG_RETURN_BYTEA_P(result); + PG_RETURN_NULL(); +} diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/bitmapxlog.c bitmap/src/backend/access/bitmap/bitmapxlog.c --- pgsql-head/src/backend/access/bitmap/bitmapxlog.c 1970-01-01 10:00:00.000000000 +1000 +++ bitmap/src/backend/access/bitmap/bitmapxlog.c 2006-12-04 20:38:38.000000000 +1100 @@ -0,0 +1,765 @@ +/*------------------------------------------------------------------------- + * + * bitmapxlog.c + * WAL replay logic for the bitmap index. + * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/bitmap.h" +#include "access/xlogutils.h" + +/* The information about inserting a new lovitem into the LOV list. */ +typedef struct xl_bm_lovitem +{ + RelFileNode bm_node; + BlockNumber bm_lov_blkno; + bool bm_isNewItem; + OffsetNumber bm_lov_offset; + BMLOVItemData bm_lovItem; +} xl_bm_lovitem; + +/* The information about adding a new page */ +typedef struct xl_bm_newpage +{ + RelFileNode bm_node; + BlockNumber bm_new_blkno; +} xl_bm_newpage; + +/* + * The information about changes on a bitmap page. + * If bm_isOpaque is true, then bm_next_blkno is set. 
+ */ +typedef struct xl_bm_bitmappage +{ + RelFileNode bm_node; + BlockNumber bm_bitmap_blkno; + + bool bm_isOpaque; + BlockNumber bm_next_blkno; + + uint32 bm_last_tid_location; + uint32 bm_hrl_words_used; + uint32 bm_num_words; + /* for simplicity, we log the header words each time */ + BM_HRL_WORD hwords[BM_NUM_OF_HEADER_WORDS]; + /* followed by the "bm_num_words" content words. */ +} xl_bm_bitmappage; + +/* The information about changes to the last 2 words in a bitmap vector */ +typedef struct xl_bm_bitmap_lastwords +{ + RelFileNode bm_node; + BM_HRL_WORD bm_last_compword; + BM_HRL_WORD bm_last_word; + uint8 bm_last_two_headerbits; + + BlockNumber bm_lov_blkno; + OffsetNumber bm_lov_offset; +} xl_bm_bitmap_lastwords; + +/* The information about the changes in the metapage. */ +typedef struct xl_bm_metapage +{ + RelFileNode bm_node; + uint64 bm_num_tuples; + Oid bm_lov_heapId; /* the relation id for the heap */ + Oid bm_lov_indexId; /* the relation id for the index */ + /* the block number for the last LOV pages. */ + BlockNumber bm_lov_lastpage; + /* indicate if this bitmap index needs to be re-built while vacuuming. */ + bool bm_need_rebuilt; +} xl_bm_metapage; + + +/* + * _bitmap_xlog_newpage() -- create a new page. 
+ */ +static void +_bitmap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record) +{ + xl_bm_newpage *xlrec = (xl_bm_newpage *) XLogRecGetData(record); + + Relation reln; + Page page; + uint8 info; + + info = record->xl_info & ~XLR_INFO_MASK; + + reln = XLogOpenRelation(xlrec->bm_node); + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer buffer; + + buffer = XLogReadBuffer(reln, xlrec->bm_new_blkno, true); + if (!BufferIsValid(buffer)) + elog(PANIC, "_bitmap_xlog_newpage: block unfound: %d", + xlrec->bm_new_blkno); + + page = BufferGetPage(buffer); + + if (XLByteLT(PageGetLSN(page), lsn)) + { + BMMetaPage metapage; + + switch (info) + { + case XLOG_BITMAP_INSERT_NEWMETA: + if(PageIsNew(page)) + PageInit(page, BufferGetPageSize(buffer), 0); + metapage = (BMMetaPage) PageGetContents(page); + metapage->bm_num_tuples = 0; + metapage->bm_need_rebuilt = false; + break; + case XLOG_BITMAP_INSERT_NEWLOV: + _bitmap_init_lovpage(reln, buffer); + break; + case XLOG_BITMAP_INSERT_NEWBITMAP: + _bitmap_init_bitmappage(reln, buffer); + break; + default: + elog(PANIC, "bitmap_redo: unknown newpage op code %u", info); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + _bitmap_wrtbuf(buffer); + } + else + _bitmap_relbuf(buffer); + } + else + elog(PANIC, "bm_insert_undo: not implemented."); +} + +/* + * _bitmap_xlog_insert_lovitem() -- insert a new lov item. 
+ */ +static void +_bitmap_xlog_insert_lovitem(bool redo, XLogRecPtr lsn, XLogRecord *record) +{ + xl_bm_lovitem *xlrec = (xl_bm_lovitem *) XLogRecGetData(record); + Relation reln; + + reln = XLogOpenRelation(xlrec->bm_node); + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer lovBuffer; + Page lovPage; + + lovBuffer = XLogReadBuffer(reln, xlrec->bm_lov_blkno, false); + if (!BufferIsValid(lovBuffer)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_lovitem)" + " block %d does not exist", + xlrec->bm_lov_blkno); + + lovPage = BufferGetPage(lovBuffer); + + if (XLByteLT(PageGetLSN(lovPage), lsn)) + { + if(xlrec->bm_isNewItem) + { + OffsetNumber newOffset, itemSize; + + newOffset = OffsetNumberNext(PageGetMaxOffsetNumber(lovPage)); + if (newOffset != xlrec->bm_lov_offset) + elog(PANIC, "bm_insert_redo: LOV item is not inserted " + "in pos %d(requested %d)", + newOffset, xlrec->bm_lov_offset); + + itemSize = sizeof(BMLOVItemData); + if (itemSize > PageGetFreeSpace(lovPage)) + elog(PANIC, + "bm_insert_redo: not enough space in LOV page %d", + xlrec->bm_lov_blkno); + + if (PageAddItem(lovPage, (Item)&(xlrec->bm_lovItem), itemSize, + newOffset, LP_USED) == InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("failed to add LOV item to \"%s\"", + RelationGetRelationName(reln)))); + } + else + { + /* LOV item already existed, just update it */ + BMLOVItem oldLovItem; + oldLovItem = (BMLOVItem) PageGetItem(lovPage, + PageGetItemId(lovPage, xlrec->bm_lov_offset)); + + memcpy(oldLovItem, &(xlrec->bm_lovItem), sizeof(BMLOVItemData)); + } + + PageSetLSN(lovPage, lsn); + PageSetTLI(lovPage, ThisTimeLineID); + _bitmap_wrtbuf(lovBuffer); + } + else + _bitmap_relbuf(lovBuffer); + } + + else + elog(PANIC, "bm_insert_undo: not implemented."); +} + +/* + * _bitmap_xlog_insert_meta() -- update a metapage. 
+ */ +static void +_bitmap_xlog_insert_meta(bool redo, XLogRecPtr lsn, XLogRecord *record) +{ + xl_bm_metapage *xlrec = (xl_bm_metapage *) XLogRecGetData(record); + Relation reln; + + reln = XLogOpenRelation(xlrec->bm_node); + + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer metabuf; + Page mp; + BMMetaPage metapage; + + metabuf = XLogReadBuffer(reln, BM_METAPAGE, false); + if (!BufferIsValid(metabuf)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_meta) " + "block unfound: %d", BM_METAPAGE); + + /* restore the page */ + mp = BufferGetPage(metabuf); + metapage = (BMMetaPage)PageGetContents(mp); + + if (XLByteLT(PageGetLSN(mp), lsn)) + { + metapage->bm_num_tuples = xlrec->bm_num_tuples; + metapage->bm_lov_heapId = xlrec->bm_lov_heapId; + metapage->bm_lov_indexId = xlrec->bm_lov_indexId; + metapage->bm_lov_lastpage = xlrec->bm_lov_lastpage; + metapage->bm_need_rebuilt = xlrec->bm_need_rebuilt; + + PageSetLSN(mp, lsn); + PageSetTLI(mp, ThisTimeLineID); + _bitmap_wrtbuf(metabuf); + } + else + _bitmap_relbuf(metabuf); + } + else + elog(PANIC, "bm_insert_undo: not implemented."); +} + +/* + * _bitmap_xlog_insert_bitmap() -- update a bitmap page. 
+ */ +static void +_bitmap_xlog_insert_bitmap(bool redo, XLogRecPtr lsn, XLogRecord *record) +{ + xl_bm_bitmappage *xlrec = (xl_bm_bitmappage *) XLogRecGetData(record); + Relation reln; + + reln = XLogOpenRelation(xlrec->bm_node); + + if (redo) + { + Buffer bitmapBuffer; + Page bitmapPage; + BMBitmapOpaque bp; + + bitmapBuffer = XLogReadBuffer(reln, xlrec->bm_bitmap_blkno, false); + if (!BufferIsValid(bitmapBuffer)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_bitmap) " + "block unfound: %d", + xlrec->bm_bitmap_blkno); + + bitmapPage = BufferGetPage(bitmapBuffer); + + if (XLByteLT(PageGetLSN(bitmapPage), lsn)) + { + BMBitmap bitmap; + BM_HRL_WORD *words; + int offs; + + words = (BM_HRL_WORD *)(char *)xlrec + + MAXALIGN(sizeof(xl_bm_bitmappage)); + + bp = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); + bitmap = (BMBitmap) PageGetContents(bitmapPage); + + bp->bm_last_tid_location = xlrec->bm_last_tid_location; + bp->bm_hrl_words_used = xlrec->bm_hrl_words_used; + + /* copy the header words and the content words */ + memcpy(bitmap->hwords, xlrec->hwords, + BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD)); + + /* the offset is bound to be small enough to store in offs */ + offs = bp->bm_hrl_words_used - xlrec->bm_num_words; + Assert(offs >= 0); + memcpy(bitmap->cwords + offs, words, + xlrec->bm_num_words * sizeof(BM_HRL_WORD)); + + if (xlrec->bm_isOpaque) + { + /* copy the block number for the next page */ + if (bp->bm_bitmap_next != InvalidBlockNumber) + elog(PANIC, "next bitmap page for blkno %d is " + "already set", xlrec->bm_bitmap_blkno); + + bp->bm_bitmap_next = xlrec->bm_next_blkno; + } + + PageSetLSN(bitmapPage, lsn); + PageSetTLI(bitmapPage, ThisTimeLineID); + _bitmap_wrtbuf(bitmapBuffer); + } + else + _bitmap_relbuf(bitmapBuffer); + } + else + elog(PANIC, "bm_insert_undo: not implemented."); +} + +/* + * _bitmap_xlog_insert_bitmap_lastwords() -- update the last two words + * in a bitmap vector. 
+ */ +static void +_bitmap_xlog_insert_bitmap_lastwords(bool redo, XLogRecPtr lsn, + XLogRecord *record) +{ + Relation reln; + xl_bm_bitmap_lastwords *xlrec; + + xlrec = (xl_bm_bitmap_lastwords *) XLogRecGetData(record); + + reln = XLogOpenRelation(xlrec->bm_node); + + if (redo) + { + Buffer lovBuffer; + Page lovPage; + BMLOVItem lovItem; + + lovBuffer = XLogReadBuffer(reln, xlrec->bm_lov_blkno, false); + if (!BufferIsValid(lovBuffer)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_bitmap_lastwords)" + " block not found: %d", + xlrec->bm_lov_blkno); + + lovPage = BufferGetPage(lovBuffer); + + if (XLByteLT(PageGetLSN(lovPage), lsn)) + { + ItemId item = PageGetItemId(lovPage, xlrec->bm_lov_offset); + + lovItem = (BMLOVItem)PageGetItem(lovPage, item); + + lovItem->bm_last_compword = xlrec->bm_last_compword; + lovItem->bm_last_word = xlrec->bm_last_word; + lovItem->bm_last_two_headerbits = xlrec->bm_last_two_headerbits; + + PageSetLSN(lovPage, lsn); + PageSetTLI(lovPage, ThisTimeLineID); + _bitmap_wrtbuf(lovBuffer); + } + else + _bitmap_relbuf(lovBuffer); + } + else + elog(PANIC, "bm_insert_undo: not implemented."); +} + +void +bitmap_redo(XLogRecPtr lsn, XLogRecord *record) +{ + uint8 info = record->xl_info & ~XLR_INFO_MASK; + + switch (info) + { + case XLOG_BITMAP_INSERT_NEWMETA: + _bitmap_xlog_newpage(true, lsn, record); + break; + case XLOG_BITMAP_INSERT_NEWLOV: + _bitmap_xlog_newpage(true, lsn, record); + break; + case XLOG_BITMAP_INSERT_LOVITEM: + _bitmap_xlog_insert_lovitem(true, lsn, record); + break; + case XLOG_BITMAP_INSERT_META: + _bitmap_xlog_insert_meta(true, lsn, record); + break; + case XLOG_BITMAP_INSERT_NEWBITMAP: + _bitmap_xlog_newpage(true, lsn, record); + break; + case XLOG_BITMAP_INSERT_BITMAP: + _bitmap_xlog_insert_bitmap(true, lsn, record); + break; + case XLOG_BITMAP_INSERT_BITMAP_LASTWORDS: + _bitmap_xlog_insert_bitmap_lastwords(true, lsn, record); + break; + default: + elog(PANIC, "bitmap_redo: unknown op code %u", info); + } +} + +void 
+bitmap_undo(XLogRecPtr lsn, XLogRecord *record) +{ + /* op code: the part of xl_info that lies outside XLR_INFO_MASK */ + uint8 opcode = record->xl_info & ~XLR_INFO_MASK; + + switch (opcode) + { + /* the three new-page record types share one routine */ + case XLOG_BITMAP_INSERT_NEWMETA: + case XLOG_BITMAP_INSERT_NEWLOV: + case XLOG_BITMAP_INSERT_NEWBITMAP: + _bitmap_xlog_newpage(false, lsn, record); + break; + case XLOG_BITMAP_INSERT_LOVITEM: + _bitmap_xlog_insert_lovitem(false, lsn, record); + break; + case XLOG_BITMAP_INSERT_META: + _bitmap_xlog_insert_meta(false, lsn, record); + break; + case XLOG_BITMAP_INSERT_BITMAP: + _bitmap_xlog_insert_bitmap(false, lsn, record); + break; + case XLOG_BITMAP_INSERT_BITMAP_LASTWORDS: + _bitmap_xlog_insert_bitmap_lastwords(false, lsn, record); + break; + + default: + elog(PANIC, "bitmap_undo: unknown op code %u", opcode); + } +} + +/* + * out_target() -- append "rel spcNode/dbNode/relNode" for 'node' to buf. + */ +static void +out_target(StringInfo buf, RelFileNode *node) +{ + appendStringInfo(buf, "rel %u/%u/%u", + node->spcNode, node->dbNode, node->relNode); +} + +/* + * bitmap_desc() -- append a short description of a bitmap WAL record to buf. + * + * Each known op code yields a fixed phrase followed by the target relation + * taken from the record's bm_node; any other op code yields "UNKNOWN". + */ +void +bitmap_desc(StringInfo buf, uint8 xl_info, char *rec) +{ + uint8 info = xl_info & ~XLR_INFO_MASK; + + switch (info) + { + case XLOG_BITMAP_INSERT_NEWMETA: + { + xl_bm_newpage *xlrec = (xl_bm_newpage *)rec; + + appendStringInfo(buf, "insert a new metapage: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + case XLOG_BITMAP_INSERT_NEWLOV: + { + xl_bm_newpage *xlrec = (xl_bm_newpage *)rec; + + appendStringInfo(buf, "insert a new LOV page: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + case XLOG_BITMAP_INSERT_LOVITEM: + { + xl_bm_lovitem *xlrec = (xl_bm_lovitem *)rec; + + appendStringInfo(buf, "insert a new LOV item: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + case XLOG_BITMAP_INSERT_META: + { + xl_bm_metapage *xlrec = (xl_bm_metapage *)rec; + + appendStringInfo(buf, "update the metapage: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + case XLOG_BITMAP_INSERT_NEWBITMAP: + { + xl_bm_newpage *xlrec = (xl_bm_newpage *)rec; + +
appendStringInfo(buf, "insert a new bitmap page: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + case XLOG_BITMAP_INSERT_BITMAP: + { + xl_bm_bitmappage *xlrec = (xl_bm_bitmappage *)rec; + + appendStringInfo(buf, "update a bitmap page: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + case XLOG_BITMAP_INSERT_BITMAP_LASTWORDS: + { + xl_bm_bitmap_lastwords *xlrec = (xl_bm_bitmap_lastwords *)rec; + + appendStringInfo(buf, "update the last two words in a bitmap: "); + out_target(buf, &(xlrec->bm_node)); + break; + } + + default: + appendStringInfo(buf, "UNKNOWN"); + break; + } +} + +/* + * _bitmap_log_newpage() -- log a new page. + * + * This function is called before writing a new buffer. + */ +void +_bitmap_log_newpage(Relation rel, uint8 info, Buffer buf) +{ + Page page; + + page = BufferGetPage(buf); + + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!rel->rd_istemp) + { + xl_bm_newpage xlNewPage; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlNewPage.bm_node = rel->rd_node; + xlNewPage.bm_new_blkno = BufferGetBlockNumber(buf); + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *)&xlNewPage; + rdata[0].len = sizeof(xl_bm_newpage); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, info, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + END_CRIT_SECTION(); +} + +/* + * _bitmap_log_metapage() -- log the changes to the metapage + */ +void +_bitmap_log_metapage(Relation rel, Page page) +{ + BMMetaPage metapage = (BMMetaPage) PageGetContents(page); + + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!rel->rd_istemp) + { + xl_bm_metapage* xlMeta; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlMeta = (xl_bm_metapage *) + palloc(MAXALIGN(sizeof(xl_bm_metapage))); + xlMeta->bm_node = rel->rd_node; + xlMeta->bm_num_tuples = metapage->bm_num_tuples; + xlMeta->bm_lov_heapId = metapage->bm_lov_heapId; + xlMeta->bm_lov_indexId = metapage->bm_lov_indexId; + xlMeta->bm_lov_lastpage = 
metapage->bm_lov_lastpage; + xlMeta->bm_need_rebuilt = metapage->bm_need_rebuilt; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)xlMeta; + rdata[0].len = MAXALIGN(sizeof(xl_bm_metapage)); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + pfree(xlMeta); + } + + END_CRIT_SECTION(); +} + +/* + * _bitmap_log_bitmappage() -- log the changes to a bitmap page. + * + * This function inserts the changes to a bitmap page to xlog. + * The parameter 'numWords' defines the last 'numWords' words + * in bitmapBuffer are new. If isOpaque is set, then we also + * log the information about the block number for the next + * bitmap page. + */ +void +_bitmap_log_bitmappage(Relation rel, Buffer bitmapBuffer, bool isOpaque, + uint32 numWords) +{ + Page bitmapPage; + BMBitmapOpaque bitmapPageOpaque; + BMBitmap bitmap; + + bitmapPage = BufferGetPage(bitmapBuffer); + bitmapPageOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); + bitmap = (BMBitmap) PageGetContents(bitmapPage); + + Assert(bitmapPageOpaque->bm_hrl_words_used >= numWords); + + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!rel->rd_istemp) + { + xl_bm_bitmappage *xlBitmap; + XLogRecPtr recptr; + XLogRecData rdata[1]; + BM_HRL_WORD *bitmapWords; + + xlBitmap = (xl_bm_bitmappage *) + palloc(MAXALIGN(sizeof(xl_bm_bitmappage)) + + numWords * sizeof(BM_HRL_WORD)); + xlBitmap->bm_node = rel->rd_node; + xlBitmap->bm_bitmap_blkno = BufferGetBlockNumber(bitmapBuffer); + xlBitmap->bm_isOpaque = isOpaque; + + xlBitmap->bm_last_tid_location = bitmapPageOpaque->bm_last_tid_location; + xlBitmap->bm_hrl_words_used = bitmapPageOpaque->bm_hrl_words_used; + xlBitmap->bm_num_words = numWords; + memcpy(xlBitmap->hwords, bitmap->hwords, + BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD)); + + bitmapWords = (BM_HRL_WORD *) + (((char *)xlBitmap) + MAXALIGN(sizeof(xl_bm_bitmappage))); + + /* + * We copy the last 
'numWords' in this page to xlog because only + * these words are new. + */ + memcpy(bitmapWords, + (bitmap->cwords + (bitmapPageOpaque->bm_hrl_words_used - + numWords)), + numWords * sizeof(BM_HRL_WORD)); + + if (isOpaque) + xlBitmap->bm_next_blkno = bitmapPageOpaque->bm_bitmap_next; + else + xlBitmap->bm_next_blkno = InvalidBlockNumber; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)xlBitmap; + rdata[0].len = MAXALIGN(sizeof(xl_bm_bitmappage)) + + numWords * sizeof(BM_HRL_WORD); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP, rdata); + + PageSetLSN(bitmapPage, recptr); + PageSetTLI(bitmapPage, ThisTimeLineID); + pfree(xlBitmap); + } + + END_CRIT_SECTION(); +} + +/* + * _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap. + * + * The record carries the relation's file node, the LOV item's + * bm_last_compword, bm_last_word and bm_last_two_headerbits, and the + * item's location (block number of lovBuffer plus lovOffset). After the + * insert, the LOV page is stamped with the record's LSN and the current + * timeline. Nothing is logged when rel->rd_istemp is set. + */ +void +_bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer, + OffsetNumber lovOffset, BMLOVItem lovItem) +{ + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!rel->rd_istemp) + { + xl_bm_bitmap_lastwords xlLastwords; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlLastwords.bm_node = rel->rd_node; + xlLastwords.bm_last_compword = lovItem->bm_last_compword; + xlLastwords.bm_last_word = lovItem->bm_last_word; + xlLastwords.bm_last_two_headerbits = lovItem->bm_last_two_headerbits; + xlLastwords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer); + xlLastwords.bm_lov_offset = lovOffset; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)&xlLastwords; + rdata[0].len = sizeof(xl_bm_bitmap_lastwords); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS, + rdata); + + PageSetLSN(BufferGetPage(lovBuffer), recptr); + PageSetTLI(BufferGetPage(lovBuffer), ThisTimeLineID); + } + + END_CRIT_SECTION(); +} + +/* + * _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
+ */ +void +_bitmap_log_lovitem(Relation rel, Buffer lovBuffer, bool isNewItem, + OffsetNumber offset, BMLOVItem lovItem) +{ + Page lovPage = BufferGetPage(lovBuffer); + + /* + * XLOG stuff: for a non-temp relation, log the relation's file node, + * the LOV page's block number, the isNewItem flag, the item's offset + * and a copy of the LOV item itself, then stamp the LOV page with the + * record's LSN and the current timeline. + */ + START_CRIT_SECTION(); + + if (!rel->rd_istemp) + { + xl_bm_lovitem xlLovItem; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlLovItem.bm_node = rel->rd_node; + xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer); + xlLovItem.bm_isNewItem = isNewItem; + xlLovItem.bm_lov_offset = offset; + memcpy(&(xlLovItem.bm_lovItem), lovItem, sizeof(BMLOVItemData)); + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *)&xlLovItem; + rdata[0].len = sizeof(xl_bm_lovitem); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, + XLOG_BITMAP_INSERT_LOVITEM, rdata); + + PageSetLSN(lovPage, recptr); + PageSetTLI(lovPage, ThisTimeLineID); + } + + END_CRIT_SECTION(); +} diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/Makefile bitmap/src/backend/access/bitmap/Makefile --- pgsql-head/src/backend/access/bitmap/Makefile 1970-01-01 10:00:00.000000000 +1000 +++ bitmap/src/backend/access/bitmap/Makefile 2006-12-04 20:38:13.000000000 +1100 @@ -0,0 +1,34 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/bitmap +# +# Copyright (c) 2006, PostgreSQL Global Development Group +# +# IDENTIFICATION +# $PostgreSQL$ +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/bitmap +top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global + +OBJS = bitmaputil.o bitmapattutil.o \ + bitmappages.o bitmapinsert.o bitmapsearch.o bitmap.o bitmapxlog.o + +all: SUBSYS.o + +SUBSYS.o: $(OBJS) + $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) + +depend dep: + $(CC) -MM $(CFLAGS) *.c >depend + +clean: + rm -f SUBSYS.o $(OBJS) + +ifeq (depend,$(wildcard depend)) +include depend +endif + diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/bitmap/README bitmap/src/backend/access/bitmap/README --- pgsql-head/src/backend/access/bitmap/README 1970-01-01 10:00:00.000000000 +1000 +++ bitmap/src/backend/access/bitmap/README 2006-12-04 18:41:03.000000000 +1100 @@ -0,0 +1,138 @@ +$PostgreSQL$ + +This directory contains an implementation of an on-disk bitmap index. + +An on-disk bitmap index consists of bitmap vectors, one for each +distinct key value. Each vector is a (compressed) map of locations in the +underlying heap where the key value occurs. + +The advantage of on-disk bitmap indexes is that they can locate large numbers +of matches at low cost. When compressed, they are also very small. For +low-cardinality data (less than 50,000 distinct values), on-disk bitmap +indexes are much less expensive to construct than b-trees. + +Hybrid Run-Length (HRL) equality encoding bitmap index +------------------------------------------------------ + +HRL is the bitmap encoding mechanism used in this implementation. In HRL, +each vector is represented in two sections: the header section and the +content section. The header section contains bits, each of which +corresponds to a word in the content section.
If a bit in the header +section is 1, then the corresponding word in the content section is a +compressed word; if the bit is 0, then the corresponding word is not a +compressed word. + +For a compressed word in the content section, the first bit in this word +indicates whether 1s or 0s are compressed. The rest of the bits represent the +value of "(number of bits compressed) / (word size)". + +Consider this example. Assume that there is an uncompressed bitmap vector: + + 00000000 00000000 01000000 11111111 11111111 11111111 + +If the size of a word is set to 8, then an HRL compressed form for +this bitmap vector is as follows: + + header section: 101 + content section: 00000010 01000000 10000011 + +Consider the first word in the content section "00000010". The header +section tells us that this is a compressed word. As the word represents +the number two, this word tells us that it compresses 16 bits +(i.e., 2 * 8 = 16). As the first bit is zero, it is compressing zeroed bits. + +The second word is uncompressed. + +The third word is compressed and its first bit is set to one. As such +it compresses ones. As 0011 evaluates to three, this compressed word +represents 24 bits of ones (3 * 8 = 24). + +The insertion algorithm +----------------------- + +The distinct values are stored as an array of "LOV items" on "LOV pages" +(LOV stands for List of Values). LOV items also store some vector meta data. +To deal with high-cardinality cases, we also create an internal heap and a +btree index on this heap to speed up searches on distinct values. This +internal heap stores the distinct values and their LOV items in LOV pages, +which can be retrieved through the block numbers and the offset numbers. In +other words, the heap has "(number of indexed attributes) + 2" +attributes (one for the block number, the other for the offset number). The +btree index is built on this heap with the key as attributes to be indexed. + +The LOV item for NULL keys is the first LOV item of the first LOV page.
+ +We do not store TIDs in this bitmap index implementation. The reason is +that TIDs take too much space. Instead, we convert them to a 64 bit number +as follows: + + + ((uint64)ItemPointerGetBlockNumber(TID) * MaxNumHeapTuples) + + ((uint64)ItemPointerGetOffsetNumber(TID)); + +where MaxNumHeapTuples represents the maximum number of tuples that +can be stored on a heap page. This TID location is used as the index position +of this bit in its bitmap vector. + +Each insertion will affect only one bitmap vector. When inserting a +new tuple into a bitmap index, we search through the internal heap to +obtain the block number and the offset number of the LOV page that +contains the given value. From there, we obtain an exclusive lock on +that LOV page, and try to insert this new bit into the right bitmap +vector. The index position for this bit is calculated through the +formula for the tid location above. There are the following three +cases: + +(1) This bit will only affect the last two words. In this case, we + simply update the LOV item, which stores this information. +(2) This bit will require writing words to the last bitmap page, and + the last bitmap page has enough space to store these words. In + this case, we obtain an exclusive lock on the last bitmap page, + and write those words to the page. +(3) This bit will require writing words to the last bitmap page, and + the last bitmap page does not have enough space for these new words. + In this case, we create a new bitmap page, and insert these new + words to this new bitmap page. We also update the previous + bitmap page and the LOV item. + +There is a fourth case -- the TID location might be in the middle of a +vector. We deal with that specifically in the next section. + +When building a bitmap index, we also maintain an in-memory buffer to +store a bunch of tid locations for each distinct value before writing +them to bitmap vectors in batches. 
There are two advantages of this +approach: + +(1) The bitmap pages for a bitmap vector are likely to be allocated + sequentially. +(2) This can avoid visiting different bitmap pages for each insert + in a sequence of inserts, which can produce a lot of IOs when + the cardinality of attributes is high. + +Handling tuples that are inserted in the middle of the heap +----------------------------------------------------------- + +When a new tuple is inserted into the middle of the heap, a bit needs +to be updated in the middle of a bitmap vector. This is called an +in-place bit update. Since the bitmap vector is compressed, this +update may require us to convert one compressed word to 2-3 new +words. Replacing the old compressed word with these new words may +cause the current bitmap page to overflow. In this case, we create a +new bitmap page to store overflow words, and insert this page +right after the current bitmap page. + +One limitation about this approach is that this may cause a lot of +fragmentation in a bitmap vector when many tuples are inserted in the +middle of the heap. + +TODO: Currently, we need to search a bitmap vector from the beginning +to find the bit to be updated. One potential solution is to maintain a +list of the first tid locations for all bitmap pages in a bitmap +vector so that we can find the bitmap page that contains +the bit to be updated without scanning from the beginning. + +Vacuum/Vacuum full +------------------ + +During VACUUM FULL, tuples that are re-organized in the heap are not +inserted into the bitmap index. Instead, we REINDEX the bitmap index(s). 
diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/gin/ginget.c bitmap/src/backend/access/gin/ginget.c --- pgsql-head/src/backend/access/gin/ginget.c 2006-12-04 23:09:04.000000000 +1100 +++ bitmap/src/backend/access/gin/ginget.c 2006-12-04 18:57:25.000000000 +1100 @@ -13,6 +13,8 @@ */ #include "postgres.h" +#include "miscadmin.h" + #include "access/gin.h" #include "catalog/index.h" #include "utils/memutils.h" @@ -422,31 +424,42 @@ scanGetItem(IndexScanDesc scan, ItemPoin #define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL ) Datum -gingetmulti(PG_FUNCTION_ARGS) +gingetbitmap(PG_FUNCTION_ARGS) { - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); - int32 max_tids = PG_GETARG_INT32(2); - int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); - + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + Node *n = (Node *) PG_GETARG_POINTER(1); + HashBitmap *hashBitmap; + + if (n == NULL || IsA(n, StreamBitmap)) + /* XXX should we use less than work_mem for this? 
*/ + hashBitmap = tbm_create(work_mem * 1024L); + else + hashBitmap = (HashBitmap *)n; + if (GinIsNewKey(scan)) newScanKey(scan); - + startScan(scan); - - *returned_tids = 0; - - do + + while (true) { - if (scanGetItem(scan, tids + *returned_tids)) - (*returned_tids)++; - else - break; - } while (*returned_tids < max_tids); - + ItemPointerData tid; + if (scanGetItem(scan,&tid)) + tbm_add_tuples(hashBitmap, &tid, 1); + else + break; + } + stopScan(scan); + + if(n && IsA(n, StreamBitmap)) + { + stream_add_node((StreamBitmap *)n, + tbm_create_stream_node(hashBitmap), BMS_OR); + PG_RETURN_POINTER(n); + } - PG_RETURN_BOOL(*returned_tids == max_tids); + PG_RETURN_POINTER(hashBitmap); } Datum diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/gist/gistget.c bitmap/src/backend/access/gist/gistget.c --- pgsql-head/src/backend/access/gist/gistget.c 2006-12-04 23:09:04.000000000 +1100 +++ bitmap/src/backend/access/gist/gistget.c 2006-12-04 20:46:57.000000000 +1100 @@ -13,16 +13,19 @@ *------------------------------------------------------------------------- */ #include "postgres.h" +#include "miscadmin.h" #include "access/gist_private.h" #include "executor/execdebug.h" +#include "nodes/tidbitmap.h" #include "pgstat.h" #include "utils/memutils.h" static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n, - ScanDirection dir); -static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples); + ScanDirection dir); +static bool gistnext(IndexScanDesc scan, ScanDirection dir, + bool ignore_killed_tuples); static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan, OffsetNumber offset); @@ -97,7 +100,6 @@ gistgettuple(PG_FUNCTION_ARGS) IndexScanDesc scan = 
(IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); GISTScanOpaque so; - ItemPointerData tid; bool res; so = (GISTScanOpaque) scan->opaque; @@ -114,22 +116,33 @@ gistgettuple(PG_FUNCTION_ARGS) * tuples, continue looping until we find a non-killed tuple that matches * the search key. */ - res = (gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples)) ? true : false; + res = (gistnext(scan, dir, scan->ignore_killed_tuples)) ? true : false; PG_RETURN_BOOL(res); } Datum -gistgetmulti(PG_FUNCTION_ARGS) +gistgetbitmap(PG_FUNCTION_ARGS) { - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); - int32 max_tids = PG_GETARG_INT32(2); - int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + Node *n = (Node *) PG_GETARG_POINTER(1); + HashBitmap *hashBitmap; + + if (n == NULL || IsA(n, StreamBitmap)) + hashBitmap = tbm_create(work_mem * 1024L); + else + hashBitmap = (HashBitmap *)n; - *returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false); + while (gistnext(scan, ForwardScanDirection, false)) + tbm_add_tuples(hashBitmap, &scan->xs_ctup.t_self, 1); - PG_RETURN_BOOL(*returned_tids == max_tids); + if(n && IsA(n, StreamBitmap)) + { + stream_add_node((StreamBitmap *)n, + tbm_create_stream_node(hashBitmap), BMS_OR); + PG_RETURN_POINTER(n); + } + PG_RETURN_POINTER(hashBitmap); } /* @@ -137,8 +150,8 @@ gistgetmulti(PG_FUNCTION_ARGS) * either to fetch the first such tuple or subsequent matching * tuples. Returns true iff a matching tuple was found. 
*/ -static int -gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples) +static bool +gistnext(IndexScanDesc scan, ScanDirection dir, bool ignore_killed_tuples) { Page p; OffsetNumber n; @@ -147,7 +160,6 @@ gistnext(IndexScanDesc scan, ScanDirecti IndexTuple it; GISTPageOpaque opaque; bool resetoffset = false; - int ntids = 0; so = (GISTScanOpaque) scan->opaque; @@ -168,7 +180,7 @@ gistnext(IndexScanDesc scan, ScanDirecti } else if (so->curbuf == InvalidBuffer) { - return 0; + return false; } for (;;) @@ -181,9 +193,10 @@ gistnext(IndexScanDesc scan, ScanDirecti opaque = GistPageGetOpaque(p); resetoffset = false; - if (XLogRecPtrIsInvalid(so->stack->lsn) || !XLByteEQ(so->stack->lsn, PageGetLSN(p))) + if (XLogRecPtrIsInvalid(so->stack->lsn) || + !XLByteEQ(so->stack->lsn, PageGetLSN(p))) { - /* page changed from last visit or visit first time , reset offset */ + /* page changed from last visit or visit first time, reset offset */ so->stack->lsn = PageGetLSN(p); resetoffset = true; @@ -191,7 +204,8 @@ gistnext(IndexScanDesc scan, ScanDirecti if (!XLogRecPtrIsInvalid(so->stack->parentlsn) && XLByteLT(so->stack->parentlsn, opaque->nsn) && opaque->rightlink != InvalidBlockNumber /* sanity check */ && - (so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already + (so->stack->next == NULL || + so->stack->next->block != opaque->rightlink) /* check if already added */ ) { /* detect page split, follow right link to add pages */ @@ -217,7 +231,7 @@ gistnext(IndexScanDesc scan, ScanDirecti { ReleaseBuffer(so->curbuf); so->curbuf = InvalidBuffer; - return ntids; + return false; } so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation, @@ -244,7 +258,6 @@ gistnext(IndexScanDesc scan, ScanDirecti } /* wonderful, we can look at page */ - for (;;) { n = gistfindnext(scan, n, dir); @@ -267,7 +280,7 @@ gistnext(IndexScanDesc scan, ScanDirecti { ReleaseBuffer(so->curbuf); so->curbuf = 
InvalidBuffer; - return ntids; + return false; } so->curbuf = ReleaseAndReadBuffer(so->curbuf, @@ -291,14 +304,9 @@ gistnext(IndexScanDesc scan, ScanDirecti if (!(ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n)))) { it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - tids[ntids] = scan->xs_ctup.t_self = it->t_tid; - ntids++; - - if (ntids == maxtids) - { - LockBuffer(so->curbuf, GIST_UNLOCK); - return ntids; - } + scan->xs_ctup.t_self = it->t_tid; + LockBuffer(so->curbuf, GIST_UNLOCK); + return true; } } else @@ -327,7 +335,7 @@ gistnext(IndexScanDesc scan, ScanDirecti } } - return ntids; + return false; } /* diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/hash/hash.c bitmap/src/backend/access/hash/hash.c --- pgsql-head/src/backend/access/hash/hash.c 2006-12-04 23:09:04.000000000 +1100 +++ bitmap/src/backend/access/hash/hash.c 2006-12-04 20:40:58.000000000 +1100 @@ -17,12 +17,12 @@ */ #include "postgres.h" - +#include "miscadmin.h" #include "access/genam.h" #include "access/hash.h" #include "catalog/index.h" #include "commands/vacuum.h" - +#include "nodes/tidbitmap.h" /* Working state for hashbuild and its callback */ typedef struct @@ -237,25 +237,22 @@ hashgettuple(PG_FUNCTION_ARGS) PG_RETURN_BOOL(res); } - /* - * hashgetmulti() -- get multiple tuples at once - * - * This is a somewhat generic implementation: it avoids lock reacquisition - * overhead, but there's no smarts about picking especially good stopping - * points such as index page boundaries. + * hashgetbitmap() -- get the next bitmap for the scan. 
*/ Datum -hashgetmulti(PG_FUNCTION_ARGS) +hashgetbitmap(PG_FUNCTION_ARGS) { - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); - int32 max_tids = PG_GETARG_INT32(2); - int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); - HashScanOpaque so = (HashScanOpaque) scan->opaque; - Relation rel = scan->indexRelation; - bool res = true; - int32 ntids = 0; + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + Node *n = (Node *) PG_GETARG_POINTER(1); + HashBitmap *hashBitmap; + HashScanOpaque so = (HashScanOpaque) scan->opaque; + Relation rel = scan->indexRelation; + + if (n == NULL || IsA(n, StreamBitmap)) + hashBitmap = tbm_create(work_mem * 1024L); + else + hashBitmap = (HashBitmap *)n; /* * We hold pin but not lock on current buffer while outside the hash AM. @@ -264,8 +261,9 @@ hashgetmulti(PG_FUNCTION_ARGS) if (BufferIsValid(so->hashso_curbuf)) _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ); - while (ntids < max_tids) + while (true) { + bool res; /* * Start scan, or advance to next tuple. 
*/ @@ -295,19 +293,26 @@ hashgetmulti(PG_FUNCTION_ARGS) if (!res) break; /* Save tuple ID, and continue scanning */ - tids[ntids] = scan->xs_ctup.t_self; - ntids++; + tbm_add_tuples(hashBitmap, &(scan->xs_ctup.t_self), 1); } /* Release read lock on current buffer, but keep it pinned */ if (BufferIsValid(so->hashso_curbuf)) _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK); - *returned_tids = ntids; - PG_RETURN_BOOL(res); -} + tbm_begin_iterate(hashBitmap); + + if(n && IsA(n, StreamBitmap)) + { + stream_add_node((StreamBitmap *)n, + tbm_create_stream_node(hashBitmap), BMS_OR); + PG_RETURN_POINTER(n); + } + PG_RETURN_POINTER(hashBitmap); +} + /* * hashbeginscan() -- start a scan on a hash index */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/index/genam.c bitmap/src/backend/access/index/genam.c --- pgsql-head/src/backend/access/index/genam.c 2006-12-04 23:09:04.000000000 +1100 +++ bitmap/src/backend/access/index/genam.c 2006-12-04 19:14:39.000000000 +1100 @@ -86,7 +86,7 @@ RelationGetIndexScan(Relation indexRelat else scan->keyData = NULL; - scan->is_multiscan = false; /* caller may change this */ + scan->is_bitmapscan = false; /* caller may change this */ scan->kill_prior_tuple = false; scan->ignore_killed_tuples = true; /* default setting */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/index/indexam.c bitmap/src/backend/access/index/indexam.c --- pgsql-head/src/backend/access/index/indexam.c 2006-12-04 23:09:04.000000000 
+1100 +++ bitmap/src/backend/access/index/indexam.c 2006-12-04 20:41:50.000000000 +1100 @@ -14,14 +14,15 @@ * index_open - open an index relation by relation OID * index_close - close an index relation * index_beginscan - start a scan of an index with amgettuple - * index_beginscan_multi - start a scan of an index with amgetmulti + * index_beginscan_bitmap - start a scan of an index with + * amgetbitmap * index_rescan - restart a scan of an index * index_endscan - end a scan * index_insert - insert an index tuple into a relation * index_markpos - mark a scan position * index_restrpos - restore a scan position * index_getnext - get the next tuple from a scan - * index_getmulti - get multiple tuples from a scan + * index_getbitmap - get the next bitmap from a scan * index_bulk_delete - bulk deletion of index tuples * index_vacuum_cleanup - post-deletion cleanup of an index * index_getprocid - get a support procedure OID @@ -226,7 +227,7 @@ index_beginscan(Relation heapRelation, * Save additional parameters into the scandesc. Everything else was set * up by RelationGetIndexScan. */ - scan->is_multiscan = false; + scan->is_bitmapscan = false; scan->heapRelation = heapRelation; scan->xs_snapshot = snapshot; @@ -234,30 +235,37 @@ index_beginscan(Relation heapRelation, } /* - * index_beginscan_multi - start a scan of an index with amgetmulti + * index_beginscan_bitmap - start a scan of an index + * with amgetbitmap * * As above, caller had better be holding some lock on the parent heap * relation, even though it's not explicitly mentioned here. */ IndexScanDesc -index_beginscan_multi(Relation indexRelation, - Snapshot snapshot, - int nkeys, ScanKey key) +index_beginscan_bitmap(Relation indexRelation, + Snapshot snapshot, + int nkeys, ScanKey key) { IndexScanDesc scan; scan = index_beginscan_internal(indexRelation, nkeys, key); + /* + * Save additional parameters into the scandesc. Everything else was + * set up by RelationGetIndexScan. 
+ */ + scan->xs_snapshot = snapshot; /* - * Save additional parameters into the scandesc. Everything else was set - * up by RelationGetIndexScan. + * Save additional parameters into the scandesc. Everything else was + * set up by RelationGetIndexScan. */ - scan->is_multiscan = true; + scan->is_bitmapscan = true; scan->xs_snapshot = snapshot; return scan; } + /* * index_beginscan_internal --- common code for index_beginscan variants */ @@ -507,45 +515,30 @@ index_getnext_indexitem(IndexScanDesc sc return found; } -/* ---------------- - * index_getmulti - get multiple tuples from an index scan - * - * Collects the TIDs of multiple heap tuples satisfying the scan keys. - * Since there's no interlock between the index scan and the eventual heap - * access, this is only safe to use with MVCC-based snapshots: the heap - * item slot could have been replaced by a newer tuple by the time we get - * to it. - * - * A TRUE result indicates more calls should occur; a FALSE result says the - * scan is done. *returned_tids could be zero or nonzero in either case. - * ---------------- +/* + * index_getbitmap - get the next bitmap from an index scan. */ -bool -index_getmulti(IndexScanDesc scan, - ItemPointer tids, int32 max_tids, - int32 *returned_tids) +Node * +index_getbitmap(IndexScanDesc scan, Node *bitmap) { FmgrInfo *procedure; - bool found; + Node *bm; SCAN_CHECKS; - GET_SCAN_PROCEDURE(amgetmulti); + GET_SCAN_PROCEDURE(amgetbitmap); /* just make sure this is false... */ scan->kill_prior_tuple = false; /* - * have the am's getmulti proc do all the work. + * have the am's getbitmap proc do all the work. + * index_beginscan_bitmap already set up fn_getbitmap. 
*/ - found = DatumGetBool(FunctionCall4(procedure, - PointerGetDatum(scan), - PointerGetDatum(tids), - Int32GetDatum(max_tids), - PointerGetDatum(returned_tids))); + bm = (Node *)DatumGetPointer(FunctionCall2(procedure, + PointerGetDatum(scan), + PointerGetDatum(bitmap))); - pgstat_count_index_tuples(&scan->xs_pgstat_info, *returned_tids); - - return found; + return bm; } /* ---------------- diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/Makefile bitmap/src/backend/access/Makefile --- pgsql-head/src/backend/access/Makefile 2006-12-04 23:09:04.000000000 +1100 +++ bitmap/src/backend/access/Makefile 2006-07-19 14:59:48.000000000 +1000 @@ -8,7 +8,7 @@ subdir = src/backend/access top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -SUBDIRS := common gist hash heap index nbtree transam gin +SUBDIRS := common gist hash heap index nbtree transam gin bitmap SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o) all: SUBSYS.o diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/nbtree/nbtree.c bitmap/src/backend/access/nbtree/nbtree.c --- pgsql-head/src/backend/access/nbtree/nbtree.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/access/nbtree/nbtree.c 2006-11-10 09:41:06.000000000 +1100 @@ -18,6 +18,7 @@ */ #include "postgres.h" +#include "miscadmin.h" #include "access/genam.h" #include "access/nbtree.h" #include "catalog/index.h" @@ -278,46 +279,43 @@ btgettuple(PG_FUNCTION_ARGS) } /* - * btgetmulti() -- get multiple tuples at once - * - * In the 
current implementation there seems no strong reason to stop at - * index page boundaries; we just press on until we fill the caller's buffer - * or run out of matches. + * btgetbitmap() -- construct a HashBitmap. */ Datum -btgetmulti(PG_FUNCTION_ARGS) +btgetbitmap(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); - int32 max_tids = PG_GETARG_INT32(2); - int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); + Node *n = (Node *)PG_GETARG_POINTER(1); + HashBitmap *hashBitmap; + BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res = true; - int32 ntids = 0; - - if (max_tids <= 0) /* behave correctly in boundary case */ - PG_RETURN_BOOL(true); + if (n == NULL || IsA(n, StreamBitmap)) + { + /* XXX should we use less than work_mem for this? */ + hashBitmap = tbm_create(work_mem * 1024L); + } + else + { + hashBitmap = (HashBitmap *)n; + } /* If we haven't started the scan yet, fetch the first page & tuple. */ if (!BTScanPosIsValid(so->currPos)) { res = _bt_first(scan, ForwardScanDirection); - if (!res) + if (res) { - /* empty scan */ - *returned_tids = ntids; - PG_RETURN_BOOL(res); + /* Save tuple ID, and continue scanning */ + tbm_add_tuples(hashBitmap, &(scan->xs_ctup.t_self), 1); } - /* Save tuple ID, and continue scanning */ - tids[ntids] = scan->xs_ctup.t_self; - ntids++; } - while (ntids < max_tids) + while (res) { /* - * Advance to next tuple within page. This is the same as the easy - * case in _bt_next(). + * Advance to next tuple within page. This is the same as the + * easy case in _bt_next(). 
*/ if (++so->currPos.itemIndex > so->currPos.lastItem) { @@ -328,12 +326,18 @@ btgetmulti(PG_FUNCTION_ARGS) } /* Save tuple ID, and continue scanning */ - tids[ntids] = so->currPos.items[so->currPos.itemIndex].heapTid; - ntids++; + tbm_add_tuples(hashBitmap, + &(so->currPos.items[so->currPos.itemIndex].heapTid), + 1); } - *returned_tids = ntids; - PG_RETURN_BOOL(res); + if(n && IsA(n, StreamBitmap)) + { + stream_add_node((StreamBitmap *)n, + tbm_create_stream_node(hashBitmap), BMS_OR); + PG_RETURN_POINTER(n); + } + PG_RETURN_POINTER(hashBitmap); } /* diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/access/transam/rmgr.c bitmap/src/backend/access/transam/rmgr.c --- pgsql-head/src/backend/access/transam/rmgr.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/access/transam/rmgr.c 2006-11-10 10:16:10.000000000 +1100 @@ -7,6 +7,7 @@ */ #include "postgres.h" +#include "access/bitmap.h" #include "access/clog.h" #include "access/gin.h" #include "access/gist_private.h" @@ -38,5 +39,6 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] {"Hash", hash_redo, hash_desc, NULL, NULL, NULL}, {"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint}, {"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, gist_safe_restartpoint}, + {"Bitmap", bitmap_redo, bitmap_desc, NULL, NULL, NULL}, {"Sequence", seq_redo, seq_desc, NULL, NULL, NULL} }; diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/commands/vacuum.c 
bitmap/src/backend/commands/vacuum.c --- pgsql-head/src/backend/commands/vacuum.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/commands/vacuum.c 2006-11-23 18:25:00.000000000 +1100 @@ -1169,8 +1169,13 @@ full_vacuum_rel(Relation onerel, VacuumS if (vacuum_pages.num_pages > 0) { for (i = 0; i < nindexes; i++) - vacuum_index(&vacuum_pages, Irel[i], - vacrelstats->rel_tuples, 0); + /* + * We postpone vacuuming of those indexes which cannot + * shrink + */ + if (Irel[i]->rd_am->amcanshrink) + vacuum_index(&vacuum_pages, Irel[i], + vacrelstats->rel_tuples, 0); } else { @@ -1185,6 +1190,13 @@ full_vacuum_rel(Relation onerel, VacuumS /* Try to shrink heap */ repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages, nindexes, Irel); + /* Perform re-index for those AMs which cannot shrink */ + for (i = 0; i < nindexes; i++) + { + if (!Irel[i]->rd_am->amcanshrink) + vacuum_index(&vacuum_pages, Irel[i], + vacrelstats->rel_tuples, 0); + } vac_close_indexes(nindexes, Irel, NoLock); } else @@ -2395,8 +2407,12 @@ repair_frag(VRelStats *vacrelstats, Rela */ Assert(keep_tuples >= 0); for (i = 0; i < nindexes; i++) - vacuum_index(&Nvacpagelist, Irel[i], - vacrelstats->rel_tuples, keep_tuples); + { + /* We postpone vacuuming an on-disk bitmap index. 
*/ + if (Irel[i]->rd_am->amcanshrink) + vacuum_index(&Nvacpagelist, Irel[i], + vacrelstats->rel_tuples, keep_tuples); + } } /* diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/executor/execUtils.c bitmap/src/backend/executor/execUtils.c --- pgsql-head/src/backend/executor/execUtils.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/executor/execUtils.c 2006-12-04 22:24:15.000000000 +1100 @@ -1008,6 +1008,10 @@ ExecInsertIndexTuples(TupleTableSlot *sl if (relationDescs[i] == NULL) continue; + /* AMs which cannot shrink will just reindex after this anyway... */ + if(is_vacuum && !relationDescs[i]->rd_am->amcanshrink) + continue; + indexInfo = indexInfoArray[i]; /* Check for partial index */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/executor/nodeBitmapAnd.c bitmap/src/backend/executor/nodeBitmapAnd.c --- pgsql-head/src/backend/executor/nodeBitmapAnd.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/executor/nodeBitmapAnd.c 2006-11-10 09:26:33.000000000 +1100 @@ -112,7 +112,8 @@ MultiExecBitmapAnd(BitmapAndState *node) PlanState **bitmapplans; int nplans; int i; - TIDBitmap *result = NULL; + bool isEmpty = false; + HashBitmap *hbm = NULL; /* must provide our own instrumentation support */ if (node->ps.instrument) @@ -124,46 +125,89 @@ MultiExecBitmapAnd(BitmapAndState *node) bitmapplans = node->bitmapplans; nplans = node->nplans; + /* * Scan all the subplans and AND their result bitmaps - */ + */ for (i = 0; i < nplans; i++) { - PlanState 
*subnode = bitmapplans[i]; - TIDBitmap *subresult; + PlanState *subnode = bitmapplans[i]; + Node *subresult = NULL; - subresult = (TIDBitmap *) MultiExecProcNode(subnode); + subresult = MultiExecProcNode(subnode); + if (subresult == NULL) + { + /* got an empty result, intersection fails */ + isEmpty = true; + break; + } - if (!subresult || !IsA(subresult, TIDBitmap)) + if (!(IsA(subresult, HashBitmap) || + IsA(subresult, StreamBitmap))) elog(ERROR, "unrecognized result from subplan"); - if (result == NULL) - result = subresult; /* first subplan */ - else + /* Intersect the hash bitmaps */ + if (IsA(subresult, HashBitmap)) { - tbm_intersect(result, subresult); - tbm_free(subresult); + /* first subplan that generates a hash bitmap */ + if (hbm == NULL) + hbm = (HashBitmap *) subresult; + else + { + tbm_intersect(hbm, (HashBitmap *)subresult); + tbm_free((HashBitmap *)subresult); + } + + /* + * If at any stage we have a completely empty bitmap, we can + * fall out without evaluating the remaining subplans, since + * ANDing them can no longer change the result. (Note: the + * fact that indxpath.c orders the subplans by selectivity + * should make this case more likely to occur.) + */ + if (tbm_is_empty(hbm)) + { + isEmpty = true; + break; + } } + else + { + if(node->bitmap) + { + if(node->bitmap != subresult) + { + StreamBitmap *s = (StreamBitmap *)subresult; + stream_add_node((StreamBitmap *)node->bitmap, + s->opaque, BMS_AND); + } + } + else + node->bitmap = subresult; - /* - * If at any stage we have a completely empty bitmap, we can fall out - * without evaluating the remaining subplans, since ANDing them can no - * longer change the result. (Note: the fact that indxpath.c orders - * the subplans by selectivity should make this case more likely to - * occur.) 
- */ - if (tbm_is_empty(result)) - break; + } } - if (result == NULL) - elog(ERROR, "BitmapAnd doesn't support zero inputs"); + /* must provide our own instrumentation support */ if (node->ps.instrument) InstrStopNode(node->ps.instrument, 0 /* XXX */ ); - return (Node *) result; + if (isEmpty) + return (Node*) NULL; + + /* check to see if we have any hash bitmaps */ + if (hbm != NULL) + { + if(node->bitmap && IsA(node->bitmap, StreamBitmap)) + stream_add_node((StreamBitmap *)node->bitmap, + tbm_create_stream_node(hbm), BMS_AND); + else + node->bitmap = (Node *)hbm; + } + + return (Node *) node->bitmap; } /* ---------------------------------------------------------------- diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/executor/nodeBitmapHeapscan.c bitmap/src/backend/executor/nodeBitmapHeapscan.c --- pgsql-head/src/backend/executor/nodeBitmapHeapscan.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/executor/nodeBitmapHeapscan.c 2006-12-04 22:25:30.000000000 +1100 @@ -59,10 +59,12 @@ BitmapHeapNext(BitmapHeapScanState *node ExprContext *econtext; HeapScanDesc scan; Index scanrelid; - TIDBitmap *tbm; + Node *tbm; TBMIterateResult *tbmres; OffsetNumber targoffset; TupleTableSlot *slot; + bool more = true; + bool done = false; /* there are no more tuples */ /* * extract necessary information from index scan node @@ -75,6 +77,15 @@ BitmapHeapNext(BitmapHeapScanState *node tbm = node->tbm; tbmres = node->tbmres; + /* + * Clear any reference to the previously returned tuple. The idea here is + * to not have the tuple slot be the last holder of a pin on that tuple's + * buffer; if it is, we'll need a separate visit to the bufmgr to release + * the buffer. 
By clearing here, we get to have the release done by + * ReleaseAndReadBuffer, below. + */ + ExecClearTuple(slot); + /* * Check if we are evaluating PlanQual for tuple of this relation. * Additional checking is not good, but no other way for now. We could @@ -104,121 +115,138 @@ BitmapHeapNext(BitmapHeapScanState *node return slot; } - /* - * If we haven't yet performed the underlying index scan, do it, and - * prepare the bitmap to be iterated over. - */ - if (tbm == NULL) + while (!done) { - tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); + bool startIterating = false; - if (!tbm || !IsA(tbm, TIDBitmap)) - elog(ERROR, "unrecognized result from subplan"); + /* + * If we haven't yet performed the underlying index scan, or + * we have used up the bitmaps from the previous scan, do the next scan, + * and prepare the bitmap to be iterated over. + */ + if (tbm == NULL) + { + tbm = (Node *) MultiExecProcNode(outerPlanState(node)); - node->tbm = tbm; - node->tbmres = tbmres = NULL; + if ((tbm != NULL) && + !(IsA(tbm, HashBitmap) || + IsA(tbm, StreamBitmap))) + elog(ERROR, "unrecognized result from subplan"); - tbm_begin_iterate(tbm); - } + /* If tbm is NULL, the scan is over. */ + if (tbm == NULL) + return ExecClearTuple(slot); - for (;;) - { - Page dp; - ItemId lp; + node->tbm = tbm; - /* - * Get next page of results if needed - */ - if (tbmres == NULL) + more = false; + startIterating = true; + } + + for (;;) { - node->tbmres = tbmres = tbm_iterate(tbm); - if (tbmres == NULL) + Page dp; + ItemId lp; + + if (!startIterating && tbmres->ntuples == 0) + more = false; + + /* + * Get next page of results if needed + */ + if (!more) + { + more = tbm_iterate(tbm, tbmres); + if (!more) + { + done = true; + break; + } + + /* + * Ignore any claimed entries past what we think is the end of + * the relation. (This is probably not necessary given that we + * got at least AccessShareLock on the table before performing + * any of the indexscans, but let's be safe.) 
+ */ + if (tbmres->blockno >= scan->rs_nblocks) + { + more = false; + continue; + } + + /* + * Fetch the current heap page and identify candidate tuples. + */ + bitgetpage(scan, tbmres); + + /* + * Set rs_cindex to first slot to examine + */ + scan->rs_cindex = 0; + } + else { - /* no more entries in the bitmap */ - break; + /* + * Continuing in previously obtained page; advance rs_cindex + */ + scan->rs_cindex++; + tbmres->ntuples--; } /* - * Ignore any claimed entries past what we think is the end of the - * relation. (This is probably not necessary given that we got at - * least AccessShareLock on the table before performing any of the - * indexscans, but let's be safe.) + * Out of range? If so, nothing more to look at on this page */ - if (tbmres->blockno >= scan->rs_nblocks) + if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) { - node->tbmres = tbmres = NULL; + more = false; + tbmres->ntuples = 0; continue; } /* - * Fetch the current heap page and identify candidate tuples. + * Okay to fetch the tuple */ - bitgetpage(scan, tbmres); + targoffset = scan->rs_vistuples[scan->rs_cindex]; + dp = (Page) BufferGetPage(scan->rs_cbuf); + lp = PageGetItemId(dp, targoffset); + Assert(ItemIdIsUsed(lp)); + + scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + scan->rs_ctup.t_len = ItemIdGetLength(lp); + ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); + + pgstat_count_heap_fetch(&scan->rs_pgstat_info); /* - * Set rs_cindex to first slot to examine + * Set up the result slot to point to this tuple. Note that the + * slot acquires a pin on the buffer. */ - scan->rs_cindex = 0; - } - else - { + ExecStoreTuple(&scan->rs_ctup, + slot, + scan->rs_cbuf, + false); + /* - * Continuing in previously obtained page; advance rs_cindex + * If we are using lossy info, we have to recheck the qual + * conditions at every tuple. */ - scan->rs_cindex++; - } - - /* - * Out of range? 
If so, nothing more to look at on this page - */ - if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) - { - node->tbmres = tbmres = NULL; - continue; - } - - /* - * Okay to fetch the tuple - */ - targoffset = scan->rs_vistuples[scan->rs_cindex]; - dp = (Page) BufferGetPage(scan->rs_cbuf); - lp = PageGetItemId(dp, targoffset); - Assert(ItemIdIsUsed(lp)); - - scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); - scan->rs_ctup.t_len = ItemIdGetLength(lp); - ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); - - pgstat_count_heap_fetch(&scan->rs_pgstat_info); - - /* - * Set up the result slot to point to this tuple. Note that the slot - * acquires a pin on the buffer. - */ - ExecStoreTuple(&scan->rs_ctup, - slot, - scan->rs_cbuf, - false); - - /* - * If we are using lossy info, we have to recheck the qual conditions - * at every tuple. - */ - if (tbmres->ntuples < 0) - { - econtext->ecxt_scantuple = slot; - ResetExprContext(econtext); - - if (!ExecQual(node->bitmapqualorig, econtext, false)) + if (tbmres->ntuples < 0) { - /* Fails recheck, so drop it and loop back for another */ - ExecClearTuple(slot); - continue; + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + + if (!ExecQual(node->bitmapqualorig, econtext, false)) + { + /* Fails recheck, so drop it and loop back for another */ + ExecClearTuple(slot); + continue; + } } - } - /* OK to return this tuple */ - return slot; + /* OK to return this tuple */ + return slot; + } } /* @@ -259,8 +287,8 @@ bitgetpage(HeapScanDesc scan, TBMIterate snapshot = scan->rs_snapshot; /* - * We must hold share lock on the buffer content while examining tuple - * visibility. Afterwards, however, the tuples we have found to be + * We must hold share lock on the buffer content while examining tuple + * visibility. Afterwards, however, the tuples we have found to be * visible are guaranteed good as long as we hold the buffer pin. 
*/ LockBuffer(buffer, BUFFER_LOCK_SHARE); @@ -269,8 +297,8 @@ bitgetpage(HeapScanDesc scan, TBMIterate maxoff = PageGetMaxOffsetNumber(dp); /* - * Determine how many entries we need to look at on this page. If the - * bitmap is lossy then we need to look at each physical item pointer; + * Determine how many entries we need to look at on this page. If the + * bitmap is lossy then we need to look at each physical item pointer; * otherwise we just look through the offsets listed in tbmres. */ if (tbmres->ntuples >= 0) @@ -389,10 +417,8 @@ ExecBitmapHeapReScan(BitmapHeapScanState /* undo bogus "seq scan" count (see notes in ExecInitBitmapHeapScan) */ pgstat_discount_heap_scan(&node->ss.ss_currentScanDesc->rs_pgstat_info); - if (node->tbm) - tbm_free(node->tbm); node->tbm = NULL; - node->tbmres = NULL; + node->tbmres->ntuples = 0; /* * Always rescan the input immediately, to ensure we can pass down any @@ -434,12 +460,6 @@ ExecEndBitmapHeapScan(BitmapHeapScanStat ExecEndNode(outerPlanState(node)); /* - * release bitmap if any - */ - if (node->tbm) - tbm_free(node->tbm); - - /* * close heap scan */ heap_endscan(scanDesc); @@ -448,6 +468,8 @@ ExecEndBitmapHeapScan(BitmapHeapScanStat * close the heap relation. */ ExecCloseScanRelation(relation); + + pfree(node->tbmres); } /* ---------------------------------------------------------------- @@ -479,7 +501,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *n scanstate->ss.ps.state = estate; scanstate->tbm = NULL; - scanstate->tbmres = NULL; + scanstate->tbmres = palloc(sizeof(TBMIterateResult) + + MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber)); /* * Miscellaneous initialization @@ -548,7 +571,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *n * initialize child nodes * * We do this last because the child nodes will open indexscans on our - * relation's indexes, and we want to be sure we have acquired a lock on + * relation's indexes, and we want to be sure we have acquired a lock on * the relation first. 
*/ outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/executor/nodeBitmapIndexscan.c bitmap/src/backend/executor/nodeBitmapIndexscan.c --- pgsql-head/src/backend/executor/nodeBitmapIndexscan.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/executor/nodeBitmapIndexscan.c 2006-12-04 22:26:50.000000000 +1100 @@ -37,13 +37,9 @@ Node * MultiExecBitmapIndexScan(BitmapIndexScanState *node) { -#define MAX_TIDS 1024 - TIDBitmap *tbm; IndexScanDesc scandesc; - ItemPointerData tids[MAX_TIDS]; - int32 ntids; - double nTuples = 0; bool doscan; + Node *bitmap = NULL; /* must provide our own instrumentation support */ if (node->ss.ps.instrument) @@ -57,7 +53,7 @@ MultiExecBitmapIndexScan(BitmapIndexScan /* * If we have runtime keys and they've not already been set up, do it now. * Array keys are also treated as runtime keys; note that if ExecReScan - * returns with biss_RuntimeKeysReady still false, then there is an empty + * returns with biss_RuntimeKeysReady still false, then there is an empty * array key so we should do nothing. */ if (!node->biss_RuntimeKeysReady && @@ -69,52 +65,32 @@ MultiExecBitmapIndexScan(BitmapIndexScan else doscan = true; - /* - * Prepare the result bitmap. Normally we just create a new one to pass - * back; however, our parent node is allowed to store a pre-made one into - * node->biss_result, in which case we just OR our tuple IDs into the - * existing bitmap. (This saves needing explicit UNION steps.) - */ - if (node->biss_result) - { - tbm = node->biss_result; - node->biss_result = NULL; /* reset for next time */ - } - else - { - /* XXX should we use less than work_mem for this? 
*/ - tbm = tbm_create(work_mem * 1024L); - } - /* - * Get TIDs from index and insert into bitmap - */ + /* Get bitmap from index */ while (doscan) { - bool more = index_getmulti(scandesc, tids, MAX_TIDS, &ntids); + bitmap = index_getbitmap(scandesc, node->bitmap); - if (ntids > 0) - { - tbm_add_tuples(tbm, tids, ntids); - nTuples += ntids; - } + if ((bitmap != NULL) && + !(IsA(bitmap, HashBitmap) || + IsA(bitmap, StreamBitmap))) + elog(ERROR, "unrecognized result from bitmap index scan"); CHECK_FOR_INTERRUPTS(); - if (!more) - { - doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys, + if(node->bitmap == NULL) + node->bitmap = (Node *)bitmap; + doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys, node->biss_NumArrayKeys); - if (doscan) /* reset index scan */ - index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); - } + if (doscan) /* reset index scan */ + index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); } /* must provide our own instrumentation support */ if (node->ss.ps.instrument) - InstrStopNode(node->ss.ps.instrument, nTuples); + InstrStopNode(node->ss.ps.instrument, 0 /* XXX */); - return (Node *) tbm; + return (Node *) bitmap; } /* ---------------------------------------------------------------- @@ -152,8 +128,8 @@ ExecBitmapIndexReScan(BitmapIndexScanSta * If we are doing runtime key calculations (ie, the index keys depend on * data from an outer scan), compute the new key values. * - * Array keys are also treated as runtime keys; note that if we return - * with biss_RuntimeKeysReady still false, then there is an empty array + * Array keys are also treated as runtime keys; note that if we return + * with biss_RuntimeKeysReady still false, then there is an empty array * key so no index scan is needed. 
*/ if (node->biss_NumRuntimeKeys != 0) @@ -171,6 +147,19 @@ ExecBitmapIndexReScan(BitmapIndexScanSta /* reset index scan */ if (node->biss_RuntimeKeysReady) index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); + + /* reset hashBitmap */ + if(node->bitmap && IsA(node->bitmap, HashBitmap)) + { + tbm_free((HashBitmap *)node->bitmap); + node->bitmap = NULL; + } + else + { + /* XXX: we leak here */ + /* XXX: put in own memory context? */ + node->bitmap = NULL; + } } /* ---------------------------------------------------------------- @@ -202,6 +191,21 @@ ExecEndBitmapIndexScan(BitmapIndexScanSt */ index_endscan(indexScanDesc); index_close(indexRelationDesc, NoLock); + + if (node->bitmap != NULL) + { + if(IsA(node->bitmap, HashBitmap)) + tbm_free((HashBitmap *)node->bitmap); + else if(IsA(node->bitmap, StreamBitmap)) + { + StreamBitmap *s = (StreamBitmap *)node->bitmap; + if(s->opaque) + { + bitmap_stream_free(s->opaque); + s->opaque = NULL; + } + } + } } /* ---------------------------------------------------------------- @@ -226,9 +230,6 @@ ExecInitBitmapIndexScan(BitmapIndexScan indexstate->ss.ps.plan = (Plan *) node; indexstate->ss.ps.state = estate; - /* normally we don't make the result bitmap till runtime */ - indexstate->biss_result = NULL; - /* * Miscellaneous initialization * @@ -249,7 +250,7 @@ ExecInitBitmapIndexScan(BitmapIndexScan /* * We do not open or lock the base relation here. We assume that an - * ancestor BitmapHeapScan node is holding AccessShareLock (or better) on + * ancestor BitmapHeapScan node is holding AccessShareLock (or better) on * the heap relation throughout the execution of the plan tree. */ @@ -311,11 +312,10 @@ ExecInitBitmapIndexScan(BitmapIndexScan * Initialize scan descriptor. 
*/ indexstate->biss_ScanDesc = - index_beginscan_multi(indexstate->biss_RelationDesc, - estate->es_snapshot, - indexstate->biss_NumScanKeys, - indexstate->biss_ScanKeys); - + index_beginscan_bitmap(indexstate->biss_RelationDesc, + estate->es_snapshot, + indexstate->biss_NumScanKeys, + indexstate->biss_ScanKeys); /* * all done. */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/executor/nodeBitmapOr.c bitmap/src/backend/executor/nodeBitmapOr.c --- pgsql-head/src/backend/executor/nodeBitmapOr.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/executor/nodeBitmapOr.c 2006-11-23 17:08:15.000000000 +1100 @@ -113,7 +113,7 @@ MultiExecBitmapOr(BitmapOrState *node) PlanState **bitmapplans; int nplans; int i; - TIDBitmap *result = NULL; + HashBitmap *hbm = NULL; /* must provide our own instrumentation support */ if (node->ps.instrument) @@ -131,55 +131,61 @@ MultiExecBitmapOr(BitmapOrState *node) for (i = 0; i < nplans; i++) { PlanState *subnode = bitmapplans[i]; - TIDBitmap *subresult; + Node *subresult = NULL; - /* - * We can special-case BitmapIndexScan children to avoid an explicit - * tbm_union step for each child: just pass down the current result - * bitmap and let the child OR directly into it. - */ if (IsA(subnode, BitmapIndexScanState)) - { - if (result == NULL) /* first subplan */ - { - /* XXX should we use less than work_mem for this? 
*/ - result = tbm_create(work_mem * 1024L); - } + ((BitmapIndexScanState *) subnode)->bitmap = node->bitmap; - ((BitmapIndexScanState *) subnode)->biss_result = result; + subresult = MultiExecProcNode(subnode); - subresult = (TIDBitmap *) MultiExecProcNode(subnode); + if(subresult == NULL) + continue; - if (subresult != result) - elog(ERROR, "unrecognized result from subplan"); + if (!(IsA(subresult, HashBitmap) || + IsA(subresult, StreamBitmap))) + elog(ERROR, "unrecognized result from subplan"); + + if (IsA(subresult, HashBitmap)) + { + if (hbm == NULL) + hbm = (HashBitmap *)subresult; + else + { + tbm_union(hbm, (HashBitmap *)subresult); + tbm_free((HashBitmap *)subresult); + } } else { - /* standard implementation */ - subresult = (TIDBitmap *) MultiExecProcNode(subnode); - - if (!subresult || !IsA(subresult, TIDBitmap)) - elog(ERROR, "unrecognized result from subplan"); - - if (result == NULL) - result = subresult; /* first subplan */ - else + if(node->bitmap) { - tbm_union(result, subresult); - tbm_free(subresult); + if(node->bitmap != subresult) + { + StreamBitmap *s = (StreamBitmap *)subresult; + stream_add_node((StreamBitmap *)node->bitmap, + s->opaque, BMS_OR); + } } + else + node->bitmap = subresult; } } - /* We could return an empty result set here? 
*/ - if (result == NULL) - elog(ERROR, "BitmapOr doesn't support zero inputs"); + /* check to see if we have any hash bitmaps */ + if (hbm != NULL) + { + if(node->bitmap && IsA(node->bitmap, StreamBitmap)) + stream_add_node((StreamBitmap *)node->bitmap, + tbm_create_stream_node(hbm), BMS_OR); + else + node->bitmap = (Node *)hbm; + } /* must provide our own instrumentation support */ if (node->ps.instrument) InstrStopNode(node->ps.instrument, 0 /* XXX */ ); - return (Node *) result; + return node->bitmap; } /* ---------------------------------------------------------------- @@ -211,6 +217,7 @@ ExecEndBitmapOr(BitmapOrState *node) if (bitmapplans[i]) ExecEndNode(bitmapplans[i]); } + /* XXX: what if we're a bitmap stream ? */ } void diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/nodes/tidbitmap.c bitmap/src/backend/nodes/tidbitmap.c --- pgsql-head/src/backend/nodes/tidbitmap.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/nodes/tidbitmap.c 2006-12-04 23:38:53.000000000 +1100 @@ -32,63 +32,17 @@ #include <limits.h> #include "access/htup.h" +#include "access/bitmap.h" /* XXX: remove once pull_stream is generic */ #include "nodes/tidbitmap.h" #include "storage/bufpage.h" #include "utils/hsearch.h" -/* - * The maximum number of tuples per page is not large (typically 256 with - * 8K pages, or 1024 with 32K pages). So there's not much point in making - * the per-page bitmaps variable size. We just legislate that the size - * is this: - */ -#define MAX_TUPLES_PER_PAGE MaxHeapTuplesPerPage - -/* - * When we have to switch over to lossy storage, we use a data structure - * with one bit per page, where all pages having the same number DIV - * PAGES_PER_CHUNK are aggregated into one chunk.
When a chunk is present - * and has the bit set for a given page, there must not be a per-page entry - * for that page in the page table. - * - * We actually store both exact pages and lossy chunks in the same hash - * table, using identical data structures. (This is because dynahash.c's - * memory management doesn't allow space to be transferred easily from one - * hashtable to another.) Therefore it's best if PAGES_PER_CHUNK is the - * same as MAX_TUPLES_PER_PAGE, or at least not too different. But we - * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer - * remainder operations. So, define it like this: - */ -#define PAGES_PER_CHUNK (BLCKSZ / 32) - -/* The bitmap unit size can be adjusted by changing these declarations: */ -#define BITS_PER_BITMAPWORD 32 -typedef uint32 bitmapword; /* must be an unsigned type */ - -#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) -#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) - -/* number of active words for an exact page: */ -#define WORDS_PER_PAGE ((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1) -/* number of active words for a lossy chunk: */ -#define WORDS_PER_CHUNK ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1) - -/* - * The hashtable entries are represented by this data structure. For - * an exact page, blockno is the page number and bit k of the bitmap - * represents tuple offset k+1. For a lossy chunk, blockno is the first - * page in the chunk (this must be a multiple of PAGES_PER_CHUNK) and - * bit k represents page blockno+k. Note that it is not possible to - * have exact storage for the first page of a chunk if we are using - * lossy storage for any page in the chunk's range, since the same - * hashtable entry has to serve both purposes. 
- */ -typedef struct PagetableEntry -{ - BlockNumber blockno; /* page number (hashtable key) */ - bool ischunk; /* T = lossy storage, F = exact */ - bitmapword words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)]; -} PagetableEntry; +#define WORDNUM(x) ((x) / TBM_BITS_PER_BITMAPWORD) +#define BITNUM(x) ((x) % TBM_BITS_PER_BITMAPWORD) + +static bool tbm_iterate_page(PagetableEntry *page, TBMIterateResult *output); +static bool tbm_iterate_hash(HashBitmap *tbm,TBMIterateResult *output); +static PagetableEntry *tbm_next_page(HashBitmap *tbm, bool *more); /* * dynahash.c is optimized for relatively large, long-lived hash tables. @@ -103,15 +57,15 @@ typedef struct PagetableEntry */ typedef enum { - TBM_EMPTY, /* no hashtable, nentries == 0 */ - TBM_ONE_PAGE, /* entry1 contains the single entry */ - TBM_HASH /* pagetable is valid, entry1 is not */ + HASHBM_EMPTY, /* no hashtable, nentries == 0 */ + HASHBM_ONE_PAGE, /* entry1 contains the single entry */ + HASHBM_HASH /* pagetable is valid, entry1 is not */ } TBMStatus; /* - * Here is the representation for a whole TIDBitMap: + * Here is the representation for a whole HashBitmap. */ -struct TIDBitmap +struct HashBitmap { NodeTag type; /* to make it a valid Node */ MemoryContext mcxt; /* memory context containing me */ @@ -122,29 +76,36 @@ struct TIDBitmap int npages; /* number of exact entries in pagetable */ int nchunks; /* number of lossy entries in pagetable */ bool iterating; /* tbm_begin_iterate called? 
*/ - PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */ + PagetableEntry entry1; /* used when status == HASHBM_ONE_PAGE */ /* the remaining fields are used while producing sorted output: */ PagetableEntry **spages; /* sorted exact-page list, or NULL */ PagetableEntry **schunks; /* sorted lossy-chunk list, or NULL */ int spageptr; /* next spages index */ int schunkptr; /* next schunks index */ int schunkbit; /* next bit to check in current schunk */ - TBMIterateResult output; /* MUST BE LAST (because variable-size) */ }; +/* A struct to hide away HashBitmap state for a streaming bitmap */ +typedef struct HashStreamOpaque +{ + HashBitmap *tbm; + PagetableEntry *entry; +} HashStreamOpaque; /* Local function prototypes */ -static void tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage); -static bool tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage, - const TIDBitmap *b); -static const PagetableEntry *tbm_find_pageentry(const TIDBitmap *tbm, +static void tbm_union_page(HashBitmap *a, const PagetableEntry *bpage); +static bool tbm_intersect_page(HashBitmap *a, PagetableEntry *apage, + const HashBitmap *b); +static const PagetableEntry *tbm_find_pageentry(const HashBitmap *tbm, BlockNumber pageno); -static PagetableEntry *tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno); -static bool tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno); -static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno); -static void tbm_lossify(TIDBitmap *tbm); -static int tbm_comparator(const void *left, const void *right); +static PagetableEntry *tbm_get_pageentry(HashBitmap *tbm, BlockNumber pageno); +static bool tbm_page_is_lossy(const HashBitmap *tbm, BlockNumber pageno); +static void tbm_mark_page_lossy(HashBitmap *tbm, BlockNumber pageno); +static void tbm_lossify(HashBitmap *tbm); +static int tbm_comparator(const void *left, const void *right); +static bool tbm_stream_block(void *opaque, PagetableEntry *e); +static void tbm_stream_free(void 
*opaque); /* * tbm_create - create an initially-empty bitmap @@ -153,24 +114,22 @@ static int tbm_comparator(const void *le * at the time of this call. It will be limited to (approximately) maxbytes * total memory consumption. */ -TIDBitmap * +HashBitmap * tbm_create(long maxbytes) { - TIDBitmap *tbm; + HashBitmap *tbm; long nbuckets; /* - * Create the TIDBitmap struct, with enough trailing space to serve the - * needs of the TBMIterateResult sub-struct. + * Create the HashBitmap struct. */ - tbm = (TIDBitmap *) palloc(sizeof(TIDBitmap) + - MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber)); + tbm = (HashBitmap *) palloc(sizeof(HashBitmap)); /* Zero all the fixed fields */ - MemSetAligned(tbm, 0, sizeof(TIDBitmap)); + MemSetAligned(tbm, 0, sizeof(HashBitmap)); - tbm->type = T_TIDBitmap; /* Set NodeTag */ + tbm->type = T_HashBitmap; /* Set NodeTag */ tbm->mcxt = CurrentMemoryContext; - tbm->status = TBM_EMPTY; + tbm->status = HASHBM_EMPTY; /* * Estimate number of hashtable entries we can have within maxbytes. This @@ -194,11 +153,11 @@ tbm_create(long maxbytes) * proposition, we don't do it until we have to. 
*/ static void -tbm_create_pagetable(TIDBitmap *tbm) +tbm_create_pagetable(HashBitmap *tbm) { HASHCTL hash_ctl; - Assert(tbm->status != TBM_HASH); + Assert(tbm->status != HASHBM_HASH); Assert(tbm->pagetable == NULL); /* Create the hashtable proper */ @@ -207,13 +166,13 @@ tbm_create_pagetable(TIDBitmap *tbm) hash_ctl.entrysize = sizeof(PagetableEntry); hash_ctl.hash = tag_hash; hash_ctl.hcxt = tbm->mcxt; - tbm->pagetable = hash_create("TIDBitmap", + tbm->pagetable = hash_create("HashBitmap", 128, /* start small and extend */ &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); /* If entry1 is valid, push it into the hashtable */ - if (tbm->status == TBM_ONE_PAGE) + if (tbm->status == HASHBM_ONE_PAGE) { PagetableEntry *page; bool found; @@ -225,14 +184,14 @@ tbm_create_pagetable(TIDBitmap *tbm) memcpy(page, &tbm->entry1, sizeof(PagetableEntry)); } - tbm->status = TBM_HASH; + tbm->status = HASHBM_HASH; } /* - * tbm_free - free a TIDBitmap + * tbm_free - free a HashBitmap */ void -tbm_free(TIDBitmap *tbm) +tbm_free(HashBitmap *tbm) { if (tbm->pagetable) hash_destroy(tbm->pagetable); @@ -244,10 +203,10 @@ tbm_free(TIDBitmap *tbm) } /* - * tbm_add_tuples - add some tuple IDs to a TIDBitmap + * tbm_add_tuples - add some tuple IDs to a HashBitmap */ void -tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids) +tbm_add_tuples(HashBitmap *tbm, const ItemPointer tids, int ntids) { int i; @@ -280,7 +239,7 @@ tbm_add_tuples(TIDBitmap *tbm, const Ite wordnum = WORDNUM(off - 1); bitnum = BITNUM(off - 1); } - page->words[wordnum] |= ((bitmapword) 1 << bitnum); + page->words[wordnum] |= ((tbm_bitmapword) 1 << bitnum); if (tbm->nentries > tbm->maxentries) tbm_lossify(tbm); @@ -293,21 +252,21 @@ tbm_add_tuples(TIDBitmap *tbm, const Ite * a is modified in-place, b is not changed */ void -tbm_union(TIDBitmap *a, const TIDBitmap *b) +tbm_union(HashBitmap *a, const HashBitmap *b) { Assert(!a->iterating); /* Nothing to do if b is empty */ if (b->nentries == 0) return; /* Scan 
through chunks and pages in b, merge into a */ - if (b->status == TBM_ONE_PAGE) + if (b->status == HASHBM_ONE_PAGE) tbm_union_page(a, &b->entry1); else { HASH_SEQ_STATUS status; PagetableEntry *bpage; - Assert(b->status == TBM_HASH); + Assert(b->status == HASHBM_HASH); hash_seq_init(&status, b->pagetable); while ((bpage = (PagetableEntry *) hash_seq_search(&status)) != NULL) tbm_union_page(a, bpage); @@ -316,7 +275,7 @@ tbm_union(TIDBitmap *a, const TIDBitmap /* Process one page of b during a union op */ static void -tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage) +tbm_union_page(HashBitmap *a, const PagetableEntry *bpage) { PagetableEntry *apage; int wordnum; @@ -326,13 +285,13 @@ tbm_union_page(TIDBitmap *a, const Paget /* Scan b's chunk, mark each indicated page lossy in a */ for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) { - bitmapword w = bpage->words[wordnum]; + tbm_bitmapword w = bpage->words[wordnum]; if (w != 0) { BlockNumber pg; - pg = bpage->blockno + (wordnum * BITS_PER_BITMAPWORD); + pg = bpage->blockno + (wordnum * TBM_BITS_PER_BITMAPWORD); while (w != 0) { if (w & 1) @@ -354,7 +313,7 @@ tbm_union_page(TIDBitmap *a, const Paget if (apage->ischunk) { /* The page is a lossy chunk header, set bit for itself */ - apage->words[0] |= ((bitmapword) 1 << 0); + apage->words[0] |= ((tbm_bitmapword) 1 << 0); } else { @@ -374,14 +333,14 @@ tbm_union_page(TIDBitmap *a, const Paget * a is modified in-place, b is not changed */ void -tbm_intersect(TIDBitmap *a, const TIDBitmap *b) +tbm_intersect(HashBitmap *a, const HashBitmap *b) { Assert(!a->iterating); /* Nothing to do if a is empty */ if (a->nentries == 0) return; /* Scan through chunks and pages in a, try to match to b */ - if (a->status == TBM_ONE_PAGE) + if (a->status == HASHBM_ONE_PAGE) { if (tbm_intersect_page(a, &a->entry1, b)) { @@ -390,7 +349,7 @@ tbm_intersect(TIDBitmap *a, const TIDBit a->npages--; a->nentries--; Assert(a->nentries == 0); - a->status = TBM_EMPTY; + a->status = 
HASHBM_EMPTY; } } else @@ -398,7 +357,7 @@ tbm_intersect(TIDBitmap *a, const TIDBit HASH_SEQ_STATUS status; PagetableEntry *apage; - Assert(a->status == TBM_HASH); + Assert(a->status == HASHBM_HASH); hash_seq_init(&status, a->pagetable); while ((apage = (PagetableEntry *) hash_seq_search(&status)) != NULL) { @@ -425,7 +384,7 @@ tbm_intersect(TIDBitmap *a, const TIDBit * Returns TRUE if apage is now empty and should be deleted from a */ static bool -tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage, const TIDBitmap *b) +tbm_intersect_page(HashBitmap *a, PagetableEntry *apage, const HashBitmap *b) { const PagetableEntry *bpage; int wordnum; @@ -437,15 +396,15 @@ tbm_intersect_page(TIDBitmap *a, Pagetab for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) { - bitmapword w = apage->words[wordnum]; + tbm_bitmapword w = apage->words[wordnum]; if (w != 0) { - bitmapword neww = w; + tbm_bitmapword neww = w; BlockNumber pg; int bitnum; - pg = apage->blockno + (wordnum * BITS_PER_BITMAPWORD); + pg = apage->blockno + (wordnum * TBM_BITS_PER_BITMAPWORD); bitnum = 0; while (w != 0) { @@ -455,7 +414,7 @@ tbm_intersect_page(TIDBitmap *a, Pagetab tbm_find_pageentry(b, pg) == NULL) { /* Page is not in b at all, lose lossy bit */ - neww &= ~((bitmapword) 1 << bitnum); + neww &= ~((tbm_bitmapword) 1 << bitnum); } } pg++; @@ -511,23 +470,23 @@ tbm_intersect_page(TIDBitmap *a, Pagetab } /* - * tbm_is_empty - is a TIDBitmap completely empty? + * tbm_is_empty - is a HashBitmap completely empty? */ bool -tbm_is_empty(const TIDBitmap *tbm) +tbm_is_empty(const HashBitmap *tbm) { return (tbm->nentries == 0); } /* - * tbm_begin_iterate - prepare to iterate through a TIDBitmap + * tbm_begin_iterate - prepare to iterate through a HashBitmap * * NB: after this is called, it is no longer allowed to modify the contents * of the bitmap. However, you can call this multiple times to scan the * contents repeatedly. 
*/ void -tbm_begin_iterate(TIDBitmap *tbm) +tbm_begin_iterate(HashBitmap *tbm) { HASH_SEQ_STATUS status; PagetableEntry *page; @@ -546,7 +505,7 @@ tbm_begin_iterate(TIDBitmap *tbm) /* * Nothing else to do if no entries, nor if we don't have a hashtable. */ - if (tbm->nentries == 0 || tbm->status != TBM_HASH) + if (tbm->nentries == 0 || tbm->status != HASHBM_HASH) return; /* @@ -579,22 +538,125 @@ tbm_begin_iterate(TIDBitmap *tbm) } /* - * tbm_iterate - scan through next page of a TIDBitmap + * tbm_iterate - scan through next page of a HashBitmap or a StreamBitmap. + */ +bool +tbm_iterate(Node *tbm, TBMIterateResult *output) +{ + Assert(IsA(tbm, HashBitmap) || IsA(tbm, StreamBitmap)); + + switch(tbm->type) + { + case T_HashBitmap: + { + HashBitmap *hashBitmap = (HashBitmap*)tbm; + if (!hashBitmap->iterating) + tbm_begin_iterate(hashBitmap); + + return tbm_iterate_hash(hashBitmap, output); + } + case T_StreamBitmap: + { + StreamBitmap *streamBitmap = (StreamBitmap*)tbm; + bool status; + StreamNode *s; + + s = (StreamNode *)streamBitmap->opaque; + + status = bitmap_stream_iterate((void *)s, &(streamBitmap->entry)); + + /* XXX: perhaps we should only do this if status == true ? */ + tbm_iterate_page(&(streamBitmap->entry), output); + + return status; + } + default: + elog(ERROR, "unrecoganized node type"); + } + + return false; +} + +/* + * tbm_iterate_page - get a TBMIterateResult from a given PagetableEntry. 
+ */ +static bool +tbm_iterate_page(PagetableEntry *page, TBMIterateResult *output) +{ + int ntuples; + int wordnum; + + if(page->ischunk) + { + output->ntuples = -1; + return true; + } + + /* scan bitmap to extract individual offset numbers */ + ntuples = 0; + for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) + { + tbm_bitmapword w = page->words[wordnum]; + + if (w != 0) + { + int off = wordnum * TBM_BITS_PER_BITMAPWORD + 1; + + while (w != 0) + { + if (w & 1) + output->offsets[ntuples++] = (OffsetNumber) off; + off++; + w >>= 1; + } + } + } + output->blockno = page->blockno; + output->ntuples = ntuples; + + return true; +} + +/* + * tbm_iterate_hash - scan through next page of a HashBitmap * - * Returns a TBMIterateResult representing one page, or NULL if there are + * Fills 'output' for one page and returns true, or returns false if there are * no more pages to scan. Pages are guaranteed to be delivered in numerical * order. If result->ntuples < 0, then the bitmap is "lossy" and failed to * remember the exact tuples to look at on this page --- the caller must * examine all tuples on the page and check if they meet the intended * condition. + * + * NB: 'output' is filled in whenever a page is found, so it must not be NULL. */ -TBMIterateResult * -tbm_iterate(TIDBitmap *tbm) +static bool +tbm_iterate_hash(HashBitmap *tbm, TBMIterateResult *output) { - TBMIterateResult *output = &(tbm->output); + PagetableEntry *e; + bool more; + + e = tbm_next_page(tbm, &more); + if(more && e) + { + tbm_iterate_page(e, output); + return true; + } + return false; +} +/* + * tbm_next_page - actually traverse the HashBitmap + * + * Returns the next page entry; *more is set to false once the bitmap is exhausted. + */ + +static PagetableEntry * +tbm_next_page(HashBitmap *tbm, bool *more) +{ Assert(tbm->iterating); + *more = true; + /* * If lossy chunk pages remain, make sure we've advanced schunkptr/ * schunkbit to the next set bit.
@@ -609,7 +671,7 @@ tbm_iterate(TIDBitmap *tbm) int wordnum = WORDNUM(schunkbit); int bitnum = BITNUM(schunkbit); - if ((chunk->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0) + if ((chunk->words[wordnum] & ((tbm_bitmapword) 1 << bitnum)) != 0) break; schunkbit++; } @@ -630,6 +692,7 @@ tbm_iterate(TIDBitmap *tbm) if (tbm->schunkptr < tbm->nchunks) { PagetableEntry *chunk = tbm->schunks[tbm->schunkptr]; + PagetableEntry *nextpage; BlockNumber chunk_blockno; chunk_blockno = chunk->blockno + tbm->schunkbit; @@ -637,51 +700,29 @@ tbm_iterate(TIDBitmap *tbm) chunk_blockno < tbm->spages[tbm->spageptr]->blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; + nextpage = (PagetableEntry *)palloc(sizeof(PagetableEntry)); + nextpage->ischunk = true; + nextpage->blockno = chunk_blockno; tbm->schunkbit++; - return output; + return nextpage; } } if (tbm->spageptr < tbm->npages) { - PagetableEntry *page; - int ntuples; - int wordnum; - + PagetableEntry *e; /* In ONE_PAGE state, we don't allocate an spages[] array */ - if (tbm->status == TBM_ONE_PAGE) - page = &tbm->entry1; + if (tbm->status == HASHBM_ONE_PAGE) + e = &tbm->entry1; else - page = tbm->spages[tbm->spageptr]; - - /* scan bitmap to extract individual offset numbers */ - ntuples = 0; - for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) - { - bitmapword w = page->words[wordnum]; - - if (w != 0) - { - int off = wordnum * BITS_PER_BITMAPWORD + 1; + e = tbm->spages[tbm->spageptr]; - while (w != 0) - { - if (w & 1) - output->offsets[ntuples++] = (OffsetNumber) off; - off++; - w >>= 1; - } - } - } - output->blockno = page->blockno; - output->ntuples = ntuples; tbm->spageptr++; - return output; + return e; } /* Nothing more in the bitmap */ + *more = false; return NULL; } @@ -691,14 +732,14 @@ tbm_iterate(TIDBitmap *tbm) * Returns NULL if there is no non-lossy entry for the pageno. 
*/ static const PagetableEntry * -tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno) +tbm_find_pageentry(const HashBitmap *tbm, BlockNumber pageno) { const PagetableEntry *page; if (tbm->nentries == 0) /* in case pagetable doesn't exist */ return NULL; - if (tbm->status == TBM_ONE_PAGE) + if (tbm->status == HASHBM_ONE_PAGE) { page = &tbm->entry1; if (page->blockno != pageno) @@ -726,21 +767,21 @@ tbm_find_pageentry(const TIDBitmap *tbm, * up to the caller to call tbm_lossify() at the next safe point if so. */ static PagetableEntry * -tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno) +tbm_get_pageentry(HashBitmap *tbm, BlockNumber pageno) { PagetableEntry *page; bool found; - if (tbm->status == TBM_EMPTY) + if (tbm->status == HASHBM_EMPTY) { /* Use the fixed slot */ page = &tbm->entry1; found = false; - tbm->status = TBM_ONE_PAGE; + tbm->status = HASHBM_ONE_PAGE; } else { - if (tbm->status == TBM_ONE_PAGE) + if (tbm->status == HASHBM_ONE_PAGE) { page = &tbm->entry1; if (page->blockno == pageno) @@ -772,7 +813,7 @@ tbm_get_pageentry(TIDBitmap *tbm, BlockN * tbm_page_is_lossy - is the page marked as lossily stored? 
*/ static bool -tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno) +tbm_page_is_lossy(const HashBitmap *tbm, BlockNumber pageno) { PagetableEntry *page; BlockNumber chunk_pageno; @@ -781,7 +822,7 @@ tbm_page_is_lossy(const TIDBitmap *tbm, /* we can skip the lookup if there are no lossy chunks */ if (tbm->nchunks == 0) return false; - Assert(tbm->status == TBM_HASH); + Assert(tbm->status == HASHBM_HASH); bitno = pageno % PAGES_PER_CHUNK; chunk_pageno = pageno - bitno; @@ -793,7 +834,7 @@ tbm_page_is_lossy(const TIDBitmap *tbm, int wordnum = WORDNUM(bitno); int bitnum = BITNUM(bitno); - if ((page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0) + if ((page->words[wordnum] & ((tbm_bitmapword) 1 << bitnum)) != 0) return true; } return false; @@ -806,7 +847,7 @@ tbm_page_is_lossy(const TIDBitmap *tbm, * up to the caller to call tbm_lossify() at the next safe point if so. */ static void -tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno) +tbm_mark_page_lossy(HashBitmap *tbm, BlockNumber pageno) { PagetableEntry *page; bool found; @@ -816,7 +857,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, Bloc int bitnum; /* We force the bitmap into hashtable mode whenever it's lossy */ - if (tbm->status != TBM_HASH) + if (tbm->status != HASHBM_HASH) tbm_create_pagetable(tbm); bitno = pageno % PAGES_PER_CHUNK; @@ -860,7 +901,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, Bloc page->blockno = chunk_pageno; page->ischunk = true; /* we assume it had some tuple bit(s) set, so mark it lossy */ - page->words[0] = ((bitmapword) 1 << 0); + page->words[0] = ((tbm_bitmapword) 1 << 0); /* adjust counts */ tbm->nchunks++; tbm->npages--; @@ -869,14 +910,14 @@ tbm_mark_page_lossy(TIDBitmap *tbm, Bloc /* Now set the original target page's bit */ wordnum = WORDNUM(bitno); bitnum = BITNUM(bitno); - page->words[wordnum] |= ((bitmapword) 1 << bitnum); + page->words[wordnum] |= ((tbm_bitmapword) 1 << bitnum); } /* * tbm_lossify - lose some information to get back under the memory limit */ static void 
-tbm_lossify(TIDBitmap *tbm) +tbm_lossify(HashBitmap *tbm) { HASH_SEQ_STATUS status; PagetableEntry *page; @@ -888,7 +929,7 @@ tbm_lossify(TIDBitmap *tbm) * to lossify more than the minimum number of pages during each call. */ Assert(!tbm->iterating); - Assert(tbm->status == TBM_HASH); + Assert(tbm->status == HASHBM_HASH); hash_seq_init(&status, tbm->pagetable); while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL) @@ -932,3 +973,366 @@ tbm_comparator(const void *left, const v return 1; return 0; } + +/* + * functions related to streaming + */ + +static void * +make_opstream(StreamType kind, void *n1, void *n2) +{ + OpStream *op; + + Assert(kind == BMS_OR || kind == BMS_AND); + Assert(PointerIsValid(n1)); + + op = (OpStream *)palloc(sizeof(OpStream)); + op->needfree = true; + op->type = kind; + op->nextblock = 0; + op->input = NIL; + op->input = lappend(op->input, n1); + if(PointerIsValid(n2)) + op->input = lappend(op->input, n2); + + op->pull = bitmap_stream_iterate; + return (void *)op; +} + +/* + * stream_add_node() - add a new node to a bitmap stream + * node is a base node -- i.e., an index/external + * kind is one of BMS_INDEX, BMS_OR or BMS_AND + */ + +void +stream_add_node(StreamBitmap *sbm, void *node, StreamType kind) +{ + /* initialised */ + if(sbm->opaque) + { + StreamNode *n = (StreamNode *)sbm->opaque; + /* StreamNode is already an index, transform to OpStream */ + if((n->type == BMS_AND && kind == BMS_AND) || + (n->type == BMS_OR && kind == BMS_OR)) + { + OpStream *o = (OpStream *)n; + o->input = lappend(o->input, node); + } + else if((n->type == BMS_AND && kind != BMS_AND) || + (n->type == BMS_OR && kind == BMS_OR) || + (n->type == BMS_INDEX)) + { + sbm->opaque = make_opstream(kind, sbm->opaque, node); + } + else + elog(ERROR, "unknown stream type %i", (int)n->type); + } + else + { + if(kind == BMS_INDEX) + sbm->opaque = (void *)node; + else + sbm->opaque = make_opstream(kind, node, NULL); + } +} + +/* + * tbm_create_stream_node() - 
turn a HashBitmap into a stream + */ + +void * +tbm_create_stream_node(HashBitmap *tbm) +{ + IndexStream *is; + HashStreamOpaque *op; + + is = (IndexStream *)palloc(sizeof(IndexStream)); + op = (HashStreamOpaque *)palloc(sizeof(HashStreamOpaque)); + + is->type = BMS_INDEX; + is->nextblock = 0; + is->pull = tbm_stream_block; + is->free = tbm_stream_free; + + op->tbm = tbm; + op->entry = NULL; + + is->opaque = (void *)op; + + return (void *)is; +} + +/* + * tbm_stream_block() - Fetch the next block from HashBitmap stream + * + * Notice that the IndexStream passed in as opaque will tell us the + * desired block to stream. If the block requested is less than or equal + * to the block we've cached inside the HashStreamOpaque, return that. + */ + +static bool +tbm_stream_block(void *opaque, PagetableEntry *e) +{ + IndexStream *is = (IndexStream *)opaque; + HashStreamOpaque *op = (HashStreamOpaque *)is->opaque; + HashBitmap *tbm = op->tbm; + PagetableEntry *next = op->entry; + bool more; + + /* have we already got an entry? */ + if(next && is->nextblock <= next->blockno) + { + memcpy(e, next, sizeof(PagetableEntry)); + return true; + } + + if (!tbm->iterating) + tbm_begin_iterate(tbm); + + /* we need a new entry */ + op->entry = tbm_next_page(tbm, &more); + if(more) + { + Assert(op->entry); + memcpy(e, op->entry, sizeof(PagetableEntry)); + } + is->nextblock++; + return more; +} + +static void +tbm_stream_free(void *opaque) +{ + /* + * A reference to the plan is kept in the BitmapIndexScanState + * so this is a no-op for now. + */ +#ifdef NOT_USED + IndexStream *is = (IndexStream *)opaque; + HashStreamOpaque *op = (HashStreamOpaque *)is->opaque; + HashBitmap *tbm = op->tbm; + + tbm_free(tbm); + pfree(op); + pfree(is); +#endif +} +/* + * bitmap_stream_iterate() + * + * This is a generic iterator for bitmap streams. The function doesn't + * know anything about the streams it is actually iterating.
+ * + * Returns false when no more results can be obtained, otherwise true. + */ + +bool +bitmap_stream_iterate(void *opaque, PagetableEntry *e) +{ + StreamNode *n = (StreamNode *)opaque; + bool res = false; + + MemSet(e, 0, sizeof(PagetableEntry)); + + if(n->type == BMS_INDEX) + { + IndexStream *is = (IndexStream *)n; + res = is->pull((void *)is, e); + } + else if(n->type == BMS_OR || n->type == BMS_AND) + { + /* + * There are two ways we can do this: either, we could maintain our + * own top level BatchWords structure and pull blocks out of that OR + * we could maintain batch words for each sub map and union/intersect + * those together to get the resulting page entries. + * + * Now, BatchWords are specific to bitmap indexes so we'd have to + * translate HashBitmaps. All the infrastructure is available to + * translate bitmap indexes into the HashBitmap mechanism so + * we'll do that for now. + */ + ListCell *map; + OpStream *op = (OpStream *)n; + BlockNumber minblockno; + ListCell *cell; + int wordnum; + List *matches; + bool empty; + + + /* + * First, iterate through each input bitmap stream and save the + * block which is returned. HashBitmaps are designed such that + * they do not return blocks with no matches -- that is, say a + * HashBitmap has matches for block 1, 4 and 5 it stores matches + * only for those blocks. Therefore, we may have one stream return + * a match for block 10, another for block 15 and another yet for + * block 10 again. In this case, we cannot include block 15 in + * the union/intersection because it represents matches on some + * page later in the scan. We'll get around to it in good time. + * + * In this case, if we're doing a union, we perform the operation + * without reference to block 15. If we're performing an intersection + * we cannot perform it on block 10 because we didn't get any + * matches for block 10 for one of the streams: the intersection + * will fail.
So, we set the desired block (op->nextblock) to + * block 15 and loop around to the `restart' label. + */ +restart: + e->blockno = InvalidBlockNumber; + empty = false; + matches = NIL; + minblockno = InvalidBlockNumber; + Assert(PointerIsValid(op->input)); + foreach(map, op->input) + { + StreamNode *in = (StreamNode *) lfirst(map); + PagetableEntry *new; + bool r; + + new = (PagetableEntry *)palloc(sizeof(PagetableEntry)); + + /* set the desired block */ + in->nextblock = op->nextblock; + r = in->pull((void *)in, new); + + /* + * Let the caller know we got a result from some input + * bitmap. This doesn't hold true if we're doing an + * intersection, and that is handled below + */ + res = res || r; + + /* only include a match if the pull function tells us to */ + if(r) + { + if(minblockno == InvalidBlockNumber) + minblockno = new->blockno; + else if(n->type == BMS_OR) + minblockno = Min(minblockno, new->blockno); + else + minblockno = Max(minblockno, new->blockno); + matches = lappend(matches, (void *)new); + } + else + { + if(n->type == BMS_AND) + { + /* + * No more results for this stream and since + * we're doing an intersection we won't get any + * valid results from now on, so tell our caller that + */ + op->nextblock = minblockno + 1; /* seems safe */ + return false; + } + else if(n->type == BMS_OR) + continue; + } + } + + /* + * Now we iterate through the actual matches and perform the + * desired operation on those from the same minimum block + */ + foreach(cell, matches) + { + PagetableEntry *tmp = (PagetableEntry *)lfirst(cell); + if(tmp->blockno == minblockno) + { + if(e->blockno == InvalidBlockNumber) + { + memcpy(e, tmp, sizeof(PagetableEntry)); + continue; + } + + /* already initialised, so OR together */ + if(tmp->ischunk == true) + { + /* + * Okay, new entry is lossy so mark our + * output as lossy + */ + e->ischunk = true; + /* XXX: we can just return now...
I think :) */ + op->nextblock = minblockno + 1; + list_free_deep(matches); + return res; + } + /* union/intersect existing output and new matches */ + for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) + { + if(n->type == BMS_OR) + e->words[wordnum] |= tmp->words[wordnum]; + else + e->words[wordnum] &= tmp->words[wordnum]; + } + } + else if(n->type == BMS_AND) + { + /* + * One of our input maps didn't return a block for the + * desired block number so, we loop around again. + * + * Notice that we don't set the next block as minblockno + * + 1. We don't know if the other streams will find a + * match for minblockno, so we cannot skip past it yet. + */ + + op->nextblock = minblockno; + empty = true; + break; + } + } + if(empty) + { + /* start again */ + empty = false; + MemSet(e->words, 0, sizeof(tbm_bitmapword) * WORDS_PER_PAGE); + list_free_deep(matches); + goto restart; + } + else + list_free_deep(matches); + if(res) + op->nextblock = minblockno + 1; + } + return res; +} + +/* + * bitmap_stream_free() - free a StreamNode structure + * + * May be just a StreamIndex or could be an OpStream, requiring + * recursive behaviour. 
+ */ + +void +bitmap_stream_free(void *opaque) +{ + StreamNode *s = (StreamNode *)opaque; + + if(s->type == BMS_INDEX) + { + s->free(opaque); + } + else if(s->type == BMS_OR || s->type == BMS_AND) + { + OpStream *op = (OpStream *)s; + ListCell *tmp; + + if(op->needfree) + { + /* iterate through input nodes, freeing each of them */ + foreach(tmp, op->input) + { + StreamNode *n = lfirst(tmp); + n->free((void *)n); + } + list_free(op->input); + op->needfree = false; + } + } +} diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/backend/utils/adt/selfuncs.c bitmap/src/backend/utils/adt/selfuncs.c --- pgsql-head/src/backend/utils/adt/selfuncs.c 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/backend/utils/adt/selfuncs.c 2006-11-10 10:27:11.000000000 +1100 @@ -5073,3 +5073,22 @@ gincostestimate(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +Datum +bmcostestimate(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); + List *indexQuals = (List *) PG_GETARG_POINTER(2); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); + double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + + genericcostestimate(root, index, indexQuals, outer_rel, 0.0, + indexStartupCost, indexTotalCost, + indexSelectivity, indexCorrelation); + + PG_RETURN_VOID(); +} diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out 
--exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/bitmap.h bitmap/src/include/access/bitmap.h --- pgsql-head/src/include/access/bitmap.h 1970-01-01 10:00:00.000000000 +1000 +++ bitmap/src/include/access/bitmap.h 2006-12-04 23:30:55.000000000 +1100 @@ -0,0 +1,592 @@ +/*------------------------------------------------------------------------- + * + * bitmap.h + * header file for on-disk bitmap index access method implementation. + * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + +#ifndef BITMAP_H +#define BITMAP_H + +#include "access/htup.h" +#include "access/itup.h" +#include "access/relscan.h" +#include "access/sdir.h" +#include "access/xlogutils.h" +#include "nodes/tidbitmap.h" +#include "storage/lock.h" +#include "miscadmin.h" + +#define BM_READ BUFFER_LOCK_SHARE +#define BM_WRITE BUFFER_LOCK_EXCLUSIVE +#define BM_NOLOCK (-1) + +/* the size in bits of a hybrid run-length(HRL) word */ +#define BM_HRL_WORD_SIZE 16 + +/* the type for a HRL word */ +typedef uint16 BM_HRL_WORD; + +#define BM_HRL_WORD_LEFTMOST (BM_HRL_WORD_SIZE-1) + +/* + * Metapage, always the first page (page 0) in the index. + * + * This page stores some meta-data information about this index. + */ +typedef struct BMMetaPageData +{ + /* number of indexed tuples in this index */ + uint64 bm_num_tuples; + + /* + * The relation ids for a heap and a btree on this heap. They are + * used to speed up finding the bitmap vector for given attribute + * value(s), see the comments for LOV pages below for more + * information. We consider these as the metadata for LOV pages. + */ + Oid bm_lov_heapId; /* the relation id for the heap */ + Oid bm_lov_indexId; /* the relation id for the index */ + + /* the block number for the last LOV pages. 
*/ + BlockNumber bm_lov_lastpage; + + /* + * Indicates if the bitmap index needs to be re-built during + * vacuuming. + * + * The vacuum command will check this value to determine if + * this bitmap index needs to be re-built. This value is set to + * true if there is an update to a bit that is in the middle + * of its bitmap vector. + */ + bool bm_need_rebuilt; + +} BMMetaPageData; + +typedef BMMetaPageData *BMMetaPage; + +/* + * The meta page is always the first block of the index + */ + +#define BM_METAPAGE 0 + +/* + * The maximum number of heap tuples in one page that is considered + * in the bitmap index. We set this number to be a multiple + * of BM_HRL_WORD_SIZE so that bits for heap + * tuples in different heap pages are stored in different words. + * This makes it easier during the search. + */ +#define BM_MAX_HTUP_PER_PAGE \ + ((((MaxHeapTuplesPerPage - 1) / BM_HRL_WORD_SIZE) + 1) * \ + BM_HRL_WORD_SIZE) + +/* + * LOV (List Of Values) page -- pages to store a list of distinct + * values for attribute(s) to be indexed, some metadata related to + * their corresponding bitmap vectors, and the pointers to their + * bitmap vectors. For each distinct value, there is a BMLOVItemData + * associated with it. A LOV page maintains an array of BMLOVItemData + * instances, called lov items. + * + * To speed up finding the lov item for a given value, we + * create a heap to maintain all distinct values along with the + * block numbers and offset numbers for their lov items in LOV pages. + * That is, there are a total of "(number of indexed attributes) + 2" attributes + * in this new heap. Along with this heap, we also create a new btree + * index on this heap using attribute(s) as btree keys. In this way, + * for any given value, we search this btree to find + * the block number and offset number for its corresponding lov item. + */ + +/* + * The first LOV page is reserved for NULL keys + */ +#define BM_LOV_STARTPAGE 1 + +/* + * Items in a LOV page.
+ *
+ * Each item corresponds to a distinct value for attribute(s)
+ * to be indexed. For multi-column indexes on (a_1,a_2,...,a_n), we say
+ * two values (l_1,l_2,...,l_n) and (k_1,k_2,...,k_n) for (a_1,a_2,...,a_n)
+ * are the same if and only if for all i, l_i=k_i.
+ *
+ */
+typedef struct BMLOVItemData
+{
+    /* the first page and last page of the bitmap vector. */
+    BlockNumber bm_lov_head;
+    BlockNumber bm_lov_tail;
+
+    /*
+     * Additional information to be used to append new bits into
+     * existing bitmap vector that this distinct value is associated with.
+     * The following two words are not stored in the regular bitmap page,
+     * defined below.
+     */
+
+    /* the last complete word in its bitmap vector. */
+    BM_HRL_WORD bm_last_compword;
+
+    /*
+     * the last word in its bitmap vector. This word is not
+     * a complete word. If a newly appended bit makes this word
+     * complete, this word will be merged with bm_last_compword.
+     */
+    BM_HRL_WORD bm_last_word;
+
+    /*
+     * the tid location for the last bit stored in bm_last_compword.
+     * A tid location represents the index position for a bit in a
+     * bitmap vector, which is conceptualized as an array
+     * of bits. This value -- the index position starts from 1, and
+     * is calculated through (block#)*BM_MAX_HTUP_PER_PAGE + (offset#),
+     * where (block#) and (offset#) are from the heap tuple ctid.
+     * This value is used while updating a bit in the middle of
+     * its bitmap vector. When moving the last complete word to
+     * the bitmap page, this value will also be written to that page.
+     * Each bitmap page maintains a similar value -- the tid location
+     * for the last bit stored in that page. This will help us
+     * know the range of tid locations for bits in a bitmap page
+     * without decompressing all bits.
+     */
+    uint64 bm_last_tid_location;
+
+    /*
+     * the tid location of the last bit whose value is 1 (a set bit).
+     * Each bitmap vector will be visited only when there is a new
+     * set bit to be appended/updated. In the appending case, a new
+     * tid location is presented. With this value, we can calculate
+     * how many bits are 0s between this new set bit and the previous
+     * set bit.
+     */
+    uint64 bm_last_setbit;
+
+    /*
+     * Only the two least-significant bits in this byte are used.
+     *
+     * If the first least-significant bit is 1, then it represents
+     * that bm_last_word is a fill word. If the second least-significant
+     * bit is 1, it represents that bm_last_compword is a fill word.
+     */
+    uint8 bm_last_two_headerbits;
+
+} BMLOVItemData;
+
+typedef BMLOVItemData *BMLOVItem;
+
+#define BM_MAX_LOVITEMS_PER_PAGE \
+    ((BLCKSZ-sizeof(PageHeaderData))/sizeof(BMLOVItemData))
+
+/*
+ * Bitmap page -- pages to store bits in a bitmap vector.
+ *
+ * Each bitmap page stores two parts of information: header words and
+ * content words. Each bit in the header words corresponds to
+ * a word in the content words. If a bit in the header words is 1,
+ * then its corresponding content word is a compressed word. Otherwise,
+ * it is a literal word.
+ *
+ * If a content word is a fill word, it means that there is a sequence
+ * of 0 bits or 1 bits. The most significant bit in this content word
+ * tells whether the bits in this sequence are 0s or 1s. The remaining bits
+ * store the value of "the number of bits / BM_HRL_WORD_SIZE".
+ */
+
+/*
+ * Opaque data for a bitmap page.
+ */
+typedef struct BMBitmapOpaqueData
+{
+    uint32 bm_hrl_words_used; /* the number of words used */
+    BlockNumber bm_bitmap_next; /* the next page for this bitmap */
+
+    /*
+     * the tid location for the last bit in this page.
+     */
+    uint64 bm_last_tid_location;
+} BMBitmapOpaqueData;
+typedef BMBitmapOpaqueData *BMBitmapOpaque;
+
+#define BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE \
+    ((BLCKSZ - \
+    MAXALIGN(sizeof(PageHeaderData)) - \
+    MAXALIGN(sizeof(BMBitmapOpaqueData)))/sizeof(BM_HRL_WORD))
+
+#define BM_MAX_NUM_OF_HEADER_WORDS \
+    (((BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE-1)/BM_HRL_WORD_SIZE) + 1)
+
+/*
+ * To make the last header word a complete word, we limit this number to
+ * a multiple of the word size.
+ */
+#define BM_NUM_OF_HRL_WORDS_PER_PAGE \
+    (((BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE - \
+    BM_MAX_NUM_OF_HEADER_WORDS)/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE)
+
+#define BM_NUM_OF_HEADER_WORDS \
+    (((BM_NUM_OF_HRL_WORDS_PER_PAGE-1)/BM_HRL_WORD_SIZE) + 1)
+
+/*
+ * A page of a compressed bitmap
+ */
+typedef struct BMBitmapData
+{
+    BM_HRL_WORD hwords[BM_NUM_OF_HEADER_WORDS];
+    BM_HRL_WORD cwords[BM_NUM_OF_HRL_WORDS_PER_PAGE];
+} BMBitmapData;
+typedef BMBitmapData *BMBitmap;
+
+/*
+ * Data structure used to buffer index creation during bmbuild().
+ * Buffering provides three benefits: firstly, it makes for many fewer
+ * calls to the lower-level bitmap insert functions; secondly, it means that
+ * we reduce the amount of unnecessary compression and decompression we do;
+ * thirdly, in some cases pages for a given bitmap vector will be contiguous
+ * on disk.
+ *
+ * byte_size counts how many bytes we've consumed in the buffer.
+ * max_lov_block is a hint as to whether we'll find a LOV block in lov_blocks
+ * or not (we take advantage of the fact that LOV block numbers will be
+ * increasing).
+ * lov_blocks is a list of LOV block buffers. The structures put in
+ * this list are defined in bitmapinsert.c.
+ */
+
+typedef struct BMTidLocsBuffer
+{
+    uint32 byte_size; /* The size in bytes of the buffer's data */
+    BlockNumber max_lov_block; /* highest lov block we've seen */
+    List *lov_blocks; /* list of lov blocks we're buffering */
+} BMTidLocsBuffer;
+
+
+/*
+ * The number of tid locations to be found at once during query processing.
+ */
+#define BM_BATCH_TIDS (16*1024)
+
+/*
+ * the maximum number of words to be retrieved during BitmapIndexScan.
+ */
+#define BM_MAX_WORDS (BM_NUM_OF_HRL_WORDS_PER_PAGE*4)
+
+/* Some macros for manipulating a bitmap word (arguments fully parenthesized). */
+#define LITERAL_ALL_ZERO 0
+#define LITERAL_ALL_ONE ((BM_HRL_WORD)(~((BM_HRL_WORD)0)))
+
+#define FILL_MASK (~(((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE - 1)))
+
+#define BM_MAKE_FILL_WORD(bit, length) \
+    ((((BM_HRL_WORD)(bit)) << (BM_HRL_WORD_SIZE-1)) | (length))
+
+#define FILL_LENGTH(w) (((BM_HRL_WORD)(w)) & FILL_MASK)
+
+#define MAX_FILL_LENGTH ((((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE-1))-1)
+
+/* get the left most bit of the word */
+#define GET_FILL_BIT(w) (((BM_HRL_WORD)(w))>>BM_HRL_WORD_LEFTMOST)
+
+/*
+ * Given a word number, determine the bit position that it holds in its
+ * header word.
+ */
+#define WORDNO_GET_HEADER_BIT(cw_no) \
+    ((BM_HRL_WORD)1 << (BM_HRL_WORD_SIZE - 1 - ((cw_no) % BM_HRL_WORD_SIZE)))
+
+/*
+ * To see if the content word at cw_no is a compressed word or not we must
+ * look in the header words h_words. Each bit in the header words corresponds
+ * to a word amongst the content words. If the bit is 1, the word is compressed
+ * (i.e., it is a fill word) otherwise it is uncompressed.
+ *
+ * See src/backend/access/bitmap/README for more details
+ */
+
+#define IS_FILL_WORD(h_words, cw_no) \
+    (((h_words)[(cw_no)/BM_HRL_WORD_SIZE]) & WORDNO_GET_HEADER_BIT(cw_no))
+
+/* A simplified interface to IS_FILL_WORD; b points to a BMBatchWords */
+
+#define CUR_WORD_IS_FILL(b) \
+    IS_FILL_WORD((b)->hwords, (b)->startNo)
+
+/*
+ * Calculate the number of header words we need given the number of
+ * content words
+ */
+#define BM_CALC_H_WORDS(c_words) ((((c_words) - 1)/BM_HRL_WORD_SIZE) + 1)
+
+/*
+ * Convert an ItemPointer to and from an integer representation
+ */
+
+#define BM_IPTR_TO_INT(iptr) \
+    ((uint64)ItemPointerGetBlockNumber(iptr) * BM_MAX_HTUP_PER_PAGE + \
+    (uint64)ItemPointerGetOffsetNumber(iptr))
+
+#define BM_INT_GET_BLOCKNO(i) \
+    (((i) - 1)/BM_MAX_HTUP_PER_PAGE)
+
+#define BM_INT_GET_OFFSET(i) \
+    ((((i) - 1) % BM_MAX_HTUP_PER_PAGE) + 1)
+
+typedef struct BMBuildLovData
+{
+    BlockNumber lov_block;
+    OffsetNumber lov_off;
+} BMBuildLovData;
+
+
+/*
+ * the state for index build
+ */
+typedef struct BMBuildState
+{
+    Buffer bm_metabuf;
+    TupleDesc bm_tupDesc;
+    Relation bm_lov_heap;
+    Relation bm_lov_index;
+    ScanKey bm_lov_scanKeys;
+    IndexScanDesc bm_lov_scanDesc;
+    /* We use this hash to cache lookups of lov blocks for different keys */
+    HTAB *lovitem_hash;
+
+    /*
+     * the buffer to store the last several tid locations for each distinct
+     * value.
+     */
+    BMTidLocsBuffer *bm_tidLocsBuffer;
+
+    double ituples; /* the number of index tuples */
+    bool use_wal; /* whether or not we write WAL records */
+} BMBuildState;
+
+/*
+ * Define an iteration result while scanning a BMBatchWords.
+ *
+ * This result includes the last scan position in a BMBatchWords,
+ * and all tids that are generated from the previous scan.
+ */
+typedef struct BMIterateResult
+{
+    uint64 nextTid; /* the first tid for the next iteration */
+    uint32 lastScanPos; /* position in the bitmap word we're looking at */
+    uint32 lastScanWordNo; /* offset in BMBatchWords */
+    uint64 nextTids[BM_BATCH_TIDS]; /* array of matching TIDs */
+    uint32 numOfTids; /* number of TIDs matched */
+    uint32 nextTidLoc; /* the next position in 'nextTids' to be read. */
+} BMIterateResult;
+
+/*
+ * Stores a batch of consecutive bitmap words from a bitmap vector.
+ *
+ * These bitmap words come from a bitmap vector stored in this bitmap
+ * index, or a bitmap vector that is generated by ANDing/ORing several
+ * bitmap vectors.
+ *
+ * This struct also contains information to compute the tid locations
+ * for the set bits in these bitmap words.
+ */
+typedef struct BMBatchWords
+{
+    uint32 maxNumOfWords; /* maximum number of words in this list */
+
+    /* Number of uncompressed words that have been read already */
+    uint32 nwordsread;
+    uint32 nextread; /* next word to read */
+    uint64 firstTid; /* the TID we're up to */
+    uint32 startNo; /* position we're at in cwords */
+    uint32 nwords; /* the number of bitmap words */
+    BM_HRL_WORD *hwords; /* the header words */
+    BM_HRL_WORD *cwords; /* the actual bitmap words */
+} BMBatchWords;
+
+/*
+ * Scan opaque data for one bitmap vector.
+ *
+ * This structure stores a batch of consecutive bitmap words for a
+ * bitmap vector that have been read from the disk, and remembers
+ * the next reading position for the next batch of consecutive
+ * bitmap words.
+ */
+typedef struct BMVectorData
+{
+    Buffer bm_lovBuffer;/* the buffer that contains the LOV item. */
+    OffsetNumber bm_lovOffset; /* the offset of the LOV item */
+    BlockNumber bm_nextBlockNo; /* the next bitmap page block */
+
+    /* indicates if the last two words in the bitmap have been read.
+     * These two words are stored inside a BMLOVItem. If this value
+     * is true, it means this bitmap vector has no more words.
+     */
+    bool bm_readLastWords;
+    BMBatchWords *bm_batchWords; /* actual bitmap words */
+
+} BMVectorData;
+typedef BMVectorData *BMVector;
+
+/*
+ * Defines the current position of a scan.
+ *
+ * For each scan, all related bitmap vectors are read from the bitmap
+ * index, and ORed together into a final bitmap vector. The words
+ * in each bitmap vector are read in batches. This structure stores
+ * the following:
+ * (1) words for a final bitmap vector after ORing words from
+ * related bitmap vectors.
+ * (2) tid locations that satisfy the query.
+ * (3) One BMVectorData for each related bitmap vector.
+ */
+typedef struct BMScanPositionData
+{
+    bool done; /* indicate if this scan is over */
+    int nvec; /* the number of related bitmap vectors */
+    /* the words in the final bitmap vector that satisfies the query. */
+    BMBatchWords *bm_batchWords;
+
+    /*
+     * The BMIterateResult instance that contains the final
+     * tid locations for tuples that satisfy the query.
+     */
+    BMIterateResult bm_result;
+    BMVector posvecs; /* one or more bitmap vectors */
+} BMScanPositionData;
+
+typedef BMScanPositionData *BMScanPosition;
+
+typedef struct BMScanOpaqueData
+{
+    BMScanPosition bm_currPos;
+    /* XXX: we can get rid of markpos */
+    BMScanPosition bm_markPos;
+} BMScanOpaqueData;
+
+typedef BMScanOpaqueData *BMScanOpaque;
+
+/*
+ * XLOG records for bitmap index operations
+ *
+ * Some information is kept in the high 4 bits of the log record's xl_info field.
+ */
+#define XLOG_BITMAP_INSERT_NEWMETA 0x00 /* add a new metapage */
+#define XLOG_BITMAP_INSERT_NEWLOV 0x10 /* add a new LOV page */
+#define XLOG_BITMAP_INSERT_LOVITEM 0x20 /* add a new entry into a LOV page */
+#define XLOG_BITMAP_INSERT_META 0x30 /* update the metapage */
+#define XLOG_BITMAP_INSERT_NEWBITMAP 0x40 /* add a new bitmap page */
+#define XLOG_BITMAP_INSERT_BITMAP 0x50 /* add a new set bit */
+#define XLOG_BITMAP_INSERT_BITMAP_LASTWORDS 0x60 /* update the last 2 words
+    in a bitmap */
+
+/* public routines (each takes PG_FUNCTION_ARGS and returns a Datum) */
+extern Datum bmbuild(PG_FUNCTION_ARGS);
+extern Datum bminsert(PG_FUNCTION_ARGS);
+extern Datum bmbeginscan(PG_FUNCTION_ARGS);
+extern Datum bmgettuple(PG_FUNCTION_ARGS);
+extern Datum bmgetbitmap(PG_FUNCTION_ARGS);
+extern Datum bmrescan(PG_FUNCTION_ARGS);
+extern Datum bmendscan(PG_FUNCTION_ARGS);
+extern Datum bmmarkpos(PG_FUNCTION_ARGS);
+extern Datum bmrestrpos(PG_FUNCTION_ARGS);
+extern Datum bmbulkdelete(PG_FUNCTION_ARGS);
+extern Datum bmvacuumcleanup(PG_FUNCTION_ARGS);
+extern Datum bmoptions(PG_FUNCTION_ARGS);
+
+/* bitmappages.c */
+extern Buffer _bitmap_getbuf(Relation rel, BlockNumber blkno, int access);
+extern void _bitmap_wrtbuf(Buffer buf);
+extern void _bitmap_relbuf(Buffer buf);
+extern void _bitmap_wrtnorelbuf(Buffer buf);
+extern void _bitmap_init_lovpage(Relation rel, Buffer buf);
+extern void _bitmap_init_bitmappage(Relation rel, Buffer buf);
+extern void _bitmap_init_buildstate(Relation index, BMBuildState* bmstate);
+extern void _bitmap_cleanup_buildstate(Relation index, BMBuildState* bmstate);
+extern void _bitmap_init(Relation rel, bool use_wal);
+
+/* bitmapinsert.c */
+extern void _bitmap_buildinsert(Relation rel, ItemPointerData ht_ctid,
+    Datum *attdata, bool *nulls,
+    BMBuildState *state);
+extern void _bitmap_doinsert(Relation rel, ItemPointerData ht_ctid,
+    Datum *attdata, bool *nulls);
+extern void _bitmap_write_alltids(Relation rel, Buffer metabuf,
+    BMTidLocsBuffer *tidLocsBuffer,
+    bool use_wal);
+
+/*
bitmaputil.c */ +extern BMLOVItem _bitmap_formitem(uint64 currTidNumber); +extern void _bitmap_init_batchwords(BMBatchWords* words, + uint32 maxNumOfWords, + MemoryContext mcxt); +extern void _bitmap_copy_batchwords(BMBatchWords *words, BMBatchWords *copyWords); +extern void _bitmap_reset_batchwords(BMBatchWords* words); +extern void _bitmap_cleanup_batchwords(BMBatchWords* words); +extern void _bitmap_cleanup_scanpos(BMVector bmScanPos, + uint32 numBitmapVectors); +extern uint64 _bitmap_findnexttid(BMBatchWords *words, + BMIterateResult *result); +extern void _bitmap_findprevtid(BMIterateResult *result); +extern void _bitmap_findnexttids(BMBatchWords *words, + BMIterateResult *result, uint32 maxTids); +extern bool _bitmap_getbitmapinpage(BMBatchWords* words, + BMIterateResult* result, + BlockNumber nextBlockNo, + PagetableEntry* entry); +#ifdef NOT_USED /* we might use this later */ +extern void _bitmap_intersect(BMBatchWords **batches, uint32 numBatches, + BMBatchWords *result); +#endif +extern void _bitmap_union(BMBatchWords **batches, uint32 numBatches, + BMBatchWords *result); +extern void _bitmap_begin_iterate(BMBatchWords *words, BMIterateResult *result); + +/* bitmapsearch.c */ +extern bool _bitmap_first(IndexScanDesc scan, ScanDirection dir); +extern bool _bitmap_next(IndexScanDesc scan, ScanDirection dir); +extern bool _bitmap_firstbatchwords(IndexScanDesc scan, ScanDirection dir); +extern bool _bitmap_nextbatchwords(IndexScanDesc scan, ScanDirection dir); +extern void _bitmap_findbitmaps(IndexScanDesc scan, ScanDirection dir); + + + +/* bitmapattutil.c */ +extern void _bitmap_create_lov_heapandindex(Relation rel, Oid *heapId, + Oid *indexId); +extern void _bitmap_open_lov_heapandindex(Relation rel, BMMetaPage metapage, + Relation *lovHeapP, Relation *lovIndexP, + LOCKMODE lockMode); +extern void _bitmap_insert_lov(Relation lovHeap, Relation lovIndex, + Datum *datum, bool* nulls); +extern void _bitmap_close_lov_heapandindex(Relation lovHeap, + Relation 
lovIndex, LOCKMODE lockMode); +extern bool _bitmap_findvalue(Relation lovHeap, Relation lovIndex, + ScanKey scanKey, IndexScanDesc scanDesc, + BlockNumber *lovBlock, bool *blockNull, + OffsetNumber *lovOffset, bool *offsetNull); + +/* + * prototypes for functions in bitmapxlog.c + */ +extern void bitmap_redo(XLogRecPtr lsn, XLogRecord *record); +extern void bitmap_undo(XLogRecPtr lsn, XLogRecord *record); +extern void bitmap_desc(StringInfo buf, uint8 xl_info, char *rec); +extern void _bitmap_log_newpage(Relation rel, uint8 info, Buffer buf); +extern void _bitmap_log_metapage(Relation rel, Page page); +extern void _bitmap_log_bitmappage(Relation rel, Buffer bitmapBuffer, + bool isOpaque, uint32 numWords); +extern void _bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer, + OffsetNumber lovOffset, BMLOVItem lovItem); +extern void _bitmap_log_lovitem(Relation rel, Buffer lovBuffer, + bool isNewItem, OffsetNumber offset, + BMLOVItem lovItem); + +#endif diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/genam.h bitmap/src/include/access/genam.h --- pgsql-head/src/include/access/genam.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/genam.h 2006-11-11 05:51:15.000000000 +1100 @@ -98,7 +98,7 @@ extern IndexScanDesc index_beginscan(Rel Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key); -extern IndexScanDesc index_beginscan_multi(Relation indexRelation, +extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key); extern void index_rescan(IndexScanDesc scan, ScanKey key); @@ -108,9 +108,7 @@ extern void index_restrpos(IndexScanDesc extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction); extern bool 
index_getnext_indexitem(IndexScanDesc scan, ScanDirection direction); -extern bool index_getmulti(IndexScanDesc scan, - ItemPointer tids, int32 max_tids, - int32 *returned_tids); +extern Node *index_getbitmap(IndexScanDesc scan, Node *bitmap); extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/gin.h bitmap/src/include/access/gin.h --- pgsql-head/src/include/access/gin.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/gin.h 2006-11-10 10:25:15.000000000 +1100 @@ -420,7 +420,7 @@ extern DLLIMPORT int GinFuzzySearchLimit #define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0) #define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 ) -extern Datum gingetmulti(PG_FUNCTION_ARGS); +extern Datum gingetbitmap(PG_FUNCTION_ARGS); extern Datum gingettuple(PG_FUNCTION_ARGS); /* ginvacuum.c */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/gist_private.h bitmap/src/include/access/gist_private.h --- pgsql-head/src/include/access/gist_private.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/gist_private.h 2006-11-10 09:31:36.000000000 +1100 @@ -269,7 +269,7 @@ extern XLogRecPtr gistxlogInsertCompleti /* gistget.c */ extern Datum gistgettuple(PG_FUNCTION_ARGS); -extern Datum gistgetmulti(PG_FUNCTION_ARGS); +extern Datum 
gistgetbitmap(PG_FUNCTION_ARGS); /* gistutil.c */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/hash.h bitmap/src/include/access/hash.h --- pgsql-head/src/include/access/hash.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/hash.h 2006-11-10 09:31:50.000000000 +1100 @@ -231,7 +231,7 @@ extern Datum hashbuild(PG_FUNCTION_ARGS) extern Datum hashinsert(PG_FUNCTION_ARGS); extern Datum hashbeginscan(PG_FUNCTION_ARGS); extern Datum hashgettuple(PG_FUNCTION_ARGS); -extern Datum hashgetmulti(PG_FUNCTION_ARGS); +extern Datum hashgetbitmap(PG_FUNCTION_ARGS); extern Datum hashrescan(PG_FUNCTION_ARGS); extern Datum hashendscan(PG_FUNCTION_ARGS); extern Datum hashmarkpos(PG_FUNCTION_ARGS); diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/nbtree.h bitmap/src/include/access/nbtree.h --- pgsql-head/src/include/access/nbtree.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/nbtree.h 2006-11-10 09:36:11.000000000 +1100 @@ -471,7 +471,7 @@ extern Datum btbuild(PG_FUNCTION_ARGS); extern Datum btinsert(PG_FUNCTION_ARGS); extern Datum btbeginscan(PG_FUNCTION_ARGS); extern Datum btgettuple(PG_FUNCTION_ARGS); -extern Datum btgetmulti(PG_FUNCTION_ARGS); +extern Datum btgetbitmap(PG_FUNCTION_ARGS); extern Datum btrescan(PG_FUNCTION_ARGS); extern Datum btendscan(PG_FUNCTION_ARGS); extern Datum btmarkpos(PG_FUNCTION_ARGS); diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure 
--exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/relscan.h bitmap/src/include/access/relscan.h --- pgsql-head/src/include/access/relscan.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/relscan.h 2006-11-10 09:32:25.000000000 +1100 @@ -61,7 +61,7 @@ typedef struct IndexScanDescData Snapshot xs_snapshot; /* snapshot to see */ int numberOfKeys; /* number of scan keys */ ScanKey keyData; /* array of scan key descriptors */ - bool is_multiscan; /* TRUE = using amgetmulti */ + bool is_bitmapscan; /* TRUE = using amgetbitmap */ /* signaling to index AM about killing index tuples */ bool kill_prior_tuple; /* last-returned tuple is dead */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/access/rmgr.h bitmap/src/include/access/rmgr.h --- pgsql-head/src/include/access/rmgr.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/access/rmgr.h 2006-11-10 10:01:41.000000000 +1100 @@ -29,7 +29,8 @@ typedef uint8 RmgrId; #define RM_HASH_ID 12 #define RM_GIN_ID 13 #define RM_GIST_ID 14 -#define RM_SEQ_ID 15 +#define RM_BITMAP_ID 15 +#define RM_SEQ_ID 16 #define RM_MAX_ID RM_SEQ_ID #endif /* RMGR_H */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/catalog/pg_am.h bitmap/src/include/catalog/pg_am.h --- pgsql-head/src/include/catalog/pg_am.h 2006-12-04 23:09:05.000000000 +1100 +++ 
bitmap/src/include/catalog/pg_am.h 2006-12-04 19:44:35.000000000 +1100 @@ -52,10 +52,12 @@ CATALOG(pg_am,2601) bool amindexnulls; /* does AM support NULL index entries? */ bool amstorage; /* can storage type differ from column type? */ bool amclusterable; /* does AM support cluster command? */ + bool amcanshrink; /* does AM do anything other than REINDEX in + * VACUUM? */ regproc aminsert; /* "insert this tuple" function */ regproc ambeginscan; /* "start new scan" function */ regproc amgettuple; /* "next valid tuple" function */ - regproc amgetmulti; /* "fetch multiple tuples" function */ + regproc amgetbitmap; /* "fetch next bitmap" function */ regproc amrescan; /* "restart this scan" function */ regproc amendscan; /* "end this scan" function */ regproc ammarkpos; /* "mark current scan position" function */ @@ -78,7 +80,7 @@ typedef FormData_pg_am *Form_pg_am; * compiler constants for pg_am * ---------------- */ -#define Natts_pg_am 23 +#define Natts_pg_am 24 #define Anum_pg_am_amname 1 #define Anum_pg_am_amstrategies 2 #define Anum_pg_am_amsupport 3 @@ -89,36 +91,40 @@ typedef FormData_pg_am *Form_pg_am; #define Anum_pg_am_amindexnulls 8 #define Anum_pg_am_amstorage 9 #define Anum_pg_am_amclusterable 10 -#define Anum_pg_am_aminsert 11 -#define Anum_pg_am_ambeginscan 12 -#define Anum_pg_am_amgettuple 13 -#define Anum_pg_am_amgetmulti 14 -#define Anum_pg_am_amrescan 15 -#define Anum_pg_am_amendscan 16 -#define Anum_pg_am_ammarkpos 17 -#define Anum_pg_am_amrestrpos 18 -#define Anum_pg_am_ambuild 19 -#define Anum_pg_am_ambulkdelete 20 -#define Anum_pg_am_amvacuumcleanup 21 -#define Anum_pg_am_amcostestimate 22 -#define Anum_pg_am_amoptions 23 +#define Anum_pg_am_amcanshrink 11 +#define Anum_pg_am_aminsert 12 +#define Anum_pg_am_ambeginscan 13 +#define Anum_pg_am_amgettuple 14 +#define Anum_pg_am_amgetbitmap 15 +#define Anum_pg_am_amrescan 16 +#define Anum_pg_am_amendscan 17 +#define Anum_pg_am_ammarkpos 18 +#define Anum_pg_am_amrestrpos 19 +#define Anum_pg_am_ambuild 
20 +#define Anum_pg_am_ambulkdelete 21 +#define Anum_pg_am_amvacuumcleanup 22 +#define Anum_pg_am_amcostestimate 23 +#define Anum_pg_am_amoptions 24 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 403 ( btree 5 1 1 t t t t f t btinsert btbeginscan btgettuple btgetmulti btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); +DATA(insert OID = 403 ( btree 5 1 1 t t t t f t t btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash 1 1 0 f f f f f f hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); +DATA(insert OID = 405 ( hash 1 1 0 f f f f f f t hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 100 7 0 f t t t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); +DATA(insert OID = 783 ( gist 100 7 0 f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 100 4 0 f f f f t f gininsert ginbeginscan gingettuple gingetmulti ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); +DATA(insert OID = 2742 ( gin 100 4 0 f f f f t f t gininsert ginbeginscan gingettuple 
gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 +DATA(insert OID = 3013 ( bitmap 5 1 0 f t t t f f f bminsert bmbeginscan bmgettuple bmgetbitmap bmrescan bmendscan bmmarkpos bmrestrpos bmbuild bmbulkdelete bmvacuumcleanup bmcostestimate bmoptions )); +DESCR("bitmap index access method"); +#define BITMAP_AM_OID 3013 #endif /* PG_AM_H */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/catalog/pg_amop.h bitmap/src/include/catalog/pg_amop.h --- pgsql-head/src/include/catalog/pg_amop.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/catalog/pg_amop.h 2006-12-04 19:48:05.000000000 +1100 @@ -892,4 +892,448 @@ DATA(insert ( 2780 0 2 f 2751 )); DATA(insert ( 2780 0 3 t 2752 )); DATA(insert ( 2780 0 4 t 1070 )); +/* + * the operators for the on-disk bitmap index. 
+ */ +/* + * on-disk bitmap index abstime + */ +DATA(insert ( 3014 0 1 f 562 )); +DATA(insert ( 3014 0 2 f 564 )); +DATA(insert ( 3014 0 3 f 560 )); +DATA(insert ( 3014 0 4 f 565 )); +DATA(insert ( 3014 0 5 f 563 )); + +/* + * on-disk bitmap index array + */ +DATA(insert ( 3015 0 1 f 1072 )); +DATA(insert ( 3015 0 2 f 1074 )); +DATA(insert ( 3015 0 3 f 1070 )); +DATA(insert ( 3015 0 4 f 1075 )); +DATA(insert ( 3015 0 5 f 1073 )); + +/* + * on-disk bitmap index bit + */ +DATA(insert ( 3016 0 1 f 1786 )); +DATA(insert ( 3016 0 2 f 1788 )); +DATA(insert ( 3016 0 3 f 1784 )); +DATA(insert ( 3016 0 4 f 1789 )); +DATA(insert ( 3016 0 5 f 1787 )); + +/* + * on-disk bitmap index bool + */ +DATA(insert ( 3017 0 1 f 58 )); +DATA(insert ( 3017 0 2 f 1694 )); +DATA(insert ( 3017 0 3 f 91 )); +DATA(insert ( 3017 0 4 f 1695 )); +DATA(insert ( 3017 0 5 f 59 )); + +/* + * on-disk bitmap index bpchar + */ +DATA(insert ( 3018 0 1 f 1058 )); +DATA(insert ( 3018 0 2 f 1059 )); +DATA(insert ( 3018 0 3 f 1054 )); +DATA(insert ( 3018 0 4 f 1061 )); +DATA(insert ( 3018 0 5 f 1060 )); + +/* + * on-disk bitmap index bytea + */ +DATA(insert ( 3019 0 1 f 1957 )); +DATA(insert ( 3019 0 2 f 1958 )); +DATA(insert ( 3019 0 3 f 1955 )); +DATA(insert ( 3019 0 4 f 1960 )); +DATA(insert ( 3019 0 5 f 1959 )); + +/* + * on-disk bitmap index char + */ +DATA(insert ( 3020 0 1 f 631 )); +DATA(insert ( 3020 0 2 f 632 )); +DATA(insert ( 3020 0 3 f 92 )); +DATA(insert ( 3020 0 4 f 634 )); +DATA(insert ( 3020 0 5 f 633 )); + +/* + * on-disk bitmap index cidr + */ +DATA(insert ( 3021 0 1 f 1203 )); +DATA(insert ( 3021 0 2 f 1204 )); +DATA(insert ( 3021 0 3 f 1201 )); +DATA(insert ( 3021 0 4 f 1206 )); +DATA(insert ( 3021 0 5 f 1205 )); + +/* + * on-disk bitmap index date + */ +DATA(insert ( 3022 0 1 f 1095 )); +DATA(insert ( 3022 0 2 f 1096 )); +DATA(insert ( 3022 0 3 f 1093 )); +DATA(insert ( 3022 0 4 f 1098 )); +DATA(insert ( 3022 0 5 f 1097 )); + +/* + * date-timestamp + */ +DATA(insert ( 3022 1114 1 f 2345 
)); +DATA(insert ( 3022 1114 2 f 2346 )); +DATA(insert ( 3022 1114 3 f 2347 )); +DATA(insert ( 3022 1114 4 f 2348 )); +DATA(insert ( 3022 1114 5 f 2349 )); + +/* + * date-timestamptz + */ +DATA(insert ( 3022 1184 1 f 2358 )); +DATA(insert ( 3022 1184 2 f 2359 )); +DATA(insert ( 3022 1184 3 f 2360 )); +DATA(insert ( 3022 1184 4 f 2361 )); +DATA(insert ( 3022 1184 5 f 2362 )); + +/* + * float4 + */ +DATA(insert ( 3023 0 1 f 622 )); +DATA(insert ( 3023 0 2 f 624 )); +DATA(insert ( 3023 0 3 f 620 )); +DATA(insert ( 3023 0 4 f 625 )); +DATA(insert ( 3023 0 5 f 623 )); + +/* + * float48 + */ +DATA(insert ( 3023 701 1 f 1122 )); +DATA(insert ( 3023 701 2 f 1124 )); +DATA(insert ( 3023 701 3 f 1120 )); +DATA(insert ( 3023 701 4 f 1125 )); +DATA(insert ( 3023 701 5 f 1123 )); + +/* + * float8 + */ +DATA(insert ( 3024 0 1 f 672 )); +DATA(insert ( 3024 0 2 f 673 )); +DATA(insert ( 3024 0 3 f 670 )); +DATA(insert ( 3024 0 4 f 675 )); +DATA(insert ( 3024 0 5 f 674 )); + +/* + * float84 + */ +DATA(insert ( 3024 700 1 f 1132 )); +DATA(insert ( 3024 700 2 f 1134 )); +DATA(insert ( 3024 700 3 f 1130 )); +DATA(insert ( 3024 700 4 f 1135 )); +DATA(insert ( 3024 700 5 f 1133 )); + +/* + * inet + */ +DATA(insert ( 3025 0 1 f 1203 )); +DATA(insert ( 3025 0 2 f 1204 )); +DATA(insert ( 3025 0 3 f 1201 )); +DATA(insert ( 3025 0 4 f 1206 )); +DATA(insert ( 3025 0 5 f 1205 )); + +/* + * int2 + */ +DATA(insert ( 3026 0 1 f 95 )); +DATA(insert ( 3026 0 2 f 522 )); +DATA(insert ( 3026 0 3 f 94 )); +DATA(insert ( 3026 0 4 f 524 )); +DATA(insert ( 3026 0 5 f 520 )); + +/* + * int24 + */ +DATA(insert ( 3026 23 1 f 534 )); +DATA(insert ( 3026 23 2 f 540 )); +DATA(insert ( 3026 23 3 f 532 )); +DATA(insert ( 3026 23 4 f 542 )); +DATA(insert ( 3026 23 5 f 536 )); + +/* + * int28 + */ +DATA(insert ( 3026 20 1 f 1864 )); +DATA(insert ( 3026 20 2 f 1866 )); +DATA(insert ( 3026 20 3 f 1862 )); +DATA(insert ( 3026 20 4 f 1867 )); +DATA(insert ( 3026 20 5 f 1865 )); + +/* + * int4 + */ +DATA(insert ( 3027 0 
1 f 97 )); +DATA(insert ( 3027 0 2 f 523 )); +DATA(insert ( 3027 0 3 f 96 )); +DATA(insert ( 3027 0 4 f 525 )); +DATA(insert ( 3027 0 5 f 521 )); + +/* + * int42 + */ +DATA(insert ( 3027 21 1 f 535 )); +DATA(insert ( 3027 21 2 f 541 )); +DATA(insert ( 3027 21 3 f 533 )); +DATA(insert ( 3027 21 4 f 543 )); +DATA(insert ( 3027 21 5 f 537 )); + +/* + * int48 + */ +DATA(insert ( 3027 20 1 f 37 )); +DATA(insert ( 3027 20 2 f 80 )); +DATA(insert ( 3027 20 3 f 15 )); +DATA(insert ( 3027 20 4 f 82 )); +DATA(insert ( 3027 20 5 f 76 )); + +/* + * int8 + */ +DATA(insert ( 3028 0 1 f 412 )); +DATA(insert ( 3028 0 2 f 414 )); +DATA(insert ( 3028 0 3 f 410 )); +DATA(insert ( 3028 0 4 f 415 )); +DATA(insert ( 3028 0 5 f 413 )); + +/* + * int82 + */ +DATA(insert ( 3028 21 1 f 1870 )); +DATA(insert ( 3028 21 2 f 1872 )); +DATA(insert ( 3028 21 3 f 1868 )); +DATA(insert ( 3028 21 4 f 1873 )); +DATA(insert ( 3028 21 5 f 1871 )); + +/* + * int84 + */ +DATA(insert ( 3028 23 1 f 418 )); +DATA(insert ( 3028 23 2 f 420 )); +DATA(insert ( 3028 23 3 f 416 )); +DATA(insert ( 3028 23 4 f 430 )); +DATA(insert ( 3028 23 5 f 419 )); + +/* + * interval + */ +DATA(insert ( 3029 0 1 f 1332 )); +DATA(insert ( 3029 0 2 f 1333 )); +DATA(insert ( 3029 0 3 f 1330 )); +DATA(insert ( 3029 0 4 f 1335 )); +DATA(insert ( 3029 0 5 f 1334 )); + +/* + * macaddr + */ +DATA(insert ( 3030 0 1 f 1222 )); +DATA(insert ( 3030 0 2 f 1223 )); +DATA(insert ( 3030 0 3 f 1220 )); +DATA(insert ( 3030 0 4 f 1225 )); +DATA(insert ( 3030 0 5 f 1224 )); + +/* + * name + */ +DATA(insert ( 3031 0 1 f 660 )); +DATA(insert ( 3031 0 2 f 661 )); +DATA(insert ( 3031 0 3 f 93 )); +DATA(insert ( 3031 0 4 f 663 )); +DATA(insert ( 3031 0 5 f 662 )); + +/* + * numeric + */ +DATA(insert ( 3032 0 1 f 1754 )); +DATA(insert ( 3032 0 2 f 1755 )); +DATA(insert ( 3032 0 3 f 1752 )); +DATA(insert ( 3032 0 4 f 1757 )); +DATA(insert ( 3032 0 5 f 1756 )); + +/* + * oid + */ +DATA(insert ( 3033 0 1 f 609 )); +DATA(insert ( 3033 0 2 f 611 )); 
+DATA(insert ( 3033 0 3 f 607 )); +DATA(insert ( 3033 0 4 f 612 )); +DATA(insert ( 3033 0 5 f 610 )); + +/* + * oidvector + */ +DATA(insert ( 3034 0 1 f 645 )); +DATA(insert ( 3034 0 2 f 647 )); +DATA(insert ( 3034 0 3 f 649 )); +DATA(insert ( 3034 0 4 f 648 )); +DATA(insert ( 3034 0 5 f 646 )); + +/* + * text + */ +DATA(insert ( 3035 0 1 f 664 )); +DATA(insert ( 3035 0 2 f 665 )); +DATA(insert ( 3035 0 3 f 98 )); +DATA(insert ( 3035 0 4 f 667 )); +DATA(insert ( 3035 0 5 f 666 )); + +/* + * time + */ +DATA(insert ( 3036 0 1 f 1110 )); +DATA(insert ( 3036 0 2 f 1111 )); +DATA(insert ( 3036 0 3 f 1108 )); +DATA(insert ( 3036 0 4 f 1113 )); +DATA(insert ( 3036 0 5 f 1112 )); + +/* + * timestamptz + */ +DATA(insert ( 3037 0 1 f 1322 )); +DATA(insert ( 3037 0 2 f 1323 )); +DATA(insert ( 3037 0 3 f 1320 )); +DATA(insert ( 3037 0 4 f 1325 )); +DATA(insert ( 3037 0 5 f 1324 )); + +/* + * timestamptz-date + */ +DATA(insert ( 3037 1082 1 f 2384 )); +DATA(insert ( 3037 1082 2 f 2385 )); +DATA(insert ( 3037 1082 3 f 2386 )); +DATA(insert ( 3037 1082 4 f 2387 )); +DATA(insert ( 3037 1082 5 f 2388 )); + +/* + * timestamptz-timestamp + */ +DATA(insert ( 3037 1114 1 f 2540 )); +DATA(insert ( 3037 1114 2 f 2541 )); +DATA(insert ( 3037 1114 3 f 2542 )); +DATA(insert ( 3037 1114 4 f 2543 )); +DATA(insert ( 3037 1114 5 f 2544 )); + +/* + * timetz + */ +DATA(insert ( 3038 0 1 f 1552 )); +DATA(insert ( 3038 0 2 f 1553 )); +DATA(insert ( 3038 0 3 f 1550 )); +DATA(insert ( 3038 0 4 f 1555 )); +DATA(insert ( 3038 0 5 f 1554 )); + +/* + * varbit + */ +DATA(insert ( 3039 0 1 f 1806 )); +DATA(insert ( 3039 0 2 f 1808 )); +DATA(insert ( 3039 0 3 f 1804 )); +DATA(insert ( 3039 0 4 f 1809 )); +DATA(insert ( 3039 0 5 f 1807 )); + +/* + * varchar + */ +DATA(insert ( 3040 0 1 f 664 )); +DATA(insert ( 3040 0 2 f 665 )); +DATA(insert ( 3040 0 3 f 98 )); +DATA(insert ( 3040 0 4 f 667 )); +DATA(insert ( 3040 0 5 f 666 )); + +/* + * timestamp + */ +DATA(insert ( 3041 0 1 f 2062 )); +DATA(insert ( 3041 0 
2 f 2063 )); +DATA(insert ( 3041 0 3 f 2060 )); +DATA(insert ( 3041 0 4 f 2065 )); +DATA(insert ( 3041 0 5 f 2064 )); + +/* + * timestamp-date + */ +DATA(insert ( 3041 1082 1 f 2371 )); +DATA(insert ( 3041 1082 2 f 2372 )); +DATA(insert ( 3041 1082 3 f 2373 )); +DATA(insert ( 3041 1082 4 f 2374 )); +DATA(insert ( 3041 1082 5 f 2375 )); + +/* + * timestamp-timestamptz + */ +DATA(insert ( 3041 1184 1 f 2534 )); +DATA(insert ( 3041 1184 2 f 2535 )); +DATA(insert ( 3041 1184 3 f 2536 )); +DATA(insert ( 3041 1184 4 f 2537 )); +DATA(insert ( 3041 1184 5 f 2538 )); + +/* + * text pattern + */ +DATA(insert ( 3042 0 1 f 2314 )); +DATA(insert ( 3042 0 2 f 2315 )); +DATA(insert ( 3042 0 3 f 2316 )); +DATA(insert ( 3042 0 4 f 2317 )); +DATA(insert ( 3042 0 5 f 2318 )); + +/* + * varchar pattern + */ +DATA(insert ( 3043 0 1 f 2314 )); +DATA(insert ( 3043 0 2 f 2315 )); +DATA(insert ( 3043 0 3 f 2316 )); +DATA(insert ( 3043 0 4 f 2317 )); +DATA(insert ( 3043 0 5 f 2318 )); + +/* + * bpchar pattern + */ +DATA(insert ( 3044 0 1 f 2326 )); +DATA(insert ( 3044 0 2 f 2327 )); +DATA(insert ( 3044 0 3 f 2328 )); +DATA(insert ( 3044 0 4 f 2329 )); +DATA(insert ( 3044 0 5 f 2330 )); + +/* + * name pattern + */ +DATA(insert ( 3045 0 1 f 2332 )); +DATA(insert ( 3045 0 2 f 2333 )); +DATA(insert ( 3045 0 3 f 2334 )); +DATA(insert ( 3045 0 4 f 2335 )); +DATA(insert ( 3045 0 5 f 2336 )); + +/* + * money + */ +DATA(insert ( 3046 0 1 f 902 )); +DATA(insert ( 3046 0 2 f 904 )); +DATA(insert ( 3046 0 3 f 900 )); +DATA(insert ( 3046 0 4 f 905 )); +DATA(insert ( 3046 0 5 f 903 )); + +/* + * reltime + */ +DATA(insert ( 3047 0 1 f 568 )); +DATA(insert ( 3047 0 2 f 570 )); +DATA(insert ( 3047 0 3 f 566 )); +DATA(insert ( 3047 0 4 f 571 )); +DATA(insert ( 3047 0 5 f 569 )); + +/* + * tinterval + */ +DATA(insert ( 3048 0 1 f 813 )); +DATA(insert ( 3048 0 2 f 815 )); +DATA(insert ( 3048 0 3 f 811 )); +DATA(insert ( 3048 0 4 f 816 )); +DATA(insert ( 3048 0 5 f 814 )); + #endif /* PG_AMOP_H */ diff -Nupr 
--exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/catalog/pg_amproc.h bitmap/src/include/catalog/pg_amproc.h --- pgsql-head/src/include/catalog/pg_amproc.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/catalog/pg_amproc.h 2006-11-10 11:19:44.000000000 +1100 @@ -308,4 +308,58 @@ DATA(insert ( 2780 0 2 2743 )); DATA(insert ( 2780 0 3 2743 )); DATA(insert ( 2780 0 4 2744 )); +/* + * the operator routines for the on-disk bitmap index. + */ +DATA(insert ( 3014 0 1 357 )); /* abstime */ +DATA(insert ( 3015 0 1 382 )); /* array */ +DATA(insert ( 3016 0 1 1596 )); /* bit */ +DATA(insert ( 3017 0 1 1693 )); /* bool */ +DATA(insert ( 3018 0 1 1078 )); /* bpchar */ +DATA(insert ( 3019 0 1 1954 )); /* bytea */ +DATA(insert ( 3020 0 1 358 )); /* char */ +DATA(insert ( 3021 0 1 926 )); /* cidr */ +DATA(insert ( 3022 0 1 1092 )); /* date */ +DATA(insert ( 3022 1114 1 2344 )); /* date-timestamp */ +DATA(insert ( 3022 1184 1 2357 )); /* date-timestamptz */ +DATA(insert ( 3023 0 1 354 )); /* float4 */ +DATA(insert ( 3023 701 1 2194 )); /* float48 */ +DATA(insert ( 3024 0 1 355 )); /* float8 */ +DATA(insert ( 3024 700 1 2195 )); /* float84 */ +DATA(insert ( 3025 0 1 926 )); /* inet */ +DATA(insert ( 3026 0 1 350 )); /* int2 */ +DATA(insert ( 3026 23 1 2190 )); /* int24 */ +DATA(insert ( 3026 20 1 2192 )); /* int28 */ +DATA(insert ( 3027 0 1 351 )); /* int4 */ +DATA(insert ( 3027 21 1 2191 )); /* int42 */ +DATA(insert ( 3027 20 1 2188 )); /* int48 */ +DATA(insert ( 3028 0 1 842 )); /* int8 */ +DATA(insert ( 3028 21 1 2193 )); /* int82 */ +DATA(insert ( 3028 23 1 2189 )); /* int84 */ +DATA(insert ( 3029 0 1 1315 )); /* interval */ +DATA(insert ( 3030 0 1 836 )); /* macaddr */ +DATA(insert ( 3031 0 1 359 )); /* name */ +DATA(insert (
3032 0 1 1769 )); /* numeric */ +DATA(insert ( 3033 0 1 356 )); /* oid */ +DATA(insert ( 3034 0 1 404 )); /* oidvector */ +DATA(insert ( 3035 0 1 360 )); /* text */ +DATA(insert ( 3036 0 1 1107 )); /* time */ +DATA(insert ( 3037 0 1 1314 )); /* timestamptz */ +DATA(insert ( 3037 1082 1 2383 )); /* timestamptz-date */ +DATA(insert ( 3037 1114 1 2533 )); /* timestamptz-timestamp */ +DATA(insert ( 3038 0 1 1358 )); /* timetz */ +DATA(insert ( 3039 0 1 1672 )); /* varbit */ +DATA(insert ( 3040 0 1 360 )); /* varchar */ +DATA(insert ( 3041 0 1 2045 )); /* timestamp */ +DATA(insert ( 3041 1082 1 2370 )); /* timestamp-date */ +DATA(insert ( 3041 1184 1 2526 )); /* timestamp-timestamptz */ +DATA(insert ( 3042 0 1 2166 )); /* text pattern */ +DATA(insert ( 3043 0 1 2166 )); /* varchar pattern */ +DATA(insert ( 3044 0 1 2180 )); /* bpchar pattern */ +DATA(insert ( 3045 0 1 2187 )); /* name pattern */ +DATA(insert ( 3046 0 1 377 )); /* money */ +DATA(insert ( 3047 0 1 380 )); /* reltime */ +DATA(insert ( 3048 0 1 381 )); /* tinterval */ + + #endif /* PG_AMPROC_H */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/catalog/pg_namespace.h bitmap/src/include/catalog/pg_namespace.h --- pgsql-head/src/include/catalog/pg_namespace.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/catalog/pg_namespace.h 2006-08-01 09:43:24.000000000 +1000 @@ -74,6 +74,9 @@ DESCR("System catalog schema"); DATA(insert OID = 99 ( "pg_toast" PGUID _null_ )); DESCR("Reserved schema for TOAST tables"); #define PG_TOAST_NAMESPACE 99 +DATA(insert OID = 3012 ( "pg_bitmapindex" PGUID _null_ )); +DESCR("Reserved schema for internal relations of bitmap indexes"); +#define PG_BITMAPINDEX_NAMESPACE 3012 DATA(insert OID = 2200 ( "public" PGUID _null_ 
)); DESCR("Standard public schema"); #define PG_PUBLIC_NAMESPACE 2200 diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/catalog/pg_opclass.h bitmap/src/include/catalog/pg_opclass.h --- pgsql-head/src/include/catalog/pg_opclass.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/catalog/pg_opclass.h 2006-11-10 10:44:53.000000000 +1100 @@ -208,4 +208,43 @@ DATA(insert OID = 2778 ( 2742 _money_ops DATA(insert OID = 2779 ( 2742 _reltime_ops PGNSP PGUID 1024 t 703 )); DATA(insert OID = 2780 ( 2742 _tinterval_ops PGNSP PGUID 1025 t 704 )); +/* + * the operator classes for the on-disk bitmap index. + */ +DATA(insert OID = 3014 ( 3013 abstime_ops PGNSP PGUID 702 t 0 )); +DATA(insert OID = 3015 ( 3013 array_ops PGNSP PGUID 2277 t 0 )); +DATA(insert OID = 3016 ( 3013 bit_ops PGNSP PGUID 1560 t 0 )); +DATA(insert OID = 3017 ( 3013 bool_ops PGNSP PGUID 16 t 0 )); +DATA(insert OID = 3018 ( 3013 bpchar_ops PGNSP PGUID 1042 t 0 )); +DATA(insert OID = 3019 ( 3013 bytea_ops PGNSP PGUID 17 t 0 )); +DATA(insert OID = 3020 ( 3013 char_ops PGNSP PGUID 18 t 0 )); +DATA(insert OID = 3021 ( 3013 cidr_ops PGNSP PGUID 650 t 0 )); +DATA(insert OID = 3022 ( 3013 date_ops PGNSP PGUID 1082 t 0 )); +DATA(insert OID = 3023 ( 3013 float4_ops PGNSP PGUID 700 t 0 )); +DATA(insert OID = 3024 ( 3013 float8_ops PGNSP PGUID 701 t 0 )); +DATA(insert OID = 3025 ( 3013 inet_ops PGNSP PGUID 869 t 0 )); +DATA(insert OID = 3026 ( 3013 int2_ops PGNSP PGUID 21 t 0 )); +DATA(insert OID = 3027 ( 3013 int4_ops PGNSP PGUID 23 t 0 )); +DATA(insert OID = 3028 ( 3013 int8_ops PGNSP PGUID 20 t 0 )); +DATA(insert OID = 3029 ( 3013 interval_ops PGNSP PGUID 1186 t 0 )); +DATA(insert OID = 3030 ( 3013 macaddr_ops PGNSP PGUID 829 t 0 )); +DATA(insert OID = 3031 ( 3013
name_ops PGNSP PGUID 19 t 0 )); +DATA(insert OID = 3032 ( 3013 numeric_ops PGNSP PGUID 1700 t 0 )); +DATA(insert OID = 3033 ( 3013 oid_ops PGNSP PGUID 26 t 0 )); +DATA(insert OID = 3034 ( 3013 oidvector_ops PGNSP PGUID 30 t 0 )); +DATA(insert OID = 3035 ( 3013 text_ops PGNSP PGUID 25 t 0 )); +DATA(insert OID = 3036 ( 3013 time_ops PGNSP PGUID 1083 t 0 )); +DATA(insert OID = 3037 ( 3013 timestamptz_ops PGNSP PGUID 1184 t 0 )); +DATA(insert OID = 3038 ( 3013 timetz_ops PGNSP PGUID 1266 t 0 )); +DATA(insert OID = 3039 ( 3013 varbit_ops PGNSP PGUID 1562 t 0 )); +DATA(insert OID = 3040 ( 3013 varchar_ops PGNSP PGUID 1043 t 0 )); +DATA(insert OID = 3041 ( 3013 timestamp_ops PGNSP PGUID 1114 t 0 )); +DATA(insert OID = 3042 ( 3013 text_pattern_ops PGNSP PGUID 25 f 0 )); +DATA(insert OID = 3043 ( 3013 varchar_pattern_ops PGNSP PGUID 1043 f 0 )); +DATA(insert OID = 3044 ( 3013 bpchar_pattern_ops PGNSP PGUID 1042 f 0 )); +DATA(insert OID = 3045 ( 3013 name_pattern_ops PGNSP PGUID 19 f 0 )); +DATA(insert OID = 3046 ( 3013 money_ops PGNSP PGUID 790 t 0 )); +DATA(insert OID = 3047 ( 3013 reltime_ops PGNSP PGUID 703 t 0 )); +DATA(insert OID = 3048 ( 3013 tinterval_ops PGNSP PGUID 704 t 0 )); + #endif /* PG_OPCLASS_H */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/catalog/pg_proc.h bitmap/src/include/catalog/pg_proc.h --- pgsql-head/src/include/catalog/pg_proc.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/catalog/pg_proc.h 2006-12-04 23:03:21.000000000 +1100 @@ -658,7 +658,7 @@ DESCR("convert float4 to int4"); DATA(insert OID = 330 ( btgettuple PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_ _null_ _null_ btgettuple - _null_ )); DESCR("btree(internal)"); -DATA(insert OID = 636 ( btgetmulti PGNSP PGUID 12 f f t 
f v 4 16 "2281 2281 2281 2281" _null_ _null_ _null_ btgetmulti - _null_ )); +DATA(insert OID = 636 ( btgetbitmap PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ btgetbitmap - _null_ )); DESCR("btree(internal)"); DATA(insert OID = 331 ( btinsert PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ btinsert - _null_ )); DESCR("btree(internal)"); @@ -777,7 +777,7 @@ DESCR("convert char(n) to name"); DATA(insert OID = 440 ( hashgettuple PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_ _null_ _null_ hashgettuple - _null_ )); DESCR("hash(internal)"); -DATA(insert OID = 637 ( hashgetmulti PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_ _null_ _null_ hashgetmulti - _null_ )); +DATA(insert OID = 637 ( hashgetbitmap PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ hashgetbitmap - _null_ )); DESCR("hash(internal)"); DATA(insert OID = 441 ( hashinsert PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ hashinsert - _null_ )); DESCR("hash(internal)"); @@ -1045,7 +1045,7 @@ DESCR("smaller of two"); DATA(insert OID = 774 ( gistgettuple PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_ _null_ _null_ gistgettuple - _null_ )); DESCR("gist(internal)"); -DATA(insert OID = 638 ( gistgetmulti PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_ _null_ _null_ gistgetmulti - _null_ )); +DATA(insert OID = 638 ( gistgetbitmap PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ gistgetbitmap - _null_ )); DESCR("gist(internal)"); DATA(insert OID = 775 ( gistinsert PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gistinsert - _null_ )); DESCR("gist(internal)"); @@ -3907,7 +3907,7 @@ DESCR("GiST support"); /* GIN */ DATA(insert OID = 2730 ( gingettuple PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_ _null_ _null_ gingettuple - _null_ )); DESCR("gin(internal)"); -DATA(insert OID = 2731 ( gingetmulti PGNSP PGUID 12 f f t f v 4 16 "2281 2281 
2281 2281" _null_ _null_ _null_ gingetmulti - _null_ )); +DATA(insert OID = 2731 ( gingetbitmap PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ gingetbitmap - _null_ )); DESCR("gin(internal)"); DATA(insert OID = 2732 ( gininsert PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gininsert - _null_ )); DESCR("gin(internal)"); @@ -3974,6 +3974,33 @@ DESCR("release shared advisory lock"); DATA(insert OID = 2892 ( pg_advisory_unlock_all PGNSP PGUID 12 f f t f v 0 2278 "" _null_ _null_ _null_ pg_advisory_unlock_all - _null_ )); DESCR("release all advisory locks"); +/* the bitmap index access method routines */ +DATA(insert OID = 3050 ( bmgettuple PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_ _null_ _null_ bmgettuple - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3051 ( bmgetbitmap PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ bmgetbitmap - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3001 ( bminsert PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ bminsert - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3002 ( bmbeginscan PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ _null_ _null_ bmbeginscan - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3003 ( bmrescan PGNSP PGUID 12 f f t f v 2 2278 "2281 2281" _null_ _null_ _null_ bmrescan - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3004 ( bmendscan PGNSP PGUID 12 f f t f v 1 2278 "2281" _null_ _null_ _null_ bmendscan - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3005 ( bmmarkpos PGNSP PGUID 12 f f t f v 1 2278 "2281" _null_ _null_ _null_ bmmarkpos - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3006 ( bmrestrpos PGNSP PGUID 12 f f t f v 1 2278 "2281" _null_ _null_ _null_ bmrestrpos - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3007 ( bmbuild PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ _null_ _null_ 
bmbuild - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3008 ( bmbulkdelete PGNSP PGUID 12 f f t f v 4 2281 "2281 2281 2281 2281" _null_ _null_ _null_ bmbulkdelete - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3009 ( bmvacuumcleanup PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ bmvacuumcleanup - _null_ )); +DATA(insert OID = 3010 ( bmcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ bmcostestimate - _null_ )); +DESCR("bitmap(internal)"); +DATA(insert OID = 3011 ( bmoptions PGNSP PGUID 12 f f t f s 2 17 "1009 16" _null_ _null_ _null_ bmoptions - _null_ )); +DESCR("bitmap(internal)"); + /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/nodes/execnodes.h bitmap/src/include/nodes/execnodes.h --- pgsql-head/src/include/nodes/execnodes.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/nodes/execnodes.h 2006-12-04 23:04:13.000000000 +1100 @@ -18,6 +18,7 @@ #include "nodes/params.h" #include "nodes/plannodes.h" #include "nodes/tidbitmap.h" +#include "nodes/tidbitmap.h" #include "utils/hsearch.h" #include "utils/tuplestore.h" @@ -865,6 +866,7 @@ typedef struct BitmapAndState PlanState ps; /* its first field is NodeTag */ PlanState **bitmapplans; /* array of PlanStates for my inputs */ int nplans; /* number of input plans */ + Node *bitmap; /* output stream bitmap */ } BitmapAndState; /* ---------------- @@ -876,6 +878,7 @@ typedef struct BitmapOrState PlanState ps; /* its first field is NodeTag */ PlanState **bitmapplans; /* array of PlanStates for my inputs
*/ int nplans; /* number of input plans */ + Node *bitmap; /* output bitmap */ } BitmapOrState; /* ---------------------------------------------------------------- @@ -979,7 +982,7 @@ typedef struct IndexScanState typedef struct BitmapIndexScanState { ScanState ss; /* its first field is NodeTag */ - TIDBitmap *biss_result; + Node *bitmap; /* output bitmap */ ScanKey biss_ScanKeys; int biss_NumScanKeys; IndexRuntimeKeyInfo *biss_RuntimeKeys; @@ -1004,7 +1007,7 @@ typedef struct BitmapHeapScanState { ScanState ss; /* its first field is NodeTag */ List *bitmapqualorig; - TIDBitmap *tbm; + Node *tbm; TBMIterateResult *tbmres; } BitmapHeapScanState; diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/nodes/nodes.h bitmap/src/include/nodes/nodes.h --- pgsql-head/src/include/nodes/nodes.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/nodes/nodes.h 2006-11-10 09:26:52.000000000 +1100 @@ -335,7 +335,8 @@ typedef enum NodeTag */ T_TriggerData = 900, /* in commands/trigger.h */ T_ReturnSetInfo, /* in nodes/execnodes.h */ - T_TIDBitmap /* in nodes/tidbitmap.h */ + T_HashBitmap, /* in nodes/tidbitmap.h */ + T_StreamBitmap /* in nodes/tidbitmap.h */ } NodeTag; /* diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/nodes/tidbitmap.h bitmap/src/include/nodes/tidbitmap.h --- pgsql-head/src/include/nodes/tidbitmap.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/nodes/tidbitmap.h 2006-12-04 22:28:37.000000000 +1100 @@ -22,14 +22,137 @@ #ifndef TIDBITMAP_H 
#define TIDBITMAP_H +#include "c.h" +#include "access/htup.h" +#include "nodes/nodes.h" +#include "nodes/pg_list.h" #include "storage/itemptr.h" +#include "storage/bufpage.h" +/* + * The maximum number of tuples per page is not large (typically 256 with + * 8K pages, or 1024 with 32K pages). So there's not much point in making + * the per-page bitmaps variable size. We just legislate that the size + * is this: + */ +#define MAX_TUPLES_PER_PAGE MaxHeapTuplesPerPage + +/* + * When we have to switch over to lossy storage, we use a data structure + * with one bit per page, where all pages having the same number DIV + * PAGES_PER_CHUNK are aggregated into one chunk. When a chunk is present + * and has the bit set for a given page, there must not be a per-page entry + * for that page in the page table. + * + * We actually store both exact pages and lossy chunks in the same hash + * table, using identical data structures. (This is because dynahash.c's + * memory management doesn't allow space to be transferred easily from one + * hashtable to another.) Therefore it's best if PAGES_PER_CHUNK is the + * same as MAX_TUPLES_PER_PAGE, or at least not too different. But we + * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer + * remainder operations. 
So, define it like this: + */ +#define PAGES_PER_CHUNK (BLCKSZ / 32) + +/* The bitmap unit size can be adjusted by changing these declarations: */ +#define TBM_BITS_PER_BITMAPWORD 32 +typedef uint32 tbm_bitmapword; /* must be an unsigned type */ + +/* number of active words for an exact page: */ +#define WORDS_PER_PAGE ((MAX_TUPLES_PER_PAGE - 1) / TBM_BITS_PER_BITMAPWORD + 1) +/* number of active words for a lossy chunk: */ +#define WORDS_PER_CHUNK ((PAGES_PER_CHUNK - 1) / TBM_BITS_PER_BITMAPWORD + 1) + +/* + * different node types for streaming bitmaps + */ + +typedef enum StreamType +{ + BMS_INDEX, /* pull the data from the index itself */ + BMS_AND, /* AND together input streams */ + BMS_OR /* OR together input streams */ +} StreamType; + + +/* + * The hashtable entries are represented by this data structure. For + * an exact page, blockno is the page number and bit k of the bitmap + * represents tuple offset k+1. For a lossy chunk, blockno is the first + * page in the chunk (this must be a multiple of PAGES_PER_CHUNK) and + * bit k represents page blockno+k. Note that it is not possible to + * have exact storage for the first page of a chunk if we are using + * lossy storage for any page in the chunk's range, since the same + * hashtable entry has to serve both purposes. + */ +typedef struct PagetableEntry +{ + BlockNumber blockno; /* page number (hashtable key) */ + bool ischunk; /* T = lossy storage, F = exact */ + tbm_bitmapword words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)]; +} PagetableEntry; /* * Actual bitmap representation is private to tidbitmap.c. Callers can - * do IsA(x, TIDBitmap) on it, but nothing else. + * do IsA(x, HashBitmap) on it, but nothing else. */ -typedef struct TIDBitmap TIDBitmap; +typedef struct HashBitmap HashBitmap; + +/* + * Stream bitmap representation. 
+ */ +typedef struct StreamBitmap +{ + NodeTag type; /* to make it a valid Node */ + PagetableEntry entry; /* a page of tids in this stream bitmap */ + void *opaque; /* state internal to stream implementation */ +} StreamBitmap; + +/* + * Generic type so that we know what kind of stream object we're + * dealing with. Notice that the first 5 fields are packed the same + * in the IndexStream and OpStream structures! + */ +typedef struct StreamNode +{ + StreamType type; /* type of stream */ + bool (*pull) (void *opaque, PagetableEntry *e); + void (*free) (void *opaque); + BlockNumber nextblock; /* block number we're up to */ + bool needfree; /* does the opaque need freeing? */ +} StreamNode; + +/* + * Storage for state specific to the streaming of blocks from the index + * itself. + */ +typedef struct IndexStream +{ + StreamType type; + /* pull up another block */ + bool (*pull) (void *opaque, PagetableEntry *e); + /* stream specific free */ + void (*free) (void *opaque); + BlockNumber nextblock; /* block number we're up to */ + bool needfree; + void *opaque; + /* function to pull more data */ +} IndexStream; + +/* + * Storage for streaming of multiple index streams which need to be + * AND or OR'd together + */ + +typedef struct OpStream +{ + StreamType type; + bool (*pull) (void *opaque, PagetableEntry *e); + void (*free) (void *opaque); + BlockNumber nextblock; /* block number we're up to */ + bool needfree; + List *input; /* input streams */ +} OpStream; /* Result structure for tbm_iterate */ typedef struct @@ -40,18 +163,20 @@ typedef struct } TBMIterateResult; /* VARIABLE LENGTH STRUCT */ /* function prototypes in nodes/tidbitmap.c */ +extern HashBitmap *tbm_create(long maxbytes); +extern void tbm_free(HashBitmap *tbm); -extern TIDBitmap *tbm_create(long maxbytes); -extern void tbm_free(TIDBitmap *tbm); - -extern void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids); - -extern void tbm_union(TIDBitmap *a, const TIDBitmap *b); -extern void
tbm_intersect(TIDBitmap *a, const TIDBitmap *b); - -extern bool tbm_is_empty(const TIDBitmap *tbm); - -extern void tbm_begin_iterate(TIDBitmap *tbm); -extern TBMIterateResult *tbm_iterate(TIDBitmap *tbm); +extern void tbm_add_tuples(HashBitmap *tbm, const ItemPointer tids, int ntids); +extern void tbm_union(HashBitmap *a, const HashBitmap *b); +extern void tbm_intersect(HashBitmap *a, const HashBitmap *b); +extern bool tbm_is_empty(const HashBitmap *tbm); + +extern void tbm_begin_iterate(HashBitmap *tbm); +extern bool tbm_iterate(Node *tbm, TBMIterateResult *output); + +extern void stream_add_node(StreamBitmap *strm, void *node, StreamType kind); +extern void *tbm_create_stream_node(HashBitmap *tbm); +extern bool bitmap_stream_iterate(void *opaque, PagetableEntry *e); +extern void bitmap_stream_free(void *opaque); #endif /* TIDBITMAP_H */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/utils/rel.h bitmap/src/include/utils/rel.h --- pgsql-head/src/include/utils/rel.h 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/utils/rel.h 2006-12-04 19:59:27.000000000 +1100 @@ -106,7 +106,8 @@ typedef struct RelationAmInfo FmgrInfo aminsert; FmgrInfo ambeginscan; FmgrInfo amgettuple; - FmgrInfo amgetmulti; + FmgrInfo amgetbitmap; + FmgrInfo amgetbitmapwords; FmgrInfo amrescan; FmgrInfo amendscan; FmgrInfo ammarkpos; diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/include/utils/selfuncs.h bitmap/src/include/utils/selfuncs.h --- pgsql-head/src/include/utils/selfuncs.h 
2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/include/utils/selfuncs.h 2006-11-10 10:29:25.000000000 +1100 @@ -173,5 +173,6 @@ extern Datum btcostestimate(PG_FUNCTION_ extern Datum hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); extern Datum gincostestimate(PG_FUNCTION_ARGS); +extern Datum bmcostestimate(PG_FUNCTION_ARGS); #endif /* SELFUNCS_H */ diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/test/regress/expected/create_index.out bitmap/src/test/regress/expected/create_index.out --- pgsql-head/src/test/regress/expected/create_index.out 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/test/regress/expected/create_index.out 2006-11-23 16:50:06.000000000 +1100 @@ -410,3 +410,171 @@ Indexes: "std_index" btree (f2) DROP TABLE concur_heap; +SET enable_seqscan = OFF; +SET enable_indexscan = ON; +SET enable_bitmapscan = ON; +create table bm_test (i int, t text); +insert into bm_test select i % 10, (i % 10)::text from generate_series(1, 100) i; +create index bm_test_idx on bm_test using bitmap (i); +select count(*) from bm_test where i=1; + count +------- + 10 +(1 row) + +select count(*) from bm_test where i in(1, 2); + count +------- + 20 +(1 row) + +select * from bm_test where i > 10; + i | t +---+--- +(0 rows) + +reindex index bm_test_idx; +select count(*) from bm_test where i in(1, 2); + count +------- + 20 +(1 row) + +drop index bm_test_idx; +create index bm_test_multi_idx on bm_test using bitmap(i, t); +select * from bm_test where i=5 and t='5'; + i | t +---+--- + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 +(10 rows) + +select * from bm_test where i=5 or t='6'; + i | t +---+--- + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 
| 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 + 5 | 5 + 6 | 6 +(20 rows) + +select * from bm_test where i between 1 and 10 and i::text = t; + i | t +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(90 rows) + +drop table bm_test; diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/test/regress/expected/oidjoins.out bitmap/src/test/regress/expected/oidjoins.out --- pgsql-head/src/test/regress/expected/oidjoins.out 2006-12-04 23:09:05.000000000 +1100 +++ bitmap/src/test/regress/expected/oidjoins.out 2006-11-10 11:51:07.000000000 +1100 @@ -65,12 +65,12 @@ WHERE amgettuple != 0 AND ------+------------ (0 rows) -SELECT ctid, amgetmulti +SELECT ctid, amgetbitmap FROM pg_catalog.pg_am fk -WHERE amgetmulti != 0 AND - NOT EXISTS(SELECT 1 FROM pg_catalog.pg_proc pk WHERE pk.oid = fk.amgetmulti); - ctid | amgetmulti -------+------------ +WHERE amgetbitmap != 0 AND + NOT EXISTS(SELECT 1 FROM pg_catalog.pg_proc pk WHERE pk.oid = fk.amgetbitmap); + ctid | amgetbitmap +------+------------- (0 rows) SELECT ctid, amrescan diff -Nupr 
--exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/test/regress/expected/opr_sanity.out bitmap/src/test/regress/expected/opr_sanity.out --- pgsql-head/src/test/regress/expected/opr_sanity.out 2006-09-10 10:29:35.000000000 +1000 +++ bitmap/src/test/regress/expected/opr_sanity.out 2006-11-10 11:51:07.000000000 +1100 @@ -844,7 +844,17 @@ ORDER BY 1, 2, 3; 2742 | 2 | @> 2742 | 3 | <@ 2742 | 4 | = -(30 rows) + 3013 | 1 | < + 3013 | 1 | ~<~ + 3013 | 2 | <= + 3013 | 2 | ~<=~ + 3013 | 3 | = + 3013 | 3 | ~=~ + 3013 | 4 | >= + 3013 | 4 | ~>=~ + 3013 | 5 | > + 3013 | 5 | ~>~ +(40 rows) -- Check that all operators linked to by opclass entries have selectivity -- estimators. This is not absolutely required, but it seems a reasonable diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/test/regress/sql/create_index.sql bitmap/src/test/regress/sql/create_index.sql --- pgsql-head/src/test/regress/sql/create_index.sql 2006-09-10 10:29:35.000000000 +1000 +++ bitmap/src/test/regress/sql/create_index.sql 2006-11-23 16:49:38.000000000 +1100 @@ -259,3 +259,24 @@ COMMIT; \d concur_heap DROP TABLE concur_heap; + + +SET enable_seqscan = OFF; +SET enable_indexscan = ON; +SET enable_bitmapscan = ON; + +create table bm_test (i int, t text); +insert into bm_test select i % 10, (i % 10)::text from generate_series(1, 100) i; +create index bm_test_idx on bm_test using bitmap (i); +select count(*) from bm_test where i=1; +select count(*) from bm_test where i in(1, 2); +select * from bm_test where i > 10; +reindex index 
bm_test_idx; +select count(*) from bm_test where i in(1, 2); +drop index bm_test_idx; +create index bm_test_multi_idx on bm_test using bitmap(i, t); +select * from bm_test where i=5 and t='5'; +select * from bm_test where i=5 or t='6'; +select * from bm_test where i between 1 and 10 and i::text = t; +drop table bm_test; + diff -Nupr --exclude=.svn --exclude=CVS --exclude='*~' --exclude='*.diff' --exclude=tags --exclude=configure --exclude=config.guess --exclude=config.sub --exclude=contrib --exclude=config.log --exclude=cscope.out --exclude=gram.c --exclude=scan.c --exclude='*.po' --exclude=parse.h pgsql-head/src/test/regress/sql/oidjoins.sql bitmap/src/test/regress/sql/oidjoins.sql --- pgsql-head/src/test/regress/sql/oidjoins.sql 2005-04-12 14:26:34.000000000 +1000 +++ bitmap/src/test/regress/sql/oidjoins.sql 2006-11-10 11:44:07.000000000 +1100 @@ -33,10 +33,10 @@ SELECT ctid, amgettuple FROM pg_catalog.pg_am fk WHERE amgettuple != 0 AND NOT EXISTS(SELECT 1 FROM pg_catalog.pg_proc pk WHERE pk.oid = fk.amgettuple); -SELECT ctid, amgetmulti +SELECT ctid, amgetbitmap FROM pg_catalog.pg_am fk -WHERE amgetmulti != 0 AND - NOT EXISTS(SELECT 1 FROM pg_catalog.pg_proc pk WHERE pk.oid = fk.amgetmulti); +WHERE amgetbitmap != 0 AND + NOT EXISTS(SELECT 1 FROM pg_catalog.pg_proc pk WHERE pk.oid = fk.amgetbitmap); SELECT ctid, amrescan FROM pg_catalog.pg_am fk WHERE amrescan != 0 AND