diff -Ncpr pgsql.head/doc/src/sgml/indices.sgml bmdist/doc/src/sgml/indices.sgml *** pgsql.head/doc/src/sgml/indices.sgml 2006-05-24 21:01:39.000000000 +1000 --- bmdist/doc/src/sgml/indices.sgml 2006-08-01 12:57:03.000000000 +1000 *************** CREATE INDEX test1_id_index ON test1 (id *** 104,110 **** PostgreSQL provides several index types: ! B-tree, Hash, and GiST. Each index type uses a different algorithm that is best suited to different types of queries. By default, the CREATE INDEX command will create a B-tree index, which fits the most common situations. --- 104,110 ---- PostgreSQL provides several index types: ! B-tree, Hash, bitmap and GiST. Each index type uses a different algorithm that is best suited to different types of queries. By default, the CREATE INDEX command will create a B-tree index, which fits the most common situations. *************** CREATE INDEX name index + bitmap + + + bitmap + index + + Bitmap indexes can only handle equality comparisons. Bitmaps are suited to + large, mostly static data sets on keys with few distinct values. For this + kind of data, bitmap indexes are much smaller and faster than B-tree + indexes. + + + + + index GiST diff -Ncpr pgsql.head/doc/src/sgml/ref/create_index.sgml bmdist/doc/src/sgml/ref/create_index.sgml *** pgsql.head/doc/src/sgml/ref/create_index.sgml 2006-07-12 07:05:57.000000000 +1000 --- bmdist/doc/src/sgml/ref/create_index.sgml 2006-08-01 12:57:03.000000000 +1000 *************** CREATE [ UNIQUE ] INDEX The name of the index method to be used. Choices are btree, hash, ! gist, and gin. The default method is btree. --- 136,142 ---- The name of the index method to be used. Choices are btree, hash, ! gist, gin and bitmap. The default method is btree. 
diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmapattutil.c bmdist/src/backend/access/bitmap/bitmapattutil.c *** pgsql.head/src/backend/access/bitmap/bitmapattutil.c 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/backend/access/bitmap/bitmapattutil.c 2006-08-01 13:18:47.923541328 +1000 *************** *** 0 **** --- 1,378 ---- + /*------------------------------------------------------------------------- + * + * bitmapattutil.c + * Defines the routines to maintain all distinct attribute values + * which are indexed in the on-disk bitmap index. + * + * Portions Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include "access/genam.h" + #include "access/tupdesc.h" + #include "access/bitmap.h" + #include "access/nbtree.h" + #include "access/xact.h" + #include "nodes/execnodes.h" + #include "nodes/primnodes.h" + #include "nodes/makefuncs.h" + #include "catalog/dependency.h" + #include "catalog/heap.h" + #include "catalog/index.h" + #include "catalog/pg_type.h" + #include "catalog/namespace.h" + #include "catalog/pg_namespace.h" + #include "access/heapam.h" + #include "optimizer/clauses.h" + #include "utils/syscache.h" + #include "utils/lsyscache.h" + #include "utils/builtins.h" + #include "commands/defrem.h" + #include "commands/tablecmds.h" + + static TupleDesc _bitmap_create_lov_heapTupleDesc(Relation rel); + + /* + * _bitmap_create_lov_heapandindex() -- create a new heap relation and + * a btree index for the list of values (LOV). 
+  */
+
+ void
+ _bitmap_create_lov_heapandindex(Relation rel,
+                                 Oid *lovHeapId, Oid *lovIndexId)
+ {
+     char            lovHeapName[NAMEDATALEN];
+     char            lovIndexName[NAMEDATALEN];
+     TupleDesc       tupDesc;
+     IndexInfo      *indexInfo;
+     ObjectAddress   objAddr, referenced;
+     Oid             accessMethodId = BTREE_AM_OID;
+     Oid            *classObjectId;
+     Oid             heapid;
+     Oid             indid;
+     int             numOfIndexAttrs;
+     int             attno;
+
+     /*
+      * rel is the bitmap index being built.  On return, *lovHeapId and
+      * *lovIndexId hold the OIDs of the new LOV heap and of the btree
+      * index created on it.  Both relation names are derived from the
+      * OID of the bitmap index.
+      */
+     snprintf(lovHeapName, sizeof(lovHeapName),
+              "pg_bm_%u", RelationGetRelid(rel));
+     snprintf(lovIndexName, sizeof(lovIndexName),
+              "pg_bm_%u_index", RelationGetRelid(rel));
+
+     /*
+      * If this is happening during re-indexing, then such a heap should
+      * have existed already. Here, we delete this heap and its btree
+      * index first.
+      */
+     heapid = get_relname_relid(lovHeapName, PG_BITMAPINDEX_NAMESPACE);
+     if (OidIsValid(heapid))
+     {
+         ObjectAddress object;
+
+         indid = get_relname_relid(lovIndexName, PG_BITMAPINDEX_NAMESPACE);
+         Assert(OidIsValid(indid));
+
+         /*
+          * Remove the dependency between the LOV heap relation,
+          * the LOV index, and the parent bitmap index before
+          * we drop the lov heap and index.
+          */
+         deleteDependencyRecordsFor(RelationRelationId, heapid);
+         deleteDependencyRecordsFor(RelationRelationId, indid);
+         CommandCounterIncrement();
+
+         /* drop the index first, then the heap it was built on */
+         object.classId = RelationRelationId;
+         object.objectId = indid;
+         object.objectSubId = 0;
+         performDeletion(&object, DROP_RESTRICT);
+
+         object.objectId = heapid;
+         performDeletion(&object, DROP_RESTRICT);
+     }
+
+     /*
+      * create a new empty heap to store all attribute values with their
+      * corresponding block number and offset in LOV.
+      */
+     tupDesc = _bitmap_create_lov_heapTupleDesc(rel);
+
+     *lovHeapId =
+         heap_create_with_catalog
+             (lovHeapName, PG_BITMAPINDEX_NAMESPACE,
+              rel->rd_rel->reltablespace, InvalidOid,
+              rel->rd_rel->relowner, tupDesc, RELKIND_RELATION,
+              rel->rd_rel->relisshared, false, 1, ONCOMMIT_NOOP,
+              false, true);
+
+     /*
+      * We must bump the command counter to make the newly-created relation
+      * tuple visible for opening.
+      */
+     CommandCounterIncrement();
+
+     /* the LOV heap is an internal part of the bitmap index */
+     objAddr.classId = RelationRelationId;
+     objAddr.objectId = *lovHeapId;
+     objAddr.objectSubId = 0;
+
+     referenced.classId = RelationRelationId;
+     referenced.objectId = RelationGetRelid(rel);
+     referenced.objectSubId = 0;
+
+     recordDependencyOn(&objAddr, &referenced, DEPENDENCY_INTERNAL);
+
+     /*
+      * create a btree index on the newly-created heap.
+      * The key includes all attributes to be indexed in this bitmap index,
+      * i.e. every heap column except the trailing block-number and
+      * offset-number columns.
+      */
+     numOfIndexAttrs = tupDesc->natts - 2;
+     indexInfo = makeNode(IndexInfo);
+     indexInfo->ii_NumIndexAttrs = numOfIndexAttrs;
+     indexInfo->ii_Expressions = NIL;
+     indexInfo->ii_ExpressionsState = NIL;
+     indexInfo->ii_Predicate = make_ands_implicit(NULL);
+     indexInfo->ii_PredicateState = NIL;
+     indexInfo->ii_Unique = true;
+
+     classObjectId = (Oid *) palloc(numOfIndexAttrs * sizeof(Oid));
+     for (attno = 0; attno < numOfIndexAttrs; attno++)
+     {
+         Oid     typid = tupDesc->attrs[attno]->atttypid;
+         Oid     opclass = GetDefaultOpClass(typid, accessMethodId);
+
+         /*
+          * Fail with a readable message rather than handing an invalid
+          * operator class to index_create().
+          */
+         if (!OidIsValid(opclass))
+             ereport(ERROR,
+                     (errcode(ERRCODE_UNDEFINED_OBJECT),
+                      errmsg("data type %s has no default operator class for access method \"%s\"",
+                             format_type_be(typid), "btree")));
+
+         indexInfo->ii_KeyAttrNumbers[attno] = attno + 1;
+         classObjectId[attno] = opclass;
+     }
+
+     *lovIndexId = index_create(*lovHeapId, lovIndexName, InvalidOid,
+                                indexInfo, accessMethodId,
+                                rel->rd_rel->reltablespace,
+                                classObjectId, 0, false, false, true,
+                                false);
+
+     /* the LOV index is an internal part of the bitmap index as well */
+     objAddr.classId = RelationRelationId;
+     objAddr.objectId = *lovIndexId;
+     objAddr.objectSubId = 0;
+
+     recordDependencyOn(&objAddr, &referenced, DEPENDENCY_INTERNAL);
+ }
+
+ /*
+  * _bitmap_create_lov_heapTupleDesc() -- create the new heap tuple descriptor.
+  */
+
+ TupleDesc
+ _bitmap_create_lov_heapTupleDesc(Relation rel)
+ {
+     TupleDesc   indexDesc = RelationGetDescr(rel);
+     int         numKeys = indexDesc->natts;
+     TupleDesc   lovDesc;
+     int         i;
+
+     /* every indexed attribute plus two trailing int4 columns */
+     lovDesc = CreateTemplateTupleDesc(numKeys + 2, false);
+
+     /* copy the attributes to be indexed, renumbering them from 1 */
+     for (i = 0; i < numKeys; i++)
+     {
+         memcpy(lovDesc->attrs[i], indexDesc->attrs[i],
+                ATTRIBUTE_TUPLE_SIZE);
+         lovDesc->attrs[i]->attnum = (AttrNumber) (i + 1);
+     }
+
+     /* the block number */
+     TupleDescInitEntry(lovDesc, (AttrNumber) (numKeys + 1),
+                        "blockNumber", INT4OID, -1, 0);
+
+     /* the offset number */
+     TupleDescInitEntry(lovDesc, (AttrNumber) (numKeys + 2),
+                        "offsetNumber", INT4OID, -1, 0);
+
+     return lovDesc;
+ }
+
+ /*
+  * _bitmap_open_lov_heapandindex() -- open the heap relation and the btree
+  *     index for LOV.
+  *
+  * The relation OIDs are read from the given meta page; both relations
+  * are opened with lockMode.  The rel argument is not used.
+  */
+
+ /* XXX: turn this into a macro */
+ void
+ _bitmap_open_lov_heapandindex(Relation rel, BMMetaPage metapage,
+                               Relation *lovHeapP, Relation *lovIndexP,
+                               LOCKMODE lockMode)
+ {
+     Oid     lovHeapOid = metapage->bm_lov_heapId;
+     Oid     lovIndexOid = metapage->bm_lov_indexId;
+
+     *lovHeapP = heap_open(lovHeapOid, lockMode);
+     *lovIndexP = index_open(lovIndexOid, lockMode);
+ }
+
+ /*
+  * _bitmap_insert_lov() -- insert a new data into the given heap and index.
+  *
+  * datum/nulls describe a complete LOV heap row.  Only the leading
+  * (natts - 2) columns are passed on to the index.
+  */
+ void
+ _bitmap_insert_lov(Relation lovHeap, Relation lovIndex,
+                    Datum* datum, bool* nulls)
+ {
+     TupleDesc   lovDesc = RelationGetDescr(lovHeap);
+     int         numKeys = lovDesc->natts - 2;
+     HeapTuple   lovTuple;
+     Datum      *keyValues;
+     bool       *keyNulls;
+     bool        inserted;
+
+     /* insert this tuple into the heap */
+     lovTuple = heap_form_tuple(lovDesc, datum, nulls);
+     simple_heap_insert(lovHeap, lovTuple);
+
+     /* build the key arrays and insert a new tuple into the index */
+     keyValues = palloc0(numKeys * sizeof(Datum));
+     keyNulls = palloc0(numKeys * sizeof(bool));
+     memcpy(keyValues, datum, numKeys * sizeof(Datum));
+     memcpy(keyNulls, nulls, numKeys * sizeof(bool));
+
+     inserted = index_insert(lovIndex, keyValues, keyNulls,
+                             &(lovTuple->t_self), lovHeap, true);
+
+     pfree(keyValues);
+     pfree(keyNulls);
+     Assert(inserted);
+
+     heap_freetuple(lovTuple);
+ }
+
+
+ /*
+  * _bitmap_close_lov_heapandindex() -- close the heap and the index.
+  *
+  * lockMode is handed to both close calls.
+  */
+ void
+ _bitmap_close_lov_heapandindex
+     (Relation lovHeap, Relation lovIndex, LOCKMODE lockMode)
+ {
+     heap_close(lovHeap, lockMode);
+     index_close(lovIndex, lockMode);
+ }
+
+ /*
+  * _bitmap_findvalue() -- find a row in a given heap using
+  *  a given index that satisfies the given scan key.
+  *
+  * If this value exists, this function returns true. Otherwise,
+  * returns false.
+  *
+  * If this value exists in the heap, this function also returns
+  * the block number and the offset number that are stored in the same
+  * row with this value. This block number and the offset number
+  * are for the LOV item that points the bitmap vector for this value.
+  */
+ bool
+ _bitmap_findvalue(Relation lovHeap, Relation lovIndex,
+                   ScanKey scanKey, IndexScanDesc scanDesc,
+                   BlockNumber *lovBlock, bool *blockNull,
+                   OffsetNumber *lovOffset, bool *offsetNull)
+ {
+     TupleDesc   tupDesc;
+     HeapTuple   tuple;
+     bool        found = false;
+
+     /*
+      * The index descriptor is used only for its column count: the block
+      * number and offset number are the two heap columns that follow the
+      * indexed attributes.  scanKey is not consulted here; the scan is
+      * driven entirely by scanDesc.
+      */
+     tupDesc = RelationGetDescr(lovIndex);
+
+     tuple = index_getnext(scanDesc, ForwardScanDirection);
+
+     if (tuple != NULL)
+     {
+         TupleDesc   heapTupDesc;
+
+         found = true;
+
+         heapTupDesc = RelationGetDescr(lovHeap);
+
+         *lovBlock =
+             DatumGetInt32(heap_getattr(tuple, tupDesc->natts+1,
+                                        heapTupDesc, blockNull));
+         *lovOffset =
+             DatumGetInt16(heap_getattr(tuple, tupDesc->natts+2,
+                                        heapTupDesc, offsetNull));
+     }
+
+     return found;
+ }
+
+ /*
+  * _bitmap_insert_lov_block_number() -- insert a new LOV block number
+  *  into a given array.
+  *
+  * If the given array does not have enough space for this new block number,
+  * we increase the array size by BM_NUM_LOV_BLOCKS.
+  *
+  * This function assumes that the given array is sorted by the block
+  * numbers.
+  */
+
+ /* XXX: this should be moved to bitmapinsert.c and made static */
+ void
+ _bitmap_insert_lov_block_number(BlockNumber lovBlock,
+                                 BlockNumber** lovBlocks,
+                                 uint16* numLovBlocks,
+                                 uint16* maxNumLovBlocks)
+ {
+     /* If this is not a real new block number, we do nothing. */
+     if ((*numLovBlocks > 0) && (lovBlock == ((*lovBlocks)[*numLovBlocks-1])))
+         return;
+
+     /*
+      * If there is not enough space, we need to allocate a bigger
+      * array to store all these block numbers.
+      */
+     if (*numLovBlocks >= *maxNumLovBlocks)
+     {
+         BlockNumber* newLovBlock = (BlockNumber*)
+             palloc0(((*maxNumLovBlocks)+BM_NUM_LOV_BLOCKS)*
+                     sizeof(BlockNumber));
+
+         /* tolerate a caller that starts out with no array at all */
+         if (*lovBlocks != NULL)
+         {
+             memcpy(newLovBlock, *lovBlocks,
+                    (*maxNumLovBlocks)*sizeof(BlockNumber));
+             pfree(*lovBlocks);
+         }
+         *lovBlocks = newLovBlock;
+         *maxNumLovBlocks += BM_NUM_LOV_BLOCKS;
+     }
+
+     (*lovBlocks)[*numLovBlocks] = lovBlock;
+     (*numLovBlocks)++;
+ }
+
+ /*
+  * _bitmap_find_lov_index() -- find the index position for a given lov
+  *  block number in a given array.
+  *
+  * This function assumes that the given array is sorted by the block numbers.
+  * The block number is expected to be present; this is only checked by an
+  * assertion.
+  */
+ uint16
+ _bitmap_find_lov_index(BlockNumber lovBlock, BlockNumber* lovBlocks,
+                        uint16 numLovBlocks)
+ {
+     /*
+      * Use 32-bit signed indexes: "end" must be able to hold both -1 (empty
+      * array) and numLovBlocks - 1 for the whole uint16 range, which an
+      * int16 cannot do.
+      */
+     int32   start, end, mid;
+
+     /* Since the given array is sorted, we use binary search here. */
+     start = 0;
+     end = (int32) numLovBlocks - 1;
+     mid = start + (end-start)/2;
+
+     while (start < end)
+     {
+         if (lovBlocks[mid] > lovBlock)
+             end = mid - 1;
+         else if (lovBlocks[mid] < lovBlock)
+             start = mid + 1;
+         else
+             break;
+         mid = start + (end-start)/2;
+     }
+
+     Assert((mid >= start) && (mid <= end) && (lovBlocks[mid] == lovBlock));
+     return (uint16) mid;
+ }
diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmap.c bmdist/src/backend/access/bitmap/bitmap.c
*** pgsql.head/src/backend/access/bitmap/bitmap.c 1970-01-01 10:00:00.000000000 +1000
--- bmdist/src/backend/access/bitmap/bitmap.c 2006-08-01 13:18:57.478088816 +1000
***************
*** 0 ****
--- 1,595 ----
+ /*-------------------------------------------------------------------------
+  *
+  * bitmap.c
+  *  Implementation of the Hybrid Run-Length (HRL) on-disk bitmap index.
+  *
+  * Copyright (c) 2006, PostgreSQL Global Development Group
+  *
+  * IDENTIFICATION
+  *  $PostgreSQL$
+  *
+  * NOTES
+  *  This file contains only the public interface routines.
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+
+ #include "access/genam.h"
+ #include "access/bitmap.h"
+ #include "access/xact.h"
+ #include "catalog/index.h"
+ #include "storage/lmgr.h"
+ #include "parser/parse_oper.h"
+
+ static void bmbuildCallback(Relation index, HeapTuple htup, Datum *attdata,
+                             bool *nulls, bool tupleIsAlive, void *state);
+
+
+ /*
+  * bmbuild() -- Build a new bitmap index.
+  *
+  * Arguments (fmgr): the heap relation, the index relation and the
+  * IndexInfo.  Returns a palloc'd IndexBuildResult carrying the number of
+  * heap tuples scanned and the number of index tuples inserted.
+  */
+ Datum
+ bmbuild(PG_FUNCTION_ARGS)
+ {
+     Relation    heap = (Relation) PG_GETARG_POINTER(0);
+     Relation    index = (Relation) PG_GETARG_POINTER(1);
+     IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
+     double      reltuples;
+     BMBuildState bmstate;
+     IndexBuildResult *result;
+     TupleDesc   tupDesc;
+
+     /* We expect this to be called exactly once. */
+     if (RelationGetNumberOfBlocks(index) != 0)
+         ereport (ERROR,
+                  (errcode(ERRCODE_INDEX_CORRUPTED),
+                   errmsg("index \"%s\" already contains data",
+                          RelationGetRelationName(index))));
+
+     tupDesc = RelationGetDescr(index);
+
+     /*
+      * An index without columns cannot be built.  Returning without a
+      * result here would hand the caller a NULL IndexBuildResult, so
+      * report the problem instead.
+      */
+     if (tupDesc->natts <= 0)
+         elog(ERROR, "bitmap index \"%s\" has no indexed attributes",
+              RelationGetRelationName(index));
+
+     /* disable multi-column index support for now. */
+     if (tupDesc->natts > 1)
+         ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                         errmsg("access method \"bitmap\" does not support "
+                                "multicolumn indexes"))
+                 );
+
+     /* initialize the bitmap index. */
+     _bitmap_init(index);
+
+     /* initialize the build state. */
+     _bitmap_init_buildstate(index, &bmstate);
+
+     /* do the heap scan */
+     reltuples = IndexBuildHeapScan(heap, index, indexInfo,
+                                    bmbuildCallback, (void*)&bmstate);
+
+     /* clean up the build state */
+     _bitmap_cleanup_buildstate(index, &bmstate);
+
+     /* return statistics */
+     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+
+     result->heap_tuples = reltuples;
+     result->index_tuples = bmstate.ituples;
+
+     PG_RETURN_POINTER(result);
+ }
+
+
+ /*
+  * bminsert() -- insert an index tuple into a bitmap index.
+  *
+  * Arguments (fmgr): index relation, attribute values, null flags and the
+  * heap TID.  Always returns true.
+  */
+ Datum
+ bminsert(PG_FUNCTION_ARGS)
+ {
+     Relation    rel = (Relation) PG_GETARG_POINTER(0);
+     Datum      *datum = (Datum *) PG_GETARG_POINTER(1);
+     bool       *nulls = (bool *) PG_GETARG_POINTER(2);
+     ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
+
+     _bitmap_doinsert(rel, *ht_ctid, datum, nulls);
+
+     PG_RETURN_BOOL(true);
+ }
+
+ /*
+  * bmgettuple() -- return the next tuple in a scan.
+  *
+  * Returns whatever _bitmap_first()/_bitmap_next() report for the
+  * requested direction.
+  */
+ Datum
+ bmgettuple(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
+
+     bool        res;
+
+     /*
+      * If we have already begun our scan, continue in the same direction.
+      * Otherwise, start up the scan.
+      */
+     if (ItemPointerIsValid(&(scan->currentItemData)))
+         res = _bitmap_next(scan, dir);
+     else
+         res = _bitmap_first(scan, dir);
+
+     PG_RETURN_BOOL(res);
+ }
+
+ /*
+  * bmgetmulti() -- return multiple tuples at once in a scan.
+  */
+ Datum
+ bmgetmulti(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
+     int32       max_tids = PG_GETARG_INT32(2);
+     int32      *returned_tids = (int32 *) PG_GETARG_POINTER(3);
+
+     bool        res = false;
+     int32       ntids = 0;
+
+     /* collect TIDs until the array is full or the scan is exhausted */
+     while (ntids < max_tids)
+     {
+         if (ItemPointerIsValid(&(scan->currentItemData)))
+             res = _bitmap_next(scan, ForwardScanDirection);
+         else
+             res = _bitmap_first(scan, ForwardScanDirection);
+
+         if (!res)
+             break;
+
+         tids[ntids] = scan->xs_ctup.t_self;
+         ntids++;
+     }
+
+     *returned_tids = ntids;
+     PG_RETURN_BOOL(res);
+ }
+
+ /*
+  * bmbeginscan() -- start a scan on the bitmap index.
+  *
+  * Arguments (fmgr): index relation, number of scan keys and the scan
+  * keys.  Returns the scan descriptor made by RelationGetIndexScan().
+  */
+ Datum
+ bmbeginscan(PG_FUNCTION_ARGS)
+ {
+     Relation    rel = (Relation) PG_GETARG_POINTER(0);
+     int         nkeys = PG_GETARG_INT32(1);
+     ScanKey     scankey = (ScanKey) PG_GETARG_POINTER(2);
+     IndexScanDesc scan;
+
+     /* get the scan */
+     scan = RelationGetIndexScan(rel, nkeys, scankey);
+
+     PG_RETURN_POINTER(scan);
+ }
+
+ /*
+  * bmrescan() -- restart a scan on the bitmap index.
+  *
+  * Allocates the opaque scan state on first use, releases whatever the
+  * current and marked positions hold, and installs the new scan keys.
+  */
+ Datum
+ bmrescan(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     ScanKey     scankey = (ScanKey) PG_GETARG_POINTER(1);
+     BMScanOpaque so = (BMScanOpaque) scan->opaque;
+     BMBitmapScanPosition bmScanPos;
+     uint32      keyNo;
+
+     /* so will be NULL if we were called via index_rescan() */
+     if (so == NULL)
+     {
+         so = (BMScanOpaque) palloc(sizeof(BMScanOpaqueData));
+         so->bm_currPos = NULL;
+         so->bm_markPos = NULL;
+         scan->opaque = so;
+     }
+
+     if (so->bm_currPos != NULL)
+     {
+         /*
+          * release the buffers that have been stored for each related
+          * bitmap vector.
+          */
+         bmScanPos = so->bm_currPos->bm_bitmapScanPos;
+
+         for (keyNo = 0; keyNo < so->bm_currPos->bm_numBitmapVectors; keyNo++)
+         {
+             if (BufferIsValid((bmScanPos[keyNo]).bm_lovBuffer))
+                 ReleaseBuffer((bmScanPos[keyNo]).bm_lovBuffer);
+
+             _bitmap_cleanup_batchwords((bmScanPos[keyNo]).bm_batchWords);
+
+             if (bmScanPos[keyNo].bm_batchWords != NULL)
+                 pfree((bmScanPos[keyNo]).bm_batchWords);
+         }
+
+         pfree(bmScanPos);
+         pfree(so->bm_currPos);
+         so->bm_currPos = NULL;
+     }
+
+     if (so->bm_markPos != NULL)
+     {
+         bmScanPos = so->bm_markPos->bm_bitmapScanPos;
+
+         /*
+          * Bound the loop by the mark position's own vector count;
+          * bm_currPos is NULL by the time we get here.
+          */
+         for (keyNo = 0; keyNo < so->bm_markPos->bm_numBitmapVectors; keyNo++)
+         {
+             if (BufferIsValid((bmScanPos[keyNo]).bm_lovBuffer))
+                 ReleaseBuffer((bmScanPos[keyNo]).bm_lovBuffer);
+
+             _bitmap_cleanup_batchwords((bmScanPos[keyNo]).bm_batchWords);
+
+             if (bmScanPos[keyNo].bm_batchWords != NULL)
+                 pfree((bmScanPos[keyNo]).bm_batchWords);
+         }
+
+         pfree(bmScanPos);
+         pfree(so->bm_markPos);
+         so->bm_markPos = NULL;
+     }
+
+     /* reset the scan key */
+     if (scankey && scan->numberOfKeys > 0)
+         memmove(scan->keyData, scankey,
+                 scan->numberOfKeys * sizeof(ScanKeyData));
+
+     PG_RETURN_VOID();
+ }
+
+ /*
+  * bmendscan() -- close a scan.
+  */
+ Datum
+ bmendscan(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     BMScanOpaque so = (BMScanOpaque) scan->opaque;
+
+     BMBitmapScanPosition bmScanPos;
+     uint32      keyNo;
+
+     /* free the space */
+     /* XXX: this should be pushed into a separate routine as it is duplicated
+      * from above */
+     if (so->bm_currPos != NULL)
+     {
+         /* release the buffers that have been stored for each related bitmap */
+         bmScanPos = so->bm_currPos->bm_bitmapScanPos;
+
+         for (keyNo = 0; keyNo < so->bm_currPos->bm_numBitmapVectors; keyNo++)
+         {
+             if (BufferIsValid((bmScanPos[keyNo]).bm_lovBuffer))
+                 ReleaseBuffer((bmScanPos[keyNo]).bm_lovBuffer);
+
+             _bitmap_cleanup_batchwords((bmScanPos[keyNo]).bm_batchWords);
+             if (bmScanPos[keyNo].bm_batchWords != NULL)
+                 pfree((bmScanPos[keyNo]).bm_batchWords);
+         }
+
+         pfree(bmScanPos);
+         pfree(so->bm_currPos);
+         so->bm_currPos = NULL;
+     }
+
+     if (so->bm_markPos != NULL)
+     {
+         bmScanPos = so->bm_markPos->bm_bitmapScanPos;
+
+         /*
+          * Bound the loop by the mark position's own vector count;
+          * bm_currPos is NULL by the time we get here.
+          */
+         for (keyNo = 0; keyNo < so->bm_markPos->bm_numBitmapVectors; keyNo++)
+         {
+             if (BufferIsValid((bmScanPos[keyNo]).bm_lovBuffer))
+                 ReleaseBuffer((bmScanPos[keyNo]).bm_lovBuffer);
+             _bitmap_cleanup_batchwords((bmScanPos[keyNo]).bm_batchWords);
+             if (bmScanPos[keyNo].bm_batchWords != NULL)
+                 pfree((bmScanPos[keyNo]).bm_batchWords);
+         }
+
+         pfree(bmScanPos);
+         pfree(so->bm_markPos);
+         so->bm_markPos = NULL;
+     }
+
+     pfree(so);
+
+     scan->opaque = NULL;
+
+     /* forget both the current and the marked item */
+     if (ItemPointerIsValid(&(scan->currentItemData)))
+         ItemPointerSetInvalid(&(scan->currentItemData));
+     if (ItemPointerIsValid(&(scan->currentMarkData)))
+         ItemPointerSetInvalid(&(scan->currentMarkData));
+
+
+     PG_RETURN_VOID();
+ }
+
+ /*
+  * bmmarkpos() -- save the current scan position.
+  */
+ Datum
+ bmmarkpos(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     BMScanOpaque so = (BMScanOpaque) scan->opaque;
+
+     BMBitmapScanPosition bmScanPos;
+     uint32      vectorNo;
+
+     /* free the space */
+     if (ItemPointerIsValid(&(scan->currentMarkData)))
+     {
+         /*
+          * release the buffers that have been stored for each
+          * related bitmap.
+          */
+         bmScanPos = so->bm_markPos->bm_bitmapScanPos;
+
+         for (vectorNo = 0;
+              vectorNo < so->bm_markPos->bm_numBitmapVectors;
+              vectorNo++)
+         {
+             if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer))
+             {
+                 ReleaseBuffer((bmScanPos[vectorNo]).bm_lovBuffer);
+                 (bmScanPos[vectorNo]).bm_lovBuffer = InvalidBuffer;
+             }
+         }
+
+         ItemPointerSetInvalid(&(scan->currentMarkData));
+     }
+
+     if (ItemPointerIsValid(&(scan->currentItemData)))
+     {
+         uint32      size = sizeof(BMScanPositionData);
+         uint32      numVectors = so->bm_currPos->bm_numBitmapVectors;
+         BMBitmapScanPosition markVectors;
+
+         /* set the mark position */
+         if (so->bm_markPos == NULL)
+         {
+             so->bm_markPos = (BMScanPosition) palloc(size);
+             /* palloc does not zero memory: there is no vector array yet */
+             so->bm_markPos->bm_bitmapScanPos = NULL;
+             so->bm_markPos->bm_numBitmapVectors = 0;
+         }
+
+         bmScanPos = so->bm_currPos->bm_bitmapScanPos;
+
+         /*
+          * Make sure the mark position has a per-vector array big enough
+          * to receive the copy.
+          */
+         markVectors = so->bm_markPos->bm_bitmapScanPos;
+         if (markVectors == NULL)
+             markVectors = (BMBitmapScanPosition)
+                 palloc(numVectors * sizeof(BMBitmapScanPositionData));
+         else if (so->bm_markPos->bm_numBitmapVectors != numVectors)
+         {
+             /* never free an array the current position still uses */
+             if (markVectors != bmScanPos)
+                 pfree(markVectors);
+             markVectors = (BMBitmapScanPosition)
+                 palloc(numVectors * sizeof(BMBitmapScanPositionData));
+         }
+
+         /* the mark position holds its own pin on every LOV buffer */
+         for (vectorNo = 0; vectorNo < numVectors; vectorNo++)
+         {
+             if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer))
+                 IncrBufferRefCount((bmScanPos[vectorNo]).bm_lovBuffer);
+         }
+
+         /*
+          * Copy the position, then point it back at the mark's own array.
+          * memmove is used because the two arrays may still be the same
+          * one if they were shared by an earlier mark/restore.
+          *
+          * NOTE(review): this is a shallow copy -- any pointers inside the
+          * copied structs (e.g. bm_batchWords) are shared with the current
+          * position; confirm that this cannot lead to a double free when
+          * both positions are released.
+          */
+         memcpy(so->bm_markPos, so->bm_currPos, size);
+         so->bm_markPos->bm_bitmapScanPos = markVectors;
+         memmove(markVectors, bmScanPos,
+                 numVectors * sizeof(BMBitmapScanPositionData));
+
+         scan->currentMarkData = scan->currentItemData;
+     }
+
+     PG_RETURN_VOID();
+ }
+
+ /*
+  * bmrestrpos() -- restore a scan to the last saved position.
+  */
+ Datum
+ bmrestrpos(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     BMScanOpaque so = (BMScanOpaque) scan->opaque;
+
+     BMBitmapScanPosition bmScanPos;
+     uint32      vectorNo;
+
+     /* free space */
+     if (ItemPointerIsValid(&(scan->currentItemData)))
+     {
+         /* release the buffers that have been stored for each related bitmap.*/
+         bmScanPos = so->bm_currPos->bm_bitmapScanPos;
+
+         /* walk the current position's array using its own vector count */
+         for (vectorNo = 0;
+              vectorNo < so->bm_currPos->bm_numBitmapVectors;
+              vectorNo++)
+         {
+             if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer))
+             {
+                 ReleaseBuffer((bmScanPos[vectorNo]).bm_lovBuffer);
+                 (bmScanPos[vectorNo]).bm_lovBuffer = InvalidBuffer;
+             }
+         }
+
+         ItemPointerSetInvalid(&(scan->currentItemData));
+     }
+
+     if (ItemPointerIsValid(&(scan->currentMarkData)))
+     {
+         uint32      size = sizeof(BMScanPositionData);
+         uint32      numVectors = so->bm_markPos->bm_numBitmapVectors;
+         BMBitmapScanPosition currVectors;
+
+         /* set the current position */
+         if (so->bm_currPos == NULL)
+         {
+             so->bm_currPos = (BMScanPosition) palloc(size);
+             /* palloc does not zero memory: there is no vector array yet */
+             so->bm_currPos->bm_bitmapScanPos = NULL;
+             so->bm_currPos->bm_numBitmapVectors = 0;
+         }
+
+         bmScanPos = so->bm_markPos->bm_bitmapScanPos;
+
+         /*
+          * Make sure the current position has a per-vector array big
+          * enough to receive the copy.
+          */
+         currVectors = so->bm_currPos->bm_bitmapScanPos;
+         if (currVectors == NULL)
+             currVectors = (BMBitmapScanPosition)
+                 palloc(numVectors * sizeof(BMBitmapScanPositionData));
+         else if (so->bm_currPos->bm_numBitmapVectors != numVectors)
+         {
+             /* never free an array the mark position still uses */
+             if (currVectors != bmScanPos)
+                 pfree(currVectors);
+             currVectors = (BMBitmapScanPosition)
+                 palloc(numVectors * sizeof(BMBitmapScanPositionData));
+         }
+
+         /* the restored position holds its own pin on every LOV buffer */
+         for (vectorNo = 0; vectorNo < numVectors; vectorNo++)
+         {
+             if (BufferIsValid((bmScanPos[vectorNo]).bm_lovBuffer))
+                 IncrBufferRefCount((bmScanPos[vectorNo]).bm_lovBuffer);
+         }
+
+         /*
+          * Copy the position, then point it back at the current position's
+          * own array.  memmove is used because the two arrays may still be
+          * the same one if they were shared by an earlier mark/restore.
+          *
+          * NOTE(review): this is a shallow copy -- any pointers inside the
+          * copied structs (e.g. bm_batchWords) are shared with the mark
+          * position; confirm that this cannot lead to a double free when
+          * both positions are released.
+          */
+         memcpy(so->bm_currPos, so->bm_markPos, size);
+         so->bm_currPos->bm_bitmapScanPos = currVectors;
+         memmove(currVectors, bmScanPos,
+                 numVectors * sizeof(BMBitmapScanPositionData));
+
+         scan->currentItemData = scan->currentMarkData;
+     }
+
+     PG_RETURN_VOID();
+ }
+
+ /*
+  * bmbulkdelete() -- bulk delete index entries
+  *
+  * Re-index is performed before retrieving the number of tuples
+  * indexed in this index.
+  */
+ /* XXX: is reindexing really the only option?
*/
+ Datum
+ bmbulkdelete(PG_FUNCTION_ARGS)
+ {
+     IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+     Relation    rel = info->index;
+     IndexBulkDeleteResult *result =
+         (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+     double      numTuples;
+     bool        needRebuild = false;
+     Buffer      metabuf;
+     BMMetaPage  metapage;
+
+     /* allocate stats if first time through, else re-use existing struct */
+     if (result == NULL)
+         result = (IndexBulkDeleteResult *)
+             palloc0(sizeof(IndexBulkDeleteResult));
+
+     if (!info->vacuum_full)
+     {
+         /* obtain the indicator if this index needs to be re-built. */
+         metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ);
+         metapage = (BMMetaPage)BufferGetPage(metabuf);
+         needRebuild = metapage->bm_need_rebuilt;
+         _bitmap_relbuf(metabuf);
+     }
+
+     /* a full vacuum always rebuilds; otherwise only when flagged */
+     if (needRebuild || info->vacuum_full)
+     {
+         reindex_index(RelationGetRelid(rel));
+         CommandCounterIncrement();
+     }
+
+     /* obtain the number of tuples from the index */
+     metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ);
+     metapage = (BMMetaPage)BufferGetPage(metabuf);
+     numTuples = metapage->bm_num_tuples;
+     _bitmap_relbuf(metabuf);
+
+     /*
+      * Fill in the struct obtained above.  It must not be allocated a
+      * second time here: that would leak the first allocation and throw
+      * away the struct the caller passed in.
+      */
+     result->num_pages = RelationGetNumberOfBlocks(rel);
+     result->num_index_tuples = numTuples;
+     result->tuples_removed = 0;
+
+     PG_RETURN_POINTER(result);
+ }
+
+ /*
+  * bmvacuumcleanup() -- post-vacuum cleanup.
+  *
+  * We do nothing useful here.
+  */
+ Datum
+ bmvacuumcleanup(PG_FUNCTION_ARGS)
+ {
+     IndexVacuumInfo *vacInfo = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+     IndexBulkDeleteResult *vacStats =
+         (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+     Relation    indexRel = vacInfo->index;
+
+     /* no stats from a previous bulk-delete pass: start from zeroes */
+     if (vacStats == NULL)
+         vacStats = (IndexBulkDeleteResult *)
+             palloc0(sizeof(IndexBulkDeleteResult));
+
+     /* update statistics */
+     vacStats->num_pages = RelationGetNumberOfBlocks(indexRel);
+     vacStats->pages_deleted = 0;
+     vacStats->pages_free = 0;
+     /* XXX: dodgy hack to shutup index_scan() and vacuum_index() */
+     vacStats->num_index_tuples = vacInfo->num_heap_tuples;
+
+     PG_RETURN_POINTER(vacStats);
+ }
+
+ /*
+  * bmgetbitmapwords() -- return a given number of bitmap words in a scan.
+  *
+  * Content words and header words are copied into the caller-supplied
+  * arrays and the number of content words is stored in the third
+  * argument.  Returns true if at least one word was returned.
+  */
+ Datum
+ bmgetbitmapwords(PG_FUNCTION_ARGS)
+ {
+     IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+     uint32      maxNumOfBitmapWords = PG_GETARG_UINT32(1);
+     uint32     *returnedNumOfBitmapWords = (uint32 *) PG_GETARG_POINTER(2);
+     BM_HRL_WORD *bitmapHeaderWords = (BM_HRL_WORD *) PG_GETARG_POINTER(3);
+     BM_HRL_WORD *bitmapContentWords = (BM_HRL_WORD *) PG_GETARG_POINTER(4);
+     bool        gotWords = false;
+
+     Assert(maxNumOfBitmapWords > 0);
+     Assert((maxNumOfBitmapWords % BM_HRL_WORD_SIZE) == 0);
+     Assert((maxNumOfBitmapWords % BM_NUM_OF_HRL_WORDS_PER_PAGE) == 0);
+
+     *returnedNumOfBitmapWords = 0;
+     while (*returnedNumOfBitmapWords < maxNumOfBitmapWords)
+     {
+         BMScanPosition currPos;
+         int32       wordsToCopy = 0;
+
+         /* fetch the next batch, starting the scan if necessary */
+         if (((BMScanOpaque) scan->opaque)->bm_currPos == NULL)
+             gotWords = _bitmap_firstblockwords(scan, ForwardScanDirection);
+         else
+             gotWords = _bitmap_nextblockwords(scan, ForwardScanDirection);
+
+         if (!gotWords)
+             break;
+
+         currPos = ((BMScanOpaque) scan->opaque)->bm_currPos;
+
+         /* never copy more than what is left of the caller's quota */
+         if (currPos->bm_batchWords->numOfWords >=
+             (maxNumOfBitmapWords - *returnedNumOfBitmapWords))
+             wordsToCopy = maxNumOfBitmapWords - *returnedNumOfBitmapWords;
+         else
+             wordsToCopy = currPos->bm_batchWords->numOfWords;
+
+         /* copy the content words */
+         /* XXX: is this necessary? */
+         memcpy(bitmapContentWords + *returnedNumOfBitmapWords,
+                currPos->bm_batchWords->bitmapContentWords,
+                wordsToCopy*sizeof(BM_HRL_WORD));
+
+         /*
+          * copy the header words: wordsToCopy / BM_HRL_WORD_SIZE of them,
+          * rounded up
+          */
+         memcpy(bitmapHeaderWords +
+                (*returnedNumOfBitmapWords)/BM_HRL_WORD_SIZE,
+                currPos->bm_batchWords->bitmapHeaderWords,
+                (wordsToCopy/BM_HRL_WORD_SIZE +
+                 ((wordsToCopy%BM_HRL_WORD_SIZE == 0) ? 0 : 1))*
+                sizeof(BM_HRL_WORD));
+
+         *returnedNumOfBitmapWords += wordsToCopy;
+
+         _bitmap_reset_batchwords(currPos->bm_batchWords);
+
+         /* stop once the total is no longer a multiple of the word size */
+         if (*returnedNumOfBitmapWords%BM_HRL_WORD_SIZE != 0)
+             break;
+     }
+
+     PG_RETURN_BOOL(*returnedNumOfBitmapWords > 0);
+ }
+
+ /*
+  * Per-tuple callback from IndexBuildHeapScan
+  *
+  * Inserts the tuple into the index and counts it in the build state.
+  */
+ static void
+ bmbuildCallback(Relation index, HeapTuple htup, Datum *attdata,
+                 bool *nulls, bool tupleIsAlive, void *state)
+ {
+     BMBuildState *buildState = (BMBuildState *) state;
+
+     _bitmap_buildinsert(index, htup->t_self, attdata, nulls, buildState);
+     buildState->ituples += 1;
+ }
diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmapcompare.c bmdist/src/backend/access/bitmap/bitmapcompare.c
*** pgsql.head/src/backend/access/bitmap/bitmapcompare.c 1970-01-01 10:00:00.000000000 +1000
--- bmdist/src/backend/access/bitmap/bitmapcompare.c 2006-08-01 13:19:12.376823864 +1000
***************
*** 0 ****
--- 1,224 ----
+ /*-------------------------------------------------------------------------
+  *
+  * bitmapcompare.c
+  *  Comparison functions for bitmap access method.
+  *
+  * Copyright (c) 2006, PostgreSQL Global Development Group
+  *
+  *
+  * IDENTIFICATION
+  *  $PostgreSQL$
+  *
+  * NOTES
+  *
+  *  These functions are stored in pg_amproc.  For each operator class
+  *  defined on bitmap, they compute
+  *
+  *              compare(a, b):
+  *                      < 0 if a < b,
+  *                      = 0 if a == b,
+  *                      > 0 if a > b.
+  *
+  *  The result is always an int32 regardless of the input datatype.
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+
+ #include "utils/builtins.h"
+
+ /*
+  * Three-way comparison yielding 1, 0 or -1.  The operands are evaluated
+  * more than once, so only pass side-effect-free expressions.  Operands of
+  * different integer widths are compared under the usual C arithmetic
+  * conversions.
+  */
+ #define BM_THREE_WAY_CMP(a, b) \
+     (((a) > (b)) ? 1 : (((a) == (b)) ? 0 : -1))
+
+
+ Datum
+ bmint2cmp(PG_FUNCTION_ARGS)
+ {
+     int16       a = PG_GETARG_INT16(0);
+     int16       b = PG_GETARG_INT16(1);
+
+     /* the difference of two int16 values always fits in an int32 */
+     PG_RETURN_INT32((int32) a - (int32) b);
+ }
+
+ Datum
+ bmint4cmp(PG_FUNCTION_ARGS)
+ {
+     int32       a = PG_GETARG_INT32(0);
+     int32       b = PG_GETARG_INT32(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint8cmp(PG_FUNCTION_ARGS)
+ {
+     int64       a = PG_GETARG_INT64(0);
+     int64       b = PG_GETARG_INT64(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint48cmp(PG_FUNCTION_ARGS)
+ {
+     int32       a = PG_GETARG_INT32(0);
+     int64       b = PG_GETARG_INT64(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint84cmp(PG_FUNCTION_ARGS)
+ {
+     int64       a = PG_GETARG_INT64(0);
+     int32       b = PG_GETARG_INT32(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint24cmp(PG_FUNCTION_ARGS)
+ {
+     int16       a = PG_GETARG_INT16(0);
+     int32       b = PG_GETARG_INT32(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint42cmp(PG_FUNCTION_ARGS)
+ {
+     int32       a = PG_GETARG_INT32(0);
+     int16       b = PG_GETARG_INT16(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint28cmp(PG_FUNCTION_ARGS)
+ {
+     int16       a = PG_GETARG_INT16(0);
+     int64       b = PG_GETARG_INT64(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmint82cmp(PG_FUNCTION_ARGS)
+ {
+     int64       a = PG_GETARG_INT64(0);
+     int16       b = PG_GETARG_INT16(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ Datum
+ bmboolcmp(PG_FUNCTION_ARGS)
+ {
+     bool        a = PG_GETARG_BOOL(0);
+     bool        b = PG_GETARG_BOOL(1);
+
+     PG_RETURN_INT32((int32) a - (int32) b);
+ }
+
+ Datum
+ bmcharcmp(PG_FUNCTION_ARGS)
+ {
+     char        a = PG_GETARG_CHAR(0);
+     char        b = PG_GETARG_CHAR(1);
+
+     /* Be careful to compare chars as unsigned */
+     PG_RETURN_INT32((int32) ((uint8) a) - (int32) ((uint8) b));
+ }
+
+ Datum
+ bmoidcmp(PG_FUNCTION_ARGS)
+ {
+     Oid         a = PG_GETARG_OID(0);
+     Oid         b = PG_GETARG_OID(1);
+
+     PG_RETURN_INT32(BM_THREE_WAY_CMP(a, b));
+ }
+
+ /*
+  * An oidvector is a variable-length value with an array-style header, not
+  * a bare array of INDEX_MAX_KEYS Oids, so the elements must be reached
+  * through the struct.  This mirrors btoidvectorcmp().
+  */
+ Datum
+ bmoidvectorcmp(PG_FUNCTION_ARGS)
+ {
+     oidvector  *a = (oidvector *) PG_GETARG_POINTER(0);
+     oidvector  *b = (oidvector *) PG_GETARG_POINTER(1);
+     int         i;
+
+     /* We arbitrarily choose to sort first by vector length */
+     if (a->dim1 != b->dim1)
+         PG_RETURN_INT32(a->dim1 - b->dim1);
+
+     for (i = 0; i < a->dim1; i++)
+     {
+         if (a->values[i] != b->values[i])
+         {
+             if (a->values[i] > b->values[i])
+                 PG_RETURN_INT32(1);
+             else
+                 PG_RETURN_INT32(-1);
+         }
+     }
+     PG_RETURN_INT32(0);
+ }
+
+ Datum
+ bmnamecmp(PG_FUNCTION_ARGS)
+ {
+     Name        a = PG_GETARG_NAME(0);
+     Name        b = PG_GETARG_NAME(1);
+
+     PG_RETURN_INT32(strncmp(NameStr(*a), NameStr(*b), NAMEDATALEN));
+ }
+
+ Datum
+ bmname_pattern_cmp(PG_FUNCTION_ARGS)
+ {
+     Name        a = PG_GETARG_NAME(0);
+     Name        b = PG_GETARG_NAME(1);
+
+     /* byte-wise comparison over the whole fixed-size name buffer */
+     PG_RETURN_INT32(memcmp(NameStr(*a), NameStr(*b), NAMEDATALEN));
+ }
diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmapinsert.c bmdist/src/backend/access/bitmap/bitmapinsert.c
*** pgsql.head/src/backend/access/bitmap/bitmapinsert.c 1970-01-01 10:00:00.000000000 +1000
--- bmdist/src/backend/access/bitmap/bitmapinsert.c 2006-08-01 13:19:19.333766248 +1000
***************
*** 0 ****
--- 1,1617 ----
+ /*-------------------------------------------------------------------------
+  *
+  * bitmapinsert.c
+  *  Tuple insertion in the on-disk bitmap index.
+  *
+  * Copyright (c) 2006, PostgreSQL Global Development Group
+  *
+  *
+  * IDENTIFICATION
+  *    $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+
+ #include "postgres.h"
+
+ #include "access/genam.h"
+ #include "access/tupdesc.h"
+ #include "access/heapam.h"
+ #include "access/bitmap.h"
+ #include "parser/parse_oper.h"
+ #include "miscadmin.h"
+
+ static Buffer _bitmap_get_lastbitmappagebuf(Relation rel, BMLOVItem lovItem);
+ static void _bitmap_movetobitmappage(Relation rel, BMLOVItem lovItem,
+     Buffer* lastBufferP, uint32* numWordsP);
+ static void _bitmap_mergewords(Relation rel, BMLOVItem lovItem,
+     Buffer* lastBufferP, uint32* numWordsP,
+     uint64 tidNumber);
+ static void _bitmap_insertsetbit(Relation rel, BMLOVItem lovItem,
+     Buffer* lastBufferP, uint32* numWordsP,
+     uint64 currTidNumber);
+ static void _bitmap_create_lovitem
+     (Relation rel, Buffer metabuf, uint64 currTidNumber,
+     TupleDesc tupDesc, Datum* attdata, bool* nulls,
+     Relation lovHeap, Relation lovIndex,
+     BlockNumber* lovBlockP, OffsetNumber* lovOffsetP);
+ static void _bitmap_write_tids(Relation rel, Buffer metabuf,
+     BlockNumber lovBlock, OffsetNumber lovOffset,
+     uint64* tids, uint32 numTids);
+ static void _bitmap_build_inserttuple(Relation rel,
+     uint64 currTidNumber,
+     ItemPointerData ht_ctid, TupleDesc tupDesc,
+     Datum* attdata, bool* nulls,
+     BMBuildState* state);
+ static void _bitmap_inserttuple
+     (Relation rel, Buffer metabuf, uint64 currTidNumber,
+     ItemPointerData ht_ctid,
+     TupleDesc tupDesc, Datum* attdata,
+     bool* nulls,
+     Relation lovHeap, Relation lovIndex,
+     ScanKey scanKey, IndexScanDesc scanDesc);
+
+ static void _bitmap_updatesetbit(Relation rel, BMLOVItem lovItem,
+     Buffer* lastBufferP, uint64 currTidNumber,
+     uint32* numWordsP);
+ static void
+ _bitmap_updatesetbit_inword(BM_HRL_WORD word, uint64 updateBitLoc,
+     BM_HRL_WORD* words, BM_HRL_WORD* headerWordP,
+     uint32* numWordsP);
+ static void
+ _bitmap_updatesetbit_inpage(Relation rel, Buffer bitmapBuffer,
+     Buffer* nextBitmapBufferP, Buffer lastBuffer,
+     uint64 currTidNumber, uint64 firstTidNumber);
+ static uint64
+ _bitmap_getnumbits(BM_HRL_WORD* contentWords, BM_HRL_WORD* headerWords,
+     uint64 numWords);
+
+ static void
+ _bitmap_findbitmappage(Relation rel, BMLOVItem lovItem,
+     Buffer lastBuffer, uint64 currTidNumber,
+     Buffer* bitmapBufferP, uint64* firstTidNumberP);
+ static void
+ _bitmap_shift_header_bits(BM_HRL_WORD* words,
+     uint32 numOfBits,
+     uint32 maxNumOfWords,
+     uint32 startLoc,
+     uint32 numOfShiftingBits);
+ static void
+ _bitmap_insert_newwords(BM_HRL_WORD* contentWords, BM_HRL_WORD* headerWords,
+     uint32* numWordsP, uint32 maxNumWords,
+     uint32 insertPos,
+     BM_HRL_WORD* newContentWords,
+     BM_HRL_WORD newHeaderWord,
+     uint32 numNewWords,
+     BM_HRL_WORD* leftContentWords,
+     BM_HRL_WORD* leftHeaderWord,
+     uint32* numLeftWordsP);
+
+
+ /*
+  * _bitmap_get_lastbitmappagebuf() -- return the buffer for the last
+  * bitmap page that is pointed to by a given LOV item.
+  *
+  * If the LOV item has no bitmap page yet (its bm_lov_head is
+  * InvalidBlockNumber), InvalidBuffer is returned.  Otherwise the page
+  * at bm_lov_tail is fetched with BM_WRITE, so the returned buffer
+  * holds an exclusive lock.
+  */
+ Buffer
+ _bitmap_get_lastbitmappagebuf(Relation rel, BMLOVItem lovItem)
+ {
+     Buffer lastBuffer = InvalidBuffer;
+
+     if (lovItem->bm_lov_head != InvalidBlockNumber)
+         lastBuffer = _bitmap_getbuf(rel, lovItem->bm_lov_tail, BM_WRITE);
+
+     return lastBuffer;
+ }
+
+
+ /*
+  * _bitmap_movetobitmappage() -- append bm_last_compword in an LOV item
+  * to its associated bitmap page.
+  *
+  * This function moves bm_last_compword in a LOV item into the last
+  * bitmap page that is buffered in *lastBufferP. If *lastBufferP is
+  * not a valid buffer or there is not enough space for the new word,
+  * this function creates a new last bitmap page to store this new word.
+  *
+  * This function increments "*numWordsP" by 1 if the word is stored in
+  * the bitmap page pointed to by "*lastBufferP". If that page does not
+  * have enough space for a new bitmap word, "*numWordsP" is set to 1.
The '*numWordsP' is used + * to indicate how many new words are added to a bitmap page since last + * time we have called WAL. + */ + void + _bitmap_movetobitmappage(Relation rel, BMLOVItem lovItem, Buffer* lastBufferP, + uint32* numWordsP) + { + Page lastPage; + BMBitmapOpaque bitmapPageOpaque; + Buffer newBuffer; + BMBitmap bitmap; + + if (!BufferIsValid(*lastBufferP)) + { + Assert(lovItem->bm_lov_head == InvalidBlockNumber); + + *lastBufferP = _bitmap_getbuf(rel, P_NEW, BM_WRITE); + lovItem->bm_lov_head = *lastBufferP; + + _bitmap_init_bitmappage(rel, *lastBufferP); + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWBITMAP, + *lastBufferP); + + lovItem->bm_lov_head = BufferGetBlockNumber(*lastBufferP); + lovItem->bm_lov_tail = lovItem->bm_lov_head; + } + + lastPage = BufferGetPage(*lastBufferP); + + bitmapPageOpaque = (BMBitmapOpaque) + PageGetSpecialPointer(lastPage); + + /* if there is no space in this page */ + if (bitmapPageOpaque->bm_hrl_words_used == BM_NUM_OF_HRL_WORDS_PER_PAGE) + { + /* create a new bitmap page, and write the old page to the disk. 
*/ + newBuffer = _bitmap_getbuf(rel, P_NEW, BM_WRITE); + _bitmap_init_bitmappage(rel, newBuffer); + + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWBITMAP, + newBuffer); + + lovItem->bm_lov_tail = BufferGetBlockNumber(newBuffer); + bitmapPageOpaque->bm_bitmap_next = lovItem->bm_lov_tail; + + _bitmap_log_bitmappage(rel, *lastBufferP, true, *numWordsP); + _bitmap_wrtbuf(*lastBufferP); + + *numWordsP = 0; + *lastBufferP = newBuffer; + lastPage = BufferGetPage(*lastBufferP); + bitmapPageOpaque = (BMBitmapOpaque) + PageGetSpecialPointer(lastPage); + + } + + bitmap = (BMBitmap) PageGetContents(lastPage); + + if (lovItem->bm_last_two_headerbits == 2 || + lovItem->bm_last_two_headerbits == 3) + bitmap->bm_headerWords + [(bitmapPageOpaque->bm_hrl_words_used/BM_HRL_WORD_SIZE)] |= + (1<<(BM_HRL_WORD_SIZE-1- + (bitmapPageOpaque->bm_hrl_words_used%BM_HRL_WORD_SIZE))); + bitmap->bm_contentWords[bitmapPageOpaque->bm_hrl_words_used] = + lovItem->bm_last_compword; + + bitmapPageOpaque->bm_last_tid_location = lovItem->bm_last_tid_location; + bitmapPageOpaque->bm_hrl_words_used++; + (*numWordsP) ++; + } + + /* + * _bitmap_mergewords() -- merge bm_last_word into bm_last_compword + * in a given LOV item. + * + * If bm_last_word and bm_last_compword can be compressed into one word, + * then simply update the value for bm_last_compword, and reset + * bm_last_word. + * + * If bm_last_word and bm_last_compword can not be compressed into one word, + * we append bm_last_compword into the last bitmap page, set + * bm_last_compword to bm_last_word, and reset bm_last_word. + */ + void + _bitmap_mergewords(Relation rel, BMLOVItem lovItem, + Buffer* lastBufferP, uint32* numWordsP, uint64 tidNumber) + { + bool lastWordIsFill = + (lovItem->bm_last_two_headerbits == 1 || + lovItem->bm_last_two_headerbits == 3); + + /* + * If two words are both fill word, then try to increase the + * fill length in bm_last_compword. 
If this fill length exceeds + * the maximum fill length, then write it out to disk, and create + * a new word for bm_last_compword. + */ + if ((lovItem->bm_last_two_headerbits == 3) && + (GET_FILL_BIT(lovItem->bm_last_compword) == + GET_FILL_BIT(lovItem->bm_last_word))) + { + BM_HRL_WORD lastCompWordFillLength = + FILL_LENGTH(lovItem->bm_last_compword); + BM_HRL_WORD lastWordFillLength = + FILL_LENGTH(lovItem->bm_last_word); + + if (lastCompWordFillLength+lastWordFillLength >= MAX_FILL_LENGTH) { + lovItem->bm_last_compword += + (MAX_FILL_LENGTH-lastCompWordFillLength); + lovItem->bm_last_word -= + (MAX_FILL_LENGTH-lastCompWordFillLength); + lovItem->bm_last_tid_location += + (MAX_FILL_LENGTH-lastCompWordFillLength)*BM_HRL_WORD_SIZE; + + _bitmap_movetobitmappage(rel, lovItem, lastBufferP, numWordsP); + lovItem->bm_last_compword = lovItem->bm_last_word; + lovItem->bm_last_tid_location += + FILL_LENGTH(lovItem->bm_last_compword)*BM_HRL_WORD_SIZE; + } + + else + { + lovItem->bm_last_compword += lastWordFillLength; + lovItem->bm_last_tid_location += + lastWordFillLength*BM_HRL_WORD_SIZE; + } + + lovItem->bm_last_two_headerbits = 2; + } + else + { + if (tidNumber != BM_HRL_WORD_SIZE) + _bitmap_movetobitmappage(rel, lovItem, lastBufferP, numWordsP); + + /* move the last word to the last complete word. */ + lovItem->bm_last_compword = lovItem->bm_last_word; + + if (lastWordIsFill) + { + lovItem->bm_last_two_headerbits = 2; + lovItem->bm_last_tid_location += + FILL_LENGTH(lovItem->bm_last_compword)*BM_HRL_WORD_SIZE; + } + else + { + lovItem->bm_last_two_headerbits = 0; + lovItem->bm_last_tid_location += BM_HRL_WORD_SIZE; + } + } + + lovItem->bm_last_word = LITERAL_ALL_ZERO; + } + + /* + * _bitmap_getnumbits() -- return the number of bits included in the given + * bitmap words. 
+ */ + uint64 + _bitmap_getnumbits(BM_HRL_WORD* contentWords, BM_HRL_WORD* headerWords, + uint64 numWords) { + uint64 numBits = 0; + uint32 wordNo; + + for (wordNo=0; wordNobm_last_tid_location; + if (lovItem->bm_last_two_headerbits == 2) + tidLocation -= (FILL_LENGTH(lovItem->bm_last_compword)* + BM_HRL_WORD_SIZE); + else + tidLocation -= BM_HRL_WORD_SIZE; + + /* + * If currTidNumber is in either bm_last_compword or bm_last_word, + * we simply change it. Otherwise, we have to find the right page + * that contains this bit, starting from the beginning of the + * bitmap vector. + */ + if (currTidNumber > lovItem->bm_last_tid_location) /* bm_last_word */ + { + insertingPos = BM_HRL_WORD_SIZE - + ((currTidNumber-1)%BM_HRL_WORD_SIZE) - 1 ; + lovItem->bm_last_word |= (((BM_HRL_WORD)1)< tidLocation) /* bm_last_compword */ + { + /* if this is a literal word, we simply update the bit. */ + if (lovItem->bm_last_two_headerbits == 0) + { + insertingPos = BM_HRL_WORD_SIZE - + ((currTidNumber-1)%BM_HRL_WORD_SIZE) - 1; + lovItem->bm_last_compword |= (((BM_HRL_WORD)1)<bm_last_two_headerbits == 2 && + GET_FILL_BIT(lovItem->bm_last_compword) == 0) + { + BM_HRL_WORD newContentWords[3]; + BM_HRL_WORD newHeaderWord; + uint32 numNewWords; + uint32 newWordNo; + + _bitmap_updatesetbit_inword(lovItem->bm_last_compword, + currTidNumber-tidLocation-1, + newContentWords, &newHeaderWord, + &numNewWords); + + /* reset lovItem->bm_last_tid_location */ + lovItem->bm_last_tid_location = tidLocation; + + for (newWordNo=0; newWordNobm_last_compword = newContentWords[newWordNo]; + if (IS_FILL_WORD(&newHeaderWord, newWordNo)) + { + lovItem->bm_last_two_headerbits = 2; + lovItem->bm_last_tid_location += + FILL_LENGTH(lovItem->bm_last_compword)* + BM_HRL_WORD_SIZE; + } + else { + lovItem->bm_last_two_headerbits = 0; + lovItem->bm_last_tid_location += BM_HRL_WORD_SIZE; + } + + if (newWordNo != numNewWords-1) + _bitmap_movetobitmappage(rel, lovItem, + lastBufferP, numWordsP); + } + } + } + + /* + * If 
currTidNumber is in the middle of the bitmap vector, + * we try to find the bitmap page that contains this bit, + * and update the bit. + */ + else { + + /* find the page that contains this bit. */ + uint64 firstTidNumber = 1; + Buffer bitmapBuffer; + Page bitmapPage; + BMBitmapOpaque bitmapOpaque; + + Buffer nextBuffer = InvalidBuffer; + + _bitmap_findbitmappage(rel, lovItem, + *lastBufferP, currTidNumber, + &bitmapBuffer, &firstTidNumber); + + /* + * We may need the next page of this page when we update the bit + * in this page, because if there are enough space in the next + * page to hold the extra words generated by this update, we + * will insert them into the next page. + */ + bitmapPage = BufferGetPage(bitmapBuffer); + bitmapOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); + if (BlockNumberIsValid(bitmapOpaque->bm_bitmap_next)) + { + if (bitmapOpaque->bm_bitmap_next != lovItem->bm_lov_tail) + nextBuffer = + _bitmap_getbuf(rel, bitmapOpaque->bm_bitmap_next, + BM_WRITE); + else + nextBuffer = *lastBufferP; + } + + _bitmap_updatesetbit_inpage(rel, bitmapBuffer, + &nextBuffer, *lastBufferP, + currTidNumber, firstTidNumber); + + if (bitmapBuffer == *lastBufferP && + BufferIsValid(nextBuffer)) + { + _bitmap_log_bitmappage(rel, bitmapBuffer, false, + bitmapOpaque->bm_hrl_words_used); + _bitmap_wrtbuf(bitmapBuffer); + + *lastBufferP = nextBuffer; + lovItem->bm_lov_tail = BufferGetBlockNumber(nextBuffer); + } + + else if (BufferIsValid(nextBuffer)) { + _bitmap_log_bitmappage(rel, bitmapBuffer, false, + bitmapOpaque->bm_hrl_words_used); + _bitmap_wrtbuf(bitmapBuffer); + if (nextBuffer != *lastBufferP) + { + Page nextPage = BufferGetPage(nextBuffer); + BMBitmapOpaque nextBitmapOpaque = + (BMBitmapOpaque)PageGetSpecialPointer(nextPage); + + _bitmap_log_bitmappage(rel, nextBuffer, false, + nextBitmapOpaque->bm_hrl_words_used); + _bitmap_wrtbuf(nextBuffer); + } + } + } + } + + /* + * _bitmap_updatesetbit_inword() -- update the given bit to 1 in a given + * 
word. + * + * The given word will generate at most three new words, depending on + * the position of the given bit to be updated. Make sure that the + * array 'words' has the size of 3 when you call this function. All new + * words will be put in this array, and the final number of new words is + * stored in '*numWordsP'. The bit location 'updateBitLoc' is relative to + * the beginning of the given word, starting from 0. + * + * We assume that word is a fill zero word. + */ + void + _bitmap_updatesetbit_inword(BM_HRL_WORD word, uint64 updateBitLoc, + BM_HRL_WORD* words, BM_HRL_WORD* headerWordP, + uint32* numWordsP) + { + uint64 numBits, usedNumBits; + uint16 insertingPos; + + *numWordsP = 0; + *headerWordP = 0; + + Assert(updateBitLoc < BM_HRL_WORD_SIZE*FILL_LENGTH(word)); + + numBits = FILL_LENGTH(word) * BM_HRL_WORD_SIZE; + usedNumBits = 0; + if (updateBitLoc >= BM_HRL_WORD_SIZE) + { + words[*numWordsP] = + BM_MAKE_FILL_WORD(0, updateBitLoc/BM_HRL_WORD_SIZE); + (*numWordsP) ++; + *headerWordP |= (1<<(BM_HRL_WORD_SIZE-*numWordsP)); + usedNumBits += (updateBitLoc/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE; + } + + /* construct the literal word */ + insertingPos = BM_HRL_WORD_SIZE - (updateBitLoc-usedNumBits) - 1; + words[*numWordsP] = ((BM_HRL_WORD)0) | (1< usedNumBits) + { + Assert((numBits-usedNumBits)%BM_HRL_WORD_SIZE == 0); + + words[*numWordsP] = + BM_MAKE_FILL_WORD(0, (numBits-usedNumBits)/BM_HRL_WORD_SIZE); + (*numWordsP) ++; + *headerWordP |= (1<<(BM_HRL_WORD_SIZE-*numWordsP)); + } + } + + /* + * _bitmap_shift_header_bits() -- right-shift bits after 'startLoc' for + * 'numofShiftingBits' bits. + * + * These bits are stored in an array of words with the word size of + * BM_HRL_WORD_SIZE. This shift is done in-place. The maximum number of + * words in this array is given. If the shifting causes the array does not + * have enough space for all bits, the right-most overflow bits will be + * discarded. The value 'startLoc' starts with 0. 
+  */
+ void
+ _bitmap_shift_header_bits(BM_HRL_WORD* words,
+     uint32 numOfBits,
+     uint32 maxNumOfWords,
+     uint32 startLoc,
+     uint32 numOfShiftingBits)
+ {
+     uint32 startWordNo;             /* index of the word containing 'startLoc' */
+     uint32 endWordNo;               /* index of the word containing the last used bit */
+     uint32 wordNo;
+     uint32 numOfFinalShiftingBits;  /* shift applied inside words[startWordNo] */
+     BM_HRL_WORD tmpWord;
+
+     Assert(startLoc <= numOfBits);
+     Assert((numOfBits-1)/BM_HRL_WORD_SIZE < maxNumOfWords);    /* NOTE(review): numOfBits == 0 would wrap here; callers appear to pass > 0 -- confirm */
+
+     startWordNo = startLoc/BM_HRL_WORD_SIZE;
+     endWordNo = (numOfBits-1)/BM_HRL_WORD_SIZE;
+
+     /*
+      * Walk backwards from the last used word down to (but not including)
+      * the word that contains 'startLoc'; every bit in these words moves.
+      */
+     for (wordNo=endWordNo; wordNo>startWordNo; wordNo--) {
+         /*
+          * obtain the last 'numOfShiftingBits' bits in the words[wordNo],
+          * and store them in the high-end of a word. These are the bits
+          * that fall off the low end when this word is shifted right.
+          */
+         tmpWord = (((BM_HRL_WORD)words[wordNo])<<
+             (BM_HRL_WORD_SIZE-numOfShiftingBits));
+
+         /* right-shift the original word 'numOfShiftingBits' bits. */
+         words[wordNo] =
+             (((BM_HRL_WORD)words[wordNo])>>numOfShiftingBits);
+
+         /*
+          * OR those shifted bits into the next word in the array. If there
+          * is no next word, the overflow bits are discarded.
+          */
+         if (wordNo < maxNumOfWords-1)
+             words[wordNo+1] |= tmpWord;
+
+     }
+
+     /*
+      * obtain bits after 'startLoc': shifting left and then right by
+      * startLoc%BM_HRL_WORD_SIZE clears the bits before 'startLoc'.
+      */
+     tmpWord = ((BM_HRL_WORD)(words[startWordNo]<<
+         (startLoc%BM_HRL_WORD_SIZE)))>>(startLoc%BM_HRL_WORD_SIZE);
+
+     words[startWordNo] =            /* keep only the bits before 'startLoc' */
+         ((BM_HRL_WORD)(words[startWordNo]>>
+         (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE)))<<
+         (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE);
+
+     numOfFinalShiftingBits = numOfShiftingBits;
+     if (BM_HRL_WORD_SIZE-startLoc%BM_HRL_WORD_SIZE < numOfShiftingBits)
+         numOfFinalShiftingBits =    /* cap at the number of bits from 'startLoc' to the word end */
+             BM_HRL_WORD_SIZE - startLoc%BM_HRL_WORD_SIZE;
+
+     words[startWordNo] |= (tmpWord>>numOfFinalShiftingBits);
+
+     if (startWordNo < maxNumOfWords-1) {    /* carry the bits shifted out of this word into the next one */
+         tmpWord =
+             ((BM_HRL_WORD)(tmpWord<<(BM_HRL_WORD_SIZE-numOfFinalShiftingBits)))>>
+             (numOfShiftingBits-numOfFinalShiftingBits);
+         words[startWordNo+1] |= tmpWord;
+     }
+ }
+
+ /*
+  * _bitmap_insert_newwords() -- insert some given words into an array
+  * of bitmap words.
+  *
+  * The new words will be inserted into the positions starting from
+  * 'insertPos'(>=0).
The original words from 'insertPos' will be shifted + * to the right. If the given array does not have enough space to + * hold all words, the last '(*numWordsP+numNewWords-maxNumWords)' words + * will be stored in 'leftWords', for which the caller should set + * the enough space to hold these left words. '*numWordsP' will be + * set to the final total number of words in this array. + * + * The 'numNewWords' is less than or equal to BM_HRL_WORD_SIZE. + */ + void + _bitmap_insert_newwords(BM_HRL_WORD* contentWords, BM_HRL_WORD* headerWords, + uint32* numWordsP, uint32 maxNumWords, + uint32 insertPos, + BM_HRL_WORD* newContentWords, + BM_HRL_WORD newHeaderWord, + uint32 numNewWords, + BM_HRL_WORD* leftContentWords, + BM_HRL_WORD* leftHeaderWord, + uint32* numLeftWordsP) + { + int32 wordNo; + uint16 bitLoc; + + Assert(numNewWords <= BM_HRL_WORD_SIZE); + Assert(insertPos <= maxNumWords); + + *numLeftWordsP = 0; + + /* if there are no words in this page, we simply copy the new words. */ + if (*numWordsP == 0) + { + memcpy(contentWords, newContentWords, numNewWords*sizeof(BM_HRL_WORD)); + memcpy(headerWords, &newHeaderWord, sizeof(BM_HRL_WORD)); + *numWordsP = numNewWords; + + return; + } + + /* + * if insertPos is pointing to the position after the maximum position + * in this word, we simply copy the new words to leftContentWords. + */ + if (insertPos == maxNumWords) + { + memcpy(leftContentWords, newContentWords, + numNewWords*sizeof(BM_HRL_WORD)); + memcpy(leftHeaderWord, &newHeaderWord, sizeof(BM_HRL_WORD)); + *numLeftWordsP = numNewWords; + + return; + } + + Assert(*numWordsP > 0); + + if (*numWordsP + numNewWords > maxNumWords) + *numLeftWordsP = *numWordsP + numNewWords - maxNumWords; + *leftHeaderWord = 0; + + /* + * Walk from the last word in the array back to 'insertPos'. + * If the word no + numNewWords is greater than maxNumWords, + * we store these words in leftContentWords. 
+ */ + for (wordNo=*numWordsP-1; wordNo>=0 && wordNo>=insertPos; wordNo--) + { + if (wordNo+numNewWords >= maxNumWords) + { + leftContentWords[wordNo+numNewWords-maxNumWords] = + contentWords[wordNo]; + if (IS_FILL_WORD(headerWords, wordNo)) + { + *leftHeaderWord |= + (((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE- + (wordNo+numNewWords-maxNumWords)-1)); + headerWords[wordNo/BM_HRL_WORD_SIZE] &= + (~(((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE-1- + ((wordNo)%BM_HRL_WORD_SIZE)))); + } + } + else + contentWords[wordNo+numNewWords] = contentWords[wordNo]; + } + + /* insert new words */ + for (wordNo=0; wordNo= maxNumWords) + { + leftContentWords[insertPos+wordNo-maxNumWords] = + newContentWords[wordNo]; + if (IS_FILL_WORD(&newHeaderWord, wordNo)) + *leftHeaderWord |= + (((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE- + (insertPos+wordNo-maxNumWords)-1)); + } + + else { + contentWords[insertPos+wordNo] = newContentWords[wordNo]; + } + } + + + + /* right-shift the bits in the header words */ + _bitmap_shift_header_bits(headerWords, + *numWordsP, + BM_MAX_NUM_OF_HEADER_WORDS, + insertPos, + numNewWords); + + /* set the newWords header bits */ + for (bitLoc=insertPos; + bitLocbm_hrl_words_used; wordNo++) + { + word = bitmap->bm_contentWords[wordNo]; + if (IS_FILL_WORD(bitmap->bm_headerWords, wordNo)) + bitNo += FILL_LENGTH(word) * BM_HRL_WORD_SIZE; + else + bitNo += BM_HRL_WORD_SIZE; + + if (firstTidNumber + bitNo - 1 >= currTidNumber) + break; /* find the word */ + } + + Assert (wordNo < bitmapOpaque->bm_hrl_words_used); + + if (!IS_FILL_WORD(bitmap->bm_headerWords, wordNo)) + { + uint16 insertingPos = BM_HRL_WORD_SIZE - + ((currTidNumber-1)%BM_HRL_WORD_SIZE) - 1; + + bitmap->bm_contentWords[wordNo] |= (1<bm_contentWords[wordNo] = newContentWords[0]; + bitmap->bm_headerWords[wordNo/BM_HRL_WORD_SIZE] &= + (~(1<<(BM_HRL_WORD_SIZE-1-wordNo%BM_HRL_WORD_SIZE))); + } + + else + { + BM_HRL_WORD leftContentWords[3]; + BM_HRL_WORD leftHeaderWord; + uint32 numLeftWords; + Buffer newBitmapBuffer = 
InvalidBuffer; + Page newBitmapPage; + BMBitmapOpaque newBitmapOpaque; + BMBitmap newBitmap; + BM_HRL_WORD newLeftContentWords[3]; + BM_HRL_WORD newLeftHeaderWord; + uint32 numNewLeftWords; + uint64 oldTidLocation; + + bitmap->bm_contentWords[wordNo] = newContentWords[0]; + if (currTidNumber-firstTidNumber+1<=BM_HRL_WORD_SIZE) + bitmap->bm_headerWords[wordNo/BM_HRL_WORD_SIZE] &= + (~(((BM_HRL_WORD)1<< + (BM_HRL_WORD_SIZE-1-wordNo%BM_HRL_WORD_SIZE)))); + + /* ignore the first word in newContentWords. */ + newHeaderWord = newHeaderWord << 1; + _bitmap_insert_newwords(bitmap->bm_contentWords, + bitmap->bm_headerWords, + &(bitmapOpaque->bm_hrl_words_used), + BM_NUM_OF_HRL_WORDS_PER_PAGE, + wordNo+1, + newContentWords+1, newHeaderWord, + numNewWords-1, + leftContentWords, &leftHeaderWord, + &numLeftWords); + if (numLeftWords == 0) + return; + + oldTidLocation = bitmapOpaque->bm_last_tid_location; + bitmapOpaque->bm_last_tid_location -= + _bitmap_getnumbits(leftContentWords, &leftHeaderWord, + numLeftWords); + + /* + * if this page does not have enough space for these new words, + * we look at the next page. If the next page has enough space for + * the left words, we insert them into the next page. Otherwise, + * we create a new page to hold these words. 
+ */ + if (BufferIsValid(*nextBitmapBufferP)) + { + Page nextBitmapPage = BufferGetPage(*nextBitmapBufferP); + BMBitmapOpaque nextBitmapOpaque = + (BMBitmapOpaque)PageGetSpecialPointer(nextBitmapPage); + + if (nextBitmapOpaque->bm_hrl_words_used + numLeftWords <= + BM_NUM_OF_HRL_WORDS_PER_PAGE) + newBitmapBuffer = *nextBitmapBufferP; + } + + if (!BufferIsValid(newBitmapBuffer)) + { + newBitmapBuffer = + _bitmap_getbuf(rel, P_NEW, BM_WRITE); + _bitmap_init_bitmappage(rel, newBitmapBuffer); + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWBITMAP, + newBitmapBuffer); + + bitmapOpaque->bm_bitmap_next = + BufferGetBlockNumber(newBitmapBuffer); + } + + newBitmapPage = BufferGetPage(newBitmapBuffer); + newBitmapOpaque = + (BMBitmapOpaque)PageGetSpecialPointer(newBitmapPage); + + if (BufferIsValid(*nextBitmapBufferP) && + *nextBitmapBufferP != newBitmapBuffer) + { + newBitmapOpaque->bm_last_tid_location = oldTidLocation; + newBitmapOpaque->bm_bitmap_next = + BufferGetBlockNumber(*nextBitmapBufferP); + if (*nextBitmapBufferP != lastBuffer) + _bitmap_relbuf(*nextBitmapBufferP); + } else if (!BufferIsValid(*nextBitmapBufferP)) + newBitmapOpaque->bm_last_tid_location = oldTidLocation; + + *nextBitmapBufferP = newBitmapBuffer; + + newBitmap = (BMBitmap)PageGetContents(newBitmapPage); + + _bitmap_insert_newwords(newBitmap->bm_contentWords, + newBitmap->bm_headerWords, + &(newBitmapOpaque->bm_hrl_words_used), + BM_NUM_OF_HRL_WORDS_PER_PAGE, + 0, + leftContentWords, leftHeaderWord, + numLeftWords, + newLeftContentWords, &newLeftHeaderWord, + &numNewLeftWords); + + Assert(numNewLeftWords == 0); + } + } + } + + /* + * _bitmap_findbitmappage() -- find the bitmap page that contains + * the given tid location. + * + * We assume that this tid location is not in bm_last_compword or + * bm_last_word of its LOVItem. 
+ */ + void + _bitmap_findbitmappage(Relation rel, BMLOVItem lovItem, + Buffer lastBuffer, uint64 currTidNumber, + Buffer* bitmapBufferP, uint64* firstTidNumberP) + { + BlockNumber nextBlockNo = lovItem->bm_lov_head; + + while (BlockNumberIsValid(nextBlockNo)) + { + Page bitmapPage; + BMBitmapOpaque bitmapOpaque; + + /* + * if this is the last page, verify if currTidNumber is in + * this page, and set returnBuffer to *lastBufferP. Note that + * we already have the write lock on this page. + */ + if (nextBlockNo == lovItem->bm_lov_tail) + { + bitmapPage = (Page)BufferGetPage(lastBuffer); + bitmapOpaque = (BMBitmapOpaque) + PageGetSpecialPointer(bitmapPage); + Assert(bitmapOpaque->bm_last_tid_location >= currTidNumber); + *bitmapBufferP = lastBuffer; + break; + } + + *bitmapBufferP = _bitmap_getbuf(rel, nextBlockNo, BM_READ); + bitmapPage = BufferGetPage(*bitmapBufferP); + bitmapOpaque = (BMBitmapOpaque) + PageGetSpecialPointer(bitmapPage); + + if (bitmapOpaque->bm_last_tid_location >= currTidNumber) + break; /* find the page */ + + (*firstTidNumberP) = bitmapOpaque->bm_last_tid_location + 1; + nextBlockNo = bitmapOpaque->bm_bitmap_next; + + _bitmap_relbuf(*bitmapBufferP); + } + } + + + /* + * _bitmap_insertsetbit() -- insert a set bit into a bitmap. + */ + void + _bitmap_insertsetbit(Relation rel, BMLOVItem lovItem, + Buffer* lastBufferP, uint32* numWordsP, + uint64 currTidNumber) + { + int32 numOfZeros; + uint16 zerosNeeded, insertingPos; + + /* + * If this is the first time to insert a set bit, then + * we have already inserted the first currTidNumber/BM_HRL_WORD_SIZE + * zeros. + */ + if (lovItem->bm_last_setbit == 0) + numOfZeros = currTidNumber%BM_HRL_WORD_SIZE; + else + { + /* + * Usually, currTidNumber is greater than lovItem->bm_last_setbit. + * However, if this is not the case, this should be called while + * doing 'vacuum full' or doing insertion after 'vacuum'. In this + * case, we try to update this bit in the corresponding bitmap + * vector. 
+ */ + if (currTidNumber <= lovItem->bm_last_setbit) + { + /* + * Scan through the bitmap vector, and update the bit in + * currTidNumber. + */ + _bitmap_updatesetbit(rel, lovItem, lastBufferP, + currTidNumber, numWordsP); + + return; + } + numOfZeros = currTidNumber - lovItem->bm_last_setbit - 1; + } + + /* + * If there are some zeros between these two set bits, then + * we need to fill these zero bits into the bitmap. + */ + if (numOfZeros > 0){ + + /* try to fill bm_last_word */ + if (lovItem->bm_last_setbit == 0) + zerosNeeded = BM_HRL_WORD_SIZE; + else + zerosNeeded = BM_HRL_WORD_SIZE - + ((lovItem->bm_last_setbit-1)%BM_HRL_WORD_SIZE) - 1 ; + if ((zerosNeeded != 0) && (numOfZeros >= zerosNeeded)) + { + /* merge bm_last_word into bm_last_compword */ + _bitmap_mergewords (rel, lovItem, lastBufferP, numWordsP, + (lovItem->bm_last_setbit+zerosNeeded)); + + numOfZeros -= zerosNeeded; + } + + /* + * if the remaining zeros are more than BM_HRL_WORD_SIZE, + * we construct the last word to be a fill word, and merge it + * with bm_last_compword. 
+ */ + if (numOfZeros >= BM_HRL_WORD_SIZE) + { + uint32 numOfTotalFillWords = numOfZeros/BM_HRL_WORD_SIZE; + uint32 loopNo=0; + + while (numOfTotalFillWords > 0) { + BM_HRL_WORD numOfFillWords ; + if (numOfTotalFillWords >= MAX_FILL_LENGTH) + numOfFillWords = MAX_FILL_LENGTH; + else + numOfFillWords = numOfTotalFillWords; + + lovItem->bm_last_word = + BM_MAKE_FILL_WORD(0, numOfFillWords); + lovItem->bm_last_two_headerbits |= 1; + _bitmap_mergewords (rel, lovItem, lastBufferP, numWordsP, + (lovItem->bm_last_setbit+zerosNeeded+ + loopNo*MAX_FILL_LENGTH*BM_HRL_WORD_SIZE+ + numOfFillWords*BM_HRL_WORD_SIZE)); + loopNo++; + + numOfTotalFillWords -= numOfFillWords; + numOfZeros -= numOfFillWords*BM_HRL_WORD_SIZE; + } + } + } + + Assert((numOfZeros >= 0) && (numOfZerosbm_last_word |= (1<bm_last_setbit = currTidNumber; + + if (currTidNumber%BM_HRL_WORD_SIZE == 0) + { + if (lovItem->bm_last_word == LITERAL_ALL_ZERO) + { + lovItem->bm_last_word = + BM_MAKE_FILL_WORD(0, 1); + lovItem->bm_last_two_headerbits |= 1; + } + + else if (lovItem->bm_last_word == LITERAL_ALL_ONE) + { + lovItem->bm_last_word = + BM_MAKE_FILL_WORD(1, 1); + lovItem->bm_last_two_headerbits |= 1; + } + + _bitmap_mergewords(rel, lovItem, lastBufferP, numWordsP, + currTidNumber); + } + } + + /* + * _bitmap_create_lovitem() -- create a new LOV item. + * + * Create a new LOV item and append this item into the last LOV page. + * Each LOV item is associated with one distinct value for attributes + * to be indexed. This function also inserts this distinct value along + * with this new LOV item's block number and offsetnumber into the + * auxiliary heap and its b-tree of this bitmap index. + * + * This function returns the block number and offset number of this + * new LOV item. 
+  */
+ void _bitmap_create_lovitem(Relation rel,
+                             Buffer metabuf,
+                             uint64 currTidNumber,
+                             TupleDesc tupDesc, Datum* attdata, bool* nulls,
+                             Relation lovHeap, Relation lovIndex,
+                             BlockNumber* lovBlockP, OffsetNumber* lovOffsetP)
+ {
+     int             natts = tupDesc->natts;
+     OffsetNumber    itemSize;
+     BMMetaPage      metapage;
+     Buffer          lovBuf;
+     Page            lovPage;
+     BMLOVItem       newItem;
+     Datum          *values;
+     bool           *isnull;
+
+     /* Read the block number of the last LOV page from the metapage. */
+     LockBuffer(metabuf, BM_READ);
+     metapage = (BMMetaPage) BufferGetPage(metabuf);
+     *lovBlockP = metapage->bm_lov_lastpage;
+     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+
+     lovBuf = _bitmap_getbuf(rel, *lovBlockP, BM_WRITE);
+     lovPage = BufferGetPage(lovBuf);
+
+     newItem = _bitmap_formitem(currTidNumber);
+
+     /* The new item goes right after the current last item on the page. */
+     *lovOffsetP = OffsetNumberNext(PageGetMaxOffsetNumber(lovPage));
+     itemSize = sizeof(BMLOVItemData);
+
+     /*
+      * When the last LOV page cannot take another item, start a fresh LOV
+      * page and make the metapage point at it.
+      */
+     if (itemSize > PageGetFreeSpace(lovPage))
+     {
+         Buffer      freshBuf = _bitmap_getbuf(rel, P_NEW, BM_WRITE);
+
+         _bitmap_init_lovpage(rel, freshBuf);
+         _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWLOV, freshBuf);
+
+         /* the old last page is no longer needed */
+         _bitmap_relbuf(lovBuf);
+
+         lovBuf = freshBuf;
+         lovPage = BufferGetPage(lovBuf);
+         *lovOffsetP = OffsetNumberNext(PageGetMaxOffsetNumber(lovPage));
+         *lovBlockP = BufferGetBlockNumber(lovBuf);
+
+         /* record the new last LOV page in the metapage */
+         LockBuffer(metabuf, BM_WRITE);
+         metapage->bm_lov_lastpage = BufferGetBlockNumber(lovBuf);
+         _bitmap_log_metapage(rel, metapage);
+         _bitmap_wrtnorelbuf(metabuf);
+         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+     }
+
+     /*
+      * Build the row for the LOV heap/index: the indexed attribute values
+      * followed by the LOV item's block number and offset number.
+      */
+     values = palloc0((natts + 2) * sizeof(Datum));
+     isnull = palloc0((natts + 2) * sizeof(bool));
+     memcpy(values, attdata, natts * sizeof(Datum));
+     memcpy(isnull, nulls, natts * sizeof(bool));
+     values[natts] = Int32GetDatum(*lovBlockP);
+     isnull[natts] = false;
+     values[natts + 1] = Int16GetDatum(*lovOffsetP);
+     isnull[natts + 1] = false;
+
+     _bitmap_insert_lov(lovHeap, lovIndex, values, isnull);
+
+     /* Now place the LOV item itself on the LOV page. */
+     if (PageAddItem(lovPage, (Item) newItem, itemSize, *lovOffsetP,
+                     LP_USED) == InvalidOffsetNumber)
+         ereport(ERROR,
+                 (errcode(ERRCODE_INTERNAL_ERROR),
+                  errmsg("failed to add LOV item to \"%s\"",
+                         RelationGetRelationName(rel))));
+
+     _bitmap_log_lovitem(rel, lovBuf, true, *lovOffsetP, newItem);
+
+     _bitmap_wrtbuf(lovBuf);
+
+     pfree(newItem);
+     pfree(values);
+     pfree(isnull);
+ }
+
+ /*
+  * _bitmap_write_tids() -- write out all tids that are stored
+  * in a given array.
+ */ + void + _bitmap_write_tids(Relation rel, Buffer metabuf, + BlockNumber lovBlock, OffsetNumber lovOffset, + uint64* tids, uint32 numTids) + { + Buffer currLovBuffer; + Page currLovPage; + uint32 tidNo; + Buffer lastBuffer; + BMLOVItem lovItem; + uint32 numWords; + BMMetaPage metapage; + uint32 numNewTids; + + Assert(BlockNumberIsValid(lovBlock) && numTids>=1); + + currLovBuffer = + _bitmap_getbuf(rel, lovBlock, BM_WRITE); + currLovPage = BufferGetPage(currLovBuffer); + + lovItem = (BMLOVItem) + PageGetItem(currLovPage, PageGetItemId(currLovPage, lovOffset)); + + lastBuffer = _bitmap_get_lastbitmappagebuf(rel, lovItem); + + numWords = 0; + numNewTids = 0; + + for (tidNo=0; tidNobm_last_setbit < currTidNumber) + numNewTids ++; + + _bitmap_insertsetbit(rel, lovItem, &lastBuffer, + &numWords, currTidNumber); + } + + /* write lastBuffer to disk */ + if (BufferIsValid(lastBuffer)) + { + _bitmap_log_bitmappage(rel, lastBuffer, false, numWords); + _bitmap_wrtbuf(lastBuffer); + } + + _bitmap_log_lovitem(rel, currLovBuffer, false, lovOffset, lovItem); + _bitmap_wrtbuf(currLovBuffer); + + /* update the metapage */ + LockBuffer(metabuf, BM_WRITE); + metapage = (BMMetaPage)BufferGetPage(metabuf); + metapage->bm_num_tuples += numNewTids; + if (numNewTids < numTids) + metapage->bm_need_rebuilt = true; + _bitmap_log_metapage(rel, metapage); + _bitmap_wrtnorelbuf(metabuf); + LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); + } + + /* + * _bitmap_write_alltids() -- write all tids in the given buffer into disk. 
+  *
+  * The buffer is treated as bm_max_values consecutive arrays of
+  * (tidLocsPerValue+1) entries each: entry 0 holds the number of tids
+  * stored in the array, the following entries hold the tids themselves.
+  * lovBlocks maps (array index / BM_MAX_LOVITEMS_PER_PAGE) to the block
+  * number of the LOV page holding the matching LOV item.
+  */
+ void
+ _bitmap_write_alltids(Relation rel, Buffer metabuf,
+                       BMTidLocsBuffer* tidLocsBuffer,
+                       uint64 tidLocsPerValue,
+                       BlockNumber* lovBlocks)
+ {
+     uint32  index;
+     uint64* tidLocs = tidLocsBuffer->bm_tidLocs;
+
+     for (index=0; index<tidLocsBuffer->bm_max_values; index++)
+     {
+         BlockNumber  currLovBlock;
+         OffsetNumber currLovOffset;
+         uint64* currTidLocs =
+             tidLocs + index*(tidLocsPerValue+1);
+
+         uint64 currNumTids = currTidLocs[0];
+
+         /* convert array index back to currLovBlock and currLovOffset */
+         currLovBlock = lovBlocks[index/BM_MAX_LOVITEMS_PER_PAGE];
+         currLovOffset = index%BM_MAX_LOVITEMS_PER_PAGE + 1;
+
+         /* empty arrays have nothing to flush */
+         if (currNumTids > 0)
+             _bitmap_write_tids(rel, metabuf, currLovBlock, currLovOffset,
+                                currTidLocs+1, currNumTids);
+     }
+ }
+
+ /*
+  * _bitmap_build_inserttuple() -- insert a new tuple into the bitmap index
+  *  during the bitmap index construction.
+  *
+  * This function is called during the bitmap index construction.
+  *
+  * Each new tuple has an assigned number -- currTidNumber, called a
+  * tid location, which represents the bit location for this tuple in
+  * a bitmap vector. To speed up the construction, this function does not
+  * write this tid location into its bitmap vector immediately. We maintain
+  * a buffer -- BMTidLocsBuffer to keep a fix-sized array of tid locations
+  * for each distinct attribute value.
+  *
+  * If this new tid location causes the corresponding tid array to overflow,
+  * then we write all tid locations stored in this tid array into disk first.
+  * This new tid location will be inserted in the first position of this
+  * tid array.
+  *
+  * If this tuple contains a new value, and the number of distinct values
+  * is beyond a maximum number, stored in the BMTidLocsBuffer (bm_max_values),
+  * then we write out all tid locations in this BMTidLocsBuffer, and
+  * re-allocate this buffer to support more distinct values --
+  * bm_max_values + BM_NUM_DISTINCT_VALUES.
+ */ + void _bitmap_build_inserttuple(Relation rel, + uint64 currTidNumber, + ItemPointerData ht_ctid, + TupleDesc tupDesc, Datum* attdata, bool* nulls, + BMBuildState* state) + { + BlockNumber lovBlock; + OffsetNumber lovOffset; + bool blockNull, offsetNull; + uint32 arrayNo; + uint64 *currTidLocs, *tidLocs; + uint64 currNumTids; + uint64 tidLocsPerValue; + BMTidLocsBuffer* tidLocsBuffer; + int attno; + bool allNulls = true; + + tidLocsBuffer = state->bm_tidLocsBuffer; + tidLocs = tidLocsBuffer->bm_tidLocs; + + /* Check if all attributes have value of NULL. */ + for (attno = 0; attno < tupDesc->natts; attno++) { + if (!nulls[attno]) { + allNulls = false; + break; + } + } + + /* + * Search the btree to find the right bitmap vector to append + * this bit. Here, we reset the scan key and call index_rescan. + */ + for (attno = 0; attnonatts; attno++) { + ScanKey theScanKey = + (ScanKey)(((char*)state->bm_lov_scanKeys) + + attno * sizeof(ScanKeyData)); + if (nulls[attno]) + { + theScanKey->sk_flags = SK_ISNULL; + theScanKey->sk_argument = attdata[attno]; + } + else + { + theScanKey->sk_flags = 0; + theScanKey->sk_argument = attdata[attno]; + } + } + + index_rescan(state->bm_lov_scanDesc, state->bm_lov_scanKeys); + + /* if the inserting tuple has the value of NULL, then + the corresponding tid array is the first. */ + if (allNulls) + { + lovBlock = BM_LOV_STARTPAGE; + lovOffset = 1; + arrayNo = 0; + } + + /* + * search through the lov heap and index to find the LOV item which + * has the same value as the inserting tuple. If such an item is found, + * we calculate its arrayNo. + */ + else if (_bitmap_findvalue(state->bm_lov_heap, state->bm_lov_index, + state->bm_lov_scanKeys, state->bm_lov_scanDesc, + &lovBlock, &blockNull, &lovOffset, &offsetNull)) + { + uint16 lovIndex; + + Assert((!blockNull) && (!offsetNull)); + + /* + * find the index position for this lovBlock in + * state->bm_lov_blocks. 
+ */ + lovIndex = _bitmap_find_lov_index(lovBlock, + state->bm_lov_blocks, + state->bm_num_lov_blocks); + + arrayNo = lovIndex*BM_MAX_LOVITEMS_PER_PAGE + (lovOffset-1); + } + + /* + * if the inserting tuple has a new value, then we create a new + * LOV item, and increment tidLocsBuffer->bm_max_values. + */ + else + { + _bitmap_create_lovitem(rel, state->bm_metabuf, currTidNumber, + tupDesc, attdata, nulls, + state->bm_lov_heap, state->bm_lov_index, + &lovBlock, &lovOffset); + + /* insert this new lov block number into state->bm_lov_blocks */ + _bitmap_insert_lov_block_number(lovBlock, + &(state->bm_lov_blocks), + &(state->bm_num_lov_blocks), + &(state->bm_max_num_lov_blocks)); + + arrayNo = (state->bm_num_lov_blocks-1)* + BM_MAX_LOVITEMS_PER_PAGE + + (lovOffset-1); + } + + tidLocsPerValue = BM_MAX_TIDLOCS/tidLocsBuffer->bm_max_values - 1; + + /* + * We want to make sure that tidLocsPerValue is greater than + * or equal to 1. + */ + if (tidLocsPerValue < 1) + tidLocsPerValue = 1; + + /* + * If arrayNo is greater than or equal to + * tidLocsBuffer->bm_max_values, + * then we write all tid locations into disk, and re-initialize + * tidLocsBuffer->bm_tidLocsBuffer. + */ + if (arrayNo >= tidLocsBuffer->bm_max_values) + { + _bitmap_write_alltids(rel, state->bm_metabuf, tidLocsBuffer, + tidLocsPerValue, + state->bm_lov_blocks); + + /* re-initialize the buffer */ + tidLocsBuffer->bm_max_values += BM_NUM_DISTINCT_VALUES; + memset(tidLocsBuffer->bm_tidLocs, 0, BM_MAX_TIDLOCS*sizeof(uint64)); + + tidLocsPerValue = BM_MAX_TIDLOCS/tidLocsBuffer->bm_max_values - 1; + Assert(tidLocsPerValue >= 1); + } + + /* + * we try to insert this new tid location into the corresponding array. + * If such an array is full, we will write out all tids stored in this + * array to disk. 
+ */ + currTidLocs = tidLocs + arrayNo*(tidLocsPerValue+1); + currNumTids = currTidLocs[0]; + + if (currNumTids >= tidLocsPerValue) + { + _bitmap_write_tids(rel, state->bm_metabuf, lovBlock, lovOffset, + currTidLocs+1, currNumTids); + currTidLocs[0] = 0; + } + + /* We simply insert this new tid location into its array. */ + currTidLocs[0] ++; + currTidLocs[currTidLocs[0]] = currTidNumber; + } + + /* + * _bitmap_inserttuple() -- insert a new tuple into the bitmap index. + * + * This function finds the corresponding bitmap vector associated with + * the given attribute value, and inserts a set bit into this bitmap + * vector. Each distinct attribute value is stored as a LOV item, which + * is stored in a list of LOV pages. + * + * If there is no LOV item associated with the given attribute value, + * a new LOV item is created and appended into the last LOV page. + * + * For support the high-cardinality case for attributes to be indexed, + * we also maintain an auxiliary heap and a btree structure for all + * the distinct attribute values so that the search for the + * corresponding bitmap vector can be done faster. The heap + * contains all attributes to be indexed and 2 more attributes -- + * the block number of the offset number of the block that stores + * the corresponding LOV item. The b-tree index is on this new heap + * and the key contains all attributes to be indexed. + */ + void + _bitmap_inserttuple(Relation rel, Buffer metabuf, + uint64 currTidNumber, ItemPointerData ht_ctid, + TupleDesc tupDesc, Datum* attdata, bool* nulls, + Relation lovHeap, Relation lovIndex, + ScanKey scanKey, IndexScanDesc scanDesc) + { + BlockNumber lovBlock; + OffsetNumber lovOffset; + bool blockNull, offsetNull; + bool allNulls = true; + int attno; + + /* Check if the values of given attributes are all NULL. 
*/ + for (attno = 0; attno < tupDesc->natts; attno++) { + if (!nulls[attno]) { + allNulls = false; + break; + } + } + + /* + * if the inserting tuple has the value NULL, then the LOV item is + * the first item in the lovBuffer. + */ + if (allNulls) + { + lovBlock = BM_LOV_STARTPAGE; + lovOffset = 1; + } + + /* + * search through the lov heap and index to find the LOV item which has + * the same value as the inserting tuple. If such an item is not found, + * then we create a new LOV item, and insert it into the lov heap and + * index. + */ + else if (!_bitmap_findvalue(lovHeap, lovIndex, + scanKey, scanDesc, + &lovBlock, &blockNull, + &lovOffset, &offsetNull)) + _bitmap_create_lovitem(rel, metabuf, currTidNumber, tupDesc, + attdata, nulls, lovHeap, lovIndex, + &lovBlock, &lovOffset); + + + /* + * Here, we have found the block number and offset number of the + * LOV item that points to the bitmap page, to which we will + * append the set bit. + */ + _bitmap_write_tids(rel, metabuf, lovBlock, lovOffset, + &currTidNumber, 1); + } + + /* + * _bitmap_buildinsert() -- insert an index tuple during index creation. + */ + void + _bitmap_buildinsert(Relation rel, ItemPointerData ht_ctid, + Datum* attdata, bool* nulls, + BMBuildState* state) + { + TupleDesc tupDesc; + uint64 tidOffset; + + Assert(ItemPointerGetOffsetNumber(&ht_ctid) <= MaxNumHeapTuples); + + tidOffset = + ((uint64)ItemPointerGetBlockNumber(&ht_ctid) * + MaxNumHeapTuples) + + ((uint64)ItemPointerGetOffsetNumber(&ht_ctid)); + + tupDesc = RelationGetDescr(rel); + + /* insert a new bit into the corresponding bitmap */ + _bitmap_build_inserttuple(rel, tidOffset, ht_ctid, + tupDesc, attdata, nulls, state); + } + + /* + * _bitmap_doinsert() -- insert an index tuple for a given tuple. 
+ */ + void + _bitmap_doinsert(Relation rel, ItemPointerData ht_ctid, + Datum* attdata, bool* nulls) + { + uint64 tidOffset; + + TupleDesc tupDesc; + Buffer metabuf; + BMMetaPage metapage; + Relation lovHeap, lovIndex; + ScanKey scanKeys; + IndexScanDesc scanDesc; + int attno; + + tupDesc = RelationGetDescr(rel); + if (tupDesc->natts <= 0) + return ; + + Assert(ItemPointerGetOffsetNumber(&ht_ctid) <= MaxNumHeapTuples); + tidOffset = + ((uint64)ItemPointerGetBlockNumber(&ht_ctid) * + MaxNumHeapTuples) + + ((uint64)ItemPointerGetOffsetNumber(&ht_ctid)); + + /* insert a new bit into the corresponding bitmap using the HRL scheme */ + metabuf = _bitmap_getbuf(rel, BM_METAPAGE, BM_READ); + metapage = (BMMetaPage)BufferGetPage(metabuf); + _bitmap_open_lov_heapandindex(rel, metapage, + &lovHeap, &lovIndex, RowExclusiveLock); + LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); + + scanKeys = (ScanKey) palloc0(tupDesc->natts * sizeof(ScanKeyData)); + + for (attno = 0; attnonatts; attno++) + { + RegProcedure opfuncid; + ScanKey scanKey; + + opfuncid = equality_oper_funcid(tupDesc->attrs[attno]->atttypid); + scanKey = (ScanKey) + (((char*)scanKeys) + attno * sizeof(ScanKeyData)); + + ScanKeyEntryInitialize + (scanKey, SK_ISNULL, attno+1, + BTEqualStrategyNumber, InvalidOid, opfuncid, 0); + + if (nulls[attno]) + { + scanKey->sk_flags = SK_ISNULL; + scanKey->sk_argument = attdata[attno]; + } + else + { + scanKey->sk_flags = 0; + scanKey->sk_argument = attdata[attno]; + } + } + + scanDesc = index_beginscan(lovHeap, lovIndex, SnapshotAny, + tupDesc->natts, scanKeys); + + /* insert this new tuple into the bitmap index. 
*/ + _bitmap_inserttuple(rel, metabuf, tidOffset, ht_ctid, tupDesc, attdata, + nulls, lovHeap, lovIndex, scanKeys, scanDesc); + + index_endscan(scanDesc); + _bitmap_close_lov_heapandindex(lovHeap, lovIndex, RowExclusiveLock); + + ReleaseBuffer(metabuf); + pfree(scanKeys); + } diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmappages.c bmdist/src/backend/access/bitmap/bitmappages.c *** pgsql.head/src/backend/access/bitmap/bitmappages.c 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/backend/access/bitmap/bitmappages.c 2006-08-01 13:19:25.357850448 +1000 *************** *** 0 **** --- 1,258 ---- + /*------------------------------------------------------------------------- + * + * bitmappage.c + * Bitmap index page management code for the bitmap index. + * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL$ + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include "access/genam.h" + #include "access/tupdesc.h" + #include "access/bitmap.h" + #include "storage/lmgr.h" + #include "parser/parse_oper.h" + #include "miscadmin.h" + + /* + * _bitmap_getbuf() -- return the buffer for the given block number and + * the access method. + */ + Buffer + _bitmap_getbuf(Relation rel, BlockNumber blkno, int access) + { + Buffer buf; + + buf = ReadBuffer(rel, blkno); + if (access != BM_NOLOCK) + LockBuffer(buf, access); + + return buf; + } + + /* + * _bitmap_wrtbuf() -- write a buffer page to disk. + * + * Release the lock and the pin held on the buffer. + */ + void + _bitmap_wrtbuf(Buffer buf) + { + MarkBufferDirty(buf); + UnlockReleaseBuffer(buf); + } + + /* + * _bitmap_wrtnorelbuf() -- write a buffer page to disk without still holding + * the pin on this page. + */ + void + _bitmap_wrtnorelbuf(Buffer buf) + { + MarkBufferDirty(buf); + } + + /* + * _bitmap_relbuf() -- release the buffer without writing. 
+  */
+ void
+ _bitmap_relbuf(Buffer buf)
+ {
+     /* the page is left untouched: no MarkBufferDirty() here */
+     UnlockReleaseBuffer(buf);
+ }
+
+ /*
+  * _bitmap_init_lovpage -- set up a freshly allocated page as a LOV page.
+  *
+  * The page must still be new; it is initialized without special space.
+  */
+ void
+ _bitmap_init_lovpage(Relation rel, Buffer buf)
+ {
+     Page lovPage = (Page) BufferGetPage(buf);
+
+     Assert (PageIsNew(lovPage));
+     PageInit(lovPage, BufferGetPageSize(buf), 0);
+ }
+
+ /*
+  * _bitmap_init_bitmappage() -- set up a freshly allocated page that will
+  *     store bitmap words.
+  *
+  * The special space holds a BMBitmapOpaqueData that starts out with no
+  * words used, no next page and a last tid location of 0.
+  *
+  * Note: This function requires an exclusive lock on the metapage.
+  */
+ void
+ _bitmap_init_bitmappage(Relation rel, Buffer buf)
+ {
+     Page           bitmapPage = (Page) BufferGetPage(buf);
+     BMBitmapOpaque bmOpaque;
+
+     Assert (PageIsNew(bitmapPage));
+     PageInit(bitmapPage, BufferGetPageSize(buf), sizeof(BMBitmapOpaqueData));
+
+     /* the opaque area only exists once PageInit() has reserved it */
+     bmOpaque = (BMBitmapOpaque) PageGetSpecialPointer(bitmapPage);
+     bmOpaque->bm_bitmap_next = InvalidBlockNumber;
+     bmOpaque->bm_hrl_words_used = 0;
+     bmOpaque->bm_last_tid_location = 0;
+ }
+
+ /*
+  * _bitmap_init_buildstate() -- initialize the build state before building
+  *  a bitmap index.
+  */
+ void
+ _bitmap_init_buildstate(Relation index, BMBuildState* bmstate)
+ {
+     uint32           attno;
+     TupleDesc        indexDesc;
+     BMTidLocsBuffer* locsBuffer;
+
+     /* pin and exclusively lock the metapage, remember the descriptor */
+     bmstate->bm_metabuf = _bitmap_getbuf(index, BM_METAPAGE, BM_WRITE);
+     indexDesc = RelationGetDescr(index);
+     bmstate->bm_tupDesc = indexDesc;
+
+     /* zeroed tid-location buffer, sized for the initial value count */
+     locsBuffer = (BMTidLocsBuffer*) palloc0(BM_MAX_TIDLOCS_BUFFER_SIZE);
+     locsBuffer->bm_max_values = BM_NUM_DISTINCT_VALUES;
+     bmstate->bm_tidLocsBuffer = locsBuffer;
+
+     /* no LOV block recorded yet */
+     bmstate->bm_num_lov_blocks = 0;
+     bmstate->bm_max_num_lov_blocks = BM_NUM_LOV_BLOCKS;
+     bmstate->bm_lov_blocks = (BlockNumber*)
+         palloc0(bmstate->bm_max_num_lov_blocks*sizeof(BlockNumber));
+
+     /* the metapage tells us which LOV heap and index to open */
+     _bitmap_open_lov_heapandindex(index,
+                                   (BMMetaPage)BufferGetPage(bmstate->bm_metabuf),
+                                   &(bmstate->bm_lov_heap),
+                                   &(bmstate->bm_lov_index),
+                                   RowExclusiveLock);
+
+     /* keep the pin on the metapage, give up only the content lock */
+     _bitmap_wrtnorelbuf(bmstate->bm_metabuf);
+     LockBuffer(bmstate->bm_metabuf, BUFFER_LOCK_UNLOCK);
+
+     /* one equality scan key per indexed attribute */
+     bmstate->bm_lov_scanKeys =
+         (ScanKey)palloc0(indexDesc->natts * sizeof(ScanKeyData));
+     for (attno = 0; attno < indexDesc->natts; attno++)
+     {
+         RegProcedure eqProc =
+             equality_oper_funcid(indexDesc->attrs[attno]->atttypid);
+
+         ScanKeyEntryInitialize(&(bmstate->bm_lov_scanKeys[attno]),
+                                SK_ISNULL, attno+1,
+                                BTEqualStrategyNumber, InvalidOid,
+                                eqProc, 0);
+     }
+
+     bmstate->bm_lov_scanDesc = index_beginscan(bmstate->bm_lov_heap,
+                                                bmstate->bm_lov_index,
+                                                SnapshotAny,
+                                                indexDesc->natts,
+                                                bmstate->bm_lov_scanKeys);
+ }
+
+ /*
+  * _bitmap_cleanup_buildstate() -- clean up the build state after
+  *  inserting all rows in the heap into the bitmap index.
+ */ + void + _bitmap_cleanup_buildstate(Relation index, BMBuildState* bmstate) + { + /* write out remaining tids in bmstate->bm_tidLicsBuffer */ + BMTidLocsBuffer* tidLocsBuffer = bmstate->bm_tidLocsBuffer; + _bitmap_write_alltids(index, bmstate->bm_metabuf, tidLocsBuffer, + BM_MAX_TIDLOCS/tidLocsBuffer->bm_max_values - 1, + bmstate->bm_lov_blocks); + + index_endscan(bmstate->bm_lov_scanDesc); + + ReleaseBuffer(bmstate->bm_metabuf); + + _bitmap_close_lov_heapandindex + (bmstate->bm_lov_heap,bmstate->bm_lov_index, + RowExclusiveLock); + + pfree(bmstate->bm_tidLocsBuffer); + pfree(bmstate->bm_lov_blocks); + pfree(bmstate->bm_lov_scanKeys); + } + + /* + * _bitmap_init() -- initialize the bitmap index. + * + * Create the meta page, a new heap which stores the distinct values for + * the attributes to be indexed, a btree index on this new heap for searching + * those distinct values, and the first LOV page. + */ + void + _bitmap_init(Relation rel) + { + BMMetaPage metapage; + Buffer metabuf; + Page page; + Buffer buf; + BMLOVItem lovItem; + OffsetNumber newOffset; + Page currLovPage; + + /* sanity check */ + if (RelationGetNumberOfBlocks(rel) != 0) + ereport (ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot initialize non-empty bitmap index \"%s\"", + RelationGetRelationName(rel)))); + + /* create the metapage */ + metabuf = _bitmap_getbuf(rel, P_NEW, BM_WRITE); + page = BufferGetPage(metabuf); + Assert (PageIsNew(page)) ; + + /* initialize the metapage */ + PageInit(page, BufferGetPageSize(metabuf), 0); + metapage = (BMMetaPage) page; + metapage->bm_num_tuples = 0; + metapage->bm_need_rebuilt = false; + + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWMETA, metabuf); + + /* initialize the LOV metadata */ + _bitmap_create_lov_heapandindex(rel, + &(metapage->bm_lov_heapId), + &(metapage->bm_lov_indexId)); + + /* allocate the first LOV page. 
*/ + buf = _bitmap_getbuf(rel, P_NEW, BM_WRITE); + _bitmap_init_lovpage(rel, buf); + + _bitmap_log_newpage(rel, XLOG_BITMAP_INSERT_NEWLOV, buf); + + currLovPage = BufferGetPage(buf); + + /* set the first item to support NULL value */ + lovItem = _bitmap_formitem(0); + newOffset = OffsetNumberNext(PageGetMaxOffsetNumber(currLovPage)); + + if (PageAddItem(currLovPage, (Item)lovItem, + sizeof(BMLOVItemData), newOffset, + LP_USED) == InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("failed to add LOV item to \"%s\"", + RelationGetRelationName(rel)))); + + metapage->bm_lov_lastpage = BufferGetBlockNumber(buf); + + _bitmap_log_lovitem(rel, buf, true, newOffset, lovItem); + pfree(lovItem); + + _bitmap_wrtbuf(buf); + + _bitmap_log_metapage(rel, metapage); + + /* write the metapage */ + _bitmap_wrtbuf(metabuf); + } diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmapsearch.c bmdist/src/backend/access/bitmap/bitmapsearch.c *** pgsql.head/src/backend/access/bitmap/bitmapsearch.c 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/backend/access/bitmap/bitmapsearch.c 2006-08-01 13:19:30.680041352 +1000 *************** *** 0 **** --- 1,543 ---- + /*------------------------------------------------------------------------- + * + * bitmapsearch.c + * Search routines for on-disk bitmap index access method. 
+ * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + + #include "postgres.h" + + #include "access/genam.h" + #include "access/tupdesc.h" + #include "access/bitmap.h" + #include "storage/lmgr.h" + #include "parser/parse_oper.h" + + static void _bitmap_findbitmaps(IndexScanDesc scan, ScanDirection dir); + static void _bitmap_comp_next_batch_words(IndexScanDesc scan); + static void _bitmap_readWords(Relation rel, Buffer lovBuffer, + OffsetNumber lovOffset, BlockNumber* nextBlockNoP, + BM_HRL_WORD* headerWords, BM_HRL_WORD* words, + uint32* numOfWordsP, bool* readLastWords); + static void + _bitmap_init_scanpos(IndexScanDesc scan, + BMBitmapScanPosition bmScanPos, + BlockNumber lovBlock, + OffsetNumber lovOffset); + + /* + * _bitmap_first() -- find the first tuple that satisfies a given scan. + */ + bool + _bitmap_first(IndexScanDesc scan, + ScanDirection dir) + { + _bitmap_findbitmaps(scan, dir); + return _bitmap_next(scan, dir); + } + + /* + * _bitmap_next() -- return the next tuple that satisfies a given scan. + */ + bool + _bitmap_next(IndexScanDesc scan, + ScanDirection dir) + { + BMScanOpaque so ; + BMScanPosition scanPos; + uint64 nextTid; + + so = (BMScanOpaque) scan->opaque; + scanPos = so->bm_currPos; + + if (scanPos->bm_done) + return false; + + for (;;) + { + /* + * If there are no more words left from the previous scan, we + * try to compute the next batch of words. + */ + if (scanPos->bm_batchWords->numOfWords == 0 && + scanPos->bm_result.nextTidLoc >= scanPos->bm_result.numOfTids) + { + _bitmap_reset_batchwords(scanPos->bm_batchWords); + scanPos->bm_batchWords->firstTid = scanPos->bm_result.nextTid; + + _bitmap_comp_next_batch_words(scan); + + _bitmap_begin_iterate(scanPos->bm_batchWords, + &(scanPos->bm_result)); + } + + /* If we can not find more words, then this scan is over. 
*/ + if (scanPos->bm_batchWords->numOfWords == 0 && + scanPos->bm_result.nextTidLoc >= + scanPos->bm_result.numOfTids) + return false; + + nextTid = _bitmap_findnexttid(scanPos->bm_batchWords, + &(scanPos->bm_result)); + if (nextTid == 0) + continue; + else + break; + } + + Assert((nextTid%MaxNumHeapTuples)+1 > 0); + + ItemPointerSet(&(scan->xs_ctup.t_self), + (nextTid-1)/MaxNumHeapTuples, + ((nextTid-1)%MaxNumHeapTuples)+1); + scan->currentItemData = scan->xs_ctup.t_self; + + return true; + } + + /* + * _bitmap_firstblockwords() -- find the first block of bitmap words + * in a bitmap vector for a given scan. + */ + bool + _bitmap_firstblockwords(IndexScanDesc scan, + ScanDirection dir) + { + _bitmap_findbitmaps(scan, dir); + + return _bitmap_nextblockwords(scan, dir); + } + + /* + * _bitmap_nextblockwords() -- find the next block of bitmap words + * in a bitmap vector for a given scan. + */ + bool + _bitmap_nextblockwords(IndexScanDesc scan, + ScanDirection dir) + { + BMScanOpaque so; + + so = (BMScanOpaque) scan->opaque; + + /* check if this scan if over */ + if (so->bm_currPos->bm_done) + return false; + + _bitmap_comp_next_batch_words(scan); + + return true; + } + + /* + * _bitmap_comp_next_batch_words() -- compute the next batch of bitmap words + * from a given scan position. + */ + static void + _bitmap_comp_next_batch_words(IndexScanDesc scan) + { + BMScanPosition scanPos; + BMBitmapScanPosition bmScanPos; + int vectorNo; + BMBatchWords** batches; + int numBatches; + + scanPos = ((BMScanOpaque) scan->opaque)->bm_currPos; + bmScanPos = scanPos->bm_bitmapScanPos; + + batches = (BMBatchWords**) + palloc0(scanPos->bm_numBitmapVectors * sizeof(BMBatchWords*)); + + numBatches = 0; + /* + * Obtains the next batch of words for each bitmap vector. + * Ignores those bitmap vectors that contain no new words. + */ + for (vectorNo=0; vectorNobm_numBitmapVectors; vectorNo++) + { + /* + * If there are no words left from previous scan, read the next + * batch of words. 
+ */ + if (bmScanPos[vectorNo].bm_batchWords->numOfWords == 0 && + !(bmScanPos[vectorNo].bm_readLastWords)) { + BMBatchWords* batchWords; + + batchWords = bmScanPos[vectorNo].bm_batchWords; + _bitmap_reset_batchwords(batchWords); + _bitmap_readWords(scan->indexRelation, + bmScanPos[vectorNo].bm_lovBuffer, + bmScanPos[vectorNo].bm_lovOffset, + &(bmScanPos[vectorNo].bm_nextBlockNo), + batchWords->bitmapHeaderWords, + batchWords->bitmapContentWords, + &(batchWords->numOfWords), + &(bmScanPos[vectorNo].bm_readLastWords)); + } + + if (bmScanPos[vectorNo].bm_batchWords->numOfWords > 0) + { + batches[numBatches] = bmScanPos[vectorNo].bm_batchWords; + numBatches++; + } + } + + /* + * We handle the case where only one bitmap vector contributes to + * the scan separately with other cases. This is because + * bmScanPos->bm_batchWords and scanPos->bm_batchWords + * are the same. + */ + if (scanPos->bm_numBitmapVectors == 1) + { + if (bmScanPos->bm_batchWords->numOfWords == 0) + scanPos->bm_done = true; + pfree(batches); + return; + } + + /* + * At least two bitmap vectors contribute to this scan, we + * ORed these bitmap vectors. + */ + if (numBatches == 0) + { + scanPos->bm_done = true; + pfree(batches); + return; + } + + _bitmap_union(batches, numBatches, scanPos->bm_batchWords); + pfree(batches); + } + + /* + * _bitmap_readWords() -- read one-block of bitmap words from + * the bitmap page. + * + * If nextBlockNo is an invalid block number, then the two last words + * are stored in lovItem. Otherwise, read words from nextBlockNo. 
+  */
+ static void
+ _bitmap_readWords(Relation rel, Buffer lovBuffer, OffsetNumber lovOffset,
+                   BlockNumber* nextBlockNoP, BM_HRL_WORD* headerWords,
+                   BM_HRL_WORD* words, uint32* numOfWordsP, bool* readLastWords)
+ {
+     if (BlockNumberIsValid(*nextBlockNoP))
+     {
+         /* read the words stored on the bitmap page *nextBlockNoP */
+         Buffer bitmapBuffer =
+             _bitmap_getbuf(rel, *nextBlockNoP, BM_READ);
+
+         Page           bitmapPage;
+         BMBitmap       bitmap;
+         BMBitmapOpaque bitmapOpaque;
+
+         bitmapPage = BufferGetPage(bitmapBuffer);
+
+         bitmap = (BMBitmap) PageGetContents(bitmapPage);
+         bitmapOpaque = (BMBitmapOpaque)
+             PageGetSpecialPointer(bitmapPage);
+
+         /* copy out the header words and the used content words */
+         *numOfWordsP = bitmapOpaque->bm_hrl_words_used;
+         memcpy(headerWords, bitmap->bm_headerWords,
+                BM_MAX_NUM_OF_HEADER_WORDS*sizeof(BM_HRL_WORD));
+         memcpy(words, bitmap->bm_contentWords,
+                sizeof(BM_HRL_WORD)*(*numOfWordsP));
+
+         /* advance the caller's position to the next page in the chain */
+         *nextBlockNoP = bitmapOpaque->bm_bitmap_next;
+
+         _bitmap_relbuf(bitmapBuffer);
+
+         *readLastWords = false;
+
+         /*
+          * If this is the last bitmap page and the total number of words
+          * in this page is less than or equal to
+          * BM_NUM_OF_HRL_WORDS_PER_PAGE - 2, we read the last two words
+          * and append them into 'headerWords' and 'words'.
+          */
+         /* XXX: this is messy, lets hide it in a define */
+         if ((!BlockNumberIsValid(*nextBlockNoP)) &&
+             (*numOfWordsP <= BM_NUM_OF_HRL_WORDS_PER_PAGE - 2))
+         {
+             BM_HRL_WORD lastWords[2];
+             BM_HRL_WORD lastHeaderWords;
+             uint32      numWords;
+             int         offs;
+
+             /*
+              * *nextBlockNoP is invalid here, so this recursive call takes
+              * the LOV-item branch below and also sets *readLastWords.
+              *
+              * NOTE(review): that branch reports a single word when
+              * bm_last_compword is LITERAL_ALL_ONE, yet the Assert and the
+              * two-word memcpy below expect two -- confirm whether that
+              * combination can occur on this path.
+              */
+             _bitmap_readWords(rel, lovBuffer, lovOffset, nextBlockNoP,
+                               &lastHeaderWords, lastWords, &numWords,
+                               readLastWords);
+
+             Assert(numWords == 2);
+
+             /*
+              * Append the words, then merge their header bits (held in the
+              * top bits of lastHeaderWords) in at bit position *numOfWordsP.
+              */
+             memcpy(words+(*numOfWordsP), lastWords, 2*sizeof(BM_HRL_WORD));
+             offs = (*numOfWordsP)/BM_HRL_WORD_SIZE;
+             headerWords[offs] |=
+                 (lastHeaderWords >> ((*numOfWordsP)%BM_HRL_WORD_SIZE));
+             /* the second header bit spills into the next header word */
+             if ((*numOfWordsP) % BM_HRL_WORD_SIZE == BM_HRL_WORD_SIZE-1)
+                 headerWords[((*numOfWordsP)+1)/BM_HRL_WORD_SIZE] |=
+                     (lastHeaderWords << 1);
+             *numOfWordsP += 2;
+         }
+     }
+     else
+     {
+         /* no bitmap page left: the last words live in the LOV item */
+         BMLOVItem lovItem;
+         Page      lovPage;
+
+         /* the caller passes lovBuffer in; it is only locked here */
+         LockBuffer(lovBuffer, BM_READ);
+
+         lovPage = BufferGetPage(lovBuffer);
+         lovItem = (BMLOVItem) PageGetItem(lovPage,
+                                           PageGetItemId(lovPage, lovOffset));
+
+         if (lovItem->bm_last_compword != LITERAL_ALL_ONE)
+         {
+             /* two words: header bits go into the top two bit positions */
+             *numOfWordsP = 2;
+             headerWords[0] = (((BM_HRL_WORD)lovItem->bm_last_two_headerbits) <<
+                               (BM_HRL_WORD_SIZE-2));
+             words[0] = lovItem->bm_last_compword;
+             words[1] = lovItem->bm_last_word;
+         }
+         else
+         {
+             /* only bm_last_word is returned in this case */
+             *numOfWordsP = 1;
+             headerWords[0] = (((BM_HRL_WORD)lovItem->bm_last_two_headerbits) <<
+                               (BM_HRL_WORD_SIZE-1));
+             words[0] = lovItem->bm_last_word;
+         }
+
+         LockBuffer(lovBuffer, BUFFER_LOCK_UNLOCK);
+         *readLastWords = true;
+     }
+ }
+
+ /*
+  * _bitmap_findbitmaps() -- find the bitmap vectors that satisfy the
+  * index predicate.
+ */ + void + _bitmap_findbitmaps(IndexScanDesc scan, + ScanDirection dir) + { + BMScanOpaque so ; + BMScanPosition scanPos; + Buffer metabuf; + BMMetaPage metapage; + BlockNumber lovBlock; + OffsetNumber lovOffset; + BlockNumber* lovBlocks; + OffsetNumber* lovOffsets; + bool blockNull, offsetNull; + bool isnull = true; + int vectorNo, keyNo; + int loopNo; + + so = (BMScanOpaque) scan->opaque; + Assert(so->bm_currPos == NULL); + + /* allocate space and initialize values for so->bm_currPos */ + so->bm_currPos = (BMScanPosition) + palloc0(sizeof(BMScanPositionData)); + + scanPos = so->bm_currPos; + scanPos->bm_numBitmapVectors = 0; + scanPos->bm_done = false; + scanPos->bm_startBitmapVectorNo = 0; + MemSet(&(scanPos->bm_result), 0, sizeof(BMIterateResult)); + + metabuf = _bitmap_getbuf(scan->indexRelation, BM_METAPAGE, BM_READ); + metapage = (BMMetaPage)BufferGetPage(metabuf); + + for (keyNo = 0; keyNo < scan->numberOfKeys; keyNo++) + { + if (!((scan->keyData[keyNo]).sk_flags & SK_ISNULL)) + isnull = false; + } + + /* + * If the values for these keys are all NULL, the bitmap vector + * is the first LOV item in the LOV pages. 
+ */ + if (isnull) + { + lovBlock = BM_LOV_STARTPAGE; + lovOffset = 1; + + scanPos->bm_bitmapScanPos = (BMBitmapScanPosition) + palloc0(sizeof(BMBitmapScanPositionData)); + + _bitmap_init_scanpos(scan, scanPos->bm_bitmapScanPos, + lovBlock, lovOffset); + scanPos->bm_numBitmapVectors = 1; + + } else { + Relation lovHeap, lovIndex; + TupleDesc indexTupDesc; + ScanKey scanKeys; + IndexScanDesc scanDesc; + + _bitmap_open_lov_heapandindex + (scan->indexRelation, metapage, + &lovHeap, &lovIndex, AccessShareLock); + + indexTupDesc = RelationGetDescr(lovIndex); + + Assert(scan->numberOfKeys <= indexTupDesc->natts); + + scanKeys = palloc0(scan->numberOfKeys * sizeof(ScanKeyData)); + /* XXX: simplify this */ + for (keyNo = 0; keyNo < scan->numberOfKeys; keyNo++) + { + RegProcedure opfuncid; + ScanKey scanKey = (ScanKey)(((char *)scanKeys) + + keyNo * sizeof(ScanKeyData)); + + opfuncid = + equality_oper_funcid(indexTupDesc->attrs[keyNo]->atttypid); + ScanKeyEntryInitialize(scanKey, SK_ISNULL, keyNo + 1, + BTEqualStrategyNumber, InvalidOid, + opfuncid, 0); + + if ((scan->keyData[keyNo]).sk_flags & SK_ISNULL) + { + scanKey->sk_flags = SK_ISNULL; + scanKey->sk_argument = scan->keyData[keyNo].sk_argument; + } + else + { + scanKey->sk_flags = 0; + scanKey->sk_argument = scan->keyData[keyNo].sk_argument; + } + } + scanDesc = index_beginscan(lovHeap, lovIndex, SnapshotAny, + scan->numberOfKeys, scanKeys); + + #define BM_NUM_BITMAPS 100 + lovBlocks = (BlockNumber*) + palloc0(BM_NUM_BITMAPS * sizeof(BlockNumber)); + lovOffsets = (OffsetNumber*) + palloc0(BM_NUM_BITMAPS * sizeof(OffsetNumber)); + loopNo = 1; + + /* + * finds all lov items for this scan through lovHeap and lovIndex. + */ + while (_bitmap_findvalue(lovHeap, lovIndex, + scanKeys, scanDesc, + &lovBlock, &blockNull, + &lovOffset, &offsetNull)) + { + /* + * If we don't have enough space in lovBlocks and lovOffsets, + * we allocate a bigger space with double of the previous + * size. 
+ */ + if (scanPos->bm_numBitmapVectors / (loopNo*BM_NUM_BITMAPS) > 0) + { + BlockNumber* newLovBlocks = (BlockNumber*) + palloc0((loopNo+1) * BM_NUM_BITMAPS * sizeof(BlockNumber)); + OffsetNumber* newLovOffsets = (OffsetNumber*) + palloc0((loopNo+1) * BM_NUM_BITMAPS * sizeof(OffsetNumber)); + + memcpy(newLovBlocks, lovBlocks, + loopNo * BM_NUM_BITMAPS * sizeof(BlockNumber)); + memcpy(newLovOffsets, lovOffsets, + loopNo * BM_NUM_BITMAPS * sizeof(OffsetNumber)); + + pfree(lovBlocks); + pfree(lovOffsets); + + loopNo++; + + lovBlocks = newLovBlocks; + lovOffsets = newLovOffsets; + } + + lovBlocks[scanPos->bm_numBitmapVectors] = lovBlock; + lovOffsets[scanPos->bm_numBitmapVectors] = lovOffset; + scanPos->bm_numBitmapVectors++; + } + + scanPos->bm_bitmapScanPos = (BMBitmapScanPosition) + palloc0(sizeof(BMBitmapScanPositionData) * + scanPos->bm_numBitmapVectors); + for (vectorNo=0; vectorNobm_numBitmapVectors; vectorNo++) + { + BMBitmapScanPosition bmScanPos = + &(scanPos->bm_bitmapScanPos[vectorNo]); + _bitmap_init_scanpos(scan, bmScanPos, + lovBlocks[vectorNo], lovOffsets[vectorNo]); + } + + pfree(lovBlocks); + pfree(lovOffsets); + + index_endscan(scanDesc); + _bitmap_close_lov_heapandindex(lovHeap, lovIndex, AccessShareLock); + pfree(scanKeys); + } + + _bitmap_relbuf(metabuf); + + if (scanPos->bm_numBitmapVectors == 0) + { + scanPos->bm_done = true; + return; + } + + /* + * Since there is only one related bitmap vector, we share + * the result OnDiskBitmapWords with that of this bitmap vector. + */ + if (scanPos->bm_numBitmapVectors == 1) + { + scanPos->bm_batchWords = scanPos->bm_bitmapScanPos->bm_batchWords; + } else + { + scanPos->bm_batchWords = (BMBatchWords*) + palloc0(sizeof(BMBatchWords)); + _bitmap_init_batchwords(scanPos->bm_batchWords, + BM_NUM_OF_HRL_WORDS_PER_PAGE, + CurrentMemoryContext); + } + } + + /* + * _bitmap_init_scanpos() -- initialize a BMScanPosition for a given + * bitmap vector. 
+ */ + void + _bitmap_init_scanpos(IndexScanDesc scan, + BMBitmapScanPosition bmScanPos, + BlockNumber lovBlock, + OffsetNumber lovOffset) + { + Page lovPage; + BMLOVItem lovItem; + + bmScanPos->bm_lovOffset = lovOffset; + bmScanPos->bm_lovBuffer = _bitmap_getbuf(scan->indexRelation, lovBlock, + BM_READ); + + lovPage = BufferGetPage(bmScanPos->bm_lovBuffer); + lovItem = (BMLOVItem) + PageGetItem(lovPage, PageGetItemId (lovPage, bmScanPos->bm_lovOffset)); + + bmScanPos->bm_nextBlockNo = lovItem->bm_lov_head; + bmScanPos->bm_readLastWords = false; + bmScanPos->bm_batchWords = (BMBatchWords *) palloc0(sizeof(BMBatchWords)); + _bitmap_init_batchwords(bmScanPos->bm_batchWords, + BM_NUM_OF_HRL_WORDS_PER_PAGE, + CurrentMemoryContext); + + LockBuffer(bmScanPos->bm_lovBuffer, BUFFER_LOCK_UNLOCK); + } diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmaputil.c bmdist/src/backend/access/bitmap/bitmaputil.c *** pgsql.head/src/backend/access/bitmap/bitmaputil.c 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/backend/access/bitmap/bitmaputil.c 2006-08-01 13:19:36.769115672 +1000 *************** *** 0 **** --- 1,926 ---- + /*------------------------------------------------------------------------- + * + * bitmaputil.c + * Utility routines for on-disk bitmap index access method. + * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + + #include "postgres.h" + + #include "access/genam.h" + #include "access/bitmap.h" + #include "access/reloptions.h" + #include "miscadmin.h" + + static void + _bitmap_findnextword(BMBatchWords* words, uint32 nextReadNo); + static void + _bitmap_resetWord(BMBatchWords *words, uint32 prevStartNo); + static uint8 _bitmap_find_bitset(BM_HRL_WORD word, uint8 lastPos); + + /* + * _bitmap_formitem() -- construct a LOV entry -- a lov item. 
+ */ + BMLOVItem + _bitmap_formitem(uint64 currTidNumber) + { + int nbytes_bmitem; + BMLOVItem bmitem; + + nbytes_bmitem = sizeof(BMLOVItemData); + + bmitem = (BMLOVItem)palloc(nbytes_bmitem); + + bmitem->bm_lov_head = bmitem->bm_lov_tail = InvalidBlockNumber; + bmitem->bm_last_setbit = 0; + bmitem->bm_last_two_headerbits = (1<<7); + + /* fill up all existing bits with 0. */ + if (currTidNumber < BM_HRL_WORD_SIZE) + { + bmitem->bm_last_compword = LITERAL_ALL_ONE; + bmitem->bm_last_word = LITERAL_ALL_ZERO; + bmitem->bm_last_two_headerbits = 0; + bmitem->bm_last_tid_location = 0; + } + + else + { + uint32 numOfTotalFillWords; + BM_HRL_WORD numOfFillWords; + + numOfTotalFillWords = (currTidNumber-1)/BM_HRL_WORD_SIZE; + + numOfFillWords = + (numOfTotalFillWords >= MAX_FILL_LENGTH) ? MAX_FILL_LENGTH : + numOfTotalFillWords; + + bmitem->bm_last_compword = + BM_MAKE_FILL_WORD (0, numOfFillWords); + bmitem->bm_last_word = LITERAL_ALL_ZERO; + bmitem->bm_last_two_headerbits = 2; + + bmitem->bm_last_tid_location = numOfFillWords*BM_HRL_WORD_SIZE; + + /* + * If all zeros are too many to fit in one word, then + * we set bm_last_setbit so that the remaining zeros can + * be handled outside. + */ + if (numOfTotalFillWords > numOfFillWords) + bmitem->bm_last_setbit = numOfFillWords*BM_HRL_WORD_SIZE; + } + + return bmitem; + } + + /* + * _bitmap_init_batchwords() -- initialize a BMBatchWords in a given + * memory context. + * + * Allocate spaces for bitmap header words and bitmap content words. + */ + void + _bitmap_init_batchwords(BMBatchWords* words, + uint32 maxNumOfWords, + MemoryContext mcxt) + { + uint32 numOfHeaderWords; + + words->numOfWordsRead = 0; + words->nextReadNo = 1; + words->startNo = 0; + words->numOfWords = 0; + + numOfHeaderWords = + maxNumOfWords/BM_HRL_WORD_SIZE + + ((maxNumOfWords%BM_HRL_WORD_SIZE == 0) ? 
0 : 1); + + words->maxNumOfWords = maxNumOfWords; + + /* Make sure that we have at least one page of words */ + Assert(words->maxNumOfWords >= BM_NUM_OF_HRL_WORDS_PER_PAGE); + + words->bitmapHeaderWords = + MemoryContextAllocZero(mcxt, + sizeof(BM_HRL_WORD)*numOfHeaderWords); + words->bitmapContentWords = + MemoryContextAllocZero(mcxt, + sizeof(BM_HRL_WORD)*words->maxNumOfWords); + } + + /* + * _bitmap_reset_batchwords() -- reset the BMBatchWords for re-use. + */ + void + _bitmap_reset_batchwords(BMBatchWords* words) + { + words->startNo = 0; + words->numOfWords = 0; + memset (words->bitmapHeaderWords, 0, + (words->maxNumOfWords/BM_HRL_WORD_SIZE + + ((words->maxNumOfWords%BM_HRL_WORD_SIZE == 0) ? 0 : 1))); + } + + /* + * _bitmap_cleanup_batchwords() -- release spaces allocated for the BMBatchWords. + */ + void _bitmap_cleanup_batchwords(BMBatchWords* words) + { + if (words == NULL) + return; + + if (words->bitmapHeaderWords) + pfree(words->bitmapHeaderWords); + if (words->bitmapContentWords) + pfree(words->bitmapContentWords); + } + + /* + * _bitmap_findnexttid() -- find the next tid location in a given batch + * of bitmap words. + */ + uint64 + _bitmap_findnexttid(BMBatchWords *words, + BMIterateResult *result) { + /* if there is not tids from previous computation, then we + try to find next set of tids. */ + if (result->nextTidLoc >= result->numOfTids) + _bitmap_findnexttids(words, result, BM_BATCH_TIDS); + + /* if find more tids, then return the first one */ + if (result->nextTidLoc < result->numOfTids) + { + result->nextTidLoc++; + return (result->nextTids[result->nextTidLoc-1]); + } + + /* no more tids */ + return 0; + } + + /* + * _bitmap_findnexttids() -- find the next set of tids from a given + * batch of bitmap words. + * + * The maximum number of tids to be found is defined in 'maxTids'. 
+  */
+ void
+ _bitmap_findnexttids(BMBatchWords *words,
+                      BMIterateResult *result,
+                      uint32 maxTids) {
+     bool done = false;
+
+     result->nextTidLoc = result->numOfTids = 0;
+     while (words->numOfWords > 0 &&
+            result->numOfTids < maxTids && !done)
+     {
+         uint8 oldScanPos = result->lastScanPos; /* 0 means "start of a new word" */
+         BM_HRL_WORD word = words->bitmapContentWords[result->lastScanWordNo];
+
+         /*
+          * A new word that contains no set bits: either a zero fill word
+          * or an all-zero literal.  Skip the tids it covers and move on.
+          */
+         if ((oldScanPos == 0) &&
+             ((IS_FILL_WORD(words->bitmapHeaderWords,
+                            result->lastScanWordNo) &&
+               (GET_FILL_BIT(word) == 0)) ||
+              (word == 0)))
+         {
+             uint32 fillLength;
+             if (word == 0)
+                 fillLength = 1; /* an all-zero literal covers exactly one word */
+             else
+                 fillLength = FILL_LENGTH(word);
+             result->nextTid +=
+                 fillLength * BM_HRL_WORD_SIZE;
+
+             result->lastScanWordNo++;
+             words->numOfWords--;
+             result->lastScanPos = 0;
+             continue;
+         }
+         /* if this is a set fill word */
+         else if (IS_FILL_WORD(words->bitmapHeaderWords,
+                               result->lastScanWordNo) &&
+                  (GET_FILL_BIT(word) == 1))
+         {
+             uint32 numOfFillWords = FILL_LENGTH(word);
+             uint8 bitNo;
+
+             /* only expand a fill word if a whole word of tids still fits */
+             while ((numOfFillWords > 0) &&
+                    (result->numOfTids+BM_HRL_WORD_SIZE <= maxTids)) {
+                 for (bitNo=0; bitNonextTids[result->numOfTids++] = /* NOTE(review): loop header damaged by extraction (text after "bitNo" up to "->" is missing) -- restore from the original patch */
+                         (++result->nextTid);
+                 }
+                 numOfFillWords--;
+                 words->bitmapContentWords[result->lastScanWordNo]--; /* consumes one unit of the stored fill word in place */
+             }
+
+             if (numOfFillWords == 0) {
+                 result->lastScanWordNo++;
+                 words->numOfWords--;
+                 result->lastScanPos = 0;
+                 continue;
+             } else {
+                 done = true; /* output is full; resume with this fill word on the next call */
+                 break;
+             }
+         }
+
+         if(oldScanPos == 0)
+             oldScanPos = BM_HRL_WORD_SIZE+1; /* sentinel so the loop below runs at least once for a new word */
+
+         while (oldScanPos != 0 && result->numOfTids < maxTids) {
+             if (oldScanPos == BM_HRL_WORD_SIZE+1)
+                 oldScanPos = 0; /* undo the sentinel: scan from the leftmost bit */
+             result->lastScanPos =
+                 _bitmap_find_bitset(
+                     words->bitmapContentWords[result->lastScanWordNo],
+                     oldScanPos);
+
+             /* if we found a set bit in this word. */
+             if (result->lastScanPos != 0)
+             {
+                 result->nextTid +=
+                     (result->lastScanPos - oldScanPos);
+                 result->nextTids[result->numOfTids++] = result->nextTid;
+             }
+             else
+             {
+                 /* no more set bits: account for the rest of this word */
+                 result->nextTid +=
+                     BM_HRL_WORD_SIZE - oldScanPos;
+
+                 /* start scanning a new word */
+                 words->numOfWords --;
+                 result->lastScanWordNo ++;
+                 result->lastScanPos = 0;
+             }
+
+             oldScanPos = result->lastScanPos;
+         }
+     }
+ }
+
+ /*
+  * _bitmap_intersect() -- intersect 'numBatches' bitmap words.
+  *
+  * All 'numBatches' bitmap words are HRL compressed. The result
+  * bitmap words are HRL compressed, except that fill set words(1s) may
+  * be lossily compressed.
+  *
+  * Words are appended to 'result' until it holds result->maxNumOfWords
+  * words or one of the batches runs out of words.  On return, every
+  * batch's nextReadNo is meant to be set to the next uncompressed word
+  * position to read.
+  *
+  * NOTE(review): several loop headers in this function were damaged when
+  * this patch text was extracted; see the inline notes.
+  *
+  * NOTE(review): the end-of-input test near the bottom of the main loop
+  * compares batchNo against the constant 1, whereas _bitmap_union() uses
+  * numBatches-1 in the same place -- confirm which is intended.
+  */
+ void
+ _bitmap_intersect(BMBatchWords **batches, uint32 numBatches,
+                   BMBatchWords *result) {
+     bool done = false;
+     uint32 *prevStartNos; /* per-batch startNo before this round, passed to _bitmap_resetWord (presumably in the damaged loop below) */
+     uint32 nextReadNo;
+     uint32 batchNo;
+
+     Assert(numBatches > 0);
+
+     prevStartNos = (uint32*)palloc0(numBatches*sizeof(uint32));
+
+     nextReadNo = batches[0]->nextReadNo;
+
+     while (!done &&
+            result->numOfWords < result->maxNumOfWords)
+     {
+         BM_HRL_WORD andWord = LITERAL_ALL_ONE;
+         BM_HRL_WORD word;
+
+         bool andWordIsLiteral = true;
+
+         /*
+          * We walk through the bitmap word in each list one by one
+          * without de-compressing the bitmap words. 'nextReadNo' defines
+          * the position of the next word that should be read in an
+          * uncompressed format.
+          */
+         for (batchNo=0; batchNonumOfWords == 0) /* NOTE(review): damaged by extraction -- loop condition and the statements up to "->numOfWords" are missing */
+             {
+                 done = true;
+                 break;
+             }
+
+             Assert((batches[batchNo])->numOfWordsRead == nextReadNo-1);
+
+             /* Here, startNo should point to the word to be read. */
+             word = (batches[batchNo])->bitmapContentWords
+                 [(batches[batchNo])->startNo];
+
+             /*
+              * A zero fill word decides the result for its whole length,
+              * so emit a zero fill word and stop looking at other batches.
+              */
+             if (IS_FILL_WORD((batches[batchNo])->bitmapHeaderWords,
+                              (batches[batchNo])->startNo) &&
+                 (GET_FILL_BIT(word) == 0))
+             {
+                 (batches[batchNo])->numOfWordsRead +=
+                     FILL_LENGTH(word);
+
+                 andWord = BM_MAKE_FILL_WORD
+                     (0, (batches[batchNo])->numOfWordsRead-nextReadNo+1);
+                 andWordIsLiteral = false;
+
+                 nextReadNo = (batches[batchNo])->numOfWordsRead+1;
+                 (batches[batchNo])->startNo ++;
+                 (batches[batchNo])->numOfWords --;
+                 break;
+             }
+
+             /*
+              * A set fill word leaves andWord unchanged; consume one
+              * uncompressed word's worth of it.
+              */
+             else if (IS_FILL_WORD((batches[batchNo])->bitmapHeaderWords,
+                                   (batches[batchNo])->startNo) &&
+                      (GET_FILL_BIT(word) == 1)) {
+                 (batches[batchNo])->numOfWordsRead ++;
+
+                 prevStartNos[batchNo] = (batches[batchNo])->startNo;
+
+                 if (FILL_LENGTH(word) == 1)
+                 {
+                     (batches[batchNo])->startNo ++;
+                     (batches[batchNo])->numOfWords --;
+                 }
+
+                 else {
+                     (batches[batchNo])->bitmapContentWords
+                         [(batches[batchNo])->startNo] -= 1;
+                 }
+
+                 andWordIsLiteral = true;
+             }
+
+             /* a literal word: AND it into the accumulator */
+             else if (!IS_FILL_WORD((batches[batchNo])->bitmapHeaderWords,
+                                    (batches[batchNo])->startNo))
+             {
+                 prevStartNos[batchNo] = (batches[batchNo])->startNo;
+
+                 andWord &= word;
+                 (batches[batchNo])->numOfWordsRead ++;
+                 (batches[batchNo])->startNo ++;
+                 (batches[batchNo])->numOfWords --;
+
+                 andWordIsLiteral = true;
+             }
+         }
+
+         /* Since there are not enough words in this attribute,
+            break this loop. */
+         if (done) {
+             uint32 preBatchNo;
+
+             /* reset the attributes before batchNo */
+             for (preBatchNo=0; preBatchNobitmapHeaderWords[ /* NOTE(review): damaged by extraction -- everything between "preBatchNo" and "->bitmapHeaderWords[" is missing, including the end of this if-block */
+                 result->numOfWords/BM_HRL_WORD_SIZE] |=
+                 (((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE-1-
+                                     (result->numOfWords%BM_HRL_WORD_SIZE))); /* header bit for word i lives at bit (BM_HRL_WORD_SIZE-1 - i%BM_HRL_WORD_SIZE) of header word i/BM_HRL_WORD_SIZE */
+             result->bitmapContentWords[result->numOfWords] =
+                 andWord;
+             result->numOfWords++;
+         }
+
+         if (andWordIsLiteral)
+             nextReadNo++;
+
+         if (batchNo == 1 && /* NOTE(review): _bitmap_union() tests numBatches-1 here */
+             (batches[batchNo])->numOfWords == 0)
+             done = true;
+     }
+
+     /* set the nextReadNo */
+     for (batchNo=0; batchNonextReadNo = nextReadNo; /* NOTE(review): loop header damaged by extraction */
+
+     pfree(prevStartNos);
+ }
+
+ /*
+  * _bitmap_union() -- union 'numBatches' bitmap words.
+  *
+  * All 'numBatches' bitmap words are HRL compressed. The result
+  * bitmap words are HRL compressed, except that fill unset words(0s) may
+  * be lossily compressed.
+  *
+  * Does nothing when numBatches is 0.  Otherwise words are appended to
+  * 'result' until it holds result->maxNumOfWords words or one of the
+  * batches runs out of words.
+  *
+  * NOTE(review): several loop headers in this function were damaged when
+  * this patch text was extracted; see the inline notes.
+  */
+ void
+ _bitmap_union(BMBatchWords **batches, uint32 numBatches,
+               BMBatchWords *result) {
+     bool done = false;
+     uint32 *prevStartNos;
+     uint32 nextReadNo;
+     uint32 batchNo;
+
+     Assert (numBatches >= 0); /* always true: numBatches is unsigned */
+
+     if (numBatches == 0)
+         return;
+
+     prevStartNos = (uint32*)palloc0(numBatches*sizeof(uint32));
+
+     nextReadNo = batches[0]->nextReadNo;
+
+     while (!done &&
+            result->numOfWords < result->maxNumOfWords)
+     {
+         BM_HRL_WORD orWord = LITERAL_ALL_ZERO;
+         BM_HRL_WORD word;
+
+         bool orWordIsLiteral = true;
+
+         for (batchNo=0; batchNonumOfWords == 0) /* NOTE(review): damaged by extraction -- loop condition and the statements up to "->numOfWords" are missing */
+             {
+                 done = true;
+                 break;
+             }
+
+             Assert((batches[batchNo])->numOfWordsRead == nextReadNo-1);
+
+             /* Here, startNo should point to the word to be read. */
+             word = (batches[batchNo])->bitmapContentWords
+                 [(batches[batchNo])->startNo];
+
+             /*
+              * A set fill word decides the result for its whole length,
+              * so emit a set fill word and stop looking at other batches.
+              */
+             if (IS_FILL_WORD((batches[batchNo])->bitmapHeaderWords,
+                              (batches[batchNo])->startNo) &&
+                 (GET_FILL_BIT(word) == 1))
+             {
+                 (batches[batchNo])->numOfWordsRead +=
+                     FILL_LENGTH(word);
+
+                 orWord = BM_MAKE_FILL_WORD
+                     (1, (batches[batchNo])->numOfWordsRead-nextReadNo+1);
+                 orWordIsLiteral = false;
+
+                 nextReadNo = (batches[batchNo])->numOfWordsRead+1;
+                 (batches[batchNo])->startNo ++;
+                 (batches[batchNo])->numOfWords --;
+                 break;
+             }
+
+             /*
+              * A zero fill word leaves orWord unchanged; consume one
+              * uncompressed word's worth of it.
+              */
+             else if (IS_FILL_WORD((batches[batchNo])->bitmapHeaderWords,
+                                   (batches[batchNo])->startNo) &&
+                      (GET_FILL_BIT(word) == 0)) {
+                 (batches[batchNo])->numOfWordsRead ++;
+
+                 prevStartNos[batchNo] = (batches[batchNo])->startNo;
+
+                 if (FILL_LENGTH(word) == 1)
+                 {
+                     (batches[batchNo])->startNo ++;
+                     (batches[batchNo])->numOfWords --;
+                 }
+
+                 else {
+                     (batches[batchNo])->bitmapContentWords
+                         [(batches[batchNo])->startNo] -= 1;
+                 }
+
+                 orWordIsLiteral = true;
+             }
+
+             /* a literal word: OR it into the accumulator */
+             else if (!IS_FILL_WORD((batches[batchNo])->bitmapHeaderWords,
+                                    (batches[batchNo])->startNo))
+             {
+                 prevStartNos[batchNo] = (batches[batchNo])->startNo;
+
+                 orWord |= word;
+                 (batches[batchNo])->numOfWordsRead ++;
+                 (batches[batchNo])->startNo ++;
+                 (batches[batchNo])->numOfWords --;
+
+                 orWordIsLiteral = true;
+             }
+         }
+
+         /* Since there are not enough words in this attribute,
+            break this loop. */
+         if (done) {
+             uint32 preBatchNo;
+
+             /* reset the attributes before batchNo */
+             for (preBatchNo=0; preBatchNobitmapHeaderWords[ /* NOTE(review): damaged by extraction -- everything between "preBatchNo" and "->bitmapHeaderWords[" is missing, including the end of this if-block */
+                 result->numOfWords/BM_HRL_WORD_SIZE] |=
+                 (((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE-1-
+                                     (result->numOfWords%BM_HRL_WORD_SIZE)));
+             result->bitmapContentWords[result->numOfWords] = orWord;
+             result->numOfWords++;
+         }
+
+         if (orWordIsLiteral)
+             nextReadNo++;
+
+         if (batchNo == numBatches-1 &&
+             (batches[batchNo])->numOfWords == 0)
+             done = true;
+     }
+
+     /* set the nextReadNo */
+     for (batchNo=0; batchNonextReadNo = nextReadNo; /* NOTE(review): loop header damaged by extraction */
+
+     pfree(prevStartNos);
+ }
+
+ /*
+  * _bitmap_findnextword() -- Find the next word whose position is
+  * 'nextReadNo' in an uncompressed format.
+  *
+  * Advances 'words' in place: startNo, numOfWords and numOfWordsRead are
+  * updated, and a fill word that is only partly skipped has its stored
+  * value reduced by the number of words skipped.
+  */
+ static void
+ _bitmap_findnextword(BMBatchWords* words, uint32 nextReadNo)
+ {
+     /*
+      * 'words->numOfWordsRead' defines how many un-compressed words
+      * have been read in this OnDiskBitmapWords. We read from
+      * position 'startNo', and increment 'words->numOfWordsRead'
+      * differently based on the type of words that are read, until
+      * 'words->numOfWordsRead' is equal to 'nextReadNo'-1 (or the batch
+      * runs out of words).
+      */
+     while ( (words->numOfWords > 0) &&
+             (words->numOfWordsRead <
+              nextReadNo-1))
+     {
+         BM_HRL_WORD word = words->bitmapContentWords
+             [words->startNo];
+
+         if (IS_FILL_WORD(words->bitmapHeaderWords,
+                          words->startNo)) {
+             /* the whole fill word lies before the target: skip it */
+             if(FILL_LENGTH(word) <=
+                (nextReadNo-
+                 words->numOfWordsRead-1))
+             {
+                 words->numOfWordsRead +=
+                     FILL_LENGTH(word);
+                 words->startNo ++;
+                 words->numOfWords --;
+             }
+
+             /* the target lies inside this fill word: consume only part of it */
+             else {
+                 words->bitmapContentWords
+                     [words->startNo] -=
+                     (nextReadNo-
+                      words->numOfWordsRead-1);
+                 words->numOfWordsRead =
+                     nextReadNo-1;
+             }
+         }
+         else {
+             /* a literal word stands for exactly one uncompressed word */
+             words->numOfWordsRead ++;
+             words->startNo ++;
+             words->numOfWords --;
+         }
+     }
+ }
+
+ /*
+  * _bitmap_resetWord() -- Reset the read position in an BMBatchWords
+  * to its previous value.
+  *
+  * Reset the read position in an BMBatchWords to its previous value,
+  * which is given in 'prevStartNo'.
Based on different type of words read, + * the actual bitmap word may need to be changed. + */ + static void + _bitmap_resetWord(BMBatchWords *words, uint32 prevStartNo) + { + if (words->startNo > prevStartNo) { + + Assert (words->startNo == prevStartNo + 1); + + words->startNo = prevStartNo; + words->numOfWords++; + } + + else { + Assert ((words->startNo == prevStartNo) && + IS_FILL_WORD(words->bitmapHeaderWords, words->startNo)); + words->bitmapContentWords[words->startNo] ++; + } + + words->numOfWordsRead--; + } + + /* + * _bitmap_find_bitset() -- find the leftmost set bit (bit=1) in the + * given word since 'lastPos', not including 'lastPos'. + * + * The leftmost bit in the given word is considered the position 1, and + * the rightmost bit is considered the position BM_HRL_WORD_SIZE. + * + * If such set bit does not exist in this word, 0 is returned. + */ + static uint8 + _bitmap_find_bitset(BM_HRL_WORD word, uint8 lastPos) + { + uint8 pos = lastPos+1; + BM_HRL_WORD leftmostBitWord; + + if (pos > BM_HRL_WORD_SIZE) + return 0; + + leftmostBitWord = (((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE - pos)); + + while ((pos<=BM_HRL_WORD_SIZE) && + ((word & leftmostBitWord) == 0)) + { + leftmostBitWord >>= 1; + pos ++; + } + + if (pos > BM_HRL_WORD_SIZE) + pos = 0; + + return pos; + } + + /* + * _bitmap_begin_iterate() -- initialize the given BMIterateResult instance. + */ + void + _bitmap_begin_iterate(BMBatchWords *words, BMIterateResult* result) + { + result->nextTid = words->firstTid; + result->lastScanPos = 0; + result->lastScanWordNo = words->startNo; + result->numOfTids = 0; + result->nextTidLoc = 0; + } + + /* + * _bitmap_log_newpage() -- log a new page. + * + * This function is called before writing a new buffer. 
+ */ + void + _bitmap_log_newpage(Relation rel, uint8 info, Buffer buf) + { + Page page; + + page = BufferGetPage(buf); + + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!(rel->rd_istemp)) + { + xl_bm_newpage xlNewPage; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlNewPage.bm_node = rel->rd_node; + xlNewPage.bm_new_blkno = BufferGetBlockNumber(buf); + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)&xlNewPage; + rdata[0].len = sizeof(xl_bm_newpage); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, info, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + END_CRIT_SECTION(); + } + + /* + * _bitmap_log_metapage() -- log the changes to the metapage + */ + void + _bitmap_log_metapage(Relation rel, BMMetaPage metapage) + { + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!(rel->rd_istemp)) + { + xl_bm_metapage* xlMeta; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlMeta = (xl_bm_metapage*) + palloc(MAXALIGN(sizeof(xl_bm_metapage))); + xlMeta->bm_node = rel->rd_node; + xlMeta->bm_num_tuples = metapage->bm_num_tuples; + xlMeta->bm_lov_heapId = metapage->bm_lov_heapId; + xlMeta->bm_lov_indexId = metapage->bm_lov_indexId; + xlMeta->bm_lov_lastpage = metapage->bm_lov_lastpage; + xlMeta->bm_need_rebuilt = metapage->bm_need_rebuilt; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)xlMeta; + rdata[0].len = MAXALIGN(sizeof(xl_bm_metapage)); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META, rdata); + + PageSetLSN(metapage, recptr); + PageSetTLI(metapage, ThisTimeLineID); + pfree(xlMeta); + } + + END_CRIT_SECTION(); + } + + /* + * _bitmap_log_bitmappage() -- log the changes to a bitmap page. + * + * This function inserts the changes to a bitmap page to xlog. + * The parameter 'numWords' defines the last 'numWords' words + * in bitmapBuffer are new. If isOpaque is set, then we also + * log the information about the block number for the next + * bitmap page. 
+ */ + void + _bitmap_log_bitmappage(Relation rel, Buffer bitmapBuffer, bool isOpaque, + uint32 numWords) + { + Page bitmapPage; + BMBitmapOpaque bitmapPageOpaque; + BMBitmap bitmap; + + bitmapPage = BufferGetPage(bitmapBuffer); + bitmapPageOpaque = (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage); + bitmap = (BMBitmap) PageGetContents(bitmapPage); + + Assert(bitmapPageOpaque->bm_hrl_words_used >= numWords); + + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!(rel->rd_istemp)) + { + xl_bm_bitmappage* xlBitmap; + XLogRecPtr recptr; + XLogRecData rdata[1]; + BM_HRL_WORD* bitmapWords; + + xlBitmap = (xl_bm_bitmappage*) + palloc(MAXALIGN(sizeof(xl_bm_bitmappage)) + + numWords*sizeof(BM_HRL_WORD)); + xlBitmap->bm_node = rel->rd_node; + xlBitmap->bm_bitmap_blkno = BufferGetBlockNumber(bitmapBuffer); + xlBitmap->bm_isOpaque = isOpaque; + + xlBitmap->bm_last_tid_location = bitmapPageOpaque->bm_last_tid_location; + xlBitmap->bm_hrl_words_used = bitmapPageOpaque->bm_hrl_words_used; + bitmapWords = (BM_HRL_WORD*) + (((char*)xlBitmap) + MAXALIGN(sizeof(xl_bm_bitmappage))); + xlBitmap->bm_num_words = numWords; + memcpy(xlBitmap->bm_headerWords, + bitmap->bm_headerWords, + BM_MAX_NUM_OF_HEADER_WORDS*sizeof(BM_HRL_WORD)); + memcpy(bitmapWords, + (bitmap->bm_contentWords + (bitmapPageOpaque->bm_hrl_words_used - + numWords)), + numWords*sizeof(BM_HRL_WORD)); + xlBitmap->bm_next_blkno = InvalidBlockNumber; + + if (isOpaque) + xlBitmap->bm_next_blkno = bitmapPageOpaque->bm_bitmap_next; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)xlBitmap; + rdata[0].len = MAXALIGN(sizeof(xl_bm_bitmappage)) + + numWords*sizeof(BM_HRL_WORD); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP, rdata); + + PageSetLSN(bitmapPage, recptr); + PageSetTLI(bitmapPage, ThisTimeLineID); + pfree(xlBitmap); + } + + END_CRIT_SECTION(); + } + + /* + * _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap. 
+  *
+  * The record identifies the LOV item by (block number of 'lovBuffer',
+  * 'lovOffset') and carries bm_last_compword, bm_last_word and
+  * bm_last_two_headerbits from 'lovItem'.  Nothing is logged for
+  * temporary relations.
+  */
+ void
+ _bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer,
+                              OffsetNumber lovOffset, BMLOVItem lovItem)
+ {
+     /* XLOG stuff */
+     START_CRIT_SECTION();
+
+     if (!(rel->rd_istemp))
+     {
+         xl_bm_bitmap_lastwords  walRec;
+         XLogRecData             recData[1];
+         XLogRecPtr              insertLsn;
+         Page                    lovPage;
+
+         /* where the LOV item lives */
+         walRec.bm_node = rel->rd_node;
+         walRec.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
+         walRec.bm_lov_offset = lovOffset;
+
+         /* the words being logged */
+         walRec.bm_last_compword = lovItem->bm_last_compword;
+         walRec.bm_last_word = lovItem->bm_last_word;
+         walRec.bm_last_two_headerbits = lovItem->bm_last_two_headerbits;
+
+         recData[0].next = NULL;
+         recData[0].buffer = InvalidBuffer;
+         recData[0].data = (char*)&walRec;
+         recData[0].len = sizeof(xl_bm_bitmap_lastwords);
+
+         insertLsn =
+             XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS, recData);
+
+         /* stamp the LOV page with the record's position */
+         lovPage = BufferGetPage(lovBuffer);
+         PageSetLSN(lovPage, insertLsn);
+         PageSetTLI(lovPage, ThisTimeLineID);
+     }
+
+     END_CRIT_SECTION();
+ }
+
+ /*
+  * _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
+ */ + void + _bitmap_log_lovitem(Relation rel, Buffer lovBuffer, bool isNewItem, + OffsetNumber offset, BMLOVItem lovItem) + { + Page lovPage = BufferGetPage(lovBuffer); + + /* XLOG stuff */ + START_CRIT_SECTION(); + + if (!(rel->rd_istemp)) + { + xl_bm_lovitem xlLovItem; + XLogRecPtr recptr; + XLogRecData rdata[1]; + + xlLovItem.bm_node = rel->rd_node; + xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer); + xlLovItem.bm_isNewItem = isNewItem; + xlLovItem.bm_lov_offset = offset; + memcpy(&(xlLovItem.bm_lovItem), lovItem, + sizeof(BMLOVItemData)); + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char*)&xlLovItem; + rdata[0].len = sizeof(xl_bm_lovitem); + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BITMAP_ID, + XLOG_BITMAP_INSERT_LOVITEM, rdata); + + PageSetLSN(lovPage, recptr); + PageSetTLI(lovPage, ThisTimeLineID); + } + + END_CRIT_SECTION(); + } + + Datum + bmoptions(PG_FUNCTION_ARGS) + { + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result; + + /* + * It's not clear that fillfactor is useful for on-disk bitmap index, + * but for the moment we'll accept it anyway. (It won't do anything...) + */ + #define BM_MIN_FILLFACTOR 10 + #define BM_DEFAULT_FILLFACTOR 100 + + result = default_reloptions(reloptions, validate, + BM_MIN_FILLFACTOR, + BM_DEFAULT_FILLFACTOR); + if (result) + PG_RETURN_BYTEA_P(result); + PG_RETURN_NULL(); + } diff -Ncpr pgsql.head/src/backend/access/bitmap/bitmapxlog.c bmdist/src/backend/access/bitmap/bitmapxlog.c *** pgsql.head/src/backend/access/bitmap/bitmapxlog.c 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/backend/access/bitmap/bitmapxlog.c 2006-08-01 13:19:45.248826560 +1000 *************** *** 0 **** --- 1,472 ---- + /*------------------------------------------------------------------------- + * + * bitmapxlog.c + * WAL replay logic for the bitmap index. 
+ * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include + + #include "access/bitmap.h" + #include "access/xlogutils.h" + + /* + * _bitmap_xlog_newpage() -- create a new page. + */ + static void + _bitmap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record) + { + xl_bm_newpage *xlrec = (xl_bm_newpage*) XLogRecGetData(record); + + Relation reln; + Page page; + uint8 info; + + info = record->xl_info & ~XLR_INFO_MASK; + + reln = XLogOpenRelation(xlrec->bm_node); + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer buffer; + + buffer = XLogReadBuffer(reln, xlrec->bm_new_blkno, true); + if (!BufferIsValid(buffer)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_newpage) " + "block unfound: %d", + xlrec->bm_new_blkno); + + page = BufferGetPage(buffer); + + if (XLByteLT(PageGetLSN(page), lsn)) + { + BMMetaPage metapage; + + switch (info) + { + case XLOG_BITMAP_INSERT_NEWMETA: + PageInit(page, BufferGetPageSize(buffer), 0); + metapage = (BMMetaPage) page; + metapage->bm_num_tuples = 0; + metapage->bm_need_rebuilt = false; + break; + case XLOG_BITMAP_INSERT_NEWLOV: + _bitmap_init_lovpage(reln, buffer); + break; + case XLOG_BITMAP_INSERT_NEWBITMAP: + _bitmap_init_bitmappage(reln, buffer); + break; + default: + elog(PANIC, "bitmap_redo: unknown newpage op code %u", info); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + _bitmap_wrtbuf(buffer); + } + + else { + _bitmap_relbuf(buffer); + } + } + else + elog(PANIC, "bm_insert_undo: not implemented."); + } + + /* + * _bitmap_xlog_insert_lovitem() -- insert a new lov item. 
+ */ + static void + _bitmap_xlog_insert_lovitem(bool redo, XLogRecPtr lsn, XLogRecord* record) + { + xl_bm_lovitem *xlrec = (xl_bm_lovitem*) XLogRecGetData(record); + Relation reln; + + reln = XLogOpenRelation(xlrec->bm_node); + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer lovBuffer; + Page lovPage; + + lovBuffer = XLogReadBuffer(reln, xlrec->bm_lov_blkno, false); + if (!BufferIsValid(lovBuffer)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_lovitem)" + " block unfound: %d", + xlrec->bm_lov_blkno); + + lovPage = BufferGetPage(lovBuffer); + + if (XLByteLT(PageGetLSN(lovPage), lsn)) + { + if(xlrec->bm_isNewItem) + { + OffsetNumber newOffset, itemSize; + + newOffset = OffsetNumberNext(PageGetMaxOffsetNumber(lovPage)); + if (newOffset != xlrec->bm_lov_offset) + elog(PANIC, "bm_insert_redo: LOV item is not inserted " + "in pos %d(requested %d)", + newOffset, xlrec->bm_lov_offset); + + itemSize = sizeof(BMLOVItemData); + if (itemSize > PageGetFreeSpace(lovPage)) + elog(PANIC, + "bm_insert_redo: not enough space in LOV page %d", + xlrec->bm_lov_blkno); + + if (PageAddItem(lovPage, (Item)&(xlrec->bm_lovItem), itemSize, + newOffset, LP_USED) == InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("failed to add LOV item to \"%s\"", + RelationGetRelationName(reln)))); + } + else + { + /* XXX: we don't do anything with oldLovItem here ! */ + BMLOVItem oldLovItem; + oldLovItem = (BMLOVItem) PageGetItem(lovPage, + PageGetItemId(lovPage, xlrec->bm_lov_offset)); + + memcpy(oldLovItem, &(xlrec->bm_lovItem), sizeof(BMLOVItemData)); + } + + PageSetLSN(lovPage, lsn); + PageSetTLI(lovPage, ThisTimeLineID); + _bitmap_wrtbuf(lovBuffer); + } + else + _bitmap_relbuf(lovBuffer); + } + + else + elog(PANIC, "bm_insert_undo: not implemented."); + } + + /* + * _bitmap_xlog_insert_meta() -- update a metapage. 
+ */ + static void + _bitmap_xlog_insert_meta(bool redo, XLogRecPtr lsn, XLogRecord* record) + { + xl_bm_metapage *xlrec = (xl_bm_metapage*) XLogRecGetData(record); + Relation reln; + + reln = XLogOpenRelation(xlrec->bm_node); + + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer metabuf; + BMMetaPage metapage; + + metabuf = XLogReadBuffer(reln, BM_METAPAGE, false); + if (!BufferIsValid(metabuf)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_meta) " + "block unfound: %d", BM_METAPAGE); + + /* restore the page */ + metapage = (BMMetaPage)BufferGetPage(metabuf); + + if (XLByteLT(PageGetLSN(metapage), lsn)) + { + metapage->bm_num_tuples = xlrec->bm_num_tuples; + metapage->bm_lov_heapId = xlrec->bm_lov_heapId; + metapage->bm_lov_indexId = xlrec->bm_lov_indexId; + metapage->bm_lov_lastpage = xlrec->bm_lov_lastpage; + metapage->bm_need_rebuilt = xlrec->bm_need_rebuilt; + + PageSetLSN(metapage, lsn); + PageSetTLI(metapage, ThisTimeLineID); + _bitmap_wrtbuf(metabuf); + } + else + _bitmap_relbuf(metabuf); + } + else + elog(PANIC, "bm_insert_undo: not implemented."); + } + + /* + * _bitmap_xlog_insert_bitmap() -- update a bitmap page. 
+ */ + static void + _bitmap_xlog_insert_bitmap(bool redo, XLogRecPtr lsn, XLogRecord* record) + { + xl_bm_bitmappage *xlrec = (xl_bm_bitmappage*) XLogRecGetData(record); + Relation reln; + + reln = XLogOpenRelation(xlrec->bm_node); + if (!RelationIsValid(reln)) + return; + + if (redo) + { + Buffer bitmapBuffer; + Page bitmapPage; + BMBitmapOpaque bitmapPageOpaque ; + + bitmapBuffer = XLogReadBuffer(reln, xlrec->bm_bitmap_blkno, false); + if (!BufferIsValid(bitmapBuffer)) + elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_bitmap) " + "block unfound: %d", + xlrec->bm_bitmap_blkno); + + bitmapPage = BufferGetPage(bitmapBuffer); + + if (XLByteLT(PageGetLSN(bitmapPage), lsn)) + { + BMBitmap bitmap; + BM_HRL_WORD* bitmapWords = (BM_HRL_WORD*) + (((char*)xlrec) + MAXALIGN(sizeof(xl_bm_bitmappage))); + + bitmapPageOpaque = + (BMBitmapOpaque)PageGetSpecialPointer(bitmapPage);; + bitmap = (BMBitmap) PageGetContents(bitmapPage); + + bitmapPageOpaque->bm_last_tid_location = + xlrec->bm_last_tid_location; + bitmapPageOpaque->bm_hrl_words_used = + xlrec->bm_hrl_words_used; + + /* copy the header words and the content words */ + memcpy(bitmap->bm_headerWords, + xlrec->bm_headerWords, + BM_MAX_NUM_OF_HEADER_WORDS*sizeof(BM_HRL_WORD)); + memcpy((bitmap->bm_contentWords + + (bitmapPageOpaque->bm_hrl_words_used- + xlrec->bm_num_words)), + bitmapWords, + xlrec->bm_num_words*sizeof(BM_HRL_WORD)); + + if (xlrec->bm_isOpaque) + { + /* copy the block number for the next page */ + if (bitmapPageOpaque->bm_bitmap_next != InvalidBlockNumber) + elog(PANIC, + "%s next bitmap page for blkno %d is already set", + "bm_insert_redo: ", + xlrec->bm_bitmap_blkno); + /*Assert(bitmapPageOpaque->bm_hrl_words_used == + BM_NUM_OF_HRL_WORDS_PER_PAGE);*/ + + bitmapPageOpaque->bm_bitmap_next = xlrec->bm_next_blkno; + } + + PageSetLSN(bitmapPage, lsn); + PageSetTLI(bitmapPage, ThisTimeLineID); + _bitmap_wrtbuf(bitmapBuffer); + } + else + _bitmap_relbuf(bitmapBuffer); + } + else + elog(PANIC, 
"bm_insert_undo: not implemented.");
+ }
+
+ /*
+  * _bitmap_xlog_insert_bitmap_lastwords() -- update the last two words
+  *		in a bitmap vector.
+  */
+ static void
+ _bitmap_xlog_insert_bitmap_lastwords(bool redo, XLogRecPtr lsn, XLogRecord* record)
+ {
+ 	xl_bm_bitmap_lastwords *xlrec =
+ 		(xl_bm_bitmap_lastwords*) XLogRecGetData(record);
+ 	Relation	reln;
+
+ 	reln = XLogOpenRelation(xlrec->bm_node);
+ 	if (!RelationIsValid(reln))		/* no valid relation: skip this record */
+ 		return;
+
+ 	if (redo)
+ 	{
+ 		Buffer		lovBuffer;
+ 		Page		lovPage;
+ 		BMLOVItem	lovItem;
+
+ 		lovBuffer = XLogReadBuffer(reln, xlrec->bm_lov_blkno, false);
+ 		if (!BufferIsValid(lovBuffer))
+ 			elog(PANIC, "bm_insert_redo: (_bitmap_xlog_insert_bitmap_lastwords) "
+ 				 "block unfound: %u",	/* block numbers are unsigned */
+ 				 xlrec->bm_lov_blkno);
+
+ 		lovPage = BufferGetPage(lovBuffer);
+
+ 		if (XLByteLT(PageGetLSN(lovPage), lsn))	/* page is older than the record: apply it */
+ 		{
+ 			lovItem = (BMLOVItem)
+ 				PageGetItem(lovPage, PageGetItemId(lovPage, xlrec->bm_lov_offset));
+
+ 			lovItem->bm_last_compword = xlrec->bm_last_compword;
+ 			lovItem->bm_last_word = xlrec->bm_last_word;
+ 			lovItem->bm_last_two_headerbits = xlrec->bm_last_two_headerbits;
+
+ 			PageSetLSN(lovPage, lsn);
+ 			PageSetTLI(lovPage, ThisTimeLineID);
+ 			_bitmap_wrtbuf(lovBuffer);
+ 		}
+ 		else	/* page LSN is not older than the record: leave the page alone */
+ 			_bitmap_relbuf(lovBuffer);
+ 	}
+ 	else
+ 		elog(PANIC, "bm_insert_undo: not implemented.");
+ }
+
+ void
+ bitmap_redo(XLogRecPtr lsn, XLogRecord* record)
+ {
+ 	uint8	info = record->xl_info & ~XLR_INFO_MASK;	/* op code: xl_info without the XLR_INFO_MASK bits */
+
+ 	switch (info)
+ 	{
+ 		case XLOG_BITMAP_INSERT_NEWMETA:
+ 			_bitmap_xlog_newpage(true, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_NEWLOV:
+ 			_bitmap_xlog_newpage(true, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_LOVITEM:
+ 			_bitmap_xlog_insert_lovitem(true, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_META:
+ 			_bitmap_xlog_insert_meta(true, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_NEWBITMAP:
+ 			_bitmap_xlog_newpage(true, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_BITMAP:
+ 			_bitmap_xlog_insert_bitmap(true, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_BITMAP_LASTWORDS:
+ 			_bitmap_xlog_insert_bitmap_lastwords(true, lsn, record);
+ 			break;
+ 		default:
+ 			elog(PANIC, "bitmap_redo: unknown op code %u", info);
+ 	}
+ }
+
+ void
+ bitmap_undo(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8	info = record->xl_info & ~XLR_INFO_MASK;	/* same dispatch as bitmap_redo, with redo = false */
+
+ 	switch (info)
+ 	{
+ 		case XLOG_BITMAP_INSERT_NEWMETA:
+ 			_bitmap_xlog_newpage(false, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_NEWLOV:
+ 			_bitmap_xlog_newpage(false, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_LOVITEM:
+ 			_bitmap_xlog_insert_lovitem(false, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_META:
+ 			_bitmap_xlog_insert_meta(false, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_NEWBITMAP:
+ 			_bitmap_xlog_newpage(false, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_BITMAP:
+ 			_bitmap_xlog_insert_bitmap(false, lsn, record);
+ 			break;
+ 		case XLOG_BITMAP_INSERT_BITMAP_LASTWORDS:
+ 			_bitmap_xlog_insert_bitmap_lastwords(false, lsn, record);
+ 			break;
+
+ 		default:
+ 			elog(PANIC, "bitmap_undo: unknown op code %u", info);
+ 	}
+ }
+
+ static void
+ out_target(StringInfo buf, RelFileNode* node)
+ {
+ 	appendStringInfo(buf, "rel %u/%u/%u",	/* tablespace/database/relation */
+ 					 node->spcNode, node->dbNode, node->relNode);
+ }
+
+ void
+ bitmap_desc(StringInfo buf, uint8 xl_info, char* rec)
+ {
+ 	uint8	info = xl_info & ~XLR_INFO_MASK;	/* op code selects how 'rec' is interpreted */
+
+ 	switch (info)
+ 	{
+ 		case XLOG_BITMAP_INSERT_NEWMETA:
+ 		{
+ 			xl_bm_newpage *xlrec = (xl_bm_newpage*)rec;
+
+ 			appendStringInfo(buf, "insert a new metapage: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+ 		case XLOG_BITMAP_INSERT_NEWLOV:
+ 		{
+ 			xl_bm_newpage *xlrec = (xl_bm_newpage*)rec;
+
+ 			appendStringInfo(buf, "insert a new LOV page: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+ 		case XLOG_BITMAP_INSERT_LOVITEM:
+ 		{
+ 			xl_bm_lovitem *xlrec = (xl_bm_lovitem*)rec;
+
+ 			appendStringInfo(buf, "insert a new LOV item: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+ 		case XLOG_BITMAP_INSERT_META:
+ 		{
+ 			xl_bm_metapage *xlrec = (xl_bm_metapage*)rec;
+
+ 			appendStringInfo(buf, "update the metapage: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+ 		case XLOG_BITMAP_INSERT_NEWBITMAP:
+ 		{
+ 			xl_bm_newpage *xlrec = (xl_bm_newpage*)rec;
+
+ 			appendStringInfo(buf, "insert a new bitmap page: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+ 		case XLOG_BITMAP_INSERT_BITMAP:
+ 		{
+ 			xl_bm_bitmappage *xlrec = (xl_bm_bitmappage*)rec;
+
+ 			appendStringInfo(buf, "update a bitmap page: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+ 		case XLOG_BITMAP_INSERT_BITMAP_LASTWORDS:
+ 		{
+ 			xl_bm_bitmap_lastwords *xlrec = (xl_bm_bitmap_lastwords*)rec;
+
+ 			appendStringInfo(buf, "update the last two words in a bitmap: ");
+ 			out_target(buf, &(xlrec->bm_node));
+ 			break;
+ 		}
+
+ 		default:
+ 			appendStringInfo(buf, "UNKNOWN");
+ 			break;
+ 	}
+ }
diff -Ncpr pgsql.head/src/backend/access/bitmap/Makefile bmdist/src/backend/access/bitmap/Makefile
*** pgsql.head/src/backend/access/bitmap/Makefile	1970-01-01 10:00:00.000000000 +1000
--- bmdist/src/backend/access/bitmap/Makefile	2006-08-01 13:12:15.274233096 +1000
***************
*** 0 ****
--- 1,33 ----
+ #-------------------------------------------------------------------------
+ #
+ # Makefile--
+ #    Makefile for access/bitmap
+ #
+ #
+ # IDENTIFICATION
+ #    $PostgreSQL$
+ #
+ #-------------------------------------------------------------------------
+
+ subdir = src/backend/access/bitmap
+ top_builddir = ../../../..
+ include $(top_builddir)/src/Makefile.global
+
+ OBJS = bitmaputil.o bitmapattutil.o \
+ 	bitmappages.o bitmapinsert.o bitmapsearch.o bitmap.o bitmapxlog.o
+
+ all: SUBSYS.o
+
+ SUBSYS.o: $(OBJS)
+ 	$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
+
+ depend dep:
+ 	$(CC) -MM $(CFLAGS) *.c >depend
+
+ clean:
+ 	rm -f SUBSYS.o $(OBJS)
+
+ ifeq (depend,$(wildcard depend))
+ include depend
+ endif
+
diff -Ncpr pgsql.head/src/backend/access/index/indexam.c bmdist/src/backend/access/index/indexam.c
*** pgsql.head/src/backend/access/index/indexam.c	2006-08-01 06:08:59.000000000 +1000
--- bmdist/src/backend/access/index/indexam.c	2006-08-01 13:11:51.333872584 +1000
***************
*** 15,20 ****
--- 15,22 ----
   *		index_close		- close an index relation
   *		index_beginscan - start a scan of an index with amgettuple
   *		index_beginscan_multi - start a scan of an index with amgetmulti
+  *		index_beginscan_bitmapwords - start a scan of an index with
+  *									amgetbitmapwords
   *		index_rescan	- restart a scan of an index
   *		index_endscan	- end a scan
   *		index_insert	- insert an index tuple into a relation
***************
*** 22,27 ****
--- 24,30 ----
   *		index_restrpos	- restore a scan position
   *		index_getnext	- get the next tuple from a scan
   *		index_getmulti	- get multiple tuples from a scan
+  *		index_getbitmapwords - get several bitmap words from a scan
   *		index_bulk_delete	- bulk deletion of index tuples
   *		index_vacuum_cleanup	- post-deletion cleanup of an index
   *		index_getprocid - get a support procedure OID
*************** index_beginscan_multi(Relation indexRela
*** 258,263 ****
--- 261,292 ----
  	return scan;
  }

+ /*
+  * index_beginscan_bitmapwords - start a scan of an index
+  * with amgetbitmapwords
+  * ('need_index_lock' is accepted for the caller's benefit but is not used here.)
+  * As above, caller had better be holding some lock on the parent heap
+  * relation, even though it's not explicitly mentioned here.
+  */
+ IndexScanDesc
+ index_beginscan_bitmapwords(Relation indexRelation,
+ 							bool need_index_lock,
+ 							Snapshot snapshot,
+ 							int nkeys, ScanKey key)
+ {
+ 	IndexScanDesc scan;
+
+ 	scan = index_beginscan_internal(indexRelation, nkeys, key);
+ 	/*
+ 	 * Save additional parameters into the scandesc.  Everything else was
+ 	 * set up by RelationGetIndexScan.
+ 	 */
+ 	scan->xs_snapshot = snapshot;
+
+ 	return scan;
+ }
+
+
  /*
   * index_beginscan_internal --- common code for index_beginscan variants
   */
*************** index_getmulti(IndexScanDesc scan,
*** 549,554 ****
--- 578,616 ----
  }

  /* ----------------
+  *		index_getbitmapwords - get several bitmap words from an index scan.
+  */
+ bool
+ index_getbitmapwords(IndexScanDesc scan,
+ 					 uint32 maxNumOfWords,
+ 					 uint32 *returnedNumOfWords,
+ 					 BM_HRL_WORD *headerWords,
+ 					 BM_HRL_WORD *contentWords)
+ {
+ 	FmgrInfo   *procedure;
+ 	bool		found;
+
+ 	SCAN_CHECKS;
+ 	GET_SCAN_PROCEDURE(amgetbitmapwords);
+
+ 	/* just make sure this is false... */
+ 	scan->kill_prior_tuple = false;
+
+ 	/*
+ 	 * have the am's getbitmapwords proc do all the work.
+ 	 * All five arguments are handed to the AM unchanged, wrapped as Datums.
+ 	 */
+ 	found = DatumGetBool(FunctionCall5(procedure,
+ 									   PointerGetDatum(scan),
+ 									   UInt32GetDatum(maxNumOfWords),	/* uint32, so use the unsigned macro */
+ 									   PointerGetDatum(returnedNumOfWords),
+ 									   PointerGetDatum(headerWords),
+ 									   PointerGetDatum(contentWords)));
+
+ 	return found;
+ }
+
+ /* ----------------
   *		index_bulk_delete - do mass deletion of index entries
   *
   *		callback routine tells whether a given main-heap tuple is
diff -Ncpr pgsql.head/src/backend/access/Makefile bmdist/src/backend/access/Makefile
*** pgsql.head/src/backend/access/Makefile	2006-05-02 21:28:54.000000000 +1000
--- bmdist/src/backend/access/Makefile	2006-08-01 12:58:58.000000000 +1000
*************** subdir = src/backend/access
*** 8,14 ****
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global

!
SUBDIRS	    := common gist hash heap index nbtree transam gin
  SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)

  all: SUBSYS.o
--- 8,14 ----
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global

! SUBDIRS	    := common gist hash heap index nbtree transam gin bitmap
  SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)

  all: SUBSYS.o
diff -Ncpr pgsql.head/src/backend/access/transam/rmgr.c bmdist/src/backend/access/transam/rmgr.c
*** pgsql.head/src/backend/access/transam/rmgr.c	2006-07-12 03:26:58.000000000 +1000
--- bmdist/src/backend/access/transam/rmgr.c	2006-08-01 12:58:57.000000000 +1000
***************
*** 7,12 ****
--- 7,13 ----
   */
  #include "postgres.h"

+ #include "access/bitmap.h"
  #include "access/clog.h"
  #include "access/gin.h"
  #include "access/gist_private.h"
*************** const RmgrData RmgrTable[RM_MAX_ID + 1]
*** 36,42 ****
  	{"Heap", heap_redo, heap_desc, NULL, NULL},
  	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup},
  	{"Hash", hash_redo, hash_desc, NULL, NULL},
! 	{"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup},
  	{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup},
! 	{"Sequence", seq_redo, seq_desc, NULL, NULL}
  };
--- 37,44 ----
  	{"Heap", heap_redo, heap_desc, NULL, NULL},
  	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup},
  	{"Hash", hash_redo, hash_desc, NULL, NULL},
! 	{"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup},
  	{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup},
! 	{"Sequence", seq_redo, seq_desc, NULL, NULL},
! 	{"Bitmap", bitmap_redo, bitmap_desc, NULL, NULL}
  };
diff -Ncpr pgsql.head/src/backend/commands/explain.c bmdist/src/backend/commands/explain.c
*** pgsql.head/src/backend/commands/explain.c	2006-07-15 00:52:18.000000000 +1000
--- bmdist/src/backend/commands/explain.c	2006-08-01 12:59:16.000000000 +1000
*************** explain_outNode(StringInfo str,
*** 621,626 ****
--- 621,628 ----
  		case T_BitmapIndexScan:
  			appendStringInfo(str, " on %s",
  				quote_identifier(get_rel_name(((BitmapIndexScan *) plan)->indexid)));
+ 			if (((BitmapIndexScan *) plan)->indexam == BITMAP_AM_OID)
+ 				appendStringInfo(str, " (on-disk bitmap index)");	/* leading space keeps it off the index name */
  			break;
  		case T_SubqueryScan:
  			if (((Scan *) plan)->scanrelid > 0)
diff -Ncpr pgsql.head/src/backend/executor/nodeBitmapAnd.c bmdist/src/backend/executor/nodeBitmapAnd.c
*** pgsql.head/src/backend/executor/nodeBitmapAnd.c	2006-05-31 00:01:58.000000000 +1000
--- bmdist/src/backend/executor/nodeBitmapAnd.c	2006-08-01 12:58:33.000000000 +1000
***************
*** 32,38 ****
  #include "executor/instrument.h"
  #include "executor/nodeBitmapAnd.h"

-
  /* ----------------------------------------------------------------
   *		ExecInitBitmapAnd
   *
--- 32,37 ----
*************** ExecInitBitmapAnd(BitmapAnd *node, EStat
*** 48,53 ****
--- 47,53 ----
  	int			i;
  	ListCell   *l;
  	Plan	   *initNode;
+ 	bool		inmem = false;	/* becomes true once any child needs an in-memory TIDBitmap */

  	/* check for unsupported flags */
  	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
*************** ExecInitBitmapAnd(BitmapAnd *node, EStat
*** 84,93 ****
  	foreach(l, node->bitmapplans)
  	{
  		initNode = (Plan *) lfirst(l);
! 		bitmapplanstates[i] = ExecInitNode(initNode, estate, eflags);
  		i++;
  	}

  	return bitmapandstate;
  }

--- 84,109 ----
  	foreach(l, node->bitmapplans)
  	{
  		initNode = (Plan *) lfirst(l);
! 		bitmapplanstates[i] = ExecInitNode(initNode, estate, eflags);
!
! 		if (!inmem && IsA(initNode, BitmapIndexScan))
! 			inmem = (((BitmapIndexScan*)initNode)->indexam !=
! 					 BITMAP_AM_OID);
! 		else if (!inmem && IsA(initNode, BitmapAnd))
! 			inmem = ((BitmapAnd*)initNode)->inmem;
! 		else if (!inmem && IsA(initNode, BitmapOr))
! 			inmem = ((BitmapOr*)initNode)->inmem;
  		i++;
  	}

+ 	node->inmem = inmem;
+
+ 	bitmapandstate->odbms = (OnDiskBitmapWords**)
+ 		palloc0(nplans * sizeof(OnDiskBitmapWords*));
+ 	for (i=0; i<nplans; i++)	/* one word batch per child plan */
+ 		bitmapandstate->odbms[i] = odbm_create(BM_MAX_WORDS);
+ 	bitmapandstate->resultOdbm = NULL;
+
  	return bitmapandstate;
  }

*************** MultiExecBitmapAnd(BitmapAndState *node)
*** 112,118 ****
  	PlanState **bitmapplans;
  	int			nplans;
  	int			i;
! 	TIDBitmap  *result = NULL;

  	/* must provide our own instrumentation support */
  	if (node->ps.instrument)
--- 128,135 ----
  	PlanState **bitmapplans;
  	int			nplans;
  	int			i;
! 	Node	   *result = NULL;	/* TIDBitmap when inmem, OnDiskBitmapWords otherwise */
! 	bool		inmem = ((BitmapAnd*)(((PlanState*)node)->plan))->inmem;

  	/* must provide our own instrumentation support */
  	if (node->ps.instrument)
*************** MultiExecBitmapAnd(BitmapAndState *node)
*** 124,159 ****
  	bitmapplans = node->bitmapplans;
  	nplans = node->nplans;

  	/*
  	 * Scan all the subplans and AND their result bitmaps
! 	 */
  	for (i = 0; i < nplans; i++)
  	{
! 		PlanState  *subnode = bitmapplans[i];
! 		TIDBitmap  *subresult;

! 		subresult = (TIDBitmap *) MultiExecProcNode(subnode);

! 		if (!subresult || !IsA(subresult, TIDBitmap))
! 			elog(ERROR, "unrecognized result from subplan");

- 		if (result == NULL)
- 			result = subresult; /* first subplan */
  		else
  		{
! 			tbm_intersect(result, subresult);
! 			tbm_free(subresult);
  		}

! 		/*
! 		 * If at any stage we have a completely empty bitmap, we can fall out
! 		 * without evaluating the remaining subplans, since ANDing them can no
! 		 * longer change the result.  (Note: the fact that indxpath.c orders
! 		 * the subplans by selectivity should make this case more likely to
! 		 * occur.)
! 		 */
! 		if (tbm_is_empty(result))
! 			break;
  	}

  	if (result == NULL)
--- 141,211 ----
  	bitmapplans = node->bitmapplans;
  	nplans = node->nplans;

+
  	/*
  	 * Scan all the subplans and AND their result bitmaps
!  	 */
  	for (i = 0; i < nplans; i++)
  	{
! 		PlanState  *subnode = bitmapplans[i];
!
! 		/* set the required bitmap type for the subnodes */
! 		odbm_set_bitmaptype(subnode->plan, inmem);

! 		if (inmem)
! 		{
! 			TIDBitmap  *subresult;
!
! 			subresult = (TIDBitmap *) MultiExecProcNode(subnode);

! 			if (!subresult || !IsA(subresult, TIDBitmap))
! 				elog(ERROR, "unrecognized result from subplan");
!
! 			if (result == NULL)
! 				result = (Node*)subresult;			/* first subplan */
! 			else
! 			{
! 				tbm_intersect((TIDBitmap*)result, subresult);
! 				tbm_free(subresult);
! 			}
! 		}

  		else
  		{
! 			OnDiskBitmapWords* subresult;
!
! 			/* if there is no leftover from previous scan, then
! 			   read next list of words. */
! 			if ((node->odbms[i])->bitmapWords.numOfWords == 0)
! 			{
! 				node->odbms[i]->bitmapWords.startNo = 0;
! 				odbm_set_child_resultnode(subnode, node->odbms[i]);
!
! 				subresult = (OnDiskBitmapWords*)MultiExecProcNode(subnode);
!
! 				if (!subresult || !IsA(subresult, OnDiskBitmapWords))
! 					elog(ERROR, "unrecognized result from subplan");
!
! 				node->odbms[i] = subresult;
! 			}
  		}
+ 	}

! 	if (!inmem)
! 	{
! 		BMBatchWords** batches = (BMBatchWords**)
! 			palloc0(nplans* sizeof(BMBatchWords*));
!
! 		if (node->resultOdbm == NULL)
! 			node->resultOdbm = odbm_create(BM_MAX_WORDS);
! 		_bitmap_reset_batchwords(&(node->resultOdbm->bitmapWords));
!
! 		for (i=0; i<nplans; i++)	/* gather every child's word batch */
! 			batches[i] = &(node->odbms[i]->bitmapWords);
! 		_bitmap_intersect(batches, nplans, &(node->resultOdbm->bitmapWords));
! 		result = (Node*)(node->resultOdbm);
!
! 		pfree(batches);
  	}

  	if (result == NULL)
*************** ExecEndBitmapAnd(BitmapAndState *node)
*** 195,200 ****
--- 247,260 ----
  		if (bitmapplans[i])
  			ExecEndNode(bitmapplans[i]);
  	}
+
+ 	if (node->odbms != NULL)
+ 	{
+ 		for (i=0; i<nplans; i++)
+ 			if (node->odbms[i] != NULL)
+ 				odbm_free(node->odbms[i]);
+ 		pfree(node->odbms);
+ 	}
  }

  void
*************** ExecReScanBitmapAnd(BitmapAndState *node
*** 213,218 ****
--- 273,284 ----
  		if (node->ps.chgParam != NULL)
  			UpdateChangedParamSet(subnode, node->ps.chgParam);

+ 		if (node->odbms[i] != NULL)
+ 		{
+ 			node->odbms[i]->bitmapWords.startNo = 0;	/* discard leftover words from the last scan */
+ 			node->odbms[i]->bitmapWords.numOfWords = 0;
+ 		}
+
  		/*
  		 * Always rescan the inputs immediately, to ensure we can pass down
  		 * any outer tuple that might be used in index quals.
diff -Ncpr pgsql.head/src/backend/executor/nodeBitmapHeapscan.c bmdist/src/backend/executor/nodeBitmapHeapscan.c
*** pgsql.head/src/backend/executor/nodeBitmapHeapscan.c	2006-07-15 00:52:19.000000000 +1000
--- bmdist/src/backend/executor/nodeBitmapHeapscan.c	2006-08-01 12:58:34.000000000 +1000
*************** BitmapHeapNext(BitmapHeapScanState *node
*** 64,69 ****
--- 64,73 ----
  	OffsetNumber targoffset;
  	TupleTableSlot *slot;

+ 	OnDiskBitmapWords	*odbm;
+ 	BMIterateResult		*odbmres;
+ 	bool				inmem = false;
+
  	/*
  	 * extract necessary information from index scan node
  	 */
*************** BitmapHeapNext(BitmapHeapScanState *node
*** 74,79 ****
--- 78,94 ----
  	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
  	tbm = node->tbm;
  	tbmres = node->tbmres;
+ 	odbm = node->odbm;
+ 	odbmres = node->odbmres;
+
+ 	/*
+ 	 * Clear any reference to the previously returned tuple.  The idea here is
+ 	 * to not have the tuple slot be the last holder of a pin on that tuple's
+ 	 * buffer; if it is, we'll need a separate visit to the bufmgr to release
+ 	 * the buffer.  By clearing here, we get to have the release done by
+ 	 * ReleaseAndReadBuffer, below.
+ 	 */
+ 	ExecClearTuple(slot);

  	/*
  	 * Check if we are evaluating PlanQual for tuple of this relation.
*************** BitmapHeapNext(BitmapHeapScanState *node
*** 104,109 ****
--- 119,211 ----
  		return slot;
  	}

+ 	/* check if this requires in-mem bitmap scan or on-disk bitmap index. */
+ 	inmem = ((BitmapHeapScan*)(((PlanState*)node)->plan))->inmem;
+
+ 	/*
+ 	 * If the underlying indexes are on-disk bitmap indexes
+ 	 */
+ 	if (!inmem)
+ 	{
+ 		uint64	nextTid = 0;
+
+ 		if (odbm == NULL)
+ 		{
+ 			odbm = odbm_create(BM_MAX_WORDS);
+ 			node->odbm = odbm;
+ 		}
+
+ 		if (odbmres == NULL)
+ 		{
+ 			odbmres = odbm_res_create(odbm);
+ 			node->odbmres = odbmres;
+ 		}
+
+ 		for (;;)
+ 		{
+ 			/*
+ 			 * If we have used up the words from previous scan, or
+ 			 * we haven't scanned the underlying index for words yet,
+ 			 * then do it.
+ 			 */
+ 			if (odbm->bitmapWords.numOfWords == 0 &&
+ 				odbmres->nextTidLoc >= odbmres->numOfTids)
+ 			{
+
+ 				Plan* outerPlan = (((PlanState*)node)->lefttree)->plan;
+ 				odbm_set_bitmaptype(outerPlan, false);
+
+ 				odbm->bitmapWords.firstTid = odbmres->nextTid;	/* next batch continues where the iterator stopped */
+ 				odbm->bitmapWords.startNo = 0;
+ 				odbm_set_child_resultnode(((PlanState*)node)->lefttree,
+ 										  odbm);
+ 				odbm = (OnDiskBitmapWords *)
+ 					MultiExecProcNode(outerPlanState(node));
+
+ 				if (!odbm || !IsA(odbm, OnDiskBitmapWords))
+ 					elog(ERROR, "unrecognized result from subplan");
+
+ 				_bitmap_begin_iterate(&(node->odbm->bitmapWords), node->odbmres);
+ 			}
+
+ 			/* If we can not find more words, then this scan is over. */
+ 			if (odbm == NULL ||
+ 				(odbm->bitmapWords.numOfWords == 0 &&
+ 				 odbmres->nextTidLoc >= odbmres->numOfTids))
+ 				return ExecClearTuple(slot);
+
+ 			nextTid = _bitmap_findnexttid(&(odbm->bitmapWords), odbmres);
+
+ 			if (nextTid == 0)	/* presumably "no TID found in this batch" -- confirm in _bitmap_findnexttid */
+ 				continue;
+
+ 			ItemPointerSet(&scan->rs_ctup.t_self,	/* nextTid is 1-based: split into block and offset */
+ 						   (nextTid-1)/MaxNumHeapTuples,
+ 						   ((nextTid-1)%MaxNumHeapTuples)+1);
+ 			/* fetch the heap tuple and see if it matches the snapshot. */
+ 			if (heap_release_fetch(scan->rs_rd,
+ 								   scan->rs_snapshot,
+ 								   &scan->rs_ctup,
+ 								   &scan->rs_cbuf,
+ 								   true,
+ 								   &scan->rs_pgstat_info))
+ 			{
+ 				/*
+ 				 * Set up the result slot to point to this tuple.
+ 				 * Note that the slot acquires a pin on the buffer.
+ 				 */
+ 				ExecStoreTuple(&scan->rs_ctup,
+ 							   slot,
+ 							   scan->rs_cbuf,
+ 							   false);
+
+
+ 				/* return this tuple */
+ 				return slot;
+ 			}
+ 		}
+ 	}
+
  	/*
  	 * If we haven't yet performed the underlying index scan, do it, and
  	 * prepare the bitmap to be iterated over.
*************** ExecBitmapHeapReScan(BitmapHeapScanState
*** 395,400 ****
--- 497,509 ----
  	node->tbm = NULL;
  	node->tbmres = NULL;

+ 	if (node->odbm)
+ 		odbm_free(node->odbm);
+ 	node->odbm = NULL;
+ 	/* drop the iterator too, so the next scan does not resume from stale state */
+ 	odbm_res_free(node->odbmres);
+ 	node->odbmres = NULL;
+
  	/*
  	 * Always rescan the input immediately, to ensure we can pass down any
  	 * outer tuple that might be used in index quals.
*************** ExecEndBitmapHeapScan(BitmapHeapScanStat
*** 440,445 ****
--- 549,557 ----
  	if (node->tbm)
  		tbm_free(node->tbm);

+ 	if (node->odbm)
+ 		odbm_free(node->odbm);
+
  	/*
  	 * close heap scan
  	 */
*************** ExecInitBitmapHeapScan(BitmapHeapScan *n
*** 554,559 ****
--- 666,682 ----
  	 */
  	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

+ 	if (IsA(outerPlan(node), BitmapIndexScan))	/* in-memory unless the child is an on-disk bitmap index */
+ 		node->inmem =
+ 			(((BitmapIndexScan*)outerPlan(node))->indexam !=
+ 			 BITMAP_AM_OID);
+ 	else if (IsA(outerPlan(node), BitmapAnd))
+ 		node->inmem = ((BitmapAnd*)outerPlan(node))->inmem;
+ 	else if (IsA(outerPlan(node), BitmapOr))
+ 		node->inmem = ((BitmapOr*)outerPlan(node))->inmem;
+ 	else
+ 		node->inmem = true;
+
  	/*
  	 * all done.
  	 */
diff -Ncpr pgsql.head/src/backend/executor/nodeBitmapIndexscan.c bmdist/src/backend/executor/nodeBitmapIndexscan.c
*** pgsql.head/src/backend/executor/nodeBitmapIndexscan.c	2006-08-01 06:09:04.000000000 +1000
--- bmdist/src/backend/executor/nodeBitmapIndexscan.c	2006-08-01 12:58:33.000000000 +1000
*************** Node *
*** 38,50 ****
  MultiExecBitmapIndexScan(BitmapIndexScanState *node)
  {
  #define MAX_TIDS	1024
!
TIDBitmap  *tbm;
  	IndexScanDesc scandesc;
  	ItemPointerData tids[MAX_TIDS];
  	int32		ntids;
  	double		nTuples = 0;
  	bool		doscan;

  	/* must provide our own instrumentation support */
  	if (node->ss.ps.instrument)
  		InstrStartNode(node->ss.ps.instrument);
--- 38,54 ----
  MultiExecBitmapIndexScan(BitmapIndexScanState *node)
  {
  #define MAX_TIDS	1024
! 	TIDBitmap  *tbm = NULL;
  	IndexScanDesc scandesc;
+ 	IndexScanDesc odScanDesc;
  	ItemPointerData tids[MAX_TIDS];
  	int32		ntids;
  	double		nTuples = 0;
  	bool		doscan;

+ 	OnDiskBitmapWords	*odbm = NULL;
+ 	bool				inmem = false;
+
  	/* must provide our own instrumentation support */
  	if (node->ss.ps.instrument)
  		InstrStartNode(node->ss.ps.instrument);
*************** MultiExecBitmapIndexScan(BitmapIndexScan
*** 53,58 ****
--- 57,63 ----
  	 * extract necessary information from index scan node
  	 */
  	scandesc = node->biss_ScanDesc;
+ 	odScanDesc = node->odbiss_ScanDesc;

  	/*
  	 * If we have runtime keys and they've not already been set up, do it now.
*************** MultiExecBitmapIndexScan(BitmapIndexScan
*** 69,120 ****
  	else
  		doscan = true;

! 	/*
! 	 * Prepare the result bitmap.  Normally we just create a new one to pass
! 	 * back; however, our parent node is allowed to store a pre-made one into
! 	 * node->biss_result, in which case we just OR our tuple IDs into the
! 	 * existing bitmap.  (This saves needing explicit UNION steps.)
! 	 */
! 	if (node->biss_result)
! 	{
! 		tbm = node->biss_result;
! 		node->biss_result = NULL;		/* reset for next time */
! 	}
! 	else
! 	{
! 		/* XXX should we use less than work_mem for this? */
! 		tbm = tbm_create(work_mem * 1024L);
! 	}

! 	/*
! 	 * Get TIDs from index and insert into bitmap
! 	 */
! 	while (doscan)
  	{
! 		bool		more = index_getmulti(scandesc, tids, MAX_TIDS, &ntids);

! 		if (ntids > 0)
  		{
! 			tbm_add_tuples(tbm, tids, ntids);
! 			nTuples += ntids;
  		}

! 		CHECK_FOR_INTERRUPTS();
!
! 		if (!more)
  		{
! 			doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
! 											   node->biss_NumArrayKeys);
! 			if (doscan)			/* reset index scan */
! 				index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
  		}
  	}

  	/* must provide our own instrumentation support */
  	if (node->ss.ps.instrument)
  		InstrStopNode(node->ss.ps.instrument, nTuples);

! 	return (Node *) tbm;
  }

  /* ----------------------------------------------------------------
--- 74,150 ----
  	else
  		doscan = true;

! 	inmem = ((BitmapIndexScan*)((PlanState*)node)->plan)->inmem;
! 	if (inmem)
  	{
! 		node->odbiss_result = NULL;

! 		/*
! 		 * Prepare the result bitmap.  Normally we just create a new one to pass
! 		 * back; however, our parent node is allowed to store a pre-made one
! 		 * into node->biss_result, in which case we just OR our tuple IDs into
! 		 * the existing bitmap.  (This saves needing explicit UNION steps.)
! 		 */
! 		if (node->biss_result)
  		{
! 			tbm = node->biss_result;
! 			node->biss_result = NULL;		/* reset for next time */
! 		}
! 		else
! 		{
! 			/* XXX should we use less than work_mem for this? */
! 			tbm = tbm_create(work_mem * 1024L);
  		}

! 		/*
! 		 * Get TIDs from index and insert into bitmap
! 		 */
! 		while (doscan)
  		{
! 			bool		more = index_getmulti(scandesc, tids, MAX_TIDS, &ntids);
!
! 			if (ntids > 0)
! 			{
! 				tbm_add_tuples(tbm, tids, ntids);
! 				nTuples += ntids;
! 			}
!
! 			CHECK_FOR_INTERRUPTS();
!
! 			if (!more)
! 			{
! 				doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
! 												   node->biss_NumArrayKeys);
! 				if (doscan)			/* reset index scan */
! 					index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
! 			}
  		}
  	}
+ 	else
+ 	{
+ 		/* XXX: this needs to change. push down into generic interface */
+ 		node->biss_result = NULL;
+
+ 		if (node->odbiss_result == NULL)
+ 			node->odbiss_result = odbm_create(BM_MAX_WORDS);
+
+ 		odbm = node->odbiss_result;	/* NOTE(review): 'doscan' is not consulted on this path -- confirm */
+
+ 		index_getbitmapwords(odScanDesc, odbm->bitmapWords.maxNumOfWords,
+ 							 &(odbm->bitmapWords.numOfWords),
+ 							 odbm->bitmapWords.bitmapHeaderWords,
+ 							 odbm->bitmapWords.bitmapContentWords);
+ 	}

  	/* must provide our own instrumentation support */
  	if (node->ss.ps.instrument)
  		InstrStopNode(node->ss.ps.instrument, nTuples);

! 	if (tbm != NULL)
! 		return (Node *) tbm;
! 	else
! 		return (Node *) odbm;
  }

  /* ----------------------------------------------------------------
*************** ExecBitmapIndexReScan(BitmapIndexScanSta
*** 170,176 ****
--- 200,210 ----

  	/* reset index scan */
  	if (node->biss_RuntimeKeysReady)
+ 	{
  		index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
+ 		if (node->odbiss_ScanDesc != NULL)	/* only set for on-disk bitmap indexes */
+ 			index_rescan(node->odbiss_ScanDesc, node->biss_ScanKeys);
+ 	}
  }

  /* ----------------------------------------------------------------
*************** ExecEndBitmapIndexScan(BitmapIndexScanSt
*** 182,193 ****
--- 216,229 ----
  {
  	Relation	indexRelationDesc;
  	IndexScanDesc indexScanDesc;
+ 	IndexScanDesc odIndexScanDesc;

  	/*
  	 * extract information from the node
  	 */
  	indexRelationDesc = node->biss_RelationDesc;
  	indexScanDesc = node->biss_ScanDesc;
+ 	odIndexScanDesc = node->odbiss_ScanDesc;

  	/*
  	 * Free the exprcontext ... now dead code, see ExecFreeExprContext
*************** ExecEndBitmapIndexScan(BitmapIndexScanSt
*** 200,205 ****
--- 236,247 ----
  	/*
  	 * close the index relation
  	 */
+ 	if (odIndexScanDesc != NULL)
+ 	{
+ 		index_endscan(odIndexScanDesc);
+ 		odIndexScanDesc = NULL;
+ 	}
+
  	index_endscan(indexScanDesc);
  	index_close(indexRelationDesc, NoLock);
  }
*************** ExecInitBitmapIndexScan(BitmapIndexScan
*** 309,321 ****
--- 351,388 ----

  	/*
  	 * Initialize scan descriptor.
+ 	 *
+ 	 * Note we acquire no locks here; the index machinery does its own locks
+ 	 * and unlocks.  (We rely on having a lock on the parent table to
+ 	 * ensure the index won't go away!)  Furthermore, if the parent table
+ 	 * is one of the target relations of the query, then InitPlan already
+ 	 * opened and write-locked the index, so we can tell the index machinery
+ 	 * not to bother getting an extra lock.
  	 */
+ 	relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
  	indexstate->biss_ScanDesc =
  		index_beginscan_multi(indexstate->biss_RelationDesc,
  							  estate->es_snapshot,
  							  indexstate->biss_NumScanKeys,
  							  indexstate->biss_ScanKeys);

+ 	/* XXX: fix me! */
+ 	if (node->indexam == BITMAP_AM_OID)
+ 	{
+ 		indexstate->odbiss_ScanDesc =
+ 			index_beginscan_bitmapwords(indexstate->biss_RelationDesc,
+ 										!relistarget,
+ 										estate->es_snapshot,
+ 										indexstate->biss_NumScanKeys,
+ 										indexstate->biss_ScanKeys);
+ 		node->inmem = false;
+ 	}
+ 	else
+ 	{
+ 		indexstate->odbiss_ScanDesc = NULL;
+ 		node->inmem = true;
+ 	}
+
  	/*
  	 * all done.
  	 */
diff -Ncpr pgsql.head/src/backend/executor/nodeBitmapOr.c bmdist/src/backend/executor/nodeBitmapOr.c
*** pgsql.head/src/backend/executor/nodeBitmapOr.c	2006-05-31 00:01:58.000000000 +1000
--- bmdist/src/backend/executor/nodeBitmapOr.c	2006-08-01 12:58:33.000000000 +1000
*************** ExecInitBitmapOr(BitmapOr *node, EState
*** 49,54 ****
--- 49,55 ----
  	int			i;
  	ListCell   *l;
  	Plan	   *initNode;
+ 	bool		inmem = false;	/* becomes true once any child needs an in-memory TIDBitmap */

  	/* check for unsupported flags */
  	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
*************** ExecInitBitmapOr(BitmapOr *node, EState
*** 86,94 ****
--- 87,110 ----
  	{
  		initNode = (Plan *) lfirst(l);
  		bitmapplanstates[i] = ExecInitNode(initNode, estate, eflags);
+ 		if (!inmem && IsA(initNode, BitmapIndexScan))
+ 			inmem = (((BitmapIndexScan*)initNode)->indexam !=
+ 					 BITMAP_AM_OID);
+ 		else if (!inmem && IsA(initNode, BitmapAnd))
+ 			inmem = ((BitmapAnd*)initNode)->inmem;
+ 		else if (!inmem && IsA(initNode, BitmapOr))
+ 			inmem = ((BitmapOr*)initNode)->inmem;
  		i++;
  	}

+ 	node->inmem = inmem;
+
+ 	bitmaporstate->odbms = (OnDiskBitmapWords**)
+ 		palloc0(nplans * sizeof(OnDiskBitmapWords*));
+ 	for (i=0; i<nplans; i++)	/* one word batch per child plan */
+ 		bitmaporstate->odbms[i] = odbm_create(BM_MAX_WORDS);
+ 	bitmaporstate->resultOdbm = NULL;
+
  	return bitmaporstate;
  }

*************** MultiExecBitmapOr(BitmapOrState *node)
*** 113,119 ****
  	PlanState **bitmapplans;
  	int			nplans;
  	int			i;
! 	TIDBitmap  *result = NULL;

  	/* must provide our own instrumentation support */
  	if (node->ps.instrument)
--- 129,136 ----
  	PlanState **bitmapplans;
  	int			nplans;
  	int			i;
! 	Node	   *result = NULL;	/* TIDBitmap when inmem, OnDiskBitmapWords otherwise */
! 	bool		inmem = ((BitmapOr*)(((PlanState*)node)->plan))->inmem;

  	/* must provide our own instrumentation support */
  	if (node->ps.instrument)
*************** MultiExecBitmapOr(BitmapOrState *node)
*** 131,176 ****
  	for (i = 0; i < nplans; i++)
  	{
  		PlanState  *subnode = bitmapplans[i];
- 		TIDBitmap  *subresult;

! 		/*
! 		 * We can special-case BitmapIndexScan children to avoid an explicit
! 		 * tbm_union step for each child: just pass down the current result
! 		 * bitmap and let the child OR directly into it.
! 		 */
! 		if (IsA(subnode, BitmapIndexScanState))
! 		{
! 			if (result == NULL) /* first subplan */
  			{
! 				/* XXX should we use less than work_mem for this? */
! 				result = tbm_create(work_mem * 1024L);
  			}

! 			((BitmapIndexScanState *) subnode)->biss_result = result;

! 			subresult = (TIDBitmap *) MultiExecProcNode(subnode);

! 			if (subresult != result)
! 				elog(ERROR, "unrecognized result from subplan");
  		}
  		else
  		{
! 			/* standard implementation */
! 			subresult = (TIDBitmap *) MultiExecProcNode(subnode);
!
! 			if (!subresult || !IsA(subresult, TIDBitmap))
! 				elog(ERROR, "unrecognized result from subplan");

! 			if (result == NULL)
! 				result = subresult;		/* first subplan */
! 			else
  			{
! 				tbm_union(result, subresult);
! 				tbm_free(subresult);
  			}
  		}
  	}

  	/* We could return an empty result set here? */
  	if (result == NULL)
  		elog(ERROR, "BitmapOr doesn't support zero inputs");
--- 148,266 ----
  	for (i = 0; i < nplans; i++)
  	{
  		PlanState  *subnode = bitmapplans[i];

! 		/* set the required bitmap type for the subnodes */
! 		odbm_set_bitmaptype(subnode->plan, inmem);
!
! 		if (inmem)
  		{
! 			TIDBitmap  *subresult;
!
! 			/*
! 			 * We can special-case BitmapIndexScan children to avoid an
! 			 * explicit tbm_union step for each child: just pass down the
! 			 * current result bitmap and let the child OR directly into it.
! 			 */
! 			if (IsA(subnode, BitmapIndexScanState))
  			{
! 				if (result == NULL) /* first subplan */
! 				{
! 					/* XXX should we use less than work_mem for this? */
! 					result = (Node*)tbm_create(work_mem * 1024L);
! 				}
!
! 				((BitmapIndexScanState *) subnode)->biss_result =
! 					(TIDBitmap*)result;
!
! 				subresult = (TIDBitmap *) MultiExecProcNode(subnode);
!
! 				if (subresult != (TIDBitmap*)result)
! 					elog(ERROR, "unrecognized result from subplan");
  			}
+ 			else
+ 			{

! 				/* standard implementation */
! 				subresult = (TIDBitmap *) MultiExecProcNode(subnode);

! 				if (!subresult || !IsA(subresult, TIDBitmap))
! 					elog(ERROR, "unrecognized result from subplan");

! 				if (result == NULL)
! 					result = (Node*)subresult;		/* first subplan */
! 				else
! 				{
! 					tbm_union((TIDBitmap*)result, subresult);
! 					tbm_free(subresult);
! 				}
! 			}
  		}
+ 		else
  		{
! 			OnDiskBitmapWords *subresult;

! 			/* if there is no leftover from previous scan, then
! 			   read the next list of words. */
! 			if (node->odbms[i]->bitmapWords.numOfWords == 0)
  			{
! 				node->odbms[i]->bitmapWords.startNo = 0;
! 				odbm_set_child_resultnode(subnode, node->odbms[i]);
!
! 				subresult = (OnDiskBitmapWords*) MultiExecProcNode(subnode);
!
! 				if (!subresult || !IsA(subresult, OnDiskBitmapWords))
! 					elog(ERROR, "unrecognized result from subplan");
!
! 				node->odbms[i] = subresult;
  			}
  		}
  	}

+ 	if (!inmem)
+ 	{
+ 		uint32	nonempty_nplans;
+ 		BMBatchWords** batches = (BMBatchWords**)
+ 			palloc0(nplans* sizeof(BMBatchWords*));
+
+ 		if (node->resultOdbm == NULL)
+ 			node->resultOdbm = odbm_create(BM_MAX_WORDS);
+ 		_bitmap_reset_batchwords(&(node->resultOdbm->bitmapWords));
+
+ 		nonempty_nplans = 0;
+ 		for (i=0; i<nplans; i++) {
+ 			if (node->odbms[i]->bitmapWords.numOfWords > 0) {
+ 				batches[nonempty_nplans] = &(node->odbms[i]->bitmapWords);
+ 				nonempty_nplans++;
+ 			}
+ 		}
+ 		_bitmap_union(batches, nonempty_nplans,
+ 					  &(node->resultOdbm->bitmapWords));
+
+ 		/* if the number of words in the result is 0, then
+ 		   at least one of subplans contains no words. We
+ 		   want to discard these subplans, and re-calculate
+ 		   the result. */
+ 		if (node->resultOdbm->bitmapWords.numOfWords == 0)
+ 		{
+ 			BMBatchWords** newBatches =
+ 				palloc0(sizeof(BMBatchWords*)*nplans);
+
+ 			nonempty_nplans = 0;	/* reuse the outer counter instead of shadowing it */
+ 			for (i=0; i<nplans; i++)
+ 				if (node->odbms[i]->bitmapWords.numOfWords != 0)
+ 					newBatches[nonempty_nplans++] =
+ 						&(node->odbms[i]->bitmapWords);
+
+ 			_bitmap_reset_batchwords(&(node->resultOdbm->bitmapWords));
+ 			_bitmap_union(newBatches, nonempty_nplans,
+ 						  &(node->resultOdbm->bitmapWords));
+ 			pfree(newBatches);
+ 		}
+ 		pfree(batches);		/* pointer array only; mirrors MultiExecBitmapAnd */
+ 		result = (Node*)(node->resultOdbm);
+ 	}
+
  	/* We could return an empty result set here? */
  	if (result == NULL)
  		elog(ERROR, "BitmapOr doesn't support zero inputs");
*************** ExecEndBitmapOr(BitmapOrState *node)
*** 211,216 ****
--- 301,314 ----
  		if (bitmapplans[i])
  			ExecEndNode(bitmapplans[i]);
  	}
+
+ 	if (node->odbms != NULL)
+ 	{
+ 		for (i=0; i<nplans; i++)
+ 			if (node->odbms[i] != NULL)
+ 				odbm_free(node->odbms[i]);
+ 		pfree(node->odbms);
+ 	}
  }

  void
*************** ExecReScanBitmapOr(BitmapOrState *node,
*** 229,238 ****
--- 327,343 ----
  		if (node->ps.chgParam != NULL)
  			UpdateChangedParamSet(subnode, node->ps.chgParam);

+ 		if (node->odbms[i] != NULL)
+ 		{
+ 			node->odbms[i]->bitmapWords.startNo = 0;	/* discard leftover words from the last scan */
+ 			node->odbms[i]->bitmapWords.numOfWords = 0;
+ 		}
+
  		/*
  		 * Always rescan the inputs immediately, to ensure we can pass down
  		 * any outer tuple that might be used in index quals.
  		 */
  		ExecReScan(subnode, exprCtxt);
  	}
+
  }
diff -Ncpr pgsql.head/src/backend/nodes/Makefile bmdist/src/backend/nodes/Makefile
*** pgsql.head/src/backend/nodes/Makefile	2005-04-18 08:24:02.000000000 +1000
--- bmdist/src/backend/nodes/Makefile	2006-08-01 12:59:00.000000000 +1000
*************** include $(top_builddir)/src/Makefile.glo
*** 14,20 ****

  OBJS = nodeFuncs.o nodes.o list.o bitmapset.o tidbitmap.o \
         copyfuncs.o equalfuncs.o makefuncs.o \
!        outfuncs.o readfuncs.o print.o read.o params.o value.o

  all: SUBSYS.o

--- 14,22 ----

  OBJS = nodeFuncs.o nodes.o list.o bitmapset.o tidbitmap.o \
         copyfuncs.o equalfuncs.o makefuncs.o \
!
outfuncs.o readfuncs.o print.o read.o params.o value.o \ ! ondiskbitmapwords.o ! all: SUBSYS.o diff -Ncpr pgsql.head/src/backend/nodes/ondiskbitmapwords.c bmdist/src/backend/nodes/ondiskbitmapwords.c *** pgsql.head/src/backend/nodes/ondiskbitmapwords.c 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/backend/nodes/ondiskbitmapwords.c 2006-08-01 13:13:49.341932632 +1000 *************** *** 0 **** --- 1,103 ---- + /*------------------------------------------------------------------------- + * + * ondiskbitmapwords.c + * PostgreSQL + * + * Copyright (c) 1996-2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include "nodes/ondiskbitmapwords.h" + #include "nodes/execnodes.h" + + /* + * odbm_create() -- create an OnDiskBitmapWords object. + * + * The returned OnDiskBitmapWords contains 'maxNumOfWords' number of + * bitmap words. This number must be a multiple of + * BM_NUM_OF_HRL_WORDS_PER_PAGE (checked by the Assert below). + */ + OnDiskBitmapWords* + odbm_create(uint32 maxNumOfWords) + { + OnDiskBitmapWords* odbm; + + /* we want to read the words in a page at once. */ + Assert(maxNumOfWords%BM_NUM_OF_HRL_WORDS_PER_PAGE == 0); + + odbm = (OnDiskBitmapWords*) palloc0(sizeof(OnDiskBitmapWords)); + + odbm->type = T_OnDiskBitmapWords; /* set NodeTag */ + odbm->mcxt = CurrentMemoryContext; /* remembered; odbm_res_create allocates from it */ + _bitmap_init_batchwords(&(odbm->bitmapWords), maxNumOfWords, odbm->mcxt); + + return odbm; + } + + /* + * odbm_free() -- release the space for an OnDiskBitmapWords ('odbm' must not be NULL: it is dereferenced unconditionally). + */ + void + odbm_free(OnDiskBitmapWords* odbm) + { + _bitmap_cleanup_batchwords(&(odbm->bitmapWords)); + + pfree(odbm); + } + + /* + * odbm_set_bitmaptype() -- set if in-memory bitmap scan is used in a plan. Only BitmapAnd, BitmapOr and BitmapIndexScan nodes carry the flag; any other node type is left untouched. 
+ */ + void + odbm_set_bitmaptype(Plan* plan, bool inmem) + { + if(IsA(plan, BitmapAnd)) + ((BitmapAnd*)plan)->inmem = inmem; + else if (IsA(plan, BitmapOr)) + ((BitmapOr*)plan)->inmem = inmem; + else if (IsA(plan, BitmapIndexScan)) + ((BitmapIndexScan*)plan)->inmem = inmem; + } + + /* + * odbm_set_child_resultnode() -- set 'resultOdbm' in a child node plan + * state 'ps' to a given 'odbm' ('odbiss_result' for a BitmapIndexScanState); any other state type raises an ERROR. + */ + void + odbm_set_child_resultnode(struct PlanState* ps, OnDiskBitmapWords* odbm) + { + if (IsA(ps, BitmapAndState)) + ((BitmapAndState*)ps)->resultOdbm = odbm; + else if (IsA(ps, BitmapOrState)) + ((BitmapOrState*)ps)->resultOdbm = odbm; + else if (IsA(ps, BitmapIndexScanState)) + ((BitmapIndexScanState*)ps)->odbiss_result = odbm; + else + elog(ERROR, "wrong type in the subplan"); + } + + /* + * odbm_res_create() -- create a BMIterateResult instance, zero-filled and allocated in odbm's memory context. + */ + BMIterateResult* + odbm_res_create(OnDiskBitmapWords* odbm) + { + BMIterateResult* odbmres = + MemoryContextAllocZero(odbm->mcxt, sizeof(BMIterateResult)); + + return odbmres; + } + + /* + * odbm_res_free() -- free a BMIterateResult instance; a NULL argument is a no-op. + */ + void + odbm_res_free(BMIterateResult* odbmres) + { + if (odbmres != NULL) + pfree(odbmres); + } diff -Ncpr pgsql.head/src/backend/optimizer/plan/createplan.c bmdist/src/backend/optimizer/plan/createplan.c *** pgsql.head/src/backend/optimizer/plan/createplan.c 2006-07-26 10:34:48.000000000 +1000 --- bmdist/src/backend/optimizer/plan/createplan.c 2006-08-01 12:59:14.000000000 +1000 *************** static void copy_path_costsize(Plan *des *** 78,88 **** static void copy_plan_costsize(Plan *dest, Plan *src); static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, ! Oid indexid, List *indexqual, List *indexqualorig, List *indexstrategy, List *indexsubtype, ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, ! 
List *indexqual, List *indexqualorig, List *indexstrategy, List *indexsubtype); --- 78,88 ---- static void copy_plan_costsize(Plan *dest, Plan *src); static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, ! Oid indexid, Oid indexam, List *indexqual, List *indexqualorig, List *indexstrategy, List *indexsubtype, ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, ! Oid indexam, List *indexqual, List *indexqualorig, List *indexstrategy, List *indexsubtype); *************** create_indexscan_plan(PlannerInfo *root, *** 793,798 **** --- 793,799 ---- List *indexquals = best_path->indexquals; Index baserelid = best_path->path.parent->relid; Oid indexoid = best_path->indexinfo->indexoid; + Oid indexam = best_path->indexinfo->relam; List *qpqual; List *stripped_indexquals; List *fixed_indexquals; *************** create_indexscan_plan(PlannerInfo *root, *** 903,908 **** --- 904,910 ---- qpqual, baserelid, indexoid, + indexam, fixed_indexquals, stripped_indexquals, indexstrategy, *************** create_bitmap_subplan(PlannerInfo *root, *** 1172,1177 **** --- 1174,1180 ---- /* then convert to a bitmap indexscan */ plan = (Plan *) make_bitmap_indexscan(iscan->scan.scanrelid, iscan->indexid, + iscan->indexam, iscan->indexqual, iscan->indexqualorig, iscan->indexstrategy, *************** make_indexscan(List *qptlist, *** 1986,1991 **** --- 1989,1995 ---- List *qpqual, Index scanrelid, Oid indexid, + Oid indexam, List *indexqual, List *indexqualorig, List *indexstrategy, *************** make_indexscan(List *qptlist, *** 2002,2007 **** --- 2006,2012 ---- plan->righttree = NULL; node->scan.scanrelid = scanrelid; node->indexid = indexid; + node->indexam = indexam; node->indexqual = indexqual; node->indexqualorig = indexqualorig; node->indexstrategy = indexstrategy; *************** make_indexscan(List *qptlist, *** 2014,2019 **** --- 
2019,2025 ---- static BitmapIndexScan * make_bitmap_indexscan(Index scanrelid, Oid indexid, + Oid indexam, List *indexqual, List *indexqualorig, List *indexstrategy, *************** make_bitmap_indexscan(Index scanrelid, *** 2029,2038 **** --- 2035,2049 ---- plan->righttree = NULL; node->scan.scanrelid = scanrelid; node->indexid = indexid; + node->indexam = indexam; node->indexqual = indexqual; node->indexqualorig = indexqualorig; node->indexstrategy = indexstrategy; node->indexsubtype = indexsubtype; + if (node->indexam == BITMAP_AM_OID) + node->inmem = false; + else + node->inmem = true; return node; } *************** make_bitmap_heapscan(List *qptlist, *** 2055,2060 **** --- 2066,2080 ---- node->scan.scanrelid = scanrelid; node->bitmapqualorig = bitmapqualorig; + if (IsA(lefttree, BitmapIndexScan)) + node->inmem = (((BitmapIndexScan*)lefttree)->indexam != BITMAP_AM_OID); + else if (IsA(lefttree, BitmapAnd)) + node->inmem = ((BitmapAnd*)lefttree)->inmem; + else if (IsA(lefttree, BitmapOr)) + node->inmem = ((BitmapOr*)lefttree)->inmem; + else + node->inmem = true; + return node; } *************** make_bitmap_and(List *bitmapplans) *** 2166,2171 **** --- 2186,2192 ---- { BitmapAnd *node = makeNode(BitmapAnd); Plan *plan = &node->plan; + ListCell *subnode; /* cost should be inserted by caller */ plan->targetlist = NIL; *************** make_bitmap_and(List *bitmapplans) *** 2174,2179 **** --- 2195,2226 ---- plan->righttree = NULL; node->bitmapplans = bitmapplans; + node->inmem = false; + foreach(subnode, bitmapplans) + { + Plan *subplan = (Plan *) lfirst(subnode); + if (IsA(subplan, BitmapIndexScan) && + ((BitmapIndexScan *)subplan)->indexam != BITMAP_AM_OID) + { + node->inmem = true; + break; + } + + else if (IsA(subplan, BitmapAnd) && + ((BitmapAnd *)subplan)->inmem) + { + node->inmem = true; + break; + } + + else if (IsA(subplan, BitmapOr) && + ((BitmapOr *)subplan)->inmem) + { + node->inmem = true; + break; + } + } + return node; } *************** 
make_bitmap_or(List *bitmapplans) *** 2182,2187 **** --- 2229,2235 ---- { BitmapOr *node = makeNode(BitmapOr); Plan *plan = &node->plan; + ListCell *subnode; /* cost should be inserted by caller */ plan->targetlist = NIL; *************** make_bitmap_or(List *bitmapplans) *** 2190,2195 **** --- 2238,2269 ---- plan->righttree = NULL; node->bitmapplans = bitmapplans; + node->inmem = false; + foreach(subnode, bitmapplans) + { + Plan *subplan = (Plan *) lfirst(subnode); + if (IsA(subplan, BitmapIndexScan) && + ((BitmapIndexScan *)subplan)->indexam != BITMAP_AM_OID) + { + node->inmem = true; + break; + } + + else if (IsA(subplan, BitmapAnd) && + ((BitmapAnd *)subplan)->inmem) + { + node->inmem = true; + break; + } + + else if (IsA(subplan, BitmapOr) && + ((BitmapOr *)subplan)->inmem) + { + node->inmem = true; + break; + } + } + return node; } diff -Ncpr pgsql.head/src/backend/utils/adt/selfuncs.c bmdist/src/backend/utils/adt/selfuncs.c *** pgsql.head/src/backend/utils/adt/selfuncs.c 2006-07-27 03:17:28.000000000 +1000 --- bmdist/src/backend/utils/adt/selfuncs.c 2006-08-01 12:58:41.000000000 +1000 *************** gincostestimate(PG_FUNCTION_ARGS) *** 4975,4977 **** --- 4975,4996 ---- PG_RETURN_VOID(); } + + Datum + bmcostestimate(PG_FUNCTION_ARGS) + { + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); + List *indexQuals = (List *) PG_GETARG_POINTER(2); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); + double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + + genericcostestimate(root, index, indexQuals, outer_rel, 0.0, + indexStartupCost, indexTotalCost, + indexSelectivity, indexCorrelation); + + PG_RETURN_VOID(); + } diff -Ncpr pgsql.head/src/include/access/bitmap.h 
bmdist/src/include/access/bitmap.h *** pgsql.head/src/include/access/bitmap.h 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/include/access/bitmap.h 2006-08-01 13:15:09.756707728 +1000 *************** *** 0 **** --- 1,662 ---- + /*------------------------------------------------------------------------- + * + * bitmap.h + * header file for on-disk bitmap index access method implementation. + * + * Portions Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + + #ifndef BITMAP_H + #define BITMAP_H + + #include "access/itup.h" + #include "access/relscan.h" + #include "access/sdir.h" + #include "access/xlogutils.h" + #include "storage/lock.h" + + #define BM_READ BUFFER_LOCK_SHARE + #define BM_WRITE BUFFER_LOCK_EXCLUSIVE + #define BM_NOLOCK (-1) + + /* the encoding schemes for a bitmap index */ + #define BM_EQUALITY 1 + + /* the size in bits of a hybrid run-length(HRL) word */ + #define BM_HRL_WORD_SIZE 8 + /* the type for a HRL word */ + typedef uint8 BM_HRL_WORD; + + #define BM_HRL_WORD_LEFTMOST (BM_HRL_WORD_SIZE-1) + + /* + * Metapage, always the first page (page 0) in the index. + * + * This page stores some meta-data information about this index. + */ + typedef struct BMMetaPageData + { + PageHeaderData bm_phdr; /* pad for page header (do not use) */ + + /* number of indexed tuples in this index */ + uint64 bm_num_tuples; + + /* + * The relation ids for a heap and a btree on this heap. They are + * used to speed up finding the bitmap vector for given attribute + * value(s), see the comments for LOV pages below for more + * information. We consider these as the metadata for LOV pages. + */ + Oid bm_lov_heapId; /* the relation id for the heap */ + Oid bm_lov_indexId; /* the relation id for the index */ + + /* the block number for the last LOV pages. 
*/
+ 	BlockNumber	bm_lov_lastpage;
+
+ 	/*
+ 	 * Indicates if the bitmap index needs to be re-built during
+ 	 * vacuuming.
+ 	 *
+ 	 * The vacuum command will check this value to determine if
+ 	 * this bitmap index needs to be re-built. This value is set to
+ 	 * true if there is an update to a bit that is in the middle
+ 	 * of its bitmap vector.
+ 	 */
+ 	bool		bm_need_rebuilt;
+
+ } BMMetaPageData;
+ typedef BMMetaPageData* BMMetaPage;
+
+ #define BM_METAPAGE 	0
+
+ /* the maximum number of heap tuples in one page */
+ #define MaxNumHeapTuples \
+ 	((BLCKSZ - 1) / MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) + \
+ 	 sizeof(ItemIdData)) + 1)
+
+ /*
+  * LOV (List Of Values) page -- pages to store a list of distinct
+  * values for attribute(s) to be indexed, some metadata related to
+  * their corresponding bitmap vectors, and the pointers to their
+  * bitmap vectors. For each distinct value, there is a BMLOVItemData
+  * associated with it. A LOV page maintains an array of BMLOVItemData
+  * instances, called lov items.
+  *
+  * To speed up finding the lov item for a given value, we
+  * create a heap to maintain all distinct values along with the
+  * block numbers and offset numbers for their lov items in LOV pages.
+  * That is, there are in total "number of indexed attributes + 2" attributes
+  * in this new heap. Along with this heap, we also create a new btree
+  * index on this heap using the indexed attribute(s) as btree keys. In this
+  * way, for any given value, we search this btree to find
+  * the block number and offset number for its corresponding lov item.
+  */
+
+ #define		BM_LOV_STARTPAGE	1
+
+ /*
+  * Items in a LOV page.
+  *
+  * Each item corresponds to a distinct value for the attribute(s)
+  * to be indexed. For multi-column indexes on (a_1,a_2,...,a_n), we say
+  * two values (l_1,l_2,...,l_n) and (k_1,k_2,...,k_n) for (a_1,a_2,...,a_n)
+  * are the same if and only if for all i, l_i=k_i.
+  *
+  */
+ typedef struct BMLOVItemData
+ {
+ 	/* the first page and last page of the bitmap vector.
*/ + BlockNumber bm_lov_head; + BlockNumber bm_lov_tail; + + /* Additional information to be used to append new bits into + existing bitmap vector that this distinct value is associated with. + The following two words do not store in the regular bitmap page, + defined below. */ + + /* the last complete word in its bitmap vector. */ + BM_HRL_WORD bm_last_compword; + + /* + * the last word in its bitmap vector. This word is not + * a complete word. If a new appending bit makes this word + * to be complete, this word will merge with bm_last_compword. + */ + BM_HRL_WORD bm_last_word; + + /* + * the tid location for the last bit stored in bm_last_compword. + * A tid location represents the index position for a bit in a + * bitmap vector, which is conceptualized as an array + * of bits. This value -- the index position starts from 1, and + * is calculated through (block#)*MaxNumHeapTuples + (offset#), + * where (block#) and (offset#) are from the heap tuple ctid. + * This value is used while updating a bit in the middle of + * its bitmap vector. When moving the last complete word to + * the bitmap page, this value will also be written to that page. + * Each bitmap page maintains a similar value -- the tid location + * for the last bit stored in that page. This will help us + * know the range of tid locations for bits in a bitmap page + * without decompressing all bits. + */ + uint64 bm_last_tid_location; + + /* + * the tid location of the last bit whose value is 1 (a set bit). + * Each bitmap vector will be visited only when there is a new + * set bit to be appended/updated. In the appending case, a new + * tid location is presented. With this value, we can calculate + * how many bits are 0s between this new set bit and the previous + * set bit. + */ + uint64 bm_last_setbit; + + /* + * Only two least-significant bits in this byte is used. + * + * If the first least-significant bit is 1, then it represents + * that bm_last_word is a fill word. 
If the second least-significant + * bit is 1, it represents that bm_last_compword is a fill word. + */ + uint8 bm_last_two_headerbits; + + } BMLOVItemData; + + typedef BMLOVItemData* BMLOVItem; + + #define BMLOVITEM_SIZE(itup) (IndexTupleSize(itup) + \ + (sizeof(BMLOVItemData) - sizeof(IndexTupleData))) + + #define BM_MAX_LOVITEMS_PER_PAGE \ + ((BLCKSZ-sizeof(PageHeaderData))/sizeof(BMLOVItemData)) + + /* + * Bitmap page -- pages to store bits in a bitmap vector. + * + * Each bitmap page stores two parts of information: header words and + * content words. Each bit in the header words is corresponding to + * a word in the content words. If a bit in the header words is 1, + * then its corresponding content word is a compressed word. Otherwise, + * it is a literal word. + * + * If a content word is a fill word, it means that there is a sequence + * of 0 bits or 1 bits. The most significant bit in this content word + * represents the bits in this sequence are 0s or 1s. The rest of bits + * stores the value of "the number of bits / BM_HRL_WORD_SIZE". + */ + + /* + * Opaque data for a bitmap page. + */ + typedef struct BMBitmapOpaqueData + { + uint32 bm_hrl_words_used; /* the number of words used */ + BlockNumber bm_bitmap_next; /* the next page for this bitmap */ + + /* + * the tid location for the last bit in this page. + */ + uint64 bm_last_tid_location; + } BMBitmapOpaqueData; + typedef BMBitmapOpaqueData* BMBitmapOpaque; + + #define BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE \ + ((BLCKSZ - \ + MAXALIGN(sizeof(PageHeaderData)) - \ + MAXALIGN(sizeof(BMBitmapOpaqueData)))/sizeof(BM_HRL_WORD)) + #define BM_MAX_NUM_OF_HEADER_WORDS \ + (BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE/(BM_HRL_WORD_SIZE+1)) + /* + * To make the last header word a complete word, we limit this number to + * the multiplication of the word size. + */ + #define BM_NUM_OF_HRL_WORDS_PER_PAGE \ + (((BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE - \ + BM_MAX_NUM_OF_HEADER_WORDS)/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE) + + /* debugging... 
*/ + /*#define BM_MAX_NUM_OF_HEADER_WORDS 4 + #define BM_NUM_OF_HRL_WORDS_PER_PAGE 32*/ + + typedef struct BMBitmapData + { + BM_HRL_WORD bm_headerWords[BM_MAX_NUM_OF_HEADER_WORDS]; + BM_HRL_WORD bm_contentWords[BM_NUM_OF_HRL_WORDS_PER_PAGE]; + } BMBitmapData; + typedef BMBitmapData* BMBitmap; + + /* the maxinum tid locations in a BMTidLocsBuffer: # of distinct values * + * (# of tid locations for each distinct value + 1). + * We set the default to 100*10000. When the number of distinct values + * goes beyond 100, then we consider a BMTidLocsBuffer has 200*5000 + * tid locations. If the number of distinct values goes beyond 200, then + * there are 300*3333. And so on. + */ + #define BM_NUM_DISTINCT_VALUES 100 + #define BM_MAX_TIDLOCS_PER_VALUE 10000 + #define BM_MAX_TIDLOCS (BM_NUM_DISTINCT_VALUES*BM_MAX_TIDLOCS_PER_VALUE) + + /* + * BMTidLocsBuffer -- a buffer to store a list of tid locations of + * the last several set bits for each distinct value. + * + * This buffer is used during the bitmap index creation. This buffer is + * designed to avoid writing one bit at a time to a bitmap vector. + * + * Conceptually, without this buffer, when a new tuple is inserted + * into the bitmap index, the corresponding bitmap vector is retrieved, + * and a new set bit is inserted into this bitmap vector. When the next + * tuple comes in, it is very likely that another bitmap vector is + * requested, and a new page is visited. Thus, writing one set bit + * at a time may cause seeking among many different pages in the bitmap + * index. + * + * With this buffer, we are able to write out a list of set bits at once, + * which has the following two advantages: + * + * (1) The number of IOs is reduced. Writing a list of set bits together + * makes it possible to read the corresponding pages only once. + * (2) Pages belonging to a bitmap vector are close together. This can + * further increase the search performance. 
+  */
+ typedef struct BMTidLocsBuffer
+ {
+ 	/*
+ 	 * the maximum number of distinct values that the relation attribute(s)
+ 	 * will have.
+ 	 */
+ 	uint32		bm_max_values;
+
+ 	/*
+ 	 * This array will be divided into "bm_max_values" parts, each of which
+ 	 * corresponds to one distinct value. In other words, each
+ 	 * distinct value has at most BM_MAX_TIDLOCS/bm_max_values tid locations
+ 	 * stored in this array.
+ 	 */
+ 	uint64		bm_tidLocs[BM_MAX_TIDLOCS];
+ } BMTidLocsBuffer;
+
+ #define BM_MAX_TIDLOCS_BUFFER_SIZE \
+ 	(sizeof(BMTidLocsBuffer))
+
+ /*
+  * The number of tid locations to be found at once during query processing.
+  */
+ #define BM_BATCH_TIDS  (16*1024)
+
+ /*
+  * the maximum number of words to be retrieved during BitmapIndexScan.
+  */
+ #define BM_MAX_WORDS (BM_NUM_OF_HRL_WORDS_PER_PAGE*4)
+
+ /* Some macros for manipulating a bitmap word. */
+ #define LITERAL_ALL_ZERO	0
+ #define LITERAL_ALL_ONE		((BM_HRL_WORD)(~((BM_HRL_WORD)0)))
+
+ #define BM_MAKE_FILL_WORD(bit, length) \
+ 	((((BM_HRL_WORD)(bit)) << (BM_HRL_WORD_SIZE-1)) | (length))
+ #define FILL_LENGTH(w)        (((BM_HRL_WORD)(((BM_HRL_WORD)(w))<<1))>>1)
+ #define MAX_FILL_LENGTH		((((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE-1))-1)
+ #define GET_FILL_BIT(w)		(((BM_HRL_WORD)(w))>>BM_HRL_WORD_LEFTMOST)
+ #define IS_FILL_WORD(words,wordNo) \
+ 	((((words)[(wordNo)/BM_HRL_WORD_SIZE]) & \
+ 	 (((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE-1-((wordNo)%BM_HRL_WORD_SIZE))))\
+ 	 != 0)
+
+ #define BM_NUM_LOV_BLOCKS 100
+
+ /*
+  * the state for build
+  */
+ typedef struct BMBuildState
+ {
+ 	Buffer			bm_metabuf;
+
+ 	TupleDesc		bm_tupDesc;
+ 	Relation		bm_lov_heap;
+ 	Relation		bm_lov_index;
+ 	ScanKey			bm_lov_scanKeys;
+ 	IndexScanDesc	bm_lov_scanDesc;
+
+ 	/*
+ 	 * the buffer to store last several tid locations for each distinct
+ 	 * value.
+ 	 */
+ 	BMTidLocsBuffer*	bm_tidLocsBuffer;
+
+ 	/*
+ 	 * arrays to store the block numbers for all LOV pages.
+ 	 *
+ 	 * This is used to calculate the starting index position for each
+ 	 * distinct value in BMTidLocsBuffer.bm_tidLocs. Assume that
+ 	 * the lov item for a distinct value is stored in the i-th LOV page,
+ 	 * and its offset number is j, then the starting index position
+ 	 * for this distinct value in BMTidLocsBuffer.bm_tidLocs is
+ 	 * i*BM_MAX_LOVITEMS_PER_PAGE + (j-1).
+ 	 *
+ 	 * This array should be sorted.
+ 	 */
+ 	uint16			bm_max_num_lov_blocks;
+ 	uint16			bm_num_lov_blocks;
+ 	BlockNumber*	bm_lov_blocks;
+
+ 	/* the number of index tuples */
+ 	double 			ituples ;
+ } BMBuildState ;
+
+ /*
+  * Define an iteration result while scanning a BMBatchWords.
+  *
+  * This result includes the last scan position in a BMBatchWords,
+  * and all tids that are generated from the previous scan.
+  */
+ typedef struct BMIterateResult
+ {
+ 	uint64	nextTid; /* the first tid for the next iteration */
+ 	/* the position in a bitmap word after the last iteration */
+ 	uint32	lastScanPos;
+ 	/* the position of the bitmap word in an OnDiskBitmapWords after
+ 	   the last iteration */
+ 	uint32	lastScanWordNo;
+ 	/* the tids generated from the previous iteration */
+ 	uint64	nextTids[BM_BATCH_TIDS];
+ 	/* number of tids generated from the previous iteration */
+ 	uint32	numOfTids;
+ 	/* the next position in 'nextTids' to be read. */
+ 	uint32	nextTidLoc;
+ } BMIterateResult;
+
+ /*
+  * Stores a batch of consecutive bitmap words from a bitmap vector.
+  *
+  * These bitmap words come from a bitmap vector stored in this bitmap
+  * index, or a bitmap vector that is generated by ANDing/ORing several
+  * bitmap vectors.
+  *
+  * This struct also contains information to compute the tid locations
+  * for the set bits in these bitmap words.
+ */ + typedef struct BMBatchWords + { + uint32 maxNumOfWords; /* maximum number of words in this list */ + + /* The following two variables are for performing AND/OR operations */ + + /* number of words that have been read in this list */ + uint32 numOfWordsRead; + /* the position of the next word to be read */ + uint32 nextReadNo; + + /* the starting tid number of this list of bitmap words */ + uint64 firstTid; + /* the starting position of meaningful bitmap words in the list */ + uint32 startNo; + uint32 numOfWords; /* the number of bitmap words in this list */ + BM_HRL_WORD* bitmapHeaderWords; /* the header words */ + BM_HRL_WORD* bitmapContentWords; /* the list of bitmap words */ + } BMBatchWords; + + /* + * Scan opaque data for one bitmap vector. + * + * This structure stores a batch of consecutive bitmap words for a + * bitmap vector that have been read from the disk, and remembers + * the next reading position for the next batch of consecutive + * bitmap words. + */ + typedef struct BMBitmapScanPositionData + { + Buffer bm_lovBuffer;/* the buffer that contains the LOV item. */ + OffsetNumber bm_lovOffset; /* the offset of the LOV item */ + BlockNumber bm_nextBlockNo; /* the next bitmap page block */ + + /* indicate if the last two words in the bitmap has been read. + * These two words are stored inside a BMLovItem. If this value + * is true, it means this bitmap vector has no more words. + */ + bool bm_readLastWords; + + /* the words in one bitmap vector that is necessary to obtain + the final bitmap vector for the query. */ + BMBatchWords* bm_batchWords; + + } BMBitmapScanPositionData; + typedef BMBitmapScanPositionData* BMBitmapScanPosition; + + /* + * Defines the current position of a scan. + * + * For each scan, all related bitmap vectors are read from the bitmap + * index, and ORed together into a final bitmap vector. The words + * in each bitmap vector are read in batches. 
This structure stores + * the following: + * (1) words for a final bitmap vector after ORing words from + * related bitmap vectors. + * (2) tid locations that satisfy the query. + * (3) One BMBitmapScanPositionData for each related bitmap vector. + */ + typedef struct BMScanPositionData + { + /* indicate if this scan is over. */ + bool bm_done; + + /* the number of related bitmap vectors */ + int bm_numBitmapVectors; + + /* + * the start position of bitmap vectors appearing at the end of + * this structure that have not been processed. + */ + int bm_startBitmapVectorNo; + + /* the words in the final bitmap vector that satisfies the query. */ + BMBatchWords* bm_batchWords; + + /* + * the BMIterateResult instance that contains the final + * tid locations for tuples that satisfy the query. + */ + BMIterateResult bm_result; + + /* + * one or more BMBitmapScanPositionData, depending on + * the query predicates. + */ + BMBitmapScanPosition bm_bitmapScanPos; + } BMScanPositionData; + typedef BMScanPositionData* BMScanPosition; + + typedef struct BMScanOpaqueData + { + BMScanPosition bm_currPos; + BMScanPosition bm_markPos; + } BMScanOpaqueData; + typedef BMScanOpaqueData* BMScanOpaque; + + /* + * XLOG records for bitmap index operations + * + * Some information in high 4 bits of log record xl_info field. + */ + #define XLOG_BITMAP_INSERT_NEWMETA 0x00 /* add a new metapage */ + #define XLOG_BITMAP_INSERT_NEWLOV 0x10 /* add a new LOV page */ + #define XLOG_BITMAP_INSERT_LOVITEM 0x20 /* add a new entry into a LOV page */ + #define XLOG_BITMAP_INSERT_META 0x30 /* update the metapage */ + #define XLOG_BITMAP_INSERT_NEWBITMAP 0x40 /* add a new bitmap page */ + #define XLOG_BITMAP_INSERT_BITMAP 0x50 /* add a new set bit */ + #define XLOG_BITMAP_INSERT_BITMAP_LASTWORDS 0x60 /* update the last 2 words + in a bitmap */ + + /* The information about inserting a new lovitem into the LOV list. 
*/
+ typedef struct xl_bm_lovitem
+ {
+ 	RelFileNode 	bm_node;
+ 	BlockNumber		bm_lov_blkno;
+ 	bool			bm_isNewItem;
+ 	OffsetNumber	bm_lov_offset;
+ 	BMLOVItemData	bm_lovItem;
+ } xl_bm_lovitem;
+
+ /* The information about adding a new page */
+ typedef struct xl_bm_newpage{
+ 	RelFileNode 	bm_node;
+ 	BlockNumber		bm_new_blkno;
+ } xl_bm_newpage;
+
+ /*
+  * The information about changes on a bitmap page.
+  * If bm_isOpaque is true, then bm_next_blkno is set.
+  */
+ typedef struct xl_bm_bitmappage{
+ 	RelFileNode 	bm_node;
+ 	BlockNumber		bm_bitmap_blkno;
+
+ 	bool			bm_isOpaque;
+ 	BlockNumber		bm_next_blkno;
+
+ 	uint64			bm_last_tid_location;	/* same width as in BMBitmapOpaqueData */
+ 	uint32			bm_hrl_words_used;
+ 	uint32			bm_num_words;
+ 	/* for simplicity, we log the header words each time */
+ 	BM_HRL_WORD 	bm_headerWords[BM_MAX_NUM_OF_HEADER_WORDS];
+ 	/* followed by the "bm_num_words" content words. */
+ } xl_bm_bitmappage;
+
+ /* The information about changes to the last 2 words in a bitmap vector */
+ typedef struct xl_bm_bitmap_lastwords
+ {
+ 	RelFileNode 	bm_node;
+ 	BM_HRL_WORD		bm_last_compword;
+ 	BM_HRL_WORD		bm_last_word;
+ 	uint8			bm_last_two_headerbits;
+
+ 	BlockNumber		bm_lov_blkno;
+ 	OffsetNumber	bm_lov_offset;
+ } xl_bm_bitmap_lastwords;
+
+ /* The information about the changes in the metapage. */
+ typedef struct xl_bm_metapage
+ {
+ 	RelFileNode bm_node;
+ 	uint64		bm_num_tuples;
+ 	Oid			bm_lov_heapId;		/* the relation id for the heap */
+ 	Oid			bm_lov_indexId;		/* the relation id for the index */
+ 	/* the block number for the last LOV pages. */
+ 	BlockNumber	bm_lov_lastpage;
+ 	/* indicate if this bitmap index needs to be re-built while vacuuming.
*/ + bool bm_need_rebuilt; + } xl_bm_metapage; + + /* public routines */ + extern Datum bmbuild(PG_FUNCTION_ARGS); + extern Datum bminsert(PG_FUNCTION_ARGS); + extern Datum bmbeginscan(PG_FUNCTION_ARGS); + extern Datum bmgettuple(PG_FUNCTION_ARGS); + extern Datum bmgetmulti(PG_FUNCTION_ARGS); + extern Datum bmrescan(PG_FUNCTION_ARGS); + extern Datum bmendscan(PG_FUNCTION_ARGS); + extern Datum bmmarkpos(PG_FUNCTION_ARGS); + extern Datum bmrestrpos(PG_FUNCTION_ARGS); + extern Datum bmbulkdelete(PG_FUNCTION_ARGS); + extern Datum bmvacuumcleanup(PG_FUNCTION_ARGS); + extern Datum bmgetbitmapwords(PG_FUNCTION_ARGS); + extern Datum bmoptions(PG_FUNCTION_ARGS); + + /* bitmappages.c */ + extern Buffer _bitmap_getbuf(Relation rel, BlockNumber blkno, int access); + extern void _bitmap_wrtbuf(Buffer buf); + extern void _bitmap_relbuf(Buffer buf); + extern void _bitmap_wrtnorelbuf(Buffer buf); + extern void _bitmap_init_lovpage(Relation rel, Buffer buf); + extern void _bitmap_init_bitmappage(Relation rel, Buffer buf); + extern void _bitmap_init_buildstate(Relation index, BMBuildState* bmstate); + extern void _bitmap_cleanup_buildstate(Relation index, BMBuildState* bmstate); + extern void _bitmap_init(Relation rel); + + /* bitmapinsert.c */ + extern void _bitmap_buildinsert + (Relation rel, ItemPointerData ht_ctid, Datum* attdata, bool* nulls, + BMBuildState* state); + extern void _bitmap_doinsert + (Relation rel, ItemPointerData ht_ctid, Datum* attdata, bool* nulls); + extern void + _bitmap_write_alltids(Relation rel, Buffer metabuf, + BMTidLocsBuffer* tidLocsBuffer, + uint64 tidLocsPerValue, + BlockNumber* lovBlocks); + + /* bitmaputil.c */ + extern BMLOVItem _bitmap_formitem(uint64 currTidNumber); + extern void _bitmap_init_batchwords(BMBatchWords* words, + uint32 maxNumOfWords, + MemoryContext mcxt); + extern void _bitmap_reset_batchwords(BMBatchWords* words); + extern void _bitmap_cleanup_batchwords(BMBatchWords* words); + extern uint64 _bitmap_findnexttid(BMBatchWords 
*words, + BMIterateResult *result); + extern void _bitmap_findnexttids(BMBatchWords *words, + BMIterateResult *result, uint32 maxTids); + extern void _bitmap_intersect(BMBatchWords **batches, uint32 numBatches, + BMBatchWords *result); + extern void _bitmap_union(BMBatchWords **batches, uint32 numBatches, + BMBatchWords *result); + extern void _bitmap_begin_iterate(BMBatchWords *words, + BMIterateResult* result); + extern void _bitmap_log_newpage + (Relation rel, uint8 info, Buffer buf); + extern void _bitmap_log_metapage(Relation rel, BMMetaPage metapage); + extern void _bitmap_log_bitmappage + (Relation rel, Buffer bitmapBuffer, bool isOpaque, uint32 numWords); + extern void _bitmap_log_bitmap_lastwords + (Relation rel, Buffer lovBuffer, + OffsetNumber lovOffset, BMLOVItem lovItem); + extern void _bitmap_log_lovitem + (Relation rel, Buffer lovBuffer, bool isNewItem, + OffsetNumber offset, BMLOVItem lovItem); + + /* bitmapsearch.c */ + extern void _bitmap_searchinit(IndexScanDesc scan, ScanDirection dir); + extern bool _bitmap_first(IndexScanDesc scan, ScanDirection dir); + extern bool _bitmap_next(IndexScanDesc scan, ScanDirection dir); + extern bool _bitmap_firstblockwords(IndexScanDesc scan, ScanDirection dir); + extern bool _bitmap_nextblockwords(IndexScanDesc scan, ScanDirection dir); + + /* bitmapattutil.c */ + extern void _bitmap_create_lov_heapandindex + (Relation rel, Oid *heapId, Oid *indexId); + extern void _bitmap_open_lov_heapandindex + (Relation rel, BMMetaPage metapage, + Relation *lovHeapP, Relation *lovIndexP, + LOCKMODE lockMode); + extern void _bitmap_insert_lov + (Relation lovHeap, Relation lovIndex, Datum* datum, bool* nulls); + extern void _bitmap_close_lov_heapandindex + (Relation lovHeap, Relation lovIndex, LOCKMODE lockMode); + extern bool _bitmap_findvalue + (Relation lovHeap, Relation lovIndex, + ScanKey scanKey, IndexScanDesc scanDesc, + BlockNumber *lovBlock, bool *blockNull, + OffsetNumber *lovOffset, bool *offsetNull); + extern void 
+ _bitmap_insert_lov_block_number(BlockNumber lovBlock, + BlockNumber** lovBlocks, + uint16* numLovBlocks, + uint16* maxNumLovBlocks); + extern uint16 + _bitmap_find_lov_index(BlockNumber lovBlock, BlockNumber* lovBlocks, + uint16 numLovBlocks); + + + /* + * prototypes for functions in bitmapxlog.c + */ + extern void bitmap_redo(XLogRecPtr lsn, XLogRecord *record); + extern void bitmap_undo(XLogRecPtr lsn, XLogRecord *record); + extern void bitmap_desc(StringInfo buf, uint8 xl_info, char *rec); + + #endif diff -Ncpr pgsql.head/src/include/access/genam.h bmdist/src/include/access/genam.h *** pgsql.head/src/include/access/genam.h 2006-08-01 06:09:05.000000000 +1000 --- bmdist/src/include/access/genam.h 2006-08-01 12:57:29.000000000 +1000 *************** *** 14,19 **** --- 14,20 ---- #ifndef GENAM_H #define GENAM_H + #include "access/bitmap.h" #include "access/relscan.h" #include "access/sdir.h" #include "nodes/primnodes.h" *************** extern IndexScanDesc index_beginscan(Rel *** 101,106 **** --- 102,110 ---- extern IndexScanDesc index_beginscan_multi(Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key); + extern IndexScanDesc index_beginscan_bitmapwords + (Relation indexRelation, bool need_index_lock, Snapshot snapshot, + int nkeys, ScanKey key); extern void index_rescan(IndexScanDesc scan, ScanKey key); extern void index_endscan(IndexScanDesc scan); extern void index_markpos(IndexScanDesc scan); *************** extern bool index_getnext_indexitem(Inde *** 111,116 **** --- 115,124 ---- extern bool index_getmulti(IndexScanDesc scan, ItemPointer tids, int32 max_tids, int32 *returned_tids); + extern bool index_getbitmapwords(IndexScanDesc scan, uint32 maxNumOfWords, + uint32 *returnedNumOfWords, + BM_HRL_WORD* headerWords, + BM_HRL_WORD* contentWords); extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, diff -Ncpr pgsql.head/src/include/access/rmgr.h bmdist/src/include/access/rmgr.h *** 
pgsql.head/src/include/access/rmgr.h 2006-05-02 21:28:55.000000000 +1000 --- bmdist/src/include/access/rmgr.h 2006-08-01 12:57:29.000000000 +1000 *************** typedef uint8 RmgrId; *** 26,31 **** #define RM_GIN_ID 13 #define RM_GIST_ID 14 #define RM_SEQ_ID 15 ! #define RM_MAX_ID RM_SEQ_ID #endif /* RMGR_H */ --- 26,32 ---- #define RM_GIN_ID 13 #define RM_GIST_ID 14 #define RM_SEQ_ID 15 ! #define RM_BITMAP_ID 16 ! #define RM_MAX_ID RM_BITMAP_ID #endif /* RMGR_H */ diff -Ncpr pgsql.head/src/include/catalog/pg_am.h bmdist/src/include/catalog/pg_am.h *** pgsql.head/src/include/catalog/pg_am.h 2006-08-01 06:09:05.000000000 +1000 --- bmdist/src/include/catalog/pg_am.h 2006-08-01 13:16:18.388274144 +1000 *************** CATALOG(pg_am,2601) *** 52,61 **** --- 52,64 ---- bool amindexnulls; /* does AM support NULL index entries? */ bool amstorage; /* can storage type differ from column type? */ bool amclusterable; /* does AM support cluster command? */ + bool amcanshrink; /* does AM do anything other than REINDEX in + * VACUUM? */ regproc aminsert; /* "insert this tuple" function */ regproc ambeginscan; /* "start new scan" function */ regproc amgettuple; /* "next valid tuple" function */ regproc amgetmulti; /* "fetch multiple tuples" function */ + regproc amgetbitmapwords; /* "fetch bitmap words" function */ regproc amrescan; /* "restart this scan" function */ regproc amendscan; /* "end this scan" function */ regproc ammarkpos; /* "mark current scan position" function */ *************** typedef FormData_pg_am *Form_pg_am; *** 78,84 **** * compiler constants for pg_am * ---------------- */ ! #define Natts_pg_am 23 #define Anum_pg_am_amname 1 #define Anum_pg_am_amstrategies 2 #define Anum_pg_am_amsupport 3 --- 81,87 ---- * compiler constants for pg_am * ---------------- */ ! 
#define Natts_pg_am 25 #define Anum_pg_am_amname 1 #define Anum_pg_am_amstrategies 2 #define Anum_pg_am_amsupport 3 *************** typedef FormData_pg_am *Form_pg_am; *** 89,124 **** #define Anum_pg_am_amindexnulls 8 #define Anum_pg_am_amstorage 9 #define Anum_pg_am_amclusterable 10 ! #define Anum_pg_am_aminsert 11 ! #define Anum_pg_am_ambeginscan 12 ! #define Anum_pg_am_amgettuple 13 ! #define Anum_pg_am_amgetmulti 14 ! #define Anum_pg_am_amrescan 15 ! #define Anum_pg_am_amendscan 16 ! #define Anum_pg_am_ammarkpos 17 ! #define Anum_pg_am_amrestrpos 18 ! #define Anum_pg_am_ambuild 19 ! #define Anum_pg_am_ambulkdelete 20 ! #define Anum_pg_am_amvacuumcleanup 21 ! #define Anum_pg_am_amcostestimate 22 ! #define Anum_pg_am_amoptions 23 /* ---------------- * initial contents of pg_am * ---------------- */ ! DATA(insert OID = 403 ( btree 5 1 1 t t t t f t btinsert btbeginscan btgettuple btgetmulti btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 ! DATA(insert OID = 405 ( hash 1 1 0 f f f f f f hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 ! DATA(insert OID = 783 ( gist 100 7 0 f t t t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 ! 
DATA(insert OID = 2742 ( gin 100 4 0 f f f f t f gininsert ginbeginscan gingettuple gingetmulti ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 #endif /* PG_AM_H */ --- 92,132 ---- #define Anum_pg_am_amindexnulls 8 #define Anum_pg_am_amstorage 9 #define Anum_pg_am_amclusterable 10 ! #define Anum_pg_am_amcanshrink 11 ! #define Anum_pg_am_aminsert 12 ! #define Anum_pg_am_ambeginscan 13 ! #define Anum_pg_am_amgettuple 14 ! #define Anum_pg_am_amgetmulti 15 ! #define Anum_pg_am_amgetbitmapwords 16 ! #define Anum_pg_am_amrescan 17 ! #define Anum_pg_am_amendscan 18 ! #define Anum_pg_am_ammarkpos 19 ! #define Anum_pg_am_amrestrpos 20 ! #define Anum_pg_am_ambuild 21 ! #define Anum_pg_am_ambulkdelete 22 ! #define Anum_pg_am_amvacuumcleanup 23 ! #define Anum_pg_am_amcostestimate 24 ! #define Anum_pg_am_amoptions 25 /* ---------------- * initial contents of pg_am * ---------------- */ ! DATA(insert OID = 403 ( btree 5 1 1 t t t t f t t btinsert btbeginscan btgettuple btgetmulti - btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 ! DATA(insert OID = 405 ( hash 1 1 0 f f f f f f t hashinsert hashbeginscan hashgettuple hashgetmulti - hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 ! DATA(insert OID = 783 ( gist 100 7 0 f t t t t t t gistinsert gistbeginscan gistgettuple gistgetmulti - gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 ! 
DATA(insert OID = 2742 ( gin 100 4 0 f f f f t f t gininsert ginbeginscan gingettuple gingetmulti - ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 + DATA(insert OID = 2848 ( bitmap 1 1 0 t t t t f f f bminsert bmbeginscan bmgettuple bmgetmulti bmgetbitmapwords bmrescan bmendscan bmmarkpos bmrestrpos bmbuild bmbulkdelete bmvacuumcleanup bmcostestimate bmoptions )); + DESCR("bitmap index access method"); + #define BITMAP_AM_OID 2848 #endif /* PG_AM_H */ diff -Ncpr pgsql.head/src/include/catalog/pg_amop.h bmdist/src/include/catalog/pg_amop.h *** pgsql.head/src/include/catalog/pg_amop.h 2006-07-22 06:51:33.000000000 +1000 --- bmdist/src/include/catalog/pg_amop.h 2006-08-01 12:57:29.000000000 +1000 *************** DATA(insert ( 2780 0 2 f 2751 )); *** 886,889 **** --- 886,942 ---- DATA(insert ( 2780 0 3 t 2752 )); DATA(insert ( 2780 0 4 t 1070 )); + /* + * the operators for the on-disk bitmap index. 
+ */ + DATA(insert ( 2849 0 1 f 560 )); /* abstime */ + DATA(insert ( 2850 0 1 f 1070 )); /* array */ + DATA(insert ( 2851 0 1 f 1784 )); /* bit */ + DATA(insert ( 2852 0 1 f 91 )); /* bool */ + DATA(insert ( 2853 0 1 f 1054 )); /* bpchar */ + DATA(insert ( 2854 0 1 f 1955 )); /* bytea */ + DATA(insert ( 2855 0 1 f 92 )); /* char */ + DATA(insert ( 2856 0 1 f 1201 )); /* cidr */ + DATA(insert ( 2857 0 1 f 1093 )); /* date */ + DATA(insert ( 2857 1114 1 f 2347 )); /* date-timestamp */ + DATA(insert ( 2857 1184 1 f 2360 )); /* date-timestamptz */ + DATA(insert ( 2858 0 1 f 620 )); /* float4 */ + DATA(insert ( 2858 701 1 f 1120 )); /* float48 */ + DATA(insert ( 2859 0 1 f 670 )); /* float8 */ + DATA(insert ( 2859 700 1 f 1130 )); /* float84 */ + DATA(insert ( 2860 0 1 f 1201 )); /* inet */ + DATA(insert ( 2861 0 1 f 94 )); /* int2 */ + DATA(insert ( 2861 23 1 f 532 )); /* int24 */ + DATA(insert ( 2861 20 1 f 1862 )); /* int28 */ + DATA(insert ( 2862 0 1 f 96 )); /* int4 */ + DATA(insert ( 2862 21 1 f 533 )); /* int42 */ + DATA(insert ( 2862 20 1 f 15 )); /* int48 */ + DATA(insert ( 2863 0 1 f 410 )); /* int8 */ + DATA(insert ( 2863 21 1 f 1868 )); /* int82 */ + DATA(insert ( 2863 23 1 f 416 )); /* int84 */ + DATA(insert ( 2864 0 1 f 1330 )); /* interval */ + DATA(insert ( 2865 0 1 f 1220 )); /* macaddr */ + DATA(insert ( 2866 0 1 f 93 )); /* name */ + DATA(insert ( 2867 0 1 f 1752 )); /* numeric */ + DATA(insert ( 2868 0 1 f 607 )); /* oid */ + DATA(insert ( 2869 0 1 f 649 )); /* oidvector */ + DATA(insert ( 2870 0 1 f 98 )); /* text */ + DATA(insert ( 2871 0 1 f 1108 )); /* time */ + DATA(insert ( 2872 0 1 f 1320 )); /* timestamptz */ + DATA(insert ( 2872 1082 1 f 2386 )); /* timestamptz-date */ + DATA(insert ( 2872 1114 1 f 2542 )); /* timestamptz-timestamp */ + DATA(insert ( 2873 0 1 f 1550 )); /* timetz */ + DATA(insert ( 2874 0 1 f 1804 )); /* varbit */ + DATA(insert ( 2875 0 1 f 98 )); /* varchar */ + DATA(insert ( 2876 0 1 f 2060 )); /* timestamp */ + 
DATA(insert ( 2876 1082 1 f 2373 )); /* timestamp-date */
+ DATA(insert ( 2876 1184 1 f 2536 )); /* timestamp-timestamptz */
+ DATA(insert ( 2877 0 1 f 2316 )); /* text pattern */
+ DATA(insert ( 2878 0 1 f 2316 )); /* varchar pattern */
+ DATA(insert ( 2879 0 1 f 2328 )); /* bpchar pattern */
+ DATA(insert ( 2880 0 1 f 2334 )); /* name pattern */
+ DATA(insert ( 2881 0 1 f 900 )); /* money */
+ DATA(insert ( 2882 0 1 f 566 )); /* reltime */
+ DATA(insert ( 2883 0 1 f 811 )); /* tinterval */
+
  #endif /* PG_AMOP_H */
diff -Ncpr pgsql.head/src/include/catalog/pg_amproc.h bmdist/src/include/catalog/pg_amproc.h
*** pgsql.head/src/include/catalog/pg_amproc.h 2006-07-22 06:51:33.000000000 +1000
--- bmdist/src/include/catalog/pg_amproc.h 2006-08-01 12:57:29.000000000 +1000
*************** DATA(insert ( 2780 0 2 2743 ));
*** 308,311 ****
--- 308,365 ----
  DATA(insert ( 2780 0 3 2743 ));
  DATA(insert ( 2780 0 4 2744 ));

+ /*
+  * the support routines (comparison procs) for the on-disk bitmap index.
+  */
+ DATA(insert ( 2849 0 1 357 )); /* abstime */
+ DATA(insert ( 2850 0 1 382 )); /* array */
+ DATA(insert ( 2851 0 1 1596 )); /* bit */
+ DATA(insert ( 2852 0 1 1693 )); /* bool */
+ DATA(insert ( 2853 0 1 1078 )); /* bpchar */
+ DATA(insert ( 2854 0 1 1954 )); /* bytea */
+ DATA(insert ( 2855 0 1 358 )); /* char */
+ DATA(insert ( 2856 0 1 926 )); /* cidr */
+ DATA(insert ( 2857 0 1 1092 )); /* date */
+ DATA(insert ( 2857 1114 1 2344 )); /* date-timestamp */
+ DATA(insert ( 2857 1184 1 2357 )); /* date-timestamptz */
+ DATA(insert ( 2858 0 1 354 )); /* float4 */
+ DATA(insert ( 2858 701 1 2194 )); /* float48 */
+ DATA(insert ( 2859 0 1 355 )); /* float8 */
+ DATA(insert ( 2859 700 1 2195 )); /* float84 */
+ DATA(insert ( 2860 0 1 926 )); /* inet */
+ DATA(insert ( 2861 0 1 350 )); /* int2 */
+ DATA(insert ( 2861 23 1 2190 )); /* int24 */
+ DATA(insert ( 2861 20 1 2192 )); /* int28 */
+ DATA(insert ( 2862 0 1 351 )); /* int4 */
+ DATA(insert ( 2862 21 1 2191 )); /* int42 */
+ DATA(insert ( 2862 20 1 2188 )); /* int48 */
+ DATA(insert ( 2863 0 1 842 )); /* int8 */
+ DATA(insert ( 2863 21 1 2193 )); /* int82 */
+ DATA(insert ( 2863 23 1 2189 )); /* int84 */
+ DATA(insert ( 2864 0 1 1315 )); /* interval */
+ DATA(insert ( 2865 0 1 836 )); /* macaddr */
+ DATA(insert ( 2866 0 1 359 )); /* name */
+ DATA(insert ( 2867 0 1 1769 )); /* numeric */
+ DATA(insert ( 2868 0 1 356 )); /* oid */
+ DATA(insert ( 2869 0 1 404 )); /* oidvector */
+ DATA(insert ( 2870 0 1 360 )); /* text */
+ DATA(insert ( 2871 0 1 1107 )); /* time */
+ DATA(insert ( 2872 0 1 1314 )); /* timestamptz */
+ DATA(insert ( 2872 1082 1 2383 )); /* timestamptz-date */
+ DATA(insert ( 2872 1114 1 2533 )); /* timestamptz-timestamp */
+ DATA(insert ( 2873 0 1 1358 )); /* timetz */
+ DATA(insert ( 2874 0 1 1672 )); /* varbit */
+ DATA(insert ( 2875 0 1 360 )); /* varchar */
+ DATA(insert ( 2876 0 1 2045 )); /* timestamp */
+ DATA(insert ( 2876 1082 1 2370 )); /* timestamp-date */
+ DATA(insert ( 2876 1184 1 2526 )); /* timestamp-timestamptz */
+ DATA(insert ( 2877 0 1 2166 )); /* text pattern */
+ DATA(insert ( 2878 0 1 2166 )); /* varchar pattern */
+ DATA(insert ( 2879 0 1 2180 )); /* bpchar pattern */
+ DATA(insert ( 2880 0 1 2187 )); /* name pattern */
+ DATA(insert ( 2881 0 1 377 )); /* money */
+ DATA(insert ( 2882 0 1 380 )); /* reltime */
+ DATA(insert ( 2883 0 1 381 )); /* tinterval */
+
+
  #endif /* PG_AMPROC_H */
diff -Ncpr pgsql.head/src/include/catalog/pg_namespace.h bmdist/src/include/catalog/pg_namespace.h
*** pgsql.head/src/include/catalog/pg_namespace.h 2006-03-06 02:58:54.000000000 +1100
--- bmdist/src/include/catalog/pg_namespace.h 2006-08-01 12:57:29.000000000 +1000
*************** DESCR("System catalog schema");
*** 74,79 ****
--- 74,82 ----
  DATA(insert OID = 99 ( "pg_toast" PGUID _null_ ));
  DESCR("Reserved schema for TOAST tables");
  #define PG_TOAST_NAMESPACE 99
+ DATA(insert OID = 3012 ( "pg_bitmapindex" PGUID _null_ ));
+ DESCR("Reserved schema for internal relations of bitmap 
indexes");
+ #define PG_BITMAPINDEX_NAMESPACE 3012
  DATA(insert OID = 2200 ( "public" PGUID _null_ ));
  DESCR("Standard public schema");
  #define PG_PUBLIC_NAMESPACE 2200
diff -Ncpr pgsql.head/src/include/catalog/pg_opclass.h bmdist/src/include/catalog/pg_opclass.h
*** pgsql.head/src/include/catalog/pg_opclass.h 2006-07-22 06:51:33.000000000 +1000
--- bmdist/src/include/catalog/pg_opclass.h 2006-08-01 12:57:29.000000000 +1000
*************** DATA(insert OID = 2778 ( 2742 _money_ops
*** 208,211 ****
--- 208,250 ----
  DATA(insert OID = 2779 ( 2742 _reltime_ops PGNSP PGUID 1024 t 703 ));
  DATA(insert OID = 2780 ( 2742 _tinterval_ops PGNSP PGUID 1025 t 704 ));

+ /*
+  * the operator classes for the on-disk bitmap index.
+  */
+ DATA(insert OID = 2849 ( 2848 abstime_ops PGNSP PGUID 702 t 0 ));
+ DATA(insert OID = 2850 ( 2848 array_ops PGNSP PGUID 2277 t 0 ));
+ DATA(insert OID = 2851 ( 2848 bit_ops PGNSP PGUID 1560 t 0 ));
+ DATA(insert OID = 2852 ( 2848 bool_ops PGNSP PGUID 16 t 0 ));
+ DATA(insert OID = 2853 ( 2848 bpchar_ops PGNSP PGUID 1042 t 0 ));
+ DATA(insert OID = 2854 ( 2848 bytea_ops PGNSP PGUID 17 t 0 ));
+ DATA(insert OID = 2855 ( 2848 char_ops PGNSP PGUID 18 t 0 ));
+ DATA(insert OID = 2856 ( 2848 cidr_ops PGNSP PGUID 650 t 0 ));
+ DATA(insert OID = 2857 ( 2848 date_ops PGNSP PGUID 1082 t 0 ));
+ DATA(insert OID = 2858 ( 2848 float4_ops PGNSP PGUID 700 t 0 ));
+ DATA(insert OID = 2859 ( 2848 float8_ops PGNSP PGUID 701 t 0 ));
+ DATA(insert OID = 2860 ( 2848 inet_ops PGNSP PGUID 869 t 0 ));
+ DATA(insert OID = 2861 ( 2848 int2_ops PGNSP PGUID 21 t 0 ));
+ DATA(insert OID = 2862 ( 2848 int4_ops PGNSP PGUID 23 t 0 ));
+ DATA(insert OID = 2863 ( 2848 int8_ops PGNSP PGUID 20 t 0 ));
+ DATA(insert OID = 2864 ( 2848 interval_ops PGNSP PGUID 1186 t 0 ));
+ DATA(insert OID = 2865 ( 2848 macaddr_ops PGNSP PGUID 829 t 0 ));
+ DATA(insert OID = 2866 ( 2848 name_ops PGNSP PGUID 19 t 0 ));
+ DATA(insert OID = 2867 ( 2848 numeric_ops PGNSP PGUID 1700 t 0 ));
+ DATA(insert OID = 2868 ( 2848 oid_ops PGNSP PGUID 26 t 0 ));
+ DATA(insert OID = 2869 ( 2848 oidvector_ops PGNSP PGUID 30 t 0 ));
+ DATA(insert OID = 2870 ( 2848 text_ops PGNSP PGUID 25 t 0 ));
+ DATA(insert OID = 2871 ( 2848 time_ops PGNSP PGUID 1083 t 0 ));
+ DATA(insert OID = 2872 ( 2848 timestamptz_ops PGNSP PGUID 1184 t 0 ));
+ DATA(insert OID = 2873 ( 2848 timetz_ops PGNSP PGUID 1266 t 0 ));
+ DATA(insert OID = 2874 ( 2848 varbit_ops PGNSP PGUID 1562 t 0 ));
+ DATA(insert OID = 2875 ( 2848 varchar_ops PGNSP PGUID 1043 t 0 ));
+ DATA(insert OID = 2876 ( 2848 timestamp_ops PGNSP PGUID 1114 t 0 ));
+ DATA(insert OID = 2877 ( 2848 text_pattern_ops PGNSP PGUID 25 f 0 ));
+ DATA(insert OID = 2878 ( 2848 varchar_pattern_ops PGNSP PGUID 1043 f 0 ));
+ DATA(insert OID = 2879 ( 2848 bpchar_pattern_ops PGNSP PGUID 1042 f 0 ));
+ DATA(insert OID = 2880 ( 2848 name_pattern_ops PGNSP PGUID 19 f 0 ));
+ DATA(insert OID = 2881 ( 2848 money_ops PGNSP PGUID 790 t 0 ));
+ DATA(insert OID = 2882 ( 2848 reltime_ops PGNSP PGUID 703 t 0 ));
+ DATA(insert OID = 2883 ( 2848 tinterval_ops PGNSP PGUID 704 t 0 ));
+
  #endif /* PG_OPCLASS_H */
diff -Ncpr pgsql.head/src/include/catalog/pg_proc.h bmdist/src/include/catalog/pg_proc.h
*** pgsql.head/src/include/catalog/pg_proc.h 2006-07-29 04:33:04.000000000 +1000
--- bmdist/src/include/catalog/pg_proc.h 2006-08-01 12:57:29.000000000 +1000
*************** DESCR("anyarray contains");
*** 3928,3933 ****
--- 3928,3963 ----
  DATA(insert OID = 2749 ( arraycontained PGNSP PGUID 12 f f t f i 2 16 "2277 2277" _null_ _null_ _null_ arraycontained - _null_ ));
  DESCR("anyarray contained");

+ /* the bitmap index access method routines */
+ DATA(insert OID = 2801 ( bmgettuple PGNSP PGUID 12 f f t f v 2 16 "2281 2281" _null_ _null_ _null_ bmgettuple - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 2802 ( bmgetmulti PGNSP PGUID 12 f f t f v 4 16 "2281 2281 2281 2281" _null_ _null_ _null_ bmgetmulti - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3000 ( bmgetbitmapwords PGNSP PGUID 12 f f t f v 5 16 "2281 2281 2281 2281 2281" _null_ _null_ _null_ bmgetbitmapwords - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3001 ( bminsert PGNSP PGUID 12 f f t f v 6 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ bminsert - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3002 ( bmbeginscan PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ _null_ _null_ bmbeginscan - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3003 ( bmrescan PGNSP PGUID 12 f f t f v 2 2278 "2281 2281" _null_ _null_ _null_ bmrescan - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3004 ( bmendscan PGNSP PGUID 12 f f t f v 1 2278 "2281" _null_ _null_ _null_ bmendscan - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3005 ( bmmarkpos PGNSP PGUID 12 f f t f v 1 2278 "2281" _null_ _null_ _null_ bmmarkpos - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3006 ( bmrestrpos PGNSP PGUID 12 f f t f v 1 2278 "2281" _null_ _null_ _null_ bmrestrpos - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3007 ( bmbuild PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" _null_ _null_ _null_ bmbuild - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3008 ( bmbulkdelete PGNSP PGUID 12 f f t f v 4 2281 "2281 2281 2281 2281" _null_ _null_ _null_ bmbulkdelete - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3009 ( bmvacuumcleanup PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ bmvacuumcleanup - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3010 ( bmcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ bmcostestimate - _null_ ));
+ DESCR("bitmap(internal)");
+ DATA(insert OID = 3011 ( bmoptions PGNSP PGUID 12 f f t f s 2 17 "1009 16" _null_ _null_ _null_ bmoptions - _null_ ));
+ DESCR("bitmap(internal)");
+
  /*
   * Symbolic values for provolatile column: these indicate whether the result
   * of a function is 
dependent *only* on the values of its explicit arguments, diff -Ncpr pgsql.head/src/include/nodes/execnodes.h bmdist/src/include/nodes/execnodes.h *** pgsql.head/src/include/nodes/execnodes.h 2006-07-28 05:52:07.000000000 +1000 --- bmdist/src/include/nodes/execnodes.h 2006-08-01 12:57:30.000000000 +1000 *************** *** 18,23 **** --- 18,24 ---- #include "nodes/params.h" #include "nodes/plannodes.h" #include "nodes/tidbitmap.h" + #include "nodes/ondiskbitmapwords.h" #include "utils/hsearch.h" #include "utils/tuplestore.h" *************** typedef struct BitmapAndState *** 843,848 **** --- 844,851 ---- PlanState ps; /* its first field is NodeTag */ PlanState **bitmapplans; /* array of PlanStates for my inputs */ int nplans; /* number of input plans */ + OnDiskBitmapWords **odbms; /* OnDiskBitmapWords for each input */ + OnDiskBitmapWords *resultOdbm; /* the output OnDiskBitmapWords */ } BitmapAndState; /* ---------------- *************** typedef struct BitmapOrState *** 854,859 **** --- 857,864 ---- PlanState ps; /* its first field is NodeTag */ PlanState **bitmapplans; /* array of PlanStates for my inputs */ int nplans; /* number of input plans */ + OnDiskBitmapWords **odbms; /* OnDiskBitmapWords for each input */ + OnDiskBitmapWords *resultOdbm; /* the output OnDiskBitmapWords */ } BitmapOrState; /* ---------------------------------------------------------------- *************** typedef struct BitmapIndexScanState *** 958,963 **** --- 963,969 ---- { ScanState ss; /* its first field is NodeTag */ TIDBitmap *biss_result; + OnDiskBitmapWords *odbiss_result; ScanKey biss_ScanKeys; int biss_NumScanKeys; IndexRuntimeKeyInfo *biss_RuntimeKeys; *************** typedef struct BitmapIndexScanState *** 968,973 **** --- 974,980 ---- ExprContext *biss_RuntimeContext; Relation biss_RelationDesc; IndexScanDesc biss_ScanDesc; + IndexScanDesc odbiss_ScanDesc; } BitmapIndexScanState; /* ---------------- *************** typedef struct BitmapHeapScanState *** 983,989 **** --- 
990,998 ---- ScanState ss; /* its first field is NodeTag */ List *bitmapqualorig; TIDBitmap *tbm; + OnDiskBitmapWords *odbm; TBMIterateResult *tbmres; + BMIterateResult *odbmres; } BitmapHeapScanState; /* ---------------- diff -Ncpr pgsql.head/src/include/nodes/nodes.h bmdist/src/include/nodes/nodes.h *** pgsql.head/src/include/nodes/nodes.h 2006-05-01 04:30:40.000000000 +1000 --- bmdist/src/include/nodes/nodes.h 2006-08-01 12:57:30.000000000 +1000 *************** typedef enum NodeTag *** 332,338 **** */ T_TriggerData = 900, /* in commands/trigger.h */ T_ReturnSetInfo, /* in nodes/execnodes.h */ ! T_TIDBitmap /* in nodes/tidbitmap.h */ } NodeTag; /* --- 332,340 ---- */ T_TriggerData = 900, /* in commands/trigger.h */ T_ReturnSetInfo, /* in nodes/execnodes.h */ ! T_TIDBitmap, /* in nodes/tidbitmap.h */ ! T_OnDiskBitmapWords /* in nodes/ondiskbitmapwords.h */ ! } NodeTag; /* diff -Ncpr pgsql.head/src/include/nodes/ondiskbitmapwords.h bmdist/src/include/nodes/ondiskbitmapwords.h *** pgsql.head/src/include/nodes/ondiskbitmapwords.h 1970-01-01 10:00:00.000000000 +1000 --- bmdist/src/include/nodes/ondiskbitmapwords.h 2006-08-01 13:17:24.702192896 +1000 *************** *** 0 **** --- 1,44 ---- + /*------------------------------------------------------------------------- + * + * ondiskbitmapwords.h + * + * Define data structures that handle the bitmap words in the + * on-disk bitmap index. + * + * Copyright (c) 2006, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL$ + *------------------------------------------------------------------------- + */ + + #ifndef ONDISKBITMAPWORDS_H + #define ONDISKBITMAPWORDS_H + + #include "nodes/plannodes.h" + #include "access/bitmap.h" + + struct PlanState; + + + /* + * A node that contains a batch of bitmap words in a bitmap vector. 
+ */ + typedef struct OnDiskBitmapWords + { + NodeTag type; /* to make it a valid Node */ + MemoryContext mcxt; /* memory context containing me */ + BMBatchWords bitmapWords; /* a batch of bitmap words */ + } OnDiskBitmapWords; + + /* function prototypes in nodes/ondiskbitmapwords.c */ + extern OnDiskBitmapWords* odbm_create(uint32 maxNumOfWords); + extern void odbm_free(OnDiskBitmapWords *odbm); + extern BMIterateResult* odbm_res_create(OnDiskBitmapWords *odbm); + extern void odbm_res_free(BMIterateResult* odbmres); + + extern void odbm_set_bitmaptype(Plan* plan, bool inmem); + extern void odbm_set_child_resultnode(struct PlanState* ps, + OnDiskBitmapWords* odbm); + + #endif /* ONDISKBITMAPWORDS_H */ diff -Ncpr pgsql.head/src/include/nodes/plannodes.h bmdist/src/include/nodes/plannodes.h *** pgsql.head/src/include/nodes/plannodes.h 2006-07-27 05:31:51.000000000 +1000 --- bmdist/src/include/nodes/plannodes.h 2006-08-01 12:57:30.000000000 +1000 *************** typedef struct BitmapAnd *** 144,149 **** --- 144,150 ---- { Plan plan; List *bitmapplans; + bool inmem; /* use bitmap scan */ } BitmapAnd; /* ---------------- *************** typedef struct BitmapOr *** 158,163 **** --- 159,165 ---- { Plan plan; List *bitmapplans; + bool inmem; /* use bitmap scan */ } BitmapOr; /* *************** typedef struct IndexScan *** 209,214 **** --- 211,217 ---- List *indexstrategy; /* integer list of strategy numbers */ List *indexsubtype; /* OID list of strategy subtypes */ ScanDirection indexorderdir; /* forward or backward or don't care */ + Oid indexam; /* OID of the access method (in pg_am) */ } IndexScan; /* ---------------- *************** typedef struct BitmapIndexScan *** 232,241 **** --- 235,246 ---- { Scan scan; Oid indexid; /* OID of index to scan */ + Oid indexam; /* OID of the access method (in pg_am) */ List *indexqual; /* list of index quals (OpExprs) */ List *indexqualorig; /* the same in original form */ List *indexstrategy; /* integer list of strategy numbers */ 
List *indexsubtype; /* OID list of strategy subtypes */ + bool inmem; /* use bitmap scan */ } BitmapIndexScan; /* ---------------- *************** typedef struct BitmapHeapScan *** 251,256 **** --- 256,262 ---- { Scan scan; List *bitmapqualorig; /* index quals, in standard expr form */ + bool inmem; /* use bitmap scan */ } BitmapHeapScan; /* ---------------- diff -Ncpr pgsql.head/src/include/utils/array.h bmdist/src/include/utils/array.h *** pgsql.head/src/include/utils/array.h 2006-03-06 02:59:06.000000000 +1100 --- bmdist/src/include/utils/array.h 2006-08-01 12:57:27.000000000 +1000 *************** extern Datum array_gt(PG_FUNCTION_ARGS); *** 193,198 **** --- 193,199 ---- extern Datum array_le(PG_FUNCTION_ARGS); extern Datum array_ge(PG_FUNCTION_ARGS); extern Datum btarraycmp(PG_FUNCTION_ARGS); + extern Datum bmarraycmp(PG_FUNCTION_ARGS); extern Datum array_dims(PG_FUNCTION_ARGS); extern Datum array_lower(PG_FUNCTION_ARGS); extern Datum array_upper(PG_FUNCTION_ARGS); diff -Ncpr pgsql.head/src/include/utils/builtins.h bmdist/src/include/utils/builtins.h *** pgsql.head/src/include/utils/builtins.h 2006-07-29 04:33:04.000000000 +1000 --- bmdist/src/include/utils/builtins.h 2006-08-01 12:57:27.000000000 +1000 *************** extern Datum pg_prepared_statement(PG_FU *** 905,908 **** --- 905,935 ---- /* utils/mmgr/portalmem.c */ extern Datum pg_cursor(PG_FUNCTION_ARGS); + /* comparison functions for the bitmap index. 
*/ + extern Datum bmint4cmp(PG_FUNCTION_ARGS); + extern Datum bmint2cmp(PG_FUNCTION_ARGS); + extern Datum bmint8cmp(PG_FUNCTION_ARGS); + extern Datum bmint48cmp(PG_FUNCTION_ARGS); + extern Datum bmint84cmp(PG_FUNCTION_ARGS); + extern Datum bmint24cmp(PG_FUNCTION_ARGS); + extern Datum bmint42cmp(PG_FUNCTION_ARGS); + extern Datum bmint28cmp(PG_FUNCTION_ARGS); + extern Datum bmint82cmp(PG_FUNCTION_ARGS); + extern Datum bmboolcmp(PG_FUNCTION_ARGS); + extern Datum bmcharcmp(PG_FUNCTION_ARGS); + extern Datum bmtextcmp(PG_FUNCTION_ARGS); + extern Datum bmabstimecmp(PG_FUNCTION_ARGS); + extern Datum bmarraycmp(PG_FUNCTION_ARGS); + extern Datum bmoidcmp(PG_FUNCTION_ARGS); + extern Datum bmoidvectorcmp(PG_FUNCTION_ARGS); + extern Datum bmnamecmp(PG_FUNCTION_ARGS); + extern Datum bmname_pattern_cmp(PG_FUNCTION_ARGS); + extern Datum bmfloat4cmp(PG_FUNCTION_ARGS); + extern Datum bmfloat8cmp(PG_FUNCTION_ARGS); + extern Datum bmfloat48cmp(PG_FUNCTION_ARGS); + extern Datum bmfloat84cmp(PG_FUNCTION_ARGS); + extern Datum bmreltimecmp(PG_FUNCTION_ARGS); + extern Datum bmtintervalcmp(PG_FUNCTION_ARGS); + extern Datum bmtext_pattern_cmp(PG_FUNCTION_ARGS); + #endif /* BUILTINS_H */ diff -Ncpr pgsql.head/src/include/utils/rel.h bmdist/src/include/utils/rel.h *** pgsql.head/src/include/utils/rel.h 2006-07-04 08:45:41.000000000 +1000 --- bmdist/src/include/utils/rel.h 2006-08-01 12:57:27.000000000 +1000 *************** typedef struct RelationAmInfo *** 107,112 **** --- 107,113 ---- FmgrInfo ambeginscan; FmgrInfo amgettuple; FmgrInfo amgetmulti; + FmgrInfo amgetbitmapwords; FmgrInfo amrescan; FmgrInfo amendscan; FmgrInfo ammarkpos; diff -Ncpr pgsql.head/src/include/utils/selfuncs.h bmdist/src/include/utils/selfuncs.h *** pgsql.head/src/include/utils/selfuncs.h 2006-07-02 08:07:23.000000000 +1000 --- bmdist/src/include/utils/selfuncs.h 2006-08-01 12:57:27.000000000 +1000 *************** extern Datum btcostestimate(PG_FUNCTION_ *** 170,174 **** --- 170,175 ---- extern Datum 
hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); extern Datum gincostestimate(PG_FUNCTION_ARGS); + extern Datum bmcostestimate(PG_FUNCTION_ARGS); #endif /* SELFUNCS_H */ diff -Ncpr pgsql.head/src/test/regress/expected/create_index.out bmdist/src/test/regress/expected/create_index.out *** pgsql.head/src/test/regress/expected/create_index.out 2006-07-12 05:49:14.000000000 +1000 --- bmdist/src/test/regress/expected/create_index.out 2006-08-01 12:58:19.000000000 +1000 *************** INSERT INTO func_index_heap VALUES('QWER *** 360,362 **** --- 360,394 ---- create unique index hash_f8_index_1 on hash_f8_heap(abs(random)); create unique index hash_f8_index_2 on hash_f8_heap((seqno + 1), random); create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000; + -- + -- bitmap + -- + SET enable_seqscan = OFF; + SET enable_indexscan = ON; + SET enable_bitmapscan = ON; + create table bm_test (i int); + insert into bm_test select i/10 from generate_series(1, 100) i; + create index bm_test_idx on bm_test using bitmap (i); + select i, count(*) from bm_test group by 1 order by 1; + i | count + ----+------- + 0 | 9 + 1 | 10 + 2 | 10 + 3 | 10 + 4 | 10 + 5 | 10 + 6 | 10 + 7 | 10 + 8 | 10 + 9 | 10 + 10 | 1 + (11 rows) + + select count(*) from bm_test where i in(1, 2); + count + ------- + 20 + (1 row) + + drop table bm_test; diff -Ncpr pgsql.head/src/test/regress/expected/opr_sanity.out bmdist/src/test/regress/expected/opr_sanity.out *** pgsql.head/src/test/regress/expected/opr_sanity.out 2006-07-29 04:33:04.000000000 +1000 --- bmdist/src/test/regress/expected/opr_sanity.out 2006-08-01 12:58:18.000000000 +1000 *************** ORDER BY 1, 2, 3; *** 841,847 **** 2742 | 2 | @ 2742 | 3 | ~ 2742 | 4 | = ! (28 rows) -- Check that all operators linked to by opclass entries have selectivity -- estimators. This is not absolutely required, but it seems a reasonable --- 841,849 ---- 2742 | 2 | @ 2742 | 3 | ~ 2742 | 4 | = ! 2848 | 1 | = ! 
2848 | 1 | ~=~ ! (30 rows) -- Check that all operators linked to by opclass entries have selectivity -- estimators. This is not absolutely required, but it seems a reasonable diff -Ncpr pgsql.head/src/test/regress/sql/create_index.sql bmdist/src/test/regress/sql/create_index.sql *** pgsql.head/src/test/regress/sql/create_index.sql 2006-07-12 05:49:14.000000000 +1000 --- bmdist/src/test/regress/sql/create_index.sql 2006-08-01 12:58:12.000000000 +1000 *************** INSERT INTO func_index_heap VALUES('QWER *** 219,221 **** --- 219,238 ---- create unique index hash_f8_index_1 on hash_f8_heap(abs(random)); create unique index hash_f8_index_2 on hash_f8_heap((seqno + 1), random); create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000; + + + -- + -- bitmap + -- + + SET enable_seqscan = OFF; + SET enable_indexscan = ON; + SET enable_bitmapscan = ON; + + create table bm_test (i int); + insert into bm_test select i/10 from generate_series(1, 100) i; + create index bm_test_idx on bm_test using bitmap (i); + select i, count(*) from bm_test group by 1 order by 1; + select count(*) from bm_test where i in(1, 2); + drop table bm_test; +