diff -ruN pgstattuple.orig/Makefile pgstattuple/Makefile --- pgstattuple.orig/Makefile 2006-08-10 19:22:47.000000000 +0900 +++ pgstattuple/Makefile 2006-08-10 19:24:05.000000000 +0900 @@ -6,7 +6,7 @@ # #------------------------------------------------------------------------- -SRCS = pgstattuple.c +SRCS = pgstattuple.c pgstatindex.c MODULE_big = pgstattuple OBJS = $(SRCS:.c=.o) diff -ruN pgstattuple.orig/pgstatindex.c pgstattuple/pgstatindex.c --- pgstattuple.orig/pgstatindex.c 1970-01-01 09:00:00.000000000 +0900 +++ pgstattuple/pgstatindex.c 2006-08-11 17:51:26.000000000 +0900 @@ -0,0 +1,714 @@ +/* + * pgstatindex + * + * Copyright (c) 2006 Satoshi Nagayasu + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "access/heapam.h" +#include "access/itup.h" +#include "access/nbtree.h" +#include "access/transam.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/inval.h" + +PG_FUNCTION_INFO_V1(pgstatindex); +PG_FUNCTION_INFO_V1(bt_metap); +PG_FUNCTION_INFO_V1(bt_page_items); +PG_FUNCTION_INFO_V1(bt_page_stats); +PG_FUNCTION_INFO_V1(pg_relpages); + +extern Datum pgstatindex(PG_FUNCTION_ARGS); +extern Datum bt_metap(PG_FUNCTION_ARGS); +extern Datum bt_page_items(PG_FUNCTION_ARGS); +extern Datum bt_page_stats(PG_FUNCTION_ARGS); +extern Datum pg_relpages(PG_FUNCTION_ARGS); + +#define PGSTATINDEX_TYPE "public.pgstatindex_type" +#define PGSTATINDEX_NCOLUMNS 10 + +#define BTMETAP_TYPE "public.bt_metap_type" +#define BTMETAP_NCOLUMNS 6 + +#define BTPAGEITEMS_TYPE "public.bt_page_items_type" +#define BTPAGEITEMS_NCOLUMNS 6 + +#define BTPAGESTATS_TYPE "public.bt_page_stats_type" +#define BTPAGESTATS_NCOLUMNS 12 + + +#define IS_INDEX(r) ((r)->rd_rel->relkind == 'i') +#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) + +#define CHECK_PAGE_OFFSET_RANGE(page, offset) { \ + if ( !(FirstOffsetNumber<=(offset) && \ + (offset)<=PageGetMaxOffsetNumber(page)) ) \ + elog(ERROR, "Page offset number out of range."); } + +#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \ + if ( (blkno)<0 && RelationGetNumberOfBlocks((rel))<=(blkno) ) \ + elog(ERROR, "Block number out of range."); } + +/* ------------------------------------------------ + * structure for single btree page statistics + * ------------------------------------------------ + */ +typedef struct BTPageStat { + uint32 blkno; + uint32 live_items; + uint32 dead_items; + uint32 page_size; + uint32 max_avail; + uint32 free_size; + uint32 avg_item_size; + uint32 fragments; + bool is_root; + bool is_internal; + bool is_leaf; + bool is_deleted; + bool is_empty; +} BTPageStat; + +/* ------------------------------------------------ + * A structure for a whole btree index statistics + * used by pgstatindex(). + * ------------------------------------------------ + */ +typedef struct BTIndexStat { + uint32 version; + BlockNumber root_blkno; + uint32 level; + + uint32 live_items; + uint32 dead_items; + + uint32 root_pages; + uint32 internal_pages; + uint32 leaf_pages; + uint32 empty_pages; + uint32 deleted_pages; + + uint32 page_size; + uint32 avg_item_size; + + uint32 max_avail; + uint32 free_space; + + uint32 fragments; +} BTIndexStat; + +/* ------------------------------------------------- + * GetBTPageStatistics() + * + * Collect statistics of single b-tree leaf page + * ------------------------------------------------- + */ +static bool +GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) +{ + Page page = BufferGetPage(buffer); + PageHeader phdr = (PageHeader) page; + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + int item_size = 0; + int off; + + stat->blkno = blkno; + + stat->max_avail = BLCKSZ - ( BLCKSZ - phdr->pd_special + SizeOfPageHeaderData ); + + stat->dead_items = stat->live_items = 0; + + stat->page_size = PageGetPageSize(page); + + /* page type */ + stat->is_root = false; + stat->is_leaf = false; + stat->is_deleted = false; + stat->is_empty = false; + + if ( P_ISDELETED(opaque) ) + { + stat->is_deleted = true; + return true; + } + else if ( P_IGNORE(opaque) ) + stat->is_empty = true; + else if ( P_ISLEAF(opaque) ) + stat->is_leaf = true; + else if ( P_ISROOT(opaque) ) + stat->is_root = true; + else + stat->is_internal = true; + + /*---------------------------------------------- + * If a next leaf is on the previous block, + * it means a fragmentation. + *---------------------------------------------- + */ + stat->fragments = 0; + if ( stat->is_leaf ) + { + if ( opaque->btpo_next != P_NONE && opaque->btpo_next < blkno ) + stat->fragments++; + } + + /* count live and dead tuples, and free space */ + for (off=FirstOffsetNumber ; off<=maxoff ; off++) + { + BTItem btitem; + IndexTuple itup; + + ItemId id = PageGetItemId(page, off); + + btitem = (BTItem)PageGetItem(page, id); + + itup = &(btitem->bti_itup); + + item_size += IndexTupleSize(itup); + + if ( !ItemIdDeleted(id) ) + stat->live_items++; + else + stat->dead_items++; + } + stat->free_size = PageGetFreeSpace(page); + +#ifdef NOT_USED + elog(NOTICE, "%d/%d (%.2f%%)", stat->free_size, stat->max_avail, + (float)stat->free_size/(float)stat->max_avail*100.0); +#endif + + if ( (stat->live_items + stat->dead_items) > 0 ) + stat->avg_item_size = item_size / ( stat->live_items + stat->dead_items ); + else + stat->avg_item_size = 0; + + return true; +} + + +/* ------------------------------------------------------ + * pgstatindex() + * + * Usage: SELECT * FROM pgstatindex('t1_pkey'); + * ------------------------------------------------------ + */ +Datum +pgstatindex(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + Relation rel; + RangeVar *relrv; + Datum result; + uint32 nblocks; + uint32 blkno; + BTIndexStat indexStat; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + if ( !IS_INDEX(rel) || !IS_BTREE(rel) ) + elog(ERROR, "pgstatindex() can be used only on b-tree index."); + + /*------------------- + * Read a metapage + *------------------- + */ + { + Buffer buffer = ReadBuffer(rel, 0); + Page page = BufferGetPage(buffer); + BTMetaPageData *metad = BTPageGetMeta(page); + +// snprintf(values[j++], 32, "%d", metad->btm_magic); + indexStat.version = metad->btm_version; + indexStat.root_blkno = metad->btm_root; + indexStat.level = metad->btm_level; +// snprintf(values[j++], 32, "%d", metad->btm_fastroot); +// snprintf(values[j++], 32, "%d", metad->btm_fastlevel); + + ReleaseBuffer(buffer); + } + + nblocks = RelationGetNumberOfBlocks(rel); + + /* -- init stat -- */ + indexStat.fragments = 0; + + indexStat.root_pages = 0; + indexStat.leaf_pages = 0; + indexStat.internal_pages = 0; + indexStat.empty_pages = 0; + indexStat.deleted_pages = 0; + + indexStat.max_avail = 0; + indexStat.free_space = 0; + + /*----------------------- + * Scan all blocks + *----------------------- + */ + for (blkno=1 ; blknobtpo_prev); + snprintf(values[j++], 32, "%d", 0); /*-- not implemented yet --*/ + values[j] = palloc(32); +// snprintf(values[j++], 32, "%d", opaque->btpo_next); + snprintf(values[j++], 32, "%d", 0); /*-- not implemented yet --*/ + values[j] = palloc(32); +// snprintf(values[j++], 32, "%d", opaque->btpo.level); + snprintf(values[j++], 32, "%d", 0); /*-- not implemented yet --*/ + values[j] = palloc(32); +// snprintf(values[j++], 32, "%d", opaque->btpo_flags); + snprintf(values[j++], 32, "%d", 0); /*-- not implemented yet --*/ + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); + } + + ReleaseBuffer(buffer); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(result); +} + +/*------------------------------------------------------- + * bt_page_items() + * + * Get IndexTupleData set in a leaf page + * + * Usage: SELECT * FROM bt_page_items('t1_pkey', 0); + *------------------------------------------------------- + */ +/* --------------------------------------------------- + * data structure for SRF to hold a scan information + * --------------------------------------------------- + */ +struct user_args +{ + TupleDesc tupd; + Relation rel; + Buffer buffer; + Page page; + uint16 offset; +}; + +Datum +bt_page_items(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + uint32 blkno = PG_GETARG_UINT32(1); + + RangeVar *relrv; + Datum result; + char *values[BTPAGEITEMS_NCOLUMNS]; + BTPageOpaque opaque; + HeapTuple tuple; + ItemId id; + + FuncCallContext *fctx; + MemoryContext mctx; + struct user_args *uargs = NULL; + + if ( blkno==0 ) + elog(ERROR, "Block 0 is a meta page."); + + if ( SRF_IS_FIRSTCALL() ) + { + fctx = SRF_FIRSTCALL_INIT(); + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + uargs = palloc(sizeof(struct user_args)); + + uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE); + uargs->offset = FirstOffsetNumber; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + uargs->rel = relation_openrv(relrv, AccessShareLock); + + CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno); + + uargs->buffer = ReadBuffer(uargs->rel, blkno); + + if ( !IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel) ) + elog(ERROR, "bt_page_items() can be used only on b-tree index."); + + uargs->page = BufferGetPage(uargs->buffer); + + opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page); + + if ( P_ISDELETED(opaque) ) + elog(NOTICE, "bt_page_items(): this page is deleted."); + + fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); + fctx->user_fctx = uargs; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + uargs = fctx->user_fctx; + + if ( fctx->call_cntr < fctx->max_calls ) + { + BTItem btitem; + IndexTuple itup; + + id = PageGetItemId(uargs->page, uargs->offset); + + if ( !ItemIdIsValid(id) ) + elog(ERROR, "Invalid ItemId."); + + btitem = (BTItem)PageGetItem(uargs->page, id); + itup = &(btitem->bti_itup); + + { + int j = 0; + + BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", uargs->offset); + values[j] = palloc(32); + snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", IndexTupleSize(itup)); + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f'); + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f'); + + { + int off; + char *dump; + char *ptr = (char *)itup + IndexInfoFindDataOffset(itup->t_info); + + dump = palloc(IndexTupleSize(itup)*3); + memset(dump, 0, IndexTupleSize(itup)*3); + + for (off=0 ; + off < IndexTupleSize(itup)-IndexInfoFindDataOffset(itup->t_info) ; + off++) + { + if ( dump[0]=='\0' ) + sprintf(dump, "%02x", *(ptr+off) & 0xff); + else + { + char buf[4]; + sprintf(buf, " %02x", *(ptr+off) & 0xff); + strcat(dump, buf); + } + } + values[j] = dump; + } + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values); + result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple); + } + + uargs->offset = uargs->offset + 1; + + SRF_RETURN_NEXT(fctx, result); + } + else + { + ReleaseBuffer(uargs->buffer); + relation_close(uargs->rel, AccessShareLock); + + SRF_RETURN_DONE(fctx); + } +} + + +/* ------------------------------------------------ + * bt_metap() + * + * Get a btree meta-page information + * + * Usage: SELECT * FROM bt_metap('t1_pkey') + * ------------------------------------------------ + */ +Datum +bt_metap(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + Buffer buffer; + + Relation rel; + RangeVar *relrv; + Datum result; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + if ( !IS_INDEX(rel) || !IS_BTREE(rel) ) + elog(ERROR, "bt_metap() can be used only on b-tree index."); + + buffer = ReadBuffer(rel, 0); + + { + BTMetaPageData *metad; + + TupleDesc tupleDesc; + int j; + char *values[BTMETAP_NCOLUMNS]; + HeapTuple tuple; + + Page page = BufferGetPage(buffer); + + metad = BTPageGetMeta(page); + + tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE); + + j = 0; + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_magic); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_version); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_root); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_level); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_fastroot); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_fastlevel); + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); + } + + ReleaseBuffer(buffer); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(result); +} + +/* -------------------------------------------------------- + * pg_relpages() + * + * Get a number of pages of the table/index. + * + * Usage: SELECT pg_relpages('t1'); + * SELECT pg_relpages('t1_pkey'); + * -------------------------------------------------------- + */ +Datum +pg_relpages(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + + Relation rel; + RangeVar *relrv; + int4 relpages; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + relpages = RelationGetNumberOfBlocks(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT32(relpages); +} + diff -ruN pgstattuple.orig/pgstattuple.sql.in pgstattuple/pgstattuple.sql.in --- pgstattuple.orig/pgstattuple.sql.in 2006-08-10 19:22:47.000000000 +0900 +++ pgstattuple/pgstattuple.sql.in 2006-08-11 17:46:19.000000000 +0900 @@ -22,3 +22,97 @@ RETURNS pgstattuple_type AS 'MODULE_PATHNAME', 'pgstattuplebyid' LANGUAGE 'C' STRICT; + +-- +-- pgstatindex +-- +DROP TYPE pgstatindex_type CASCADE; +CREATE TYPE pgstatindex_type AS ( + version int4, + tree_level int4, + index_size int4, + root_block_no int4, + internal_pages int4, + leaf_pages int4, + empty_pages int4, + deleted_pages int4, + avg_leaf_density float8, + leaf_fragmentation float8 +); + +CREATE OR REPLACE FUNCTION pgstatindex(text) +RETURNS pgstatindex_type +AS 'MODULE_PATHNAME', 'pgstatindex' +LANGUAGE 'C' STRICT; + +-- +-- bt_metap() +-- +DROP TYPE bt_metap_type CASCADE; +CREATE TYPE bt_metap_type AS ( + magic int4, + version int4, + root int4, + level int4, + fastroot int4, + fastlevel int4 +); + +CREATE OR REPLACE FUNCTION bt_metap(text) +RETURNS bt_metap_type +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE 'C' STRICT; + +-- +-- bt_page_items() +-- +DROP TYPE bt_page_items_type CASCADE; +CREATE TYPE bt_page_items_type AS ( + itemoffset int4, + ctid tid, + itemlen int4, + nulls bool, + vars bool, + data text +); + +DROP FUNCTION bt_page_items(text, int4); + +CREATE OR REPLACE FUNCTION bt_page_items(text, int4) +RETURNS SETOF bt_page_items_type +AS 'MODULE_PATHNAME', 'bt_page_items' +LANGUAGE 'C' STRICT; + +-- +-- bt_page_stats() +-- +DROP TYPE bt_page_stats_type CASCADE; +CREATE TYPE bt_page_stats_type AS ( + blkno int4, + live_items int4, + dead_items int4, + total_items int4, + avg_item_size float, + page_size int4, + free_size int4, + is_deleted int4, + btpo_prev int4, + btpo_next int4, + btpo_level int4, + btpo_flags int4 +); + +DROP FUNCTION bt_page_stats(text, int4); + +CREATE OR REPLACE FUNCTION bt_page_stats(text, int4) +RETURNS bt_page_stats_type +AS 'MODULE_PATHNAME', 'bt_page_stats' +LANGUAGE 'C' STRICT; + +-- +-- pg_relpages() +-- +CREATE OR REPLACE FUNCTION pg_relpages(text) +RETURNS int +AS 'MODULE_PATHNAME', 'pg_relpages' +LANGUAGE 'C' STRICT; diff -ruN pgstattuple.orig/test.sh pgstattuple/test.sh --- pgstattuple.orig/test.sh 1970-01-01 09:00:00.000000000 +0900 +++ pgstattuple/test.sh 2006-08-11 17:47:05.000000000 +0900 @@ -0,0 +1,22 @@ +#!/bin/sh + +export PATH=/usr/local/pgsql814/bin:$PATH + +psql pgbench<