diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f28026b..27efa6b 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -53,6 +53,7 @@ #include "access/xlogutils.h" #include "catalog/catalog.h" #include "catalog/namespace.h" +#include "catalog/pg_type.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" @@ -84,7 +85,8 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, Buffer newbuf, HeapTuple newtup, - bool all_visible_cleared, bool new_all_visible_cleared); + bool all_visible_cleared, bool new_all_visible_cleared, + bool diff_update); static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs, HeapTuple oldtup, HeapTuple newtup); @@ -2673,6 +2675,77 @@ simple_heap_delete(Relation relation, ItemPointer tid) } /* + * get_tuple_info - Gets the tuple offset and value. + * + * Calculates the attribute value and the offset where the attribute ends in the + * tuple, based on the attribute number and previously fetched attribute info. + * + * offset (I/P and O/P variable) - Input as end of previous attribute offset; + * in case it is the first attribute, its value is zero. + * Output as end of the current attribute in the tuple (unchanged for a NULL). + * usecacheoff (I/P and O/P variable) - true once attcacheoff can no longer be used. 
+ */ +static void +get_tuple_info(Form_pg_attribute *att, HeapTuple tuple, bits8 *bp, + bool hasnulls, int attnum, Datum *value, uint16 *offset, + bool *usecacheoff) +{ + Form_pg_attribute thisatt = att[attnum]; + uint16 off = *offset; + bool slow = *usecacheoff; + char *tp; + HeapTupleHeader tup = tuple->t_data; + + tp = (char *) tup + tup->t_hoff; + + if (hasnulls && att_isnull(attnum, bp)) + { + *usecacheoff = true; /* can't use attcacheoff anymore */ + return; + } + + if (!slow && thisatt->attcacheoff >= 0) + off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute if the offset + * is already suitably aligned, so that there would be no pad bytes in + * any case: then the offset will be valid for either an aligned or + * unaligned value. + */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } + else + { + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); + + if (!slow) + thisatt->attcacheoff = off; + } + + *value = fetchatt(thisatt, tp + off); + + off = att_addlength_pointer(off, thisatt->attlen, tp + off); + + if (thisatt->attlen <= 0) + slow = true; /* can't use attcacheoff anymore */ + + *offset = off; + *usecacheoff = slow; +} + + +/* * heap_update - replace a tuple * * NB: do not call this directly unless you are prepared to deal with @@ -2707,7 +2780,8 @@ simple_heap_delete(Relation relation, ItemPointer tid) HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemPointer ctid, TransactionId *update_xmax, - CommandId cid, Snapshot crosscheck, bool wait) + CommandId cid, Snapshot crosscheck, Bitmapset *modifiedCols, + bool wait) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); @@ -2715,6 +2789,7 @@ heap_update(Relation relation, ItemPointer otid, 
HeapTuple newtup, ItemId lp; HeapTupleData oldtup; HeapTuple heaptup; + HeapTupleData redotup; Page page; BlockNumber block; Buffer buffer, @@ -2730,6 +2805,14 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, bool use_hot_update = false; bool all_visible_cleared = false; bool all_visible_cleared_new = false; + struct + { + HeapTupleHeaderData hdr; + char data[MaxHeapTupleSize]; + } tbuf; + char *data; + bool diff_update = false; + uint16 offset = 0; Assert(ItemPointerIsValid(otid)); @@ -3098,6 +3181,132 @@ l2: PageSetFull(page); } + if (modifiedCols) + { + Form_pg_attribute *att = relation->rd_att->attrs; + int numberOfAttributes; + uint16 newOffset = 0; + int attnum; + HeapTupleHeader newtuphdr = heaptup->t_data; + bits8 *new_bp = newtuphdr->t_bits; + bool old_hasnulls = HeapTupleHasNulls(&oldtup); + bool new_hasnulls = HeapTupleHasNulls(heaptup); + bool new_usecacheoff = false; + Datum new_value; + uint16 data_length; + + /* For NULL value tuples, don't use the optimized path */ + if (old_hasnulls || new_hasnulls) + { + goto record_insert; + } + + numberOfAttributes = HeapTupleHeaderGetNatts(newtuphdr); + + /* + * Skip the WAL record header for now and frame data of optimized WAL + * update record. + */ + data = (char *) &tbuf.hdr; + offset = newtuphdr->t_hoff; + + for (attnum = 0; attnum < numberOfAttributes; attnum++) + { + /* + * If the attribute is modified by the update operation, store the + * appropriate offsets in the WAL record, otherwise skip to the + * next attribute. + */ + if (bms_is_member((attnum + 1) - FirstLowInvalidHeapAttributeNumber, + modifiedCols)) + { + /* + * calculate the offset where the modified attribute starts in + * the new tuple used to store in the WAL record, this will be + * used to traverse the old tuple during recovery. 
+ */ + newOffset = att_align_nominal(newOffset, att[attnum]->attalign); + offset = SHORTALIGN(offset); + + memcpy((data + offset), &newOffset, sizeof(uint16)); + offset += sizeof(uint16); + + /* get the attribute value and end offset for same */ + get_tuple_info(att, heaptup, new_bp, new_hasnulls, attnum, + &new_value, &newOffset, &new_usecacheoff); + + /* Increment the offset to store the data of modified column */ + offset += sizeof(uint16); + + if (att[attnum]->attbyval) + { + /* pass-by-value */ + data_length = att[attnum]->attlen; + store_att_byval((data + offset), new_value, data_length); + } + else + { + if (BPCHAROID == att[attnum]->atttypid) + { + /* varlena */ + Pointer val = DatumGetPointer(new_value); + + if (VARATT_IS_SHORT(val)) + { + /* no alignment for short varlenas */ + data_length = VARSIZE_SHORT(val); + memcpy((data + offset), val, data_length); + } + else if ((att[attnum]->attstorage != 'p') + && VARATT_CAN_MAKE_SHORT(val)) + { + /* convert to short varlena -- no alignment */ + data_length = VARATT_CONVERTED_SHORT_SIZE(val); + SET_VARSIZE_SHORT(data + offset, data_length); + memcpy((data + offset + 1), + VARDATA(val), + (data_length - 1)); + } + else + { + /* full 4-byte header varlena */ + data_length = VARSIZE(val); + memcpy((data + offset), val, data_length); + } + } + else + { + /* Not a BPCHAR, proceed without optimization */ + goto record_insert; + } + } + + /* Store the length of the modified attribute */ + memcpy((data + offset - sizeof(uint16)), + &data_length, + sizeof(uint16)); + offset += data_length; + } + else + { + get_tuple_info(att, heaptup, new_bp, new_hasnulls, attnum, + &new_value, &newOffset, &new_usecacheoff); + } + } + + /* + * FIXME: At the end of calculating the optimization tuple, if the + * optimized tuple length is more than 3/4 of the original tuple then + * ignore the optimization. 
+ */ + if (offset < ((heaptup->t_len >> 1) + (heaptup->t_len >> 2))) + { + diff_update = true; + } + } + +record_insert:; + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -3173,10 +3382,27 @@ l2: /* XLOG stuff */ if (RelationNeedsWAL(relation)) { - XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self, - newbuf, heaptup, - all_visible_cleared, - all_visible_cleared_new); + XLogRecPtr recptr; + + if (diff_update) + { + /* Copy the tuple header to the WAL tuple */ + memcpy(&tbuf.hdr, heaptup->t_data, heaptup->t_data->t_hoff); + redotup.t_len = offset; + redotup.t_data = (HeapTupleHeader) &tbuf; + redotup.t_self = heaptup->t_self; + redotup.t_tableOid = heaptup->t_tableOid; + } + else + { + memcpy(&redotup, heaptup, sizeof(HeapTupleData)); + } + + recptr = log_heap_update(relation, buffer, oldtup.t_self, + newbuf, &redotup, + all_visible_cleared, + all_visible_cleared_new, + diff_update); if (newbuf != buffer) { @@ -3363,6 +3589,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) result = heap_update(relation, otid, tup, &update_ctid, &update_xmax, GetCurrentCommandId(true), InvalidSnapshot, + NULL, true /* wait for commit */ ); switch (result) { @@ -4407,7 +4634,8 @@ log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer, static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, Buffer newbuf, HeapTuple newtup, - bool all_visible_cleared, bool new_all_visible_cleared) + bool all_visible_cleared, bool new_all_visible_cleared, + bool diff_update) { xl_heap_update xlrec; xl_heap_header xlhdr; @@ -4426,9 +4654,15 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, xlrec.target.node = reln->rd_node; xlrec.target.tid = from; - xlrec.all_visible_cleared = all_visible_cleared; + xlrec.diff_update = diff_update; xlrec.newtid = newtup->t_self; - xlrec.new_all_visible_cleared = new_all_visible_cleared; + + /* + * MSB 4 bits tells 
PD_ALL_VISIBLE was cleared of new page and rest 4 bits + * for the old page + */ + xlrec.new_all_visible_cleared = all_visible_cleared; + xlrec.new_all_visible_cleared |= new_all_visible_cleared << 4; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapUpdate; @@ -5217,14 +5451,20 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) } tbuf; xl_heap_header xlhdr; int hsize; - uint32 newlen; + uint32 newlen = 0; Size freespace; + bool old_tup_modify = true; /* flag used to indicate, whether old + * tuple needs the modification or not */ + + /* Initialize the buffer, used to frame the new tuple */ + MemSet((char *) &tbuf.hdr, 0, sizeof(HeapTupleHeaderData)); + hsize = SizeOfHeapUpdate + SizeOfHeapHeader; /* * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->all_visible_cleared) + if (xlrec->new_all_visible_cleared & 0x0F) { Relation reln = CreateFakeRelcacheEntry(xlrec->target.node); BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid); @@ -5240,16 +5480,32 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) { if (samepage) return; /* backup block covered both changes */ - goto newt; + + /* Need the old page to read the old tuple data, no update required */ + if (!xlrec->diff_update) + goto newt; + + old_tup_modify = false; } /* Deal with old tuple version */ - buffer = XLogReadBuffer(xlrec->target.node, ItemPointerGetBlockNumber(&(xlrec->target.tid)), false); if (!BufferIsValid(buffer)) + { + /* + * In case of diff update, if the old buffer is not available raise a + * panic as diff update needs the old buffer to frame the new tuple. 
+ */ + if (xlrec->diff_update) + { + elog(PANIC, "heap_update_redo: invalid buffer"); + } + goto newt; + } + page = (Page) BufferGetPage(buffer); if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */ @@ -5257,7 +5513,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) UnlockReleaseBuffer(buffer); if (samepage) return; - goto newt; + + /* Need the old page to read the old tuple data, no update required */ + if (!xlrec->diff_update) + goto newt; + + old_tup_modify = false; } offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); @@ -5269,25 +5530,103 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) htup = (HeapTupleHeader) PageGetItem(page, lp); - htup->t_infomask &= ~(HEAP_XMAX_COMMITTED | - HEAP_XMAX_INVALID | - HEAP_XMAX_IS_MULTI | - HEAP_IS_LOCKED | - HEAP_MOVED); - if (hot_update) - HeapTupleHeaderSetHotUpdated(htup); - else - HeapTupleHeaderClearHotUpdated(htup); - HeapTupleHeaderSetXmax(htup, record->xl_xid); - HeapTupleHeaderSetCmax(htup, FirstCommandId, false); - /* Set forward chain link in t_ctid */ - htup->t_ctid = xlrec->newtid; + if (xlrec->diff_update) + { + uint16 len = 0, + data_length, + oldoffset = 0; + uint32 t_length; + char *olddata = (char *) htup + htup->t_hoff; + char *data = (char *) &tbuf.hdr + htup->t_hoff; + char *redodata = (char *) xlrec + hsize + htup->t_hoff + - offsetof(HeapTupleHeaderData, t_bits); - /* Mark the page as a candidate for pruning */ - PageSetPrunable(page, record->xl_xid); + /* + * Frame the new tuple from old and wal tuple + * + * Get the data start pointer from old and redo data. + * + * calculate the tuple length from old tuple and redo record length. + * + * The redo data is in the format as "offset + length + new data" + * + * The first offset in the redo record gives the offset where the + * modification starts, the same will give you the length of the data + * needs to be copied from old tuple. 
+ * + * Once the old tuple data copied, then increase the offset by the + * copied length. + * + * Get the length and value of modified column from wal tuple and + * increase the old tuple offset also with the modified column length. + * + * Repeat this procedure until the wal tuple reaches the end. + */ - if (xlrec->all_visible_cleared) - PageClearAllVisible(page); + newlen = record->xl_len - hsize; + Assert(newlen <= MaxHeapTupleSize); + + t_length = ItemIdGetLength(lp) - htup->t_hoff; + newlen -= (htup->t_hoff - offsetof(HeapTupleHeaderData, t_bits)); + len = 0; + + /* Frame the new tuple from the old and WAL tuples */ + while (len < newlen) + { + data_length = *(uint16 *) (redodata + len) - oldoffset; + + /* Copy the old tuple data */ + memcpy(data, (olddata + oldoffset), data_length); + data += data_length; + oldoffset += data_length; + + len += sizeof(uint16); + data_length = *(uint16 *) (redodata + len); + oldoffset += data_length; + + len += sizeof(uint16); + + /* Copy the modified attribute data from WAL tuple */ + memcpy(data, (redodata + len), data_length); + + data += data_length; + len += data_length; + + len = SHORTALIGN(len); + } + + /* Copy the remaining old tuple data to the new tuple */ + if (oldoffset < t_length) + { + memcpy(data, (olddata + oldoffset), (t_length - oldoffset)); + } + + newlen = t_length + + (htup->t_hoff - offsetof(HeapTupleHeaderData, t_bits)); + } + + if (old_tup_modify) + { + htup->t_infomask &= ~(HEAP_XMAX_COMMITTED | + HEAP_XMAX_INVALID | + HEAP_XMAX_IS_MULTI | + HEAP_IS_LOCKED | + HEAP_MOVED); + if (hot_update) + HeapTupleHeaderSetHotUpdated(htup); + else + HeapTupleHeaderClearHotUpdated(htup); + HeapTupleHeaderSetXmax(htup, record->xl_xid); + HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + /* Set forward chain link in t_ctid */ + htup->t_ctid = xlrec->newtid; + + /* Mark the page as a candidate for pruning */ + PageSetPrunable(page, record->xl_xid); + + if (xlrec->new_all_visible_cleared & 0x0F) + 
PageClearAllVisible(page); + } /* * this test is ugly, but necessary to avoid thinking that insert change @@ -5295,9 +5634,14 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) */ if (samepage) goto newsame; - PageSetLSN(page, lsn); - PageSetTLI(page, ThisTimeLineID); - MarkBufferDirty(buffer); + + if (old_tup_modify) + { + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); /* Deal with new tuple */ @@ -5308,7 +5652,7 @@ newt:; * The visibility map may need to be fixed even if the heap page is * already up-to-date. */ - if (xlrec->new_all_visible_cleared) + if ((xlrec->new_all_visible_cleared >> 4) & 0x0F) { Relation reln = CreateFakeRelcacheEntry(xlrec->target.node); BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid); @@ -5355,19 +5699,23 @@ newsame:; if (PageGetMaxOffsetNumber(page) + 1 < offnum) elog(PANIC, "heap_update_redo: invalid max offset number"); - hsize = SizeOfHeapUpdate + SizeOfHeapHeader; - - newlen = record->xl_len - hsize; - Assert(newlen <= MaxHeapTupleSize); memcpy((char *) &xlhdr, (char *) xlrec + SizeOfHeapUpdate, SizeOfHeapHeader); + htup = &tbuf.hdr; - MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData)); - /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */ - memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits), - (char *) xlrec + hsize, - newlen); + + if (!xlrec->diff_update) + { + newlen = record->xl_len - hsize; + Assert(newlen <= MaxHeapTupleSize); + + /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */ + memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits), + (char *) xlrec + hsize, + newlen); + } + newlen += offsetof(HeapTupleHeaderData, t_bits); htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index a7bce75..4c22aea 100644 --- a/src/backend/executor/nodeModifyTable.c +++ 
b/src/backend/executor/nodeModifyTable.c @@ -48,6 +48,7 @@ #include "utils/memutils.h" #include "utils/rel.h" #include "utils/tqual.h" +#include "parser/parsetree.h" /* @@ -478,12 +479,14 @@ ExecUpdate(ItemPointer tupleid, bool canSetTag) { HeapTuple tuple; + HeapTuple old_tuple; ResultRelInfo *resultRelInfo; Relation resultRelationDesc; HTSU_Result result; ItemPointerData update_ctid; TransactionId update_xmax; List *recheckIndexes = NIL; + Bitmapset *modifiedCols = NULL; /* * abort the operation if not running transactions @@ -495,7 +498,7 @@ ExecUpdate(ItemPointer tupleid, * get the heap tuple out of the tuple table slot, making sure we have a * writable copy */ - tuple = ExecMaterializeSlot(slot); + tuple = old_tuple = ExecMaterializeSlot(slot); /* * get information on the (current) result relation @@ -553,6 +556,13 @@ lreplace:; if (resultRelationDesc->rd_att->constr) ExecConstraints(resultRelInfo, slot, estate); + if ((resultRelationDesc->rd_toastoid == InvalidOid) + && (old_tuple == tuple)) + { + modifiedCols = (rt_fetch(resultRelInfo->ri_RangeTableIndex, + estate->es_range_table)->modifiedCols); + } + /* * replace the heap tuple * @@ -566,6 +576,7 @@ lreplace:; &update_ctid, &update_xmax, estate->es_output_cid, estate->es_crosscheck_snapshot, + modifiedCols, true /* wait for commit */ ); switch (result) { diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 660a854..5e91ba8 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -105,7 +105,8 @@ extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemPointer ctid, TransactionId *update_xmax, - CommandId cid, Snapshot crosscheck, bool wait); + CommandId cid, Snapshot crosscheck, Bitmapset *modifiedCols, + bool wait); extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, 
diff --git a/src/include/access/htup.h b/src/include/access/htup.h index b289e14..f5c08ed 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -692,13 +692,19 @@ typedef struct xl_multi_insert_tuple #define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8)) -/* This is what we need to know about update|hot_update */ +/* This is what we need to know about update|hot_update|optimized_update */ typedef struct xl_heap_update { xl_heaptid target; /* deleted tuple id */ ItemPointerData newtid; /* new inserted tuple id */ - bool all_visible_cleared; /* PD_ALL_VISIBLE was cleared */ - bool new_all_visible_cleared; /* same for the page of newtid */ + bool diff_update; /* optimized update or not */ + /* + * To keep the structure size the same, all_visible_cleared is merged into + * new_all_visible_cleared. + */ + bool new_all_visible_cleared; /* high 4 bits: PD_ALL_VISIBLE was cleared + on the new page; low 4 bits: the same + flag for the old page */ /* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */ } xl_heap_update;