*** src/backend/storage/buffer/freelist.c.orig Thu Feb 3 18:29:11 2005 --- src/backend/storage/buffer/freelist.c Thu Feb 3 19:21:01 2005 *************** *** 34,47 **** * Definitions for the buffer replacement strategy */ #define STRAT_LIST_UNUSED (-1) ! #define STRAT_LIST_B1 0 ! #define STRAT_LIST_T1 1 ! #define STRAT_LIST_T2 2 ! #define STRAT_LIST_B2 3 ! #define STRAT_NUM_LISTS 4 /* ! * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) */ typedef struct { --- 34,48 ---- * Definitions for the buffer replacement strategy */ #define STRAT_LIST_UNUSED (-1) ! #define STRAT_LIST_T1 0 ! #define STRAT_LIST_T2 1 ! #define STRAT_NUM_LISTS 2 /* ! * We have a cache directory block (CDB) for every file page currently being ! * tracked by the strategy module. In principle there can be more CDBs than ! * there are actual shared buffers, allowing pages no longer in cache to ! * still be tracked. The current implementation doesn't do that though. */ typedef struct { *************** *** 55,68 **** } BufferStrategyCDB; /* ! * The shared ARC control information. */ typedef struct { int target_T1_size; /* What T1 size are we aiming for */ int listUnusedCDB; /* All unused StrategyCDB */ ! int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 ! * and B2 */ int listTail[STRAT_NUM_LISTS]; int listSize[STRAT_NUM_LISTS]; Buffer listFreeBuffers; /* List of unused buffers */ --- 56,68 ---- } BufferStrategyCDB; /* ! * The shared strategy control information. */ typedef struct { int target_T1_size; /* What T1 size are we aiming for */ int listUnusedCDB; /* All unused StrategyCDB */ ! int listHead[STRAT_NUM_LISTS]; /* CDB lists T1 and T2 */ int listTail[STRAT_NUM_LISTS]; int listSize[STRAT_NUM_LISTS]; Buffer listFreeBuffers; /* List of unused buffers */ *************** *** 88,97 **** #define T1_TARGET (StrategyControl->target_T1_size) - #define B1_LENGTH (StrategyControl->listSize[STRAT_LIST_B1]) #define T1_LENGTH (StrategyControl->listSize[STRAT_LIST_T1]) #define T2_LENGTH (StrategyControl->listSize[STRAT_LIST_T2]) - #define B2_LENGTH (StrategyControl->listSize[STRAT_LIST_B2]) /* --- 88,95 ---- *************** *** 176,185 **** if (StrategyControl->stat_report + DebugSharedBuffers < now) { long all_hit, - b1_hit, t1_hit, ! t2_hit, ! b2_hit; int id, t1_clean, t2_clean; --- 174,181 ---- if (StrategyControl->stat_report + DebugSharedBuffers < now) { long all_hit, t1_hit, ! t2_hit; int id, t1_clean, t2_clean; *************** *** 205,239 **** } if (StrategyControl->num_lookup == 0) ! all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0; else { - b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 / - StrategyControl->num_lookup); t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 / StrategyControl->num_lookup); t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 / StrategyControl->num_lookup); ! b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 / ! StrategyControl->num_lookup); ! all_hit = b1_hit + t1_hit + t2_hit + b2_hit; } errcxtold = error_context_stack; error_context_stack = NULL; ! elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d", ! T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH); ! elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%", ! all_hit, b1_hit, t1_hit, t2_hit, b2_hit); ! elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d", t1_clean, t2_clean); error_context_stack = errcxtold; StrategyControl->num_lookup = 0; - StrategyControl->num_hit[STRAT_LIST_B1] = 0; StrategyControl->num_hit[STRAT_LIST_T1] = 0; StrategyControl->num_hit[STRAT_LIST_T2] = 0; - StrategyControl->num_hit[STRAT_LIST_B2] = 0; StrategyControl->stat_report = now; } } --- 201,229 ---- } if (StrategyControl->num_lookup == 0) ! all_hit = t1_hit = t2_hit = 0; else { t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 / StrategyControl->num_lookup); t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 / StrategyControl->num_lookup); ! all_hit = t1_hit + t2_hit; } errcxtold = error_context_stack; error_context_stack = NULL; ! elog(DEBUG1, "2Q T1target=%5d T1len=%5d T2len=%5d", ! T1_TARGET, T1_LENGTH, T2_LENGTH); ! elog(DEBUG1, "2Q total =%4ld%% T1hit=%4ld%% T2hit=%4ld%%", ! all_hit, t1_hit, t2_hit); ! elog(DEBUG1, "2Q clean buffers at LRU T1= %5d T2= %5d", t1_clean, t2_clean); error_context_stack = errcxtold; StrategyControl->num_lookup = 0; StrategyControl->num_hit[STRAT_LIST_T1] = 0; StrategyControl->num_hit[STRAT_LIST_T2] = 0; StrategyControl->stat_report = now; } } *************** *** 241,249 **** /* * StrategyBufferLookup * ! * Lookup a page request in the cache directory. A buffer is only ! * returned for a T1 or T2 cache hit. B1 and B2 hits are just ! * remembered here, to possibly affect the behaviour later. * * recheck indicates we are rechecking after I/O wait; do not change * internal status in this case. --- 231,237 ---- /* * StrategyBufferLookup * ! * Lookup a page request in the cache directory. * * recheck indicates we are rechecking after I/O wait; do not change * internal status in this case. *************** *** 346,400 **** } /* ! * In the case of a recheck we don't care about B1 or B2 hits here. ! * The bufmgr does this call only to make sure no-one faulted in the ! * block while we where busy flushing another; we don't want to doubly ! * adjust the T1target. ! * ! * Now for this really to end up as a B1 or B2 cache hit, we must have ! * been flushing for quite some time as the block not only must have ! * been read, but also traveled through the queue and evicted from the ! * T cache again already. ! * ! * VACUUM re-reads shouldn't adjust the target either. ! */ ! if (recheck || strategy_hint_vacuum) ! return NULL; ! ! /* ! * Adjust the target size of the T1 cache depending on if this is a B1 ! * or B2 hit. ! */ ! switch (cdb->list) ! { ! case STRAT_LIST_B1: ! ! /* ! * B1 hit means that the T1 cache is probably too small. ! * Adjust the T1 target size and continue below. ! */ ! T1_TARGET = Min(T1_TARGET + Max(B2_LENGTH / B1_LENGTH, 1), ! NBuffers); ! break; ! ! case STRAT_LIST_B2: ! ! /* ! * B2 hit means that the T2 cache is probably too small. ! * Adjust the T1 target size and continue below. ! */ ! T1_TARGET = Max(T1_TARGET - Max(B1_LENGTH / B2_LENGTH, 1), 0); ! break; ! ! default: ! elog(ERROR, "buffer hash table corrupted: CDB->list = %d", ! cdb->list); ! } ! ! /* ! * Even though we had seen the block in the past, its data is not ! * currently in memory ... cache miss to the bufmgr. */ return NULL; } --- 334,343 ---- } /* ! * Shouldn't get here... */ + elog(ERROR, "buffer hash table corrupted: CDB->list = %d", + cdb->list); return NULL; } *************** *** 423,429 **** * We don't have a free buffer, must take one from T1 or T2. * Choose based on trying to converge T1len to T1target. */ ! if (T1_LENGTH >= Max(1, T1_TARGET)) { /* * We should take the first unpinned buffer from T1. --- 366,372 ---- * We don't have a free buffer, must take one from T1 or T2. * Choose based on trying to converge T1len to T1target. */ ! if (T1_LENGTH >= T1_TARGET) { /* * We should take the first unpinned buffer from T1. *************** *** 548,720 **** StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag, int cdb_found_index, int cdb_replace_index) { ! BufferStrategyCDB *cdb_found; ! BufferStrategyCDB *cdb_replace; ! if (cdb_found_index >= 0) { ! /* This must have been a ghost buffer cache hit (B1 or B2) */ ! cdb_found = &StrategyCDB[cdb_found_index]; ! /* Assert that the buffer remembered in cdb_found is the one */ ! /* the buffer manager is currently faulting in */ ! Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag)); ! ! if (cdb_replace_index >= 0) ! { ! /* We are satisfying it with an evicted T buffer */ ! cdb_replace = &StrategyCDB[cdb_replace_index]; ! ! /* Assert that the buffer remembered in cdb_replace is */ ! /* the one the buffer manager has just evicted */ ! Assert(cdb_replace->list == STRAT_LIST_T1 || ! cdb_replace->list == STRAT_LIST_T2); ! Assert(cdb_replace->buf_id == buf->buf_id); ! Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag)); ! /* ! * Under normal circumstances we move the evicted T list entry ! * to the corresponding B list. However, T1 entries that ! * exist only because of VACUUM are just thrown into the ! * unused list instead. We don't expect them to be touched ! * again by the VACUUM, and if we put them into B1 then VACUUM ! * would skew T1_target adjusting. ! */ ! if (cdb_replace->t1_vacuum) ! { ! BufTableDelete(&(cdb_replace->buf_tag)); ! STRAT_LIST_REMOVE(cdb_replace); ! cdb_replace->next = StrategyControl->listUnusedCDB; ! StrategyControl->listUnusedCDB = cdb_replace_index; ! } ! else ! { ! if (cdb_replace->list == STRAT_LIST_T1) ! { ! STRAT_LIST_REMOVE(cdb_replace); ! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1); ! } ! else ! { ! STRAT_LIST_REMOVE(cdb_replace); ! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); ! } ! } ! /* And clear its block reference */ ! cdb_replace->buf_id = -1; ! } ! else ! { ! /* We are satisfying it with an unused buffer */ ! } ! ! /* Now the found B CDB gets the buffer and is moved to T2 */ ! cdb_found->buf_id = buf->buf_id; ! STRAT_LIST_REMOVE(cdb_found); ! STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2); } else { ! /* ! * This was a complete cache miss, so we need to create a new CDB. ! * The goal is to keep T1len+B1len <= c. ! */ ! if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= NBuffers) { ! /* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */ ! cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]]; ! ! BufTableDelete(&(cdb_found->buf_tag)); ! STRAT_LIST_REMOVE(cdb_found); } else { ! /* Otherwise, we try to use a free one */ ! if (StrategyControl->listUnusedCDB >= 0) ! { ! cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB]; ! StrategyControl->listUnusedCDB = cdb_found->next; ! } ! else ! { ! /* If there isn't, we take B2-LRU ... except if */ ! /* T1len+B1len+T2len = c ... oh my */ ! if (B2_LENGTH > 0) ! cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B2]]; ! else ! cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]]; ! ! BufTableDelete(&(cdb_found->buf_tag)); ! STRAT_LIST_REMOVE(cdb_found); ! } } ! /* Set the CDB's buf_tag and insert it into the hash table */ ! cdb_found->buf_tag = *newTag; ! BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB)); ! ! if (cdb_replace_index >= 0) ! { ! /* ! * The buffer was formerly in a T list, move its CDB to the ! * corresponding B list ! */ ! cdb_replace = &StrategyCDB[cdb_replace_index]; ! Assert(cdb_replace->list == STRAT_LIST_T1 || ! cdb_replace->list == STRAT_LIST_T2); ! Assert(cdb_replace->buf_id == buf->buf_id); ! Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag)); ! if (cdb_replace->list == STRAT_LIST_T1) ! { ! STRAT_LIST_REMOVE(cdb_replace); ! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1); ! } ! else ! { ! STRAT_LIST_REMOVE(cdb_replace); ! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); ! } ! /* And clear its block reference */ ! cdb_replace->buf_id = -1; ! } else { ! /* We are satisfying it with an unused buffer */ } - - /* Assign the buffer id to the new CDB */ - cdb_found->buf_id = buf->buf_id; - - /* - * Specialized VACUUM optimization. If this complete cache miss - * happened because vacuum needed the page, we place it at the LRU - * position of T1; normally it goes at the MRU position. - */ - if (strategy_hint_vacuum) - { - if (TransactionIdEquals(strategy_vacuum_xid, - GetTopTransactionId())) - STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1); - else - { - /* VACUUM must have been aborted by error, reset flag */ - strategy_hint_vacuum = false; - STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); - } - } - else - STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); - - /* - * Remember the Xid when this buffer went onto T1 to avoid a - * single UPDATE promoting a newcomer straight into T2. Also - * remember if it was loaded for VACUUM. - */ - cdb_found->t1_xid = GetTopTransactionId(); - cdb_found->t1_vacuum = strategy_hint_vacuum; } } --- 491,565 ---- StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag, int cdb_found_index, int cdb_replace_index) { ! BufferStrategyCDB *new_cdb; ! ! /* StrategyBufferLookup must have failed */ ! Assert(cdb_found_index < 0); ! if (cdb_replace_index >= 0) { ! /* Recycle the buffer's old CDB entry */ ! new_cdb = &StrategyCDB[cdb_replace_index]; ! Assert(new_cdb->list == STRAT_LIST_T1 || ! new_cdb->list == STRAT_LIST_T2); ! Assert(new_cdb->buf_id == buf->buf_id); ! Assert(BUFFERTAGS_EQUAL(new_cdb->buf_tag, buf->tag)); ! BufTableDelete(&(new_cdb->buf_tag)); ! STRAT_LIST_REMOVE(new_cdb); } else { ! /* Unused buffer, so assign it an unused CDB */ ! if (StrategyControl->listUnusedCDB >= 0) { ! /* Use a free one */ ! new_cdb = &StrategyCDB[StrategyControl->listUnusedCDB]; ! StrategyControl->listUnusedCDB = new_cdb->next; } else { ! /* we have more buffers than CDB's ?? */ ! elog(PANIC, "StrategyReplaceBuffer: out of CDBs"); ! new_cdb = NULL; /* keep compiler quiet */ } + } ! /* Set the CDB's buf_tag and insert it into the hash table */ ! new_cdb->buf_tag = *newTag; ! BufTableInsert(&(new_cdb->buf_tag), (new_cdb - StrategyCDB)); ! /* Assign the buffer id to the new CDB */ ! new_cdb->buf_id = buf->buf_id; ! /* ! * Specialized VACUUM optimization. If this complete cache miss ! * happened because vacuum needed the page, we place it at the LRU ! * position of T1; normally it goes at the MRU position. ! */ ! if (strategy_hint_vacuum) ! { ! if (TransactionIdEquals(strategy_vacuum_xid, ! GetTopTransactionId())) ! STRAT_LRU_INSERT(new_cdb, STRAT_LIST_T1); else { ! /* VACUUM must have been aborted by error, reset flag */ ! strategy_hint_vacuum = false; ! STRAT_MRU_INSERT(new_cdb, STRAT_LIST_T1); } } + else + STRAT_MRU_INSERT(new_cdb, STRAT_LIST_T1); + + /* + * Remember the Xid when this buffer went onto T1 to avoid a + * single UPDATE promoting a newcomer straight into T2. Also + * remember if it was loaded for VACUUM. + */ + new_cdb->t1_xid = GetTopTransactionId(); + new_cdb->t1_vacuum = strategy_hint_vacuum; } *************** *** 744,750 **** cdb = &StrategyCDB[cdb_id]; /* ! * Remove the CDB from the hashtable and the ARC queue it is currently * on. */ BufTableDelete(&(cdb->buf_tag)); --- 589,595 ---- cdb = &StrategyCDB[cdb_id]; /* ! * Remove the CDB from the hashtable and the 2Q queue it is currently * on. */ BufTableDelete(&(cdb->buf_tag)); *************** *** 808,814 **** /* * Traverse the T1 and T2 list LRU to MRU in "parallel" and add all ! * dirty buffers found in that order to the list. The ARC strategy * keeps all used buffers including pinned ones in the T1 or T2 list. * So we cannot miss any dirty buffers. */ --- 653,659 ---- /* * Traverse the T1 and T2 list LRU to MRU in "parallel" and add all ! * dirty buffers found in that order to the list. The 2Q strategy * keeps all used buffers including pinned ones in the T1 or T2 list. * So we cannot miss any dirty buffers. */ *************** *** 870,885 **** int StrategyShmemSize(void) { int size = 0; /* size of CDB lookup hash table */ ! size += BufTableShmemSize(NBuffers * 2); /* size of the shared replacement strategy control block */ size += MAXALIGN(sizeof(BufferStrategyControl)); ! /* size of the ARC directory blocks */ ! size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB)); return size; } --- 715,731 ---- int StrategyShmemSize(void) { + int nCDBs = NBuffers; int size = 0; /* size of CDB lookup hash table */ ! size += BufTableShmemSize(nCDBs); /* size of the shared replacement strategy control block */ size += MAXALIGN(sizeof(BufferStrategyControl)); ! /* size of the CDB directory */ ! size += MAXALIGN(nCDBs * sizeof(BufferStrategyCDB)); return size; } *************** *** 894,906 **** void StrategyInitialize(bool init) { bool found; int i; /* * Initialize the shared CDB lookup hashtable */ ! InitBufTable(NBuffers * 2); /* * Get or create the shared strategy control block and the CDB's --- 740,753 ---- void StrategyInitialize(bool init) { + int nCDBs = NBuffers; bool found; int i; /* * Initialize the shared CDB lookup hashtable */ ! InitBufTable(nCDBs); /* * Get or create the shared strategy control block and the CDB's *************** *** 908,914 **** StrategyControl = (BufferStrategyControl *) ShmemInitStruct("Buffer Strategy Status", sizeof(BufferStrategyControl) + ! sizeof(BufferStrategyCDB) * (NBuffers * 2 - 1), &found); StrategyCDB = &(StrategyControl->cdb[0]); --- 755,761 ---- StrategyControl = (BufferStrategyControl *) ShmemInitStruct("Buffer Strategy Status", sizeof(BufferStrategyControl) + ! sizeof(BufferStrategyCDB) * (nCDBs - 1), &found); StrategyCDB = &(StrategyControl->cdb[0]); *************** *** 926,938 **** StrategyControl->listFreeBuffers = 0; /* ! * We start off with a target T1 list size of half the available ! * cache blocks. */ ! StrategyControl->target_T1_size = NBuffers / 2; /* ! * Initialize B1, T1, T2 and B2 lists to be empty */ for (i = 0; i < STRAT_NUM_LISTS; i++) { --- 773,785 ---- StrategyControl->listFreeBuffers = 0; /* ! * We set the target T1 size to 1/4th of available buffers. ! * Possibly this should be a runtime tunable. */ ! StrategyControl->target_T1_size = NBuffers / 4; /* ! * Initialize all lists to be empty */ for (i = 0; i < STRAT_NUM_LISTS; i++) { *************** *** 947,960 **** /* * All CDB's are linked as the listUnusedCDB */ ! for (i = 0; i < NBuffers * 2; i++) { StrategyCDB[i].next = i + 1; StrategyCDB[i].list = STRAT_LIST_UNUSED; CLEAR_BUFFERTAG(StrategyCDB[i].buf_tag); StrategyCDB[i].buf_id = -1; } ! StrategyCDB[NBuffers * 2 - 1].next = -1; StrategyControl->listUnusedCDB = 0; } else --- 794,807 ---- /* * All CDB's are linked as the listUnusedCDB */ ! for (i = 0; i < nCDBs; i++) { StrategyCDB[i].next = i + 1; StrategyCDB[i].list = STRAT_LIST_UNUSED; CLEAR_BUFFERTAG(StrategyCDB[i].buf_tag); StrategyCDB[i].buf_id = -1; } ! StrategyCDB[nCDBs - 1].next = -1; StrategyControl->listUnusedCDB = 0; } else