Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /d3/pgsql/cvs/pgsql-local/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c	7 May 2007 01:48:49 -0000	1.1.1.1
--- src/backend/storage/buffer/bufmgr.c	12 May 2007 22:26:56 -0000
***************
*** 67,72 ****
--- 67,79 ----
  /* interval for calling AbsorbFsyncRequests in BufferSync */
  #define WRITES_PER_ABSORB       1000
  
+ /* Return codes describing what SyncOneBuffer found out and did with the
+  * buffer it processed.  The way code here tests for whether a write
+  * was done depends on BUF_WRITTEN being the highest bit value in this set. */
+ #define BUF_WRITTEN         0x80
+ #define BUF_CLEAN           0x40
+ #define BUF_REUSABLE        0x20
+ #define BUF_USAGE_COUNT     0x1F
  
  /* GUC variables */
  bool        zero_damaged_pages = false;
***************
*** 101,107 ****
  static void PinBuffer_Locked(volatile BufferDesc *buf);
  static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner,
              bool normalAccess);
! static bool SyncOneBuffer(int buf_id, bool skip_pinned);
  static void WaitIO(volatile BufferDesc *buf);
  static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
  static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
--- 108,114 ----
  static void PinBuffer_Locked(volatile BufferDesc *buf);
  static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner,
              bool normalAccess);
! static int SyncOneBuffer(int buf_id, bool skip_recently_used);
  static void WaitIO(volatile BufferDesc *buf);
  static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
  static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
***************
*** 1007,1013 ****
      absorb_counter = WRITES_PER_ABSORB;
      while (num_to_scan-- > 0)
      {
!         if (SyncOneBuffer(buf_id, false))
          {
              BgWriterStats.m_buf_written_checkpoints++;
  
--- 1014,1020 ----
      absorb_counter = WRITES_PER_ABSORB;
      while (num_to_scan-- > 0)
      {
!         if (SyncOneBuffer(buf_id, false) >= BUF_WRITTEN)
          {
              BgWriterStats.m_buf_written_checkpoints++;
  
***************
*** 1040,1047 ****
      int         buf_id2;
      int         num_to_scan;
      int         num_written;
!     int         recent_alloc;
      int         num_client_writes;
  
      /* Make sure we can handle the pin inside SyncOneBuffer */
      ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
--- 1047,1063 ----
      int         buf_id2;
      int         num_to_scan;
      int         num_written;
! 
!     /* Statistics returned by the freelist strategy code */
      int         num_client_writes;
+     int         recent_alloc;
+ 
+ 
+     /* Used to estimate the upcoming LRU eviction activity */
+     static int  smoothed_alloc = 0;
+     int         upcoming_alloc_estimate;
+     int         reusable_buffers;
+     int         buffer_state;
  
      /* Make sure we can handle the pin inside SyncOneBuffer */
      ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
***************
*** 1073,1079 ****
          {
              if (++buf_id1 >= NBuffers)
                  buf_id1 = 0;
!             if (SyncOneBuffer(buf_id1, false))
              {
                  if (++num_written >= bgwriter_all_maxpages)
                  {
--- 1089,1095 ----
          {
              if (++buf_id1 >= NBuffers)
                  buf_id1 = 0;
!             if (SyncOneBuffer(buf_id1, false) >= BUF_WRITTEN)
              {
                  if (++num_written >= bgwriter_all_maxpages)
                  {
***************
*** 1092,1097 ****
--- 1108,1142 ----
      BgWriterStats.m_buf_alloc+=recent_alloc;
      BgWriterStats.m_buf_written_client+=num_client_writes;
  
+     /* Estimate number of buffers to write based on a smoothed weighted
+      * average of previous and recent buffer allocations */
+     smoothed_alloc = smoothed_alloc * 15 / 16 + recent_alloc / 16;
+ 
+     /* Expect we will soon need either the smoothed amount or the recent
+      * allocation amount, whichever is larger */
+     upcoming_alloc_estimate = smoothed_alloc;
+     if (recent_alloc > upcoming_alloc_estimate)
+         upcoming_alloc_estimate = recent_alloc;
+ 
+     /**** DEBUG show the smoothing in action ***/
+     if (1)
+     {
+         static int  count = 0;
+         static int  alloc[10];
+         static int  smoothed[10];
+         alloc[count % 10] = recent_alloc;
+         smoothed[count % 10] = smoothed_alloc;
+         if (++count % 10 == 9)
+         {
+             elog(LOG, "alloc = %d %d %d %d %d %d %d %d %d %d",
+                  alloc[0], alloc[1], alloc[2], alloc[3], alloc[4],
+                  alloc[5], alloc[6], alloc[7], alloc[8], alloc[9]);
+             elog(LOG, "smoothed = %d %d %d %d %d %d %d %d %d %d",
+                  smoothed[0], smoothed[1], smoothed[2], smoothed[3], smoothed[4],
+                  smoothed[5], smoothed[6], smoothed[7], smoothed[8], smoothed[9]);
+         }
+     }
+ 
      /*
       * This loop considers only unpinned buffers close to the clock sweep
       * point.
***************
*** 1100,1139 ****
      {
          num_to_scan = (int) ((NBuffers * bgwriter_lru_percent + 99) / 100);
          num_written = 0;
! 
          while (num_to_scan-- > 0)
          {
!             if (SyncOneBuffer(buf_id2, true))
              {
                  if (++num_written >= bgwriter_lru_maxpages)
                  {
                      BgWriterStats.m_maxwritten_lru++;
                      break;
                  }
              }
              if (++buf_id2 >= NBuffers)
                  buf_id2 = 0;
          }
          BgWriterStats.m_buf_written_lru += num_written;
      }
  }
  
  /*
   * SyncOneBuffer -- process a single buffer during syncing.
   *
!  * If skip_pinned is true, we don't write currently-pinned buffers, nor
   * buffers marked recently used, as these are not replacement candidates.
   *
!  * Returns true if buffer was written, else false.  (This could be in error
!  * if FlushBuffers finds the buffer clean after locking it, but we don't
!  * care all that much.)
   *
   * Note: caller must have done ResourceOwnerEnlargeBuffers.
   */
! static bool
! SyncOneBuffer(int buf_id, bool skip_pinned)
  {
      volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
  
      /*
       * Check whether buffer needs writing.
--- 1145,1207 ----
      {
          num_to_scan = (int) ((NBuffers * bgwriter_lru_percent + 99) / 100);
          num_written = 0;
!         reusable_buffers = 0;
          while (num_to_scan-- > 0)
          {
!             buffer_state = SyncOneBuffer(buf_id2, true);
!             if (buffer_state >= BUF_WRITTEN)
              {
+                 reusable_buffers++;
                  if (++num_written >= bgwriter_lru_maxpages)
                  {
                      BgWriterStats.m_maxwritten_lru++;
                      break;
                  }
              }
+             else if (buffer_state & BUF_REUSABLE) reusable_buffers++;
+ 
              if (++buf_id2 >= NBuffers)
                  buf_id2 = 0;
+ 
+             /* Exit when target for upcoming allocations reached */
+             if (reusable_buffers >= upcoming_alloc_estimate) break;
          }
          BgWriterStats.m_buf_written_lru += num_written;
+ 
+         if (1 && num_written > 0)   /**** DEBUG Show what happened this pass */
+         {
+             elog(LOG, "scanned=%d written=%d client write=%d alloc_est=%d reusable=%d",
+                  (int) ((NBuffers * bgwriter_lru_percent + 99) / 100) - num_to_scan,
+                  num_written, num_client_writes, upcoming_alloc_estimate, reusable_buffers);
+         }
+ 
      }
  }
  
  /*
   * SyncOneBuffer -- process a single buffer during syncing.
   *
!  * If skip_recently_used is true, we don't write currently-pinned buffers, nor
   * buffers marked recently used, as these are not replacement candidates.
   *
!  * Returns an integer code describing both the state the buffer was
!  * in when examined and what was done with it.  The lower-order bits
!  * are set to the usage_count of the buffer, and the following
!  * bit masks are set accordingly:  BUF_WRITTEN, BUF_CLEAN, BUF_REUSABLE
!  *
!  * (This could be in error if FlushBuffers finds the buffer clean after
!  * locking it, but we don't care all that much.)
!  *
!  * The results are ordered such that the simple test for whether a buffer was
!  * written is to check whether the return code is >= BUF_WRITTEN.
   *
   * Note: caller must have done ResourceOwnerEnlargeBuffers.
   */
! static int
! SyncOneBuffer(int buf_id, bool skip_recently_used)
  {
      volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+     int         buffer_state;
  
      /*
       * Check whether buffer needs writing.
***************
*** 1145,1160 ****
       * upcoming changes and so we are not required to write such dirty buffer.
       */
      LockBufHdr(bufHdr);
      if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
      {
          UnlockBufHdr(bufHdr);
!         return false;
      }
!     if (skip_pinned &&
!         (bufHdr->refcount != 0 || bufHdr->usage_count != 0))
      {
          UnlockBufHdr(bufHdr);
!         return false;
      }
  
      /*
--- 1213,1237 ----
       * upcoming changes and so we are not required to write such dirty buffer.
       */
      LockBufHdr(bufHdr);
+ 
+     /* Starting state says this buffer is dirty, not reusable, and unwritten */
+     buffer_state = bufHdr->usage_count;
+ 
      if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
+         buffer_state |= BUF_CLEAN;
+ 
+     if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
+         buffer_state |= BUF_REUSABLE;
+     else if (skip_recently_used)
      {
          UnlockBufHdr(bufHdr);
!         return buffer_state;
      }
! 
!     if (buffer_state & BUF_CLEAN)
      {
          UnlockBufHdr(bufHdr);
!         return buffer_state;
      }
  
      /*
***************
*** 1169,1175 ****
  
      LWLockRelease(bufHdr->content_lock);
      UnpinBuffer(bufHdr, true, false /* don't change freelist */ );
  
!     return true;
  }
  
--- 1246,1252 ----
  
      LWLockRelease(bufHdr->content_lock);
      UnpinBuffer(bufHdr, true, false /* don't change freelist */ );
  
!     return buffer_state | BUF_WRITTEN;
  }
  
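
For readers who want to see the two new mechanisms in isolation, here is a small standalone sketch. It is not part of the patch; the file name, the sample allocation series, and the example return code are made up for illustration. It only reuses the BUF_* bit layout and the 15/16 smoothing recurrence that the patch adds to bufmgr.c.

/*
 * bufstate_demo.c -- standalone sketch, not part of the patch.
 * Mirrors the BUF_* bit layout used by SyncOneBuffer and the 15/16
 * weighted-average smoothing applied to recent_alloc in BgBufferSync.
 *
 * Build and run:  cc -o bufstate_demo bufstate_demo.c && ./bufstate_demo
 */
#include <stdio.h>

#define BUF_WRITTEN         0x80
#define BUF_CLEAN           0x40
#define BUF_REUSABLE        0x20
#define BUF_USAGE_COUNT     0x1F

int
main(void)
{
    /* A hypothetical SyncOneBuffer result: written, reusable, usage_count 3 */
    int     buffer_state = BUF_WRITTEN | BUF_REUSABLE | 3;

    /*
     * BUF_WRITTEN is the highest bit in the layout, so the comparison
     * ">= BUF_WRITTEN" is equivalent to testing that flag directly.
     */
    printf("written=%d clean=%d reusable=%d usage_count=%d\n",
           buffer_state >= BUF_WRITTEN,
           (buffer_state & BUF_CLEAN) != 0,
           (buffer_state & BUF_REUSABLE) != 0,
           buffer_state & BUF_USAGE_COUNT);

    /*
     * Smoothing recurrence from BgBufferSync: keep 15/16 of the running
     * estimate, fold in 1/16 of the latest allocation count, and plan for
     * whichever of the smoothed or the raw recent value is larger.
     */
    {
        int     recent_alloc[] = {0, 0, 400, 400, 400, 0, 0, 0};
        int     smoothed_alloc = 0;
        int     i;

        for (i = 0; i < 8; i++)
        {
            int     upcoming_alloc_estimate;

            smoothed_alloc = smoothed_alloc * 15 / 16 + recent_alloc[i] / 16;
            upcoming_alloc_estimate = smoothed_alloc;
            if (recent_alloc[i] > upcoming_alloc_estimate)
                upcoming_alloc_estimate = recent_alloc[i];

            printf("pass %d: recent=%d smoothed=%d estimate=%d\n",
                   i, recent_alloc[i], smoothed_alloc, upcoming_alloc_estimate);
        }
    }

    return 0;
}

Because BUF_WRITTEN is the highest bit value in the set, the single ">= BUF_WRITTEN" comparison doubles as the "was it written" test, which is why BufferSync and the all-buffers scan in BgBufferSync can keep their old one-line checks while the LRU scan also gets the reusable-buffer count it needs.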