diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c index 00cab73..578aeb3 100644 --- a/contrib/pgbench/pgbench.c +++ b/contrib/pgbench/pgbench.c @@ -130,6 +130,11 @@ int foreign_keys = 0; int unlogged_tables = 0; /* + * log sampling rate as a fraction (1.0 = 100%, 0 = sampling disabled) + */ +double use_sample_rate = 0.0; + +/* * tablespace selection */ char *tablespace = NULL; @@ -364,6 +369,8 @@ usage(void) " -f FILENAME read transaction script from FILENAME\n" " -j NUM number of threads (default: 1)\n" " -l write transaction times to log file\n" + " --sampling-rate NUM\n" + " sampling rate of the log (e.g. 0.01 for 1%% sample)\n" " -M simple|extended|prepared\n" " protocol for submitting queries to server (default: simple)\n" " -n do not run VACUUM before tests\n" @@ -877,21 +884,26 @@ top: instr_time diff; double usec; - INSTR_TIME_SET_CURRENT(now); - diff = now; - INSTR_TIME_SUBTRACT(diff, st->txn_begin); - usec = (double) INSTR_TIME_GET_MICROSEC(diff); + /* either no sampling or is within the sample */ + if ((use_sample_rate == 0.0) || (pg_erand48(thread->random_state) <= use_sample_rate)) + { + + INSTR_TIME_SET_CURRENT(now); + diff = now; + INSTR_TIME_SUBTRACT(diff, st->txn_begin); + usec = (double) INSTR_TIME_GET_MICROSEC(diff); #ifndef WIN32 - /* This is more than we really ought to know about instr_time */ - fprintf(logfile, "%d %d %.0f %d %ld %ld\n", - st->id, st->cnt, usec, st->use_file, - (long) now.tv_sec, (long) now.tv_usec); + /* This is more than we really ought to know about instr_time */ + fprintf(logfile, "%d %d %.0f %d %ld %ld\n", + st->id, st->cnt, usec, st->use_file, + (long) now.tv_sec, (long) now.tv_usec); #else - /* On Windows, instr_time doesn't provide a timestamp anyway */ - fprintf(logfile, "%d %d %.0f %d 0 0\n", - st->id, st->cnt, usec, st->use_file); + /* On Windows, instr_time doesn't provide a timestamp anyway */ + fprintf(logfile, "%d %d %.0f %d 0 0\n", + st->id, st->cnt, usec, st->use_file); #endif + } } if 
(commands[st->state]->type == SQL_COMMAND) @@ -1918,6 +1930,7 @@ main(int argc, char **argv) {"index-tablespace", required_argument, NULL, 3}, {"tablespace", required_argument, NULL, 2}, {"unlogged-tables", no_argument, &unlogged_tables, 1}, + {"sampling-rate", required_argument, NULL, 4}, {NULL, 0, NULL, 0} }; @@ -2123,6 +2136,14 @@ main(int argc, char **argv) case 3: /* index-tablespace */ index_tablespace = optarg; break; + case 4: + use_sample_rate = atof(optarg)/100; + if (use_sample_rate <= 0.0 || use_sample_rate > 1.0) + { + fprintf(stderr, "invalid sampling rate: %f\n", use_sample_rate); + exit(1); + } + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -2158,6 +2179,12 @@ main(int argc, char **argv) exit(1); } + /* --sampling-rate may be used only with -l */ + if (use_sample_rate > 0 && (! use_log)) { + fprintf(stderr, "log sampling rate is allowed only when logging transactions\n"); + exit(1); + } + /* * is_latencies only works with multiple threads in thread-based * implementations, not fork-based ones, because it supposes that the diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml index 437fcea..67b21aa 100644 --- a/doc/src/sgml/pgbench.sgml +++ b/doc/src/sgml/pgbench.sgml @@ -317,6 +317,22 @@ pgbench options dbname + rate + + + Sampling rate, in percent, used when writing data into the log. 100 means all + transactions will be logged, 1 means only 1% of the transactions will be logged. + The value may be fractional, e.g. 12.5 means 12.5% of the transactions will be logged. + + + Take the sampling rate into account when processing the log file. For example, when + computing tps values you need to multiply the numbers accordingly (with a 1% sample + you'll get only 1/100 of the actual tps). + + + + + querymode