diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c index 00cab73..e849b2b 100644 --- a/contrib/pgbench/pgbench.c +++ b/contrib/pgbench/pgbench.c @@ -145,6 +145,9 @@ char *index_tablespace = NULL; #define naccounts 100000 bool use_log; /* log transaction latencies to a file */ +bool use_log_sampling; /* sample the log randomly */ +int nsample_rate = 100; /* default log sampling rate */ + bool is_connect; /* establish connection for each transaction */ bool is_latencies; /* report per-command latencies */ int main_pid; /* main process id used in log filename */ @@ -364,6 +367,7 @@ usage(void) " -f FILENAME read transaction script from FILENAME\n" " -j NUM number of threads (default: 1)\n" " -l write transaction times to log file\n" + " -R NUM log sampling rate in pct (default: 100)\n" " -M simple|extended|prepared\n" " protocol for submitting queries to server (default: simple)\n" " -n do not run VACUUM before tests\n" @@ -877,21 +881,25 @@ top: instr_time diff; double usec; - INSTR_TIME_SET_CURRENT(now); - diff = now; - INSTR_TIME_SUBTRACT(diff, st->txn_begin); - usec = (double) INSTR_TIME_GET_MICROSEC(diff); + /* either no sampling or is within the sample */ + if ((! 
use_log_sampling) || (rand() % 100 < nsample_rate)) { + + INSTR_TIME_SET_CURRENT(now); + diff = now; + INSTR_TIME_SUBTRACT(diff, st->txn_begin); + usec = (double) INSTR_TIME_GET_MICROSEC(diff); #ifndef WIN32 - /* This is more than we really ought to know about instr_time */ - fprintf(logfile, "%d %d %.0f %d %ld %ld\n", - st->id, st->cnt, usec, st->use_file, - (long) now.tv_sec, (long) now.tv_usec); + /* This is more than we really ought to know about instr_time */ + fprintf(logfile, "%d %d %.0f %d %ld %ld\n", + st->id, st->cnt, usec, st->use_file, + (long) now.tv_sec, (long) now.tv_usec); #else - /* On Windows, instr_time doesn't provide a timestamp anyway */ - fprintf(logfile, "%d %d %.0f %d 0 0\n", - st->id, st->cnt, usec, st->use_file); + /* On Windows, instr_time doesn't provide a timestamp anyway */ + fprintf(logfile, "%d %d %.0f %d 0 0\n", + st->id, st->cnt, usec, st->use_file); #endif + } } if (commands[st->state]->type == SQL_COMMAND) @@ -1962,7 +1970,7 @@ main(int argc, char **argv) state = (CState *) xmalloc(sizeof(CState)); memset(state, 0, sizeof(CState)); - while ((c = getopt_long(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:R:D:F:M:", long_options, &optindex)) != -1) { switch (c) { @@ -2070,6 +2078,15 @@ main(int argc, char **argv) case 'l': use_log = true; break; + case 'R': + use_log_sampling = true; + nsample_rate = atoi(optarg); + if (nsample_rate <= 0 || nsample_rate > 100) + { + fprintf(stderr, "invalid sampling rate: %d\n", nsample_rate); + exit(1); + } + break; case 'f': ttype = 3; filename = optarg; @@ -2158,6 +2175,12 @@ main(int argc, char **argv) exit(1); } + /* -R may be used only with -l */ + if (use_log_sampling && (! 
use_log)) { + fprintf(stderr, "log sampling rate is allowed only when logging transactions\n"); + exit(1); + } + /* * is_latencies only works with multiple threads in thread-based * implementations, not fork-based ones, because it supposes that the diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml index 437fcea..962e446 100644 --- a/doc/src/sgml/pgbench.sgml +++ b/doc/src/sgml/pgbench.sgml @@ -317,6 +317,22 @@ pgbench options dbname + rate + + + Sampling rate, in percent, used when writing data into the log. 100 means all + transactions will be logged, 1 means only 1% of the transactions will be logged. + Default is 100 (all transactions). + + + Be careful when processing the log file - e.g. when computing tps values, you + need to multiply the numbers accordingly (e.g. with a 1% sample you'll get 1/100 + of the actual tps). + + + + + querymode