Query parsing speedup patch
Hi,
I spent one day trying to to optimise the parsing of ResultSets from the
stream. The attached patch gives around 35% speedup when parsing larger
queries containing equal amount of int4, int8 or varchar columns
assuming the system is CPU bound. I think parsing of other field types
could be similarly optimised.
I also attached the test code which I used to test the performance.
VisibleBufferedInputStream
- from scratch implementation that replaces BufferedInputStream
* not synchronised
* allows direct access to the buffer[] avoiding useless copies
when converting to String
* has method for scanning the length of next null terminated string
PGStream:
- uses VisibleBuffereInputStream
* faster implementations for for ReceiveIntegerR and ReceiveString
Encoding, AbstractJdbc2ResultSet
- parses int and long values directly from byte[] -> number instead
of first creating a throw-away string
Also, what would you think of patches that change all Vectors to
ArrayList and Hashtables to HashMaps? The difference is not too visible
when benchmarking with one thread and CPU but I believe that in larger
machines the useless synchronisation can slow down the system by
flushing the CPU write buffers. Also JIT could more freely modify the
code when there are less synchronisation points.
diff -Nur postgresql-jdbc-8.2dev-503.src/org/postgresql/core/Encoding.java postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/core/Encoding.java
--- postgresql-jdbc-8.2dev-503.src/org/postgresql/core/Encoding.java 2005-07-04 05:18:32.000000000 +0300
+++ postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/core/Encoding.java 2006-07-25 22:03:38.000000000 +0300
@@ -16,7 +16,7 @@
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.io.IOException;
-import java.util.Hashtable;
+import java.util.HashMap;
/**
* Representation of a particular character encoding.
@@ -28,7 +28,7 @@
/*
* Preferred JVM encodings for backend encodings.
*/
- private static final Hashtable encodings = new Hashtable();
+ private static final HashMap encodings = new HashMap();
static {
//Note: this list should match the set of supported server
@@ -75,10 +75,22 @@
}
private final String encoding;
+ private final boolean fastASCIINumbers;
protected Encoding(String encoding)
{
this.encoding = encoding;
+ fastASCIINumbers = testAsciiNumbers();
+ }
+
+ /**
+ * Returns true if this encoding has characters
+ * '-' and '0'..'9' in exactly same posision as ascii.
+ *
+ * @return true if the bytes can be scanned directly for ascii numbers.
+ */
+ public boolean hasAsciiNumbers() {
+ return fastASCIINumbers;
}
/**
@@ -115,9 +127,9 @@
// encoding in the JVM we use that. Otherwise we fall back
// to the default encoding of the JVM.
- if (encodings.containsKey(databaseEncoding))
+ String[] candidates = (String[]) encodings.get(databaseEncoding);
+ if (candidates != null)
{
- String[] candidates = (String[]) encodings.get(databaseEncoding);
for (int i = 0; i < candidates.length; i++)
{
if (isAvailable(candidates[i]))
@@ -254,4 +266,28 @@
public String toString() {
return (encoding == null ? "<default JVM encoding>" : encoding);
}
+
+ /**
+ * Checks weather this encoding is compatible with ASCII for the number
+ * characters '-' and '0'..'9'. Where compatible means that they are encoded
+ * with exactly same values.
+ *
+ * @return If faster ASCII number parsing can be used with this encoding.
+ */
+ private boolean testAsciiNumbers() {
+ // TODO: test all postgres supported encoding to see if there are
+ // any which do _not_ have ascii numbers in same location
+ // at least all the encoding listed in the encodings hashmap have
+ // working ascii numbers
+ try {
+ String test = "-0123456789";
+ byte[] bytes = encode(test);
+ String res = new String(bytes, "US-ASCII");
+ return test.equals(res);
+ } catch (java.io.UnsupportedEncodingException e) {
+ return false;
+ } catch (IOException e) {
+ return false;
+ }
+ }
}
diff -Nur postgresql-jdbc-8.2dev-503.src/org/postgresql/core/PGStream.java postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/core/PGStream.java
--- postgresql-jdbc-8.2dev-503.src/org/postgresql/core/PGStream.java 2006-04-29 16:30:23.000000000 +0300
+++ postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/core/PGStream.java 2006-07-25 22:03:38.000000000 +0300
@@ -9,7 +9,6 @@
*/
package org.postgresql.core;
-import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
@@ -37,7 +36,7 @@
private final int port;
private Socket connection;
- private InputStream pg_input;
+ private VisibleBufferedInputStream pg_input;
private OutputStream pg_output;
private byte[] streamBuffer;
@@ -103,7 +102,7 @@
connection.setTcpNoDelay(true);
// Buffer sizes submitted by Sverre H Huseby <sverrehu(at)online(dot)no>
- pg_input = new BufferedInputStream(connection.getInputStream(), 8192);
+ pg_input = new VisibleBufferedInputStream(connection.getInputStream(), 8192);
pg_output = new BufferedOutputStream(connection.getOutputStream(), 8192);
if (encoding != null)
@@ -268,14 +267,15 @@
*/
public int ReceiveIntegerR(int siz) throws IOException
{
+ if (!pg_input.ensureBytes(siz)) {
+ throw new EOFException();
+ }
int n = 0;
for (int i = 0 ; i < siz ; i++)
{
- int b = pg_input.read();
+ int b = pg_input.readRaw() & 0xFF;
- if (b < 0)
- throw new EOFException();
n = b | (n << 8);
}
@@ -315,36 +315,11 @@
*/
public String ReceiveString() throws IOException
{
- int i = 0;
- byte[] rst = byte_buf;
- int buflen = rst.length;
-
- while (true)
- {
- int c = pg_input.read();
-
- if (c < 0)
- throw new EOFException();
-
- if (c == 0)
- break;
-
- if (i == buflen)
- {
- // Grow the buffer.
- buflen *= 2; // 100% bigger
- if (buflen <= 0) // Watch for overflow
- throw new IOException("Impossibly long string");
-
- byte[] newrst = new byte[buflen];
- System.arraycopy(rst, 0, newrst, 0, i);
- rst = newrst;
- }
-
- rst[i++] = (byte)c;
- }
-
- return encoding.decode(rst, 0, i);
+ int len = pg_input.scanCStringLength();
+ String res = encoding.decode(pg_input.getBuffer(), pg_input.getIndex(),
+ len - 1);
+ pg_input.skip(len);
+ return res;
}
/**
diff -Nur postgresql-jdbc-8.2dev-503.src/org/postgresql/core/VisibleBufferedInputStream.java postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/core/VisibleBufferedInputStream.java
--- postgresql-jdbc-8.2dev-503.src/org/postgresql/core/VisibleBufferedInputStream.java 1970-01-01 02:00:00.000000000 +0200
+++ postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/core/VisibleBufferedInputStream.java 2006-07-25 22:04:40.000000000 +0300
@@ -0,0 +1,284 @@
+/*-------------------------------------------------------------------------
+ *
+ * Copyright (c) 2006, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgjdbc/org/postgresql/core/PGStream.java,v 1.19 2006/04/29 13:30:23 jurka Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+package org.postgresql.core;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A faster version of BufferedInputStream. Does no synchronisation and
+ * allows direct access to the used byte[] buffer.
+ *
+ * @author Mikko Tiihonen
+ */
+public class VisibleBufferedInputStream extends InputStream {
+
+ /**
+ * If there is not enough space at the end of buffer for the
+ * required bytes but only COMPACT_THRESHOLD or smaller amount of
+ * buffer is actually in use then compact the buffer by copying
+ * the bytes to the beginning.
+ */
+ private static final int COMPACT_THRESHOLD = 512;
+
+ /**
+ * In how large spans is the C string zero-byte scanned.
+ */
+ private static final int STRING_SCAN_SPAN = 1024;
+
+ /**
+ * The wrapped input stream.
+ */
+ private final InputStream wrapped;
+
+ /**
+ * The buffer.
+ */
+ private byte[] buffer;
+
+ /**
+ * Current read position in the buffer.
+ */
+ private int index;
+
+ /**
+ * How far is the buffer filled with valid data.
+ */
+ private int endIndex;
+
+ /**
+ * Creates a new buffer around the given stream.
+ *
+ * @param in The stream to buffer.
+ * @param bufferSize The initial size of the buffer.
+ */
+ public VisibleBufferedInputStream(InputStream in, int bufferSize) {
+ wrapped = in;
+ buffer = new byte[bufferSize];
+ }
+
+ /**
+ * {(at)inheritDoc}
+ */
+ public int read() throws IOException {
+ if (ensureBytes(1)) {
+ return buffer[index++] & 0xFF;
+ }
+ return -1;
+ }
+
+ /**
+ * Reads byte from the buffer without any checks. This method never
+ * reads from the underlaying stream.
+ * Before calling this method the {(at)link #ensureBytes} method must
+ * have been called.
+ *
+ * @return The next byte from the buffer.
+ * @throws ArrayIndexOutOfBoundsException If ensureBytes was not called
+ * to make sure the buffer contains the byte.
+ */
+ public byte readRaw() {
+ return buffer[index++];
+ }
+
+ /**
+ * Ensures that the buffer contains at least n bytes.
+ * This method invalidates the buffer and index fields.
+ *
+ * @param n The amount of bytes to ensure exists in buffer
+ * @return true if required bytes are available and false if EOF
+ * @throws IOException If reading of the wrapped stream failed.
+ */
+ public boolean ensureBytes(int n) throws IOException {
+ int required = n - endIndex + index;
+ while (required > 0) {
+ if (!readMore(required)) {
+ return false;
+ }
+ required = n - endIndex + index;
+ }
+ return true;
+ }
+
+ /**
+ * Reads more bytes into the buffer.
+ *
+ * @param required How much should be at least read.
+ * @return True if at least some bytes were read.
+ * @throws IOException If reading of the wrapped stream failed.
+ */
+ private boolean readMore(int required) throws IOException {
+ int canFit = buffer.length - endIndex;
+ if (canFit < required) {
+ int used = endIndex - index;
+ if (used == 0) {
+ index = 0;
+ endIndex = 0;
+ } else if (used < COMPACT_THRESHOLD
+ && required < buffer.length - COMPACT_THRESHOLD) {
+ compact();
+ } else {
+ doubleBuffer();
+ }
+ canFit = buffer.length - endIndex;
+ }
+ int read = wrapped.read(buffer, endIndex, canFit);
+ if (read < 0) {
+ return false;
+ }
+ endIndex += read;
+ return true;
+ }
+
+ /**
+ * Doubles the size of the buffer.
+ */
+ private void doubleBuffer() {
+ byte[] buf = new byte[buffer.length * 2];
+ moveBufferTo(buf);
+ buffer = buf;
+ }
+
+ /**
+ * Compacts the unread bytes of the buffer to the beginning of the buffer.
+ */
+ private void compact() {
+ moveBufferTo(buffer);
+ }
+
+ /**
+ * Moves bytes from the buffer to the begining of the destination buffer.
+ * Also sets the index and endIndex variables.
+ *
+ * @param dest The destination buffer.
+ */
+ private void moveBufferTo(byte[] dest) {
+ int size = endIndex - index;
+ System.arraycopy(buffer, index, dest, 0, size);
+ index = 0;
+ endIndex = size;
+ }
+
+ /**
+ * {(at)inheritDoc}
+ */
+ public int read(byte to[], int off, int len) throws IOException {
+ if ((off | len | (off + len) | (to.length - (off + len))) < 0) {
+ throw new IndexOutOfBoundsException();
+ } else if (len == 0) {
+ return 0;
+ }
+
+ // first copy from buffer
+ int avail = endIndex - index;
+ if (avail > 0) {
+ if (len <= avail) {
+ System.arraycopy(buffer, index, to, off, len);
+ index += len;
+ return len;
+ }
+ System.arraycopy(buffer, index, to, off, avail);
+ len -= avail;
+ off += avail;
+ }
+ int read = avail;
+
+ // good place to reset index because the buffer is fully drained
+ index = 0;
+ endIndex = 0;
+
+ // then directly from wrapped stream
+ do {
+ int r = wrapped.read(to, off, len);
+ if (r <= 0) {
+ return (read == 0) ? r : read;
+ }
+ read += r;
+ off += r;
+ len -= r;
+ } while (len > 0);
+
+ return read;
+ }
+
+ /**
+ * {(at)inheritDoc}
+ */
+ public long skip(long n) throws IOException {
+ int avail = endIndex - index;
+ if (avail >= n) {
+ index += n;
+ return n;
+ }
+ n -= avail;
+ index = 0;
+ endIndex = 0;
+ return wrapped.skip(n);
+ }
+
+ /**
+ * {(at)inheritDoc}
+ */
+ public int available() throws IOException {
+ int avail = endIndex - index;
+ return avail > 0 ? avail : wrapped.available();
+ }
+
+ /**
+ * {(at)inheritDoc}
+ */
+ public void close() throws IOException {
+ wrapped.close();
+ }
+
+ /**
+ * Returns direct handle to the used buffer. Use the {(at)link #ensureBytes}
+ * to prefill required bytes the buffer and {(at)link #getIndex} to fetch
+ * the current position of the buffer.
+ *
+ * @return The underlaying buffer.
+ */
+ public byte[] getBuffer() {
+ return buffer;
+ }
+
+ /**
+ * Returns the current read position in the buffer.
+ *
+ * @return the current read position in the buffer.
+ */
+ public int getIndex() {
+ return index;
+ }
+
+ /**
+ * Scans the length of the next null terminated string (C-style string) from
+ * the stream.
+ *
+ * @return The length of the next null terminated string.
+ * @throws IOException If reading of stream fails.
+ * @throws EOFxception If the stream did not contain any null terminators.
+ */
+ public int scanCStringLength() throws IOException {
+ int pos = index;
+ for (;;) {
+ while (index < endIndex) {
+ if (buffer[pos++] == '\0') {
+ return pos - index;
+ }
+ }
+ if (!readMore(STRING_SCAN_SPAN)) {
+ throw new EOFException();
+ }
+ pos = index;
+ }
+ }
+}
diff -Nur postgresql-jdbc-8.2dev-503.src/org/postgresql/jdbc2/AbstractJdbc2ResultSet.java postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/jdbc2/AbstractJdbc2ResultSet.java
--- postgresql-jdbc-8.2dev-503.src/org/postgresql/jdbc2/AbstractJdbc2ResultSet.java 2005-12-04 23:40:33.000000000 +0200
+++ postgresql-jdbc-8.2dev-503.src.patched/org/postgresql/jdbc2/AbstractJdbc2ResultSet.java 2006-07-25 22:03:38.000000000 +0300
@@ -1957,13 +1957,148 @@
public int getInt(int columnIndex) throws SQLException
{
+ Encoding encoding = connection.getEncoding();
+ if (encoding.hasAsciiNumbers()) {
+ try {
+ return getFastInt(columnIndex);
+ } catch (NumberFormatException ex) {
+ }
+ }
return toInt( getFixedString(columnIndex) );
}
public long getLong(int columnIndex) throws SQLException
{
+ Encoding encoding = connection.getEncoding();
+ if (encoding.hasAsciiNumbers()) {
+ try {
+ return getFastLong(columnIndex);
+ } catch (NumberFormatException ex) {
+ }
+ }
return toLong( getFixedString(columnIndex) );
}
+
+ private static final NumberFormatException FAST_NUMBER_FAILED =
+ new NumberFormatException();
+
+ /**
+ * Optimised byte[] to number parser.
+ *
+ * @param columnIndex The column to parse.
+ * @return The parsed number.
+ * @throws SQLException If an error occurs while fetching column.
+ * @throws NumberFormatException If the number is invalid or the
+ * out of range for fast parsing. The value must then be parsed by
+ * {(at)link #toLong(String)}.
+ */
+ private long getFastLong(int columnIndex) throws SQLException,
+ NumberFormatException {
+
+ checkResultSet( columnIndex );
+
+ columnIndex--;
+ if (this_row[columnIndex] == null) {
+ return 0; // SQL NULL
+ }
+
+ byte[] bytes = this_row[columnIndex];
+
+ if (bytes.length == 0) {
+ throw FAST_NUMBER_FAILED;
+ }
+
+ long val = 0;
+ int start;
+ boolean neg;
+ if (bytes[0] == '-') {
+ neg = true;
+ start = 1;
+ if (bytes.length > 19) {
+ throw FAST_NUMBER_FAILED;
+ }
+ } else {
+ start = 0;
+ neg = false;
+ if (bytes.length > 18) {
+ throw FAST_NUMBER_FAILED;
+ }
+ }
+
+ while (start < bytes.length) {
+ byte b = bytes[start++];
+ if (b < '0' || b > '9') {
+ throw FAST_NUMBER_FAILED;
+ }
+
+ val *= 10;
+ val += b - '0';
+ }
+
+ if (neg) {
+ val = -val;
+ }
+
+ return val;
+ }
+
+ /**
+ * Optimised byte[] to number parser.
+ *
+ * @param columnIndex The column to parse.
+ * @return The parsed number.
+ * @throws SQLException If an error occurs while fetching column.
+ * @throws NumberFormatException If the number is invalid or the
+ * out of range for fast parsing. The value must then be parsed by
+ * {(at)link #toLong(String)}.
+ */
+ private int getFastInt(int columnIndex) throws SQLException {
+ checkResultSet( columnIndex );
+
+ columnIndex--;
+ if (this_row[columnIndex] == null) {
+ return 0; // SQL NULL
+ }
+
+ byte[] bytes = this_row[columnIndex];
+
+ if (bytes.length == 0) {
+ throw FAST_NUMBER_FAILED;
+ }
+
+ int val = 0;
+ int start;
+ boolean neg;
+ if (bytes[0] == '-') {
+ neg = true;
+ start = 1;
+ if (bytes.length > 9) {
+ throw FAST_NUMBER_FAILED;
+ }
+ } else {
+ start = 0;
+ neg = false;
+ if (bytes.length > 8) {
+ throw FAST_NUMBER_FAILED;
+ }
+ }
+
+ while (start < bytes.length) {
+ byte b = bytes[start++];
+ if (b < '0' || b > '9') {
+ throw FAST_NUMBER_FAILED;
+ }
+
+ val *= 10;
+ val += b - '0';
+ }
+
+ if (neg) {
+ val = -val;
+ }
+
+ return val;
+ }
public float getFloat(int columnIndex) throws SQLException
{
@@ -2358,24 +2493,31 @@
*/
public String getFixedString(int col) throws SQLException
{
- String s = getString(col);
-
// Handle SQL Null
wasNullFlag = (this_row[col - 1] == null);
if (wasNullFlag)
return null;
+ String s = getString(col);
+
// if we don't have at least 2 characters it can't be money.
if (s.length() < 2)
return s;
// Handle Money
char ch = s.charAt(0);
+
+ // optimise for non-money type
+ // '(', '$' and '-' are all before numbers in ascii
+ if (ch >= '0') {
+ return s;
+ }
+
if (ch == '(')
{
s = "-" + PGtokenizer.removePara(s).substring(1);
}
- if (ch == '$')
+ else if (ch == '$')
{
s = s.substring(1);
}
/*-------------------------------------------------------------------------
*
* Copyright (c) 2004-2005, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgjdbc/org/postgresql/test/jdbc3/ResultSetTest.java,v 1.3 2005/11/24 02:31:44 oliver Exp $
*
*-------------------------------------------------------------------------
*/
package org.postgresql.test.jdbc3;
import java.sql.*;
import junit.framework.TestCase;
import org.postgresql.test.TestUtil;
public class BenchTest extends TestCase {
/**
* Number of times the table is queried.
*/
private static final int QUERY_LOOP = 20;
/**
* Number of columns in each row.
*/
private static final int COLS = 9;
/**
* Number of rows in temporary table.
*/
private static final int ROWS = 20;
/**
* How many tries long to prime the jit.
*/
private static final int PRIME_LOOP = 100;
/**
* How many times to run the timing and print the times.
*/
private static final int TIMING_LOOP = 200;
private Connection _conn;
private PreparedStatement insert;
private String queryString;
private PreparedStatement query;
public BenchTest(String name) {
super(name);
}
protected void setUp() throws Exception {
_conn = TestUtil.openDB();
createTable();
_conn.setAutoCommit(false);
StringBuilder sb = new StringBuilder();
sb.append("INSERT INTO hold (key");
for (int i=0; i<COLS; ++i) {
sb.append(", val").append(i);
}
sb.append(") VALUES (?");
for (int i=0; i<COLS; ++i) {
sb.append(",?");
}
sb.append(")");
//System.out.println(sb);
insert = _conn.prepareStatement(sb.toString());
sb.setLength(0);
sb.append("SELECT ");
for (int i=0; i<COLS; ++i) {
sb.append("val").append(i).append(',');
}
sb.setLength(sb.length()-1);
sb.append(" FROM hold");
query = _conn.prepareStatement(sb.toString());
query.setFetchSize(ROWS);
}
private void createTable() throws SQLException {
StringBuilder sb = new StringBuilder();
sb.append("CREATE TEMP TABLE hold(key int");
for (int i=0; i<COLS; ++i) {
sb.append(", val").append(i);
switch (i % 3) {
case 0:
sb.append(" varchar(16)");
break;
case 1:
sb.append(" int4");
break;
case 2:
sb.append(" int8");
break;
}
}
sb.append(") ON COMMIT PRESERVE ROWS");
Statement stmt = _conn.createStatement();
stmt.execute(sb.toString());
stmt.close();
_conn.commit();
}
protected void tearDown() throws SQLException {
insert.close();
query.close();
dropTable();
TestUtil.closeDB(_conn);
}
private void dropTable() {
Statement stmt;
try {
stmt = _conn.createStatement();
stmt.execute("DROP TABLE hold");
stmt.close();
_conn.commit();
} catch (SQLException e) {
// ignored
}
}
public void testBenchmark() throws Exception {
insert();
for (int i=0; i<PRIME_LOOP; ++i) {
query();
}
// let jit do it's magic and flush all initial garbage
Thread.sleep(50);
System.runFinalization();
System.gc();
Thread.sleep(500);
double best = 0;
long maxMemUse = 0;
long start, stop;
for (int i=0; i<TIMING_LOOP; ++i) {
start = System.currentTimeMillis();
for (int j=0; j<QUERY_LOOP; ++j) {
query();
}
stop = System.currentTimeMillis();
Runtime rt = Runtime.getRuntime();
long memUse = rt.totalMemory() - rt.freeMemory();
if (memUse > maxMemUse) {
maxMemUse = memUse;
}
double speed = QUERY_LOOP * 1000.0 / (stop-start);
if (speed > best) {
best = speed;
}
System.out.printf("speed: %7.2f %7.2f (%3dms) memory: %5.1fMB %5.1fMB\n",
speed, best, (stop-start), memUse/1024.0/1024.0, maxMemUse/1024.0/1024.0);
}
}
private void query() throws SQLException {
for (int i=0; i<20; ++i) {
ResultSet rs = query.executeQuery();
for (int r=0; r<ROWS; ++r) {
assertTrue(rs.next());
for (int c=0; c<COLS; ++c) {
switch (c % 3) {
case 0:
assertEquals("value", rs.getString(c+1));
break;
case 1:
assertTrue(1000 == rs.getInt(c+1));
break;
case 2:
assertTrue(1000*1000 == rs.getLong(c+1));
break;
}
}
}
rs.close();
}
}
private void insert() throws SQLException {
for (int r=0; r<ROWS; ++r) {
insert.setInt(1, r);
for (int c=0; c<COLS; ++c) {
switch (c % 3) {
case 0:
insert.setString(c+2, "value");
break;
case 1:
insert.setInt(c+2, 1000);
break;
case 2:
insert.setLong(c+2, 1000*1000);
break;
}
}
assertFalse(insert.execute());
assertTrue(1 == insert.getUpdateCount());
}
_conn.commit();
}
}
Home |
Main Index |
Thread Index