From: "Joe Shevland" <jshevland(at)j-elite(dot)com>
To: <pgsql-jdbc(at)postgresql(dot)org>
Subject: BLOB performance test FYI
Date: 2002-04-17 03:17:58
Message-ID: HEECIHEEJDBMCCGMGIOBCEBOCHAA.jshevland@j-elite.com
Lists: pgsql-jdbc

Hi,

I'm adding a page on using binary data to a primer doc I'm writing, and have been running some tests with PostgreSQL 7.2 and the latest JDBC code from CVS (as of 14/4).

The following output shows some results of the test on a FreeBSD 4.5-S machine (P3 1GHz CPU, 1GB RAM) running PostgreSQL 7.2 and JDK 1.3.1p6 (OpenJIT); the test stores and retrieves increasingly large amounts of dummy binary data. The bytea type seems to cause real memory problems around the 4MB mark, presumably because the driver has to build an INSERT statement/StringBuffer big enough to hold the escaped data.
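If that guess is right, the arithmetic alone explains a lot: the dummy files the test writes are zero-filled, so essentially every byte has to be escaped, and each escaped byte becomes a five-character \\nnn sequence in the SQL text. The 4MB case is therefore roughly 20MB of statement text before it's even sent. Here's a rough sketch of that kind of escaping (my assumption about the mechanism, not the driver's actual code):

    // Illustrative only -- an assumption about the sort of escaping involved,
    // not the driver's actual implementation.
    static String escapeBytea(byte[] data) {
        StringBuffer sb = new StringBuffer(data.length * 5);
        for (int i = 0; i < data.length; i++) {
            int b = data[i] & 0xff;
            if (b < 32 || b > 126 || b == '\'' || b == '\\') {
                sb.append("\\\\");                      // backslash, doubled for the string literal
                String oct = Integer.toOctalString(b);
                for (int pad = oct.length(); pad < 3; pad++) {
                    sb.append('0');                     // pad to three octal digits
                }
                sb.append(oct);
            } else {
                sb.append((char) b);
            }
        }
        return sb.toString();
    }

On top of that, Java chars are two bytes and StringBuffer roughly doubles its buffer as it grows, so the transient peak is a good deal bigger still.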

I was trying to see what data size is best suited to which type. Even around 500k there seems to be a big overhead for storing bytea, again I'd guess from converting the mostly unprintable binary data into the PGbytea string, though I'm not sure it's an entirely fair comparison. I don't suppose there's an easy way of streaming bytea values the way the LargeObject manager does? I could be missing something fundamental about the bytea type, but given its limits, should the JDBC driver be using bytea by default?

Cheers,
Joe

---
www:~user# java -Xmx128m -classpath .:postgresql.jar BLOBTest postgres username password
[bytea] storing 0k of data -> 24ms
[bytea] retrieving 0k of data -> 7ms
[LO/oid] storing 0k of data -> 19ms
[LO/oid] retrieving 0k of data -> 4ms
[bytea] storing 1k of data -> 8ms
[bytea] retrieving 1k of data -> 1ms
[LO/oid] storing 1k of data -> 3ms
[LO/oid] retrieving 1k of data -> 3ms
[bytea] storing 500k of data -> 4943ms
[bytea] retrieving 500k of data -> 136ms
[LO/oid] storing 500k of data -> 142ms
[LO/oid] retrieving 500k of data -> 28ms
[bytea] storing 1000k of data -> 12423ms
[bytea] retrieving 1000k of data -> 270ms
[LO/oid] storing 1000k of data -> 285ms
[LO/oid] retrieving 1000k of data -> 55ms
Exception in thread "main" java.lang.OutOfMemoryError
        at java.lang.StringBuffer.expandCapacity(StringBuffer.java, Compiled Code)
        at java.lang.StringBuffer.append(StringBuffer.java, Compiled Code)
        at org.postgresql.jdbc2.PreparedStatement.setString(Unknown Source)
        at org.postgresql.jdbc2.PreparedStatement.setBytes(Unknown Source)
        at org.postgresql.jdbc2.PreparedStatement.setBinaryStream(Unknown Source)
        at BLOBTest.storeBlobAsBytea(BLOBTest.java, Compiled Code)
        at BLOBTest.testData(BLOBTest.java, Compiled Code)
        at BLOBTest.main(BLOBTest.java, Compiled Code)

The LO/oid output uses the LargeObject manager, and performance-wise it seems to scale well until about 65MB in my case (the failure below goes away if I bump -Xmx up to 256M). This output is from a run with the bytea tests commented out:

---
www:~user# java -Xmx128m -classpath .:postgresql.jar BLOBTest postgres username password
[LO/oid] storing 0k of data -> 38ms
[LO/oid] retrieving 0k of data -> 5ms
[LO/oid] storing 1k of data -> 3ms
[LO/oid] retrieving 1k of data -> 3ms
[LO/oid] storing 500k of data -> 139ms
[LO/oid] retrieving 500k of data -> 30ms
[LO/oid] storing 1000k of data -> 268ms
[LO/oid] retrieving 1000k of data -> 55ms
[LO/oid] storing 4000k of data -> 1162ms
[LO/oid] retrieving 4000k of data -> 227ms
[LO/oid] storing 16000k of data -> 4817ms
[LO/oid] retrieving 16000k of data -> 868ms
[LO/oid] storing 32000k of data -> 9711ms
[LO/oid] retrieving 32000k of data -> 1704ms
[LO/oid] storing 63999k of data -> 19748ms
Unknown Response Type
Unknown Response Type
        at org.postgresql.core.QueryExecutor.execute(Unknown Source)
        at org.postgresql.Connection.ExecSQL(Unknown Source)
        at org.postgresql.Connection.ExecSQL(Unknown Source)
        at org.postgresql.Connection.setAutoCommit(Unknown Source)
        at BLOBTest.retrieveBlobAsLO(BLOBTest.java, Compiled Code)
        at BLOBTest.testData(BLOBTest.java, Compiled Code)
        at BLOBTest.main(BLOBTest.java, Compiled Code)
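For what it's worth, if the heap really is the limit there, the obvious saving on the retrieval side would be to stop allocating a buffer the size of the whole object in retrieveBlobAsLO and read it in chunks instead. A sketch (assuming the read(int) overload of LargeObject, which returns a byte[]):

    // Sketch: stream the large object in fixed-size chunks rather than one
    // object-sized buffer, so the heap footprint stays at the chunk size.
    LargeObject obj = lobj.open(oid, LargeObjectManager.READ);
    int remaining = obj.size();
    while (remaining > 0) {
        byte[] chunk = obj.read(Math.min(remaining, 8192));
        if (chunk == null || chunk.length == 0) {
            break;                       // defensive: don't spin on a short read
        }
        // ... do something with the chunk here ...
        remaining -= chunk.length;
    }
    obj.close();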

--- BLOBTest.java

import java.io.*;
import java.sql.*;
import org.postgresql.largeobject.*;

public class BLOBTest {

    public BLOBTest()
        throws Exception {

        Class.forName("org.postgresql.Driver");

        createDummyDataFile("bin0", 256);
        createDummyDataFile("bin1", 1024);
        createDummyDataFile("bin512", 512000);
        createDummyDataFile("bin1024", 1024000);
        createDummyDataFile("bin4096", 4096000);
        createDummyDataFile("bin16384", 16384000);
        createDummyDataFile("bin32768", 32768000);
        createDummyDataFile("bin65535", 65535000);
    }

    public static void main( String args[] ) {
        if ( args.length != 3 ) {
            System.err.println("usage: java BLOBTest <database_spec> <username> <password>");
            System.exit(1);
        }

        try {

            BLOBTest test = new BLOBTest();
            long time = 0;

            Connection conn = DriverManager.getConnection(
                "jdbc:postgresql:" + args[0], args[1], args[2]);

            test.testData(conn, "bytea", "bin0", 256);
            test.testData(conn, "LO/oid", "bin0", 256);

            test.testData(conn, "bytea", "bin1", 1024);
            test.testData(conn, "LO/oid", "bin1", 1024);

            test.testData(conn, "bytea", "bin512", 512000);
            test.testData(conn, "LO/oid", "bin512", 512000);

            test.testData(conn, "bytea", "bin1024", 1024000);
            test.testData(conn, "LO/oid", "bin1024", 1024000);

            test.testData(conn, "bytea", "bin4096", 4096000);
            test.testData(conn, "LO/oid", "bin4096", 4096000);

            test.testData(conn, "bytea", "bin16384", 16384000);
            test.testData(conn, "LO/oid", "bin16384", 16384000);

            test.testData(conn, "bytea", "bin32768", 32768000);
            test.testData(conn, "LO/oid", "bin32768", 32768000);

            test.testData(conn, "bytea", "bin65535", 65535000);
            test.testData(conn, "LO/oid", "bin65535", 65535000);

        } catch ( Exception e ) {
            System.err.println(e);
            e.printStackTrace();
        }
    }

    private void createDummyDataFile( String fileName, int size )
        throws Exception {

        byte data[] = new byte[size];
        FileOutputStream fos = new FileOutputStream(fileName);
        fos.write(data);
        fos.close();
    }

    private void testData( Connection conn, String method, String fileName, int size )
        throws Exception {

        long time;

        // Garbage collect to clean up any garbage objects
        System.gc();

        if ( method.equals("bytea") ) {
            time = storeBlobAsBytea(conn, fileName);
            System.err.println("[" + method + "] storing " + (size / 1024) + "k of data -> " + time + "ms");
            time = retrieveBlobAsBytea(conn, fileName);
            System.err.println("[" + method + "] retrieving " + (size / 1024) + "k of data -> " + time + "ms");
        } else {
            time = storeBlobAsLO(conn, fileName);
            System.err.println("[" + method + "] storing " + (size / 1024) + "k of data -> " + time + "ms");
            time = retrieveBlobAsLO(conn, fileName);
            System.err.println("[" + method + "] retrieving " + (size / 1024) + "k of data -> " + time + "ms");
        }
    }

    private long storeBlobAsBytea( Connection conn, String fileName )
        throws Exception {

        long t1, t2;

        t1 = System.currentTimeMillis();
        File file = new File(fileName);
        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));
        PreparedStatement ps = conn.prepareStatement("INSERT INTO blob_bytea VALUES (?, ?)");
        ps.setString(1, file.getName());
        ps.setBinaryStream(2, bis, (int) file.length());
        ps.executeUpdate();
        ps.close();
        bis.close();
        t2 = System.currentTimeMillis();
        return (t2 - t1);
    }

    private long retrieveBlobAsBytea( Connection conn, String fileName )
        throws Exception {

        long t1, t2;

        t1 = System.currentTimeMillis();
        PreparedStatement ps = conn.prepareStatement(
            "SELECT blob_data FROM blob_bytea WHERE blob_name=?");
        ps.setString(1, fileName);
        ResultSet rs = ps.executeQuery();
        if ( rs.next() ) {
            byte[] imgBytes = rs.getBytes(1);
            // use the bytes in some way here
        }
        rs.close();
        ps.close();
        t2 = System.currentTimeMillis();
        return (t2 - t1);
    }

    private long storeBlobAsLO( Connection conn, String fileName )
        throws Exception {

        long t1, t2;

        boolean oldState = conn.getAutoCommit();
        try {

            t1 = System.currentTimeMillis();

            // All LargeObject API calls must be within a transaction
            conn.setAutoCommit(false);

            // Get the Large Object Manager to perform operations with
            LargeObjectManager lobj = ((org.postgresql.Connection) conn).getLargeObjectAPI();

            // create a new large object
            int oid = lobj.create(LargeObjectManager.READ | LargeObjectManager.WRITE);

            // open the large object for write
            LargeObject obj = lobj.open(oid, LargeObjectManager.WRITE);

            // Now open the file
            File file = new File(fileName);
            BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));

            // copy the data from the file to the large object
            byte buf[] = new byte[2048];
            int s, tl = 0;
            while ((s = bis.read(buf, 0, 2048)) > 0) {
                obj.write(buf, 0, s);
                tl += s;
            }
            // Close the large object
            obj.close();
            // Now insert the row
            PreparedStatement ps = conn.prepareStatement("INSERT INTO blob_lo VALUES (?, ?)");
            ps.setString(1, file.getName());
            ps.setInt(2, oid);
            ps.executeUpdate();
            ps.close();
            bis.close();

            t2 = System.currentTimeMillis();
            return (t2 - t1);

        } finally {
            conn.setAutoCommit(oldState);
        }
    }

    private long retrieveBlobAsLO( Connection conn, String fileName )
        throws Exception {

        long t1, t2;

        boolean oldState = conn.getAutoCommit();

        try {

            t1 = System.currentTimeMillis();

            // All LargeObject API calls must be within a transaction
            conn.setAutoCommit(false);

            // Get the Large Object Manager to perform operations with
            LargeObjectManager lobj = ((org.postgresql.Connection) conn).getLargeObjectAPI();
            PreparedStatement ps = conn.prepareStatement("SELECT blob_data FROM blob_lo WHERE blob_name=?");
            ps.setString(1, fileName);
            ResultSet rs = ps.executeQuery();

            while (rs.next()) {
                // open the large object for reading
                int oid = rs.getInt(1);
                LargeObject obj = lobj.open(oid, LargeObjectManager.READ);
                // read the data
                byte buf[] = new byte[obj.size()];
                obj.read(buf, 0, obj.size());
                // do something with the data read here
                // Close the object
                obj.close();
            }
            rs.close();
            ps.close();

            t2 = System.currentTimeMillis();
            return (t2 - t1);
        } finally {
            conn.setAutoCommit(oldState);
        }
    }
}

---
Finally, to build or run:

javac -classpath .:postgresql.jar BLOBTest.java

java -Xmx128m -classpath .:postgresql.jar BLOBTest //somehost/db user password
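The test assumes the two tables already exist; I create them beforehand with something along these lines (reconstructed from the statements in the test, so treat the types as a sketch):

    // Run once before the test; column names come from the INSERT/SELECT
    // statements in BLOBTest, the types shown are an assumption.
    Statement st = conn.createStatement();
    st.executeUpdate("CREATE TABLE blob_bytea (blob_name text, blob_data bytea)");
    st.executeUpdate("CREATE TABLE blob_lo (blob_name text, blob_data oid)");
    st.close();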
