/**
 * @file
 * @brief This test shows a bug in the byteaout PostgreSQL code
 * @author Sergey N. Yatskevich
 *
 * If the client and server encodings differ and the client and server locales are
 * not C (ASCII), then the path of a bytea string for client->server transfer is:
 * -# encode the binary data on the client with PQescapeBytea (encode in \\ooo form all
 *    symbols with code == 0x0 || code >= 0x80)
 * -# send the encoded data to the server
 * -# decode the received characters with pg_client_to_server in
 *    src/backend/libpq/pqformat.c#pq_getmsgstring
 * -# decode the bytea C-string with byteain
 * .
 * and the path for server->client transfer is:
 * -# encode the bytea into a C-string with byteaout (encode in \\ooo form all symbols with
 *    !isprint(symbol code) for the current server locale)
 * -# encode the query result characters with pg_server_to_client in
 *    src/backend/libpq/pqformat.c#pq_sendcountedtext
 * -# send the encoded data to the client
 * -# decode the received data with PQunescapeBytea
 *
 * If we have the following client-server configuration:
 * @verbatim
        +---------------------+
        | KOI8 (ru_RU.KOI8-R) |
        |                     |
        |       Server        |
        +--+----------------+-+
           ^                |
           |                |
           |                v
+----------+----------+    +------------------+
| KOI8 (ru_RU.KOI8-R) |    | WIN (CP1251)     |
|                     |    |                  |
|   Client (Linux)    |    | Client (Windows) |
+---------------------+    +------------------+
@endverbatim
 *
 * then, for example, the symbol RUSSIAN_A (code 255) from the Linux client will be stored
 * in the database as the symbol with code 255, and then translated for the Windows client
 * into the symbol with code 192, which is wrong for the bytea data type !!!!
 *
 * When the database has UNICODE encoding, some parts of the bytea strings from the server
 * will not even be sent to the client, because some symbol chains in the current server
 * locale do not form a valid utf8 sequence and cannot be translated to the client encoding
 * properly.
 *
 * The simplest way to solve this problem is to replace the isprint check in byteaout and
 * the >= 0x80 check in PQescapeBytea with isascii && isprint checks, because ASCII
 * symbols have the same byte codes in all locales (and database encodings).
 *
 * Alternatively, byteaout may perform the same symbol check as PQescapeBytea (encode in
 * \\ooo form all symbols with code >= 0x80).
 *
 * But I prefer the first way (with the isascii && isprint check in both byteaout and
 * PQescapeBytea), because it produces nice printable and editable ASCII dump output for
 * debugging :-)).
 *
 * Test steps:
 * -# compile the program with the command (for gcc 3.2.3):
 *    g++ -Wall -pedantic -I`pg_config --includedir` bytea-test.cxx -o bytea-test -lpq
 * -# init a database cluster with a non-C (ASCII) locale (for example ru_RU.KOI8-R)
 * -# create a test database with a non-SQL_ASCII encoding (for example KOI8)
 * -# create a test table in this database with the command:
 *    CREATE TABLE bytea_test (data BYTEA);
 * -# run the test with the command: ./bytea-test dbname test.data
 * -# repeat the three previous steps with a UNICODE database
 *
 * Then apply the patches (at least varlena.c.diff) and run the test again. Everything must
 * complete successfully.
 *
 * @note The @c bytea_test table must be available for deleting, inserting and selecting
 *
 * @note The attached test file (test.data) contains a sequence of all 256 8-bit symbols.
* * @bug I am very sorry for my bad english, but I hope you understand me :-)) */ #include #include #include #include #include using namespace std; int main (int _argc, char **_argv) { // Check arguments count if (_argc != 3) { cerr << "Usage: " << _argv[0] << " \n"; return 1; } // Set up the database connection PGconn *conn = PQsetdb (NULL, NULL, NULL, NULL, _argv[1]); if (PQstatus (conn) == CONNECTION_BAD) { cerr << "Can't connect to database " << _argv[1] << " (" << PQerrorMessage (conn) << ")\n"; PQfinish (conn); return 1; } // First client encoding PQsetClientEncoding (conn, "KOI8"); if (PQstatus (conn) == CONNECTION_BAD) { cerr << "Can't set client encoding for database " << _argv[1] << " (" << PQerrorMessage (conn) << ")\n"; PQfinish (conn); return 1; } // Open binary file stream ifstream is (_argv[2], ios::binary); if (!is.good ()) { cerr << "Can't open test file (" << _argv[2] << ")\n"; PQfinish (conn); return 1; } // Load binary file into memory vector bin; copy (istream_iterator (is), istream_iterator (), back_inserter (bin)); // Clean test table PGresult *res = PQexec (conn, "DELETE FROM bytea_test"); if (PQresultStatus (res) != PGRES_COMMAND_OK) { PQclear (res); cerr << "Can't create test table (" << PQresultErrorMessage (res) << ")\n"; PQfinish (conn); return 1; } PQclear (res); // Convert bin array into escaped string size_t escaped_bin_len = 0; unsigned char *escaped_bin = PQescapeBytea (&bin.front (), bin.size (), &escaped_bin_len); cout << "\nSend to server: " << escaped_bin << endl << flush; // Construct insert query string insert_q = string ("INSERT INTO bytea_test VALUES ('") + string (reinterpret_cast (escaped_bin), escaped_bin_len - 1) + string ("')"); // Free no more need memory free (escaped_bin); // Insert bytea data into database res = PQexec (conn, insert_q.data ()); if (PQresultStatus (res) != PGRES_COMMAND_OK) { PQclear (res); cerr << "Can't insert data into test table (" << PQresultErrorMessage (res) << ")\n"; PQfinish (conn); return 
1; } PQclear (res); // Second client encoding PQsetClientEncoding (conn, "WIN"); if (PQstatus (conn) == CONNECTION_BAD) { cerr << "Can't set client encoding for database " << _argv[1] << " (" << PQerrorMessage (conn) << ")\n"; PQfinish (conn); return 1; } // Get back bin array from database res = PQexec (conn, "SELECT data FROM bytea_test"); if ((PQresultStatus (res) != PGRES_TUPLES_OK) || (PQntuples (res) == 0)) { PQclear (res); cerr << "Can't get data from test table (" << PQresultErrorMessage (res) << ")\n"; PQfinish (conn); return 1; } cout << "\nRecieve from server: " << PQgetvalue (res, 0, 0) << endl << flush; // Convert result into binary form size_t unescaped_bin_len = 0; unsigned char *unescaped_bin = PQunescapeBytea (reinterpret_cast (PQgetvalue (res, 0, 0)), &unescaped_bin_len); // Construct binary array vector bin2; copy (unescaped_bin, unescaped_bin + unescaped_bin_len, back_inserter (bin2)); // Free no more need memory free (unescaped_bin); // Clear result PQclear (res); // Close connection PQfinish (conn); // Compare binary arrays size if (bin.size () != bin2.size ()) { cerr << "ERROR: Binary arrays have different size\n"; return 1; } // Compare binary arrays data for (size_t i = 0; i < bin.size (); i++) { if (bin[i] != bin2[i]) { cerr << "ERROR: Binary arrays have different content in [" << i << "] " << (unsigned int)bin[i] << " != " << (unsigned int)bin2[i] << endl; return 1; } } ////////////////////////////////////// cout << "\nTest successfully done.\n"; ////////////////////////////////////// return 0; }