Index: oracle_compat.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v retrieving revision 1.60 diff -c -r1.60 oracle_compat.c *** oracle_compat.c 7 May 2005 15:18:17 -0000 1.60 --- oracle_compat.c 23 Aug 2005 17:13:11 -0000 *************** *** 149,154 **** --- 149,265 ---- #endif /* USE_WIDE_UPPER_LOWER */ + /* + * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding. + * To make use of the upper/lower functionality, we need to map UTF8 to + * UTF16, which for some reason mbstowcs and wcstombs won't do for us. + * This conversion layer takes care of it. + */ + + #ifdef WIN32 + + /* texttowcs for the case of UTF8 to UTF16: converts the bytes of txt with MultiByteToWideChar(CP_UTF8) into a palloc-allocated, zero-terminated wchar_t array and raises an error if the conversion fails */ + static wchar_t * + win32_utf8_texttowcs(const text *txt) + { + int nbytes = VARSIZE(txt) - VARHDRSZ; + wchar_t *result; + int r; + + /* Overflow paranoia: the workspace below needs (nbytes + 1) * sizeof(wchar_t) bytes, so reject any nbytes for which that size would not fit in an int */ + if (nbytes < 0 || + nbytes > (int) (INT_MAX / sizeof(wchar_t)) -1) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + /* MultiByteToWideChar does not work for zero-length input, so that case is handled without calling it */ + if (nbytes == 0) + r = 0; + else + { + /* Do the conversion */ + r = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), nbytes, + result, nbytes); + + if (!r) /* assume it's NO_UNICODE_TRANSLATION */ + { + /* see notes above about error reporting (NOTE(review): those notes are outside this hunk, presumably in texttowcs -- confirm); pg_verifymbstr is called before the locale error below is raised */ + pg_verifymbstr(VARDATA(txt), nbytes, false); + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte character for locale"), + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); + } + } + + Assert(r <= nbytes); + /* zero-terminate after the r codes that were produced */ result[r] = 0; + + return result; + } + + /* wcstotext for the case of UTF16 to UTF8: str must be zero-terminated because -1 is passed as its length to WideCharToMultiByte; the result is a palloc-allocated text */ + static text * + win32_utf8_wcstotext(const wchar_t *str) + { + text *result; + int nbytes; + int r; + + nbytes = 
WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); /* sizing call: with a zero-size output buffer this only reports the number of bytes needed, terminator included */ + if (nbytes == 0) /* shouldn't happen */ + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("UTF16 to UTF8 translation failed: %lu", + GetLastError()))); + + result = palloc(nbytes+VARHDRSZ); + + r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes, + NULL, NULL); + if (r == 0) /* shouldn't happen */ + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("UTF16 to UTF8 translation failed: %lu", + GetLastError()))); + + VARATT_SIZEP(result) = nbytes + VARHDRSZ - 1; /* -1 to ignore null: nbytes counts the converted terminator, which is not part of the text value */ + + return result; + } + + /* interface layer to check which encoding is in use: the UTF8 routines above are used when GetDatabaseEncoding() is PG_UTF8, the generic texttowcs/wcstotext otherwise */ + + static wchar_t * + win32_texttowcs(const text *txt) + { + if (GetDatabaseEncoding() == PG_UTF8) + return win32_utf8_texttowcs(txt); + else + return texttowcs(txt); + } + + static text * + win32_wcstotext(const wchar_t *str, int ncodes) + { + if (GetDatabaseEncoding() == PG_UTF8) + /* ncodes is not passed on here: win32_utf8_wcstotext relies on str being zero-terminated */ return win32_utf8_wcstotext(str); + else + return wcstotext(str, ncodes); + } + + /* use macros to cause routines below to call interface layer */ + + #define texttowcs win32_texttowcs + #define wcstotext win32_wcstotext + + #endif /* WIN32 */ + + /******************************************************************** * * lower Index: varlena.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.131 diff -c -r1.131 varlena.c *** varlena.c 2 Aug 2005 16:11:57 -0000 1.131 --- varlena.c 23 Aug 2005 17:13:12 -0000 *************** *** 849,854 **** --- 849,856 ---- char *a1p, *a2p; + #ifndef WIN32 + if (len1 >= STACKBUFLEN) a1p = (char *) palloc(len1 + 1); else *************** *** 865,874 **** result = strcoll(a1p, a2p); ! if (len1 >= STACKBUFLEN) pfree(a1p); ! if (len2 >= STACKBUFLEN) pfree(a2p); } else { --- 867,953 ---- result = strcoll(a1p, a2p); ! if (a1p != a1buf) pfree(a1p); ! 
if (a2p != a2buf) pfree(a2p); + + #else /* WIN32 */ + + /* Win32 does not have UTF-8, so we need to map to UTF-16: both arguments are converted with MultiByteToWideChar and then compared with wcscoll */ + if (GetDatabaseEncoding() == PG_UTF8) + { + int a1len; + int a2len; + int r; + + /* a1len and a2len are byte counts; a1len/2 and a2len/2 are passed below as the output capacity in wide characters */ if (len1 >= STACKBUFLEN/2) + { + a1len = len1 * 2 + 2; + a1p = palloc(a1len); + } + else + { + a1len = STACKBUFLEN; + a1p = a1buf; + } + if (len2 >= STACKBUFLEN/2) + { + a2len = len2 * 2 + 2; + a2p = palloc(a2len); + } + else + { + a2len = STACKBUFLEN; + a2p = a2buf; + } + + /* MultiByteToWideChar does not work for zero-length input, so that case is handled without calling it */ + if (len1 == 0) + r = 0; + else + { + r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1, + (LPWSTR) a1p, a1len/2); + if (!r) + ereport(ERROR, + (errmsg("could not convert string to UTF16: %lu", + GetLastError()))); + } + /* zero-terminate after the r converted characters */ ((LPWSTR) a1p)[r] = 0; + + if (len2 == 0) + r = 0; + else + { + r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2, + (LPWSTR) a2p, a2len/2); + if (!r) + ereport(ERROR, + (errmsg("could not convert string to UTF16: %lu", + GetLastError()))); + } + ((LPWSTR) a2p)[r] = 0; + + /* clear errno first so the value reported below is not a stale one */ errno = 0; + result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ + ereport(ERROR, + (errmsg("could not compare unicode strings: %d", + errno))); + + if (a1p != a1buf) + pfree(a1p); + if (a2p != a2buf) + pfree(a2p); + + return result; + } + + /* Win32 has strncoll(), so use it to avoid copying. NOTE(review): only the first Min(len1, len2) bytes are compared and the result is returned directly, so arguments that differ only in length compare as equal here -- confirm this is intended */ + return _strncoll(arg1, arg2, Min(len1, len2)); + + #endif /* WIN32 */ } else {