*** a/src/backend/main/main.c --- b/src/backend/main/main.c *************** *** 265,270 **** startup_hacks(const char *progname) --- 265,274 ---- /* * Help display should match the options accepted by PostmasterMain() * and PostgresMain(). + * + * XXX On Windows, non-ASCII localizations of these messages only display + * correctly if the console output code page covers the necessary characters. + * Messages emitted in write_console() do not exhibit this problem. */ static void help(const char *progname) *** a/src/backend/utils/adt/pg_locale.c --- b/src/backend/utils/adt/pg_locale.c *************** *** 131,144 **** static char *IsoLocaleName(const char *); /* MSVC specific */ /* * pg_perm_setlocale * ! * This is identical to the libc function setlocale(), with the addition ! * that if the operation is successful, the corresponding LC_XXX environment ! * variable is set to match. By setting the environment variable, we ensure ! * that any subsequent use of setlocale(..., "") will preserve the settings ! * made through this routine. Of course, LC_ALL must also be unset to fully ! * ensure that, but that has to be done elsewhere after all the individual ! * LC_XXX variables have been set correctly. (Thank you Perl for making this ! * kluge necessary.) */ char * pg_perm_setlocale(int category, const char *locale) --- 131,146 ---- /* * pg_perm_setlocale * ! * This wraps the libc function setlocale(), with two additions. First, when ! * changing LC_CTYPE, update gettext's encoding for the current message ! * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but ! * not on Windows. Second, if the operation is successful, the corresponding ! * LC_XXX environment variable is set to match. By setting the environment ! * variable, we ensure that any subsequent use of setlocale(..., "") will ! * preserve the settings made through this routine. Of course, LC_ALL must ! * also be unset to fully ensure that, but that has to be done elsewhere after ! 
* all the individual LC_XXX variables have been set correctly. (Thank you ! * Perl for making this kluge necessary.) */ char * pg_perm_setlocale(int category, const char *locale) *************** *** 172,177 **** pg_perm_setlocale(int category, const char *locale) --- 174,195 ---- if (result == NULL) return result; /* fall out immediately on failure */ + /* + * Use the right encoding in translated messages. Under ENABLE_NLS, let + * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message + * format strings are ASCII, but database-encoding strings may enter the + * message via %s. This makes the overall message encoding equal to the + * database encoding. + */ + if (category == LC_CTYPE) + { + #ifdef ENABLE_NLS + SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL))); + #else + SetMessageEncoding(GetDatabaseEncoding()); + #endif + } + switch (category) { case LC_COLLATE: *** a/src/backend/utils/error/elog.c --- b/src/backend/utils/error/elog.c *************** *** 1814,1819 **** write_syslog(int level, const char *line) --- 1814,1835 ---- #ifdef WIN32 /* + * Get the PostgreSQL equivalent of the Windows ANSI code page. "ANSI" system + * interfaces (e.g. CreateFileA()) expect string arguments in this encoding. + * Every process in a given system will find the same value at all times. + */ + static int + GetACPEncoding(void) + { + static int encoding = -2; + + if (encoding == -2) + encoding = pg_codepage_to_encoding(GetACP()); + + return encoding; + } + + /* * Write a message line to the windows event log */ static void *************** *** 1858,1873 **** write_eventlog(int level, const char *line, int len) } /* ! * Convert message to UTF16 text and write it with ReportEventW, but ! * fall-back into ReportEventA if conversion failed. * * Also verify that we are not on our way into error recursion trouble due ! * to error messages thrown deep inside pgwin32_toUTF16(). */ ! if (GetDatabaseEncoding() != GetPlatformEncoding() && ! 
!in_error_recursion_trouble()) { ! utf16 = pgwin32_toUTF16(line, len, NULL); if (utf16) { ReportEventW(evtHandle, --- 1874,1891 ---- } /* ! * If message character encoding matches the encoding expected by ! * ReportEventA(), call it to avoid the hazards of conversion. Otherwise, ! * try to convert the message to UTF16 and write it with ReportEventW(). ! * Fall back on ReportEventA() if conversion failed. * * Also verify that we are not on our way into error recursion trouble due ! * to error messages thrown deep inside pgwin32_message_to_UTF16(). */ ! if (!in_error_recursion_trouble() && ! GetMessageEncoding() != GetACPEncoding()) { ! utf16 = pgwin32_message_to_UTF16(line, len, NULL); if (utf16) { ReportEventW(evtHandle, *************** *** 1879,1884 **** write_eventlog(int level, const char *line, int len) --- 1897,1903 ---- 0, (LPCWSTR *) &utf16, NULL); + /* XXX Try ReportEventA() when ReportEventW() fails? */ pfree(utf16); return; *************** *** 1904,1925 **** write_console(const char *line, int len) #ifdef WIN32 /* ! * WriteConsoleW() will fail if stdout is redirected, so just fall through * to writing unconverted to the logfile in this case. * * Since we palloc the structure required for conversion, also fall * through to writing unconverted if we have not yet set up * CurrentMemoryContext. */ ! if (GetDatabaseEncoding() != GetPlatformEncoding() && ! !in_error_recursion_trouble() && !redirection_done && CurrentMemoryContext != NULL) { WCHAR *utf16; int utf16len; ! utf16 = pgwin32_toUTF16(line, len, &utf16len); if (utf16 != NULL) { HANDLE stdHandle; --- 1923,1952 ---- #ifdef WIN32 /* ! * Try to convert the message to UTF16 and write it with WriteConsoleW(). ! * Fall back on write() if anything fails. ! * ! * In contrast to write_eventlog(), don't skip straight to write() based ! * on the applicable encodings. Unlike WriteConsoleW(), write() depends ! * on the suitability of the console output code page. Since we put ! 
* stderr into binary mode in SubPostmasterMain(), write() skips the ! * necessary translation anyway. ! * ! * WriteConsoleW() will fail if stderr is redirected, so just fall through * to writing unconverted to the logfile in this case. * * Since we palloc the structure required for conversion, also fall * through to writing unconverted if we have not yet set up * CurrentMemoryContext. */ ! if (!in_error_recursion_trouble() && !redirection_done && CurrentMemoryContext != NULL) { WCHAR *utf16; int utf16len; ! utf16 = pgwin32_message_to_UTF16(line, len, &utf16len); if (utf16 != NULL) { HANDLE stdHandle; *** a/src/backend/utils/init/postinit.c --- b/src/backend/utils/init/postinit.c *************** *** 357,367 **** CheckMyDatabase(const char *name, bool am_superuser) SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE); SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE); - /* Use the right encoding in translated messages */ - #ifdef ENABLE_NLS - pg_bind_textdomain_codeset(textdomain(NULL)); - #endif - ReleaseSysCache(tup); } --- 357,362 ---- *** a/src/backend/utils/mb/encnames.c --- b/src/backend/utils/mb/encnames.c *************** *** 352,361 **** pg_enc2name pg_enc2name_tbl[] = --- 352,364 ---- /* ---------- * These are encoding names for gettext. + * + * This covers all encodings except MULE_INTERNAL, which is alien to gettext. 
* ---------- */ pg_enc2gettext pg_enc2gettext_tbl[] = { + {PG_SQL_ASCII, "US-ASCII"}, {PG_UTF8, "UTF-8"}, {PG_LATIN1, "LATIN1"}, {PG_LATIN2, "LATIN2"}, *************** *** 389,394 **** pg_enc2gettext pg_enc2gettext_tbl[] = --- 392,404 ---- {PG_EUC_KR, "EUC-KR"}, {PG_EUC_TW, "EUC-TW"}, {PG_EUC_JIS_2004, "EUC-JP"}, + {PG_SJIS, "SHIFT-JIS"}, + {PG_BIG5, "BIG5"}, + {PG_GBK, "GBK"}, + {PG_UHC, "UHC"}, + {PG_GB18030, "GB18030"}, + {PG_JOHAB, "JOHAB"}, + {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"}, {0, NULL} }; *** a/src/backend/utils/mb/mbutils.c --- b/src/backend/utils/mb/mbutils.c *************** *** 53,63 **** static FmgrInfo *ToServerConvProc = NULL; static FmgrInfo *ToClientConvProc = NULL; /* ! * These variables track the currently selected FE and BE encodings. */ static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; ! static pg_enc2name *PlatformEncoding = NULL; /* * During backend startup we can't set client encoding because we (a) --- 53,63 ---- static FmgrInfo *ToClientConvProc = NULL; /* ! * These variables track the currently-selected encodings. */ static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; ! static pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; /* * During backend startup we can't set client encoding because we (a) *************** *** 881,926 **** SetDatabaseEncoding(int encoding) Assert(DatabaseEncoding->encoding == encoding); } - /* - * Bind gettext to the codeset equivalent with the database encoding. - */ void ! pg_bind_textdomain_codeset(const char *domainname) { ! #if defined(ENABLE_NLS) ! int encoding = GetDatabaseEncoding(); ! int i; ! /* ! * gettext() uses the codeset specified by LC_CTYPE by default, so if that ! * matches the database encoding we don't need to do anything. In CREATE ! * DATABASE, we enforce or trust that the locale's codeset matches ! 
* database encoding, except for the C locale. In C locale, we bind ! * gettext() explicitly to the right codeset. ! * ! * On Windows, though, gettext() tends to get confused so we always bind ! * it. ! */ ! #ifndef WIN32 ! const char *ctype = setlocale(LC_CTYPE, NULL); ! if (pg_strcasecmp(ctype, "C") != 0 && pg_strcasecmp(ctype, "POSIX") != 0) ! return; ! #endif for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++) { if (pg_enc2gettext_tbl[i].encoding == encoding) { if (bind_textdomain_codeset(domainname, ! pg_enc2gettext_tbl[i].name) == NULL) elog(LOG, "bind_textdomain_codeset failed"); break; } } #endif } int GetDatabaseEncoding(void) { --- 881,982 ---- Assert(DatabaseEncoding->encoding == encoding); } void ! SetMessageEncoding(int encoding) { ! /* Some calls happen before we can elog()! */ ! Assert(PG_VALID_ENCODING(encoding)); ! MessageEncoding = &pg_enc2name_tbl[encoding]; ! Assert(MessageEncoding->encoding == encoding); ! } ! #ifdef ENABLE_NLS ! /* ! * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext ! * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also ! * fail for gettext-internal causes like out-of-memory. ! */ ! static bool ! raw_pg_bind_textdomain_codeset(const char *domainname, int encoding) ! { ! bool elog_ok = (CurrentMemoryContext != NULL); ! int i; for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++) { if (pg_enc2gettext_tbl[i].encoding == encoding) { if (bind_textdomain_codeset(domainname, ! pg_enc2gettext_tbl[i].name) != NULL) ! return true; ! ! if (elog_ok) elog(LOG, "bind_textdomain_codeset failed"); + else + write_stderr("bind_textdomain_codeset failed"); + break; } } + + return false; + } + + /* + * Bind a gettext message domain to the codeset corresponding to the database + * encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE. + * Return the MessageEncoding implied by the new settings. + * + * On most platforms, gettext defaults to the codeset implied by LC_CTYPE. 
+ * When that matches the database encoding, we don't need to do anything. In + * CREATE DATABASE, we enforce or trust that the locale's codeset matches the + * database encoding, except for the C locale. (On Windows, we also permit a + * discrepancy under the UTF8 encoding.) For the C locale, explicitly bind + * gettext to the right codeset. + * + * On Windows, gettext defaults to the Windows ANSI code page. This is a + * convenient departure for software that passes the strings to Windows ANSI + * APIs, but we don't do that. Compel gettext to use database encoding or, + * failing that, the LC_CTYPE encoding as it would on other platforms. + * + * This function is called before elog() and palloc() are usable. + */ + int + pg_bind_textdomain_codeset(const char *domainname) + { + bool elog_ok = (CurrentMemoryContext != NULL); + int encoding = GetDatabaseEncoding(); + int new_msgenc; + + #ifndef WIN32 + const char *ctype = setlocale(LC_CTYPE, NULL); + + if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0) #endif + if (encoding != PG_SQL_ASCII && + raw_pg_bind_textdomain_codeset(domainname, encoding)) + return encoding; + + new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok); + if (new_msgenc < 0) + new_msgenc = PG_SQL_ASCII; + + #ifdef WIN32 + if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc)) + /* On failure, the old message encoding remains valid. */ + return GetMessageEncoding(); + #endif + + return new_msgenc; } + #endif + /* + * The database encoding, also called the server encoding, represents the + * encoding of data stored in text-like data types. Affected types include + * cstring, text, varchar, name, xml, and json. + */ int GetDatabaseEncoding(void) { *************** *** 949,967 **** pg_client_encoding(PG_FUNCTION_ARGS) return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name)); } int ! GetPlatformEncoding(void) { ! if (PlatformEncoding == NULL) ! { ! 
/* try to determine encoding of server's environment locale */ ! int encoding = pg_get_encoding_from_locale("", true); ! ! if (encoding < 0) ! encoding = PG_SQL_ASCII; ! PlatformEncoding = &pg_enc2name_tbl[encoding]; ! } ! return PlatformEncoding->encoding; } #ifdef WIN32 --- 1005,1021 ---- return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name)); } + /* + * gettext() returns messages in this encoding. This often matches the + * database encoding, but it differs for SQL_ASCII databases, for processes + * not attached to a database, and under a database encoding lacking iconv + * support (MULE_INTERNAL). + */ int ! GetMessageEncoding(void) { ! Assert(MessageEncoding); ! return MessageEncoding->encoding; } #ifdef WIN32 *************** *** 971,983 **** GetPlatformEncoding(void) * is also passed to utf16len if not null. Returns NULL iff failed. */ WCHAR * ! pgwin32_toUTF16(const char *str, int len, int *utf16len) { WCHAR *utf16; int dstlen; UINT codepage; ! codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage; /* * Use MultiByteToWideChar directly if there is a corresponding codepage, --- 1025,1037 ---- * is also passed to utf16len if not null. Returns NULL iff failed. */ WCHAR * ! pgwin32_message_to_UTF16(const char *str, int len, int *utf16len) { WCHAR *utf16; int dstlen; UINT codepage; ! codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage; /* * Use MultiByteToWideChar directly if there is a corresponding codepage, *************** *** 994,1000 **** pgwin32_toUTF16(const char *str, int len, int *utf16len) char *utf8; utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str, ! len, GetDatabaseEncoding(), PG_UTF8); if (utf8 != str) len = strlen(utf8); --- 1048,1054 ---- char *utf8; utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str, ! 
len, GetMessageEncoding(), PG_UTF8); if (utf8 != str) len = strlen(utf8); *** a/src/include/mb/pg_wchar.h --- b/src/include/mb/pg_wchar.h *************** *** 481,488 **** extern const char *pg_get_client_encoding_name(void); extern void SetDatabaseEncoding(int encoding); extern int GetDatabaseEncoding(void); extern const char *GetDatabaseEncodingName(void); ! extern int GetPlatformEncoding(void); ! extern void pg_bind_textdomain_codeset(const char *domainname); extern int pg_valid_client_encoding(const char *name); extern int pg_valid_server_encoding(const char *name); --- 481,492 ---- extern void SetDatabaseEncoding(int encoding); extern int GetDatabaseEncoding(void); extern const char *GetDatabaseEncodingName(void); ! extern void SetMessageEncoding(int encoding); ! extern int GetMessageEncoding(void); ! ! #ifdef ENABLE_NLS ! extern int pg_bind_textdomain_codeset(const char *domainname); ! #endif extern int pg_valid_client_encoding(const char *name); extern int pg_valid_server_encoding(const char *name); *************** *** 542,548 **** extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p, extern bool pg_utf8_islegal(const unsigned char *source, int length); #ifdef WIN32 ! extern WCHAR *pgwin32_toUTF16(const char *str, int len, int *utf16len); #endif #endif /* PG_WCHAR_H */ --- 546,552 ---- extern bool pg_utf8_islegal(const unsigned char *source, int length); #ifdef WIN32 ! 
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len); #endif #endif /* PG_WCHAR_H */ *** a/src/include/port.h --- b/src/include/port.h *************** *** 452,457 **** extern void qsort_arg(void *base, size_t nel, size_t elsize, --- 452,461 ---- /* port/chklocale.c */ extern int pg_get_encoding_from_locale(const char *ctype, bool write_message); + #if defined(WIN32) && !defined(FRONTEND) + extern int pg_codepage_to_encoding(UINT cp); + #endif + /* port/inet_net_ntop.c */ extern char *inet_net_ntop(int af, const void *src, int bits, char *dst, size_t size); *** a/src/port/chklocale.c --- b/src/port/chklocale.c *************** *** 235,240 **** win32_langinfo(const char *ctype) --- 235,266 ---- return r; } + + #ifndef FRONTEND + /* + * Given a Windows code page identifier, find the corresponding PostgreSQL + * encoding. Issue a warning and return -1 if none found. + */ + int + pg_codepage_to_encoding(UINT cp) + { + char sys[16]; + int i; + + sprintf(sys, "CP%u", cp); + + /* Check the table */ + for (i = 0; encoding_match_list[i].system_enc_name; i++) + if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0) + return encoding_match_list[i].pg_enc_code; + + ereport(WARNING, + (errmsg("could not determine encoding for codeset \"%s\"", sys), + errdetail("Please report this to <pgsql-bugs@postgresql.org>."))); + + return -1; + } + #endif #endif /* WIN32 */ #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32) *************** *** 248,253 **** win32_langinfo(const char *ctype) --- 274,282 ---- * * If the result is PG_SQL_ASCII, callers should treat it as being compatible * with any desired encoding. + * + * If running in the backend and write_message is false, this function must + * cope with the possibility that elog() and palloc() are not yet usable. */ int pg_get_encoding_from_locale(const char *ctype, bool write_message)