resolution for bug 1010313

Lists: pgsql-odbc
From: Marcin Ligorowski <ligo(at)interia(dot)pl>
To: pgsql-odbc(at)postgresql(dot)org
Subject: resolution for bug 1010313
Date: 2008-07-29 07:36:26
Message-ID: 20080729073626.37DFD6D57E@f09.poczta.interia.pl
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-odbc


I've resolved bug number 1010313 (it was necessary for me because I use the psqlodbc driver
on Sun Solaris SPARC with UTF-8 characters).

To resolve this bug I've added is_big_endian(), which determines at run time whether the
platform uses big-endian or little-endian byte order.
A run-time check is used because there is no standard preprocessor define for endianness
that works across different compilers and operating systems.

Additionally, because I don't want to recalculate the platform endianness on every call to
ucs2_to_utf8, I've used two static variables: big_endian stores the endianness status,
and is_endian_calc records whether the endianness has already been calculated.

And finally, in ucs2_to_utf8 I've added byte swapping for big-endian platforms.

Below is the source code that I've implemented
(I've sent the complete source of the ucs2_to_utf8 function)

/*
 * Detect the host byte order at run time.
 *
 * A long holding the value 1 is inspected byte by byte: if the byte at the
 * highest address is the one that holds the 1, the least-significant byte is
 * stored last, i.e. the host is big-endian.
 *
 * Returns 1 on a big-endian host, 0 otherwise.
 */
int is_big_endian()
{
	const long probe = 1;
	const char *bytes = (const char *) &probe;

	if (bytes[sizeof probe - 1] == 1)
		return 1;
	return 0;
}

/*
 * Copy `count` bytes of an integer into dst, least-significant byte first.
 *
 * dst        - destination; must have room for `count` bytes
 * raw        - the integer's bytes exactly as they sit in memory
 * width      - size of that integer in bytes
 * count      - number of bytes to emit (must not exceed width)
 * big_endian - nonzero when the host stores the most-significant byte first
 *
 * On a little-endian host the bytes are copied in memory order; on a
 * big-endian host they are read from the end of the object backwards, which
 * yields the same output sequence.
 */
static void copy_lsb_first(char *dst, const char *raw, size_t width, size_t count, int big_endian)
{
	size_t	k;

	for (k = 0; k < count; k++)
		dst[k] = big_endian ? raw[width - 1 - k] : raw[k];
}

/*
 * Convert a UCS-2 / UTF-16 string into a freshly malloc'ed, NUL-terminated
 * UTF-8 string.
 *
 * ucs2str          - input string; when NULL the function returns NULL and,
 *                    if olen is given, stores SQL_NULL_DATA in *olen
 * ilen             - number of SQLWCHAR units to convert, or SQL_NTS to have
 *                    the length measured with ucs2strlen()
 * olen             - optional; on success receives the number of UTF-8 bytes
 *                    written, not counting the terminating NUL
 * lower_identifier - when true, ASCII characters are passed through tolower()
 *
 * Returns the malloc'ed buffer, or NULL when ucs2str is NULL or the
 * allocation fails (on allocation failure *olen is left untouched).
 * NOTE(review): the buffer is presumably released by the caller with free().
 *
 * Conversion stops early at the first zero unit, and at a high surrogate
 * that is the last unit inside ilen (its partner would lie outside the input).
 */
char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier)
{
	char *	utf8str;
	/*mylog("ucs2_to_utf8 %p ilen=%d ", ucs2str, ilen);*/

	/*
	 * is this a bigendian machine ?
	 * The answer is computed once and cached; the cache is not synchronized,
	 * but every caller would store the same value.
	 */
	static int is_endian_calc = 0;
	static int big_endian = 0;

	if (!is_endian_calc)
	{
		big_endian = is_big_endian();
		is_endian_calc = 1;
	}

	if (!ucs2str)
	{
		/* olen is optional on the success path, so treat it the same here */
		if (olen)
			*olen = SQL_NULL_DATA;
		return NULL;
	}
	if (SQL_NTS == ilen)
		ilen = ucs2strlen(ucs2str);
	/*mylog(" newlen=%d", ilen);*/

	/* one input unit produces at most 3 bytes, a surrogate pair 4 */
	utf8str = (char *) malloc(ilen * 4 + 1);
	if (utf8str)
	{
		/* SQLLEN counters: they are compared with ilen and stored in *olen */
		SQLLEN	i, len = 0;
		union { UInt2 i; char c[sizeof (UInt2)]; } byte2code;
		union { Int4 i; char c[sizeof (Int4)]; } byte4code;
		Int4	surrd1, surrd2;
		const SQLWCHAR *wstr;

		for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++)
		{
			if (!*wstr)
				break;
			else if (0 == (*wstr & 0xffffff80)) /* ASCII */
			{
				if (lower_identifier)
					utf8str[len++] = (char) tolower(*wstr);
				else
					utf8str[len++] = (char) *wstr;
			}
			else if ((*wstr & byte3check) == 0)
			{
				/* two-byte sequence, first output byte in the low-order byte */
				byte2code.i = byte2_base |
					((byte2_mask1 & *wstr) >> 6) |
					((byte2_mask2 & *wstr) << 8);
				copy_lsb_first(utf8str + len, byte2code.c, sizeof(byte2code.c),
							   sizeof(byte2code.i), big_endian);
				len += sizeof(byte2code.i);
			}
			/* surrogate pair check for non ucs-2 code */
			else if (surrog1_bits == (*wstr & surrog_check))
			{
				/* a high surrogate in the last slot has no partner to read */
				if (i + 1 >= ilen)
					break;
				surrd1 = (*wstr & ~surrog_check) + surrogate_adjust;
				wstr++;
				i++;
				surrd2 = (*wstr & ~surrog_check);
				byte4code.i = byte4_base |
					((byte4_sr1_mask1 & surrd1) >> 8) |
					((byte4_sr1_mask2 & surrd1) << 6) |
					((byte4_sr1_mask3 & surrd1) << 20) |
					((byte4_sr2_mask1 & surrd2) << 10) |
					((byte4_sr2_mask2 & surrd2) << 24);
				/* must read byte4code here: byte2code is stale and too small */
				copy_lsb_first(utf8str + len, byte4code.c, sizeof(byte4code.c),
							   sizeof(byte4code.i), big_endian);
				len += sizeof(byte4code.i);
			}
			else
			{
				/* three-byte sequence held in the low three bytes of byte4code */
				byte4code.i = byte3_base |
					((byte3_mask1 & *wstr) >> 12) |
					((byte3_mask2 & *wstr) << 2) |
					((byte3_mask3 & *wstr) << 16);
				copy_lsb_first(utf8str + len, byte4code.c, sizeof(byte4code.c),
							   3, big_endian);
				len += 3;
			}
		}
		utf8str[len] = '\0';
		if (olen)
			*olen = len;
	}
	/*mylog(" olen=%d %s\n", *olen, utf8str ? utf8str : "");*/
	return utf8str;
}

----------------------------------------------------------------------
W kosciele tez zdarzaja sie wpadki!
Smieszny filmik >>> http://link.interia.pl/f1e61


From: "Adam M" <gnuman1(at)gmail(dot)com>
To: "Marcin Ligorowski" <ligo(at)interia(dot)pl>
Cc: pgsql-odbc(at)postgresql(dot)org
Subject: Re: resolution for bug 1010313
Date: 2008-08-08 21:03:14
Message-ID: 84b37b360808081403k749196c1u314d2f1a00cd4e61@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-odbc

On Tue, Jul 29, 2008 at 2:36 AM, Marcin Ligorowski <ligo(at)interia(dot)pl> wrote:

> Below is source code that I've implemented
> (I've send all ucs2_to_utf8 function source code)
>

Is it possible to attach your changes as a patch? In-line posting of
some code changes (and in non-patch format) really is not an ideal way
to post changes.

Thanks,
Adam


From: Marcin Ligorowski <ligo(at)interia(dot)pl>
To: Adam M <gnuman1(at)gmail(dot)com>
Cc: pgsql-odbc(at)postgresql(dot)org
Subject: Re: resolution for bug 1010313
Date: 2008-08-19 18:35:36
Message-ID: 48AB1278.2000703@interia.pl
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-odbc

Below are the changes in patch format, obtained using the diff utility.

Marcin

--- win_unicode.c.org Sat Sep 1 01:40:10 2007
+++ win_unicode.c Fri Jul 25 12:52:06 2008
@@ -54,6 +54,13 @@

+int is_big_endian()
+{
+ union { long l; char c[sizeof (long)]; } u;
+ u.l = 1;
+ return (u.c[sizeof (long) - 1] == 1);
+}
+
SQLULEN ucs2strlen(const SQLWCHAR *ucs2str)
{
SQLULEN len;
@@ -66,6 +73,17 @@
char * utf8str;

+ static int is_endian_calc = 0;
+ static int big_endian = 0;
+ if(!is_endian_calc)
+ {
+ big_endian = is_big_endian();
+ is_endian_calc = 1;
+ }
+
if (!ucs2str)
{
*olen = SQL_NULL_DATA;
@@ -78,8 +96,8 @@
if (utf8str)
{
int i, len = 0;
- UInt2 byte2code;
- Int4 byte4code, surrd1, surrd2;
+ union { UInt2 i; char c[sizeof (UInt2)]; } byte2code;
+ union { Int4 i; char c[sizeof (Int4)]; } byte4code, surrd1, surrd2;
const SQLWCHAR *wstr;

for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++)
@@ -95,35 +113,62 @@
}
else if ((*wstr & byte3check) == 0)
{
- byte2code = byte2_base |
+ byte2code.i = byte2_base |
((byte2_mask1 & *wstr) >> 6) |
((byte2_mask2 & *wstr) << 8);
- memcpy(utf8str + len, (char *) &byte2code, sizeof(byte2code));
- len += sizeof(byte2code);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[1], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[0], 1);
}
+ else
+ {
+ memcpy(utf8str + len, (char *) &byte2code.i, sizeof(byte2code.i));
+ }
+ len += sizeof(byte2code.i);
+ }
else if (surrog1_bits == (*wstr & surrog_check))
{
- surrd1 = (*wstr & ~surrog_check) + surrogate_adjust;
+ surrd1.i = (*wstr & ~surrog_check) + surrogate_adjust;
wstr++;
i++;
- surrd2 = (*wstr & ~surrog_check);
- byte4code = byte4_base |
- ((byte4_sr1_mask1 & surrd1) >> 8) |
- ((byte4_sr1_mask2 & surrd1) << 6) |
- ((byte4_sr1_mask3 & surrd1) << 20) |
- ((byte4_sr2_mask1 & surrd2) << 10) |
- ((byte4_sr2_mask2 & surrd2) << 24);
- memcpy(utf8str + len, (char *) &byte4code, sizeof(byte4code));
- len += sizeof(byte4code);
+ surrd2.i = (*wstr & ~surrog_check);
+ byte4code.i = byte4_base |
+ ((byte4_sr1_mask1 & surrd1.i) >> 8) |
+ ((byte4_sr1_mask2 & surrd1.i) << 6) |
+ ((byte4_sr1_mask3 & surrd1.i) << 20) |
+ ((byte4_sr2_mask1 & surrd2.i) << 10) |
+ ((byte4_sr2_mask2 & surrd2.i) << 24);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte4code.c[3], 1);
+ memcpy(utf8str + len + 1, (char *) &byte4code.c[2], 1);
+ memcpy(utf8str + len + 2, (char *) &byte4code.c[1], 1);
+ memcpy(utf8str + len + 3, (char *) &byte4code.c[0], 1);
}
else
{
- byte4code = byte3_base |
+ memcpy(utf8str + len, (char *) &byte4code.i, sizeof(byte4code.i));
+ }
+ len += sizeof(byte4code.i);
+ }
+ else
+ {
+ byte4code.i = byte3_base |
((byte3_mask1 & *wstr) >> 12) |
((byte3_mask2 & *wstr) << 2) |
((byte3_mask3 & *wstr) << 16);
- memcpy(utf8str + len, (char *) &byte4code, 3);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte4code.c[3], 1);
+ memcpy(utf8str + len + 1, (char *) &byte4code.c[2], 1);
+ memcpy(utf8str + len + 2, (char *) &byte4code.c[1], 1);
+ }
+ else
+ {
+ memcpy(utf8str + len, (char *) &byte4code.i, 3);
+ }
len += 3;
}
}

Adam M pisze:
> On Tue, Jul 29, 2008 at 2:36 AM, Marcin Ligorowski <ligo(at)interia(dot)pl> wrote:
>
>> Below is source code that I've implemented
>> (I've send all ucs2_to_utf8 function source code)
>>
>
> Is it possible to attach your changes as a patch? In-line posting of
> some code changes (and in non-patch format) really is not an ideal way
> to post changes.
>
> Thanks,
> Adam
>

----------------------------------------------------------------------
Igrzyska z nagrodami! Kliknij>>>> http://link.interia.pl/f1edb