Re: [PATCH] plpythonu datatype conversion improvements

Lists: pgsql-hackers
From: Caleb Welton <cwelton(at)greenplum(dot)com>
To: "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: [PATCH] plpythonu datatype conversion improvements
Date: 2009-05-26 23:07:33
Message-ID: C641C445.20F7%cwelton@greenplum.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Patch for plpythonu

Primary motivation of the attached patch is to support handling bytea conversion allowing for embedded nulls, which in turn allows for supporting the marshal module.

Secondary motivation is slightly improved performance for conversion routines of basic datatypes that have simple mappings between postgres/python.

Primary design is to change the conversion routines from being based on cstrings to datums, eg:
PLyBool_FromString(const char *) => PLyBool_FromBool(PLyDatumToOb, Datum);

Thanks,
Caleb

-----

Index: plpython.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/pl/plpython/plpython.c,v
retrieving revision 1.120
diff -c -r1.120 plpython.c
*** plpython.c 3 Apr 2009 16:59:42 -0000 1.120
--- plpython.c 26 May 2009 22:58:52 -0000
***************
*** 78,84 ****
* objects.
*/

! typedef PyObject *(*PLyDatumToObFunc) (const char *);

typedef struct PLyDatumToOb
{
--- 78,85 ----
* objects.
*/

! struct PLyDatumToOb;
! typedef PyObject *(*PLyDatumToObFunc) (struct PLyDatumToOb*, Datum);

typedef struct PLyDatumToOb
{
***************
*** 104,111 ****
--- 105,120 ----
/* convert PyObject to a Postgresql Datum or tuple.
* output from Python
*/
+
+ struct PLyObToDatum;
+ struct PLyProcedure;
+ typedef Datum (*PLyObToDatumFunc) (struct PLyProcedure*,
+ struct PLyObToDatum*,
+ PyObject *, bool *isnull);
+
typedef struct PLyObToDatum
{
+ PLyObToDatumFunc func;
FmgrInfo typfunc; /* The type's input function */
Oid typoid; /* The OID of the type */
Oid typioparam;
***************
*** 255,270 ****
static void PLy_input_tuple_funcs(PLyTypeInfo *, TupleDesc);

/* conversion functions */
static PyObject *PLyDict_FromTuple(PLyTypeInfo *, HeapTuple, TupleDesc);
! static PyObject *PLyBool_FromString(const char *);
! static PyObject *PLyFloat_FromString(const char *);
! static PyObject *PLyInt_FromString(const char *);
! static PyObject *PLyLong_FromString(const char *);
! static PyObject *PLyString_FromString(const char *);
!
! static HeapTuple PLyMapping_ToTuple(PLyTypeInfo *, PyObject *);
! static HeapTuple PLySequence_ToTuple(PLyTypeInfo *, PyObject *);
! static HeapTuple PLyObject_ToTuple(PLyTypeInfo *, PyObject *);

/*
* Currently active plpython function
--- 264,295 ----
static void PLy_input_tuple_funcs(PLyTypeInfo *, TupleDesc);

/* conversion functions */
+ static PyObject *PLyBool_FromBool(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyFloat_FromFloat4(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyFloat_FromFloat8(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyFloat_FromNumeric(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyInt_FromInt16(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyInt_FromInt32(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyLong_FromInt64(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyString_FromText(PLyDatumToOb *arg, Datum d);
+ static PyObject *PLyString_FromDatum(PLyDatumToOb *arg, Datum d);
+
static PyObject *PLyDict_FromTuple(PLyTypeInfo *, HeapTuple, TupleDesc);
!
! static Datum PLyObject_ToVoid(PLyProcedure *, PLyObToDatum *,
! PyObject *, bool *isnull);
! static Datum PLyObject_ToBool(PLyProcedure *, PLyObToDatum *,
! PyObject *, bool *isnull);
! static Datum PLyObject_ToBytea(PLyProcedure *, PLyObToDatum *,
! PyObject *, bool *isnull);
! static Datum PLyObject_ToText(PLyProcedure *, PLyObToDatum *,
! PyObject *, bool *isnull);
! static Datum PLyObject_ToDatum(PLyProcedure *, PLyObToDatum *,
! PyObject *, bool *isnull);
!
! static HeapTuple PLyMapping_ToTuple(PLyProcedure *, PyObject *);
! static HeapTuple PLySequence_ToTuple(PLyProcedure *, PyObject *);
! static HeapTuple PLyObject_ToTuple(PLyProcedure *, PyObject *);

/*
* Currently active plpython function
***************
*** 507,514 ****

for (i = 0; i < natts; i++)
{
- char *src;
-
platt = PyList_GetItem(plkeys, i);
if (!PyString_Check(platt))
ereport(ERROR,
--- 532,537 ----
***************
*** 533,564 ****
modvalues[i] = (Datum) 0;
modnulls[i] = 'n';
}
! else if (plval != Py_None)
{
! plstr = PyObject_Str(plval);
! if (!plstr)
! PLy_elog(ERROR, "could not compute string representation of Python object in PL/Python function \"%s\" while modifying trigger row",
! proc->proname);
! src = PyString_AsString(plstr);
!
! modvalues[i] =
! InputFunctionCall(&proc->result.out.r.atts[atti].typfunc,
! src,
! proc->result.out.r.atts[atti].typioparam,
! tupdesc->attrs[atti]->atttypmod);
! modnulls[i] = ' ';
!
! Py_DECREF(plstr);
! plstr = NULL;
! }
! else
! {
! modvalues[i] =
! InputFunctionCall(&proc->result.out.r.atts[atti].typfunc,
! NULL,
! proc->result.out.r.atts[atti].typioparam,
! tupdesc->attrs[atti]->atttypmod);
! modnulls[i] = 'n';
}

Py_DECREF(plval);
--- 556,565 ----
modvalues[i] = (Datum) 0;
modnulls[i] = 'n';
}
! else
{
! PLyObToDatum *att = &proc->result.out.r.atts[atti];
! modvalues[i] = (att->func) (proc, att, plval, &modnulls[i]);
}

Py_DECREF(plval);
***************
*** 784,791 ****
Datum rv;
PyObject *volatile plargs = NULL;
PyObject *volatile plrv = NULL;
- PyObject *volatile plrv_so = NULL;
- char *plrv_sc;

PG_TRY();
{
--- 785,790 ----
***************
*** 862,868 ****

Py_XDECREF(plargs);
Py_XDECREF(plrv);
- Py_XDECREF(plrv_so);

PLy_function_delete_args(proc);

--- 861,866 ----
***************
*** 876,922 ****
}
}

! /*
! * If the function is declared to return void, the Python return value
! * must be None. For void-returning functions, we also treat a None
! * return value as a special "void datum" rather than NULL (as is the
! * case for non-void-returning functions).
! */
! if (proc->result.out.d.typoid == VOIDOID)
! {
! if (plrv != Py_None)
! ereport(ERROR,
! (errcode(ERRCODE_DATATYPE_MISMATCH),
! errmsg("PL/Python function with return type \"void\" did not return None")));
!
! fcinfo->isnull = false;
! rv = (Datum) 0;
! }
! else if (plrv == Py_None)
! {
! fcinfo->isnull = true;
! if (proc->result.is_rowtype < 1)
! rv = InputFunctionCall(&proc->result.out.d.typfunc,
! NULL,
! proc->result.out.d.typioparam,
! -1);
! else
! /* Tuple as None */
! rv = (Datum) NULL;
! }
! else if (proc->result.is_rowtype >= 1)
{
HeapTuple tuple = NULL;

! if (PySequence_Check(plrv))
/* composite type as sequence (tuple, list etc) */
! tuple = PLySequence_ToTuple(&proc->result, plrv);
else if (PyMapping_Check(plrv))
/* composite type as mapping (currently only dict) */
! tuple = PLyMapping_ToTuple(&proc->result, plrv);
else
/* returned as smth, must provide method __getattr__(name) */
! tuple = PLyObject_ToTuple(&proc->result, plrv);

if (tuple != NULL)
{
--- 874,895 ----
}
}

! /* Convert python return value into postgres datatypes */
! if (proc->result.is_rowtype >= 1)
{
HeapTuple tuple = NULL;

! if (plrv == Py_None)
! tuple = NULL;
! else if (PySequence_Check(plrv))
/* composite type as sequence (tuple, list etc) */
! tuple = PLySequence_ToTuple(proc, plrv);
else if (PyMapping_Check(plrv))
/* composite type as mapping (currently only dict) */
! tuple = PLyMapping_ToTuple(proc, plrv);
else
/* returned as smth, must provide method __getattr__(name) */
! tuple = PLyObject_ToTuple(proc, plrv);

if (tuple != NULL)
{
***************
*** 931,952 ****
}
else
{
! fcinfo->isnull = false;
! plrv_so = PyObject_Str(plrv);
! if (!plrv_so)
! PLy_elog(ERROR, "could not create string representation of Python object in PL/Python function \"%s\" while creating return value", proc->proname);
! plrv_sc = PyString_AsString(plrv_so);
! rv = InputFunctionCall(&proc->result.out.d.typfunc,
! plrv_sc,
! proc->result.out.d.typioparam,
! -1);
}
}
PG_CATCH();
{
Py_XDECREF(plargs);
Py_XDECREF(plrv);
- Py_XDECREF(plrv_so);

PG_RE_THROW();
}
--- 904,919 ----
}
else
{
! rv = (proc->result.out.d.func) (proc,
! &proc->result.out.d,
! plrv,
! &fcinfo->isnull);
}
}
PG_CATCH();
{
Py_XDECREF(plargs);
Py_XDECREF(plrv);

PG_RE_THROW();
}
***************
*** 954,960 ****

Py_XDECREF(plargs);
Py_DECREF(plrv);
- Py_XDECREF(plrv_so);

return rv;
}
--- 921,926 ----
***************
*** 1037,1048 ****
arg = NULL;
else
{
! char *ct;
!
! ct = OutputFunctionCall(&(proc->args[i].in.d.typfunc),
! fcinfo->arg[i]);
! arg = (proc->args[i].in.d.func) (ct);
! pfree(ct);
}
}

--- 1003,1010 ----
arg = NULL;
else
{
! arg = (proc->args[i].in.d.func) (&(proc->args[i].in.d),
! fcinfo->arg[i]);
}
}

***************
*** 1593,1598 ****
--- 1555,1589 ----
arg->typoid = HeapTupleGetOid(typeTup);
arg->typioparam = getTypeIOParam(typeTup);
arg->typbyval = typeStruct->typbyval;
+
+ /* Determine which kind of Python object we will convert to */
+ switch (arg->typoid)
+ {
+ case VOIDOID:
+ arg->func = PLyObject_ToVoid;
+ break;
+ case BOOLOID:
+ arg->func = PLyObject_ToBool;
+ break;
+ case BYTEAOID:
+ arg->func = PLyObject_ToBytea;
+ break;
+ case BPCHAROID:
+ case VARCHAROID:
+ case TEXTOID:
+ arg->func = PLyObject_ToText;
+ break;
+
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ case INT2OID:
+ case INT4OID:
+ case INT8OID:
+ default:
+ arg->func = PLyObject_ToDatum;
+ break;
+ }
}

static void
***************
*** 1619,1644 ****
switch (typeOid)
{
case BOOLOID:
! arg->func = PLyBool_FromString;
break;
case FLOAT4OID:
case FLOAT8OID:
case NUMERICOID:
! arg->func = PLyFloat_FromString;
break;
case INT2OID:
case INT4OID:
! arg->func = PLyInt_FromString;
break;
case INT8OID:
! arg->func = PLyLong_FromString;
break;
default:
! arg->func = PLyString_FromString;
break;
}
}

static void
PLy_typeinfo_init(PLyTypeInfo * arg)
{
--- 1610,1648 ----
switch (typeOid)
{
case BOOLOID:
! arg->func = PLyBool_FromBool;
break;
case FLOAT4OID:
+ arg->func = PLyFloat_FromFloat4;
+ break;
case FLOAT8OID:
+ arg->func = PLyFloat_FromFloat8;
+ break;
case NUMERICOID:
! arg->func = PLyFloat_FromNumeric;
break;
case INT2OID:
+ arg->func = PLyInt_FromInt16;
+ break;
case INT4OID:
! arg->func = PLyInt_FromInt32;
break;
case INT8OID:
! arg->func = PLyLong_FromInt64;
! break;
! case BPCHAROID:
! case VARCHAROID:
! case TEXTOID:
! case BYTEAOID:
! arg->func = PLyString_FromText;
break;
default:
! arg->func = PLyString_FromDatum;
break;
}
}

+
static void
PLy_typeinfo_init(PLyTypeInfo * arg)
{
***************
*** 1660,1716 ****
}
}

- /* assumes that a bool is always returned as a 't' or 'f' */
static PyObject *
! PLyBool_FromString(const char *src)
{
/*
* We would like to use Py_RETURN_TRUE and Py_RETURN_FALSE here for
* generating SQL from trigger functions, but those are only supported in
* Python >= 2.3, and we support older versions.
* http://docs.python.org/api/boolObjects.html
*/
! if (src[0] == 't')
return PyBool_FromLong(1);
! return PyBool_FromLong(0);
}

static PyObject *
! PLyFloat_FromString(const char *src)
{
! double v;
! char *eptr;

! errno = 0;
! v = strtod(src, &eptr);
! if (*eptr != '\0' || errno)
! return NULL;
! return PyFloat_FromDouble(v);
}

static PyObject *
! PLyInt_FromString(const char *src)
{
! long v;
! char *eptr;

! errno = 0;
! v = strtol(src, &eptr, 0);
! if (*eptr != '\0' || errno)
! return NULL;
! return PyInt_FromLong(v);
}

static PyObject *
! PLyLong_FromString(const char *src)
{
! return PyLong_FromString((char *) src, NULL, 0);
}

static PyObject *
! PLyString_FromString(const char *src)
{
! return PyString_FromString(src);
}

static PyObject *
--- 1664,1758 ----
}
}

static PyObject *
! PLyBool_FromBool(PLyDatumToOb *arg, Datum d)
{
+ bool x = DatumGetBool(d);
+ arg = 0; /* unused */
+
/*
* We would like to use Py_RETURN_TRUE and Py_RETURN_FALSE here for
* generating SQL from trigger functions, but those are only supported in
* Python >= 2.3, and we support older versions.
* http://docs.python.org/api/boolObjects.html
*/
! if (x)
return PyBool_FromLong(1);
! else
! return PyBool_FromLong(0);
}

static PyObject *
! PLyFloat_FromFloat4(PLyDatumToOb *arg, Datum d)
{
! arg = 0; /* unused */
! return PyFloat_FromDouble(DatumGetFloat4(d));
! }

! static PyObject *
! PLyFloat_FromFloat8(PLyDatumToOb *arg, Datum d)
! {
! arg = 0; /* unused */
! return PyFloat_FromDouble(DatumGetFloat8(d));
}

static PyObject *
! PLyFloat_FromNumeric(PLyDatumToOb *arg, Datum d)
{
! /*
! * Numeric is cast to a PyFloat:
! * This results in a loss of precision
! * Would it be better to cast to PyString?
! */
! Datum f = DirectFunctionCall1(numeric_float8, d);
! double x = DatumGetFloat8(f);
! arg = 0; /* unused */
! return PyFloat_FromDouble(x);
! }

! static PyObject *
! PLyInt_FromInt16(PLyDatumToOb *arg, Datum d)
! {
! arg = 0; /* unused */
! return PyInt_FromLong(DatumGetInt16(d));
}

static PyObject *
! PLyInt_FromInt32(PLyDatumToOb *arg, Datum d)
{
! arg = 0; /* unused */
! return PyInt_FromLong(DatumGetInt32(d));
}

static PyObject *
! PLyLong_FromInt64(PLyDatumToOb *arg, Datum d)
{
! arg = 0; /* unused */
!
! /* on 32 bit platforms "long" may be too small */
! if (sizeof(int64) > sizeof(long))
! return PyLong_FromLongLong(DatumGetInt64(d));
! else
! return PyLong_FromLong(DatumGetInt64(d));
! }
!
! static PyObject *
! PLyString_FromText(PLyDatumToOb *arg, Datum d)
! {
! text *txt = DatumGetTextP(d);
! char *str = VARDATA(txt);
! size_t size = VARSIZE(txt) - VARHDRSZ;
!
! return PyString_FromStringAndSize(str, size);
! }
!
! static PyObject *
! PLyString_FromDatum(PLyDatumToOb *arg, Datum d)
! {
! char *x = OutputFunctionCall(&arg->typfunc, d);
! PyObject *r = PyString_FromString(x);
! pfree(x);
! return r;
}

static PyObject *
***************
*** 1730,1737 ****
{
for (i = 0; i < info->in.r.natts; i++)
{
! char *key,
! *vsrc;
Datum vattr;
bool is_null;
PyObject *value;
--- 1772,1778 ----
{
for (i = 0; i < info->in.r.natts; i++)
{
! char *key;
Datum vattr;
bool is_null;
PyObject *value;
***************
*** 1746,1759 ****
PyDict_SetItemString(dict, key, Py_None);
else
{
! vsrc = OutputFunctionCall(&info->in.r.atts[i].typfunc,
! vattr);
!
! /*
! * no exceptions allowed
! */
! value = info->in.r.atts[i].func(vsrc);
! pfree(vsrc);
PyDict_SetItemString(dict, key, value);
Py_DECREF(value);
}
--- 1787,1793 ----
PyDict_SetItemString(dict, key, Py_None);
else
{
! value = (info->in.r.atts[i].func) (&info->in.r.atts[i], vattr);
PyDict_SetItemString(dict, key, value);
Py_DECREF(value);
}
***************
*** 1769,1777 ****
return dict;
}

static HeapTuple
! PLyMapping_ToTuple(PLyTypeInfo * info, PyObject * mapping)
{
TupleDesc desc;
HeapTuple tuple;
--- 1803,2017 ----
return dict;
}

+ static Datum
+ PLyObject_ToVoid(PLyProcedure *proc,
+ PLyObToDatum *arg,
+ PyObject *plrv,
+ bool *isnull)
+ {
+ /*
+ * If the function is declared to return void, the Python return value must
+ * be None. For void-returning functions, we also treat a None return value
+ * as a special "void datum" rather than NULL (as is the case for the
+ * non-void-returning functions).
+ */
+ if (plrv != Py_None)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("PL/Python function with return type \"void\" did not "
+ "return None")));
+
+ *isnull = false;
+ return (Datum) 0;
+ }
+
+ static Datum
+ PLyObject_ToBool(PLyProcedure *proc,
+ PLyObToDatum *arg,
+ PyObject *plrv,
+ bool *isnull)
+ {
+ bool rv;
+
+ if (plrv == Py_None)
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+
+ rv = PyObject_IsTrue(plrv);
+ *isnull = false;
+ return BoolGetDatum(rv);
+ }
+
+
+ static Datum
+ PLyObject_ToBytea(PLyProcedure *proc,
+ PLyObToDatum *arg,
+ PyObject *plrv,
+ bool *isnull)
+ {
+ PyObject *volatile plrv_so = NULL;
+ Datum rv;
+
+ if (plrv == Py_None)
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+
+ plrv_so = PyObject_Str(plrv);
+ if (!plrv_so)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("could not create string representation of Python "
+ "object in PL/Python function \"%s\" while creating "
+ "return value", proc->proname)));
+ }
+
+ PG_TRY();
+ {
+ char *plrv_sc = PyString_AsString(plrv_so);
+ size_t len = PyString_Size(plrv_so);
+ size_t size = len + VARHDRSZ;
+ bytea *result = (bytea*) palloc(size);
+
+ SET_VARSIZE(result, size);
+ memcpy(VARDATA(result), plrv_sc, len);
+ rv = PointerGetDatum(result);
+ }
+ PG_CATCH();
+ {
+ Py_XDECREF(plrv_so);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ Py_XDECREF(plrv_so);
+
+ *isnull = false;
+ return rv;
+ }
+
+ static Datum
+ PLyObject_ToText(PLyProcedure *proc,
+ PLyObToDatum *arg,
+ PyObject *plrv,
+ bool *isnull)
+ {
+ PyObject *volatile plrv_so = NULL;
+ Datum rv;
+
+ if (plrv == Py_None)
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+
+ plrv_so = PyObject_Str(plrv);
+ if (!plrv_so)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("could not create string representation of Python "
+ "object in PL/Python function \"%s\" while creating "
+ "return value", proc->proname)));
+ }
+
+ PG_TRY();
+ {
+ char *plrv_sc = PyString_AsString(plrv_so);
+ size_t len = PyString_Size(plrv_so);
+ size_t size = len + VARHDRSZ;
+ text *result;
+
+ if (strlen(plrv_sc) != (size_t) len)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("PL/Python function \"%s\" could not convert "
+ "Python object into text: expected string without "
+ "null bytes", proc->proname)));
+ }
+
+ result = (bytea*) palloc(size);
+ SET_VARSIZE(result, size);
+ memcpy(VARDATA(result), plrv_sc, len);
+ rv = PointerGetDatum(result);
+ }
+ PG_CATCH();
+ {
+ Py_XDECREF(plrv_so);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ Py_XDECREF(plrv_so);
+
+ *isnull = false;
+ return rv;
+ }
+
+ /*
+ * Generic conversion function:
+ * - Cast PyObject to cstring and cstring into postgres type.
+ */
+ static Datum
+ PLyObject_ToDatum(PLyProcedure *proc,
+ PLyObToDatum *arg,
+ PyObject *plrv,
+ bool *isnull)
+ {
+ PyObject *volatile plrv_so = NULL;
+ Datum rv;
+
+ if (plrv == Py_None)
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+
+ plrv_so = PyObject_Str(plrv);
+ if (!plrv_so)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("could not create string representation of Python "
+ "object in PL/Python function \"%s\" while creating "
+ "return value", proc->proname)));
+ }
+
+ PG_TRY();
+ {
+ char *plrv_sc = PyString_AsString(plrv_so);
+ size_t len = PyString_Size(plrv_so);
+
+ if (strlen(plrv_sc) != (size_t) len)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("PL/Python function \"%s\" could not convert "
+ "Python object into cstring: expected string without "
+ "null bytes", proc->proname)));
+ }
+ rv = InputFunctionCall(&arg->typfunc, plrv_sc, arg->typioparam, -1);
+ }
+ PG_CATCH();
+ {
+ Py_XDECREF(plrv_so);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ Py_XDECREF(plrv_so);
+
+ *isnull = false;
+ return rv;
+ }

static HeapTuple
! PLyMapping_ToTuple(PLyProcedure *proc, PyObject *mapping)
{
TupleDesc desc;
HeapTuple tuple;
***************
*** 1781,1840 ****

Assert(PyMapping_Check(mapping));

! desc = lookup_rowtype_tupdesc(info->out.d.typoid, -1);
! if (info->is_rowtype == 2)
! PLy_output_tuple_funcs(info, desc);
! Assert(info->is_rowtype == 1);

/* Build tuple */
values = palloc(sizeof(Datum) * desc->natts);
nulls = palloc(sizeof(bool) * desc->natts);
for (i = 0; i < desc->natts; ++i)
{
! char *key;
! PyObject *volatile value,
! *volatile so;

key = NameStr(desc->attrs[i]->attname);
! value = so = NULL;
PG_TRY();
{
value = PyMapping_GetItemString(mapping, key);
! if (value == Py_None)
{
- values[i] = (Datum) NULL;
- nulls[i] = true;
- }
- else if (value)
- {
- char *valuestr;
-
- so = PyObject_Str(value);
- if (so == NULL)
- PLy_elog(ERROR, "could not compute string representation of Python object");
- valuestr = PyString_AsString(so);
-
- values[i] = InputFunctionCall(&info->out.r.atts[i].typfunc
- ,valuestr
- ,info->out.r.atts[i].typioparam
- ,-1);
- Py_DECREF(so);
- so = NULL;
- nulls[i] = false;
- }
- else
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("key \"%s\" not found in mapping", key),
errhint("To return null in a column, "
! "add the value None to the mapping with the key named after the column.")));

Py_XDECREF(value);
value = NULL;
}
PG_CATCH();
{
- Py_XDECREF(so);
Py_XDECREF(value);
PG_RE_THROW();
}
--- 2021,2062 ----

Assert(PyMapping_Check(mapping));

! desc = lookup_rowtype_tupdesc(proc->result.out.d.typoid, -1);
! if (proc->result.is_rowtype == 2)
! PLy_output_tuple_funcs(&proc->result, desc);
! Assert(proc->result.is_rowtype == 1);

/* Build tuple */
values = palloc(sizeof(Datum) * desc->natts);
nulls = palloc(sizeof(bool) * desc->natts);
for (i = 0; i < desc->natts; ++i)
{
! char *key;
! PLyObToDatum *att;
! PyObject *volatile value;

+ att = &proc->result.out.r.atts[i];
key = NameStr(desc->attrs[i]->attname);
! value = NULL;
PG_TRY();
{
value = PyMapping_GetItemString(mapping, key);
! if (!value)
{
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("key \"%s\" not found in mapping", key),
errhint("To return null in a column, "
! "add the value None to the mapping with the "
! "key named after the column.")));
! }
! values[i] = (att->func) (proc, att, value, &nulls[i]);

Py_XDECREF(value);
value = NULL;
}
PG_CATCH();
{
Py_XDECREF(value);
PG_RE_THROW();
}
***************
*** 1851,1857 ****

static HeapTuple
! PLySequence_ToTuple(PLyTypeInfo * info, PyObject * sequence)
{
TupleDesc desc;
HeapTuple tuple;
--- 2073,2079 ----

static HeapTuple
! PLySequence_ToTuple(PLyProcedure *proc, PyObject *sequence)
{
TupleDesc desc;
HeapTuple tuple;
***************
*** 1866,1922 ****
* can ignore exceeding items or assume missing ones as null but to avoid
* plpython developer's errors we are strict here
*/
! desc = lookup_rowtype_tupdesc(info->out.d.typoid, -1);
if (PySequence_Length(sequence) != desc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("length of returned sequence did not match number of columns in row")));

! if (info->is_rowtype == 2)
! PLy_output_tuple_funcs(info, desc);
! Assert(info->is_rowtype == 1);

/* Build tuple */
values = palloc(sizeof(Datum) * desc->natts);
nulls = palloc(sizeof(bool) * desc->natts);
for (i = 0; i < desc->natts; ++i)
{
! PyObject *volatile value,
! *volatile so;

! value = so = NULL;
PG_TRY();
{
value = PySequence_GetItem(sequence, i);
Assert(value);
! if (value == Py_None)
! {
! values[i] = (Datum) NULL;
! nulls[i] = true;
! }
! else if (value)
! {
! char *valuestr;
!
! so = PyObject_Str(value);
! if (so == NULL)
! PLy_elog(ERROR, "could not compute string representation of Python object");
! valuestr = PyString_AsString(so);
! values[i] = InputFunctionCall(&info->out.r.atts[i].typfunc
! ,valuestr
! ,info->out.r.atts[i].typioparam
! ,-1);
! Py_DECREF(so);
! so = NULL;
! nulls[i] = false;
! }

Py_XDECREF(value);
value = NULL;
}
PG_CATCH();
{
- Py_XDECREF(so);
Py_XDECREF(value);
PG_RE_THROW();
}
--- 2088,2124 ----
* can ignore exceeding items or assume missing ones as null but to avoid
* plpython developer's errors we are strict here
*/
! desc = lookup_rowtype_tupdesc(proc->result.out.d.typoid, -1);
if (PySequence_Length(sequence) != desc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("length of returned sequence did not match number of columns in row")));

! if (proc->result.is_rowtype == 2)
! PLy_output_tuple_funcs(&proc->result, desc);
! Assert(proc->result.is_rowtype == 1);

/* Build tuple */
values = palloc(sizeof(Datum) * desc->natts);
nulls = palloc(sizeof(bool) * desc->natts);
for (i = 0; i < desc->natts; ++i)
{
! PLyObToDatum *att;
! PyObject *volatile value;

! att = &proc->result.out.r.atts[i];
! value = NULL;
PG_TRY();
{
value = PySequence_GetItem(sequence, i);
Assert(value);
! values[i] = (att->func) (proc, att, value, &nulls[i]);

Py_XDECREF(value);
value = NULL;
}
PG_CATCH();
{
Py_XDECREF(value);
PG_RE_THROW();
}
***************
*** 1933,1939 ****

static HeapTuple
! PLyObject_ToTuple(PLyTypeInfo * info, PyObject * object)
{
TupleDesc desc;
HeapTuple tuple;
--- 2135,2141 ----

static HeapTuple
! PLyObject_ToTuple(PLyProcedure *proc, PyObject *object)
{
TupleDesc desc;
HeapTuple tuple;
***************
*** 1941,1962 ****
bool *nulls;
volatile int i;

! desc = lookup_rowtype_tupdesc(info->out.d.typoid, -1);
! if (info->is_rowtype == 2)
! PLy_output_tuple_funcs(info, desc);
! Assert(info->is_rowtype == 1);

/* Build tuple */
values = palloc(sizeof(Datum) * desc->natts);
nulls = palloc(sizeof(bool) * desc->natts);
for (i = 0; i < desc->natts; ++i)
{
! char *key;
! PyObject *volatile value,
! *volatile so;

key = NameStr(desc->attrs[i]->attname);
! value = so = NULL;
PG_TRY();
{
value = PyObject_GetAttrString(object, key);
--- 2143,2165 ----
bool *nulls;
volatile int i;

! desc = lookup_rowtype_tupdesc(proc->result.out.d.typoid, -1);
! if (proc->result.is_rowtype == 2)
! PLy_output_tuple_funcs(&proc->result, desc);
! Assert(proc->result.is_rowtype == 1);

/* Build tuple */
values = palloc(sizeof(Datum) * desc->natts);
nulls = palloc(sizeof(bool) * desc->natts);
for (i = 0; i < desc->natts; ++i)
{
! char *key;
! PLyObToDatum *att;
! PyObject *volatile value;

+ att = &proc->result.out.r.atts[i];
key = NameStr(desc->attrs[i]->attname);
! value = NULL;
PG_TRY();
{
value = PyObject_GetAttrString(object, key);
***************
*** 1965,2000 ****
values[i] = (Datum) NULL;
nulls[i] = true;
}
! else if (value)
{
- char *valuestr;
-
- so = PyObject_Str(value);
- if (so == NULL)
- PLy_elog(ERROR, "could not compute string representation of Python object");
- valuestr = PyString_AsString(so);
- values[i] = InputFunctionCall(&info->out.r.atts[i].typfunc
- ,valuestr
- ,info->out.r.atts[i].typioparam
- ,-1);
- Py_DECREF(so);
- so = NULL;
- nulls[i] = false;
- }
- else
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
! errmsg("attribute \"%s\" does not exist in Python object", key),
errhint("To return null in a column, "
! "let the returned object have an attribute named "
! "after column with value None.")));

Py_XDECREF(value);
value = NULL;
}
PG_CATCH();
{
- Py_XDECREF(so);
Py_XDECREF(value);
PG_RE_THROW();
}
--- 2168,2190 ----
values[i] = (Datum) NULL;
nulls[i] = true;
}
! else if (!value)
{
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
! errmsg("key \"%s\" not found in object", key),
errhint("To return null in a column, "
! "add the value None to the mapping with the "
! "key named after the column.")));
! }
! else
! values[i] = (att->func) (proc, att, value, &nulls[i]);

Py_XDECREF(value);
value = NULL;
}
PG_CATCH();
{
Py_XDECREF(value);
PG_RE_THROW();
}
Index: expected/plpython_function.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/pl/plpython/expected/plpython_function.out,v
retrieving revision 1.12
diff -c -r1.12 plpython_function.out
*** expected/plpython_function.out 3 Apr 2009 16:59:42 -0000 1.12
--- expected/plpython_function.out 26 May 2009 22:58:52 -0000
***************
*** 450,452 ****
--- 450,470 ----
CREATE FUNCTION test_inout_params(first inout text) AS $$
return first + '_inout';
$$ LANGUAGE plpythonu;
+ CREATE FUNCTION test_type_conversion_bool(x bool) returns bool AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_char(x char) returns char AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_int2(x int2) returns int2 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_int4(x int4) returns int4 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_int8(x int8) returns int8 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_float4(x float4) returns float4 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_float8(x float8) returns float8 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_numeric(x numeric) returns numeric AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_text(x text) returns text AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_bytea(x bytea) returns bytea AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_marshal() returns bytea AS $$
+ import marshal
+ return marshal.dumps('hello world')
+ $$ language plpythonu;
+ CREATE FUNCTION test_type_unmarshal(x bytea) returns text AS $$
+ import marshal
+ return marshal.loads(x)
+ $$ language plpythonu;
Index: expected/plpython_test.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/pl/plpython/expected/plpython_test.out,v
retrieving revision 1.8
diff -c -r1.8 plpython_test.out
*** expected/plpython_test.out 3 Apr 2009 16:59:42 -0000 1.8
--- expected/plpython_test.out 26 May 2009 22:58:52 -0000
***************
*** 559,561 ****
--- 559,693 ----
test_in_inout
(1 row)

+ SELECT * FROM test_type_conversion_bool(true);
+ test_type_conversion_bool
+ ---------------------------
+ t
+ (1 row)
+
+ SELECT * FROM test_type_conversion_bool(false);
+ test_type_conversion_bool
+ ---------------------------
+ f
+ (1 row)
+
+ SELECT * FROM test_type_conversion_bool(null);
+ test_type_conversion_bool
+ ---------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_char('a');
+ test_type_conversion_char
+ ---------------------------
+ a
+ (1 row)
+
+ SELECT * FROM test_type_conversion_char(null);
+ test_type_conversion_char
+ ---------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_int2(100::int2);
+ test_type_conversion_int2
+ ---------------------------
+ 100
+ (1 row)
+
+ SELECT * FROM test_type_conversion_int2(null);
+ test_type_conversion_int2
+ ---------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_int4(100);
+ test_type_conversion_int4
+ ---------------------------
+ 100
+ (1 row)
+
+ SELECT * FROM test_type_conversion_int4(null);
+ test_type_conversion_int4
+ ---------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_int8(100);
+ test_type_conversion_int8
+ ---------------------------
+ 100
+ (1 row)
+
+ SELECT * FROM test_type_conversion_int8(null);
+ test_type_conversion_int8
+ ---------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_float4(100);
+ test_type_conversion_float4
+ -----------------------------
+ 100
+ (1 row)
+
+ SELECT * FROM test_type_conversion_float4(null);
+ test_type_conversion_float4
+ -----------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_float8(100);
+ test_type_conversion_float8
+ -----------------------------
+ 100
+ (1 row)
+
+ SELECT * FROM test_type_conversion_float8(null);
+ test_type_conversion_float8
+ -----------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_numeric(100);
+ test_type_conversion_numeric
+ ------------------------------
+ 100.0
+ (1 row)
+
+ SELECT * FROM test_type_conversion_numeric(null);
+ test_type_conversion_numeric
+ ------------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_text('hello world');
+ test_type_conversion_text
+ ---------------------------
+ hello world
+ (1 row)
+
+ SELECT * FROM test_type_conversion_text(null);
+ test_type_conversion_text
+ ---------------------------
+
+ (1 row)
+
+ SELECT * FROM test_type_conversion_bytea('hello world');
+ test_type_conversion_bytea
+ ----------------------------
+ hello world
+ (1 row)
+
+ SELECT * FROM test_type_conversion_bytea(null);
+ test_type_conversion_bytea
+ ----------------------------
+
+ (1 row)
+
+ SELECT test_type_unmarshal(x) FROM test_type_marshal() x;
+ test_type_unmarshal
+ ---------------------
+ hello world
+ (1 row)
+
Index: sql/plpython_function.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/pl/plpython/sql/plpython_function.sql,v
retrieving revision 1.12
diff -c -r1.12 plpython_function.sql
*** sql/plpython_function.sql 3 Apr 2009 16:59:43 -0000 1.12
--- sql/plpython_function.sql 26 May 2009 22:58:52 -0000
***************
*** 497,499 ****
--- 497,518 ----
CREATE FUNCTION test_inout_params(first inout text) AS $$
return first + '_inout';
$$ LANGUAGE plpythonu;
+
+ CREATE FUNCTION test_type_conversion_bool(x bool) returns bool AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_char(x char) returns char AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_int2(x int2) returns int2 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_int4(x int4) returns int4 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_int8(x int8) returns int8 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_float4(x float4) returns float4 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_float8(x float8) returns float8 AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_numeric(x numeric) returns numeric AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_text(x text) returns text AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_conversion_bytea(x bytea) returns bytea AS $$ return x $$ language plpythonu;
+ CREATE FUNCTION test_type_marshal() returns bytea AS $$
+ import marshal
+ return marshal.dumps('hello world')
+ $$ language plpythonu;
+ CREATE FUNCTION test_type_unmarshal(x bytea) returns text AS $$
+ import marshal
+ return marshal.loads(x)
+ $$ language plpythonu;
Index: sql/plpython_test.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/pl/plpython/sql/plpython_test.sql,v
retrieving revision 1.5
diff -c -r1.5 plpython_test.sql
*** sql/plpython_test.sql 3 Apr 2009 16:59:43 -0000 1.5
--- sql/plpython_test.sql 26 May 2009 22:58:52 -0000
***************
*** 149,151 ****
--- 149,176 ----
-- this doesn't work yet :-(
SELECT * FROM test_in_out_params_multi('test_in');
SELECT * FROM test_inout_params('test_in');
+
+ SELECT * FROM test_type_conversion_bool(true);
+ SELECT * FROM test_type_conversion_bool(false);
+ SELECT * FROM test_type_conversion_bool(null);
+ SELECT * FROM test_type_conversion_char('a');
+ SELECT * FROM test_type_conversion_char(null);
+ SELECT * FROM test_type_conversion_int2(100::int2);
+ SELECT * FROM test_type_conversion_int2(null);
+ SELECT * FROM test_type_conversion_int4(100);
+ SELECT * FROM test_type_conversion_int4(null);
+ SELECT * FROM test_type_conversion_int8(100);
+ SELECT * FROM test_type_conversion_int8(null);
+ SELECT * FROM test_type_conversion_float4(100);
+ SELECT * FROM test_type_conversion_float4(null);
+ SELECT * FROM test_type_conversion_float8(100);
+ SELECT * FROM test_type_conversion_float8(null);
+ SELECT * FROM test_type_conversion_numeric(100);
+ SELECT * FROM test_type_conversion_numeric(null);
+ SELECT * FROM test_type_conversion_text('hello world');
+ SELECT * FROM test_type_conversion_text(null);
+ SELECT * FROM test_type_conversion_bytea('hello world');
+ SELECT * FROM test_type_conversion_bytea(null);
+ SELECT test_type_unmarshal(x) FROM test_type_marshal() x;
+
+


From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: pgsql-hackers(at)postgresql(dot)org
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-05-27 10:11:36
Message-ID: 200905271311.37085.peter_e@gmx.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On Wednesday 27 May 2009 02:07:33 Caleb Welton wrote:
> Patch for plpythonu
>
> Primary motivation of the attached patch is to support handling bytea
> conversion allowing for embedded nulls, which in turn allows for supporting
> the marshal module.
>
> Secondary motivation is slightly improved performance for conversion
> routines of basic datatypes that have simple mappings between
> postgres/python.
>
> Primary design is to change the conversion routines from being based on
> cstrings to datums, eg: PLyBool_FromString(const char *) =>
> PLyBool_FromBool(PLyDatumToOb, Datum);

Makes sense; please add it to the next commit fest.

Are there any compatibility implications, that is, do any of the conversions
work differently from before (except when they were broken before, as in the
case of bytea)?


From: Caleb Welton <cwelton(at)greenplum(dot)com>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-05-27 18:53:31
Message-ID: C642DA3B.2117%cwelton@greenplum.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

All data types should map to the same python object types as they did before, so int32->PyInt, int64->PyLong, numeric->PyFloat, etc.

The conversion routines are slightly different, eg int32 is initialized via PyInt_FromLong() instead of first converting the integer to a string then calling PyInt_FromString, this is a little faster, but shouldn't result in differences, if anything this should be more correct.

Previously numeric->string->PyFloat_FromString, now numeric->double->PyFloat_FromDouble, which makes use of postgres numeric->double routines rather than python string->double routines, and it is conceivable that there are precision variations between the two. My own feeling on the matter is that PyFloat is the wrong mapping for numeric, but I didn't want to muddy this patch by changing that.

The main compatibility issue is with Python Strings that contain embedded nulls. Conversion to bytea will now work correctly and build the bytea using PyString_FromStringAndSize instead of PyString_FromString. Other datatypes will now error if the PyString contains embedded nulls, previously they would silently truncate.

Thanks for the comments,
Caleb

On 5/27/09 3:11 AM, "Peter Eisentraut" <peter_e(at)gmx(dot)net> wrote:

On Wednesday 27 May 2009 02:07:33 Caleb Welton wrote:
> Patch for plpythonu
>
> Primary motivation of the attached patch is to support handling bytea
> conversion allowing for embedded nulls, which in turn allows for supporting
> the marshal module.
>
> Secondary motivation is slightly improved performance for conversion
> routines of basic datatypes that have simple mappings between
> postgres/python.
>
> Primary design is to change the conversion routines from being based on
> cstrings to datums, eg: PLyBool_FromString(const char *) =>
> PLyBool_FromBool(PLyDatumToOb, Datum);

Makes sense; please add it to the next commit fest.

Are there any compatibility implications, that is, do any of the conversions
work differently from before (except when they were broken before, as in the
case of bytea)?


From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: pgsql-hackers(at)postgresql(dot)org
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-05-27 21:00:21
Message-ID: 200905280000.21713.peter_e@gmx.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On Wednesday 27 May 2009 21:53:31 Caleb Welton wrote:
> Previously numeric->string->PyFloat_FromString, now
> numeric->double->PyFloat_FromDouble, which makes use of postgres
> numeric->double routines rather than python string->double routines, and it
> is conceivable that there are precision variations between the two. My own
> feeling on the matter is that PyFloat is the wrong mapping for numeric, but
> I didn't want to muddy this patch by changing that.

Yeah, that one had me wondering for a while as well, but as you say it is
better to address that separately.


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>
Cc: pgsql-hackers(at)postgresql(dot)org, Caleb Welton <cwelton(at)greenplum(dot)com>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-05-27 21:07:27
Message-ID: 18095.1243458447@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Peter Eisentraut <peter_e(at)gmx(dot)net> writes:
> On Wednesday 27 May 2009 21:53:31 Caleb Welton wrote:
>> ... My own
>> feeling on the matter is that PyFloat is the wrong mapping for numeric, but
>> I didn't want to muddy this patch by changing that.

> Yeah, that one had me wondering for a while as well, but as you say it is
> better to address that separately.

That was making me itch as well, in my very cursory look at the patch.
Does Python have a saner mapping for it?

regards, tom lane


From: Caleb Welton <cwelton(at)greenplum(dot)com>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Peter Eisentraut <peter_e(at)gmx(dot)net>
Cc: "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-05-27 21:25:58
Message-ID: C642FDF6.2132%cwelton@greenplum.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Yes, in Python >= 2.4 there is the Decimal datatype.

However, unlike the other mappings employed by plpythonu, Decimal requires an import statement to be in scope.

-Caleb

On 5/27/09 2:07 PM, "Tom Lane" <tgl(at)sss(dot)pgh(dot)pa(dot)us> wrote:

Peter Eisentraut <peter_e(at)gmx(dot)net> writes:
> On Wednesday 27 May 2009 21:53:31 Caleb Welton wrote:
>> ... My own
>> feeling on the matter is that PyFloat is the wrong mapping for numeric, but
>> I didn't want to muddy this patch by changing that.

> Yeah, that one had me wondering for a while as well, but as you say it is
> better to address that separately.

That was making me itch as well, in my very cursory look at the patch.
Does Python have a saner mapping for it?

regards, tom lane


From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-07-16 14:16:45
Message-ID: 200907161716.45118.peter_e@gmx.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On Wednesday 27 May 2009 02:07:33 Caleb Welton wrote:
> Patch for plpythonu

This patch doesn't apply; I think it got mangled during email transport.
(Tabs changed to spaces, it looks like.) Could you resend the patch as a
separate attachment in a way that it doesn't get mangled?


From: Caleb Welton <cwelton(at)greenplum(dot)com>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-07-16 18:17:10
Message-ID: C684BCB8.2AC8%cwelton@greenplum.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Sorry about that. Here it is again as an attachment.

-Caleb

On 7/16/09 7:16 AM, "Peter Eisentraut" <peter_e(at)gmx(dot)net> wrote:

On Wednesday 27 May 2009 02:07:33 Caleb Welton wrote:
> Patch for plpythonu

This patch doesn't apply; I think it got mangled during email transport.
(Tabs changed to spaces, it looks like.) Could you resend the patch as a
separate attachment in a way that it doesn't get mangled?

Attachment Content-Type Size
plpython_bytea.patch application/octet-stream 37.3 KB

From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: Caleb Welton <cwelton(at)greenplum(dot)com>
Cc: pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-15 23:44:20
Message-ID: 1250379860.18992.13.camel@vanquo.pezone.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On tis, 2009-05-26 at 16:07 -0700, Caleb Welton wrote:
> Patch for plpythonu
>
> Primary motivation of the attached patch is to support handling bytea
> conversion allowing for embedded nulls, which in turn allows for
> supporting the marshal module.
>
> Secondary motivation is slightly improved performance for conversion
> routines of basic datatypes that have simple mappings between
> postgres/python.
>
> Primary design is to change the conversion routines from being based
> on cstrings to datums, eg:
> PLyBool_FromString(const char *) =>
> PLyBool_FromBool(PLyDatumToOb, Datum);

I have reworked this patch a bit and extended the plpython test suite
around it. Current copy attached.

The remaining problem is that the patch loses domain checking on the
return types, because some paths no longer go through the data type's
input function. I have marked these places as FIXME, and the regression
tests also contain a failing test case for this.

What's needed here, I think, is an API that takes a datum plus type
information and checks whether the datum is valid within the domain. I
haven't found one that is exported, but maybe someone could give a tip.

Attachment Content-Type Size
plpython-datatypes.patch text/x-patch 31.7 KB

From: Pierre Frédéric Caillaud <lists(at)peufeu(dot)com>
To: "Peter Eisentraut" <peter_e(at)gmx(dot)net>, "Caleb Welton" <cwelton(at)greenplum(dot)com>
Cc: pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-16 12:35:40
Message-ID: op.uyq89qqfcke6l8@soyouz
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers


>> Primary motivation of the attached patch is to support handling bytea
>> conversion allowing for embedded nulls, which in turn allows for
>> supporting the marshal module.
>>
>> Secondary motivation is slightly improved performance for conversion
>> routines of basic datatypes that have simple mappings between
>> postgres/python.
>>
>> Primary design is to change the conversion routines from being based
>> on cstrings to datums, eg:
>> PLyBool_FromString(const char *) =>
>> PLyBool_FromBool(PLyDatumToOb, Datum);
>
> I have reworked this patch a bit and extended the plpython test suite
> around it. Current copy attached.
>
> The remaining problem is that the patch loses domain checking on the
> return types, because some paths no longer go through the data type's
> input function. I have marked these places as FIXME, and the regression
> tests also contain a failing test case for this.
>
> What's needed here, I think, is an API that takes a datum plus type
> information and checks whether the datum is valid within the domain. I
> haven't found one that is exported, but maybe someone could give a tip.

I see an intersection between the work I'm currently doing on COPY BINARY
and this.

Basically if you have an INT, you aren't going to make lots of checks.

However, for a TEXT, postgres needs to reject it if it has a NULL in it
(which doesn't bother Python at all), or if it is has chars which are not
valid in the current encoding, etc.
Many other types like TIMESTAMP have checks which are absolutely necessary
for correctness...

> What's needed here, I think, is an API that takes a datum plus type
> information and checks whether the datum is valid within the domain. I
> haven't found one that is exported, but maybe someone could give a tip.

Problems :

- If the data you're trying to put in the Datum doesn't fit (example : out
of range error, varchar too small, etc), and you want a
datum-type-specific function to check your datum and reject it, how are
you going to build the datum ? perhaps you can't, since your value doesn't
fit. It's a chicken and egg problem : the check function that you expect
to reject your invalid datum will not know it's invalid, since you've
trimmed it at the edges to make it fit in the required Datum type...

- you are going to build a datum that is perhaps valid, and perhaps not,
and send this to a function... having known-invalid datums moving around
could be not such a good idea...

Why not use the copy binary format to communicate between python and pg ?

-> you write code to serialize python objects to binary form
-> you call the recv function to get a postgres datum
-> recv function throws an error if there is any problem

-> as a bonus, you release your python object <-> postgres binary code as
a separate library so people can use it to output data readable by COPY
BINARY and parse COPY BINARY dumps.


From: James Pye <lists(at)jwp(dot)name>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-16 20:34:26
Message-ID: 2020360E-7A66-45F5-BEC3-3137DF379C84@jwp.name
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On Aug 15, 2009, at 4:44 PM, Peter Eisentraut wrote:
> What's needed here, I think, is an API that takes a datum plus type
> information and checks whether the datum is valid within the domain.

/agree =)


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-17 14:42:49
Message-ID: 503.1250520169@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Peter Eisentraut <peter_e(at)gmx(dot)net> writes:
> The remaining problem is that the patch loses domain checking on the
> return types, because some paths no longer go through the data type's
> input function. I have marked these places as FIXME, and the regression
> tests also contain a failing test case for this.

For the record, I think this entire patch is a bad idea. PLs should not
be so much in bed with the internal representation of datatypes. To
take just one example, this *will* break when/if we change text to carry
some internal locale indicator. There has been absolutely zero evidence
presented to justify that there's a need to break abstraction to gain
performance in this area.

> What's needed here, I think, is an API that takes a datum plus type
> information and checks whether the datum is valid within the domain. I
> haven't found one that is exported, but maybe someone could give a tip.

There isn't one, but maybe you could expose domain_state_setup and
domain_check_input, or some simple wrapper around them.

regards, tom lane


From: Andrew Dunstan <andrew(at)dunslane(dot)net>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
Cc: Peter Eisentraut <peter_e(at)gmx(dot)net>, Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-17 15:04:02
Message-ID: 4A897162.5020807@dunslane.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Tom Lane wrote:
>
> For the record, I think this entire patch is a bad idea. PLs should not
> be so much in bed with the internal representation of datatypes.
>

I thought there was some suggestion in the past that we should move some
in that direction. The discussion context was Theo Schlossnagle's
complaint about the overhead of passing bytea to and from PLPerl,
although that might be ameliorated by the hex gadget. The other major
case that would benefit would be passing Array values as the PL's native
array type, and Composite values as the PL's associative array type.
That would save PL users a lot of highly error-prone coding
deconstructing the text representation of such objects.

cheers

andrew


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Andrew Dunstan <andrew(at)dunslane(dot)net>
Cc: Peter Eisentraut <peter_e(at)gmx(dot)net>, Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-17 15:12:12
Message-ID: 965.1250521932@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Andrew Dunstan <andrew(at)dunslane(dot)net> writes:
> Tom Lane wrote:
>> For the record, I think this entire patch is a bad idea. PLs should not
>> be so much in bed with the internal representation of datatypes.

> I thought there was some suggestion in the past that we should move some
> in that direction.

There's been some discussion about functional improvements like
translating arrays to arrays. I don't know what we'd have to do
to manage that, but possibly some API extensions to the array code
would make it feasible without violating abstractions. The present
patch, however, doesn't appear to have any reason to live other than an
undocumented amount of performance improvement. My feeling about that
is if you're concerned about micro-performance, why are you coding in
python to begin with? It isn't the best choice out there.

regards, tom lane


From: Alvaro Herrera <alvherre(at)commandprompt(dot)com>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-17 15:55:05
Message-ID: 20090817155505.GA4782@alvh.no-ip.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Peter Eisentraut wrote:

> I have reworked this patch a bit and extended the plpython test suite
> around it. Current copy attached.

I think the errcontext bits should be committed separately to get them
out of the way (and to ensure that they get in, regardless of objections
to other parts of the patch).

--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support


From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-17 22:10:33
Message-ID: 1250547033.22690.13.camel@vanquo.pezone.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On mån, 2009-08-17 at 10:42 -0400, Tom Lane wrote:
> For the record, I think this entire patch is a bad idea. PLs should not
> be so much in bed with the internal representation of datatypes. To
> take just one example, this *will* break when/if we change text to carry
> some internal locale indicator. There has been absolutely zero evidence
> presented to justify that there's a need to break abstraction to gain
> performance in this area.

The motivation for this patch has nothing to do with performance. The
point is to pass data types into and out of PL/Python sensibly. In
particular, passing bytea into and out of PL/Python is currently
completely broken, in the sense that what you get in Python is not a
byte string that you can process sensibly.

We could argue that peeking inside the internal representation of data
types might be inappropriate. In which case the solution would be to
run the data through the data type output function and have PL/Python
parse that back in. That would just be a localized change in the patch,
however. (It might be less than ideal for passing float types,
perhaps.) We do, however, expose a data types binary format through the
binary protocol, so perhaps we should be using the send/recv functions
instead of input/output. Which would require hardcoding the bytea
binary format, at least. Either of these solutions would probably solve
the domains problem, though.

Note also that we have historically broken the bytea text format twice
as often as the bytea binary format. ;-)


From: Pavel Stehule <pavel(dot)stehule(at)gmail(dot)com>
To: Peter Eisentraut <peter_e(at)gmx(dot)net>
Cc: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-18 04:44:33
Message-ID: 162867790908172144n1105b10ga789a1b2eb61205c@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

2009/8/18 Peter Eisentraut <peter_e(at)gmx(dot)net>:
> On mån, 2009-08-17 at 10:42 -0400, Tom Lane wrote:
>> For the record, I think this entire patch is a bad idea.  PLs should not
>> be so much in bed with the internal representation of datatypes.  To
>> take just one example, this *will* break when/if we change text to carry
>> some internal locale indicator.  There has been absolutely zero evidence
>> presented to justify that there's a need to break abstraction to gain
>> performance in this area.
>

I thing, so communication based on text type is bad. Maybe we should
to use binary communication based on send and recv function? It's
should be better and maybe stable than direct transfer - but maybe
little bit slower.

> The motivation for this patch has nothing to do with performance.  The
> point is to pass data types into and out of PL/Python sensibly.  In
> particular, passing bytea into and out of PL/Python is currently
> completely broken, in the sense that what you get in Python is not a
> byte string that you can process sensibly.
>

I thing so bytea should be very well optimized. You can expect, so
there be moved bigger block of data.

regards
Pavel
> We could argue that peeking inside the internal representation of data
> types might be inappropriate.  In which case the solution would be to
> run the data through the data type output function and have PL/Python
> parse that back in.  That would just be a localized change in the patch,
> however.  (It might be less than ideal for passing float types,
> perhaps.)  We do, however, expose a data types binary format through the
> binary protocol, so perhaps we should be using the send/recv functions
> instead of input/output.  Which would require hardcoding the bytea
> binary format, at least.  Either of these solutions would probably solve
> the domains problem, though.
>
> Note also that we have historically broken the bytea text format twice
> as often as the bytea binary format. ;-)
>
>
> --
> Sent via pgsql-hackers mailing list (pgsql-hackers(at)postgresql(dot)org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
>


From: Caleb Welton <cwelton(at)greenplum(dot)com>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Andrew Dunstan <andrew(at)dunslane(dot)net>
Cc: Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-22 10:45:24
Message-ID: C6B51A54.2F9D%cwelton@greenplum.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

As documented in the patch, the primary motivation was support of BYTEA datatype, which when cast through cstring was truncating python strings with embedded nulls,
performance was only a secondary consideration.

Regards,
Caleb

(Sorry for my slow entry on this thread, I'm on vacation right now.)

On 8/17/09 8:12 AM, "Tom Lane" <tgl(at)sss(dot)pgh(dot)pa(dot)us> wrote:

Andrew Dunstan <andrew(at)dunslane(dot)net> writes:
> Tom Lane wrote:
>> For the record, I think this entire patch is a bad idea. PLs should not
>> be so much in bed with the internal representation of datatypes.

> I thought there was some suggestion in the past that we should move some
> in that direction.

There's been some discussion about functional improvements like
translating arrays to arrays. I don't know what we'd have to do
to manage that, but possibly some API extensions to the array code
would make it feasible without violating abstractions. The present
patch, however, doesn't appear to have any reason to live other than an
undocumented amount of performance improvement. My feeling about that
is if you're concerned about micro-performance, why are you coding in
python to begin with? It isn't the best choice out there.

regards, tom lane


From: Greg Stark <gsstark(at)mit(dot)edu>
To: Caleb Welton <cwelton(at)greenplum(dot)com>
Cc: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Andrew Dunstan <andrew(at)dunslane(dot)net>, Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-22 11:35:48
Message-ID: 407d949e0908220435v5ba7c72erb4290d7cc0f4cdab@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On Sat, Aug 22, 2009 at 11:45 AM, Caleb Welton<cwelton(at)greenplum(dot)com> wrote:
> As documented in the patch, the primary motivation was support of BYTEA
> datatype, which when cast through cstring was truncating python strings with
> embedded nulls,
> performance was only a secondary consideration.

The alternative to attaching to the internal representation would be
to marshal and unmarshal the text representation where nuls are
escaped as \000.

However I dispute this this is "micro-performance" that we're talking
about. On any given small datum it may be a small incremental amount
of time but it's not incremental time that matters, it's aggregate. If
you're processing 1TB of data and you have to marshal and unmarshal
all 1TB it doesn't matter that you're doing it in 100 byte chunks. And
in any case there are plenty of people throwing around multi-megabyte
bytea blobs and having to marshal and unmarshal them every time they
go from the database into a PL or back would be a noticeable delay and
risk of out-of-memory errors.

If we want PLs to not be overly in bed with Postgres data types then
the way to do it is to have data types provide abstract methods for
accessing their internals. At least for bytea and text that would be
fairly straightforward. For numeric I don't see that it would really
buy much since it wouldn't really let us completely change
representations.

--
greg
http://mit.edu/~gsstark/resume.pdf


From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Greg Stark <gsstark(at)mit(dot)edu>
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>, Andrew Dunstan <andrew(at)dunslane(dot)net>, Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-22 14:03:22
Message-ID: 8621.1250949802@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

Greg Stark <gsstark(at)mit(dot)edu> writes:
> On Sat, Aug 22, 2009 at 11:45 AM, Caleb Welton<cwelton(at)greenplum(dot)com> wrote:
>> As documented in the patch, the primary motivation was support of BYTEA
>> datatype, which when cast through cstring was truncating python strings with
>> embedded nulls,
>> performance was only a secondary consideration.

> The alternative to attaching to the internal representation would be
> to marshal and unmarshal the text representation where nuls are
> escaped as \000.

I don't actually have a problem with depending on the internal
representation of bytea. What I'm unhappy about is that (despite
Caleb's assertions that this is only about bytea) the patch proceeds
to make plpython intimate with the internal representation of a bunch
of *other* datatypes, some of which we have good reason to think may
change in future. If it were only touching bytea I would not have
complained.

regards, tom lane


From: Caleb Welton <cwelton(at)greenplum(dot)com>
To: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Greg Stark <gsstark(at)mit(dot)edu>
Cc: Andrew Dunstan <andrew(at)dunslane(dot)net>, Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-22 16:44:00
Message-ID: C6B56E60.2FA6%cwelton@greenplum.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

I didn't say that it _only_ affects bytea, I said that was the _primary motivation_ for it.

Converting from postgres=>python this change affects boolean, float4, float8, numeric, int16, int32, int64, text, and bytea. The code to handle this goes through DatumGetXXX for the native C type for the datatype, with the exception of the Varlena types (special case) and Numeric which calls numeric_float8() to convert the numeric to a native C double precision float. As mentioned in the original post I do not think that this is appropriate for numeric, and I would prefer a better mapping, but this was a pre-existing issue and is not a change in behavior for the patch. Since this is a separate issue I opted not to change it to keep the patch concise.

Converting from python=>postgres this change effects void, bool, bytea, and text.

The reason for this asymmetry is that there is not a 1:1 mapping of Postgres datatypes to Python datatypes and conciseness of the patch.

All other datatypes (including arrays unfortunately) go through the same text input functions that they did before.

Of the above I would expect the only type that we would have good reason to expect to change would be numeric, and this patch _doesn't_ rely on it's internal representation: it calls numeric_float8().

I think it would be good to have mappings for other datatypes, depending on internal representation or not, but thought that was beyond the scope of the patch.

Regards,
Caleb

On 8/22/09 7:03 AM, "Tom Lane" <tgl(at)sss(dot)pgh(dot)pa(dot)us> wrote:

Greg Stark <gsstark(at)mit(dot)edu> writes:
> On Sat, Aug 22, 2009 at 11:45 AM, Caleb Welton<cwelton(at)greenplum(dot)com> wrote:
>> As documented in the patch, the primary motivation was support of BYTEA
>> datatype, which when cast through cstring was truncating python strings with
>> embedded nulls,
>> performance was only a secondary consideration.

> The alternative to attaching to the internal representation would be
> to marshal and unmarshal the text representation where nuls are
> escaped as \000.

I don't actually have a problem with depending on the internal
representation of bytea. What I'm unhappy about is that (despite
Caleb's assertions that this is only about bytea) the patch proceeds
to make plpython intimate with the internal representation of a bunch
of *other* datatypes, some of which we have good reason to think may
change in future. If it were only touching bytea I would not have
complained.

regards, tom lane


From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: Alvaro Herrera <alvherre(at)commandprompt(dot)com>
Cc: Caleb Welton <cwelton(at)greenplum(dot)com>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-25 12:45:22
Message-ID: 1251204322.2432.7.camel@vanquo.pezone.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On mån, 2009-08-17 at 11:55 -0400, Alvaro Herrera wrote:
> Peter Eisentraut wrote:
>
> > I have reworked this patch a bit and extended the plpython test suite
> > around it. Current copy attached.
>
> I think the errcontext bits should be committed separately to get them
> out of the way (and to ensure that they get in, regardless of objections
> to other parts of the patch).

Done that now.


From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: Caleb Welton <cwelton(at)greenplum(dot)com>
Cc: pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-08-31 20:41:45
Message-ID: 1251751305.20938.10.camel@vanquo.pezone.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On sön, 2009-08-16 at 02:44 +0300, Peter Eisentraut wrote:
> The remaining problem is that the patch loses domain checking on the
> return types, because some paths no longer go through the data type's
> input function. I have marked these places as FIXME, and the regression
> tests also contain a failing test case for this.
>
> What's needed here, I think, is an API that takes a datum plus type
> information and checks whether the datum is valid within the domain. I
> haven't found one that is exported, but maybe someone could give a tip.

Got that fixed now. Updated patch is attached. I will sleep over it,
but I think it's good to go.

Attachment Content-Type Size
plpython-datatypes.patch text/x-patch 28.2 KB

From: Peter Eisentraut <peter_e(at)gmx(dot)net>
To: Caleb Welton <cwelton(at)greenplum(dot)com>
Cc: pgsql-hackers(at)postgresql(dot)org
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-09-09 19:00:32
Message-ID: 1252522832.17405.0.camel@vanquo.pezone.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On mån, 2009-08-31 at 23:41 +0300, Peter Eisentraut wrote:
> On sön, 2009-08-16 at 02:44 +0300, Peter Eisentraut wrote:
> > The remaining problem is that the patch loses domain checking on the
> > return types, because some paths no longer go through the data type's
> > input function. I have marked these places as FIXME, and the regression
> > tests also contain a failing test case for this.
> >
> > What's needed here, I think, is an API that takes a datum plus type
> > information and checks whether the datum is valid within the domain. I
> > haven't found one that is exported, but maybe someone could give a tip.
>
> Got that fixed now. Updated patch is attached. I will sleep over it,
> but I think it's good to go.

committed


From: Hannu Krosing <hannu(at)2ndQuadrant(dot)com>
To: Caleb Welton <cwelton(at)greenplum(dot)com>
Cc: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Peter Eisentraut <peter_e(at)gmx(dot)net>, "pgsql-hackers(at)postgresql(dot)org" <pgsql-hackers(at)postgresql(dot)org>
Subject: Re: [PATCH] plpythonu datatype conversion improvements
Date: 2009-09-09 21:45:35
Message-ID: 1252532735.4080.31.camel@hvost1700
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-hackers

On Wed, 2009-05-27 at 14:25 -0700, Caleb Welton wrote:
> Yes, in Python >= 2.4 there is the Decimal datatype.
>
> However, unlike the other mappings employed by plpythonu, Decimal
> requires an import statement to be in scope.

adding it as already-imported module should not be hard

I think that moving to saner mappings should at least be discussed

and even if it is not in scope for the user-defined function body there
is nothing that prevents one from using it for conversion.

The Decimal _type_ needs not to be in scope for using Decimal
_instances_

maybe this should/could be controlled by a GUC.

btw, can we currently use funtions in setting GUC parameters ?

if we can , then we could define some python environment initializing
function and then do

ALTER USER xxx SET pyinit = initialise_python_for_xxx()

> -Caleb
>
> On 5/27/09 2:07 PM, "Tom Lane" <tgl(at)sss(dot)pgh(dot)pa(dot)us> wrote:
>
> Peter Eisentraut <peter_e(at)gmx(dot)net> writes:
> > On Wednesday 27 May 2009 21:53:31 Caleb Welton wrote:
> >> ... My own
> >> feeling on the matter is that PyFloat is the wrong mapping
> for numeric, but
> >> I didn't want to muddy this patch by changing that.
>
> > Yeah, that one had me wondering for a while as well, but as
> you say it is
> > better to address that separately.
>
> That was making me itch as well, in my very cursory look at
> the patch.
> Does Python have a saner mapping for it?
>
> regards, tom lane
>
--
Hannu Krosing http://www.2ndQuadrant.com
PostgreSQL Scalability and Availability
Services, Consulting and Training