Index: src/backend/utils/adt/regexp.c
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/backend/utils/adt/regexp.c,v
retrieving revision 1.68
diff -c -r1.68 regexp.c
*** src/backend/utils/adt/regexp.c	5 Jan 2007 22:19:41 -0000	1.68
--- src/backend/utils/adt/regexp.c	8 Feb 2007 00:04:08 -0000
***************
*** 29,36 ****
--- 29,39 ----
   */
  #include "postgres.h"
  
+ #include "funcapi.h"
+ #include "access/heapam.h"
  #include "regex/regex.h"
  #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
  #include "utils/guc.h"
  
  
***************
*** 75,80 ****
--- 78,127 ----
  	regex_t		cre_re;			/* the compiled regular expression */
  } cached_re_str;
  
+ typedef struct re_comp_flags {
+ 	bool		  return_pre_and_post;
+ 	bool		  glob;
+ 	int			  cflags;
+ } re_comp_flags;
+ 
+ typedef struct regexp_matches_ctx {
+ 	text		* orig_str;
+ 	size_t		  orig_len;
+ 	pg_wchar	* wide_str;
+ 	size_t		  wide_len;
+ 	regex_t		* cpattern;
+ 	size_t		  offset;
+ 	regmatch_t	* pmatch;
+ 
+ 	/* flag options */
+ 	re_comp_flags flags;
+ 
+ 	/* return type info */
+ 	TupleDesc	  rettupdesc;
+ 	Oid			  rettype;
+ 	TypeFuncClass typefunc;
+ 
+ 	/* text type info */
+ 	Oid		  param_type;
+ 	int16		  typlen;
+ 	bool		  typbyval;
+ 	char		  typalign;
+ } regexp_matches_ctx;
+ 
+ typedef struct regexp_split_ctx {
+ 	text		* orig_str;
+ 	size_t		  orig_len;
+ 	pg_wchar	* wide_str;
+ 	size_t		  wide_len;
+ 	regex_t		* cpattern;
+ 	regmatch_t	  match;
+ 	size_t		  offset;
+ 
+ 	/* flag options */
+ 	re_comp_flags flags;
+ } regexp_split_ctx;
+ 
+ 
  static int	num_res = 0;		/* # of cached re's */
  static cached_re_str re_array[MAX_CACHED_RES];	/* cached re's */
  
***************
*** 191,238 ****
  }
  
  /*
!  * RE_compile_and_execute - compile and execute a RE
   *
   * Returns TRUE on match, FALSE on no match
   *
!  *	text_re --- the pattern, expressed as an *untoasted* TEXT object
!  *	dat --- the data to match against (need not be null-terminated)
!  *	dat_len --- the length of the data string
!  *	cflags --- compile options for the pattern
   *	nmatch, pmatch	--- optional return area for match details
   *
!  * Both pattern and data are given in the database encoding.  We internally
!  * convert to array of pg_wchar which is what Spencer's regex package wants.
   */
  static bool
! RE_compile_and_execute(text *text_re, char *dat, int dat_len,
! 					   int cflags, int nmatch, regmatch_t *pmatch)
  {
- 	pg_wchar   *data;
- 	size_t		data_len;
  	int			regexec_result;
- 	regex_t    *re;
  	char		errMsg[100];
  
- 	/* Convert data string to wide characters */
- 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
- 	data_len = pg_mb2wchar_with_len(dat, data, dat_len);
- 
- 	/* Compile RE */
- 	re = RE_compile_and_cache(text_re, cflags);
- 
  	/* Perform RE match and return result */
  	regexec_result = pg_regexec(re,
  								data,
  								data_len,
! 								0,
  								NULL,	/* no details */
  								nmatch,
  								pmatch,
  								0);
  
- 	pfree(data);
- 
  	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
  	{
  		/* re failed??? */
--- 238,273 ----
  }
  
  /*
!  * RE_wchar_execute - execute a RE
   *
   * Returns TRUE on match, FALSE on no match
   *
!  *	re --- the compiled pattern as returned by RE_compile_and_cache
!  *	data --- the data to match against (need not be null-terminated)
!  *	data_len --- the length of the data string
!  *	start_search -- the offset in the data to start searching
   *	nmatch, pmatch	--- optional return area for match details
   *
!  * Data is given as array of pg_wchar which is what Spencer's regex package
!  * wants.
   */
  static bool
! RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, size_t start_search,
! 					   int nmatch, regmatch_t *pmatch)
  {
  	int			regexec_result;
  	char		errMsg[100];
  
  	/* Perform RE match and return result */
  	regexec_result = pg_regexec(re,
  								data,
  								data_len,
! 								start_search,
  								NULL,	/* no details */
  								nmatch,
  								pmatch,
  								0);
  
  	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
  	{
  		/* re failed??? */
***************
*** 245,250 ****
--- 280,391 ----
  	return (regexec_result == REG_OKAY);
  }
  
+ /*
+  * RE_execute - execute a RE
+  *
+  * Returns TRUE on match, FALSE on no match
+  *
+  *	re --- the compiled pattern as returned by RE_compile_and_cache
+  *	dat --- the data to match against (need not be null-terminated)
+  *	dat_len --- the length of the data string
+  *	nmatch, pmatch	--- optional return area for match details
+  *
+  * Data is given in the database encoding.  We internally
+  * convert to array of pg_wchar which is what Spencer's regex package wants.
+  */
+ static bool
+ RE_execute(regex_t *re, char *dat, int dat_len,
+ 					   int nmatch, regmatch_t *pmatch)
+ {
+ 	pg_wchar   *data;
+ 	size_t		data_len;
+ 	bool		match;
+ 
+ 	/* Convert data string to wide characters */
+ 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+ 	data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+ 
+ 	/* Perform RE match and return result */
+ 	match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
+ 	pfree(data);
+ 	return match;
+ }
+ 
+ /*
+  * RE_compile_and_execute - compile and execute a RE
+  *
+  * Returns TRUE on match, FALSE on no match
+  *
+  *	text_re --- the pattern, expressed as an *untoasted* TEXT object
+  *	dat --- the data to match against (need not be null-terminated)
+  *	dat_len --- the length of the data string
+  *	cflags --- compile options for the pattern
+  *	nmatch, pmatch	--- optional return area for match details
+  *
+  * Both pattern and data are given in the database encoding.  We internally
+  * convert to array of pg_wchar which is what Spencer's regex package wants.
+  */
+ static bool
+ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ 					   int cflags, int nmatch, regmatch_t *pmatch)
+ {
+ 	regex_t    *re;
+ 
+ 	/* Compile RE */
+ 	re = RE_compile_and_cache(text_re, cflags);
+ 
+ 	return RE_execute(re, dat, dat_len, nmatch, pmatch);
+ }
+ 
+ static void parse_re_comp_flags(re_comp_flags *flags, text *o)
+ {
+ 	memset(flags, 0, sizeof(re_comp_flags));
+ 	flags->cflags = regex_flavor;
+ 
+ 	if (o)
+ 	{
+ 		char * opt_p = VARDATA(o);
+ 		size_t opt_len = (VARSIZE(o) - VARHDRSZ);
+ 		size_t i;
+ 
+ 		for (i = 0; i < opt_len; i++)
+ 		{
+ 			switch (opt_p[i])
+ 			{
+ 				case 'g':
+ 					flags->glob = true;
+ 					break;
+ 				case 'i':
+ 					flags->cflags |= REG_ICASE;
+ 					break;
+ 				case 'm':
+ 				case 'n':
+ 					flags->cflags |= REG_NEWLINE;
+ 					break;
+ 				case 'p':
+ 					flags->cflags |= REG_NLSTOP;
+ 					flags->cflags &= ~REG_NLANCH;
+ 					break;
+ 				case 'r':
+ 					flags->return_pre_and_post = true;
+ 					break;
+ 				case 'w':
+ 					flags->cflags &= ~REG_NLSTOP;
+ 					flags->cflags |= REG_NLANCH;
+ 					break;
+ 				case 'x':
+ 					flags->cflags |= REG_EXPANDED;
+ 					break;
+ 				default:
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 							 errmsg("invalid regexp option: %c", opt_p[i])));
+ 					break;
+ 			}
+ 		}
+ 	}
+ }
+ 
  
  /*
   * assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
***************
*** 469,507 ****
  	text	   *p = PG_GETARG_TEXT_P(1);
  	text	   *r = PG_GETARG_TEXT_P(2);
  	text	   *opt = PG_GETARG_TEXT_P(3);
- 	char	   *opt_p = VARDATA(opt);
- 	int			opt_len = (VARSIZE(opt) - VARHDRSZ);
- 	int			i;
- 	bool		glob = false;
- 	bool		ignorecase = false;
  	regex_t    *re;
  
  	/* parse options */
! 	for (i = 0; i < opt_len; i++)
! 	{
! 		switch (opt_p[i])
! 		{
! 			case 'i':
! 				ignorecase = true;
! 				break;
! 			case 'g':
! 				glob = true;
! 				break;
! 			default:
! 				ereport(ERROR,
! 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! 						 errmsg("invalid option of regexp_replace: %c",
! 								opt_p[i])));
! 				break;
! 		}
! 	}
  
! 	if (ignorecase)
! 		re = RE_compile_and_cache(p, regex_flavor | REG_ICASE);
! 	else
! 		re = RE_compile_and_cache(p, regex_flavor);
  
! 	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, glob));
  }
  
  /* similar_escape()
--- 610,629 ----
  	text	   *p = PG_GETARG_TEXT_P(1);
  	text	   *r = PG_GETARG_TEXT_P(2);
  	text	   *opt = PG_GETARG_TEXT_P(3);
  	regex_t    *re;
+ 	re_comp_flags flags;
  
  	/* parse options */
! 	parse_re_comp_flags(&flags, opt);
  
! 	if (flags.return_pre_and_post)
! 		ereport(ERROR,
! 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! 				 errmsg("invalid option of regexp_replace: r")));
  
! 	re = RE_compile_and_cache(p, flags.cflags);
! 
! 	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
  }
  
  /* similar_escape()
***************
*** 625,630 ****
--- 747,1073 ----
  	PG_RETURN_TEXT_P(result);
  }
  
+ #define PG_RETURN_DATUM_MAYBE_SRF(_result) \
+ 	do { \
+ 		if (doing_srf) \
+ 			SRF_RETURN_NEXT(funcctx, _result); \
+ 		else \
+ 			PG_RETURN_DATUM(_result); \
+ 	} while (0)
+ 
+ #define PG_RETURN_DONE_MAYBE_SRF() \
+ 	do { \
+ 		if (doing_srf) \
+ 			SRF_RETURN_DONE(funcctx); \
+ 		else \
+ 			PG_RETURN_NULL(); \
+ 	} while (0)
+ 
+ #define PG_GETARG_TEXT_P_COPY_IF_SRF(_argno) \
+ 	(doing_srf ? PG_GETARG_TEXT_P_COPY(_argno) : PG_GETARG_TEXT_P(_argno))
+ 
+ #define PG_GETARG_TEXT_P_IF_EXISTS(_n) \
+ 	(PG_NARGS() > _n ? PG_GETARG_TEXT_P(_n) : NULL)
+ 
+ Datum
+ regexp_matches(PG_FUNCTION_ARGS)
+ {
+ 	FuncCallContext		*funcctx = NULL;
+ 	MemoryContext		 oldcontext = NULL;
+ 	regexp_matches_ctx	*matchctx = NULL;
+ 	re_comp_flags		 flags;
+ 	bool doing_srf = (fcinfo->resultinfo != NULL && IsA(fcinfo->resultinfo, ReturnSetInfo));
+ 
+ 	if (!doing_srf || SRF_IS_FIRSTCALL())
+ 	{
+ 		text *p;
+ 		text *o = PG_GETARG_TEXT_P_IF_EXISTS(2);
+ 
+ 		parse_re_comp_flags(&flags, o);
+ 
+ 		/* it is questionable whether or not this is allowable:
+ 		 * we were called as an SRF, but only want to return one row,
+ 		 * this will cheat and return that in a normal rather than
+ 		 * SRF way
+ 		 */
+ #if 0
+ 		if (!flags.glob)
+ 			doing_srf = false;
+ #endif
+ 
+ 		if (doing_srf)
+ 		{
+ 			funcctx = SRF_FIRSTCALL_INIT();
+ 			oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ 		}
+ 
+ 		matchctx = palloc0 (sizeof(regexp_matches_ctx));
+ 
+ 		/* this needs to be visible so copy it if SRF */
+ 		matchctx->orig_str = PG_GETARG_TEXT_P_COPY_IF_SRF(0);
+ 		matchctx->orig_len = VARSIZE(matchctx->orig_str) - VARHDRSZ;
+ 
+ 		p = PG_GETARG_TEXT_P(1);
+ 
+ 		memcpy(&matchctx->flags, &flags, sizeof(re_comp_flags));
+ 
+ 		matchctx->cpattern = RE_compile_and_cache(p, matchctx->flags.cflags);
+ 		matchctx->pmatch = palloc0(sizeof(regmatch_t) * (matchctx->cpattern->re_nsub + 1));
+ 		matchctx->offset = 0;
+ 
+ 		/* get text type oid, too lazy to do it some other way */
+ 		matchctx->param_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ 		get_typlenbyvalalign(matchctx->param_type, &matchctx->typlen, &matchctx->typbyval, &matchctx->typalign);
+ 
+ 		matchctx->typefunc = get_call_result_type(fcinfo, &matchctx->rettype, &matchctx->rettupdesc);
+ 
+ 		if (matchctx->typefunc == TYPEFUNC_COMPOSITE)
+ 			matchctx->rettupdesc = BlessTupleDesc(matchctx->rettupdesc);
+ 
+ 		matchctx->wide_str = (pg_wchar *) palloc((matchctx->orig_len + 1) * sizeof(pg_wchar));
+ 		matchctx->wide_len = pg_mb2wchar_with_len(VARDATA(matchctx->orig_str), matchctx->wide_str, matchctx->orig_len);
+ 
+ 		matchctx->pmatch[0].rm_so = -1;
+ 		/* both < 0 but not equal */
+ 		matchctx->pmatch[0].rm_eo = -2;
+ 
+ 		if (doing_srf)
+ 		{
+ 			MemoryContextSwitchTo(oldcontext);
+ 			funcctx->user_fctx = (void*)matchctx;
+ 
+ 			/* Avoid run-away function by making sure we never iterate more than the length of the text */
+ 			funcctx->max_calls = (matchctx->flags.glob ? (3 * matchctx->orig_len) : 1);
+ 		}
+ 	}
+ 
+ 	if (doing_srf)
+ 	{
+ 		funcctx = SRF_PERCALL_SETUP();
+ 		matchctx = (regexp_matches_ctx*)(funcctx->user_fctx);
+ 
+ 		if (funcctx->call_cntr >= funcctx->max_calls)
+ 		{
+ 			if (funcctx->max_calls != 1)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INTERNAL_ERROR),
+ 						 errmsg("set returning match function terminated after iterating %d times", funcctx->call_cntr)));
+ 			SRF_RETURN_DONE(funcctx);
+ 		}
+ 	}
+ 
+ 	if (matchctx->offset < matchctx->orig_len)
+ 	{
+ 		if (matchctx->pmatch[0].rm_so == matchctx->pmatch[0].rm_eo)
+ 			matchctx->offset++;
+ 
+ 		if (RE_wchar_execute(matchctx->cpattern,
+ 					matchctx->wide_str,
+ 					matchctx->wide_len,
+ 					matchctx->offset,
+ 					matchctx->cpattern->re_nsub + 1,
+ 					matchctx->pmatch))
+ 		{
+ 			ArrayType * resultarray;
+ 			Datum result;
+ 			Datum elems[matchctx->cpattern->re_nsub];
+ 			bool nulls[matchctx->cpattern->re_nsub];
+ 			size_t i;
+ 			int ndims = 1;
+ 			int dims[1] = {matchctx->cpattern->re_nsub};
+ 			int lbs[1] = {1};
+ 
+ 			for (i = 0; i < matchctx->cpattern->re_nsub; ++i)
+ 			{
+ 				int so = matchctx->pmatch[i+1].rm_so,
+ 					eo = matchctx->pmatch[i+1].rm_eo;
+ 
+ 				if (so < 0 || eo < 0)
+ 				{
+ 					elems[i] = 0;
+ 					nulls[i] = true;
+ 				}
+ 				else
+ 				{
+ 					elems[i] = DirectFunctionCall3(text_substr,
+ 							PointerGetDatum(matchctx->orig_str),
+ 							Int32GetDatum(so + 1),
+ 							Int32GetDatum(eo - so));
+ 					nulls[i] = false;
+ 				}
+ 			}
+ 
+ 			resultarray = construct_md_array(elems, nulls, ndims, dims, lbs,
+ 					matchctx->param_type, matchctx->typlen, matchctx->typbyval, matchctx->typalign);
+ 
+ 			if (matchctx->typefunc == TYPEFUNC_COMPOSITE)
+ 			{
+ 				Datum vals[4];
+ 				bool  valnulls[4] = {true, true, true, true};
+ 
+ 				if (matchctx->flags.return_pre_and_post)
+ 				{
+ 					valnulls[0] = valnulls[3] = false;
+ 
+ 					vals[0] = DirectFunctionCall3(text_substr,
+ 							PointerGetDatum(matchctx->orig_str),
+ 							Int32GetDatum(1),
+ 							Int32GetDatum(matchctx->pmatch[0].rm_so));
+ 
+ 					vals[3] = DirectFunctionCall2(text_substr_no_len,
+ 							PointerGetDatum(matchctx->orig_str),
+ 							Int32GetDatum(matchctx->pmatch[0].rm_eo + 1));
+ 				}
+ 
+ 				vals[1] = DirectFunctionCall3(text_substr,
+ 						PointerGetDatum(matchctx->orig_str),
+ 						Int32GetDatum(matchctx->pmatch[0].rm_so + 1),
+ 						Int32GetDatum(matchctx->pmatch[0].rm_eo - matchctx->pmatch[0].rm_so));
+ 				valnulls[1] = false;
+ 
+ 
+ 				vals[2] = PointerGetDatum(resultarray);
+ 				valnulls[2] = false;
+ 
+ 				result = HeapTupleGetDatum(
+ 						heap_form_tuple (matchctx->rettupdesc, vals, valnulls));
+ 			}
+ 			else
+ 			{
+ 				result = PointerGetDatum(resultarray);
+ 			}
+ 
+ 			matchctx->offset = matchctx->pmatch[0].rm_eo;
+ 			PG_RETURN_DATUM_MAYBE_SRF(result);
+ 		}
+ 		/* else fall through and return done (or null) */
+ 	}
+ 
+ 	PG_RETURN_DONE_MAYBE_SRF();
+ }
+ 
+ /* Hack to make oprsanity happy */
+ Datum regexp_matches_noopts(PG_FUNCTION_ARGS)
+ {
+ 	return regexp_matches(fcinfo);
+ }
+ 
+ Datum
+ regexp_split(PG_FUNCTION_ARGS)
+ {
+ 	FuncCallContext  *funcctx;
+ 	MemoryContext     oldcontext;
+ 	regexp_split_ctx *splitctx;
+ 
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		text * o = PG_GETARG_TEXT_P_IF_EXISTS(2);
+ 
+ 		funcctx = SRF_FIRSTCALL_INIT();
+ 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ 		splitctx = (regexp_split_ctx *) palloc(sizeof(regexp_split_ctx));
+ 
+ 		splitctx->orig_str = PG_GETARG_TEXT_P_COPY(0);
+ 		splitctx->orig_len = VARSIZE(splitctx->orig_str) - VARHDRSZ;
+ 
+ 		parse_re_comp_flags(&splitctx->flags, o);
+ 		if (splitctx->flags.glob)
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("regexp_split does not support the global option")));
+ 		}
+ 
+ 		if (splitctx->flags.return_pre_and_post)
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("regexp_split does not support the return pre and postmatch option")));
+ 		}
+ 
+ 
+ 		splitctx->cpattern = RE_compile_and_cache(PG_GETARG_TEXT_P(1), splitctx->flags.cflags);
+ 
+ 		splitctx->wide_str = (pg_wchar *) palloc((splitctx->orig_len + 1) * sizeof(pg_wchar));
+ 		splitctx->wide_len = pg_mb2wchar_with_len(VARDATA(splitctx->orig_str), splitctx->wide_str, splitctx->orig_len);
+ 
+ 		splitctx->offset = 0;
+ 
+ 		splitctx->match.rm_so = -1;
+ 		/* both < 0 but not equal */
+ 		splitctx->match.rm_eo = -2;
+ 
+ 		MemoryContextSwitchTo(oldcontext);
+ 		funcctx->user_fctx = (void*)splitctx;
+ 
+ 		/* Avoid run-away function by making sure we never iterate more than the length of the text */
+ 		funcctx->max_calls = 3 * splitctx->orig_len;
+ 	}
+ 
+ 	funcctx = SRF_PERCALL_SETUP();
+ 	splitctx = (regexp_split_ctx*)(funcctx->user_fctx);
+ 
+ 	if (funcctx->call_cntr > funcctx->max_calls)
+ 	{
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INTERNAL_ERROR),
+ 				 errmsg("set returning split function terminated after iterating %d times", funcctx->call_cntr)));
+ 		SRF_RETURN_DONE(funcctx);
+ 	}
+ 
+ 	if (splitctx->offset < splitctx->orig_len)
+ 	{
+ 		regmatch_t * pmatch = &(splitctx->match);
+ 		bool first_match = (pmatch->rm_so < 0 || pmatch->rm_eo < 0);
+ 		bool incremented_offset = false;
+ 		do
+ 		{
+ 			if (pmatch->rm_so == pmatch->rm_eo)
+ 			{
+ 				splitctx->offset++;
+ 				incremented_offset = true;
+ 			}
+ 
+ 			if (RE_wchar_execute(splitctx->cpattern,
+ 						splitctx->wide_str,
+ 						splitctx->wide_len,
+ 						splitctx->offset,
+ 						1,
+ 						pmatch))
+ 			{
+ 				if (!((first_match && pmatch->rm_so == pmatch->rm_eo) || (pmatch->rm_so == splitctx->offset && !incremented_offset)))
+ 				{
+ 					Datum result = DirectFunctionCall3(text_substr,
+ 							PointerGetDatum(splitctx->orig_str),
+ 							Int32GetDatum(splitctx->offset + (incremented_offset ? 0 : 1)),
+ 							Int32GetDatum(pmatch->rm_so - splitctx->offset + (incremented_offset ? 1 : 0)));
+ 					splitctx->offset = pmatch->rm_eo;
+ 					SRF_RETURN_NEXT(funcctx, result);
+ 				}
+ 			}
+ 			else
+ 			{
+ 				/* no more matches, return rest of string */
+ 				Datum result = DirectFunctionCall2(text_substr_no_len,
+ 						PointerGetDatum(splitctx->orig_str),
+ 						Int32GetDatum(splitctx->offset + (incremented_offset ? 0 : 1)));
+ 				/* so we know we're done next time thru */
+ 				splitctx->offset = splitctx->orig_len;
+ 				SRF_RETURN_NEXT(funcctx, result);
+ 			}
+ 			first_match = false;
+ 		} while (1);
+ 	}
+ 
+ 	SRF_RETURN_DONE(funcctx);
+ }
+ 
+ /* Hack to make oprsanity happy */
+ Datum regexp_split_noopts(PG_FUNCTION_ARGS)
+ {
+ 	return regexp_split(fcinfo);
+ }
+ 
  /*
   * report whether regex_flavor is currently BASIC
   */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.443
diff -c -r1.443 pg_proc.h
*** src/include/catalog/pg_proc.h	7 Feb 2007 23:11:30 -0000	1.443
--- src/include/catalog/pg_proc.h	7 Feb 2007 23:50:47 -0000
***************
*** 2259,2266 ****
--- 2259,2274 ----
  DESCR("replace text using regexp");
  DATA(insert OID =  2285 ( regexp_replace	   PGNSP PGUID 12 1 0 f f t f i 4 25 "25 25 25 25" _null_ _null_ _null_ textregexreplace - _null_ ));
  DESCR("replace text using regexp");
+ DATA(insert OID =  2760 ( regexp_matches   PGNSP PGUID 12 1 0 f f t f i 2 1009 "25 25" _null_ _null_ _null_	regexp_matches_noopts - _null_ ));
+ DESCR("return all match groups for regexp");
+ DATA(insert OID =  2761 ( regexp_matches   PGNSP PGUID 12 1 10 f f t t i 3 2249 "25 25 25" "{25,25,25,25,25,1009,25}" "{i,i,i,o,o,o,o}" "{str,pattern,flags,prematch,fullmatch,matches,postmatch}"	regexp_matches - _null_ ));
+ DESCR("return all match groups for regexp");
  DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 1 0 f f t f i 3 25 "25 25 23" _null_ _null_ _null_	split_text - _null_ ));
  DESCR("split string by field_sep and return field_num");
+ DATA(insert OID =  2762 ( regexp_split PGNSP PGUID 12 1 1000 f f t t i 2 25 "25 25" _null_ _null_ _null_	regexp_split_noopts - _null_ ));
+ DESCR("split string by pattern");
+ DATA(insert OID =  2763 ( regexp_split PGNSP PGUID 12 1 1000 f f t t i 3 25 "25 25 25" _null_ _null_ _null_	regexp_split - _null_ ));
+ DESCR("split string by pattern");
  DATA(insert OID =  2089 ( to_hex	   PGNSP PGUID 12 1 0 f f t f i 1 25 "23" _null_ _null_ _null_	to_hex32 - _null_ ));
  DESCR("convert int4 number to hex");
  DATA(insert OID =  2090 ( to_hex	   PGNSP PGUID 12 1 0 f f t f i 1 25 "20" _null_ _null_ _null_	to_hex64 - _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.287
diff -c -r1.287 builtins.h
*** src/include/utils/builtins.h	28 Jan 2007 16:16:54 -0000	1.287
--- src/include/utils/builtins.h	8 Feb 2007 00:02:06 -0000
***************
*** 478,483 ****
--- 478,487 ----
  extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
  extern Datum textregexreplace(PG_FUNCTION_ARGS);
  extern Datum similar_escape(PG_FUNCTION_ARGS);
+ extern Datum regexp_matches(PG_FUNCTION_ARGS);
+ extern Datum regexp_matches_noopts(PG_FUNCTION_ARGS);
+ extern Datum regexp_split(PG_FUNCTION_ARGS);
+ extern Datum regexp_split_noopts(PG_FUNCTION_ARGS);
  extern bool regex_flavor_is_basic(void);
  
  /* regproc.c */
Index: src/test/regress/expected/strings.out
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/test/regress/expected/strings.out,v
retrieving revision 1.29
diff -c -r1.29 strings.out
*** src/test/regress/expected/strings.out	11 May 2006 19:15:36 -0000	1.29
--- src/test/regress/expected/strings.out	8 Feb 2007 00:00:05 -0000
***************
*** 217,225 ****
   Z Z
  (1 row)
  
! -- invalid option of REGEXP_REPLACE
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
! ERROR:  invalid option of regexp_replace: z
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
   4 
--- 217,479 ----
   Z Z
  (1 row)
  
! -- invalid regexp option
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
! ERROR:  invalid regexp option: z
! -- set so we can tell NULL from empty string
! \pset null '\\N'
! -- return all matches from regexp
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
!  regexp_matches 
! ----------------
!  {bar,beque}
! (1 row)
! 
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, '');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  \N       | barbeque  | {bar,beque} | \N
! (1 row)
! 
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  foo      | barbeque  | {bar,beque} | baz
! (1 row)
! 
! -- test case insensitive - first variant not possible
! SELECT * FROM regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  \N       | bARbEqUE  | {bAR,bEqUE} | \N
! (1 row)
! 
! SELECT * FROM regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'ir');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  foO      | bARbEqUE  | {bAR,bEqUE} | bAz
! (1 row)
! 
! -- global option - more than one match
! SELECT * FROM regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'gr');
!   prematch   | fullmatch |   matches    |   postmatch   
! -------------+-----------+--------------+---------------
!  foo         | barbeque  | {bar,beque}  | bazilbarfbonk
!  foobarbeque | bazilbarf | {bazil,barf} | bonk
! (2 rows)
! 
! -- empty capture group (matched empty string)
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
!  regexp_matches 
! ----------------
!  {bar,"",beque}
! (1 row)
! 
! -- no match
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
!  regexp_matches 
! ----------------
!  \N
! (1 row)
! 
! -- optional capture group did not match, null entry in array
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
!   regexp_matches  
! ------------------
!  {bar,NULL,beque}
! (1 row)
! 
! -- nothing before match, empty string
! SELECT * FROM regexp_matches('barbequebaz', $re$(bar)(beque)$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!           | barbeque  | {bar,beque} | baz
! (1 row)
! 
! SELECT * FROM regexp_matches('barbequebaz', $re$^(bar)(beque)$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!           | barbeque  | {bar,beque} | baz
! (1 row)
! 
! SELECT * FROM regexp_matches('barbequebaz', $re$(^bar)(beque)$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!           | barbeque  | {bar,beque} | baz
! (1 row)
! 
! -- nothing after match, empty string
! SELECT * FROM regexp_matches('foobarbeque', $re$(bar)(beque)$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  foo      | barbeque  | {bar,beque} | 
! (1 row)
! 
! SELECT * FROM regexp_matches('foobarbeque', $re$(bar)(beque)$$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  foo      | barbeque  | {bar,beque} | 
! (1 row)
! 
! SELECT * FROM regexp_matches('foobarbeque', $re$(bar)(beque$)$re$, 'r');
!  prematch | fullmatch |   matches   | postmatch 
! ----------+-----------+-------------+-----------
!  foo      | barbeque  | {bar,beque} | 
! (1 row)
! 
! -- no capture groups, should first throw error?
! SELECT * FROM regexp_matches('foobarbequebaz', $re$barbeque$re$);
!  regexp_matches 
! ----------------
!  {}
! (1 row)
! 
! SELECT * FROM regexp_matches('foobarbequebaz', $re$barbeque$re$, '');
!  prematch | fullmatch | matches | postmatch 
! ----------+-----------+---------+-----------
!  \N       | barbeque  | {}      | \N
! (1 row)
! 
! SELECT * FROM regexp_matches('foobarbequebaz', $re$barbeque$re$, 'r');
!  prematch | fullmatch | matches | postmatch 
! ----------+-----------+---------+-----------
!  foo      | barbeque  | {}      | baz
! (1 row)
! 
! -- give me errors
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
! ERROR:  invalid regexp option: z
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(barbeque$re$, '');
! ERROR:  invalid regular expression: parentheses () not balanced
! SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$, '');
! ERROR:  invalid regular expression: invalid repetition count(s)
! -- split string on regexp
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
!   foo   | length 
! --------+--------
!  the    |      3
!  quick  |      5
!  brown  |      5
!  fox    |      3
!  jumped |      6
!  over   |      4
!  the    |      3
!  lazy   |      4
!  dog    |      3
! (9 rows)
! 
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
!  foo | length 
! -----+--------
!  t   |      1
!  h   |      1
!  e   |      1
!  q   |      1
!  u   |      1
!  i   |      1
!  c   |      1
!  k   |      1
!  b   |      1
!  r   |      1
!  o   |      1
!  w   |      1
!  n   |      1
!  f   |      1
!  o   |      1
!  x   |      1
!  j   |      1
!  u   |      1
!  m   |      1
!  p   |      1
!  e   |      1
!  d   |      1
!  o   |      1
!  v   |      1
!  e   |      1
!  r   |      1
!  t   |      1
!  h   |      1
!  e   |      1
!  l   |      1
!  a   |      1
!  z   |      1
!  y   |      1
!  d   |      1
!  o   |      1
!  g   |      1
! (36 rows)
! 
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', '') AS foo;
!  foo | length 
! -----+--------
!  t   |      1
!  h   |      1
!  e   |      1
!      |      1
!  q   |      1
!  u   |      1
!  i   |      1
!  c   |      1
!  k   |      1
!      |      1
!  b   |      1
!  r   |      1
!  o   |      1
!  w   |      1
!  n   |      1
!      |      1
!  f   |      1
!  o   |      1
!  x   |      1
!      |      1
!  j   |      1
!  u   |      1
!  m   |      1
!  p   |      1
!  e   |      1
!  d   |      1
!      |      1
!  o   |      1
!  v   |      1
!  e   |      1
!  r   |      1
!      |      1
!  t   |      1
!  h   |      1
!  e   |      1
!      |      1
!  l   |      1
!  a   |      1
!  z   |      1
!  y   |      1
!      |      1
!  d   |      1
!  o   |      1
!  g   |      1
! (44 rows)
! 
! -- case insensitive
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
!           foo          | length 
! -----------------------+--------
!  th                    |      2
!   QUick bROWn FOx jUMP |     21
!  d ov                  |      4
!  r TH                  |      4
!   lazy dOG             |      9
! (5 rows)
! 
! -- errors
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
! ERROR:  invalid regexp option: z
! -- global option meaningless for regexp_split
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
! ERROR:  regexp_split does not support the global option
! -- return pre and post option meaningless for regexp_split
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'r') AS foo;
! ERROR:  regexp_split does not support the return pre and postmatch option
! -- change NULL-display back
! \pset null ''
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
   4 
Index: src/test/regress/sql/strings.sql
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/test/regress/sql/strings.sql,v
retrieving revision 1.18
diff -c -r1.18 strings.sql
*** src/test/regress/sql/strings.sql	6 Mar 2006 19:49:20 -0000	1.18
--- src/test/regress/sql/strings.sql	7 Feb 2007 23:58:23 -0000
***************
*** 85,93 ****
  SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g');
  SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
! -- invalid option of REGEXP_REPLACE
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
  
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
  
--- 85,153 ----
  SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g');
  SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
! -- invalid regexp option
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
  
+ -- set so we can tell NULL from empty string
+ \pset null '\\N'
+ 
+ -- return all matches from regexp
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, '');
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'r');
+ 
+ -- test case insensitive - first variant not possible
+ SELECT * FROM regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+ SELECT * FROM regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'ir');
+ 
+ -- global option - more than one match
+ SELECT * FROM regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'gr');
+ 
+ -- empty capture group (matched empty string)
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+ -- no match
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+ -- optional capture group did not match, null entry in array
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+ 
+ -- nothing before match, empty string
+ SELECT * FROM regexp_matches('barbequebaz', $re$(bar)(beque)$re$, 'r');
+ SELECT * FROM regexp_matches('barbequebaz', $re$^(bar)(beque)$re$, 'r');
+ SELECT * FROM regexp_matches('barbequebaz', $re$(^bar)(beque)$re$, 'r');
+ 
+ -- nothing after match, empty string
+ SELECT * FROM regexp_matches('foobarbeque', $re$(bar)(beque)$re$, 'r');
+ SELECT * FROM regexp_matches('foobarbeque', $re$(bar)(beque)$$re$, 'r');
+ SELECT * FROM regexp_matches('foobarbeque', $re$(bar)(beque$)$re$, 'r');
+ 
+ -- no capture groups, should first throw error?
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$barbeque$re$);
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$barbeque$re$, '');
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$barbeque$re$, 'r');
+ 
+ -- give me errors
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(barbeque$re$, '');
+ SELECT * FROM regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$, '');
+ 
+ -- split string on regexp
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+ 
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', '') AS foo;
+ -- case insensitive
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
+ 
+ -- errors
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
+ -- global option meaningless for regexp_split
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
+ -- return pre and post option meaningless for regexp_split
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'r') AS foo;
+ 
+ -- change NULL-display back
+ \pset null ''
+ 
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";