Skip site navigation (1) Skip section navigation (2)

Peripheral Links

Header And Logo

PostgreSQL
| The world's most advanced open source database.

Site Navigation

Search archives
  Advanced Search

Re: regexp_replace


  • From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
  • To: Atsushi Ogawa <a_ogawa(at)hi-ho(dot)ne(dot)jp>
  • Cc: pgsql-patches(at)postgresql(dot)org
  • Subject: Re: regexp_replace
  • Date: Sun, 10 Jul 2005 00:56:03 -0400 (EDT)
  • Message-id: <200507100456.j6A4u3r08884@candle.pha.pa.us> <text/plain>

I have applied your patch, with slight adjustments in spacing and
documentation.

Patch applied.  Thanks.

---------------------------------------------------------------------------

Atsushi Ogawa wrote:
> 
> I made the patch that implements regexp_replace again. 
> The specification of this function is as follows. 
> 
> regexp_replace(source text, pattern text, replacement text, [flags text])
> returns text
> 
> Replace string that matches to regular expression in source text to
> replacement text.
> 
>  - pattern is regular expression pattern.
>  - replacement is replace string that can use '\1'-'\9', and '\&'.
>     '\1'-'\9': back reference to the n'th subexpression.
>     '\&'     : entire matched string.
>  - flags can use the following values:
>     g: global (replace all)
>     i: ignore case
>     When the flags is not specified, case sensitive, replace the first
>     instance only.
> 
> regards,
> 
> --- Atsushi Ogawa

[ Attachment, skipping... ]

> 
> ---------------------------(end of broadcast)---------------------------
> TIP 4: Don't 'kill -9' the postmaster

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman(at)candle(dot)pha(dot)pa(dot)us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.263
diff -c -c -r1.263 func.sgml
*** doc/src/sgml/func.sgml	6 Jul 2005 19:02:52 -0000	1.263
--- doc/src/sgml/func.sgml	10 Jul 2005 04:52:43 -0000
***************
*** 1257,1262 ****
--- 1257,1282 ----
        </row>
  
        <row>
+        <entry><literal><function>regexp_replace</function>(<parameter>source</parameter> <type>text</type>,
+        <parameter>pattern</parameter> <type>text</type>,
+        <parameter>replacement</parameter> <type>text</type>
+        <optional>, <parameter>flags</parameter> <type>text</type></optional>)</literal></entry>
+        <entry><type>text</type></entry>
+        <entry>Replace string that matches the regular expression
+         <parameter>pattern</parameter> in <parameter>source</parameter> to
+         <parameter>replacement</parameter>.
+         <parameter>replacement</parameter> can use <literal>\1</>-<literal>\9</> and <literal>\&amp;</>.
+         <literal>\1</>-<literal>\9</> is a back reference to the n'th subexpression, and
+         <literal>\&amp;</> is the entire matched string.
+         <parameter>flags</parameter> can use <literal>g</>(global) and <literal>i</>(ignore case).
+         When flags is not specified, case sensitive matching is used, and it replaces
+         only the instance.
+        </entry>
+        <entry><literal>regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3')</literal></entry>
+        <entry><literal>(111) 222-3333</literal></entry>
+       </row>
+ 
+       <row>
         <entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>integer</type>)</literal></entry>
         <entry><type>text</type></entry>
         <entry>Repeat <parameter>string</parameter> the specified
Index: src/backend/regex/regexec.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/regex/regexec.c,v
retrieving revision 1.24
diff -c -c -r1.24 regexec.c
*** src/backend/regex/regexec.c	29 Nov 2003 19:51:55 -0000	1.24
--- src/backend/regex/regexec.c	10 Jul 2005 04:52:44 -0000
***************
*** 110,115 ****
--- 110,116 ----
  	regmatch_t *pmatch;
  	rm_detail_t *details;
  	chr		   *start;			/* start of string */
+ 	chr		   *search_start;	/* search start of string */
  	chr		   *stop;			/* just past end of string */
  	int			err;			/* error code if any (0 none) */
  	regoff_t   *mem;			/* memory vector for backtracking */
***************
*** 168,173 ****
--- 169,175 ----
  pg_regexec(regex_t *re,
  		   const chr *string,
  		   size_t len,
+ 		   size_t search_start,
  		   rm_detail_t *details,
  		   size_t nmatch,
  		   regmatch_t pmatch[],
***************
*** 219,224 ****
--- 221,227 ----
  		v->pmatch = pmatch;
  	v->details = details;
  	v->start = (chr *) string;
+ 	v->search_start = (chr *) string + search_start;
  	v->stop = (chr *) string + len;
  	v->err = 0;
  	if (backref)
***************
*** 288,294 ****
  	NOERR();
  	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
  	cold = NULL;
! 	close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL);
  	freedfa(s);
  	NOERR();
  	if (v->g->cflags & REG_EXPECT)
--- 291,298 ----
  	NOERR();
  	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
  	cold = NULL;
! 	close = shortest(v, s, v->search_start, v->search_start, v->stop,
! 					 &cold, (int *) NULL);
  	freedfa(s);
  	NOERR();
  	if (v->g->cflags & REG_EXPECT)
***************
*** 415,421 ****
  
  	assert(d != NULL && s != NULL);
  	cold = NULL;
! 	close = v->start;
  	do
  	{
  		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
--- 419,425 ----
  
  	assert(d != NULL && s != NULL);
  	cold = NULL;
! 	close = v->search_start;
  	do
  	{
  		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
Index: src/backend/utils/adt/regexp.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v
retrieving revision 1.56
diff -c -c -r1.56 regexp.c
*** src/backend/utils/adt/regexp.c	31 Dec 2004 22:01:22 -0000	1.56
--- src/backend/utils/adt/regexp.c	10 Jul 2005 04:52:45 -0000
***************
*** 81,118 ****
  
  
  /*
!  * RE_compile_and_execute - compile and execute a RE, caching if possible
   *
!  * Returns TRUE on match, FALSE on no match
   *
!  *	text_re --- the pattern, expressed as an *untoasted* TEXT object
!  *	dat --- the data to match against (need not be null-terminated)
!  *	dat_len --- the length of the data string
!  *	cflags --- compile options for the pattern
!  *	nmatch, pmatch	--- optional return area for match details
   *
!  * Both pattern and data are given in the database encoding.  We internally
!  * convert to array of pg_wchar which is what Spencer's regex package wants.
   */
! static bool
! RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
! 					   int cflags, int nmatch, regmatch_t *pmatch)
  {
  	int			text_re_len = VARSIZE(text_re);
- 	pg_wchar   *data;
- 	size_t		data_len;
  	pg_wchar   *pattern;
  	size_t		pattern_len;
  	int			i;
  	int			regcomp_result;
- 	int			regexec_result;
  	cached_re_str re_temp;
  	char		errMsg[100];
  
- 	/* Convert data string to wide characters */
- 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
- 	data_len = pg_mb2wchar_with_len(dat, data, dat_len);
- 
  	/*
  	 * Look for a match among previously compiled REs.	Since the data
  	 * structure is self-organizing with most-used entries at the front,
--- 81,107 ----
  
  
  /*
!  * RE_compile_and_cache - compile a RE, caching if possible
   *
!  * Returns regex_t
   *
!  *  text_re --- the pattern, expressed as an *untoasted* TEXT object
!  *  cflags --- compile options for the pattern
   *
!  * Pattern is given in the database encoding.  We internally convert to
!  * array of pg_wchar which is what Spencer's regex package wants.
   */
! static regex_t
! RE_compile_and_cache(text *text_re, int cflags)
  {
  	int			text_re_len = VARSIZE(text_re);
  	pg_wchar   *pattern;
  	size_t		pattern_len;
  	int			i;
  	int			regcomp_result;
  	cached_re_str re_temp;
  	char		errMsg[100];
  
  	/*
  	 * Look for a match among previously compiled REs.	Since the data
  	 * structure is self-organizing with most-used entries at the front,
***************
*** 134,161 ****
  				re_array[0] = re_temp;
  			}
  
! 			/* Perform RE match and return result */
! 			regexec_result = pg_regexec(&re_array[0].cre_re,
! 										data,
! 										data_len,
! 										NULL,	/* no details */
! 										nmatch,
! 										pmatch,
! 										0);
! 
! 			pfree(data);
! 
! 			if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
! 			{
! 				/* re failed??? */
! 				pg_regerror(regexec_result, &re_array[0].cre_re,
! 							errMsg, sizeof(errMsg));
! 				ereport(ERROR,
! 						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
! 						 errmsg("regular expression failed: %s", errMsg)));
! 			}
! 
! 			return (regexec_result == REG_OKAY);
  		}
  	}
  
--- 123,129 ----
  				re_array[0] = re_temp;
  			}
  
! 			return re_array[0].cre_re;
  		}
  	}
  
***************
*** 220,229 ****
--- 188,232 ----
  	re_array[0] = re_temp;
  	num_res++;
  
+ 	return re_array[0].cre_re;
+ }
+ 
+ /*
+  * RE_compile_and_execute - compile and execute a RE
+  *
+  * Returns TRUE on match, FALSE on no match
+  *
+  *	text_re --- the pattern, expressed as an *untoasted* TEXT object
+  *	dat --- the data to match against (need not be null-terminated)
+  *	dat_len --- the length of the data string
+  *	cflags --- compile options for the pattern
+  *	nmatch, pmatch	--- optional return area for match details
+  *
+  * Both pattern and data are given in the database encoding.  We internally
+  * convert to array of pg_wchar which is what Spencer's regex package wants.
+  */
+ static bool
+ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
+ 					   int cflags, int nmatch, regmatch_t *pmatch)
+ {
+ 	pg_wchar   *data;
+ 	size_t		data_len;
+ 	int			regexec_result;
+ 	regex_t		re;
+ 	char        errMsg[100];
+ 
+ 	/* Convert data string to wide characters */
+ 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+ 	data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+ 
+ 	/* Compile RE */
+ 	re = RE_compile_and_cache(text_re, cflags);
+ 
  	/* Perform RE match and return result */
  	regexec_result = pg_regexec(&re_array[0].cre_re,
  								data,
  								data_len,
+ 								0,
  								NULL,	/* no details */
  								nmatch,
  								pmatch,
***************
*** 428,442 ****
  			eo = pmatch[0].rm_eo;
  		}
  
! 		return (DirectFunctionCall3(text_substr,
  									PointerGetDatum(s),
  									Int32GetDatum(so + 1),
! 									Int32GetDatum(eo - so)));
  	}
  
  	PG_RETURN_NULL();
  }
  
  /* similar_escape()
   * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
   * our regexp engine.
--- 431,519 ----
  			eo = pmatch[0].rm_eo;
  		}
  
! 		return DirectFunctionCall3(text_substr,
  									PointerGetDatum(s),
  									Int32GetDatum(so + 1),
! 									Int32GetDatum(eo - so));
  	}
  
  	PG_RETURN_NULL();
  }
  
+ /*
+  * textregexreplace_noopt()
+  *      Return a replace string matched by a regular expression.
+  *		This function is a version that doesn't specify the option of
+  *		textregexreplace. This is case sensitive, replace the first
+  *		instance only.
+  */
+ Datum
+ textregexreplace_noopt(PG_FUNCTION_ARGS)
+ {
+ 	text	   *s = PG_GETARG_TEXT_P(0);
+ 	text	   *p = PG_GETARG_TEXT_P(1);
+ 	text	   *r = PG_GETARG_TEXT_P(2);
+ 	regex_t		re;
+ 
+ 	re = RE_compile_and_cache(p, regex_flavor);
+ 
+ 	return DirectFunctionCall4(replace_text_regexp,
+ 								PointerGetDatum(s),
+ 								PointerGetDatum(&re),
+ 								PointerGetDatum(r),
+ 								BoolGetDatum(false));
+ }
+ 
+ /*
+  * textregexreplace()
+  *      Return a replace string matched by a regular expression.
+  */
+ Datum
+ textregexreplace(PG_FUNCTION_ARGS)
+ {
+ 	text	   *s = PG_GETARG_TEXT_P(0);
+ 	text	   *p = PG_GETARG_TEXT_P(1);
+ 	text	   *r = PG_GETARG_TEXT_P(2);
+ 	text	   *opt = PG_GETARG_TEXT_P(3);
+ 	char	   *opt_p = VARDATA(opt);
+ 	int			opt_len = (VARSIZE(opt) - VARHDRSZ);
+ 	int			i;
+ 	bool		global = false;
+ 	bool		ignorecase = false;
+ 	regex_t		re;
+ 
+ 	/* parse options */
+ 	for (i = 0; i < opt_len; i++)
+ 	{
+ 		switch (opt_p[i])
+ 		{
+ 			case 'i':
+ 				ignorecase = true;
+ 				break;
+ 			case 'g':
+ 				global = true;
+ 				break;
+ 			default:
+ 				ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("invalid option of regexp_replace: %c",
+ 					 opt_p[i])));
+ 				break;
+ 		}
+ 	}
+ 
+ 	if (ignorecase)
+ 		re = RE_compile_and_cache(p, regex_flavor | REG_ICASE);
+ 	else
+ 		re = RE_compile_and_cache(p, regex_flavor);
+ 
+ 	return DirectFunctionCall4(replace_text_regexp,
+ 								PointerGetDatum(s),
+ 								PointerGetDatum(&re),
+ 								PointerGetDatum(r),
+ 								BoolGetDatum(global));
+ }
+ 
  /* similar_escape()
   * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
   * our regexp engine.
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.126
diff -c -c -r1.126 varlena.c
*** src/backend/utils/adt/varlena.c	7 Jul 2005 04:36:08 -0000	1.126
--- src/backend/utils/adt/varlena.c	10 Jul 2005 04:52:46 -0000
***************
*** 28,33 ****
--- 28,34 ----
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
+ #include "regex/regex.h"
  
  
  typedef struct varlena unknown;
***************
*** 1994,1999 ****
--- 1995,2219 ----
  }
  
  /*
+  * check_replace_text_has_escape_char
+  * check whether replace_text has escape char. 
+  */
+ static bool
+ check_replace_text_has_escape_char(const text *replace_text)
+ {
+ 	const char *p = VARDATA(replace_text);
+ 	const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+ 
+ 	if (pg_database_encoding_max_length() == 1)
+ 	{
+ 		for (; p < p_end; p++)
+ 			if (*p == '\\') return true;
+ 	}
+ 	else
+ 	{
+ 		for (; p < p_end; p += pg_mblen(p))
+ 			if (*p == '\\') return true;
+ 	}
+ 
+ 	return false;
+ }
+ 
+ /*
+  * appendStringInfoRegexpSubstr
+  * append string by using back references of regexp.
+  */
+ static void
+ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
+     regmatch_t *pmatch, text *src_text)
+ {
+ 	const char *p = VARDATA(replace_text);
+ 	const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+ 
+ 	int			eml = pg_database_encoding_max_length();
+ 
+ 	int			substr_start = 1;
+ 	int			ch_cnt;
+ 
+ 	int			so;
+ 	int			eo;
+ 
+ 	while (1)
+ 	{
+ 		/* Find escape char. */
+ 		ch_cnt = 0;
+ 		if (eml == 1)
+ 		{
+ 			for (; p < p_end && *p != '\\'; p++)
+ 				ch_cnt++;
+ 		}
+ 		else
+ 		{
+ 			for (; p < p_end && *p != '\\'; p += pg_mblen(p))
+ 				ch_cnt++;
+ 		}
+ 
+ 		/*
+ 		 * Copy the text when there is a text in the left of escape char
+ 		 * or escape char is not found.
+ 		 */
+ 		if (ch_cnt)
+ 		{
+ 			text *append_text = text_substring(PointerGetDatum(replace_text),
+ 									  substr_start, ch_cnt, false);
+ 			appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
+ 			pfree(append_text);
+ 		}
+ 		substr_start += ch_cnt + 1;
+ 
+ 		if (p >= p_end) /* When escape char is not found. */
+ 			break;
+ 
+ 		/* See the next character of escape char. */
+ 		p++;
+ 		so = eo = -1;
+ 
+ 		if (*p >= '1' && *p <= '9')
+ 		{
+ 			/* Use the back reference of regexp. */
+ 			int		idx = *p - '0';
+ 			so = pmatch[idx].rm_so;
+ 			eo = pmatch[idx].rm_eo;
+ 			p++;
+ 			substr_start++;
+ 		}
+ 		else if (*p == '&')
+ 		{
+ 			/* Use the entire matched string. */
+ 			so = pmatch[0].rm_so;
+ 			eo = pmatch[0].rm_eo;
+ 			p++;
+ 			substr_start++;
+ 		}
+ 
+ 		if (so != -1 && eo != -1)
+ 		{
+ 			/* Copy the text that is back reference of regexp. */
+ 			text *append_text = text_substring(PointerGetDatum(src_text),
+ 									  so + 1, (eo - so), false);
+ 			appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
+ 			pfree(append_text);
+ 		}
+ 	}
+ }
+ 
+ #define REGEXP_REPLACE_BACKREF_CNT		10
+ 
+ /*
+  * replace_text_regexp
+  * replace text that matches to regexp in src_text to replace_text.
+  */
+ Datum
+ replace_text_regexp(PG_FUNCTION_ARGS)
+ {
+ 	text	   *ret_text;
+ 	text	   *src_text = PG_GETARG_TEXT_P(0);
+ 	int			src_text_len = VARSIZE(src_text) - VARHDRSZ;
+ 	regex_t	   *re = (regex_t *)PG_GETARG_POINTER(1);
+ 	text	   *replace_text = PG_GETARG_TEXT_P(2);
+ 	bool		global = PG_GETARG_BOOL(3);
+ 	StringInfo	str = makeStringInfo();
+ 	int			regexec_result;
+ 	regmatch_t	pmatch[REGEXP_REPLACE_BACKREF_CNT];
+ 	pg_wchar   *data;
+ 	size_t		data_len;
+ 	int			search_start;
+ 	int			data_pos;
+ 	bool		have_escape;
+ 
+ 	/* Convert data string to wide characters. */
+ 	data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
+ 	data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
+ 
+ 	/* Check whether replace_text has escape char. */
+ 	have_escape = check_replace_text_has_escape_char(replace_text);
+ 
+ 	for (search_start = data_pos = 0; search_start <= data_len;)
+ 	{
+ 		regexec_result = pg_regexec(re,
+ 									data,
+ 									data_len,
+ 									search_start,
+ 									NULL,   /* no details */
+ 									REGEXP_REPLACE_BACKREF_CNT,
+ 									pmatch,
+ 									0);
+ 
+ 		if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
+ 		{
+ 			char	errMsg[100];
+ 
+ 			/* re failed??? */
+ 			pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+ 			ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ 				 errmsg("regular expression failed: %s", errMsg)));
+ 		}
+ 
+ 		if (regexec_result == REG_NOMATCH)
+ 			break;
+ 
+         /*
+          * Copy the text when there is a text in the left of matched position.
+          */
+ 		if (pmatch[0].rm_so - data_pos > 0)
+ 		{
+ 			text *left_text = text_substring(PointerGetDatum(src_text),
+ 									   data_pos + 1,
+ 									   pmatch[0].rm_so - data_pos, false);
+ 			appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
+ 			pfree(left_text);
+ 		}
+ 
+ 		/*
+ 		 * Copy the replace_text. Process back references when the
+ 		 * replace_text has escape characters. 
+ 		 */
+ 		if (have_escape)
+ 			appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
+ 		else
+ 			appendStringInfoString(str, PG_TEXT_GET_STR(replace_text));
+ 
+ 		search_start = data_pos = pmatch[0].rm_eo;
+ 
+ 		/*
+ 		 * When global option is off, replace the first instance only.
+ 		 */
+ 		if (!global)
+ 			break;
+ 
+ 		/*
+ 		 * Search from next character when the matching text is zero width.
+ 		 */
+ 		if (pmatch[0].rm_so == pmatch[0].rm_eo)
+ 			search_start++;
+ 	}
+ 
+ 	/*
+      * Copy the text when there is a text at the right of last matched
+ 	 * or regexp is not matched.
+ 	 */
+ 	if (data_pos < data_len)
+ 	{
+ 		text *right_text = text_substring(PointerGetDatum(src_text),
+ 								   data_pos + 1, -1, true);
+ 		appendStringInfoString(str, PG_TEXT_GET_STR(right_text));
+ 		pfree(right_text);
+ 	}
+ 
+ 	ret_text = PG_STR_GET_TEXT(str->data);
+ 	pfree(str->data);
+ 	pfree(str);
+ 	pfree(data);
+ 
+ 	PG_RETURN_TEXT_P(ret_text);
+ }
+ 
+ /*
   * split_text
   * parse input string
   * return ord item (1 based)
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.286
diff -c -c -r1.286 catversion.h
*** src/include/catalog/catversion.h	8 Jul 2005 04:12:26 -0000	1.286
--- src/include/catalog/catversion.h	10 Jul 2005 04:52:46 -0000
***************
*** 53,58 ****
   */
  
  /*							yyyymmddN */
! #define CATALOG_VERSION_NO	200507081
  
  #endif
--- 53,58 ----
   */
  
  /*							yyyymmddN */
! #define CATALOG_VERSION_NO	200507101
  
  #endif
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.374
diff -c -c -r1.374 pg_proc.h
*** src/include/catalog/pg_proc.h	6 Jul 2005 19:02:53 -0000	1.374
--- src/include/catalog/pg_proc.h	10 Jul 2005 04:52:51 -0000
***************
*** 2207,2212 ****
--- 2207,2216 ----
  DESCR("return portion of string");
  DATA(insert OID =  2087 ( replace	   PGNSP PGUID 12 f f t f i 3 25 "25 25 25" _null_ _null_ _null_	replace_text - _null_ ));
  DESCR("replace all occurrences of old_substr with new_substr in string");
+ DATA(insert OID =  2284 ( regexp_replace	   PGNSP PGUID 12 f f t f i 3 25 "25 25 25" _null_ _null_ _null_	textregexreplace_noopt - _null_ ));
+ DESCR("replace text using regexp");
+ DATA(insert OID =  2285 ( regexp_replace	   PGNSP PGUID 12 f f t f i 4 25 "25 25 25 25" _null_ _null_ _null_	textregexreplace - _null_ ));
+ DESCR("replace text using regexp");
  DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 f f t f i 3 25 "25 25 23" _null_ _null_ _null_	split_text - _null_ ));
  DESCR("split string by field_sep and return field_num");
  DATA(insert OID =  2089 ( to_hex	   PGNSP PGUID 12 f f t f i 1 25 "23" _null_ _null_ _null_  to_hex32 - _null_ ));
Index: src/include/regex/regex.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/regex/regex.h,v
retrieving revision 1.26
diff -c -c -r1.26 regex.h
*** src/include/regex/regex.h	29 Nov 2003 22:41:10 -0000	1.26
--- src/include/regex/regex.h	10 Jul 2005 04:52:51 -0000
***************
*** 163,169 ****
   * the prototypes for exported functions
   */
  extern int	pg_regcomp(regex_t *, const pg_wchar *, size_t, int);
! extern int	pg_regexec(regex_t *, const pg_wchar *, size_t, rm_detail_t *, size_t, regmatch_t[], int);
  extern void pg_regfree(regex_t *);
  extern size_t pg_regerror(int, const regex_t *, char *, size_t);
  
--- 163,169 ----
   * the prototypes for exported functions
   */
  extern int	pg_regcomp(regex_t *, const pg_wchar *, size_t, int);
! extern int	pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
  extern void pg_regfree(regex_t *);
  extern size_t pg_regerror(int, const regex_t *, char *, size_t);
  
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.259
diff -c -c -r1.259 builtins.h
*** src/include/utils/builtins.h	6 Jul 2005 19:02:54 -0000	1.259
--- src/include/utils/builtins.h	10 Jul 2005 04:52:52 -0000
***************
*** 429,434 ****
--- 429,436 ----
  extern Datum texticregexeq(PG_FUNCTION_ARGS);
  extern Datum texticregexne(PG_FUNCTION_ARGS);
  extern Datum textregexsubstr(PG_FUNCTION_ARGS);
+ extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
+ extern Datum textregexreplace(PG_FUNCTION_ARGS);
  extern Datum similar_escape(PG_FUNCTION_ARGS);
  
  /* regproc.c */
***************
*** 566,571 ****
--- 568,574 ----
  extern bool SplitIdentifierString(char *rawstring, char separator,
  					  List **namelist);
  extern Datum replace_text(PG_FUNCTION_ARGS);
+ extern Datum replace_text_regexp(PG_FUNCTION_ARGS);
  extern Datum split_text(PG_FUNCTION_ARGS);
  extern Datum text_to_array(PG_FUNCTION_ARGS);
  extern Datum array_to_text(PG_FUNCTION_ARGS);
Index: src/test/regress/expected/strings.out
===================================================================
RCS file: /cvsroot/pgsql/src/test/regress/expected/strings.out,v
retrieving revision 1.25
diff -c -c -r1.25 strings.out
*** src/test/regress/expected/strings.out	20 May 2005 01:29:56 -0000	1.25
--- src/test/regress/expected/strings.out	10 Jul 2005 04:52:52 -0000
***************
*** 192,197 ****
--- 192,225 ----
   cde
  (1 row)
  
+ -- PostgreSQL extension to allow using back reference in replace string;
+ SELECT regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3');
+  regexp_replace 
+ ----------------
+  (111) 222-3333
+ (1 row)
+ 
+ SELECT regexp_replace('AAA   BBB   CCC   ', '\\s+', ' ', 'g');
+  regexp_replace 
+ ----------------
+  AAA BBB CCC 
+ (1 row)
+ 
+ SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
+  regexp_replace 
+ ----------------
+  ZAAAZ
+ (1 row)
+ 
+ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
+  regexp_replace 
+ ----------------
+  Z Z
+ (1 row)
+ 
+ -- invalid option of REGEXP_REPLACE
+ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
+ ERROR:  invalid option of regexp_replace: z
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
   4 
Index: src/test/regress/sql/strings.sql
===================================================================
RCS file: /cvsroot/pgsql/src/test/regress/sql/strings.sql,v
retrieving revision 1.16
diff -c -c -r1.16 strings.sql
*** src/test/regress/sql/strings.sql	20 May 2005 01:29:56 -0000	1.16
--- src/test/regress/sql/strings.sql	10 Jul 2005 04:52:53 -0000
***************
*** 80,85 ****
--- 80,92 ----
  -- With a parenthesized subexpression, return only what matches the subexpr
  SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
  
+ -- PostgreSQL extension to allow using back reference in replace string;
+ SELECT regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3');
+ SELECT regexp_replace('AAA   BBB   CCC   ', '\\s+', ' ', 'g');
+ SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
+ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
+ -- invalid option of REGEXP_REPLACE
+ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
  
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";


Home | Main Index | Thread Index

Privacy Policy | PostgreSQL Archives hosted by Command Prompt, Inc. | Designed by tinysofa
Copyright © 1996 – 2008 PostgreSQL Global Development Group