Selecting and deleting duplicate rows

Lists: pgsql-sql
From: Andrew Merrill <andrew(at)compclass(dot)com>
To: pgsql-sql(at)hub(dot)org
Subject: regexp strangeness
Date: 1999-03-30 17:44:09
Message-ID: 37010D69.6D1868ED@compclass.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-sql

I'm using PostgreSQL 6.4.2 regexps, and they don't seem to work the way
I'm used to (Perl).

Here's the records in a simple table:

select * from t5 where name ~ 'bar';
name
--------
bar
abar
xbar
not abar
(4 rows)

I'd like to match the ones that begin with 'bar' or 'abar'.

select * from t5 where name ~ '^a?bar';
name
----
abar
(1 row)

select * from t5 where name ~ '^bar|^abar';
name
----
(0 rows)

Neither of these works. I looked at
pgsql/src/backend/regex/re_format.7, and it claims that ^, ?, and | all
work as expected. The type of name is varchar(30), in case that is
relevant.

Am I misunderstanding PostgreSQL's use of regexps, or is this a bug
others have seen?

Thanks for your help.

Andrew Merrill


From: Clark Evans <clark(dot)evans(at)manhattanproject(dot)com>
To: pgsql-sql(at)hub(dot)org
Subject: Selecting and deleting duplicate rows
Date: 1999-03-30 20:33:04
Message-ID: 37013500.DFF0A64A@manhattanproject.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Lists: pgsql-sql

This is a question I've seen a few times, and
had to research, so I figured I'd share the
answer.

-------------------------------------------------

drop table test;
--
create table test ( a text, b text );
-- unique values
insert into test values ( 'x', 'y');
insert into test values ( 'x', 'x');
insert into test values ( 'y', 'y' );
insert into test values ( 'y', 'x' );
-- duplicate values
insert into test values ( 'x', 'y');
insert into test values ( 'x', 'x');
insert into test values ( 'y', 'y' );
insert into test values ( 'y', 'x' );
-- one more double duplicate
insert into test values ( 'x', 'y');
--
select oid, a, b from test;
--
-- select duplicate rows
--
select o.oid, o.a, o.b from test o
where exists ( select 'x'
from test i
where i.a = o.a
and i.b = o.b
and i.oid < o.oid
);
--
-- delete duplicate rows
--
-- Note: PostgreSQL dosn't support aliases on
-- the table mentioned in the from clause
-- of a delete.
--

delete from test
where exists ( select 'x'
from test i
where i.a = test.a
and i.b = test.b
and i.oid < test.oid
);
--
-- Let's see if it worked.
--

select oid, a, b from test;

--
-- Delete duplicates with respect to a only, ignoring
-- the value in b. Note, the first deletion leaves the
-- first oid with the unique values and removes subsequent
-- ones, in this delete we reverse the direction of the <
-- to save the last oid, and remove the previous ones.
--

delete from test
where exists ( select 'x'
from test i
where i.a = test.a
and i.oid > test.oid
);

--
-- Let's see if it worked.
--

select oid, a, b from test;