CREATE EXTENSION hstore; CREATE TABLE lists ( id VARCHAR(64) PRIMARY KEY, language VARCHAR(32) NOT NULL, msg_first TIMESTAMP, msg_last TIMESTAMP, msg_count INT NOT NULL DEFAULT 0, CONSTRAINT date_check CHECK (msg_last >= msg_first), CONSTRAINT count_check CHECK (msg_count >= 0) ); CREATE TABLE messages ( id SERIAL PRIMARY KEY, parent_id INT REFERENCES messages(id), thread_id INT, level INT, hash_id VARCHAR(32) NOT NULL UNIQUE, list VARCHAR(32) NOT NULL REFERENCES lists(id), message_id VARCHAR(200), in_reply_to TEXT[], refs TEXT[], sent TIMESTAMP, subject TEXT, author TEXT, body_plain TEXT, body_tsvector tsvector, subject_tsvector tsvector, headers HSTORE, raw_message TEXT ); CREATE TABLE message_replies ( id INT REFERENCES messages(id), parent_id INT REFERENCES messages(id), PRIMARY KEY (id, parent_id) ); CREATE OR REPLACE FUNCTION message_update_tsvector() RETURNS trigger AS $$ BEGIN IF TG_OP = 'INSERT' THEN NEW.body_tsvector := to_tsvector('english', strip_replies(NEW.body_plain)); NEW.subject_tsvector := to_tsvector('english', NEW.subject); ELSIF TG_OP = 'UPDATE' THEN IF NEW.body_plain IS DISTINCT FROM OLD.body_plain THEN NEW.body_tsvector := to_tsvector('english', strip_replies(NEW.body_plain)); END IF; IF NEW.subject IS DISTINCT FROM OLD.subject THEN NEW.subject_tsvector := to_tsvector('english', NEW.subject); END IF; END IF; RETURN NEW; END; $$ LANGUAGE plpgsql; -- TODO detect sudden changes in subject, which usually mean someone started -- a new thread by 'reply-to' an existing message CREATE OR REPLACE FUNCTION message_update_thread() RETURNS trigger AS $$ DECLARE v_min_tid INT; v_max_tid INT; v_level INT; v_id INT; v_subject TEXT; v_len INT; BEGIN NEW.hash_id := md5(NEW.raw_message); IF (NEW.in_reply_to IS NOT NULL) THEN SELECT MIN(id), MIN(thread_id), MAX(thread_id), MIN(level) INTO v_id, v_min_tid, v_max_tid, v_level FROM messages WHERE list = NEW.list AND message_id = ANY(NEW.in_reply_to); IF v_min_tid IS NOT NULL THEN NEW.parent_id := v_id; NEW.thread_id := v_min_tid; NEW.level := v_level+1; END IF; IF v_min_tid IS DISTINCT FROM v_max_tid THEN RAISE WARNING 'min = %, max = %', v_min_tid, v_max_tid; END IF; END IF; IF (NEW.thread_id IS NULL) AND (NEW.refs IS NOT NULL) THEN SELECT MIN(id), MIN(thread_id), MAX(thread_id), MIN(level) INTO v_id, v_min_tid, v_max_tid, v_level FROM messages WHERE list = NEW.list AND message_id = ANY(NEW.refs); IF v_min_tid IS NOT NULL THEN NEW.parent_id := v_id; NEW.thread_id := v_min_tid; NEW.level := v_level+1; END IF; IF v_min_tid IS DISTINCT FROM v_max_tid THEN RAISE WARNING 'min = %, max = %', v_min_tid, v_max_tid; END IF; END IF; IF (NEW.thread_id IS NULL) AND (NEW.subject IS NOT NULL) AND (NEW.subject ~* '[Rr][Ee]:.*') THEN v_subject := NEW.subject; v_len := length(v_subject); LOOP v_subject := trim(regexp_replace(v_subject, '^[Rr][Ee]:\s?', '')); EXIT WHEN length(v_subject) = v_len; v_len := length(v_subject); END LOOP; SELECT MAX(id) INTO v_max_tid FROM messages WHERE subject = v_subject AND list = NEW.list AND NOT (subject ~* '[Rr][Ee]:.*'); IF v_max_tid IS NOT NULL THEN SELECT level INTO v_level FROM messages WHERE id = v_max_tid; NEW.thread_id := v_max_tid; NEW.level := v_level + 1; END IF; END IF; IF (NEW.thread_id IS NULL) THEN NEW.thread_id := NEW.id; NEW.level := 0; END IF; RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION message_fill_replies() RETURNS trigger AS $$ DECLARE v_min_id INT; v_max_id INT; BEGIN INSERT INTO message_replies SELECT NEW.id, id FROM messages WHERE list = NEW.list AND message_id = ANY(NEW.in_reply_to); RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER update_vectors BEFORE INSERT OR UPDATE ON messages FOR EACH ROW EXECUTE PROCEDURE message_update_tsvector(); CREATE TRIGGER update_thread BEFORE INSERT ON messages FOR EACH ROW EXECUTE PROCEDURE message_update_thread(); CREATE TRIGGER fill_replies AFTER INSERT ON messages FOR EACH ROW EXECUTE PROCEDURE message_fill_replies(); CREATE INDEX message_body_idx on messages using gin(body_tsvector); CREATE INDEX message_subject_idx on messages using gin(subject_tsvector); CREATE INDEX message_send_idx on messages(sent); CREATE INDEX message_id_idx on messages(message_id); CREATE INDEX message_subj_idx on messages(subject) WHERE NOT (subject ~* '[Rr][Ee]:.*'); CREATE INDEX message_thread_idx on messages(thread_id); -- rewrap e-mail body to given line length CREATE OR REPLACE FUNCTION rewrap(p_text TEXT, p_len INT) RETURNS text AS $$ DECLARE v_lines TEXT[]; v_words TEXT[]; v_line TEXT; v_word TEXT; v_result TEXT; line_idx INT; word_idx INT; v_len INT; BEGIN -- handle tabs (replace with 4 spaces) p_text := regexp_replace(p_text, '\t', ' ', 'g'); v_lines := regexp_split_to_array(p_text, '\n'); v_result := ''; IF v_lines IS NULL THEN RETURN NULL; END IF; FOR line_idx IN array_lower(v_lines, 1) .. array_upper(v_lines, 1) LOOP v_line := v_lines[line_idx]; IF length(v_line) < p_len THEN v_result := v_result || trim(trailing ' ' from v_line) || E'\n'; v_line := ''; ELSE v_words := regexp_split_to_array(v_line, ' '); v_line := ''; FOR word_idx IN array_lower(v_words, 1) .. array_upper(v_words, 1) LOOP v_word := v_words[word_idx]; IF length(v_line) + length(v_word) + 1 <= p_len THEN v_line := v_line || v_word || ' '; ELSIF length(v_word) > p_len THEN v_len := p_len - length(v_line); WHILE length(v_word) > 0 LOOP v_line := v_line || substr(v_word, 0, v_len); v_result := v_result || trim(trailing ' ' from v_line) || E'\n'; v_line := ''; v_word := substr(v_word, v_len); END LOOP; ELSE v_result := v_result || trim(trailing ' ' from v_line) || E'\n'; v_line := v_words[word_idx] || ' '; -- FIXME handle case when the line is a quotation (prepend >) END IF; END LOOP; END IF; IF length(v_line) > 0 THEN v_result := v_result || trim(trailing ' ' from v_line) || E'\n'; END IF; END LOOP; RETURN v_result; END; $$ LANGUAGE plpgsql; -- extracts e-mail address from the "From:" header CREATE OR REPLACE FUNCTION getmail(p_text TEXT) RETURNS text AS $$ DECLARE v_email TEXT; BEGIN IF p_text LIKE '%<%>%' THEN v_email := SUBSTRING(p_text,'<(.*)>'); ELSE v_email := p_text; END IF; RETURN lower(trim(v_email)); END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION strip_replies(p_text TEXT) RETURNS text AS $$ DECLARE v_result TEXT; v_lines TEXT[]; v_line TEXT; line_idx INT; BEGIN v_lines := regexp_split_to_array(p_text, '\n'); v_result := ''; IF v_lines IS NULL THEN RETURN NULL; END IF; FOR line_idx IN array_lower(v_lines, 1) .. array_upper(v_lines, 1) LOOP v_line := v_lines[line_idx]; IF NOT v_line ~* '^>[\s>]*' THEN v_result := v_result || v_line || E'\n'; END IF; END LOOP; RETURN v_result; END; $$ LANGUAGE plpgsql; INSERT INTO lists (id, language) VALUES ('pgsql-general', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-hackers', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-novice', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-sql', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-bugs', 'english'); INSERT INTO lists (id, language) VALUES ('pgadmin-hackers', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-jdbc', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-odbc', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-patches', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-advocacy', 'english'); INSERT INTO lists (id, language) VALUES ('pgsql-announce', 'english');