|
-- private.refine_persons(userid)
--
-- Rebuilds the refined view of one user's contacts from the mined data:
--   1. collects the user's points of contact together with message metadata,
--   2. keeps only emails whose 'refined#email_address' tag AND at least one
--      'refined#message_header' tag have reachable IN (1, 2)
--      (original note: "Not 3"; the meaning of each code is defined elsewhere),
--   3. picks a preferred display name per email (most frequent, then most recent),
--   4. computes per-email engagement counters,
--   5. writes names back to private.persons and upserts private.refinedpersons,
--   6. deletes the user's rows from private.messages.
--
-- Parameters:
--   userid  uuid of the user whose data is refined.
-- Returns: void. All work is done through side effects on the private.* tables.
--
-- The function runs with an empty search_path, so every permanent object is
-- schema-qualified; the unqualified names below are session temp tables.
CREATE OR REPLACE FUNCTION private.refine_persons(userid uuid)
RETURNS void
LANGUAGE plpgsql
SET search_path = ''
AS $$
BEGIN
    -- Step 1: one row per (point of contact, message) for this user.
    CREATE TEMP TABLE user_points_of_contact AS
    SELECT
        poc.person_email,
        poc.name,
        poc.plus_address,
        poc.message_id,
        poc."from",
        poc."to",
        poc.bcc,
        poc.cc,
        poc.reply_to,
        m.date,
        m.conversation
    FROM private.pointsofcontact AS poc
    INNER JOIN private.messages AS m
        ON poc.message_id = m.message_id
        AND poc.user_id = m.user_id
    WHERE poc.user_id = userid;

    -- Step 2: reachable emails and their email-address tag names.
    CREATE TEMP TABLE grouped_tags AS
    SELECT
        tg.person_email AS email,
        array_agg(tg.name) AS tags
    FROM private.tags AS tg
    WHERE
        tg.user_id = userid
        -- email address must be reachable
        AND tg.source = 'refined#email_address'
        AND tg.reachable IN (1, 2)
        -- at least one reachable message_header tag must exist
        AND EXISTS (
            SELECT 1
            FROM private.tags AS mh
            WHERE mh.user_id = userid
                AND mh.person_email = tg.person_email
                AND mh.source = 'refined#message_header'
                AND mh.reachable IN (1, 2)
        )
    GROUP BY tg.person_email;

    -- Step 3a: one row per (email, name) with usage statistics.
    -- The window function runs after GROUP BY, so alternate_name holds every
    -- distinct non-null name seen for the email.
    CREATE TEMP TABLE name_aggregates AS
    SELECT
        upc.person_email,
        upc.name,
        MAX(upc.date) AS recent_date,
        COUNT(*) AS total,
        array_agg(upc.name) OVER (PARTITION BY upc.person_email) AS alternate_name
    FROM user_points_of_contact AS upc
    WHERE upc.name IS NOT NULL
    GROUP BY upc.person_email, upc.name;

    -- Step 3b: preferred name = most frequent, ties broken by most recent use.
    -- DISTINCT collapses the per-name rows to one row per email.
    CREATE TEMP TABLE real_names AS
    SELECT DISTINCT
        na.person_email,
        FIRST_VALUE(na.name) OVER (
            PARTITION BY na.person_email
            ORDER BY na.total DESC, na.recent_date DESC
        ) AS preferred_name,
        na.alternate_name
    FROM name_aggregates AS na;

    -- Step 4: per-email engagement counters.
    CREATE TEMP TABLE email_aggregates AS
    SELECT
        upc.person_email,
        MAX(upc.date) AS recency,
        MIN(upc.date) AS seniority,
        -- NOTE(review): helper is defined elsewhere; presumably de-duplicates
        -- the first array and removes the elements of the second — confirm.
        private.get_distinct_or_exclude_from_array(
            array_agg(upc.plus_address)::text[],
            ARRAY[]::text[]
        ) AS alternate_email,
        COUNT(*) AS occurrence,
        COUNT(*) FILTER (
            WHERE upc."from" = true OR upc.reply_to = true
        ) AS sender,
        COUNT(*) FILTER (
            WHERE upc."to" = true OR upc.bcc = true OR upc.cc = true
        ) AS recipient,
        COUNT(*) FILTER (
            WHERE upc.conversation = true
        ) AS conversations,
        COUNT(*) FILTER (
            WHERE upc.conversation = true AND upc."from" = true
        ) AS replied_conversations
    FROM user_points_of_contact AS upc
    GROUP BY upc.person_email;

    -- Step 5: join everything; the INNER JOIN on grouped_tags drops every
    -- email that did not pass the reachability filter of step 2.
    CREATE TEMP TABLE combined_data AS
    SELECT
        ea.person_email,
        ea.recency,
        ea.seniority,
        ea.alternate_email,
        ea.occurrence,
        ea.sender,
        ea.recipient,
        ea.conversations,
        ea.replied_conversations,
        gt.tags AS tags,
        pn.preferred_name AS name,
        private.get_distinct_or_exclude_from_array(
            pn.alternate_name,
            ARRAY[pn.preferred_name, ea.person_email]
        ) AS alternate_name
    FROM email_aggregates AS ea
    LEFT JOIN real_names AS pn
        ON ea.person_email = pn.person_email
    INNER JOIN grouped_tags AS gt
        ON ea.person_email = gt.email;

    -- Step 6a: write the refined names back to this user's persons only.
    -- Without the user_id predicate another user's row with the same email
    -- would be overwritten.
    -- NOTE(review): assumes private.persons has a user_id column like the
    -- other private.* tables used here — confirm against the schema.
    UPDATE private.persons AS p
    SET
        name = cd.name,
        alternate_name = cd.alternate_name,
        alternate_email = cd.alternate_email
    FROM combined_data AS cd
    WHERE p.email = cd.person_email
        AND p.user_id = userid;

    -- Step 6b: upsert the engagement counters and tags.
    INSERT INTO private.refinedpersons (
        user_id, email, occurrence, recency, seniority,
        sender, recipient, conversations, replied_conversations, tags
    )
    SELECT
        userid,
        cd.person_email,
        cd.occurrence,
        cd.recency,
        cd.seniority,
        cd.sender,
        cd.recipient,
        cd.conversations,
        cd.replied_conversations,
        cd.tags
    FROM combined_data AS cd
    ON CONFLICT (user_id, email) DO UPDATE
    SET
        occurrence = EXCLUDED.occurrence,
        recency = EXCLUDED.recency,
        seniority = EXCLUDED.seniority,
        sender = EXCLUDED.sender,
        recipient = EXCLUDED.recipient,
        conversations = EXCLUDED.conversations,
        replied_conversations = EXCLUDED.replied_conversations,
        tags = EXCLUDED.tags;

    -- Drop the temp tables so the function can be called again in the same
    -- session without a "relation already exists" error.
    DROP TABLE IF EXISTS user_points_of_contact;
    DROP TABLE IF EXISTS grouped_tags;
    DROP TABLE IF EXISTS name_aggregates;
    DROP TABLE IF EXISTS real_names;
    DROP TABLE IF EXISTS email_aggregates;
    DROP TABLE IF EXISTS combined_data;

    -- Step 7: clear this user's messages.
    -- The comparison must use the parameter (userid). A bare "user_id" here
    -- resolves to the column itself and would delete every user's messages.
    DELETE FROM private.messages AS m
    WHERE m.user_id = userid;
END;
$$;
0 commit comments