Skip to content

Commit edd7374

Browse files
authored
optimize refine_persons function (#1993)
* optimize refine_persons function * change fd to upc (user points of contact)
1 parent c77f40c commit edd7374

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
-- Drop get_grouped_tags_by_person and any existing refine_persons before the
-- CREATE that follows. IF EXISTS downgrades a missing function to a NOTICE so
-- the script can be re-run or applied to a database that never had them.
-- No argument list is given, so each name must identify a single function
-- (PostgreSQL raises an error if the name is overloaded).
DROP FUNCTION IF EXISTS private.get_grouped_tags_by_person;
DROP FUNCTION IF EXISTS private.refine_persons;
3+
-- private.refine_persons(userid)
--
-- Recomputes the per-contact statistics of one user and upserts them into
-- private.refinedpersons, keyed on (user_id, email).
--
-- Parameters:
--   userid  user whose points of contact, messages and tags are aggregated.
--
-- Returns: nothing; the effect is the upsert into private.refinedpersons.
--
-- Intermediate results are staged in session-local temporary tables. The
-- function runs with an empty search_path, so every permanent object is
-- schema-qualified; the temporary tables are referenced by bare name.
-- NOTE(review): this relies on PostgreSQL searching the temporary schema first
-- even when it is not listed in search_path -- confirm for the target version.
CREATE OR REPLACE FUNCTION private.refine_persons(userid uuid)
RETURNS void
LANGUAGE plpgsql
SET search_path = ''
AS $$
BEGIN
    -- Temporary tables outlive the function call by default, so a second call
    -- on the same connection would fail with "relation already exists".
    -- Clear leftovers first (explicitly in pg_temp so a permanent table can
    -- never be hit) and create every staging table with ON COMMIT DROP.
    DROP TABLE IF EXISTS
        pg_temp.user_points_of_contact,
        pg_temp.grouped_tags,
        pg_temp.name_aggregates,
        pg_temp.real_names,
        pg_temp.email_aggregates,
        pg_temp.combined_data;

    -- Every point of contact of the user, enriched with the date and the
    -- conversation flag of the message it was found in.
    CREATE TEMP TABLE user_points_of_contact ON COMMIT DROP AS
    SELECT
        poc.person_email,
        poc.name,
        poc.plus_address,
        poc.message_id,
        poc."from",
        poc."to",
        poc.bcc,
        poc.cc,
        poc.reply_to,
        m.date,
        m.conversation
    FROM private.pointsofcontact AS poc
    INNER JOIN private.messages AS m
        ON poc.message_id = m.message_id
        AND poc.user_id = m.user_id
    WHERE poc.user_id = userid;

    -- Tag names per email, built from the tags whose reachable is 1 or 2.
    -- An email is skipped entirely when the highest reachable value among all
    -- of its tags is 3. Done as one grouped pass instead of NOT IN over a
    -- second scan: same result, and immune to the NOT IN / NULL trap (a NULL
    -- person_email in the excluded set would have emptied the whole result).
    -- The former extra HAVING branches were already implied by MAX(...) = 3.
    CREATE TEMP TABLE grouped_tags ON COMMIT DROP AS
    SELECT
        tg.person_email AS email,
        array_agg(tg.name) FILTER (WHERE tg.reachable IN (1, 2)) AS tags
    FROM private.tags AS tg
    WHERE tg.user_id = userid
    GROUP BY tg.person_email
    HAVING
        COUNT(*) FILTER (WHERE tg.reachable IN (1, 2)) > 0
        AND MAX(tg.reachable) IS DISTINCT FROM 3;

    -- One row per (email, name): how often and how recently the name was seen.
    -- The window aggregate runs over the grouped rows, so alternate_name holds
    -- each distinct non-null name of the email exactly once.
    CREATE TEMP TABLE name_aggregates ON COMMIT DROP AS
    SELECT
        upc.person_email,
        upc.name,
        MAX(upc.date) AS recent_date,
        COUNT(*) AS total,
        array_agg(upc.name) OVER (PARTITION BY upc.person_email) AS alternate_name
    FROM user_points_of_contact AS upc
    WHERE upc.name IS NOT NULL
    GROUP BY upc.person_email, upc.name;

    -- Preferred name per email: most frequent, then most recent. The name
    -- itself is the last sort key so ties resolve deterministically.
    CREATE TEMP TABLE real_names ON COMMIT DROP AS
    SELECT DISTINCT
        na.person_email,
        FIRST_VALUE(na.name) OVER (
            PARTITION BY na.person_email
            ORDER BY na.total DESC, na.recent_date DESC, na.name
        ) AS preferred_name,
        na.alternate_name
    FROM name_aggregates AS na;

    -- Engagement counters per email.
    CREATE TEMP TABLE email_aggregates ON COMMIT DROP AS
    SELECT
        upc.person_email,
        MAX(upc.date) AS recency,
        MIN(upc.date) AS seniority,
        private.get_distinct_or_exclude_from_array(array_agg(upc.plus_address)) AS alternate_email,
        COUNT(*) AS occurrence,
        COUNT(*) FILTER (WHERE upc."from" OR upc.reply_to) AS sender,
        COUNT(*) FILTER (WHERE upc."to" OR upc.bcc OR upc.cc) AS recipient,
        COUNT(*) FILTER (WHERE upc.conversation) AS conversations,
        COUNT(*) FILTER (WHERE upc.conversation AND upc."from") AS replied_conversations
    FROM user_points_of_contact AS upc
    GROUP BY upc.person_email;

    -- Counters joined with tags and names. The inner join on grouped_tags
    -- keeps only emails that have at least one qualifying tag.
    -- NOTE(review): name, alternate_name and alternate_email are computed here
    -- but the INSERT below does not write them -- confirm whether
    -- private.refinedpersons should receive them or the name steps can go.
    CREATE TEMP TABLE combined_data ON COMMIT DROP AS
    SELECT
        ea.person_email,
        ea.recency,
        ea.seniority,
        ea.alternate_email,
        ea.occurrence,
        ea.sender,
        ea.recipient,
        ea.conversations,
        ea.replied_conversations,
        gt.tags AS tags,
        pn.preferred_name AS name,
        private.get_distinct_or_exclude_from_array(pn.alternate_name, pn.preferred_name) AS alternate_name
    FROM email_aggregates AS ea
    LEFT JOIN real_names AS pn
        ON ea.person_email = pn.person_email
    INNER JOIN grouped_tags AS gt
        ON ea.person_email = gt.email;

    -- Upsert: refresh the counters and tags of contacts that already exist.
    INSERT INTO private.refinedpersons (
        user_id,
        email,
        occurrence,
        recency,
        seniority,
        sender,
        recipient,
        conversations,
        replied_conversations,
        tags
    )
    SELECT
        userid,
        cd.person_email,
        cd.occurrence,
        cd.recency,
        cd.seniority,
        cd.sender,
        cd.recipient,
        cd.conversations,
        cd.replied_conversations,
        cd.tags
    FROM combined_data AS cd
    ON CONFLICT (user_id, email) DO UPDATE
    SET
        occurrence = EXCLUDED.occurrence,
        recency = EXCLUDED.recency,
        seniority = EXCLUDED.seniority,
        sender = EXCLUDED.sender,
        recipient = EXCLUDED.recipient,
        conversations = EXCLUDED.conversations,
        replied_conversations = EXCLUDED.replied_conversations,
        tags = EXCLUDED.tags;
END;
$$;

0 commit comments

Comments
 (0)