Skip to content

Commit f40ae3a

Browse files
authored
Fix migrations & low contacts extraction (#2529)
* Create 20251125143028_update_function_get_mining_stats.sql * Create 20251125143059_fix_get_distinct_or_exclude_from_array.sql * Revert 20251024142256_update_function_get_mining_stats.sql * Delete 20251120150626_fix-get_distinct_or_exclude_from_array.sql * Create 20251125160609_refine_persons_fix_tags_filter_2.sql - resolves #2530
1 parent 06585f0 commit f40ae3a

4 files changed

+176
-10
lines changed

supabase/migrations/20251024142256_update_function_get_mining_stats.sql

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,15 @@ RETURNS TABLE(
88
total_reachable BIGINT,
99
total_with_phone BIGINT,
1010
total_with_company BIGINT
11-
total_with_location BIGINT
1211
) AS $$
1312
DECLARE
1413
user_id UUID;
1514
BEGIN
1615
-- Get the user_id first
17-
SELECT pt.user_id INTO user_id
18-
FROM private.tasks pt
19-
WHERE pt.details->>'miningId' = $1
20-
AND pt.status = 'done'
21-
ORDER BY pt.started_at DESC
16+
SELECT p.user_id INTO user_id
17+
FROM private.tasks p
18+
WHERE p.details->>'miningId' = $1
19+
AND p.status = 'done'
2220
LIMIT 1;
2321

2422
-- Return the statistics
@@ -30,8 +28,7 @@ BEGIN
3028
COUNT(*) FILTER (WHERE status = 'VALID') AS total_reachable,
3129
COUNT(telephone) AS total_with_phone,
3230
COUNT(*) FILTER (WHERE job_title IS NOT NULL OR works_for IS NOT NULL) AS total_with_company
33-
COUNT(*) FILTER (locations IS NOT NULL AND locations <> '') AS total_with_'location'
3431
FROM private.get_contacts_table(user_id) AS contacts
3532
WHERE contacts.mining_id = $1;
3633
END;
37-
$$ LANGUAGE plpgsql;
34+
$$ LANGUAGE plpgsql;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
-- Drop any existing definition first: CREATE OR REPLACE cannot change the
-- OUT columns (the RETURNS TABLE row type) of an already existing function.
DROP FUNCTION IF EXISTS private.get_mining_stats(text);

-- Aggregated contact statistics for one mining run.
--
-- Parameters:
--   mining_id  identifier of the mining run; compared with the 'miningId' key
--              of private.tasks.details and with the mining_id of the contacts.
--
-- Returns a single row (aggregate query without GROUP BY):
--   user_id               user of the most recently started 'done' task of the
--                         mining, NULL when no such task exists
--   source                source of one person row belonging to the mining
--   total_contacts_mined  number of contacts of the mining
--   total_reachable       contacts whose status is 'VALID'
--   total_with_phone      contacts with a non-NULL telephone
--   total_with_company    contacts with a job_title or a works_for value
--   total_with_location   contacts with a non-NULL, non-empty locations value
CREATE FUNCTION private.get_mining_stats(mining_id text)
RETURNS TABLE(
    user_id UUID,
    source text,
    total_contacts_mined BIGINT,
    total_reachable BIGINT,
    total_with_phone BIGINT,
    total_with_company BIGINT,
    total_with_location BIGINT
) AS $$
DECLARE
    -- Deliberately not called "user_id": that name is already the first OUT
    -- column of this function, and an unqualified "user_id" inside the query
    -- below would become ambiguous as soon as the queried relation exposes a
    -- column with that name.
    v_user_id UUID;
BEGIN
    -- Resolve the owner of the mining from its latest finished task.
    SELECT pt.user_id INTO v_user_id
    FROM private.tasks pt
    WHERE pt.details->>'miningId' = $1
        AND pt.status = 'done'
    ORDER BY pt.started_at DESC
    LIMIT 1;

    -- Return the statistics.
    RETURN QUERY
    SELECT
        v_user_id,
        -- LIMIT 1 without ORDER BY: an arbitrary matching person row is used.
        (SELECT p.source FROM private.persons p WHERE p.mining_id = $1 LIMIT 1) AS source,
        COUNT(*) AS total_contacts_mined,
        COUNT(*) FILTER (WHERE contacts.status = 'VALID') AS total_reachable,
        COUNT(contacts.telephone) AS total_with_phone,
        COUNT(*) FILTER (
            WHERE contacts.job_title IS NOT NULL
                OR contacts.works_for IS NOT NULL
        ) AS total_with_company,
        COUNT(*) FILTER (
            WHERE contacts.locations IS NOT NULL
                AND contacts.locations <> ''
        ) AS total_with_location
    FROM private.get_contacts_table(v_user_id) AS contacts
    WHERE contacts.mining_id = $1;
END;
$$ LANGUAGE plpgsql;

supabase/migrations/20251120150626_fix-get_distinct_or_exclude_from_array.sql renamed to supabase/migrations/20251125143059_fix_get_distinct_or_exclude_from_array.sql

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
DROP FUNCTION private.get_distinct_or_exclude_from_array;
2-
CREATE FUNCTION private.get_distinct_or_exclude_from_array(
1+
CREATE OR REPLACE FUNCTION private.get_distinct_or_exclude_from_array(
32
input_array text[],
43
exclude_array text[] DEFAULT NULL
54
)
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
-- Recomputes the refined contact data of one user.
--
-- Only persons having at least one 'refined#email_address' tag with
-- reachable IN (1, 2) are kept. For those persons the function:
--   * updates name, alternate_name and alternate_email in private.persons,
--   * upserts the interaction counters and tags into private.refinedpersons,
--   * finally deletes all rows of the user from private.messages.
--
-- Parameters:
--   userid  user whose points of contact, tags and messages are processed.
--
-- The function runs with an empty search_path, so every permanent object is
-- schema-qualified; the temporary tables are referenced by bare name.
CREATE OR REPLACE FUNCTION private.refine_persons(userid uuid)
RETURNS void
LANGUAGE plpgsql
SET search_path = ''
AS $$
BEGIN
    -- Points of contact of the user, enriched with the date and the
    -- conversation flag of the message they were found in.
    CREATE TEMP TABLE user_points_of_contact AS
    SELECT
        poc.person_email,
        poc.name,
        poc.plus_address,
        poc.message_id,
        poc."from",
        poc."to",
        poc.bcc,
        poc.cc,
        poc.reply_to,
        m.date,
        m.conversation
    FROM private.pointsofcontact poc
    INNER JOIN private.messages m
        ON poc.message_id = m.message_id
        AND poc.user_id = m.user_id
    WHERE poc.user_id = userid;

    -- Tag names per email, restricted to reachable email-address tags.
    CREATE TEMP TABLE grouped_tags AS
    SELECT
        tg.person_email AS email,
        array_agg(tg.name) AS tags
    FROM private.tags tg
    WHERE tg.user_id = userid
        -- email address must be reachable
        AND tg.source = 'refined#email_address'
        AND tg.reachable IN (1, 2)
    GROUP BY tg.person_email;

    -- One row per (email, name): usage count, latest date, and the list of
    -- all names seen for that email (window over the grouped rows).
    CREATE TEMP TABLE name_aggregates AS
    SELECT
        upc.person_email,
        upc.name,
        MAX(upc.date) AS recent_date,
        COUNT(*) AS total,
        array_agg(upc.name) OVER (PARTITION BY upc.person_email) AS alternate_name
    FROM user_points_of_contact upc
    WHERE upc.name IS NOT NULL
    GROUP BY upc.person_email, upc.name;

    -- Preferred name per email: the most frequent one, ties broken by the
    -- most recent date.
    CREATE TEMP TABLE real_names AS
    SELECT DISTINCT
        na.person_email,
        FIRST_VALUE(na.name) OVER (
            PARTITION BY na.person_email
            ORDER BY na.total DESC, na.recent_date DESC
        ) AS preferred_name,
        na.alternate_name
    FROM name_aggregates na;

    -- Interaction counters per email.
    CREATE TEMP TABLE email_aggregates AS
    SELECT
        upc.person_email,
        MAX(upc.date) AS recency,
        MIN(upc.date) AS seniority,
        private.get_distinct_or_exclude_from_array(
            array_agg(upc.plus_address)::text[],
            ARRAY[]::text[]
        ) AS alternate_email,
        COUNT(*) AS occurrence,
        COUNT(*) FILTER (WHERE upc."from" OR upc.reply_to) AS sender,
        COUNT(*) FILTER (WHERE upc."to" OR upc.bcc OR upc.cc) AS recipient,
        COUNT(*) FILTER (WHERE upc.conversation) AS conversations,
        COUNT(*) FILTER (WHERE upc.conversation AND upc."from") AS replied_conversations
    FROM user_points_of_contact upc
    GROUP BY upc.person_email;

    -- Counters + names + tags. The inner join on grouped_tags is what drops
    -- every email without a reachable tag.
    CREATE TEMP TABLE combined_data AS
    SELECT
        ea.*,
        gt.tags AS tags,
        pn.preferred_name AS name,
        private.get_distinct_or_exclude_from_array(
            pn.alternate_name,
            ARRAY[pn.preferred_name, ea.person_email]
        ) AS alternate_name
    FROM email_aggregates ea
    LEFT JOIN real_names pn
        ON ea.person_email = pn.person_email
    INNER JOIN grouped_tags gt
        ON ea.person_email = gt.email;

    -- Restrict the update to the persons of this user: matching on email
    -- alone would also overwrite persons that other users hold for the same
    -- address.
    -- NOTE(review): assumes private.persons carries a user_id column like the
    -- other per-user tables used in this function — confirm against the schema.
    UPDATE private.persons AS p
    SET
        name = cd.name,
        alternate_name = cd.alternate_name,
        alternate_email = cd.alternate_email
    FROM combined_data cd
    WHERE p.email = cd.person_email
        AND p.user_id = userid;

    -- Upsert the refined counters, keyed by (user_id, email).
    INSERT INTO private.refinedpersons (
        user_id, email, occurrence, recency, seniority,
        sender, recipient, conversations, replied_conversations, tags
    )
    SELECT
        userid,
        cd.person_email,
        cd.occurrence,
        cd.recency,
        cd.seniority,
        cd.sender,
        cd.recipient,
        cd.conversations,
        cd.replied_conversations,
        cd.tags
    FROM combined_data cd
    ON CONFLICT (user_id, email) DO UPDATE
    SET
        occurrence = EXCLUDED.occurrence,
        recency = EXCLUDED.recency,
        seniority = EXCLUDED.seniority,
        sender = EXCLUDED.sender,
        recipient = EXCLUDED.recipient,
        conversations = EXCLUDED.conversations,
        replied_conversations = EXCLUDED.replied_conversations,
        tags = EXCLUDED.tags;

    -- Drop the temp tables so the function can be called again in the same
    -- session.
    DROP TABLE IF EXISTS user_points_of_contact;
    DROP TABLE IF EXISTS grouped_tags;
    DROP TABLE IF EXISTS name_aggregates;
    DROP TABLE IF EXISTS real_names;
    DROP TABLE IF EXISTS email_aggregates;
    DROP TABLE IF EXISTS combined_data;

    -- Clear the messages of this user.
    DELETE FROM private.messages m WHERE m.user_id = userid;
END;
$$;

0 commit comments

Comments
 (0)