Skip to content

Commit 3ce9809

Browse files
authored
Fix signature bugs (#2329)
* use string concat instead of array * enable loki graceful shutdown * consider enriched when there are fields other than email, user_id * update useful signature rules * fix: set to null instead of empty array * update prompt and llm models list * set the most accurate llm model
1 parent 205e6ac commit 3ce9809

File tree

7 files changed

+56
-40
lines changed

7 files changed

+56
-40
lines changed

backend/src/emailSignatureWorker.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ const subscriberRedisClient = redis.getSubscriberClient();
2323

2424
const emailSignatureCache = new RedisEmailSignatureCache(redisClient);
2525

26-
const llmModel = LLMModels.mistralai7bInstruct;
26+
const llmModel = LLMModels.cohere;
2727

2828
const { processStreamData } = initializeEmailSignatureProcessor(
2929
supabaseClient,
3030
new Signature(
3131
new TokenBucketRateLimiter(llmModel.includes('free') ? 15 : 500, 60 * 1000),
3232
logger,
3333
{
34-
model: LLMModels.mistralai7bInstruct,
34+
model: llmModel,
3535
apiKey: ENV.SIGNATURE_OPENROUTER_API_KEY,
3636
useLLM: ENV.SIGNATURE_USE_LLM
3737
}

backend/src/services/imap/ImapEmailsFetcher.ts

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,8 @@ export default class ImapEmailsFetcher {
250250
let messageCounter = 0;
251251

252252
fetchResult.on('message', (msg, seqNumber) => {
253-
let headerChunks: Buffer[] = [];
254-
let bodyChunks: Buffer[] = [];
253+
let headerChunks = '';
254+
let bodyChunks = '';
255255

256256
if (this.isCanceled === true) {
257257
const message = `Canceled process on folder ${folderPath} with ID ${this.miningId}`;
@@ -264,25 +264,22 @@ export default class ImapEmailsFetcher {
264264
msg.on('body', (stream, streamInfo) => {
265265
stream.on('data', (chunk) => {
266266
if (streamInfo.which.includes('HEADER')) {
267-
headerChunks.push(chunk);
267+
headerChunks += chunk;
268268
} else if (this.fetchEmailBody) {
269-
bodyChunks.push(chunk);
269+
bodyChunks += chunk;
270270
}
271271
});
272272
});
273273

274274
msg.once('end', async () => {
275-
const headerBuf = Buffer.concat(headerChunks);
276-
const bodyBuf = Buffer.concat(bodyChunks);
275+
const parsedHeader = parseHeader(headerChunks);
277276

278-
const parsedHeader = parseHeader(headerBuf.toString('utf8'));
279-
280-
const mail = await simpleParser(Buffer.concat([headerBuf, bodyBuf]));
277+
const mail = await simpleParser(headerChunks + bodyChunks);
281278
const text = (mail.text || '').slice(0, 4000);
282279

283280
// Clear large chunks early
284-
headerChunks = [];
285-
bodyChunks = [];
281+
headerChunks = '';
282+
bodyChunks = '';
286283

287284
const messageId = getMessageId(parsedHeader);
288285

backend/src/services/signature/llm/index.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ import {
1414
} from './output-checkers';
1515

1616
export enum LLMModels {
17-
DeepSeek8bFree = 'deepseek/deepseek-r1-0528-qwen3-8b:free',
18-
qwen7bInstructFree = 'qwen/qwen-2.5-7b-instruct:free',
19-
googleGemma9bIt = 'google/gemma-2-9b-it',
20-
deepseekR1DistillQwen32B = 'deepseek/deepseek-r1-distill-qwen-1.5b',
21-
mistralai7bInstruct = 'mistralai/mistral-7b-instruct-v0.2'
17+
qwenFree = 'qwen/qwen-2.5-7b-instruct:free',
18+
deepseekFree = 'deepseek/deepseek-r1-0528-qwen3-8b:free',
19+
cohere = 'cohere/command-r',
20+
cohere7b = 'cohere/command-r7b-12-2024',
21+
meta = 'meta-llama/llama-3.1-8b-instruct',
22+
google = 'google/gemma-2-9b-it'
2223
}
2324

2425
export type LLMModelType = `${LLMModels}`;
@@ -107,7 +108,8 @@ export const SignaturePrompt = {
107108
type: 'json_object'
108109
},
109110

110-
buildUserPrompt: (signature: string) => `${signature}`
111+
buildUserPrompt: (signature: string) =>
112+
`RETURN NULL IF NOT A REAL PERSON SIGNATURE:\n${signature}`
111113
};
112114

113115
export class SignatureLLM implements ExtractSignature {

backend/src/utils/logger.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ function initLogger() {
2626
json: true,
2727
replaceTimestamp: true,
2828
format: format.combine(commonFormat, format.json()),
29+
gracefulShutdown: true,
2930
// eslint-disable-next-line no-console
3031
onConnectionError: (err) => console.error(err)
3132
})

backend/src/workers/email-signature/handler.ts

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -198,17 +198,25 @@ export class EmailSignatureProcessor {
198198

199199
const contact = await this.signature.extract(signature);
200200
if (!contact) return null;
201-
return {
201+
202+
const enrichedContact: Partial<Contact> = {
202203
email,
203204
user_id: userId,
204-
name: contact?.name,
205-
image: contact?.image,
206-
location: contact?.address,
207-
telephone: contact?.telephone,
208-
job_title: contact?.jobTitle,
209-
works_for: contact?.worksFor,
210-
same_as: contact?.sameAs
205+
name: contact.name,
206+
image: contact.image,
207+
location: contact.address,
208+
telephone: contact.telephone,
209+
job_title: contact.jobTitle,
210+
works_for: contact.worksFor,
211+
same_as: contact.sameAs
211212
};
213+
214+
// Check if anything beyond email and user_id is present
215+
const hasExtraInfo = Object.entries(enrichedContact).some(
216+
([key, value]) => !['email', 'user_id'].includes(key) && value
217+
);
218+
219+
return hasExtraInfo ? enrichedContact : null;
212220
}
213221

214222
private async upsertContact(contact: Partial<Contact>): Promise<void> {

backend/src/workers/email-signature/utils.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,23 @@ export async function pushNotificationDB(
2929

3030
export function isUsefulSignatureContent(signature: string): boolean {
3131
const text = signature.trim();
32-
32+
const words = text.split(/\s+/);
3333
const hasURL = /(https?:\/\/|www\.)\S+/i.test(text);
3434
const hasDigits = /\d{3,}/.test(text);
35-
const hasMultipleWords = text.split(/\s+/).length >= 5;
3635
const hasSymbols = /[@+:]/.test(text); // Common in phone/email/title formats
3736

38-
const useful = hasURL || hasDigits || hasSymbols || hasMultipleWords;
37+
const wordsMinMax =
38+
words.length >= 5 && words.length <= 40 && text.length <= 300;
39+
40+
// At least one positive signal: a URL, a run of 3+ digits, or one of the symbols @ + :
41+
const positive = hasURL || hasDigits || hasSymbols;
42+
43+
const blocks = [
44+
/^(Envoyé\s+à\s+partir\s+de|Sent\s+from)\s+(Outlook|Gmail|iPhone|Android)/i
45+
];
46+
47+
const isUseful =
48+
wordsMinMax && positive && !blocks.some((rx) => rx.test(text));
3949

40-
return useful;
50+
return isUseful;
4151
}

supabase/migrations/20250520002931_add_persons_phone_numbers.sql

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,7 @@ BEGIN
209209
END IF;
210210

211211
-- Merge incoming phone numbers into telephone array
212-
new_telephone := (
213-
string_to_array(contact_record->>'telephone', ',')
214-
);
212+
new_telephone := string_to_array(NULLIF(contact_record->>'telephone', ''), ',');
215213

216214
IF new_telephone IS NOT NULL THEN
217215
SELECT p.telephone
@@ -257,14 +255,14 @@ BEGIN
257255
name = COALESCE(pp.name, new_name::TEXT),
258256
url = COALESCE(pp.url, (contact_record->>'url')::TEXT),
259257
image = COALESCE(pp.image, (contact_record->>'image')::TEXT),
260-
location = COALESCE(pp.location, string_to_array(contact_record->>'location', ',')::TEXT[]),
258+
location = COALESCE(pp.location, string_to_array(NULLIF(contact_record->>'location', ''), ',')::TEXT[]),
261259
alternate_name = COALESCE(pp.alternate_name, (new_alternate_name)::TEXT[]),
262-
same_as = COALESCE(pp.same_as, string_to_array(contact_record->>'same_as', ',')::TEXT[]),
260+
same_as = COALESCE(pp.same_as, string_to_array(NULLIF(contact_record->>'same_as', ''), ',')::TEXT[]),
263261
given_name = COALESCE(pp.given_name, (contact_record->>'given_name')::TEXT),
264262
family_name = COALESCE(pp.family_name, (contact_record->>'family_name')::TEXT),
265263
job_title = COALESCE(pp.job_title, (contact_record->>'job_title')::TEXT),
266264
works_for = COALESCE(pp.works_for, organization_id),
267-
identifiers = COALESCE(pp.identifiers, string_to_array(contact_record->>'identifiers', ',')::TEXT[]),
265+
identifiers = COALESCE(pp.identifiers, string_to_array(NULLIF(contact_record->>'identifiers', ''), ',')::TEXT[]),
268266
status = COALESCE(pp.status, (contact_record->>'status')::TEXT),
269267
telephone = COALESCE(pp.telephone, (merged_telephone)::TEXT[])
270268
WHERE
@@ -276,14 +274,14 @@ BEGIN
276274
name = COALESCE(new_name::TEXT, pp.name),
277275
url = COALESCE((contact_record->>'url')::TEXT, pp.url),
278276
image = COALESCE((contact_record->>'image')::TEXT, pp.image),
279-
location = COALESCE(string_to_array(contact_record->>'location', ',')::TEXT[], pp.location),
277+
location = COALESCE(string_to_array(NULLIF(contact_record->>'location', ''), ',')::TEXT[], pp.location),
280278
alternate_name = COALESCE((new_alternate_name)::TEXT[], pp.alternate_name),
281-
same_as = COALESCE(string_to_array(contact_record->>'same_as', ',')::TEXT[], pp.same_as),
279+
same_as = COALESCE(string_to_array(NULLIF(contact_record->>'same_as', ''), ',')::TEXT[], pp.same_as),
282280
given_name = COALESCE((contact_record->>'given_name')::TEXT, pp.given_name),
283281
family_name = COALESCE((contact_record->>'family_name')::TEXT, pp.family_name),
284282
job_title = COALESCE((contact_record->>'job_title')::TEXT, pp.job_title),
285283
works_for = COALESCE(organization_id, pp.works_for),
286-
identifiers = COALESCE(string_to_array(contact_record->>'identifiers', ',')::TEXT[], pp.identifiers),
284+
identifiers = COALESCE(string_to_array(NULLIF(contact_record->>'identifiers', ''), ',')::TEXT[], pp.identifiers),
287285
status = COALESCE((contact_record->>'status')::TEXT, pp.status),
288286
telephone = COALESCE((merged_telephone)::TEXT[], pp.telephone)
289287

0 commit comments

Comments
 (0)