|
| 1 | +import { Logger } from 'winston'; |
| 2 | +import { ExtractSignature, PersonLD } from './types'; |
| 3 | + |
/**
 * Model identifiers that can be passed as the `model` field of a
 * chat-completion request. Each value is the full model slug string.
 */
export enum LLMModels {
  /** DeepSeek R1 (0528) distilled onto Qwen3 8B. */
  DeepSeek8bFree = 'deepseek/deepseek-r1-0528-qwen3-8b:free',

  /** Mistral 7B Instruct. */
  Mistral7bFree = 'mistralai/mistral-7b-instruct:free',

  /** Meta Llama 3.3 8B Instruct. */
  metaLlama8bInstructFree = 'meta-llama/llama-3.3-8b-instruct:free',

  /** Google Gemma 3n E4B (instruction tuned). */
  googleGemma4bFree = 'google/gemma-3n-e4b-it:free',

  /** Qwen 2.5 7B Instruct. */
  qwen7bInstructFree = 'qwen/qwen-2.5-7b-instruct:free',
}
| 11 | + |
/**
 * Union of the raw string values of {@link LLMModels}, so a model can be
 * supplied either as an enum member or as its plain string slug.
 */
export type LLMModelType = `${LLMModels}`;
| 13 | + |
/**
 * Prompt bundle used to turn a raw email signature into a schema.org
 * "Person" JSON object.
 *
 * - `system`: the instruction block sent as the system message.
 * - `response_format`: asks the endpoint for a JSON object response.
 * - `buildUserPrompt`: wraps the signature text into the user message
 *   (currently the signature itself, unchanged).
 *
 * The worked example inside `system` must follow the same contract the rules
 * describe, because models imitate the example more closely than the prose:
 * `telephone` and `sameAs` are arrays, and `sameAs` entries are full URLs.
 */
export const SignaturePrompt = {
  system: `<system_prompt>
  YOU ARE A DATA EXTRACTION AGENT THAT PARSES EMAIL SIGNATURES AND OUTPUTS CLEAN JSON IN THE schema.org "Person" FORMAT

  ###TASK###

  EXTRACT ONLY WHAT IS EXPLICITLY PRESENT IN THE TEXT. FORMAT AND RETURN AS VALID JSON. DO NOT ADD, GUESS, OR ALTER DATA.

  ###RULES###

  - **ONLY USE WHAT IS CLEARLY WRITTEN** — NO INFERENCE
  - **PRESERVE ORIGINAL SPELLING AND CAPITALIZATION**
  - **REMOVE SPACES FROM PHONE NUMBERS** (E.G., '+1234567890')
  - **CONVERT SOCIAL HANDLES TO FULL URLS IN 'sameAs'**
  - **OMIT FIELDS NOT FULLY PRESENT**
  - **OUTPUT VALID JSON ONLY** — NO MARKDOWN, NO EXPLANATION, NO TEXT

  ###REQUIRED FIELDS###

  - '@type' (MUST be '"Person"')
  - 'name' (MUST be present)

  OPTIONAL (include only if clearly found):
  - 'image': string (URL to an image of the person (avatar or profile picture))
  - 'jobTitle': string (The persons job title)
  - 'worksFor': string (Company or organization the person works for)
  - 'email': string (The persons email address)
  - 'address': string (The persons physical address)
  - 'telephone': string[] (Array of the persons phone numbers (e.g., mobile, WhatsApp))
  - 'sameAs': string[] (Array of the persons full correct profile URLs)

  ###CHAIN OF THOUGHT###

  1. **READ** the signature line by line
  2. **IDENTIFY** explicit values only (no assumptions)
  3. **PARSE** email, phone, socials, name, etc.
  4. **FORMAT** phone and links as required
  5. **BUILD** strict schema.org Person JSON
  6. **IGNORE** incomplete, unclear, or ambiguous data
  7. **OUTPUT** JSON only — no preamble or notes

  ###WHAT NOT TO DO###

  - DO NOT GUESS OR FIX SPELLING/CAPITALIZATION
  - DO NOT OUTPUT FIELDS NOT FULLY FOUND
  - DO NOT ADD FAKE OR DEFAULT DATA
  - DO NOT USE PARTIAL SOCIAL LINKS
  - DO NOT OUTPUT ANYTHING BUT JSON

  ###EXAMPLE###

  **Input:**
  Jane Smith
  Marketing Lead
  Bright Horizons Ltd.
  jane@brighthorizons.co.uk
  +44 7911 123456
  1 Sunrise Way, London
  Twitter: twitter.com/janesmith

  **Output:**
  {
    "@type": "Person",
    "name": "Jane Smith",
    "jobTitle": "Marketing Lead",
    "worksFor": "Bright Horizons Ltd.",
    "email": "jane@brighthorizons.co.uk",
    "telephone": ["+447911123456"],
    "address": "1 Sunrise Way, London",
    "sameAs": ["https://twitter.com/janesmith"]
  }

  </system_prompt>
  `,
  response_format: {
    type: 'json_object'
  },

  buildUserPrompt: (signature: string) => `${signature}`
};
| 93 | + |
/**
 * Signature extractor backed by an OpenRouter chat-completions model.
 *
 * The signature text is sent together with `SignaturePrompt.system`; the
 * model's JSON reply is parsed, validated and mapped onto a `PersonLD`.
 * Every failure (network, API error, malformed or unexpected JSON) is logged
 * through the injected logger and reported to the caller as `null`.
 */
export class SignatureLLM implements ExtractSignature {
  LLM_ENDPOINT = 'https://openrouter.ai/api/v1/chat/completions';

  /**
   * @param logger - Logger used to report request and parsing failures.
   * @param model - Model slug sent as the `model` field of the request.
   * @param apiKey - Bearer token sent in the `Authorization` header.
   */
  constructor(
    private readonly logger: Logger,
    private readonly model: LLMModelType,
    private readonly apiKey: string
  ) {}

  /** Returns `value` when it is a non-blank string, otherwise `undefined`. */
  private static toOptionalString(value: unknown): string | undefined {
    return typeof value === 'string' && value.trim() !== '' ? value : undefined;
  }

  /**
   * Normalizes a field that may arrive as a single string or as an array into
   * a flat array of non-blank strings. Anything else yields an empty array.
   */
  private static toStringArray(value: unknown): string[] {
    const items: unknown[] = Array.isArray(value) ? value : [value];
    return items.filter(
      (item): item is string => typeof item === 'string' && item.trim() !== ''
    );
  }

  /** Builds the HTTP headers for the completion request. */
  private headers() {
    return {
      Authorization: `Bearer ${this.apiKey}`,
      'Content-Type': 'application/json'
    };
  }

  /** Serializes the chat-completion payload for the given signature. */
  private body(signature: string) {
    return JSON.stringify({
      model: this.model,
      messages: [
        { role: 'system', content: SignaturePrompt.system },
        {
          role: 'user',
          content: SignaturePrompt.buildUserPrompt(signature)
        }
      ],
      response_format: SignaturePrompt.response_format
    });
  }

  /**
   * Sends the signature to the endpoint and returns the first choice's
   * message content.
   *
   * @returns The raw content string, or `null` when the request fails, the
   *   API reports an error, or the reply carries no string content.
   */
  private async sendPrompt(signature: string): Promise<string | null> {
    try {
      const response = await fetch(this.LLM_ENDPOINT, {
        method: 'POST',
        headers: this.headers(),
        body: this.body(signature)
      });
      const data = await response.json();

      const error = data?.error?.message;
      if (error) throw new Error(error);

      // A non-2xx reply without an error message would otherwise be treated
      // as a (content-less) success.
      if (!response.ok) {
        throw new Error(`LLM request failed with status ${response.status}`);
      }

      const content: unknown = data?.choices?.[0]?.message?.content;
      // Honor the declared return type: never leak `undefined` or non-strings.
      return typeof content === 'string' ? content : null;
    } catch (err) {
      this.logger.error('SignatureExtractionLLM error:', err);
      return null;
    }
  }

  /**
   * Extracts a person description from an email signature.
   *
   * @param signature - Raw signature text.
   * @returns The extracted person, or `null` when the model call fails, the
   *   reply is not a JSON object with `"@type": "Person"`, or no usable
   *   `name` is present.
   */
  public async extract(signature: string): Promise<PersonLD | null> {
    try {
      const content = await this.sendPrompt(signature);

      if (!content) return null;

      const parsed: unknown = JSON.parse(content);

      // Model output is untrusted: accept only a plain JSON object.
      if (
        typeof parsed !== 'object' ||
        parsed === null ||
        Array.isArray(parsed)
      ) {
        return null;
      }
      const person = parsed as Record<string, unknown>;

      if (person['@type'] !== 'Person') return null;

      // The prompt declares `name` mandatory; without it there is no person.
      const name = SignatureLLM.toOptionalString(person['name']);
      if (!name) return null;

      const sameAs = person['sameAs'];

      return {
        name,
        image: SignatureLLM.toOptionalString(person['image']),
        jobTitle: SignatureLLM.toOptionalString(person['jobTitle']),
        worksFor: SignatureLLM.toOptionalString(person['worksFor']),
        // Models answer with either a single string or an array; flatten both
        // so the result never contains a nested array.
        address: SignatureLLM.toStringArray(person['address']),
        telephone: SignatureLLM.toStringArray(person['telephone']),
        // Stays undefined when the model omitted the field.
        sameAs: sameAs == null ? undefined : SignatureLLM.toStringArray(sameAs)
      };
    } catch (err) {
      this.logger.error('SignatureExtractionLLM error:', err);
      return null;
    }
  }
}
0 commit comments