From 17fdca17d83315939456e23207382dc34f9337c3 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Wed, 26 Mar 2025 16:44:54 -0700 Subject: [PATCH 01/10] Working end-to-end transcription integrations. --- functions/src/events/scrapeEvents.ts | 59 ++++++++++++------------- functions/src/webhooks/transcription.ts | 20 +++++---- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index cde11aa3f..cb6d51e7f 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -158,20 +158,6 @@ class HearingScraper extends EventScraper { const hearing = Hearing.check(eventData) const shouldScrape = withinCutoff(hearing.startsAt.toDate()) - let payload: Hearing = { - id: `hearing-${EventId}`, - type: "hearing", - content, - ...this.timestamps(content) - } - if (hearing) { - payload = { - ...payload, - videoURL: hearing.videoURL, - videoFetchedAt: hearing.videoFetchedAt, - videoAssemblyId: hearing.videoAssemblyId - } - } let maybeVideoURL = null let transcript = null @@ -192,24 +178,38 @@ class HearingScraper extends EventScraper { transcript = await assembly.transcripts.submit({ webhook_url: + // test with: "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription", process.env.NODE_ENV === "development" ? 
"https://us-central1-digital-testimony-dev.cloudfunctions.net/transcription" : "https://us-central1-digital-testimony-prod.cloudfunctions.net/transcription", - webhook_auth_header_name: "X-Maple-Webhook", + webhook_auth_header_name: "x-maple-webhook", webhook_auth_header_value: newToken, - audio: firstVideoSource.src, - auto_highlights: true, - custom_topics: true, - entity_detection: true, + audio: + // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac", + firstVideoSource.src, + auto_highlights: false, + custom_topics: false, + entity_detection: false, iab_categories: false, format_text: true, punctuate: true, speaker_labels: true, - summarization: true, - summary_model: "informative", - summary_type: "bullets" + summarization: false }) + await db + .collection("events") + .doc(`hearing-${String(EventId)}`) + .set({ + id: `hearing-${EventId}`, + type: "hearing", + content, + ...this.timestamps(content), + videoURL: maybeVideoURL, + videoFetchedAt: Timestamp.now(), + videoAssemblyId: transcript.id + }) + await db .collection("events") .doc(`hearing-${String(EventId)}`) @@ -218,20 +218,17 @@ class HearingScraper extends EventScraper { .set({ videoAssemblyWebhookToken: sha256(newToken) }) - - payload = { - ...payload, - videoURL: maybeVideoURL, - videoFetchedAt: Timestamp.now(), - videoAssemblyId: transcript.id - } } } } } - const event: Hearing = payload - return event + return { + id: `hearing-${EventId}`, + type: "hearing", + content, + ...this.timestamps(content) + } as Hearing } } diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index c8181b2b3..4cca8ce32 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -8,23 +8,23 @@ const assembly = new AssemblyAI({ }) export const transcription = functions.https.onRequest(async (req, res) => { - if ( - req.headers["X-Maple-Webhook"] && - req.headers["webhook_auth_header_value"] - ) { + 
console.log("req.headers", req.headers) + if (req.headers["x-maple-webhook"]) { + console.log("req.body.status", req.body.status) + if (req.body.status === "completed") { const transcript = await assembly.transcripts.get(req.body.transcript_id) + console.log("transcript.webhook_auth", transcript.webhook_auth) if (transcript && transcript.webhook_auth) { const maybeEventInDb = await db .collection("events") .where("videoAssemblyId", "==", transcript.id) .get() + console.log("maybeEventInDb.docs.length", maybeEventInDb.docs.length) if (maybeEventInDb.docs.length) { const authenticatedEventsInDb = maybeEventInDb.docs.filter( async e => { - const hashedToken = sha256( - String(req.headers["webhook_auth_header_value"]) - ) + const hashedToken = sha256(String(req.headers["x-maple-webhook"])) const tokenInDb = await db .collection("events") @@ -33,12 +33,16 @@ export const transcription = functions.https.onRequest(async (req, res) => { .doc("webhookAuth") .get() const tokenInDbData = tokenInDb.data() + console.log("tokenInDbData", tokenInDbData) + if (tokenInDbData) { return hashedToken === tokenInDbData.videoAssemblyWebhookToken } return false } ) + console.log("authenticatedEventsInDb", authenticatedEventsInDb) + if (authenticatedEventsInDb) { try { await db @@ -48,7 +52,7 @@ export const transcription = functions.https.onRequest(async (req, res) => { authenticatedEventsInDb.forEach(async d => { await d.ref.update({ - ["webhook_auth_header_value"]: null + ["x-maple-webhook"]: null }) }) console.log("transcript saved in db") From ca0f2bb945596995d76c1cfe4ed353ea2eb722c3 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Thu, 27 Mar 2025 09:01:55 -0700 Subject: [PATCH 02/10] Simplify Assembly API call. 
--- functions/src/events/scrapeEvents.ts | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index cb6d51e7f..64072bb84 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -177,24 +177,17 @@ class HearingScraper extends EventScraper { maybeVideoURL = firstVideoSource.src transcript = await assembly.transcripts.submit({ + audio: + // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac", + firstVideoSource.src, webhook_url: // test with: "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription", process.env.NODE_ENV === "development" ? "https://us-central1-digital-testimony-dev.cloudfunctions.net/transcription" : "https://us-central1-digital-testimony-prod.cloudfunctions.net/transcription", - webhook_auth_header_name: "x-maple-webhook", - webhook_auth_header_value: newToken, - audio: - // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac", - firstVideoSource.src, - auto_highlights: false, - custom_topics: false, - entity_detection: false, - iab_categories: false, - format_text: true, - punctuate: true, speaker_labels: true, - summarization: false + webhook_auth_header_name: "x-maple-webhook", + webhook_auth_header_value: newToken }) await db From f16f46ef51501cb24cc30a50c9e9b38c72efbf5d Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Fri, 4 Apr 2025 17:21:53 -0700 Subject: [PATCH 03/10] Address review feedback on transcriptions system. 
--- functions/src/events/scrapeEvents.ts | 150 ++++++++++++++---------- functions/src/webhooks/transcription.ts | 52 ++++++-- 2 files changed, 128 insertions(+), 74 deletions(-) diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index 64072bb84..0f77c075e 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -137,6 +137,75 @@ class SessionScraper extends EventScraper { } } +const submitTranscription = async ({ + EventId, + maybeVideoUrl +}: { + EventId: number + maybeVideoUrl: string +}) => { + const newToken = randomBytes(16).toString("hex") + + const transcript = await assembly.transcripts.submit({ + audio: + // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac", + maybeVideoUrl, + webhook_url: + // test with: "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription", + process.env.NODE_ENV === "development" + ? "https://us-central1-digital-testimony-dev.cloudfunctions.net/transcription" + : "https://us-central1-digital-testimony-prod.cloudfunctions.net/transcription", + speaker_labels: true, + webhook_auth_header_name: "x-maple-webhook", + webhook_auth_header_value: newToken + }) + + await db + .collection("events") + .doc(`hearing-${String(EventId)}`) + .collection("private") + .doc("webhookAuth") + .set({ + videoAssemblyWebhookToken: sha256(newToken) + }) + + return transcript.id +} + +const getHearingVideoUrl = async (EventId: number) => { + const req = await fetch( + `https://malegislature.gov/Events/Hearings/Detail/${EventId}` + ) + const res = await req.text() + if (res) { + const dom = new JSDOM(res) + if (dom) { + const maybeVideoSource = + dom.window.document.querySelectorAll("video source") + if (maybeVideoSource.length && maybeVideoSource[0]) { + const firstVideoSource = maybeVideoSource[0] as HTMLSourceElement + return firstVideoSource.src + } + } + } + return null +} + +const shouldScrapeVideo = async (EventId: number) => { + const 
eventInDb = await db + .collection("events") + .doc(`hearing-${String(EventId)}`) + .get() + const eventData = eventInDb.data() + if (!eventData) { + return false + } + if (!eventData.videoFetchedAt) { + return withinCutoff(new Date(eventData.StartTime)) + } + return false +} + class HearingScraper extends EventScraper { constructor() { super("every 60 minutes", 240) @@ -150,69 +219,24 @@ class HearingScraper extends EventScraper { async getEvent({ EventId }: HearingListItem /* e.g. 4962 */) { const data = await api.getHearing(EventId) const content = HearingContent.check(data) - const eventInDb = await db - .collection("events") - .doc(`hearing-${String(EventId)}`) - .get() - const eventData = eventInDb.data() - const hearing = Hearing.check(eventData) - const shouldScrape = withinCutoff(hearing.startsAt.toDate()) - - let maybeVideoURL = null - let transcript = null - - if (!hearing.videoFetchedAt && shouldScrape) { - const req = await fetch( - `https://malegislature.gov/Events/Hearings/Detail/${EventId}` - ) - const res = await req.text() - if (res) { - const dom = new JSDOM(res) - if (dom) { - const maybeVideoSource = - dom.window.document.querySelectorAll("video source") - if (maybeVideoSource.length && maybeVideoSource[0]) { - const newToken = randomBytes(16).toString("hex") - const firstVideoSource = maybeVideoSource[0] as HTMLSourceElement - maybeVideoURL = firstVideoSource.src - - transcript = await assembly.transcripts.submit({ - audio: - // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac", - firstVideoSource.src, - webhook_url: - // test with: "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription", - process.env.NODE_ENV === "development" - ? 
"https://us-central1-digital-testimony-dev.cloudfunctions.net/transcription" - : "https://us-central1-digital-testimony-prod.cloudfunctions.net/transcription", - speaker_labels: true, - webhook_auth_header_name: "x-maple-webhook", - webhook_auth_header_value: newToken - }) - - await db - .collection("events") - .doc(`hearing-${String(EventId)}`) - .set({ - id: `hearing-${EventId}`, - type: "hearing", - content, - ...this.timestamps(content), - videoURL: maybeVideoURL, - videoFetchedAt: Timestamp.now(), - videoAssemblyId: transcript.id - }) - - await db - .collection("events") - .doc(`hearing-${String(EventId)}`) - .collection("private") - .doc("webhookAuth") - .set({ - videoAssemblyWebhookToken: sha256(newToken) - }) - } - } + + if (await shouldScrapeVideo(EventId)) { + const maybeVideoUrl = await getHearingVideoUrl(EventId) + if (maybeVideoUrl) { + const transcriptId = await submitTranscription({ + maybeVideoUrl, + EventId + }) + + return { + id: `hearing-${EventId}`, + type: "hearing", + content, + ...this.timestamps(content), + videoURL: maybeVideoUrl, + videoFetchedAt: Timestamp.now(), + videoTranscriptionId: transcriptId // using the assembly Id as our transcriptionId + } as Hearing } } diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index 4cca8ce32..b1113b511 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -8,19 +8,15 @@ const assembly = new AssemblyAI({ }) export const transcription = functions.https.onRequest(async (req, res) => { - console.log("req.headers", req.headers) if (req.headers["x-maple-webhook"]) { - console.log("req.body.status", req.body.status) - if (req.body.status === "completed") { const transcript = await assembly.transcripts.get(req.body.transcript_id) - console.log("transcript.webhook_auth", transcript.webhook_auth) if (transcript && transcript.webhook_auth) { const maybeEventInDb = await db .collection("events") .where("videoAssemblyId", 
"==", transcript.id) .get() - console.log("maybeEventInDb.docs.length", maybeEventInDb.docs.length) + if (maybeEventInDb.docs.length) { const authenticatedEventsInDb = maybeEventInDb.docs.filter( async e => { @@ -41,20 +37,54 @@ export const transcription = functions.https.onRequest(async (req, res) => { return false } ) - console.log("authenticatedEventsInDb", authenticatedEventsInDb) + const { id, text, audio_url, utterances, words } = transcript if (authenticatedEventsInDb) { try { - await db + const transcriptionInDb = db .collection("transcriptions") .doc(transcript.id) - .set({ _timestamp: new Date(), ...transcript }) - authenticatedEventsInDb.forEach(async d => { - await d.ref.update({ - ["x-maple-webhook"]: null + transcriptionInDb.set({ + id, + text, + timestamp: new Date(), + audio_url, + words + }) + + transcriptionInDb + .collection("timestamps") + .doc("utterances") + .set({ + utterances: utterances?.map( + ({ speaker, confidence, start, end, text }) => ({ + speaker, + confidence, + start, + end, + text + }) + ) }) + + transcriptionInDb.collection("timestamps").doc("words").set({ + words + }) + + const batch = db.batch() + + batch.set(db.collection("transcriptions").doc(transcript.id), { + _timestamp: new Date(), + ...transcript + }) + + authenticatedEventsInDb.forEach(doc => { + batch.update(doc.ref, { ["x-maple-webhook"]: null }) }) + + await batch.commit() + console.log("transcript saved in db") } catch (error) { console.log(error) From e054c49a297afa398e5146935cafa565353890f9 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Wed, 9 Apr 2025 14:40:39 -0700 Subject: [PATCH 04/10] Address further review feedback on transcriptions system. 
--- functions/src/events/scrapeEvents.ts | 3 ++- functions/src/webhooks/transcription.ts | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index 0f77c075e..7cc0b7ead 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -197,11 +197,12 @@ const shouldScrapeVideo = async (EventId: number) => { .doc(`hearing-${String(EventId)}`) .get() const eventData = eventInDb.data() + if (!eventData) { return false } if (!eventData.videoFetchedAt) { - return withinCutoff(new Date(eventData.StartTime)) + return withinCutoff(new Date(Hearing.check(eventData).startsAt.toDate())) } return false } diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index b1113b511..2e1f1ba63 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -1,6 +1,6 @@ import * as functions from "firebase-functions" import { AssemblyAI } from "assemblyai" -import { db } from "../firebase" +import { db, Timestamp } from "../firebase" import { sha256 } from "js-sha256" const assembly = new AssemblyAI({ @@ -29,7 +29,6 @@ export const transcription = functions.https.onRequest(async (req, res) => { .doc("webhookAuth") .get() const tokenInDbData = tokenInDb.data() - console.log("tokenInDbData", tokenInDbData) if (tokenInDbData) { return hashedToken === tokenInDbData.videoAssemblyWebhookToken @@ -41,19 +40,18 @@ export const transcription = functions.https.onRequest(async (req, res) => { const { id, text, audio_url, utterances, words } = transcript if (authenticatedEventsInDb) { try { - const transcriptionInDb = db + const transcriptionInDb = await db .collection("transcriptions") .doc(transcript.id) - transcriptionInDb.set({ + await transcriptionInDb.set({ id, text, - timestamp: new Date(), - audio_url, - words + createdAt: Timestamp.now(), + audio_url }) - 
transcriptionInDb + await transcriptionInDb .collection("timestamps") .doc("utterances") .set({ @@ -68,14 +66,17 @@ export const transcription = functions.https.onRequest(async (req, res) => { ) }) - transcriptionInDb.collection("timestamps").doc("words").set({ - words - }) + await transcriptionInDb + .collection("timestamps") + .doc("words") + .set({ + words + }) const batch = db.batch() batch.set(db.collection("transcriptions").doc(transcript.id), { - _timestamp: new Date(), + _timestamp: Timestamp.now(), ...transcript }) From 5c4a21bb02f21a5d99f2e508b194c15babda2ff5 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Sun, 13 Apr 2025 15:14:23 -0700 Subject: [PATCH 05/10] Separate each utterance and word into its own doc. --- functions/src/webhooks/transcription.ts | 52 +++++++++++++------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index 2e1f1ba63..62138361f 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -42,7 +42,7 @@ export const transcription = functions.https.onRequest(async (req, res) => { try { const transcriptionInDb = await db .collection("transcriptions") - .doc(transcript.id) + .doc(id) await transcriptionInDb.set({ id, @@ -51,42 +51,44 @@ export const transcription = functions.https.onRequest(async (req, res) => { audio_url }) - await transcriptionInDb - .collection("timestamps") - .doc("utterances") - .set({ - utterances: utterances?.map( - ({ speaker, confidence, start, end, text }) => ({ - speaker, - confidence, - start, - end, - text - }) + if (utterances) { + const writer = db.bulkWriter() + for (let utterance of utterances) { + const { speaker, confidence, start, end, text } = utterance + writer.set( + db.doc( + `/transcriptions/${transcript.id}/utterances/${utterance.start}` + ), + { speaker, confidence, start, end, text } ) - }) + } - await transcriptionInDb - .collection("timestamps") - 
.doc("words") - .set({ - words - }) + await writer.close() + } - const batch = db.batch() + if (words) { + const writer = db.bulkWriter() + for (let word of words) { + writer.set( + db.doc( + `/transcriptions/${transcript.id}/words/${word.start}` + ), + word + ) + } + + await writer.close() + } + const batch = db.batch() batch.set(db.collection("transcriptions").doc(transcript.id), { _timestamp: Timestamp.now(), ...transcript }) - authenticatedEventsInDb.forEach(doc => { batch.update(doc.ref, { ["x-maple-webhook"]: null }) }) - await batch.commit() - - console.log("transcript saved in db") } catch (error) { console.log(error) } From f1f947c8d7c60b0884c0df1da76276efefe5dee2 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Mon, 14 Apr 2025 08:19:16 -0700 Subject: [PATCH 06/10] Remove utterance sequential ids and words altogether. --- functions/src/webhooks/transcription.ts | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index 62138361f..f49908573 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -55,25 +55,10 @@ export const transcription = functions.https.onRequest(async (req, res) => { const writer = db.bulkWriter() for (let utterance of utterances) { const { speaker, confidence, start, end, text } = utterance - writer.set( - db.doc( - `/transcriptions/${transcript.id}/utterances/${utterance.start}` - ), - { speaker, confidence, start, end, text } - ) - } - - await writer.close() - } - if (words) { - const writer = db.bulkWriter() - for (let word of words) { writer.set( - db.doc( - `/transcriptions/${transcript.id}/words/${word.start}` - ), - word + db.doc(`/transcriptions/${transcript.id}/utterances/`), + { speaker, confidence, start, end, text } ) } @@ -81,10 +66,6 @@ export const transcription = functions.https.onRequest(async (req, res) => { } const batch = db.batch() -
batch.set(db.collection("transcriptions").doc(transcript.id), { - _timestamp: Timestamp.now(), - ...transcript - }) authenticatedEventsInDb.forEach(doc => { batch.update(doc.ref, { ["x-maple-webhook"]: null }) }) From 92fccb303150013a28f206110255be6609d01f52 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Tue, 15 Apr 2025 06:59:53 -0700 Subject: [PATCH 07/10] Implement further review feedback, add comments --- functions/src/webhooks/transcription.ts | 65 ++++++++++++++++--------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index f49908573..ade7af71a 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -10,35 +10,44 @@ const assembly = new AssemblyAI({ export const transcription = functions.https.onRequest(async (req, res) => { if (req.headers["x-maple-webhook"]) { if (req.body.status === "completed") { + // If we get a request with the right header and status, get the + // transcription from the assembly API. const transcript = await assembly.transcripts.get(req.body.transcript_id) if (transcript && transcript.webhook_auth) { - const maybeEventInDb = await db + // If there is a transcript and the transcript has an auth property, + // look for an event (aka Hearing) in the DB with a matching ID. + const maybeEventsInDb = await db .collection("events") .where("videoAssemblyId", "==", transcript.id) .get() - if (maybeEventInDb.docs.length) { - const authenticatedEventsInDb = maybeEventInDb.docs.filter( - async e => { - const hashedToken = sha256(String(req.headers["x-maple-webhook"])) + if (maybeEventsInDb.docs.length) { + // If we have a match look for one that matches a hash of the token + // we gave Assembly. There should only be one of these but firestore + // gives us an array. 
If there is more than one member, something is + // wrong + const authenticatedEventIds = [] as string[] + const hashedToken = sha256(String(req.headers["x-maple-webhook"])) - const tokenInDb = await db - .collection("events") - .doc(e.id) - .collection("private") - .doc("webhookAuth") - .get() - const tokenInDbData = tokenInDb.data() + maybeEventsInDb.docs.forEach(async doc => { + const tokenDocInDb = await db + .collection("events") + .doc(doc.id) + .collection("private") + .doc("webhookAuth") + .get() - if (tokenInDbData) { - return hashedToken === tokenInDbData.videoAssemblyWebhookToken - } - return false + const tokenDataInDb = tokenDocInDb.data()?.videoAssemblyWebhookToken + + if (hashedToken === tokenDataInDb) { + authenticatedEventIds.push(doc.id) } - ) + }) - const { id, text, audio_url, utterances, words } = transcript - if (authenticatedEventsInDb) { + if (authenticatedEventIds.length === 1) { + // If there is one authenticated event, pull out the parts we want to + // save and try to save them in the db. + const { id, text, audio_url, utterances } = transcript try { const transcriptionInDb = await db .collection("transcriptions") @@ -51,13 +60,21 @@ export const transcription = functions.https.onRequest(async (req, res) => { audio_url }) + // Put each `utterance` in a separate doc in an utterances + // collection. Previously had done the same for `words` but + // got worried about collection size and write times since + // `words` can be tens of thousands of members. 
if (utterances) { const writer = db.bulkWriter() for (let utterance of utterances) { const { speaker, confidence, start, end, text } = utterance writer.set( - db.doc(`/transcriptions/${transcript.id}/utterances/`), + db + .collection("transcriptions") + .doc(`${transcript.id}`) + .collection("utterances") + .doc(), { speaker, confidence, start, end, text } ) } @@ -65,11 +82,11 @@ export const transcription = functions.https.onRequest(async (req, res) => { await writer.close() } - const batch = db.batch() - authenticatedEventsInDb.forEach(doc => { - batch.update(doc.ref, { ["x-maple-webhook"]: null }) + // Delete the hashed webhook auth token from our db now that + // we're done. + authenticatedEventIds.forEach(async docId => { + await db.doc(docId).set({ ["x-maple-webhook"]: null }) }) - await batch.commit() } catch (error) { console.log(error) } From 24474c620d25438dd06c994caadf2ea897ee9e58 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Wed, 16 Apr 2025 11:17:54 -0700 Subject: [PATCH 08/10] Address additional review feedback. 
--- functions/src/webhooks/transcription.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index ade7af71a..c77c6ac7a 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -29,7 +29,9 @@ export const transcription = functions.https.onRequest(async (req, res) => { const authenticatedEventIds = [] as string[] const hashedToken = sha256(String(req.headers["x-maple-webhook"])) - maybeEventsInDb.docs.forEach(async doc => { + for (const index in maybeEventsInDb.docs){ + const doc = maybeEventsInDb.docs[index] + const tokenDocInDb = await db .collection("events") .doc(doc.id) @@ -42,7 +44,7 @@ export const transcription = functions.https.onRequest(async (req, res) => { if (hashedToken === tokenDataInDb) { authenticatedEventIds.push(doc.id) } - }) + } if (authenticatedEventIds.length === 1) { // If there is one authenticated event, pull out the parts we want to @@ -85,7 +87,15 @@ export const transcription = functions.https.onRequest(async (req, res) => { // Delete the hashed webhook auth token from our db now that // we're done. authenticatedEventIds.forEach(async docId => { - await db.doc(docId).set({ ["x-maple-webhook"]: null }) + + await db.collection("events") + .doc(docId) + .collection("private") + .doc("webhookAuth") + .set({ + videoAssemblyWebhookToken: null + }) + }) } catch (error) { console.log(error) From c9d5a90e25b62bdaf24b802c97a69d9f8a985117 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Thu, 17 Apr 2025 06:59:48 -0700 Subject: [PATCH 09/10] Implement further review feedback. 
--- functions/src/webhooks/transcription.ts | 34 ++++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index c77c6ac7a..08a6c4b1b 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -29,7 +29,7 @@ export const transcription = functions.https.onRequest(async (req, res) => { const authenticatedEventIds = [] as string[] const hashedToken = sha256(String(req.headers["x-maple-webhook"])) - for (const index in maybeEventsInDb.docs){ + for (const index in maybeEventsInDb.docs) { const doc = maybeEventsInDb.docs[index] const tokenDocInDb = await db @@ -46,6 +46,17 @@ export const transcription = functions.https.onRequest(async (req, res) => { } } + // Log edge cases + if (maybeEventsInDb.docs.length === 0) { + console.log("No matching event in db.") + } + if (authenticatedEventIds.length === 0) { + console.log("No authenticated events in db.") + } + if (authenticatedEventIds.length > 1) { + console.log("More than one matching event in db.") + } + if (authenticatedEventIds.length === 1) { // If there is one authenticated event, pull out the parts we want to // save and try to save them in the db. @@ -86,17 +97,16 @@ export const transcription = functions.https.onRequest(async (req, res) => { // Delete the hashed webhook auth token from our db now that // we're done. 
- authenticatedEventIds.forEach(async docId => { - - await db.collection("events") - .doc(docId) - .collection("private") - .doc("webhookAuth") - .set({ - videoAssemblyWebhookToken: null - }) - - }) + for (const index in authenticatedEventIds) { + await db + .collection("events") + .doc(authenticatedEventIds[index]) + .collection("private") + .doc("webhookAuth") + .set({ + videoAssemblyWebhookToken: null + }) + } } catch (error) { console.log(error) } From 64475c47d03d821aa050992b906ef223897a5639 Mon Sep 17 00:00:00 2001 From: Boaz Sender Date: Thu, 17 Apr 2025 08:28:47 -0700 Subject: [PATCH 10/10] fix typo in webhook event query --- functions/src/webhooks/transcription.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts index 08a6c4b1b..ba64f66aa 100644 --- a/functions/src/webhooks/transcription.ts +++ b/functions/src/webhooks/transcription.ts @@ -18,7 +18,7 @@ export const transcription = functions.https.onRequest(async (req, res) => { // look for an event (aka Hearing) in the DB with a matching ID. const maybeEventsInDb = await db .collection("events") - .where("videoAssemblyId", "==", transcript.id) + .where("videoTranscriptionId", "==", transcript.id) .get() if (maybeEventsInDb.docs.length) {