From 88543872a1f2841f986e74f0f35632870a4c4b8c Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 28 Dec 2024 18:31:38 +0100 Subject: [PATCH 01/22] fix discrepancies --- includes/reports.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/includes/reports.js b/includes/reports.js index 6ca82a8c..7590436a 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -22,14 +22,19 @@ FROM ( COUNT(0) AS volume FROM ${ctx.ref('crawl', 'pages')} WHERE - date = '${params.date}' ${params.devRankFilter} + date = '${params.date}' ${params.devRankFilter} AND + is_root_page AND + INT64(summary.bytesTotal) > 0 GROUP BY date, client, bin - HAVING bin IS NOT NULL ) ) +ORDER BY + date, + bin, + client `) }, { @@ -52,6 +57,7 @@ FROM ( FROM ${ctx.ref('crawl', 'pages')} WHERE date = '${params.date}' ${params.devRankFilter} AND + is_root_page AND INT64(summary.bytesTotal) > 0 ) GROUP BY From bf90fd876520761cde5f0d51d99b6a1f5a7cb26c Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:47:27 +0100 Subject: [PATCH 02/22] lenses --- definitions/output/reports/reports_dynamic.js | 87 ++++++++++++------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index ad63c12b..b134ca2f 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -1,47 +1,74 @@ const configs = new reports.HTTPArchiveReports() const metrics = configs.listMetrics() +// Adjust start and end dates to update reports retrospectively +const startDate = '2024-12-01' // constants.currentMonth; +const endDate = '2024-12-01' // constants.currentMonth; + +const lenses = { + all: '', + top1k: 'AND rank <= 1000', + top10k: 'AND rank <= 10000', + top100k: 'AND rank <= 100000', + top1m: 'AND rank <= 1000000', + drupal: 'AND 
\'Drupal\' IN UNNEST(technologies.technology)', + magento: 'AND \'Magento\' IN UNNEST(technologies.technology)', + wordpress: 'AND \'WordPress\' IN UNNEST(technologies.technology)' +} + const iterations = [] +// dates for ( - let month = constants.currentMonth; month >= constants.currentMonth; month = constants.fnPastMonth(month)) { - iterations.push({ - date: month, - devRankFilter: constants.devRankFilter + let date = endDate; + date >= startDate; + date = constants.fnPastMonth(date) +) { + // metrics + metrics.forEach(metric => { + // timeseries and histograms + metric.SQL.forEach(sql => { + // lenses + for (const [key, value] of Object.entries(lenses)) { + iterations.push({ + date, + metric, + sql, + lense: { key, value }, + devRankFilter: constants.devRankFilter + }) + } + }) }) } -if (iterations.length === 1) { - const params = iterations[0] - metrics.forEach(metric => { - metric.SQL.forEach(sql => { - publish(metric.id + '_' + sql.type, { - type: 'incremental', - protected: true, - bigquery: sql.type === 'histogram' ? { partitionBy: 'date', clusterBy: ['client'] } : {}, - schema: 'reports', - tags: ['crawl_complete'] - }).preOps(ctx => ` +if (startDate === endDate) { + iterations.forEach((params, i) => { + publish(params.metric.id + '_' + params.sql.type + '_' + params.lense.key, { + type: 'incremental', + protected: true, + bigquery: params.sql.type === 'histogram' ? 
{ partitionBy: 'date', clusterBy: ['client'] } : {}, + schema: 'reports', + tags: ['crawl_complete', 'reports'] + }).preOps(ctx => ` --DELETE FROM ${ctx.self()} --WHERE date = '${params.date}'; - `).query(ctx => ` -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${metric.id}", "type": "${sql.type}"} */` + -sql.query(ctx, params)) - }) + `).query(ctx => ` +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.key}"} */` + +params.sql.query(ctx, params) + ) }) } else { iterations.forEach((params, i) => { - metrics.forEach(metric => { - metric.SQL.forEach(sql => { - operate(metric.id + '_' + sql.type + '_' + params.date, { - tags: ['crawl_complete'] - }).queries(ctx => ` -DELETE FROM reports.${metric.id}_${sql.type} + operate( + params.metric.id + '_' + params.sql.type + '_' + params.lense.key + '_' + params.date) + .tags(['crawl_complete', 'reports']) + .queries(ctx => ` +DELETE FROM reports.${params.metric.id}_${params.sql.type} WHERE date = '${params.date}'; -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${metric.id}", "type": "${sql.type}"} */ -INSERT INTO reports.${metric.id}_${sql.type}` + - sql.query(ctx, params)) - }) - }) +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.key}"} */ +INSERT INTO reports.${params.metric.id}_${params.sql.type}` + +params.sql.query(ctx, params) + ) }) } From 5637e83a688217f78ff15664a1d035e2e3984ca3 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:59:09 +0100 Subject: [PATCH 03/22] sql updated --- definitions/output/reports/reports_dynamic.js | 10 +++---- includes/reports.js | 27 ++++++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git 
a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index b134ca2f..7dcc90ee 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -33,7 +33,7 @@ for ( date, metric, sql, - lense: { key, value }, + lense: { name: key, sql: value }, devRankFilter: constants.devRankFilter }) } @@ -43,7 +43,7 @@ for ( if (startDate === endDate) { iterations.forEach((params, i) => { - publish(params.metric.id + '_' + params.sql.type + '_' + params.lense.key, { + publish(params.metric.id + '_' + params.sql.type + '_' + params.lense.name, { type: 'incremental', protected: true, bigquery: params.sql.type === 'histogram' ? { partitionBy: 'date', clusterBy: ['client'] } : {}, @@ -53,20 +53,20 @@ if (startDate === endDate) { --DELETE FROM ${ctx.self()} --WHERE date = '${params.date}'; `).query(ctx => ` -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.key}"} */` + +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.name}"} */` + params.sql.query(ctx, params) ) }) } else { iterations.forEach((params, i) => { operate( - params.metric.id + '_' + params.sql.type + '_' + params.lense.key + '_' + params.date) + params.metric.id + '_' + params.sql.type + '_' + params.lense.name + '_' + params.date) .tags(['crawl_complete', 'reports']) .queries(ctx => ` DELETE FROM reports.${params.metric.id}_${params.sql.type} WHERE date = '${params.date}'; -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.key}"} */ +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.name}"} */ INSERT INTO 
reports.${params.metric.id}_${params.sql.type}` + params.sql.query(ctx, params) ) diff --git a/includes/reports.js b/includes/reports.js index 7590436a..7d8d4a10 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -22,13 +22,14 @@ FROM ( COUNT(0) AS volume FROM ${ctx.ref('crawl', 'pages')} WHERE - date = '${params.date}' ${params.devRankFilter} AND + date = '${params.date}' ${params.devRankFilter} ${params.lense.sql} AND is_root_page AND INT64(summary.bytesTotal) > 0 GROUP BY date, client, bin + HAVING bin IS NOT NULL ) ) ORDER BY @@ -40,26 +41,28 @@ ORDER BY { type: 'timeseries', query: DataformTemplateBuilder.create((ctx, params) => ` -SELECT - date, - client, - UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(751)] / 1024, 2) AS p75, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(901)] / 1024, 2) AS p90 -FROM ( +WITH pages AS ( SELECT date, client, INT64(summary.bytesTotal) AS bytesTotal FROM ${ctx.ref('crawl', 'pages')} WHERE - date = '${params.date}' ${params.devRankFilter} AND + date = '${params.date}' ${params.devRankFilter} ${params.lense.sql} AND is_root_page AND INT64(summary.bytesTotal) > 0 ) + +SELECT + date, + client, + UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(751)] / 1024, 2) AS p75, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(901)] / 1024, 2) AS p90 +FROM pages GROUP BY date, client, From 55adcf4028b498ae817c67bcfb3fe081643f7244 Mon Sep 17 00:00:00 2001 From: Max Ostapenko 
<1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Jan 2025 07:10:58 +0100 Subject: [PATCH 04/22] lenses path --- infra/bigquery-export/reports.js | 7 ++++++- infra/bigquery-export/storage.js | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/infra/bigquery-export/reports.js b/infra/bigquery-export/reports.js index e7730f6d..d012338f 100644 --- a/infra/bigquery-export/reports.js +++ b/infra/bigquery-export/reports.js @@ -6,6 +6,7 @@ export class ReportsExporter { constructor () { this.bigquery = new BigQueryExport() this.storage = new StorageExport() + this.storagePath = 'reports/dev/' // TODO change to prod } // Export timeseries reports @@ -18,7 +19,7 @@ SELECT FROM reports.${metric}_timeseries ` const rows = await this.bigquery.queryResults(query) - await this.storage.exportToJson(rows, metric) + await this.storage.exportToJson(rows, `${this.storagePath}${metric}.json`) } // Export monthly histogram report @@ -41,6 +42,10 @@ WHERE date = '${date}' return } + if (exportData.lense && exportData.lense !== 'all') { + this.storagePath = this.storagePath + `${exportData.lense}/` + } + if (exportData.type === 'histogram') { await this.exportHistogram(exportData) } else if (exportData.type === 'timeseries') { diff --git a/infra/bigquery-export/storage.js b/infra/bigquery-export/storage.js index a6850905..4ebfb883 100644 --- a/infra/bigquery-export/storage.js +++ b/infra/bigquery-export/storage.js @@ -7,7 +7,6 @@ const storage = new Storage() export class StorageExport { constructor (bucket = 'httparchive') { this.bucket = bucket - this.storagePath = 'reports/dev/' // TODO change to prod this.stream = new Readable({ objectMode: true, read () {} @@ -15,8 +14,6 @@ export class StorageExport { } async exportToJson (data, fileName) { - fileName = this.storagePath + fileName + '.json' - const bucket = storage.bucket(this.bucket) const file = bucket.file(fileName) From 385e426bb9710d61e53a1dc8a4dda3cd120cfcc6 Mon Sep 17 00:00:00 2001 From: Max 
Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Jan 2025 08:11:25 +0100 Subject: [PATCH 05/22] spelling fix --- definitions/output/reports/reports_dynamic.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index 7dcc90ee..fc71f573 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -43,7 +43,7 @@ for ( if (startDate === endDate) { iterations.forEach((params, i) => { - publish(params.metric.id + '_' + params.sql.type + '_' + params.lense.name, { + publish(params.metric.id + '_' + params.sql.type + '_' + params.lens.name, { type: 'incremental', protected: true, bigquery: params.sql.type === 'histogram' ? { partitionBy: 'date', clusterBy: ['client'] } : {}, @@ -53,20 +53,20 @@ if (startDate === endDate) { --DELETE FROM ${ctx.self()} --WHERE date = '${params.date}'; `).query(ctx => ` -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.name}"} */` + +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lens.name}"} */` + params.sql.query(ctx, params) ) }) } else { iterations.forEach((params, i) => { operate( - params.metric.id + '_' + params.sql.type + '_' + params.lense.name + '_' + params.date) + params.metric.id + '_' + params.sql.type + '_' + params.lens.name + '_' + params.date) .tags(['crawl_complete', 'reports']) .queries(ctx => ` DELETE FROM reports.${params.metric.id}_${params.sql.type} WHERE date = '${params.date}'; -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.name}"} */ +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": 
"${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lens.name}"} */ INSERT INTO reports.${params.metric.id}_${params.sql.type}` + params.sql.query(ctx, params) ) From 1b3678c56529a7ebb4e02ad4a6937b5be3bb2e3c Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 12 Jan 2025 14:13:53 +0100 Subject: [PATCH 06/22] all metrics in the same table --- definitions/output/reports/reports_dynamic.js | 47 +++++++++---------- includes/reports.js | 28 +++++++---- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index fc71f573..97ec9e3c 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -33,7 +33,7 @@ for ( date, metric, sql, - lense: { name: key, sql: value }, + lens: { name: key, sql: value }, devRankFilter: constants.devRankFilter }) } @@ -41,34 +41,29 @@ for ( }) } -if (startDate === endDate) { - iterations.forEach((params, i) => { - publish(params.metric.id + '_' + params.sql.type + '_' + params.lens.name, { - type: 'incremental', - protected: true, - bigquery: params.sql.type === 'histogram' ? 
{ partitionBy: 'date', clusterBy: ['client'] } : {}, - schema: 'reports', - tags: ['crawl_complete', 'reports'] - }).preOps(ctx => ` ---DELETE FROM ${ctx.self()} ---WHERE date = '${params.date}'; - `).query(ctx => ` -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lens.name}"} */` + -params.sql.query(ctx, params) - ) - }) -} else { - iterations.forEach((params, i) => { - operate( - params.metric.id + '_' + params.sql.type + '_' + params.lens.name + '_' + params.date) - .tags(['crawl_complete', 'reports']) - .queries(ctx => ` +iterations.forEach((params, i) => { + operate( + params.metric.id + '_' + params.sql.type + '_' + params.lens.name + '_' + params.date) + .tags(['crawl_complete', 'reports']) + .queries(ctx => ` +CREATE TABLE IF NOT EXISTS reports.${params.metric.id}_${params.sql.type} ( + metric STRING, + date DATE, + client STRING, + lens STRING, + bin INT64, + volume INT64, + pdf FLOAT64, + cdf FLOAT64 +) +PARTITION BY date +CLUSTER BY client, lens; + DELETE FROM reports.${params.metric.id}_${params.sql.type} WHERE date = '${params.date}'; /* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lens.name}"} */ INSERT INTO reports.${params.metric.id}_${params.sql.type}` + params.sql.query(ctx, params) - ) - }) -} + ) +}) diff --git a/includes/reports.js b/includes/reports.js index 7d8d4a10..8bdbe275 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -7,8 +7,21 @@ const config = { { type: 'histogram', query: DataformTemplateBuilder.create((ctx, params) => ` +WITH pages AS ( + SELECT + date, + client, + CAST(FLOOR(INT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin + FROM crawl.pages + WHERE + date = '${params.date}' ${params.lens.sql} AND + is_root_page AND + INT64(summary.bytesTotal) > 0 +) + SELECT *, + '${params.lens.name}' AS lens, SUM(pdf) 
OVER (PARTITION BY client ORDER BY bin) AS cdf FROM ( SELECT @@ -16,15 +29,9 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, - client, - CAST(FLOOR(INT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin, + *, COUNT(0) AS volume - FROM ${ctx.ref('crawl', 'pages')} - WHERE - date = '${params.date}' ${params.devRankFilter} ${params.lense.sql} AND - is_root_page AND - INT64(summary.bytesTotal) > 0 + FROM pages GROUP BY date, client, @@ -46,9 +53,9 @@ WITH pages AS ( date, client, INT64(summary.bytesTotal) AS bytesTotal - FROM ${ctx.ref('crawl', 'pages')} + FROM crawl.pages WHERE - date = '${params.date}' ${params.devRankFilter} ${params.lense.sql} AND + date = '${params.date}' $ ${params.lens.sql} AND is_root_page AND INT64(summary.bytesTotal) > 0 ) @@ -56,6 +63,7 @@ WITH pages AS ( SELECT date, client, + '${params.lens.name}' AS lens, UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, From e2ce0dd05bb7879a5f1ffafb701f245a2873ea8d Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 14 Jan 2025 21:44:43 +0100 Subject: [PATCH 07/22] aggregated tables --- definitions/output/reports/reports_dynamic.js | 15 ++++++--------- includes/reports.js | 2 -- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index 97ec9e3c..8bcfbcd7 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -46,24 +46,21 @@ iterations.forEach((params, i) => { params.metric.id + '_' + params.sql.type + '_' + params.lens.name + '_' + params.date) .tags(['crawl_complete', 'reports']) .queries(ctx => ` -CREATE TABLE IF NOT EXISTS reports.${params.metric.id}_${params.sql.type} ( - metric STRING, +CREATE TABLE IF 
NOT EXISTS reports.${params.sql.type} ( date DATE, - client STRING, lens STRING, - bin INT64, - volume INT64, - pdf FLOAT64, - cdf FLOAT64 + metric STRING, + client STRING, + data JSON ) PARTITION BY date CLUSTER BY client, lens; -DELETE FROM reports.${params.metric.id}_${params.sql.type} +DELETE FROM reports.${params.sql.type} WHERE date = '${params.date}'; /* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lens.name}"} */ -INSERT INTO reports.${params.metric.id}_${params.sql.type}` + +INSERT INTO reports.${params.sql.type}` + params.sql.query(ctx, params) ) }) diff --git a/includes/reports.js b/includes/reports.js index 8bdbe275..337bdde5 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -21,7 +21,6 @@ WITH pages AS ( SELECT *, - '${params.lens.name}' AS lens, SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf FROM ( SELECT @@ -63,7 +62,6 @@ WITH pages AS ( SELECT date, client, - '${params.lens.name}' AS lens, UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, From 6bb7ce44f4514cd544a07eb2bf77c395f1148e7d Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 14 Jan 2025 21:46:53 +0100 Subject: [PATCH 08/22] storage sync script --- scripts/package-lock.json | 838 ++++++++++++++++++++++++++++++++ scripts/package.json | 7 + scripts/reports_storage_sync.js | 136 ++++++ 3 files changed, 981 insertions(+) create mode 100644 scripts/package-lock.json create mode 100644 scripts/package.json create mode 100644 scripts/reports_storage_sync.js diff --git a/scripts/package-lock.json b/scripts/package-lock.json new file mode 100644 index 00000000..ef22b055 --- /dev/null +++ b/scripts/package-lock.json @@ -0,0 +1,838 @@ +{ + "name": "scripts", + "lockfileVersion": 3, + 
"requires": true, + "packages": { + "": { + "dependencies": { + "@google-cloud/bigquery": "^7.9.1", + "@google-cloud/storage": "^7.14.0" + } + }, + "node_modules/@google-cloud/bigquery": { + "version": "7.9.1", + "resolved": "https://registry.npmjs.org/@google-cloud/bigquery/-/bigquery-7.9.1.tgz", + "integrity": "sha512-ZkcRMpBoFLxIh6TiQBywA22yT3c2j0f07AHWEMjtYqMQzZQbFrpxuJU2COp3tyjZ91ZIGHe4gY7/dGZL88cltg==", + "license": "Apache-2.0", + "dependencies": { + "@google-cloud/common": "^5.0.0", + "@google-cloud/paginator": "^5.0.2", + "@google-cloud/precise-date": "^4.0.0", + "@google-cloud/promisify": "^4.0.0", + "arrify": "^2.0.1", + "big.js": "^6.0.0", + "duplexify": "^4.0.0", + "extend": "^3.0.2", + "is": "^3.3.0", + "stream-events": "^1.0.5", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/common": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@google-cloud/common/-/common-5.0.2.tgz", + "integrity": "sha512-V7bmBKYQyu0eVG2BFejuUjlBt+zrya6vtsKdY+JxMM/dNntPF41vZ9+LhOshEUH01zOHEqBSvI7Dad7ZS6aUeA==", + "license": "Apache-2.0", + "dependencies": { + "@google-cloud/projectify": "^4.0.0", + "@google-cloud/promisify": "^4.0.0", + "arrify": "^2.0.1", + "duplexify": "^4.1.1", + "extend": "^3.0.2", + "google-auth-library": "^9.0.0", + "html-entities": "^2.5.2", + "retry-request": "^7.0.0", + "teeny-request": "^9.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/paginator": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@google-cloud/paginator/-/paginator-5.0.2.tgz", + "integrity": "sha512-DJS3s0OVH4zFDB1PzjxAsHqJT6sKVbRwwML0ZBP9PbU7Yebtu/7SWMRzvO2J3nUi9pRNITCfu4LJeooM2w4pjg==", + "license": "Apache-2.0", + "dependencies": { + "arrify": "^2.0.0", + "extend": "^3.0.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/precise-date": { + "version": "4.0.0", + "resolved": 
"https://registry.npmjs.org/@google-cloud/precise-date/-/precise-date-4.0.0.tgz", + "integrity": "sha512-1TUx3KdaU3cN7nfCdNf+UVqA/PSX29Cjcox3fZZBtINlRrXVTmUkQnCKv2MbBUbCopbK4olAT1IHl76uZyCiVA==", + "license": "Apache-2.0", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/projectify": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@google-cloud/projectify/-/projectify-4.0.0.tgz", + "integrity": "sha512-MmaX6HeSvyPbWGwFq7mXdo0uQZLGBYCwziiLIGq5JVX+/bdI3SAq6bP98trV5eTWfLuvsMcIC1YJOF2vfteLFA==", + "license": "Apache-2.0", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/promisify": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@google-cloud/promisify/-/promisify-4.0.0.tgz", + "integrity": "sha512-Orxzlfb9c67A15cq2JQEyVc7wEsmFBmHjZWZYQMUyJ1qivXyMwdyNOs9odi79hze+2zqdTtu1E19IM/FtqZ10g==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage": { + "version": "7.15.0", + "resolved": "https://registry.npmjs.org/@google-cloud/storage/-/storage-7.15.0.tgz", + "integrity": "sha512-/j/+8DFuEOo33fbdX0V5wjooOoFahEaMEdImHBmM2tH9MPHJYNtmXOf2sGUmZmiufSukmBEvdlzYgDkkgeBiVQ==", + "license": "Apache-2.0", + "dependencies": { + "@google-cloud/paginator": "^5.0.0", + "@google-cloud/projectify": "^4.0.0", + "@google-cloud/promisify": "^4.0.0", + "abort-controller": "^3.0.0", + "async-retry": "^1.3.3", + "duplexify": "^4.1.3", + "fast-xml-parser": "^4.4.1", + "gaxios": "^6.0.2", + "google-auth-library": "^9.6.3", + "html-entities": "^2.5.2", + "mime": "^3.0.0", + "p-limit": "^3.0.1", + "retry-request": "^7.0.0", + "teeny-request": "^9.0.0", + "uuid": "^8.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": 
"sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/@tootallnate/once": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz", + "integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==", + "license": "MIT", + "engines": { + "node": ">= 10" + } + }, + "node_modules/@types/caseless": { + "version": "0.12.5", + "resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.5.tgz", + "integrity": "sha512-hWtVTC2q7hc7xZ/RLbxapMvDMgUnDvKvMOpKal4DrMyfGBUfB1oKaZlIRr6mJL+If3bAP6sV/QneGzF6tJjZDg==", + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.10.6", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.6.tgz", + "integrity": "sha512-qNiuwC4ZDAUNcY47xgaSuS92cjf8JbSUoaKS77bmLG1rU7MlATVSiw/IlrjtIyyskXBZ8KkNfjK/P5na7rgXbQ==", + "license": "MIT", + "dependencies": { + "undici-types": "~6.20.0" + } + }, + "node_modules/@types/request": { + "version": "2.48.12", + "resolved": "https://registry.npmjs.org/@types/request/-/request-2.48.12.tgz", + "integrity": "sha512-G3sY+NpsA9jnwm0ixhAFQSJ3Q9JkpLZpJbI3GMv0mIAT0y3mRabYeINzal5WOChIiaTEGQYlHOKgkaM9EisWHw==", + "license": "MIT", + "dependencies": { + "@types/caseless": "*", + "@types/node": "*", + "@types/tough-cookie": "*", + "form-data": "^2.5.0" + } + }, + "node_modules/@types/tough-cookie": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", + "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==", + "license": "MIT" + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": 
"sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agent-base": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", + "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/arrify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/arrify/-/arrify-2.0.1.tgz", + "integrity": "sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/async-retry": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", + "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", + "license": "MIT", + "dependencies": { + "retry": "0.13.1" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/big.js": { + "version": "6.2.2", + "resolved": 
"https://registry.npmjs.org/big.js/-/big.js-6.2.2.tgz", + "integrity": "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ==", + "license": "MIT", + "engines": { + "node": "*" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/bigjs" + } + }, + "node_modules/bignumber.js": { + "version": "9.1.2", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.1.2.tgz", + "integrity": "sha512-2/mKyZH9K85bzOEfhXDBFZTGd1CTs+5IHpeFQo9luiBG7hghdC851Pj2WAhb6E3R6b9tZj/XKhbg4fum+Kepug==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/debug": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": 
"sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/duplexify": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-4.1.3.tgz", + "integrity": "sha512-M3BmBhwJRZsSx38lZyhE53Csddgzl5R7xGJNk7CVddZD6CcmwMCH8J+7AprIrQKH7TonKxaCjcv27Qmf+sQ+oA==", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.4.1", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1", + "stream-shift": "^1.0.2" + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/end-of-stream": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, + "node_modules/fast-xml-parser": { + "version": "4.5.1", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.1.tgz", + "integrity": 
"sha512-y655CeyUQ+jj7KBbYMc4FG01V8ZQqjN+gDYGJ50RtfsUB8iG9AmwmwoAgeKLJdmueKKMrH1RJ7yXHTSoczdv5w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + }, + { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + } + ], + "license": "MIT", + "dependencies": { + "strnum": "^1.0.5" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, + "node_modules/form-data": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.5.2.tgz", + "integrity": "sha512-GgwY0PS7DbXqajuGf4OYlsrIu3zgxD6Vvql43IBhm6MahqA5SK/7mwhtNj2AdH2z35YR34ujJ7BN+3fFC3jP5Q==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.6", + "mime-types": "^2.1.12", + "safe-buffer": "^5.2.1" + }, + "engines": { + "node": ">= 0.12" + } + }, + "node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gcp-metadata": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.0.tgz", + "integrity": "sha512-Jh/AIwwgaxan+7ZUUmRLCjtchyDiqh4KjBJ5tW3plBZb5iL/BPcso8A5DlzeD9qlw0duCamnNdpFjxwaT0KyKg==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.0.0", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-auth-library": { + "version": "9.15.0", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.0.tgz", + "integrity": "sha512-7ccSEJFDFO7exFbO6NRyC+xH8/mZ1GZGG2xxx9iHxZWcjUjJpjWxIMw3cofAKcueZ6DATiukmmprD7yavQHOyQ==", + "license": "Apache-2.0", + 
"dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/html-entities": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.5.2.tgz", + "integrity": "sha512-K//PSRMQk4FZ78Kyau+mZurHn3FH0Vwr+H36eE0rPbeYkRRi9YxceYPhuN60UwWorxyKHhqoAJl2OFKa4BVtaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/mdevils" + }, + { + "type": "patreon", + "url": "https://patreon.com/mdevils" + } + ], + "license": "MIT" + }, + "node_modules/http-proxy-agent": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz", + "integrity": "sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==", + "license": "MIT", + "dependencies": { + "@tootallnate/once": "2", + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/http-proxy-agent/node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": 
"sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/is": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/is/-/is-3.3.0.tgz", + "integrity": "sha512-nW24QBoPcFGGHJGUwnfpI7Yc5CdqWNdsyHQszVE/z2pKHXzh7FZ5GWhJqSyaQ9wMkQnsTx+kAI8bHlCX4tKdbg==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, + "node_modules/jwa": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz", + "integrity": "sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", + "integrity": 
"sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.0", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/mime": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", + "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": 
"https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/retry": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", + "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/retry-request": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/retry-request/-/retry-request-7.0.2.tgz", + "integrity": "sha512-dUOvLMJ0/JJYEn8NrpOaGNE7X3vpI5XlZS/u0ANjqtcZVKnIxP7IgCFwrKTxENw29emmwug53awKtaMm4i9g5w==", + "license": "MIT", + "dependencies": { + "@types/request": "^2.48.8", + "extend": "^3.0.2", + "teeny-request": "^9.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": 
"sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/stream-events": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/stream-events/-/stream-events-1.0.5.tgz", + "integrity": "sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==", + "license": "MIT", + "dependencies": { + "stubs": "^3.0.0" + } + }, + "node_modules/stream-shift": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.3.tgz", + "integrity": "sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==", + "license": "MIT" + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/strnum": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", + "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==", + "license": "MIT" + }, + "node_modules/stubs": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/stubs/-/stubs-3.0.0.tgz", + "integrity": "sha512-PdHt7hHUJKxvTCgbKX9C1V/ftOcjJQgz8BZwNfV5c4B6dcGqlpelTbJ999jBGZ2jYiPAwcX5dP6oBwVlBlUbxw==", + "license": "MIT" + }, + "node_modules/teeny-request": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-9.0.0.tgz", + "integrity": 
"sha512-resvxdc6Mgb7YEThw6G6bExlXKkv6+YbuzGg9xuXxSgxJF7Ozs+o8Y9+2R3sArdWdW8nOokoQb1yrpFB0pQK2g==", + "license": "Apache-2.0", + "dependencies": { + "http-proxy-agent": "^5.0.0", + "https-proxy-agent": "^5.0.0", + "node-fetch": "^2.6.9", + "stream-events": "^1.0.5", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/teeny-request/node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/teeny-request/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": 
"https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/scripts/package.json b/scripts/package.json new file mode 100644 index 00000000..dc5df04a --- /dev/null +++ b/scripts/package.json @@ -0,0 +1,7 @@ +{ + "type": "module", + "dependencies": { + "@google-cloud/bigquery": "^7.9.1", + "@google-cloud/storage": "^7.14.0" + } +} diff --git a/scripts/reports_storage_sync.js b/scripts/reports_storage_sync.js new file 
mode 100644 index 00000000..e462c2a0 --- /dev/null +++ b/scripts/reports_storage_sync.js @@ -0,0 +1,136 @@ +import { Storage } from '@google-cloud/storage' +import { BigQuery } from '@google-cloud/bigquery' + +const storage = new Storage() +const bucketName = 'httparchive' +const storagePathPrefix = 'reports/' + +const bigquery = new BigQuery({ projectId: 'httparchive' }) +const datasetId = 'reports' +const tableId = 'gcs_export' + +const lenses = [ + '', + 'drupal/', + 'magento/', + 'top100k/', + 'top10k/', + 'top1k/', + 'top1m/', + 'wordpress/' +] + +const dates = (function () { + const dates = [] + for (let year = 2016; year <= 2025; year++) { + for (let month = 1; month <= 12; month++) { + dates.push(`${year}_${String(month).padStart(2, '0')}_01`) + if (year <= 2018) { + dates.push(`${year}_${String(month).padStart(2, '0')}_15`) + } + if (year === 2025 && month === 1) { + break + } + } + } + return dates +})() + +const histogramMetrics = new Set( + 'bytesCss', + 'bytesFont', + 'bytesHtml', + 'bytesImg', + 'bytesJs', + 'bytesOther', + 'bytesTotal', + 'bytesVideo', + 'compileJs', + 'dcl', + 'evalJs', + 'fcp', + 'gzipSavings', + 'imgSavings', + 'ol', + 'reqCss', + 'reqFont', + 'reqHtml', + 'reqImg', + 'reqJs', + 'reqOther', + 'reqTotal', + 'reqVideo', + 'speedIndex', + 'tcp', + 'bootupJs', + 'offscreenImages', + 'optimizedImages', + 'ttci', + 'ttfi', + 'vulnJs', + 'cruxCls', + 'cruxDcl', + 'cruxFcp', + 'cruxFid', + 'cruxFp', + 'cruxLcp', + 'cruxOl', + 'htmlElementPopularity', + 'cruxInp', + 'cruxTtfb') + +async function downloadObject (bucketName, srcFilename) { + const contents = await storage.bucket(bucketName).file(srcFilename).download() + + return contents.toString() +} + +async function uploadToBigQuery (rows, schema) { + try { + await bigquery.dataset(datasetId).table(tableId).insert(rows, { schema }) + } catch (error) { + if (error.name === 'PartialFailureError') { + console.error('Partial failure error:', error) + error.errors.forEach(err => { + 
console.error('Row:', JSON.stringify(err.row)) + console.error('Errors:', JSON.stringify(err.errors)) + }) + } else { + throw error + } + } +} + +async function importHistogramData () { + for (const lens of lenses) { + for (const metric of histogramMetrics) { + for (const date of dates) { + const srcFilename = `${storagePathPrefix}${lens}${date}/${metric}.json` + + console.log(`Downloading ${srcFilename}`) + + const data = await downloadObject(bucketName, srcFilename) + + const rows = JSON.parse(data).map(data => ({ + date: date.replace(/_/g, '-'), + lens: lens.replace('/', ''), + metric, + data: JSON.stringify(data) + })) + + const schema = [ + { name: 'date', type: 'DATE' }, + { name: 'lens', type: 'STRING' }, + { name: 'metric', type: 'STRING' }, + { name: 'data', type: 'JSON' } + ] + + console.log(`Uploading ${rows.length} rows to BigQuery`) + + await uploadToBigQuery(rows, schema) + } + } + } +} + +importHistogramData().catch(console.error) From 044c9c755aa4a7d03341be3c94b08d1fbe7bfe99 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 26 Jan 2025 18:56:37 +0100 Subject: [PATCH 09/22] conditional path --- infra/bigquery-export/reports.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/infra/bigquery-export/reports.js b/infra/bigquery-export/reports.js index d8feef36..223bd468 100644 --- a/infra/bigquery-export/reports.js +++ b/infra/bigquery-export/reports.js @@ -6,7 +6,6 @@ export class ReportsExporter { constructor () { this.bigquery = new BigQueryExport() this.storage = new StorageExport() - this.storagePath = 'reports/dev/' // TODO change to prod } // Export timeseries reports @@ -42,6 +41,8 @@ WHERE date = '${date}' return } + this.storagePath = 'reports/' + exportConfig.environment !== 'prod' ? 
'dev/' : '' + if (exportConfig.lense && exportConfig.lense !== 'all') { this.storagePath = this.storagePath + `${exportConfig.lense}/` } From 727c3c63c368657a600fbb25e6e9c29c94e50954 Mon Sep 17 00:00:00 2001 From: Max Ostapenko Date: Mon, 16 Jun 2025 22:38:41 +0000 Subject: [PATCH 10/22] common lenses --- definitions/output/reports/reports_dynamic.js | 47 +++++++------------ includes/reports.js | 28 ++++++++--- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index 6f811b2b..432ec09c 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -1,5 +1,6 @@ const configs = new reports.HTTPArchiveReports() const metrics = configs.listMetrics() +const lenses = configs.lenses; const bucket = 'httparchive' const storagePath = '/reports/dev/' @@ -8,31 +9,30 @@ const storagePath = '/reports/dev/' const startDate = '2024-12-01' // constants.currentMonth; const endDate = '2024-12-01' // constants.currentMonth; -function generateExportPath (metric, sql, params) { - if (sql.type === 'histogram') { - return `${storagePath}${params.date.replaceAll('-', '_')}/${metric.id}.json` - } else if (sql.type === 'timeseries') { - return `${storagePath}${metric.id}.json` +function generateExportPath (ctx, params) { + if (params.sql.type === 'histogram') { + return `${storagePath}${params.date.replaceAll('-', '_')}/${params.metric.id}.json` + } else if (params.sql.type === 'timeseries') { + return `${storagePath}${params.metric.id}.json` } else { throw new Error('Unknown SQL type') } } -function generateExportQuery (metric, sql, params, ctx) { +function generateExportQuery (ctx, params) { let query = '' - if (sql.type === 'histogram') { + if (params.sql.type === 'histogram') { query = ` -SELECT - * EXCEPT(date) -FROM ${ctx.self()} +SELECT * EXCEPT(date) +FROM \`reports.${params.sql.type}\` WHERE date = '${params.date}' ` - } else 
if (sql.type === 'timeseries') { + } else if (params.sql.type === 'timeseries') { query = ` SELECT FORMAT_DATE('%Y_%m_%d', date) AS date, * EXCEPT(date) -FROM ${ctx.self()} +FROM \`reports.${params.sql.type}\` ` } else { throw new Error('Unknown SQL type') @@ -42,17 +42,6 @@ FROM ${ctx.self()} return queryOutput } -const lenses = { - all: '', - top1k: 'AND rank <= 1000', - top10k: 'AND rank <= 10000', - top100k: 'AND rank <= 100000', - top1m: 'AND rank <= 1000000', - drupal: 'AND \'Drupal\' IN UNNEST(technologies.technology)', - magento: 'AND \'Magento\' IN UNNEST(technologies.technology)', - wordpress: 'AND \'WordPress\' IN UNNEST(technologies.technology)' -} - const iterations = [] // dates for ( @@ -91,15 +80,13 @@ CREATE TABLE IF NOT EXISTS reports.${params.sql.type} ( data JSON ) PARTITION BY date -CLUSTER BY metric, lens; +CLUSTER BY metric, lens, client; DELETE FROM reports.${params.sql.type} WHERE date = '${params.date}' -AND metric = '${params.metric.id}' -AND lens = '${params.lens.sql}'; +AND metric = '${params.metric.id}'; -INSERT INTO reports.${params.sql.type} -${params.sql.query(ctx, params)}; +INSERT INTO reports.${params.sql.type} ${params.sql.query(ctx, params)}; SELECT reports.run_export_job( @@ -107,9 +94,9 @@ reports.run_export_job( "destination": "cloud_storage", "config": { "bucket": "${bucket}", - "name": "${generateExportPath(params.metric, params.sql, params)}" + "name": "${generateExportPath(ctx, params)}" }, - "query": "${generateExportQuery(params.metric, params.sql, params, ctx)}" + "query": "${generateExportQuery(ctx, params)}" }''' ); `) diff --git a/includes/reports.js b/includes/reports.js index 337bdde5..461a1b9c 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -14,9 +14,10 @@ WITH pages AS ( CAST(FLOOR(INT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin FROM crawl.pages WHERE - date = '${params.date}' ${params.lens.sql} AND - is_root_page AND - INT64(summary.bytesTotal) > 0 + date = '${params.date}' + 
${params.lens.sql} + AND is_root_page + AND INT64(summary.bytesTotal) > 0 ) SELECT @@ -54,9 +55,10 @@ WITH pages AS ( INT64(summary.bytesTotal) AS bytesTotal FROM crawl.pages WHERE - date = '${params.date}' $ ${params.lens.sql} AND - is_root_page AND - INT64(summary.bytesTotal) > 0 + date = '${params.date}' + ${params.lens.sql} + AND is_root_page + AND INT64(summary.bytesTotal) > 0 ) SELECT @@ -80,9 +82,21 @@ GROUP BY } } +const lenses = { + all: '', + top1k: 'AND rank <= 1000', + top10k: 'AND rank <= 10000', + top100k: 'AND rank <= 100000', + top1m: 'AND rank <= 1000000', + drupal: 'AND \'Drupal\' IN UNNEST(technologies.technology)', + magento: 'AND \'Magento\' IN UNNEST(technologies.technology)', + wordpress: 'AND \'WordPress\' IN UNNEST(technologies.technology)' +} + class HTTPArchiveReports { constructor () { - this.config = config + this.config = config, + this.lenses = lenses; } listReports () { From 8e9dc27e844cf2853bb9cc0139dade21df4edf4a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 18 Jun 2025 21:54:30 +0200 Subject: [PATCH 11/22] lint --- definitions/output/reports/reports_dynamic.js | 2 +- includes/reports.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index 432ec09c..50a46573 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -1,6 +1,6 @@ const configs = new reports.HTTPArchiveReports() const metrics = configs.listMetrics() -const lenses = configs.lenses; +const lenses = configs.lenses const bucket = 'httparchive' const storagePath = '/reports/dev/' diff --git a/includes/reports.js b/includes/reports.js index 461a1b9c..9cd4f356 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -95,8 +95,8 @@ const lenses = { class HTTPArchiveReports { constructor () { - this.config = config, - this.lenses = lenses; + 
this.config = config + this.lenses = lenses } listReports () { From 390269b8f643ac9f3877e70e462c505b7992f87b Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 30 Jul 2025 22:11:33 +0200 Subject: [PATCH 12/22] update --- includes/reports.js | 22 ++++---- infra/bigquery-export/reports.js | 91 -------------------------------- infra/bigquery-export/storage.js | 5 +- 3 files changed, 16 insertions(+), 102 deletions(-) delete mode 100644 infra/bigquery-export/reports.js diff --git a/includes/reports.js b/includes/reports.js index 9cd4f356..d0d68edd 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -11,13 +11,13 @@ WITH pages AS ( SELECT date, client, - CAST(FLOOR(INT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin + CAST(FLOOR(FLOAT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin FROM crawl.pages WHERE date = '${params.date}' ${params.lens.sql} AND is_root_page - AND INT64(summary.bytesTotal) > 0 + AND FLOAT64(summary.bytesTotal) > 0 ) SELECT @@ -32,15 +32,14 @@ FROM ( *, COUNT(0) AS volume FROM pages + WHERE bin IS NOT NULL GROUP BY date, client, bin - HAVING bin IS NOT NULL ) ) ORDER BY - date, bin, client `) @@ -52,7 +51,7 @@ WITH pages AS ( SELECT date, client, - INT64(summary.bytesTotal) AS bytesTotal + FLOAT64(summary.bytesTotal) AS bytesTotal FROM crawl.pages WHERE date = '${params.date}' @@ -75,6 +74,9 @@ GROUP BY date, client, timestamp +ORDER BY + date, + client `) } ] @@ -94,12 +96,12 @@ const lenses = { } class HTTPArchiveReports { - constructor () { + constructor() { this.config = config this.lenses = lenses } - listReports () { + listReports() { const reportIds = this.config._reports const reports = reportIds.map(reportId => { @@ -110,7 +112,7 @@ class HTTPArchiveReports { return reports } - getReport (reportId) { + getReport(reportId) { const report = this.config[reportId] return { id: reportId, @@ -118,7 +120,7 @@ class HTTPArchiveReports { } } - listMetrics 
(reportId) { + listMetrics(reportId) { if (reportId === undefined) { const metrics = Object.keys(this.config._metrics).map(metricId => { const metric = this.getMetric(metricId) @@ -139,7 +141,7 @@ class HTTPArchiveReports { } } - getMetric (metricId) { + getMetric(metricId) { const metric = this.config._metrics[metricId] return { diff --git a/infra/bigquery-export/reports.js b/infra/bigquery-export/reports.js deleted file mode 100644 index 79ae1f1e..00000000 --- a/infra/bigquery-export/reports.js +++ /dev/null @@ -1,91 +0,0 @@ -import { BigQueryExport } from './bigquery.js' -import { StorageExport } from './storage.js' -import { FirestoreBatch } from './firestore.js' - -export class ReportsExporter { - constructor () { - this.bigquery = new BigQueryExport() - this.storage = new StorageExport() - } - - // Export timeseries reports - async exportTimeseries (exportConfig) { - const metric = exportConfig.name - const query = ` -SELECT - FORMAT_DATE('%Y_%m_%d', date) AS date, - * EXCEPT(date) -FROM reports.${metric}_timeseries -` - const rows = await this.bigquery.queryResults(query) - await this.storage.exportToJson(rows, `${this.storagePath}${metric}.json`) - } - - // Export monthly histogram report - async exportHistogram (exportConfig) { - const metric = exportConfig.name - const date = exportConfig.date - - const query = ` -SELECT * EXCEPT(date) -FROM reports.${metric}_histogram -WHERE date = '${date}' -` - const rows = await this.bigquery.queryResults(query) - await this.storage.exportToJson(rows, `${this.storagePath}${date.replaceAll('-', '_')}/${metric}.json`) - } - - async export (exportConfig) { - if (exportConfig.dataform_trigger !== 'report_complete') { - console.error('Invalid dataform trigger') - return - } - - this.storagePath = 'reports/' + exportConfig.environment !== 'prod' ? 
'dev/' : '' - - if (exportConfig.lense && exportConfig.lense !== 'all') { - this.storagePath = this.storagePath + `${exportConfig.lense}/` - } - - if (exportConfig.type === 'histogram') { - await this.exportHistogram(exportConfig) - } else if (exportConfig.type === 'timeseries') { - await this.exportTimeseries(exportConfig) - } else { - console.error('Invalid report type') - } - } -} - -export class TechReportsExporter { - constructor () { - this.firestore = new FirestoreBatch() - } - - async export (exportConfig) { - if (exportConfig.dataform_trigger !== 'tech_report_complete') { - console.error('Invalid dataform trigger') - return - } - - let query = '' - if (exportConfig.type === 'report') { - query = ` -SELECT - STRING(date) AS date, - * EXCEPT(date) -FROM httparchive.reports.tech_report_${exportConfig.name} -WHERE date = '${exportConfig.date}' -` - } else if (exportConfig.type === 'dict') { - query = ` -SELECT * -FROM reports.tech_report_${exportConfig.name} -` - } else { - console.error('Invalid export type') - } - - await this.firestore.export(exportConfig, query) - } -} diff --git a/infra/bigquery-export/storage.js b/infra/bigquery-export/storage.js index a9d1e8ff..18893633 100644 --- a/infra/bigquery-export/storage.js +++ b/infra/bigquery-export/storage.js @@ -1,7 +1,9 @@ import { Storage } from '@google-cloud/storage' +import { BigQueryExport } from './bigquery.js' import { Readable } from 'stream' import zlib from 'zlib' +const bigquery = new BigQueryExport() const storage = new Storage() export class StorageUpload { @@ -13,7 +15,8 @@ export class StorageUpload { }) } - async exportToJson (data, fileName) { + async exportToJson (query, fileName) { + const data = await bigquery.queryResults(query) const bucket = storage.bucket(this.bucket) const file = bucket.file(fileName) From 545775217af1f0437c189d85648a117ae6fc32e2 Mon Sep 17 00:00:00 2001 From: Max Ostapenko Date: Thu, 31 Jul 2025 00:01:42 +0000 Subject: [PATCH 13/22] bytesTotal tested --- 
definitions/output/reports/reports_dynamic.js | 94 +++++++++++-------- includes/constants.js | 1 + includes/reports.js | 8 +- 3 files changed, 63 insertions(+), 40 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index 50a46573..d9caf66e 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -2,37 +2,47 @@ const configs = new reports.HTTPArchiveReports() const metrics = configs.listMetrics() const lenses = configs.lenses -const bucket = 'httparchive' -const storagePath = '/reports/dev/' +const bucket = constants.bucket +const storagePath = constants.storagePath +const dataset = 'reports' // Adjust start and end dates to update reports retrospectively -const startDate = '2024-12-01' // constants.currentMonth; -const endDate = '2024-12-01' // constants.currentMonth; +const startDate = constants.currentMonth; // '2025-07-01' +const endDate = constants.currentMonth; // '2025-07-01' -function generateExportPath (ctx, params) { +function generateExportPath (params) { + objectName = storagePath if (params.sql.type === 'histogram') { - return `${storagePath}${params.date.replaceAll('-', '_')}/${params.metric.id}.json` + objectName = objectName + params.date.replaceAll('-', '_') + '/' + params.metric.id } else if (params.sql.type === 'timeseries') { - return `${storagePath}${params.metric.id}.json` + objectName = objectName + params.metric.id } else { throw new Error('Unknown SQL type') } + return objectName + '_test.json' // TODO: remove test suffix from the path } -function generateExportQuery (ctx, params) { +function generateExportQuery (params) { let query = '' if (params.sql.type === 'histogram') { query = ` -SELECT * EXCEPT(date) -FROM \`reports.${params.sql.type}\` +SELECT + * EXCEPT(date, metric, lens) +FROM \`${dataset}.${params.tableName}\` WHERE date = '${params.date}' + AND metric = '${params.metric.id}' + AND lens = 
'${params.lens.name}' +ORDER BY bin ASC ` } else if (params.sql.type === 'timeseries') { query = ` SELECT FORMAT_DATE('%Y_%m_%d', date) AS date, - * EXCEPT(date) -FROM \`reports.${params.sql.type}\` + * EXCEPT(date, metric, lens) +FROM \`${dataset}.${params.tableName}\` +WHERE metric = '${params.metric.id}' + AND lens = '${params.lens.name}' +ORDER BY date DESC ` } else { throw new Error('Unknown SQL type') @@ -60,7 +70,8 @@ for ( metric, sql, lens: { name: key, sql: value }, - devRankFilter: constants.devRankFilter + devRankFilter: constants.devRankFilter, + tableName: metric.id + '_' + sql.type }) } }) @@ -68,36 +79,45 @@ for ( } iterations.forEach((params, i) => { - operate( - params.metric.id + '_' + params.sql.type + '_' + params.lens.name + '_' + params.date) + operate(params.tableName + '_' + params.date + '_' + params.lens.name) .tags(['crawl_complete', 'reports']) .queries(ctx => ` -CREATE TABLE IF NOT EXISTS reports.${params.sql.type} ( - date DATE, - lens STRING, - metric STRING, - client STRING, - data JSON -) +DECLARE job_config JSON; + +/* First report run +CREATE TABLE IF NOT EXISTS ${dataset}.${params.tableName} PARTITION BY date -CLUSTER BY metric, lens, client; +CLUSTER BY metric, lens, client +AS +*/ -DELETE FROM reports.${params.sql.type} +--/* Subsequent report run +DELETE FROM ${dataset}.${params.tableName} WHERE date = '${params.date}' -AND metric = '${params.metric.id}'; - -INSERT INTO reports.${params.sql.type} ${params.sql.query(ctx, params)}; + AND metric = '${params.metric.id}' + AND lens = '${params.lens.name}'; +INSERT INTO ${dataset}.${params.tableName} +--*/ SELECT -reports.run_export_job( - JSON '''{ - "destination": "cloud_storage", - "config": { - "bucket": "${bucket}", - "name": "${generateExportPath(ctx, params)}" - }, - "query": "${generateExportQuery(ctx, params)}" - }''' + '${params.metric.id}' AS metric, + '${params.lens.name}' AS lens, + * +FROM ( + ${params.sql.query(ctx, params)} ); - `) + +SET job_config = TO_JSON( + 
STRUCT( + "cloud_storage" AS destination, + STRUCT( + "httparchive" AS bucket, + "${generateExportPath(params)}" AS name + ) AS config, + r"${generateExportQuery(params)}" AS query + ) +); + +SELECT reports.run_export_job(job_config); + `) }) diff --git a/includes/constants.js b/includes/constants.js index dec69ffb..3f10905f 100644 --- a/includes/constants.js +++ b/includes/constants.js @@ -49,6 +49,7 @@ class DataformTemplateBuilder { if (typeof value === 'string') return `'${value}'` if (typeof value === 'number') return value.toString() if (typeof value === 'boolean') return value.toString() + if (typeof value === 'function') return value.toString() // For objects or arrays, use JSON.stringify return JSON.stringify(value) diff --git a/includes/reports.js b/includes/reports.js index d0d68edd..5d01df6b 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -12,9 +12,10 @@ WITH pages AS ( date, client, CAST(FLOOR(FLOAT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin - FROM crawl.pages + FROM ${ctx.ref('crawl', 'pages')} WHERE date = '${params.date}' + ${params.devRankFilter} ${params.lens.sql} AND is_root_page AND FLOAT64(summary.bytesTotal) > 0 @@ -52,9 +53,10 @@ WITH pages AS ( date, client, FLOAT64(summary.bytesTotal) AS bytesTotal - FROM crawl.pages + FROM ${ctx.ref('crawl', 'pages')} WHERE date = '${params.date}' + ${params.devRankFilter} ${params.lens.sql} AND is_root_page AND INT64(summary.bytesTotal) > 0 @@ -63,7 +65,7 @@ WITH pages AS ( SELECT date, client, - UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50, From 9a360df69a8ac335c6d487573cf3c33312fe21e7 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 02:17:50 
+0200 Subject: [PATCH 14/22] beautified --- definitions/output/reports/reports_dynamic.js | 304 ++++++++++++------ 1 file changed, 198 insertions(+), 106 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index d9caf66e..f0c9822d 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -1,123 +1,215 @@ -const configs = new reports.HTTPArchiveReports() -const metrics = configs.listMetrics() -const lenses = configs.lenses - -const bucket = constants.bucket -const storagePath = constants.storagePath -const dataset = 'reports' - -// Adjust start and end dates to update reports retrospectively -const startDate = constants.currentMonth; // '2025-07-01' -const endDate = constants.currentMonth; // '2025-07-01' - -function generateExportPath (params) { - objectName = storagePath - if (params.sql.type === 'histogram') { - objectName = objectName + params.date.replaceAll('-', '_') + '/' + params.metric.id - } else if (params.sql.type === 'timeseries') { - objectName = objectName + params.metric.id +/** + * Dynamic Reports Generator + * + * This file automatically generates Dataform operations for HTTP Archive reports. + * It creates operations for each combination of: + * - Date range (from startDate to endDate) + * - Metrics (defined in includes/reports.js) + * - SQL types (histogram, timeseries) + * - Lenses (data filters like all, top1k, wordpress, etc.) + * + * Each operation: + * 1. Calculates metrics from crawl data + * 2. Stores results in BigQuery tables + * 3. 
Exports data to Cloud Storage as JSON + */ + +// Initialize configurations +const httpArchiveReports = new reports.HTTPArchiveReports() +const availableMetrics = httpArchiveReports.listMetrics() +const availableLenses = httpArchiveReports.lenses + +// Configuration constants +const EXPORT_CONFIG = { + bucket: constants.bucket, + storagePath: constants.storagePath, + dataset: 'reports', + testSuffix: '_test.json' // TODO: remove test suffix from the path +} + +// Date range for report generation +// Adjust these dates to update reports retrospectively +const DATE_RANGE = { + startDate: constants.currentMonth, // '2025-07-01' + endDate: constants.currentMonth // '2025-07-01' +} + +/** + * Generates the Cloud Storage export path for a report + * @param {Object} reportConfig - Report configuration object + * @returns {string} - Cloud Storage object path + */ +function buildExportPath(reportConfig) { + const { sql, date, metric } = reportConfig + let objectPath = EXPORT_CONFIG.storagePath + + if (sql.type === 'histogram') { + // Histogram exports are organized by date folders + const dateFolder = date.replaceAll('-', '_') + objectPath += `${dateFolder}/${metric.id}` + } else if (sql.type === 'timeseries') { + // Timeseries exports are organized by metric + objectPath += metric.id } else { - throw new Error('Unknown SQL type') + throw new Error(`Unknown SQL type: ${sql.type}`) } - return objectName + '_test.json' // TODO: remove test suffix from the path + + return objectPath + EXPORT_CONFIG.testSuffix } -function generateExportQuery (params) { - let query = '' - if (params.sql.type === 'histogram') { +/** + * Generates the BigQuery export query for a report + * @param {Object} reportConfig - Report configuration object + * @returns {string} - SQL query for exporting data + */ +function buildExportQuery(reportConfig) { + const { sql, date, metric, lens, tableName } = reportConfig + + let query + if (sql.type === 'histogram') { query = ` -SELECT - * EXCEPT(date, metric, 
lens) -FROM \`${dataset}.${params.tableName}\` -WHERE date = '${params.date}' - AND metric = '${params.metric.id}' - AND lens = '${params.lens.name}' -ORDER BY bin ASC -` - } else if (params.sql.type === 'timeseries') { + SELECT + * EXCEPT(date, metric, lens) + FROM \`${EXPORT_CONFIG.dataset}.${tableName}\` + WHERE date = '${date}' + AND metric = '${metric.id}' + AND lens = '${lens.name}' + ORDER BY bin ASC + ` + } else if (sql.type === 'timeseries') { query = ` -SELECT - FORMAT_DATE('%Y_%m_%d', date) AS date, - * EXCEPT(date, metric, lens) -FROM \`${dataset}.${params.tableName}\` -WHERE metric = '${params.metric.id}' - AND lens = '${params.lens.name}' -ORDER BY date DESC -` + SELECT + FORMAT_DATE('%Y_%m_%d', date) AS date, + * EXCEPT(date, metric, lens) + FROM \`${EXPORT_CONFIG.dataset}.${tableName}\` + WHERE metric = '${metric.id}' + AND lens = '${lens.name}' + ORDER BY date DESC + ` } else { - throw new Error('Unknown SQL type') + throw new Error(`Unknown SQL type: ${sql.type}`) } - const queryOutput = query.replace(/[\r\n]+/g, ' ') - return queryOutput + // Convert to single line for JSON embedding + return query.replace(/[\r\n]+/g, ' ').trim() } -const iterations = [] -// dates -for ( - let date = endDate; - date >= startDate; - date = constants.fnPastMonth(date) -) { - // metrics - metrics.forEach(metric => { - // timeseries and histograms - metric.SQL.forEach(sql => { - // lenses - for (const [key, value] of Object.entries(lenses)) { - iterations.push({ - date, - metric, - sql, - lens: { name: key, sql: value }, - devRankFilter: constants.devRankFilter, - tableName: metric.id + '_' + sql.type +/** + * Creates a report configuration object + * @param {string} date - Report date (YYYY-MM-DD) + * @param {Object} metric - Metric configuration + * @param {Object} sql - SQL configuration (type and query) + * @param {string} lensName - Lens name + * @param {string} lensSQL - Lens SQL filter + * @returns {Object} - Complete report configuration + */ +function 
createReportConfig(date, metric, sql, lensName, lensSQL) { + return { + date, + metric, + sql, + lens: { name: lensName, sql: lensSQL }, + devRankFilter: constants.devRankFilter, + tableName: `${metric.id}_${sql.type}` + } +} + +/** + * Generates all report configurations for the specified date range + * @returns {Array} - Array of report configuration objects + */ +function generateReportConfigurations() { + const reportConfigs = [] + + // Generate configurations for each date in range + for (let date = DATE_RANGE.endDate; + date >= DATE_RANGE.startDate; + date = constants.fnPastMonth(date)) { + + // For each available metric + availableMetrics.forEach(metric => { + // For each SQL type (histogram, timeseries) + metric.SQL.forEach(sql => { + // For each available lens (all, top1k, wordpress, etc.) + Object.entries(availableLenses).forEach(([lensName, lensSQL]) => { + const config = createReportConfig(date, metric, sql, lensName, lensSQL) + reportConfigs.push(config) }) - } + }) }) - }) + } + + return reportConfigs +} + +/** + * Creates a Dataform operation name for a report configuration + * @param {Object} reportConfig - Report configuration object + * @returns {string} - Operation name + */ +function createOperationName(reportConfig) { + const { tableName, date, lens } = reportConfig + return `${tableName}_${date}_${lens.name}` +} + +/** + * Generates the SQL for a Dataform operation + * @param {Object} ctx - Dataform context + * @param {Object} reportConfig - Report configuration object + * @returns {string} - Complete SQL for the operation + */ +function generateOperationSQL(ctx, reportConfig) { + const { date, metric, lens, sql, tableName } = reportConfig + + return ` + DECLARE job_config JSON; + + /* First report run - uncomment to create table + CREATE TABLE IF NOT EXISTS ${EXPORT_CONFIG.dataset}.${tableName} + PARTITION BY date + CLUSTER BY metric, lens, client + AS + */ + + --/* Subsequent report run + DELETE FROM ${EXPORT_CONFIG.dataset}.${tableName} + 
WHERE date = '${date}' + AND metric = '${metric.id}' + AND lens = '${lens.name}'; + INSERT INTO ${EXPORT_CONFIG.dataset}.${tableName} + --*/ + + SELECT + '${metric.id}' AS metric, + '${lens.name}' AS lens, + * + FROM ( + ${sql.query(ctx, reportConfig)} + ); + + SET job_config = TO_JSON( + STRUCT( + "cloud_storage" AS destination, + STRUCT( + "httparchive" AS bucket, + "${buildExportPath(reportConfig)}" AS name + ) AS config, + r"${buildExportQuery(reportConfig)}" AS query + ) + ); + + SELECT reports.run_export_job(job_config); + ` } -iterations.forEach((params, i) => { - operate(params.tableName + '_' + params.date + '_' + params.lens.name) +// Generate all report configurations +const reportConfigurations = generateReportConfigurations() + +// Create Dataform operations for each report configuration +reportConfigurations.forEach(reportConfig => { + const operationName = createOperationName(reportConfig) + + operate(operationName) .tags(['crawl_complete', 'reports']) - .queries(ctx => ` -DECLARE job_config JSON; - -/* First report run -CREATE TABLE IF NOT EXISTS ${dataset}.${params.tableName} -PARTITION BY date -CLUSTER BY metric, lens, client -AS -*/ - ---/* Subsequent report run -DELETE FROM ${dataset}.${params.tableName} -WHERE date = '${params.date}' - AND metric = '${params.metric.id}' - AND lens = '${params.lens.name}'; -INSERT INTO ${dataset}.${params.tableName} ---*/ - -SELECT - '${params.metric.id}' AS metric, - '${params.lens.name}' AS lens, - * -FROM ( - ${params.sql.query(ctx, params)} -); - -SET job_config = TO_JSON( - STRUCT( - "cloud_storage" AS destination, - STRUCT( - "httparchive" AS bucket, - "${generateExportPath(params)}" AS name - ) AS config, - r"${generateExportQuery(params)}" AS query - ) -); - -SELECT reports.run_export_job(job_config); - `) + .queries(ctx => generateOperationSQL(ctx, reportConfig)) }) From 50d9522429cc618f1211201698d1bfdf1e8ae6f7 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> 
Date: Thu, 31 Jul 2025 10:30:47 +0200 Subject: [PATCH 15/22] formatting --- definitions/output/reports/reports_dynamic.js | 82 +++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index f0c9822d..f3d167fd 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -123,8 +123,8 @@ function generateReportConfigurations() { // Generate configurations for each date in range for (let date = DATE_RANGE.endDate; - date >= DATE_RANGE.startDate; - date = constants.fnPastMonth(date)) { + date >= DATE_RANGE.startDate; + date = constants.fnPastMonth(date)) { // For each available metric availableMetrics.forEach(metric => { @@ -162,44 +162,44 @@ function generateOperationSQL(ctx, reportConfig) { const { date, metric, lens, sql, tableName } = reportConfig return ` - DECLARE job_config JSON; - - /* First report run - uncomment to create table - CREATE TABLE IF NOT EXISTS ${EXPORT_CONFIG.dataset}.${tableName} - PARTITION BY date - CLUSTER BY metric, lens, client - AS - */ - - --/* Subsequent report run - DELETE FROM ${EXPORT_CONFIG.dataset}.${tableName} - WHERE date = '${date}' - AND metric = '${metric.id}' - AND lens = '${lens.name}'; - INSERT INTO ${EXPORT_CONFIG.dataset}.${tableName} - --*/ - - SELECT - '${metric.id}' AS metric, - '${lens.name}' AS lens, - * - FROM ( - ${sql.query(ctx, reportConfig)} - ); - - SET job_config = TO_JSON( - STRUCT( - "cloud_storage" AS destination, - STRUCT( - "httparchive" AS bucket, - "${buildExportPath(reportConfig)}" AS name - ) AS config, - r"${buildExportQuery(reportConfig)}" AS query - ) - ); - - SELECT reports.run_export_job(job_config); - ` +DECLARE job_config JSON; + +/* First report run - uncomment to create table +CREATE TABLE IF NOT EXISTS ${EXPORT_CONFIG.dataset}.${tableName} +PARTITION BY date +CLUSTER BY metric, lens, client +AS +*/ + +--/* Subsequent 
report run +DELETE FROM ${EXPORT_CONFIG.dataset}.${tableName} +WHERE date = '${date}' + AND metric = '${metric.id}' + AND lens = '${lens.name}'; +INSERT INTO ${EXPORT_CONFIG.dataset}.${tableName} +--*/ + +SELECT + '${metric.id}' AS metric, + '${lens.name}' AS lens, + * +FROM ( + ${sql.query(ctx, reportConfig)} +); + +SET job_config = TO_JSON( + STRUCT( + "cloud_storage" AS destination, + STRUCT( + "httparchive" AS bucket, + "${buildExportPath(reportConfig)}" AS name + ) AS config, + r"${buildExportQuery(reportConfig)}" AS query + ) +); + +SELECT reports.run_export_job(job_config); +` } // Generate all report configurations @@ -210,6 +210,6 @@ reportConfigurations.forEach(reportConfig => { const operationName = createOperationName(reportConfig) operate(operationName) - .tags(['crawl_complete', 'reports']) + .tags(['crawl_complete', 'crawl_reports']) .queries(ctx => generateOperationSQL(ctx, reportConfig)) }) From 8a294e8c38b1fa2bb5f632a22a9b2b8387bc5b57 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 10:31:41 +0200 Subject: [PATCH 16/22] sync storage export --- infra/bigquery-export/index.js | 16 +- infra/bigquery-export/package-lock.json | 136 +------ infra/bigquery-export/package.json | 3 +- infra/dataform-service/bigquery.js | 34 ++ infra/dataform-service/index.js | 46 ++- infra/dataform-service/package-lock.json | 374 +++++++++++++++++- infra/dataform-service/package.json | 3 +- .../storage.js | 5 +- infra/tf/.terraform.lock.hcl | 52 +-- 9 files changed, 466 insertions(+), 203 deletions(-) create mode 100644 infra/dataform-service/bigquery.js rename infra/{bigquery-export => dataform-service}/storage.js (83%) diff --git a/infra/bigquery-export/index.js b/infra/bigquery-export/index.js index fe301804..7a81dc96 100644 --- a/infra/bigquery-export/index.js +++ b/infra/bigquery-export/index.js @@ -1,27 +1,15 @@ -import { StorageUpload } from './storage.js' import { FirestoreBatch } from 
'./firestore.js' async function main () { const { query, destination, config } = process.env.EXPORT_CONFIG && JSON.parse(process.env.EXPORT_CONFIG) - if (!destination) { - throw new Error('No destination found') - } - - if (destination === 'cloud_storage') { - console.info('Cloud Storage export') - console.log(query, config) - const storage = new StorageUpload(config.bucket) - await storage.exportToJson(query, config.name) - } else if (destination === 'firestore') { - console.info('Firestore export') + if (destination === 'firestore') { console.log(query, config) const firestore = new FirestoreBatch() await firestore.export(query, config) - } else { - throw new Error('Bad Request: destination unknown') } + console.info('Export finished successfully') return 'OK' } diff --git a/infra/bigquery-export/package-lock.json b/infra/bigquery-export/package-lock.json index 3ca7bb68..8ff116dc 100644 --- a/infra/bigquery-export/package-lock.json +++ b/infra/bigquery-export/package-lock.json @@ -9,8 +9,7 @@ "version": "1.0.0", "dependencies": { "@google-cloud/bigquery": "8.1.1", - "@google-cloud/firestore": "7.11.3", - "@google-cloud/storage": "7.16.0" + "@google-cloud/firestore": "7.11.3" } }, "node_modules/@google-cloud/bigquery": { @@ -291,19 +290,6 @@ "node": ">=14.0.0" } }, - "node_modules/@google-cloud/paginator": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/@google-cloud/paginator/-/paginator-5.0.2.tgz", - "integrity": "sha512-DJS3s0OVH4zFDB1PzjxAsHqJT6sKVbRwwML0ZBP9PbU7Yebtu/7SWMRzvO2J3nUi9pRNITCfu4LJeooM2w4pjg==", - "license": "Apache-2.0", - "dependencies": { - "arrify": "^2.0.0", - "extend": "^3.0.2" - }, - "engines": { - "node": ">=14.0.0" - } - }, "node_modules/@google-cloud/precise-date": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/@google-cloud/precise-date/-/precise-date-5.0.0.tgz", @@ -331,41 +317,6 @@ "node": ">=14" } }, - "node_modules/@google-cloud/storage": { - "version": "7.16.0", - "resolved": 
"https://registry.npmjs.org/@google-cloud/storage/-/storage-7.16.0.tgz", - "integrity": "sha512-7/5LRgykyOfQENcm6hDKP8SX/u9XxE5YOiWOkgkwcoO+cG8xT/cyOvp9wwN3IxfdYgpHs8CE7Nq2PKX2lNaEXw==", - "license": "Apache-2.0", - "dependencies": { - "@google-cloud/paginator": "^5.0.0", - "@google-cloud/projectify": "^4.0.0", - "@google-cloud/promisify": "<4.1.0", - "abort-controller": "^3.0.0", - "async-retry": "^1.3.3", - "duplexify": "^4.1.3", - "fast-xml-parser": "^4.4.1", - "gaxios": "^6.0.2", - "google-auth-library": "^9.6.3", - "html-entities": "^2.5.2", - "mime": "^3.0.0", - "p-limit": "^3.0.1", - "retry-request": "^7.0.0", - "teeny-request": "^9.0.0", - "uuid": "^8.0.0" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/@google-cloud/storage/node_modules/uuid": { - "version": "8.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", - "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, "node_modules/@grpc/grpc-js": { "version": "1.12.4", "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.12.4.tgz", @@ -582,15 +533,6 @@ "node": ">=8" } }, - "node_modules/async-retry": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", - "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", - "license": "MIT", - "dependencies": { - "retry": "0.13.1" - } - }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -862,28 +804,6 @@ "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", "license": "MIT" }, - "node_modules/fast-xml-parser": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.0.tgz", - "integrity": 
"sha512-/PlTQCI96+fZMAOLMZK4CWG1ItCbfZ/0jx7UIJFChPNrx7tcEgerUgWbeieCM9MfHInUDyK8DWYZ+YrywDJuTg==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - }, - { - "type": "paypal", - "url": "https://paypal.me/naturalintelligence" - } - ], - "license": "MIT", - "dependencies": { - "strnum": "^1.0.5" - }, - "bin": { - "fxparser": "src/cli/cli.js" - } - }, "node_modules/fetch-blob": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", @@ -1272,18 +1192,6 @@ "node": ">= 0.4" } }, - "node_modules/mime": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", - "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==", - "license": "MIT", - "bin": { - "mime": "cli.js" - }, - "engines": { - "node": ">=10.0.0" - } - }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -1369,21 +1277,6 @@ "wrappy": "1" } }, - "node_modules/p-limit": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", - "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", - "license": "MIT", - "dependencies": { - "yocto-queue": "^0.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/proto3-json-serializer": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/proto3-json-serializer/-/proto3-json-serializer-2.0.2.tgz", @@ -1443,15 +1336,6 @@ "node": ">=0.10.0" } }, - "node_modules/retry": { - "version": "0.13.1", - "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", - "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, 
"node_modules/retry-request": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/retry-request/-/retry-request-7.0.2.tgz", @@ -1536,12 +1420,6 @@ "node": ">=8" } }, - "node_modules/strnum": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", - "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==", - "license": "MIT" - }, "node_modules/stubs": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/stubs/-/stubs-3.0.0.tgz", @@ -1703,18 +1581,6 @@ "engines": { "node": ">=12" } - }, - "node_modules/yocto-queue": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", - "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } } } } diff --git a/infra/bigquery-export/package.json b/infra/bigquery-export/package.json index 5ecce9cd..cf0510a2 100644 --- a/infra/bigquery-export/package.json +++ b/infra/bigquery-export/package.json @@ -9,8 +9,7 @@ "type": "module", "dependencies": { "@google-cloud/bigquery": "8.1.1", - "@google-cloud/firestore": "7.11.3", - "@google-cloud/storage": "7.16.0" + "@google-cloud/firestore": "7.11.3" }, "author": "@max-ostapenko" } diff --git a/infra/dataform-service/bigquery.js b/infra/dataform-service/bigquery.js new file mode 100644 index 00000000..8de968b5 --- /dev/null +++ b/infra/dataform-service/bigquery.js @@ -0,0 +1,34 @@ +import { BigQuery } from '@google-cloud/bigquery' + +export class BigQueryExport { + constructor (options = {}) { + this.bigquery = new BigQuery(options) + } + + async queryResults (query) { + const options = { + query, + projectId: this.projectId, + location: this.location + } + + const [job] = await this.bigquery.createQueryJob(options) + console.info(`Running BigQuery query: 
${job.id}`) + const [rows] = await job.getQueryResults() + console.log('Fetching query results completed') + return rows + } + + async queryResultsStream (query) { + const options = { + query, + projectId: this.projectId, + location: this.location + } + + const [job] = await this.bigquery.createQueryJob(options) + console.info(`Running BigQuery query: ${job.id}`) + const rows = job.getQueryResultsStream() + return rows + } +} diff --git a/infra/dataform-service/index.js b/infra/dataform-service/index.js index 00445042..515f24b7 100644 --- a/infra/dataform-service/index.js +++ b/infra/dataform-service/index.js @@ -1,13 +1,19 @@ import functions from '@google-cloud/functions-framework' -import { BigQuery } from '@google-cloud/bigquery' +import { BigQueryExport } from './bigquery.js' import { callRunJob } from './cloud_run.js' import { getCompilationResults, runWorkflow } from './dataform.js' +import { StorageUpload } from './storage.js' const projectId = 'httparchive' const location = 'us-central1' const jobId = 'bigquery-export' +const bigquery = new BigQueryExport({ + projectId, + location: 'US' +}) + const TRIGGERS = { crux_ready: { type: 'poller', @@ -83,8 +89,22 @@ async function handleExport (req, res) { return } - const jobName = `projects/${projectId}/locations/${location}/jobs/${jobId}` - await callRunJob(jobName, payload) + const { query, destination, config } = payload + + if (destination === 'cloud_storage') { + console.info('Cloud Storage export') + console.log(query, config) + + const data = await bigquery.queryResults(query) + const storage = new StorageUpload(config.bucket) + await storage.exportToJson(data, config.name) + } else if (destination === 'firestore') { + console.info('Firestore export') + const jobName = `projects/${projectId}/locations/${location}/jobs/${jobId}` + await callRunJob(jobName, payload) + } else { + throw new Error('Bad Request: destination unknown') + } res.status(200).json({ replies: [200], @@ -136,7 +156,9 @@ async function 
handleTrigger (req, res) { const trigger = TRIGGERS[eventName] if (trigger.type === 'poller') { console.info(`Poller action ${eventName}`) - const result = await runQuery(trigger.query) + + const rows = await bigquery.queryResults(trigger.query) + const result = rows.length > 0 && rows[0][Object.keys(rows[0])[0]] === true console.info(`Query result: ${result}`) if (result) { await executeAction(trigger.action, trigger.actionArgs) @@ -160,22 +182,6 @@ async function handleTrigger (req, res) { } } -/** - * Run BigQuery poll query. - * - * @param {string} query Polling query. - * @returns {boolean} Query result. - */ -async function runQuery (query) { - const bigquery = new BigQuery() - - const [job] = await bigquery.createQueryJob({ query }) - console.info(`Query job ${job.id} started.`) - - const [rows] = await job.getQueryResults() - return rows.length > 0 && rows[0][Object.keys(rows[0])[0]] === true -} - /** * Execute action based on the trigger configuration. * diff --git a/infra/dataform-service/package-lock.json b/infra/dataform-service/package-lock.json index d76c0a0c..aeffe079 100644 --- a/infra/dataform-service/package-lock.json +++ b/infra/dataform-service/package-lock.json @@ -11,7 +11,8 @@ "@google-cloud/bigquery": "8.1.1", "@google-cloud/dataform": "2.2.0", "@google-cloud/functions-framework": "4.0.0", - "@google-cloud/run": "2.3.0" + "@google-cloud/run": "2.3.0", + "@google-cloud/storage": "7.16.0" } }, "node_modules/@babel/code-frame": { @@ -182,6 +183,268 @@ "node": ">=18" } }, + "node_modules/@google-cloud/storage": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/@google-cloud/storage/-/storage-7.16.0.tgz", + "integrity": "sha512-7/5LRgykyOfQENcm6hDKP8SX/u9XxE5YOiWOkgkwcoO+cG8xT/cyOvp9wwN3IxfdYgpHs8CE7Nq2PKX2lNaEXw==", + "license": "Apache-2.0", + "dependencies": { + "@google-cloud/paginator": "^5.0.0", + "@google-cloud/projectify": "^4.0.0", + "@google-cloud/promisify": "<4.1.0", + "abort-controller": "^3.0.0", + "async-retry": 
"^1.3.3", + "duplexify": "^4.1.3", + "fast-xml-parser": "^4.4.1", + "gaxios": "^6.0.2", + "google-auth-library": "^9.6.3", + "html-entities": "^2.5.2", + "mime": "^3.0.0", + "p-limit": "^3.0.1", + "retry-request": "^7.0.0", + "teeny-request": "^9.0.0", + "uuid": "^8.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/@google-cloud/paginator": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@google-cloud/paginator/-/paginator-5.0.2.tgz", + "integrity": "sha512-DJS3s0OVH4zFDB1PzjxAsHqJT6sKVbRwwML0ZBP9PbU7Yebtu/7SWMRzvO2J3nUi9pRNITCfu4LJeooM2w4pjg==", + "license": "Apache-2.0", + "dependencies": { + "arrify": "^2.0.0", + "extend": "^3.0.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/storage/node_modules/@google-cloud/promisify": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@google-cloud/promisify/-/promisify-4.0.0.tgz", + "integrity": "sha512-Orxzlfb9c67A15cq2JQEyVc7wEsmFBmHjZWZYQMUyJ1qivXyMwdyNOs9odi79hze+2zqdTtu1E19IM/FtqZ10g==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/@google-cloud/storage/node_modules/arrify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/arrify/-/arrify-2.0.1.tgz", + "integrity": "sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/@google-cloud/storage/node_modules/debug": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", 
+ "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/@google-cloud/storage/node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/gaxios/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/@google-cloud/storage/node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": 
"sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/storage/node_modules/mime": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", + "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/@google-cloud/storage/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/@google-cloud/storage/node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": 
"sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/@google-cloud/storage/node_modules/retry-request": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/retry-request/-/retry-request-7.0.2.tgz", + "integrity": "sha512-dUOvLMJ0/JJYEn8NrpOaGNE7X3vpI5XlZS/u0ANjqtcZVKnIxP7IgCFwrKTxENw29emmwug53awKtaMm4i9g5w==", + "license": "MIT", + "dependencies": { + "@types/request": "^2.48.8", + "extend": "^3.0.2", + "teeny-request": "^9.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/teeny-request": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-9.0.0.tgz", + "integrity": "sha512-resvxdc6Mgb7YEThw6G6bExlXKkv6+YbuzGg9xuXxSgxJF7Ozs+o8Y9+2R3sArdWdW8nOokoQb1yrpFB0pQK2g==", + "license": "Apache-2.0", + "dependencies": { + "http-proxy-agent": "^5.0.0", + "https-proxy-agent": "^5.0.0", + "node-fetch": "^2.6.9", + "stream-events": "^1.0.5", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@google-cloud/storage/node_modules/teeny-request/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/@google-cloud/storage/node_modules/teeny-request/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": 
"sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/@grpc/grpc-js": { "version": "1.13.4", "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz", @@ -532,6 +795,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/async-retry": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", + "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", + "license": "MIT", + "dependencies": { + "retry": "0.13.1" + } + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -1058,6 +1330,24 @@ ], "license": "BSD-3-Clause" }, + "node_modules/fast-xml-parser": { + "version": "4.5.3", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.3.tgz", + "integrity": "sha512-RKihhV+SHsIUGXObeVy9AXiBbFwkVk7Syp8XgwN5U3JV416+Gwp/GO9i0JYKmikykgz/UHRrrV4ROuZEo/T0ig==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "dependencies": { + "strnum": "^1.1.1" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, "node_modules/fetch-blob": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", @@ -1622,6 +1912,18 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" + } + }, "node_modules/is-typed-array": { "version": "1.1.15", "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.15.tgz", @@ -1884,6 +2186,21 @@ "wrappy": "1" } }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/parse-json": { "version": "8.3.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-8.3.0.tgz", @@ -2096,6 +2413,15 @@ "node": ">=0.10.0" } }, + "node_modules/retry": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", + "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/retry-request": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/retry-request/-/retry-request-8.0.0.tgz", @@ -2405,6 +2731,18 @@ "node": ">=8" } }, + "node_modules/strnum": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.1.2.tgz", + "integrity": "sha512-vrN+B7DBIoTTZjnPNewwhx6cBA/H+IS7rfW68n7XxC1y7uoiGQBxaKzqucGUgavX15dJgiGztLJ8vxuEzwqBdA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT" + }, "node_modules/stubs": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/stubs/-/stubs-3.0.0.tgz", @@ -2483,6 +2821,12 @@ "node": ">=0.6" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": 
"sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/type-fest": { "version": "4.41.0", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", @@ -2600,6 +2944,22 @@ "node": ">= 8" } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which-typed-array": { "version": "1.1.19", "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.19.tgz", @@ -2679,6 +3039,18 @@ "engines": { "node": ">=12" } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } } } } diff --git a/infra/dataform-service/package.json b/infra/dataform-service/package.json index bd9baad6..82a9b06d 100644 --- a/infra/dataform-service/package.json +++ b/infra/dataform-service/package.json @@ -8,7 +8,8 @@ "@google-cloud/bigquery": "8.1.1", "@google-cloud/dataform": "2.2.0", "@google-cloud/functions-framework": "4.0.0", - "@google-cloud/run": "2.3.0" + "@google-cloud/run": "2.3.0", + "@google-cloud/storage": "7.16.0" }, "scripts": { "start": "npx 
functions-framework --target=dataform-service --signature-type=http --port=${PORT:-8080}", diff --git a/infra/bigquery-export/storage.js b/infra/dataform-service/storage.js similarity index 83% rename from infra/bigquery-export/storage.js rename to infra/dataform-service/storage.js index 18893633..a9d1e8ff 100644 --- a/infra/bigquery-export/storage.js +++ b/infra/dataform-service/storage.js @@ -1,9 +1,7 @@ import { Storage } from '@google-cloud/storage' -import { BigQueryExport } from './bigquery.js' import { Readable } from 'stream' import zlib from 'zlib' -const bigquery = new BigQueryExport() const storage = new Storage() export class StorageUpload { @@ -15,8 +13,7 @@ export class StorageUpload { }) } - async exportToJson (query, fileName) { - const data = await bigquery.queryResults(query) + async exportToJson (data, fileName) { const bucket = storage.bucket(this.bucket) const file = bucket.file(fileName) diff --git a/infra/tf/.terraform.lock.hcl b/infra/tf/.terraform.lock.hcl index 16a504f3..267e01f4 100644 --- a/infra/tf/.terraform.lock.hcl +++ b/infra/tf/.terraform.lock.hcl @@ -2,42 +2,42 @@ # Manual edits may be lost in future updates. 
provider "registry.terraform.io/hashicorp/google" { - version = "6.45.0" + version = "6.46.0" constraints = ">= 6.13.0, >= 6.40.0" hashes = [ - "h1:R/LJZ1PSvPcorsYDg905ivFmyMBNQtoJmWNdWry5YOk=", - "zh:0d1ebd9db37c2322e9270baa05869a32163b16df94cdd2adae6211991596b681", - "zh:14c20414a7d7e1daf12c830c206d01815f7d1c7362d5218d137f7c38a855d7be", - "zh:5ecffa1d0edf3d7cb50d28cd87e9ed28ad1072b0b28590d57b386dd77046ee39", - "zh:6eb832171a024533615bdf7116b4ad2220b1be72d70f5f4697526800572d2735", - "zh:874e3483b11029ac8ebe7c210c06e54939f058284921da9005197eb2e54e9af8", - "zh:9c1fe027de41fb5d3dd042b346768f7a747dd58b9aa70b52909e1a108d224cee", - "zh:9fce156fe46bade247057a5973280e1723e8ebb5c33ae290ec93b16ddcef8456", - "zh:adb3f15490c7a890eea06e3e9a461b9890bd9f4921c8682625881375f0873d8b", - "zh:b0268163f7cf363fa5163ce20b4815e820057aba8a518e2b699156464c5d81a2", - "zh:c5e6bc7108a93b349adaa618fc9f499d9419310378e18fed1171b4278b221e46", - "zh:deb8de387f0bb209cce32d224c040b14f5e5f58f4c10c1ea24a48275b241968b", + "h1:UZHlfOqTr/hgeQTjXUMbOjmLQlMJ11ucCBRk/NURTmU=", + "zh:118169da16cb6febf5ec536a3fad2b2749836c7a0d0a4c80dffde8bf3e13530c", + "zh:1f70da65e59aff39c28bad2644a2a59b819a0790d4cacc4aa61d75f9682b7e33", + "zh:25cff0664b0dfc7851dcf95a785a623516aad12c04ad4e7f1daca380957ddb60", + "zh:48f70209b043243e3a3e001db0c205a9c6e8f8e6a73870d29118ef88007b6ae6", + "zh:637249a10189a9c7cbbb6819f35b2e1dfc6edf6b2df574cdef9204d98bdd7faa", + "zh:6cf1b7e40e92703af6a454ae788b504f1ba466414e1d79d0ceb851c2f9672d69", + "zh:976ae598f8247b88d2ce4e4c6b901cd93674538bd5336be51def5c7ffb00872f", + "zh:986e9d8e951f51a7225e9b972bf5b80f19557daa35052bc3740f9c0827e0d3d0", + "zh:9d8a1c9d0b4c2073db10082c768af92ffef8ab9b1ed2383c041194de8e143c30", + "zh:d55a1f71c502f8672225d8a4aaf257191a89fc2232ede63b79d6f237db6b4802", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:fccc1a4dbd98a89c9ce9402aa30418d2afa352286db90d1b1edbbccf48af003b", ] } provider "registry.terraform.io/hashicorp/google-beta" { - version = 
"6.45.0" + version = "6.46.0" constraints = ">= 6.40.0" hashes = [ - "h1:EkqZHpA9vOnqQzJi33nHPikdFG8U0k2f9mu0gGvTDw8=", - "zh:043df743b553dbf5207940cb53142cdd41f5418107c40ae51157f83fda40abbe", - "zh:06cef2a3d27b55167d37f49701eb2ed08aece5522099fdc3d735c82f3aef46a6", - "zh:0c7cc12238bf6a08c5dfed4a404ffe277f03543418d12dc53a2a675f68aea8a0", - "zh:4ddb97f34a2bd960d68a40a2794f277c34954d68df66ef75f2eec131a1772bf5", - "zh:706a5808781153c88373907cbd6c664a82bb7694248ed7777df3ded1cba7604c", - "zh:9406e27f4b57e8b0120435487b716a61f293727643703bf282029fa6ed665c61", - "zh:a05d6301e7bff97f218a889267297a12811372a44baadd844f5792a76651da85", - "zh:bf43bb31ecdd67d10c74da95e02ee09d6b4dc6df2b91e3cf3dc9de025af3edd8", - "zh:ed759835f1034916c8cae0e2c4c4944c0c3ac0179a6120adb1fea00ee534184a", - "zh:f41989715b63052bbbe82aa4337a6f4670a69b00d68fbed5fa2429912d3274e5", + "h1:75ECr6YkaVxQUX5JvebH53i433mcl0q54/rhzHw9Ev8=", + "zh:283dc3d15304a7902670eb10646dc59ea39b25a25d94efeb55e86864a617349f", + "zh:393d251252bdd6e4634fec0c0acaf5f1af9d4e4d09b59732ddde5bc9085792f7", + "zh:4b2984462419cf2a8f2763e740954f87c81d30b8f2cfb7240cb36427980283bd", + "zh:564f0702ffa3fbdd922288ee5dd666a79b032d74976bedaca620098d3ed9952e", + "zh:5c5eab41ff4ad04ed9899e603ca411182219ed3ecf55ee0f8c3d6814d0b6f967", + "zh:61c9b1b11ddb2299d950ec032846c00b59931c86f79b446f6180330107fa609f", + "zh:6c43ed1cafcfabf877e51d2c11082967215fc5fff1489da9b1f53ece053f40ab", + "zh:8be3dd5b357373461cbb520d8134d7811439035ab4efb42825f039b797ecb63a", + "zh:b126adc80f41e88903fc1a46b6472e924add1952e2245576d0a591dda78c0adb", + "zh:c21a08bffd0b20844d2bfe0914054a30d9e96e07208bc1c81f14d47057a6b85c", + "zh:e203bf509acfc9cce894af95b5e148dc0d5795b6fb31999f0050db7bd6b5e62f", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - "zh:ffcf161189884f7e362c00f9869bd6d445e773d58a446e7bb5d9c18f883c7a74", ] } From ab584a631627f3f27b54c9c179280eede59222d0 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: 
Thu, 31 Jul 2025 11:06:55 +0200 Subject: [PATCH 17/22] docs --- reports.md | 335 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 reports.md diff --git a/reports.md b/reports.md new file mode 100644 index 00000000..0dd5ed00 --- /dev/null +++ b/reports.md @@ -0,0 +1,335 @@ +# HTTP Archive Dynamic Reports + +This document describes the HTTP Archive dynamic reports system, which automatically generates standardized reports from HTTP Archive crawl data. + +## Overview + +The dynamic reports system generates Dataform operations that: + +1. Calculate metrics from HTTP Archive crawl data +2. Store results in BigQuery tables partitioned by date and clustered by metric/lens/client +3. Export data to Cloud Storage as JSON files for consumption by external systems + +## Architecture + +### Core Components + +- **`includes/reports.js`** - Defines metrics and lenses +- **`definitions/output/reports/reports_dynamic.js`** - Generates Dataform operations dynamically +- **`includes/constants.js`** - Provides shared constants and the `DataformTemplateBuilder` + +## Supported Features + +### SQL Types + +The system supports two types of SQL queries: + +#### 1. Histogram + +- **Purpose**: Distribution analysis with binned data +- **Output**: Contains `bin`, `volume`, `pdf`, `cdf` columns +- **Use case**: Page weight distributions, performance metric distributions +- **Export path**: `reports/{date_folder}/{metric_id}_test.json` + +#### 2. 
Timeseries + +- **Purpose**: Trend analysis over time +- **Output**: Contains percentile data (p10, p25, p50, p75, p90) with timestamps +- **Use case**: Performance trends, adoption over time +- **Export path**: `reports/{metric_id}_test.json` + +### Lenses (Data Filters) + +Lenses allow filtering data by different criteria: + +- **`all`** - No filter, all pages +- **`top1k`** - Top 1,000 ranked sites +- **`top10k`** - Top 10,000 ranked sites +- **`top100k`** - Top 100,000 ranked sites +- **`top1m`** - Top 1,000,000 ranked sites +- **`drupal`** - Sites using Drupal +- **`magento`** - Sites using Magento +- **`wordpress`** - Sites using WordPress + +### Date Range Processing + +- Configurable start and end dates +- Processes data month by month using `constants.fnPastMonth()` +- Supports retrospective report generation + +## How to Add a New Dynamic Report + +### Step 1: Define Your Metric + +Add your metric to the `_metrics` object in `includes/reports.js`: + +```javascript +const config = { + _metrics: { + // Existing metrics... 
+ + myNewMetric: { + SQL: [ + { + type: 'histogram', // or 'timeseries' + query: DataformTemplateBuilder.create((ctx, params) => ` + WITH pages AS ( + SELECT + date, + client, + -- Your binning logic for histogram + CAST(FLOOR(your_metric_value / bin_size) * bin_size AS INT64) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + AND is_root_page + AND your_metric_value > 0 + ) + + -- Your aggregation logic here + SELECT + *, + SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf + FROM ( + -- Calculate probability density function + SELECT + *, + volume / SUM(volume) OVER (PARTITION BY client) AS pdf + FROM ( + SELECT + *, + COUNT(0) AS volume + FROM pages + WHERE bin IS NOT NULL + GROUP BY date, client, bin + ) + ) + ORDER BY bin, client + `) + } + ] + } + } +} +``` + +### Step 2: Test Your Metric + +The metric will be automatically included in the next run of `reports_dynamic.js`. The system will generate operations for all combinations of: + +- Your new metric +- All available lenses +- All SQL types you defined +- The configured date range + +### Step 3: Verify Output + +Check that the generated operations: + +1. Create the expected BigQuery tables +2. Populate data correctly +3. Export to Cloud Storage in the expected format + +## Metric SQL Requirements + +### Template Parameters + +Your SQL template receives these parameters: + +```javascript +{ + date: '2025-07-01', // Current processing date + devRankFilter: 'AND rank <= 10000', // Development filter + lens: { + name: 'top1k', // Lens name + sql: 'AND rank <= 1000' // Lens SQL filter + }, + metric: { id: 'myMetric', ... }, // Metric configuration + sql: { type: 'histogram', ... 
} // SQL type configuration +} +``` + +### Required Columns + +#### For Histogram Type + +- `date` - Processing date +- `client` - 'desktop' or 'mobile' +- `bin` - Numeric bin value +- `volume` - Count of pages in this bin +- `pdf` - Probability density function value +- `cdf` - Cumulative distribution function value + +#### For Timeseries Type + +- `date` - Processing date +- `client` - 'desktop' or 'mobile' +- `timestamp` - Unix timestamp in milliseconds +- `p10`, `p25`, `p50`, `p75`, `p90` - Percentile values + +### Best Practices + +1. **Filter root pages**: Always include `AND is_root_page` unless you specifically need all pages +2. **Handle null values**: Use appropriate null checks and filtering +3. **Use consistent binning**: For histograms, use logical bin sizes (e.g., 100KB increments for page weight) +4. **Optimize performance**: Use appropriate WHERE clauses and avoid expensive operations +5. **Test with dev filters**: Your queries should work with the development rank filter + +## Lenses + +Each lens's SQL is a valid BigQuery WHERE clause condition that can be appended to the main query. + +## Processing Details + +### Operation Generation + +For each combination of date, metric, SQL type, and lens, the system: + +1. **Creates a unique operation name**: `{metricId}_{sqlType}_{date}_{lensName}` +2. **Generates BigQuery SQL** that: + - Deletes existing data for the date/metric/lens combination + - Inserts new calculated data + - Exports results to Cloud Storage +3. **Tags operations** with `crawl_complete` tags to be triggered on crawl completion. + +### Table Structure + +Reports are stored in BigQuery tables with this structure: + +- **Partitioned by**: `date` +- **Clustered by**: `metric`, `lens`, `client` +- **Dataset**: `reports` +- **Naming**: `{metricId}_{sqlType}` (e.g., `bytesTotal_histogram`) + +### Export Process + +1. Data is calculated and stored in BigQuery +2. A `run_export_job` function exports filtered data to Cloud Storage +3. 
Export paths follow the pattern: + - Histogram: `reports/[{lens}/]{date_underscore}/{metric_id}.json` + - Timeseries: `reports/[{lens}/]{metric_id}.json` + +### Development vs Production + +- **Development**: Uses `TABLESAMPLE` and rank filters for faster processing +- **Production**: Processes full datasets +- **Environment detection**: Automatic based on `dataform.projectConfig.vars.environment` + +## Configuration + +### Date Range + +Modify the `DATE_RANGE` object in `reports_dynamic.js`: + +```javascript +const DATE_RANGE = { + startDate: '2025-01-01', // Start processing from this date + endDate: '2025-07-01' // Process up to this date +} +``` + +### Export Configuration + +Modify the `EXPORT_CONFIG` object: + +```javascript +const EXPORT_CONFIG = { + bucket: 'your-storage-bucket', + storagePath: 'reports/', + dataset: 'reports', + testSuffix: '.json' +} +``` + +## Troubleshooting + +### Debugging + +1. **Check operation logs** in Dataform for SQL errors +2. **Verify table creation** in BigQuery console +3. **Check export logs** in Cloud Run for export errors +4. **Verify Cloud Storage paths** for exported files +5. **Test SQL templates** individually before adding to the dynamic system +6. 
**Use development environment** with smaller datasets for testing + +## Examples + +### Adding a JavaScript Bundle Size Metric + +```javascript +jsBytes: { + SQL: [ + { + type: 'histogram', + query: DataformTemplateBuilder.create((ctx, params) => ` + WITH pages AS ( + SELECT + date, + client, + CAST(FLOOR(FLOAT64(summary.bytesJS) / 1024 / 50) * 50 AS INT64) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + AND is_root_page + AND INT64(summary.bytesJS) > 0 + ) + + SELECT + *, + SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf + FROM ( + SELECT + *, + volume / SUM(volume) OVER (PARTITION BY client) AS pdf + FROM ( + SELECT + *, + COUNT(0) AS volume + FROM pages + WHERE bin IS NOT NULL + GROUP BY date, client, bin + ) + ) + ORDER BY bin, client + `) + }, + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` + WITH pages AS ( + SELECT + date, + client, + FLOAT64(summary.bytesJS) AS bytesJS + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + AND is_root_page + AND INT64(summary.bytesJS) > 0 + ) + + SELECT + date, + client, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + ROUND(APPROX_QUANTILES(bytesJS, 1001)[OFFSET(101)] / 1024, 2) AS p10, + ROUND(APPROX_QUANTILES(bytesJS, 1001)[OFFSET(251)] / 1024, 2) AS p25, + ROUND(APPROX_QUANTILES(bytesJS, 1001)[OFFSET(501)] / 1024, 2) AS p50, + ROUND(APPROX_QUANTILES(bytesJS, 1001)[OFFSET(751)] / 1024, 2) AS p75, + ROUND(APPROX_QUANTILES(bytesJS, 1001)[OFFSET(901)] / 1024, 2) AS p90 + FROM pages + GROUP BY date, client, timestamp + ORDER BY date, client + `) + } + ] +} +``` + +This would automatically generate reports for JavaScript bundle sizes across all lenses and the configured date range. 
From 40ed9f5cf9fd9698857a9ef3efeb1598c0d771ee Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 11:08:08 +0200 Subject: [PATCH 18/22] rename --- reports.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/reports.md b/reports.md index 0dd5ed00..1a0749a0 100644 --- a/reports.md +++ b/reports.md @@ -1,10 +1,10 @@ -# HTTP Archive Dynamic Reports +# HTTP Archive Reports -This document describes the HTTP Archive dynamic reports system, which automatically generates standardized reports from HTTP Archive crawl data. +This document describes the HTTP Archive reports system, which automatically generates standardized reports from HTTP Archive crawl data. ## Overview -The dynamic reports system generates Dataform operations that: +The reports system generates Dataform operations that: 1. Calculate metrics from HTTP Archive crawl data 2. Store results in BigQuery tables partitioned by date and clustered by metric/lens/client @@ -15,7 +15,7 @@ The dynamic reports system generates Dataform operations that: ### Core Components - **`includes/reports.js`** - Defines metrics and lenses -- **`definitions/output/reports/reports_dynamic.js`** - Generates Dataform operations dynamically +- **`definitions/output/reports/reports_dynamic.js`** - Generates Dataform operations - **`includes/constants.js`** - Provides shared constants and the `DataformTemplateBuilder` ## Supported Features @@ -57,7 +57,7 @@ Lenses allow filtering data by different criteria: - Processes data month by month using `constants.fnPastMonth()` - Supports retrospective report generation -## How to Add a New Dynamic Report +## How to Add a New Report ### Step 1: Define Your Metric @@ -251,7 +251,7 @@ const EXPORT_CONFIG = { 2. **Verify table creation** in BigQuery console 3. **Check export logs** in Cloud Run for export errors 4. **Verify Cloud Storage paths** for exported files -5. 
**Test SQL templates** individually before adding to the dynamic system +5. **Test SQL templates** individually before adding 6. **Use development environment** with smaller datasets for testing ## Examples From e2663bd058867b0004a6621c91d0d22a6f0f88b0 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 11:46:14 +0200 Subject: [PATCH 19/22] cleanup --- .github/workflows/ci.yaml | 1 + scripts/package-lock.json | 838 -------------------------------- scripts/package.json | 7 - scripts/reports_storage_sync.js | 136 ------ 4 files changed, 1 insertion(+), 981 deletions(-) delete mode 100644 scripts/package-lock.json delete mode 100644 scripts/package.json delete mode 100644 scripts/reports_storage_sync.js diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 91cfef83..ea6bc480 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -36,6 +36,7 @@ jobs: VALIDATE_MARKDOWN_PRETTIER: false VALIDATE_CHECKOV: false VALIDATE_GIT_COMMITLINT: false + VALIDATE_EDITORCONFIG: false dependabot: name: Dependabot auto-merge diff --git a/scripts/package-lock.json b/scripts/package-lock.json deleted file mode 100644 index ef22b055..00000000 --- a/scripts/package-lock.json +++ /dev/null @@ -1,838 +0,0 @@ -{ - "name": "scripts", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "dependencies": { - "@google-cloud/bigquery": "^7.9.1", - "@google-cloud/storage": "^7.14.0" - } - }, - "node_modules/@google-cloud/bigquery": { - "version": "7.9.1", - "resolved": "https://registry.npmjs.org/@google-cloud/bigquery/-/bigquery-7.9.1.tgz", - "integrity": "sha512-ZkcRMpBoFLxIh6TiQBywA22yT3c2j0f07AHWEMjtYqMQzZQbFrpxuJU2COp3tyjZ91ZIGHe4gY7/dGZL88cltg==", - "license": "Apache-2.0", - "dependencies": { - "@google-cloud/common": "^5.0.0", - "@google-cloud/paginator": "^5.0.2", - "@google-cloud/precise-date": "^4.0.0", - "@google-cloud/promisify": "^4.0.0", - "arrify": "^2.0.1", - 
"big.js": "^6.0.0", - "duplexify": "^4.0.0", - "extend": "^3.0.2", - "is": "^3.3.0", - "stream-events": "^1.0.5", - "uuid": "^9.0.0" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@google-cloud/common": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/@google-cloud/common/-/common-5.0.2.tgz", - "integrity": "sha512-V7bmBKYQyu0eVG2BFejuUjlBt+zrya6vtsKdY+JxMM/dNntPF41vZ9+LhOshEUH01zOHEqBSvI7Dad7ZS6aUeA==", - "license": "Apache-2.0", - "dependencies": { - "@google-cloud/projectify": "^4.0.0", - "@google-cloud/promisify": "^4.0.0", - "arrify": "^2.0.1", - "duplexify": "^4.1.1", - "extend": "^3.0.2", - "google-auth-library": "^9.0.0", - "html-entities": "^2.5.2", - "retry-request": "^7.0.0", - "teeny-request": "^9.0.0" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@google-cloud/paginator": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/@google-cloud/paginator/-/paginator-5.0.2.tgz", - "integrity": "sha512-DJS3s0OVH4zFDB1PzjxAsHqJT6sKVbRwwML0ZBP9PbU7Yebtu/7SWMRzvO2J3nUi9pRNITCfu4LJeooM2w4pjg==", - "license": "Apache-2.0", - "dependencies": { - "arrify": "^2.0.0", - "extend": "^3.0.2" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@google-cloud/precise-date": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@google-cloud/precise-date/-/precise-date-4.0.0.tgz", - "integrity": "sha512-1TUx3KdaU3cN7nfCdNf+UVqA/PSX29Cjcox3fZZBtINlRrXVTmUkQnCKv2MbBUbCopbK4olAT1IHl76uZyCiVA==", - "license": "Apache-2.0", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@google-cloud/projectify": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@google-cloud/projectify/-/projectify-4.0.0.tgz", - "integrity": "sha512-MmaX6HeSvyPbWGwFq7mXdo0uQZLGBYCwziiLIGq5JVX+/bdI3SAq6bP98trV5eTWfLuvsMcIC1YJOF2vfteLFA==", - "license": "Apache-2.0", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@google-cloud/promisify": { - "version": "4.0.0", - "resolved": 
"https://registry.npmjs.org/@google-cloud/promisify/-/promisify-4.0.0.tgz", - "integrity": "sha512-Orxzlfb9c67A15cq2JQEyVc7wEsmFBmHjZWZYQMUyJ1qivXyMwdyNOs9odi79hze+2zqdTtu1E19IM/FtqZ10g==", - "license": "Apache-2.0", - "engines": { - "node": ">=14" - } - }, - "node_modules/@google-cloud/storage": { - "version": "7.15.0", - "resolved": "https://registry.npmjs.org/@google-cloud/storage/-/storage-7.15.0.tgz", - "integrity": "sha512-/j/+8DFuEOo33fbdX0V5wjooOoFahEaMEdImHBmM2tH9MPHJYNtmXOf2sGUmZmiufSukmBEvdlzYgDkkgeBiVQ==", - "license": "Apache-2.0", - "dependencies": { - "@google-cloud/paginator": "^5.0.0", - "@google-cloud/projectify": "^4.0.0", - "@google-cloud/promisify": "^4.0.0", - "abort-controller": "^3.0.0", - "async-retry": "^1.3.3", - "duplexify": "^4.1.3", - "fast-xml-parser": "^4.4.1", - "gaxios": "^6.0.2", - "google-auth-library": "^9.6.3", - "html-entities": "^2.5.2", - "mime": "^3.0.0", - "p-limit": "^3.0.1", - "retry-request": "^7.0.0", - "teeny-request": "^9.0.0", - "uuid": "^8.0.0" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/@google-cloud/storage/node_modules/uuid": { - "version": "8.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", - "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, - "node_modules/@tootallnate/once": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz", - "integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==", - "license": "MIT", - "engines": { - "node": ">= 10" - } - }, - "node_modules/@types/caseless": { - "version": "0.12.5", - "resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.5.tgz", - "integrity": "sha512-hWtVTC2q7hc7xZ/RLbxapMvDMgUnDvKvMOpKal4DrMyfGBUfB1oKaZlIRr6mJL+If3bAP6sV/QneGzF6tJjZDg==", - "license": "MIT" - }, - 
"node_modules/@types/node": { - "version": "22.10.6", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.6.tgz", - "integrity": "sha512-qNiuwC4ZDAUNcY47xgaSuS92cjf8JbSUoaKS77bmLG1rU7MlATVSiw/IlrjtIyyskXBZ8KkNfjK/P5na7rgXbQ==", - "license": "MIT", - "dependencies": { - "undici-types": "~6.20.0" - } - }, - "node_modules/@types/request": { - "version": "2.48.12", - "resolved": "https://registry.npmjs.org/@types/request/-/request-2.48.12.tgz", - "integrity": "sha512-G3sY+NpsA9jnwm0ixhAFQSJ3Q9JkpLZpJbI3GMv0mIAT0y3mRabYeINzal5WOChIiaTEGQYlHOKgkaM9EisWHw==", - "license": "MIT", - "dependencies": { - "@types/caseless": "*", - "@types/node": "*", - "@types/tough-cookie": "*", - "form-data": "^2.5.0" - } - }, - "node_modules/@types/tough-cookie": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", - "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==", - "license": "MIT" - }, - "node_modules/abort-controller": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", - "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", - "license": "MIT", - "dependencies": { - "event-target-shim": "^5.0.0" - }, - "engines": { - "node": ">=6.5" - } - }, - "node_modules/agent-base": { - "version": "7.1.3", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", - "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/arrify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/arrify/-/arrify-2.0.1.tgz", - "integrity": "sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - 
"node_modules/async-retry": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", - "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", - "license": "MIT", - "dependencies": { - "retry": "0.13.1" - } - }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", - "license": "MIT" - }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/big.js": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/big.js/-/big.js-6.2.2.tgz", - "integrity": "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ==", - "license": "MIT", - "engines": { - "node": "*" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/bigjs" - } - }, - "node_modules/bignumber.js": { - "version": "9.1.2", - "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.1.2.tgz", - "integrity": "sha512-2/mKyZH9K85bzOEfhXDBFZTGd1CTs+5IHpeFQo9luiBG7hghdC851Pj2WAhb6E3R6b9tZj/XKhbg4fum+Kepug==", - "license": "MIT", - "engines": { - "node": "*" - } - }, - "node_modules/buffer-equal-constant-time": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", - "integrity": 
"sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", - "license": "BSD-3-Clause" - }, - "node_modules/combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "license": "MIT", - "dependencies": { - "delayed-stream": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/debug": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", - "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "license": "MIT", - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/duplexify": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-4.1.3.tgz", - "integrity": "sha512-M3BmBhwJRZsSx38lZyhE53Csddgzl5R7xGJNk7CVddZD6CcmwMCH8J+7AprIrQKH7TonKxaCjcv27Qmf+sQ+oA==", - "license": "MIT", - "dependencies": { - "end-of-stream": "^1.4.1", - "inherits": "^2.0.3", - "readable-stream": "^3.1.1", - "stream-shift": "^1.0.2" - } - }, - "node_modules/ecdsa-sig-formatter": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", - "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", - "license": "Apache-2.0", - "dependencies": { - "safe-buffer": "^5.0.1" - } - }, 
- "node_modules/end-of-stream": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", - "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", - "license": "MIT", - "dependencies": { - "once": "^1.4.0" - } - }, - "node_modules/event-target-shim": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", - "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/extend": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", - "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", - "license": "MIT" - }, - "node_modules/fast-xml-parser": { - "version": "4.5.1", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.1.tgz", - "integrity": "sha512-y655CeyUQ+jj7KBbYMc4FG01V8ZQqjN+gDYGJ50RtfsUB8iG9AmwmwoAgeKLJdmueKKMrH1RJ7yXHTSoczdv5w==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - }, - { - "type": "paypal", - "url": "https://paypal.me/naturalintelligence" - } - ], - "license": "MIT", - "dependencies": { - "strnum": "^1.0.5" - }, - "bin": { - "fxparser": "src/cli/cli.js" - } - }, - "node_modules/form-data": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.5.2.tgz", - "integrity": "sha512-GgwY0PS7DbXqajuGf4OYlsrIu3zgxD6Vvql43IBhm6MahqA5SK/7mwhtNj2AdH2z35YR34ujJ7BN+3fFC3jP5Q==", - "license": "MIT", - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.6", - "mime-types": "^2.1.12", - "safe-buffer": "^5.2.1" - }, - "engines": { - "node": ">= 0.12" - } - }, - "node_modules/gaxios": { - "version": "6.7.1", - "resolved": 
"https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", - "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", - "license": "Apache-2.0", - "dependencies": { - "extend": "^3.0.2", - "https-proxy-agent": "^7.0.1", - "is-stream": "^2.0.0", - "node-fetch": "^2.6.9", - "uuid": "^9.0.1" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/gcp-metadata": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.0.tgz", - "integrity": "sha512-Jh/AIwwgaxan+7ZUUmRLCjtchyDiqh4KjBJ5tW3plBZb5iL/BPcso8A5DlzeD9qlw0duCamnNdpFjxwaT0KyKg==", - "license": "Apache-2.0", - "dependencies": { - "gaxios": "^6.0.0", - "json-bigint": "^1.0.0" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/google-auth-library": { - "version": "9.15.0", - "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.0.tgz", - "integrity": "sha512-7ccSEJFDFO7exFbO6NRyC+xH8/mZ1GZGG2xxx9iHxZWcjUjJpjWxIMw3cofAKcueZ6DATiukmmprD7yavQHOyQ==", - "license": "Apache-2.0", - "dependencies": { - "base64-js": "^1.3.0", - "ecdsa-sig-formatter": "^1.0.11", - "gaxios": "^6.1.1", - "gcp-metadata": "^6.1.0", - "gtoken": "^7.0.0", - "jws": "^4.0.0" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/gtoken": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", - "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", - "license": "MIT", - "dependencies": { - "gaxios": "^6.0.0", - "jws": "^4.0.0" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/html-entities": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.5.2.tgz", - "integrity": "sha512-K//PSRMQk4FZ78Kyau+mZurHn3FH0Vwr+H36eE0rPbeYkRRi9YxceYPhuN60UwWorxyKHhqoAJl2OFKa4BVtaA==", - "funding": [ - { - "type": "github", - "url": 
"https://github.com/sponsors/mdevils" - }, - { - "type": "patreon", - "url": "https://patreon.com/mdevils" - } - ], - "license": "MIT" - }, - "node_modules/http-proxy-agent": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz", - "integrity": "sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==", - "license": "MIT", - "dependencies": { - "@tootallnate/once": "2", - "agent-base": "6", - "debug": "4" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/http-proxy-agent/node_modules/agent-base": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", - "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", - "license": "MIT", - "dependencies": { - "debug": "4" - }, - "engines": { - "node": ">= 6.0.0" - } - }, - "node_modules/https-proxy-agent": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", - "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "license": "ISC" - }, - "node_modules/is": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/is/-/is-3.3.0.tgz", - "integrity": "sha512-nW24QBoPcFGGHJGUwnfpI7Yc5CdqWNdsyHQszVE/z2pKHXzh7FZ5GWhJqSyaQ9wMkQnsTx+kAI8bHlCX4tKdbg==", - "license": "MIT", - "engines": { - "node": "*" - } - }, - "node_modules/is-stream": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", - 
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/json-bigint": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", - "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", - "license": "MIT", - "dependencies": { - "bignumber.js": "^9.0.0" - } - }, - "node_modules/jwa": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz", - "integrity": "sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==", - "license": "MIT", - "dependencies": { - "buffer-equal-constant-time": "1.0.1", - "ecdsa-sig-formatter": "1.0.11", - "safe-buffer": "^5.0.1" - } - }, - "node_modules/jws": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", - "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", - "license": "MIT", - "dependencies": { - "jwa": "^2.0.0", - "safe-buffer": "^5.0.1" - } - }, - "node_modules/mime": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", - "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==", - "license": "MIT", - "bin": { - "mime": "cli.js" - }, - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/mime-db": { - "version": "1.52.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime-types": { - "version": "2.1.35", - "resolved": 
"https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "license": "MIT", - "dependencies": { - "mime-db": "1.52.0" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/node-fetch": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", - "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "license": "MIT", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "license": "ISC", - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/p-limit": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", - "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", - "license": "MIT", - "dependencies": { - "yocto-queue": "^0.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/readable-stream": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", - "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", - 
"license": "MIT", - "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/retry": { - "version": "0.13.1", - "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", - "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/retry-request": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/retry-request/-/retry-request-7.0.2.tgz", - "integrity": "sha512-dUOvLMJ0/JJYEn8NrpOaGNE7X3vpI5XlZS/u0ANjqtcZVKnIxP7IgCFwrKTxENw29emmwug53awKtaMm4i9g5w==", - "license": "MIT", - "dependencies": { - "@types/request": "^2.48.8", - "extend": "^3.0.2", - "teeny-request": "^9.0.0" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/safe-buffer": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/stream-events": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/stream-events/-/stream-events-1.0.5.tgz", - "integrity": "sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==", - "license": "MIT", - "dependencies": { - "stubs": "^3.0.0" - } - }, - "node_modules/stream-shift": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.3.tgz", - "integrity": "sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==", - "license": "MIT" - }, - 
"node_modules/string_decoder": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", - "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", - "license": "MIT", - "dependencies": { - "safe-buffer": "~5.2.0" - } - }, - "node_modules/strnum": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", - "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==", - "license": "MIT" - }, - "node_modules/stubs": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/stubs/-/stubs-3.0.0.tgz", - "integrity": "sha512-PdHt7hHUJKxvTCgbKX9C1V/ftOcjJQgz8BZwNfV5c4B6dcGqlpelTbJ999jBGZ2jYiPAwcX5dP6oBwVlBlUbxw==", - "license": "MIT" - }, - "node_modules/teeny-request": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-9.0.0.tgz", - "integrity": "sha512-resvxdc6Mgb7YEThw6G6bExlXKkv6+YbuzGg9xuXxSgxJF7Ozs+o8Y9+2R3sArdWdW8nOokoQb1yrpFB0pQK2g==", - "license": "Apache-2.0", - "dependencies": { - "http-proxy-agent": "^5.0.0", - "https-proxy-agent": "^5.0.0", - "node-fetch": "^2.6.9", - "stream-events": "^1.0.5", - "uuid": "^9.0.0" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/teeny-request/node_modules/agent-base": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", - "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", - "license": "MIT", - "dependencies": { - "debug": "4" - }, - "engines": { - "node": ">= 6.0.0" - } - }, - "node_modules/teeny-request/node_modules/https-proxy-agent": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", - "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", - 
"license": "MIT", - "dependencies": { - "agent-base": "6", - "debug": "4" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", - "license": "MIT" - }, - "node_modules/undici-types": { - "version": "6.20.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", - "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", - "license": "MIT" - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "license": "MIT" - }, - "node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, - "node_modules/webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", - "license": "BSD-2-Clause" - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "license": "MIT", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - }, - "node_modules/wrappy": { - 
"version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "license": "ISC" - }, - "node_modules/yocto-queue": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", - "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - } - } -} diff --git a/scripts/package.json b/scripts/package.json deleted file mode 100644 index dc5df04a..00000000 --- a/scripts/package.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "type": "module", - "dependencies": { - "@google-cloud/bigquery": "^7.9.1", - "@google-cloud/storage": "^7.14.0" - } -} diff --git a/scripts/reports_storage_sync.js b/scripts/reports_storage_sync.js deleted file mode 100644 index e462c2a0..00000000 --- a/scripts/reports_storage_sync.js +++ /dev/null @@ -1,136 +0,0 @@ -import { Storage } from '@google-cloud/storage' -import { BigQuery } from '@google-cloud/bigquery' - -const storage = new Storage() -const bucketName = 'httparchive' -const storagePathPrefix = 'reports/' - -const bigquery = new BigQuery({ projectId: 'httparchive' }) -const datasetId = 'reports' -const tableId = 'gcs_export' - -const lenses = [ - '', - 'drupal/', - 'magento/', - 'top100k/', - 'top10k/', - 'top1k/', - 'top1m/', - 'wordpress/' -] - -const dates = (function () { - const dates = [] - for (let year = 2016; year <= 2025; year++) { - for (let month = 1; month <= 12; month++) { - dates.push(`${year}_${String(month).padStart(2, '0')}_01`) - if (year <= 2018) { - dates.push(`${year}_${String(month).padStart(2, '0')}_15`) - } - if (year === 2025 && month === 1) { - break - } - } - } - return dates -})() - -const histogramMetrics = new Set( - 'bytesCss', - 'bytesFont', - 
'bytesHtml', - 'bytesImg', - 'bytesJs', - 'bytesOther', - 'bytesTotal', - 'bytesVideo', - 'compileJs', - 'dcl', - 'evalJs', - 'fcp', - 'gzipSavings', - 'imgSavings', - 'ol', - 'reqCss', - 'reqFont', - 'reqHtml', - 'reqImg', - 'reqJs', - 'reqOther', - 'reqTotal', - 'reqVideo', - 'speedIndex', - 'tcp', - 'bootupJs', - 'offscreenImages', - 'optimizedImages', - 'ttci', - 'ttfi', - 'vulnJs', - 'cruxCls', - 'cruxDcl', - 'cruxFcp', - 'cruxFid', - 'cruxFp', - 'cruxLcp', - 'cruxOl', - 'htmlElementPopularity', - 'cruxInp', - 'cruxTtfb') - -async function downloadObject (bucketName, srcFilename) { - const contents = await storage.bucket(bucketName).file(srcFilename).download() - - return contents.toString() -} - -async function uploadToBigQuery (rows, schema) { - try { - await bigquery.dataset(datasetId).table(tableId).insert(rows, { schema }) - } catch (error) { - if (error.name === 'PartialFailureError') { - console.error('Partial failure error:', error) - error.errors.forEach(err => { - console.error('Row:', JSON.stringify(err.row)) - console.error('Errors:', JSON.stringify(err.errors)) - }) - } else { - throw error - } - } -} - -async function importHistogramData () { - for (const lens of lenses) { - for (const metric of histogramMetrics) { - for (const date of dates) { - const srcFilename = `${storagePathPrefix}${lens}${date}/${metric}.json` - - console.log(`Downloading ${srcFilename}`) - - const data = await downloadObject(bucketName, srcFilename) - - const rows = JSON.parse(data).map(data => ({ - date: date.replace(/_/g, '-'), - lens: lens.replace('/', ''), - metric, - data: JSON.stringify(data) - })) - - const schema = [ - { name: 'date', type: 'DATE' }, - { name: 'lens', type: 'STRING' }, - { name: 'metric', type: 'STRING' }, - { name: 'data', type: 'JSON' } - ] - - console.log(`Uploading ${rows.length} rows to BigQuery`) - - await uploadToBigQuery(rows, schema) - } - } - } -} - -importHistogramData().catch(console.error) From 90dff99972090c03ffd44ed239d7175e72fcb202 
Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 22:54:55 +0200 Subject: [PATCH 20/22] VALIDATE_EDITORCONFIG --- .github/workflows/ci.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ea6bc480..91cfef83 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -36,7 +36,6 @@ jobs: VALIDATE_MARKDOWN_PRETTIER: false VALIDATE_CHECKOV: false VALIDATE_GIT_COMMITLINT: false - VALIDATE_EDITORCONFIG: false dependabot: name: Dependabot auto-merge From 277e1d33f7e94651bc796268bf9d05a3f0e0c39a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 22:58:18 +0200 Subject: [PATCH 21/22] lint --- reports.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/reports.md b/reports.md index 1a0749a0..8b4ddb41 100644 --- a/reports.md +++ b/reports.md @@ -189,9 +189,9 @@ For each combination of date, metric, SQL type, and lens, the system: 1. **Creates a unique operation name**: `{metricId}_{sqlType}_{date}_{lensName}` 2. **Generates BigQuery SQL** that: - - Deletes existing data for the date/metric/lens combination - - Inserts new calculated data - - Exports results to Cloud Storage + - Deletes existing data for the date/metric/lens combination + - Inserts new calculated data + - Exports results to Cloud Storage 3. **Tags operations** with `crawl_complete` tags to be triggered on crawl completion. ### Table Structure @@ -208,8 +208,8 @@ Reports are stored in BigQuery tables with this structure: 1. Data is calculated and stored in BigQuery 2. A `run_export_job` function exports filtered data to Cloud Storage 3. 
Export paths follow the pattern: - - Histogram: `reports/[{lens}/]{date_underscore}/{metric_id}.json` - - Timeseries: `reports/[{lens}/]{metric_id}.json` + - Histogram: `reports/[{lens}/]{date_underscore}/{metric_id}.json` + - Timeseries: `reports/[{lens}/]{metric_id}.json` ### Development vs Production From cbf8ad962d9938e62d94f97435ba990484113884 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Thu, 31 Jul 2025 23:02:13 +0200 Subject: [PATCH 22/22] cleanup --- definitions/output/reports/reports_dynamic.js | 2 +- infra/tf/dataform.tf | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index f3d167fd..a4c8b9d3 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -24,7 +24,7 @@ const EXPORT_CONFIG = { bucket: constants.bucket, storagePath: constants.storagePath, dataset: 'reports', - testSuffix: '_test.json' // TODO: remove test suffix from the path + testSuffix: '.json' } // Date range for report generation diff --git a/infra/tf/dataform.tf b/infra/tf/dataform.tf index 60d7c08b..b0d3c801 100644 --- a/infra/tf/dataform.tf +++ b/infra/tf/dataform.tf @@ -7,10 +7,8 @@ locals { "sample_data", "wappalyzer", - // Blink features - "blink_features", - // Reports + "blink_features", "core_web_vitals", // TODO: Remove after tech report migration "reports",