From 5652f95ce87270758b429dd2a336eb535952f3c9 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 30 Jul 2024 19:58:19 +0200 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=94=A8=20add=20saving=20of=20grapher?= =?UTF-8?q?=20configs=20to=20R2=20and=20a=20sync=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.devcontainer | 6 +- .env.example-full | 12 +- .gitignore | 1 + adminSiteServer/apiRouter.ts | 115 +++++-- adminSiteServer/chartConfigR2Helpers.ts | 173 +++++++++++ .../1722415645057-AddChartConfigHash.ts | 22 ++ db/model/Image.ts | 16 +- devTools/syncGraphersToR2/syncGraphersToR2.ts | 292 ++++++++++++++++++ devTools/syncGraphersToR2/tsconfig.json | 18 ++ package.json | 3 +- .../@ourworldindata/types/src/NominalType.ts | 8 + .../types/src/dbTypes/ChartConfigs.ts | 1 + packages/@ourworldindata/types/src/index.ts | 2 +- .../@ourworldindata/utils/src/Util.test.ts | 26 ++ packages/@ourworldindata/utils/src/Util.ts | 38 +++ packages/@ourworldindata/utils/src/index.ts | 6 + settings/serverSettings.ts | 27 +- site/gdocs/components/Image.tsx | 4 +- tsconfig.json | 3 + 19 files changed, 725 insertions(+), 48 deletions(-) create mode 100644 adminSiteServer/chartConfigR2Helpers.ts create mode 100644 db/migration/1722415645057-AddChartConfigHash.ts create mode 100644 devTools/syncGraphersToR2/syncGraphersToR2.ts create mode 100644 devTools/syncGraphersToR2/tsconfig.json diff --git a/.env.devcontainer b/.env.devcontainer index 7bc29cc6500..dc8a8bd8d54 100644 --- a/.env.devcontainer +++ b/.env.devcontainer @@ -16,8 +16,8 @@ GDOCS_CLIENT_ID='' GDOCS_BASIC_ARTICLE_TEMPLATE_URL='' GDOCS_SHARED_DRIVE_ID='' -IMAGE_HOSTING_R2_ENDPOINT='' +R2_ENDPOINT='' IMAGE_HOSTING_R2_CDN_URL='' IMAGE_HOSTING_R2_BUCKET_PATH='' -IMAGE_HOSTING_R2_ACCESS_KEY_ID='' -IMAGE_HOSTING_R2_SECRET_ACCESS_KEY='' +R2_ACCESS_KEY_ID='' +R2_SECRET_ACCESS_KEY='' diff --git a/.env.example-full b/.env.example-full index 8407d05b69b..d0cb5063adf 100644 --- 
a/.env.example-full +++ b/.env.example-full @@ -22,11 +22,17 @@ GDOCS_BASIC_ARTICLE_TEMPLATE_URL= GDOCS_SHARED_DRIVE_ID= GDOCS_DONATE_FAQS_DOCUMENT_ID= # optional -IMAGE_HOSTING_R2_ENDPOINT= # optional +R2_ENDPOINT= # optional IMAGE_HOSTING_R2_CDN_URL= IMAGE_HOSTING_R2_BUCKET_PATH= -IMAGE_HOSTING_R2_ACCESS_KEY_ID= # optional -IMAGE_HOSTING_R2_SECRET_ACCESS_KEY= # optional +R2_ACCESS_KEY_ID= # optional +R2_SECRET_ACCESS_KEY= # optional +# These two GRAPHER_CONFIG_ settings are used to store grapher configs in an R2 bucket. +# The cloudflare workers for thumbnail rendering etc use these settings to fetch the grapher configs. +# This means that for most local dev it is not necessary to set these. +GRAPHER_CONFIG_R2_BUCKET= # optional - for local dev set it to "owid-grapher-configs-staging" +GRAPHER_CONFIG_R2_BUCKET_PATH= # optional - for local dev set it to "devs/YOURNAME" + OPENAI_API_KEY= diff --git a/.gitignore b/.gitignore index 388475e6261..e33cb39db80 100755 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ dist/ .nx/workspace-data .dev.vars **/tsup.config.bundled*.mjs +cfstorage/ diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index c5096c16b24..8b7ae33f20f 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -155,6 +155,14 @@ import { GdocDataInsight } from "../db/model/Gdoc/GdocDataInsight.js" import { GdocHomepage } from "../db/model/Gdoc/GdocHomepage.js" import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js" import path from "path" +import { + deleteGrapherConfigFromR2, + deleteGrapherConfigFromR2ByUUID, + R2GrapherConfigDirectory, + saveGrapherConfigToR2, + saveGrapherConfigToR2ByUUID, + getMd5HashBase64, +} from "./chartConfigR2Helpers.js" const apiRouter = new FunctionalRouter() @@ -275,7 +283,7 @@ const expectChartById = async ( const saveNewChart = async ( knex: db.KnexReadWriteTransaction, { config, user }: { config: GrapherInterface; user: DbPlainUser } -): Promise => { +): Promise<{ 
patchConfig: GrapherInterface; fullConfig: GrapherInterface }> => { // if the schema version is missing, assume it's the latest if (!config["$schema"]) { config["$schema"] = defaultGrapherConfig["$schema"] @@ -285,16 +293,25 @@ const saveNewChart = async ( const parentConfig = defaultGrapherConfig const patchConfig = diffGrapherConfigs(config, parentConfig) const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) + const fullConfigStringified = JSON.stringify(fullConfig) + + // compute a base64-encoded md5 hash of the full config + const fullConfigMd5 = await getMd5HashBase64(fullConfigStringified) // insert patch & full configs into the chart_configs table - const configId = uuidv7() + const chartConfigId = uuidv7() await db.knexRaw( knex, `-- sql - INSERT INTO chart_configs (id, patch, full) - VALUES (?, ?, ?) + INSERT INTO chart_configs (id, patch, full, fullMd5) + VALUES (?, ?, ?, ?) `, - [configId, JSON.stringify(patchConfig), JSON.stringify(fullConfig)] + [ + chartConfigId, + JSON.stringify(patchConfig), + fullConfigStringified, + fullConfigMd5, + ] ) // add a new chart to the charts table @@ -304,7 +321,7 @@ const saveNewChart = async ( INSERT INTO charts (configId, lastEditedAt, lastEditedByUserId) VALUES (?, ?, ?) 
`, - [configId, new Date(), user.id] + [chartConfigId, new Date(), user.id] ) // The chart config itself has an id field that should store the id of the chart - update the chart now so this is true @@ -324,7 +341,9 @@ const saveNewChart = async ( [chartId, chartId, chartId] ) - return patchConfig + await saveGrapherConfigToR2ByUUID(chartConfigId, fullConfigStringified) + + return { patchConfig, fullConfig } } const updateExistingChart = async ( @@ -334,7 +353,7 @@ const updateExistingChart = async ( user, chartId, }: { config: GrapherInterface; user: DbPlainUser; chartId: number } -): Promise => { +): Promise<{ patchConfig: GrapherInterface; fullConfig: GrapherInterface }> => { // make sure that the id of the incoming config matches the chart id config.id = chartId @@ -347,19 +366,36 @@ const updateExistingChart = async ( const parentConfig = defaultGrapherConfig const patchConfig = diffGrapherConfigs(config, parentConfig) const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) + const fullConfigStringified = JSON.stringify(fullConfig) + + const fullConfigMd5 = await getMd5HashBase64(fullConfigStringified) + + const chartConfigId = await db.knexRawFirst>( + knex, + `SELECT configId FROM charts WHERE id = ?`, + [chartId] + ) + + if (!chartConfigId) + throw new JsonError(`No chart config found for id ${chartId}`, 404) // update configs await db.knexRaw( knex, `-- sql - UPDATE chart_configs cc - JOIN charts c ON c.configId = cc.id + UPDATE chart_configs SET - cc.patch=?, - cc.full=? - WHERE c.id = ? + patch=?, + full=?, + fullMd5=? + WHERE id = ? 
`, - [JSON.stringify(patchConfig), JSON.stringify(fullConfig), chartId] + [ + JSON.stringify(patchConfig), + fullConfigStringified, + fullConfigMd5, + chartConfigId.configId, + ] ) // update charts row @@ -373,7 +409,12 @@ const updateExistingChart = async ( [new Date(), user.id, chartId] ) - return patchConfig + await saveGrapherConfigToR2ByUUID( + chartConfigId.configId, + fullConfigStringified + ) + + return { patchConfig, fullConfig } } const saveGrapher = async ( @@ -443,6 +484,11 @@ const saveGrapher = async ( `INSERT INTO chart_slug_redirects (chart_id, slug) VALUES (?, ?)`, [existingConfig.id, existingConfig.slug] ) + // When we rename grapher configs, make sure to delete the old one (the new one will be saved below) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${existingConfig.slug}.json` + ) } } @@ -457,20 +503,27 @@ const saveGrapher = async ( // Execute the actual database update or creation let chartId: number + let patchConfig: GrapherInterface + let fullConfig: GrapherInterface if (existingConfig) { chartId = existingConfig.id! - newConfig = await updateExistingChart(knex, { + const configs = await updateExistingChart(knex, { config: newConfig, user, chartId, }) + patchConfig = configs.patchConfig + fullConfig = configs.fullConfig } else { - newConfig = await saveNewChart(knex, { + const configs = await saveNewChart(knex, { config: newConfig, user, }) - chartId = newConfig.id! + patchConfig = configs.patchConfig + fullConfig = configs.fullConfig + chartId = fullConfig.id! 
} + newConfig = patchConfig // Record this change in version history const chartRevisionLog = { @@ -515,6 +568,17 @@ const saveGrapher = async ( newDimensions.map((d) => d.variableId) ) + if (newConfig.isPublished) { + const configStringified = JSON.stringify(fullConfig) + const configMd5 = await getMd5HashBase64(configStringified) + await saveGrapherConfigToR2( + configStringified, + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${newConfig.slug}.json`, + configMd5 + ) + } + if ( newConfig.isPublished && (!existingConfig || !existingConfig.isPublished) @@ -537,6 +601,10 @@ const saveGrapher = async ( `DELETE FROM chart_slug_redirects WHERE chart_id = ?`, [existingConfig.id] ) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${existingConfig.slug}.json` + ) await triggerStaticBuild(user, `Unpublishing chart ${newConfig.slug}`) } else if (newConfig.isPublished) await triggerStaticBuild(user, `Updating chart ${newConfig.slug}`) @@ -883,11 +951,13 @@ deleteRouteWithRWTransaction( [chart.id] ) - const row = await db.knexRawFirst<{ configId: number }>( + const row = await db.knexRawFirst>( trx, `SELECT configId FROM charts WHERE id = ?`, [chart.id] ) + if (!row) + throw new JsonError(`No chart config found for id ${chart.id}`, 404) if (row) { await db.knexRaw(trx, `DELETE FROM charts WHERE id=?`, [chart.id]) await db.knexRaw(trx, `DELETE FROM chart_configs WHERE id=?`, [ @@ -901,6 +971,13 @@ deleteRouteWithRWTransaction( `Deleting chart ${chart.slug}` ) + await deleteGrapherConfigFromR2ByUUID(row.configId) + if (chart.isPublished) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${chart.slug}.json` + ) + return { success: true } } ) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts new file mode 100644 index 00000000000..781fbd233fb --- /dev/null +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -0,0 +1,173 @@ +import { + 
GRAPHER_CONFIG_R2_BUCKET, + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2_ACCESS_KEY_ID, + R2_ENDPOINT, + R2_REGION, + R2_SECRET_ACCESS_KEY, +} from "../settings/serverSettings.js" +import { + DeleteObjectCommand, + DeleteObjectCommandInput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from "@aws-sdk/client-s3" +import { Base64String, JsonError } from "@ourworldindata/utils" +import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" +import { createHash } from "crypto" + +export function getMd5HashBase64(data: string): Base64String { + // I would have liked to create a function in utils that can compute a variety of hashes + // in both the browser, CF workers and node but unfortunately this isn't easily possible + // for md5 - so here we just special case for md5, node and base64 encoding for now. + return createHash("md5") + .update(data, "utf-8") + .digest("base64") as Base64String +} +export enum R2GrapherConfigDirectory { + byUUID = "config/by-uuid", + publishedGrapherBySlug = "grapher/by-slug", +} + +let s3Client: S3Client | undefined = undefined + +export async function saveGrapherConfigToR2ByUUID( + id: string, + chartConfigStringified: string +) { + const configMd5 = await getMd5HashBase64(chartConfigStringified) + + await saveGrapherConfigToR2( + chartConfigStringified, + R2GrapherConfigDirectory.byUUID, + `${id}.json`, + configMd5 + ) +} + +export async function deleteGrapherConfigFromR2ByUUID(id: string) { + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.byUUID, + `${id}.json` + ) +} + +export async function saveGrapherConfigToR2( + config_stringified: string, + directory: R2GrapherConfigDirectory, + filename: string, + configMd5: Base64String +) { + if (process.env.NODE_ENV === "test") { + console.log("Skipping saving grapher config to R2 in test environment") + return + } + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info( + "R2 bucket not configured, 
not storing grapher config to R2" + ) + return + } + try { + if (!s3Client) { + s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + } + + if (!GRAPHER_CONFIG_R2_BUCKET || !GRAPHER_CONFIG_R2_BUCKET_PATH) { + throw new Error("R2 bucket not configured") + } + + const bucket = GRAPHER_CONFIG_R2_BUCKET + const path = [GRAPHER_CONFIG_R2_BUCKET_PATH, directory, filename].join( + "/" + ) + + const MIMEType = "application/json" + + const params: PutObjectCommandInput = { + Bucket: bucket, + Key: path, + Body: config_stringified, + ContentType: MIMEType, + ContentMD5: configMd5, + } + + await s3Client.send(new PutObjectCommand(params)) + console.log( + `Successfully uploaded object: ${params.Bucket}/${params.Key}` + ) + } catch (err) { + await logErrorAndMaybeSendToBugsnag(err) + throw new JsonError( + `Failed to save the grapher config to R2. Inner error: ${err}` + ) + } +} + +export async function deleteGrapherConfigFromR2( + directory: R2GrapherConfigDirectory, + filename: string +) { + if (process.env.NODE_ENV === "test") { + console.log("Skipping saving grapher config to R2 in test environment") + return + } + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info( + "R2 bucket not configured, not deleting grapher config to R2" + ) + return + } + try { + if (!s3Client) { + s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + } + + if (!GRAPHER_CONFIG_R2_BUCKET || !GRAPHER_CONFIG_R2_BUCKET_PATH) { + throw new Error("R2 bucket not configured") + } + + const bucket = GRAPHER_CONFIG_R2_BUCKET + const path = [GRAPHER_CONFIG_R2_BUCKET_PATH, directory, filename].join( + "/" + ) + + const params: DeleteObjectCommandInput = { + 
Bucket: bucket, + Key: path, + } + + await s3Client.send(new DeleteObjectCommand(params)) + console.log( + `Successfully deleted object: ${params.Bucket}/${params.Key}` + ) + } catch (err) { + await logErrorAndMaybeSendToBugsnag(err) + throw new JsonError( + `Failed to delete the grapher config to R2 at ${directory}/${filename}. Inner error: ${err}` + ) + } +} diff --git a/db/migration/1722415645057-AddChartConfigHash.ts b/db/migration/1722415645057-AddChartConfigHash.ts new file mode 100644 index 00000000000..8885900a088 --- /dev/null +++ b/db/migration/1722415645057-AddChartConfigHash.ts @@ -0,0 +1,22 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class AddChartConfigHash1722415645057 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + ALTER TABLE chart_configs + ADD COLUMN fullMd5 CHAR(24); + `) + + await queryRunner.query(` + UPDATE chart_configs + SET fullMd5 = to_base64(unhex(md5(full))) + `) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + ALTER TABLE chart_configs + DROP COLUMN fullMd5; + `) + } +} diff --git a/db/model/Image.ts b/db/model/Image.ts index 60799fc9782..7042f529eca 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -21,10 +21,10 @@ import { } from "@ourworldindata/utils" import { OwidGoogleAuth } from "../OwidGoogleAuth.js" import { - IMAGE_HOSTING_R2_ENDPOINT, - IMAGE_HOSTING_R2_ACCESS_KEY_ID, - IMAGE_HOSTING_R2_SECRET_ACCESS_KEY, - IMAGE_HOSTING_R2_REGION, + R2_ENDPOINT, + R2_ACCESS_KEY_ID, + R2_SECRET_ACCESS_KEY, + R2_REGION, IMAGE_HOSTING_R2_BUCKET_PATH, GDOCS_CLIENT_EMAIL, GDOCS_SHARED_DRIVE_ID, @@ -139,12 +139,12 @@ class ImageStore { export const imageStore = new ImageStore() export const s3Client = new S3Client({ - endpoint: IMAGE_HOSTING_R2_ENDPOINT, + endpoint: R2_ENDPOINT, forcePathStyle: false, - region: IMAGE_HOSTING_R2_REGION, + region: R2_REGION, credentials: { - accessKeyId: 
IMAGE_HOSTING_R2_ACCESS_KEY_ID, - secretAccessKey: IMAGE_HOSTING_R2_SECRET_ACCESS_KEY, + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, }, }) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts new file mode 100644 index 00000000000..bedbc1d722d --- /dev/null +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -0,0 +1,292 @@ +import fs from "fs-extra" +import parseArgs from "minimist" +import { + DeleteObjectCommand, + DeleteObjectCommandInput, + ListObjectsCommand, + ListObjectsV2Command, + ListObjectsV2CommandOutput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from "@aws-sdk/client-s3" +import { + GRAPHER_CONFIG_R2_BUCKET, + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2_ACCESS_KEY_ID, + R2_ENDPOINT, + R2_REGION, + R2_SECRET_ACCESS_KEY, +} from "../../settings/serverSettings.js" +import { + knexRaw, + KnexReadonlyTransaction, + knexReadonlyTransaction, +} from "../../db/db.js" +import { R2GrapherConfigDirectory } from "../../adminSiteServer/chartConfigR2Helpers.js" +import { + base64ToBytes, + bytesToBase64, + DbRawChartConfig, + differenceOfSets, + excludeUndefined, + HexString, + hexToBytes, +} from "@ourworldindata/utils" +import { string } from "ts-pattern/dist/patterns.js" +import { chunk, take } from "lodash" +import ProgressBar from "progress" + +type HashAndId = Pick + +/** Sync a set of chart configs with R2. Pass in a map of the keys to their md5 hashes and UUIDs + and this function will upsert all missing/outdated ones and delete any that are no longer needed. + + @param s3Client The S3 client to use + @param pathPrefix The path prefix to use for the files (e.g. 
"config/by-uuid" then everything inside it will be synced) + @param hashesOfFilesToToUpsert A map of the keys to their md5 hashes and UUIDs + @param trx The transaction to use for querying the DB for full configs + @param dryRun Whether to actually make changes to R2 or just log what would + */ +async function syncWithR2( + s3Client: S3Client, + pathPrefix: string, + hashesOfFilesToToUpsert: Map, + trx: KnexReadonlyTransaction, + dryRun: boolean = false +) { + // We'll first get all the files in the R2 bucket under the path prefix + // and check if the hash of each file that exist in R2 matches the hash + // of the file we want to upsert. If it does, we'll remove it from the + // list of files to upsert. If it doesn't, we'll add it to the list of + // files to delete. + + const hashesOfFilesToDelete = new Map() + + // list the files in the R2 bucket. There may be more files in the + // bucket than can be returned in one list operation so loop until + // all files are listed + let continuationToken: string | undefined = undefined + do { + const listObjectsCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Prefix: pathPrefix, + ContinuationToken: continuationToken, + } + const listObjectsCommandOutput: ListObjectsV2CommandOutput = + await s3Client.send( + new ListObjectsV2Command(listObjectsCommandInput) + ) + if ((listObjectsCommandOutput.Contents?.length ?? 
0) > 0) { + listObjectsCommandOutput.Contents!.forEach((object) => { + if (object.Key && object.ETag) { + // For some reason the etag has quotes around it, strip those + const md5 = object.ETag.replace(/"/g, "") as HexString + const md5Base64 = bytesToBase64(hexToBytes(md5)) + + if (hashesOfFilesToToUpsert.has(object.Key)) { + if ( + hashesOfFilesToToUpsert.get(object.Key)?.fullMd5 === + md5Base64 + ) { + hashesOfFilesToToUpsert.delete(object.Key) + } + // If the existing full config in R2 is different then + // we just keep the hashesOfFilesToToUpsert entry around + // which will upsert the new full config later on + } else { + // if the file in R2 is not in the list of files to upsert + // then we should delete it + hashesOfFilesToDelete.set(object.Key, md5Base64) + } + } + }) + } + continuationToken = listObjectsCommandOutput.NextContinuationToken + } while (continuationToken) + + console.log("Number of files to upsert", hashesOfFilesToToUpsert.size) + console.log("Number of files to delete", hashesOfFilesToDelete.size) + + let progressBar = new ProgressBar( + "--- Deleting obsolete configs [:bar] :current/:total :elapseds\n", + { + total: hashesOfFilesToDelete.size, + } + ) + + // Delete the files in R2 that are no longer needed + for (const batch of chunk([...hashesOfFilesToDelete.entries()], 100)) { + const deletePromises = batch.map(async ([key, _]) => { + const deleteObjectCommandInput: DeleteObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + } + if (!dryRun) + await s3Client.send( + new DeleteObjectCommand(deleteObjectCommandInput) + ) + else console.log("Would have deleted", key) + progressBar.tick() + }) + await Promise.allSettled(deletePromises) + } + + console.log("Finished deletes") + + progressBar = new ProgressBar( + "--- Storing missing configs [:bar] :current/:total :elapseds\n", + { + total: hashesOfFilesToToUpsert.size, + } + ) + + const errors = [] + + // Chunk the inserts so that we don't need to keep all the full configs 
in memory + for (const batch of chunk([...hashesOfFilesToToUpsert.entries()], 100)) { + // Get the full configs for the batch + const fullConfigs = await knexRaw< + Pick + >(trx, `select id, full from chart_configs where id in (?)`, [ + batch.map((entry) => entry[1].id), + ]) + const fullConfigMap = new Map( + fullConfigs.map(({ id, full }) => [id, full]) + ) + + // Upload the full configs to R2 in parallel + const uploadPromises = batch.map(async ([key, val]) => { + const id = val.id + const fullMd5 = val.fullMd5 + const full = fullConfigMap.get(id) + if (full === undefined) { + return Promise.reject( + new Error(`Full config not found for id ${id}`) + ) + } + const putObjectCommandInput: PutObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + Body: full, + ContentMD5: fullMd5, + ContentType: "application/json", + } + if (!dryRun) + await s3Client.send(new PutObjectCommand(putObjectCommandInput)) + else console.log("Would have upserted", key) + progressBar.tick() + return + }) + const promiseResults = await Promise.allSettled(uploadPromises) + const batchErrors = promiseResults + .filter((result) => result.status === "rejected") + .map((result) => result.reason) + errors.push(...batchErrors) + } + + console.log("Finished upserts") + if (errors.length > 0) { + console.error(`${errors.length} Errors during upserts`) + for (const error of errors) { + console.error(error) + } + } +} + +async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info("R2 bucket not configured, exiting") + return + } + + const s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + + const hashesOfFilesToToUpsertBySlug = new Map() + const hashesOfFilesToToUpsertByUuid = new Map() + const 
pathPrefixBySlug = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.publishedGrapherBySlug, + ]).join("/") + + const pathPrefixByUuid = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.byUUID, + ]).join("/") + + await knexReadonlyTransaction(async (trx) => { + // Sync charts published by slug + const slugsAndHashesFromDb = await knexRaw< + Pick + >( + trx, + `select slug, fullMd5, id + from chart_configs + where slug is not null + and full ->> '$.isPublished' = "true"` + ) + + slugsAndHashesFromDb.forEach((row) => { + hashesOfFilesToToUpsertBySlug.set( + `${pathPrefixBySlug}/${row.slug}.json`, + { + fullMd5: row.fullMd5, + id: row.id, + } + ) + }) + + await syncWithR2( + s3Client, + pathPrefixBySlug, + hashesOfFilesToToUpsertBySlug, + trx, + dryRun + ) + + // Sync charts by UUID + const slugsAndHashesFromDbByUuid = await knexRaw< + Pick + >(trx, `select fullMd5, id from chart_configs`) + + slugsAndHashesFromDbByUuid.forEach((row) => { + hashesOfFilesToToUpsertByUuid.set( + `${pathPrefixByUuid}/${row.id}.json`, + { + fullMd5: row.fullMd5, + id: row.id, + } + ) + }) + + await syncWithR2( + s3Client, + pathPrefixByUuid, + hashesOfFilesToToUpsertByUuid, + trx, + dryRun + ) + }) +} + +const parsedArgs = parseArgs(process.argv.slice(2)) +if (parsedArgs["h"]) { + console.log( + `syncGraphersToR2.js - sync grapher configs from the chart_configs table to R2 + +--dry-run: Don't make any actual changes to R2` + ) +} else { + main(parsedArgs, parsedArgs["dry-run"]) +} diff --git a/devTools/syncGraphersToR2/tsconfig.json b/devTools/syncGraphersToR2/tsconfig.json new file mode 100644 index 00000000000..74f2eaadbb6 --- /dev/null +++ b/devTools/syncGraphersToR2/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../tsconfigs/tsconfig.base.json", + "compilerOptions": { + "outDir": "../../itsJustJavascript/devTools/syncGrapherToR2", + "rootDir": "." 
+ }, + "references": [ + { + "path": "../../db" + }, + { + "path": "../../adminSiteServer" + }, + { + "path": "../../settings" + } + ] +} diff --git a/package.json b/package.json index 8ab030f444c..20f8e503d94 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,8 @@ "testPrettierAll": "yarn prettier --check \"**/*.{tsx,ts,jsx,js,json,md,html,css,scss,yml}\"", "testJest": "lerna run buildTests && jest", "testSiteNavigation": "tsx --tsconfig tsconfig.tsx.json devTools/navigationTest/navigationTest.ts", - "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json" + "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json", + "syncGraphersToR2": "tsx --tsconfig tsconfig.tsx.json devTools/syncGraphersToR2/syncGraphersToR2.ts" }, "dependencies": { "@algolia/autocomplete-js": "^1.17.2", diff --git a/packages/@ourworldindata/types/src/NominalType.ts b/packages/@ourworldindata/types/src/NominalType.ts index f3487f54232..f24497dfb29 100644 --- a/packages/@ourworldindata/types/src/NominalType.ts +++ b/packages/@ourworldindata/types/src/NominalType.ts @@ -20,3 +20,11 @@ declare const __nominal__type: unique symbol export type Nominal = Type & { readonly [__nominal__type]: Identifier } + +export function wrap(obj: T): Nominal { + return obj as Nominal +} + +export function unwrap(obj: Nominal): T { + return obj +} diff --git a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts index 24c98ee6b6f..b1db3c1e82b 100644 --- a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts +++ b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts @@ -6,6 +6,7 @@ export interface DbInsertChartConfig { id: string patch: JsonString full: JsonString + fullMd5?: string slug?: string | null createdAt?: Date updatedAt?: Date | null diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index 3ca08233205..3c8e8af77d0 100644 --- 
a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -646,7 +646,7 @@ export { export { RedirectCode, type DbPlainRedirect } from "./dbTypes/Redirects.js" -export type { Nominal } from "./NominalType.js" +export { type Nominal, wrap, unwrap } from "./NominalType.js" export { type DbRawLatestWork, diff --git a/packages/@ourworldindata/utils/src/Util.test.ts b/packages/@ourworldindata/utils/src/Util.test.ts index c1cd463fac5..a10e4c9efd0 100755 --- a/packages/@ourworldindata/utils/src/Util.test.ts +++ b/packages/@ourworldindata/utils/src/Util.test.ts @@ -29,12 +29,17 @@ import { traverseEnrichedBlock, cartesian, formatInlineList, + base64ToBytes, + bytesToBase64, + hexToBytes, + bytesToHex, } from "./Util.js" import { BlockImageSize, OwidEnrichedGdocBlock, SortOrder, } from "@ourworldindata/types" +import { webcrypto as crypto } from "node:crypto" describe(findClosestTime, () => { describe("without tolerance", () => { @@ -795,3 +800,24 @@ describe(formatInlineList, () => { ) }) }) + +function generateRandomBytes(length: number): Uint8Array { + const bytes = new Uint8Array(length) + crypto.getRandomValues(bytes) + return bytes +} + +describe("hex/base64 conversion is reversible", () => { + const originalBytes = generateRandomBytes(33) + const base64String = bytesToBase64(originalBytes) + const roundTrippedBytes = base64ToBytes(base64String) + it("is the same after converting to base64 and back", () => { + expect(originalBytes).toEqual(roundTrippedBytes) + }) + + const hexString = bytesToHex(originalBytes) + const roundTrippedBytesHex = hexToBytes(hexString) + it("is the same after converting to hex and back", () => { + expect(originalBytes).toEqual(roundTrippedBytesHex) + }) +}) diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 93f0aa0289e..58c753efa91 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -174,10 
+174,12 @@ import { TagGraphRoot, TagGraphRootName, TagGraphNode, + Nominal, } from "@ourworldindata/types" import { PointVector } from "./PointVector.js" import React from "react" import { match, P } from "ts-pattern" +// import "crypto" export type NoUndefinedValues = { [P in keyof T]: Required> @@ -454,6 +456,42 @@ export const cagr = ( ) } +export type Base64String = Nominal +export type HexString = Nominal + +export function base64ToBytes(base64: Base64String): Uint8Array { + const binString = atob(base64) + return Uint8Array.from(binString, (m) => { + const cp = m.codePointAt(0) + if (cp === undefined) throw new Error("Invalid base64") + return cp + }) +} + +export function bytesToBase64(bytes: Uint8Array): Base64String { + const binString = Array.from(bytes, (byte) => + String.fromCodePoint(byte) + ).join("") + return btoa(binString) as Base64String +} + +export function hexToBytes(hex: string): Uint8Array { + if (hex.length % 2 !== 0) throw new Error("Invalid hex") + const bytes = new Uint8Array(hex.length / 2) + for (let i = 0; i < hex.length; i += 2) { + const parsed = parseInt(hex.slice(i, i + 2), 16) + if (isNaN(parsed)) throw new Error("Invalid hex") + bytes[i / 2] = parsed + } + return bytes +} + +export function bytesToHex(bytes: Uint8Array): HexString { + return Array.from(bytes) + .map((byte) => byte.toString(16).padStart(2, "0")) + .join("") as HexString +} + export const makeAnnotationsSlug = (columnSlug: string): string => `${columnSlug}-annotations` diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index bad8f5efa17..c94ef244060 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -20,6 +20,12 @@ export { firstOfNonEmptyArray, lastOfNonEmptyArray, mapToObjectLiteral, + type Base64String, + type HexString, + bytesToBase64, + base64ToBytes, + bytesToHex, + hexToBytes, next, previous, domainExtent, diff --git a/settings/serverSettings.ts 
b/settings/serverSettings.ts index c6f3c42cbf2..d945b49173c 100644 --- a/settings/serverSettings.ts +++ b/settings/serverSettings.ts @@ -154,22 +154,29 @@ export const IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH: string = IMAGE_HOSTING_R2_BUCKET_PATH.indexOf("/") + 1 ) // extract R2 credentials from rclone config as defaults -export const IMAGE_HOSTING_R2_ENDPOINT: string = - serverSettings.IMAGE_HOSTING_R2_ENDPOINT || +export const R2_ENDPOINT: string = + serverSettings.R2_ENDPOINT || rcloneConfig["owid-r2"]?.endpoint || "https://078fcdfed9955087315dd86792e71a7e.r2.cloudflarestorage.com" -export const IMAGE_HOSTING_R2_ACCESS_KEY_ID: string = - serverSettings.IMAGE_HOSTING_R2_ACCESS_KEY_ID || +export const R2_ACCESS_KEY_ID: string = + serverSettings.R2_ACCESS_KEY_ID || rcloneConfig["owid-r2"]?.access_key_id || "" -export const IMAGE_HOSTING_R2_SECRET_ACCESS_KEY: string = - serverSettings.IMAGE_HOSTING_R2_SECRET_ACCESS_KEY || +export const R2_SECRET_ACCESS_KEY: string = + serverSettings.R2_SECRET_ACCESS_KEY || rcloneConfig["owid-r2"]?.secret_access_key || "" -export const IMAGE_HOSTING_R2_REGION: string = - serverSettings.IMAGE_HOSTING_R2_REGION || - rcloneConfig["owid-r2"]?.region || - "auto" +export const R2_REGION: string = + serverSettings.R2_REGION || rcloneConfig["owid-r2"]?.region || "auto" + +export const GRAPHER_CONFIG_BASE_URL: string = + serverSettings.GRAPHER_CONFIG_BASE_URL || + "https://ourworldindata.org/grapher/" + +export const GRAPHER_CONFIG_R2_BUCKET: string | undefined = + serverSettings.GRAPHER_CONFIG_R2_BUCKET +export const GRAPHER_CONFIG_R2_BUCKET_PATH: string | undefined = + serverSettings.GRAPHER_CONFIG_R2_BUCKET_PATH export const DATA_API_URL: string = clientSettings.DATA_API_URL diff --git a/site/gdocs/components/Image.tsx b/site/gdocs/components/Image.tsx index 57e58bbb967..3aceb05b773 100644 --- a/site/gdocs/components/Image.tsx +++ b/site/gdocs/components/Image.tsx @@ -115,9 +115,7 @@ export default function Image(props: { if (isPreviewing) 
{ const makePreviewUrl = (f: string) => - `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent( - f - )}` + `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent(f)}` const PreviewSource = (props: { i?: ImageMetadata; sm?: boolean }) => { const { i, sm } = props diff --git a/tsconfig.json b/tsconfig.json index 94bbeed9aae..ae863d527a2 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -54,6 +54,9 @@ }, { "path": "./devTools/navigationTest" + }, + { + "path": "./devTools/syncGraphersToR2" } ] } From ba404e32ea5f95de733a0790e480bc6f6646496a Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 5 Aug 2024 19:19:23 +0200 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=94=A8=20use=20grapher=20configs=20fr?= =?UTF-8?q?om=20R2=20for=20thumbnail=20rendering?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 2 +- adminSiteServer/chartConfigR2Helpers.ts | 5 +- devTools/syncGraphersToR2/syncGraphersToR2.ts | 2 +- functions/README.md | 8 +++- functions/_common/grapherRenderer.ts | 47 ++++++++++++++----- functions/grapher/thumbnail/[slug].ts | 5 ++ package.json | 2 +- .../types/src/domainTypes/Various.ts | 5 ++ packages/@ourworldindata/types/src/index.ts | 1 + wrangler.toml | 15 ++++++ 10 files changed, 71 insertions(+), 21 deletions(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 8b7ae33f20f..a60f472803d 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -85,6 +85,7 @@ import { DbInsertUser, FlatTagGraph, DbRawChartConfig, + R2GrapherConfigDirectory, } from "@ourworldindata/types" import { uuidv7 } from "uuidv7" import { @@ -158,7 +159,6 @@ import path from "path" import { deleteGrapherConfigFromR2, deleteGrapherConfigFromR2ByUUID, - R2GrapherConfigDirectory, saveGrapherConfigToR2, saveGrapherConfigToR2ByUUID, getMd5HashBase64, diff --git 
a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index 781fbd233fb..eb670bcb46e 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -14,6 +14,7 @@ import { S3Client, } from "@aws-sdk/client-s3" import { Base64String, JsonError } from "@ourworldindata/utils" +import { R2GrapherConfigDirectory } from "@ourworldindata/types" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" import { createHash } from "crypto" @@ -25,10 +26,6 @@ export function getMd5HashBase64(data: string): Base64String { .update(data, "utf-8") .digest("base64") as Base64String } -export enum R2GrapherConfigDirectory { - byUUID = "config/by-uuid", - publishedGrapherBySlug = "grapher/by-slug", -} let s3Client: S3Client | undefined = undefined diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index bedbc1d722d..081d83adc6c 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -23,7 +23,6 @@ import { KnexReadonlyTransaction, knexReadonlyTransaction, } from "../../db/db.js" -import { R2GrapherConfigDirectory } from "../../adminSiteServer/chartConfigR2Helpers.js" import { base64ToBytes, bytesToBase64, @@ -32,6 +31,7 @@ import { excludeUndefined, HexString, hexToBytes, + R2GrapherConfigDirectory, } from "@ourworldindata/utils" import { string } from "ts-pattern/dist/patterns.js" import { chunk, take } from "lodash" diff --git a/functions/README.md b/functions/README.md index caaa61334a0..6fdf5ca8bf4 100644 --- a/functions/README.md +++ b/functions/README.md @@ -10,6 +10,8 @@ Pages Functions are very similar to Cloudflare Workers; however they will always Pages Functions use file-based routing, which means that the file `grapher/[slug].ts` will serve routes like `/grapher/child-mortality`. 
In addition, there's a [`_routes.json`](../_routes.json) file that specifies which routes are to be served dynamically. +Inside a file-based route we sometimes use an instance of itty-router to decide on the exact functionality to provide (e.g. png vs svg generation). + ## Development 1. Copy `.dev.vars.example` to `.dev.vars` and fill in the required variables. @@ -28,7 +30,9 @@ Note: compatibility dates between local development, production and preview envi ## Testing on Fondation staging sites vs Cloudfare previews -`yarn deployContentPreview` deploys the staging `bakedSite` to a Cloudflare preview at https://[PREVIEW_BRANCH].owid-staging.pages.dev. This is the recommended way to test functions in a production-like environment. See [../ops/buildkite/deploy-content-preview](../ops/buildkite/deploy-content-preview) for more details. +We have two Cloudflare projects set up that you can deploy previews to: `owid`, which is also where our production deployment runs, and `owid-staging`. Currently, `owid` is configured to require authentication while `owid-staging` is accessible from the internet without any kind of auth. + +`yarn deployContentPreview` deploys the staging `bakedSite` to a Cloudflare preview at https://[PREVIEW_BRANCH].[PROJECT].pages.dev. This is the recommended way to test functions in a production-like environment. See [../ops/buildkite/deploy-content-preview](../ops/buildkite/deploy-content-preview) for more details. ### Rationale @@ -36,7 +40,7 @@ A custom staging site is available at http://staging-site-[BRANCH] upon pushing When it comes to testing functions in a production-like environment, Cloudflare previews are recommended. -Cloudflare previews are served by Cloudflare (as opposed to `wrangler` on staging sites) and are available at https://[RANDOM_ID].owid-staging.pages.dev. Cloudflare previews do not rely on the `wrangler` CLI and its `.dev.vars` file. 
Instead, they use the [Cloudflare dashboard to configure environment variables](https://dash.cloudflare.com/078fcdfed9955087315dd86792e71a7e/pages/view/owid/settings/environment-variables), in the same way and place as the production site. +Cloudflare previews are served by Cloudflare (as opposed to `wrangler` on staging sites) and are available at https://[RANDOM_ID].[PROJECT].pages.dev. Cloudflare previews do not rely on the `wrangler` CLI and its `.dev.vars` file, but they do take the `wrangler.toml` file into account for environment variables. For secrets, they use the [values set via the Cloudflare dashboard](https://dash.cloudflare.com/078fcdfed9955087315dd86792e71a7e/pages/view/owid/settings/environment-variables), in the same way and place as the production site. This proximity of configurations in the Cloudflare dashboard makes spotting differences between production and preview environments easier - and is one of the reason of using Cloudflare previews in the same project (owid) over using a new project specific to staging. 
diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 249488b75bf..ffe054581dc 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -1,5 +1,10 @@ -import { Grapher, GrapherInterface } from "@ourworldindata/grapher" -import { Bounds, deserializeJSONFromHTML } from "@ourworldindata/utils" +import { Grapher } from "@ourworldindata/grapher" +import { + Bounds, + excludeUndefined, + GrapherInterface, + R2GrapherConfigDirectory, +} from "@ourworldindata/utils" import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm" import { TimeLogger } from "./timeLogger" import { png } from "itty-router" @@ -143,19 +148,33 @@ async function fetchAndRenderGrapherToSvg({ }) { const grapherLogger = new TimeLogger("grapher") - // Fetch grapher config and extract it from the HTML - const grapherConfig: GrapherInterface = await env.ASSETS.fetch( - new URL(`/grapher/${slug}`, env.url) - ) - .then((r) => (r.ok ? r : Promise.reject("Failed to load grapher page"))) - .then((r) => r.text()) - .then((html) => deserializeJSONFromHTML(html)) + const url = new URL(`/grapher/${slug}`, env.url) + const slugOnly = url.pathname.split("/").pop() - if (!grapherConfig) { - throw new Error("Could not find grapher config") + // The top level directory is either the bucket path (should be set in dev environments and production) + // or the branch name on preview staging environments + console.log("branch", env.CF_PAGES_BRANCH) + const topLevelDirectory = env.GRAPHER_CONFIG_R2_BUCKET_PATH + ? 
[env.GRAPHER_CONFIG_R2_BUCKET_PATH] + : ["by-branch", env.CF_PAGES_BRANCH] + + const key = excludeUndefined([ + ...topLevelDirectory, + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${slugOnly}.json`, + ]).join("/") + + console.log("fetching grapher config from this key", key) + + // Fetch grapher config + const fetchResponse = await env.r2ChartConfigs.get(key) + + if (!fetchResponse) { + return null } - grapherLogger.log("fetchGrapherConfig") + const grapherConfig: GrapherInterface = await fetchResponse.json() + console.log("grapher title", grapherConfig.title) const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight) const grapher = new Grapher({ @@ -206,6 +225,10 @@ export const fetchAndRenderGrapher = async ( env, }) + if (!svg) { + return new Response("Not found", { status: 404 }) + } + switch (outType) { case "png": return png(await renderSvgToPng(svg, options)) diff --git a/functions/grapher/thumbnail/[slug].ts b/functions/grapher/thumbnail/[slug].ts index b5efae2ac13..a62cb8c8d17 100644 --- a/functions/grapher/thumbnail/[slug].ts +++ b/functions/grapher/thumbnail/[slug].ts @@ -5,7 +5,12 @@ export interface Env { ASSETS: { fetch: typeof fetch } + r2ChartConfigs: { + get: (url: string) => Promise + } url: URL + GRAPHER_CONFIG_R2_BUCKET_PATH: string + CF_PAGES_BRANCH: string ENV: string } diff --git a/package.json b/package.json index 20f8e503d94..7a5c7142f9b 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ "revertLastDbMigration": "tsx --tsconfig tsconfig.tsx.json node_modules/typeorm/cli.js migration:revert -d db/dataSource.ts", "startAdminServer": "node --enable-source-maps ./itsJustJavascript/adminSiteServer/app.js", "startAdminDevServer": "tsx watch --ignore '**.mjs' --tsconfig tsconfig.tsx.json adminSiteServer/app.tsx", - "startLocalCloudflareFunctions": "wrangler pages dev", + "startLocalCloudflareFunctions": "wrangler pages dev --local --persist-to ./cfstorage", "startDeployQueueServer": "node --enable-source-maps 
./itsJustJavascript/baker/startDeployQueueServer.js", "startLernaWatcher": "lerna watch --scope '@ourworldindata/*' -- lerna run build --scope=\\$LERNA_PACKAGE_NAME --include-dependents", "startTmuxServer": "node_modules/tmex/tmex dev \"yarn startLernaWatcher\" \"yarn startAdminDevServer\" \"yarn startViteServer\"", diff --git a/packages/@ourworldindata/types/src/domainTypes/Various.ts b/packages/@ourworldindata/types/src/domainTypes/Various.ts index 946339baa14..bc23e990f9d 100644 --- a/packages/@ourworldindata/types/src/domainTypes/Various.ts +++ b/packages/@ourworldindata/types/src/domainTypes/Various.ts @@ -65,3 +65,8 @@ export class JsonError extends Error { export interface QueryParams { [key: string]: string | undefined } + +export enum R2GrapherConfigDirectory { + byUUID = "config/by-uuid", + publishedGrapherBySlug = "grapher/by-slug", +} diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index 3c8e8af77d0..019d446f9ec 100644 --- a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -18,6 +18,7 @@ export { type RawPageview, type UserCountryInformation, type QueryParams, + R2GrapherConfigDirectory, } from "./domainTypes/Various.js" export { type BreadcrumbItem, type KeyValueProps } from "./domainTypes/Site.js" export { diff --git a/wrangler.toml b/wrangler.toml index 4d88b657784..ab8b57941aa 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -13,11 +13,26 @@ MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C016H0BNNB1" ENV = "preview" +[[r2_buckets]] +binding = "r2ChartConfigs" +bucket_name = "owid-grapher-configs-staging" + # Overrides for CF production deployment [env.production] compatibility_date = "2024-04-29" +[[env.production.r2_buckets]] +binding = "r2ChartConfigs" +bucket_name = "owid-grapher-configs" + [env.production.vars] ENV = "production" MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C5JJW19PS" 
+GRAPHER_CONFIG_R2_BUCKET_PATH = "v1" + + +[[env.preview.r2_buckets]] +binding = "r2ChartConfigs" +bucket_name = "owid-grapher-configs-staging" + From ff6fe4c70003fca64193237e44058a78ce198f34 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 15 Aug 2024 15:21:10 +0200 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=94=A8=20switch=20from=20R2=20binding?= =?UTF-8?q?s=20to=20using=20fetch=20against=20a=20public=20bucket?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dev.vars.example | 2 + functions/_common/grapherRenderer.ts | 56 +++++++++++++++++++++++++-- functions/grapher/thumbnail/[slug].ts | 6 +-- wrangler.toml | 19 ++++----- 4 files changed, 65 insertions(+), 18 deletions(-) diff --git a/.dev.vars.example b/.dev.vars.example index 5e3ac2ec579..384f1dc15a7 100644 --- a/.dev.vars.example +++ b/.dev.vars.example @@ -20,3 +20,5 @@ MAILGUN_SENDING_KEY= # optional SLACK_BOT_OAUTH_TOKEN= SLACK_ERROR_CHANNEL_ID=C016H0BNNB1 #bot-testing channel + +GRAPHER_CONFIG_R2_BUCKET_PATH=devs/YOURNAME diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index ffe054581dc..52ac0a3b2e0 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -17,6 +17,7 @@ import LatoMedium from "../_common/fonts/LatoLatin-Medium.ttf.bin" import LatoBold from "../_common/fonts/LatoLatin-Bold.ttf.bin" import PlayfairSemiBold from "../_common/fonts/PlayfairDisplayLatin-SemiBold.ttf.bin" import { Env } from "../grapher/thumbnail/[slug].js" +import { createProxy } from "./proxy.js" declare global { // eslint-disable-next-line no-var @@ -135,16 +136,41 @@ const extractOptions = (params: URLSearchParams): ImageOptions => { return options as ImageOptions } +const WORKER_CACHE_TIME_IN_SECONDS = 60 + +async function fetchFromR2( + url: URL, + etag: string | undefined, + fallbackUrl?: URL +) { + const headers = new Headers() + if (etag) headers.set("If-None-Match", etag) + const init = { + 
cf: { + cacheEverything: true, + cacheTtl: WORKER_CACHE_TIME_IN_SECONDS, + }, + headers, + } + const primaryResponse = await fetch(url.toString(), init) + if (primaryResponse.status === 404 && fallbackUrl) { + return fetch(fallbackUrl.toString(), init) + } + return primaryResponse +} + async function fetchAndRenderGrapherToSvg({ slug, options, searchParams, env, + etag, }: { slug: string options: ImageOptions searchParams: URLSearchParams env: Env + etag?: string }) { const grapherLogger = new TimeLogger("grapher") @@ -166,10 +192,31 @@ async function fetchAndRenderGrapherToSvg({ console.log("fetching grapher config from this key", key) + const requestUrl = new URL(key, env.GRAPHER_CONFIG_R2_BUCKET_URL) + + let fallbackUrl: URL | undefined + + if ( + env.GRAPHER_CONFIG_R2_BUCKET_FALLBACK_URL && + env.GRAPHER_CONFIG_R2_BUCKET_FALLBACK_PATH + ) { + const topLevelDirectory = env.GRAPHER_CONFIG_R2_BUCKET_FALLBACK_PATH + const fallbackKey = excludeUndefined([ + topLevelDirectory, + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${slugOnly}.json`, + ]).join("/") + fallbackUrl = new URL( + fallbackKey, + env.GRAPHER_CONFIG_R2_BUCKET_FALLBACK_URL + ) + } + // Fetch grapher config - const fetchResponse = await env.r2ChartConfigs.get(key) + const fetchResponse = await fetchFromR2(requestUrl, etag, fallbackUrl) - if (!fetchResponse) { + if (fetchResponse.status !== 200) { + console.log("Failed to fetch grapher config", fetchResponse.status) return null } @@ -213,7 +260,8 @@ export const fetchAndRenderGrapher = async ( slug: string, searchParams: URLSearchParams, outType: "png" | "svg", - env: Env + env: Env, + etag?: string ) => { const options = extractOptions(searchParams) @@ -223,7 +271,9 @@ export const fetchAndRenderGrapher = async ( options, searchParams, env, + etag, }) + console.log("fetched svg") if (!svg) { return new Response("Not found", { status: 404 }) diff --git a/functions/grapher/thumbnail/[slug].ts b/functions/grapher/thumbnail/[slug].ts index 
a62cb8c8d17..e49e32508fc 100644 --- a/functions/grapher/thumbnail/[slug].ts +++ b/functions/grapher/thumbnail/[slug].ts @@ -5,11 +5,11 @@ export interface Env { ASSETS: { fetch: typeof fetch } - r2ChartConfigs: { - get: (url: string) => Promise - } url: URL + GRAPHER_CONFIG_R2_BUCKET_URL: string + GRAPHER_CONFIG_R2_BUCKET_FALLBACK_URL: string GRAPHER_CONFIG_R2_BUCKET_PATH: string + GRAPHER_CONFIG_R2_BUCKET_FALLBACK_PATH: string CF_PAGES_BRANCH: string ENV: string } diff --git a/wrangler.toml b/wrangler.toml index ab8b57941aa..d9204565238 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -6,6 +6,10 @@ pages_build_output_dir = "./localBake" # Vars that should be available in all envs, including local dev [vars] ENV = "development" +GRAPHER_CONFIG_R2_BUCKET_URL = "https://grapher-configs-staging.ourworldindata.org" +GRAPHER_CONFIG_R2_BUCKET_FALLBACK_URL = "https://grapher-configs.ourworldindata.org" +GRAPHER_CONFIG_R2_BUCKET_FALLBACK_PATH = "v1" + # Overrides for CF preview deployments [env.preview.vars] @@ -13,26 +17,17 @@ MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C016H0BNNB1" ENV = "preview" -[[r2_buckets]] -binding = "r2ChartConfigs" -bucket_name = "owid-grapher-configs-staging" - # Overrides for CF production deployment [env.production] compatibility_date = "2024-04-29" -[[env.production.r2_buckets]] -binding = "r2ChartConfigs" -bucket_name = "owid-grapher-configs" - [env.production.vars] ENV = "production" MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C5JJW19PS" +GRAPHER_CONFIG_R2_BUCKET_URL = "https://grapher-configs.ourworldindata.org" +GRAPHER_CONFIG_R2_BUCKET_FALLBACK_URL = "" +GRAPHER_CONFIG_R2_BUCKET_FALLBACK_PATH = "" GRAPHER_CONFIG_R2_BUCKET_PATH = "v1" -[[env.preview.r2_buckets]] -binding = "r2ChartConfigs" -bucket_name = "owid-grapher-configs-staging" - From e35e11e958eb2c65cf51e93c603772f4bc2d238b Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 12 Aug 2024 22:35:09 +0200 Subject: [PATCH 4/4] Make 
syncGraphersToR2 support subcommands old behavior is sync, new is saving single files to the local dev R2 bucket using the wrangler cli --- devTools/syncGraphersToR2/README.md | 19 ++++ devTools/syncGraphersToR2/syncGraphersToR2.ts | 92 +++++++++++++++++-- 2 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 devTools/syncGraphersToR2/README.md diff --git a/devTools/syncGraphersToR2/README.md b/devTools/syncGraphersToR2/README.md new file mode 100644 index 00000000000..649235d35be --- /dev/null +++ b/devTools/syncGraphersToR2/README.md @@ -0,0 +1,19 @@ +# syncGraphersToR2 + +This script, `syncGraphersToR2.ts`, is used to sync grapher configurations from the `chart_configs` table to R2 storage. It supports different commands to perform specific tasks. + +## Available Commands + +- `sync`: Sync all grapher configs from the DB to R2 buckets, both upserting into R2 and deleting obsolete ones from R2. This command is useful for production if the R2 storage should get out of sync with the database and/or to initially fill R2. It can't be used to fill local development R2 buckets. +- `store-dev-by-slug`: Fetch a grapher config by slug from the `chart_configs` table and store it in the local dev R2 storage. This is useful for your local dev environment when you want to test the CF Pages Functions that need R2 files to exist. CF Pages Functions using R2 bindings can (as of 2024-08-13) not access real remote R2 buckets. + +## Usage + +To run the script, use the following command: + +```sh +yarn syncGraphersToR2 [command] [options] +``` + +Options +--dry-run: Don't make any actual changes to R2. 
diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 081d83adc6c..f332f9ed59b 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -3,7 +3,6 @@ import parseArgs from "minimist" import { DeleteObjectCommand, DeleteObjectCommandInput, - ListObjectsCommand, ListObjectsV2Command, ListObjectsV2CommandOutput, PutObjectCommand, @@ -20,22 +19,21 @@ import { } from "../../settings/serverSettings.js" import { knexRaw, + knexRawFirst, KnexReadonlyTransaction, knexReadonlyTransaction, } from "../../db/db.js" import { - base64ToBytes, bytesToBase64, DbRawChartConfig, - differenceOfSets, excludeUndefined, HexString, hexToBytes, R2GrapherConfigDirectory, } from "@ourworldindata/utils" -import { string } from "ts-pattern/dist/patterns.js" -import { chunk, take } from "lodash" +import { chunk } from "lodash" import ProgressBar from "progress" +import { exec } from "child_process" type HashAndId = Pick @@ -194,7 +192,7 @@ async function syncWithR2( } } -async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { +async function sync(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { if ( GRAPHER_CONFIG_R2_BUCKET === undefined || GRAPHER_CONFIG_R2_BUCKET_PATH === undefined @@ -280,13 +278,97 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { }) } +async function storeDevBySlug( + parsedArgs: parseArgs.ParsedArgs, + dryRun: boolean +) { + const slug = parsedArgs._[1] + if (!slug) { + console.error("No slug provided") + return + } + + await knexReadonlyTransaction(async (trx) => { + // Fetch the chart config from the DB by slug + const chart = await knexRawFirst( + trx, + `SELECT full FROM chart_configs WHERE slug = ? 
and full ->> '$.isPublished' = "true"`, + [slug] + ) + if (!chart) { + console.error(`No chart found for slug ${slug}`) + return + } + + console.log("Config retrieved for", slug) + + // --dry-run must not modify the local R2 storage, so stop before invoking wrangler + if (dryRun) { + console.log("Dry run: not storing config for", slug) + return + } + + const fullConfig = chart.full + const command = `npx wrangler r2 object put --local ${GRAPHER_CONFIG_R2_BUCKET}/${GRAPHER_CONFIG_R2_BUCKET_PATH}/${R2GrapherConfigDirectory.publishedGrapherBySlug}/${slug}.json --pipe --content-type application/json --persist-to ./cfstorage` + + const process = exec(command, (error, stdout, stderr) => { + if (error) { + console.error( + `Error executing wrangler command: ${error.message}` + ) + return + } + if (stderr) { + console.error(`Wrangler stderr: ${stderr}`) + return + } + console.log(`Wrangler stdout: ${stdout}`) + }) + + if (process.stdin) { + process.stdin.write(fullConfig) + process.stdin.end() + } + // wait until the process exits + await new Promise((resolve) => { + process.on("exit", resolve) + }) + console.log("Config stored for", slug) + }) +} + +async function main(parsedArgs: parseArgs.ParsedArgs) { + const dryRun = parsedArgs["dry-run"] + + const command = parsedArgs._[0] + + switch (command) { + case "sync": + await sync(parsedArgs, dryRun) + break + case "store-dev-by-slug": + await storeDevBySlug(parsedArgs, dryRun) + break + default: + console.log( + `Unknown command: ${command}\n\nAvailable commands:\n sync\n store-dev-by-slug` + ) + break + } + process.exit(0) +} + const parsedArgs = parseArgs(process.argv.slice(2)) if (parsedArgs["h"]) { console.log( `syncGraphersToR2.js - sync grapher configs from the chart_configs table to R2 ---dry-run: Don't make any actual changes to R2` +--dry-run: Don't make any actual changes to R2 + +Commands: + sync: Sync grapher configs to R2 + store-dev-by-slug: Fetch a grapher config by slug from the chart_configs table and store it in the local dev R2 storage` ) } else { - main(parsedArgs, parsedArgs["dry-run"]) + main(parsedArgs) }