diff --git a/integration-templates/google-drive/actions/fetch-document.ts b/integration-templates/google-drive/actions/fetch-document.ts new file mode 100644 index 00000000000..7beb20d4bb9 --- /dev/null +++ b/integration-templates/google-drive/actions/fetch-document.ts @@ -0,0 +1,73 @@ +import type { NangoAction, ProxyConfiguration } from '../../models'; +import type { GoogleDriveFileResponse } from '../types.js'; +import { mimeTypeMapping } from '../types.js'; + +/** + * Retrieves the content of a Google Drive file: returned as plain text when the MIME type mapping asks for a 'text' response, and as a base64-encoded string otherwise. + * + * For detailed endpoint documentation, refer to: + * + * https://developers.google.com/drive/api/reference/rest/v3/files/get + * https://developers.google.com/drive/api/reference/rest/v3/files/export + * @param nango - An instance of NangoAction used for making API requests. + * @param input - The ID of the file to be retrieved, provided as a string. + * @returns The file content, either as text or as a base64-encoded string. + * @throws Error if the input is invalid, the MIME type is unsupported, or if the file metadata or content retrieval fails. 
+ */ +export default async function runAction(nango: NangoAction, input: string): Promise<string> { + if (!input || typeof input !== 'string') { + throw new Error('Missing or invalid input: a file ID is required and should be a string'); + } + + // Fetch the file metadata first to get the MIME type + const metadataConfig: ProxyConfiguration = { + endpoint: `drive/v3/files/${input}`, + params: { + fields: 'id, name, mimeType' + } + }; + const fileMetadataResponse = await nango.get<GoogleDriveFileResponse>(metadataConfig); + + if (fileMetadataResponse.status !== 200 || !fileMetadataResponse.data) { + throw new Error(`Failed to retrieve file metadata: Status Code ${fileMetadataResponse.status}`); + } + + const file = fileMetadataResponse.data; + const mimeTypeDetails = mimeTypeMapping[file.mimeType]; + + if (!mimeTypeDetails) { + throw new Error(`Unsupported MIME type: ${file.mimeType}`); + } + + const { mimeType: exportMimeType, responseType } = mimeTypeDetails; + + await nango.log('Fetching document of ', { exportMimeType }); + + // Google Workspace files (Docs, Sheets, Slides, Drawings) have no binary content and must be + // converted through files.export; every other file is downloaded via files.get with alt=media. + const isGoogleWorkspaceFile = file.mimeType.startsWith('application/vnd.google-apps.'); + const endpoint = isGoogleWorkspaceFile ? `drive/v3/files/${file.id}/export` : `drive/v3/files/${file.id}`; + const params = isGoogleWorkspaceFile ? { mimeType: exportMimeType } : { alt: 'media' }; + + const config: ProxyConfiguration = { + endpoint, + params, + responseType + }; + const response = await nango.get(config); + + if (response.status !== 200) { + throw new Error(`Failed to retrieve file content: Status Code ${response.status}`); + } + + if (responseType === 'text') { + return response.data ?? 
''; + } else { + const chunks: Buffer[] = []; + for await (const chunk of response.data) { + chunks.push(chunk); + } + const buffer = Buffer.concat(chunks); + return buffer.toString('base64'); + } +} diff --git a/integration-templates/google-drive/actions/fetch-pdf.ts b/integration-templates/google-drive/actions/fetch-pdf.ts deleted file mode 100644 index 7db0dd178a8..00000000000 --- a/integration-templates/google-drive/actions/fetch-pdf.ts +++ /dev/null @@ -1,35 +0,0 @@ -import type { NangoAction } from '../../models'; - -export default async function runAction(nango: NangoAction, input: string): Promise { - if (!input || typeof input !== 'string') { - throw new Error('Missing or invalid input: a pdf id is required and should be a string'); - } - - const response = await nango.get({ - endpoint: `drive/v3/files/${input}`, - params: { - alt: 'media' - }, - responseType: 'stream' - }); - - if (response.status !== 200) { - throw new Error(`Failed to retrieve file: Status Code ${response.status}`); - } - - const chunks = []; - - try { - for await (const chunk of response.data) { - chunks.push(chunk); - } - } catch (streamError: any) { - throw new Error(`Error during stream processing: ${streamError.message}`); - } - - const buffer = Buffer.concat(chunks); - - const base64Data = buffer.toString('base64'); - - return base64Data; -} diff --git a/integration-templates/google-drive/nango.yaml b/integration-templates/google-drive/nango.yaml index 29f3de2dea8..5d68076894e 100644 --- a/integration-templates/google-drive/nango.yaml +++ b/integration-templates/google-drive/nango.yaml @@ -5,11 +5,9 @@ integrations: runs: every day track_deletes: true description: | - Sync all the content of google drive from a selected file or - folders. PDF files won't be parsed but rather should be fetched - via the proxy using the "google-drive-fetch-pdf" action. - Details: full sync, supports deletes, goes back all time. Metadata - required to filter on a particular folder, or file(s). 
Metadata + Sync the metadata of a specified file or folders from Google Drive, + handling both individual files and nested folders. + Metadata required to filter on a particular folder, or file(s). Metadata fields should be {"files": ["<some-id>"]} OR {"folders": ["<some-id>"]}. The ID should be able to be provided by using the Google Picker API @@ -18,20 +16,21 @@ integrations: (https://developers.google.com/drive/picker/reference/results) input: DocumentMetadata auto_start: false - version: 1.0.1 + version: 1.0.2 output: Document sync_type: full endpoint: GET /google-drive/documents scopes: https://www.googleapis.com/auth/drive.readonly actions: - fetch-pdf: + fetch-document: input: string description: | - Fetch PDF data given an ID using a responseStream, chunk the data - into a buffer and return a base64 encoded string that can later - be converted into a PDF using an external tool. + Fetches the content of a file given its ID, processes the data using + a response stream, and encodes it into a base64 string. This base64-encoded + string can be used to recreate the file in its original format using an external tool. 
output: string - endpoint: POST /google-drive/fetch-pdf + version: 1.0.1 + endpoint: GET /google-drive/fetch-document scopes: https://www.googleapis.com/auth/drive.readonly models: DocumentMetadata: @@ -40,5 +39,4 @@ models: Document: id: string url: string - content: string title: string diff --git a/integration-templates/google-drive/syncs/documents.ts b/integration-templates/google-drive/syncs/documents.ts index 64d36116c4f..29bbfb87940 100644 --- a/integration-templates/google-drive/syncs/documents.ts +++ b/integration-templates/google-drive/syncs/documents.ts @@ -1,23 +1,15 @@ -import type { NangoSync, Document } from '../../models'; - -interface GoogleDriveFileResponse { - id: string; - name: string; - mimeType: string; - webViewLink: string; -} - -interface Metadata { - files?: string[]; - folders?: string[]; -} - -const mimeTypeMapping: Record = { - 'application/vnd.google-apps.document': 'text/plain', - 'application/vnd.google-apps.spreadsheet': 'text/csv', - 'application/vnd.google-apps.presentation': 'application/vnd.openxmlformats-officedocument.presentationml.presentation' -}; - +import type { NangoSync, Document, ProxyConfiguration } from '../../models'; +import type { GoogleDriveFileResponse, Metadata } from '../types'; + +/** + * Fetches and processes documents from Google Drive, saving their metadata in batches. + * For detailed endpoint documentation, refer to: + * + * https://developers.google.com/drive/api/reference/rest/v3/files/get + * @param nango - An instance of NangoSync used for API interactions and metadata management. + * @returns A promise that resolves when all documents are fetched and saved. + * @throws Error if metadata is missing or if there is an issue during the fetching or saving of documents. 
+ */ export default async function fetchData(nango: NangoSync): Promise<void> { const metadata = await nango.getMetadata<Metadata>(); @@ -25,17 +17,24 @@ export default async function fetchData(nango: NangoSync): Promise<void> { throw new Error('Metadata for files or folders is required.'); } + // Initialize folders to process and a set to keep track of processed folders const initialFolders = metadata?.folders ? [...metadata.folders] : []; const processedFolders = new Set<string>(); const batchSize = 100; let batch: Document[] = []; + /** + * Processes a folder by fetching and processing its files. + * + * @param folderId - The ID of the folder to process. + */ async function processFolder(folderId: string) { if (processedFolders.has(folderId)) return; processedFolders.add(folderId); + // Query to fetch files in the current folder const query = `('${folderId}' in parents) and trashed = false`; - const proxyConfiguration = { + const proxyConfiguration: ProxyConfiguration = { endpoint: `drive/v3/files`, params: { fields: 'files(id, name, mimeType, webViewLink, parents), nextPageToken', @@ -44,50 +43,53 @@ export default async function fetchData(nango: NangoSync): Promise<void> { }, paginate: { response_path: 'files' - } + }, + retries: 10 }; + // Fetch and process files from the folder for await (const files of nango.paginate<GoogleDriveFileResponse>(proxyConfiguration)) { for (const file of files) { if (file.mimeType === 'application/vnd.google-apps.folder') { - await processFolder(file.id); - } else if (file.mimeType === 'application/vnd.google-apps.document' || file.mimeType === 'application/pdf') { - const content = await fetchDocumentContent(nango, file, file.mimeType); + await processFolder(file.id); // Recursively process subfolders + } else { batch.push({ id: file.id, url: file.webViewLink, - content: content || '', title: file.name }); if (batch.length === batchSize) { await nango.batchSave(batch, 'Document'); - batch = []; + batch = []; // Clear batch after saving } } } } } + // Start processing initial folders for 
(const folderId of initialFolders) { await processFolder(folderId); } + // Process individual files specified in metadata if (metadata?.files) { for (const file of metadata.files) { try { - const documentResponse = await nango.get({ + const config: ProxyConfiguration = { endpoint: `drive/v3/files/${file}`, params: { fields: 'id, name, mimeType, webViewLink, parents' - } - }); - const content = await fetchDocumentContent(nango, documentResponse.data, documentResponse.data.mimeType); + }, + retries: 10 + }; + + const documentResponse = await nango.get<GoogleDriveFileResponse>(config); batch.push({ id: documentResponse.data.id, url: documentResponse.data.webViewLink, - content: content || '', title: documentResponse.data.name }); @@ -105,33 +107,3 @@ export default async function fetchData(nango: NangoSync): Promise<void> { await nango.batchSave(batch, 'Document'); } } - -async function fetchDocumentContent(nango: NangoSync, doc: GoogleDriveFileResponse, mimeType: string): Promise { - try { - if (mimeType === 'application/vnd.google-apps.spreadsheet') { - const contentResponse = await nango.get({ - endpoint: `drive/v3/files/${doc.id}/export`, - params: { - mimeType: 'text/csv' - }, - responseType: 'text' - }); - return contentResponse.data; - } else if (mimeType === 'application/pdf') { - return ''; - } else { - const exportType = mimeTypeMapping[mimeType] || 'text/plain'; - const contentResponse = await nango.get({ - endpoint: `drive/v3/files/${doc.id}/export`, - params: { - mimeType: exportType - } - }); - - return contentResponse.data; - } - } catch (e) { - await nango.log(`Error fetching content for ${doc.name}: ${e}`); - return null; - } -} diff --git a/integration-templates/google-drive/types.ts b/integration-templates/google-drive/types.ts new file mode 100644 index 00000000000..0fad2cf6e1b --- /dev/null +++ b/integration-templates/google-drive/types.ts @@ -0,0 +1,52 @@ +export interface GoogleDriveFileResponse { + id: string; + name: string; + mimeType: string; + webViewLink: string; +} + 
+export interface Metadata { + files?: string[]; + folders?: string[]; +} + +interface MimeTypeMapping { + mimeType: string; + responseType: 'text' | 'stream'; +} + +export const mimeTypeMapping: Record<string, MimeTypeMapping> = { + // Documents: Google Docs map to the .docx MIME type; other formats map to themselves + 'application/vnd.google-apps.document': { mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', responseType: 'text' }, + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': { + mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + responseType: 'stream' + }, + 'application/vnd.oasis.opendocument.text': { mimeType: 'application/vnd.oasis.opendocument.text', responseType: 'stream' }, + 'application/rtf': { mimeType: 'application/rtf', responseType: 'stream' }, + 'text/plain': { mimeType: 'text/plain', responseType: 'stream' }, + // Spreadsheets: Google Sheets map to the .xlsx MIME type; other formats map to themselves + 'application/vnd.google-apps.spreadsheet': { mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', responseType: 'text' }, + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': { + mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + responseType: 'stream' + }, + 'application/vnd.oasis.opendocument.spreadsheet': { mimeType: 'application/vnd.oasis.opendocument.spreadsheet', responseType: 'stream' }, + // PDFs + 'application/pdf': { mimeType: 'application/pdf', responseType: 'stream' }, + // Text Files + 'text/csv': { mimeType: 'text/csv', responseType: 'text' }, + 'text/tab-separated-values': { mimeType: 'text/tab-separated-values', responseType: 'text' }, + // Presentations: Google Slides map to the .pptx MIME type; other formats map to themselves + 'application/vnd.google-apps.presentation': { mimeType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', responseType: 'text' }, + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': { + mimeType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + responseType: 'stream' + }, + 
'application/vnd.oasis.opendocument.presentation': { mimeType: 'application/vnd.oasis.opendocument.presentation', responseType: 'stream' }, + // Drawings and Images: Google Drawings map to image/jpeg; image formats map to themselves + 'application/vnd.google-apps.drawing': { mimeType: 'image/jpeg', responseType: 'stream' }, + 'image/jpeg': { mimeType: 'image/jpeg', responseType: 'stream' }, + 'image/png': { mimeType: 'image/png', responseType: 'stream' }, + 'image/svg+xml': { mimeType: 'image/svg+xml', responseType: 'stream' } +}; diff --git a/packages/shared/flows.yaml b/packages/shared/flows.yaml index 5047adf6e89..ce927b0f767 100644 --- a/packages/shared/flows.yaml +++ b/packages/shared/flows.yaml @@ -1824,34 +1824,46 @@ integrations: documents: runs: every day track_deletes: true - description: | - Sync all the content of google drive from a selected file or - folders. PDF files won't be parsed but rather should be fetched - via the proxy using the "google-drive-fetch-pdf" action. - Details: full sync, supports deletes, goes back all time. Metadata - required to filter on a particular folder, or file(s). Metadata + description: > + Sync the metadata of a specified file or folders from Google Drive, + + handling both individual files and nested folders. + + Metadata required to filter on a particular folder, or file(s). + Metadata + fields should be {"files": ["<some-id>"]} OR + {"folders": ["<some-id>"]}. 
The ID should be able to be provided + by using the Google Picker API + (https://developers.google.com/drive/picker/guides/overview) + and using the ID field provided by the response + (https://developers.google.com/drive/picker/reference/results) input: DocumentMetadata auto_start: false - version: 1.0.1 + version: 1.0.2 output: Document sync_type: full endpoint: GET /google-drive/documents scopes: https://www.googleapis.com/auth/drive.readonly actions: - fetch-pdf: + fetch-document: input: string - description: | - Fetch PDF data given an ID using a responseStream, chunk the data - into a buffer and return a base64 encoded string that can later - be converted into a PDF using an external tool. + description: > + Fetches the content of a file given its ID, processes the data using + + a response stream, and encodes it into a base64 string. This + base64-encoded + + string can be used to recreate the file in its original format using + an external tool. output: string - endpoint: POST /google-drive/fetch-pdf + version: 1.0.1 + endpoint: GET /google-drive/fetch-document scopes: https://www.googleapis.com/auth/drive.readonly models: DocumentMetadata: @@ -1860,7 +1872,6 @@ integrations: Document: id: string url: string - content: string title: string google-mail: syncs: