Skip to content

Commit

Permalink
Merge branch 'master' into fix/identify-lock
Browse files Browse the repository at this point in the history
  • Loading branch information
bodinsamuel authored Sep 17, 2024
2 parents ada0b4e + 7bd22eb commit 14adf59
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 123 deletions.
70 changes: 70 additions & 0 deletions integration-templates/google-drive/actions/fetch-document.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import type { NangoAction, ProxyConfiguration } from '../../models';
import type { GoogleDriveFileResponse } from '../types.js';
import { mimeTypeMapping } from '../types.js';

/**
 * Retrieves the content of a Google Drive file.
 *
 * The file's metadata is fetched first so that its MIME type can be looked up in
 * `mimeTypeMapping`, which supplies the target MIME type and how the response body is read.
 * Google Workspace files (MIME types starting with `application/vnd.google-apps.`) have no
 * binary content and must be converted through the export endpoint; every other file is
 * downloaded through `files.get` with `alt=media`.
 *
 * For detailed endpoint documentation, refer to:
 *
 * https://developers.google.com/drive/api/reference/rest/v3/files/get
 * https://developers.google.com/drive/api/reference/rest/v3/files/export
 * @param nango - An instance of NangoAction used for making API requests.
 * @param input - The ID of the file to be retrieved, provided as a string.
 * @returns The file content: the response body as-is when the mapped response type is `text`,
 * otherwise the streamed bytes encoded as a base64 string.
 * @throws Error if the input is invalid, if the file's MIME type is not supported, or if the
 * file metadata or content retrieval fails.
 */
export default async function runAction(nango: NangoAction, input: string): Promise<string> {
    if (!input || typeof input !== 'string') {
        throw new Error('Missing or invalid input: a file ID is required and should be a string');
    }

    // Fetch the file metadata first to get the MIME type.
    // The ID comes from the caller, so encode it to keep it confined to a single path segment.
    const metadataConfig: ProxyConfiguration = {
        endpoint: `drive/v3/files/${encodeURIComponent(input)}`,
        params: {
            fields: 'id, name, mimeType'
        }
    };
    const fileMetadataResponse = await nango.get<GoogleDriveFileResponse>(metadataConfig);

    if (fileMetadataResponse.status !== 200 || !fileMetadataResponse.data) {
        throw new Error(`Failed to retrieve file metadata: Status Code ${fileMetadataResponse.status}`);
    }

    const file = fileMetadataResponse.data;
    const mimeTypeDetails = mimeTypeMapping[file.mimeType];

    if (!mimeTypeDetails) {
        throw new Error(`Unsupported MIME type: ${file.mimeType}`);
    }

    const { mimeType: exportMimeType, responseType } = mimeTypeDetails;

    await nango.log('Fetching document of ', { exportMimeType });

    // The endpoint depends on the kind of file, not on how the response is read:
    // files.export only accepts Google Workspace files, while files.get with alt=media
    // only works for files that have binary content stored in Drive.
    const isGoogleWorkspaceFile = file.mimeType.startsWith('application/vnd.google-apps.');
    const endpoint = isGoogleWorkspaceFile ? `drive/v3/files/${file.id}/export` : `drive/v3/files/${file.id}`;
    const params = isGoogleWorkspaceFile ? { mimeType: exportMimeType } : { alt: 'media' };

    const config: ProxyConfiguration = {
        endpoint,
        params,
        responseType
    };
    const response = await nango.get(config);

    if (response.status !== 200) {
        throw new Error(`Failed to retrieve file content: Status Code ${response.status}`);
    }

    if (responseType === 'text') {
        // Text responses are returned unchanged (not base64-encoded).
        return response.data ?? '';
    }

    // Stream responses are collected chunk by chunk and returned as one base64 string.
    const chunks: Buffer[] = [];
    for await (const chunk of response.data) {
        chunks.push(chunk);
    }
    return Buffer.concat(chunks).toString('base64');
}
35 changes: 0 additions & 35 deletions integration-templates/google-drive/actions/fetch-pdf.ts

This file was deleted.

22 changes: 10 additions & 12 deletions integration-templates/google-drive/nango.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ integrations:
runs: every day
track_deletes: true
description: |
Sync all the content of google drive from a selected file or
folders. PDF files won't be parsed but rather should be fetched
via the proxy using the "google-drive-fetch-pdf" action.
Details: full sync, supports deletes, goes back all time. Metadata
required to filter on a particular folder, or file(s). Metadata
Sync the metadata of a specified file or folders from Google Drive,
handling both individual files and nested folders.
Metadata required to filter on a particular folder, or file(s). Metadata
fields should be {"files": ["<some-id>"]} OR
{"folders": ["<some-id>"]}. The ID should be able to be provided
by using the Google Picker API
Expand All @@ -18,20 +16,21 @@ integrations:
(https://developers.google.com/drive/picker/reference/results)
input: DocumentMetadata
auto_start: false
version: 1.0.1
version: 1.0.2
output: Document
sync_type: full
endpoint: GET /google-drive/documents
scopes: https://www.googleapis.com/auth/drive.readonly
actions:
fetch-pdf:
fetch-document:
input: string
description: |
Fetch PDF data given an ID using a responseStream, chunk the data
into a buffer and return a base64 encoded string that can later
be converted into a PDF using an external tool.
Fetches the content of a file given its ID, processes the data using
a response stream, and encodes it into a base64 string. This base64-encoded
string can be used to recreate the file in its original format using an external tool.
output: string
endpoint: POST /google-drive/fetch-pdf
version: 1.0.1
endpoint: GET /google-drive/fetch-document
scopes: https://www.googleapis.com/auth/drive.readonly
models:
DocumentMetadata:
Expand All @@ -40,5 +39,4 @@ models:
Document:
id: string
url: string
content: string
title: string
96 changes: 34 additions & 62 deletions integration-templates/google-drive/syncs/documents.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,40 @@
import type { NangoSync, Document } from '../../models';

/** Fields of a Google Drive file that this sync reads from `drive/v3/files` responses. */
interface GoogleDriveFileResponse {
// Drive file ID; saved as the Document `id` and used to recurse into folders.
id: string;
// File name; saved as the Document `title`.
name: string;
// MIME type; `application/vnd.google-apps.folder` marks a folder.
mimeType: string;
// Link to the file; saved as the Document `url`.
webViewLink: string;
}

/** Connection metadata selecting what to sync; at least one of the two lists is required. */
interface Metadata {
// IDs of individual files to fetch.
files?: string[];
// IDs of folders to walk, including their nested folders.
folders?: string[];
}

// Maps a Google Workspace MIME type to the MIME type requested from the export endpoint.
// MIME types without an entry fall back to 'text/plain' at the lookup site.
const mimeTypeMapping: Record<string, string> = {
'application/vnd.google-apps.document': 'text/plain',
'application/vnd.google-apps.spreadsheet': 'text/csv',
'application/vnd.google-apps.presentation': 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
};

import type { NangoSync, Document, ProxyConfiguration } from '../../models';
import type { GoogleDriveFileResponse, Metadata } from '../types';

/**
* Fetches and processes documents from Google Drive, saving their metadata in batches.
* For detailed endpoint documentation, refer to:
*
* https://developers.google.com/drive/api/reference/rest/v3/files/get
* @param nango - An instance of NangoSync used for API interactions and metadata management.
* @returns A promise that resolves when all documents are fetched and saved.
* @throws Error if metadata is missing or if there is an issue during the fetching or saving of documents.
*/
export default async function fetchData(nango: NangoSync): Promise<void> {
const metadata = await nango.getMetadata<Metadata>();

if (!metadata || (!metadata.files && !metadata.folders)) {
throw new Error('Metadata for files or folders is required.');
}

// Initialize folders to process and a set to keep track of processed folders
const initialFolders = metadata?.folders ? [...metadata.folders] : [];
const processedFolders = new Set<string>();
const batchSize = 100;
let batch: Document[] = [];

/**
* Processes a folder by fetching and processing its files.
*
* @param folderId - The ID of the folder to process.
*/
async function processFolder(folderId: string) {
if (processedFolders.has(folderId)) return;
processedFolders.add(folderId);

// Query to fetch files in the current folder
const query = `('${folderId}' in parents) and trashed = false`;
const proxyConfiguration = {
const proxyConfiguration: ProxyConfiguration = {
endpoint: `drive/v3/files`,
params: {
fields: 'files(id, name, mimeType, webViewLink, parents), nextPageToken',
Expand All @@ -44,50 +43,53 @@ export default async function fetchData(nango: NangoSync): Promise<void> {
},
paginate: {
response_path: 'files'
}
},
retries: 10
};

// Fetch and process files from the folder
for await (const files of nango.paginate<GoogleDriveFileResponse>(proxyConfiguration)) {
for (const file of files) {
if (file.mimeType === 'application/vnd.google-apps.folder') {
await processFolder(file.id);
} else if (file.mimeType === 'application/vnd.google-apps.document' || file.mimeType === 'application/pdf') {
const content = await fetchDocumentContent(nango, file, file.mimeType);
await processFolder(file.id); // Recursively process subfolders
} else {
batch.push({
id: file.id,
url: file.webViewLink,
content: content || '',
title: file.name
});

if (batch.length === batchSize) {
await nango.batchSave<Document>(batch, 'Document');
batch = [];
batch = []; // Clear batch after saving
}
}
}
}
}

// Start processing initial folders
for (const folderId of initialFolders) {
await processFolder(folderId);
}

// Process individual files specified in metadata
if (metadata?.files) {
for (const file of metadata.files) {
try {
const documentResponse = await nango.get({
const config: ProxyConfiguration = {
endpoint: `drive/v3/files/${file}`,
params: {
fields: 'id, name, mimeType, webViewLink, parents'
}
});
const content = await fetchDocumentContent(nango, documentResponse.data, documentResponse.data.mimeType);
},
retries: 10
};

const documentResponse = await nango.get<GoogleDriveFileResponse>(config);

batch.push({
id: documentResponse.data.id,
url: documentResponse.data.webViewLink,
content: content || '',
title: documentResponse.data.name
});

Expand All @@ -105,33 +107,3 @@ export default async function fetchData(nango: NangoSync): Promise<void> {
await nango.batchSave<Document>(batch, 'Document');
}
}

/**
 * Fetches the exported content of a Google Drive file for the given MIME type.
 *
 * PDFs are not exported and yield an empty string. Spreadsheets are exported as CSV and read as
 * text. Anything else is exported using the MIME type from `mimeTypeMapping`, falling back to
 * `text/plain` when there is no entry.
 *
 * @param nango - NangoSync instance used to call the Drive API and to log failures.
 * @param doc - The Drive file whose content is requested.
 * @param mimeType - The MIME type of the file.
 * @returns The exported content, an empty string for PDFs, or `null` when the request fails
 * (the failure is logged rather than rethrown).
 */
async function fetchDocumentContent(nango: NangoSync, doc: GoogleDriveFileResponse, mimeType: string): Promise<string | null> {
    try {
        // PDFs are deliberately not exported here.
        if (mimeType === 'application/pdf') {
            return '';
        }

        const exportEndpoint = `drive/v3/files/${doc.id}/export`;

        if (mimeType === 'application/vnd.google-apps.spreadsheet') {
            const csvResponse = await nango.get({
                endpoint: exportEndpoint,
                params: {
                    mimeType: 'text/csv'
                },
                responseType: 'text'
            });
            return csvResponse.data;
        }

        const targetMimeType = mimeTypeMapping[mimeType] || 'text/plain';
        const exportResponse = await nango.get({
            endpoint: exportEndpoint,
            params: {
                mimeType: targetMimeType
            }
        });
        return exportResponse.data;
    } catch (e) {
        // Best effort: report the failure and let the caller continue without content.
        await nango.log(`Error fetching content for ${doc.name}: ${e}`);
        return null;
    }
}
52 changes: 52 additions & 0 deletions integration-templates/google-drive/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/**
 * Fields of a Google Drive file read from `drive/v3/files` responses.
 * Which fields are actually present depends on the `fields` parameter of the request.
 */
export interface GoogleDriveFileResponse {
// Drive file ID.
id: string;
// File name.
name: string;
// MIME type of the file; used as the lookup key into `mimeTypeMapping`.
mimeType: string;
// Link to the file; only present when the request asks for `webViewLink`.
webViewLink: string;
}

/** Connection metadata selecting which Drive files and folders the documents sync covers. */
export interface Metadata {
// IDs of individual files to sync.
files?: string[];
// IDs of folders to sync, including their nested folders.
folders?: string[];
}

/** Describes how the content of a file with a given source MIME type is retrieved. */
interface MimeTypeMapping {
    /** MIME type of the retrieved content; used as the export format when the file is exported. */
    mimeType: string;
    /** How the response body is read: `text` as a plain string, `stream` as chunks that are base64-encoded. */
    responseType: 'text' | 'stream';
}

/**
 * Supported source MIME types, keyed by the MIME type Drive reports for the file.
 * A file whose MIME type has no entry here is not supported.
 *
 * Entries read with `responseType: 'text'` must target a textual format: a binary format
 * (e.g. an Office Open XML document) is corrupted when its bytes are decoded as text.
 */
export const mimeTypeMapping: Record<string, MimeTypeMapping> = {
    // Documents
    // Google Docs are exported as plain text so the text response stays readable.
    'application/vnd.google-apps.document': { mimeType: 'text/plain', responseType: 'text' },
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': {
        mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        responseType: 'stream'
    },
    'application/vnd.oasis.opendocument.text': { mimeType: 'application/vnd.oasis.opendocument.text', responseType: 'stream' },
    'application/rtf': { mimeType: 'application/rtf', responseType: 'stream' },
    'text/plain': { mimeType: 'text/plain', responseType: 'stream' },
    // Spreadsheets
    // Google Sheets are exported as CSV so the text response stays readable.
    'application/vnd.google-apps.spreadsheet': { mimeType: 'text/csv', responseType: 'text' },
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': {
        mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        responseType: 'stream'
    },
    'application/vnd.oasis.opendocument.spreadsheet': { mimeType: 'application/vnd.oasis.opendocument.spreadsheet', responseType: 'stream' },
    // PDFs
    'application/pdf': { mimeType: 'application/pdf', responseType: 'stream' },
    // Text Files
    'text/csv': { mimeType: 'text/csv', responseType: 'text' },
    'text/tab-separated-values': { mimeType: 'text/tab-separated-values', responseType: 'text' },
    // Presentations
    // Google Slides are exported as plain text so the text response stays readable.
    'application/vnd.google-apps.presentation': { mimeType: 'text/plain', responseType: 'text' },
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': {
        mimeType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        responseType: 'stream'
    },
    'application/vnd.oasis.opendocument.presentation': { mimeType: 'application/vnd.oasis.opendocument.presentation', responseType: 'stream' },
    // Drawings and Images
    'application/vnd.google-apps.drawing': { mimeType: 'image/jpeg', responseType: 'stream' },
    'image/jpeg': { mimeType: 'image/jpeg', responseType: 'stream' },
    'image/png': { mimeType: 'image/png', responseType: 'stream' },
    'image/svg+xml': { mimeType: 'image/svg+xml', responseType: 'stream' }
};
Loading

0 comments on commit 14adf59

Please sign in to comment.