# Patch summary: adds get_file_name(resource_id) and get_file_path(resource_id)
# to eight reader classes. Most return Path(resource_id).name and resource_id
# unchanged; Box looks the name up via get_box_files_details and returns that
# name as the path; Google Drive queries the Drive v3 API for the name and uses
# _get_relative_path for the path.
# NOTE(review): the core, gcs and sharepoint hunks call Path without adding an
# import (azstorage and onedrive add one) - confirm those modules import it.
# NOTE(review): line structure was restored from a whitespace-collapsed copy;
# context-line indentation is reconstructed - verify before applying.
diff --git a/llama-index-core/llama_index/core/readers/file/base.py b/llama-index-core/llama_index/core/readers/file/base.py
index ee4a70df68254..d7d7d2b76bd0a 100644
--- a/llama-index-core/llama_index/core/readers/file/base.py
+++ b/llama-index-core/llama_index/core/readers/file/base.py
@@ -816,3 +816,9 @@ def iter_data(
 
             if len(documents) > 0:
                 yield documents
+
+    def get_file_name(self, resource_id: str) -> str:
+        return Path(resource_id).name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return resource_id
diff --git a/llama-index-integrations/readers/llama-index-readers-azstorage-blob/llama_index/readers/azstorage_blob/base.py b/llama-index-integrations/readers/llama-index-readers-azstorage-blob/llama_index/readers/azstorage_blob/base.py
index f52e7b1d0b9a1..326c95a46bb36 100644
--- a/llama-index-integrations/readers/llama-index-readers-azstorage-blob/llama_index/readers/azstorage_blob/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-azstorage-blob/llama_index/readers/azstorage_blob/base.py
@@ -7,6 +7,7 @@
 import logging
 import math
 import os
+from pathlib import Path
 import tempfile
 import time
 from typing import Any, Dict, List, Optional, Union
@@ -229,3 +230,9 @@ def load_data(self) -> List[Document]:
             logger.info("Document creation starting")
 
             return self._load_documents_with_metadata(files_metadata, temp_dir)
+
+    def get_file_name(self, resource_id: str) -> str:
+        return Path(resource_id).name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return resource_id
diff --git a/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReader/base.py b/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReader/base.py
index e47ab6ff79769..3912b835b5018 100644
--- a/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReader/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReader/base.py
@@ -378,3 +378,12 @@ def _download_files(self, box_files: List[File], temp_dir: str) -> List[File]:
             file.downloaded_file_path = local_path
             box_files_with_path.append(file)
         return box_files_with_path
+
+    def get_file_name(self, resource_id: str) -> str:
+        box_file = get_box_files_details(
+            box_client=self._box_client, file_ids=[resource_id]
+        )
+        return box_file[0].name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return self.get_file_name(resource_id)
diff --git a/llama-index-integrations/readers/llama-index-readers-gcs/llama_index/readers/gcs/base.py b/llama-index-integrations/readers/llama-index-readers-gcs/llama_index/readers/gcs/base.py
index 59136e9182b0f..a48df53e9c9f8 100644
--- a/llama-index-integrations/readers/llama-index-readers-gcs/llama_index/readers/gcs/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-gcs/llama_index/readers/gcs/base.py
@@ -273,3 +273,9 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
         except Exception as e:
             logger.error(f"Error reading file content from GCS: {e!s}")
             raise
+
+    def get_file_name(self, resource_id: str) -> str:
+        return Path(resource_id).name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return resource_id
diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py
index 5509d70a68ad3..a3b49ba00aa6f 100644
--- a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py
@@ -689,3 +689,25 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
             downloaded_file = self._download_file(resource_id, temp_file)
             with open(downloaded_file, "rb") as file:
                 return file.read()
+
+    def get_file_name(self, resource_id: str) -> str:
+        """Get the file name from the resource ID."""
+        from googleapiclient.discovery import build
+
+        self._creds = self._get_credentials()
+
+        service = build("drive", "v3", credentials=self._creds)
+        file = service.files().get(fileId=resource_id, supportsAllDrives=True).execute()
+        return file["name"]
+
+    def get_file_path(self, resource_id: str) -> str:
+        """Get the file path from the resource ID."""
+        from googleapiclient.discovery import build
+
+        self._creds = self._get_credentials()
+
+        service = build("drive", "v3", credentials=self._creds)
+
+        return self._get_relative_path(
+            service, file_id=resource_id, folder_id=self.folder_id
+        )
diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py
index f83c1cecd6ad4..78549ec980222 100644
--- a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py
@@ -2,6 +2,7 @@
 
 import logging
 import os
+from pathlib import Path
 import tempfile
 import time
 from typing import Any, Dict, List, Optional, Union
@@ -835,3 +836,9 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
 
     async def aread_file_content(self, resource_id: str, **kwargs) -> bytes:
         return self.read_file_content(resource_id, **kwargs)
+
+    def get_file_name(self, resource_id: str) -> str:
+        return Path(resource_id).name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return resource_id
diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py b/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py
index 2dfb3c226f058..9c12d291cc87d 100644
--- a/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py
@@ -863,3 +863,9 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
                 "An error occurred while reading file content from SharePoint: %s", exp
             )
             raise
+
+    def get_file_name(self, resource_id: str) -> str:
+        return Path(resource_id).name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return resource_id
diff --git a/llama-index-integrations/readers/llama-index-readers-s3/llama_index/readers/s3/base.py b/llama-index-integrations/readers/llama-index-readers-s3/llama_index/readers/s3/base.py
index d05969c528b0a..c1b7f5128027f 100644
--- a/llama-index-integrations/readers/llama-index-readers-s3/llama_index/readers/s3/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-s3/llama_index/readers/s3/base.py
@@ -213,3 +213,9 @@ def load_resource(self, resource_id: str, **kwargs) -> List[Document]:
     def read_file_content(self, resource_id: str, **kwargs) -> bytes:
         simple_directory_reader = self._get_simple_directory_reader()
         return simple_directory_reader.read_file_content(Path(resource_id), **kwargs)
+
+    def get_file_name(self, resource_id: str) -> str:
+        return Path(resource_id).name
+
+    def get_file_path(self, resource_id: str) -> str:
+        return resource_id