Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
EmanuelCampos committed Nov 26, 2024
1 parent 958b62b commit 9e869d3
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llama-index-core/llama_index/core/readers/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,3 +816,9 @@ def iter_data(

if len(documents) > 0:
yield documents

def get_file_name(self, resource_id: str) -> str:
    """Return the final path component of ``resource_id``.

    Args:
        resource_id: Identifier of the resource, parsed as a path.

    Returns:
        The ``name`` attribute of ``Path(resource_id)``.
    """
    resource_path = Path(resource_id)
    return resource_path.name

def get_file_path(self, resource_id: str) -> str:
    """Return the path for ``resource_id``.

    The resource ID is returned unchanged.
    """
    file_path = resource_id
    return file_path
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import math
import os
from pathlib import Path
import tempfile
import time
from typing import Any, Dict, List, Optional, Union
Expand Down Expand Up @@ -229,3 +230,9 @@ def load_data(self) -> List[Document]:
logger.info("Document creation starting")

return self._load_documents_with_metadata(files_metadata, temp_dir)

def get_file_name(self, resource_id: str) -> str:
    """Extract the file name from ``resource_id``.

    The ID is parsed with ``pathlib.Path`` and its last component is returned.
    """
    name = Path(resource_id).name
    return name

def get_file_path(self, resource_id: str) -> str:
    """Return ``resource_id`` as the file path, without modification."""
    path = resource_id
    return path
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,12 @@ def _download_files(self, box_files: List[File], temp_dir: str) -> List[File]:
file.downloaded_file_path = local_path
box_files_with_path.append(file)
return box_files_with_path

def get_file_name(self, resource_id: str) -> str:
    """Return the name of the Box file identified by ``resource_id``.

    Looks the file up through ``get_box_files_details`` using this reader's
    ``_box_client`` and returns the ``name`` attribute of the first result.

    Args:
        resource_id: ID passed as the single entry of ``file_ids``.
    """
    file_details = get_box_files_details(
        box_client=self._box_client,
        file_ids=[resource_id],
    )
    first_file = file_details[0]
    return first_file.name

def get_file_path(self, resource_id: str) -> str:
    """Return the path for ``resource_id``.

    This delegates to ``get_file_name``, so the returned path is the file name.
    """
    file_name = self.get_file_name(resource_id)
    return file_name
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,9 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
except Exception as e:
logger.error(f"Error reading file content from GCS: {e!s}")
raise

def get_file_name(self, resource_id: str) -> str:
    """Return the last path component of ``resource_id``.

    Args:
        resource_id: Resource identifier, interpreted as a path.
    """
    parsed = Path(resource_id)
    return parsed.name

def get_file_path(self, resource_id: str) -> str:
    """Return the file path for a resource; the resource ID is used as-is."""
    resolved_path = resource_id
    return resolved_path
Original file line number Diff line number Diff line change
Expand Up @@ -689,3 +689,25 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
downloaded_file = self._download_file(resource_id, temp_file)
with open(downloaded_file, "rb") as file:
return file.read()

def get_file_name(self, resource_id: str) -> str:
    """Look up the name of a Drive file by its ID.

    Refreshes ``self._creds`` through ``_get_credentials``, builds a Drive v3
    service with them, and requests the file with ``supportsAllDrives=True``.

    Args:
        resource_id: Value passed as ``fileId`` to the Drive ``files().get``
            request.

    Returns:
        The ``"name"`` entry of the response.
    """
    from googleapiclient.discovery import build

    self._creds = self._get_credentials()
    drive_service = build("drive", "v3", credentials=self._creds)

    request = drive_service.files().get(
        fileId=resource_id,
        supportsAllDrives=True,
    )
    metadata = request.execute()
    return metadata["name"]

def get_file_path(self, resource_id: str) -> str:
    """Resolve the path of a Drive file by its ID.

    Refreshes ``self._creds`` through ``_get_credentials``, builds a Drive v3
    service with them, and delegates to ``_get_relative_path`` with this
    reader's ``folder_id``.

    Args:
        resource_id: Passed as ``file_id`` to ``_get_relative_path``.
    """
    from googleapiclient.discovery import build

    self._creds = self._get_credentials()
    drive_service = build("drive", "v3", credentials=self._creds)

    relative_path = self._get_relative_path(
        drive_service,
        file_id=resource_id,
        folder_id=self.folder_id,
    )
    return relative_path
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import os
from pathlib import Path
import tempfile
import time
from typing import Any, Dict, List, Optional, Union
Expand Down Expand Up @@ -835,3 +836,9 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:

async def aread_file_content(self, resource_id: str, **kwargs) -> bytes:
    """Async wrapper around ``read_file_content``.

    Calls the synchronous ``read_file_content`` directly with the same
    arguments and returns its result.
    """
    content = self.read_file_content(resource_id, **kwargs)
    return content

def get_file_name(self, resource_id: str) -> str:
    """Return the file name portion of ``resource_id``.

    The ID is treated as a path; only its final component is returned.
    """
    as_path = Path(resource_id)
    return as_path.name

def get_file_path(self, resource_id: str) -> str:
    """Return the file path for ``resource_id`` (the ID itself)."""
    result = resource_id
    return result
Original file line number Diff line number Diff line change
Expand Up @@ -863,3 +863,9 @@ def read_file_content(self, resource_id: str, **kwargs) -> bytes:
"An error occurred while reading file content from SharePoint: %s", exp
)
raise

def get_file_name(self, resource_id: str) -> str:
    """Return the trailing component of ``resource_id`` parsed as a path.

    Returns:
        ``Path(resource_id).name``.
    """
    file_name = Path(resource_id).name
    return file_name

def get_file_path(self, resource_id: str) -> str:
    """Return the resource's file path, which is ``resource_id`` unchanged."""
    location = resource_id
    return location
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,9 @@ def load_resource(self, resource_id: str, **kwargs) -> List[Document]:
def read_file_content(self, resource_id: str, **kwargs) -> bytes:
    """Read a resource's content through the underlying directory reader.

    Obtains a reader from ``_get_simple_directory_reader`` and forwards the
    call, passing ``resource_id`` as a ``Path`` along with any extra keyword
    arguments.
    """
    directory_reader = self._get_simple_directory_reader()
    target = Path(resource_id)
    return directory_reader.read_file_content(target, **kwargs)

def get_file_name(self, resource_id: str) -> str:
    """Return the base name of ``resource_id``.

    Args:
        resource_id: Resource identifier, parsed with ``pathlib.Path``.
    """
    base_name = Path(resource_id).name
    return base_name

def get_file_path(self, resource_id: str) -> str:
    """Return the file path of the resource; this is ``resource_id`` itself."""
    same_path = resource_id
    return same_path

0 comments on commit 9e869d3

Please sign in to comment.