Skip to content

Commit

Permalink
List and restore versions (#153)
Browse files Browse the repository at this point in the history
* List non-current and live version(s), and restore a specific non-current version to live

* Added unit tests for list_versions and restore()

* Bumped new version 2.0.14 -> 2.0.15
  • Loading branch information
RupinderKaurSSB authored Jun 11, 2024
1 parent d93b008 commit 1d176f9
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dapla-toolbelt"
version = "2.0.14"
version = "2.0.15"
description = "Dapla Toolbelt"
authors = ["Dapla Developers <dapla-platform-developers@ssb.no>"]
license = "MIT"
Expand Down
52 changes: 52 additions & 0 deletions src/dapla/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pandas as pd
from fsspec.spec import AbstractBufferedFile
from google.cloud import storage

from .auth import AuthClient
from .gcs import GCSFileSystem
Expand Down Expand Up @@ -63,6 +64,57 @@ def ls(gcs_path: str, detail: bool = False, **kwargs: Any) -> Any:
"""
return FileClient.get_gcs_file_system().ls(gcs_path, detail=detail, **kwargs)

@staticmethod
def get_versions(bucket_name: str, file_name: str) -> Any:
    """Get all versions of a file in a bucket.

    The listing is requested with ``versions=True``, so non-current
    generations are returned alongside the live one.

    Args:
        bucket_name: Bucket name where the file is located.
        file_name: Name of the file. It is passed to the listing call as a
            name *prefix*, so any object whose name starts with
            ``file_name`` is included in the result as well.

    Returns:
        List of versions of the file (empty when nothing matches).
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    # list_blobs() hands back a lazy, single-pass page iterator. Materialize
    # it so callers really get the documented list: len(), indexing and
    # repeated iteration all work on the result.
    return list(bucket.list_blobs(prefix=file_name, versions=True))

@staticmethod
def restore_version(
    bucket_name: str,
    file_name: str,
    destination_file: str,
    generation_id: str,
    destination_generation_id: str,
) -> Any:
    """Restore a deleted/non-current version of a file as the live version.

    The chosen generation of ``file_name`` is copied to ``destination_file``
    inside the same bucket.

    Args:
        bucket_name: Name of the bucket that holds the file.
        file_name: Name of the non-current file to restore from.
        destination_file: Name the restored file should get.
        generation_id: Generation id of the non-current version to copy.
        destination_generation_id: In case a live version already exists,
            the generation id of that live version. It is forwarded as the
            ``if_generation_match`` precondition of the copy.

    Returns:
        A new blob with a new generation id.
    """
    client = storage.Client()
    origin_bucket = client.bucket(bucket_name)
    archived_blob = origin_bucket.blob(file_name)

    # A restore stays within one bucket, so the target is looked up under
    # the very same bucket name as the origin.
    target_bucket = client.bucket(bucket_name)

    restored_blob = origin_bucket.copy_blob(
        archived_blob,
        target_bucket,
        destination_file,
        source_generation=generation_id,
        if_generation_match=destination_generation_id,
    )
    return restored_blob

@staticmethod
def cat(gcs_path: str) -> str:
"""Get string content of a file from GCS.
Expand Down
145 changes: 138 additions & 7 deletions tests/test_files.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,148 @@
# Test for FileClient class

import unittest
from unittest.mock import Mock
from unittest.mock import patch

from dapla import FileClient

# The same bucket path written with and without the "gs://" scheme; the
# prefix tests below convert between these two spellings.
PATH_WITH_PREFIX = "gs://bucket/path"
PATH_WITHOUT_PREFIX = "bucket/path"


def test_ensure_gcs_uri_prefix() -> None:
    """Both path spellings must come back carrying the ``gs://`` prefix."""
    for candidate in (PATH_WITH_PREFIX, PATH_WITHOUT_PREFIX):
        assert FileClient._ensure_gcs_uri_prefix(candidate) == PATH_WITH_PREFIX
class TestFiles(unittest.TestCase):
    """Unit tests for ``FileClient`` path helpers and file-version methods.

    Every version-related test patches ``google.cloud.storage.Client``, so
    the code under test only ever talks to ``Mock`` objects.
    """

    def test_ensure_gcs_uri_prefix(self) -> None:
        """Both path spellings end up with the ``gs://`` prefix."""
        assert FileClient._ensure_gcs_uri_prefix(PATH_WITH_PREFIX) == PATH_WITH_PREFIX
        assert (
            FileClient._ensure_gcs_uri_prefix(PATH_WITHOUT_PREFIX) == PATH_WITH_PREFIX
        )

    def test_remove_gcs_uri_prefix(self) -> None:
        """Both path spellings end up without the ``gs://`` prefix."""
        assert (
            FileClient._remove_gcs_uri_prefix(PATH_WITH_PREFIX) == PATH_WITHOUT_PREFIX
        )
        assert (
            FileClient._remove_gcs_uri_prefix(PATH_WITHOUT_PREFIX)
            == PATH_WITHOUT_PREFIX
        )

    @patch("google.cloud.storage.Client")
    def test_get_versions_valid(self, mock_client: Mock) -> None:
        """Two stored generations are listed and returned in order."""
        # Arrange
        bucket_name = "test-bucket"
        file_name = "test-file.txt"
        mock_bucket = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_blob1 = Mock(
            generation=1,
            updated="2023-04-01T00:00:00Z",
            time_deleted=None,
        )
        mock_blob2 = Mock(
            generation=2,
            updated="2023-04-02T00:00:00Z",
            time_deleted=None,
        )
        # ``name`` is reserved by the Mock constructor (it labels the mock in
        # its repr), so the blob's ``name`` attribute has to be assigned
        # after construction to hold a real value.
        mock_blob1.name = file_name
        mock_blob2.name = file_name
        mock_bucket.list_blobs.return_value = [mock_blob1, mock_blob2]

        # Act
        files = FileClient.get_versions(bucket_name, file_name)

        # Assert
        mock_client.return_value.bucket.assert_called_with(bucket_name)
        mock_bucket.list_blobs.assert_called_with(prefix=file_name, versions=True)

        assert len(files) == 2

        assert files[0].name == "test-file.txt"
        assert files[0].generation == 1
        assert files[0].updated == "2023-04-01T00:00:00Z"
        assert files[0].time_deleted is None

        assert files[1].name == "test-file.txt"
        assert files[1].generation == 2
        assert files[1].updated == "2023-04-02T00:00:00Z"
        assert files[1].time_deleted is None

    @patch("google.cloud.storage.Client")
    def test_get_versions_nonexistent_file(self, mock_client: Mock) -> None:
        """A file name that matches nothing yields an empty result."""
        bucket_name = "test-bucket"
        file_name = "nonexistent-file.txt"
        mock_bucket = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.list_blobs.return_value = []

        files = FileClient.get_versions(bucket_name, file_name)

        mock_client.return_value.bucket.assert_called_with(bucket_name)
        mock_bucket.list_blobs.assert_called_with(prefix=file_name, versions=True)

        assert len(files) == 0
        assert files == []

    @patch("google.cloud.storage.Client")
    def test_get_versions_empty_bucket(self, mock_client: Mock) -> None:
        """A bucket without any objects yields an empty result."""
        bucket_name = "test-bucket"
        file_name = "test-file.txt"
        mock_bucket = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.list_blobs.return_value = []

        files = FileClient.get_versions(bucket_name, file_name)

        mock_client.return_value.bucket.assert_called_with(bucket_name)
        mock_bucket.list_blobs.assert_called_with(prefix=file_name, versions=True)

        assert len(files) == 0
        assert files == []

    @patch("google.cloud.storage.Client")
    def test_restore_version_success(self, mock_client: Mock) -> None:
        """Restoring with destination generation "0" forwards all arguments."""
        mock_bucket = Mock()
        mock_source_blob = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.blob.return_value = mock_source_blob

        blob = FileClient.restore_version(
            bucket_name="test-bucket",
            file_name="test-file.txt",
            destination_file="restored-file.txt",
            generation_id="1234567890",
            destination_generation_id="0",
        )

        mock_client.return_value.bucket.assert_called_with("test-bucket")
        mock_bucket.blob.assert_called_with("test-file.txt")
        mock_bucket.copy_blob.assert_called_with(
            mock_source_blob,
            mock_bucket,
            "restored-file.txt",
            source_generation="1234567890",
            if_generation_match="0",
        )
        assert blob == mock_bucket.copy_blob.return_value

    @patch("google.cloud.storage.Client")
    def test_restore_version_existing_live_version(self, mock_client: Mock) -> None:
        """The live version's generation id is passed as the precondition."""
        mock_bucket = Mock()
        mock_source_blob = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.blob.return_value = mock_source_blob

        blob = FileClient.restore_version(
            bucket_name="test-bucket",
            file_name="test-file.txt",
            destination_file="restored-file.txt",
            generation_id="1234567890",
            destination_generation_id="0987654321",
        )

        mock_client.return_value.bucket.assert_called_with("test-bucket")
        mock_bucket.blob.assert_called_with("test-file.txt")
        mock_bucket.copy_blob.assert_called_with(
            mock_source_blob,
            mock_bucket,
            "restored-file.txt",
            source_generation="1234567890",
            if_generation_match="0987654321",
        )
        assert blob == mock_bucket.copy_blob.return_value


def test_remove_gcs_uri_prefix() -> None:
    """Both path spellings must come back without the ``gs://`` prefix."""
    for candidate in (PATH_WITH_PREFIX, PATH_WITHOUT_PREFIX):
        assert FileClient._remove_gcs_uri_prefix(candidate) == PATH_WITHOUT_PREFIX
# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 1d176f9

Please sign in to comment.