Skip to content

Commit

Permalink
feat: adds the SerDeInfo class and tests (#2108)
Browse files Browse the repository at this point in the history
* feat: adds SerDeInfo class and tests

* cleans up type hints and some minor tweaks
  • Loading branch information
chalmerlowe authored Jan 10, 2025
1 parent a2bebb9 commit 62960f2
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 4 deletions.
88 changes: 88 additions & 0 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

"""Schemas for BigQuery tables / queries."""

from __future__ import annotations
import collections
import enum
import typing
from typing import Any, cast, Dict, Iterable, Optional, Union

from google.cloud.bigquery import _helpers
Expand Down Expand Up @@ -556,3 +558,89 @@ def to_api_repr(self) -> dict:
"""
answer = {"names": list(self.names)}
return answer


class SerDeInfo:
    """Serializer and deserializer (SerDe) information.

    Values are kept in an internal ``_properties`` dict keyed by the API
    field names (``serializationLibrary``, ``name``, ``parameters``).

    Args:
        serialization_library (str): Required. Specifies a fully-qualified
            class name of the serialization library that is responsible for
            the translation of data between table representation and the
            underlying low-level input and output format structures. The
            maximum length is 256 characters.
        name (Optional[str]): Name of the SerDe. The maximum length is 256
            characters.
        parameters (Optional[dict[str, str]]): Key-value pairs that define
            the initialization parameters for the serialization library.
            Maximum size 10 Kib.
    """

    def __init__(
        self,
        serialization_library: str,
        name: Optional[str] = None,
        parameters: Optional[dict[str, str]] = None,
    ):
        # Every assignment below goes through the matching property setter,
        # so each argument is type-checked before it reaches ``_properties``.
        self._properties: Dict[str, Any] = {}
        self.serialization_library = serialization_library
        self.name = name
        self.parameters = parameters

    @property
    def serialization_library(self) -> str:
        """Required. Fully-qualified class name of the serialization library
        that translates data between the table representation and the
        underlying low-level input and output format structures. The maximum
        length is 256 characters."""

        library = self._properties.get("serializationLibrary")
        return typing.cast(str, library)

    @serialization_library.setter
    def serialization_library(self, value: str):
        # Checked against ``str`` with ``None`` disallowed: the field is required.
        self._properties["serializationLibrary"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=False
        )

    @property
    def name(self) -> Optional[str]:
        """Optional. Name of the SerDe. The maximum length is 256 characters."""

        serde_name = self._properties.get("name")
        return serde_name

    @name.setter
    def name(self, value: Optional[str] = None):
        # Checked against ``str``; ``None`` is accepted and stored as-is.
        self._properties["name"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Optional[dict[str, str]]:
        """Optional. Key-value pairs that define the initialization
        parameters for the serialization library. Maximum size 10 Kib."""

        params = self._properties.get("parameters")
        return params

    @parameters.setter
    def parameters(self, value: Optional[dict[str, str]] = None):
        # Only the container type (``dict`` or ``None``) is checked here;
        # keys and values are not inspected by this setter.
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        The internal properties dict itself is returned (not a copy), so
        changes made to the result are visible on this instance.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. The
                dict is stored by reference, not copied.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        # The constructor requires a serialization library, so a placeholder
        # satisfies it; the properties are then replaced wholesale.
        instance = cls("PLACEHOLDER")
        instance._properties = api_repr
        return instance
92 changes: 88 additions & 4 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import PolicyTagList


Expand Down Expand Up @@ -130,8 +131,6 @@ def test_constructor_range_str(self):
self.assertEqual(field.range_element_type.element_type, "DATETIME")

def test_to_api_repr(self):
from google.cloud.bigquery.schema import PolicyTagList

policy = PolicyTagList(names=("foo", "bar"))
self.assertEqual(
policy.to_api_repr(),
Expand Down Expand Up @@ -886,8 +885,6 @@ def test_valid_mapping_representation(self):
class TestPolicyTags(unittest.TestCase):
@staticmethod
def _get_target_class():
from google.cloud.bigquery.schema import PolicyTagList

return PolicyTagList

def _make_one(self, *args, **kw):
Expand Down Expand Up @@ -1129,3 +1126,90 @@ def test_to_api_repr_parameterized(field, api):
from google.cloud.bigquery.schema import SchemaField

assert SchemaField(**field).to_api_repr() == api


class TestSerDeInfo:
    """Tests for the SerDeInfo class."""

    @staticmethod
    def _get_target_class():
        return schema.SerDeInfo

    def _make_one(self, *args, **kwargs):
        target_class = self._get_target_class()
        return target_class(*args, **kwargs)

    @pytest.mark.parametrize(
        "serialization_library,name,parameters",
        [
            ("testpath.to.LazySimpleSerDe", None, None),
            ("testpath.to.LazySimpleSerDe", "serde_name", None),
            ("testpath.to.LazySimpleSerDe", None, {"key": "value"}),
            ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}),
        ],
    )
    def test_ctor_valid_input(self, serialization_library, name, parameters):
        """Each constructor argument is readable back through its property."""
        serde_info = self._make_one(
            serialization_library=serialization_library,
            name=name,
            parameters=parameters,
        )

        assert serde_info.serialization_library == serialization_library
        assert serde_info.name == name
        assert serde_info.parameters == parameters

    @pytest.mark.parametrize(
        "serialization_library,name,parameters",
        [
            (123, None, None),
            ("testpath.to.LazySimpleSerDe", 123, None),
            ("testpath.to.LazySimpleSerDe", None, ["test", "list"]),
            ("testpath.to.LazySimpleSerDe", None, 123),
        ],
    )
    def test_ctor_invalid_input(self, serialization_library, name, parameters):
        """A wrongly-typed argument makes the constructor raise TypeError."""
        with pytest.raises(TypeError) as exc_info:
            self._make_one(
                serialization_library=serialization_library,
                name=name,
                parameters=parameters,
            )

        # Looking for the first word from the string "Pass <variable> as..."
        message = str(exc_info.value)
        assert "Pass " in message

    def test_to_api_repr(self):
        """to_api_repr() emits the API field names with the given values."""
        expected_repr = {
            "serializationLibrary": "testpath.to.LazySimpleSerDe",
            "name": "serde_name",
            "parameters": {"key": "value"},
        }

        serde_info = self._make_one(
            serialization_library="testpath.to.LazySimpleSerDe",
            name="serde_name",
            parameters={"key": "value"},
        )

        assert serde_info.to_api_repr() == expected_repr

    def test_from_api_repr(self):
        """GIVEN an api representation of a SerDeInfo object (i.e. api_repr)
        WHEN converted into a SerDeInfo object using from_api_repr()
        THEN it will have the same representation in dict format as a
        SerDeInfo object made directly (via _make_one()) and represented in
        dict format.
        """
        api_repr = {
            "serializationLibrary": "testpath.to.LazySimpleSerDe",
            "name": "serde_name",
            "parameters": {"key": "value"},
        }

        target_class = self._get_target_class()
        result = target_class.from_api_repr(api_repr)

        expected = self._make_one(
            serialization_library="testpath.to.LazySimpleSerDe",
            name="serde_name",
            parameters={"key": "value"},
        )

        # We convert both to dict format because these classes do not have a
        # __eq__() method to facilitate direct equality comparisons.
        assert result.to_api_repr() == expected.to_api_repr()

0 comments on commit 62960f2

Please sign in to comment.