Skip to content

Commit

Permalink
[DP-2782] Fix data product assets issue and bump the version (#3040)
Browse files Browse the repository at this point in the history
* Don't import openmetadata by default

It's incompatible with Python 3.11, which is really annoying.

* Fix bug appending assets to a data product

The list append() method returns None, so this was clearing the
asset list every other time an asset was added.

* Bump version
  • Loading branch information
MatMoore authored Jan 23, 2024
1 parent 8b0d563 commit aa5f07b
Show file tree
Hide file tree
Showing 9 changed files with 314 additions and 13 deletions.
4 changes: 3 additions & 1 deletion python-libraries/data-platform-catalogue/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.4.0] 2024-01-19

### Breaking changes

Expand All @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
about where a node in the metadata graph should be located, and what kind
of database it comes from.

- Renamed `create_or_update_*` methods to `upsert_*`.

- Extracted `BaseCatalogueClient` base class from `CatalogueClient`. Use this
as a type annotation to avoid coupling to the OpenMetadata implementation.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from .client import DataHubCatalogueClient # noqa: F401
from .client import OpenMetadataCatalogueClient # noqa: F401
from .client import CatalogueError, ReferencedEntityMissing # noqa: F401
from .entities import DataProductMetadata # noqa: F401
from .entities import CatalogueMetadata, DataLocation, TableMetadata # noqa: F401
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
from .base import CatalogueError # noqa: F401
from .base import ReferencedEntityMissing # noqa: F401
from .datahub import DataHubCatalogueClient # noqa: F401
from .openmetadata import OpenMetadataCatalogueClient # noqa: F401
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,11 @@ def upsert_table(
data_product_existing_properties is not None
and data_product_existing_properties.assets is not None
):
assets = data_product_existing_properties.assets.append(
data_product_association
)
assets = data_product_existing_properties.assets[::]
assets.append(data_product_association)
else:
assets = [data_product_association]

data_product_properties = DataProductPropertiesClass(assets=assets)

metadata_event = MetadataChangeProposalWrapper(
Expand Down
4 changes: 2 additions & 2 deletions python-libraries/data-platform-catalogue/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ministryofjustice-data-platform-catalogue"
version = "0.3.1"
version = "0.4.0"
description = "Library to integrate the MoJ data platform with the catalogue component."
authors = ["MoJ Data Platform Team <data-platform-tech@digital.justice.gov.uk>"]
license = "MIT"
Expand All @@ -10,7 +10,7 @@ packages = [{ include = "data_platform_catalogue" }]
[tool.poetry.dependencies]
python = "^3.10"
openmetadata-ingestion = "~1.2.0.1"
acryl-datahub = {extras = ["datahub-rest"], version = "^0.12.1.3"}
acryl-datahub = { extras = ["datahub-rest"], version = "^0.12.1.3" }
freezegun = "^1.4.0"
deepdiff = "^6.7.1"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
[
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
"description": "bla bla",
"tags": []
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
"schemaName": "my_table",
"platform": "urn:li:dataPlatform:glue",
"version": 1,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "foo",
"nullable": false,
"description": "a",
"type": {
"type": {
"com.linkedin.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
},
{
"fieldPath": "bar",
"nullable": false,
"description": "b",
"type": {
"type": {
"com.linkedin.schema.NumberType": {}
}
},
"nativeDataType": "int",
"recursive": false,
"isPartOfKey": false
}
]
}
}
},
{
"entityType": "domain",
"entityUrn": "urn:li:domain:legal-aid",
"changeType": "UPSERT",
"aspectName": "domainProperties",
"aspect": {
"json": {
"name": "legal-aid",
"description": ""
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "domains",
"aspect": {
"json": {
"domains": [
"urn:li:domain:legal-aid"
]
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "dataProductProperties",
"aspect": {
"json": {
"customProperties": {
"email": "justice@justice.gov.uk",
"retention_period_in_days": "365",
"dpia_required": "False"
},
"name": "my_data_product",
"description": "bla bla"
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "dataProductProperties",
"aspect": {
"json": {
"customProperties": {},
"assets": [
{
"sourceUrn": "urn:li:dataProduct:my_data_product",
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)"
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table2,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
"description": "this is a different table",
"tags": []
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table2,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
"schemaName": "my_table2",
"platform": "urn:li:dataPlatform:glue",
"version": 1,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "boo",
"nullable": false,
"description": "spooky",
"type": {
"type": {
"com.linkedin.schema.BooleanType": {}
}
},
"nativeDataType": "boolean",
"recursive": false,
"isPartOfKey": false
},
{
"fieldPath": "yar",
"nullable": false,
"description": "shiver my timbers",
"type": {
"type": {
"com.linkedin.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
}
]
}
}
},
{
"entityType": "domain",
"entityUrn": "urn:li:domain:legal-aid",
"changeType": "UPSERT",
"aspectName": "domainProperties",
"aspect": {
"json": {
"name": "legal-aid",
"description": ""
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "domains",
"aspect": {
"json": {
"domains": [
"urn:li:domain:legal-aid"
]
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "dataProductProperties",
"aspect": {
"json": {
"customProperties": {
"email": "justice@justice.gov.uk",
"retention_period_in_days": "365",
"dpia_required": "False"
},
"name": "my_data_product",
"description": "bla bla"
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "dataProductProperties",
"aspect": {
"json": {
"customProperties": {},
"assets": [
{
"sourceUrn": "urn:li:dataProduct:my_data_product",
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table2,PROD)"
}
]
}
}
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ def table(self):
retention_period_in_days=365,
)

@pytest.fixture
def table2(self):
    """Metadata for a second table, ``my_table2``, with two columns."""
    columns = [
        {"name": "boo", "type": "boolean", "description": "spooky"},
        {"name": "yar", "type": "string", "description": "shiver my timbers"},
    ]
    return TableMetadata(
        name="my_table2",
        description="this is a different table",
        column_details=columns,
        retention_period_in_days=1,
    )

@pytest.fixture
def datahub_client(self, base_mock_graph) -> DataHubCatalogueClient:
return DataHubCatalogueClient(
Expand Down Expand Up @@ -100,6 +112,41 @@ def test_create_table_with_metadata_datahub(
base_mock_graph.sink_to_file(output_file)
check_snapshot("datahub_create_table_with_metadata.json", output_file)

def test_create_two_tables_with_metadata(
    self,
    datahub_client,
    table,
    table2,
    data_product,
    base_mock_graph,
    tmp_path,
    check_snapshot,
):
    """
    Upsert two different tables against the same data product, checking the
    URN returned for each, then compare what the mock graph sinks to file
    with the stored snapshot.
    """
    # Each table is upserted and its returned URN asserted before the next
    # one is sent, in the order listed here.
    tables_and_urns = (
        (
            table,
            "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
        ),
        (
            table2,
            "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table2,PROD)",
        ),
    )

    for table_metadata, expected_urn in tables_and_urns:
        returned_urn = datahub_client.upsert_table(
            metadata=table_metadata,
            data_product_metadata=data_product,
            location=DataLocation("my_database"),
        )
        assert returned_urn == expected_urn

    sink_path = Path(tmp_path / "datahub_create_table_with_metadata.json")
    base_mock_graph.sink_to_file(sink_path)
    check_snapshot("datahub_create_two_tables_with_metadata.json", sink_path)

def test_create_table_and_metadata_idempotent_datahub(
self,
datahub_client,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import pytest
from data_platform_catalogue.client import (
OpenMetadataCatalogueClient,
ReferencedEntityMissing,
)
from data_platform_catalogue.client import ReferencedEntityMissing
from data_platform_catalogue.client.openmetadata import OpenMetadataCatalogueClient
from data_platform_catalogue.entities import (
CatalogueMetadata,
DataLocation,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import pytest
from data_platform_catalogue import DataProductMetadata, TableMetadata
from data_platform_catalogue.client import OpenMetadataCatalogueClient
from data_platform_catalogue.client.openmetadata import OpenMetadataCatalogueClient
from data_platform_catalogue.entities import DataLocation

jwt_token = os.environ.get("JWT_TOKEN")
Expand Down

0 comments on commit aa5f07b

Please sign in to comment.