Skip to content

Commit

Permalink
Test cases where we create data product, domain, table at the same time
Browse files Browse the repository at this point in the history
  • Loading branch information
MatMoore committed Jan 17, 2024
1 parent 53befcb commit 6a43861
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def upsert_data_product(self, metadata: DataProductMetadata):
"""
Define a data product. Must belong to a domain
"""
metadata_dict = vars(metadata)
metadata_dict = dict(metadata.__dict__)
metadata_dict.pop("version")
metadata_dict.pop("owner")
metadata_dict.pop("tags")
Expand Down Expand Up @@ -550,6 +550,7 @@ def upsert_table( # type: ignore[override]
data_product_metadata.name.split()
)
data_product_exists = self.graph.exists(entity_urn=data_product_urn)

if not data_product_exists:
data_product_urn = self.upsert_data_product(
metadata=data_product_metadata
Expand All @@ -564,10 +565,10 @@ def upsert_table( # type: ignore[override]
)

if (
data_product_existing_properties.assets # pyright: ignore[reportOptionalMemberAccess]
is not None
data_product_existing_properties is not None
and data_product_existing_properties.assets is not None
):
assets = data_product_existing_properties.assets.append( # pyright: ignore[reportOptionalMemberAccess]
assets = data_product_existing_properties.assets.append(
data_product_association
)
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
[
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
"schemaName": "my_table",
"platform": "urn:li:dataPlatform:glue",
"version": 1,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.schema.OtherSchema": {
"rawSchema": "__insert raw schema here__"
}
},
"fields": [
{
"fieldPath": "foo",
"nullable": false,
"description": "a",
"type": {
"type": {
"com.linkedin.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
},
{
"fieldPath": "bar",
"nullable": false,
"description": "b",
"type": {
"type": {
"com.linkedin.schema.NumberType": {}
}
},
"nativeDataType": "int",
"recursive": false,
"isPartOfKey": false
}
]
}
}
},
{
"entityType": "domain",
"entityUrn": "urn:li:domain:legal-aid",
"changeType": "UPSERT",
"aspectName": "domainProperties",
"aspect": {
"json": {
"name": "legal-aid",
"description": ""
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "domains",
"aspect": {
"json": {
"domains": [
"urn:li:domain:legal-aid"
]
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "dataProductProperties",
"aspect": {
"json": {
"customProperties": {
"email": "justice@justice.gov.uk",
"retention_period_in_days": "365",
"dpia_required": "False"
},
"name": "my_data_product",
"description": "bla bla"
}
}
},
{
"entityType": "dataproduct",
"entityUrn": "urn:li:dataProduct:my_data_product",
"changeType": "UPSERT",
"aspectName": "dataProductProperties",
"aspect": {
"json": {
"customProperties": {},
"assets": [
{
"sourceUrn": "urn:li:dataProduct:my_data_product",
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"
}
]
}
}
}
]
69 changes: 66 additions & 3 deletions python-libraries/data-platform-catalogue/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
from pathlib import Path

import pytest
Expand All @@ -13,7 +12,6 @@
TableMetadata,
)
from datahub.api.entities.dataproduct.dataproduct import DataProduct
from datahub.configuration.common import OperationalError
from freezegun import freeze_time
from tests.test_helpers.graph_helpers import MockDataHubGraph
from tests.test_helpers.mce_helpers import check_golden_file
Expand Down Expand Up @@ -365,7 +363,6 @@ def test_create_table_omd(self, request, omd_client, requests_mock, table):

def test_create_table_datahub(
self,
request,
datahub_client,
base_mock_graph,
table,
Expand All @@ -388,6 +385,72 @@ def test_create_table_datahub(
last_snapshot = Path(test_snapshots_dir / "datahub_create_table.json")
check_golden_file(pytestconfig, output_file, last_snapshot)

def test_create_table_with_metadata_datahub(
    self,
    datahub_client,
    table,
    data_product,
    base_mock_graph,
    tmp_path,
    test_snapshots_dir,
    pytestconfig,
):
    """
    Check the contract with DataHubGraph (via a mock) when the table is
    upserted together with its data product metadata in a single call.

    The domain, data product and table are upserted in one step, and the
    resulting metadata graph is expected to match the golden file
    snapshots/datahub_create_table_with_metadata.json.
    """
    expected_urn = "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"

    returned_urn = datahub_client.upsert_table(
        metadata=table, data_product_metadata=data_product
    )
    assert returned_urn == expected_urn

    # Dump what the mock graph holds and compare it with the stored snapshot.
    emitted_path = Path(tmp_path / "datahub_create_table.json")
    base_mock_graph.sink_to_file(emitted_path)
    golden_path = Path(
        test_snapshots_dir / "datahub_create_table_with_metadata.json"
    )
    check_golden_file(pytestconfig, emitted_path, golden_path)

def test_create_table_and_metadata_idempotent_datahub(
    self,
    datahub_client,
    table,
    data_product,
    base_mock_graph,
    tmp_path,
    test_snapshots_dir,
    pytestconfig,
):
    """
    Check the contract with DataHubGraph (via a mock) when the same table
    and data product are upserted twice in a row.

    The second call runs against a graph in which the entities already
    exist; the final metadata graph is still expected to match the golden
    file snapshots/datahub_create_table_with_metadata.json.
    """
    expected_urn = "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"
    upsert_kwargs = {"metadata": table, "data_product_metadata": data_product}

    # First call populates the graph; its return value is not inspected.
    datahub_client.upsert_table(**upsert_kwargs)

    # Second call repeats the identical upsert against the populated graph.
    returned_urn = datahub_client.upsert_table(**upsert_kwargs)
    assert returned_urn == expected_urn

    # Dump what the mock graph holds and compare it with the stored snapshot.
    emitted_path = Path(tmp_path / "datahub_create_table.json")
    base_mock_graph.sink_to_file(emitted_path)
    golden_path = Path(
        test_snapshots_dir / "datahub_create_table_with_metadata.json"
    )
    check_golden_file(pytestconfig, emitted_path, golden_path)

def test_404_handling_omd(self, request, omd_client, requests_mock, table):
requests_mock.put(
"http://example.com/api/v1/tables",
Expand Down

0 comments on commit 6a43861

Please sign in to comment.