Skip to content

Commit

Permalink
Tweak datahub output
Browse files Browse the repository at this point in the history
- Pass a fully qualified table name ('database.table' rather than 'table') so
  that a container is created rather than the table being allocated to the
  default container.

- Remove the dummy value for the mandatory platformSchema argument. Just use
  an empty string.
  • Loading branch information
MatMoore committed Jan 18, 2024
1 parent 0a82461 commit 37130d2
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,13 @@ def upsert_table(
Returns:
dataset_urn: the dataset URN
"""
if location.fully_qualified_name:
name = f"{location.fully_qualified_name}.{metadata.name}"
else:
name = metadata.name

dataset_urn = mce_builder.make_dataset_urn(
platform=location.platform_id, name=f"{metadata.name}", env="PROD"
platform=location.platform_id, name=name, env="PROD"
)

dataset_properties = DatasetPropertiesClass(
Expand All @@ -232,7 +237,7 @@ def upsert_table(
platform=make_data_platform_urn(platform=location.platform_id),
version=metadata.major_version,
hash="",
platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"),
platformSchema=OtherSchemaClass(rawSchema=""),
fields=[
SchemaFieldClass(
fieldPath=f"{column['name']}",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
Expand All @@ -14,7 +14,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
Expand All @@ -33,7 +33,7 @@
"hash": "",
"platformSchema": {
"com.linkedin.schema.OtherSchema": {
"rawSchema": "__insert raw schema here__"
"rawSchema": ""
}
},
"fields": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
Expand All @@ -14,7 +14,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
Expand All @@ -33,7 +33,7 @@
"hash": "",
"platformSchema": {
"com.linkedin.schema.OtherSchema": {
"rawSchema": "__insert raw schema here__"
"rawSchema": ""
}
},
"fields": [
Expand Down Expand Up @@ -120,7 +120,7 @@
"assets": [
{
"sourceUrn": "urn:li:dataProduct:my_data_product",
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)"
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_create_table_datahub(
metadata=table,
location=DataLocation(fully_qualified_name="my_database"),
)
fqn_out = "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"
fqn_out = "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)"

assert fqn == fqn_out

Expand All @@ -92,7 +92,7 @@ def test_create_table_with_metadata_datahub(
data_product_metadata=data_product,
location=DataLocation("my_database"),
)
fqn_out = "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"
fqn_out = "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)"

assert fqn == fqn_out

Expand Down Expand Up @@ -123,7 +123,7 @@ def test_create_table_and_metadata_idempotent_datahub(
data_product_metadata=data_product,
location=DataLocation("my_database"),
)
fqn_out = "urn:li:dataset:(urn:li:dataPlatform:glue,my_table,PROD)"
fqn_out = "urn:li:dataset:(urn:li:dataPlatform:glue,my_database.my_table,PROD)"

assert fqn == fqn_out

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,12 @@ def test_upsert_test_hierarchy():
table_fqn = client.upsert_table(
metadata=table,
data_product_metadata=data_product,
location=DataLocation("test_data_product_v1"),
location=DataLocation("test_data_product_v2"),
)
assert (
table_fqn
== "urn:li:dataset:(urn:li:dataPlatform:glue,test_data_product_v2.test_table,PROD)"
)
assert table_fqn == "urn:li:dataset:(urn:li:dataPlatform:glue,test_table,PROD)"

# Ensure data went through
assert client.graph.get_aspect(table_fqn, DatasetPropertiesClass)
Expand Down

0 comments on commit 37130d2

Please sign in to comment.