-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DBT-556 Added support for materializing Kudu table through impala ada…
…pter (#207) * DBT-556 Added support for materializing Kudu table through impala adapter. * DBT-556 Addressed review comment. * DBT-556 Incorporated a review comment in CONTRIBUTING.md file. * DBT-556 Updated README.md with correct set of available tests.
- Loading branch information
1 parent
eb66439
commit df1d2d9
Showing
7 changed files
with
246 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Kudu Integration using dbt-impala | ||
|
||
The `dbt-impala` adapter allows you to use [dbt](https://www.getdbt.com/) along with [Apache Kudu](https://kudu.apache.org) and [Cloudera Data Platform](https://cloudera.com). | ||
|
||
|
||
## Getting started | ||
|
||
- [Install dbt](https://docs.getdbt.com/docs/installation) | ||
- Read the [introduction](https://docs.getdbt.com/docs/introduction/) and [viewpoint](https://docs.getdbt.com/docs/about/viewpoint/) | ||
|
||
### Requirements | ||
|
||
- In a CDP public cloud deployment, Kudu is available as one of the many Cloudera Runtime services within the Real-time Data Mart template. | ||
- To use Kudu, you can create a Data Hub cluster by selecting the Real-time Data Mart template in the Management Console. | ||
- Follow this [article](https://blog.cloudera.com/integrating-cloudera-data-warehouse-with-kudu-clusters) on integrating the created Kudu service with Impala CDW. | ||
|
||
|
||
For general instructions, please follow the [README](README.md) guidelines. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
# Copyright 2024 Cloudera Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pytest | ||
import os | ||
from dbt.tests.util import run_dbt, relation_from_name, check_relations_equal | ||
|
||
from dbt.tests.adapter.basic.test_incremental import ( | ||
BaseIncremental, | ||
BaseIncrementalNotSchemaChange, | ||
) | ||
|
||
from dbt.tests.adapter.basic.files import ( | ||
schema_base_yml, | ||
model_incremental, | ||
) | ||
|
||
# Module-wide skip marker: the Kudu tests in this file are opt-in.
# DISABLE_KUDU_TEST defaults to "true" when it is unset, so the tests are
# skipped unless the variable is explicitly set to something else.
# The value is trimmed and lower-cased before the comparison so that spellings
# such as "True" or " TRUE " also keep the tests disabled instead of
# accidentally enabling them.
pytestmark = pytest.mark.skipif(
    os.getenv(key="DISABLE_KUDU_TEST", default="true").strip().lower() == "true",
    reason="Kudu tests will be run when DISABLE_KUDU_TEST is set to false in test.env",
)
|
||
# Model SQL for the basic Kudu incremental test: a dbt config header
# (incremental materialization, stored_as="kudu", primary_key="(id)")
# followed directly by the `model_incremental` body imported from dbt's
# basic adapter test files.
incremental_kudu_sql = (
    "\n".join(
        [
            "{{",
            "config(",
            'materialized="incremental",',
            'stored_as="kudu",',
            'primary_key="(id)"',
            ")",
            "}}",
        ]
    )
    + model_incremental
)
|
||
|
||
class TestIncrementalKudu(BaseIncremental):
    """Incremental materialization test for a model stored as Kudu.

    Supplies the project-config and models fixtures plus the test body; the
    model under test is ``incremental_test_model``, built from
    ``incremental_kudu_sql``.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return the project config override that sets the project name."""
        project_overrides = {"name": "incremental_test_model"}
        return project_overrides

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model files: the Kudu incremental model and its schema."""
        model_files = {}
        model_files["incremental_test_model.sql"] = incremental_kudu_sql
        model_files["schema.yml"] = schema_base_yml
        return model_files

    def test_incremental(self, project):
        """Seed, run incrementally, full-refresh, then generate docs.

        The model is compared against the ``base`` seed, then the ``added``
        seed, then ``base`` again after a full refresh.
        """
        model_name = "incremental_test_model"

        # Load the seeds; two seed results are expected.
        seed_results = run_dbt(["seed"])
        assert len(seed_results) == 2

        # Verify the row count of each seed table ("base" first, then "added").
        for seed_table, expected_rows in (("base", 10), ("added", 20)):
            relation = relation_from_name(project.adapter, seed_table)
            row = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one")
            assert row[0] == expected_rows

        # Run the model against each seed in turn. The "seed_name" var changes
        # the seed identifier in the schema file; after every run the model
        # must match the seed it was pointed at.
        for seed_table in ("base", "added"):
            run_results = run_dbt(["run", "--vars", f"seed_name: {seed_table}"])
            assert len(run_results) == 1
            check_relations_equal(project.adapter, [seed_table, model_name])

        # Full-refresh the model against "base" and compare with it again.
        refresh_args = [
            "run",
            "--select",
            model_name,
            "--full-refresh",
            "--vars",
            "seed_name: base",
        ]
        refresh_results = run_dbt(refresh_args)
        assert len(refresh_results) == 1
        check_relations_equal(project.adapter, ["base", model_name])

        # Generate docs and check the size of the resulting catalog.
        catalog = run_dbt(["docs", "generate"])
        assert len(catalog.nodes) == 3
        assert len(catalog.sources) == 1
|
||
|
||
# Kudu incremental model configured with the insert_overwrite strategy and a
# single partition_by column; selects every seed column plus id aliased as
# id_partition, restricted to ids above the current maximum on incremental runs.
insertoverwrite_sql = "\n".join(
    [
        "{{",
        "config(",
        'materialized="incremental",',
        'incremental_strategy="insert_overwrite",',
        'partition_by="id_partition",',
        'stored_as="kudu",',
        'primary_key="(id)"',
        ")",
        "}}",
        "select *, id as id_partition from {{ source('raw', 'seed') }}",
        "{% if is_incremental() %}",
        "where id > (select max(id) from {{ this }})",
        "{% endif %}",
    ]
)
|
||
|
||
@pytest.mark.skip(reason="Need to fix partition by syntax for Kudu")
class TestInsertoverwriteKudu(TestIncrementalKudu):
    """Runs the ``TestIncrementalKudu`` scenario with the insert_overwrite model.

    Currently skipped (see the skip reason on the class).
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model files, using ``insertoverwrite_sql`` as the model."""
        model_files = {
            "incremental_test_model.sql": insertoverwrite_sql,
            "schema.yml": schema_base_yml,
        }
        return model_files
|
||
|
||
# Kudu incremental model with a single partition_by column (no explicit
# incremental strategy); selects every seed column plus id aliased as
# id_partition, restricted to ids above the current maximum on incremental runs.
incremental_single_partitionby_sql = "\n".join(
    [
        "{{",
        "config(",
        'materialized="incremental",',
        'partition_by="id_partition",',
        'stored_as="kudu",',
        'primary_key="(id)"',
        ")",
        "}}",
        "select *, id as id_partition from {{ source('raw', 'seed') }}",
        "{% if is_incremental() %}",
        "where id > (select max(id) from {{ this }})",
        "{% endif %}",
    ]
)
|
||
|
||
@pytest.mark.skip(reason="Need to fix partition by syntax for Kudu")
class TestIncrementalWithSinglePartitionKeyKudu(TestIncrementalKudu):
    """Runs the ``TestIncrementalKudu`` scenario with a single-partition-key model.

    Currently skipped (see the skip reason on the class).
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model files, using the single ``partition_by`` model SQL."""
        model_files = {}
        model_files["incremental_test_model.sql"] = incremental_single_partitionby_sql
        model_files["schema.yml"] = schema_base_yml
        return model_files
|
||
|
||
# Kudu incremental model with a list of two partition_by columns; selects
# every seed column plus id aliased twice (id_partition1, id_partition2),
# restricted to ids above the current maximum on incremental runs.
incremental_multiple_partitionby_sql = "\n".join(
    [
        "{{",
        "config(",
        'materialized="incremental",',
        'partition_by=["id_partition1", "id_partition2"],',
        'stored_as="kudu",',
        'primary_key="(id)"',
        ")",
        "}}",
        "select *, id as id_partition1, id as id_partition2 from {{ source('raw', 'seed') }}",
        "{% if is_incremental() %}",
        "where id > (select max(id) from {{ this }})",
        "{% endif %}",
    ]
)
|
||
|
||
@pytest.mark.skip(reason="Need to fix partition by syntax for Kudu")
class TestIncrementalWithMultiplePartitionKeyKudu(TestIncrementalKudu):
    """Runs the ``TestIncrementalKudu`` scenario with a two-partition-key model.

    Currently skipped (see the skip reason on the class).
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model files, using the multi-column ``partition_by`` model SQL."""
        return dict(
            [
                ("incremental_test_model.sql", incremental_multiple_partitionby_sql),
                ("schema.yml", schema_base_yml),
            ]
        )