From a4971f20a9850ca1ebff2047df0997157eb7c83d Mon Sep 17 00:00:00 2001 From: Christine Berger Date: Tue, 13 Aug 2024 13:38:10 -0500 Subject: [PATCH] Moved this to pro-serv-tools repo --- _demos/programmatic_model_gen/DEMO.md | 37 -------------- .../macros/generate_inc_stream_model_sql.sql | 41 ---------------- .../scripts/create_dbt_inc_stream_models.py | 49 ------------------- 3 files changed, 127 deletions(-) delete mode 100644 _demos/programmatic_model_gen/DEMO.md delete mode 100644 _demos/programmatic_model_gen/macros/generate_inc_stream_model_sql.sql delete mode 100644 _demos/programmatic_model_gen/scripts/create_dbt_inc_stream_models.py diff --git a/_demos/programmatic_model_gen/DEMO.md b/_demos/programmatic_model_gen/DEMO.md deleted file mode 100644 index e567c35..0000000 --- a/_demos/programmatic_model_gen/DEMO.md +++ /dev/null @@ -1,37 +0,0 @@ -# Programmatic Model Generation Demo -This code demonstrates how you can use command line/terminal -and python to generate many models at one time. - -This demo was created using dbt Cloud CLI and Python 3.11. - -Python modules used: -+ subprocess: to run a macro using dbt -+ ast: to convert the results of the macro to a Python dictionary -+ pathlib: to create the directory to store the generated files in if it doesn't exist.\ - -Additionally, this was created for a specific use case to generate -models using the inc_stream package, but should be flexible enough -to customize to different use cases. - -### How to use -For visual learners, [here's a loom video](https://www.loom.com/share/daf635da1f164109bf41989b4c0e13b8?sid=bbc10104-d0c0-4189-aaa8-f4a247707cef)! - -1. **Add a macro to the project** - This macro works similar to code-gen and returns the code content - of all tables in a particular database/schema, with the option to - match table patterns or exclude tables. - - Use the file located at `macros/generate_inc_stream_model_sql.sql` within this folder. - -2. **Add a python script to the project** - The location here is absolutely optional as long as you change the script to - work with the file paths you want. This demo puts it at the same level as dbt_project.yml. - - This script will run the macro and get the results, then use - the results to create and write the files. - - Use the file located at `scripts/create_dbt_inc_stream_model.py` within this folder. - -3. **Sit back and watch the magic happen** - After ensuring you've covered customizing the "To Use" points in the script, - run the Python script and watch it do all the work! diff --git a/_demos/programmatic_model_gen/macros/generate_inc_stream_model_sql.sql b/_demos/programmatic_model_gen/macros/generate_inc_stream_model_sql.sql deleted file mode 100644 index f1a2943..0000000 --- a/_demos/programmatic_model_gen/macros/generate_inc_stream_model_sql.sql +++ /dev/null @@ -1,41 +0,0 @@ -{% macro generate_inc_stream_model_sql(database_name, schema_name, table_pattern='%', exclude='') %} - -{% set table_list = dbt_utils.get_relations_by_pattern( - schema_pattern=schema_name, - database=database_name, - table_pattern=table_pattern, - exclude=exclude -) %} - -{%- set objects = [] %} -{%- for table in table_list %} - {%- set column_response = adapter.get_columns_in_relation(adapter.get_relation( - database=table.database, - schema=table.schema, - identifier=table.identifier - )) -%} - {%- set column_names = column_response | map(attribute='name') %} - -{%- set model_sql %} -{% raw %}{{- config({%- endraw %} - materialized="incremental_stream", - unique_key=["id"] - ) - select - {%- for column in column_names %} - {{ column | lower }}, - {%- endfor %} - {% raw %}{{ incr_stream.get_stream_metadata_columns() }}{%- endraw %} - from incr_stream.stream_source({{ table.schema | lower }}, {{ table.identifier | lower }}) -{% raw %}-}}{% endraw %} -{%- endset %} - - {%- do objects.append({ - "name": "stg_" ~ table.schema | lower ~ "__" ~ table.identifier | lower ~ ".sql", - "sql": model_sql - }) %} -{%- endfor %} - -{% if execute %}{{ print(objects) }}{% endif %} - -{% endmacro %} \ No newline at end of file diff --git a/_demos/programmatic_model_gen/scripts/create_dbt_inc_stream_models.py b/_demos/programmatic_model_gen/scripts/create_dbt_inc_stream_models.py deleted file mode 100644 index 571e3c7..0000000 --- a/_demos/programmatic_model_gen/scripts/create_dbt_inc_stream_models.py +++ /dev/null @@ -1,49 +0,0 @@ -# You could also use dbtRunner that comes with dbt Core, if not using dbt Cloud -# CLI. This is written to operate using dbt Cloud CLI. - -# To use: -# 1. This is for creating multiple dbt models using the inc_stream package -# (https://hub.getdbt.com/arnoN7/incr_stream/latest/). Make sure this is -# included in your packages.yml and you have run dbt deps before you test -# the models generated. -# 2. Make sure you have the generate_inc_stream_model_sql.sql macro in your project -# 3. Make sure you are at the parent directory of your dbt project and this .py -# file is at the same level as your dbt_project.yml folder -# 4. Change these parameters if desired: -# + file_path: The folder that you want the generated files to go in. This -# does not have to exist before you run the .py -# + source_config: Make sure this is pointed to the databases and schema -# that your sources are located in. -import subprocess -import ast -import pathlib - -file_path = 'models/staging/_generated_models/' -pathlib.Path(file_path).mkdir(parents=True, exist_ok=True) - -source_config = '''{ - "database_name": "raw_tpch", - "schema_name": "tpch_sf1" -}''' - -response = subprocess.run( - ['dbt', 'run-operation', 'generate_inc_stream_model_sql', '--args', source_config], - capture_output = True, # Python >= 3.7 only - text = True # Python >= 3.7 only -) - -start = '[{' -end = '}]' -result_string = start + response.stdout.split(start)[1].split(end)[0] + end -results = ast.literal_eval(result_string) - -for result in results: - model_path = file_path + result["name"] - model_content = result["sql"] - - with open(model_path, "w") as model: - model.write(model_content) - - print('Generated model ' + model_path + '.') - -print('Done.') \ No newline at end of file