diff --git a/queries/dbt_project.yml b/queries/dbt_project.yml
index 4ae666c1..d6a5a324 100644
--- a/queries/dbt_project.yml
+++ b/queries/dbt_project.yml
@@ -285,4 +285,7 @@ models:
       +schema: validacao_dados_jae
       staging:
         +materialized: incremental
-        +schema: validacao_dados_jae_staging
\ No newline at end of file
+        +schema: validacao_dados_jae_staging
+    catalogo:
+      +materialized: view
+      +schema: catalogo
\ No newline at end of file
diff --git a/queries/dev/utils.py b/queries/dev/utils.py
index db021da6..439e0ce2 100644
--- a/queries/dev/utils.py
+++ b/queries/dev/utils.py
@@ -5,6 +5,8 @@
 # from datetime import timedelta
 from typing import Dict, List, Union
 
+import requests
+
 # import pandas as pd
 
 
@@ -62,3 +64,31 @@ def run_dbt_model(
 
     print(f"\n>>> RUNNING: {run_command}\n")
     os.system(run_command)
+
+
+def fetch_dataset_sha(dataset_id: str) -> Union[Dict[str, str], None]:
+    """Fetch the SHA of the first commit listed by the GitHub commits API.
+
+    Args:
+        dataset_id (str): dataset name used to build the request URL.
+
+    Returns:
+        dict | None: ``{"version": <sha>}``, or None when the response
+        status is not 200 or the response lists no commits.
+    """
+    url = "https://api.github.com/repos/prefeitura-rio/queries-rj-smtr"
+    # NOTE(review): the query string has no parameter name; GitHub's "list
+    # commits" endpoint filters by file with `path=<file path>` - confirm.
+    url += f"/commits?queries-rj-smtr/rj_smtr/{dataset_id}"
+    # Without a timeout, requests may wait forever on a stalled connection.
+    response = requests.get(url, timeout=30)
+
+    if response.status_code != 200:
+        return None
+
+    commits = response.json()
+    if not commits:
+        # An empty list would otherwise raise IndexError on commits[0].
+        return None
+
+    return {"version": commits[0]["sha"]}
diff --git a/queries/macros/get_models_with_tags.sql b/queries/macros/get_models_with_tags.sql
new file mode 100644
index 00000000..f68dce50
--- /dev/null
+++ b/queries/macros/get_models_with_tags.sql
@@ -0,0 +1,20 @@
+/* https://discourse.getdbt.com/t/get-all-dbt-table-model-names-from-a-tag-inside-another-model/7703 (modified) */
+{# Returns the graph nodes of every model that has at least one of the given tags. #}
+{% macro get_models_with_tags(tags) %}
+
+{% set models_with_tag = [] %}
+
+{% for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") %}
+
+    {% for tag in tags %}
+        {# Skip models already collected so a model matching several tags is listed once #}
+        {% if tag in model.config.tags and model not in models_with_tag %}
+            {% do models_with_tag.append(model) %}
+        {% endif %}
+    {% endfor %}
+
+{% endfor %}
+
+{{ return(models_with_tag) }}
+
+{% endmacro %}
\ No newline at end of file
diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md b/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md
index a74e347f..defd4fb6 100644
--- a/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md
+++ b/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog - bilhetagem
 
+## [2.1.4] - 2024-08-02
+
+### Alterado
+- Adiciona tag `geolocalizacao` aos modelos `gps_validador_van.sql` e `gps_validador.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
+- Adiciona tag `identificacao` ao modelo `staging_cliente.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
+
 ## [2.1.3] - 2024-07-18
 
 ### Adicionado
diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql
index ce3b4b71..9ec4fec0 100644
--- a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql
+++ b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql
@@ -6,6 +6,7 @@
       "data_type":"date",
       "granularity": "day"
     },
+    tags=['geolocalizacao']
   )
 }}
 
diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql
index 84aaa992..601cd3ae 100644
--- a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql
+++ b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql
@@ -6,6 +6,7 @@
       "data_type":"date",
       "granularity": "day"
     },
+    tags=['geolocalizacao']
   )
 }}
 
diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql b/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql
index 7b9c0d93..4feda28c 100644
--- a/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql
+++ b/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql
@@ -1,6 +1,7 @@
 {{
   config(
     alias='cliente',
+    tags=['identificacao']
   )
 }}
 
diff --git a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md
index d1b45343..3cf994d5 100644
--- a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md
+++ b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog - onibus_gps_zirix
 
+## [1.0.3] - 2024-08-02
+
+### Alterado
+- Adiciona tag `geolocalizacao` ao modelo `gps_sppo_zirix.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
+
 ## [1.0.2] - 2024-07-02
 
 ### Adicionado
diff --git a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql
index 0c2ac39b..6990d828 100644
--- a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql
+++ b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql
@@ -6,7 +6,8 @@
       'data_type':'date',
       'granularity': 'day'
     },
-    alias='gps_sppo'
+    alias='gps_sppo',
+    tags=['geolocalizacao']
   )
 }}
 /*
diff --git a/queries/models/br_rj_riodejaneiro_veiculos/CHANGELOG.md b/queries/models/br_rj_riodejaneiro_veiculos/CHANGELOG.md
new file mode 100644
index 00000000..946c270f
--- /dev/null
+++ b/queries/models/br_rj_riodejaneiro_veiculos/CHANGELOG.md
@@ -0,0 +1,6 @@
+# Changelog - br_rj_riodejaneiro_veiculos
+
+## [1.0.1] - 2024-08-02
+
+### Alterado
+- Adiciona tag `geolocalizacao` aos modelos `gps_brt.sql` e `gps_sppo.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
\ No newline at end of file
diff --git a/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql b/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql
index e0d5ace8..4b19400e 100644
--- a/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql
+++ b/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql
@@ -5,7 +5,8 @@
       'field': 'data',
       'data_type': 'date',
       'granularity': 'day'
-    }
+    },
+    tags=['geolocalizacao']
   )
 }}
 /*
diff --git a/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql b/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql
index 41e56c81..b62e6353 100644
--- a/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql
+++ b/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql
@@ -5,7 +5,8 @@
       'field':"data",
       'data_type':'date',
       'granularity': 'day'
-    }
+    },
+    tags=['geolocalizacao']
   )
 }}
 /*
diff --git a/queries/models/cadastro/CHANGELOG.md b/queries/models/cadastro/CHANGELOG.md
index 49cc575a..f496cc7a 100644
--- a/queries/models/cadastro/CHANGELOG.md
+++ b/queries/models/cadastro/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog - cadastro
 
+## [1.2.1] - 2024-08-02
+
+### Alterado
+- Adiciona tag `geolocalizacao` ao modelo `servicos.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
+- Adiciona tag `identificacao` ao modelo `operadoras.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
+
 ## [1.2.0] - 2024-07-17
 
 ### Adicionado
diff --git a/queries/models/cadastro/operadoras.sql b/queries/models/cadastro/operadoras.sql
index 65dcdd79..aa1c7b7c 100644
--- a/queries/models/cadastro/operadoras.sql
+++ b/queries/models/cadastro/operadoras.sql
@@ -1,6 +1,7 @@
 {{
   config(
-    materialized="table"
+    materialized="table",
+    tags=["identificacao"]
   )
 }}
 
diff --git a/queries/models/cadastro/servicos.sql b/queries/models/cadastro/servicos.sql
index 667125fe..a32bd988 100644
--- a/queries/models/cadastro/servicos.sql
+++ b/queries/models/cadastro/servicos.sql
@@ -1,7 +1,8 @@
 {{
   config(
-    materialized='table'
-  )
+    materialized='table',
+    tags=['geolocalizacao']
+  )
 }}
 
 SELECT
diff --git a/queries/models/catalogo/ed_metadado_coluna.sql b/queries/models/catalogo/ed_metadado_coluna.sql
new file mode 100644
index 00000000..6915b4ea
--- /dev/null
+++ b/queries/models/catalogo/ed_metadado_coluna.sql
@@ -0,0 +1,22 @@
+{# Default to an empty list so the template still renders when the block below is skipped #}
+{% set models_with_tag = [] %}
+{% if execute %}
+    {% set models_with_tag = get_models_with_tags(["geolocalizacao", "identificacao"]) %}
+    {% do log("Models: \n", info=true) %}
+    {% for model in models_with_tag %}
+        {% do log(model.schema~"."~model.alias~"\n", info=true) %}
+    {% endfor %}
+{% endif %}
+
+SELECT
+    *
+FROM
+    {{ ref("metadado_coluna") }}
+WHERE
+    {# The fixed condition comes first so the WHERE clause stays valid when no model is tagged #}
+    (dataset_id = "br_rj_riodejaneiro_stpl_gps"
+    AND table_id = "registros")
+    {% for model in models_with_tag %}
+        OR (dataset_id = "{{ model.schema }}"
+        AND table_id = "{{ model.alias }}")
+    {% endfor %}
\ No newline at end of file
diff --git a/queries/models/catalogo/metadado_coluna.sql b/queries/models/catalogo/metadado_coluna.sql
new file mode 100644
index 00000000..34a20e45
--- /dev/null
+++ b/queries/models/catalogo/metadado_coluna.sql
@@ -0,0 +1,9 @@
+SELECT
+    table_catalog AS project_id,
+    table_schema AS dataset_id,
+    table_name AS table_id,
+    column_name,
+    data_type,
+    description
+FROM
+    rj-smtr.`region-US`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS
\ No newline at end of file
diff --git a/queries/models/catalogo/schema.yml b/queries/models/catalogo/schema.yml
new file mode 100644
index 00000000..3b2233c7
--- /dev/null
+++ b/queries/models/catalogo/schema.yml
@@ -0,0 +1,33 @@
+version: 2
+
+models:
+  - name: ed_metadado_coluna
+    description: "Catálogo de dados de geolocalização e identificação do data lake da SMTR destinados ao Escritório de Dados (GP/ED)"
+    columns:
+      - name: project_id
+        description: "{{ doc('project_id') }}"
+      - name: dataset_id
+        description: "{{ doc('dataset_id') }}"
+      - name: table_id
+        description: "{{ doc('table_id') }}"
+      - name: column_name
+        description: "{{ doc('column_name') }}"
+      - name: data_type
+        description: "{{ doc('data_type') }}"
+      - name: description
+        description: "{{ doc('metadado_descricao') }}"
+  - name: metadado_coluna
+    description: "Catálogo de dados do data lake da SMTR"
+    columns:
+      - name: project_id
+        description: "{{ doc('project_id') }}"
+      - name: dataset_id
+        description: "{{ doc('dataset_id') }}"
+      - name: table_id
+        description: "{{ doc('table_id') }}"
+      - name: column_name
+        description: "{{ doc('column_name') }}"
+      - name: data_type
+        description: "{{ doc('data_type') }}"
+      - name: description
+        description: "{{ doc('metadado_descricao') }}"
\ No newline at end of file
diff --git a/queries/models/docs.md b/queries/models/docs.md
index 805d8330..56bbb32f 100644
--- a/queries/models/docs.md
+++ b/queries/models/docs.md
@@ -1,3 +1,27 @@
 {% docs consorcio %}
 Consórcio ao qual o serviço pertence
-{% enddocs %}
\ No newline at end of file
+{% enddocs %}
+
+{% docs project_id %}
+Nome do projeto (rj-smtr)
+{% enddocs %}
+
+{% docs dataset_id %}
+Nome do conjunto de dados
+{% enddocs %}
+
+{% docs table_id %}
+Nome da tabela
+{% enddocs %}
+
+{% docs column_name %}
+Nome da coluna
+{% enddocs %}
+
+{% docs data_type %}
+Tipo de dado da coluna
+{% enddocs %}
+
+{% docs metadado_descricao %}
+Descrição da coluna
+{% enddocs %}
diff --git a/queries/models/gtfs/CHANGELOG.md b/queries/models/gtfs/CHANGELOG.md
index a153ff17..2790158e 100644
--- a/queries/models/gtfs/CHANGELOG.md
+++ b/queries/models/gtfs/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog - gtfs
 
+## [1.1.8] - 2024-08-02
+
+### Alterado
+- Adiciona tag `geolocalizacao` aos modelos `shapes_geom_gtfs.sql`, `shapes_gtfs.sql` e `stops_gtfs.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127)
+
 ## [1.1.7] - 2024-07-23
 
 ### Adicionado
diff --git a/queries/models/gtfs/shapes_geom_gtfs.sql b/queries/models/gtfs/shapes_geom_gtfs.sql
index e6098470..4c84c80c 100644
--- a/queries/models/gtfs/shapes_geom_gtfs.sql
+++ b/queries/models/gtfs/shapes_geom_gtfs.sql
@@ -3,7 +3,8 @@
                     'data_type' :'date',
                     'granularity': 'day' },
     unique_key = ['shape_id', 'feed_start_date'],
-    alias = 'shapes_geom'
+    alias = 'shapes_geom',
+    tags=['geolocalizacao']
 ) }}
 
 {% if execute and is_incremental() %}
diff --git a/queries/models/gtfs/shapes_gtfs.sql b/queries/models/gtfs/shapes_gtfs.sql
index 926e725a..c9409a45 100644
--- a/queries/models/gtfs/shapes_gtfs.sql
+++ b/queries/models/gtfs/shapes_gtfs.sql
@@ -3,7 +3,8 @@
                     'data_type' :'date',
                     'granularity': 'day' },
     unique_key = ['shape_id', 'shape_pt_sequence', 'feed_start_date'],
-    alias = 'shapes'
+    alias = 'shapes',
+    tags=['geolocalizacao']
 )}}
 
 {% if execute and is_incremental() %}
diff --git a/queries/models/gtfs/stops_gtfs.sql b/queries/models/gtfs/stops_gtfs.sql
index 98a1b69f..38f73004 100644
--- a/queries/models/gtfs/stops_gtfs.sql
+++ b/queries/models/gtfs/stops_gtfs.sql
@@ -3,7 +3,8 @@
                     'data_type' :'date',
                     'granularity': 'day' },
     unique_key = ['stop_id', 'feed_start_date'],
-    alias = 'stops'
+    alias = 'stops',
+    tags=['geolocalizacao']
 )}}
 
 {% if execute and is_incremental() %}