diff --git a/pipelines/migration/br_rj_riodejaneiro_gtfs/CHANGELOG.md b/pipelines/migration/br_rj_riodejaneiro_gtfs/CHANGELOG.md index 2a4b55ad..2df6721a 100644 --- a/pipelines/migration/br_rj_riodejaneiro_gtfs/CHANGELOG.md +++ b/pipelines/migration/br_rj_riodejaneiro_gtfs/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog - gtfs +## Adicionado + +## [1.0.6] - 2024-08-02 + +- Adiciona filtro para os nomes de tabs da planilha de controle os na task `get_raw_drive_files` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/128/files) + +- Adiciona etapa de remover pontos antes da conversão de metro para km no processamento da OS (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/129) + ## Corrigido ## [1.0.5] - 2024-07-23 diff --git a/pipelines/migration/br_rj_riodejaneiro_gtfs/tasks.py b/pipelines/migration/br_rj_riodejaneiro_gtfs/tasks.py index 04488ed1..b40d1251 100644 --- a/pipelines/migration/br_rj_riodejaneiro_gtfs/tasks.py +++ b/pipelines/migration/br_rj_riodejaneiro_gtfs/tasks.py @@ -188,6 +188,8 @@ def get_raw_drive_files(os_control, local_filepath: list, regular_sheet_index: i # Salva os nomes das planilhas sheetnames = xl.load_workbook(file_bytes_os).sheetnames + sheetnames = [name for name in sheetnames if "ANEXO" in name] + log(f"tabs encontradas na planilha Controle OS: {sheetnames}") with zipfile.ZipFile(file_bytes_gtfs, "r") as zipped_file: for filename in list(constants.GTFS_TABLE_CAPTURE_PARAMS.value.keys()): diff --git a/pipelines/migration/br_rj_riodejaneiro_gtfs/utils.py b/pipelines/migration/br_rj_riodejaneiro_gtfs/utils.py index 18f2a20a..fa3cd8d1 100644 --- a/pipelines/migration/br_rj_riodejaneiro_gtfs/utils.py +++ b/pipelines/migration/br_rj_riodejaneiro_gtfs/utils.py @@ -270,6 +270,8 @@ def processa_ordem_servico( extensao_cols = ["extensao_ida", "extensao_volta"] quadro[extensao_cols] = quadro[extensao_cols].astype(str) + for col in extensao_cols: + quadro[col] = quadro[col].str.replace(".", "", regex=False) quadro[extensao_cols] = 
quadro[extensao_cols].apply(pd.to_numeric) quadro["extensao_ida"] = quadro["extensao_ida"] / 1000 diff --git a/queries/dbt_project.yml b/queries/dbt_project.yml index 670301a9..8070cd7d 100644 --- a/queries/dbt_project.yml +++ b/queries/dbt_project.yml @@ -291,4 +291,7 @@ models: subsidio: +materialized: incremental +incremental_strategy: insert_overwrite - +schema: subsidio \ No newline at end of file + +schema: subsidio + catalogo: + +materialized: view + +schema: catalogo diff --git a/queries/dev/utils.py b/queries/dev/utils.py index db021da6..439e0ce2 100644 --- a/queries/dev/utils.py +++ b/queries/dev/utils.py @@ -5,6 +5,8 @@ # from datetime import timedelta from typing import Dict, List, Union +import requests + # import pandas as pd @@ -62,3 +64,16 @@ def run_dbt_model( print(f"\n>>> RUNNING: {run_command}\n") os.system(run_command) + + +def fetch_dataset_sha(dataset_id: str): + """Fetches the SHA of a branch from Github""" + url = "https://api.github.com/repos/prefeitura-rio/queries-rj-smtr" + url += f"/commits?queries-rj-smtr/rj_smtr/{dataset_id}" + response = requests.get(url) + + if response.status_code != 200: + return None + + dataset_version = response.json()[0]["sha"] + return {"version": dataset_version} diff --git a/queries/macros/get_models_with_tags.sql b/queries/macros/get_models_with_tags.sql new file mode 100644 index 00000000..f68dce50 --- /dev/null +++ b/queries/macros/get_models_with_tags.sql @@ -0,0 +1,18 @@ +/* https://discourse.getdbt.com/t/get-all-dbt-table-model-names-from-a-tag-inside-another-model/7703 (modificado) */ +{% macro get_models_with_tags(tags) %} + +{% set models_with_tag = [] %} + +{% for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") %} + + {% for tag in tags %} + {% if tag in model.config.tags %} + {{ models_with_tag.append(model) }} + {% endif %} + {% endfor %} + +{% endfor %} + +{{ return(models_with_tag) }} + +{% endmacro %} \ No newline at end of file diff --git 
a/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md b/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md index a74e347f..defd4fb6 100644 --- a/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md +++ b/queries/models/br_rj_riodejaneiro_bilhetagem/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog - bilhetagem +## [2.1.4] - 2024-08-02 + +### Alterado +- Adiciona tag `geolocalizacao` aos modelos `gps_validador_van.sql` e `gps_validador.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) +- Adiciona tag `identificacao` ao modelo `staging_cliente.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) + ## [2.1.3] - 2024-07-18 ### Adicionado diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql index ce3b4b71..9ec4fec0 100644 --- a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql +++ b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador.sql @@ -6,6 +6,7 @@ "data_type":"date", "granularity": "day" }, + tags=['geolocalizacao'] ) }} diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql index 84aaa992..601cd3ae 100644 --- a/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql +++ b/queries/models/br_rj_riodejaneiro_bilhetagem/gps_validador_van.sql @@ -6,6 +6,7 @@ "data_type":"date", "granularity": "day" }, + tags=['geolocalizacao'] ) }} diff --git a/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql b/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql index 7b9c0d93..4feda28c 100644 --- a/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql +++ b/queries/models/br_rj_riodejaneiro_bilhetagem_staging/staging_cliente.sql @@ -1,6 +1,7 @@ {{ config( alias='cliente', + tags=['identificacao'] ) }} diff --git 
a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md index d1b45343..3cf994d5 100644 --- a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md +++ b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog - onibus_gps_zirix +## [1.0.3] - 2024-08-02 + +### Alterado +- Adiciona tag `geolocalizacao` ao modelo `gps_sppo_zirix.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) + ## [1.0.2] - 2024-07-02 ### Adicionado diff --git a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql index 0c2ac39b..6990d828 100644 --- a/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql +++ b/queries/models/br_rj_riodejaneiro_onibus_gps_zirix/gps_sppo_zirix.sql @@ -6,7 +6,8 @@ 'data_type':'date', 'granularity': 'day' }, - alias='gps_sppo' + alias='gps_sppo', + tags=['geolocalizacao'] ) }} /* diff --git a/queries/models/br_rj_riodejaneiro_veiculos/CHANGELOG.md b/queries/models/br_rj_riodejaneiro_veiculos/CHANGELOG.md new file mode 100644 index 00000000..946c270f --- /dev/null +++ b/queries/models/br_rj_riodejaneiro_veiculos/CHANGELOG.md @@ -0,0 +1,6 @@ +# Changelog - br_rj_riodejaneiro_veiculos + +## [1.0.1] - 2024-08-02 + +### Alterado +- Adiciona tag `geolocalizacao` aos modelos `gps_brt.sql` e `gps_sppo.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) \ No newline at end of file diff --git a/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql b/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql index e0d5ace8..4b19400e 100644 --- a/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql +++ b/queries/models/br_rj_riodejaneiro_veiculos/gps_brt.sql @@ -5,7 +5,8 @@ 'field': 'data', 'data_type': 'date', 'granularity': 'day' - } + }, + tags=['geolocalizacao'] ) }} /* diff --git 
a/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql b/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql index 41e56c81..b62e6353 100644 --- a/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql +++ b/queries/models/br_rj_riodejaneiro_veiculos/gps_sppo.sql @@ -5,7 +5,8 @@ 'field':"data", 'data_type':'date', 'granularity': 'day' - } + }, + tags=['geolocalizacao'] ) }} /* diff --git a/queries/models/cadastro/CHANGELOG.md b/queries/models/cadastro/CHANGELOG.md index 49cc575a..f496cc7a 100644 --- a/queries/models/cadastro/CHANGELOG.md +++ b/queries/models/cadastro/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog - cadastro +## [1.2.1] - 2024-08-02 + +### Alterado +- Adiciona tag `geolocalizacao` ao modelo `servicos.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) +- Adiciona tag `identificacao` ao modelo `operadoras.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) + ## [1.2.0] - 2024-07-17 ### Adicionado diff --git a/queries/models/cadastro/operadoras.sql b/queries/models/cadastro/operadoras.sql index 65dcdd79..aa1c7b7c 100644 --- a/queries/models/cadastro/operadoras.sql +++ b/queries/models/cadastro/operadoras.sql @@ -1,6 +1,7 @@ {{ config( - materialized="table" + materialized="table", + tags=["identificacao"] ) }} diff --git a/queries/models/cadastro/servicos.sql b/queries/models/cadastro/servicos.sql index 667125fe..a32bd988 100644 --- a/queries/models/cadastro/servicos.sql +++ b/queries/models/cadastro/servicos.sql @@ -1,7 +1,8 @@ {{ config( - materialized='table' - ) + materialized='table', + tags=['geolocalizacao'] + ), }} SELECT diff --git a/queries/models/catalogo/ed_metadado_coluna.sql b/queries/models/catalogo/ed_metadado_coluna.sql new file mode 100644 index 00000000..6915b4ea --- /dev/null +++ b/queries/models/catalogo/ed_metadado_coluna.sql @@ -0,0 +1,20 @@ +{% if execute %} + {% set models_with_tag = get_models_with_tags(["geolocalizacao", "identificacao"]) %} + {% do log("Models: \n", info=true) %} + {% for 
model in models_with_tag %} + {% do log(model.schema~"."~model.alias~"\n", info=true) %} + {% endfor %} +{% endif %} + +SELECT + * +FROM + {{ ref("metadado_coluna") }} +WHERE + {% for model in models_with_tag %} + {% if not loop.first %}OR {% endif %}(dataset_id = "{{ model.schema }}" + AND table_id = "{{ model.alias }}") + {% endfor %} + + OR (dataset_id = "br_rj_riodejaneiro_stpl_gps" + AND table_id = "registros") \ No newline at end of file diff --git a/queries/models/catalogo/metadado_coluna.sql b/queries/models/catalogo/metadado_coluna.sql new file mode 100644 index 00000000..34a20e45 --- /dev/null +++ b/queries/models/catalogo/metadado_coluna.sql @@ -0,0 +1,9 @@ +SELECT + table_catalog AS project_id, + table_schema AS dataset_id, + table_name AS table_id, + column_name, + data_type, + description +FROM + rj-smtr.`region-US`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS \ No newline at end of file diff --git a/queries/models/catalogo/schema.yml b/queries/models/catalogo/schema.yml new file mode 100644 index 00000000..3b2233c7 --- /dev/null +++ b/queries/models/catalogo/schema.yml @@ -0,0 +1,33 @@ +version: 2 + +models: + - name: ed_metadado_coluna + description: "Catálogo de dados de geolocalização e identificação do data lake da SMTR destinados ao Escritório de Dados (GP/ED)" + columns: + - name: project_id + description: "{{ doc('project_id') }}" + - name: dataset_id + description: "{{ doc('dataset_id') }}" + - name: table_id + description: "{{ doc('table_id') }}" + - name: column_name + description: "{{ doc('column_name') }}" + - name: data_type + description: "{{ doc('data_type') }}" + - name: description + description: "{{ doc('metadado_descricao') }}" + - name: metadado_coluna + description: "Catálogo de dados do data lake da SMTR" + columns: + - name: project_id + description: "{{ doc('project_id') }}" + - name: dataset_id + description: "{{ doc('dataset_id') }}" + - name: table_id + description: "{{ doc('table_id') }}" + - name: column_name + description: "{{ 
doc('column_name') }}" + - name: data_type + description: "{{ doc('data_type') }}" + - name: description + description: "{{ doc('metadado_descricao') }}" \ No newline at end of file diff --git a/queries/models/docs.md b/queries/models/docs.md index a891861a..bfe07f98 100644 --- a/queries/models/docs.md +++ b/queries/models/docs.md @@ -32,4 +32,28 @@ Serviço realizado pelo veículo. {% docs id_viagem %} Código único identificador da viagem. +{% enddocs %} + +{% docs project_id %} +Nome do projeto (rj-smtr) +{% enddocs %} + +{% docs dataset_id %} +Nome do conjunto de dados +{% enddocs %} + +{% docs table_id %} +Nome da tabela +{% enddocs %} + +{% docs column_name %} +Nome da coluna +{% enddocs %} + +{% docs data_type %} +Tipo de dado da coluna +{% enddocs %} + +{% docs metadado_descricao %} +Descrição da coluna {% enddocs %} \ No newline at end of file diff --git a/queries/models/gtfs/CHANGELOG.md b/queries/models/gtfs/CHANGELOG.md index a153ff17..2790158e 100644 --- a/queries/models/gtfs/CHANGELOG.md +++ b/queries/models/gtfs/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog - gtfs +## [1.1.8] - 2024-08-02 + +### Alterado +- Adiciona tag `geolocalizacao` aos modelos `shapes_geom_gtfs.sql`, `shapes_gtfs.sql` e `stops_gtfs.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/127) + ## [1.1.7] - 2024-07-23 ### Adicionado diff --git a/queries/models/gtfs/shapes_geom_gtfs.sql b/queries/models/gtfs/shapes_geom_gtfs.sql index e6098470..4c84c80c 100644 --- a/queries/models/gtfs/shapes_geom_gtfs.sql +++ b/queries/models/gtfs/shapes_geom_gtfs.sql @@ -3,7 +3,8 @@ 'data_type' :'date', 'granularity': 'day' }, unique_key = ['shape_id', 'feed_start_date'], - alias = 'shapes_geom' + alias = 'shapes_geom', + tags=['geolocalizacao'] ) }} {% if execute and is_incremental() %} diff --git a/queries/models/gtfs/shapes_gtfs.sql b/queries/models/gtfs/shapes_gtfs.sql index 926e725a..c9409a45 100644 --- a/queries/models/gtfs/shapes_gtfs.sql +++ b/queries/models/gtfs/shapes_gtfs.sql @@ -3,7 
+3,8 @@ 'data_type' :'date', 'granularity': 'day' }, unique_key = ['shape_id', 'shape_pt_sequence', 'feed_start_date'], - alias = 'shapes' + alias = 'shapes', + tags=['geolocalizacao'] )}} {% if execute and is_incremental() %} diff --git a/queries/models/gtfs/stops_gtfs.sql b/queries/models/gtfs/stops_gtfs.sql index 98a1b69f..38f73004 100644 --- a/queries/models/gtfs/stops_gtfs.sql +++ b/queries/models/gtfs/stops_gtfs.sql @@ -3,7 +3,8 @@ 'data_type' :'date', 'granularity': 'day' }, unique_key = ['stop_id', 'feed_start_date'], - alias = 'stops' + alias = 'stops', + tags=['geolocalizacao'] )}} {% if execute and is_incremental() %} diff --git a/queries/models/projeto_subsidio_sppo/CHANGELOG.md b/queries/models/projeto_subsidio_sppo/CHANGELOG.md index 14b408c7..cc87df5b 100644 --- a/queries/models/projeto_subsidio_sppo/CHANGELOG.md +++ b/queries/models/projeto_subsidio_sppo/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog - projeto_subsidio_sppo +## [6.0.3] - 2024-08-01 + +### Alterado + +- Alterados modelos `viagem_planejada.sql` e `subsidio_data_versao_efetiva.sql` para materializar sempre em D+0 e permitir acompanhamento pelos operadores (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/125) + ## [6.0.2] - 2024-04-22 ### Adicionado diff --git a/queries/models/projeto_subsidio_sppo/subsidio_data_versao_efetiva.sql b/queries/models/projeto_subsidio_sppo/subsidio_data_versao_efetiva.sql index 5674df59..18b6a900 100644 --- a/queries/models/projeto_subsidio_sppo/subsidio_data_versao_efetiva.sql +++ b/queries/models/projeto_subsidio_sppo/subsidio_data_versao_efetiva.sql @@ -368,9 +368,9 @@ WITH (feed_version) WHERE {% if is_incremental() %} - data = DATE_SUB(DATE("{{ var("run_date") }}"), INTERVAL 1 DAY) + data BETWEEN DATE_SUB("{{ var('run_date') }}", INTERVAL 1 DAY) AND DATE("{{ var('run_date') }}") {% else %} - data <= DATE_SUB(DATE("{{ var("run_date") }}"), INTERVAL 1 DAY) + data <= DATE("{{ var('run_date') }}") {% endif %} ) SELECT diff --git 
a/queries/models/projeto_subsidio_sppo/viagem_planejada.sql b/queries/models/projeto_subsidio_sppo/viagem_planejada.sql index d47c52bc..cabcc315 100644 --- a/queries/models/projeto_subsidio_sppo/viagem_planejada.sql +++ b/queries/models/projeto_subsidio_sppo/viagem_planejada.sql @@ -213,7 +213,7 @@ WITH {{ ref("subsidio_data_versao_efetiva") }} -- rj-smtr-dev.projeto_subsidio_sppo.subsidio_data_versao_efetiva WHERE - data = DATE_SUB("{{ var('run_date') }}", INTERVAL 1 DAY) ) + data BETWEEN DATE_SUB("{{ var('run_date') }}", INTERVAL 1 DAY) AND DATE("{{ var('run_date') }}")) SELECT d.data, CASE