From c209c39a9de790f627474c4facb6bbbab64f551c Mon Sep 17 00:00:00 2001 From: Guilherme Botelho Date: Tue, 29 Oct 2024 18:58:14 -0300 Subject: [PATCH] Revisa tratamento GPS (#287) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * tratamento captura vazia * add package dbt-labs/dbt_utils * add flag store_failures * altera tratamento realocação * Revert "add flag store_failures" This reverts commit 30d123c8b81cb718e523ac015a160334b8e686ae. * add dbt tests * corrige filtro realocacao * add changelogs * ajuste config dos testes * altera nome dos testes * corrige nomes dos testes * fix config where dos tests e add macro custom_get_where_subquery --- .../CHANGELOG.md | 10 ++++++ .../constants.py | 36 +++++++++++++++++++ pipelines/migration/tasks.py | 6 ++++ queries/dbt_project.yml | 6 +++- queries/macros/custom_get_where_subquery.sql | 21 +++++++++++ queries/macros/not_null.sql | 10 ------ .../sppo_aux_registros_realocacao.sql | 5 +-- .../br_rj_riodejaneiro_veiculos/schema.yaml | 26 ++++++++++++++ queries/models/veiculo/CHANGELOG.md | 10 ++++++ queries/packages.yml | 4 ++- 10 files changed, 118 insertions(+), 16 deletions(-) create mode 100644 pipelines/migration/br_rj_riodejaneiro_onibus_gps/constants.py create mode 100644 queries/macros/custom_get_where_subquery.sql delete mode 100644 queries/macros/not_null.sql diff --git a/pipelines/migration/br_rj_riodejaneiro_onibus_gps/CHANGELOG.md b/pipelines/migration/br_rj_riodejaneiro_onibus_gps/CHANGELOG.md index 533de2d0b..2a34272d7 100644 --- a/pipelines/migration/br_rj_riodejaneiro_onibus_gps/CHANGELOG.md +++ b/pipelines/migration/br_rj_riodejaneiro_onibus_gps/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog - br_rj_riodejaneiro_onibus_gps +## [1.0.2] - 2024-08-25 + +### Adicionado + +- Cria arquivo `constants.py` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/287) + +### Alterado + +- Altera a task `get_raw` para verificar se a captura está vazia 
(https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/287) + ## [1.0.1] - 2024-08-19 ### Alterado diff --git a/pipelines/migration/br_rj_riodejaneiro_onibus_gps/constants.py b/pipelines/migration/br_rj_riodejaneiro_onibus_gps/constants.py new file mode 100644 index 000000000..5a7d47b7e --- /dev/null +++ b/pipelines/migration/br_rj_riodejaneiro_onibus_gps/constants.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +Constant values for rj_smtr br_rj_riodejaneiro_onibus_gps +""" + +from enum import Enum + + +class constants(Enum): # pylint: disable=c0103 + """ + Constant values for rj_smtr br_rj_riodejaneiro_onibus_gps + """ + + GPS_DATA_CHECKS_LIST = { + "gps_sppo": { + "unique_columns__gps_sppo": {"description": "Todos os registros são únicos"}, + "not_null__timestamp_gps__gps_sppo": { + "description": "Todos os registros possuem timestamp_gps não nulo" + }, + "not_null__id_veiculo__gps_sppo": { + "description": "Todos os registros possuem id_veiculo não nulo" + }, + "not_null__servico__gps_sppo": { + "description": "Todos os registros possuem servico não nulo" + }, + "not_null__latitude__gps_sppo": { + "description": "Todos os registros possuem latitude não nula" + }, + "not_null__longitude__gps_sppo": { + "description": "Todos os registros possuem longitude não nula" + }, + "not_null__status__gps_sppo": { + "description": "Todos os registros possuem status não nulo" + }, + } + } diff --git a/pipelines/migration/tasks.py b/pipelines/migration/tasks.py index 0317246b1..d1c53e376 100644 --- a/pipelines/migration/tasks.py +++ b/pipelines/migration/tasks.py @@ -629,6 +629,12 @@ def get_raw( # pylint: disable=R0912 ) if response.ok: # status code is less than 400 + if not response.content and url in [ + constants.GPS_SPPO_API_BASE_URL_V2.value, + constants.GPS_SPPO_API_BASE_URL.value, + ]: + error = "Dados de GPS vazios" + if filetype == "json": data = response.json() diff --git a/queries/dbt_project.yml b/queries/dbt_project.yml index 0828e353b..bd594b102 100644 
--- a/queries/dbt_project.yml +++ b/queries/dbt_project.yml @@ -186,6 +186,10 @@ vars: ### Encontro de Contas ### encontro_contas_modo: "" +tests: + rj_smtr: + where: "DATA BETWEEN DATE('__date_range_start__') AND DATE('__date_range_end__')" + models: +persist_docs: relation: true @@ -327,4 +331,4 @@ models: +schema: monitoramento staging: +materialized: view - +schema: monitoramento_staging + +schema: monitoramento_staging \ No newline at end of file diff --git a/queries/macros/custom_get_where_subquery.sql b/queries/macros/custom_get_where_subquery.sql new file mode 100644 index 000000000..03a368448 --- /dev/null +++ b/queries/macros/custom_get_where_subquery.sql @@ -0,0 +1,21 @@ +{% macro get_where_subquery(relation) -%} + {% set where = config.get('where') %} + {% if where %} + {% if "__date_range_start__" in where %} + {# replace the placeholder string with the value of the date_range_start var #} + {% set date_range_start = var('date_range_start') %} + {% set where = where | replace("__date_range_start__", date_range_start) %} + {% endif %} + {% if "__date_range_end__" in where %} + {# replace the placeholder string with the value of the date_range_end var #} + {% set date_range_end = var('date_range_end') %} + {% set where = where | replace("__date_range_end__", date_range_end) %} + {% endif %} + {%- set filtered -%} + (select * from {{ relation }} where {{ where }}) dbt_subquery + {%- endset -%} + {% do return(filtered) %} + {%- else -%} + {% do return(relation) %} + {%- endif -%} +{%- endmacro %} \ No newline at end of file diff --git a/queries/macros/not_null.sql b/queries/macros/not_null.sql deleted file mode 100644 index 95bcd6c91..000000000 --- a/queries/macros/not_null.sql +++ /dev/null @@ -1,10 +0,0 @@ -{%test not_null(model, column_name, partition_column)%} -SELECT - {{column_name}} -FROM - {{model}} -WHERE - {{partition_column}} = (SELECT MAX({{partition_column}}) FROM {{model}}) -AND - {{column_name}} is null -{%endtest%} \ No newline at end of file diff --git 
a/queries/models/br_rj_riodejaneiro_onibus_gps/sppo_aux_registros_realocacao.sql b/queries/models/br_rj_riodejaneiro_onibus_gps/sppo_aux_registros_realocacao.sql index d3032f49d..2ab44127b 100644 --- a/queries/models/br_rj_riodejaneiro_onibus_gps/sppo_aux_registros_realocacao.sql +++ b/queries/models/br_rj_riodejaneiro_onibus_gps/sppo_aux_registros_realocacao.sql @@ -32,10 +32,7 @@ with realocacao as ( datetime_diff(datetime_operacao, datetime_entrada, minute) between 0 and 60 and data between DATE("{{var('date_range_start')}}") and DATE(datetime_add("{{var('date_range_end')}}", interval 1 hour)) - {% if is_incremental() -%} - and datetime_operacao between datetime("{{var('date_range_start')}}") - and datetime_add("{{var('date_range_end')}}", interval 1 hour) - {%- endif -%} + and (datetime_saida >= datetime("{{var('date_range_start')}}") or datetime_operacao >= datetime("{{var('date_range_start')}}")) ), -- 2. Altera registros de GPS com servicos realocados gps as ( diff --git a/queries/models/br_rj_riodejaneiro_veiculos/schema.yaml b/queries/models/br_rj_riodejaneiro_veiculos/schema.yaml index d83be97d2..aa8eb4a7a 100644 --- a/queries/models/br_rj_riodejaneiro_veiculos/schema.yaml +++ b/queries/models/br_rj_riodejaneiro_veiculos/schema.yaml @@ -44,23 +44,46 @@ models: description: "{{ doc('versao') }}" - name: gps_sppo description: "Tabela com os dados tratados de registros de GPS do SPPO, incluindo velocidade estimada, estado de movimento, parada em terminal ou garagem e interseção com o traçado da linha informada." 
+ tests: + - dbt_utils.unique_combination_of_columns: + name: unique_columns__gps_sppo + combination_of_columns: + - timestamp_gps + - id_veiculo + - latitude + - longitude columns: - name: modo description: "SPPO – nesse arquivo só consta esse modo" - name: timestamp_gps description: "{{ doc('timestamp_gps') }}" + tests: + - not_null: + name: not_null__timestamp_gps__gps_sppo - name: data description: "{{ doc('data_gps') }}" - name: hora description: "{{ doc('hora_gps') }}" - name: id_veiculo description: "{{ doc('id_veiculo') }}" + tests: + - not_null: + name: not_null__id_veiculo__gps_sppo - name: servico description: "{{ doc('servico') }}" + tests: + - not_null: + name: not_null__servico__gps_sppo - name: latitude description: "{{ doc('latitude_gps') }}" + tests: + - not_null: + name: not_null__latitude__gps_sppo - name: longitude description: "{{ doc('longitude_gps') }}" + tests: + - not_null: + name: not_null__longitude__gps_sppo - name: flag_em_operacao description: "{{ doc('flag_em_operacao') }}" - name: flag_em_movimento @@ -75,6 +98,9 @@ models: description: "{{ doc('flag_trajeto_correto_hist') }}" - name: status description: "{{ doc('status_veiculo_gps') }}" + tests: + - not_null: + name: not_null__status__gps_sppo - name: velocidade_instantanea description: "{{ doc('velocidade_instantanea') }}" - name: velocidade_estimada_10_min diff --git a/queries/models/veiculo/CHANGELOG.md b/queries/models/veiculo/CHANGELOG.md index c3ea77f1d..6577475e9 100644 --- a/queries/models/veiculo/CHANGELOG.md +++ b/queries/models/veiculo/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog - veiculo +## [1.1.3] - 2024-10-25 + +#### Alterado + +- Altera lógica do filtro do modelo `sppo_aux_registros_realocacao.sql` (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/287) + +#### Adicionado + +- Adiciona testes do DBT no schema (https://github.com/prefeitura-rio/pipelines_rj_smtr/pull/287) + ## [1.1.2] - 2024-04-25 #### Adicionado diff --git a/queries/packages.yml 
b/queries/packages.yml index acd98134c..f6f4a01c5 100644 --- a/queries/packages.yml +++ b/queries/packages.yml @@ -2,4 +2,6 @@ packages: - package: dbt-labs/audit_helper version: 0.12.0 - package: data-mie/dbt_profiler - version: 0.8.2 \ No newline at end of file + version: 0.8.2 + - package: dbt-labs/dbt_utils + version: 1.3.0 \ No newline at end of file