Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QUERIES] Adiciona o conteúdo do repositório queries-rj-smtr #3

Merged
merged 1 commit into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added queries/analysis/.gitkeep
Empty file.
187 changes: 176 additions & 11 deletions queries/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,191 @@
name: 'queries'
version: '1.0.0'
name: "rj_smtr"
version: "1.0.0"
config-version: 2

profile: 'queries'
profile: "queries"

model-paths: ["models"]
analysis-paths: ["analyses"]
source-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["seeds"]
data-paths: ["data"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

clean-targets: # directories to be removed by `dbt clean`
target-path: "target"
clean-targets:
- "target"
- "dbt_packages"
- "dbt_modules"

vars:
### Date default variables ###
date_range_start: "2022-01-01T00:00:00"
date_range_end: "2022-01-01T01:00:00"
start_date: "2022-01-01T01:00:00"
end_date: "2022-01-01T01:00:00"
run_date: "2022-01-01T01:00:00"

### Version default variable (replaced on deploy) ###
version: ""

### GPS ###
brt_terminais: "rj-smtr.br_rj_riodejaneiro_transporte.estacoes_e_terminais_brt" # aux_registros_parada
brt_registros_staging: rj-smtr-staging.br_rj_riodejaneiro_brt_gps_staging.registros
limites_caixa: "rj-smtr.br_rj_riodejaneiro_geo.limites_geograficos_caixa" # registros_filtrada
linhas_sppo: "rj-smtr.br_rj_riodejaneiro_transporte.linhas_sppo" # registros_agg_data_hora_consorcio
polygon_garagem: "rj-smtr.br_rj_riodejaneiro_geo.garagens_polygon" # aux_registros_parada
sppo_terminais: "rj-smtr.br_rj_riodejaneiro_transporte.terminais_onibus_coordenadas" # aux_registros_parada
sppo_registros_staging: "rj-smtr-staging.br_rj_riodejaneiro_onibus_gps_staging.registros"
sppo_realocacao_staging: "rj-smtr-staging.br_rj_riodejaneiro_onibus_gps_staging.realocacao"
data_inicio_realocacao: "2022-11-15"

# Parametros de intersecção do ônibus com rota
## Tamanho do buffer da rota
tamanho_buffer_metros: 500 # flag_trajeto_correto
## Intervalo máximo que um veículo pode ficar fora da rota para ser considerado
## dentro da rota. Afeta a flag flag_trajeto_correto_hist
intervalo_max_desvio_segundos: 600 # flag_trajeto_correto
## Tamanho da janela de tempo (em segundos) para cálculo da média móvel de velocidade do veículo
janela_movel_velocidade: 600 # aux_registros_velocidade
## Velocidade máxima média que o veículo pode atingir para evitar outliers provenientes de túneis
velocidade_maxima: 60
## Velocidade mínima para que o carro seja considerado em movimento em aux_registros_velocidade
velocidade_limiar_parado: 3
## Distância mínima para que o veículo seja identificado parado em um terminal ou garagem em aux_registros_parada
distancia_limiar_parada: 250

### [ANTIGO] SIGMOB ###

# Modal SPPO (ônibus)
sppo_id_modal_smtr: ["'22'", "'23'", "'O'"]

# Modal BRT
brt_id_modal_smtr: ["'20'", "'B'"]

# data_versao fixada para operações que envolvem o uso do SIGMOB
versao_fixa_sigmob: "2022-06-10"

data_inclusao_agency: "2021-08-03"
data_inclusao_stop_times: "2021-08-03"
data_inclusao_linhas: "2021-08-03"
data_inclusao_routes: "2021-08-03"
data_inclusao_trips: "2021-08-03"
data_inclusao_shapes: "2021-08-24"
data_inclusao_stops: "2021-08-24"
data_inclusao_calendar: "2021-09-30"
data_inclusao_frota_determinada: "2021-09-30"
data_inclusao_stop_details: "2021-09-30"
data_inclusao_holidays: "2021-11-05"

agency_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.agency"
calendar_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.calendar"
frota_determinada_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.frota_determinada"
holidays_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.holidays"
linhas_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.linhas"
routes_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.routes"
shapes_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.shapes"
shapes_geom_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.shapes_geom"
stop_times_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.stop_times"
stop_details_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.stop_details"
stops_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.stops"
trips_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.trips"

## GTFS
data_versao_gtfs: "YYYY-MM-DD"

### Subsídio SPPO (Ônibus) ###
buffer: 500 # distância em metros para buffer
perc_conformidade_distancia_min: 0
perc_conformidade_shape_min: 80
perc_conformidade_registros_min: 50
perc_distancia_total_subsidio: 80
quadro_horario: "`rj-smtr-staging.projeto_subsidio_sppo_staging.quadro_horario`"
subsidio_shapes: "`rj-smtr-staging.projeto_subsidio_sppo_staging.shapes`"
subsidio_trips: "`rj-smtr-staging.projeto_subsidio_sppo_staging.trips`"
subsidio_parametros: "`rj-smtr-staging.dashboard_subsidio_sppo_staging.subsidio_parametros`"
shapes_version: "YYYY-MM-DD"
frequencies_version: "YYYY-MM-DD"
DATA_SUBSIDIO_V2_INICIO: "2023-01-16"
DATA_SUBSIDIO_V3_INICIO: "2023-07-16"
# valor_subsidio: "`rj-smtr-dev.projeto_subsidio_sppo.valor_subsidio`"

# Recursos #
recurso_staging: "rj-smtr-staging.projeto_subsidio_sppo_staging.recurso"
recurso_prazo: "rj-smtr.projeto_subsidio_sppo.recurso_prazo"
recurso_julgamento: "rj-smtr.projeto_subsidio_sppo.recurso_julgamento"
recurso_viagem_start: "2022-07-01 00:00:00"
recurso_viagem_end: "2022-07-15 00:00:00"
recurso_timestamp_captura: "2022-11-04T14:17:00"
perc_conformidade_distancia_recurso_min: 50

### Veiculos ###
sppo_licenciamento_stu_staging: "rj-smtr-staging.veiculo_staging.sppo_licenciamento_stu"
sppo_licenciamento_solicitacao_staging: "rj-smtr-staging.veiculo_staging.sppo_licenciamento_solicitacao"
sppo_infracao_staging: "rj-smtr-staging.veiculo_staging.sppo_infracao"
sppo_licenciamento_solicitacao_data_versao: "2023-02-06"
stu_data_versao: ""

### RDO ###
rho_registros_sppo_staging: "rj-smtr-staging.br_rj_riodejaneiro_rdo_staging.rho_registros_sppo"
rho_max_processing_interval: 7
rho_sppo_start_date: "2021-03-01"

### Bilhetagem ###
var_percentual_tarifa_cbd: 0.04

### Diretorios ###
ids_consorcios:
{
"'221000014'": "'6'",
"'221000023'": "'4'",
"'221000032'": "'3'",
"'221000041'": "'5'",
"'221000050'": "'1'",
}

models:
+persist_docs:
relation: true
columns: true
queries:
example:
rj_smtr:
projeto_subsidio_sppo:
+materialized: view
+schema: projeto_subsidio_sppo
br_rj_riodejaneiro_gtfs:
+materialized: incremental
+incremental_strategy: insert_overwrite
+schema: br_rj_riodejaneiro_gtfs
br_rj_riodejaneiro_sigmob:
+materialized: view
+schema: br_rj_riodejaneiro_sigmob
br_rj_riodejaneiro_onibus_gps:
+materialized: view
+schema: br_rj_riodejaneiro_onibus_gps
br_rj_riodejaneiro_brt_gps:
+materialized: view
+schema: br_rj_riodejaneiro_brt_gps
br_rj_riodejaneiro_veiculos:
+materialized: view
+schema: br_rj_riodejaneiro_veiculos
dashboard_subsidio_sppo:
+materialized: view
+schema: dashboard_subsidio_sppo
veiculo:
+materialized: view
+schema: veiculo
br_rj_riodejaneiro_rdo:
+materialized: view
+schema: br_rj_riodejaneiro_rdo
br_rj_riodejaneiro_bilhetagem:
+materialized: incremental
+schema: br_rj_riodejaneiro_bilhetagem
br_rj_riodejaneiro_bilhetagem_staging:
+materialized: view
+schema: br_rj_riodejaneiro_bilhetagem_staging
br_rj_riodejaneiro_stu:
+materialized: view
+schema: example
+schema: br_rj_riodejaneiro_stu
cadastro:
+schema: cadastro
br_rj_riodejaneiro_recurso:
+materialized: incremental
+schema: br_rj_riodejaneiro_recurso
Empty file added queries/dev/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions queries/dev/profiles-example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
default:
target: dev
outputs:
dev:
type: bigquery
method: service-account
project: rj-smtr-dev
dataset: dbt
location: US
threads: 2
keyfile: # caminho/para/sua/credencial.json
prod:
type: bigquery
method: service-account
project: rj-smtr
dataset: dbt
location: US
threads: 2
keyfile: # caminho/para/sua/credencial.json
9 changes: 9 additions & 0 deletions queries/dev/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from utils import run_dbt_model
import os

# Veja os parâmetros disponíveis da função run_dbt_model em util.py

run_dbt_model(
dataset_id="example",
table_id="my_first_dbt_model",
)
62 changes: 62 additions & 0 deletions queries/dev/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import os
from datetime import datetime as dt
from datetime import timedelta
import pandas as pd

from typing import Any, Dict, List, Union


def run_dbt_model(
dataset_id: str = None,
table_id: str = None,
model: str = None,
upstream: bool = None,
downstream: bool = None,
exclude: str = None,
flags: str = None,
_vars: Union[dict, List[Dict]] = None,
):
"""
Run a DBT model.
"""
run_command = "dbt run"

common_flags = "-x --profiles-dir ./dev"

if not flags:
flags = common_flags
else:
flags = common_flags + " " + flags

if not model:
model = f"{dataset_id}"
if table_id:
model += f".{table_id}"

# Set models and upstream/downstream for dbt
if model:
run_command += " --select "
if upstream:
run_command += "+"
run_command += f"{model}"
if downstream:
run_command += "+"

if exclude:
run_command += f" --exclude {exclude}"

if _vars:
if isinstance(_vars, list):
vars_dict = {}
for elem in _vars:
vars_dict.update(elem)
vars_str = f'"{vars_dict}"'
run_command += f" --vars {vars_str}"
else:
vars_str = f'"{_vars}"'
run_command += f" --vars {vars_str}"
if flags:
run_command += f" {flags}"

print(f"\n>>> RUNNING: {run_command}\n")
os.system(run_command)
Empty file added queries/macros/.gitkeep
Empty file.
28 changes: 28 additions & 0 deletions queries/macros/contained.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{% test contained(model, column_name, in, field, partition_column) %}
select *
from (
SELECT DISTINCT
{{ column_name }} contained,
_contains
FROM (
SELECT DISTINCT
{{column_name}}
FROM {{model}}
{% if execute -%}
{%- set query ="SELECT MAX(" ~ partition_column ~ ") FROM " ~ model -%}
{%- set max_partition_date = run_query(query).columns[0].values()[0] -%}
{{- log(query, info=True) -}}
{{- log(max_partition_date, info=True) -}}
{%- endif -%}
WHERE {{partition_column}} = "{{max_partition_date}}"
)
LEFT JOIN (
SELECT DISTINCT
{{ field }} _contains
FROM {{ in }}
WHERE {{partition_column}} = "{{max_partition_date}}"
)
ON {{column_name }} = _contains
)
where _contains is null
{% endtest %}
13 changes: 13 additions & 0 deletions queries/macros/generate_schema_name.sql
Original file line number Diff line number Diff line change
@@ -1 +1,14 @@
{% macro generate_schema_name(custom_schema_name, node) -%}

{%- set default_schema = target.schema -%}
{%- if custom_schema_name is none -%}

{{ default_schema }}

{%- else -%}

{{ custom_schema_name | trim }}

{%- endif -%}

{%- endmacro %}
29 changes: 29 additions & 0 deletions queries/macros/many_to_one.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{% test many_to_one(model, column_name, foreign_key, partition_column, to_table) %}
{%- if execute -%}
{%- set model_max_partition = run_query('SELECT MAX('~partition_column~') FROM '~model).columns[0].values()[0] -%}
{%- set to_table_max_partition = run_query('SELECT MAX('~partition_column~') FROM '~to_table).columns[0].values()[0] -%}
{%- endif -%}
with t as (
SELECT
m.{{column_name}} from_col,
n.{{column_name}} to_col
FROM (
SELECT
{{column_name}}
FROM {{model}}
WHERE {{partition_column}} = "{{model_max_partition}}"
) m
LEFT JOIN (
SELECT
{{column_name}}
FROM {{to_table}}
WHERE {{partition_column}} = "{{to_table_max_partition}}"
) n
ON m.{{column_name}} = n.{{column_name}}
)

SELECT
*
FROM t
WHERE to_col is null
{% endtest %}
10 changes: 10 additions & 0 deletions queries/macros/not_null.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{%test not_null(model, column_name, partition_column)%}
SELECT
{{column_name}}
FROM
{{model}}
WHERE
{{partition_column}} = (SELECT MAX({{partition_column}}) FROM {{model}})
AND
{{column_name}} is null
{%endtest%}
Loading
Loading