From 0e330325283346906aa76d34ee89d4a1319def54 Mon Sep 17 00:00:00 2001 From: hellcassius Date: Fri, 22 Dec 2023 08:56:06 -0300 Subject: [PATCH] replicate constants --- pipelines/constants.py | 167 ++++++++++++++++++++++++++++++++++------- 1 file changed, 141 insertions(+), 26 deletions(-) diff --git a/pipelines/constants.py b/pipelines/constants.py index 3bc3ed39..36768a38 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -10,17 +10,7 @@ class constants(Enum): # pylint: disable=c0103 """ Constant values for the rj_smtr projects """ - # CONFIGS # - DOCKER_TAG = "AUTO_REPLACE_DOCKER_TAG" - DOCKER_IMAGE_NAME = "AUTO_REPLACE_DOCKER_IMAGE" - DOCKER_IMAGE = f"{DOCKER_IMAGE_NAME}:{DOCKER_TAG}" - GCS_FLOWS_BUCKET = "datario-public" - PREFECT_DEFAULT_PROJECT='staging' - - # AGENT LABELS # - RJ_SMTR_AGENT_LABEL = 'rj-smtr' - RJ_SMTR_DEV_AGENT_LABEL = 'rj-smtr-dev' - + # DEFAULT TIMEZONE # TIMEZONE = "America/Sao_Paulo" @@ -232,12 +222,13 @@ class constants(Enum): # pylint: disable=c0103 FROM tracking_detalhe WHERE - data_tracking BETWEEN '{start}' - AND '{end}' + id > {last_id} AND id <= {max_id} """, + "page_size": 1000, + "max_pages": 100, }, "primary_key": ["id"], - "interval_minutes": 1, + "interval_minutes": 5, } BILHETAGEM_ORDEM_PAGAMENTO_CAPTURE_PARAMS = [ @@ -283,6 +274,8 @@ class constants(Enum): # pylint: disable=c0103 BILHETAGEM_TRATAMENTO_INTERVAL = 60 + BILHETAGEM_PRIVATE_BUCKET = "rj-smtr-jae-private" + BILHETAGEM_CAPTURE_PARAMS = [ { "table_id": "linha", @@ -366,9 +359,14 @@ class constants(Enum): # pylint: disable=c0103 "database": "principal_db", "query": """ SELECT - * + o.*, + m.DS_TIPO_MODAL FROM - OPERADORA_TRANSPORTE + OPERADORA_TRANSPORTE o + LEFT JOIN + TIPO_MODAL m + ON + o.CD_TIPO_MODAL = m.CD_TIPO_MODAL WHERE DT_INCLUSAO BETWEEN '{start}' AND '{end}' @@ -378,19 +376,28 @@ class constants(Enum): # pylint: disable=c0103 "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, }, { - "table_id": "pessoa_juridica", + "table_id": "cliente", "partition_date_only": True, "extract_params": { "database": "principal_db", "query": """ SELECT - * + c.* FROM - PESSOA_JURIDICA + CLIENTE c + JOIN + OPERADORA_TRANSPORTE o + ON + c.CD_CLIENTE = o.CD_CLIENTE + WHERE + DT_CADASTRO BETWEEN '{start}' + AND '{end}' """, }, "primary_key": ["CD_CLIENTE"], # id column to nest data on "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + "save_bucket_name": BILHETAGEM_PRIVATE_BUCKET, + "pre_treatment_reader_args": {"dtype": {"NR_DOCUMENTO": "object"}}, }, { "table_id": "consorcio", @@ -428,6 +435,53 @@ class constants(Enum): # pylint: disable=c0103 "primary_key": ["CD_CONSORCIO", "CD_LINHA"], # id column to nest data on "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, }, + { + "table_id": "conta_bancaria", + "partition_date_only": True, + "extract_params": { + "database": "principal_db", + "query": """ + SELECT + c.*, + b.NM_BANCO + FROM + CONTA_BANCARIA c + JOIN + BANCO b + ON + b.NR_BANCO = c.NR_BANCO + JOIN + OPERADORA_TRANSPORTE o + ON + o.CD_CLIENTE = c.CD_CLIENTE + """, + }, + "primary_key": ["CD_CLIENTE"], # id column to nest data on + "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + "save_bucket_name": BILHETAGEM_PRIVATE_BUCKET, + }, + { + "table_id": "contato_pessoa_juridica", + "partition_date_only": True, + "extract_params": { + "database": "principal_db", + "query": """ + SELECT + * + FROM + CONTATO_PESSOA_JURIDICA + WHERE + DT_INCLUSAO BETWEEN '{start}' + AND '{end}' + """, + }, + "primary_key": [ + "NR_SEQ_CONTATO", + "CD_CLIENTE", + ], # id column to nest data on + "interval_minutes": BILHETAGEM_TRATAMENTO_INTERVAL, + "save_bucket_name": BILHETAGEM_PRIVATE_BUCKET, + }, ] BILHETAGEM_MATERIALIZACAO_TRANSACAO_PARAMS = { @@ -538,19 +592,70 @@ class constants(Enum): # pylint: disable=c0103 }, } + # STU + + STU_DATASET_ID = "br_rj_riodejaneiro_stu" + + STU_BUCKET_NAME = "rj-smtr-stu-private" + + STU_MODE_MAPPING = { + "1": "Táxi", + "2": "Ônibus", + "3": "Escolar", + "4": "Complementar (cabritinho)", + "6": "Fretamento", + "7": "TEC", + "8": "Van", + } + + STU_TYPE_MAPPING = [ + "Autônomo", + "Empresa", + "Cooperativa", + "Instituicao de Ensino", + "Associações", + "Autônomo Provisório", + "Contrato Público", + "Prestadora de Serviços", + ] + + STU_GENERAL_CAPTURE_PARAMS = { + "partition_date_only": True, + "source_type": "gcs", + "dataset_id": STU_DATASET_ID, + "save_bucket_name": STU_BUCKET_NAME, + } + + STU_TABLE_CAPTURE_PARAMS = [ + { + "table_id": "operadora_empresa", + "primary_key": ["Perm_Autor"], + "pre_treatment_reader_args": {"dtype": "object"}, + }, + { + "table_id": "operadora_pessoa_fisica", + "primary_key": ["Perm_Autor"], + "pre_treatment_reader_args": {"dtype": "object"}, + }, + ] + # SUBSÍDIO RECURSOS VIAGENS INDIVIDUAIS - SUBSIDIO_SPPO_RECURSOS_DATASET_ID = "br_rj_riodejaneiro_recurso" - SUBSIDIO_SPPO_RECURSO_API_BASE_URL = "https://api.movidesk.com/public/v1/tickets?" + + SUBSIDIO_SPPO_RECURSOS_DATASET_ID = "br_rj_riodejaneiro_recursos" + SUBSIDIO_SPPO_RECURSO_API_BASE_URL = "https://api.movidesk.com/public/v1/tickets" SUBSIDIO_SPPO_RECURSO_API_SECRET_PATH = "sppo_subsidio_recursos_api" - SUBSIDIO_SPPO_RECURSO_SERVICE = "serviceFull eq 'SPPO'" + SUBSIDIO_SPPO_RECURSO_SERVICE = ( + "serviceFirstLevel eq 'Viagem Individual - Recurso Viagens Subsídio'" + ) SUBSIDIO_SPPO_RECURSO_CAPTURE_PARAMS = { "partition_date_only": True, - "table_id": "recurso_sppo", + "table_id": "recursos_sppo_viagens_individuais", "dataset_id": SUBSIDIO_SPPO_RECURSOS_DATASET_ID, "extract_params": { "token": "", - "$select": "id,protocol,createdDate", - "$filter": "{dates} and serviceFull/any(serviceFull: {service})", + "$select": "id,protocol,createdDate,lastUpdate", + "$filter": "{service} and (lastUpdate ge {start} and lastUpdate lt {end} \ +or createdDate ge {start} and createdDate lt {end})", "$expand": "customFieldValues,customFieldValues($expand=items)", "$orderby": "createdDate asc", }, @@ -565,9 +670,19 @@ class constants(Enum): # pylint: disable=c0103 "upstream": True, "dbt_vars": { "date_range": { - "table_run_datetime_column_name": "data_recurso", + "table_run_datetime_column_name": "datetime_recurso", "delay_hours": 0, }, "version": {}, }, } + + DIRETORIO_MATERIALIZACAO_PARAMS = { + "dataset_id": "cadastro", + "upstream": True, + } + + DIRETORIO_MATERIALIZACAO_TABLE_PARAMS = [ + {"table_id": "diretorio_consorcios"}, + {"table_id": "operadoras_contatos"}, + ]