diff --git a/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py b/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py index 7cfe41f5..aee2a7b2 100644 --- a/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py +++ b/pipelines/br_rj_riodejaneiro_brt_gps/tasks.py @@ -3,19 +3,21 @@ Tasks for br_rj_riodejaneiro_brt_gps """ -from datetime import timedelta import traceback +from datetime import timedelta + import pandas as pd from prefect import task +from prefeitura_rio.pipelines_utils.logging import log + +from pipelines.constants import constants +from pipelines.utils.backup.utils import log_critical, map_dict_keys # EMD Imports # -from prefeitura_rio.pipelines_utils.logging import log # SMTR Imports # -from pipelines.constants import constants -from pipelines.utils.utils import log_critical, map_dict_keys # Tasks # @@ -53,9 +55,7 @@ def pre_treatment_br_rj_riodejaneiro_brt_gps(status: dict, timestamp): df = pd.DataFrame(columns=columns) # pylint: disable=c0103 # map_dict_keys change data keys to match project data structure - df["content"] = [ - map_dict_keys(piece, constants.GPS_BRT_MAPPING_KEYS.value) for piece in data - ] + df["content"] = [map_dict_keys(piece, constants.GPS_BRT_MAPPING_KEYS.value) for piece in data] df[key_column] = [piece[key_column] for piece in data] df["timestamp_gps"] = [piece["timestamp_gps"] for piece in data] df["timestamp_captura"] = timestamp @@ -64,9 +64,7 @@ def pre_treatment_br_rj_riodejaneiro_brt_gps(status: dict, timestamp): # Remove timezone and force it to be config timezone log(f"Before converting, timestamp_gps was: \n{df['timestamp_gps']}") df["timestamp_gps"] = ( - pd.to_datetime(df["timestamp_gps"], unit="ms") - .dt.tz_localize("UTC") - .dt.tz_convert(timezone) + pd.to_datetime(df["timestamp_gps"], unit="ms").dt.tz_localize("UTC").dt.tz_convert(timezone) ) log(f"After converting the timezone, timestamp_gps is: \n{df['timestamp_gps']}") diff --git a/pipelines/schedules.py b/pipelines/schedules.py index 4312feb9..2cadec57 100644 --- a/pipelines/schedules.py +++ b/pipelines/schedules.py @@ -3,21 +3,21 @@ Schedules for rj_smtr """ -from datetime import timedelta, datetime -from pytz import timezone +from datetime import datetime, timedelta + from prefect.schedules import Schedule -from prefect.schedules.clocks import IntervalClock, CronClock -from pipelines.constants import constants as emd_constants -from pipelines.utils.utils import generate_ftp_schedules +from prefect.schedules.clocks import CronClock, IntervalClock +from pytz import timezone + from pipelines.constants import constants +from pipelines.constants import constants as emd_constants +from pipelines.utils.backup.utils import generate_ftp_schedules every_minute = Schedule( clocks=[ IntervalClock( interval=timedelta(minutes=1), - start_date=datetime( - 2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -28,9 +28,7 @@ clocks=[ IntervalClock( interval=timedelta(minutes=1), - start_date=datetime( - 2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_DEV_AGENT_LABEL.value, ], @@ -42,9 +40,7 @@ clocks=[ IntervalClock( interval=timedelta(minutes=10), - start_date=datetime( - 2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value)), 
labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -57,9 +53,7 @@ clocks=[ IntervalClock( interval=timedelta(hours=1), - start_date=datetime( - 2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -71,9 +65,7 @@ clocks=[ IntervalClock( interval=timedelta(hours=1), - start_date=datetime( - 2021, 1, 1, 0, 6, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2021, 1, 1, 0, 6, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -85,9 +77,7 @@ clocks=[ IntervalClock( interval=timedelta(days=1), - start_date=datetime( - 2021, 1, 1, 0, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2021, 1, 1, 0, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -103,9 +93,7 @@ clocks=[ IntervalClock( interval=timedelta(days=1), - start_date=datetime( - 2022, 11, 30, 5, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2022, 11, 30, 5, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -117,9 +105,7 @@ clocks=[ IntervalClock( interval=timedelta(days=1), - start_date=datetime( - 2022, 11, 30, 7, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2022, 11, 30, 7, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], @@ -131,18 +117,14 @@ clocks=[ CronClock( cron="0 12 16 * *", - start_date=datetime( - 2022, 12, 16, 12, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2022, 12, 16, 12, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], ), CronClock( cron="0 12 1 * *", - start_date=datetime( - 2023, 1, 1, 12, 0, tzinfo=timezone(constants.TIMEZONE.value) - ), + start_date=datetime(2023, 1, 1, 12, 0, tzinfo=timezone(constants.TIMEZONE.value)), labels=[ emd_constants.RJ_SMTR_AGENT_LABEL.value, ], diff --git a/pipelines/tasks.py b/pipelines/tasks.py index 0f75221c..fd953cfb 100644 --- a/pipelines/tasks.py +++ b/pipelines/tasks.py @@ -3,49 +3,49 @@ """ Tasks for rj_smtr """ -from datetime import datetime, timedelta, date +import io import json import os -from pathlib import Path import traceback -from typing import Dict, List, Union, Iterable, Any -import io +from datetime import date, datetime, timedelta +from pathlib import Path +from typing import Any, Dict, Iterable, List, Union -from basedosdados import Storage, Table import basedosdados as bd import pandas as pd import pendulum import prefect -from prefect import task, Client +import requests +from basedosdados import Storage, Table +from prefect import Client, task from prefect.backend import FlowRunView +from prefeitura_rio.pipelines_utils.dbt import run_dbt_model +from prefeitura_rio.pipelines_utils.infisical import inject_bd_credentials +from prefeitura_rio.pipelines_utils.logging import log +from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client from pytz import timezone -import requests from pipelines.constants import constants -from pipelines.utils.utils import ( - create_or_append_table, +from pipelines.utils.backup.utils import ( # normalize_keys, bq_project, - get_table_min_max_value, - get_last_run_timestamp, + create_or_append_table, data_info_str, dict_contains_keys, + get_datetime_range, + get_last_run_timestamp, 
get_raw_data_api, - get_raw_data_gcs, get_raw_data_db, + get_raw_data_gcs, get_raw_recursos, - upload_run_logs_to_bq, - get_datetime_range, + get_table_min_max_value, + log_critical, read_raw_data, - save_treated_local_func, save_raw_local_func, - log_critical, - normalize_keys + save_treated_local_func, + upload_run_logs_to_bq, ) from pipelines.utils.secret import get_secret -from prefeitura_rio.pipelines_utils.dbt import run_dbt_model -from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client -from prefeitura_rio.pipelines_utils.infisical import inject_bd_credentials -from prefeitura_rio.pipelines_utils.logging import log + ############### # @@ -56,6 +56,7 @@ def setup_task(): return inject_bd_credentials() + @task def get_current_flow_labels() -> List[str]: """ @@ -65,14 +66,16 @@ def get_current_flow_labels() -> List[str]: flow_run_view = FlowRunView.from_flow_run_id(flow_run_id) return flow_run_view.labels + ############### # # DBT # ############### + @task -def run_dbt_model_task( +def run_dbt_model_task( dataset_id: str = None, table_id: str = None, dbt_alias: bool = False, @@ -80,8 +83,8 @@ def run_dbt_model_task( downstream: bool = None, exclude: str = None, flags: str = None, - _vars: dict | List[Dict] = None - ): + _vars: dict | List[Dict] = None, +): return run_dbt_model( dataset_id=dataset_id, table_id=table_id, @@ -90,7 +93,7 @@ def run_dbt_model_task( downstream=downstream, exclude=exclude, flags=flags, - _vars=_vars + _vars=_vars, ) @@ -140,7 +143,7 @@ def build_incremental_model( # pylint: disable=too-many-arguments if refresh: log("Running in full refresh mode") log(f"DBT will run the following command:\n{run_command+' --full-refresh'}") - run_dbt_model(dataset_id=dataset_id, table_id=mat_table_id, flags='--full-refresh') + run_dbt_model(dataset_id=dataset_id, table_id=mat_table_id, flags="--full-refresh") last_mat_date = get_table_min_max_value( query_project_id, dataset_id, mat_table_id, field_name, "max" ) @@ -208,9 +211,7 @@ def create_dbt_run_vars( log("Creating date_range variable") # Set date_range variable manually - if dict_contains_keys( - dbt_vars["date_range"], ["date_range_start", "date_range_end"] - ): + if dict_contains_keys(dbt_vars["date_range"], ["date_range_start", "date_range_end"]): date_var = { "date_range_start": dbt_vars["date_range"]["date_range_start"], "date_range_end": dbt_vars["date_range"]["date_range_end"], @@ -490,13 +491,9 @@ def query_logs( """ if not datetime_filter: - datetime_filter = pendulum.now(constants.TIMEZONE.value).replace( - second=0, microsecond=0 - ) + datetime_filter = pendulum.now(constants.TIMEZONE.value).replace(second=0, microsecond=0) elif isinstance(datetime_filter, str): - datetime_filter = datetime.fromisoformat(datetime_filter).replace( - second=0, microsecond=0 - ) + datetime_filter = datetime.fromisoformat(datetime_filter).replace(second=0, microsecond=0) datetime_filter = datetime_filter.strftime("%Y-%m-%d %H:%M:%S") @@ -638,13 +635,9 @@ def get_raw( # pylint: disable=R0912 elif filetype in ("txt", "csv"): if csv_args is None: csv_args = {} - data = pd.read_csv(io.StringIO(response.text), **csv_args).to_dict( - orient="records" - ) + data = pd.read_csv(io.StringIO(response.text), **csv_args).to_dict(orient="records") else: - error = ( - "Unsupported raw file extension. Supported only: json, csv and txt" - ) + error = "Unsupported raw file extension. 
Supported only: json, csv and txt" except Exception: error = traceback.format_exc() @@ -750,9 +743,7 @@ def get_raw_from_sources( source_values = source_type.split("-", 1) - source_type, filetype = ( - source_values if len(source_values) == 2 else (source_values[0], None) - ) + source_type, filetype = source_values if len(source_values) == 2 else (source_values[0], None) log(f"Getting raw data from source type: {source_type}") @@ -779,9 +770,7 @@ def get_raw_from_sources( else: raise NotImplementedError(f"{source_type} not supported") - filepath = save_raw_local_func( - data=data, filepath=local_filepath, filetype=filetype - ) + filepath = save_raw_local_func(data=data, filepath=local_filepath, filetype=filetype) except NotImplementedError: error = traceback.format_exc() @@ -927,9 +916,7 @@ def upload_logs_to_bq( # pylint: disable=R0913 # Create partition directory filename = f"{table_id}_{timestamp.isoformat()}" partition = f"data={timestamp.date()}" - filepath = Path( - f"""data/staging/{dataset_id}/{table_id}/{partition}/{filename}.csv""" - ) + filepath = Path(f"""data/staging/{dataset_id}/{table_id}/{partition}/{filename}.csv""") filepath.parent.mkdir(exist_ok=True, parents=True) # Create dataframe to be uploaded if not error and recapture is True: @@ -1099,9 +1086,7 @@ def get_materialization_date_range( # pylint: disable=R0913 """ timestr = "%Y-%m-%dT%H:%M:%S" # get start from redis - last_run = get_last_run_timestamp( - dataset_id=dataset_id, table_id=table_id, mode=mode - ) + last_run = get_last_run_timestamp(dataset_id=dataset_id, table_id=table_id, mode=mode) # if there's no timestamp set on redis, get max timestamp on source table if last_run is None: log("Failed to fetch key from Redis...\n Querying tables for last suceeded run") @@ -1151,9 +1136,7 @@ def get_materialization_date_range( # pylint: disable=R0913 tzinfo=None, minute=0, second=0, microsecond=0 ) - end_ts = (end_ts - timedelta(hours=delay_hours)).replace( - minute=0, second=0, microsecond=0 - ) + end_ts = (end_ts - timedelta(hours=delay_hours)).replace(minute=0, second=0, microsecond=0) end_ts = end_ts.strftime(timestr) @@ -1343,15 +1326,9 @@ def transform_raw_to_nested_structure( pk_cols = primary_key + ["timestamp_captura"] data = ( data.groupby(pk_cols) - .apply( - lambda x: x[data.columns.difference(pk_cols)].to_json( - orient="records" - ) - ) + .apply(lambda x: x[data.columns.difference(pk_cols)].to_json(orient="records")) .str.strip("[]") - .reset_index(name="content")[ - primary_key + ["content", "timestamp_captura"] - ] + .reset_index(name="content")[primary_key + ["content", "timestamp_captura"]] ) log( @@ -1360,9 +1337,7 @@ def transform_raw_to_nested_structure( ) # save treated local - filepath = save_treated_local_func( - data=data, error=error, filepath=filepath - ) + filepath = save_treated_local_func(data=data, error=error, filepath=filepath) except Exception: # pylint: disable=W0703 error = traceback.format_exc() @@ -1462,13 +1437,12 @@ def get_scheduled_start_times( last_schedule = timestamp for param in parameters[1:]: - last_schedule += intervals.get( - param.get("table_id", "default"), intervals["default"] - ) + last_schedule += intervals.get(param.get("table_id", "default"), intervals["default"]) timestamps.append(last_schedule) return timestamps + @task def rename_current_flow_run_now_time(prefix: str, now_time=None, wait=None) -> None: """ @@ -1478,6 +1452,7 @@ def rename_current_flow_run_now_time(prefix: str, now_time=None, wait=None) -> N client = Client() return 
client.set_flow_run_name(flow_run_id, f"{prefix}{now_time}") + @prefect.task(checkpoint=False) def get_now_time(): """ @@ -1487,6 +1462,7 @@ def get_now_time(): return f"{now.hour}:{f'0{now.minute}' if len(str(now.minute))==1 else now.minute}" + @prefect.task(checkpoint=False) def get_now_date(): """ @@ -1496,6 +1472,7 @@ def get_now_date(): return now.to_date_string() + @task def get_current_flow_mode(labels: List[str]) -> str: """ diff --git a/pipelines/templates/__init__.py b/pipelines/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/backup/__init__.py b/pipelines/utils/backup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/backup/utils.py b/pipelines/utils/backup/utils.py new file mode 100644 index 00000000..80cc11c3 --- /dev/null +++ b/pipelines/utils/backup/utils.py @@ -0,0 +1,927 @@ +# -*- coding: utf-8 -*- +# flake8: noqa: E501 +""" +General purpose functions for rj_smtr +""" + +import io +import json +import math +import time +import traceback +import zipfile +from datetime import date, datetime, timedelta +from ftplib import FTP +from pathlib import Path +from typing import Any, List, Union + +import basedosdados as bd +import pandas as pd +import psycopg2 +import psycopg2.extras +import pymysql +import pytz +import requests +from basedosdados import Storage, Table +from google.cloud.storage.blob import Blob +from prefect.schedules.clocks import IntervalClock +from prefeitura_rio.pipelines_utils.infisical import get_secret +from prefeitura_rio.pipelines_utils.logging import log # TODO: add or relocate imports +from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client +from pytz import timezone +from redis_pal import RedisPal + +from pipelines.constants import constants +from pipelines.utils.implicit_ftp import ImplicitFtpTls + +# Set BD config to run on cloud # +bd.config.from_file = True + + +def send_discord_message( + message: str, + webhook_url: str, +) -> None: + """ + Sends a message to a Discord channel. + """ + requests.post( + webhook_url, + data={"content": message}, + ) + + +def log_critical(message: str, secret_path: str = constants.CRITICAL_SECRET_PATH.value): + """Logs message to critical discord channel specified + + Args: + message (str): Message to post on the channel + secret_path (str, optional): Secret path storing the webhook to critical channel. + Defaults to constants.CRITICAL_SECRETPATH.value. + + """ + url = get_secret(secret_path=secret_path)["data"]["url"] + return send_discord_message(message=message, webhook_url=url) + + +def create_or_append_table(dataset_id: str, table_id: str, path: str, partitions: str = None): + """Conditionally create table or append data to its relative GCS folder. 
+ + Args: + dataset_id (str): target dataset_id on BigQuery + table_id (str): target table_id on BigQuery + path (str): Path to .csv data file + """ + tb_obj = Table(table_id=table_id, dataset_id=dataset_id) + if not tb_obj.table_exists("staging"): + log("Table does not exist in STAGING, creating table...") + dirpath = path.split(partitions)[0] + tb_obj.create( + path=dirpath, + if_table_exists="pass", + if_storage_data_exists="replace", + ) + log("Table created in STAGING") + else: + log("Table already exists in STAGING, appending to it...") + tb_obj.append(filepath=path, if_exists="replace", timeout=600, partitions=partitions) + log("Appended to table on STAGING successfully.") + + +def generate_df_and_save(data: dict, fname: Path): + """Save DataFrame as csv + + Args: + data (dict): dict with the data which to build the DataFrame + fname (Path): _description_ + """ + # Generate dataframe + dataframe = pd.DataFrame() + dataframe[data["key_column"]] = [piece[data["key_column"]] for piece in data["data"]] + dataframe["content"] = list(data["data"]) + + # Save dataframe to CSV + dataframe.to_csv(fname, index=False) + + +def bq_project(kind: str = "bigquery_prod"): + """Get the set BigQuery project_id + + Args: + kind (str, optional): Which client to get the project name from. + Options are 'bigquery_staging', 'bigquery_prod' and 'storage_staging' + Defaults to 'bigquery_prod'. + + Returns: + str: the requested project_id + """ + return bd.upload.base.Base().client[kind].project + + +def get_table_min_max_value( # pylint: disable=R0913 + query_project_id: str, + dataset_id: str, + table_id: str, + field_name: str, + kind: str, + wait=None, # pylint: disable=unused-argument +): + """Query a table to get the maximum value for the chosen field. + Useful to incrementally materialize tables via DBT + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + field_name (str): column name to query + kind (str): which value to get. Accepts min and max + """ + log(f"Getting {kind} value for {table_id}") + query = f""" + SELECT + {kind}({field_name}) + FROM {query_project_id}.{dataset_id}.{table_id} + """ + log(f"Will run query:\n{query}") + result = bd.read_sql(query=query, billing_project_id=bq_project()) + + return result.iloc[0][0] + + +def get_last_run_timestamp(dataset_id: str, table_id: str, mode: str = "prod") -> str: + """ + Query redis to retrive the time for when the last materialization + ran. + + Args: + dataset_id (str): dataset_id on BigQuery + table_id (str): model filename on the queries repo. + eg: if you have a model defined in the file .sql, + the table_id should be + mode (str): + + Returns: + Union[str, None]: _description_ + """ + redis_client = get_redis_client() + key = dataset_id + "." + table_id + log(f"Fetching key {key} from redis, working on mode {mode}") + if mode == "dev": + key = f"{mode}.{key}" + runs = redis_client.get(key) + # if runs is None: + # redis_client.set(key, "") + try: + last_run_timestamp = runs["last_run_timestamp"] + except KeyError: + return None + except TypeError: + return None + log(f"Got value {last_run_timestamp}") + return last_run_timestamp + + +def map_dict_keys(data: dict, mapping: dict) -> None: + """ + Map old keys to new keys in a dict. 
+ """ + for old_key, new_key in mapping.items(): + data[new_key] = data.pop(old_key) + return data + + +def normalize_keys(data: dict): + _data = {key.lower(): value for key, value in data.items()} + return _data + + +def connect_ftp(secret_path: str = None, secure: bool = True): + """Connect to FTP + + Returns: + ImplicitFTP_TLS: ftp client + """ + + ftp_data = get_secret(secret_path)["data"] + if secure: + ftp_client = ImplicitFtpTls() + else: + ftp_client = FTP() + ftp_client.connect(host=ftp_data["host"], port=int(ftp_data["port"])) + ftp_client.login(user=ftp_data["username"], passwd=ftp_data["pwd"]) + if secure: + ftp_client.prot_p() + return ftp_client + + +def safe_cast(val, to_type, default=None): + """ + Safe cast value. + """ + try: + return to_type(val) + except ValueError: + return default + + +def set_redis_rdo_files(redis_client, dataset_id: str, table_id: str): + """ + Register downloaded files to Redis + + Args: + redis_client (_type_): _description_ + dataset_id (str): dataset_id on BigQuery + table_id (str): table_id on BigQuery + + Returns: + bool: if the key was properly set + """ + try: + content = redis_client.get(f"{dataset_id}.{table_id}")["files"] + except TypeError as e: + log(f"Caught error {e}. Will set unexisting key") + # set key to empty dict for filling later + redis_client.set(f"{dataset_id}.{table_id}", {"files": []}) + content = redis_client.get(f"{dataset_id}.{table_id}") + # update content + st_client = bd.Storage(dataset_id=dataset_id, table_id=table_id) + blob_names = [ + blob.name + for blob in st_client.client["storage_staging"].list_blobs( + st_client.bucket, prefix=f"staging/{dataset_id}/{table_id}" + ) + ] + files = [blob_name.split("/")[-1].replace(".csv", "") for blob_name in blob_names] + log(f"When setting key, found {len(files)} files. Will register on redis...") + content["files"] = files + # set key + return redis_client.set(f"{dataset_id}.{table_id}", content) + + +# PRE TREAT # + + +def check_not_null(data: pd.DataFrame, columns: list, subset_query: str = None): + """ + Check if there are null values in columns. + + Args: + columns (list): list of columns to check + subset_query (str): query to check if there are important data + being removed + + Returns: + None + """ + + for col in columns: + remove = data.query(f"{col} != {col}") # null values + log( + f"[data-check] There are {len(remove)} rows with null values in '{col}'", + level="info", + ) + + if subset_query is not None: + # Check if there are important data being removed + remove = remove.query(subset_query) + if len(remove) > 0: + log( + f"""[data-check] There are {len(remove)} critical rows with + null values in '{col}' (query: {subset_query})""", + level="warning", + ) + + +def filter_null(data: pd.DataFrame, columns: list, subset_query: str = None): + """ + Filter null values in columns. 
+ + Args: + columns (list): list of columns to check + subset_query (str): query to check if there are important data + being removed + + Returns: + pandas.DataFrame: data without null values + """ + + for col in columns: + remove = data.query(f"{col} != {col}") # null values + data = data.drop(remove.index) + log( + f"[data-filter] Removed {len(remove)} rows with null '{col}'", + level="info", + ) + + if subset_query is not None: + # Check if there are important data being removed + remove = remove.query(subset_query) + if len(remove) > 0: + log( + f"[data-filter] Removed {len(remove)} critical rows with null '{col}'", + level="warning", + ) + + return data + + +def filter_data(data: pd.DataFrame, filters: list, subset_query: str = None): + """ + Filter data from a dataframe + + Args: + data (pd.DataFrame): data DataFrame + filters (list): list of queries to filter data + + Returns: + pandas.DataFrame: data without filter data + """ + for item in filters: + remove = data.query(item) + data = data.drop(remove.index) + log( + f"[data-filter] Removed {len(remove)} rows from filter: {item}", + level="info", + ) + + if subset_query is not None: + # Check if there are important data being removed + remove = remove.query(subset_query) + if len(remove) > 0: + log( + f"""[data-filter] Removed {len(remove)} critical rows + from filter: {item} (subquery: {subset_query})""", + level="warning", + ) + + return data + + +def check_relation(data: pd.DataFrame, columns: list): + """ + Check relation between collumns. + + Args: + data (pd.DataFrame): dataframe to be modified + columns (list): list of lists of columns to be checked + + Returns: + None + """ + + for cols in columns: + df_dup = data[~data.duplicated(subset=cols)].groupby(cols).count().reset_index().iloc[:, :1] + + for col in cols: + df_dup_col = ( + data[~data.duplicated(subset=col)].groupby(col).count().reset_index().iloc[:, :1] + ) + + if len(df_dup_col[~df_dup_col[col].duplicated()]) == len(df_dup): + log( + f"[data-check] Comparing '{col}' in '{cols}', there are no duplicated values", + level="info", + ) + else: + log( + f"[data-check] Comparing '{col}' in '{cols}', there are duplicated values", + level="warning", + ) + + +def data_info_str(data: pd.DataFrame): + """ + Return dataframe info as a str to log + + Args: + data (pd.DataFrame): dataframe + + Returns: + data.info() as a string + """ + buffer = io.StringIO() + data.info(buf=buffer) + return buffer.getvalue() + + +def generate_execute_schedules( # pylint: disable=too-many-arguments,too-many-locals + clock_interval: timedelta, + labels: List[str], + table_parameters: Union[list[dict], dict], + runs_interval_minutes: int = 15, + start_date: datetime = datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)), + **general_flow_params, +) -> List[IntervalClock]: + """ + Generates multiple schedules + + Args: + clock_interval (timedelta): The interval to run the schedule + labels (List[str]): The labels to be added to the schedule + table_parameters (list): The table parameters to iterate over + runs_interval_minutes (int, optional): The interval between each schedule. Defaults to 15. + start_date (datetime, optional): The start date of the schedule. + Defaults to datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)). 
+ general_flow_params: Any param that you want to pass to the flow + Returns: + List[IntervalClock]: The list of schedules + + """ + if isinstance(table_parameters, dict): + table_parameters = [table_parameters] + + clocks = [] + for count, parameters in enumerate(table_parameters): + parameter_defaults = parameters | general_flow_params + clocks.append( + IntervalClock( + interval=clock_interval, + start_date=start_date + timedelta(minutes=runs_interval_minutes * count), + labels=labels, + parameter_defaults=parameter_defaults, + ) + ) + return clocks + + +def dict_contains_keys(input_dict: dict, keys: list[str]) -> bool: + """ + Test if the input dict has all keys present in the list + + Args: + input_dict (dict): the dict to test if has the keys + keys (list[str]): the list containing the keys to check + Returns: + bool: True if the input_dict has all the keys otherwise False + """ + return all(x in input_dict.keys() for x in keys) + + +def custom_serialization(obj: Any) -> Any: + """ + Function to serialize not JSON serializable objects + + Args: + obj (Any): Object to serialize + + Returns: + Any: Serialized object + """ + if isinstance(obj, (pd.Timestamp, date)): + if isinstance(obj, pd.Timestamp): + if obj.tzinfo is None: + obj = obj.tz_localize("UTC").tz_convert(constants.TIMEZONE.value) + return obj.isoformat() + + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + +def save_raw_local_func( + data: Union[dict, str], + filepath: str, + mode: str = "raw", + filetype: str = "json", +) -> str: + """ + Saves json response from API to .json file. + Args: + data (Union[dict, str]): Raw data to save + filepath (str): Path which to save raw file + mode (str, optional): Folder to save locally, later folder which to upload to GCS. + filetype (str, optional): The file format + Returns: + str: Path to the saved file + """ + + # diferentes tipos de arquivos para salvar + _filepath = filepath.format(mode=mode, filetype=filetype) + Path(_filepath).parent.mkdir(parents=True, exist_ok=True) + + if filetype == "json": + if isinstance(data, str): + data = json.loads(data) + with Path(_filepath).open("w", encoding="utf-8") as fi: + json.dump(data, fi, default=custom_serialization) + + if filetype in ("txt", "csv"): + with open(_filepath, "w", encoding="utf-8") as file: + file.write(data) + + log(f"Raw data saved to: {_filepath}") + return _filepath + + +def get_raw_data_api( # pylint: disable=R0912 + url: str, + secret_path: str = None, + api_params: dict = None, + filetype: str = None, +) -> tuple[str, str, str]: + """ + Request data from URL API + + Args: + url (str): URL to request data + secret_path (str, optional): Secret path to get headers. Defaults to None. + api_params (dict, optional): Parameters to pass to API. Defaults to None. + filetype (str, optional): Filetype to save raw file. Defaults to None. 
+ + Returns: + tuple[str, str, str]: Error, data and filetype + """ + error = None + data = None + try: + if secret_path is None: + headers = secret_path + else: + headers = get_secret(secret_path)["data"] + + response = requests.get( + url, + headers=headers, + timeout=constants.MAX_TIMEOUT_SECONDS.value, + params=api_params, + ) + + response.raise_for_status() + + if filetype == "json": + data = response.json() + else: + data = response.text + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data, filetype + + +def get_upload_storage_blob( + dataset_id: str, + filename: str, +) -> Blob: + """ + Get a blob from upload zone in storage + + Args: + dataset_id (str): The dataset id on BigQuery. + filename (str): The filename in GCS. + + Returns: + Blob: blob object + """ + bucket = bd.Storage(dataset_id="", table_id="") + log(f"Filename: {filename}, dataset_id: {dataset_id}") + blob_list = list( + bucket.client["storage_staging"] + .bucket(bucket.bucket_name) + .list_blobs(prefix=f"upload/{dataset_id}/{filename}.") + ) + + return blob_list[0] + + +def get_raw_data_gcs( + dataset_id: str, + table_id: str, + zip_filename: str = None, +) -> tuple[str, str, str]: + """ + Get raw data from GCS + + Args: + dataset_id (str): The dataset id on BigQuery. + table_id (str): The table id on BigQuery. + zip_filename (str, optional): The zip file name. Defaults to None. + + Returns: + tuple[str, str, str]: Error, data and filetype + """ + error = None + data = None + filetype = None + + try: + blob_search_name = zip_filename or table_id + blob = get_upload_storage_blob(dataset_id=dataset_id, filename=blob_search_name) + + filename = blob.name + filetype = filename.split(".")[-1] + + data = blob.download_as_bytes() + + if filetype == "zip": + with zipfile.ZipFile(io.BytesIO(data), "r") as zipped_file: + filenames = zipped_file.namelist() + filename = list(filter(lambda x: x.split(".")[0] == table_id, filenames))[0] + filetype = filename.split(".")[-1] + data = zipped_file.read(filename) + + data = data.decode(encoding="utf-8") + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data, filetype + + +def get_raw_data_db( + query: str, engine: str, host: str, secret_path: str, database: str +) -> tuple[str, str, str]: + """ + Get data from Databases + + Args: + query (str): the SQL Query to execute + engine (str): The datase management system + host (str): The database host + secret_path (str): Secret path to get credentials + database (str): The database to connect + + Returns: + tuple[str, str, str]: Error, data and filetype + """ + connector_mapping = { + "postgresql": psycopg2.connect, + "mysql": pymysql.connect, + } + + data = None + error = None + filetype = "json" + + try: + credentials = get_secret(secret_path)["data"] + + with connector_mapping[engine]( + host=host, + user=credentials["user"], + password=credentials["password"], + database=database, + ) as connection: + data = pd.read_sql(sql=query, con=connection).to_dict(orient="records") + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data, filetype + + +def save_treated_local_func( + filepath: str, data: pd.DataFrame, error: str, mode: str = "staging" +) -> str: + """ + Save treated file to CSV. 
+ + Args: + filepath (str): Path to save file + data (pd.DataFrame): Dataframe to save + error (str): Error catched during execution + mode (str, optional): Folder to save locally, later folder which to upload to GCS. + + Returns: + str: Path to the saved file + """ + _filepath = filepath.format(mode=mode, filetype="csv") + Path(_filepath).parent.mkdir(parents=True, exist_ok=True) + if error is None: + data.to_csv(_filepath, index=False) + log(f"Treated data saved to: {_filepath}") + return _filepath + + +def upload_run_logs_to_bq( # pylint: disable=R0913 + dataset_id: str, + parent_table_id: str, + timestamp: str, + error: str = None, + previous_error: str = None, + recapture: bool = False, + mode: str = "raw", +): + """ + Upload execution status table to BigQuery. + Table is uploaded to the same dataset, named {parent_table_id}_logs. + If passing status_dict, should not pass timestamp and error. + + Args: + dataset_id (str): dataset_id on BigQuery + parent_table_id (str): table_id on BigQuery + timestamp (str): timestamp to get datetime range + error (str): error catched during execution + previous_error (str): previous error catched during execution + recapture (bool): if the execution was a recapture + mode (str): folder to save locally, later folder which to upload to GCS + + Returns: + None + """ + table_id = parent_table_id + "_logs" + # Create partition directory + filename = f"{table_id}_{timestamp.isoformat()}" + partition = f"data={timestamp.date()}" + filepath = Path(f"""data/{mode}/{dataset_id}/{table_id}/{partition}/{filename}.csv""") + filepath.parent.mkdir(exist_ok=True, parents=True) + # Create dataframe to be uploaded + if not error and recapture is True: + # if the recapture is succeeded, update the column erro + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [True], + "erro": [f"[recapturado]{previous_error}"], + } + ) + log(f"Recapturing {timestamp} with previous error:\n{previous_error}") + else: + # not recapturing or error during flow execution + dataframe = pd.DataFrame( + { + "timestamp_captura": [timestamp], + "sucesso": [error is None], + "erro": [error], + } + ) + # Save data local + dataframe.to_csv(filepath, index=False) + # Upload to Storage + create_or_append_table( + dataset_id=dataset_id, + table_id=table_id, + path=filepath.as_posix(), + partitions=partition, + ) + if error is not None: + raise Exception(f"Pipeline failed with error: {error}") + + +def get_datetime_range( + timestamp: datetime, + interval: timedelta, +) -> dict: + """ + Task to get datetime range in UTC + + Args: + timestamp (datetime): timestamp to get datetime range + interval (timedelta): interval to get datetime range + + Returns: + dict: datetime range + """ + + start = (timestamp - interval).astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S") + + end = timestamp.astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S") + + return {"start": start, "end": end} + + +def read_raw_data(filepath: str, csv_args: dict = None) -> tuple[str, pd.DataFrame]: + """ + Read raw data from file + + Args: + filepath (str): filepath to read + csv_args (dict): arguments to pass to pandas.read_csv + + Returns: + tuple[str, pd.DataFrame]: error and data + """ + error = None + data = None + try: + file_type = filepath.split(".")[-1] + + if file_type == "json": + data = pd.read_json(filepath) + + # data = json.loads(data) + elif file_type in ("txt", "csv"): + if csv_args is None: + csv_args = {} + data = pd.read_csv(filepath, **csv_args) + else: + 
error = "Unsupported raw file extension. Supported only: json, csv and txt" + + except Exception: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + + return error, data + + +def get_raw_recursos(request_url: str, request_params: dict) -> tuple[str, str, str]: + """ + Returns a dataframe with recursos data from movidesk api. + """ + all_records = False + top = 1000 + skip = 0 + error = None + filetype = "json" + data = [] + + while not all_records: + try: + request_params["$top"] = top + request_params["$skip"] = skip + + log(f"Request url {request_url}") + + response = requests.get( + request_url, + params=request_params, + timeout=constants.MAX_TIMEOUT_SECONDS.value, + ) + response.raise_for_status() + + paginated_data = response.json() + + if isinstance(paginated_data, dict): + paginated_data = [paginated_data] + + if len(paginated_data) == top: + skip += top + time.sleep(36) + else: + if len(paginated_data) == 0: + log("Nenhum dado para tratar.") + break + all_records = True + data += paginated_data + + log(f"Dados (paginados): {len(data)}") + + except Exception as error: + error = traceback.format_exc() + log(f"[CATCHED] Task failed with error: \n{error}", level="error") + data = [] + break + + log(f"Request concluĂ­do, tamanho dos dados: {len(data)}.") + + return error, data, filetype + + +def build_table_id(mode: str, report_type: str): + """Build table_id based on which table is the target + of current flow run + + Args: + mode (str): SPPO or STPL + report_type (str): RHO or RDO + + Returns: + str: table_id + """ + if mode == "SPPO": + if report_type == "RDO": + table_id = constants.SPPO_RDO_TABLE_ID.value + else: + table_id = constants.SPPO_RHO_TABLE_ID.value + if mode == "STPL": + # slice the string to get rid of V at end of + # STPL reports filenames + if report_type[:3] == "RDO": + table_id = constants.STPL_RDO_TABLE_ID.value + else: + table_id = constants.STPL_RHO_TABLE_ID.value + return table_id + + +def generate_ftp_schedules(interval_minutes: int, label: str = constants.RJ_SMTR_AGENT_LABEL.value): + """Generates IntervalClocks with the parameters needed to capture + each report. + + Args: + interval_minutes (int): interval which this flow will be run. + label (str, optional): Prefect label, defines which agent to use when launching flow run. + Defaults to constants.RJ_SMTR_AGENT_LABEL.value. 
+ + Returns: + List(IntervalClock): containing the clocks for scheduling runs + """ + modes = ["SPPO", "STPL"] + reports = ["RDO", "RHO"] + clocks = [] + for mode in modes: + for report in reports: + clocks.append( + IntervalClock( + interval=timedelta(minutes=interval_minutes), + start_date=datetime( + 2022, 12, 16, 5, 0, tzinfo=timezone(constants.TIMEZONE.value) + ), + parameter_defaults={ + "transport_mode": mode, + "report_type": report, + "table_id": build_table_id(mode=mode, report_type=report), + }, + labels=[label], + ) + ) + return clocks diff --git a/pipelines/utils/capture.py b/pipelines/utils/capture.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/fs.py b/pipelines/utils/fs.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/gcp.py b/pipelines/utils/gcp.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/logging.py b/pipelines/utils/logging.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/pre_treatment.py b/pipelines/utils/pre_treatment.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/redis.py b/pipelines/utils/redis.py new file mode 100644 index 00000000..e69de29b diff --git a/pipelines/utils/secret.py b/pipelines/utils/secret.py index e4120538..12d2c55d 100644 --- a/pipelines/utils/secret.py +++ b/pipelines/utils/secret.py @@ -1,8 +1,10 @@ +# -*- coding: utf-8 -*- from prefeitura_rio.pipelines_utils.infisical import get_infisical_client -from pipelines.utils.utils import normalize_keys +from pipelines.utils.backup.utils import normalize_keys -def get_secret(secret_path:str='/', secret_name:str=None, environment:str='dev'): + +def get_secret(secret_path: str = "/", secret_name: str = None, environment: str = "dev"): """ Fetches secrets from Infisical. If passing only `secret_path` and no `secret_name`, returns all secrets inside a folder. 
@@ -16,10 +18,10 @@ def get_secret(secret_path:str='/', secret_name:str=None, environment:str='dev') _type_: _description_ """ client = get_infisical_client() - if not secret_path.startswith('/'): + if not secret_path.startswith("/"): secret_path = f"/{secret_path}" if secret_path and not secret_name: secrets = client.get_all_secrets(path=secret_path) - return normalize_keys({s.secret_name:s.secret_value for s in secrets}) + return normalize_keys({s.secret_name: s.secret_value for s in secrets}) secret = client.get_secret(secret_name=secret_name, path=secret_path, environment=environment) - return {secret_name:secret.secret_value} \ No newline at end of file + return {secret_name: secret.secret_value} diff --git a/pipelines/utils/utils.py b/pipelines/utils/utils.py index 8468307a..e69de29b 100644 --- a/pipelines/utils/utils.py +++ b/pipelines/utils/utils.py @@ -1,959 +0,0 @@ -# -*- coding: utf-8 -*- -# flake8: noqa: E501 -""" -General purpose functions for rj_smtr -""" - -from ftplib import FTP -from pathlib import Path - -from datetime import timedelta, datetime, date -from typing import List, Union, Any -import traceback -import io -import json -import zipfile -import pytz -import requests -import basedosdados as bd -from basedosdados import Table -from basedosdados import Storage -from pytz import timezone -import math -import pandas as pd -from google.cloud.storage.blob import Blob -import pymysql -import psycopg2 -import psycopg2.extras -from redis_pal import RedisPal -import time - - -from prefect.schedules.clocks import IntervalClock - -from pipelines.constants import constants - - -from pipelines.utils.implicit_ftp import ImplicitFtpTls -from pipelines.constants import constants - -from prefeitura_rio.pipelines_utils.logging import log #TODO: add or relocate imports -from prefeitura_rio.pipelines_utils.infisical import get_secret -from prefeitura_rio.pipelines_utils.redis_pal import get_redis_client - -# Set BD config to run on cloud # -bd.config.from_file = True - -def send_discord_message( - message: str, - webhook_url: str, -) -> None: - """ - Sends a message to a Discord channel. - """ - requests.post( - webhook_url, - data={"content": message}, - ) - - -def log_critical(message: str, secret_path: str = constants.CRITICAL_SECRET_PATH.value): - """Logs message to critical discord channel specified - - Args: - message (str): Message to post on the channel - secret_path (str, optional): Secret path storing the webhook to critical channel. - Defaults to constants.CRITICAL_SECRETPATH.value. - - """ - url = get_secret(secret_path=secret_path)["data"]["url"] - return send_discord_message(message=message, webhook_url=url) - - -def create_or_append_table( - dataset_id: str, table_id: str, path: str, partitions: str = None -): - """Conditionally create table or append data to its relative GCS folder. 
- - Args: - dataset_id (str): target dataset_id on BigQuery - table_id (str): target table_id on BigQuery - path (str): Path to .csv data file - """ - tb_obj = Table(table_id=table_id, dataset_id=dataset_id) - if not tb_obj.table_exists("staging"): - log("Table does not exist in STAGING, creating table...") - dirpath = path.split(partitions)[0] - tb_obj.create( - path=dirpath, - if_table_exists="pass", - if_storage_data_exists="replace", - ) - log("Table created in STAGING") - else: - log("Table already exists in STAGING, appending to it...") - tb_obj.append( - filepath=path, if_exists="replace", timeout=600, partitions=partitions - ) - log("Appended to table on STAGING successfully.") - - -def generate_df_and_save(data: dict, fname: Path): - """Save DataFrame as csv - - Args: - data (dict): dict with the data which to build the DataFrame - fname (Path): _description_ - """ - # Generate dataframe - dataframe = pd.DataFrame() - dataframe[data["key_column"]] = [ - piece[data["key_column"]] for piece in data["data"] - ] - dataframe["content"] = list(data["data"]) - - # Save dataframe to CSV - dataframe.to_csv(fname, index=False) - - -def bq_project(kind: str = "bigquery_prod"): - """Get the set BigQuery project_id - - Args: - kind (str, optional): Which client to get the project name from. - Options are 'bigquery_staging', 'bigquery_prod' and 'storage_staging' - Defaults to 'bigquery_prod'. - - Returns: - str: the requested project_id - """ - return bd.upload.base.Base().client[kind].project - - -def get_table_min_max_value( # pylint: disable=R0913 - query_project_id: str, - dataset_id: str, - table_id: str, - field_name: str, - kind: str, - wait=None, # pylint: disable=unused-argument -): - """Query a table to get the maximum value for the chosen field. - Useful to incrementally materialize tables via DBT - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - field_name (str): column name to query - kind (str): which value to get. Accepts min and max - """ - log(f"Getting {kind} value for {table_id}") - query = f""" - SELECT - {kind}({field_name}) - FROM {query_project_id}.{dataset_id}.{table_id} - """ - log(f"Will run query:\n{query}") - result = bd.read_sql(query=query, billing_project_id=bq_project()) - - return result.iloc[0][0] - -def get_last_run_timestamp(dataset_id: str, table_id: str, mode: str = "prod") -> str: - """ - Query redis to retrive the time for when the last materialization - ran. - - Args: - dataset_id (str): dataset_id on BigQuery - table_id (str): model filename on the queries repo. - eg: if you have a model defined in the file .sql, - the table_id should be - mode (str): - - Returns: - Union[str, None]: _description_ - """ - redis_client = get_redis_client() - key = dataset_id + "." + table_id - log(f"Fetching key {key} from redis, working on mode {mode}") - if mode == "dev": - key = f"{mode}.{key}" - runs = redis_client.get(key) - # if runs is None: - # redis_client.set(key, "") - try: - last_run_timestamp = runs["last_run_timestamp"] - except KeyError: - return None - except TypeError: - return None - log(f"Got value {last_run_timestamp}") - return last_run_timestamp - - -def map_dict_keys(data: dict, mapping: dict) -> None: - """ - Map old keys to new keys in a dict. 
- """ - for old_key, new_key in mapping.items(): - data[new_key] = data.pop(old_key) - return data - -def normalize_keys(data:dict): - _data = {key.lower():value for key, value in data.items()} - return _data - -def connect_ftp(secret_path: str = None, secure: bool = True): - """Connect to FTP - - Returns: - ImplicitFTP_TLS: ftp client - """ - - ftp_data = get_secret(secret_path)["data"] - if secure: - ftp_client = ImplicitFtpTls() - else: - ftp_client = FTP() - ftp_client.connect(host=ftp_data["host"], port=int(ftp_data["port"])) - ftp_client.login(user=ftp_data["username"], passwd=ftp_data["pwd"]) - if secure: - ftp_client.prot_p() - return ftp_client - - -def safe_cast(val, to_type, default=None): - """ - Safe cast value. - """ - try: - return to_type(val) - except ValueError: - return default - - -def set_redis_rdo_files(redis_client, dataset_id: str, table_id: str): - """ - Register downloaded files to Redis - - Args: - redis_client (_type_): _description_ - dataset_id (str): dataset_id on BigQuery - table_id (str): table_id on BigQuery - - Returns: - bool: if the key was properly set - """ - try: - content = redis_client.get(f"{dataset_id}.{table_id}")["files"] - except TypeError as e: - log(f"Caught error {e}. Will set unexisting key") - # set key to empty dict for filling later - redis_client.set(f"{dataset_id}.{table_id}", {"files": []}) - content = redis_client.get(f"{dataset_id}.{table_id}") - # update content - st_client = bd.Storage(dataset_id=dataset_id, table_id=table_id) - blob_names = [ - blob.name - for blob in st_client.client["storage_staging"].list_blobs( - st_client.bucket, prefix=f"staging/{dataset_id}/{table_id}" - ) - ] - files = [blob_name.split("/")[-1].replace(".csv", "") for blob_name in blob_names] - log(f"When setting key, found {len(files)} files. Will register on redis...") - content["files"] = files - # set key - return redis_client.set(f"{dataset_id}.{table_id}", content) - - -# PRE TREAT # - - -def check_not_null(data: pd.DataFrame, columns: list, subset_query: str = None): - """ - Check if there are null values in columns. - - Args: - columns (list): list of columns to check - subset_query (str): query to check if there are important data - being removed - - Returns: - None - """ - - for col in columns: - remove = data.query(f"{col} != {col}") # null values - log( - f"[data-check] There are {len(remove)} rows with null values in '{col}'", - level="info", - ) - - if subset_query is not None: - # Check if there are important data being removed - remove = remove.query(subset_query) - if len(remove) > 0: - log( - f"""[data-check] There are {len(remove)} critical rows with - null values in '{col}' (query: {subset_query})""", - level="warning", - ) - - -def filter_null(data: pd.DataFrame, columns: list, subset_query: str = None): - """ - Filter null values in columns. 
- - Args: - columns (list): list of columns to check - subset_query (str): query to check if there are important data - being removed - - Returns: - pandas.DataFrame: data without null values - """ - - for col in columns: - remove = data.query(f"{col} != {col}") # null values - data = data.drop(remove.index) - log( - f"[data-filter] Removed {len(remove)} rows with null '{col}'", - level="info", - ) - - if subset_query is not None: - # Check if there are important data being removed - remove = remove.query(subset_query) - if len(remove) > 0: - log( - f"[data-filter] Removed {len(remove)} critical rows with null '{col}'", - level="warning", - ) - - return data - - -def filter_data(data: pd.DataFrame, filters: list, subset_query: str = None): - """ - Filter data from a dataframe - - Args: - data (pd.DataFrame): data DataFrame - filters (list): list of queries to filter data - - Returns: - pandas.DataFrame: data without filter data - """ - for item in filters: - remove = data.query(item) - data = data.drop(remove.index) - log( - f"[data-filter] Removed {len(remove)} rows from filter: {item}", - level="info", - ) - - if subset_query is not None: - # Check if there are important data being removed - remove = remove.query(subset_query) - if len(remove) > 0: - log( - f"""[data-filter] Removed {len(remove)} critical rows - from filter: {item} (subquery: {subset_query})""", - level="warning", - ) - - return data - - -def check_relation(data: pd.DataFrame, columns: list): - """ - Check relation between collumns. - - Args: - data (pd.DataFrame): dataframe to be modified - columns (list): list of lists of columns to be checked - - Returns: - None - """ - - for cols in columns: - df_dup = ( - data[~data.duplicated(subset=cols)] - .groupby(cols) - .count() - .reset_index() - .iloc[:, :1] - ) - - for col in cols: - df_dup_col = ( - data[~data.duplicated(subset=col)] - .groupby(col) - .count() - .reset_index() - .iloc[:, :1] - ) - - if len(df_dup_col[~df_dup_col[col].duplicated()]) == len(df_dup): - log( - f"[data-check] Comparing '{col}' in '{cols}', there are no duplicated values", - level="info", - ) - else: - log( - f"[data-check] Comparing '{col}' in '{cols}', there are duplicated values", - level="warning", - ) - - -def data_info_str(data: pd.DataFrame): - """ - Return dataframe info as a str to log - - Args: - data (pd.DataFrame): dataframe - - Returns: - data.info() as a string - """ - buffer = io.StringIO() - data.info(buf=buffer) - return buffer.getvalue() - - -def generate_execute_schedules( # pylint: disable=too-many-arguments,too-many-locals - clock_interval: timedelta, - labels: List[str], - table_parameters: Union[list[dict], dict], - runs_interval_minutes: int = 15, - start_date: datetime = datetime( - 2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value) - ), - **general_flow_params, -) -> List[IntervalClock]: - """ - Generates multiple schedules - - Args: - clock_interval (timedelta): The interval to run the schedule - labels (List[str]): The labels to be added to the schedule - table_parameters (list): The table parameters to iterate over - runs_interval_minutes (int, optional): The interval between each schedule. Defaults to 15. - start_date (datetime, optional): The start date of the schedule. - Defaults to datetime(2020, 1, 1, tzinfo=pytz.timezone(constants.TIMEZONE.value)). 
- general_flow_params: Any param that you want to pass to the flow - Returns: - List[IntervalClock]: The list of schedules - - """ - if isinstance(table_parameters, dict): - table_parameters = [table_parameters] - - clocks = [] - for count, parameters in enumerate(table_parameters): - parameter_defaults = parameters | general_flow_params - clocks.append( - IntervalClock( - interval=clock_interval, - start_date=start_date - + timedelta(minutes=runs_interval_minutes * count), - labels=labels, - parameter_defaults=parameter_defaults, - ) - ) - return clocks - - -def dict_contains_keys(input_dict: dict, keys: list[str]) -> bool: - """ - Test if the input dict has all keys present in the list - - Args: - input_dict (dict): the dict to test if has the keys - keys (list[str]): the list containing the keys to check - Returns: - bool: True if the input_dict has all the keys otherwise False - """ - return all(x in input_dict.keys() for x in keys) - - -def custom_serialization(obj: Any) -> Any: - """ - Function to serialize not JSON serializable objects - - Args: - obj (Any): Object to serialize - - Returns: - Any: Serialized object - """ - if isinstance(obj, (pd.Timestamp, date)): - if isinstance(obj, pd.Timestamp): - if obj.tzinfo is None: - obj = obj.tz_localize("UTC").tz_convert( - constants.TIMEZONE.value - ) - return obj.isoformat() - - raise TypeError(f"Object of type {type(obj)} is not JSON serializable") - - -def save_raw_local_func( - data: Union[dict, str], - filepath: str, - mode: str = "raw", - filetype: str = "json", -) -> str: - """ - Saves json response from API to .json file. - Args: - data (Union[dict, str]): Raw data to save - filepath (str): Path which to save raw file - mode (str, optional): Folder to save locally, later folder which to upload to GCS. - filetype (str, optional): The file format - Returns: - str: Path to the saved file - """ - - # diferentes tipos de arquivos para salvar - _filepath = filepath.format(mode=mode, filetype=filetype) - Path(_filepath).parent.mkdir(parents=True, exist_ok=True) - - if filetype == "json": - if isinstance(data, str): - data = json.loads(data) - with Path(_filepath).open("w", encoding="utf-8") as fi: - json.dump(data, fi, default=custom_serialization) - - if filetype in ("txt", "csv"): - with open(_filepath, "w", encoding="utf-8") as file: - file.write(data) - - log(f"Raw data saved to: {_filepath}") - return _filepath - - -def get_raw_data_api( # pylint: disable=R0912 - url: str, - secret_path: str = None, - api_params: dict = None, - filetype: str = None, -) -> tuple[str, str, str]: - """ - Request data from URL API - - Args: - url (str): URL to request data - secret_path (str, optional): Secret path to get headers. Defaults to None. - api_params (dict, optional): Parameters to pass to API. Defaults to None. - filetype (str, optional): Filetype to save raw file. Defaults to None. 
- - Returns: - tuple[str, str, str]: Error, data and filetype - """ - error = None - data = None - try: - if secret_path is None: - headers = secret_path - else: - headers = get_secret(secret_path)["data"] - - response = requests.get( - url, - headers=headers, - timeout=constants.MAX_TIMEOUT_SECONDS.value, - params=api_params, - ) - - response.raise_for_status() - - if filetype == "json": - data = response.json() - else: - data = response.text - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data, filetype - - -def get_upload_storage_blob( - dataset_id: str, - filename: str, -) -> Blob: - """ - Get a blob from upload zone in storage - - Args: - dataset_id (str): The dataset id on BigQuery. - filename (str): The filename in GCS. - - Returns: - Blob: blob object - """ - bucket = bd.Storage(dataset_id="", table_id="") - log(f"Filename: {filename}, dataset_id: {dataset_id}") - blob_list = list( - bucket.client["storage_staging"] - .bucket(bucket.bucket_name) - .list_blobs(prefix=f"upload/{dataset_id}/{filename}.") - ) - - return blob_list[0] - - -def get_raw_data_gcs( - dataset_id: str, - table_id: str, - zip_filename: str = None, -) -> tuple[str, str, str]: - """ - Get raw data from GCS - - Args: - dataset_id (str): The dataset id on BigQuery. - table_id (str): The table id on BigQuery. - zip_filename (str, optional): The zip file name. Defaults to None. - - Returns: - tuple[str, str, str]: Error, data and filetype - """ - error = None - data = None - filetype = None - - try: - blob_search_name = zip_filename or table_id - blob = get_upload_storage_blob(dataset_id=dataset_id, filename=blob_search_name) - - filename = blob.name - filetype = filename.split(".")[-1] - - data = blob.download_as_bytes() - - if filetype == "zip": - with zipfile.ZipFile(io.BytesIO(data), "r") as zipped_file: - filenames = zipped_file.namelist() - filename = list( - filter(lambda x: x.split(".")[0] == table_id, filenames) - )[0] - filetype = filename.split(".")[-1] - data = zipped_file.read(filename) - - data = data.decode(encoding="utf-8") - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data, filetype - - -def get_raw_data_db( - query: str, engine: str, host: str, secret_path: str, database: str -) -> tuple[str, str, str]: - """ - Get data from Databases - - Args: - query (str): the SQL Query to execute - engine (str): The datase management system - host (str): The database host - secret_path (str): Secret path to get credentials - database (str): The database to connect - - Returns: - tuple[str, str, str]: Error, data and filetype - """ - connector_mapping = { - "postgresql": psycopg2.connect, - "mysql": pymysql.connect, - } - - data = None - error = None - filetype = "json" - - try: - credentials = get_secret(secret_path)["data"] - - with connector_mapping[engine]( - host=host, - user=credentials["user"], - password=credentials["password"], - database=database, - ) as connection: - data = pd.read_sql(sql=query, con=connection).to_dict(orient="records") - - except Exception: - error = traceback.format_exc() - log(f"[CATCHED] Task failed with error: \n{error}", level="error") - - return error, data, filetype - - -def save_treated_local_func( - filepath: str, data: pd.DataFrame, error: str, mode: str = "staging" -) -> str: - """ - Save treated file to CSV. 
-
-    Args:
-        filepath (str): Path to save the file
-        data (pd.DataFrame): Dataframe to save
-        error (str): Error caught during execution
-        mode (str, optional): Folder to save locally, and later the folder to upload to GCS.
-
-    Returns:
-        str: Path to the saved file
-    """
-    _filepath = filepath.format(mode=mode, filetype="csv")
-    Path(_filepath).parent.mkdir(parents=True, exist_ok=True)
-    if error is None:
-        data.to_csv(_filepath, index=False)
-        log(f"Treated data saved to: {_filepath}")
-    return _filepath
-
-
-def upload_run_logs_to_bq(  # pylint: disable=R0913
-    dataset_id: str,
-    parent_table_id: str,
-    timestamp: datetime,
-    error: str = None,
-    previous_error: str = None,
-    recapture: bool = False,
-    mode: str = "raw",
-):
-    """
-    Upload execution status table to BigQuery.
-    Table is uploaded to the same dataset, named {parent_table_id}_logs.
-    If a status_dict is passed, timestamp and error should not be passed.
-
-    Args:
-        dataset_id (str): dataset_id on BigQuery
-        parent_table_id (str): table_id on BigQuery
-        timestamp (datetime): timestamp to get the datetime range
-        error (str): error caught during execution
-        previous_error (str): previous error caught during execution
-        recapture (bool): if the execution was a recapture
-        mode (str): folder to save locally, and later the folder to upload to GCS
-
-    Returns:
-        None
-    """
-    table_id = parent_table_id + "_logs"
-    # Create partition directory
-    filename = f"{table_id}_{timestamp.isoformat()}"
-    partition = f"data={timestamp.date()}"
-    filepath = Path(
-        f"""data/{mode}/{dataset_id}/{table_id}/{partition}/{filename}.csv"""
-    )
-    filepath.parent.mkdir(exist_ok=True, parents=True)
-    # Create dataframe to be uploaded
-    if not error and recapture is True:
-        # if the recapture succeeded, update the 'erro' column
-        dataframe = pd.DataFrame(
-            {
-                "timestamp_captura": [timestamp],
-                "sucesso": [True],
-                "erro": [f"[recapturado]{previous_error}"],
-            }
-        )
-        log(f"Recapturing {timestamp} with previous error:\n{previous_error}")
-    else:
-        # not recapturing or error during flow execution
-        dataframe = pd.DataFrame(
-            {
-                "timestamp_captura": [timestamp],
-                "sucesso": [error is None],
-                "erro": [error],
-            }
-        )
-    # Save data locally
-    dataframe.to_csv(filepath, index=False)
-    # Upload to Storage
-    create_or_append_table(
-        dataset_id=dataset_id,
-        table_id=table_id,
-        path=filepath.as_posix(),
-        partitions=partition,
-    )
-    if error is not None:
-        raise Exception(f"Pipeline failed with error: {error}")
-
-
-def get_datetime_range(
-    timestamp: datetime,
-    interval: timedelta,
-) -> dict:
-    """
-    Task to get a datetime range in UTC
-
-    Args:
-        timestamp (datetime): timestamp to get the datetime range from
-        interval (timedelta): interval to get the datetime range
-
-    Returns:
-        dict: datetime range
-    """
-
-    start = (
-        (timestamp - interval)
-        .astimezone(tz=pytz.timezone("UTC"))
-        .strftime("%Y-%m-%d %H:%M:%S")
-    )
-
-    end = timestamp.astimezone(tz=pytz.timezone("UTC")).strftime("%Y-%m-%d %H:%M:%S")
-
-    return {"start": start, "end": end}
-
-
-def read_raw_data(filepath: str, csv_args: dict = None) -> tuple[str, pd.DataFrame]:
-    """
-    Read raw data from a file
-
-    Args:
-        filepath (str): filepath to read
-        csv_args (dict): arguments to pass to pandas.read_csv
-
-    Returns:
-        tuple[str, pd.DataFrame]: error and data
-    """
-    error = None
-    data = None
-    try:
-        file_type = filepath.split(".")[-1]
-
-        if file_type == "json":
-            data = pd.read_json(filepath)
-
-            # data = json.loads(data)
-        elif file_type in ("txt", "csv"):
-            if csv_args is None:
-                csv_args = {}
-            data = pd.read_csv(filepath, **csv_args)
-        else:
-            error = "Unsupported raw file extension. Supported only: json, csv and txt"
-
-    except Exception:
-        error = traceback.format_exc()
-        log(f"[CAUGHT] Task failed with error: \n{error}", level="error")
-
-    return error, data
-
-
-def get_raw_recursos(request_url: str, request_params: dict) -> tuple[str, str, str]:
-    """
-    Requests recursos data from the Movidesk API, paginating until all records are fetched.
-    Returns the error, the data and the filetype.
-    """
-    all_records = False
-    top = 1000
-    skip = 0
-    error = None
-    filetype = "json"
-    data = []
-
-    while not all_records:
-        try:
-            request_params["$top"] = top
-            request_params["$skip"] = skip
-
-            log(f"Request url {request_url}")
-
-            response = requests.get(
-                request_url,
-                params=request_params,
-                timeout=constants.MAX_TIMEOUT_SECONDS.value,
-            )
-            response.raise_for_status()
-
-            paginated_data = response.json()
-
-            if isinstance(paginated_data, dict):
-                paginated_data = [paginated_data]
-
-            if len(paginated_data) == top:
-                skip += top
-                time.sleep(36)
-            else:
-                if len(paginated_data) == 0:
-                    log("No data to process.")
-                    break
-                all_records = True
-            data += paginated_data
-
-            log(f"Data (paginated): {len(data)}")
-
-        except Exception:
-            error = traceback.format_exc()
-            log(f"[CAUGHT] Task failed with error: \n{error}", level="error")
-            data = []
-            break
-
-    log(f"Request finished, data size: {len(data)}.")
-
-    return error, data, filetype
-
-
-def build_table_id(mode: str, report_type: str):
-    """Build table_id based on which table is the target
-    of the current flow run
-
-    Args:
-        mode (str): SPPO or STPL
-        report_type (str): RHO or RDO
-
-    Returns:
-        str: table_id
-    """
-    if mode == "SPPO":
-        if report_type == "RDO":
-            table_id = constants.SPPO_RDO_TABLE_ID.value
-        else:
-            table_id = constants.SPPO_RHO_TABLE_ID.value
-    if mode == "STPL":
-        # slice the string to get rid of the V at the end of
-        # STPL report filenames
-        if report_type[:3] == "RDO":
-            table_id = constants.STPL_RDO_TABLE_ID.value
-        else:
-            table_id = constants.STPL_RHO_TABLE_ID.value
-    return table_id
-
-
-def generate_ftp_schedules(
-    interval_minutes: int, label: str = constants.RJ_SMTR_AGENT_LABEL.value
-):
-    """Generates IntervalClocks with the parameters needed to capture
-    each report.
-
-    Args:
-        interval_minutes (int): interval at which this flow will be run.
-        label (str, optional): Prefect label, defines which agent to use when launching the flow run.
-            Defaults to constants.RJ_SMTR_AGENT_LABEL.value.
-
-    Returns:
-        list[IntervalClock]: the clocks for scheduling runs
-    """
-    modes = ["SPPO", "STPL"]
-    reports = ["RDO", "RHO"]
-    clocks = []
-    for mode in modes:
-        for report in reports:
-            clocks.append(
-                IntervalClock(
-                    interval=timedelta(minutes=interval_minutes),
-                    start_date=datetime(
-                        2022, 12, 16, 5, 0, tzinfo=timezone(constants.TIMEZONE.value)
-                    ),
-                    parameter_defaults={
-                        "transport_mode": mode,
-                        "report_type": report,
-                        "table_id": build_table_id(mode=mode, report_type=report),
-                    },
-                    labels=[label],
-                )
-            )
-    return clocks
\ No newline at end of file
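
Reviewer note: for reference, a minimal usage sketch of the relocated generate_ftp_schedules helper. This is not part of the diff; it assumes the function keeps the signature shown above and now lives in pipelines.utils.backup.utils, and the 60-minute interval is an arbitrary example value.

# Sketch only: wrap the generated FTP clocks in a Prefect Schedule.
from prefect.schedules import Schedule

from pipelines.utils.backup.utils import generate_ftp_schedules

# One IntervalClock per (transport_mode, report_type) pair (SPPO/STPL x RDO/RHO),
# each carrying its table_id via build_table_id.
ftp_capture_schedule = Schedule(clocks=generate_ftp_schedules(interval_minutes=60))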