From 97d3344ceed0015b65a767c3035c2174e90c3fc3 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Mon, 7 Aug 2023 15:47:43 -0300 Subject: [PATCH 01/12] add: schedules to get bimonthly tests data from csv to datalake --- .../rj_sme/dump_url_educacao_basica/flows.py | 12 +-- .../dump_url_educacao_basica/schedules.py | 79 ++++++++++++++++++- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/pipelines/rj_sme/dump_url_educacao_basica/flows.py b/pipelines/rj_sme/dump_url_educacao_basica/flows.py index 3e06f1039..a8dc7b8fe 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/flows.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/flows.py @@ -26,11 +26,11 @@ ], ) -sme_gsheets_default_parameters = { - "dataset_id": "educacao_basica_alocacao", -} -sme_gsheets_flow = set_default_parameters( - sme_gsheets_flow, default_parameters=sme_gsheets_default_parameters -) +# sme_gsheets_default_parameters = { +# "dataset_id": "educacao_basica_alocacao", +# } +# sme_gsheets_flow = set_default_parameters( +# sme_gsheets_flow, default_parameters=sme_gsheets_default_parameters +# ) sme_gsheets_flow.schedule = gsheets_year_update_schedule diff --git a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py index ab508bbb8..a9ada44c5 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py @@ -25,7 +25,84 @@ "url_type": "google_drive", "materialize_after_dump": True, "dataset_id": "educacao_basica_alocacao", - } + }, + "bimestral_2023": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1bC-I6mT9SdRVDDL583WpeK8WOJMuIhfz/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2022": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/19PFXJKvaOrbexnt_jA4otE-LnMfHUH0H/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2021": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1k-taU8bMEYJ2U5EHvrNWQZnzN2Ht3uso/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2019": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1Q_drlgajGOpSsNlqw1cV2pRJ30Oh47MJ/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2018": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1b7wyFsX6T4W6U_VWIjPmJZ4HI9btaLah/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2017": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1kclQeNuzDCy0Npny1ZZLPjqiPMScw_1P/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2016": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1QH9VsphqPvFwUfE7FgQYI6YJ4TJFTptv/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2015": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1VKDnvgOzrEdT5LkNYBDE_ayVvKsj5jR0/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2014": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/18pJonyKwV210dpXr_B2M0p708jYYGwKz/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2013": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1rSi-UgB3qZDLh8U3geKRkMgSdmxddO5v/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, + "bimestral_2012": { + "dump_mode": "overwrite", + "url": "https://drive.google.com/file/d/1scfnos9iER86QVMx7Y_qPM1SKVv0MUED/view?usp=drive_link", + "url_type": "google_drive", + "materialize_after_dump": True, + "dataset_id": "educacao_basica_avaliacao", + }, } gsheets_clocks = generate_dump_url_schedules( From be2df6fb85933aa7a65e8033b9681e90282a764e Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Wed, 9 Aug 2023 10:51:42 -0300 Subject: [PATCH 02/12] add: encoding parameter for csv generated by dump_url pipeline --- pipelines/rj_sme/dump_url_educacao_basica/schedules.py | 2 ++ pipelines/utils/dump_url/flows.py | 2 ++ pipelines/utils/dump_url/tasks.py | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py index a9ada44c5..3b0cc069c 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py @@ -39,6 +39,7 @@ "url_type": "google_drive", "materialize_after_dump": True, "dataset_id": "educacao_basica_avaliacao", + "encoding": "latin-1", }, "bimestral_2021": { "dump_mode": "overwrite", @@ -46,6 +47,7 @@ "url_type": "google_drive", "materialize_after_dump": True, "dataset_id": "educacao_basica_avaliacao", + "encoding": "latin-1", }, "bimestral_2019": { "dump_mode": "overwrite", diff --git a/pipelines/utils/dump_url/flows.py b/pipelines/utils/dump_url/flows.py index 12d1f5205..cf2507c2e 100644 --- a/pipelines/utils/dump_url/flows.py +++ b/pipelines/utils/dump_url/flows.py @@ -48,6 +48,7 @@ # Table parameters partition_columns = Parameter("partition_columns", required=False, default="") + encoding = Parameter("encoding", required=False, default="utf-8") # Materialization parameters materialize_after_dump = Parameter( @@ -121,6 +122,7 @@ save_path=DUMP_DATA_PATH, build_json_dataframe=build_json_dataframe, dataframe_key_column=dataframe_key_column, + encoding=encoding, ) DUMP_CHUNKS_TASK.set_upstream(DOWNLOAD_URL_TASK) diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index 91f9f75ac..26a1fff1e 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -151,12 +151,13 @@ def dump_files( chunksize: int = 10**6, build_json_dataframe: bool = False, dataframe_key_column: str = None, + encoding: str = 'utf-8', ) -> None: """ Dump files according to chunk size """ event_id = datetime.now().strftime("%Y%m%d-%H%M%S") - for idx, chunk in enumerate(pd.read_csv(Path(file_path), chunksize=chunksize)): + for idx, chunk in enumerate(pd.read_csv(Path(file_path), chunksize=chunksize, encoding=encoding)): log(f"Dumping batch {idx} with size {chunksize}") handle_dataframe_chunk( dataframe=chunk, From e87170209036aefa952d1ff586ee3b6fed8ba414 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Aug 2023 13:52:03 +0000 Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/utils/dump_url/tasks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index 26a1fff1e..6d94ba612 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -151,13 +151,15 @@ def dump_files( chunksize: int = 10**6, build_json_dataframe: bool = False, dataframe_key_column: str = None, - encoding: str = 'utf-8', + encoding: str = "utf-8", ) -> None: """ Dump files according to chunk size """ event_id = datetime.now().strftime("%Y%m%d-%H%M%S") - for idx, chunk in enumerate(pd.read_csv(Path(file_path), chunksize=chunksize, encoding=encoding)): + for idx, chunk in enumerate( + pd.read_csv(Path(file_path), chunksize=chunksize, encoding=encoding) + ): log(f"Dumping batch {idx} with size {chunksize}") handle_dataframe_chunk( dataframe=chunk, From c74c070a3118c05886fbbaf419aa4f10fb29ed1d Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Wed, 9 Aug 2023 11:23:09 -0300 Subject: [PATCH 04/12] fix: encoding stop getting default value always --- pipelines/utils/dump_url/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pipelines/utils/dump_url/utils.py b/pipelines/utils/dump_url/utils.py index 4bfe0fcc8..ac26438a0 100644 --- a/pipelines/utils/dump_url/utils.py +++ b/pipelines/utils/dump_url/utils.py @@ -135,6 +135,10 @@ def generate_dump_url_schedules( # pylint: disable=too-many-arguments,too-many- parameter_defaults["materialize_to_datario"] = parameters[ "materialize_to_datario" ] + if "encoding" in parameters: + parameter_defaults["encoding"] = parameters[ + "encoding" + ] # if "dbt_model_secret_parameters" in parameters: # parameter_defaults["dbt_model_secret_parameters"] = parameters[ # "dbt_model_secret_parameters" From b45fcc0a9829560b0e607ceb1fb3ed3032c18c25 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Wed, 9 Aug 2023 12:39:39 -0300 Subject: [PATCH 05/12] add: on_bad_lines parameter to skip lines with errors --- pipelines/rj_sme/dump_url_educacao_basica/schedules.py | 1 + pipelines/utils/dump_url/tasks.py | 3 ++- pipelines/utils/dump_url/utils.py | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py index 3b0cc069c..850948cc2 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py @@ -40,6 +40,7 @@ "materialize_after_dump": True, "dataset_id": "educacao_basica_avaliacao", "encoding": "latin-1", + "on_bad_lines": "skip", }, "bimestral_2021": { "dump_mode": "overwrite", diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index 26a1fff1e..72a4f47a9 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -152,12 +152,13 @@ def dump_files( build_json_dataframe: bool = False, dataframe_key_column: str = None, encoding: str = 'utf-8', + on_bad_lines: str = 'error', ) -> None: """ Dump files according to chunk size """ event_id = datetime.now().strftime("%Y%m%d-%H%M%S") - for idx, chunk in enumerate(pd.read_csv(Path(file_path), chunksize=chunksize, encoding=encoding)): + for idx, chunk in enumerate(pd.read_csv(Path(file_path), chunksize=chunksize, encoding=encoding, on_bad_lines=on_bad_lines)): log(f"Dumping batch {idx} with size {chunksize}") handle_dataframe_chunk( dataframe=chunk, diff --git a/pipelines/utils/dump_url/utils.py b/pipelines/utils/dump_url/utils.py index ac26438a0..a35a97907 100644 --- a/pipelines/utils/dump_url/utils.py +++ b/pipelines/utils/dump_url/utils.py @@ -139,6 +139,10 @@ def generate_dump_url_schedules( # pylint: disable=too-many-arguments,too-many- parameter_defaults["encoding"] = parameters[ "encoding" ] + if "on_bad_lines" in parameters: + parameter_defaults["on_bad_lines"] = parameters[ + "on_bad_lines" + ] # if "dbt_model_secret_parameters" in parameters: # parameter_defaults["dbt_model_secret_parameters"] = parameters[ # "dbt_model_secret_parameters" From 7f3007f7d1404284024f99eb97febbf6289bdcc0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Aug 2023 15:49:08 +0000 Subject: [PATCH 06/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/utils/dump_url/tasks.py | 13 ++++++++++--- pipelines/utils/dump_url/utils.py | 8 ++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index 72a4f47a9..a1e038098 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -151,14 +151,21 @@ def dump_files( chunksize: int = 10**6, build_json_dataframe: bool = False, dataframe_key_column: str = None, - encoding: str = 'utf-8', - on_bad_lines: str = 'error', + encoding: str = "utf-8", + on_bad_lines: str = "error", ) -> None: """ Dump files according to chunk size """ event_id = datetime.now().strftime("%Y%m%d-%H%M%S") - for idx, chunk in enumerate(pd.read_csv(Path(file_path), chunksize=chunksize, encoding=encoding, on_bad_lines=on_bad_lines)): + for idx, chunk in enumerate( + pd.read_csv( + Path(file_path), + chunksize=chunksize, + encoding=encoding, + on_bad_lines=on_bad_lines, + ) + ): log(f"Dumping batch {idx} with size {chunksize}") handle_dataframe_chunk( dataframe=chunk, diff --git a/pipelines/utils/dump_url/utils.py b/pipelines/utils/dump_url/utils.py index a35a97907..04c26e000 100644 --- a/pipelines/utils/dump_url/utils.py +++ b/pipelines/utils/dump_url/utils.py @@ -136,13 +136,9 @@ def generate_dump_url_schedules( # pylint: disable=too-many-arguments,too-many- "materialize_to_datario" ] if "encoding" in parameters: - parameter_defaults["encoding"] = parameters[ - "encoding" - ] + parameter_defaults["encoding"] = parameters["encoding"] if "on_bad_lines" in parameters: - parameter_defaults["on_bad_lines"] = parameters[ - "on_bad_lines" - ] + parameter_defaults["on_bad_lines"] = parameters["on_bad_lines"] # if "dbt_model_secret_parameters" in parameters: # parameter_defaults["dbt_model_secret_parameters"] = parameters[ # "dbt_model_secret_parameters" From c6ab82864f35b9ba13128ac4c42f9c6f56eb52b1 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Wed, 9 Aug 2023 12:50:12 -0300 Subject: [PATCH 07/12] add: on_bad_lines parameter to flow config --- pipelines/utils/dump_url/flows.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelines/utils/dump_url/flows.py b/pipelines/utils/dump_url/flows.py index cf2507c2e..adf0a0451 100644 --- a/pipelines/utils/dump_url/flows.py +++ b/pipelines/utils/dump_url/flows.py @@ -49,6 +49,7 @@ # Table parameters partition_columns = Parameter("partition_columns", required=False, default="") encoding = Parameter("encoding", required=False, default="utf-8") + on_bad_lines = Parameter("on_bad_lines", required=False, default="error") # Materialization parameters materialize_after_dump = Parameter( @@ -123,6 +124,7 @@ build_json_dataframe=build_json_dataframe, dataframe_key_column=dataframe_key_column, encoding=encoding, + on_bad_lines=on_bad_lines, ) DUMP_CHUNKS_TASK.set_upstream(DOWNLOAD_URL_TASK) From 051711e0e4b36ea999c6fc6ec7e3ada10c8ff779 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Wed, 9 Aug 2023 13:42:45 -0300 Subject: [PATCH 08/12] just to trigger actions --- pipelines/utils/dump_url/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index a1e038098..f97eb99ba 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -155,7 +155,7 @@ def dump_files( on_bad_lines: str = "error", ) -> None: """ - Dump files according to chunk size + Dump files according to chunk size and read mode """ event_id = datetime.now().strftime("%Y%m%d-%H%M%S") for idx, chunk in enumerate( From b8b5b737cf8be69883d5b46f794978a47096eee4 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Wed, 9 Aug 2023 15:03:23 -0300 Subject: [PATCH 09/12] add: sep parameter to use in pandas read_csv --- pipelines/rj_sme/dump_url_educacao_basica/schedules.py | 1 + pipelines/utils/dump_url/flows.py | 2 ++ pipelines/utils/dump_url/tasks.py | 2 ++ pipelines/utils/dump_url/utils.py | 2 ++ 4 files changed, 7 insertions(+) diff --git a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py index 850948cc2..8eca06250 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py @@ -41,6 +41,7 @@ "dataset_id": "educacao_basica_avaliacao", "encoding": "latin-1", "on_bad_lines": "skip", + "separator": ";", }, "bimestral_2021": { "dump_mode": "overwrite", diff --git a/pipelines/utils/dump_url/flows.py b/pipelines/utils/dump_url/flows.py index adf0a0451..615abdf17 100644 --- a/pipelines/utils/dump_url/flows.py +++ b/pipelines/utils/dump_url/flows.py @@ -50,6 +50,7 @@ partition_columns = Parameter("partition_columns", required=False, default="") encoding = Parameter("encoding", required=False, default="utf-8") on_bad_lines = Parameter("on_bad_lines", required=False, default="error") + separator = Parameter("separator", required=False, default=",") # Materialization parameters materialize_after_dump = Parameter( @@ -125,6 +126,7 @@ dataframe_key_column=dataframe_key_column, encoding=encoding, on_bad_lines=on_bad_lines, + separator=separator, ) DUMP_CHUNKS_TASK.set_upstream(DOWNLOAD_URL_TASK) diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index f97eb99ba..f6094d5b6 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -153,6 +153,7 @@ def dump_files( dataframe_key_column: str = None, encoding: str = "utf-8", on_bad_lines: str = "error", + separator: str = ',', ) -> None: """ Dump files according to chunk size and read mode @@ -164,6 +165,7 @@ def dump_files( chunksize=chunksize, encoding=encoding, on_bad_lines=on_bad_lines, + sep=separator, ) ): log(f"Dumping batch {idx} with size {chunksize}") diff --git a/pipelines/utils/dump_url/utils.py b/pipelines/utils/dump_url/utils.py index 04c26e000..6397c65ce 100644 --- a/pipelines/utils/dump_url/utils.py +++ b/pipelines/utils/dump_url/utils.py @@ -139,6 +139,8 @@ def generate_dump_url_schedules( # pylint: disable=too-many-arguments,too-many- parameter_defaults["encoding"] = parameters["encoding"] if "on_bad_lines" in parameters: parameter_defaults["on_bad_lines"] = parameters["on_bad_lines"] + if "separator" in parameters: + parameter_defaults["separator"] = parameters["separator"] # if "dbt_model_secret_parameters" in parameters: # parameter_defaults["dbt_model_secret_parameters"] = parameters[ # "dbt_model_secret_parameters" From 05aa1a4f40dab6f16657dfe982d519aef66e1315 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Aug 2023 18:03:44 +0000 Subject: [PATCH 10/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/utils/dump_url/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/utils/dump_url/tasks.py b/pipelines/utils/dump_url/tasks.py index f6094d5b6..d5b6d44e1 100644 --- a/pipelines/utils/dump_url/tasks.py +++ b/pipelines/utils/dump_url/tasks.py @@ -153,7 +153,7 @@ def dump_files( dataframe_key_column: str = None, encoding: str = "utf-8", on_bad_lines: str = "error", - separator: str = ',', + separator: str = ",", ) -> None: """ Dump files according to chunk size and read mode From 388623813c8c839006d5a70a76f239b64afc7338 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Fri, 25 Aug 2023 15:28:47 -0300 Subject: [PATCH 11/12] change schedule --- pipelines/rj_sme/dump_url_educacao_basica/schedules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py index 8eca06250..e4c326ee9 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py @@ -111,7 +111,7 @@ gsheets_clocks = generate_dump_url_schedules( interval=timedelta(days=365), - start_date=datetime(2022, 11, 4, 20, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 8, 26, 0, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SME_AGENT_LABEL.value, ], From 2871828789b2a49b7f7c728d6727dd876d336139 Mon Sep 17 00:00:00 2001 From: Bruno Almeida Date: Fri, 22 Mar 2024 10:23:34 -0300 Subject: [PATCH 12/12] fix: schedule to trigger github actions --- pipelines/rj_sme/dump_url_educacao_basica/schedules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py index e4c326ee9..ff43bafa7 100644 --- a/pipelines/rj_sme/dump_url_educacao_basica/schedules.py +++ b/pipelines/rj_sme/dump_url_educacao_basica/schedules.py @@ -111,7 +111,7 @@ gsheets_clocks = generate_dump_url_schedules( interval=timedelta(days=365), - start_date=datetime(2023, 8, 26, 0, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2024, 3, 22, 12, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SME_AGENT_LABEL.value, ],