From 669da9b6dc3e30e068f1a27cf59846a3bddcf75a Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 10 Jul 2024 12:44:07 +0000 Subject: [PATCH] fixup! Add retry logic with detailled logs to extraction of video data from HTML page --- src/ted2zim/scraper.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/ted2zim/scraper.py b/src/ted2zim/scraper.py index 36356a9..333ae0a 100644 --- a/src/ted2zim/scraper.py +++ b/src/ted2zim/scraper.py @@ -840,23 +840,9 @@ def extract_info_from_video_page( url, retry_count=retry_count + 1 ) - json_data = json.loads(next_data_tag.string) - - if ( - "props" not in json_data - or "pageProps" not in json_data["props"] - or "videoData" not in json_data["props"]["pageProps"] - ): - logger.debug( - "Inproper data returned by server in __NEXT_DATA__ JSON. Retrying " - "in 5 seconds..." - ) - time.sleep(5) - return self.extract_info_from_video_page( - url, retry_count=retry_count + 1 - ) - - json_data = json_data["props"]["pageProps"]["videoData"] + json_data = json.loads(next_data_tag.string)["props"]["pageProps"][ + "videoData" + ] requested_lang_code = self.get_lang_code_from_url(url) if requested_lang_code and json_data["language"] != requested_lang_code: