Skip to content

Commit

Permalink
fixup! Add retry logic with detailed logs to extraction of video data from HTML page
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Jul 10, 2024
1 parent 2829651 commit 669da9b
Showing 1 changed file with 3 additions and 17 deletions.
20 changes: 3 additions & 17 deletions src/ted2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,23 +840,9 @@ def extract_info_from_video_page(
url, retry_count=retry_count + 1
)

json_data = json.loads(next_data_tag.string)

if (
"props" not in json_data
or "pageProps" not in json_data["props"]
or "videoData" not in json_data["props"]["pageProps"]
):
logger.debug(
"Inproper data returned by server in __NEXT_DATA__ JSON. Retrying "
"in 5 seconds..."
)
time.sleep(5)
return self.extract_info_from_video_page(
url, retry_count=retry_count + 1
)

json_data = json_data["props"]["pageProps"]["videoData"]
json_data = json.loads(next_data_tag.string)["props"]["pageProps"][
"videoData"
]

requested_lang_code = self.get_lang_code_from_url(url)
if requested_lang_code and json_data["language"] != requested_lang_code:
Expand Down

0 comments on commit 669da9b

Please sign in to comment.