From 81f148778613115f0adbb347707297eae02e597d Mon Sep 17 00:00:00 2001
From: Tran Trung Quan
Date: Sun, 11 Sep 2022 15:30:08 +0700
Subject: [PATCH] Add delay to avoid spam blocking

---
 README.md    |  3 ++-
 hako2epub.py | 23 ++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3571f65..f57c8a2 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,7 @@ python hako2epub.py -u
 python hako2epub.py -u light_novel_url
 ```
 ### Notes
+* After processing 190 requests at a time, the program will pause for 120 seconds (2 mins) to avoid spam blocking. Please be patient if it hangs.
 * Light novel will be downloaded into the same folder as the program.
 * Downloaded information will be saved into `ln_info.json` file located in the same folder as the program.
 * If you download specific chapters of a light novel, please enter the full name of the chapter in the "from ... to ..." prompt.
@@ -159,6 +160,6 @@ Distributed under the MIT License. See [LICENSE][license-url] for more informati
 
 * [EbookLib](https://github.com/aerkalov/ebooklib)
 
-[python-shield]: https://img.shields.io/badge/python-3.9.6-brightgreen?style=flat-square
+[python-shield]: https://img.shields.io/badge/python-3.9.7-brightgreen?style=flat-square
 [license-shield]: https://img.shields.io/github/license/quantrancse/hako2epub?style=flat-square
 [license-url]: https://github.com/quantrancse/hako2epub/blob/master/LICENSE
diff --git a/hako2epub.py b/hako2epub.py
index fd369a0..cc9cadf 100644
--- a/hako2epub.py
+++ b/hako2epub.py
@@ -1,6 +1,7 @@
 import argparse
 import json
 import re
+import time
 from io import BytesIO
 from multiprocessing.dummy import Pool as ThreadPool
 from os import mkdir
@@ -20,9 +21,20 @@
     'Referer': 'https://ln.hako.vn/'
 }
 
-tool_version = '2.0.3'
+tool_version = '2.0.5'
 bs4_html_parser = 'html.parser'
 ln_request = requests.Session()
+current_num_requests = 0
+max_num_requests = 190
+
+
+def check_current_num_request(url):
+    global current_num_requests
+    if any(substr in url for substr in ['ln.hako.vn', 'docln.net']):
+        current_num_requests += 1
+        if current_num_requests > max_num_requests:
+            time.sleep(120)
+            current_num_requests = 0
 
 
 def print_format(name='', info='', info_style='bold fg:orange', prefix='! '):
@@ -89,6 +101,7 @@ def get_image(self, image_url):
         try:
             image = Image.open(ln_request.get(
                 image_url, headers=HEADERS, stream=True, timeout=5).raw).convert('RGB')
+            check_current_num_request(image_url)
         except Exception:
             print('Can not get image: ' + image_url)
         return image
@@ -122,6 +135,7 @@ def check_update_ln(self, old_ln, mode=''):
         old_ln_url = old_ln.get('ln_url')
         try:
             request = ln_request.get(old_ln_url, headers=HEADERS, timeout=5)
+            check_current_num_request(old_ln_url)
             soup = BeautifulSoup(request.text, bs4_html_parser)
             new_ln = LNInfo()
             new_ln = new_ln.get_ln_info(old_ln_url, soup, 'update')
@@ -422,6 +436,7 @@ def make_chapter(self, i=0):
         pool = ThreadPool(THREAD_NUM)
         contents = []
         try:
+            print("After processing 190 requests at a time, the process will pause for 120 seconds to avoid spam blocking. Please be patient if it hangs.")
             contents = list(tqdm.tqdm(pool.imap_unordered(self.make_chapter_content, chapter_urls_index), total=len(
                 chapter_urls_index), desc='Making chapter contents: '))
             contents.sort(key=lambda x: x[0])
@@ -443,6 +458,8 @@ def make_chapter_content(self, chapter_list):
 
             request = ln_request.get(
                 chapter_url, headers=HEADERS, timeout=5)
+            check_current_num_request(chapter_url)
+
             soup = BeautifulSoup(request.text, bs4_html_parser)
 
             xhtml_file = 'chap_%s.xhtml' % str(i + 1)
@@ -524,6 +541,8 @@ def bind_epub_book(self):
         try:
             self.book.set_cover('cover.jpeg', ln_request.get(
                 self.volume.cover_img, headers=HEADERS, stream=True, timeout=5).content)
+            check_current_num_request(self.volume.cover_img)
+
         except Exception:
             print('Error: Can not set cover image!')
             print('--------------------')
@@ -760,6 +779,7 @@ def set_ln_volume_list(self, volume_urls):
             try:
                 request = ln_request.get(
                     volume_url, headers=HEADERS, timeout=5)
+                check_current_num_request(volume_url)
                 soup = BeautifulSoup(request.text, bs4_html_parser)
                 self.volume_list.append(Volume(volume_url, soup))
             except Exception:
@@ -826,6 +846,7 @@ def start(self, ln_url, mode):
         try:
             request = ln_request.get(
                 ln_url, headers=HEADERS, timeout=5)
+            check_current_num_request(ln_url)
             soup = BeautifulSoup(request.text, bs4_html_parser)
             if not soup.find('section', 'volume-list'):
                 print('Invalid url. Please try again.')