Skip to content

Commit

Permalink
Add delay to avoid spam blocking
Browse files Browse the repository at this point in the history
  • Loading branch information
quantrancse committed Sep 11, 2022
1 parent 4bed9f9 commit 81f1487
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ python hako2epub.py -u
python hako2epub.py -u light_novel_url
```
### Notes
* After processing 190 requests at a time, the program will pause for 120 seconds (2 mins) to avoid spam blocking. Please be patient if it hangs.
* Light novel will be downloaded into the same folder as the program.
* Downloaded information will be saved into `ln_info.json` file located in the same folder as the program.
* If you download specific chapters of a light novel, please enter the full name of the chapter in the "from ... to ..." prompt.
Expand Down Expand Up @@ -159,6 +160,6 @@ Distributed under the MIT License. See [LICENSE][license-url] for more informati
* [EbookLib](https://github.com/aerkalov/ebooklib)

<!-- MARKDOWN LINKS & IMAGES -->
[python-shield]: https://img.shields.io/badge/python-3.9.6-brightgreen?style=flat-square
[python-shield]: https://img.shields.io/badge/python-3.9.7-brightgreen?style=flat-square
[license-shield]: https://img.shields.io/github/license/quantrancse/hako2epub?style=flat-square
[license-url]: https://github.com/quantrancse/hako2epub/blob/master/LICENSE
23 changes: 22 additions & 1 deletion hako2epub.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import json
import re
import threading
import time
from io import BytesIO
from multiprocessing.dummy import Pool as ThreadPool
from os import mkdir
Expand All @@ -20,9 +21,20 @@
'Referer': 'https://ln.hako.vn/'
}

tool_version = '2.0.3'
tool_version = '2.0.5'
bs4_html_parser = 'html.parser'
ln_request = requests.Session()
current_num_requests = 0
max_num_requests = 190

# Guards current_num_requests: the counter is shared by the worker threads
# spawned via multiprocessing.dummy.Pool (ThreadPool), and a bare `+= 1`
# on a global is not atomic under concurrent access.
_num_requests_lock = threading.Lock()


def check_current_num_request(url):
    """Count requests sent to the light-novel hosts and throttle them.

    Increments the shared request counter for every URL that targets
    'ln.hako.vn' or 'docln.net'. Once the counter exceeds
    ``max_num_requests`` (190), sleeps for 120 seconds and resets the
    counter to zero to avoid triggering the site's spam blocking.

    The sleep happens while the lock is held, so any worker thread that
    reaches this check during the pause blocks too — the whole pool
    pauses together instead of each thread sleeping independently.

    Args:
        url (str): The URL that was just requested.
    """
    # URLs for other hosts (e.g. image CDNs) are not rate-limited.
    if not any(host in url for host in ['ln.hako.vn', 'docln.net']):
        return
    global current_num_requests
    with _num_requests_lock:
        current_num_requests += 1
        if current_num_requests > max_num_requests:
            time.sleep(120)
            current_num_requests = 0


def print_format(name='', info='', info_style='bold fg:orange', prefix='! '):
Expand Down Expand Up @@ -89,6 +101,7 @@ def get_image(self, image_url):
try:
image = Image.open(ln_request.get(
image_url, headers=HEADERS, stream=True, timeout=5).raw).convert('RGB')
check_current_num_request(image_url)
except Exception:
print('Can not get image: ' + image_url)
return image
Expand Down Expand Up @@ -122,6 +135,7 @@ def check_update_ln(self, old_ln, mode=''):
old_ln_url = old_ln.get('ln_url')
try:
request = ln_request.get(old_ln_url, headers=HEADERS, timeout=5)
check_current_num_request(old_ln_url)
soup = BeautifulSoup(request.text, bs4_html_parser)
new_ln = LNInfo()
new_ln = new_ln.get_ln_info(old_ln_url, soup, 'update')
Expand Down Expand Up @@ -422,6 +436,7 @@ def make_chapter(self, i=0):
pool = ThreadPool(THREAD_NUM)
contents = []
try:
print("After processing 190 requests at a time, the process will pause for 120 seconds to avoid spam blocking. Please be patient if it hangs.")
contents = list(tqdm.tqdm(pool.imap_unordered(self.make_chapter_content, chapter_urls_index), total=len(
chapter_urls_index), desc='Making chapter contents: '))
contents.sort(key=lambda x: x[0])
Expand All @@ -443,6 +458,8 @@ def make_chapter_content(self, chapter_list):

request = ln_request.get(
chapter_url, headers=HEADERS, timeout=5)
check_current_num_request(chapter_url)

soup = BeautifulSoup(request.text, bs4_html_parser)

xhtml_file = 'chap_%s.xhtml' % str(i + 1)
Expand Down Expand Up @@ -524,6 +541,8 @@ def bind_epub_book(self):
try:
self.book.set_cover('cover.jpeg', ln_request.get(
self.volume.cover_img, headers=HEADERS, stream=True, timeout=5).content)
check_current_num_request(self.volume.cover_img)

except Exception:
print('Error: Can not set cover image!')
print('--------------------')
Expand Down Expand Up @@ -760,6 +779,7 @@ def set_ln_volume_list(self, volume_urls):
try:
request = ln_request.get(
volume_url, headers=HEADERS, timeout=5)
check_current_num_request(volume_url)
soup = BeautifulSoup(request.text, bs4_html_parser)
self.volume_list.append(Volume(volume_url, soup))
except Exception:
Expand Down Expand Up @@ -826,6 +846,7 @@ def start(self, ln_url, mode):
try:
request = ln_request.get(
ln_url, headers=HEADERS, timeout=5)
check_current_num_request(ln_url)
soup = BeautifulSoup(request.text, bs4_html_parser)
if not soup.find('section', 'volume-list'):
print('Invalid url. Please try again.')
Expand Down

0 comments on commit 81f1487

Please sign in to comment.