Skip to content

Commit

Permalink
Merge pull request #40 from RyouMon/feature-yandere
Browse files Browse the repository at this point in the history
Can organize yandere post by artist
  • Loading branch information
RyouMon authored May 21, 2023
2 parents 236544a + 3660a6e commit 2058f2c
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 19 deletions.
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ yandere:
USERNAME: xxxx
```

If you want save pixiv files to `pictures/a`, and want save yandere files to `pictures/b`, you can modify config file like this:
## Download location
By default, pictures are downloaded to the working directory.
If you want to change the download location, add the `FILES_STORE` option to your config.
For example, if you want to save pixiv files to `pictures/a` and yandere files to `pictures/b`, modify your config file like this:
```yaml
pixiv:
ACCESS_TOKEN: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
Expand All @@ -127,11 +130,21 @@ yandere:
FILES_STORE: pictures/b
```
## Organize file by artist
If you want to organize pixiv illustrations by user, add this line to your config:
```yaml
...
pixiv:
FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER: true # add this line to your pixiv config
# FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER: true # (deprecated)
ENABLE_ORGANIZE_BY_ARTIST: true # add this line to your pixiv config
...
...
```
If you want to organize yandere posts by artist, add this line to your config:
```yaml
...
yandere:
ENABLE_ORGANIZE_BY_ARTIST: true # add this line to your yandere config
...
...
```
3 changes: 2 additions & 1 deletion src/favorites_crawler/constants/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
PIXIV_LOGIN_URL = "https://app-api.pixiv.net/web/v1/login"
PIXIV_AUTH_TOKEN_URL = "https://oauth.secure.pixiv.net/auth/token"

YANDERE_POST_URL = 'https://yande.re/post.json'
YANDERE_LIST_POST_URL = 'https://yande.re/post.json'
YANDERE_SHOW_POST_URL = 'https://yande.re/post/show/{id}'

LEMON_PIC_USER_FAVORITES_URL = 'https://www.lmmbtc.com/user-center'

Expand Down
1 change: 1 addition & 0 deletions src/favorites_crawler/itemloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class YanderePostItemLoader(ItemLoader):
default_output_processor = take_first

file_urls_out = identity
artist_out = Compose(take_first, lambda s: s.strip())


class NHentaiGalleryItemLoader(ItemLoader):
Expand Down
11 changes: 8 additions & 3 deletions src/favorites_crawler/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,21 @@ class PixivIllustItem(BaseItem):
user_id: str = field(default=None)

def get_folder_name(self, spider):
if not spider.crawler.settings.getbool('FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER'):
if not (spider.crawler.settings.getbool('FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER')
or spider.crawler.settings.getbool('ENABLE_ORGANIZE_BY_ARTIST')):
return ''
return self.user_id or 'unknown'


@dataclass
class YanderePostItem(BaseItem):

def get_folder_name(self, _):
return ''
artist: str = field(default=None)

def get_folder_name(self, spider):
if not spider.crawler.settings.getbool('ENABLE_ORGANIZE_BY_ARTIST'):
return ''
return self.artist or 'unknown'


@dataclass
Expand Down
33 changes: 21 additions & 12 deletions src/favorites_crawler/spiders/yandere.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from favorites_crawler.spiders import BaseSpider
from favorites_crawler.constants.domains import YANDERE_DOMAIN
from favorites_crawler.itemloaders import YanderePostItemLoader
from favorites_crawler.constants.endpoints import YANDERE_POST_URL
from favorites_crawler.constants.endpoints import YANDERE_LIST_POST_URL, YANDERE_SHOW_POST_URL


class YandereSpider(BaseSpider):
Expand All @@ -28,26 +28,35 @@ def start_requests(self):
raise CloseSpider('Did you run "favors login yandere"?')

self.params['tags'] = f'vote:>=1:{username}'
yield Request(f'{YANDERE_POST_URL}?{urlencode(self.params)}')
yield Request(f'{YANDERE_LIST_POST_URL}?{urlencode(self.params)}')

def parse_start_url(self, response, **kwargs):
for request_or_item in self.parse(response, **kwargs):
yield request_or_item

def parse(self, response, **kwargs):
"""Spider Contracts:
"""Parse list post url
@url https://yande.re/post.json?limit=100&page=1
@returns item 100
@returns requests 1
@scrapes file_urls
@returns requests 101
"""
posts = response.json()

if len(posts) == self.limit:
self.params['page'] += 1
yield Request(f'{YANDERE_POST_URL}?{urlencode(self.params)}')
yield Request(f'{YANDERE_LIST_POST_URL}?{urlencode(self.params)}', callback=self.parse_start_url)

for post in posts:
loader = YanderePostItemLoader()
loader.add_value('file_urls', post['file_url'])
yield loader.load_item()
if self.settings.getbool('ENABLE_ORGANIZE_BY_ARTIST'):
yield Request(YANDERE_SHOW_POST_URL.format(id=post['id']),
callback=self.parse, cb_kwargs={'loader': loader})
else:
yield loader.load_item()

def parse(self, response, **kwargs):
"""Parse show post url
@url https://yande.re/post/show/1056911
@returns item 1
@scrapes artist
"""
loader = kwargs.get('loader', YanderePostItemLoader())
loader.selector = response
loader.add_xpath('artist', '//li[@class="tag-type-artist"]/a[last()]/text()')
yield loader.load_item()
2 changes: 1 addition & 1 deletion tests/test_utils/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_cbz_archive_should_contains_page(self, comic_path):
comic_archive = create_comic_archive(comic_path)

with ZipFile(comic_archive) as zf:
assert zf.namelist() == ['1.jpg', '2.jpg']
assert sorted(zf.namelist()) == ['1.jpg', '2.jpg']

def test_should_write_comment_to_archive(self, comic_path):
comic_archive = create_comic_archive(comic_path, comment=b"I'm a comic.")
Expand Down

0 comments on commit 2058f2c

Please sign in to comment.