-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathp_pl_dl_pt.py
183 lines (148 loc) · 6.67 KB
/
p_pl_dl_pt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from time import time
import yt_dlp as youtube_dl
import p_pl_dl_common as dl_common
# Site/extractor name. Within this module it is interpolated into console
# messages and into the per-site output and download-archive paths.
sExtractor = 'porntrex'
"""
For PornTrex, Youtube-DL does not seem able to consistently pick the highest
quality source. To workaround this, I pull the entire video page and pick the
largest file available from its download options, and pass that URL to Youtube-DL.
PornTrex also uses AJAX, which is a bit awkward to deal with for me.
"""
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    """
    Download a single video, or every video found on a playlist, favourites
    or search URL, by handing the chosen source URL of each video to Youtube-DL.

    Args:
        sUrl: URL to process. It must contain 'porntrex.com/video',
            'porntrex.com/my' or 'porntrex.com/search'.
        sCookieSource: Optional cookie source passed to
            dl_common.parseCookieFile(). When None, no cookies are parsed here.
        nVideoLimit: Optional maximum number of videos to download. The limit
            is checked after each download, so at least one video is always
            attempted.
        bDebug: When True, the initial page HTML is written to
            'html_content.txt' and the discovered video URLs are printed.

    Raises:
        ValueError: If the URL type cannot be determined from sUrl.
        ConnectionError: If the initial request does not return 200, or a
            playlist page returns 403.
    """
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")
    nTimeStart = time()

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    if 'porntrex.com/video' in sUrl:
        sUrlType = 'video'
    elif 'porntrex.com/my' in sUrl:
        sUrlType = 'playlist'
    elif 'porntrex.com/search' in sUrl:
        sUrlType = 'playlist'  # Search results can be treated as a playlist
    else:
        raise ValueError(f"Unable to determine {sExtractor} URL type for {sUrl}! Please submit a bug report!")

    # Attempt initial connection
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed : Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug. The context manager
        # guarantees the handle is closed even if the write fails.
        with open("html_content.txt", "w", encoding='utf-8') as fDebug:
            fDebug.write(html.text)

    lUrlVideos = []
    if sUrlType == 'playlist':
        print("Playlist detected. Getting videos...")
        sUrlBaseFormat = urlBaseFormatGet(sUrl)
        nPage = 0
        while True:
            nPage += 1
            print(f"Attempting page {nPage:02}")
            if 'search' in sUrl:
                # The first page of search results has no page suffix;
                # later pages are addressed as '<n>/'.
                if nPage == 1:
                    sUrlPage = sUrlBaseFormat.format('')
                else:
                    sUrlPage = sUrlBaseFormat.format(f'{nPage}/')
            else:
                sUrlPage = sUrlBaseFormat.format(f'{nPage:02}')

            page = dl_common.Page(sUrlPage)
            nPageStatus = page.content.status_code
            if nPageStatus != 200:
                if nPageStatus == 403:
                    raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
                elif nPageStatus == 404:
                    print(f"Page {nPage} returned 404!")
                    print(f"Assuming page {nPage - 1} was the last page of the playlist")
                    break
                # NOTE: any other non-200 status falls through to extraction;
                # the loop then ends as soon as a page yields no videos.

            page._extract_video_urls()
            if page.videos:
                lUrlVideos += page.videos
            else:
                break

        # Remove non-video URLs that may have been picked up. A dedicated loop
        # name is used so the sUrl parameter is not overwritten.
        lUrlVideos = [
            sUrlFound for sUrlFound in lUrlVideos
            if sUrlFound != 'https://www.porntrex.com/my/favourites/videos/' and 'video' in sUrlFound
        ]
    elif sUrlType == 'video':
        lUrlVideos = [sUrl]

    # Bound on every path so later references can never hit an unbound name.
    nNumVideos = len(lUrlVideos)
    if sUrlType == 'playlist':
        print(f"Found {nNumVideos} video URLs in the playlist")
        if bDebug:
            for sUrlFound in lUrlVideos:
                print(sUrlFound)

    # Work on a copy so the shared option dictionary in dl_common is not mutated.
    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"

    for nIdx, sVideoUrl in enumerate(lUrlVideos):
        if sUrlType == 'playlist':
            print(f"Processing video {nIdx + 1} of {nNumVideos}...")
            print()
        if bDebug:
            print(f"Processing {sVideoUrl}")

        # Video resolves the source URL to download (see Video.downloadUrl).
        video = Video(sVideoUrl)
        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{video.sFullName}'

        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.download([video.downloadUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
            break
        print()

    nTimeEnd = time()
    print(f"Run time: {round((nTimeEnd - nTimeStart) / 60, 2)} minutes")
def urlBaseFormatGet(sUrl):
    """
    Create the base format-string URL that can be used to iteratively go through pages.

    Playlists and favorites use an AJAX format. They do not have simple page numbers.
    Search results use simple page numbers.

    Args:
        sUrl: The playlist, favourites or search URL supplied by the user.

    Returns:
        A string containing a single '{}' placeholder for the page value, or
        None when sUrl contains none of 'playlists', 'favourites' or 'search'.
    """
    sUrlBase = None
    if 'playlists' in sUrl:
        print("Using 'playlists' format...")
        nType = 10
        lUrlComponents = sUrl.split('/')
        # Prefer the path component that directly follows 'playlists', so the
        # ID is found whether or not the URL ends with a trailing slash.
        nPlaylistId = ''
        if 'playlists' in lUrlComponents:
            nIdxId = lUrlComponents.index('playlists') + 1
            if nIdxId < len(lUrlComponents):
                nPlaylistId = lUrlComponents[nIdxId]
        if not nPlaylistId:
            # Fall back to the positional lookup (second-to-last component).
            nPlaylistId = lUrlComponents[-2]
        sUrlBase = f'https://www.porntrex.com/my/playlists/{nPlaylistId}/?mode=async&function=get_block&block_id=list_videos_my_favourite_videos&fav_type={nType}&playlist_id={nPlaylistId}&sort_by=&from_my_fav_videos={{}}'
    elif 'favourites' in sUrl:
        print("Using 'favourites' format...")
        nType = 0
        nPlaylistId = 0
        sUrlBase = f'https://www.porntrex.com/my/favourites/videos/?mode=async&function=get_block&block_id=list_videos_my_favourite_videos&fav_type={nType}&playlist_id={nPlaylistId}&sort_by=&from_my_fav_videos={{}}'
    elif 'search' in sUrl:
        # The page suffix is appended after a trailing slash.
        if not sUrl.endswith('/'):
            sUrl += '/'
        sUrl += '{}'
        sUrlBase = sUrl
    return sUrlBase
class Video(dl_common.Page):
    """
    A single video page, resolved to the source URL with the largest file size.

    Attributes set by the constructor:
        downloadUrl: URL of the largest available source (see
            _extract_video_largest).
        sVideoId: Second-to-last component of the page URL.
        sVideoName: Last component of the page URL.
        sFullName: '<sVideoId>_<sVideoName>.mp4', used as the output file name.
    """

    def __init__(self, url):
        """
        Load the page, derive the naming attributes from its URL, and pick the
        download source.

        Args:
            url: URL of the video page. A trailing slash is tolerated.
        """
        super().__init__(url)
        self.downloadUrl = None

        lUrlComponents = self.url.split('/')
        # Drop the empty component produced by a trailing slash.
        if lUrlComponents[-1] == '':
            lUrlComponents.pop(-1)
        self._lUrlComponents = lUrlComponents
        self.sVideoId = self._lUrlComponents[-2]
        self.sVideoName = self._lUrlComponents[-1]
        self.sFullName = '_'.join([self.sVideoId, self.sVideoName]) + '.mp4'

        self._extract_video_urls(sFilter='get_file')
        self._extract_video_largest()

    def _extract_video_largest(self):
        """
        Get the file that has the largest file size, which should be the highest quality.

        Each candidate in self.videos is requested with stream=True so only the
        headers are needed to read its Content-Length. The first candidate is
        chosen when no size is larger than zero.

        Raises:
            IndexError: If self.videos is empty, i.e. no sources were found.
        """
        if not self.videos:
            raise IndexError(f"No downloadable video sources were found for {self.url}")

        nIdxBiggest = 0
        nBiggestSize = 0
        for nIdx, sUrlSource in enumerate(self.videos):
            # Close every streamed response so its connection is released
            # (assumes the session returns requests-style responses).
            with dl_common.session.get(sUrlSource, cookies=dl_common.dCookiesParsed, stream=True) as response:
                # A source without a Content-Length header counts as size 0.
                nSize = int(response.headers.get('Content-Length', 0))
            if nSize > nBiggestSize:
                nIdxBiggest = nIdx
                nBiggestSize = nSize
        self.downloadUrl = self.videos[nIdxBiggest]