forked from bsdplus/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyget.pl
executable file
·204 lines (169 loc) · 7.19 KB
/
yget.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/perl -T
#
## Calomel.org ,:, Download Youtube videos and music using wget
## Script Name : youtube_wget_video.pl
## Version : 0.34
## Valid from : May 2013
## URL Page : https://calomel.org/youtube_wget.html
## OS Support : Linux, Mac OSX, OpenBSD, FreeBSD or any system with perl
# `:`
## Two arguments
## $1 Youtube URL from the browser
## $2 prefix to the file name of the video (optional)
#
use strict;
use warnings;
############ options ##########################################

# Option: container/file type to download. The string is matched against each
# candidate stream entry, so it may be mp4, webm, avi or flv. mp4 plays on the
# widest range of devices and players.
my $fileType = 'mp4';

# Option: comma separated list of acceptable resolutions/qualities. The
# entries are tried against each stream, so list fallbacks in case the best
# quality is not offered. Known values: "highres" (4k), "hd1080" (1080p),
# "hd720" (720p), "itag=18" (standard definition) and "itag=17" (mobile,
# 144p). Only ask for highres 4k content if you can actually display it.
my $resolution = 'hd1080,hd720,itag=18';

# Option: maximum number of attempts when the download fails. Keep this
# modest (3 to 10) so a persistent error does not hammer youtube.
my $retryTimes = 5;

# Option: append the resolution to the file name? 0 = no, 1 = yes. When
# enabled the file name ends in something like "_hd1080.mp4".
my $resolutionFilename = 0;

# Option: DEBUG mode. Set to 1 to print the intermediate strings; useful when
# changing this code or writing your own youtube download script.
my $DEBUG = 0;

#################################################################
## Clear the environment and set our path. The script runs in taint mode (-T),
## and perl refuses to launch external commands (backticks, system) while
## PATH, IFS, CDPATH, ENV or BASH_ENV still carry values inherited from the
## caller. Remove all four shell-control variables and pin PATH to known
## system directories.
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
$ENV{PATH} = "/bin:/usr/bin:/usr/local/bin";

## initialize retry loop and resolution variables
my $prefix = "";           # untainted file name prefix (optional 2nd argument)
my $retry = 1;             # non-zero while the download still has to be (re)tried
my $retryCounter = 0;      # number of failed attempts so far
my $resFile = "unknown";   # resolution label used in the file name
my $user_url = "";         # raw URL exactly as given on the command line
my $user_prefix = "";      # raw prefix exactly as given on the command line
my $url = "";              # untainted URL handed to wget
## Collect the URL from the first command line argument and untaint it. Only
## characters that can appear in a plain watch URL are accepted; the value
## captured by the regex is what later gets passed to wget. Anchoring with
## \A and \z makes sure the whole argument is validated, including any
## embedded newline. A missing or illegal URL is reported on STDERR and the
## script ends with a non-zero status so callers can detect the failure.
if (defined($ARGV[0])) {
    chomp($user_url = $ARGV[0]);
    if ($user_url =~ m/\A([a-zA-Z0-9\-\&\?\=\:\.\/\_]+)\z/) {
        $url = $1;
    } else {
        die "\nError: Illegal characters in YouTube URL\n\n";
    }
} else {
    print STDERR "\nError: You Must specify a Youtube URL\n\n";
    exit 1;
}
## Optional second argument: a prefix for the video file name. It is untainted
## by accepting only letters, digits, underscore, dash, dot and space; anything
## else aborts the script.
if (defined $ARGV[1]) {
    $user_prefix = $ARGV[1];
    chomp $user_prefix;
    if ($user_prefix =~ m/^([a-zA-Z0-9\_\-\.\ ]+)$/) {
        $prefix = "$1";
    } else {
        die "\nError: Illegal characters in filename prefix\n\n";
    }
}
## Retry getting the video if the download fails for any reason. Every pass
## fetches the watch page again, picks a stream, builds the download URL and
## runs wget. The loop ends after the first successful download or once
## $retryTimes attempts have failed. Problems that make the page unusable
## (no HTML, no title, no stream map, no stream URL) abort the script.
while ( $retry != 0 && $retryCounter < $retryTimes ) {

    ## Download the html code of the youtube page. It supplies the page title,
    ## used for the local file name, and the stream map holding the video url.
    ## $url only contains the characters allowed by the command line check, so
    ## it is safe inside the double quotes of this shell command.
    my $html = `wget -Ncq -e convert-links=off --keep-session-cookies --save-cookies /dev/null --no-check-certificate "$url" -O-` or die "\nThere was a problem downloading the HTML file or the video is not open to the public.\n\n";

    ## Format the title of the page to use as the file name. The match is
    ## non-greedy so only the first <title> element is used, and a missing
    ## title is detected explicitly instead of through the result of s///,
    ## which is also false for a title that needs no replacement.
    my ($title) = $html =~ m/<title>(.+?)<\/title>/si;
    die "\nError: title of the HTML page not found. Check the URL.\n\n"
        unless defined $title;
    $title =~ s/[^\w\d]+/_/g;
    $title =~ s/_youtube//ig;
    $title =~ s/^_//ig;
    $title = lc ($title);
    $title =~ s/_amp//ig;

    ## collect the stream map (the list of video URLs) from the HTML page
    my ($download) = $html =~ /"url_encoded_fmt_stream_map"(.*)/i;
    die "\nError: no video stream map found in the HTML page. Check the URL.\n\n"
        unless defined $download;

    # Print all of the separated strings in the HTML page
    print "\n$download\n\n" if ($DEBUG == 1);

    # Look through the comma separated stream entries and select the first one
    # that matches the file type and one of the requested resolutions. If no
    # entry matches, $download keeps the whole stream map and the clean up
    # below works on that.
    my @urls = split(',', $download);
    my @res  = split(',', $resolution);
    OUTERLOOP:
    foreach my $val (@urls) {
        next unless $val =~ /$fileType/;
        foreach my $ress (@res) {
            if ( $val =~ /$ress/ ) {
                print "\n\nGOOD\n\n" if ($DEBUG == 1);
                print "$val\n" if ($DEBUG == 1);
                $resFile = $ress;
                $resFile = "sd640" if ( $ress =~ /itag=18/ );
                $resFile = "mb144" if ( $ress =~ /itag=17/ );
                $download = $val;
                last OUTERLOOP;
            }
        }
    }

    ## clean up the url by decoding escaped characters and removing unwanted strings
    $download =~ s/\:\ \"//;
    $download =~ s/%3A/:/g;
    $download =~ s/%2F/\//g;
    $download =~ s/%3F/\?/g;
    $download =~ s/%3D/\=/g;
    $download =~ s/%252C/%2C/g;
    $download =~ s/%26/\&/g;
    $download =~ s/sig=/signature=/g;
    $download =~ s/\\u0026/\&/g;
    $download =~ s/(type=[^&]+)//g;
    $download =~ s/(fallback_host=[^&]+)//g;
    $download =~ s/(quality=[^&]+)//g;

    ## Collect the url and sig since the html page randomizes their order.
    ## Both http and https stream URLs are accepted.
    my ($signature) = $download =~ /(signature=[^&]+)/;
    my ($youtubeurl) = $download =~ /(https?:.+)/;
    die "\nError: no video URL found in the HTML page. Check the URL.\n\n"
        unless defined $youtubeurl;
    $youtubeurl =~ s/&signature.+$//;

    ## combine the url and sig in order; a stream without a signature is
    ## requested with the bare url
    $download = defined $signature ? "$youtubeurl\&$signature" : $youtubeurl;

    ## a bit more cleanup: collapse repeated "&" and drop an itag parameter
    ## sitting directly in front of the signature
    $download =~ s/&+/&/g;
    $download =~ s/&itag=\d+&signature=/&signature=/g;

    ## combine file variables into the full file name
    my $filename = "unknown";
    if ( $resolutionFilename == 1 ) {
        $filename = "$prefix$title\_$resFile.$fileType";
    } else {
        $filename = "$prefix$title.$fileType";
    }

    ## Process check: Are we currently downloading this exact same video? Two of
    ## the same wget processes will overwrite themselves and corrupt the video.
    ## The file name is matched as a fixed string (-F) and placed after "--" so
    ## dots in it are not regex wildcards and a leading dash is not read as a
    ## grep option. '[w]get' is quoted to keep the shell from expanding it.
    my $psCount = `ps auwww | grep '[w]get' | grep -c -F -- "$filename"`;
    my ($running) = $psCount =~ /(\d+)/;
    $running = 0 unless defined $running;
    print "\nNumber of the same wgets running: $running\n" if ($DEBUG == 1);
    if ($running >= 1) {
        print "\nAlready downloading the same filename, exiting: $filename\n";
        exit 0;
    }

    ## Print the long, sanitized youtube url for testing and debugging
    print "\n$download\n" if ($DEBUG == 1);

    ## print the file name of the video collected from the web page title for us to see on the cli
    print "\n Download: $filename\n\n";

    ## Background the script: the parent exits and the child carries on. If
    ## fork fails the download simply continues in the foreground. Use "ps" if
    ## you need to look for the process running or use "ls -al" to look at the
    ## file size and date.
    fork and exit;

    ## Download the video. The list form of system() bypasses the shell.
    system("wget", "-Ncq", "-e", "convert-links=off", "--load-cookies", "/dev/null", "--tries=200", "--timeout=20", "--no-check-certificate", $download, "-O", $filename);

    ## Remember the status of wget right away, before anything can change $?
    my $wgetStatus = $?;
    print " error code: $wgetStatus\n" if ($DEBUG == 1);

    ## Exit Status: Check that wget reported success and that the file exists
    ## and is not empty. If the download experienced any problems the loop runs
    ## again, after a pause that grows with every failed attempt.
    if ( $wgetStatus == 0 && -e $filename && ! -z $filename ) {
        print " Finished: $filename\n\n" if ($DEBUG == 1);
        $retry = 0;
    }
    else {
        print STDERR "\n FAILED: $filename\n\n" if ($DEBUG == 1);
        $retry = 1;
        $retryCounter++;
        sleep $retryCounter;
    }
}
#### EOF #####