#!/usr/bin/python3
# ytca.py -- YouTube caption auditor (forked from mtakemoto/YTCA).
import argparse
import csv
import sys
from collections import OrderedDict

try:
    import configparser
except ImportError:
    # Python 2 names the module ConfigParser.
    import ConfigParser as configparser

from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser
DEVELOPER_KEY = ""
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# The API key is taken from the first line of a file named 'apikey' in the
# current working directory. Strip surrounding whitespace so the line's
# trailing newline is not sent to the API as part of the key.
with open('apikey', 'r') as f:
    DEVELOPER_KEY = f.readline().strip()

# Shared ini parser used by load_channels_list().
config = configparser.ConfigParser()

# Command-line interface: the three input sources are mutually exclusive.
parser = argparse.ArgumentParser()
input_type_group = parser.add_mutually_exclusive_group()
input_type_group.add_argument("--ini", help="Specify an alternative ini file to process. Default is 'channels.ini'")
input_type_group.add_argument("--username", help="Run the caption auditor on a single youtube account, by username")
input_type_group.add_argument("--chid", help="Run the caption auditor on a single youtube account, by channel ID")

# Initialize Data API object
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
class Channel(object):
    """A YouTube channel and the caption statistics gathered for it.

    Every request goes through the module-level ``youtube`` Data API client.

    Attributes:
        name: Display name of the channel (None when built from a bare ID).
        channel_id: YouTube channel ID.
        uploads_list_id: ID of the channel's uploads playlist, or "" until
            uploads_playlist() has found one.
        videos_list: List of chunks; each chunk is the list of video IDs
            taken from one page of playlist results.
        total_videos: Number of video IDs collected by get_videos().
        captioned_videos: Number of videos the API reported as captioned.
    """

    def __init__(self, name, channel_id):
        self.name = name
        self.channel_id = channel_id
        self.uploads_list_id = ""
        self.videos_list = []
        self.total_videos = 0
        self.captioned_videos = 0

    @classmethod
    def from_username(cls, username):
        """Build a Channel by looking up a YouTube username.

        Prints an error and exits the program with status 1 when the API
        returns no channel for ``username``.
        """
        results = youtube.channels().list(
            part="snippet",
            forUsername=username
        ).execute()

        items = results.get('items')
        if not items:
            print("Error: YouTube username %s not found" % username)
            # sys.exit rather than the site-provided exit(), and a non-zero
            # status because this is a failure.
            sys.exit(1)

        first = items[0]
        return cls(first['snippet']['title'], first['id'])

    def uploads_playlist(self):
        """Query the channel for the ID of its uploads playlist.

        Stores the result in ``uploads_list_id``. The attribute is left
        unchanged when the API returns no matching channel or the channel
        has no ``contentDetails``.
        """
        results = youtube.channels().list(
            part="contentDetails",
            id=self.channel_id
        ).execute()

        items = results.get("items") or []
        if not items:
            # Unknown channel ID: nothing to record.
            return

        details = items[0].get("contentDetails")
        if details:
            self.uploads_list_id = details['relatedPlaylists']['uploads']

    def get_videos(self):
        """Walk the uploads playlist and collect its video IDs page by page.

        Each page of results becomes one chunk in ``videos_list`` and
        ``total_videos`` is increased by the chunk's length.
        """
        request = youtube.playlistItems().list(
            playlistId=self.uploads_list_id,
            part="contentDetails",
            maxResults=50
        )

        print("Populating videos list...")
        # list_next() returns a falsy value once there are no more pages.
        while request:
            response = request.execute()
            chunk = [
                item['contentDetails']['videoId']
                for item in response.get('items', [])
            ]
            # Skip empty pages so find_captions() never sends an empty ID list.
            if chunk:
                self.total_videos += len(chunk)
                self.videos_list.append(chunk)
            request = youtube.playlistItems().list_next(request, response)

    def video_request(self, videos_str):
        """Count captioned videos among a comma-separated string of video IDs.

        Helper for find_captions(): adds one to ``captioned_videos`` for each
        returned video whose ``contentDetails.caption`` equals ``'true'``.
        """
        video_response = youtube.videos().list(
            part="contentDetails",
            id=videos_str,
            maxResults=50
        ).execute()

        for video_result in video_response.get("items", []):
            if video_result['contentDetails']['caption'] == u'true':
                self.captioned_videos += 1

    def find_captions(self):
        """Request caption data for every collected chunk of video IDs."""
        for chunk in self.videos_list:
            if not chunk:
                continue
            # Turn the chunk into a comma separated string for the API query
            self.video_request(",".join(chunk))

    def _percent_captioned(self):
        """Return the captioned share of the channel's videos as a percentage.

        Returns 0.0 when no videos were found, avoiding a division by zero.
        """
        if not self.total_videos:
            return 0.0
        # float() forces true division on Python 2 as well.
        return (float(self.captioned_videos) / self.total_videos) * 100

    def run(self):
        """Audit the channel and print its caption statistics."""
        print("Processing channel %s" % (self.name or self.channel_id))
        self.uploads_playlist()
        if not self.uploads_list_id:
            print("Error: no uploads playlist found for channel ID %s" % self.channel_id)
            return
        self.get_videos()
        self.find_captions()
        print("Total videos in channel: %d" % self.total_videos)
        print("Captioned Videos: %d" % self.captioned_videos)
        print("Percent captioned: %0.2f%%\n" % self._percent_captioned())
#-------------------------
def load_channels_list(filename):
    """Read channel names and IDs from an ini file.

    Every section of the file must provide a ``name`` and an ``id`` option.
    A value wrapped in single quotes has the quotes removed.

    Args:
        filename: Path of the ini file, parsed with the module-level
            ``config`` parser.

    Returns:
        An OrderedDict mapping channel name to channel ID, in section order.
        It is empty when the file could not be read.
    """
    channels = OrderedDict()

    # read() returns the list of files it parsed; an empty list means the
    # file was missing or unreadable, which would otherwise pass silently.
    if not config.read(filename):
        print("Error: could not read ini file %s" % filename)
        return channels

    for section in config.sections():
        name = _strip_single_quotes(config.get(section, "name"))
        channel_id = _strip_single_quotes(config.get(section, "id"))
        channels[name] = channel_id
    return channels


def _strip_single_quotes(value):
    """Return ``value`` without one pair of enclosing single quotes, if any."""
    if len(value) >= 2 and value.startswith("'") and value.endswith("'"):
        return value[1:-1]
    return value
if __name__ == '__main__':
    # Script entry point: audit the channels selected on the command line.
    args = parser.parse_args()
    if args.ini:
        # Audit every channel listed in the ini file, in file order.
        for name, channel_id in load_channels_list(args.ini).items():
            Channel(name, channel_id).run()
    elif args.chid:
        # The channel name is unknown when only an ID is given.
        Channel(None, args.chid).run()
    elif args.username:
        Channel.from_username(args.username).run()
    else:
        # No input source was chosen: show the usage text and exit.
        parser.print_help()
        parser.exit()