diff --git a/db.py b/db.py index 46d64bf..fbc11af 100644 --- a/db.py +++ b/db.py @@ -53,7 +53,30 @@ def valid_name(self, size): def decode_twimg(orig_url): url = urlparse(orig_url) if url.netloc == "abs.twimg.com": - base = url.netloc + "/" + url.path + assert url.path == "" or url.path[0] == "/" + base = url.netloc + url.path + return base, (None, None), (None, None) + + elif url.netloc == "video.twimg.com": + if url.path.startswith("/ext_tw_video/"): + m = re.fullmatch(r"/ext_tw_video/[0-9]+/p(?:r|u)/(?:pl|vid(?:/avc1)?/[0-9]+x[0-9]+)/([A-Za-z0-9_-]+)\.(mp4|m3u8)", url.path) + assert m, url.path + base = "{}/{}.{}".format(url.netloc, m.group(1), m.group(2)) + print(base) + + elif url.path.startswith("/tweet_video/"): + m = re.fullmatch(r"/tweet_video/([A-Za-z0-9_-]+)\.(mp4)", url.path) + assert m, url.path + base = "{}/{}.{}".format(url.netloc, m.group(1), m.group(2)) + + elif url.path.startswith("/amplify_video/"): + m = re.fullmatch(r"/amplify_video/[0-9]+/(?:pl|vid(?:/avc1)?/[0-9]+x[0-9]+)/([A-Za-z0-9_-]+)\.(mp4|m3u8)", url.path) + assert m, url.path + base = "{}/{}.{}".format(url.netloc, m.group(1), m.group(2)) + + else: + assert False, url.path + return base, (None, None), (None, None) assert url.netloc in ("pbs.twimg.com", ""), orig_url @@ -201,7 +224,10 @@ def add_from_archive(self, fs, tweets_media): if not m: print("what about", media_fname) continue - cache_key = "/media/"+m.group(2) + if m.group(3) == "mp4": + cache_key = "video.twimg.com/" + m.group(2) + ".mp4" + else: + cache_key = "/media/"+m.group(2) imageset = self.media_by_url.setdefault(cache_key, ImageSet()) fmt = m.group(3) path = tweets_media+"/"+media_fname @@ -231,27 +257,51 @@ def lookup(self, url): # replace urls in tweet/user objects +def urlmap_list(urlmap, f, l): + l2 = [] + any_patched = False + for m in l: + m2 = f(urlmap, m) + if m2: + l2.append(m2) + any_patched = True + else: + l2.append(m) + if any_patched: + return l2 + +def urlmap_variant(urlmap, variant): + if "url" in variant: + url = variant["url"] + url2 = urlmap(url) + if url != url2: + v = variant.copy() + v["url"] = url2 + return v + +def urlmap_variants(urlmap, variants): + return urlmap_list(urlmap, urlmap_variant, variants) + def urlmap_media(urlmap, media): + m = None if "media_url_https" in media: url = media["media_url_https"] url2 = urlmap(url) if url != url2: m = media.copy() m["media_url_https"] = url2 - return m + if "video_info" in media and "variants" in media["video_info"]: + v2 = urlmap_variants(urlmap, media["video_info"]["variants"]) + if v2: + vi = media["video_info"].copy() + vi["variants"] = v2 + if m is None: + m = media.copy() + m["video_info"] = vi + return m def urlmap_media_list(urlmap, media_list): - l = [] - any_patched = False - for m in media_list: - m2 = urlmap_media(urlmap, m) - if m2: - l.append(m2) - any_patched = True - else: - l.append(m) - if any_patched: - return l + return urlmap_list(urlmap, urlmap_media, media_list) def urlmap_entities(urlmap, entities): if "media" in entities: diff --git a/server.py b/server.py index 7fc377e..0f04710 100644 --- a/server.py +++ b/server.py @@ -4,6 +4,7 @@ server_path = os.path.dirname(__file__) sys.path.append(server_path + "/vendor") # use bundled copy of bottle, if system has none from bottle import parse_date, request, route, run, static_file, HTTPError, HTTPResponse +from pprint import pprint use_twitter_cdn_for_images = False @@ -14,7 +15,8 @@ def __init__(self, db): # tweets def urlmap(self, url): - if self.db.media.lookup(url): + item, cacheable = self.db.media.lookup(url) + if item: return "/media" + url[7:] # /media/pbs.twitter.com/... if use_twitter_cdn_for_images: return url @@ -29,6 +31,10 @@ def patch(self, tweet): if "entities" in tweet: entities = urlmap_entities(self.urlmap, tweet["entities"]) + extended_entities = None + if "extended_entities" in tweet: + extended_entities = urlmap_entities(self.urlmap, tweet["extended_entities"]) + quoted_status = None if "quoted_status_id_str" in tweet: _, quoted_status = self.get_tweet(int(tweet["quoted_status_id_str"])) @@ -38,6 +44,8 @@ def patch(self, tweet): tweet["user"] = user if entities: tweet["entities"] = entities + if extended_entities: + tweet["extended_entities"] = extended_entities if quoted_status: tweet["quoted_status"] = quoted_status del tweet["original_id"] @@ -73,7 +81,12 @@ def get_tweet(self, twid): if is_pinned: tweet = tweet.copy() tweet["context_icon"] = "pin" - return (twid, self.patch(tweet)) + try: + return (twid, self.patch(tweet)) + except Exception as e: + print("while processing", twid) + pprint(tweet) + raise def home_view(self, uid): # this could be cached diff --git a/static/client.js b/static/client.js index 7a1173f..bbb9e4d 100644 --- a/static/client.js +++ b/static/client.js @@ -200,6 +200,12 @@ let Poll = (props) => { h("span", null, "Final results"))); }; let TweetImage = (props) => h("div", { class: "t20230624-image-div", style: { "background-image": `url('${props.src}')` }, onClick: props.onClick, title: props.title }); +let TweetVideo = (props) => { + for (let variant of props.entity.video_info.variants) { + if (variant.content_type == "video/mp4" && variant.url != null) + return h("video", { src: variant.url, poster: props.entity.media_url_https, controls: true, preload: "none", loop: props.entity.type == "animated_gif" }); + } +}; let months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; let dateFormat = (datestr) => { let now = new Date(); @@ -430,12 +436,16 @@ let Tweet = (props) => { // let userPath = "/"+props.u.screen_name; let userPath = "/profile/" + user_id_str; let embeds = []; - if (props.t.entities !== undefined && props.t.entities.media !== undefined) { - let media = props.t.entities.media; - let items = media.map((media) => h(TweetImage, { src: media.media_url_https + "?name=small", onClick: (e) => { - e.preventDefault(); - props.showMediaViewer([media.media_url_https]); - }, title: media.ext_alt_text })); + // videos appear as type="photo" in entities, and type="video" in extended_entities + let entities = props.t.extended_entities || props.t.entities; + if (entities !== undefined && entities.media !== undefined) { + let media = entities.media; + let items = media.map((media) => (media.type == "video" || media.type == "animated_gif") + ? h(TweetVideo, { entity: media }) + : h(TweetImage, { src: media.media_url_https + "?name=small", title: media.ext_alt_text, onClick: (e) => { + e.preventDefault(); + props.showMediaViewer([media.media_url_https]); + } })); if (items.length != 1) { embeds.push(h(MediaGrid, { items: items })); } diff --git a/static/client.tsx b/static/client.tsx index 9dc9a9d..4bc2350 100644 --- a/static/client.tsx +++ b/static/client.tsx @@ -38,8 +38,14 @@ type Sizes2020 = { thumb: SizeInfo }; +type VideoInfo = { + variants: {content_type: string, url: string}[] +}; + type MediaEntity = { ext_alt_text?: string, + type: string, + video_info: VideoInfo, indices: [string, string], original_info?: { // doesn't exist in archives for example width: number, @@ -76,6 +82,7 @@ type TweetInfo = { reply_count: string, id_str: string, entities?: Entities, + extended_entities?: Entities, user: LegacyProfile, user_id_str: string, created_at: string, @@ -358,6 +365,19 @@ let TweetImage = (props: { style={{"background-image": `url('${props.src}')`}} /*todo: proper escape*/ onClick={props.onClick} title={props.title}>; +let TweetVideo = (props: {entity: MediaEntity}) => { + for (let variant of props.entity.video_info.variants) { + if (variant.content_type == "video/mp4" && variant.url != null) + return