Skip to content

Commit

Permalink
serve videos from twitter archive
Browse files Browse the repository at this point in the history
  • Loading branch information
rrika committed Jan 11, 2024
1 parent 6fd0755 commit b078f3e
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 30 deletions.
78 changes: 64 additions & 14 deletions db.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,30 @@ def valid_name(self, size):
def decode_twimg(orig_url):
url = urlparse(orig_url)
if url.netloc == "abs.twimg.com":
base = url.netloc + "/" + url.path
assert url.path == "" or url.path[0] == "/"
base = url.netloc + url.path
return base, (None, None), (None, None)

elif url.netloc == "video.twimg.com":
if url.path.startswith("/ext_tw_video/"):
m = re.fullmatch(r"/ext_tw_video/[0-9]+/p(?:r|u)/(?:pl|vid(?:/avc1)?/[0-9]+x[0-9]+)/([A-Za-z0-9_-]+)\.(mp4|m3u8)", url.path)
assert m, url.path
base = "{}/{}.{}".format(url.netloc, m.group(1), m.group(2))
print(base)

elif url.path.startswith("/tweet_video/"):
m = re.fullmatch(r"/tweet_video/([A-Za-z0-9_-]+)\.(mp4)", url.path)
assert m, url.path
base = "{}/{}.{}".format(url.netloc, m.group(1), m.group(2))

elif url.path.startswith("/amplify_video/"):
m = re.fullmatch(r"/amplify_video/[0-9]+/(?:pl|vid(?:/avc1)?/[0-9]+x[0-9]+)/([A-Za-z0-9_-]+)\.(mp4|m3u8)", url.path)
assert m, url.path
base = "{}/{}.{}".format(url.netloc, m.group(1), m.group(2))

else:
assert False, url.path

return base, (None, None), (None, None)

assert url.netloc in ("pbs.twimg.com", ""), orig_url
Expand Down Expand Up @@ -201,7 +224,10 @@ def add_from_archive(self, fs, tweets_media):
if not m:
print("what about", media_fname)
continue
cache_key = "/media/"+m.group(2)
if m.group(3) == "mp4":
cache_key = "video.twimg.com/" + m.group(2) + ".mp4"
else:
cache_key = "/media/"+m.group(2)
imageset = self.media_by_url.setdefault(cache_key, ImageSet())
fmt = m.group(3)
path = tweets_media+"/"+media_fname
Expand Down Expand Up @@ -231,27 +257,51 @@ def lookup(self, url):

# replace urls in tweet/user objects

def urlmap_list(urlmap, f, l):
    """Apply the patch function *f* to every element of *l*.

    ``f(urlmap, item)`` must return a patched replacement for ``item``, or
    ``None`` when the item needs no change.

    Returns a new list in which patched items are substituted and all other
    items are carried over unchanged, or ``None`` when no item was patched
    (this includes an empty *l*). The input list is never mutated.
    """
    patched = [f(urlmap, item) for item in l]
    # Compare against None explicitly: a replacement that happens to be falsy
    # (empty string, empty dict, ...) is still a replacement.
    if all(new is None for new in patched):
        return None
    return [old if new is None else new for old, new in zip(l, patched)]

def urlmap_variant(urlmap, variant):
    """Return a copy of *variant* whose ``"url"`` went through *urlmap*.

    Returns ``None`` when the variant has no ``"url"`` key or when *urlmap*
    hands back the same URL. The input dict is left untouched.
    """
    if "url" not in variant:
        return None
    original = variant["url"]
    mapped = urlmap(original)
    if original != mapped:
        return {**variant, "url": mapped}
    return None

def urlmap_variants(urlmap, variants):
    """Patch a list of video variants via ``urlmap_variant``.

    Delegates to ``urlmap_list`` and returns whatever it returns for
    *variants*.
    """
    return urlmap_list(urlmap, urlmap_variant, variants)

def urlmap_media(urlmap, media):
    """Return a patched copy of a media entity, or ``None`` if nothing changed.

    Two places are passed through *urlmap*:

    * ``media["media_url_https"]``
    * the variant list under ``media["video_info"]["variants"]`` (via
      ``urlmap_variants``)

    The input dict and its nested ``video_info`` dict are never mutated; any
    changed level is shallow-copied first.
    """
    patched = None

    if "media_url_https" in media:
        url = media["media_url_https"]
        mapped = urlmap(url)
        if url != mapped:
            patched = media.copy()
            patched["media_url_https"] = mapped
            # No early return here: the video variants below still have to be
            # rewritten even when the media URL itself changed.

    # .get() tolerates both a missing key and an explicit None value.
    video_info = media.get("video_info")
    if video_info and "variants" in video_info:
        variants = urlmap_variants(urlmap, video_info["variants"])
        if variants:
            if patched is None:
                patched = media.copy()
            # Copy video_info too so the caller's nested dict stays intact.
            patched["video_info"] = {**video_info, "variants": variants}

    return patched

def urlmap_media_list(urlmap, media_list):
    """Patch a list of media entities via ``urlmap_media``.

    Delegates to ``urlmap_list`` instead of repeating its loop, so each
    entity is run through ``urlmap_media`` exactly once. Returns whatever
    ``urlmap_list`` returns for *media_list*.
    """
    return urlmap_list(urlmap, urlmap_media, media_list)

def urlmap_entities(urlmap, entities):
if "media" in entities:
Expand Down
17 changes: 15 additions & 2 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
server_path = os.path.dirname(__file__)
sys.path.append(server_path + "/vendor") # use bundled copy of bottle, if system has none
from bottle import parse_date, request, route, run, static_file, HTTPError, HTTPResponse
from pprint import pprint

use_twitter_cdn_for_images = False

Expand All @@ -14,7 +15,8 @@ def __init__(self, db):
# tweets

def urlmap(self, url):
if self.db.media.lookup(url):
item, cacheable = self.db.media.lookup(url)
if item:
return "/media" + url[7:] # /media/pbs.twitter.com/...
if use_twitter_cdn_for_images:
return url
Expand All @@ -29,6 +31,10 @@ def patch(self, tweet):
if "entities" in tweet:
entities = urlmap_entities(self.urlmap, tweet["entities"])

extended_entities = None
if "extended_entities" in tweet:
extended_entities = urlmap_entities(self.urlmap, tweet["extended_entities"])

quoted_status = None
if "quoted_status_id_str" in tweet:
_, quoted_status = self.get_tweet(int(tweet["quoted_status_id_str"]))
Expand All @@ -38,6 +44,8 @@ def patch(self, tweet):
tweet["user"] = user
if entities:
tweet["entities"] = entities
if extended_entities:
tweet["extended_entities"] = extended_entities
if quoted_status:
tweet["quoted_status"] = quoted_status
del tweet["original_id"]
Expand Down Expand Up @@ -73,7 +81,12 @@ def get_tweet(self, twid):
if is_pinned:
tweet = tweet.copy()
tweet["context_icon"] = "pin"
return (twid, self.patch(tweet))
try:
return (twid, self.patch(tweet))
except Exception as e:
print("while processing", twid)
pprint(tweet)
raise

def home_view(self, uid):
# this could be cached
Expand Down
22 changes: 16 additions & 6 deletions static/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ let Poll = (props) => {
h("span", null, "Final results")));
};
// Renders one tweet image as a div whose CSS background is props.src.
// props.onClick and props.title are forwarded to the div unchanged.
let TweetImage = (props) => {
    let backgroundStyle = { "background-image": `url('${props.src}')` };
    let attrs = { class: "t20230624-image-div", style: backgroundStyle, onClick: props.onClick, title: props.title };
    return h("div", attrs);
};
// Renders a <video> element for a media entity, using the first variant whose
// content_type is "video/mp4" and whose url is set. The entity's
// media_url_https is used as the poster, and animated GIFs loop.
// Returns null (renders nothing) when the entity carries no video_info or has
// no usable MP4 variant.
let TweetVideo = (props) => {
    let info = props.entity.video_info;
    // Tolerate entities without video_info instead of throwing on .variants.
    let variants = (info && info.variants) || [];
    let mp4 = variants.find((variant) => variant.content_type == "video/mp4" && variant.url != null);
    if (mp4 === undefined)
        return null;
    return h("video", { src: mp4.url, poster: props.entity.media_url_https, controls: true, preload: "none", loop: props.entity.type == "animated_gif" });
};
let months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
let dateFormat = (datestr) => {
let now = new Date();
Expand Down Expand Up @@ -430,12 +436,16 @@ let Tweet = (props) => {
// let userPath = "/"+props.u.screen_name;
let userPath = "/profile/" + user_id_str;
let embeds = [];
if (props.t.entities !== undefined && props.t.entities.media !== undefined) {
let media = props.t.entities.media;
let items = media.map((media) => h(TweetImage, { src: media.media_url_https + "?name=small", onClick: (e) => {
e.preventDefault();
props.showMediaViewer([media.media_url_https]);
}, title: media.ext_alt_text }));
// videos appear as type="photo" in entities, and type="video" in extended_entities
let entities = props.t.extended_entities || props.t.entities;
if (entities !== undefined && entities.media !== undefined) {
let media = entities.media;
let items = media.map((media) => (media.type == "video" || media.type == "animated_gif")
? h(TweetVideo, { entity: media })
: h(TweetImage, { src: media.media_url_https + "?name=small", title: media.ext_alt_text, onClick: (e) => {
e.preventDefault();
props.showMediaViewer([media.media_url_https]);
} }));
if (items.length != 1) {
embeds.push(h(MediaGrid, { items: items }));
}
Expand Down
41 changes: 33 additions & 8 deletions static/client.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ type Sizes2020 = {
thumb: SizeInfo
};

// Video metadata attached to a media entity; TweetVideo scans `variants`
// to pick a playable source.
type VideoInfo = {
// Each variant pairs a content type (TweetVideo looks for "video/mp4") with a URL.
// NOTE(review): TweetVideo null-checks `url`, so it may be absent at runtime —
// confirm against real data and consider `url?: string`.
variants: {content_type: string, url: string}[]
};

type MediaEntity = {
ext_alt_text?: string,
type: string,
video_info: VideoInfo,
indices: [string, string],
original_info?: { // doesn't exist in archives for example
width: number,
Expand Down Expand Up @@ -76,6 +82,7 @@ type TweetInfo = {
reply_count: string,
id_str: string,
entities?: Entities,
extended_entities?: Entities,
user: LegacyProfile,
user_id_str: string,
created_at: string,
Expand Down Expand Up @@ -358,6 +365,19 @@ let TweetImage = (props: {
style={{"background-image": `url('${props.src}')`}} /*todo: proper escape*/
onClick={props.onClick} title={props.title}></div>;

// Renders a <video> element for a media entity, using the first variant whose
// content_type is "video/mp4" and whose url is set. The entity's
// media_url_https is used as the poster, and animated GIFs loop.
// Returns null (renders nothing) when the entity carries no video_info or has
// no usable MP4 variant.
let TweetVideo = (props: {entity: MediaEntity}) => {
    let info = props.entity.video_info;
    // Tolerate entities without video_info instead of throwing on .variants.
    let variants = (info && info.variants) || [];
    let mp4 = variants.find((variant) => variant.content_type == "video/mp4" && variant.url != null);
    if (mp4 === undefined)
        return null;
    return <video
        src={mp4.url}
        poster={props.entity.media_url_https}
        controls={true}
        preload="none"
        loop={props.entity.type == "animated_gif"}
    />;
};

let months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

let dateFormat = (datestr: string | number) => {
Expand Down Expand Up @@ -620,14 +640,19 @@ let Tweet = (props: TweetProps) => {
let userPath = "/profile/"+user_id_str;

let embeds = [];
if (props.t.entities !== undefined && props.t.entities.media !== undefined) {
let media = props.t.entities.media;
let items = media.map((media: MediaEntity) => <TweetImage src={media.media_url_https + "?name=small"} onClick={
(e: JSX.TargetedMouseEvent<HTMLElement>) => {
e.preventDefault();
props.showMediaViewer([media.media_url_https]);
}
} title={media.ext_alt_text}/>);
// videos appear as type="photo" in entities, and type="video" in extended_entities
let entities = props.t.extended_entities || props.t.entities;
if (entities !== undefined && entities.media !== undefined) {
let media = entities.media;
let items = media.map((media: MediaEntity) =>
(media.type == "video" || media.type == "animated_gif")
? <TweetVideo entity={media}/>
: <TweetImage src={media.media_url_https + "?name=small"} title={media.ext_alt_text} onClick={
(e: JSX.TargetedMouseEvent<HTMLElement>) => {
e.preventDefault();
props.showMediaViewer([media.media_url_https]);
}
}/>);
if (items.length != 1) {
embeds.push(<MediaGrid items={items}/>);
} else {
Expand Down

0 comments on commit b078f3e

Please sign in to comment.