-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtranslateVTT.R
71 lines (53 loc) · 2.29 KB
/
translateVTT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# this is a translation function for vtt files
# note: this function is accessible using R package translateVTT
# installing my R package from github
# devtools::install_github("vzhomeexperiments/translateVTT")
# (C) 2017 Vladimir Zhbanko
translateVTT <- function(fileName, sourceLang = "en", destLang, apikey,
                         fileEnc = "UTF-8") {
  # PURPOSE: Translate a *.vtt file from one language to another using the
  # Google Translate API (through the translateR package).
  # Note: Google Translation with API is a paid service, however 300USD is
  # given for free for 12 months.
  #
  # ARGUMENTS:
  #   fileName   - path to the *.vtt file; its first line must be "WEBVTT"
  #   sourceLang - language code of the text in the file (default "en")
  #   destLang   - language code to translate into, e.g. "de"
  #   apikey     - Google API key, passed on to translateR::translate()
  #   fileEnc    - encoding used to read the input and write the output
  #
  # RESULT: writes the translated file to paste0(fileName, destLang, ".vtt").
  # Lines containing "-->" (cue timings) are copied unchanged; every other
  # line is sent for translation. Returns NULL invisibly.
  #
  # variables for debugging
  # fileName <- filesToTranslate[1]
  # sourceLang <- "en"
  # destLang <- "de"
  # apikey <- api_key

  stopifnot(
    is.character(fileName), length(fileName) == 1L,
    is.character(destLang), length(destLang) == 1L
  )
  if (!file.exists(fileName)) {
    stop("File not found: ", fileName, call. = FALSE)
  }
  # fail loudly when the dependency is missing (require() only returns FALSE)
  if (!requireNamespace("translateR", quietly = TRUE)) {
    stop("Package 'translateR' is required but not installed.", call. = FALSE)
  }

  # read file -> one-column data frame named after the "WEBVTT" header line.
  # quote = "" keeps double quotes in subtitle text literal instead of letting
  # them be parsed as field quoting; fileEncoding honours the fileEnc argument.
  vtt <- utils::read.delim(fileName,
                           stringsAsFactors = FALSE,
                           colClasses = "character",
                           quote = "",
                           fileEncoding = fileEnc)
  if (!identical(names(vtt), "WEBVTT")) {
    stop("Expected a single-column file whose first line is 'WEBVTT': ",
         fileName, call. = FALSE)
  }

  # rows containing timestamps must not be translated
  isTiming <- grepl("-->", vtt[["WEBVTT"]], fixed = TRUE)
  outLines <- vtt[["WEBVTT"]]

  # only call the (paid) service when there is something to translate
  if (any(!isTiming)) {
    textRows <- vtt[!isTiming, , drop = FALSE]
    translated <- translateR::translate(dataset = textRows,
                                        content.field = "WEBVTT",
                                        google.api.key = apikey,
                                        source.lang = sourceLang,
                                        target.lang = destLang)
    # translateR appends the result as column 'translatedContent'; the
    # 'WEBVTT' column still holds the untranslated source text
    translatedText <- translated[["translatedContent"]]
    if (is.null(translatedText) || length(translatedText) != nrow(textRows)) {
      stop("Translation did not return one result per text line.",
           call. = FALSE)
    }
    # put translated lines back at their original positions
    outLines[!isTiming] <- as.character(translatedText)
  }

  # an empty first row separates the "WEBVTT" header (written by write.table
  # as the column name) from the first cue
  result <- data.frame(WEBVTT = c("", outLines), stringsAsFactors = FALSE)

  # write this file back :_)
  utils::write.table(result,
                     file = paste0(fileName, destLang, ".vtt"),
                     quote = FALSE,
                     row.names = FALSE,
                     fileEncoding = fileEnc)
}