forked from houshuang/folders2web
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdokuwiki.rb
518 lines (425 loc) · 16.3 KB
/
dokuwiki.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
# encoding: UTF-8
# researchr scripts relevant to BibDesk (the right one is executed from the bottom of the file)
$:.push(File.dirname($0))
require 'utility-functions'
require 'appscript'
#### utility functions ####
# Returns the text currently selected in the active tab of Chrome window 1, or
# nil when nothing is selected.
#
# The clipboard is blanked first so that stale clipboard content cannot be
# mistaken for a selection; as a side effect the clipboard ends up holding the
# selection (or an empty string).
def has_selection
  pbcopy('')
  @chrome.windows[1].active_tab.copy_selection
  selected = pbpaste
  return nil unless selected.size > 0
  selected
end
# Searches BibDesk for the text currently selected in Chrome and reports, via
# growl and stdout, whether a matching publication exists. When there is at
# least one match, the first one is selected in BibDesk.
#
# Raises RuntimeError when no text is selected, or when the BibDesk search
# could not be carried out.
def check_bibdesk
  search = has_selection
  fail "No text selected" unless search

  found = try { Appscript.app("BibDesk").document.search({:for=>search.strip}) }
  # Elsewhere in this file a falsy result from `try` is treated as "the block
  # failed"; without this guard a failed search died on `found.size`.
  fail "Could not search BibDesk" unless found

  if found.size > 0
    msg = "Matching citation exists in BibDesk: #{found[0].cite_key.get}"
    msg << ", and #{found.size - 1} more" unless found.size == 1
    found[0].select
  else
    msg = "No matching citation in BibDesk"
  end

  growl msg
  puts msg
end
# Returns the URL of the active tab of Chrome window 1, with surrounding
# whitespace stripped.
#
# When the URL contains Internet_path or Server_path, a trailing "?s[]..."
# query is cut off (presumably DokuWiki's search-highlight parameter - TODO
# confirm). String#remove! is a helper defined outside this file.
def cururl
  address = @chrome.windows[1].active_tab.get.URL.get.strip
  on_own_wiki = address.index(Internet_path) || address.index(Server_path)
  address.remove!(/\?s\[\](.+?)$/) if on_own_wiki
  address
end
# Returns the title of the active tab of Chrome window 1, with surrounding
# whitespace stripped.
def curtitle
  window = @chrome.windows[1]
  active_tab = window.get.tabs[window.get.active_tab_index.get]
  active_tab.get.title.get.strip
end
# Replaces the BibTeX string on the clipboard with the result of running it
# through cleanup_bibtex_string. Fails when that cleanup yields nothing.
def clean_bibtex
  cleaned = try { cleanup_bibtex_string(pbpaste) }
  unless cleaned
    fail "Could not parse BibTeX string, maybe the page next to Google Scholar was not the BibTeX page?"
  end
  pbcopy(cleaned)
end
# Gets the BibTeX for the page in the current Chrome tab and returns it after
# passing it through cleanup_bibtex_string.
#
# Pages whose URL contains "/ref:" or "herokuapp" (researchr or scrobblr) carry
# the BibTeX in the page itself, so it is read out with JavaScript and stripped
# of markup. Any other page URL is handed to the BibSonomy scraper service.
#
# Raises RuntimeError when the scraper returned nothing, or when the final
# BibTeX contains no "author".
def get_bibtex_from_page
  bibtex = ''
  page_url = cururl

  if page_url.index("/ref:") || page_url.index("herokuapp")
    # the BibTeX lives in a (hidden) element of the page
    query = page_url.index("herokuapp") ? "getElementById('bibtex')" : "querySelectorAll('.code')[0]"
    js = "document.#{query}.innerHTML;"
    bibtex = @chrome.windows[1].get.tabs[@chrome.windows[1].get.active_tab_index.get].get.execute(:javascript => js)
    # NOTE(review): if gsubs! applies these pairs in order, the "<b>Bibtex:</b>"
    # pair can never match because tags are stripped first - confirm.
    bibtex.gsubs!(
      [/\<(.+?)\>/, ''],             # plugins might insert random HTML tags
      [/keywords.+?\}\,\n/i, ''],    # no keywords, we want to assign our own
      ["<b>Bibtex:</b>", ''],        # not part of bibtex string
      ["&amp;", '&'],                # innerHTML hands back ampersands HTML-escaped
      [/bdsk\-file.+?\}/mi, '}'],    # the local bibdesk file reference is useless
      ["read = {1},\n", ''],         # others might have read it, we haven't yet
      [/\}\n/m, "},\n"],             # fix comma after any lines cleaned of tags
    )
    bibtex.strip! # the original called .strip here and threw the result away
    bibtex << "}" unless bibtex.scan("{").size == bibtex.scan("}").size # ensure right number of closing brackets

  # elsif cururl.index("wikipapers.referata.com")
  # title = try { cururl.match(/wikipapers.referata.com\/wiki\/(.+?)$/)[0] }
  # if title
  # require 'open-uri'
  # newurl = "http://wikipapers.referata.com/wiki/Special:Ask/-5B-5Btitle::#{title}/format%3Dbibtex"
  # puts newurl
  # bibtex = try { open(newurl).read }
  # end
  # fail "Could not acquire citation from Wikipapers" unless defined?(bibtex) && bibtex
  # puts bibtex
  else
    require 'open-uri'
    require 'cgi'
    # the page URL is a query-string value, so it has to be escaped or its own
    # "?" and "&" would be read as parameters of the scraper request
    url = "http://scraper.bibsonomy.org/service?url=#{CGI.escape(page_url)}&format=bibtex"
    bibtex = try { open(url).read }
    fail "Could not fetch BibTeX from the BibSonomy scraper" unless bibtex
    bibtex.force_encoding("UTF-8") # erroneously returns as ASCII-8bit
  end

  # final sanity check
  bibtex = cleanup_bibtex_string(bibtex)
  raise "BibTeX citation has no author field" unless bibtex.index("author")
  bibtex
end
# Downloads the PDF linked from the citation on the current page.
#
# Reads the page's BibTeX, takes the first "url = {...}" value in it and
# downloads that to /tmp/pdftmp.pdf via dl_file.
#
# Returns the path of the downloaded file.
# Fails when the BibTeX cannot be read or contains no url field.
def get_pdf_from_refpage
  citation = try { get_bibtex_from_page }
  fail "Could not read BibTeX metadata from page" unless citation

  pdf_url = try { citation.scan(/url = \{(.+?)\}/).first.first }
  fail "Citation does not have a linked Open Access PDF" unless pdf_url

  target = "/tmp/pdftmp.pdf"
  growl "Attempting to automatically download the linked PDF"
  dl_file(pdf_url, target, PDF_content_types)
  target
end
# Opens the PDF named in a skimx:// URL in Skim, optionally jumping to a page.
# Launched by a skimx:// url in Chrome (skimx.app must be registered first).
#
# argv - the full URL string, e.g. "skimx://citekey#12"; the first 8 characters
#        (the "skimx://" scheme) are dropped, the rest is "<pdf>[#<page>]".
#
# When the current Chrome page is on one of My_domains the PDF is looked up in
# PDF_path; otherwise an attempt is made to download it from the page's
# citation. Growls "File not found" when the resulting file does not exist.
def url(argv)
  require 'uri'
  # "#" may arrive literally or already encoded as %23, so normalise first
  pdf, page = argv[8..-1].gsub("#", "%23").split("%23")

  # check if this is my page, or someone else's
  if My_domains.index( URI.parse(cururl).host )
    fname = "#{PDF_path}/#{pdf}.pdf"
  else
    fname = try { get_pdf_from_refpage }
    fail "Not able to automatically download PDF" unless fname
  end

  # File.exists? is deprecated and no longer available in Ruby 3.2+
  if File.exist?(fname)
    skim = Appscript.app('skim')
    dd = skim.open(fname)
    dd.go({:to => dd.pages.get[page.to_i-1]}) unless page.nil?
    skim.activate
  else
    growl("File not found", "Cannot find PDF #{fname}")
  end
end
#### keyboard commands ####
# If Ctrl+Cmd+Alt+G is invoked and the current tab is not Google Scholar,
# assume that it is a foreign wiki and try to import its citation into BibDesk.
#
# After importing, when the citation carries a "url = {...}" field pointing at
# an http address, the PDF is downloaded to /tmp/pdftmp.pdf, linked to the new
# BibDesk entry and auto-filed. A citation without a url field is imported and
# nothing more is done.
#
# Fails when no BibTeX can be extracted from the page or the PDF download
# leaves no file behind; exits when the url does not contain "http".
def import_bibtex
  #fail "This page is not a Researchr wiki, cannot import citation" unless cururl.downcase.index("/ref:") || cururl.downcase.index("herokuapp")
  bibtex_final = try {get_bibtex_from_page}
  fail "Could not extract BibTeX citation from this page" unless bibtex_final

  bibdesk = Appscript.app("BibDesk")
  bibdesk.activate
  document = bibdesk.document.get[0].import({:from => bibtex_final})
  citekey = document[0].cite_key.get

  # String#scan always returns an array (truthy), so it cannot be used as the
  # condition itself; a citation without a url used to crash on `$~[1]`.
  urls = bibtex_final.scan(/url \= \{(.+?)\}/)
  return if urls.empty?

  # the original read $~, which after scan holds the last match
  fname = urls.last[0]
  exit unless fname.index("http")

  tmp_pdf = "/tmp/pdftmp.pdf"
  growl "Attempting to automatically download and link PDF..."
  # clear any leftover from a previous run, so the size check below is meaningful
  File.delete(tmp_pdf) if File.exist?(tmp_pdf)
  try { dl_file(fname, tmp_pdf, PDF_content_types) }
  # unless File.size?("/tmp/pdftmp.pdf") && Proxy_url != ''
  # fname.gsub!(/^(.+?)\:\/\/(.+?)\/(.+?)$/,"\1://\2.#{Proxy_url}/\3")
  # try { dl_file(fname, "/tmp/pdftmp.pdf", "application/pdf") }
  # end
  unless File.size?(tmp_pdf)
    fail "Not able to download file from #{fname}"
  end

  d = bibdesk.search({:for=>citekey})
  f = MacTypes::FileURL.path(tmp_pdf)
  d[0].linked_files.add(f,{:to =>d[0]})
  d[0].auto_file
  growl("PDF added", "File added successfully to #{citekey}")
end
# Adds the current wiki page to the RSS feed (Ctrl+Alt+Cmd+F). The entry is
# stored in a temp file, which will be formatted the next time bibtex-batch is
# executed.
#
# The rendered page is fetched from the local wiki in print mode, stripped of
# title, table of contents, citation tooltips and included clippings, and
# stored (marshalled) in "#{Wiki_path}/rss-temp" together with title, link and
# timestamp. An entry with the same link is replaced rather than duplicated.
# Once there are more than 15 entries, the first one is dropped.
def add_to_rss
  require 'open-uri'
  require 'cgi'
  fname = Wiki_path + "/rss-temp"
  internalurl = cururl.split("/").last
  url = "#{Internet_path}/#{internalurl}"

  # load existing holding file, or start from scratch
  # (File.exists? is deprecated and no longer available in Ruby 3.2+)
  if File.exist?(fname)
    rss_entries = Marshal::load(File.read(fname))
  else
    rss_entries = Array.new
  end

  page_contents = open("http://localhost/wiki/#{internalurl}?vecdo=print").read
  contents = page_contents.scan(/<\!\-\- start rendered wiki content \-\-\>(.+?)\<\!\-\- end rendered wiki content \-\-\>/m)[0][0]
  contents.gsub!(/\<div class\=\"hiddenGlobal(.+?)\<div class\=\"plugin_include_content/m, '<div ')

  # remove title (already given in metadata)
  contents.remove!(
    /\<h1 class\=\"sectionedit1\"\>(.+?)\<\/a\>\<\/h1\>/,
    /\<\!\-\- TOC START \-\-\>(.+?)\<\!\-\- TOC END \-\-\>/m,
    /\<span class\=\"tip\"\>(.+?)\<\/span\>/, # remove citation tooltips
    /\<div class\=\"plugin\_include\_content\ plugin\_include\_\_clip(.+?)\<\/div\>/m, # remove wiki clippings
    /\<div class\=\"plugin\_include\_content\ plugin\_include\_\_kindle(.+?)\<\/div\>/m
  )

  title = page_contents.scan(/\<h1(.+?)id(.+?)>(.+)\<(.+?)\<\/h1\>/)[0][2]
  title = CGI.unescapeHTML(title)
  entry_contents = {:title => title, :date => Time.now, :link => url, :description => contents}

  # replace an existing entry for this page, otherwise append a new one
  exists = false
  rss_entries.map! do |entry|
    if entry[:link] == url
      exists = true
      entry_contents
    else
      entry
    end
  end
  rss_entries << entry_contents unless exists

  rss_entries = rss_entries.drop(1) if rss_entries.size > 15
  File.write(fname, Marshal::dump(rss_entries))

  if exists
    growl("Article updated", "Article #{title} updated")
  else
    growl("Article added to feed", "'#{title}' added to RSS feed")
  end
end
# Appends the text selected in Chrome (or, with no selection, just a link as a
# bullet item) to the bottom of a wiki page, followed by a context-relevant
# reference to the page it came from. Called by clip and clip_again.
#
# pagename - wiki page to append to; it is created with an h1 heading when its
#            file does not exist yet
# titletxt - link title to use for non-wiki sources; falls back to the current
#            tab title when nil
# onlytext - when true, no source reference is appended
#
# The reference is "[@citekey]" for ref: pages, an internal "[[:Page]]" link
# for other pages on localhost/wiki, and "[[url|title]]" for anything else.
def do_clip(pagename, titletxt, onlytext = false)
  pagepath = ("#{Wiki_path}/data/pages/#{clean_pagename(pagename)}.txt").gsub(":","/")
  curpage = cururl.split("/").last
  sel = has_selection

  # format properly if citation
  if onlytext
    curpage = ''
  elsif curpage.index("ref:")
    curpage = "[@#{curpage.split(':').last.downcase}]"
  elsif cururl.index("localhost/wiki")
    curpage = "[[:#{capitalize_word(curpage.gsub("_", " "))}]]"
  else
    title = titletxt || curtitle
    curpage ="[[#{cururl}|#{title}]]"
  end

  insert = (sel ? "#{sel} " : " * " ) # any text, or just a link (bullet list)
  insert.gsubs!( {:all_with=> "\n\n"}, "\n", "\n\n\n" )

  # File.exists? is deprecated and no longer available in Ruby 3.2+
  if File.exist?(pagepath)
    prevcont = File.read(pagepath)
    haslinks = prevcont.match(/\-\-\-(\n \*[^\n]+?)+?\Z/m) # a "---"" followed by only lines starting with " * "
    # bullet lists need an extra blank line after them before the "----"
    if sel
      divider = (haslinks ? "\n\n----\n" : "\n----\n")
    else
      divider = (haslinks ? "\n" : "\n----\n")
    end
    growltext = "Selected text added to #{pagename}"
  else
    prevcont = "h1. #{capitalize_word(pagename)}\n\n"
    divider = '' # a new page needs no divider (previously an implicit nil)
    growltext = "Selected text added to newly created #{pagename}"
  end

  filetext = [prevcont, divider, insert, curpage].join
  dwpage(pagename, filetext)
  growl("Text added", growltext)
end
# Asks for a page name and appends the text selected on the current page to
# that wiki page, with a proper citation.
#
# The dialogue offers a checkbox for inserting the pure text only (disabled
# when nothing is selected) and a text box for the link title. The page last
# clipped to, when known, is offered as default. The choices are stored in
# /tmp/dokuwiki-clip.tmp for clip_again.
def clip
  require 'pashua'
  title = curtitle.strip

  gui = "
ob.type = checkbox
ob.label = do not include citation information, only insert pure text
fb.type = textbox
fb.default = #{title}
fb.label = Link title\n"
  # no point in only inserting text, if no text selected
  gui << "ob.disabled = 1\n" unless has_selection

  # get last page inserted to as default, if exists
  lastclip = try { File.read("/tmp/dokuwiki-clip.tmp").split("\n") }
  gui << "cb.default = #{lastclip[0]}\n" if lastclip

  answers = wikipage_selector("Which wikipage do you want to add text to?", true, gui)
  exit if answers["cancel"] == 1

  onlytext = (answers['ob'] == "1")
  pagename = answers['cb'].strip
  pashua_title = answers['fb'].strip
  # only pass a title on when the user actually changed it
  if title.strip == pashua_title.strip
    filetitle = nil
  else
    filetitle = pashua_title
  end

  # store for clip_again
  File.write("/tmp/dokuwiki-clip.tmp", "#{pagename}\n#{cururl}\n#{filetitle}\n#{onlytext}")
  do_clip(pagename, filetitle, onlytext)
end
# Repeats the last clipping: uses the page name, URL, title and text-only flag
# that clip stored in /tmp/dokuwiki-clip.tmp.
#
# The stored title is only reused when it is non-blank and the current page is
# still the one it was stored for; otherwise the current tab title is used.
# Text-only mode is only honoured when there is a selection.
def clip_again
  stored = File.read("/tmp/dokuwiki-clip.tmp")
  page, url, title, onlytext_s = stored.split("\n")
  onlytext = (onlytext_s == 'true' && has_selection)
  if title.strip == "" || url != cururl
    title = curtitle
  end
  do_clip(page, title, onlytext)
end
# Turns the text on the clipboard into a DokuWiki bullet list and prints it to
# stdout. The text may be separated by line shifts, "1) 2)" numbering,
# semicolons, full stops, question marks, commas or spaces.
# There is quite a lot of black magic and guessing in here, a wonder it mostly
# works.
#
# An empty clipboard yields an empty list instead of an error, and a final
# item of the form "x and y and z" is split into all of its parts.
# String#remove is a helper defined outside this file.
def bulletlist
  b = pbpaste
  a = b.remove(/^[\t]*\*/) # strip off bullet etc from beginning

  # determine whether to split on newline, space or comma
  if a.scan("\n").size > 1
    splt = "\n"
  elsif a.scan(")").size > a.scan("(").size + 2
    # clearly more ")" than "(": looks like a "1) 2) 3)" style list
    splt = ")"
    a.gsub!(/[, (]*\d+\)/,")")
  elsif a.scan(";").size > 1
    splt = ";"
  elsif a.scan(".").size > 2
    splt = "."
  elsif a.scan("?").size > 2
    splt = "?"
  elsif a.scan(",").size < 1
    splt = " "
  else
    splt = ","
  end
  splits = a.split(splt)

  # deal with situation where the last two items are delimited with "and", but
  # not for line shift or 1) 2) kind of lists
  last = splits.last # nil when the clipboard was empty
  if last && last.index(" and ") && !(splt == "\n" || splt == ")")
    splits.pop
    splits.concat(last.split(" and "))
  end

  out = ''
  splits.each do |item|
    # NOTE(review): the last two patterns are empty strings as seen here; they
    # may have been bullet glyphs that did not survive - confirm against the
    # original file.
    i = item.remove(
      /p\. *\d+$/,
      ", and",
      /[\.\*]/,
      /^ *and /,
      /\.$/,
      "•",
      "",
      ""
    ).strip
    out << " * #{i}\n" if i.size > 0
  end
  puts out
end
# Presents a wiki page selector and points the active Chrome tab at the chosen
# page on the local wiki. Exits when no page was chosen.
def go
  require 'pashua'
  chosen = wikipage_selector("Jump to which page?")
  exit unless chosen

  window = @chrome.windows[1]
  active_tab = window.get.tabs[window.get.active_tab_index.get]
  active_tab.get.URL.set("http://localhost/wiki/#{chosen}")
end
# Moves the last screenshot to the DokuWiki media folder, and puts a properly
# formatted link to that image on the clipboard.
#
# local - 1 (default) takes the newest "Screen*.png" from the Desktop; any
#         other value takes the newest *.JPG from the newest Photo Stream
#         folder, removes that folder afterwards and resamples the image to a
#         width of 487.
#
# Fails when the current page is not under Internet_path or when the target
# file already exists; exits when there is no image to move.
def image(local=1)
  unless cururl.index(Internet_path)
    fail "You can only do this on a Researchr wikipage"
  end

  # NOTE(review): 22 is presumably the length of the wiki base URL - confirm
  wiki = cururl[22..-1]
  w, _query = wiki.split("?")
  wikipage = w.gsubs({:all_with => "_"}, ":", "%3A", "%20").downcase

  dir = nil
  if local == 1
    curfile = File.last_added("#{Home_path}/Desktop/Screen*.png") # this might be different between different OSX versions
  else
    dir = File.last_added_dir(Photostream_path) # this might be different between different OSX versions
    curfile = File.last_added(dir+"*.JPG")
  end

  if curfile.nil?
    growl("No screenshots available")
    exit
  end

  newfilename, pagenum = filename_in_series("#{Wiki_path}/data/media/pages/#{wikipage}",".png")
  p newfilename
  # File.exists? is deprecated and no longer available in Ruby 3.2+
  if File.exist?(newfilename)
    pbcopy("")
    fail("File already exists, aborting!")
  end

  puts %Q(mv "#{curfile.strip}" "#{newfilename}")
  `mv "#{curfile.strip}" "#{newfilename}"`

  # Only for images from iCloud. This used to test `defined?(dir)`, which is
  # truthy for any local variable the parser has seen assigned, so desktop
  # screenshots were resampled too and `rm -rf ""` was run.
  if dir
    `rm -rf "#{dir}"`
    `sips --resampleWidth 487 "#{newfilename}"`
  end

  `touch "#{newfilename}"` # to make sure it comes up as newest next time we run filename_in_series
  pbcopy("{{pages:#{wikipage}#{pagenum}.png}}")
end
# Previews (with qlmanage) the image last added to the newest folder under
# Photostream_path. Fails when there is none.
def preview_iphone_image
  newest_dir = File.last_added_dir(Photostream_path) # this might be different between different OSX versions
  newest_file = File.last_added(newest_dir)
  fail("No screenshots available") if newest_file == nil
  `qlmanage -p '#{newest_file}'`
end
# Asks for the name of a wiki page and shows it, in editing mode, side by side
# with the current page by replacing the current document with a two-column
# frameset. The current page is shown in editing mode too when it is a local
# wiki page; any other page is shown through Instapaper's text view.
def sbs
  page = wikipage_selector("Choose page to view side-by-side with the current page")
  exit unless page

  left =
    if cururl.index("localhost/wiki")
      "#{cururl}?do=edit&vecdo=print"
    else
      # uses Instapaper to nicely format the article text, for fitting into a split-screen window
      "http://www.instapaper.com/text?u=\"+encodeURIComponent(\"#{cururl}\")+\""
    end
  right = "http://localhost/wiki/#{page.gsub(" ","_")}"

  js = "var MyFrame=\"<frameset cols=\'*,*\'><frame src=\'#{left}\'><frame src=\'#{right}?do=edit&vecdo=print\'></frameset>\";with(document) { write(MyFrame);};return false;"
  window = @chrome.windows[1]
  window.get.tabs[window.get.active_tab_index.get].get.execute(:javascript => js)
end
# Asks for a name, creates a new author page from a template and opens it for
# editing in the local wiki.
#
# NOTE(review): the page is written below the hard-coded "/wiki/data/pages/a/",
# whereas other functions in this file build paths from Wiki_path - confirm
# that this is intended.
def newauthor
  require 'Pashua'
  include Pashua

  config = <<EOS
*.title = Add a new author page
cb.type = textfield
cb.label = Name of author page to create
cb.width = 220
db.type = cancelbutton
db.label = Cancel
db.tooltip = Closes this window without taking action
EOS
  answers = pashua_run config
  exit if answers["cancel"] == 1

  page = answers["cb"]
  pname = "/wiki/data/pages/a/#{clean_pagename(page)}.txt"
  template = "h1. #{page}\n\nh2. Research\n\nh2. Links\n * [[ |Homepage]]
\n{{page>abib:#{page}}}"
  File.open(pname, "w") do |f|
    f << template
  end

  `chmod a+rw "#{pname}"`
  `open "http://localhost/wiki/a:#{page}?do=edit"`
end
# Removes the current page and all related pages (ref, notes, skimg, kindle,
# clip) after confirmation, and growls how many page files were deleted.
#
# When the current page lives in one of the related namespaces, the page of
# the same name is deleted from every one of them; otherwise only the current
# page's own file is deleted.
def delete
  require 'pashua'
  include Pashua

  config = <<EOS
*.title = Delete this page?
cb.type = text
cb.text = This action will delete this page, and all related pages (ref:, notes:, skimg:, kindle:, etc). Are you sure?
cb.width = 220
db.type = cancelbutton
db.label = Cancel
EOS
  answers = pashua_run config
  exit if answers['db'] == "1"

  pname = cururl.split("/").last.downcase
  parts = pname.split(":")
  page = parts.last
  ns = parts.first

  directories = %w[ref notes skimg kindle clip]
  paths =
    if directories.index(ns)
      directories.map { |dir| "#{Wiki_path}/data/pages/#{dir}/#{page}.txt" }
    else
      ["#{Wiki_path}/data/pages/#{clean_pagename(pname).gsub(":", "/")}.txt"]
    end

  # files that are missing (or otherwise cannot be deleted) are not counted
  deleted = paths.count { |path| try { File.delete(path) } }
  growl "#{deleted} pages deleted"
end
#### Running the right function, depending on command line input ####
# The first command line argument names the function to call, any further
# arguments are passed on to it. Nothing is run without arguments.
@chrome = Appscript.app('Google Chrome')
send(*ARGV) unless ARGV.empty?