forked from ghoseb/planet.clojure
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadmin_cb.py
executable file
·141 lines (86 loc) · 3.21 KB
/
admin_cb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""CGI callback behind the planet admin page.

Sets up the working directory and import path for a Venus installation,
loads the planet configuration and parses the request parameters that the
rest of this script acts on.
"""
import cgi
import cgitb
# Enable CGI traceback reporting before anything else can fail.
cgitb.enable()
import os
import sys
from urllib import unquote

# --- Site settings: adjust these for your installation ---------------------
# Directory from which planet is normally run.
BASE_DIR = '..'
# Venus installation directory, relative to BASE_DIR.
VENUS_INSTALL = "venus"
# Planet config file, relative to BASE_DIR.
CONFIG_FILE = "config/live"
# URL of the admin page, relative to the URL of this script.
ADMIN_URL = "admin.html"

# Work from the planet directory; paths in the config may be relative to it.
os.chdir(os.path.abspath(BASE_DIR))

# Make venus importable.  The shell directory is added explicitly as well
# because auto detection does not work here.
sys.path.extend([
    VENUS_INSTALL,
    os.path.join(VENUS_INSTALL, "planet", "shell"),
])

# Planet modules can only be imported once the path is set up.
from planet import config
from planet.spider import filename

# Read the planet configuration.
config.load(CONFIG_FILE)

# Parse the query parameters of this request.
form = cgi.FieldStorage()
# Send the CGI header and the opening page markup straight away, in a single
# write.  The empty element produces the blank line that ends the headers.
sys.stdout.write("\n".join((
    "Content-Type: text/html;charset=utf-8",
    "",
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
    '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="sv"><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>Admin results</title></head><body>',
    '<div>',
)) + "\n")

# Look up the cache directory and the blacklist directory from the config.
cache = config.cache_directory()
blacklist = config.cache_blacklist_directory()
def _html_escape(text):
    """Return ``text`` with HTML-special characters replaced by entities.

    Both quote characters are escaped, so the result can be placed in
    element content as well as in single- or double-quoted attribute values.
    """
    return (text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace('"', "&quot;")
                .replace("'", "&#39;"))


# Dispatch on the "command" request parameter.
#
# getfirst() returns None when the parameter is missing and still returns a
# plain string when it was supplied more than once (form['command'].value
# fails in that case because form['command'] is then a list).
#
# Every value that originates from the request or from the file system is
# passed through _html_escape() before being written into the page.
#
# print(...) is always called with exactly one argument so that each line
# behaves the same whether ``print`` is a statement or a function.
command = form.getfirst("command")

if command == "blacklist":
    # Create the blacklist dir if it does not exist
    if not os.path.exists(blacklist):
        os.mkdir(blacklist)
        print("<p>Created directory %s</p>" % _html_escape(blacklist))

    # URLs to blacklist arrive as parameters of the form bl[n]=url
    for key in form.keys():
        if not key.startswith("bl"):
            continue
        # getlist() also covers a key that was submitted more than once.
        for raw_url in form.getlist(key):
            # NOTE(review): the form parser presumably URL-decodes values
            # already; the extra unquote() is kept from the original on the
            # assumption that the admin page encodes the URLs itself - confirm.
            url = unquote(raw_url)

            # find corresponding files
            cache_file = filename(cache, url)
            blacklist_file = filename(blacklist, url)

            # move to blacklist if found
            if os.path.exists(cache_file):
                os.rename(cache_file, blacklist_file)
                safe_url = _html_escape(url)
                print("<p>Blacklisted <a href='%s'>%s</a></p>" % (safe_url, safe_url))
            else:
                print("<p>Unknown file: %s</p>" % _html_escape(cache_file))

    print("""
<p>Note that blacklisting does not automatically
refresh the planet. You will need to either wait for
a scheduled planet run, or refresh manually from the admin interface.</p>
""")

elif command == "run":
    # run spider and refresh
    from planet import spider, splice
    try:
        spider.spiderPlanet(only_if_new=False)
        print("<p>Successfully ran spider</p>")
    except Exception:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the clause parses on every Python version.
        print("<p>%s</p>" % _html_escape(str(sys.exc_info()[1])))

    # The planet is refreshed even when the spider reported an error.
    doc = splice.splice()
    splice.apply(doc.toxml('utf-8'))

elif command == "refresh":
    # only refresh
    from planet import splice
    doc = splice.splice()
    splice.apply(doc.toxml('utf-8'))
    print("<p>Successfully refreshed</p>")

elif command == "expunge":
    # only expunge
    from planet import expunge
    expunge.expungeCache()
    print("<p>Successfully expunged</p>")

else:
    # Missing command, or a value that is not one of the commands above.
    print("<p>Unknown command</p>")

print("<p><strong><a href='" + ADMIN_URL + "'>Return</a> to admin interface</strong></p>")
# Close the results <div> opened with the page header before ending the page.
print("</div></body></html>")