-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathddls_jobs_fetcher.py
74 lines (61 loc) · 2.5 KB
/
ddls_jobs_fetcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
## Fetch the open new DDLS jobs listed on the SciLifeLab website that are
## not yet on the data platform and print them as JSON, so the output can
## be copy-pasted (upon review) into data/jobs.json on the platform repo
import requests
import json
import sys
from datetime import datetime
def date_not_past_today(date_str):
    """Tell whether a ``YYYY-MM-DD`` date string is today or a later day.

    Args:
        date_str: Date formatted as ``%Y-%m-%d``.

    Returns:
        True when the date is today or in the future, False when it has passed.

    Raises:
        ValueError: If ``date_str`` does not match the ``%Y-%m-%d`` format.
    """
    deadline = datetime.strptime(date_str, "%Y-%m-%d").date()
    # "Not before today" is the same as "today or later".
    return not deadline < datetime.now().date()
def validate_request(url, target, timeout=30):
    """Send a GET request to ``url`` and return the response, or exit the script.

    Args:
        url: Address to fetch.
        target: Human-readable name of the source, used in the error message.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the script indefinitely.

    Returns:
        The ``requests`` response object when the status code is 200.

    Raises:
        SystemExit: If the request cannot be completed or the response status
            code is anything other than 200.
    """
    # The source name goes first and the URL second, matching the message text.
    failure_msg = "Fetching jobs from {} failed, check the URL {}".format(target, url)
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Connection errors and timeouts end the script the same way a bad
        # status code does, instead of surfacing a raw traceback.
        sys.exit("{} ({})".format(failure_msg, err))
    if r.status_code != 200:
        sys.exit(failure_msg)
    return r
# Base listing URLs: the SciLifeLab career endpoint and the JSON file holding
# the jobs already present on the data centre platform.
sll_jobs_url = (
    "https://www.scilifelab.se/wp-json/wp/v2/career?orderby=archive_date&per_page=50"
)
dc_jobs_url = "https://blobserver.dc.scilifelab.se/blob/data_platform_jobs.json"

# try and get jobs from scilifelab and data centre platform
sll_jobs_request = validate_request(sll_jobs_url, "Scilifelab")
dc_jobs_request = validate_request(dc_jobs_url, "DC")

# URLs of platform jobs whose application deadline has not passed yet.
# A set is used because it is only ever consulted for membership.
dc_all_jobs = dc_jobs_request.json()
dc_open_jobs = {
    job["job_url"]
    for job in dc_all_jobs["items"]
    if date_not_past_today(job["app_deadline"])
}

# The first response (already fetched above) reports the number of pages.
page_num = 1
sll_jobs_total_pages = int(sll_jobs_request.headers["X-WP-TotalPages"])
sll_new_open_jobs = []

while page_num <= sll_jobs_total_pages:
    if page_num > 1:
        # Build each page URL from the untouched base URL; appending to the
        # previous page's URL would pile up "&page=2&page=3..." parameters.
        page_url = "{}&page={}".format(sll_jobs_url, page_num)
        sll_jobs_request = validate_request(page_url, "Scilifelab")
    sll_all_jobs = sll_jobs_request.json()
    for job in sll_all_jobs:
        if not date_not_past_today(job["archive_date"]):
            # Stop scanning this page at the first posting whose archive date
            # has passed (presumably the listing is ordered so that all later
            # entries are expired too).
            # NOTE(review): the reviewed listing had lost its indentation;
            # this break is read as the else-branch of the archive-date check
            # -- confirm against the repository copy.
            break
        try:
            job_url = job["acf"]["read_more_external_link"]["url"]
        except (KeyError, TypeError):
            # No usable external link on this posting: fall back to the
            # posting's own page.
            job_url = job["link"]
        if job_url in dc_open_jobs:
            # Already listed on the platform, nothing to add.
            continue
        u_list = [
            univ["university"]["title"].strip()
            for univ in job["acf"]["university_lists"]
        ]
        sll_new_open_jobs.append(
            {
                "title": job["title"]["rendered"],
                "type": [],
                "app_deadline": job["archive_date"],
                "job_url": job_url,
                "description": "",
                "employer": ", ".join(u_list),
            }
        )
    page_num += 1

if not sll_new_open_jobs:
    print(
        "There are no new jobs to add, all open jobs in scilifelab are already in DC platform"
    )
else:
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    print(json.dumps(sll_new_open_jobs, indent=4, ensure_ascii=False))