-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproposal_downloader.py
54 lines (45 loc) · 1.6 KB
/
proposal_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
import pandas as pd
import json
import time
from torqueclient import Torque
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
def clean_objs(key, val):
    """Flatten a list or dict value into a string so it fits in one CSV cell.

    Args:
        key: Field name; returned unchanged.
        val: Field value. A list is joined with commas, a dict is serialized
            as JSON, and any other value is passed through untouched.

    Returns:
        A ``(key, val)`` tuple with ``val`` flattened as described above.
    """
    if isinstance(val, list):
        # str.join only accepts strings, so serialize any non-string item
        # (numbers, None, nested containers) as JSON before joining.
        val = ','.join(
            item if isinstance(item, str) else json.dumps(item)
            for item in val
        )
    elif isinstance(val, dict):
        val = json.dumps(val)
    return key, val
def run(**kwargs):
    """Download proposals from every non-excluded competition into one CSV.

    Connects to the GlobalView Torque endpoint, collects the requested fields
    from each competition's proposals, and writes the combined rows to
    ``data/lfc-proposals.csv`` (relative to the current working directory).

    Keyword Args:
        username: User name passed to ``Torque``. Required.
        api_key: API key passed to ``Torque``. Required.
        exclude_competitions: Competition keys to skip. Optional; nothing is
            skipped when it is omitted or empty.
        fields: Extra field names to download in addition to the four
            default ones. Optional.

    Raises:
        KeyError: If ``username`` or ``api_key`` is missing.
    """
    import os  # local import: only needed to prepare the output directory

    t0 = time.time()
    output_path = 'data/lfc-proposals.csv'

    torque = Torque(
        "https://torque.leverforchange.org/GlobalView",
        kwargs['username'], kwargs['api_key']
    )

    # Get all Competition keys, minus the ones the caller asked to skip.
    excluded = kwargs.get('exclude_competitions') or []
    competitions = [c for c in torque.competitions.keys() if c not in excluded]
    print('Gathering from competitions:', ', '.join(competitions))

    # Determine fields to download. dict.fromkeys de-duplicates while keeping
    # first-seen order, so the CSV column order is stable between runs.
    fields = ['Application #', 'Competition Domain', 'Project Title', 'GlobalView MediaWiki Title']
    fields += kwargs.get('fields') or []
    fields = list(dict.fromkeys(fields))

    rows = []
    for comp_key in competitions:
        print(f'Downloading {comp_key} proposals...')
        comp = torque.competitions[comp_key]
        proposals = comp.proposals
        # NOTE(review): presumably loads all proposals in one batch so the
        # per-field lookups below do not each hit the server -- confirm
        # against the torqueclient documentation.
        torque.bulk_fetch(proposals)

        # Only request fields this competition actually exposes, in the
        # order they were requested.
        available = set(comp.fields)
        comp_fields = [name for name in fields if name in available]

        for proposal in proposals:
            rows.append(dict(clean_objs(name, proposal[name]) for name in comp_fields))

    df = pd.DataFrame(rows)

    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path, index=False)

    print(f'\nDownloaded {len(df)} proposals in', f'{round(time.time() - t0, 1)}s')
if __name__ == '__main__':
    # Script entry point: read this step's settings from the
    # 'proposal_downloader' section of the pipeline's args.json and pass
    # them to run() as keyword arguments.
    path = 'Open-Grant-Commons-Pipelines/'
    # Use a context manager so the file handle is closed deterministically.
    with open(path + 'args.json', encoding='utf-8') as args_file:
        kwargs = json.load(args_file)['proposal_downloader']
    run(**kwargs)