-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess.py
57 lines (50 loc) · 2.78 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import csv
import os
import json
# Match numbers held out for evaluation: main() routes comments from these
# fixtures to test.csv; comments from every other fixture go to train.csv.
test_match_ids = [
    71, 144, 367, 267, 281, 311, 279, 88, 352, 157,
    133, 34, 257, 238, 147, 368, 124, 148, 346, 117,
    114, 362, 119, 250, 301, 152, 306, 205, 132, 100,
    22, 197, 61, 121, 38, 59, 324, 120, 372, 318,
]
def _load_comments(data_dir, post_id):
    """Return the ``comments_full`` value stored in ``<data_dir>/<post_id>.json``."""
    post_path = os.path.join(data_dir, post_id + '.json')
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding, which can fail or mis-decode non-ASCII comment text.
    with open(post_path, encoding='utf-8') as fp:
        return json.load(fp)['comments_full']


def main(data_dir='posts', fixtures_path='epl_fixtures-post_ids.csv',
         train_path='train.csv', test_path='test.csv', test_ids=None):
    """Split per-match post comments into a train CSV and a test CSV.

    Every fixture row is read from ``fixtures_path``.  For each non-empty
    ``Home_PostID`` / ``Away_PostID`` the file ``<data_dir>/<post id>.json``
    is loaded and one output row is written per entry of its
    ``comments_full`` list.  Rows belonging to a match whose ``Match Number``
    is in ``test_ids`` go to ``test_path``; all other rows go to
    ``train_path``.

    The ``Result`` column is the first number of the fixture's ``Results``
    field (``"<a> - <b>"``) minus the second for ``Home`` rows, and the
    second minus the first for ``Away`` rows.

    Calling ``main()`` with no arguments uses the original hard-coded paths
    and the module-level ``test_match_ids`` list.

    Args:
        data_dir: Directory containing the ``<post id>.json`` files.
        fixtures_path: CSV with the columns ``Match Number``, ``Home``,
            ``Away``, ``Results``, ``Home_PostID`` and ``Away_PostID``.
        train_path: Output CSV for matches not in ``test_ids``.
        test_path: Output CSV for matches in ``test_ids``.
        test_ids: Iterable of integer match numbers to hold out; ``None``
            means the module-level ``test_match_ids``.

    Raises:
        OSError: If an input or output file cannot be opened.
        KeyError: If an expected CSV column or JSON key is missing.
        ValueError, IndexError: If ``Results`` or ``Match Number`` of a
            match that has comments cannot be parsed.
    """
    if test_ids is None:
        test_ids = test_match_ids
    # Build the lookup once; set membership is O(1) per check.
    held_out = frozenset(test_ids)

    header = ['Comment', 'Match_ID', 'Home_Team', 'Away_Team',
              'Comment_Team', 'Result']
    sides = (('Home', 'Home_PostID'), ('Away', 'Away_PostID'))

    with open(train_path, mode='w', newline='', encoding='utf-8') as train_csv, \
            open(test_path, mode='w', newline='', encoding='utf-8') as test_csv:
        train_writer = csv.writer(train_csv)
        train_writer.writerow(header)
        test_writer = csv.writer(test_csv)
        test_writer.writerow(header)

        # newline='' is what the csv module requires so that quoted fields
        # containing line breaks are parsed correctly.
        with open(fixtures_path, mode='r', newline='',
                  encoding='utf-8') as fixture_csv:
            for match in csv.DictReader(fixture_csv):
                result = match['Results'].split(' - ')
                for side, id_column in sides:
                    post_id = match[id_column]
                    if not post_id:
                        continue
                    comments = _load_comments(data_dir, post_id)
                    if not comments:
                        # Nothing to write; skip the parsing below so that
                        # a fixture without a usable score is tolerated
                        # when it contributes no rows.
                        continue

                    first, second = int(result[0]), int(result[1])
                    margin = first - second if side == 'Home' else second - first
                    if int(match['Match Number']) in held_out:
                        writer = test_writer
                    else:
                        writer = train_writer
                    writer.writerows(
                        [comment['comment_text'],
                         match['Match Number'],
                         match['Home'],
                         match['Away'],
                         side,
                         margin]
                        for comment in comments
                    )
# Run the train/test split only when executed as a script, not on import.
if __name__ == '__main__':
    main()