-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_submission_format.py
130 lines (106 loc) · 4.09 KB
/
check_submission_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
=========================================================
Title: ImageArg Shared Task Code - Check Submission Format
---------------------------------------------------------
Please be careful about your submission file name and file
format, which will be used to validate your submission.
File Format Validation:
The file should be in CSV format with exactly two
columns: "tweet_id", and either "stance" or "persuasiveness".
TaskA CSV file should include:
tweet_id, stance
1204712613394944000, oppose
1204712613394933000, support
TaskB CSV file should include:
tweet_id, persuasiveness
1204712613394944000, yes
1204712613394933000, no
Please note that tweet ids should be integers, and
stance/persuasiveness labels should be lowercase.
---------------------------------------------------------
File Name Validation:
1. Team_Name: your team name
2. Method_Name: give a name to your model/method
(we accept multiple methods/models you might try)
3. Task_Name: shared task name (For Subtask A Stance
Classification, the task name is "TaskA", while for
Subtask B Persuasiveness Classification the task
name is "TaskB")
4. Attempt_Number: the number of submission attempts
(you can do a total of 5 submission attempts)
5. No period symbol in Team_Name, Method_Name, Task_Name,
and Attempt_Number.
Your file name should be like
"{team_name}.{method_name}.{task_name}.{attempt_number}.csv"
e.g., test-team.baseline.TaskA.1.csv.
---------------------------------------------------------
Notice: This code is managed by ImageArg Shared Task
(https://imagearg.github.io/).
---------------------------------------------------------
Date: 2023-07-16
=========================================================
"""
import pandas as pd
import argparse
import sys
# Hide Python tracebacks: with a limit of 0 an uncaught exception prints only
# its type and message, which keeps the output short for participants.
sys.tracebacklimit = 0
def verify_filename(file_name: str, max_attempts: int = 5):
    """Check that a submission file name follows the shared-task convention.

    The expected pattern is
    ``{team_name}.{method_name}.{task_name}.{attempt_number}.csv``: exactly
    five period-separated fields, where the task name is ``TaskA`` or
    ``TaskB``, the attempt number is an integer between 1 and
    ``max_attempts``, and the extension is ``csv`` (case-insensitive).

    A diagnostic message is printed for the first problem found, and a
    confirmation message is printed when the name is valid.

    Args:
        file_name: Name of the submission file to check.
        max_attempts: Highest attempt number that is accepted.

    Returns:
        ``True`` when the name is valid, ``None`` otherwise.
    """
    parts = file_name.split('.')
    if len(parts) != 5:
        print("Wrong File Name Format!")
        return None
    # Team and method names are free-form; only the last three fields are checked.
    subtask, run_number, file_format = parts[2:]

    if subtask not in ("TaskA", "TaskB"):
        print("Wrong Task Name Format!")
        return None

    if file_format.lower() != "csv":
        print("Wrong File Name Format!")
        return None

    try:
        attempt = int(run_number)
    except ValueError:
        print("Wrong Attempt Number!")
        return None
    if not 1 <= attempt <= max_attempts:
        print("Wrong Attempt Number!")
        return None

    print("File name is good to go!")
    return True
def verify_format(file_name: str, expected_rows: int = 300):
    """Check that the contents of a submission CSV match its subtask.

    The subtask is taken from the third period-separated field of
    ``file_name``. The file must contain exactly two columns (``tweet_id``
    plus the label column of the subtask), exactly ``expected_rows`` rows,
    integer tweet ids, and only the labels allowed for the subtask:

    * ``TaskA``: column ``stance`` with values ``support`` / ``oppose``
    * ``TaskB``: column ``persuasiveness`` with values ``yes`` / ``no``

    A diagnostic message is printed for the first problem found, and a
    confirmation message is printed when the file is valid.

    Args:
        file_name: Path of the CSV file; its name must follow the
            ``team.method.task.attempt.csv`` convention.
        expected_rows: Number of prediction rows the file must contain.

    Returns:
        ``True`` when the file is valid, ``None`` otherwise.

    Raises:
        Any exception raised by ``pandas.read_csv`` (for example when the
        file does not exist) is propagated unchanged.
    """
    # Per-subtask schema: label column, accepted labels, message for bad labels.
    task_specs = {
        'TaskA': (
            "stance",
            ("support", "oppose"),
            'Wrong values for Subtask A Stance, please use "support", "oppose" labels in your predictions',
        ),
        'TaskB': (
            "persuasiveness",
            ("yes", "no"),
            'Wrong values for Subtask B Persuasiveness, please use "yes", "no" labels in your predictions',
        ),
    }

    parts = file_name.split('.')
    if len(parts) != 5:
        print("Wrong File Name Format!")
        return None
    subtask = parts[2]

    predictions = pd.read_csv(file_name)

    if len(predictions.columns) != 2:
        print("Only include two columns: tweet_id and stance/persuasiveness columns!")
        return None

    if len(predictions) != expected_rows:
        print(f"Missing predictions! Should be {expected_rows} examples!")
        return None

    if "tweet_id" not in predictions.columns:
        print("Wrong column name in your file!")
        return None

    # A missing or non-numeric id makes pandas fall back to float/object dtype.
    if not pd.api.types.is_integer_dtype(predictions["tweet_id"]):
        print("Wrong values for tweet_id, tweet ids should be integers")
        return None

    if subtask not in task_specs:
        print("Wrong Task Name!")
        return None
    column, allowed, bad_value_message = task_specs[subtask]

    if column not in predictions.columns:
        print(f"Wrong column name in your file - should be named {column}")
        return None

    # Explicit check instead of `assert`, which is removed under `python -O`.
    if not predictions[column].isin(allowed).all():
        print(bad_value_message)
        return None

    print('File Format is good to go!')
    return True
if __name__ == '__main__':
    # Command-line entry point: takes the submission file as its only argument.
    cli = argparse.ArgumentParser(
        prog='imagearg shared task',
        description='check submission file format',
    )
    cli.add_argument('filename')
    submission = cli.parse_args().filename

    # Inspect the CSV contents only after the file name itself has passed.
    if verify_filename(submission):
        verify_format(submission)