-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathball24.py
417 lines (325 loc) · 15.7 KB
/
ball24.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
import requests
from bs4 import BeautifulSoup
import shutil
import csv
import data.tips24
from common import TippeData, Scraper, Team, Contestant
from reader import CsvReader24
import os
import datetime
# get current standings every day
# check each team n matches played
# if a team has increased the number of matches played
# check points (do we need (W/D/L) ? )
# check goal diff
# save data to file
# if all teams have played N matches
# compute standings after that
# update team position
# for each round that everyone has played
# get position
# compute history
class Scraper24(Scraper):
    """Scraper for the 2024 OBOS-ligaen league table on obos-ligaen.no."""

    # Seconds to wait for the results page; without a timeout a stalled
    # connection would block the caller indefinitely.
    _REQUEST_TIMEOUT = 10
    # Number of <td> cells each team row is expected to contain.
    _EXPECTED_CELLS = 11

    def __init__(self) -> None:
        super().__init__('https://www.obos-ligaen.no/resultater', 'data/2024.csv')

    def get_standings(self):
        """Download the results page and parse the league table.

        Returns:
            A list with one ``[pos, name, n_played, goal_diff, n_points]``
            entry per team row, in the order the rows appear on the page.

        Raises:
            requests.RequestException: the page could not be fetched, the
                request timed out, or the server answered with an error
                status.
            ValueError: the league table, a team name, or the expected
                number of cells is missing, i.e. the page layout changed.
        """
        r = requests.get(self.url, timeout=self._REQUEST_TIMEOUT)
        # Fail here rather than trying to parse an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser', from_encoding='utf-8')
        league_table = soup.find('table', class_="table--persist-area table table--league-obos")
        if league_table is None:
            raise ValueError(f"Expected to find league table at {self.url}")

        standings = []
        for tbody in league_table.find_all("tbody"):
            # Each row holds all the info we need for one team.
            for row in tbody.find_all("tr", class_="table__row"):
                team_element = row.find("span", class_="table__typo--full")
                if team_element is None:
                    raise ValueError("Expected to find team")
                name = team_element.string  # team name

                td_elements = row.find_all("td")  # numeric columns
                n_exp = self._EXPECTED_CELLS
                if len(td_elements) != n_exp:
                    raise ValueError(f"Expected {n_exp} elements, found {len(td_elements)}")

                pos = int(td_elements[0].get_text())
                n_played = int(td_elements[2].get_text())
                goal_diff = int(td_elements[-3].get_text())
                n_points = int(td_elements[-2].get_text())

                # Append team status to current standings
                standings.append([pos, name, n_played, goal_diff, n_points])
        return standings
class TippeData24(TippeData):
    """Prediction-game data for the 2024 OBOS-ligaen season.

    Holds the season's teams, the contestants built from
    ``data.tips24.ENTRIES`` plus a synthetic "average" contestant, and the
    routines that keep the season CSV in step with the scraped league table.
    """

    # Scratch file used when the input and output CSV paths differ, so the
    # input file is never modified while it is being read.
    _TEMP_CSV = "data/temp.csv"

    def __init__(self, debug=False):
        """Create teams, CSV reader, scraper and contestants.

        Args:
            debug: passed to the base class; the methods below print
                extra diagnostics when ``self.debug`` is truthy.
        """
        super().__init__(debug)
        self.teams = [
            Team('Stabæk', 'STB'),
            Team('Aalesund', 'AFK'),
            Team('Bryne', 'BRY'),
            Team('Egersund', 'EGE'),
            Team('Kongsvinger', 'KIL'),
            Team('Levanger', 'LEV'),
            Team('Lyn', 'LYN'),
            Team('Mjøndalen', 'MIF'),
            Team('Moss', 'MOS'),
            Team('Ranheim TF', 'RAN'),
            Team('Raufoss', 'RAU'),
            Team('Sandnes Ulf', 'ULF'),
            Team('Sogndal', 'SGN'),
            Team('Start', 'STA'),
            Team('Vålerenga', 'VIF'),
            Team('Åsane', 'ÅSA'),
        ]
        self.reader = CsvReader24("data/2024.csv", self.teams, self.debug)
        self.scraper = Scraper24()
        entries = data.tips24.ENTRIES  # {Name: {prediction, short, avatar}}
        self.prepare_contestant_entries(entries)
        # The average contestant is built from the real contestants only,
        # then registered alongside them.
        self.set_contestant(self.create_average_contestant())

    def create_average_contestant(self):
        """Build the "AVG" contestant from the mean predicted placements.

        Every team's ``avg_placement`` is set to the mean of the 1-based
        positions the currently registered contestants predicted for it
        (0 when there are no contestants).

        Returns:
            A ``Contestant`` named "fifagutta" / "AVG" whose prediction is
            the teams sorted by rising ``avg_placement``.
        """
        for team in self.teams:
            team.avg_placement = 0

        n_contestants = 0
        for contestant in self.contestants:
            n_contestants += 1
            for index, team in enumerate(contestant.data['prediction']):
                self.teams[self.teams.index(team)].avg_placement += (index + 1)

        # The mean is taken over contestants, not over teams; guard the
        # no-contestant case so the division cannot fail.
        if n_contestants:
            for team in self.teams:
                team.avg_placement = team.avg_placement / n_contestants

        # sort teams based on their avg_placement in rising order
        sorted_teams = sorted(self.teams, key=lambda x: x.avg_placement)
        avg_contestant = Contestant("fifagutta", "AVG")
        avg_contestant.set_prediction(sorted_teams)

        # Optionally, print each team's average placement for verification
        if self.debug:
            for team in sorted_teams:
                print("team", team.name, "avg:", team.avg_placement)
        return avg_contestant

    def prepare_contestant_entries(self, entries):
        """Create and register one contestant per entry.

        Args:
            entries: mapping ``{name: {"prediction": [team names],
                "short": str, "avatar": str}}``.

        Stops early, after printing a message, if the teams are not set or
        if a predicted team name cannot be resolved with ``get_team``.
        """
        if not self.teams:
            print("teams not set! cant set data dict")
            return

        # ``entry`` rather than ``data`` so the imported ``data`` package
        # is not shadowed inside this method.
        for name, entry in entries.items():
            contestant = Contestant(name, entry['short'])
            contestant.set_avatar(entry['avatar'])
            prediction = []
            for team_name in entry['prediction']:  # add each team in order
                team = self.get_team(team_name)
                if not team:
                    print(f"did not find team {team_name}")
                    return
                prediction.append(team)
            contestant.set_prediction(prediction)
            # add contestant to list of contestants
            self.set_contestant(contestant)

    def update_team_csv(self, output_fname="", input_fname=""):
        """Write a CSV entry for every team that has played a new match.

        Uses ``self.standings`` (rows of ``[pos, name, n_played, gd,
        points]``) as the latest table. When the two paths differ, the
        work is done on a scratch copy that is copied to the output at
        the end.

        Args:
            output_fname: CSV to write; empty means the reader's default.
            input_fname: CSV to read; empty means the reader's default.

        Returns:
            True if at least one team entry was written.

        Raises:
            ValueError: a team is missing from ``self.standings``.
        """
        updated_something = False

        # use a temp file to write/read so that we dont refresh
        temp_path = None
        if output_fname != input_fname:
            temp_path = self._TEMP_CSV
            if not input_fname:
                input_fname = self.reader.csv
            shutil.copyfile(input_fname, temp_path)
        f_in = temp_path if temp_path is not None else input_fname
        f_out = temp_path if temp_path is not None else output_fname

        for team in self.teams:
            # Find this team's row; the scraped name may carry a suffix,
            # so the first word of the scraped name is accepted as well.
            team_standing = None
            for standing in self.standings:
                if team.name == standing[1] or team.name == standing[1].split(" ")[0]:
                    team_standing = standing
                    break
            if team_standing is None:
                raise ValueError(f"ERROR did not find team {team.name} in standings")

            team.n_played = int(team_standing[2])
            csv_n_played = self.reader.get_n_matches_played(team.name, input_fname)
            if team.n_played == csv_n_played:
                if self.debug:
                    print(f"already saved round {team.n_played} for {team.name}\n")
                continue  # already saved last match played

            self.reader.write_team_entry(
                team.name, team.n_played, team_standing[4], team_standing[3],
                output_fname=f_out, input_fname=f_in)
            updated_something = True

        if temp_path is not None:
            # An empty output name means the reader's own CSV.
            target = output_fname or self.reader.csv
            shutil.copyfile(temp_path, target)
            print(f"->copied {temp_path} to {target}")
        return updated_something

    def compute_standings_after_full_round(self, round_number, input_fname=""):
        """Fill ``team.match_history[round_number]`` for every team.

        Reads each team's points and goal difference for the round from
        the CSV, ranks the teams on points first and goal difference
        second, and stores the 1-based rank under ``'pos'``.

        Prints an error and returns early if any team has no CSV entry
        for the round; positions are then not computed.
        """
        # read historic data
        for team in self.teams:
            csv_entry = self.reader.get_team_entry(team.name, round_number, input_fname=input_fname)
            if csv_entry is None:
                print("ERROR: Returning")
                return
            team.match_history[round_number] = {
                'points': int(csv_entry[2]),
                'gd': int(csv_entry[3]),
                'pos': None,  # filled in below
            }

        # compute positions based on round data: name, points, gd
        round_standings = [
            [team.name,
             team.match_history[round_number]['points'],
             team.match_history[round_number]['gd']]
            for team in self.teams
        ]
        # sort on points first and gd second
        round_standings.sort(key=lambda x: (x[1], x[2]), reverse=True)

        if self.debug:
            print(f"Game #{round_number}:\n# | Team | Points | GD")
        # save team positions for this round
        for pos, row in enumerate(round_standings, start=1):
            team = self.get_team(row[0])
            team.match_history[round_number]['pos'] = pos
            if self.debug:
                print(f"{pos}| {row}")

    def update_csv_positions(self, output_fname="", input_fname=""):
        """Write the 'pos' column for every fully played round lacking it.

        Compares the minimum number of matches registered for all teams
        with the number of position rows already written and fills in
        each missing round.

        Args:
            output_fname: CSV to write; empty means the reader's default.
            input_fname: CSV to read; empty means the reader's default.

        Returns:
            False when the positions were already up to date, else True.
        """
        n_registered_matches = self.reader.get_min_matches_played(input_fname=input_fname)
        n_registered_positions = self.reader.get_n_pos_rows_written(input_fname=input_fname)
        if self.debug:
            print(f"already registered {n_registered_matches} matches")
        if n_registered_matches == n_registered_positions:
            print("->latest position already updated")
            return False
        if abs(n_registered_positions-n_registered_matches) > 1:
            print(f"WARN: registered matches is {n_registered_matches} but registered positions is only {n_registered_positions} ")

        # Stage the scratch copy ONCE. Re-copying the input for every
        # round would discard the positions written for earlier rounds.
        temp_path = None
        if output_fname != input_fname:
            temp_path = self._TEMP_CSV
            shutil.copyfile(input_fname or self.reader.csv, temp_path)
        f_in = temp_path if temp_path is not None else input_fname
        f_out = temp_path if temp_path is not None else output_fname

        # Compute and store positions for all rounds that lack them
        for round_number in range(n_registered_positions+1, n_registered_matches+1):
            self.compute_standings_after_full_round(round_number, input_fname)
            # Update "pos" column for each team
            for team in self.teams:
                self.reader.write_team_pos(team.name, round_number,
                                           team.match_history[round_number]['pos'],
                                           output_fname=f_out, input_fname=f_in)

        if temp_path is not None:
            # An empty output name means the reader's own CSV.
            target = output_fname or self.reader.csv
            shutil.copyfile(temp_path, target)
            print(f"->copied {temp_path} to {target}")

        if self.debug:
            print(f"->added 'pos' for each team round {n_registered_matches}")
        return True

    def compute_contestant_points_timeseries(self, input_fname=""):
        """Rebuild every contestant's ``points_history`` from CSV positions.

        For each round that has positions written, a contestant's round
        score is the sum over teams of ``|predicted position - actual
        position|``. Any previous history is discarded first.

        Raises:
            ValueError: a team in the round standings is missing from a
                contestant's prediction.
        """
        n_pos_written = self.reader.get_n_pos_rows_written(input_fname=input_fname)
        for contestant in self.contestants:
            contestant.data['points_history'] = []  # empty previous stuff

        for round_number in range(1, n_pos_written+1):
            round_standings = self.reader.get_simple_standings_at_round_number(
                round_number, input_fname=input_fname)
            if self.debug:
                print("Round", round_number, "standings:", round_standings)

            # compute points for each contestant using those standings
            for contestant in self.contestants:
                prediction = contestant.data['prediction']
                round_points = 0
                for row in round_standings:
                    team_name = row[0]
                    team_ind = next((index for index, obj in enumerate(prediction)
                                     if obj.name == team_name), None)
                    if team_ind is None:
                        # Previously this fell through to ``None + 1``.
                        raise ValueError(
                            f"team {team_name} is missing from a contestant's prediction")
                    round_points += abs((team_ind + 1) - int(row[1]))
                # append round points to history
                contestant.data['points_history'].append(round_points)

    # TO BE RUN BY WORKFLOW
    def update_csv(self, output_fname="", input_fname=""):
        """Fetch the standings and bring the CSV's matches and positions up to date.

        With both names empty, the reader's own CSV is read and written.
        Positions are always computed from the file that was just written.

        Returns:
            ``(updated_pos, updated_matches)`` -- note the order.
        """
        self.fetch_standings()
        if output_fname == "" and input_fname == "":
            output_fname = self.reader.csv
            input_fname = self.reader.csv

        updated_matches = self.update_team_csv(
            output_fname=output_fname, input_fname=input_fname)
        updated_pos = self.update_csv_positions(
            output_fname=output_fname, input_fname=output_fname)

        if updated_matches:
            print(f"updated matches of file {output_fname or self.reader.csv}")
        if updated_pos:
            print(f"updated positions of file {output_fname or self.reader.csv}")
        return updated_pos, updated_matches

    # MAIN FCN - RUN AT REFRESH
    def update_contestants(self, input_fname=""):
        """Refresh standings, current points and the points history.

        Assumes the CSV file has already been updated.
        """
        self.fetch_standings()  # get latest standings online
        self.update_current_points()  # compute current points of contestants
        self.compute_contestant_points_timeseries(input_fname=input_fname)
        print("computed contestants' points history")
def action_update_csv(dir_prefix=None, backup_only=True):
    """Update the season CSV (or only a timestamped backup) from the web.

    Args:
        dir_prefix: directory containing the ``data`` folder; defaults to
            the current working directory.
        backup_only: when True, write the updated data only to a
            timestamped file under ``data/backup/time`` and leave the
            main CSV untouched. When False, update the main CSV and make
            backups whenever something changed.
    """
    if dir_prefix is None:
        dir_prefix = os.getcwd()

    ball = TippeData24()
    now = datetime.datetime.now()
    backup_dir = f"{dir_prefix}/data/backup/time"
    os.makedirs(backup_dir, exist_ok=True)  # ensure the backup directory exists
    backup_time = f"{backup_dir}/2024-{now.month:02d}-{now.day:02d}-{now.hour:02d}.{now.minute:02d}.csv"

    if backup_only:
        ball.update_csv(output_fname=backup_time)
        return

    # update main CSV file here
    updated_pos, updated_matches = ball.update_csv()

    if updated_matches or updated_pos:
        # make backup whenever something has changed
        shutil.copy(ball.reader.csv, backup_time)
        print(f"\nBackup file at {backup_time}")

        # Hotfix - use full path to ensure stuff is saved
        full_csv_path = f"{dir_prefix}/data/2024.csv"
        try:
            shutil.copy(ball.reader.csv, full_csv_path)
            print(f"\nSaved CSV file at {full_csv_path}")
        except OSError:
            # Best effort: shutil.SameFileError (an OSError) is raised
            # when both paths already name the same file. Only file
            # errors are swallowed, so Ctrl-C and real bugs still surface.
            print(f"\nFile should be found at {full_csv_path}")

    if updated_pos:
        # Keep one backup per completed round as well.
        num = ball.reader.get_n_pos_rows_written()
        backup = f"{dir_prefix}/data/backup/2024-r{num}.csv"
        shutil.copy(ball.reader.csv, backup)
        print(f"\nBackup file at {backup}")
def main():
    """Manual entry point for trying the 2024 data pipeline locally.

    With ``debug`` on (the hard-coded default) the standings are fetched
    and printed; otherwise the main CSV is updated via
    ``action_update_csv``. Either way the contestants' current points are
    then computed and printed.
    """
    debug = True
    ball = TippeData24(debug)

    if not debug:
        print("\nFETCHING STANDINGS AND UPDATING CSV\n")
        # Updates the main CSV and takes the backups.
        action_update_csv(backup_only=False)
    else:
        debug_csv = "data/2024-debug.csv"
        print("\n # Fetch standings")
        ball.fetch_standings()
        ball.print_standings()
        print(f"\n # Update {debug_csv} with latest games")
        # Enable to exercise the CSV update against the debug file:
        # ball.update_csv(input_fname=debug_csv, output_fname=debug_csv)
        # ball.update_contestants(input_fname=debug_csv)

    print("\n # Update points of contestants")
    ball.update_current_points()
    ball.print_contestants()

    # Further steps that can be enabled one at a time when debugging:
    # ball.reader.get_n_pos_rows_written()
    # ball.compute_standings_after_full_round(round_number=1)
    # ball.update_csv_positions()
    # ball.compute_contestant_points_timeseries()
# Run the manual entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()