evaluations.py
"""
This file provides the EvaluationHandler class, which supports the evaluation of a model testing.
"""
import numpy as np


class EvaluationHandler:
    """
    This class keeps track of several environment parameters (e.g. makespan, tardiness) during model testing.
    After testing, call the evaluate_test function to compute evaluation metrics
    across all collected test episodes (e.g. mean, standard deviation).
    You can adapt evaluate_test to compute different or additional metrics.
    """
    def __init__(self):
        # Test parameters, valid for one complete test loop by a single agent (may span multiple episodes)
        self.rewards = []
        self.tardiness = []
        self.tardiness_max = []
        self.makespan = []  # number of steps required to finish all jobs
        self.actions_list = []
        self.tasks_list = []

    def record_environment_episode(self, env, total_reward) -> None:
        """
        Stores all necessary environment parameters from the most recent episode

        :param env: Non-reset environment object whose parameters should be recorded
        :param total_reward: Total reward of the episode

        :return: None
        """
        # append the data from this test run to the respective lists
        self.makespan.append(env.get_makespan())
        self.rewards.append(total_reward)
        self.tardiness.append(sum(env.tardiness))
        self.tardiness_max.append(max(env.tardiness))
        self.tasks_list.append(env.tasks)
        self.actions_list.append(env.action_history)

    def update_episode_solved_with_solver(self, env) -> None:
        """
        Calculates all missing parameters of an environment processed by the solver

        :param env: Environment object with a tasks attribute generated by the solver

        :return: None
        """
        # calculate tardiness
        env.calculate_tardiness()
        # update the machine occupancy ends so that get_makespan reflects the solver schedule
        for task in env.tasks:
            if task.finished > env.ends_of_machine_occupancies[task.selected_machine]:
                env.ends_of_machine_occupancies[task.selected_machine] = task.finished
        # record the episode; the solver produces no reward signal, so 0 is stored
        self.record_environment_episode(env, 0)

    def evaluate_test(self) -> dict:
        """
        Aggregates all recorded test parameters and computes the statistical data used for plots and prints

        :return: Dictionary with all specified evaluation metrics
        """
        rewards, tardiness = np.asarray(self.rewards), np.asarray(self.tardiness)
        evaluation_results = {}
        evaluation_results['rew_mean'] = np.mean(rewards)
        evaluation_results['rew_std'] = np.std(rewards)
        evaluation_results['rew_best'] = np.max(rewards)
        evaluation_results['rew_best_count'] = int(np.sum(rewards == evaluation_results['rew_best']))
        evaluation_results['rew_worst'] = np.min(rewards)
        evaluation_results['tardiness_mean'] = np.mean(tardiness)
        evaluation_results['tardiness_std'] = np.std(tardiness)
        evaluation_results['tardiness_max_mean'] = np.mean(self.tardiness_max)
        evaluation_results['makespan_mean'] = np.mean(self.makespan)
        # conditional value at risk: mean reward over the worst 10 % of episodes
        evaluation_results['rew_worst_quantile_border'] = np.quantile(rewards, 0.1)
        evaluation_results['rew_cvar'] = rewards[rewards <= evaluation_results['rew_worst_quantile_border']].mean()
        # fraction of episodes with reward exactly 0 (interpreted as good solutions)
        evaluation_results['rew_perc_good_solutions'] = 1 - np.count_nonzero(rewards) / len(rewards)
        evaluation_results['num_tests'] = len(rewards)
        return evaluation_results

    @classmethod
    def add_solver_gap_to_results(cls, results: dict) -> dict:
        """
        If a solver makespan exists, computes the gap to the solver optimum for all agents

        :param results: Dictionary with test results

        :return: Updated dictionary with test results, now including the gap to the solver optimum
        """
        if 'solver' in results:
            optimal_makespan = results['solver']['makespan_mean']
            for agent, result in results.items():
                gap = result['makespan_mean'] - optimal_makespan
                results[agent].update({'gap_to_solver': gap})
        return results
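

# A minimal usage sketch, not part of the original file: it exercises the full
# record -> evaluate -> solver-gap flow with a hypothetical stub environment that
# provides only the attributes record_environment_episode reads (get_makespan(),
# tardiness, tasks, action_history). A real run would use a schlably scheduling
# environment and a trained agent instead.
if __name__ == '__main__':
    class _StubEnv:
        """Hypothetical stand-in for a finished scheduling environment."""
        def __init__(self, makespan, tardiness):
            self._makespan = makespan
            self.tardiness = tardiness
            self.tasks = []            # placeholder task list
            self.action_history = []   # placeholder action history

        def get_makespan(self):
            return self._makespan

    handler = EvaluationHandler()
    # pretend two test episodes were run, each yielding a makespan, per-job tardiness and a total reward
    for makespan, tardiness, reward in [(40, [0, 2], -2.0), (38, [0, 0], 0.0)]:
        handler.record_environment_episode(_StubEnv(makespan, tardiness), reward)

    # aggregate the metrics and, given a solver baseline, add the makespan gap
    results = {'agent': handler.evaluate_test(),
               'solver': {'makespan_mean': 36.0}}
    print(EvaluationHandler.add_solver_gap_to_results(results))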