-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathatari_experiment.cpp
144 lines (128 loc) · 5.36 KB
/
atari_experiment.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
//
// Created by Khurram Javed on 2022-07-18.
//
#include "include/environments/proto_prediction_environments.h"
#include "include/utils.h"
#include <fstream>
#include <iostream>
#include <vector>
#include "include/environments/animal_learning/tracecondioning.h"
#include "include/experiment/Experiment.h"
#include "include/experiment/Metric.h"
#include "include/nn/networks/base_lstm.h"
#include "include/nn/networks/lstm_bptt.h"
#include "include/nn/networks/lstm_incremental_networks.h"
#include "include/nn/networks/network_factory.h"
#include "include/nn/utils.h"
#include "include/utils.h"
#include <deque>
#include <iostream>
#include <string>
int main(int argc, char *argv[]) {
std::vector<float> list_of_returns;
std::vector<float> list_of_predictions;
std::deque<float> list_of_errors;
int K = 500000;
Experiment *my_experiment = new ExperimentJSON(argc, argv);
ProtoPredictionEnvironment env2(my_experiment->get_string_param("env"),
my_experiment->get_float_param("gamma"));
Metric error_metric = Metric(my_experiment->database_name, "error_table",
std::vector<std::string>{"run", "step", "error"},
std::vector<std::string>{"int", "int", "real"},
std::vector<std::string>{"run", "step"});
Metric episodic_error =
Metric(my_experiment->database_name, "episodic_error",
std::vector<std::string>{"run", "step", "error"},
std::vector<std::string>{"int", "int", "real"},
std::vector<std::string>{"run", "step"});
Metric avg_error = Metric(
my_experiment->database_name, "predictions",
std::vector<std::string>{"run", "step", "pred", "target", "reward"},
std::vector<std::string>{"int", "int", "real", "real", "real"},
std::vector<std::string>{"run", "step"});
std::cout << "Program started \n";
std::mt19937 mt(my_experiment->get_int_param("seed"));
BaseLSTM *network = NetworkFactory::get_network(my_experiment);
std::cout << "Network created\n";
float running_error = 0.00;
auto x = env2.step();
int layer = 0;
float old_pred = network->forward(x);
list_of_predictions.push_back(old_pred);
for (int env_step = 1; env_step < my_experiment->get_int_param("steps");
env_step++) {
if (env_step % std::stoi(my_experiment->get_vector_param("features")[2]) == std::stoi(my_experiment->get_vector_param("features")[2]) - 1) {
layer++;
std::cout << "Increasing layer\n";
}
if(env_step%100000==0) {
std::cout << "Step = " << env_step << "\n";
IncrementalNetworks* ptr = dynamic_cast<IncrementalNetworks*>(network);
if(ptr!= nullptr)
print_vector(ptr->prediction_weights);
}
x = env2.step();
// Evaluation code
if (env2.get_gamma() == 0) {
auto returns = env2.GetListOfReturns();
float error = 0;
for (int i = 1; i < returns.size(); i++) {
int actual_env_step = env_step - returns.size() + i;
float real_error = (returns[i] - list_of_predictions[i]) *
(returns[i] - list_of_predictions[i]);
list_of_errors.push_back(real_error);
running_error += real_error;
if (list_of_errors.size() > K) {
running_error -= list_of_errors[0];
list_of_errors.pop_front();
}
error += real_error;
if (actual_env_step % 50000 == 0 && list_of_errors.size() == K) {
std::vector<std::string> cur_error;
cur_error.push_back(
std::to_string(my_experiment->get_int_param("run")));
cur_error.push_back(std::to_string(actual_env_step));
cur_error.push_back(std::to_string(running_error/K));
std::cout << "Running error = " << running_error/K << std::endl;
error_metric.record_value(cur_error);
}
if (actual_env_step % 1000000 < 500) {
std::vector<std::string> cur_error;
cur_error.push_back(
std::to_string(my_experiment->get_int_param("run")));
cur_error.push_back(std::to_string(env_step + i));
cur_error.push_back(std::to_string(list_of_predictions[i]));
cur_error.push_back(std::to_string(returns[i]));
cur_error.push_back(std::to_string(0));
avg_error.record_value(cur_error);
}
}
std::vector<std::string> cur_error;
cur_error.push_back(std::to_string(my_experiment->get_int_param("run")));
cur_error.push_back(std::to_string(env_step));
cur_error.push_back(std::to_string(error / returns.size()));
episodic_error.record_value(cur_error);
std::cout << "Episode return error = " << error / returns.size()
<< std::endl;
list_of_predictions.clear();
}
// Evaluation code ends
network->decay_gradient(my_experiment->get_float_param("lambda") *
env2.get_gamma());
network->backward(layer);
float pred = network->forward(x);
list_of_predictions.push_back(pred);
float target = env2.get_reward() + env2.get_gamma() * pred;
float error = target - old_pred;
network->update_parameters(layer, error);
if (env_step % 100000 == 0) {
error_metric.commit_values();
avg_error.commit_values();
episodic_error.commit_values();
}
old_pred = pred;
}
error_metric.commit_values();
avg_error.commit_values();
episodic_error.commit_values();
}