Hello, I ran the code on Windows in a Jupyter notebook and got this error message:
```
TypeError                                 Traceback (most recent call last)
<ipython-input-6-ebd6b7e4996f> in <module>
     61     iter_no += 1
     62     s, a, r, next_s = agent.sample_env()
---> 63     agent.value_update(s, a, r, next_s)
     64
     65     reward = 0.0

<ipython-input-6-ebd6b7e4996f> in value_update(self, s, a, r, next_s)
     35         best_v, _ = self.best_value_and_action(next_s)
     36         new_v = r + GAMMA * best_v
---> 37         old_v = self.values[(s, a)]
     38         self.values[(s, a)] = old_v * (1-ALPHA) + new_v * ALPHA
     39

TypeError: unhashable type: 'dict'
```
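The `dict` in the error is most likely not Windows- or Jupyter-specific. Recent Gym releases changed `env.reset()` to return an `(observation, info)` tuple instead of a bare observation, so `self.state` ends up holding that tuple; the `info` part is a dict, and a tuple containing a dict cannot be hashed, so it cannot be used as a key in `self.values[(s, a)]`. A minimal sketch reproducing the problem, assuming Gym 0.26 or later is installed:

```python
import gym

env = gym.make("FrozenLake-v1")
state = env.reset()       # on Gym >= 0.26 this is (obs, info), e.g. (0, {'prob': 1})
values = {}
values[(state, 0)] = 1.0  # TypeError: unhashable type: 'dict'
```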
The original code:

```python
#!/usr/bin/env python3
import gym
import collections
from tensorboardX import SummaryWriter

ENV_NAME = "FrozenLake-v1"
GAMMA = 0.9
ALPHA = 0.2
TEST_EPISODES = 20


class Agent:
    def __init__(self):
        self.env = gym.make(ENV_NAME)
        self.state = self.env.reset()
        self.values = collections.defaultdict(float)

    def sample_env(self):
        action = self.env.action_space.sample()
        old_state = self.state
        new_state, reward, is_done, _ = self.env.step(action)
        self.state = self.env.reset() if is_done else new_state
        return old_state, action, reward, new_state

    def best_value_and_action(self, state):
        best_value, best_action = None, None
        for action in range(self.env.action_space.n):
            action_value = self.values[(state, action)]
            if best_value is None or best_value < action_value:
                best_value = action_value
                best_action = action
        return best_value, best_action

    def value_update(self, s, a, r, next_s):
        best_v, _ = self.best_value_and_action(next_s)
        new_v = r + GAMMA * best_v
        old_v = self.values[(s, a)]
        self.values[(s, a)] = old_v * (1-ALPHA) + new_v * ALPHA

    def play_episode(self, env):
        total_reward = 0.0
        state = env.reset()
        while True:
            _, action = self.best_value_and_action(state)
            new_state, reward, is_done, _ = env.step(action)
            total_reward += reward
            if is_done:
                break
            state = new_state
        return total_reward


if __name__ == "__main__":
    test_env = gym.make(ENV_NAME)
    agent = Agent()
    writer = SummaryWriter(comment="-q-learning")

    iter_no = 0
    best_reward = 0.0
    while True:
        iter_no += 1
        s, a, r, next_s = agent.sample_env()
        agent.value_update(s, a, r, next_s)

        reward = 0.0
        for _ in range(TEST_EPISODES):
            reward += agent.play_episode(test_env)
        reward /= TEST_EPISODES
        writer.add_scalar("reward", reward, iter_no)
        if reward > best_reward:
            print("Best reward updated %.3f -> %.3f" % (
                best_reward, reward))
            best_reward = reward
        if reward > 0.80:
            print("Solved in %d iterations!" % iter_no)
            break
    writer.close()
```
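A sketch of one possible fix, assuming Gym 0.26 or later, where `env.reset()` returns `(observation, info)` and `env.step()` returns `(observation, reward, terminated, truncated, info)`: unpack the extra values and keep only the observation as the state. If the installed version still returns four values from `step()`, only the `reset()` lines need the change. Only the `Agent` class is shown; the rest of the script stays as it is:

```python
# Sketch of a fix, assuming gym >= 0.26:
#   reset() -> (observation, info)
#   step()  -> (observation, reward, terminated, truncated, info)
# Only the env calls change; the Q-learning logic itself is untouched.
import collections
import gym

ENV_NAME = "FrozenLake-v1"
GAMMA = 0.9
ALPHA = 0.2


class Agent:
    def __init__(self):
        self.env = gym.make(ENV_NAME)
        self.state, _ = self.env.reset()  # keep the observation, drop the info dict
        self.values = collections.defaultdict(float)

    def sample_env(self):
        # Take one random action and return the (s, a, r, s') transition.
        action = self.env.action_space.sample()
        old_state = self.state
        new_state, reward, terminated, truncated, _ = self.env.step(action)
        if terminated or truncated:  # episode is over either way
            self.state, _ = self.env.reset()
        else:
            self.state = new_state
        return old_state, action, reward, new_state

    def best_value_and_action(self, state):
        # Greedy lookup over the tabular action values for this state.
        best_value, best_action = None, None
        for action in range(self.env.action_space.n):
            action_value = self.values[(state, action)]
            if best_value is None or best_value < action_value:
                best_value = action_value
                best_action = action
        return best_value, best_action

    def value_update(self, s, a, r, next_s):
        # Standard Q-learning update, blended with learning rate ALPHA.
        best_v, _ = self.best_value_and_action(next_s)
        new_v = r + GAMMA * best_v
        old_v = self.values[(s, a)]
        self.values[(s, a)] = old_v * (1 - ALPHA) + new_v * ALPHA

    def play_episode(self, env):
        # Run one greedy episode and return its total reward.
        total_reward = 0.0
        state, _ = env.reset()
        while True:
            _, action = self.best_value_and_action(state)
            new_state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if terminated or truncated:
                break
            state = new_state
        return total_reward
```

With these changes the original `__main__` training loop should work unchanged. Alternatively, pinning Gym to the release the book's repository lists in its requirements avoids editing the code at all.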