For CartPole in Chapter 4, this is the code that worked for me. #71

Open
jupitermarketingagency opened this issue Dec 7, 2023 · 3 comments

Comments

@jupitermarketingagency

jupitermarketingagency commented Dec 7, 2023

#!/usr/bin/env python3
import gymnasium as gym
from collections import namedtuple
import numpy as np
from tensorboardX import SummaryWriter

import torch
import torch.nn as nn
import torch.optim as optim

HIDDEN_SIZE = 128
BATCH_SIZE = 16
PERCENTILE = 70

class Net(nn.Module):
    def __init__(self, obs_size, hidden_size, n_actions):
        super(Net, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_actions)
        )

    def forward(self, x):
        return self.net(x)

Episode = namedtuple('Episode', field_names=['reward', 'steps'])
EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])

def iterate_batches(env, net, batch_size):
    batch = []
    episode_reward = 0.0
    episode_steps = []
    obs, _ = env.reset()
    env.render()
    sm = nn.Softmax(dim=1)
    while True:
        obs_v = torch.FloatTensor([obs])
        act_probs_v = sm(net(obs_v))
        act_probs = act_probs_v.data.numpy()[0]
        # Sample an action from the policy's softmax distribution
        action = np.random.choice(len(act_probs), p=act_probs)
        next_obs, reward, is_done, _, _ = env.step(action)
        episode_reward += reward
        step = EpisodeStep(observation=obs, action=action)
        episode_steps.append(step)

        if is_done:
            e = Episode(reward=episode_reward, steps=episode_steps)
            batch.append(e)
            episode_reward = 0.0
            episode_steps = []
            next_obs, _ = env.reset()
            if len(batch) == batch_size:
                yield batch
                batch = []
        obs = next_obs

def filter_batch(batch, percentile):
    # Elite selection: keep only episodes whose total reward is at or
    # above the given percentile of the batch
    rewards = list(map(lambda s: s.reward, batch))
    reward_bound = np.percentile(rewards, percentile)
    reward_mean = float(np.mean(rewards))

    train_obs = []
    train_act = []
    for reward, steps in batch:
        if reward < reward_bound:
            continue
        train_obs.extend(map(lambda step: step.observation, steps))
        train_act.extend(map(lambda step: step.action, steps))

    train_obs_v = torch.FloatTensor(train_obs)
    train_act_v = torch.LongTensor(train_act)
    return train_obs_v, train_act_v, reward_bound, reward_mean

if name == "main":
env = gym.make("CartPole-v1", render_mode='human')
#env = gym.wrappers.Monitor(env, directory="mon", force=True)
obs_size = env.observation_space.shape[0]
n_actions = env.action_space.n

net = Net(obs_size, HIDDEN_SIZE, n_actions)
objective = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.01)
writer = SummaryWriter(comment="-cartpole")

for iter_no, batch in enumerate(iterate_batches(
        env, net, BATCH_SIZE)):
    obs_v, acts_v, reward_b, reward_m = \
        filter_batch(batch, PERCENTILE)
    optimizer.zero_grad()
    action_scores_v = net(obs_v)
    loss_v = objective(action_scores_v, acts_v)
    loss_v.backward()
    optimizer.step()
    print("%d: loss=%.3f, reward_mean=%.1f, rw_bound=%.1f" % (
        iter_no, loss_v.item(), reward_m, reward_b))
    writer.add_scalar("loss", loss_v.item(), iter_no)
    writer.add_scalar("reward_bound", reward_b, iter_no)
    writer.add_scalar("reward_mean", reward_m, iter_no)
    if reward_m > 199:
        print("Solved!")
        break
writer.close()
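
For anyone comparing this with the book's original Chapter 4 listing, the substantive changes are the switch from gym to gymnasium and its updated reset/step return values; the cross-entropy training loop itself is unchanged. A minimal sketch of the newer API (assuming gymnasium >= 0.26), which is why the code above unpacks obs, _ = env.reset() and a 5-tuple from env.step():

import gymnasium as gym

env = gym.make("CartPole-v1")
obs, info = env.reset()          # reset() now returns (observation, info)
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated   # the old single done flag is split into two
env.close()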
@dkinneyBU

@jupitermarketingagency OH MY GOOOOOOODDDD! Thank you for this, I've been fighting this stupid program for THREE DAYS! This guy really needs to revisit this code; I've had to debug basically all of it, with a few rare exceptions. And this is only Chapter 4!!!!

You are a life saver. If you conjure up any more fixes, please post--I will be eternally grateful. :-)

@jupitermarketingagency
Author

@dkinneyBU Glad to hear that was of help to you. Yes, I agree with you about him revisiting this code. We've seen this happen over and over across RL courses and books, because the books are more than two years old. So we've been trying to focus only on recently published ones.

@MFKruger

Thank you very much! Being a newbie to PyTorch and DRL, you saved me a lot of time!
