Commit
Remove unnecessary section marks in docstrings
This commit removes unnecessary section markers (such as ---- and -------) from the docstrings throughout the project.
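For illustration, here is roughly what the cleanup looks like on one docstring. This is a minimal before/after sketch adapted from the play method that appears in the bandit diff below; the standalone signature and elided body are simplifications for readability, not the project's actual code.

# Before: Google-style section headers carry numpy-style underline markers,
# the "unnecessary section marks" this commit removes.
def play(i_arm: int) -> float:
    """Play a single round of the bandit game.

    Args:
    ----
        i_arm: An integer representing the index of the arm to play.

    Returns:
    -------
        A float indicating the reward obtained from playing the arm.
    """
    ...


# After: the underline markers are gone; the section headers and their
# contents are unchanged.
def play(i_arm: int) -> float:
    """Play a single round of the bandit game.

    Args:
        i_arm: An integer representing the index of the arm to play.

    Returns:
        A float indicating the reward obtained from playing the arm.
    """
    ...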
nakashima-hikaru committed Nov 30, 2023
1 parent f77b9e4 commit 055cba4
Showing 28 changed files with 60 additions and 110 deletions.
54 changes: 53 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -13,6 +13,7 @@ tqdm = "^4.64.1"
numpy = "^1.26.2"
pydantic = "^2.5.1"
torch = "^2.1.1"
gymnasium = "^0.29.1"


[tool.poetry.group.dev.dependencies]
4 changes: 0 additions & 4 deletions reinforcement_learning/errors.py
@@ -6,19 +6,16 @@ class NumpyValidationError(Exception):
"""Exception raised when encountering a validation error in numpy arrays.
Attributes:
----------
message (str): Description of the validation error.
"""

def __init__(self: Self, *, message: str) -> None:
"""Initialize a NumpyValidationError object with the given message.
Args:
----
message (str): The error message associated with the exception.
Returns:
-------
None
"""
@@ -36,7 +33,6 @@ def __init__(self: Self, expected_dim: int, actual_dim: int) -> None:
"""Initialize the instance with expected and actual dimensions.
Args:
----
expected_dim (int): The expected dimensionality.
actual_dim (int): The actual dimensionality provided.
"""
@@ -23,7 +23,6 @@ def __init__(self: Self, *, epsilon: float, action_size: int, alpha: float, seed
"""Initialize AlphaEpsilonGreedyAgent.
Args:
----
epsilon: The value of epsilon for epsilon-greedy action selection.
action_size: The number of possible actions.
alpha: The learning rate for updating action values.
@@ -37,12 +36,10 @@ def update(self: Self, i_action: int, reward: float) -> None:
"""Update the action-value estimation for the specified action using the given reward.
Args:
----
i_action (int): The index of the action to update the estimation for.
reward (float): The reward received after taking the action.
Returns:
-------
None
"""
self.__action_values[i_action] += (reward - self.__action_values[i_action]) * self.__alpha
@@ -23,7 +23,6 @@ def __init__(self: Self, *, epsilon: float, seed: int | None) -> None:
"""Initialize EpsilonGreedyAgentBase.
Args:
----
epsilon: The value of the exploration rate. Must be between 0 and 1, inclusive.
action_size: The number of possible actions.
seed: An optional seed value for random number generation.
@@ -42,7 +41,6 @@ def update(self: Self, *, i_action: int, reward: float) -> None:
"""Update the agent's internal state based on the given action and reward.
Args:
----
i_action: An integer representing the chosen action.
reward: A floating-point number representing the reward received.
@@ -27,13 +27,11 @@ def __init__(self: Self, epsilon: float, action_size: int, seed: int | None = No
"""Initialize an EpsilonGreedyAgent instance.
Args:
----
epsilon: The exploration rate, between 0.0 and 1.0.
action_size: The number of possible actions.
seed: An optional seed value for random number generation.
Returns:
-------
None
"""
super().__init__(epsilon=epsilon, seed=seed)
@@ -44,12 +42,10 @@ def update(self: Self, *, i_action: int, reward: float) -> None:
"""Update the agent's estimate of the action value based on the received reward.
Args:
----
i_action (int): The index of the chosen action.
reward (float): The reward received after taking the action.
Returns:
-------
None
"""
self.__ns[i_action] += 1
@@ -22,7 +22,6 @@ def __init__(self: Self, *, n_arms: int, seed: int | None = None) -> None:
"""Initialize BanditBase.
Args:
----
n_arms: The number of arms in the bandit.
seed: An optional seed value for random number generation.
@@ -46,19 +45,16 @@ def _next_rates(self: Self, *, rates: NDArray[np.float64]) -> NDArray[np.float64
"""Return next rates.
Args:
----
rates: An NDArray containing rates of a bandit machine.
"""

def play(self: Self, *, i_arm: int) -> float:
"""Play a single round of the bandit game.
Args:
----
i_arm: An integer representing the index of the arm to play.
Returns:
-------
A float indicating the reward obtained from playing the arm.
"""
rate: np.float64 = cast(np.float64, self.rates[i_arm])
@@ -24,7 +24,6 @@ def __init__(self: Self, *, n_arms: int, seed: int | None = None) -> None:
"""Initialize NonStationaryBandit.
Args:
----
n_arms: An integer representing the number of arms in the stationary bandit problem.
seed: An optional seed value for random number generation.
"""
@@ -21,7 +21,6 @@ def __init__(self: Self, *, n_arms: int, seed: int | None = None) -> None:
"""Initialize StationaryBandit.
Args:
----
n_arms: An integer representing the number of arms in the stationary bandit problem.
seed: An optional seed value for random number generation.
@@ -18,13 +18,11 @@ def simulate(
"""Simulate the interaction between a bandit and an agent over a number of steps.
Args:
----
steps (int): The number of steps to simulate.
bandit (BanditBase): The bandit to interact with.
agent (EpsilonGreedyAgentBase): The agent to use for selecting actions.
Returns:
-------
tuple[list[float], list[float]]: A tuple containing two lists. The first list
contains the total rewards accumulated after each step. The second list contains
the rewards rate at each step.
@@ -19,7 +19,6 @@ def __init__(self: Self, *, seed: int | None) -> None:
"""Initialize the AgentBase with the given seed.
Args:
----
seed (int): The seed value for random number generation.
"""
self.__rng: np.random.Generator = np.random.default_rng(seed=seed)
@@ -34,12 +33,10 @@ def get_action(self: Self, *, state: State) -> Action:
"""Select an action.
Args:
----
state: the state of the environment.
policy: the policy of the agent.
Returns:
-------
the chosen action based on the action probabilities for the given state.
"""

@@ -48,7 +45,6 @@ def add_memory(self: Self, *, state: State, action: Action, result: ActionResult
"""Add a new experience into the memory.
Args:
----
state: The current state of the agent.
action: The action taken by the agent.
result: The result of the action taken by the agent.
@@ -75,11 +71,9 @@ def get_action(self: Self, *, state: State) -> Action:
"""Select an action based on policy `self.__b`.
Args:
----
state: the state of the environment.
Returns:
-------
the chosen action based on the action probabilities for the given state.
"""
action_probs = self.behavior_policy[state]
@@ -70,7 +70,6 @@ def direction(self: Self) -> State:
"""Gets the direction of an action.
Returns:
-------
ret (State): a tuple representing the direction of the action.
"""
ret: State
@@ -101,7 +100,6 @@ class ActionResult:
"""Represent the result of an action in the context of a reinforcement learning system.
Args:
----
next_state: The next state after taking the action.
reward: The reward received for taking the action.
done: A flag indicating whether the task or episode is completed after taking the action.
@@ -133,13 +131,11 @@ def __init__(
"""Initialize a GridWorld object with the given reward map, goal state, and start state.
Args:
----
reward_map (Map): A 2D numpy array representing the reward map of the grid world.
goal_state (State): The coordinates of the goal state in the grid world.
start_state (State): The coordinates of the start state in the grid world.
Raises:
------
NumpyDimError: If the reward map has a dimension other than 2.
"""
if reward_map.ndim != MAP_DIM:
@@ -164,7 +160,6 @@ def goal_state(self: Self) -> State:
"""Return the goal state of the GridWorld.
Returns:
-------
State: The goal state of the GridWorld.
"""
@@ -185,7 +180,6 @@ def height(self: Self) -> int:
"""Return the height of the grid in the GridWorld object.
Returns:
-------
int: The height of the grid.
"""
return len(self.__reward_map)
@@ -195,7 +189,6 @@ def width(self: Self) -> int:
"""Return the width of the reward map.
Returns:
-------
the length of the first element of the private class attribute __reward_map,
which represents the width of the reward map.
"""
@@ -225,13 +218,11 @@ def next_state(self: Self, state: State, action: Action) -> State:
"""Move to the next state based on the provided action.
Args:
----
self (Self): An instance of the current object.
state (State): A tuple representing the current state (y_coordinate, x_coordinate).
action (Action): An object representing the action to be taken.
Returns:
-------
State: A tuple representing the next state after performing the action.
"""
next_state: State = (
@@ -247,11 +238,9 @@ def reward(self: Self, *, next_state: State) -> float:
"""Compute the reward for a given state transition.
Args:
----
next_state (State): The state to which transition is made.
Returns:
-------
float: The reward for transitioning to the provided state.
"""
return cast(float, self.__reward_map[next_state])
@@ -260,11 +249,9 @@ def step(self: Self, *, action: Action) -> ActionResult:
"""Perform an environment step based on the provided action.
Args:
----
action (Action): The action taken by the agent in the current state of the environment.
Returns:
-------
tuple(State, float, bool): The next state, reward from the current action and whether the goal state is reached.
"""
next_state = self.next_state(state=self.__agent_state, action=action)

1 comment on commit 055cba4

@github-actions

Coverage

Coverage Report
File     Stmts    Miss    Cover
TOTAL    991      0       100%

Tests Skipped Failures Errors Time
40 0 💤 0 ❌ 0 🔥 1.576s ⏱️
