Commit
Remove unnecessary section marks in docstrings
This commit removes unnecessary section markers (such as ---- and -------) from the docstrings throughout the project.
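For illustration, here is roughly what the cleanup looks like on one docstring. This is a minimal before/after sketch adapted from the play method that appears in the bandit diff below; the standalone signature and elided body are simplifications for readability, not the project's actual code.

# Before: Google-style section headers carry numpy-style underline markers,
# the "unnecessary section marks" this commit removes.
def play(i_arm: int) -> float:
    """Play a single round of the bandit game.

    Args:
    ----
        i_arm: An integer representing the index of the arm to play.

    Returns:
    -------
        A float indicating the reward obtained from playing the arm.
    """
    ...


# After: the underline markers are gone; the section headers and their
# contents are unchanged.
def play(i_arm: int) -> float:
    """Play a single round of the bandit game.

    Args:
        i_arm: An integer representing the index of the arm to play.

    Returns:
        A float indicating the reward obtained from playing the arm.
    """
    ...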
nakashima-hikaru committed Nov 30, 2023
1 parent f77b9e4 commit 055cba4
Showing 28 changed files with 60 additions and 110 deletions.
54 changes: 53 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -13,6 +13,7 @@ tqdm = "^4.64.1"
numpy = "^1.26.2"
pydantic = "^2.5.1"
torch = "^2.1.1"
gymnasium = "^0.29.1"


[tool.poetry.group.dev.dependencies]
4 changes: 0 additions & 4 deletions reinforcement_learning/errors.py
@@ -6,19 +6,16 @@ class NumpyValidationError(Exception):
"""Exception raised when encountering a validation error in numpy arrays.
Attributes:
----------
message (str): Description of the validation error.
"""

def __init__(self: Self, *, message: str) -> None:
"""Initialize a NumpyValidationError object with the given message.
Args:
----
message (str): The error message associated with the exception.
Returns:
-------
None
"""
@@ -36,7 +33,6 @@ def __init__(self: Self, expected_dim: int, actual_dim: int) -> None:
"""Initialize the instance with expected and actual dimensions.
Args:
----
expected_dim (int): The expected dimensionality.
actual_dim (int): The actual dimensionality provided.
"""
@@ -23,7 +23,6 @@ def __init__(self: Self, *, epsilon: float, action_size: int, alpha: float, seed
"""Initialize AlphaEpsilonGreedyAgent.
Args:
----
epsilon: The value of epsilon for epsilon-greedy action selection.
action_size: The number of possible actions.
alpha: The learning rate for updating action values.
@@ -37,12 +36,10 @@ def update(self: Self, i_action: int, reward: float) -> None:
"""Update the action-value estimation for the specified action using the given reward.
Args:
----
i_action (int): The index of the action to update the estimation for.
reward (float): The reward received after taking the action.
Returns:
-------
None
"""
self.__action_values[i_action] += (reward - self.__action_values[i_action]) * self.__alpha
@@ -23,7 +23,6 @@ def __init__(self: Self, *, epsilon: float, seed: int | None) -> None:
"""Initialize EpsilonGreedyAgentBase.
Args:
----
epsilon: The value of the exploration rate. Must be between 0 and 1, inclusive.
action_size: The number of possible actions.
seed: An optional seed value for random number generation.
@@ -42,7 +41,6 @@ def update(self: Self, *, i_action: int, reward: float) -> None:
"""Update the agent's internal state based on the given action and reward.
Args:
----
i_action: An integer representing the chosen action.
reward: A floating-point number representing the reward received.
@@ -27,13 +27,11 @@ def __init__(self: Self, epsilon: float, action_size: int, seed: int | None = No
"""Initialize an EpsilonGreedyAgent instance.
Args:
----
epsilon: The exploration rate, between 0.0 and 1.0.
action_size: The number of possible actions.
seed: An optional seed value for random number generation.
Returns:
-------
None
"""
super().__init__(epsilon=epsilon, seed=seed)
@@ -44,12 +42,10 @@ def update(self: Self, *, i_action: int, reward: float) -> None:
"""Update the agent's estimate of the action value based on the received reward.
Args:
----
i_action (int): The index of the chosen action.
reward (float): The reward received after taking the action.
Returns:
-------
None
"""
self.__ns[i_action] += 1
@@ -22,7 +22,6 @@ def __init__(self: Self, *, n_arms: int, seed: int | None = None) -> None:
"""Initialize BanditBase.
Args:
----
n_arms: The number of arms in the bandit.
seed: An optional seed value for random number generation.
@@ -46,19 +45,16 @@ def _next_rates(self: Self, *, rates: NDArray[np.float64]) -> NDArray[np.float64
"""Return next rates.
Args:
----
rates: An NDArray containing rates of a bandit machine.
"""

def play(self: Self, *, i_arm: int) -> float:
"""Play a single round of the bandit game.
Args:
----
i_arm: An integer representing the index of the arm to play.
Returns:
-------
A float indicating the reward obtained from playing the arm.
"""
rate: np.float64 = cast(np.float64, self.rates[i_arm])
@@ -24,7 +24,6 @@ def __init__(self: Self, *, n_arms: int, seed: int | None = None) -> None:
"""Initialize NonStationaryBandit.
Args:
----
n_arms: An integer representing the number of arms in the stationary bandit problem.
seed: An optional seed value for random number generation.
"""
@@ -21,7 +21,6 @@ def __init__(self: Self, *, n_arms: int, seed: int | None = None) -> None:
"""Initialize StationaryBandit.
Args:
----
n_arms: An integer representing the number of arms in the stationary bandit problem.
seed: An optional seed value for random number generation.
@@ -18,13 +18,11 @@ def simulate(
"""Simulate the interaction between a bandit and an agent over a number of steps.
Args:
----
steps (int): The number of steps to simulate.
bandit (BanditBase): The bandit to interact with.
agent (EpsilonGreedyAgentBase): The agent to use for selecting actions.
Returns:
-------
tuple[list[float], list[float]]: A tuple containing two lists. The first list
contains the total rewards accumulated after each step. The second list contains
the rewards rate at each step.
@@ -19,7 +19,6 @@ def __init__(self: Self, *, seed: int | None) -> None:
"""Initialize the AgentBase with the given seed.
Args:
----
seed (int): The seed value for random number generation.
"""
self.__rng: np.random.Generator = np.random.default_rng(seed=seed)
@@ -34,12 +33,10 @@ def get_action(self: Self, *, state: State) -> Action:
"""Select an action.
Args:
----
state: the state of the environment.
policy: the policy of the agent.
Returns:
-------
the chosen action based on the action probabilities for the given state.
"""

@@ -48,7 +45,6 @@ def add_memory(self: Self, *, state: State, action: Action, result: ActionResult
"""Add a new experience into the memory.
Args:
----
state: The current state of the agent.
action: The action taken by the agent.
result: The result of the action taken by the agent.
@@ -75,11 +71,9 @@ def get_action(self: Self, *, state: State) -> Action:
"""Select an action based on policy `self.__b`.
Args:
----
state: the state of the environment.
Returns:
-------
the chosen action based on the action probabilities for the given state.
"""
action_probs = self.behavior_policy[state]
@@ -70,7 +70,6 @@ def direction(self: Self) -> State:
"""Gets the direction of an action.
Returns:
-------
ret (State): a tuple representing the direction of the action.
"""
ret: State
@@ -101,7 +100,6 @@ class ActionResult:
"""Represent the result of an action in the context of a reinforcement learning system.
Args:
----
next_state: The next state after taking the action.
reward: The reward received for taking the action.
done: A flag indicating whether the task or episode is completed after taking the action.
@@ -133,13 +131,11 @@ def __init__(
"""Initialize a GridWorld object with the given reward map, goal state, and start state.
Args:
----
reward_map (Map): A 2D numpy array representing the reward map of the grid world.
goal_state (State): The coordinates of the goal state in the grid world.
start_state (State): The coordinates of the start state in the grid world.
Raises:
------
NumpyDimError: If the reward map has a dimension other than 2.
"""
if reward_map.ndim != MAP_DIM:
@@ -164,7 +160,6 @@ def goal_state(self: Self) -> State:
"""Return the goal state of the GridWorld.
Returns:
-------
State: The goal state of the GridWorld.
"""
@@ -185,7 +180,6 @@ def height(self: Self) -> int:
"""Return the height of the grid in the GridWorld object.
Returns:
-------
int: The height of the grid.
"""
return len(self.__reward_map)
@@ -195,7 +189,6 @@ def width(self: Self) -> int:
"""Return the width of the reward map.
Returns:
-------
the length of the first element of the private class attribute __reward_map,
which represents the width of the reward map.
"""
@@ -225,13 +218,11 @@ def next_state(self: Self, state: State, action: Action) -> State:
"""Move to the next state based on the provided action.
Args:
----
self (Self): An instance of the current object.
state (State): A tuple representing the current state (y_coordinate, x_coordinate).
action (Action): An object representing the action to be taken.
Returns:
-------
State: A tuple representing the next state after performing the action.
"""
next_state: State = (
@@ -247,11 +238,9 @@ def reward(self: Self, *, next_state: State) -> float:
"""Compute the reward for a given state transition.
Args:
----
next_state (State): The state to which transition is made.
Returns:
-------
float: The reward for transitioning to the provided state.
"""
return cast(float, self.__reward_map[next_state])
@@ -260,11 +249,9 @@ def step(self: Self, *, action: Action) -> ActionResult:
"""Perform an environment step based on the provided action.
Args:
----
action (Action): The action taken by the agent in the current state of the environment.
Returns:
-------
tuple(State, float, bool): The next state, reward from the current action and whether the goal state is reached.
"""
next_state = self.next_state(state=self.__agent_state, action=action)

1 comment on commit 055cba4

@github-actions

Coverage

Coverage Report
File     Stmts    Miss    Cover
TOTAL    991      0       100%

Tests Skipped Failures Errors Time
40 0 💤 0 ❌ 0 🔥 1.576s ⏱️
