Refactor EpsilonGreedyAgentBase and concretions to improve encapsulation
In this commit, encapsulation in EpsilonGreedyAgentBase and its concretions (EpsilonGreedyAgent and AlphaEpsilonGreedyAgent) has been improved. The 'action_size' parameter has been removed from the base class's constructor, and each child class is now responsible for initializing its own 'action_values' array. A new abstract getter, 'action_values', has been added to the base class and must be implemented by every child class. This improves encapsulation by making 'action_values' private and accessible only through its getter, and it also removes the need for the TYPE_CHECKING-guarded import in 'base.py'. Although 'action_size' now has to be passed to each concretion's constructor, the change gives each implementation full control over its own 'action_values' and brings the code more in line with OOP principles.
nakashima-hikaru committed Nov 26, 2023
1 parent 0a708f6 commit f77b9e4
Showing 3 changed files with 38 additions and 20 deletions.
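The pattern this commit enforces is that every concretion constructs and owns its own action-value array and exposes it through the abstract 'action_values' getter. As a minimal sketch (not part of the commit), a hypothetical new concretion would now look roughly like this, assuming only the base-class import path shown in the diff:

import numpy as np
import numpy.typing as npt
from typing import Self, final

from reinforcement_learning.markov_decision_process.bandit_problem.agents.base import EpsilonGreedyAgentBase


@final
class ConstantValueAgent(EpsilonGreedyAgentBase):
    """Toy concretion: keeps every action value at zero and ignores rewards."""

    def __init__(self: Self, *, epsilon: float, action_size: int, seed: int | None = None) -> None:
        # action_size is no longer forwarded to the base class
        super().__init__(epsilon=epsilon, seed=seed)
        self.__action_values: npt.NDArray[np.float64] = np.zeros(action_size, dtype=np.float64)

    @property
    def action_values(self: Self) -> npt.NDArray[np.float64]:
        # satisfies the new abstract getter; the array itself stays private
        return self.__action_values

    def update(self: Self, *, i_action: int, reward: float) -> None:
        """A real agent would update self.__action_values here."""

The base class's get_action() keeps working unchanged because it now reads the estimates through this property rather than a protected attribute.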
@@ -4,13 +4,21 @@
"""
from typing import Self, final

import numpy as np
import numpy.typing as npt

from reinforcement_learning.markov_decision_process.bandit_problem.agents.base import EpsilonGreedyAgentBase


@final
class AlphaEpsilonGreedyAgent(EpsilonGreedyAgentBase):
"""Implement an Alpha Epsilon Greedy Agent for multi-armed bandit problems."""

@property
def action_values(self: Self) -> npt.NDArray[np.float64]:
"""Return the array of action values for the current agent."""
return self.__action_values

def __init__(self: Self, *, epsilon: float, action_size: int, alpha: float, seed: int | None = None) -> None:
"""Initialize AlphaEpsilonGreedyAgent.
@@ -21,8 +29,9 @@ def __init__(self: Self, *, epsilon: float, action_size: int, alpha: float, seed
            alpha: The learning rate for updating action values.
            seed: The seed value for random number generation. Must be an integer or None.
        """
        super().__init__(epsilon=epsilon, action_size=action_size, seed=seed)
        self.alpha: float = alpha
        super().__init__(epsilon=epsilon, seed=seed)
        self.__alpha: float = alpha
        self.__action_values: npt.NDArray[np.float64] = np.zeros(action_size, dtype=np.float64)

    def update(self: Self, i_action: int, reward: float) -> None:
        """Update the action-value estimation for the specified action using the given reward.
@@ -36,4 +45,4 @@ def update(self: Self, i_action: int, reward: float) -> None:
        -------
        None
        """
        self._action_values[i_action] += (reward - self._action_values[i_action]) * self.alpha
        self.__action_values[i_action] += (reward - self.__action_values[i_action]) * self.__alpha
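For reference, the update kept on the line above (only the attribute names changed in this commit) is the standard constant step-size rule from the bandit literature: with Q the current estimate for the chosen action, R the observed reward, and alpha the fixed learning rate,

Q \leftarrow Q + \alpha \, (R - Q)

i.e. an exponential recency-weighted average that favours recent rewards, which is why this agent stores a fixed alpha rather than per-action pull counts.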
@@ -6,12 +6,10 @@
estimation and 'get_action' to get the next action following the epsilon-greedy policy.
"""
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Self
from typing import Self

import numpy as np

if TYPE_CHECKING:
    import numpy.typing as npt
import numpy.typing as npt


class EpsilonGreedyAgentBase(ABC):
@@ -21,7 +19,7 @@ class EpsilonGreedyAgentBase(ABC):
    which is an exploration-exploitation algorithm commonly used in Reinforcement Learning.
    """

    def __init__(self: Self, *, epsilon: float, action_size: int, seed: int | None) -> None:
    def __init__(self: Self, *, epsilon: float, seed: int | None) -> None:
        """Initialize EpsilonGreedyAgentBase.
        Args:
@@ -31,11 +29,14 @@ def __init__(self: Self, *, epsilon: float, action_size: int, seed: int | None)
            seed: An optional seed value for random number generation.
        """
        self._epsilon: float = epsilon
        self._action_values: npt.NDArray[np.float64] = np.zeros(action_size, dtype=np.float64)
        self._ns: npt.NDArray[np.int64] = np.zeros(action_size, dtype=np.int64)
        self.__epsilon: float = epsilon
        self.__rng: np.random.Generator = np.random.default_rng(seed=seed)

    @property
    @abstractmethod
    def action_values(self: Self) -> npt.NDArray[np.float64]:
        """Return the action value."""

    @abstractmethod
    def update(self: Self, *, i_action: int, reward: float) -> None:
        """Update the agent's internal state based on the given action and reward.
@@ -49,6 +50,6 @@ def update(self: Self, *, i_action: int, reward: float) -> None:

    def get_action(self: Self) -> int:
        """Determine an action according to its policy."""
        if self.__rng.random() < self._epsilon:
            return int(self.__rng.integers(0, len(self._action_values)))
        return int(np.argmax(self._action_values))
        if self.__rng.random() < self.__epsilon:
            return int(self.__rng.integers(0, len(self.action_values)))
        return int(np.argmax(self.action_values))
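get_action() is unchanged apart from reading the estimates through the new property: with probability epsilon it samples an arm uniformly at random, otherwise it takes the greedy arm. For K arms this corresponds to the usual epsilon-greedy selection probabilities,

\pi(a) = \begin{cases} 1 - \varepsilon + \varepsilon / K & \text{if } a = \arg\max_b Q(b), \\ \varepsilon / K & \text{otherwise,} \end{cases}

since the uniform exploration draw can also land on the greedy arm.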
@@ -6,17 +6,23 @@
otherwise exploits its current knowledge.
The agent's state and action-value estimations are updated based on the rewards received after choosing actions.
"""
from typing import Final, Self, final
from typing import Self, final

from reinforcement_learning.markov_decision_process.bandit_problem.agents.base import EpsilonGreedyAgentBase
import numpy as np
import numpy.typing as npt

SEED: Final[int] = 0
from reinforcement_learning.markov_decision_process.bandit_problem.agents.base import EpsilonGreedyAgentBase


@final
class EpsilonGreedyAgent(EpsilonGreedyAgentBase):
"""An agent for Epsilon-greedy exploration strategy for the multi-armed bandit problem."""

@property
def action_values(self: Self) -> npt.NDArray[np.float64]:
"""Return the array of action values for the current agent."""
return self.__action_values

def __init__(self: Self, epsilon: float, action_size: int, seed: int | None = None) -> None:
"""Initialize an EpsilonGreedyAgent instance.
@@ -30,7 +36,9 @@ def __init__(self: Self, epsilon: float, action_size: int, seed: int | None = No
        -------
        None
        """
        super().__init__(epsilon=epsilon, action_size=action_size, seed=seed)
        super().__init__(epsilon=epsilon, seed=seed)
        self.__ns: npt.NDArray[np.int64] = np.zeros(action_size, dtype=np.int64)
        self.__action_values: npt.NDArray[np.float64] = np.zeros(action_size, dtype=np.float64)

    def update(self: Self, *, i_action: int, reward: float) -> None:
        """Update the agent's estimate of the action value based on the received reward.
@@ -44,5 +52,5 @@ def update(self: Self, *, i_action: int, reward: float) -> None:
        -------
        None
        """
        self._ns[i_action] += 1
        self._action_values[i_action] += (reward - self._action_values[i_action]) / self._ns[i_action]
        self.__ns[i_action] += 1
        self.__action_values[i_action] += (reward - self.__action_values[i_action]) / self.__ns[i_action]

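The rewritten update above implements the incremental sample-average estimate: with n the number of times the chosen action has been taken so far (tracked in the now-private __ns array) and R_n the latest reward for it,

Q_n = Q_{n-1} + \frac{1}{n}\,(R_n - Q_{n-1})

where Q_n is the estimate after the n-th pull of that arm; this is algebraically the mean of all rewards observed for the action, so no separate running sum needs to be stored.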