From 24a3774b8f5d6eccc3f41e6947815aec4a10f2c3 Mon Sep 17 00:00:00 2001 From: rodrigoarenas456 <31422766+rodrigoarenas456@users.noreply.github.com> Date: Thu, 8 Apr 2021 20:46:29 -0500 Subject: [PATCH] ParameterGrid to remove Sklearn dependencies --- dev-requirements.txt | 8 +- examples/queing/multierlang.py | 4 +- pyworkforce/queuing/erlang.py | 6 +- pyworkforce/utils/__init__.py | 3 + pyworkforce/utils/grid.py | 113 ++++++++++++++++++ pyworkforce/utils/tests/__init__.py | 0 .../utils/tests/test_parameter_grid.py | 75 ++++++++++++ setup.py | 6 +- 8 files changed, 203 insertions(+), 12 deletions(-) create mode 100644 pyworkforce/utils/__init__.py create mode 100644 pyworkforce/utils/grid.py create mode 100644 pyworkforce/utils/tests/__init__.py create mode 100644 pyworkforce/utils/tests/test_parameter_grid.py diff --git a/dev-requirements.txt b/dev-requirements.txt index a65a8ae..c6d5e0b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,8 +2,8 @@ pytest==6.2.2 codecov==2.1.11 pytest-cov==2.11.1 twine==3.3.0 -numpy>=1.18.1 +numpy~=1.18.1 ortools>=7.8.7959 -pandas>=1.0.0 -scikit-learn>=0.20.0 -joblib>=0.11 \ No newline at end of file +pandas~=1.0.0 +joblib~=0.11 + diff --git a/examples/queing/multierlang.py b/examples/queing/multierlang.py index 3e6224d..82f2f98 100644 --- a/examples/queing/multierlang.py +++ b/examples/queing/multierlang.py @@ -2,7 +2,7 @@ param_grid = {"transactions": [100, 200], "aht": [3], "interval": [30], "asa": [20 / 60], "shrinkage": [0.3]} required_positions_scenarios = {"service_level": [0.85, 0.9], "max_occupancy": [0.8]} -service_level_scenarios= {"positions": [10, 20, 30], "scale_positions": [True, False]} +service_level_scenarios = {"positions": [10, 20, 30], "scale_positions": [True, False]} multi_erlang = MultiErlangC(param_grid=param_grid, n_jobs=-1) @@ -18,4 +18,4 @@ required_positions_scenarios = {"service_level": [0.7, 0.85, 0.9], "max_occupancy": [0.8]} positions_requirements = 
multi_erlang.required_positions(required_positions_scenarios) -print("positions_requirements: ", positions_requirements) \ No newline at end of file +print("positions_requirements: ", positions_requirements) diff --git a/pyworkforce/queuing/erlang.py b/pyworkforce/queuing/erlang.py index 6d3bed8..462a2c2 100644 --- a/pyworkforce/queuing/erlang.py +++ b/pyworkforce/queuing/erlang.py @@ -1,5 +1,5 @@ from math import exp, ceil, floor -from sklearn.model_selection import ParameterGrid +from pyworkforce.utils import ParameterGrid from joblib import Parallel, delayed @@ -132,8 +132,8 @@ def required_positions(self, service_level: float, max_occupancy: float = 1.0): class MultiErlangC: """ - This class uses de ErlangC class using joblib's Parallel allowing to run multiples scenarios with one class It - finds solutions iterating over all possible combinations provided by the users, inspired how Sklearn's Grid + This class uses the ErlangC class using joblib's Parallel allowing to run multiple scenarios with one class. + It finds solutions iterating over all possible combinations provided by the users, inspired by how Sklearn's Grid Search works """ diff --git a/pyworkforce/utils/__init__.py b/pyworkforce/utils/__init__.py new file mode 100644 index 0000000..0fceeba --- /dev/null +++ b/pyworkforce/utils/__init__.py @@ -0,0 +1,3 @@ +from pyworkforce.utils.grid import ParameterGrid + +__all__ = ["ParameterGrid"] diff --git a/pyworkforce/utils/grid.py b/pyworkforce/utils/grid.py new file mode 100644 index 0000000..8e836a3 --- /dev/null +++ b/pyworkforce/utils/grid.py @@ -0,0 +1,113 @@ +from collections.abc import Mapping, Iterable +from itertools import product +from functools import partial, reduce +import operator +import numpy as np + + +class ParameterGrid: + """ + This implementation is taken from scikit-learn: https://github.com/scikit-learn/scikit-learn + Grid of parameters with a discrete number of values for each. 
+ Can be used to iterate over parameter value combinations with the + Python built-in function iter. + The order of the generated parameter combinations is deterministic. + Read more in the :ref:`User Guide <grid_search>`. + Parameters + ---------- + param_grid : dict of str to sequence, or sequence of such + The parameter grid to explore, as a dictionary mapping estimator + parameters to sequences of allowed values. + An empty dict signifies default parameters. + A sequence of dicts signifies a sequence of grids to search, and is + useful to avoid exploring parameter combinations that make no sense + or have no effect. See the examples below. + """ + + def __init__(self, param_grid): + if not isinstance(param_grid, (Mapping, Iterable)): + raise TypeError('Parameter grid is not a dict or ' + 'a list ({!r})'.format(param_grid)) + + if isinstance(param_grid, Mapping): + # wrap dictionary in a singleton list to support either dict + # or list of dicts + param_grid = [param_grid] + + # check if all entries are dictionaries of lists + for grid in param_grid: + if not isinstance(grid, dict): + raise TypeError('Parameter grid is not a ' + 'dict ({!r})'.format(grid)) + for key in grid: + if not isinstance(grid[key], Iterable): + raise TypeError('Parameter grid value is not iterable ' + '(key={!r}, value={!r})' + .format(key, grid[key])) + + self.param_grid = param_grid + + def __iter__(self): + """Iterate over the points in the grid. + Returns + ------- + params : iterator over dict of str to any + Yields dictionaries mapping each estimator parameter to one of its + allowed values. + """ + for p in self.param_grid: + # Always sort the keys of a dictionary, for reproducibility + items = sorted(p.items()) + if not items: + yield {} + else: + keys, values = zip(*items) + for v in product(*values): + params = dict(zip(keys, v)) + yield params + + def __len__(self): + """Number of points on the grid.""" + # Product function that can handle iterables (np.prod can't). 
+ product = partial(reduce, operator.mul) + return sum(product(len(v) for v in p.values()) if p else 1 + for p in self.param_grid) + + def __getitem__(self, ind): + """Get the parameters that would be ``ind``th in iteration + Parameters + ---------- + ind : int + The iteration index + Returns + ------- + params : dict of str to any + Equal to list(self)[ind] + """ + # This is used to make discrete sampling without replacement memory + # efficient. + for sub_grid in self.param_grid: + # XXX: could memoize information used here + if not sub_grid: + if ind == 0: + return {} + else: + ind -= 1 + continue + + # Reverse so most frequent cycling parameter comes first + keys, values_lists = zip(*sorted(sub_grid.items())[::-1]) + sizes = [len(v_list) for v_list in values_lists] + total = np.prod(sizes) # np.product alias was removed in NumPy 2.0 + + if ind >= total: + # Try the next grid + ind -= total + else: + out = {} + for key, v_list, n in zip(keys, values_lists, sizes): + ind, offset = divmod(ind, n) + out[key] = v_list[offset] + return out + + raise IndexError('ParameterGrid index out of range') diff --git a/pyworkforce/utils/tests/__init__.py b/pyworkforce/utils/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyworkforce/utils/tests/test_parameter_grid.py b/pyworkforce/utils/tests/test_parameter_grid.py new file mode 100644 index 0000000..df89835 --- /dev/null +++ b/pyworkforce/utils/tests/test_parameter_grid.py @@ -0,0 +1,75 @@ +from pyworkforce.utils import ParameterGrid +from collections.abc import Iterable, Sized +from itertools import chain, product +import pytest + + +def assert_grid_iter_equals_getitem(grid): + assert list(grid) == [grid[i] for i in range(len(grid))] + + +def test_parameter_grid(): + """ + Test taken from scikit-learn + """ + # Test basic properties of ParameterGrid. 
+ params1 = {"foo": [1, 2, 3]} + grid1 = ParameterGrid(params1) + assert isinstance(grid1, Iterable) + assert isinstance(grid1, Sized) + assert len(grid1) == 3 + assert_grid_iter_equals_getitem(grid1) + + params2 = {"foo": [4, 2], + "bar": ["ham", "spam", "eggs"]} + grid2 = ParameterGrid(params2) + assert len(grid2) == 6 + + # loop to assert we can iterate over the grid multiple times + for i in range(2): + # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2) + points = set(tuple(chain(*(sorted(p.items())))) for p in grid2) + assert (points == + set(("bar", x, "foo", y) + for x, y in product(params2["bar"], params2["foo"]))) + assert_grid_iter_equals_getitem(grid2) + + # Special case: empty grid (useful to get default estimator settings) + empty = ParameterGrid({}) + assert len(empty) == 1 + assert list(empty) == [{}] + assert_grid_iter_equals_getitem(empty) + with pytest.raises(IndexError): + empty[1] + + has_empty = ParameterGrid([{'C': [1, 10]}, {}, {'C': [.5]}]) + assert len(has_empty) == 4 + assert list(has_empty) == [{'C': 1}, {'C': 10}, {}, {'C': .5}] + assert_grid_iter_equals_getitem(has_empty) + + +def test_non_iterable_parameter_grid(): + params = {"foo": 4, + "bar": ["ham", "spam", "eggs"]} + with pytest.raises(Exception) as excinfo: + grid = ParameterGrid(params) + for grid in params: + for key in grid: + assert str(excinfo.value) == 'Parameter grid value is not iterable ' + '(key={!r}, value={!r})'.format(key, grid[key]) + + +def test_non_dict_parameter_grid(): + params = [4] + with pytest.raises(Exception) as excinfo: + grid = ParameterGrid(params) + for grid in params: + assert str(excinfo.value) == 'Parameter grid is not a dict ({!r})'.format(grid) + + +def test_wrong_parameter_grid(): + params = 4 + with pytest.raises(Exception) as excinfo: + grid = ParameterGrid(params) + for grid in params: + assert str(excinfo.value) == 'Parameter grid is not a dict or a list ({!r})'.format(params) diff --git a/setup.py b/setup.py index 
18626b3..a90003d 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import pathlib from setuptools import setup, find_packages +# python setup.py sdist bdist_wheel # twine upload --skip-existing --repository-url https://test.pypi.org/legacy/ dist/* HERE = pathlib.Path(__file__).parent @@ -27,10 +28,9 @@ ], packages=find_packages(include=['pyworkforce', 'pyworkforce.*']), install_requires=[ - 'numpy>=1.18.1', + 'numpy', 'ortools>=7.8.7959', - 'pandas>=1.0.0', - 'scikit-learn>=0.20.0', + 'pandas', 'joblib>=0.11' ], python_requires=">=3.6",