diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 98ce838621..f9bc1263e2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -182,7 +182,7 @@ jobs: path: coverage - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: directory: ./coverage/ fail_ci_if_error: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a33b812fd0..207171eeaa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ exclude: | )$ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: debug-statements exclude: | @@ -25,9 +25,10 @@ repos: - id: black language_version: python3 - repo: https://github.com/pycqa/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 + language_version: python39 - repo: https://github.com/pycqa/isort rev: 5.10.1 hooks: @@ -47,7 +48,7 @@ repos: )$ args: ['--in-place', '--remove-all-unused-imports', '--remove-unused-variable'] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.982 + rev: v0.991 hooks: - id: mypy additional_dependencies: diff --git a/README.rst b/README.rst index 40bfe2e233..c2717f1460 100644 --- a/README.rst +++ b/README.rst @@ -122,10 +122,10 @@ Contributing We welcome bug reports and fixes and improvements to the documentation. For more information on contributing, please see the -`contributing guide `. +`contributing guide `__. A good place to start contributing is by looking through the issues -`here `__. Support ======= diff --git a/aesara/compile/builders.py b/aesara/compile/builders.py index b751172c0a..cca88ffbc7 100644 --- a/aesara/compile/builders.py +++ b/aesara/compile/builders.py @@ -2,7 +2,7 @@ from collections import OrderedDict from copy import copy from functools import partial -from typing import List, Optional, Sequence, cast +from typing import Dict, List, Optional, Sequence, Tuple, cast import aesara.tensor as at from aesara import function @@ -19,7 +19,6 @@ clone_replace, graph_inputs, io_connection_pattern, - replace_nominals_with_dummies, ) from aesara.graph.fg import FunctionGraph from aesara.graph.null_type import NullType @@ -82,6 +81,81 @@ def local_traverse(out): return ret +def construct_nominal_fgraph( + inputs: Sequence[Variable], outputs: Sequence[Variable] +) -> Tuple[ + FunctionGraph, + Sequence[Variable], + Dict[Variable, Variable], + Dict[Variable, Variable], +]: + """Construct an inner-`FunctionGraph` with ordered nominal inputs.""" + dummy_inputs = [] + for n, inp in enumerate(inputs): + if ( + not isinstance(inp, Variable) + or isinstance(inp, Constant) + or isinstance(inp, SharedVariable) + ): + raise TypeError( + f"Inputs and outputs must be non-Constant/shared Variable instances; got {inp}" + ) + + dummy_inputs.append(inp.type()) + + dummy_shared_inputs = [] + shared_inputs = [] + for var in graph_inputs(outputs, inputs): + if isinstance(var, SharedVariable): + # To correctly support shared variables the inner-graph should + # not see them; otherwise, there will be problems with + # gradients. + # That's why we collect the shared variables and replace them + # with dummies. 
+ shared_inputs.append(var) + dummy_shared_inputs.append(var.type()) + elif var not in inputs and not isinstance(var, Constant): + raise MissingInputError(f"OpFromGraph is missing an input: {var}") + + replacements = dict(zip(inputs + shared_inputs, dummy_inputs + dummy_shared_inputs)) + + new = rebuild_collect_shared( + cast(Sequence[Variable], outputs), + inputs=inputs + shared_inputs, + replace=replacements, + copy_inputs_over=False, + ) + ( + local_inputs, + local_outputs, + (clone_d, update_d, update_expr, new_shared_inputs), + ) = new + + assert len(local_inputs) == len(inputs) + len(shared_inputs) + assert len(local_outputs) == len(outputs) + assert not update_d + assert not update_expr + assert not new_shared_inputs + + fgraph = FunctionGraph(local_inputs, local_outputs, clone=False) + + # The inputs need to be `NominalVariable`s so that we can merge + # inner-graphs + nominal_local_inputs = tuple( + NominalVariable(n, var.type) for n, var in enumerate(local_inputs) + ) + + fgraph.replace_all(zip(local_inputs, nominal_local_inputs)) + + for i, inp in enumerate(fgraph.inputs): + nom_inp = nominal_local_inputs[i] + fgraph.inputs[i] = nom_inp + fgraph.clients.pop(inp, None) + fgraph.add_input(nom_inp) + + return fgraph, shared_inputs, update_d, update_expr + + class OpFromGraph(Op, HasInnerGraph): r""" This creates an `Op` from inputs and outputs lists of variables. @@ -333,66 +407,21 @@ def __init__( if not (isinstance(inputs, list) and isinstance(outputs, list)): raise TypeError("Inputs and outputs must be lists") - for i in inputs + outputs: - if not isinstance(i, Variable): + for out in outputs: + if not isinstance(out, Variable): raise TypeError( - f"Inputs and outputs must be Variable instances; got {i}" + f"Inputs and outputs must be Variable instances; got {out}" ) - if i in inputs: - if isinstance(i, Constant): - raise TypeError(f"Constants not allowed as inputs; {i}") - if isinstance(i, SharedVariable): - raise TypeError(f"SharedVariables not allowed as inputs; {i}") - - for var in graph_inputs(outputs, inputs): - if var not in inputs and not isinstance(var, (Constant, SharedVariable)): - raise MissingInputError(f"OpFromGraph is missing an input: {var}") if "updates" in kwargs or "givens" in kwargs: - raise NotImplementedError("Updates and givens are not allowed here") + raise NotImplementedError("Updates and givens are not supported") self.is_inline = inline - # To correctly support shared variables the inner fct should - # not see them. Otherwise there is a problem with the gradient. 
- self.shared_inputs = [] - for var in graph_inputs(outputs): - if isinstance(var, SharedVariable): - self.shared_inputs.append(var) - - inputs, outputs = replace_nominals_with_dummies(inputs, outputs) - - # The inputs should be `NominalVariable`s, so that graphs can be merged - replacements = {} - for n, v in enumerate(inputs): - replacements[v] = NominalVariable(n, v.type) - - shared_vars = [ - NominalVariable(n, var.type) - for n, var in enumerate(self.shared_inputs, start=len(inputs) + 1) - ] - - replacements.update(dict(zip(self.shared_inputs, shared_vars))) - - new = rebuild_collect_shared( - cast(Sequence[Variable], outputs), - inputs=inputs + shared_vars, - replace=replacements, - copy_inputs_over=False, + self.fgraph, self.shared_inputs, _, _ = construct_nominal_fgraph( + inputs, outputs ) - ( - local_inputs, - local_outputs, - (clone_d, update_d, update_expr, shared_inputs), - ) = new - - assert len(local_inputs) == len(inputs) + len(self.shared_inputs) - assert len(local_outputs) == len(outputs) - assert not update_d - assert not update_expr - assert not shared_inputs - - self.fgraph = FunctionGraph(local_inputs, local_outputs, clone=False) + self.kwargs = kwargs self.input_types = [inp.type for inp in inputs] self.output_types = [out.type for out in outputs] @@ -415,6 +444,7 @@ def __init__( else: self.set_lop_overrides("default") self._lop_type = "lop" + self.set_rop_overrides(rop_overrides) self._connection_pattern = connection_pattern diff --git a/aesara/compile/debugmode.py b/aesara/compile/debugmode.py index 39d3cf4c3b..c3c116b091 100644 --- a/aesara/compile/debugmode.py +++ b/aesara/compile/debugmode.py @@ -848,17 +848,17 @@ def _get_preallocated_maps( or "ALL" in prealloc_modes ): max_ndim = 0 - rev_out_broadcastable = [] + rev_out_shape = [] for r in considered_outputs: if isinstance(r.type, TensorType): if max_ndim < r.ndim: - rev_out_broadcastable += [True] * (r.ndim - max_ndim) + rev_out_shape += [1] * (r.ndim - max_ndim) max_ndim = r.ndim - assert len(rev_out_broadcastable) == max_ndim + assert len(rev_out_shape) == max_ndim - for i, b in enumerate(r.broadcastable[::-1]): - rev_out_broadcastable[i] = rev_out_broadcastable[i] and b - out_broadcastable = rev_out_broadcastable[::-1] + for i, s in enumerate(r.type.shape[::-1]): + rev_out_shape[i] = 1 if rev_out_shape[i] == 1 and s == 1 else None + out_shape = rev_out_shape[::-1] if "strided" in prealloc_modes or "ALL" in prealloc_modes: check_ndim = config.DebugMode__check_preallocated_output_ndim @@ -887,14 +887,14 @@ def _get_preallocated_maps( # Moreover, to avoid memory problems, we do not test with strides # 2 and -2 on those dimensions. step_signs_list = [] - for b in out_broadcastable[-check_ndim:]: - if b: + for s in out_shape[-check_ndim:]: + if s == 1: step_signs_list.append((1,)) else: step_signs_list.append((-1, 1)) # Use the same step on all dimensions before the last check_ndim. 
- if all(out_broadcastable[:-check_ndim]): + if all(s == 1 for s in out_shape[:-check_ndim]): step_signs_list = [(1,)] + step_signs_list else: step_signs_list = [(-1, 1)] + step_signs_list @@ -905,7 +905,7 @@ def _get_preallocated_maps( # First, the dimensions above check_ndim, then the other ones # Do not test with 2 or -2 for dimensions above check_ndim - steps = [step_signs[0]] * len(out_broadcastable[:-check_ndim]) + steps = [step_signs[0]] * len(out_shape[:-check_ndim]) steps += [s * step_size for s in step_signs[1:]] name = f"strided{tuple(steps)}" @@ -932,8 +932,8 @@ def _get_preallocated_maps( if "wrong_size" in prealloc_modes or "ALL" in prealloc_modes: # For each dimension, try size-1, size, size+1 - for dim, b in enumerate(out_broadcastable): - if b: + for dim, s in enumerate(out_shape): + if s == 1: # The shape has to be 1 continue @@ -947,11 +947,11 @@ def _get_preallocated_maps( for r in considered_outputs: if isinstance(r.type, TensorType): r_shape_diff = shape_diff[: r.ndim] - out_shape = [ + new_buf_shape = [ max((s + sd), 0) for s, sd in zip(r_vals[r].shape, r_shape_diff) ] - new_buf = np.empty(out_shape, dtype=r.type.dtype) + new_buf = np.empty(new_buf_shape, dtype=r.type.dtype) new_buf[...] = np.asarray(def_val).astype(r.type.dtype) wrong_size[r] = new_buf diff --git a/aesara/compile/function/pfunc.py b/aesara/compile/function/pfunc.py index dfe5ff6e16..72ca466ecb 100644 --- a/aesara/compile/function/pfunc.py +++ b/aesara/compile/function/pfunc.py @@ -3,7 +3,6 @@ """ -import logging from copy import copy from typing import Optional @@ -16,11 +15,6 @@ from aesara.graph.fg import FunctionGraph -_logger = logging.getLogger("aesara.compile.function.pfunc") - -__docformat__ = "restructuredtext en" - - def rebuild_collect_shared( outputs, inputs=None, @@ -78,10 +72,12 @@ def rebuild_collect_shared( shared_inputs = [] def clone_v_get_shared_updates(v, copy_inputs_over): - """ - Clones a variable and its inputs recursively until all are in clone_d. - Also appends all shared variables met along the way to shared inputs, - and their default_update (if applicable) to update_d and update_expr. + r"""Clones a variable and its inputs recursively until all are in `clone_d`. + + Also, it appends all `SharedVariable`\s met along the way to + `shared_inputs` and their corresponding + `SharedVariable.default_update`\s (when applicable) to `update_d` and + `update_expr`. 
""" # this co-recurses with clone_a @@ -103,7 +99,7 @@ def clone_v_get_shared_updates(v, copy_inputs_over): elif isinstance(v, SharedVariable): if v not in shared_inputs: shared_inputs.append(v) - if hasattr(v, "default_update"): + if v.default_update is not None: # Check that v should not be excluded from the default # updates list if no_default_updates is False or ( @@ -419,22 +415,24 @@ def construct_pfunc_ins_and_outs( givens = [] if not isinstance(params, (list, tuple)): - raise Exception("in pfunc() the first argument must be a list or " "a tuple") + raise TypeError("The `params` argument must be a list or a tuple") if not isinstance(no_default_updates, bool) and not isinstance( no_default_updates, list ): - raise TypeError("no_default_update should be either a boolean or " "a list") + raise TypeError("The `no_default_update` argument must be a boolean or list") - if len(updates) > 0 and any( - isinstance(v, Variable) for v in iter_over_pairs(updates) + if len(updates) > 0 and not all( + isinstance(pair, (tuple, list)) + and len(pair) == 2 + and isinstance(pair[0], Variable) + for pair in iter_over_pairs(updates) ): - raise ValueError( - "The updates parameter must be an OrderedDict/dict or a list of " - "lists/tuples with 2 elements" + raise TypeError( + "The `updates` parameter must be an ordered mapping or a list of pairs" ) - # transform params into aesara.compile.In objects. + # Transform params into aesara.compile.In objects. inputs = [ _pfunc_param_to_in(p, allow_downcast=allow_input_downcast) for p in params ] diff --git a/aesara/compile/function/types.py b/aesara/compile/function/types.py index 6f9db79c6a..7faa254047 100644 --- a/aesara/compile/function/types.py +++ b/aesara/compile/function/types.py @@ -32,6 +32,7 @@ if TYPE_CHECKING: + from aesara.compile.mode import Mode from aesara.link.vm import VM @@ -1391,9 +1392,16 @@ def check_unused_inputs(inputs, outputs, on_unused_input): @staticmethod def prepare_fgraph( - inputs, outputs, additional_outputs, fgraph, rewriter, linker, profile + inputs, + outputs, + additional_outputs, + fgraph: FunctionGraph, + mode: "Mode", + profile, ): + rewriter = mode.optimizer + try: start_rewriter = time.perf_counter() @@ -1401,6 +1409,7 @@ def prepare_fgraph( rewrite_time = None with config.change_flags( + mode=mode, compute_test_value=config.compute_test_value_opt, traceback__limit=config.traceback__compile_limit, ): @@ -1440,7 +1449,7 @@ def prepare_fgraph( stacklevel=3, ) - if not hasattr(linker, "accept"): + if not hasattr(mode.linker, "accept"): raise ValueError( "'linker' parameter of FunctionMaker should be " f"a Linker with an accept method or one of {list(aesara.compile.mode.predefined_linkers.keys())}" @@ -1511,12 +1520,8 @@ def __init__( self.fgraph = fgraph - rewriter, linker = mode.optimizer, copy.copy(mode.linker) - if not no_fgraph_prep: - self.prepare_fgraph( - inputs, outputs, found_updates, fgraph, rewriter, linker, profile - ) + self.prepare_fgraph(inputs, outputs, found_updates, fgraph, mode, profile) assert len(fgraph.outputs) == len(outputs + found_updates) @@ -1528,6 +1533,8 @@ def __init__( if not spec.borrow ] + linker = copy.copy(mode.linker) + if no_borrow: self.linker = linker.accept( fgraph, diff --git a/aesara/compile/mode.py b/aesara/compile/mode.py index 5232163ba2..b753d26695 100644 --- a/aesara/compile/mode.py +++ b/aesara/compile/mode.py @@ -7,6 +7,8 @@ import warnings from typing import Optional, Tuple, Union +from typing_extensions import Literal + from aesara.compile.function.types import Supervisor 
from aesara.configdefaults import config from aesara.graph.destroyhandler import DestroyHandler @@ -530,3 +532,26 @@ def register_mode(name, mode): if name in predefined_modes: raise ValueError(f"Mode name already taken: {name}") predefined_modes[name] = mode + + +def get_target_language(mode=None) -> Tuple[Literal["py", "c", "numba", "jax"], ...]: + """Get the compilation target language.""" + + if mode is None: + mode = get_default_mode() + + linker = mode.linker + + if isinstance(linker, NumbaLinker): + return ("numba",) + if isinstance(linker, JAXLinker): + return ("jax",) + if isinstance(linker, PerformLinker): + return ("py",) + if isinstance(linker, CLinker): + return ("c",) + + if isinstance(linker, (VMLinker, OpWiseCLinker)): + return ("c", "py") if config.cxx else ("py",) + + raise Exception(f"Unsupported Linker: {linker}") diff --git a/aesara/compile/profiling.py b/aesara/compile/profiling.py index 15c57fdf74..05dfc212f9 100644 --- a/aesara/compile/profiling.py +++ b/aesara/compile/profiling.py @@ -43,7 +43,7 @@ def extended_open(filename, mode="r"): logger = logging.getLogger("aesara.compile.profiling") -aesara_imported_time: float = time.time() +aesara_imported_time: float = time.perf_counter() total_fct_exec_time: float = 0.0 total_graph_rewrite_time: float = 0.0 total_time_linker: float = 0.0 @@ -165,7 +165,7 @@ def print_global_stats(): print( ( "Global stats: ", - f"Time elasped since Aesara import = {time.time() - aesara_imported_time:6.3f}s, " + f"Time elasped since Aesara import = {time.perf_counter() - aesara_imported_time:6.3f}s, " f"Time spent in Aesara functions = {total_fct_exec_time:6.3f}s, " "Time spent compiling Aesara functions: " f"rewriting = {total_graph_rewrite_time:6.3f}s, linking = {total_time_linker:6.3f}s ", @@ -831,7 +831,7 @@ def summary_globals(self, file): f"Time in all call to aesara.grad() {aesara.gradient.grad_time:e}s", file=file, ) - total_time = time.time() - aesara_imported_time + total_time = time.perf_counter() - aesara_imported_time print(f"Time since aesara import {total_time:.3f}s", file=file) def summary_memory(self, file, N=None): @@ -1299,9 +1299,9 @@ def compute_max_stats(running_memory, stats): # Config: whether print min memory peak if config.profiling__min_peak_memory: node_list = fgraph.apply_nodes - ttt = time.time() + ttt = time.perf_counter() min_peak = count_minimum_peak(node_list, fgraph, nodes_mem) - min_peak_time += time.time() - ttt + min_peak_time += time.perf_counter() - ttt min_max_peak = max(min_max_peak, min_peak) del fgraph, nodes_mem diff --git a/aesara/compile/sharedvalue.py b/aesara/compile/sharedvalue.py index 6852911bfb..2004f330b3 100644 --- a/aesara/compile/sharedvalue.py +++ b/aesara/compile/sharedvalue.py @@ -1,13 +1,9 @@ -""" -Provide a simple user friendly API to Aesara-managed memory. - -""" +"""Provide a simple user friendly API to Aesara-managed memory.""" import copy from contextlib import contextmanager -from typing import List, Optional - -import numpy as np +from functools import singledispatch +from typing import TYPE_CHECKING, List, Optional from aesara.graph.basic import Variable from aesara.graph.utils import add_tag_trace @@ -15,6 +11,10 @@ from aesara.link.c.type import generic +if TYPE_CHECKING: + from aesara.graph.type import Type + + __SHARED_CONTEXT__: Optional[List[Variable]] = None @@ -32,54 +32,41 @@ def collect_new_shareds(): class SharedVariable(Variable): - """ - Variable that is (defaults to being) shared between functions that - it appears in. 
- - Parameters - ---------- - name : str - The name for this variable (see `Variable`). - type : str - The type for this variable (see `Variable`). - value - A value to associate with this variable (a new container will be - created). - strict - True : assignments to .value will not be cast or copied, so they must - have the correct type. - allow_downcast - Only applies if `strict` is False. - True : allow assigned value to lose precision when cast during - assignment. - False : never allow precision loss. - None : only allow downcasting of a Python float to a scalar floatX. - container - The container to use for this variable. Illegal to pass this as well as - a value. - - Notes - ----- - For more user-friendly constructor, see `shared`. - - """ - - # Container object - container = None - """ - A container to use for this SharedVariable when it is an implicit - function parameter. - - :type: `Container` - """ - - # default_update - # If this member is present, its value will be used as the "update" for - # this Variable, unless another update value has been passed to "function", - # or the "no_default_updates" list passed to "function" contains it. + """Variable that is shared between compiled functions.""" + + def __init__( + self, + type: "Type", + value, + strict: bool, + allow_downcast=None, + container: Optional[Container] = None, + name: Optional[str] = None, + ): + r""" + Parameters + ---------- + type + The `Type` for this variable (see `Variable`). + value + A value to associate with this variable (a new container will be + created). + strict + ``True`` means that values assigned to this variable will not be + cast or copied, so they must have the correct `Type`\s. + allow_downcast + Only applies if `strict` is ``False``. + ``True`` means that the assigned value can lose precision when cast + during assignment. ``None`` means that only down-casting of a Python + float to a scalar ``floatX`` is allowed. + container + The container to use for this variable. Illegal to pass this as well as + a value. + name + The name for this variable (see `Variable`). - def __init__(self, name, type, value, strict, allow_downcast=None, container=None): - super().__init__(type=type, name=name, owner=None, index=None) + """ + super().__init__(type=type, owner=None, index=None, name=name) if container is not None: self.container = container @@ -103,6 +90,8 @@ def __init__(self, name, type, value, strict, allow_downcast=None, container=Non if isinstance(__SHARED_CONTEXT__, list): __SHARED_CONTEXT__.append(self) + self._default_update: Optional[Variable] = None + def get_value(self, borrow=False, return_internal_type=False): """ Get the non-symbolic value associated with this SharedVariable. @@ -147,29 +136,10 @@ def set_value(self, new_value, borrow=False): def get_test_value(self): return self.get_value(borrow=True, return_internal_type=True) - def zero(self, borrow=False): - """ - Set the values of a shared variable to 0. - - Parameters - ---------- - borrow : bbol - True to modify the value of a shared variable directly by using - its previous value. Potentially this can cause problems - regarding to the aliased memory. - - Changes done with this function will be visible to all functions using - this SharedVariable. - - """ - if borrow: - self.container.value[...] 
= 0 - else: - self.container.value = 0 * self.container.value - - def clone(self): + def clone(self, **kwargs): + name = kwargs.get("name", self.name) cp = self.__class__( - name=self.name, + name=name, type=self.type, value=None, strict=None, @@ -178,143 +148,77 @@ def clone(self): cp.tag = copy.copy(self.tag) return cp - def __getitem__(self, *args): - # __getitem__ is not available for generic SharedVariable objects. - # We raise a TypeError like Python would do if __getitem__ was not - # implemented at all, but with a more explicit error message to help - # Aesara users figure out the root of the problem more easily. - value = self.get_value(borrow=True) - if isinstance(value, np.ndarray): - # Array probably had an unknown dtype. - msg = ( - f"a Numpy array with dtype: '{value.dtype}'. This data type is not " - "currently recognized by Aesara tensors: please cast " - "your data into a supported numeric type if you need " - "Aesara tensor functionalities." - ) - else: - msg = ( - f"an object of type: {type(value)}. Did you forget to cast it into " - "a Numpy array before calling aesara.shared()?" - ) - - raise TypeError( - "The generic 'SharedVariable' object is not subscriptable. " - f"This shared variable contains {msg}" - ) - - def _value_get(self): - raise Exception( - "sharedvar.value does not exist anymore. Use " - "sharedvar.get_value() or sharedvar.set_value()" - " instead." - ) - - def _value_set(self, new_value): - raise Exception( - "sharedvar.value does not exist anymore. Use " - "sharedvar.get_value() or sharedvar.set_value()" - " instead." - ) - - # We keep this just to raise an error - value = property(_value_get, _value_set) + @property + def default_update(self) -> Optional[Variable]: + """A default update expression for this `Variable`. + If this value is non-``None``, its value will be used as the `update` + (see `aesara.function`) for this `Variable` when no updates are + provided through `aesara.function` and `no_default_updates` isn't + enabled. + """ + return self._default_update -def shared_constructor(ctor, remove=False): - if remove: - shared.constructors.remove(ctor) - else: - shared.constructors.append(ctor) - return ctor + @default_update.setter + def default_update(self, value): + if value is not None: + self._default_update = self.type.filter_variable(value, allow_convert=True) + else: + self._default_update = value def shared(value, name=None, strict=False, allow_downcast=None, **kwargs): - """Return a SharedVariable Variable, initialized with a copy or - reference of `value`. + r"""Create a `SharedVariable` initialized with a copy or reference of `value`. This function iterates over constructor functions to find a - suitable SharedVariable subclass. The suitable one is the first + suitable `SharedVariable` subclass. The suitable one is the first constructor that accept the given value. See the documentation of :func:`shared_constructor` for the definition of a constructor function. This function is meant as a convenient default. If you want to use a - specific shared variable constructor, consider calling it directly. + specific constructor, consider calling it directly. - ``aesara.shared`` is a shortcut to this function. - - .. attribute:: constructors - - A list of shared variable constructors that will be tried in reverse - order. + `aesara.shared` is a shortcut to this function. Notes ----- By passing kwargs, you effectively limit the set of potential constructors to those that can accept those kwargs. 
- Some shared variable have ``borrow`` as extra kwargs. + Some shared variable have `borrow` as a kwarg. - Some shared variable have ``broadcastable`` as extra kwargs. As shared + `SharedVariable`\s of `TensorType` have `broadcastable` as a kwarg. As shared variable shapes can change, all dimensions default to not being - broadcastable, even if ``value`` has a shape of 1 along some dimension. - This parameter allows you to create for example a `row` or `column` 2d - tensor. + broadcastable, even if `value` has a shape of 1 along some dimension. + This parameter allows one to create for example a row or column tensor. """ - try: - if isinstance(value, Variable): - raise TypeError( - "Shared variable constructor needs numeric " - "values and not symbolic variables." - ) - - for ctor in reversed(shared.constructors): - try: - var = ctor( - value, - name=name, - strict=strict, - allow_downcast=allow_downcast, - **kwargs, - ) - add_tag_trace(var) - return var - except TypeError: - continue - # This may happen when kwargs were supplied - # if kwargs were given, the generic_constructor won't be callable. - # - # This was done on purpose, the rationale being that if kwargs - # were supplied, the user didn't want them to be ignored. + if isinstance(value, Variable): + raise TypeError("Shared variable values can not be symbolic.") + try: + var = shared_constructor( + value, + name=name, + strict=strict, + allow_downcast=allow_downcast, + **kwargs, + ) + add_tag_trace(var) + return var except MemoryError as e: e.args = e.args + ("Consider using `aesara.shared(..., borrow=True)`",) raise - raise TypeError( - "No suitable SharedVariable constructor could be found." - " Are you sure all kwargs are supported?" - " We do not support the parameter dtype or type." - f' value="{value}". parameters="{kwargs}"' - ) - -shared.constructors = [] - - -@shared_constructor -def generic_constructor(value, name=None, strict=False, allow_downcast=None): - """ - SharedVariable Constructor. - - """ +@singledispatch +def shared_constructor(value, name=None, strict=False, allow_downcast=None, **kwargs): return SharedVariable( type=generic, value=value, - name=name, strict=strict, allow_downcast=allow_downcast, + name=name, ) diff --git a/aesara/gradient.py b/aesara/gradient.py index 51b2bb77ad..db47557f20 100644 --- a/aesara/gradient.py +++ b/aesara/gradient.py @@ -492,7 +492,7 @@ def grad( respect to the output, then a zero variable is returned. 
""" - t0 = time.time() + t0 = time.perf_counter() if cost is None: if known_grads is None: @@ -643,7 +643,7 @@ def handle_disconnected(var): else: assert return_disconnected.lower() == "disconnected" - t1 = time.time() + t1 = time.perf_counter() global grad_time grad_time += t1 - t0 @@ -1802,13 +1802,11 @@ def verify_grad( mode=mode, ) - tensor_pt = [ - aesara.tensor.type.TensorType( - aesara.tensor.as_tensor_variable(p).dtype, - aesara.tensor.as_tensor_variable(p).broadcastable, - )(name=f"input {i}") - for i, p in enumerate(pt) - ] + tensor_pt = [] + for i, p in enumerate(pt): + p_t = aesara.tensor.as_tensor_variable(p).type() + p_t.name = f"input {i}" + tensor_pt.append(p_t) # fun can be either a function or an actual Op instance o_output = fun(*tensor_pt) diff --git a/aesara/graph/basic.py b/aesara/graph/basic.py index de5897f12e..e8d3392c4d 100644 --- a/aesara/graph/basic.py +++ b/aesara/graph/basic.py @@ -510,9 +510,14 @@ def __repr__(self, firstPass=True): pass return "\n".join(to_print) - def clone(self): + def clone(self, **kwargs): """Return a new, un-owned `Variable` like `self`. + Parameters + ---------- + **kwargs : dict + Optional "name" keyword argument for the copied instance. Same as `self.name` if value not provided. + Returns ------- Variable instance @@ -523,7 +528,8 @@ def clone(self): Tags and names are copied to the returned instance. """ - cp = self.__class__(self.type, None, None, self.name) + name = kwargs.pop("name", self.name) + cp = self.__class__(type=self.type, owner=None, index=None, name=name, **kwargs) cp.tag = copy(self.tag) return cp @@ -621,8 +627,8 @@ def __getstate__(self): class AtomicVariable(Variable[_TypeType, None]): """A node type that has no ancestors and should never be considered an input to a graph.""" - def __init__(self, type: _TypeType, **kwargs): - super().__init__(type, None, None, **kwargs) + def __init__(self, type: _TypeType, name: Optional[str] = None, **kwargs): + super().__init__(type=type, owner=None, index=None, name=name, **kwargs) @abc.abstractmethod def signature(self): @@ -656,6 +662,12 @@ def index(self, value): if value is not None: raise ValueError("AtomicVariable instances cannot have an index.") + def clone(self, **kwargs): + name = kwargs.pop("name", self.name) + cp = self.__class__(type=self.type, name=name, **kwargs) + cp.tag = copy(self.tag) + return cp + class NominalVariable(AtomicVariable[_TypeType]): """A variable that enables alpha-equivalent comparisons.""" @@ -682,12 +694,13 @@ def _str(self): return cls.__instances__[(typ, id)] - def __init__(self, id: _IdType, typ: _TypeType, **kwargs): + def __init__(self, id: _IdType, typ: _TypeType, name: Optional[str] = None): self.id = id - super().__init__(typ, **kwargs) + super().__init__(type=typ, name=name) - def clone(self): - return self + def clone(self, **kwargs): + name = kwargs.pop("name", self.name) + return self.__class__(id=self.id, typ=self.type, name=name, **kwargs) def __eq__(self, other): if self is other: @@ -744,8 +757,7 @@ def __str__(self): name = name[:10] + "..." + name[-10:] return f"{type(self).__name__}{{{name}}}" - def clone(self): - """Return `self`, because there's no reason to clone a constant.""" + def clone(self, **kwargs): return self @property diff --git a/aesara/graph/features.py b/aesara/graph/features.py index 4e69a654bb..73a625409f 100644 --- a/aesara/graph/features.py +++ b/aesara/graph/features.py @@ -473,7 +473,7 @@ def validate_(self, fgraph): exception. replace_all_validate will print out the verbose output. 
Or it has to be done here before raise. """ - t0 = time.time() + t0 = time.perf_counter() try: ret = fgraph.execute_callbacks("validate") except Exception as e: @@ -494,7 +494,7 @@ def validate_(self, fgraph): reason = uf_info.function print(f"validate failed on node {r}.\n Reason: {reason}, {e}") raise - t1 = time.time() + t1 = time.perf_counter() if fgraph.profile: fgraph.profile.validate_time += t1 - t0 return ret diff --git a/aesara/graph/fg.py b/aesara/graph/fg.py index 26fb74bd7f..68e7a6a26c 100644 --- a/aesara/graph/fg.py +++ b/aesara/graph/fg.py @@ -712,7 +712,7 @@ def execute_callbacks(self, name: str, *args, **kwargs) -> None: a method called after name. """ - t0 = time.time() + t0 = time.perf_counter() for feature in self._features: try: fn = getattr(feature, name) @@ -721,10 +721,10 @@ def execute_callbacks(self, name: str, *args, **kwargs) -> None: # try; the AttributeError really must come from feature.${name} # not existing continue - tf0 = time.time() + tf0 = time.perf_counter() fn(self, *args, **kwargs) - self.execute_callbacks_times[feature] += time.time() - tf0 - self.execute_callbacks_time += time.time() - t0 + self.execute_callbacks_times[feature] += time.perf_counter() - tf0 + self.execute_callbacks_time += time.perf_counter() - t0 def collect_callbacks(self, name: str, *args) -> Dict[Feature, Any]: """Collects callbacks diff --git a/aesara/graph/rewriting/basic.py b/aesara/graph/rewriting/basic.py index 586dca33d3..a6bd80f9b2 100644 --- a/aesara/graph/rewriting/basic.py +++ b/aesara/graph/rewriting/basic.py @@ -298,9 +298,9 @@ def apply(self, fgraph): for rewriter in self.data: try: nb_nodes_before = len(fgraph.apply_nodes) - t0 = time.time() + t0 = time.perf_counter() sub_prof = rewriter.apply(fgraph) - l.append(float(time.time() - t0)) + l.append(float(time.perf_counter() - t0)) sub_profs.append(sub_prof) nb_nodes.append((nb_nodes_before, len(fgraph.apply_nodes))) if fgraph.profile: @@ -701,7 +701,7 @@ def add_requirements(self, fgraph): def apply(self, fgraph): sched = fgraph.merge_feature.scheduled nb_fail = 0 - t0 = time.time() + t0 = time.perf_counter() if fgraph.profile: validate_before = fgraph.profile.validate_time callback_before = fgraph.execute_callbacks_time @@ -807,7 +807,7 @@ def apply(self, fgraph): return ( nb_fail, - time.time() - t0, + time.perf_counter() - t0, validate_time, callback_time, callbacks_time, @@ -1066,9 +1066,9 @@ def get_rewrites(self, node): return self.track_dict[type(node.op)] def time_call(self, fn): - start = time.time() + start = time.perf_counter() fn() - return time.time() - start + return time.perf_counter() - start class FromFunctionNodeRewriter(NodeRewriter): @@ -1303,9 +1303,9 @@ def transform(self, fgraph, node): new_repl = None for rewrite in rewrites: - rewrite_start = time.time() + rewrite_start = time.perf_counter() new_repl = rewrite.transform(fgraph, node) - rewrite_finish = time.time() + rewrite_finish = time.perf_counter() if self.profile: self.time_rewrites[rewrite] += rewrite_start - rewrite_finish self.process_count[rewrite] += 1 @@ -2026,9 +2026,9 @@ def apply(self, fgraph, start_from=None): start_from = fgraph.outputs callback_before = fgraph.execute_callbacks_time nb_nodes_start = len(fgraph.apply_nodes) - t0 = time.time() + t0 = time.perf_counter() q = deque(io_toposort(fgraph.inputs, start_from)) - io_t = time.time() - t0 + io_t = time.perf_counter() - t0 def importer(node): if node is not current_node: @@ -2039,7 +2039,7 @@ def importer(node): ) nb = 0 try: - t0 = time.time() + t0 = time.perf_counter() 
while q: if self.order == "out_to_in": node = q.pop() @@ -2049,7 +2049,7 @@ def importer(node): continue current_node = node nb += self.process_node(fgraph, node) - loop_t = time.time() - t0 + loop_t = time.perf_counter() - t0 finally: self.detach_updater(fgraph, u) @@ -2367,9 +2367,9 @@ def apply_cleanup(profs_dict): for crewriter in self.cleanup_rewriters: change_tracker.reset() nb = change_tracker.nb_imported - t_rewrite = time.time() + t_rewrite = time.perf_counter() sub_prof = crewriter.apply(fgraph) - time_rewriters[crewriter] += time.time() - t_rewrite + time_rewriters[crewriter] += time.perf_counter() - t_rewrite profs_dict[crewriter].append(sub_prof) if change_tracker.changed: process_count.setdefault(crewriter, 0) @@ -2381,7 +2381,7 @@ def apply_cleanup(profs_dict): while changed and not max_use_abort: process_count = {} - t0 = time.time() + t0 = time.perf_counter() changed = False iter_cleanup_sub_profs = {} for crewrite in self.cleanup_rewriters: @@ -2392,9 +2392,9 @@ def apply_cleanup(profs_dict): for grewrite in self.global_rewriters: change_tracker.reset() nb = change_tracker.nb_imported - t_rewrite = time.time() + t_rewrite = time.perf_counter() sub_prof = grewrite.apply(fgraph) - time_rewriters[grewrite] += time.time() - t_rewrite + time_rewriters[grewrite] += time.perf_counter() - t_rewrite sub_profs.append(sub_prof) if change_tracker.changed: process_count.setdefault(grewrite, 0) @@ -2409,13 +2409,13 @@ def apply_cleanup(profs_dict): ) global_sub_profs.append(sub_profs) - global_rewriter_timing.append(float(time.time() - t0)) + global_rewriter_timing.append(float(time.perf_counter() - t0)) changed |= apply_cleanup(iter_cleanup_sub_profs) - topo_t0 = time.time() + topo_t0 = time.perf_counter() q = deque(io_toposort(fgraph.inputs, start_from)) - io_toposort_timing.append(time.time() - topo_t0) + io_toposort_timing.append(time.perf_counter() - topo_t0) nb_nodes.append(len(q)) max_nb_nodes = max(max_nb_nodes, len(q)) @@ -2443,11 +2443,11 @@ def chin(node, i, r, new_r, reason): current_node = node for node_rewriter in self.node_tracker.get_trackers(node.op): nb = change_tracker.nb_imported - t_rewrite = time.time() + t_rewrite = time.perf_counter() node_rewriter_change = self.process_node( fgraph, node, node_rewriter ) - time_rewriters[node_rewriter] += time.time() - t_rewrite + time_rewriters[node_rewriter] += time.perf_counter() - t_rewrite if not node_rewriter_change: continue process_count.setdefault(node_rewriter, 0) @@ -2469,13 +2469,13 @@ def chin(node, i, r, new_r, reason): # Apply final rewriters sub_profs = [] - t_before_final_rewrites = time.time() + t_before_final_rewrites = time.perf_counter() for grewrite in self.final_rewriters: change_tracker.reset() nb = change_tracker.nb_imported - t_rewrite = time.time() + t_rewrite = time.perf_counter() sub_prof = grewrite.apply(fgraph) - time_rewriters[grewrite] += time.time() - t_rewrite + time_rewriters[grewrite] += time.perf_counter() - t_rewrite sub_profs.append(sub_prof) if change_tracker.changed: process_count.setdefault(grewrite, 0) @@ -2490,7 +2490,7 @@ def chin(node, i, r, new_r, reason): ) final_sub_profs.append(sub_profs) - global_rewriter_timing[-1] += time.time() - t_before_final_rewrites + global_rewriter_timing[-1] += time.perf_counter() - t_before_final_rewrites changed |= apply_cleanup(iter_cleanup_sub_profs) @@ -2504,7 +2504,7 @@ def chin(node, i, r, new_r, reason): cleanup_sub_profs.append(c_sub_profs) loop_process_count.append(process_count) - loop_timing.append(float(time.time() - t0)) + 
loop_timing.append(float(time.perf_counter() - t0)) end_nb_nodes = len(fgraph.apply_nodes) diff --git a/aesara/link/c/cmodule.py b/aesara/link/c/cmodule.py index 58102b7303..02b4e0f11b 100644 --- a/aesara/link/c/cmodule.py +++ b/aesara/link/c/cmodule.py @@ -326,11 +326,11 @@ def dlimport(fullpath, suffix=None): global import_time try: importlib.invalidate_caches() - t0 = time.time() + t0 = time.perf_counter() with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="numpy.ndarray size changed") rval = __import__(module_name, {}, {}, [module_name]) - t1 = time.time() + t1 = time.perf_counter() import_time += t1 - t0 if not rval: raise Exception("__import__ failed", fullpath) @@ -771,7 +771,7 @@ def refresh(self, age_thresh_use=None, delete_if_problem=False, cleanup=True): """ if age_thresh_use is None: age_thresh_use = self.age_thresh_use - start_time = time.time() + start_time = time.perf_counter() too_old_to_use = [] to_delete = [] @@ -786,7 +786,7 @@ def rmtree_empty(*args, **kwargs): to_delete_empty.append((args, kwargs)) # add entries that are not in the entry_from_key dictionary - time_now = time.time() + time_now = time.perf_counter() # Go through directories in alphabetical order to ensure consistent # behavior. try: @@ -956,7 +956,7 @@ def unpickle_failure(): # directories in alphabetical order so as to make # sure all new processes only use the first one. if cleanup: - age = time.time() - last_access_time(entry) + age = time.perf_counter() - last_access_time(entry) if delete_if_problem or age > self.age_thresh_del: rmtree( root, @@ -1063,7 +1063,9 @@ def unpickle_failure(): if not files: _rmtree(*a, **kw) - _logger.debug(f"Time needed to refresh cache: {time.time() - start_time}") + _logger.debug( + f"Time needed to refresh cache: {time.perf_counter() - start_time}" + ) return too_old_to_use @@ -1269,7 +1271,7 @@ def check_key(self, key, key_pkl): Its associated pickled file containing a KeyData. """ - start_time = time.time() + start_time = time.perf_counter() # Verify that when we reload the KeyData from the pickled file, the # same key can be found in it, and is not equal to more than one # other key. @@ -1317,7 +1319,7 @@ def check_key(self, key, key_pkl): f"The keys are:\n {other}\nand\n {key}\n(found in {key_pkl})." ) - self.time_spent_in_check_key += time.time() - start_time + self.time_spent_in_check_key += time.perf_counter() - start_time # default 31 days age_thresh_del = config.cmodule__age_thresh_use + 60 * 60 * 24 * 7 @@ -1506,7 +1508,7 @@ def clear_unversioned(self, min_age=None): assert key[0] to_del = [] - time_now = time.time() + time_now = time.perf_counter() for filename in os.listdir(self.dirname): if filename.startswith("tmp"): try: diff --git a/aesara/link/c/params_type.py b/aesara/link/c/params_type.py index c48db53fc5..928b92ed2c 100644 --- a/aesara/link/c/params_type.py +++ b/aesara/link/c/params_type.py @@ -29,7 +29,7 @@ .. code-block:: python - params_type = ParamsType(attr1=TensorType('int32', (False, False)), attr2=ScalarType('float64')) + params_type = ParamsType(attr1=TensorType('int32', shape=(None, None)), attr2=ScalarType('float64')) If your op contains attributes ``attr1`` **and** ``attr2``, the default ``op.get_params()`` implementation will automatically try to look for it and generate an appropriate Params object. @@ -324,7 +324,7 @@ class ParamsType(CType): `ParamsType` constructor takes key-value args. Key will be the name of the attribute in the struct. Value is the Aesara type of this attribute, ie. 
an instance of (a subclass of) :class:`CType` - (eg. ``TensorType('int64', (False,))``). + (eg. ``TensorType('int64', (None,))``). In a Python code any attribute named ``key`` will be available via:: diff --git a/aesara/link/jax/dispatch/basic.py b/aesara/link/jax/dispatch/basic.py index 5165ae6050..f48ea2db1e 100644 --- a/aesara/link/jax/dispatch/basic.py +++ b/aesara/link/jax/dispatch/basic.py @@ -49,7 +49,7 @@ def jax_funcify_FunctionGraph( return fgraph_to_python( fgraph, jax_funcify, - type_conversion_fn=jax_typify, + const_conversion_fn=jax_typify, fgraph_name=fgraph_name, **kwargs, ) @@ -82,26 +82,10 @@ def assert_fn(x, *inputs): return assert_fn -def jnp_safe_copy(x): - try: - res = jnp.copy(x) - except NotImplementedError: - warnings.warn( - "`jnp.copy` is not implemented yet. " "Using the object's `copy` method." - ) - if hasattr(x, "copy"): - res = jnp.array(x.copy()) - else: - warnings.warn(f"Object has no `copy` method: {x}") - res = x - - return res - - @jax_funcify.register(DeepCopyOp) def jax_funcify_DeepCopyOp(op, **kwargs): def deepcopyop(x): - return jnp_safe_copy(x) + return jnp.copy(x) return deepcopyop diff --git a/aesara/link/jax/dispatch/elemwise.py b/aesara/link/jax/dispatch/elemwise.py index b3c4f15be2..5b054fd5fe 100644 --- a/aesara/link/jax/dispatch/elemwise.py +++ b/aesara/link/jax/dispatch/elemwise.py @@ -1,7 +1,7 @@ import jax import jax.numpy as jnp -from aesara.link.jax.dispatch.basic import jax_funcify, jnp_safe_copy +from aesara.link.jax.dispatch.basic import jax_funcify from aesara.tensor.elemwise import CAReduce, DimShuffle, Elemwise from aesara.tensor.special import LogSoftmax, Softmax, SoftmaxGrad @@ -69,7 +69,7 @@ def dimshuffle(x): res = jnp.reshape(res, shape) if not op.inplace: - res = jnp_safe_copy(res) + res = jnp.copy(res) return res diff --git a/aesara/link/jax/dispatch/scalar.py b/aesara/link/jax/dispatch/scalar.py index d782e256ad..5947f49279 100644 --- a/aesara/link/jax/dispatch/scalar.py +++ b/aesara/link/jax/dispatch/scalar.py @@ -133,10 +133,18 @@ def psi(x): @jax_funcify.register(Softplus) def jax_funcify_Softplus(op, **kwargs): def softplus(x): - # This expression is numerically equivalent to the Aesara one - # It just contains one "speed" optimization less than the Aesara counterpart return jnp.where( - x < -37.0, jnp.exp(x), jnp.where(x > 33.3, x, jnp.log1p(jnp.exp(x))) + x < -37.0, + jnp.exp(x), + jnp.where( + x < 18.0, + jnp.log1p(jnp.exp(x)), + jnp.where( + x < 33.3, + x + jnp.exp(-x), + x, + ), + ), ) return softplus diff --git a/aesara/link/jax/dispatch/subtensor.py b/aesara/link/jax/dispatch/subtensor.py index 822d78a6fa..46293e2300 100644 --- a/aesara/link/jax/dispatch/subtensor.py +++ b/aesara/link/jax/dispatch/subtensor.py @@ -13,23 +13,58 @@ from aesara.tensor.type_other import MakeSlice +BOOLEAN_MASK_ERROR = """JAX does not support resizing arrays with boolean +masks. In some cases, however, it is possible to re-express your model +in a form that JAX can compile: + +>>> import aesara.tensor as at +>>> x_at = at.vector('x') +>>> y_at = x_at[x_at > 0].sum() + +can be re-expressed as: + +>>> import aesara.tensor as at +>>> x_at = at.vector('x') +>>> y_at = at.where(x_at > 0, x_at, 0).sum() +""" + +DYNAMIC_SLICE_LENGTH_ERROR = """JAX does not support slicing arrays with a dynamic +slice length. 
+""" + + +def assert_indices_jax_compatible(node, idx_list): + from aesara.graph.basic import Constant + from aesara.tensor.var import TensorVariable + + ilist = indices_from_subtensor(node.inputs[1:], idx_list) + for idx in ilist: + + if isinstance(idx, TensorVariable): + if idx.type.dtype == "bool": + raise NotImplementedError(BOOLEAN_MASK_ERROR) + elif isinstance(idx, slice): + for slice_arg in (idx.start, idx.stop, idx.step): + if slice_arg is not None and not isinstance(slice_arg, Constant): + raise NotImplementedError(DYNAMIC_SLICE_LENGTH_ERROR) + + @jax_funcify.register(Subtensor) @jax_funcify.register(AdvancedSubtensor) @jax_funcify.register(AdvancedSubtensor1) -def jax_funcify_Subtensor(op, **kwargs): +def jax_funcify_Subtensor(op, node, **kwargs): idx_list = getattr(op, "idx_list", None) + assert_indices_jax_compatible(node, idx_list) - def subtensor(x, *ilists): - + def subtensor_constant(x, *ilists): indices = indices_from_subtensor(ilists, idx_list) - if len(indices) == 1: indices = indices[0] return x.__getitem__(indices) - return subtensor + return subtensor_constant @jax_funcify.register(IncSubtensor) diff --git a/aesara/link/jax/dispatch/tensor_basic.py b/aesara/link/jax/dispatch/tensor_basic.py index c15233175f..a578229fd7 100644 --- a/aesara/link/jax/dispatch/tensor_basic.py +++ b/aesara/link/jax/dispatch/tensor_basic.py @@ -1,5 +1,6 @@ import jax.numpy as jnp +from aesara.graph.basic import Constant from aesara.link.jax.dispatch.basic import jax_funcify from aesara.tensor.basic import ( Alloc, @@ -15,6 +16,15 @@ ) +ARANGE_CONCRETE_VALUE_ERROR = """JAX requires the arguments of `jax.numpy.arange` +to be constants. The graph that you defined thus cannot be JIT-compiled +by JAX. An example of a graph that can be compiled to JAX: + +>>> import aesara.tensor basic +>>> at.arange(1, 10, 2) +""" + + @jax_funcify.register(AllocDiag) def jax_funcify_AllocDiag(op, **kwargs): offset = op.offset @@ -43,9 +53,26 @@ def alloc(x, *shape): @jax_funcify.register(ARange) -def jax_funcify_ARange(op, **kwargs): - # XXX: This currently requires concrete arguments. - def arange(start, stop, step): +def jax_funcify_ARange(op, node, **kwargs): + """Register a JAX implementation for `ARange`. + + `jax.numpy.arange` requires concrete values for its arguments. Here we check + that the arguments are constant, and raise otherwise. + + TODO: Handle other situations in which values are concrete (shape of an array). 
+ + """ + arange_args = node.inputs + constant_args = [] + for arg in arange_args: + if not isinstance(arg, Constant): + raise NotImplementedError(ARANGE_CONCRETE_VALUE_ERROR) + + constant_args.append(arg.value) + + start, stop, step = constant_args + + def arange(*_): return jnp.arange(start, stop, step, dtype=op.dtype) return arange diff --git a/aesara/link/jax/dispatch/test_subtensor.py b/aesara/link/jax/dispatch/test_subtensor.py deleted file mode 100644 index 22cc492402..0000000000 --- a/aesara/link/jax/dispatch/test_subtensor.py +++ /dev/null @@ -1,186 +0,0 @@ -import jax -import numpy as np -import pytest -from jax._src.errors import NonConcreteBooleanIndexError -from packaging.version import parse as version_parse - -import aesara.tensor as at -from aesara.configdefaults import config -from aesara.graph.fg import FunctionGraph -from aesara.tensor import subtensor as at_subtensor -from tests.link.jax.test_basic import compare_jax_and_py - - -def test_jax_Subtensors(): - # Basic indices - x_at = at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) - out_at = x_at[1, 2, 0] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - out_at = x_at[1:2, 1, :] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - # Advanced indexing - out_at = at_subtensor.advanced_subtensor1(x_at, [1, 2]) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor1) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - out_at = x_at[[1, 2], [2, 3]] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - # Advanced and basic indexing - out_at = x_at[[1, 2], :] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - out_at = x_at[[1, 2], :, [3, 4]] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - -@pytest.mark.xfail( - version_parse(jax.__version__) >= version_parse("0.2.12"), - reason="Omnistaging cannot be disabled", -) -def test_jax_Subtensors_omni(): - x_at = at.arange(3 * 4 * 5).reshape((3, 4, 5)) - - # Boolean indices - out_at = x_at[x_at < 0] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - -def test_jax_IncSubtensor(): - rng = np.random.default_rng(213234) - - x_np = rng.uniform(-1, 1, size=(3, 4, 5)).astype(config.floatX) - x_at = at.constant(np.arange(3 * 4 * 5).reshape((3, 4, 5)).astype(config.floatX)) - - # "Set" basic indices - st_at = at.as_tensor_variable(np.array(-10.0, dtype=config.floatX)) - out_at = at_subtensor.set_subtensor(x_at[1, 2, 3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.set_subtensor(x_at[:2, 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - out_at = at_subtensor.set_subtensor(x_at[0, 1:3, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) - 
compare_jax_and_py(out_fg, []) - - # "Set" advanced indices - st_at = at.as_tensor_variable( - rng.uniform(-1, 1, size=(2, 4, 5)).astype(config.floatX) - ) - out_at = at_subtensor.set_subtensor(x_at[np.r_[0, 2]], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.set_subtensor(x_at[[0, 2], 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - # "Set" boolean indices - mask_at = at.constant(x_np > 0) - out_at = at_subtensor.set_subtensor(x_at[mask_at], 0.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - # "Increment" basic indices - st_at = at.as_tensor_variable(np.array(-10.0, dtype=config.floatX)) - out_at = at_subtensor.inc_subtensor(x_at[1, 2, 3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.inc_subtensor(x_at[:2, 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - out_at = at_subtensor.set_subtensor(x_at[0, 1:3, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - # "Increment" advanced indices - st_at = at.as_tensor_variable( - rng.uniform(-1, 1, size=(2, 4, 5)).astype(config.floatX) - ) - out_at = at_subtensor.inc_subtensor(x_at[np.r_[0, 2]], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.inc_subtensor(x_at[[0, 2], 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - # "Increment" boolean indices - mask_at = at.constant(x_np > 0) - out_at = at_subtensor.set_subtensor(x_at[mask_at], 1.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - compare_jax_and_py(out_fg, []) - - -def test_jax_IncSubtensors_unsupported(): - rng = np.random.default_rng(213234) - x_np = rng.uniform(-1, 1, size=(3, 4, 5)).astype(config.floatX) - x_at = at.constant(np.arange(3 * 4 * 5).reshape((3, 4, 5)).astype(config.floatX)) - - mask_at = at.as_tensor(x_np) > 0 - out_at = at_subtensor.set_subtensor(x_at[mask_at], 0.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - with pytest.raises( - NonConcreteBooleanIndexError, match="Array boolean indices must be concrete" - ): - compare_jax_and_py(out_fg, []) - - mask_at = at.as_tensor_variable(x_np) > 0 - out_at = at_subtensor.set_subtensor(x_at[mask_at], 1.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - with pytest.raises( - NonConcreteBooleanIndexError, match="Array boolean indices must be concrete" - ): - compare_jax_and_py(out_fg, []) - - st_at = at.as_tensor_variable(x_np[[0, 2], 
0, :3]) - out_at = at_subtensor.set_subtensor(x_at[[0, 2], 0, :3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - with pytest.raises(IndexError, match="Array slice indices must have static"): - compare_jax_and_py(out_fg, []) - - st_at = at.as_tensor_variable(x_np[[0, 2], 0, :3]) - out_at = at_subtensor.inc_subtensor(x_at[[0, 2], 0, :3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) - with pytest.raises(IndexError, match="Array slice indices must have static"): - compare_jax_and_py(out_fg, []) diff --git a/aesara/link/numba/dispatch/__init__.py b/aesara/link/numba/dispatch/__init__.py index 0c5b37eba4..11f47aa8f9 100644 --- a/aesara/link/numba/dispatch/__init__.py +++ b/aesara/link/numba/dispatch/__init__.py @@ -1,5 +1,9 @@ # isort: off -from aesara.link.numba.dispatch.basic import numba_funcify, numba_typify +from aesara.link.numba.dispatch.basic import ( + numba_funcify, + numba_const_convert, + numba_njit, +) # Load dispatch specializations import aesara.link.numba.dispatch.scalar @@ -9,5 +13,6 @@ import aesara.link.numba.dispatch.random import aesara.link.numba.dispatch.elemwise import aesara.link.numba.dispatch.scan +import aesara.link.numba.dispatch.sparse # isort: on diff --git a/aesara/link/numba/dispatch/basic.py b/aesara/link/numba/dispatch/basic.py index fe4caf08d0..bd7f1a9c79 100644 --- a/aesara/link/numba/dispatch/basic.py +++ b/aesara/link/numba/dispatch/basic.py @@ -3,15 +3,16 @@ from contextlib import contextmanager from functools import singledispatch from textwrap import dedent -from typing import Union +from typing import TYPE_CHECKING, Callable, Optional, Union, cast import numba import numba.np.unsafe.ndarray as numba_ndarray import numpy as np import scipy import scipy.special -from llvmlite.llvmpy.core import Type as llvm_Type +from llvmlite.ir import FunctionType from numba import types +from numba.core.decorators import _jit from numba.core.errors import TypingError from numba.cpython.unsafe.tuple import tuple_setitem # noqa: F401 from numba.extending import box @@ -21,6 +22,7 @@ from aesara.compile.ops import DeepCopyOp from aesara.graph.basic import Apply, NoParams from aesara.graph.fg import FunctionGraph +from aesara.graph.op import Op from aesara.graph.type import Type from aesara.ifelse import IfElse from aesara.link.utils import ( @@ -30,6 +32,7 @@ ) from aesara.scalar.basic import ScalarType from aesara.scalar.math import Softplus +from aesara.sparse.type import SparseTensorType from aesara.tensor.blas import BatchedDot from aesara.tensor.math import Dot from aesara.tensor.shape import Reshape, Shape, Shape_i, SpecifyShape @@ -46,6 +49,10 @@ from aesara.tensor.type_other import MakeSlice, NoneConst +if TYPE_CHECKING: + from aesara.graph.op import StorageMapType + + def numba_njit(*args, **kwargs): if len(args) > 0 and callable(args[0]): @@ -61,14 +68,44 @@ def numba_vectorize(*args, **kwargs): return numba.vectorize(*args, cache=config.numba__cache, **kwargs) -def get_numba_type( - aesara_type: Type, +# This can be removed after Numba 0.57.0 and `_jit` replaced with standard `[n]jit` +generated_jit = _jit( + sigs=None, + locals={}, + cache=False, + target="cpu", + targetoptions={}, + impl_kind="generated", +) + + +@singledispatch +def get_numba_type(aesara_type: Type, **kwargs) -> numba.types.Type: + r"""Create a Numba type object for a :class:`Type`.""" + return numba.types.pyobject + + 
+@get_numba_type.register(SparseTensorType) +def get_numba_type_SparseType(aesara_type, **kwargs): + # This is needed to differentiate `SparseTensorType` from `TensorType` + return numba.types.pyobject + + +@get_numba_type.register(ScalarType) +def get_numba_type_ScalarType(aesara_type, **kwargs): + dtype = np.dtype(aesara_type.dtype) + numba_dtype = numba.from_dtype(dtype) + return numba_dtype + + +@get_numba_type.register(TensorType) +def get_numba_type_TensorType( + aesara_type, layout: str = "A", force_scalar: bool = False, reduce_to_scalar: bool = False, -) -> numba.types.Type: - r"""Create a Numba type object for a :class:`Type`. - +): + r""" Parameters ---------- aesara_type @@ -80,44 +117,27 @@ def get_numba_type( reduce_to_scalar Return Numba scalars for zero dimensional :class:`TensorType`\s. """ - - if isinstance(aesara_type, TensorType): - dtype = aesara_type.numpy_dtype - numba_dtype = numba.from_dtype(dtype) - if force_scalar or ( - reduce_to_scalar and getattr(aesara_type, "ndim", None) == 0 - ): - return numba_dtype - return numba.types.Array(numba_dtype, aesara_type.ndim, layout) - elif isinstance(aesara_type, ScalarType): - dtype = np.dtype(aesara_type.dtype) - numba_dtype = numba.from_dtype(dtype) + dtype = aesara_type.numpy_dtype + numba_dtype = numba.from_dtype(dtype) + if force_scalar or (reduce_to_scalar and getattr(aesara_type, "ndim", None) == 0): return numba_dtype - else: - raise NotImplementedError(f"Numba type not implemented for {aesara_type}") + return numba.types.Array(numba_dtype, aesara_type.ndim, layout) def create_numba_signature( - node_or_fgraph: Union[FunctionGraph, Apply], - force_scalar: bool = False, - reduce_to_scalar: bool = False, + node_or_fgraph: Union[FunctionGraph, Apply], **kwargs ) -> numba.types.Type: """Create a Numba type for the signature of an `Apply` node or `FunctionGraph`.""" input_types = [] for inp in node_or_fgraph.inputs: - input_types.append( - get_numba_type( - inp.type, force_scalar=force_scalar, reduce_to_scalar=reduce_to_scalar - ) - ) + input_types.append(get_numba_type(inp.type, **kwargs)) output_types = [] for out in node_or_fgraph.outputs: - output_types.append( - get_numba_type( - out.type, force_scalar=force_scalar, reduce_to_scalar=reduce_to_scalar - ) - ) + output_types.append(get_numba_type(out.type, **kwargs)) + + if isinstance(node_or_fgraph, FunctionGraph): + return numba.types.Tuple(output_types)(*input_types) if len(output_types) > 1: return numba.types.Tuple(output_types)(*input_types) @@ -128,7 +148,7 @@ def create_numba_signature( def slice_new(self, start, stop, step): - fnty = llvm_Type.function(self.pyobj, [self.pyobj, self.pyobj, self.pyobj]) + fnty = FunctionType(self.pyobj, [self.pyobj, self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PySlice_New") return self.builder.call(fn, [start, stop, step]) @@ -168,7 +188,7 @@ def in_seq_empty_tuple(x, y): enable_slice_boxing() -@numba.generated_jit(nopython=True) +@generated_jit def to_scalar(x): if isinstance(x, (numba.types.Number, numba.types.Boolean)): return lambda x: x @@ -315,13 +335,48 @@ def use_optimized_cheap_pass(*args, **kwargs): @singledispatch -def numba_typify(data, dtype=None, **kwargs): +def numba_const_convert(data, dtype=None, **kwargs): + """Create a Numba compatible constant from an Aesara `Constant`.""" return data +def numba_funcify(obj, node=None, storage_map=None, **kwargs) -> Callable: + """Convert `obj` to a Numba-JITable object.""" + return _numba_funcify(obj, node=node, storage_map=storage_map, **kwargs) + + 
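With the public `numba_funcify` reduced to a thin wrapper over the private `_numba_funcify` dispatcher defined just below, a graph is converted and then JIT-compiled in two explicit steps. A hedged usage sketch, assuming a working Numba installation; the exact structure of the returned outputs (e.g. whether a single output comes back wrapped in a tuple) may differ between versions:

    import numpy as np

    import aesara.tensor as at
    from aesara.graph.fg import FunctionGraph
    from aesara.link.numba.dispatch import numba_funcify, numba_njit

    x = at.vector("x")
    fgraph = FunctionGraph([x], [2 * x])

    py_fn = numba_funcify(fgraph)   # plain Python function built by the dispatcher
    jit_fn = numba_njit(py_fn)      # JIT-compile it, as NumbaLinker.jit_compile does
    res = jit_fn(np.arange(3.0))    # evaluates the graph's outputs

This mirrors what the Numba linker does in `jit_compile` further down in this patch.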
@singledispatch -def numba_funcify(op, node=None, storage_map=None, **kwargs): - """Create a Numba compatible function from an Aesara `Op`.""" +def _numba_funcify( + obj, + node: Optional[Apply] = None, + storage_map: Optional["StorageMapType"] = None, + **kwargs, +) -> Callable: + r"""Dispatch on Aesara object types to perform Numba conversions. + + Arguments + --------- + obj + The object used to determine the appropriate conversion function based + on its type. This is generally an `Op` instance, but `FunctionGraph`\s + are also supported. + node + When `obj` is an `Op`, this value should be the corresponding `Apply` node. + storage_map + A storage map with, for example, the constant and `SharedVariable` values + of the graph being converted. + + Returns + ------- + A `Callable` that can be JIT-compiled in Numba using `numba.jit`. + + """ + raise NotImplementedError() + + +@_numba_funcify.register(Op) +def numba_funcify_perform(op, node, storage_map=None, **kwargs) -> Callable: + """Create a Numba compatible function from an Aesara `Op.perform`.""" warnings.warn( f"Numba will use object mode to run {op}'s perform method", @@ -372,10 +427,10 @@ def perform(*inputs): ret = py_perform_return(inputs) return ret - return perform + return cast(Callable, perform) -@numba_funcify.register(OpFromGraph) +@_numba_funcify.register(OpFromGraph) def numba_funcify_OpFromGraph(op, node=None, **kwargs): _ = kwargs.pop("storage_map", None) @@ -397,7 +452,7 @@ def opfromgraph(*inputs): return opfromgraph -@numba_funcify.register(FunctionGraph) +@_numba_funcify.register(FunctionGraph) def numba_funcify_FunctionGraph( fgraph, node=None, @@ -407,7 +462,7 @@ def numba_funcify_FunctionGraph( return fgraph_to_python( fgraph, numba_funcify, - type_conversion_fn=numba_typify, + const_conversion_fn=numba_const_convert, fgraph_name=fgraph_name, **kwargs, ) @@ -505,9 +560,9 @@ def {fn_name}({", ".join(input_names)}): return subtensor_def_src -@numba_funcify.register(Subtensor) -@numba_funcify.register(AdvancedSubtensor) -@numba_funcify.register(AdvancedSubtensor1) +@_numba_funcify.register(Subtensor) +@_numba_funcify.register(AdvancedSubtensor) +@_numba_funcify.register(AdvancedSubtensor1) def numba_funcify_Subtensor(op, node, **kwargs): subtensor_def_src = create_index_func( @@ -523,8 +578,8 @@ def numba_funcify_Subtensor(op, node, **kwargs): return numba_njit(subtensor_fn) -@numba_funcify.register(IncSubtensor) -@numba_funcify.register(AdvancedIncSubtensor) +@_numba_funcify.register(IncSubtensor) +@_numba_funcify.register(AdvancedIncSubtensor) def numba_funcify_IncSubtensor(op, node, **kwargs): incsubtensor_def_src = create_index_func( @@ -540,7 +595,7 @@ def numba_funcify_IncSubtensor(op, node, **kwargs): return numba_njit(incsubtensor_fn) -@numba_funcify.register(AdvancedIncSubtensor1) +@_numba_funcify.register(AdvancedIncSubtensor1) def numba_funcify_AdvancedIncSubtensor1(op, node, **kwargs): inplace = op.inplace set_instead_of_inc = op.set_instead_of_inc @@ -573,7 +628,7 @@ def advancedincsubtensor1(x, vals, idxs): return advancedincsubtensor1 -@numba_funcify.register(DeepCopyOp) +@_numba_funcify.register(DeepCopyOp) def numba_funcify_DeepCopyOp(op, node, **kwargs): # Scalars are apparently returned as actual Python scalar types and not @@ -595,8 +650,8 @@ def deepcopyop(x): return deepcopyop -@numba_funcify.register(MakeSlice) -def numba_funcify_MakeSlice(op, **kwargs): +@_numba_funcify.register(MakeSlice) +def numba_funcify_MakeSlice(op, node, **kwargs): @numba_njit def makeslice(*x): return slice(*x) @@ 
-604,8 +659,8 @@ def makeslice(*x): return makeslice -@numba_funcify.register(Shape) -def numba_funcify_Shape(op, **kwargs): +@_numba_funcify.register(Shape) +def numba_funcify_Shape(op, node, **kwargs): @numba_njit(inline="always") def shape(x): return np.asarray(np.shape(x)) @@ -613,8 +668,8 @@ def shape(x): return shape -@numba_funcify.register(Shape_i) -def numba_funcify_Shape_i(op, **kwargs): +@_numba_funcify.register(Shape_i) +def numba_funcify_Shape_i(op, node, **kwargs): i = op.i @numba_njit(inline="always") @@ -644,8 +699,8 @@ def codegen(context, builder, signature, args): return sig, codegen -@numba_funcify.register(Reshape) -def numba_funcify_Reshape(op, **kwargs): +@_numba_funcify.register(Reshape) +def numba_funcify_Reshape(op, node, **kwargs): ndim = op.ndim if ndim == 0: @@ -667,7 +722,7 @@ def reshape(x, shape): return reshape -@numba_funcify.register(SpecifyShape) +@_numba_funcify.register(SpecifyShape) def numba_funcify_SpecifyShape(op, node, **kwargs): shape_inputs = node.inputs[1:] shape_input_names = ["shape_" + str(i) for i in range(len(shape_inputs))] @@ -714,7 +769,7 @@ def inputs_cast(x): return inputs_cast -@numba_funcify.register(Dot) +@_numba_funcify.register(Dot) def numba_funcify_Dot(op, node, **kwargs): # Numba's `np.dot` does not support integer dtypes, so we need to cast to # float. @@ -729,7 +784,7 @@ def dot(x, y): return dot -@numba_funcify.register(Softplus) +@_numba_funcify.register(Softplus) def numba_funcify_Softplus(op, node, **kwargs): x_dtype = np.dtype(node.inputs[0].dtype) @@ -748,7 +803,7 @@ def softplus(x): return softplus -@numba_funcify.register(Cholesky) +@_numba_funcify.register(Cholesky) def numba_funcify_Cholesky(op, node, **kwargs): lower = op.lower @@ -784,7 +839,7 @@ def cholesky(a): return cholesky -@numba_funcify.register(Solve) +@_numba_funcify.register(Solve) def numba_funcify_Solve(op, node, **kwargs): assume_a = op.assume_a @@ -831,7 +886,7 @@ def solve(a, b): return solve -@numba_funcify.register(BatchedDot) +@_numba_funcify.register(BatchedDot) def numba_funcify_BatchedDot(op, node, **kwargs): dtype = node.outputs[0].type.numpy_dtype @@ -852,7 +907,7 @@ def batched_dot(x, y): # optimizations are apparently already performed by Numba -@numba_funcify.register(IfElse) +@_numba_funcify.register(IfElse) def numba_funcify_IfElse(op, **kwargs): n_outs = op.n_outs diff --git a/aesara/link/numba/dispatch/elemwise.py b/aesara/link/numba/dispatch/elemwise.py index 74ac324eaa..c401f282fd 100644 --- a/aesara/link/numba/dispatch/elemwise.py +++ b/aesara/link/numba/dispatch/elemwise.py @@ -12,6 +12,7 @@ from aesara.graph.op import Op from aesara.link.numba.dispatch import basic as numba_basic from aesara.link.numba.dispatch.basic import ( + _numba_funcify, create_numba_signature, create_tuple_creator, numba_funcify, @@ -422,7 +423,7 @@ def axis_apply_fn(x): return axis_apply_fn -@numba_funcify.register(Elemwise) +@_numba_funcify.register(Elemwise) def numba_funcify_Elemwise(op, node, **kwargs): scalar_op_fn = numba_funcify(op.scalar_op, node=node, inline="always", **kwargs) @@ -474,7 +475,7 @@ def {inplace_elemwise_fn_name}({input_signature_str}): return elemwise_fn -@numba_funcify.register(CAReduce) +@_numba_funcify.register(CAReduce) def numba_funcify_CAReduce(op, node, **kwargs): axes = op.axis if axes is None: @@ -512,7 +513,7 @@ def numba_funcify_CAReduce(op, node, **kwargs): return careduce_fn -@numba_funcify.register(DimShuffle) +@_numba_funcify.register(DimShuffle) def numba_funcify_DimShuffle(op, **kwargs): shuffle = 
tuple(op.shuffle) transposition = tuple(op.transposition) @@ -590,7 +591,7 @@ def dimshuffle(x): return dimshuffle -@numba_funcify.register(Softmax) +@_numba_funcify.register(Softmax) def numba_funcify_Softmax(op, node, **kwargs): x_at = node.inputs[0] @@ -627,7 +628,7 @@ def softmax_py_fn(x): return softmax -@numba_funcify.register(SoftmaxGrad) +@_numba_funcify.register(SoftmaxGrad) def numba_funcify_SoftmaxGrad(op, node, **kwargs): sm_at = node.inputs[1] @@ -658,7 +659,7 @@ def softmax_grad_py_fn(dy, sm): return softmax_grad -@numba_funcify.register(LogSoftmax) +@_numba_funcify.register(LogSoftmax) def numba_funcify_LogSoftmax(op, node, **kwargs): x_at = node.inputs[0] @@ -692,7 +693,7 @@ def log_softmax_py_fn(x): return log_softmax -@numba_funcify.register(MaxAndArgmax) +@_numba_funcify.register(MaxAndArgmax) def numba_funcify_MaxAndArgmax(op, node, **kwargs): axis = op.axis x_at = node.inputs[0] diff --git a/aesara/link/numba/dispatch/extra_ops.py b/aesara/link/numba/dispatch/extra_ops.py index bbb0e15ad5..6b9f310a49 100644 --- a/aesara/link/numba/dispatch/extra_ops.py +++ b/aesara/link/numba/dispatch/extra_ops.py @@ -6,7 +6,7 @@ from aesara import config from aesara.link.numba.dispatch import basic as numba_basic -from aesara.link.numba.dispatch.basic import get_numba_type, numba_funcify +from aesara.link.numba.dispatch.basic import _numba_funcify, get_numba_type from aesara.tensor.extra_ops import ( Bartlett, BroadcastTo, @@ -21,7 +21,7 @@ ) -@numba_funcify.register(Bartlett) +@_numba_funcify.register(Bartlett) def numba_funcify_Bartlett(op, **kwargs): @numba_basic.numba_njit(inline="always") def bartlett(x): @@ -30,7 +30,7 @@ def bartlett(x): return bartlett -@numba_funcify.register(CumOp) +@_numba_funcify.register(CumOp) def numba_funcify_CumOp(op, node, **kwargs): axis = op.axis mode = op.mode @@ -65,7 +65,7 @@ def cumop(x): return cumop -@numba_funcify.register(FillDiagonal) +@_numba_funcify.register(FillDiagonal) def numba_funcify_FillDiagonal(op, **kwargs): @numba_basic.numba_njit def filldiagonal(a, val): @@ -75,7 +75,7 @@ def filldiagonal(a, val): return filldiagonal -@numba_funcify.register(FillDiagonalOffset) +@_numba_funcify.register(FillDiagonalOffset) def numba_funcify_FillDiagonalOffset(op, node, **kwargs): @numba_basic.numba_njit def filldiagonaloffset(a, val, offset): @@ -100,7 +100,7 @@ def filldiagonaloffset(a, val, offset): return filldiagonaloffset -@numba_funcify.register(RavelMultiIndex) +@_numba_funcify.register(RavelMultiIndex) def numba_funcify_RavelMultiIndex(op, node, **kwargs): mode = op.mode @@ -165,7 +165,7 @@ def ravelmultiindex(*inp): return ravelmultiindex -@numba_funcify.register(Repeat) +@_numba_funcify.register(Repeat) def numba_funcify_Repeat(op, node, **kwargs): axis = op.axis @@ -210,7 +210,7 @@ def repeatop(x, repeats): return repeatop -@numba_funcify.register(Unique) +@_numba_funcify.register(Unique) def numba_funcify_Unique(op, node, **kwargs): axis = op.axis @@ -256,7 +256,7 @@ def unique(x): return unique -@numba_funcify.register(UnravelIndex) +@_numba_funcify.register(UnravelIndex) def numba_funcify_UnravelIndex(op, node, **kwargs): order = op.order @@ -291,7 +291,7 @@ def unravelindex(arr, shape): return unravelindex -@numba_funcify.register(SearchsortedOp) +@_numba_funcify.register(SearchsortedOp) def numba_funcify_Searchsorted(op, node, **kwargs): side = op.side @@ -325,7 +325,7 @@ def searchsorted(a, v): return searchsorted -@numba_funcify.register(BroadcastTo) +@_numba_funcify.register(BroadcastTo) def numba_funcify_BroadcastTo(op, 
node, **kwargs): create_zeros_tuple = numba_basic.create_tuple_creator( diff --git a/aesara/link/numba/dispatch/nlinalg.py b/aesara/link/numba/dispatch/nlinalg.py index 1ac3823012..010e217f85 100644 --- a/aesara/link/numba/dispatch/nlinalg.py +++ b/aesara/link/numba/dispatch/nlinalg.py @@ -5,9 +5,9 @@ from aesara.link.numba.dispatch import basic as numba_basic from aesara.link.numba.dispatch.basic import ( + _numba_funcify, get_numba_type, int_to_float_fn, - numba_funcify, ) from aesara.tensor.nlinalg import ( SVD, @@ -21,7 +21,7 @@ ) -@numba_funcify.register(SVD) +@_numba_funcify.register(SVD) def numba_funcify_SVD(op, node, **kwargs): full_matrices = op.full_matrices compute_uv = op.compute_uv @@ -56,7 +56,7 @@ def svd(x): return svd -@numba_funcify.register(Det) +@_numba_funcify.register(Det) def numba_funcify_Det(op, node, **kwargs): out_dtype = node.outputs[0].type.numpy_dtype @@ -69,7 +69,7 @@ def det(x): return det -@numba_funcify.register(Eig) +@_numba_funcify.register(Eig) def numba_funcify_Eig(op, node, **kwargs): out_dtype_1 = node.outputs[0].type.numpy_dtype @@ -85,7 +85,7 @@ def eig(x): return eig -@numba_funcify.register(Eigh) +@_numba_funcify.register(Eigh) def numba_funcify_Eigh(op, node, **kwargs): uplo = op.UPLO @@ -120,7 +120,7 @@ def eigh(x): return eigh -@numba_funcify.register(Inv) +@_numba_funcify.register(Inv) def numba_funcify_Inv(op, node, **kwargs): out_dtype = node.outputs[0].type.numpy_dtype @@ -133,7 +133,7 @@ def inv(x): return inv -@numba_funcify.register(MatrixInverse) +@_numba_funcify.register(MatrixInverse) def numba_funcify_MatrixInverse(op, node, **kwargs): out_dtype = node.outputs[0].type.numpy_dtype @@ -146,7 +146,7 @@ def matrix_inverse(x): return matrix_inverse -@numba_funcify.register(MatrixPinv) +@_numba_funcify.register(MatrixPinv) def numba_funcify_MatrixPinv(op, node, **kwargs): out_dtype = node.outputs[0].type.numpy_dtype @@ -159,7 +159,7 @@ def matrixpinv(x): return matrixpinv -@numba_funcify.register(QRFull) +@_numba_funcify.register(QRFull) def numba_funcify_QRFull(op, node, **kwargs): mode = op.mode diff --git a/aesara/link/numba/dispatch/random.py b/aesara/link/numba/dispatch/random.py index bb968f44c8..2ded1d417e 100644 --- a/aesara/link/numba/dispatch/random.py +++ b/aesara/link/numba/dispatch/random.py @@ -5,14 +5,22 @@ import numpy as np from numba import _helperlib, types from numba.core import cgutils -from numba.extending import NativeValue, box, models, register_model, typeof_impl, unbox +from numba.extending import ( + NativeValue, + box, + models, + overload, + register_model, + typeof_impl, + unbox, +) from numpy.random import RandomState import aesara.tensor.random.basic as aer from aesara.graph.basic import Apply from aesara.graph.op import Op from aesara.link.numba.dispatch import basic as numba_basic -from aesara.link.numba.dispatch.basic import numba_funcify, numba_typify +from aesara.link.numba.dispatch.basic import _numba_funcify, numba_const_convert from aesara.link.utils import ( compile_function_src, get_name_for_object, @@ -78,11 +86,21 @@ def box_random_state(typ, val, c): return class_obj -@numba_typify.register(RandomState) -def numba_typify_RandomState(state, **kwargs): - # The numba_typify in this case is just an passthrough function +@overload(np.random.uniform) +def uniform_empty_size(a, b, size): + if isinstance(size, types.Tuple) and size.count == 0: + + def uniform_no_size(a, b, size): + return np.random.uniform(a, b) + + return uniform_no_size + + +@numba_const_convert.register(RandomState) +def 
numba_const_convert_RandomState(state, **kwargs): + # The `numba_const_convert` in this case is just a passthrough function # that synchronizes Numba's internal random state with the current - # RandomState object + # `RandomState` object. ints, index = state.get_state()[1:3] ptr = _helperlib.rnd_get_np_state_ptr() _helperlib.rnd_set_state(ptr, (index, [int(x) for x in ints])) @@ -189,29 +207,29 @@ def {sized_fn_name}({random_fn_input_names}): return random_fn -@numba_funcify.register(aer.UniformRV) -@numba_funcify.register(aer.TriangularRV) -@numba_funcify.register(aer.BetaRV) -@numba_funcify.register(aer.NormalRV) -@numba_funcify.register(aer.LogNormalRV) -@numba_funcify.register(aer.GammaRV) -@numba_funcify.register(aer.ChiSquareRV) -@numba_funcify.register(aer.ParetoRV) -@numba_funcify.register(aer.GumbelRV) -@numba_funcify.register(aer.ExponentialRV) -@numba_funcify.register(aer.WeibullRV) -@numba_funcify.register(aer.LogisticRV) -@numba_funcify.register(aer.VonMisesRV) -@numba_funcify.register(aer.PoissonRV) -@numba_funcify.register(aer.GeometricRV) -@numba_funcify.register(aer.HyperGeometricRV) -@numba_funcify.register(aer.WaldRV) -@numba_funcify.register(aer.LaplaceRV) -@numba_funcify.register(aer.BinomialRV) -@numba_funcify.register(aer.MultinomialRV) -@numba_funcify.register(aer.RandIntRV) # only the first two arguments are supported -@numba_funcify.register(aer.ChoiceRV) # the `p` argument is not supported -@numba_funcify.register(aer.PermutationRV) +@_numba_funcify.register(aer.UniformRV) +@_numba_funcify.register(aer.TriangularRV) +@_numba_funcify.register(aer.BetaRV) +@_numba_funcify.register(aer.NormalRV) +@_numba_funcify.register(aer.LogNormalRV) +@_numba_funcify.register(aer.GammaRV) +@_numba_funcify.register(aer.ChiSquareRV) +@_numba_funcify.register(aer.ParetoRV) +@_numba_funcify.register(aer.GumbelRV) +@_numba_funcify.register(aer.ExponentialRV) +@_numba_funcify.register(aer.WeibullRV) +@_numba_funcify.register(aer.LogisticRV) +@_numba_funcify.register(aer.VonMisesRV) +@_numba_funcify.register(aer.PoissonRV) +@_numba_funcify.register(aer.GeometricRV) +@_numba_funcify.register(aer.HyperGeometricRV) +@_numba_funcify.register(aer.WaldRV) +@_numba_funcify.register(aer.LaplaceRV) +@_numba_funcify.register(aer.BinomialRV) +@_numba_funcify.register(aer.MultinomialRV) +@_numba_funcify.register(aer.RandIntRV) # only the first two arguments are supported +@_numba_funcify.register(aer.ChoiceRV) # the `p` argument is not supported +@_numba_funcify.register(aer.PermutationRV) def numba_funcify_RandomVariable(op, node, **kwargs): name = op.name np_random_func = getattr(np.random, name) @@ -267,12 +285,12 @@ def {np_random_fn_name}({np_input_names}): return make_numba_random_fn(node, np_random_fn) -@numba_funcify.register(aer.NegBinomialRV) +@_numba_funcify.register(aer.NegBinomialRV) def numba_funcify_NegBinomialRV(op, node, **kwargs): return make_numba_random_fn(node, np.random.negative_binomial) -@numba_funcify.register(aer.CauchyRV) +@_numba_funcify.register(aer.CauchyRV) def numba_funcify_CauchyRV(op, node, **kwargs): def body_fn(loc, scale): return f" return ({loc} + np.random.standard_cauchy()) / {scale}" @@ -280,7 +298,7 @@ def body_fn(loc, scale): return create_numba_random_fn(op, node, body_fn) -@numba_funcify.register(aer.HalfNormalRV) +@_numba_funcify.register(aer.HalfNormalRV) def numba_funcify_HalfNormalRV(op, node, **kwargs): def body_fn(a, b): return f" return {a} + {b} * abs(np.random.normal(0, 1))" @@ -288,7 +306,7 @@ def body_fn(a, b): return 
create_numba_random_fn(op, node, body_fn) -@numba_funcify.register(aer.BernoulliRV) +@_numba_funcify.register(aer.BernoulliRV) def numba_funcify_BernoulliRV(op, node, **kwargs): out_dtype = node.outputs[1].type.numpy_dtype @@ -308,7 +326,7 @@ def body_fn(a): ) -@numba_funcify.register(aer.CategoricalRV) +@_numba_funcify.register(aer.CategoricalRV) def numba_funcify_CategoricalRV(op, node, **kwargs): out_dtype = node.outputs[1].type.numpy_dtype size_len = int(get_vector_length(node.inputs[1])) @@ -321,7 +339,7 @@ def categorical_rv(rng, size, dtype, p): size_tpl = numba_ndarray.to_fixed_tuple(size, size_len) p = np.broadcast_to(p, size_tpl + p.shape[-1:]) - unif_samples = np.random.uniform(0, 1, size_tpl) + unif_samples = np.asarray(np.random.uniform(0, 1, size_tpl)) res = np.empty(size_tpl, dtype=out_dtype) for idx in np.ndindex(*size_tpl): @@ -332,7 +350,7 @@ def categorical_rv(rng, size, dtype, p): return categorical_rv -@numba_funcify.register(aer.DirichletRV) +@_numba_funcify.register(aer.DirichletRV) def numba_funcify_DirichletRV(op, node, **kwargs): out_dtype = node.outputs[1].type.numpy_dtype diff --git a/aesara/link/numba/dispatch/scalar.py b/aesara/link/numba/dispatch/scalar.py index 08dd5f1a10..b05a9c011e 100644 --- a/aesara/link/numba/dispatch/scalar.py +++ b/aesara/link/numba/dispatch/scalar.py @@ -10,7 +10,11 @@ from aesara.compile.ops import ViewOp from aesara.graph.basic import Variable from aesara.link.numba.dispatch import basic as numba_basic -from aesara.link.numba.dispatch.basic import create_numba_signature, numba_funcify +from aesara.link.numba.dispatch.basic import ( + _numba_funcify, + create_numba_signature, + numba_funcify, +) from aesara.link.utils import ( compile_function_src, get_name_for_object, @@ -31,7 +35,7 @@ from aesara.scalar.math import Erf, Erfc, GammaLn, Log1mexp, Sigmoid -@numba_funcify.register(ScalarOp) +@_numba_funcify.register(ScalarOp) def numba_funcify_ScalarOp(op, node, **kwargs): # TODO: Do we need to cache these functions so that we don't end up # compiling the same Numba function over and over again? 
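The `@overload(np.random.uniform)` shim added in `aesara/link/numba/dispatch/random.py` above relies on Numba's compile-time overload mechanism: the decorated function receives argument *types* and returns the implementation to compile. A small self-contained sketch of that mechanism; `clamp01` is a hypothetical example function, not part of Aesara or Numba:

    import numba
    from numba import types
    from numba.extending import overload

    def clamp01(x):
        # Pure-Python definition used outside of compiled code.
        return min(max(x, 0.0), 1.0)

    @overload(clamp01)
    def clamp01_overload(x):
        # Called at compile time with Numba types, not values.
        if isinstance(x, types.Float):
            def impl(x):
                return min(max(x, 0.0), 1.0)
            return impl

    @numba.njit
    def use_it(x):
        return clamp01(x)

    print(use_it(1.5))  # 1.0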
@@ -126,7 +130,7 @@ def {scalar_op_fn_name}({', '.join(input_names)}): )(scalar_op_fn) -@numba_funcify.register(Switch) +@_numba_funcify.register(Switch) def numba_funcify_Switch(op, node, **kwargs): @numba_basic.numba_njit(inline="always") def switch(condition, x, y): @@ -154,7 +158,7 @@ def {binary_op_name}({input_signature}): return nary_fn -@numba_funcify.register(Add) +@_numba_funcify.register(Add) def numba_funcify_Add(op, node, **kwargs): signature = create_numba_signature(node, force_scalar=True) @@ -166,7 +170,7 @@ def numba_funcify_Add(op, node, **kwargs): )(nary_add_fn) -@numba_funcify.register(Mul) +@_numba_funcify.register(Mul) def numba_funcify_Mul(op, node, **kwargs): signature = create_numba_signature(node, force_scalar=True) @@ -178,7 +182,7 @@ def numba_funcify_Mul(op, node, **kwargs): )(nary_mul_fn) -@numba_funcify.register(Cast) +@_numba_funcify.register(Cast) def numba_funcify_Cast(op, node, **kwargs): dtype = np.dtype(op.o_type.dtype) @@ -190,8 +194,8 @@ def cast(x): return cast -@numba_funcify.register(Identity) -@numba_funcify.register(ViewOp) +@_numba_funcify.register(Identity) +@_numba_funcify.register(ViewOp) def numba_funcify_ViewOp(op, **kwargs): @numba_basic.numba_njit(inline="always") def viewop(x): @@ -200,7 +204,7 @@ def viewop(x): return viewop -@numba_funcify.register(Clip) +@_numba_funcify.register(Clip) def numba_funcify_Clip(op, **kwargs): @numba_basic.numba_njit def clip(_x, _min, _max): @@ -218,7 +222,7 @@ def clip(_x, _min, _max): return clip -@numba_funcify.register(Composite) +@_numba_funcify.register(Composite) def numba_funcify_Composite(op, node, **kwargs): signature = create_numba_signature(node, force_scalar=True) @@ -230,7 +234,7 @@ def numba_funcify_Composite(op, node, **kwargs): return composite_fn -@numba_funcify.register(Second) +@_numba_funcify.register(Second) def numba_funcify_Second(op, node, **kwargs): @numba_basic.numba_njit(inline="always") def second(x, y): @@ -239,7 +243,7 @@ def second(x, y): return second -@numba_funcify.register(Reciprocal) +@_numba_funcify.register(Reciprocal) def numba_funcify_Reciprocal(op, node, **kwargs): @numba_basic.numba_njit(inline="always") def reciprocal(x): @@ -250,7 +254,7 @@ def reciprocal(x): return reciprocal -@numba_funcify.register(Sigmoid) +@_numba_funcify.register(Sigmoid) def numba_funcify_Sigmoid(op, node, **kwargs): @numba_basic.numba_njit(inline="always", fastmath=config.numba__fastmath) def sigmoid(x): @@ -259,7 +263,7 @@ def sigmoid(x): return sigmoid -@numba_funcify.register(GammaLn) +@_numba_funcify.register(GammaLn) def numba_funcify_GammaLn(op, node, **kwargs): @numba_basic.numba_njit(inline="always", fastmath=config.numba__fastmath) def gammaln(x): @@ -268,7 +272,7 @@ def gammaln(x): return gammaln -@numba_funcify.register(Log1mexp) +@_numba_funcify.register(Log1mexp) def numba_funcify_Log1mexp(op, node, **kwargs): @numba_basic.numba_njit(inline="always", fastmath=config.numba__fastmath) def logp1mexp(x): @@ -280,7 +284,7 @@ def logp1mexp(x): return logp1mexp -@numba_funcify.register(Erf) +@_numba_funcify.register(Erf) def numba_funcify_Erf(op, **kwargs): @numba_basic.numba_njit(inline="always", fastmath=config.numba__fastmath) def erf(x): @@ -289,7 +293,7 @@ def erf(x): return erf -@numba_funcify.register(Erfc) +@_numba_funcify.register(Erfc) def numba_funcify_Erfc(op, **kwargs): @numba_basic.numba_njit(inline="always", fastmath=config.numba__fastmath) def erfc(x): diff --git a/aesara/link/numba/dispatch/scan.py b/aesara/link/numba/dispatch/scan.py index 89af6c15e4..e663d956d9 
100644 --- a/aesara/link/numba/dispatch/scan.py +++ b/aesara/link/numba/dispatch/scan.py @@ -7,6 +7,7 @@ from aesara.link.numba.dispatch import basic as numba_basic from aesara.link.numba.dispatch.basic import ( + _numba_funcify, create_arg_string, create_tuple_string, numba_funcify, @@ -45,7 +46,7 @@ def range_arr(x): return range_arr -@numba_funcify.register(Scan) +@_numba_funcify.register(Scan) def numba_funcify_Scan(op, node, **kwargs): scan_inner_func = numba_basic.numba_njit(numba_funcify(op.fgraph)) diff --git a/aesara/link/numba/dispatch/sparse.py b/aesara/link/numba/dispatch/sparse.py new file mode 100644 index 0000000000..d07e029501 --- /dev/null +++ b/aesara/link/numba/dispatch/sparse.py @@ -0,0 +1,142 @@ +import scipy as sp +import scipy.sparse +from numba.core import cgutils, types +from numba.extending import ( + NativeValue, + box, + make_attribute_wrapper, + models, + register_model, + typeof_impl, + unbox, +) + + +class CSMatrixType(types.Type): + """A Numba `Type` modeled after the base class `scipy.sparse.compressed._cs_matrix`.""" + + name: str + instance_class: type + + def __init__(self, dtype): + self.dtype = dtype + self.data = types.Array(dtype, 1, "A") + self.indices = types.Array(types.int32, 1, "A") + self.indptr = types.Array(types.int32, 1, "A") + self.shape = types.UniTuple(types.int64, 2) + super().__init__(self.name) + + +make_attribute_wrapper(CSMatrixType, "data", "data") +make_attribute_wrapper(CSMatrixType, "indices", "indices") +make_attribute_wrapper(CSMatrixType, "indptr", "indptr") +make_attribute_wrapper(CSMatrixType, "shape", "shape") + + +class CSRMatrixType(CSMatrixType): + name = "csr_matrix" + + @staticmethod + def instance_class(data, indices, indptr, shape): + return sp.sparse.csr_matrix((data, indices, indptr), shape, copy=False) + + +class CSCMatrixType(CSMatrixType): + name = "csc_matrix" + + @staticmethod + def instance_class(data, indices, indptr, shape): + return sp.sparse.csc_matrix((data, indices, indptr), shape, copy=False) + + +@typeof_impl.register(sp.sparse.csc_matrix) +def typeof_csc_matrix(val, c): + data = typeof_impl(val.data, c) + return CSCMatrixType(data.dtype) + + +@typeof_impl.register(sp.sparse.csr_matrix) +def typeof_csr_matrix(val, c): + data = typeof_impl(val.data, c) + return CSRMatrixType(data.dtype) + + +@register_model(CSRMatrixType) +class CSRMatrixModel(models.StructModel): + def __init__(self, dmm, fe_type): + members = [ + ("data", fe_type.data), + ("indices", fe_type.indices), + ("indptr", fe_type.indptr), + ("shape", fe_type.shape), + ] + super().__init__(dmm, fe_type, members) + + +@register_model(CSCMatrixType) +class CSCMatrixModel(models.StructModel): + def __init__(self, dmm, fe_type): + members = [ + ("data", fe_type.data), + ("indices", fe_type.indices), + ("indptr", fe_type.indptr), + ("shape", fe_type.shape), + ] + super().__init__(dmm, fe_type, members) + + +@unbox(CSCMatrixType) +@unbox(CSRMatrixType) +def unbox_matrix(typ, obj, c): + + struct_ptr = cgutils.create_struct_proxy(typ)(c.context, c.builder) + + data = c.pyapi.object_getattr_string(obj, "data") + indices = c.pyapi.object_getattr_string(obj, "indices") + indptr = c.pyapi.object_getattr_string(obj, "indptr") + shape = c.pyapi.object_getattr_string(obj, "shape") + + struct_ptr.data = c.unbox(typ.data, data).value + struct_ptr.indices = c.unbox(typ.indices, indices).value + struct_ptr.indptr = c.unbox(typ.indptr, indptr).value + struct_ptr.shape = c.unbox(typ.shape, shape).value + + c.pyapi.decref(data) + c.pyapi.decref(indices) + 
c.pyapi.decref(indptr) + c.pyapi.decref(shape) + + is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + is_error = c.builder.load(is_error_ptr) + + res = NativeValue(struct_ptr._getvalue(), is_error=is_error) + + return res + + +@box(CSCMatrixType) +@box(CSRMatrixType) +def box_matrix(typ, val, c): + struct_ptr = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val) + + data_obj = c.box(typ.data, struct_ptr.data) + indices_obj = c.box(typ.indices, struct_ptr.indices) + indptr_obj = c.box(typ.indptr, struct_ptr.indptr) + shape_obj = c.box(typ.shape, struct_ptr.shape) + + c.pyapi.incref(data_obj) + c.pyapi.incref(indices_obj) + c.pyapi.incref(indptr_obj) + c.pyapi.incref(shape_obj) + + cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) + obj = c.pyapi.call_function_objargs( + cls_obj, (data_obj, indices_obj, indptr_obj, shape_obj) + ) + + c.pyapi.decref(data_obj) + c.pyapi.decref(indices_obj) + c.pyapi.decref(indptr_obj) + c.pyapi.decref(shape_obj) + + return obj diff --git a/aesara/link/numba/dispatch/tensor_basic.py b/aesara/link/numba/dispatch/tensor_basic.py index 942e9cb709..b9b85daa6d 100644 --- a/aesara/link/numba/dispatch/tensor_basic.py +++ b/aesara/link/numba/dispatch/tensor_basic.py @@ -3,7 +3,7 @@ import numpy as np from aesara.link.numba.dispatch import basic as numba_basic -from aesara.link.numba.dispatch.basic import create_tuple_string, numba_funcify +from aesara.link.numba.dispatch.basic import _numba_funcify, create_tuple_string from aesara.link.utils import compile_function_src, unique_name_generator from aesara.tensor.basic import ( Alloc, @@ -21,7 +21,7 @@ from aesara.tensor.shape import Unbroadcast -@numba_funcify.register(AllocEmpty) +@_numba_funcify.register(AllocEmpty) def numba_funcify_AllocEmpty(op, node, **kwargs): global_env = { @@ -59,7 +59,7 @@ def allocempty({", ".join(shape_var_names)}): return numba_basic.numba_njit(alloc_fn) -@numba_funcify.register(Alloc) +@_numba_funcify.register(Alloc) def numba_funcify_Alloc(op, node, **kwargs): global_env = {"np": np, "to_scalar": numba_basic.to_scalar} @@ -95,7 +95,7 @@ def alloc(val, {", ".join(shape_var_names)}): return numba_basic.numba_njit(alloc_fn) -@numba_funcify.register(AllocDiag) +@_numba_funcify.register(AllocDiag) def numba_funcify_AllocDiag(op, **kwargs): offset = op.offset @@ -106,7 +106,7 @@ def allocdiag(v): return allocdiag -@numba_funcify.register(ARange) +@_numba_funcify.register(ARange) def numba_funcify_ARange(op, **kwargs): dtype = np.dtype(op.dtype) @@ -122,7 +122,7 @@ def arange(start, stop, step): return arange -@numba_funcify.register(Join) +@_numba_funcify.register(Join) def numba_funcify_Join(op, **kwargs): view = op.view @@ -139,7 +139,7 @@ def join(axis, *tensors): return join -@numba_funcify.register(Split) +@_numba_funcify.register(Split) def numba_funcify_Split(op, **kwargs): @numba_basic.numba_njit def split(tensor, axis, indices): @@ -151,7 +151,7 @@ def split(tensor, axis, indices): return split -@numba_funcify.register(ExtractDiag) +@_numba_funcify.register(ExtractDiag) def numba_funcify_ExtractDiag(op, **kwargs): offset = op.offset # axis1 = op.axis1 @@ -164,7 +164,7 @@ def extract_diag(x): return extract_diag -@numba_funcify.register(Eye) +@_numba_funcify.register(Eye) def numba_funcify_Eye(op, **kwargs): dtype = np.dtype(op.dtype) @@ -180,7 +180,7 @@ def eye(N, M, k): return eye -@numba_funcify.register(MakeVector) +@_numba_funcify.register(MakeVector) def numba_funcify_MakeVector(op, node, **kwargs): dtype = 
np.dtype(op.dtype) @@ -208,7 +208,7 @@ def makevector({", ".join(input_names)}): return numba_basic.numba_njit(makevector_fn) -@numba_funcify.register(Unbroadcast) +@_numba_funcify.register(Unbroadcast) def numba_funcify_Unbroadcast(op, **kwargs): @numba_basic.numba_njit def unbroadcast(x): @@ -217,7 +217,7 @@ def unbroadcast(x): return unbroadcast -@numba_funcify.register(TensorFromScalar) +@_numba_funcify.register(TensorFromScalar) def numba_funcify_TensorFromScalar(op, **kwargs): @numba_basic.numba_njit(inline="always") def tensor_from_scalar(x): @@ -226,7 +226,7 @@ def tensor_from_scalar(x): return tensor_from_scalar -@numba_funcify.register(ScalarFromTensor) +@_numba_funcify.register(ScalarFromTensor) def numba_funcify_ScalarFromTensor(op, **kwargs): @numba_basic.numba_njit(inline="always") def scalar_from_tensor(x): diff --git a/aesara/link/numba/linker.py b/aesara/link/numba/linker.py index bb390b0523..28224f7256 100644 --- a/aesara/link/numba/linker.py +++ b/aesara/link/numba/linker.py @@ -27,21 +27,21 @@ def fgraph_convert(self, fgraph, **kwargs): return numba_funcify(fgraph, **kwargs) def jit_compile(self, fn): - import numba + from aesara.link.numba.dispatch import numba_njit - jitted_fn = numba.njit(fn) + jitted_fn = numba_njit(fn) return jitted_fn def create_thunk_inputs(self, storage_map): from numpy.random import RandomState - from aesara.link.numba.dispatch import numba_typify + from aesara.link.numba.dispatch import numba_const_convert thunk_inputs = [] for n in self.fgraph.inputs: sinput = storage_map[n] if isinstance(sinput[0], RandomState): - new_value = numba_typify( + new_value = numba_const_convert( sinput[0], dtype=getattr(sinput[0], "dtype", None) ) # We need to remove the reference-based connection to the diff --git a/aesara/link/utils.py b/aesara/link/utils.py index c7d93fa7f1..5baea2d95c 100644 --- a/aesara/link/utils.py +++ b/aesara/link/utils.py @@ -676,7 +676,7 @@ def fgraph_to_python( fgraph: FunctionGraph, op_conversion_fn: Callable, *, - type_conversion_fn: Callable = lambda x, **kwargs: x, + const_conversion_fn: Callable = lambda x, **kwargs: x, order: Optional[List[Apply]] = None, storage_map: Optional["StorageMapType"] = None, fgraph_name: str = "fgraph_to_python", @@ -696,8 +696,8 @@ def fgraph_to_python( A callable used to convert nodes inside `fgraph` based on their `Op` types. It must have the signature ``(op: Op, node: Apply=None, storage_map: Dict[Variable, List[Optional[Any]]]=None, **kwargs)``. - type_conversion_fn - A callable used to convert the values in `storage_map`. It must have + const_conversion_fn + A callable used to convert the `Constant` values in `storage_map`. It must have the signature ``(value: Optional[Any], variable: Variable=None, storage: List[Optional[Any]]=None, **kwargs)``. 
order @@ -751,7 +751,7 @@ def fgraph_to_python( ) if input_storage[0] is not None or isinstance(i, Constant): # Constants need to be assigned locally and referenced - global_env[local_input_name] = type_conversion_fn( + global_env[local_input_name] = const_conversion_fn( input_storage[0], variable=i, storage=input_storage, **kwargs ) # TODO: We could attempt to use the storage arrays directly @@ -774,7 +774,7 @@ def fgraph_to_python( output_storage = storage_map.setdefault( out, [None if not isinstance(out, Constant) else out.data] ) - global_env[local_output_name] = type_conversion_fn( + global_env[local_output_name] = const_conversion_fn( output_storage[0], variable=out, storage=output_storage, diff --git a/aesara/link/vm.py b/aesara/link/vm.py index 67c80c810e..e79e9ebbb4 100644 --- a/aesara/link/vm.py +++ b/aesara/link/vm.py @@ -394,9 +394,9 @@ def __call__(self): for thunk, node, old_storage in zip_longest( self.thunks, self.nodes, self.post_thunk_clear, fillvalue=() ): - t0 = time.time() + t0 = time.perf_counter() thunk() - t1 = time.time() + t1 = time.perf_counter() self.call_counts[i] += 1 self.call_times[i] += t1 - t0 for old_s in old_storage: @@ -515,15 +515,15 @@ def run_thunk_of_node(self, node): """ idx = self.node_idx[node] - t0 = time.time() + t0 = time.perf_counter() rval = self.thunks[idx]() self.node_executed_order.append(node) # Some thunks on some computers run faster than the granularity - # of the time.time clock. + # of the time.perf_counter clock. # Profile output looks buggy if a node has run but takes 0 time. # (and profile code might hide real bugs if it rounds up 0) - dt = max(time.time() - t0, 1e-10) + dt = max(time.perf_counter() - t0, 1e-10) if self.callback is not None: self.callback( node=node, @@ -1056,7 +1056,7 @@ def make_vm( callback=self.callback, callback_input=self.callback_input, ) - elif self.use_cloop and CVM: + elif self.use_cloop and CVM is not None: # create a map from nodes to ints and vars to ints nodes_idx = {} @@ -1231,21 +1231,21 @@ def make_all( thunks = [] - t0 = time.time() + t0 = time.perf_counter() linker_make_thunk_time = {} impl = None if self.c_thunks is False: impl = "py" for node in order: try: - thunk_start = time.time() + thunk_start = time.perf_counter() # no-recycling is done at each VM.__call__ So there is # no need to cause duplicate c code by passing # no_recycling here. thunks.append( node.op.make_thunk(node, storage_map, compute_map, [], impl=impl) ) - linker_make_thunk_time[node] = time.time() - thunk_start + linker_make_thunk_time[node] = time.perf_counter() - thunk_start if not hasattr(thunks[-1], "lazy"): # We don't want all ops maker to think about lazy Ops. # So if they didn't specify that its lazy or not, it isn't. 
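The timing hunks here and in the profiling scripts below replace `time.time` with `time.perf_counter`, which is monotonic and has the highest available resolution, so short thunk intervals cannot come out negative after a clock adjustment and rarely round to exactly zero. A minimal sketch of the measurement pattern, reusing the same `1e-10` floor the VM profiler applies:

    import time

    t0 = time.perf_counter()
    sum(range(100_000))                        # stand-in for running a thunk
    dt = max(time.perf_counter() - t0, 1e-10)  # never report a zero-duration node
    print(f"{dt:.3e} s")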
@@ -1254,7 +1254,7 @@ def make_all( except Exception: raise_with_op(fgraph, node) - t1 = time.time() + t1 = time.perf_counter() if self.profile: self.profile.linker_node_make_thunks += t1 - t0 diff --git a/aesara/misc/check_blas.py b/aesara/misc/check_blas.py index 19b874cf4a..ccc7d5d3e5 100644 --- a/aesara/misc/check_blas.py +++ b/aesara/misc/check_blas.py @@ -82,12 +82,12 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order= if sync: # Make sure we don't include the time from the first call c.get_value(borrow=True, return_internal_type=True).sync() - t0 = time.time() + t0 = time.perf_counter() for i in range(iters): f() if sync: c.get_value(borrow=True, return_internal_type=True).sync() - t1 = time.time() + t1 = time.perf_counter() return t1 - t0, impl diff --git a/aesara/misc/elemwise_time_test.py b/aesara/misc/elemwise_time_test.py index 60727c563b..e6a6a66a6a 100644 --- a/aesara/misc/elemwise_time_test.py +++ b/aesara/misc/elemwise_time_test.py @@ -34,9 +34,9 @@ def evalTime(f, v, script=False, loops=1000): min = 1e10 for i in range(0, loops): - t0 = time.time() + t0 = time.perf_counter() f(v) - dt = time.time() - t0 + dt = time.perf_counter() - t0 min = dt if dt < min else min if not script: print(f" run time in {int(loops)} loops was {min:2.9f} sec") diff --git a/aesara/misc/latence_gpu_transfert.py b/aesara/misc/latence_gpu_transfert.py index e31674d235..270f17c2c6 100644 --- a/aesara/misc/latence_gpu_transfert.py +++ b/aesara/misc/latence_gpu_transfert.py @@ -13,12 +13,12 @@ print(f2.maker.fgraph.toposort()) for i in (1, 10, 100, 1000, 10000, 100000, 1000000, 10000000): o = np.zeros(i, dtype="float32") - t0 = time.time() + t0 = time.perf_counter() f1(o) - t1 = time.time() + t1 = time.perf_counter() tf1 = t1 - t0 - t0 = time.time() + t0 = time.perf_counter() f2() - t1 = time.time() + t1 = time.perf_counter() print("%8i %6.1f ns %7.1f ns" % (i, tf1 * 1e6, (t1 - t0) * 1e6)) diff --git a/aesara/sandbox/multinomial.py b/aesara/sandbox/multinomial.py index fc72ca8c6d..6e15d6b57c 100644 --- a/aesara/sandbox/multinomial.py +++ b/aesara/sandbox/multinomial.py @@ -44,7 +44,9 @@ def make_node(self, pvals, unis, n=1): odtype = pvals.dtype else: odtype = self.odtype - out = at.tensor(dtype=odtype, shape=pvals.type.broadcastable) + out = at.tensor( + dtype=odtype, shape=tuple(1 if s == 1 else None for s in pvals.type.shape) + ) return Apply(self, [pvals, unis, as_scalar(n)], [out]) def grad(self, ins, outgrads): diff --git a/aesara/sandbox/rng_mrg.py b/aesara/sandbox/rng_mrg.py index a1afeb3d5d..5c1c252ce4 100644 --- a/aesara/sandbox/rng_mrg.py +++ b/aesara/sandbox/rng_mrg.py @@ -379,20 +379,23 @@ def make_node(self, rstate, size): # this op should not be called directly. # # call through MRG_RandomStream instead. 
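The `multinomial.py` hunk above and the `rng_mrg.py` rewrite just below migrate from boolean `broadcastable` tuples to static `shape` tuples, where `1` marks a broadcastable dimension and `None` an unknown length. A short sketch of the new convention, using only the public `TensorType` constructor that appears elsewhere in this patch:

    from aesara.tensor.type import TensorType

    # Old style: TensorType(dtype, (False,) * ndim) with booleans.
    # New style: a static-shape tuple of 1 (broadcastable) or None (unknown).
    vec = TensorType("float64", shape=(None,))    # ordinary 1-d vector
    row = TensorType("float64", shape=(1, None))  # first dimension broadcastable

    print(vec.shape, row.shape)   # (None,) (1, None)
    print(row.broadcastable)      # (True, False)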
- broad = [] + out_shape = () for i in range(self.output_type.ndim): - broad.append(at.extract_constant(size[i]) == 1) - output_type = self.output_type.clone(shape=broad)() + if at.extract_constant(size[i]) == 1: + out_shape += (1,) + else: + out_shape += (None,) + output_var = self.output_type.clone(shape=out_shape)() rstate = as_tensor_variable(rstate) size = as_tensor_variable(size) - return Apply(self, [rstate, size], [rstate.type(), output_type]) + return Apply(self, [rstate, size], [rstate.type(), output_var]) @classmethod def new(cls, rstate, ndim, dtype, size): v_size = as_tensor_variable(size) if ndim is None: ndim = get_vector_length(v_size) - op = cls(TensorType(dtype, (False,) * ndim)) + op = cls(TensorType(dtype, shape=(None,) * ndim)) return op(rstate, v_size) def perform(self, node, inp, out, params): diff --git a/aesara/scalar/basic.py b/aesara/scalar/basic.py index 6764f4a16e..8c89f81ce7 100644 --- a/aesara/scalar/basic.py +++ b/aesara/scalar/basic.py @@ -27,6 +27,7 @@ from aesara.gradient import DisconnectedType, grad_undefined from aesara.graph.basic import Apply, Constant, Variable, clone, list_of_nodes from aesara.graph.fg import FunctionGraph +from aesara.graph.op import HasInnerGraph from aesara.graph.rewriting.basic import MergeOptimizer from aesara.graph.type import HasDataType, HasShape from aesara.graph.utils import MetaObject, MethodNotDefined @@ -3987,7 +3988,7 @@ def c_code(self, *args, **kwargs): complex_from_polar = ComplexFromPolar(name="complex_from_polar") -class Composite(ScalarOp): +class Composite(ScalarOp, HasInnerGraph): """ Composite is an Op that takes a graph of scalar operations and produces c code for the whole graph. Its purpose is to implement loop @@ -3999,174 +4000,6 @@ class Composite(ScalarOp): init_param: Union[Tuple[str, str], Tuple[str]] = ("inputs", "outputs") - def __str__(self): - if self.name is None: - self.init_name() - return self.name - - def make_new_inplace(self, output_types_preference=None, name=None): - """ - This op.__init__ fct don't have the same parameter as other scalar op. - This break the insert_inplace_optimizer optimization. - This fct allow fix patch this. - - """ - d = {k: getattr(self, k) for k in self.init_param} - out = self.__class__(**d) - if name: - out.name = name - else: - name = out.name - super(Composite, out).__init__(output_types_preference, name) - return out - - def init_c_code(self): - """ - Assemble the C code for this Composite Op. - - The result is assigned to `self._c_code`. - """ - from aesara.link.c.interface import CLinkerType - - # It was already called - if hasattr(self, "_c_code"): - return - subd = dict( - chain( - ((e, f"%(i{int(i)})s") for i, e in enumerate(self.fgraph.inputs)), - ((e, f"%(o{int(i)})s") for i, e in enumerate(self.fgraph.outputs)), - ) - ) - - for var in self.fgraph.variables: - if var.owner is None: - if var not in self.fgraph.inputs: - # This is an orphan - if isinstance(var, Constant) and isinstance(var.type, CLinkerType): - subd[var] = var.type.c_literal(var.data) - else: - raise ValueError( - "All orphans in the fgraph to Composite must" - " be Constant, CLinkerType instances." - ) - elif any(i.dtype == "float16" for i in var.owner.inputs) or any( - o.dtype == "float16" for o in var.owner.outputs - ): - # flag for elemwise ops to check. 
- self.inner_float16 = True - - _c_code = "{\n" - self.nodenames = [ - f"%(nodename)s_subnode{int(j)}" - for j, n in enumerate(self.fgraph.toposort()) - ] - - i = 0 - for j, node in enumerate(self.fgraph.toposort()): - for output in node.outputs: - if output not in subd: - i += 1 - name = f"V%(id)s_tmp{int(i)}" - subd[output] = name - _c_code += f"{output.type.dtype_specs()[1]} {name};\n" - s = node.op.c_code( - node, - self.nodenames[j], - [subd[input] for input in node.inputs], - [subd[output] for output in node.outputs], - dict(fail="%(fail)s", id=f"%(id)s_{int(j)}"), - ) - _c_code += s - _c_code += "\n" - _c_code += "}\n" - self._c_code = _c_code - - def init_py_impls(self): - """ - Return a list of functions that compute each output of self. - - """ - # In the case where the graph is a dag, but not a tree like: - # add(*1 -> mul(x, y), *1) - - # We have an efficient way to build the executable (we build - # and traverse each node only once). - - # But we don't have an efficient execution. We will execute - # like a tree, so nodes that have more then 1 client will be - # executed as many times as there number of clients. In the - # example above, it will calculate *1 twice. Doing otherwise - # imply making a complicated execution engine. - - # We need the fast creation of the executor as we always do it - # even if we will use the c code. The Python implementation is - # already slow, so it is not as much important to have a fast - # execution there. - - memo = {} - - def compose_impl(r): - if r in memo: - return memo[r] - if r in self.fgraph.inputs: - idx = self.fgraph.inputs.index(r) - - def f(inputs): - return inputs[idx] - - memo[r] = f - return f - elif r.owner is None: # in fgraph.orphans: - - def f(inputs): - return r.data - - memo[r] = f - return f - node = r.owner - producers = [compose_impl(input) for input in node.inputs] - - def f(inputs): - return node.op.impl(*[p(inputs) for p in producers]) - - memo[r] = f - return f - - self._impls = [compose_impl(r) for r in self.fgraph.outputs] - - def init_name(self): - """ - Return a readable string representation of self.fgraph. - - """ - rval = self.name - if rval is None: - for i, r in enumerate(self.fgraph.inputs): - r.name = f"i{int(i)}" - for i, r in enumerate(self.fgraph.outputs): - r.name = f"o{int(i)}" - io = set(self.fgraph.inputs + self.fgraph.outputs) - for i, r in enumerate(self.fgraph.variables): - if r not in io and len(self.fgraph.clients[r]) > 1: - r.name = f"t{int(i)}" - outputs_str = ", ".join([pprint(output) for output in self.fgraph.outputs]) - rval = f"Composite{{{outputs_str}}}" - self.name = rval - - def init_fgraph(self): - # The clone done by FunctionGraph is needed as we don't want - # the fgraph to be set to the variable as we need to pickle - # them for the cache of c module to work. - fgraph = FunctionGraph(self.inputs, self.outputs) - MergeOptimizer().rewrite(fgraph) - for node in fgraph.apply_nodes: - if not isinstance(node.op, ScalarOp): - raise ValueError( - "The fgraph to Composite must be exclusively" - " composed of ScalarOp instances." - ) - self.fgraph = fgraph - def __init__(self, inputs, outputs): # We need to clone the graph as sometimes its nodes already # contain a reference to an fgraph. As we want the Composite @@ -4179,6 +4012,7 @@ def __init__(self, inputs, outputs): # only 1 new Composite each time at the output. 
for i in inputs: assert i not in outputs # This isn't supported, use identity + if len(outputs) > 1 or not any( isinstance(var.owner.op, Composite) for var in outputs ): @@ -4210,15 +4044,112 @@ def __init__(self, inputs, outputs): self.outputs_type = tuple([output.type for output in outputs]) self.nin = len(inputs) self.nout = len(outputs) - self.init_fgraph() # self.fgraph - # Postpone the creation in case it isn't needed. - # self.init_name() # self.name - self.name = None self.prepare_node_called = set() + @property + def fn(self): + return None + + @property + def inner_inputs(self): + return self.fgraph.inputs + + @property + def inner_outputs(self): + return self.fgraph.outputs + + def __str__(self): + return self.name + + def make_new_inplace(self, output_types_preference=None, name=None): + """ + This op.__init__ fct don't have the same parameter as other scalar op. + This break the insert_inplace_optimizer optimization. + This fct allow fix patch this. + + """ + d = {k: getattr(self, k) for k in self.init_param} + out = self.__class__(**d) + if name: + out.name = name + else: + name = out.name + super(Composite, out).__init__(output_types_preference, name) + return out + + @property + def py_perform(self): + if hasattr(self, "_py_perform_fn"): + return self._py_perform_fn + + from aesara.link.utils import fgraph_to_python + + def python_convert(op, node=None, **kwargs): + assert node is not None + + n_outs = len(node.outputs) + + if n_outs > 1: + + def _perform(*inputs, outputs=[[None]] * n_outs): + op.perform(node, inputs, outputs) + return tuple(o[0] for o in outputs) + + else: + + def _perform(*inputs, outputs=[[None]]): + op.perform(node, inputs, outputs) + return outputs[0][0] + + return _perform + + self._py_perform_fn = fgraph_to_python(self.fgraph, python_convert) + return self._py_perform_fn + + @property + def name(self): + if hasattr(self, "_name"): + return self._name + + # TODO FIXME: Just implement pretty printing for the `Op`; don't do + # this redundant, outside work in the `Op` itself. + for i, r in enumerate(self.fgraph.inputs): + r.name = f"i{int(i)}" + for i, r in enumerate(self.fgraph.outputs): + r.name = f"o{int(i)}" + io = set(self.fgraph.inputs + self.fgraph.outputs) + for i, r in enumerate(self.fgraph.variables): + if r not in io and len(self.fgraph.clients[r]) > 1: + r.name = f"t{int(i)}" + outputs_str = ", ".join([pprint(output) for output in self.fgraph.outputs]) + rval = f"Composite{{{outputs_str}}}" + self._name = rval + return self._name + + @name.setter + def name(self, name): + self._name = name + + @property + def fgraph(self): + if hasattr(self, "_fgraph"): + return self._fgraph + + # The clone done by FunctionGraph is needed as we don't want + # the fgraph to be set to the variable as we need to pickle + # them for the cache of c module to work. + fgraph = FunctionGraph(self.inputs, self.outputs) + MergeOptimizer().rewrite(fgraph) + for node in fgraph.apply_nodes: + if not isinstance(node.op, ScalarOp): + raise TypeError( + "The fgraph to Composite must be exclusively" + " composed of ScalarOp instances." 
+ ) + self._fgraph = fgraph + return self._fgraph + def prepare_node(self, node, storage_map, compute_map, impl): - if impl == "py": - self.init_py_impls() # self._impls if impl not in self.prepare_node_called: for n in list_of_nodes(self.inputs, self.outputs): n.op.prepare_node(n, None, None, impl) @@ -4229,7 +4160,13 @@ def clone_float32(self): new_ins, new_outs = composite_f32.apply(self.fgraph) return Composite(new_ins, new_outs) + def clone(self): + new_ins, new_outs = composite_f32.apply(self.fgraph) + return Composite(new_ins, new_outs) + def output_types(self, input_types): + # TODO FIXME: What's the intended purpose/use of this method, and why + # does it even need to be a method? if tuple(input_types) != self.inputs_type: raise TypeError( f"Wrong types for Composite. Expected {self.inputs_type}, got {tuple(input_types)}." @@ -4256,8 +4193,9 @@ def make_node(self, *inputs): return node def perform(self, node, inputs, output_storage): - for storage, impl in zip(output_storage, self._impls): - storage[0] = impl(inputs) + outputs = self.py_perform(*inputs) + for storage, out_val in zip(output_storage, outputs): + storage[0] = out_val def impl(self, *inputs): output_storage = [[None] for i in range(self.nout)] @@ -4270,8 +4208,110 @@ def impl(self, *inputs): def grad(self, inputs, output_grads): raise NotImplementedError("grad is not implemented for Composite") + def __eq__(self, other): + if self is other: + return True + if ( + type(self) != type(other) + or self.nin != other.nin + or self.nout != other.nout + ): + return False + + # TODO FIXME: Why this? Shouldn't we expect equivalent inputs to this + # object to generate the same `_c_code`? + return self.c_code_template == other.c_code_template + + def __hash__(self): + # Note that in general, the configparser settings at the time + # of code generation (__init__) affect the semantics of this Op. + # This function assumes that all relevant info about the configparser + # is embodied in _c_code. So the _c_code, rather than self.fgraph, + # is the signature of the semantics of this Op. + # _c_code is preserved through unpickling, so the Op will not change + # semantics when it is reloaded with different configparser + # settings. + # + # TODO FIXME: Doesn't the above just mean that we should be including + # the relevant "configparser settings" here? Also, why should we even + # care about the exact form of the generated C code when comparing + # `Op`s? All this smells of leaky concerns and interfaces. + return hash((type(self), self.nin, self.nout, self.c_code_template)) + + def __getstate__(self): + rval = dict(self.__dict__) + rval.pop("_c_code", None) + rval.pop("_py_perform_fn", None) + rval.pop("_fgraph", None) + rval.pop("prepare_node_called", None) + return rval + + def __setstate__(self, d): + self.__dict__.update(d) + self.prepare_node_called = set() + + @property + def c_code_template(self): + from aesara.link.c.interface import CLinkerType + + if hasattr(self, "_c_code"): + return self._c_code + + subd = dict( + chain( + ((e, f"%(i{int(i)})s") for i, e in enumerate(self.fgraph.inputs)), + ((e, f"%(o{int(i)})s") for i, e in enumerate(self.fgraph.outputs)), + ) + ) + + for var in self.fgraph.variables: + if var.owner is None: + if var not in self.fgraph.inputs: + # This is an orphan + if isinstance(var, Constant) and isinstance(var.type, CLinkerType): + subd[var] = var.type.c_literal(var.data) + else: + raise ValueError( + "All orphans in the fgraph to Composite must" + " be Constant, CLinkerType instances." 
+ ) + elif any(i.dtype == "float16" for i in var.owner.inputs) or any( + o.dtype == "float16" for o in var.owner.outputs + ): + # flag for elemwise ops to check. + self.inner_float16 = True + + _c_code = "{\n" + self.nodenames = [ + f"%(nodename)s_subnode{int(j)}" + for j, n in enumerate(self.fgraph.toposort()) + ] + + i = 0 + for j, node in enumerate(self.fgraph.toposort()): + for output in node.outputs: + if output not in subd: + i += 1 + name = f"V%(id)s_tmp{int(i)}" + subd[output] = name + _c_code += f"{output.type.dtype_specs()[1]} {name};\n" + s = node.op.c_code( + node, + self.nodenames[j], + [subd[input] for input in node.inputs], + [subd[output] for output in node.outputs], + dict(fail="%(fail)s", id=f"%(id)s_{int(j)}"), + ) + _c_code += s + _c_code += "\n" + + _c_code += "}\n" + + self._c_code = _c_code + + return self._c_code + def c_code(self, node, nodename, inames, onames, sub): - self.init_c_code() d = dict( chain( @@ -4286,7 +4326,7 @@ def c_code(self, node, nodename, inames, onames, sub): # It won't generate conflicting variable name. d["id"] = "_DUMMY_ID_" - return self._c_code % d + return self.c_code_template % d def c_code_cache_version(self): rval = [3] @@ -4314,7 +4354,6 @@ def c_support_code(self, **kwargs): return "\n".join(sorted(rval)) def c_support_code_apply(self, node, name): - self.init_c_code() rval = [] for subnode, subnodename in zip(self.fgraph.toposort(), self.nodenames): subnode_support_code = subnode.op.c_support_code_apply( @@ -4328,49 +4367,6 @@ def c_support_code_apply(self, node, name): # c_support_code instead of c_support_code_apply. return "\n".join(rval) - def __eq__(self, other): - if self is other: - return True - if ( - type(self) != type(other) - or self.nin != other.nin - or self.nout != other.nout - ): - return False - # see __hash__ for comment on why there is no mention of fgraph - # or module cache key here. - self.init_c_code() # self._c_code and self.nodenames - other.init_c_code() - return self._c_code == other._c_code - - def __hash__(self): - self.init_c_code() # self._c_code and self.nodenames - rval = hash((type(self), self.nin, self.nout, self._c_code)) - # Note that in general, the configparser settings at the time - # of code generation (__init__) affect the semantics of this Op. - # This function assumes that all relevant info about the configparser - # is embodied in _c_code. So the _c_code, rather than self.fgraph, - # is the signature of the semantics of this Op. - # _c_code is preserved through unpickling, so the Op will not change - # semantics when it is reloaded with different configparser - # settings. - return rval - - def __getstate__(self): - rval = dict(self.__dict__) - rval.pop("_impls", None) - rval.pop("prepare_node_called", None) - del rval["fgraph"] - return rval - - def __setstate__(self, d): - self.__dict__.update(d) - # We must call init to set fgraph and _impls again, as otherwise - # self.perform will not work. 
- self.prepare_node_called = set() - self.init_fgraph() - self.init_py_impls() - class Compositef32: # This is a dict of scalar op classes that need special handling diff --git a/aesara/scalar/math.py b/aesara/scalar/math.py index 43ee662ebf..6dd6d0004e 100644 --- a/aesara/scalar/math.py +++ b/aesara/scalar/math.py @@ -6,6 +6,7 @@ import os import warnings +from textwrap import dedent import numpy as np import scipy.special @@ -1134,7 +1135,8 @@ class Softplus(UnaryScalarOp): r""" Compute log(1 + exp(x)), also known as softplus or log1pexp - This function is numerically more stable than the naive approach. + This function is numerically faster than the naive approach, and does not overflow + for large values of x. For details, see https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf @@ -1172,44 +1174,30 @@ def grad(self, inp, grads): def c_code(self, node, name, inp, out, sub): (x,) = inp (z,) = out - # The boundary constants were obtained by looking at the output of - # python commands like: - # import numpy, aesara - # dt='float32' # or float64 - # for i in range(750): - # print i, repr(numpy.log1p(numpy.exp(_asarray([i,-i], dtype=dt)))) - # the upper boundary check prevents us from generating inf, whereas the - # the lower boundary check prevents using exp when the result will be 0 anyway. - # The intermediate constants are taken from Machler (2012). - - # We use the float32 limits for float16 for now as the - # computation will happen in float32 anyway. + # We use the same limits for all precisions, which may be suboptimal. The reference + # paper only looked at double precision if node.inputs[0].type in float_types: if node.inputs[0].type == float64: - return ( - """ - %(z)s = ( - %(x)s < -745.0 ? 0.0 : - %(x)s < -37.0 ? exp(%(x)s) : - %(x)s < 18.0 ? log1p(exp(%(x)s)) : - %(x)s < 33.3 ? %(x)s + exp(-%(x)s) : - %(x)s + return dedent( + f""" + {z} = ( + {x} < -37.0 ? exp({x}) : + {x} < 18.0 ? log1p(exp({x})) : + {x} < 33.3 ? {x} + exp(-{x}) : + {x} ); """ - % locals() ) else: - return ( - """ - %(z)s = ( - %(x)s < -103.0f ? 0.0 : - %(x)s < -37.0f ? exp(%(x)s) : - %(x)s < 18.0f ? log1p(exp(%(x)s)) : - %(x)s < 33.3f ? %(x)s + exp(-%(x)s) : - %(x)s + return dedent( + f""" + {z} = ( + {x} < -37.0f ? exp({x}) : + {x} < 18.0f ? log1p(exp({x})) : + {x} < 33.3f ? {x} + exp(-{x}) : + {x} ); """ - % locals() ) else: raise NotImplementedError("only floatingpoint is implemented") @@ -1217,7 +1205,7 @@ def c_code(self, node, name, inp, out, sub): def c_code_cache_version(self): v = super().c_code_cache_version() if v: - return (2,) + v + return (3,) + v else: return v diff --git a/aesara/scan/basic.py b/aesara/scan/basic.py index 81c42cdc1f..e3f38acc17 100644 --- a/aesara/scan/basic.py +++ b/aesara/scan/basic.py @@ -996,8 +996,8 @@ def wrap_into_list(x): # We also don't want to remove a default update that applies to # the scope/context containing this `Scan`, so we only remove # default updates on "local" variables. 
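The rewritten ``Softplus.c_code`` above keeps the thresholds -37.0, 18.0 and 33.3 from Machler (2012) and simply lets ``exp`` underflow for very negative inputs, which is why the old ``< -745`` / ``< -103`` branches could be dropped. A NumPy-only sketch of the same piecewise scheme in double precision (illustrative, not the generated C)::

    import numpy as np


    def softplus(x):
        # Same branches as the double-precision C code above.
        x = np.asarray(x, dtype="float64")
        return np.piecewise(
            x,
            [x < -37.0, (x >= -37.0) & (x < 18.0), (x >= 18.0) & (x < 33.3)],
            [np.exp, lambda v: np.log1p(np.exp(v)), lambda v: v + np.exp(-v), lambda v: v],
        )


    print(softplus([-800.0, 0.0, 30.0, 800.0]))
    # ~[0.0, 0.6931, 30.0, 800.0]; no overflow even for very large inputs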
- if is_local and hasattr(input.variable, "default_update"): - del input.variable.default_update + if is_local and input.variable.default_update is not None: + input.variable.default_update = None new_var = safe_new(input.variable) diff --git a/aesara/scan/op.py b/aesara/scan/op.py index 481eaf971d..4914e041fe 100644 --- a/aesara/scan/op.py +++ b/aesara/scan/op.py @@ -54,9 +54,9 @@ import numpy as np import aesara +import aesara.link.utils as link_utils from aesara import tensor as at -from aesara.compile import SharedVariable -from aesara.compile.builders import infer_shape +from aesara.compile.builders import construct_nominal_fgraph, infer_shape from aesara.compile.function.pfunc import pfunc from aesara.compile.io import In, Out from aesara.compile.mode import Mode, get_default_mode, get_mode @@ -65,22 +65,17 @@ from aesara.gradient import DisconnectedType, NullType, Rop, grad, grad_undefined from aesara.graph.basic import ( Apply, - Constant, - NominalVariable, Variable, clone_replace, equal_computations, graph_inputs, io_connection_pattern, - replace_nominals_with_dummies, ) from aesara.graph.features import NoOutputFromInplace -from aesara.graph.fg import FunctionGraph from aesara.graph.op import HasInnerGraph, Op from aesara.graph.utils import InconsistencyError, MissingInputError from aesara.link.c.basic import CLinker from aesara.link.c.exceptions import MissingGXX -from aesara.link.utils import raise_with_op from aesara.printing import op_debug_information from aesara.scan.utils import ScanProfileStats, Validator, forced_replace, safe_new from aesara.tensor.basic import as_tensor_variable @@ -755,22 +750,12 @@ def __init__( If ``True``, all the shared variables used in the inner-graph must be provided. """ - inputs, outputs = replace_nominals_with_dummies(inputs, outputs) + self.fgraph, shared_inputs, _, _ = construct_nominal_fgraph(inputs, outputs) - input_replacements = [] - for n, v in enumerate(inputs): - if not isinstance(v, (SharedVariable, Constant)): - input_replacements.append((v, NominalVariable(n, v.type))) - - assert not isinstance(v, NominalVariable) - - outputs = clone_replace(outputs, replace=input_replacements) - - if input_replacements: - _, inputs_ = zip(*input_replacements) - inputs = list(inputs_) - else: - inputs = [] + # The shared variables should have been removed, so, if there are + # any, it's because the user didn't specify an input. + if shared_inputs: + raise MissingInputError(f"Scan is missing inputs: {shared_inputs}") self.info = info self.truncate_gradient = truncate_gradient @@ -782,7 +767,7 @@ def __init__( # Clone mode_instance, altering "allow_gc" for the linker, # and adding a message if we profile if self.name: - message = self.name + " sub profile" + message = f"{self.name} sub profile" else: message = "Scan sub profile" @@ -805,7 +790,7 @@ def tensorConstructor(shape, dtype): while idx < info.n_mit_mot_outs: # Not that for mit_mot there are several output slices per # output sequence - o = outputs[idx] + o = self.fgraph.outputs[idx] self.output_types.append( # TODO: What can we actually say about the shape of this # added dimension? @@ -818,7 +803,7 @@ def tensorConstructor(shape, dtype): # mit_sot / sit_sot / nit_sot end = idx + info.n_mit_sot + info.n_sit_sot + info.n_nit_sot - for o in outputs[idx:end]: + for o in self.fgraph.outputs[idx:end]: self.output_types.append( # TODO: What can we actually say about the shape of this # added dimension? 
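`Scan.__init__` above now delegates inner-graph construction to `construct_nominal_fgraph` and turns any implicit shared variable into an immediate `MissingInputError`. A minimal sketch of what the helper reports in that situation, assuming this branch (the variables are illustrative)::

    import aesara
    import aesara.tensor as at
    from aesara.compile.builders import construct_nominal_fgraph

    x = at.scalar("x")
    s = aesara.shared(1.0, name="s")
    out = x + s  # uses `s` without listing it as an explicit input

    fgraph, shared_inputs, _, _ = construct_nominal_fgraph([x], [out])

    # The inner graph only sees a dummy in place of `s`; the caller decides
    # what to do with the reported shared inputs.  `Scan` now raises
    # `MissingInputError` whenever this list is non-empty.
    assert shared_inputs == [s]
    assert len(fgraph.inputs) == 2  # nominal `x` plus the dummy for `s`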
@@ -826,7 +811,7 @@ def tensorConstructor(shape, dtype): ) # shared outputs + possibly the ending condition - for o in outputs[end:]: + for o in self.fgraph.outputs[end:]: self.output_types.append(o.type) if info.as_while: @@ -862,8 +847,6 @@ def tensorConstructor(shape, dtype): self.n_outer_inputs = info.n_outer_inputs self.n_outer_outputs = info.n_outer_outputs - self.fgraph = FunctionGraph(inputs, outputs, clone=False) - _ = self.prepare_fgraph(self.fgraph) if any(node.op.destroy_map for node in self.fgraph.apply_nodes): @@ -871,10 +854,6 @@ def tensorConstructor(shape, dtype): "Inner-graphs must not contain in-place operations." ) - # Do the missing inputs check here to have the error early. - for var in graph_inputs(self.inner_outputs, self.inner_inputs): - if var not in self.inner_inputs and not isinstance(var, Constant): - raise MissingInputError(f"ScanOp is missing an input: {repr(var)}") self._cmodule_key = CLinker().cmodule_key_variables( self.inner_inputs, self.inner_outputs, [] ) @@ -1650,7 +1629,7 @@ def p(node, inputs, outputs): if hasattr(self.fn.vm, "position_of_error") and hasattr( self.fn.vm, "thunks" ): - raise_with_op( + link_utils.raise_with_op( self.fn.maker.fgraph, self.fn.vm.nodes[self.fn.vm.position_of_error], self.fn.vm.thunks[self.fn.vm.position_of_error], @@ -1721,7 +1700,7 @@ def perform(self, node, inputs, output_storage, params=None): """ info = self.info # 1. Unzip the number of steps and sequences. - t0_call = time.time() + t0_call = time.perf_counter() t_fn = 0 n_steps = inputs[0] seqs = [] @@ -1942,7 +1921,7 @@ def perform(self, node, inputs, output_storage, params=None): old_mitmot_input_data[idx] = var.data # 5.1 compute outputs - t0_fn = time.time() + t0_fn = time.perf_counter() try: vm() @@ -1953,7 +1932,7 @@ def perform(self, node, inputs, output_storage, params=None): # done by raise_with_op is not implemented in C. 
if hasattr(vm, "thunks"): # For the CVM - raise_with_op( + link_utils.raise_with_op( self.fn.maker.fgraph, vm.nodes[vm.position_of_error], vm.thunks[vm.position_of_error], @@ -1963,14 +1942,14 @@ def perform(self, node, inputs, output_storage, params=None): # We don't have access from python to all the # temps values So for now, we just don't print # the extra shapes/strides info - raise_with_op( + link_utils.raise_with_op( self.fn.maker.fgraph, vm.nodes[vm.position_of_error] ) else: # old-style linkers raise their own exceptions raise - dt_fn = time.time() - t0_fn + dt_fn = time.perf_counter() - t0_fn if info.as_while: pdx = offset + info.n_shared_outs cond = inner_output_storage[pdx].storage[0] == 0 @@ -2196,7 +2175,7 @@ def perform(self, node, inputs, output_storage, params=None): for o_s in inner_output_storage: o_s.storage[0] = None - t_call = time.time() - t0_call + t_call = time.perf_counter() - t0_call # NOTE: make this match what's in function.types.Function # and this little string helps us to find this spot: # "PROFILE_CODE" @@ -3448,7 +3427,7 @@ def profile_printer( ) -@op_debug_information.register(Scan) # type: ignore[has-type] +@op_debug_information.register(Scan) def _op_debug_information_Scan(op, node): from typing import Sequence diff --git a/aesara/sparse/basic.py b/aesara/sparse/basic.py index 6f5bb22b0a..46ac71d8ce 100644 --- a/aesara/sparse/basic.py +++ b/aesara/sparse/basic.py @@ -14,6 +14,7 @@ from numpy.lib.stride_tricks import as_strided import aesara +from aesara import _as_symbolic, as_symbolic from aesara import scalar as aes from aesara.configdefaults import config from aesara.gradient import DisconnectedType, grad_not_implemented, grad_undefined @@ -128,6 +129,11 @@ def _is_dense(x): return isinstance(x, np.ndarray) +@_as_symbolic.register(scipy.sparse.base.spmatrix) +def as_symbolic_sparse(x, **kwargs): + return as_sparse_variable(x, **kwargs) + + def as_sparse_variable(x, name=None, ndim=None, **kwargs): """ Wrapper around SparseVariable constructor to construct @@ -174,26 +180,7 @@ def as_sparse_variable(x, name=None, ndim=None, **kwargs): as_sparse = as_sparse_variable -def as_sparse_or_tensor_variable(x, name=None): - """ - Same as `as_sparse_variable` but if we can't make a - sparse variable, we try to make a tensor variable. - - Parameters - ---------- - x - A sparse matrix. 
- - Returns - ------- - SparseVariable or TensorVariable version of `x` - - """ - - try: - return as_sparse_variable(x, name) - except (ValueError, TypeError): - return at.as_tensor_variable(x, name) +as_sparse_or_tensor_variable = as_symbolic def constant(x, name=None): @@ -592,7 +579,7 @@ def make_node(self, csm): csm = as_sparse_variable(csm) assert csm.format in ("csr", "csc") - data = TensorType(dtype=csm.type.dtype, shape=(False,))() + data = TensorType(dtype=csm.type.dtype, shape=(None,))() return Apply(self, [csm], [data, ivector(), ivector(), ivector()]) def perform(self, node, inputs, out): @@ -994,7 +981,7 @@ def make_node(self, x): return Apply( self, [x], - [TensorType(dtype=x.type.dtype, shape=(False, False))()], + [TensorType(dtype=x.type.dtype, shape=(None, None))()], ) def perform(self, node, inputs, outputs): @@ -1753,11 +1740,13 @@ def __init__(self, axis=None, sparse_grad=True): def make_node(self, x): x = as_sparse_variable(x) assert x.format in ("csr", "csc") - b = () + if self.axis is not None: - b = (False,) + out_shape = (None,) + else: + out_shape = () - z = TensorType(shape=b, dtype=x.dtype)() + z = TensorType(dtype=x.dtype, shape=out_shape)() return Apply(self, [x], [z]) def perform(self, node, inputs, outputs): @@ -1872,7 +1861,7 @@ def make_node(self, x): """ x = as_sparse_variable(x) assert x.format in ("csr", "csc") - return Apply(self, [x], [tensor(shape=(False,), dtype=x.dtype)]) + return Apply(self, [x], [tensor(dtype=x.dtype, shape=(None,))]) def perform(self, node, inputs, outputs): (x,) = inputs @@ -2138,7 +2127,7 @@ def make_node(self, x, y): return Apply( self, [x, y], - [TensorType(dtype=out_dtype, shape=y.type.broadcastable)()], + [TensorType(dtype=out_dtype, shape=y.type.shape)()], ) def perform(self, node, inputs, outputs): @@ -2621,7 +2610,7 @@ def make_node(self, x, y): x, y = as_sparse_variable(x), at.as_tensor_variable(y) assert y.type.ndim == 2 - out = TensorType(dtype="uint8", shape=(False, False))() + out = TensorType(dtype="uint8", shape=(None, None))() return Apply(self, [x, y], [out]) def perform(self, node, inputs, outputs): @@ -3462,7 +3451,7 @@ def make_node(self, a, b): return Apply( self, [a, b], - [tensor(dtype_out, (False, b.type.broadcastable[1]))], + [tensor(dtype_out, shape=(None, 1 if b.type.shape[1] == 1 else None))], ) def perform(self, node, inputs, outputs): @@ -3593,7 +3582,7 @@ class StructuredDotGradCSC(COp): def make_node(self, a_indices, a_indptr, b, g_ab): return Apply( - self, [a_indices, a_indptr, b, g_ab], [tensor(g_ab.dtype, (False,))] + self, [a_indices, a_indptr, b, g_ab], [tensor(g_ab.dtype, shape=(None,))] ) def perform(self, node, inputs, outputs): @@ -3726,7 +3715,9 @@ class StructuredDotGradCSR(COp): __props__ = () def make_node(self, a_indices, a_indptr, b, g_ab): - return Apply(self, [a_indices, a_indptr, b, g_ab], [tensor(b.dtype, (False,))]) + return Apply( + self, [a_indices, a_indptr, b, g_ab], [tensor(b.dtype, shape=(None,))] + ) def perform(self, node, inputs, outputs): (a_indices, a_indptr, b, g_ab) = inputs @@ -3967,6 +3958,7 @@ def make_node(self, x, y): x = as_sparse_variable(x) if isinstance(y, scipy.sparse.spmatrix): y = as_sparse_variable(y) + x_is_sparse_var = _is_sparse_variable(x) y_is_sparse_var = _is_sparse_variable(y) @@ -3978,34 +3970,35 @@ def make_node(self, x, y): ) if x_is_sparse_var: - broadcast_x = (False,) * x.ndim + shape_x = (None,) * x.type.ndim else: x = at.as_tensor_variable(x) - broadcast_x = x.type.broadcastable + shape_x = x.type.shape assert y.format in ("csr", "csc") 
if x.ndim not in (1, 2): raise TypeError( "Input 0 (0-indexed) must have ndim of " - f"1 or 2, {int(x.ndim)} given." + f"1 or 2, {int(x.type.ndim)} given." ) if y_is_sparse_var: - broadcast_y = (False,) * y.ndim + shape_y = (None,) * y.type.ndim else: y = at.as_tensor_variable(y) - broadcast_y = y.type.broadcastable + shape_y = y.type.shape assert x.format in ("csr", "csc") if y.ndim not in (1, 2): raise TypeError( "Input 1 (1-indexed) must have ndim of " - f"1 or 2, {int(y.ndim)} given." + f"1 or 2, {int(y.type.ndim)} given." ) - if len(broadcast_y) == 2: - broadcast_out = broadcast_x[:-1] + broadcast_y[1:] - elif len(broadcast_y) == 1: - broadcast_out = broadcast_x[:-1] - return Apply(self, [x, y], [tensor(dtype=dtype_out, shape=broadcast_out)]) + if len(shape_y) == 2: + shape_out = shape_x[:-1] + shape_y[1:] + elif len(shape_y) == 1: + shape_out = shape_x[:-1] + + return Apply(self, [x, y], [tensor(dtype=dtype_out, shape=shape_out)]) def perform(self, node, inputs, out): x, y = inputs @@ -4126,21 +4119,21 @@ def make_node(self, alpha, x, y, z): alpha = at.as_tensor_variable(alpha) z = at.as_tensor_variable(z) - assert z.ndim == 2 - assert alpha.type.broadcastable == (True,) * alpha.ndim + assert z.type.ndim == 2 + assert alpha.type.shape == (1,) * alpha.type.ndim if not _is_sparse_variable(x): x = at.as_tensor_variable(x) assert y.format in ("csr", "csc") - assert x.ndim == 2 + assert x.type.ndim == 2 if not _is_sparse_variable(y): y = at.as_tensor_variable(y) assert x.format in ("csr", "csc") - assert y.ndim == 2 + assert y.type.ndim == 2 return Apply( self, [alpha, x, y, z], - [tensor(dtype=dtype_out, shape=(False, False))], + [tensor(dtype=dtype_out, shape=(None, None))], ) def perform(self, node, inputs, outputs): diff --git a/aesara/sparse/rewriting.py b/aesara/sparse/rewriting.py index fde57a30ac..69865c7fe8 100644 --- a/aesara/sparse/rewriting.py +++ b/aesara/sparse/rewriting.py @@ -126,7 +126,9 @@ def make_node(self, x, y): # The magic number two here arises because L{scipy.sparse} # objects must be matrices (have dimension 2) assert y.type.ndim == 2 - out = TensorType(dtype=out_dtype, shape=y.type.broadcastable)() + out = TensorType( + dtype=out_dtype, shape=tuple(1 if s == 1 else None for s in y.type.shape) + )() return Apply(self, [data, indices, indptr, y], [out]) def c_code(self, node, name, inputs, outputs, sub): @@ -268,7 +270,7 @@ def make_node(self, a_val, a_ind, a_ptr, a_nrows, b): r = Apply( self, [a_val, a_ind, a_ptr, a_nrows, b], - [tensor(dtype_out, (False, b.type.broadcastable[1]))], + [tensor(dtype_out, shape=(None, 1 if b.type.shape[1] == 1 else None))], ) return r @@ -463,7 +465,7 @@ def make_node(self, a_val, a_ind, a_ptr, b): r = Apply( self, [a_val, a_ind, a_ptr, b], - [tensor(self.dtype_out, (False, b.type.broadcastable[1]))], + [tensor(self.dtype_out, shape=(None, 1 if b.type.shape[1] == 1 else None))], ) return r @@ -675,7 +677,7 @@ def make_node(self, alpha, x_val, x_ind, x_ptr, x_nrows, y, z): assert x_ind.dtype == "int32" assert x_ptr.dtype == "int32" assert x_nrows.dtype == "int32" - assert alpha.ndim == 2 and alpha.type.broadcastable == (True, True) + assert alpha.ndim == 2 and alpha.type.shape == (1, 1) assert x_val.ndim == 1 assert y.ndim == 2 assert z.ndim == 2 @@ -703,7 +705,7 @@ def make_node(self, alpha, x_val, x_ind, x_ptr, x_nrows, y, z): r = Apply( self, [alpha, x_val, x_ind, x_ptr, x_nrows, y, z], - [tensor(dtype_out, (False, y.type.broadcastable[1]))], + [tensor(dtype_out, shape=(None, 1 if y.type.shape[1] == 1 else None))], ) return r @@ 
-903,7 +905,7 @@ def c_code_cache_version(self): { "pattern": "alpha", "constraint": lambda expr: ( - all(expr.type.broadcastable) and config.blas__ldflags + all(s == 1 for s in expr.type.shape) and config.blas__ldflags ), }, (sparse._dot, "x", "y"), @@ -1140,7 +1142,7 @@ def make_node(self, a_data, a_indices, a_indptr, b): """ assert b.type.ndim == 2 return Apply( - self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, (False,))] + self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, shape=(None,))] ) def c_code_cache_version(self): @@ -1278,7 +1280,7 @@ def make_node(self, a_data, a_indices, a_indptr, b): """ assert b.type.ndim == 2 return Apply( - self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, (False,))] + self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, shape=(None,))] ) def c_code_cache_version(self): @@ -1468,7 +1470,7 @@ def make_node(self, a_data, a_indices, a_indptr, b): """ assert b.type.ndim == 1 return Apply( - self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, (False,))] + self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, shape=(None,))] ) def c_code_cache_version(self): @@ -1640,7 +1642,7 @@ def make_node(self, a_data, a_indices, a_indptr, b): assert a_indptr.type.ndim == 1 assert b.type.ndim == 1 return Apply( - self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, (False,))] + self, [a_data, a_indices, a_indptr, b], [tensor(b.dtype, shape=(None,))] ) def c_code_cache_version(self): @@ -1851,9 +1853,9 @@ def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols): self, [x, y, p_data, p_ind, p_ptr, p_ncols], [ - tensor(dtype=dtype_out, shape=(False,)), - tensor(dtype=p_ind.type.dtype, shape=(False,)), - tensor(dtype=p_ptr.type.dtype, shape=(False,)), + tensor(dtype=dtype_out, shape=(None,)), + tensor(dtype=p_ind.type.dtype, shape=(None,)), + tensor(dtype=p_ptr.type.dtype, shape=(None,)), ], ) diff --git a/aesara/sparse/sandbox/sp.py b/aesara/sparse/sandbox/sp.py index 1f95d01758..6015006848 100644 --- a/aesara/sparse/sandbox/sp.py +++ b/aesara/sparse/sandbox/sp.py @@ -181,7 +181,7 @@ def evaluate(inshp, kshp, strides=(1, 1), nkern=1, mode="valid", ws=True): # taking into account multiple # input features - col = ( + col = int( iy * inshp[2] + ix + fmapi * np.prod(inshp[1:]) ) @@ -196,13 +196,13 @@ def evaluate(inshp, kshp, strides=(1, 1), nkern=1, mode="valid", ws=True): # convert to row index of sparse matrix if ws: - row = ( + row = int( (y * outshp[1] + x) * inshp[0] * ksize + l + fmapi * ksize ) else: - row = y * outshp[1] + x + row = int(y * outshp[1] + x) # Store something at that location # in sparse matrix. 
The written diff --git a/aesara/sparse/sharedvar.py b/aesara/sparse/sharedvar.py index 47fc365b86..f5ed652cc2 100644 --- a/aesara/sparse/sharedvar.py +++ b/aesara/sparse/sharedvar.py @@ -2,30 +2,29 @@ import scipy.sparse -from aesara.compile import SharedVariable, shared_constructor +from aesara.compile import shared_constructor from aesara.sparse.basic import SparseTensorType, _sparse_py_operators +from aesara.tensor.sharedvar import TensorSharedVariable -class SparseTensorSharedVariable(_sparse_py_operators, SharedVariable): - dtype = property(lambda self: self.type.dtype) - format = property(lambda self: self.type.format) +class SparseTensorSharedVariable(TensorSharedVariable, _sparse_py_operators): + @property + def format(self): + return self.type.format -@shared_constructor +@shared_constructor.register(scipy.sparse.spmatrix) def sparse_constructor( value, name=None, strict=False, allow_downcast=None, borrow=False, format=None ): - if not isinstance(value, scipy.sparse.spmatrix): - raise TypeError( - "Expected a sparse matrix in the sparse shared variable constructor. Received: ", - value.__class__, - ) - if format is None: format = value.format + type = SparseTensorType(format=format, dtype=value.dtype) + if not borrow: value = copy.deepcopy(value) + return SparseTensorSharedVariable( - type=type, value=value, name=name, strict=strict, allow_downcast=allow_downcast + type=type, value=value, strict=strict, allow_downcast=allow_downcast, name=name ) diff --git a/aesara/tensor/__init__.py b/aesara/tensor/__init__.py index 1f63c30644..d726da3241 100644 --- a/aesara/tensor/__init__.py +++ b/aesara/tensor/__init__.py @@ -80,8 +80,9 @@ def get_vector_length(v: TensorLike) -> int: if v.type.ndim != 1: raise TypeError(f"Argument must be a vector; got {v.type}") - if v.type.broadcastable[0]: - return 1 + static_shape: Optional[int] = v.type.shape[0] + if static_shape is not None: + return static_shape return _get_vector_length(getattr(v.owner, "op", v), v) diff --git a/aesara/tensor/basic.py b/aesara/tensor/basic.py index f629bfb095..4762d903d2 100644 --- a/aesara/tensor/basic.py +++ b/aesara/tensor/basic.py @@ -28,7 +28,7 @@ from aesara.graph.fg import FunctionGraph from aesara.graph.op import Op from aesara.graph.rewriting.utils import rewrite_graph -from aesara.graph.type import Type +from aesara.graph.type import HasShape, Type from aesara.link.c.op import COp from aesara.link.c.params_type import ParamsType from aesara.misc.safe_asarray import _asarray @@ -110,10 +110,12 @@ def _as_tensor_Variable(x, name, ndim, **kwargs): if x.type.ndim > ndim: # Strip off leading broadcastable dimensions - non_broadcastables = [idx for idx in range(x.ndim) if not x.broadcastable[idx]] + non_broadcastables = tuple( + idx for idx in range(x.type.ndim) if x.type.shape[idx] != 1 + ) if non_broadcastables: - x = x.dimshuffle(list(range(x.ndim))[non_broadcastables[0] :]) + x = x.dimshuffle(list(range(x.type.ndim))[non_broadcastables[0] :]) else: x = x.dimshuffle() @@ -346,8 +348,8 @@ def get_scalar_constant_value( if isinstance(inp, Constant): return np.asarray(np.shape(inp.data)[i]) # The shape of a broadcastable dimension is 1 - if hasattr(inp.type, "broadcastable") and inp.type.broadcastable[i]: - return np.asarray(1) + if isinstance(inp.type, HasShape) and inp.type.shape[i] is not None: + return np.asarray(inp.type.shape[i]) # Don't act as the constant_folding optimization here as this # fct is used too early in the optimization phase. 
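Most of the changes in this file and the surrounding ones follow a single mechanical pattern: output types are built from static shapes (entries of ``1`` or ``None``) rather than ``broadcastable`` boolean patterns, and broadcastability is queried as ``type.shape[i] == 1``. A small sketch of the correspondence, assuming the shape-based ``TensorType`` constructor this diff migrates to::

    from aesara.tensor.type import TensorType

    # Old spelling: TensorType("float64", (False, True))
    # New spelling: a static shape, where None means "unknown at compile time"
    # and 1 means "known to be broadcastable".
    t = TensorType("float64", shape=(None, 1))

    assert t.shape == (None, 1)
    assert t.broadcastable == (False, True)  # derived from the static shape
    assert t.ndim == 2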
This would @@ -500,21 +502,16 @@ def get_scalar_constant_value( owner.inputs[1], max_recur=max_recur ) grandparent = leftmost_parent.owner.inputs[0] - gp_broadcastable = grandparent.type.broadcastable + gp_shape = grandparent.type.shape ndim = grandparent.type.ndim if grandparent.owner and isinstance( grandparent.owner.op, Unbroadcast ): - ggp_broadcastable = grandparent.owner.inputs[0].broadcastable - l = [ - b1 or b2 - for b1, b2 in zip(ggp_broadcastable, gp_broadcastable) - ] - gp_broadcastable = tuple(l) - - assert ndim == len(gp_broadcastable) + ggp_shape = grandparent.owner.inputs[0].type.shape + l = [s1 == 1 or s2 == 1 for s1, s2 in zip(ggp_shape, gp_shape)] + gp_shape = tuple(l) - if not (idx < len(gp_broadcastable)): + if not (idx < ndim): msg = ( "get_scalar_constant_value detected " f"deterministic IndexError: x.shape[{int(idx)}] " @@ -526,8 +523,9 @@ def get_scalar_constant_value( msg += f" x={v}" raise ValueError(msg) - if gp_broadcastable[idx]: - return np.asarray(1) + gp_shape_val = gp_shape[idx] + if gp_shape_val is not None and gp_shape_val > -1: + return np.asarray(gp_shape_val) if isinstance(grandparent, Constant): return np.asarray(np.shape(grandparent.data)[idx]) @@ -862,7 +860,7 @@ def make_node(self, a): a = as_tensor_variable(a) if a.ndim == 0: raise ValueError("Nonzero only supports non-scalar arrays.") - output = [TensorType(dtype="int64", shape=(False,))() for i in range(a.ndim)] + output = [TensorType(dtype="int64", shape=(None,))() for i in range(a.ndim)] return Apply(self, [a], output) def perform(self, node, inp, out_): @@ -993,7 +991,7 @@ def make_node(self, N, M, k): return Apply( self, [N, M, k], - [TensorType(dtype=self.dtype, shape=(False, False))()], + [TensorType(dtype=self.dtype, shape=(None, None))()], ) def perform(self, node, inp, out_): @@ -1272,7 +1270,7 @@ def make_node(self, n, m, k): return Apply( self, [n, m, k], - [TensorType(dtype=self.dtype, shape=(False, False))()], + [TensorType(dtype=self.dtype, shape=(None, None))()], ) def perform(self, node, inp, out_): @@ -1509,15 +1507,16 @@ def grad(self, inputs, grads): axis_kept = [] for i, (ib, gb) in enumerate( zip( - inputs[0].broadcastable, + inputs[0].type.shape, # We need the dimensions corresponding to x - grads[0].broadcastable[-inputs[0].ndim :], + grads[0].type.shape[-inputs[0].ndim :], ) ): - if ib and not gb: + if ib == 1 and gb != 1: axis_broadcasted.append(i + n_axes_to_sum) else: axis_kept.append(i) + gx = gz.sum(axis=axis + axis_broadcasted) if axis_broadcasted: new_order = ["x"] * x.ndim @@ -1663,7 +1662,7 @@ def make_node(self, *inputs): else: dtype = self.dtype - otype = TensorType(dtype, (len(inputs),)) + otype = TensorType(dtype, shape=(len(inputs),)) return Apply(self, inputs, [otype()]) def perform(self, node, inputs, out_): @@ -1863,11 +1862,14 @@ def transpose(x, axes=None): """ _x = as_tensor_variable(x) + if axes is None: - axes = list(range((_x.ndim - 1), -1, -1)) - ret = DimShuffle(_x.broadcastable, axes)(_x) - if _x.name and axes == list(range((_x.ndim - 1), -1, -1)): + axes = list(range((_x.type.ndim - 1), -1, -1)) + ret = DimShuffle(tuple(s == 1 for s in _x.type.shape), axes)(_x) + + if _x.name and axes == list(range((_x.type.ndim - 1), -1, -1)): ret.name = _x.name + ".T" + return ret @@ -1918,7 +1920,7 @@ def make_node(self, x, axis, splits): raise TypeError("`axis` parameter must be an integer scalar") inputs = [x, axis, splits] - out_type = TensorType(dtype=x.dtype, shape=[None] * x.type.ndim) + out_type = TensorType(dtype=x.dtype, shape=(None,) * x.type.ndim) 
outputs = [out_type() for i in range(self.len_splits)] return Apply(self, inputs, outputs) @@ -2210,9 +2212,9 @@ def make_node(self, axis, *tensors): "Join cannot handle arguments of dimension 0." " Use `stack` to join scalar values." ) - # Handle single-tensor joins immediately. + if len(tensors) == 1: - bcastable = list(tensors[0].type.broadcastable) + out_shape = tensors[0].type.shape else: # When the axis is fixed, a dimension should be # broadcastable if at least one of the inputs is @@ -2220,8 +2222,8 @@ def make_node(self, axis, *tensors): # except for the axis dimension. # Initialize bcastable all false, and then fill in some trues with # the loops. - bcastable = [False] * len(tensors[0].type.broadcastable) - ndim = len(bcastable) + ndim = tensors[0].type.ndim + out_shape = [None] * ndim if not isinstance(axis, int): try: @@ -2246,15 +2248,15 @@ def make_node(self, axis, *tensors): axis += ndim for x in tensors: - for current_axis, bflag in enumerate(x.type.broadcastable): + for current_axis, s in enumerate(x.type.shape): # Constant negative axis can no longer be negative at # this point. It safe to compare this way. if current_axis == axis: continue - if bflag: - bcastable[current_axis] = True + if s == 1: + out_shape[current_axis] = 1 try: - bcastable[axis] = False + out_shape[axis] = None except IndexError: raise ValueError( f"Axis value {axis} is out of range for the given input dimensions" @@ -2262,9 +2264,9 @@ def make_node(self, axis, *tensors): else: # When the axis may vary, no dimension can be guaranteed to be # broadcastable. - bcastable = [False] * len(tensors[0].type.broadcastable) + out_shape = [None] * tensors[0].type.ndim - if not builtins.all(x.ndim == len(bcastable) for x in tensors): + if not builtins.all(x.ndim == len(out_shape) for x in tensors): raise TypeError( "Only tensors with the same number of dimensions can be joined" ) @@ -2274,7 +2276,7 @@ def make_node(self, axis, *tensors): if inputs[0].type.dtype not in int_dtypes: raise TypeError(f"Axis value {inputs[0]} must be an integer type") - return Apply(self, inputs, [tensor(dtype=out_dtype, shape=bcastable)]) + return Apply(self, inputs, [tensor(dtype=out_dtype, shape=out_shape)]) def perform(self, node, axis_and_tensors, out_): (out,) = out_ @@ -2387,9 +2389,9 @@ def grad(self, axis_and_tensors, grads): # read it if needed. 
split_gz = [ g - if g.type.broadcastable == t.type.broadcastable + if g.type.shape == t.type.shape == 1 else specify_broadcastable( - g, *(ax for (ax, b) in enumerate(t.type.broadcastable) if b) + g, *(ax for (ax, s) in enumerate(t.type.shape) if s == 1) ) for t, g in zip(tens, split_gz) ] @@ -2770,13 +2772,13 @@ def flatten(x, ndim=1): dims = tuple(_x.shape[: ndim - 1]) + (-1,) else: dims = (-1,) + x_reshaped = _x.reshape(dims) - bcast_kept_dims = _x.broadcastable[: ndim - 1] - bcast_new_dim = builtins.all(_x.broadcastable[ndim - 1 :]) - broadcastable = bcast_kept_dims + (bcast_new_dim,) - x_reshaped = specify_broadcastable( - x_reshaped, *[i for i in range(ndim) if broadcastable[i]] - ) + shape_kept_dims = _x.type.shape[: ndim - 1] + bcast_new_dim = builtins.all(s == 1 for s in _x.type.shape[ndim - 1 :]) + out_shape = shape_kept_dims + (1 if bcast_new_dim else None,) + bcasted_indices = tuple(i for i in range(ndim) if out_shape[i] == 1) + x_reshaped = specify_broadcastable(x_reshaped, *bcasted_indices) return x_reshaped @@ -2882,7 +2884,7 @@ def make_node(self, start, stop, step): assert step.ndim == 0 inputs = [start, stop, step] - outputs = [tensor(self.dtype, (False,))] + outputs = [tensor(self.dtype, shape=(None,))] return Apply(self, inputs, outputs) @@ -3158,11 +3160,11 @@ def make_node(self, x, y, inverse): elif x_dim < y_dim: x = shape_padleft(x, n_ones=(y_dim - x_dim)) - # Compute the broadcastable pattern of the output - out_broadcastable = [ - xb and yb for xb, yb in zip(x.type.broadcastable, y.type.broadcastable) + out_shape = [ + 1 if xb == 1 and yb == 1 else None + for xb, yb in zip(x.type.shape, y.type.shape) ] - out_type = tensor(dtype=x.type.dtype, shape=out_broadcastable) + out_type = tensor(dtype=x.type.dtype, shape=out_shape) inputlist = [x, y, inverse] outputlist = [out_type] @@ -3205,11 +3207,11 @@ def _rec_perform(self, node, x, y, inverse, out, curdim): if xs0 == ys0: for i in range(xs0): self._rec_perform(node, x[i], y[i], inverse, out[i], curdim + 1) - elif ys0 == 1 and node.inputs[1].type.broadcastable[curdim]: + elif ys0 == 1 and node.inputs[1].type.shape[curdim] == 1: # Broadcast y for i in range(xs0): self._rec_perform(node, x[i], y[0], inverse, out[i], curdim + 1) - elif xs0 == 1 and node.inputs[0].type.broadcastable[curdim]: + elif xs0 == 1 and node.inputs[0].type.shape[curdim] == 1: # Broadcast x for i in range(ys0): self._rec_perform(node, x[0], y[i], inverse, out[i], curdim + 1) @@ -3268,7 +3270,7 @@ def grad(self, inp, grads): broadcasted_dims = [ dim for dim in range(gz.type.ndim) - if x.type.broadcastable[dim] and not gz.type.broadcastable[dim] + if x.type.shape[dim] == 1 and gz.type.shape[dim] != 1 ] gx = Sum(axis=broadcasted_dims)(gx) @@ -3283,8 +3285,13 @@ def grad(self, inp, grads): newdims.append(i) i += 1 - gx = DimShuffle(gx.type.broadcastable, newdims)(gx) - assert gx.type.broadcastable == x.type.broadcastable + gx = DimShuffle(tuple(s == 1 for s in gx.type.shape), newdims)(gx) + assert gx.type.ndim == x.type.ndim + assert all( + s1 == s2 + for s1, s2 in zip(gx.type.shape, x.type.shape) + if s1 == 1 or s2 == 1 + ) # if x is an integer type, then so is the output. 
# this means f(x+eps) = f(x) so the gradient with respect @@ -3394,7 +3401,7 @@ def make_node(self, x): return Apply( self, [x], - [x.type.__class__(dtype=x.dtype, shape=[False] * (x.ndim - 1))()], + [x.type.clone(dtype=x.dtype, shape=(None,) * (x.ndim - 1))()], ) def perform(self, node, inputs, outputs): @@ -3516,7 +3523,7 @@ def make_node(self, diag): return Apply( self, [diag], - [diag.type.clone(shape=[False] * (diag.ndim + 1))()], + [diag.type.clone(shape=(None,) * (diag.ndim + 1))()], ) def perform(self, node, inputs, outputs): @@ -3799,11 +3806,12 @@ def make_node(self, a, choices): choice = aesara.typed_list.make_list(choices) else: choice = as_tensor_variable(choices) + (out_shape,) = self.infer_shape( None, None, [shape_tuple(a), shape_tuple(choice)] ) - bcast = [] + static_out_shape = () for s in out_shape: try: s_val = aesara.get_scalar_constant_value(s) @@ -3811,11 +3819,11 @@ def make_node(self, a, choices): s_val = None if s_val == 1: - bcast.append(True) + static_out_shape += (1,) else: - bcast.append(False) + static_out_shape += (None,) - o = TensorType(choice.dtype, bcast) + o = TensorType(choice.dtype, shape=static_out_shape) return Apply(self, [a, choice], [o()]) def perform(self, node, inputs, outputs): diff --git a/aesara/tensor/blas.py b/aesara/tensor/blas.py index ee478b6a8a..7ab5c451a7 100644 --- a/aesara/tensor/blas.py +++ b/aesara/tensor/blas.py @@ -167,6 +167,7 @@ from aesara.tensor.shape import specify_broadcastable from aesara.tensor.type import ( DenseTensorType, + TensorType, integer_dtypes, tensor, values_eq_approx_remove_inf_nan, @@ -529,7 +530,7 @@ def c_support_code(self, **kwargs): #ifndef MOD #define MOD % #endif - static double time_time() // a time function like time.time() + static double time_time() // a time function like time.perf_counter() { struct timeval tv; gettimeofday(&tv, 0); @@ -1204,11 +1205,11 @@ def _as_scalar(res, dtype=None): """Return ``None`` or a `TensorVariable` of float type""" if dtype is None: dtype = config.floatX - if all(res.type.broadcastable): + if all(s == 1 for s in res.type.shape): while res.owner and isinstance(res.owner.op, DimShuffle): res = res.owner.inputs[0] # may still have some number of True's - if res.type.broadcastable: + if res.type.ndim > 0: rval = res.dimshuffle() else: rval = res @@ -1230,8 +1231,8 @@ def _is_real_matrix(res): return ( res.type.dtype in ("float16", "float32", "float64") and res.type.ndim == 2 - and res.type.broadcastable[0] is False - and res.type.broadcastable[1] is False + and res.type.shape[0] != 1 + and res.type.shape[1] != 1 ) # cope with tuple vs. 
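Both the Scan ``perform`` timings above and the BLAS rewrite timings below switch from ``time.time()`` to ``time.perf_counter()``, which is monotonic and has the best available resolution for short durations; the call pattern is otherwise unchanged::

    import time

    t0 = time.perf_counter()
    sum(i * i for i in range(100_000))
    dt = time.perf_counter() - t0  # elapsed seconds, unaffected by clock adjustments
    print(f"{dt:.6f} s")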
list @@ -1239,7 +1240,7 @@ def _is_real_vector(res): return ( res.type.dtype in ("float16", "float32", "float64") and res.type.ndim == 1 - and res.type.broadcastable[0] is False + and res.type.shape[0] != 1 ) @@ -1298,9 +1299,7 @@ def scaled(thing): else: return scale * thing - try: - r.type.broadcastable - except Exception: + if not isinstance(r.type, TensorType): return None if (r.type.ndim not in (1, 2)) or r.type.dtype not in ( @@ -1333,10 +1332,10 @@ def scaled(thing): vectors = [] matrices = [] for i in r.owner.inputs: - if all(i.type.broadcastable): + if all(s == 1 for s in i.type.shape): while i.owner and isinstance(i.owner.op, DimShuffle): i = i.owner.inputs[0] - if i.type.broadcastable: + if i.type.ndim > 0: scalars.append(i.dimshuffle()) else: scalars.append(i) @@ -1489,15 +1488,15 @@ def _gemm_from_node2(fgraph, node): """ lst = [] - t0 = time.time() + t0 = time.perf_counter() _gemm_canonicalize(fgraph, node.outputs[0], 1.0, lst, 0) - t1 = time.time() + t1 = time.perf_counter() if len(lst) > 1: lst = _factor_canonicalized(lst) - t2 = time.time() + t2 = time.perf_counter() rval = _gemm_from_factored_list(fgraph, lst) - t3 = time.time() + t3 = time.perf_counter() # It can happen that _factor_canonicalized and # _gemm_from_factored_list return a node with an incorrect @@ -1550,9 +1549,9 @@ def on_import(new_node): fgraph.attach_feature(u) while did_something: nb_iter += 1 - t0 = time.time() + t0 = time.perf_counter() nodelist = aesara.graph.basic.io_toposort(fgraph.inputs, fgraph.outputs) - time_toposort += time.time() - t0 + time_toposort += time.perf_counter() - t0 did_something = False nodelist.reverse() for node in nodelist: @@ -1681,8 +1680,7 @@ def make_node(self, x, y): raise TypeError(y) if y.type.dtype != x.type.dtype: raise TypeError("dtype mismatch to Dot22") - bz = (x.type.broadcastable[0], y.type.broadcastable[1]) - outputs = [tensor(x.type.dtype, bz)] + outputs = [tensor(x.type.dtype, shape=(x.type.shape[0], y.type.shape[1]))] return Apply(self, [x, y], outputs) def perform(self, node, inp, out): @@ -1986,8 +1984,8 @@ def make_node(self, x, y, a): if not a.dtype.startswith("float") and not a.dtype.startswith("complex"): raise TypeError("Dot22Scalar requires float or complex args", a.dtype) - bz = [x.type.broadcastable[0], y.type.broadcastable[1]] - outputs = [tensor(x.type.dtype, bz)] + sz = (x.type.shape[0], y.type.shape[1]) + outputs = [tensor(x.type.dtype, shape=sz)] return Apply(self, [x, y, a], outputs) def perform(self, node, inp, out): @@ -2213,12 +2211,17 @@ def make_node(self, *inputs): dtype = aesara.scalar.upcast(*[input.type.dtype for input in inputs]) # upcast inputs to common dtype if needed upcasted_inputs = [at.cast(input, dtype) for input in inputs] - broadcastable = ( - (inputs[0].type.broadcastable[0] or inputs[1].type.broadcastable[0],) - + inputs[0].type.broadcastable[1:-1] - + inputs[1].type.broadcastable[2:] + out_shape = ( + ( + 1 + if inputs[0].type.shape[0] == 1 or inputs[1].type.shape[0] == 1 + else None, + ) + + inputs[0].type.shape[1:-1] + + inputs[1].type.shape[2:] ) - return Apply(self, upcasted_inputs, [tensor(dtype, broadcastable)]) + out_shape = tuple(1 if s == 1 else None for s in out_shape) + return Apply(self, upcasted_inputs, [tensor(dtype, shape=out_shape)]) def perform(self, node, inp, out): x, y = inp diff --git a/aesara/tensor/elemwise.py b/aesara/tensor/elemwise.py index 34f9ea5459..2b3fd11748 100644 --- a/aesara/tensor/elemwise.py +++ b/aesara/tensor/elemwise.py @@ -1,5 +1,5 @@ from copy import copy -from typing import 
List, Tuple, Union +from typing import List, Tuple import numpy as np @@ -62,17 +62,17 @@ class DimShuffle(ExternalCOp): If `j = new_order[i]` is an index, the output's ith dimension will be the input's jth dimension. If `new_order[i]` is `x`, the output's ith dimension will - be 1 and Broadcast operations will be allowed to do broadcasting + be 1 and broadcast operations will be allowed to do broadcasting over that dimension. - If `input.broadcastable[i] == False` then `i` must be found in new_order. + If `input.type.shape[i] != 1` then `i` must be found in `new_order`. Broadcastable dimensions, on the other hand, can be discarded. .. code-block:: python DimShuffle((False, False, False), ['x', 2, 'x', 0, 1]) - This op will only work on 3d tensors with no broadcastable + This `Op` will only work on 3d tensors with no broadcastable dimensions. The first dimension will be broadcastable, then we will have the third dimension of the input tensor as the second of the resulting tensor, etc. If the tensor has @@ -83,7 +83,7 @@ class DimShuffle(ExternalCOp): DimShuffle((True, False), [1]) - This op will only work on 2d tensors with the first dimension + This `Op` will only work on 2d tensors with the first dimension broadcastable. The second dimension of the input tensor will be the first dimension of the resulting tensor. @@ -186,7 +186,7 @@ def __setstate__(self, state): def make_node(self, _input): input = as_tensor_variable(_input) - ib = tuple(input.type.broadcastable) + ib = tuple(s == 1 for s in input.type.shape) if ib != self.input_broadcastable: if len(ib) != len(self.input_broadcastable): raise TypeError( @@ -258,7 +258,7 @@ def grad(self, inp, grads): (x,) = inp (gz,) = grads gz = as_tensor_variable(gz) - grad_order = ["x"] * len(x.type.broadcastable) + grad_order = ["x"] * x.type.ndim for i, v in enumerate(self.new_order): if v != "x": grad_order[v] = i @@ -269,7 +269,7 @@ def grad(self, inp, grads): return [inp[0].zeros_like(dtype=config.floatX)] else: return [ - DimShuffle(gz.type.broadcastable, grad_order)( + DimShuffle(tuple(s == 1 for s in gz.type.shape), grad_order)( Elemwise(scalar_identity)(gz) ) ] @@ -406,7 +406,7 @@ def get_output_info(self, dim_shuffle, *inputs): # TODO: use LComplete instead args.append( dim_shuffle( - input.type.broadcastable, + tuple(1 if s == 1 else None for s in input.type.shape), ["x"] * difference + list(range(length)), )(input) ) @@ -452,11 +452,11 @@ def get_most_specialized_shape(shapes): inplace_pattern = self.inplace_pattern if inplace_pattern: for overwriter, overwritten in inplace_pattern.items(): - for ob, ib in zip( + for out_s, in_s in zip( out_shapes[overwriter], - inputs[overwritten].type.broadcastable, + inputs[overwritten].type.shape, ): - if ib and not ob == 1: + if in_s == 1 and out_s != 1: raise ValueError( "Operation cannot be done inplace on an input " "with broadcasted dimensions." 
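Call sites that build a `DimShuffle` directly now derive the input broadcast pattern from the static shape, i.e. ``DimShuffle(tuple(s == 1 for s in x.type.shape), new_order)`` as in the ``grad`` above. A short usage sketch::

    import aesara.tensor as at
    from aesara.tensor.elemwise import DimShuffle

    x = at.matrix("x")  # static shape (None, None)

    # Insert a broadcastable leading dimension.
    ds = DimShuffle(tuple(s == 1 for s in x.type.shape), ["x", 0, 1])
    y = ds(x)

    assert y.type.ndim == 3
    assert y.type.shape[0] == 1  # the new axis is broadcastable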
@@ -578,8 +578,8 @@ def L_op(self, inputs, outs, ograds): # TODO: only count dimensions that were effectively broadcasted to_sum = [ j - for j, bcast in enumerate(ipt.type.broadcastable) - if bcast and not outs[0].broadcastable[j] + for j, in_s in enumerate(ipt.type.shape) + if in_s == 1 and outs[0].type.shape[j] != 1 ] if to_sum: @@ -614,7 +614,7 @@ def as_scalar(t): f"{str(self.scalar_op)}.grad returned {str(type(scalar_igrads))} instead of list or tuple" ) - nd = len(inputs[0].type.broadcastable) # this is the same for everyone + nd = inputs[0].type.ndim # this is the same for everyone def transform(r): # From a graph of ScalarOps, make a graph of Broadcast ops. @@ -897,7 +897,7 @@ def _c_all(self, node, nodename, inames, onames, sub): # for each input: # same as range(ndim), but with 'x' at all broadcastable positions orders = [ - [x and "x" or i for i, x in enumerate(input.type.broadcastable)] + [s == 1 and "x" or i for i, s in enumerate(input.type.shape)] for input in inputs ] @@ -920,7 +920,7 @@ def _c_all(self, node, nodename, inames, onames, sub): [ f"PyArray_ISFORTRAN({arr})" for arr, var in z - if not all(var.broadcastable) + if not all(s == 1 for s in var.type.shape) ] ) # If it is a scalar, make it c contig to prevent problem with @@ -1005,7 +1005,7 @@ def _c_all(self, node, nodename, inames, onames, sub): or # Use simpler code when output ndim == 0 or 1 # or for broadcated scalar. - all(node.outputs[0].broadcastable) + all(s == 1 for s in node.outputs[0].type.shape) ): if nnested: all_code = [("", "")] * (nnested - 1) + [("", code)] + [""] @@ -1077,7 +1077,7 @@ def _c_all(self, node, nodename, inames, onames, sub): all(o.ndim >= 1 for o in node.outputs) and # Don't use the contig code for broadcasted scalar. - not all(node.outputs[0].broadcastable) + not all(s == 1 for s in node.outputs[0].type.shape) ): contig = None try: @@ -1110,7 +1110,7 @@ def _c_all(self, node, nodename, inames, onames, sub): """ index = "" for x, var in zip(inames + onames, inputs + node.outputs): - if not all(var.broadcastable): + if not all(s == 1 for s in var.type.shape): contig += ( """ dtype_%(x)s * %(x)s_ptr = (dtype_%(x)s*) PyArray_DATA(%(x)s); @@ -1144,18 +1144,19 @@ def _c_all(self, node, nodename, inames, onames, sub): ) if contig is not None: z = list(zip(inames + onames, inputs + node.outputs)) + all_broadcastable = all(s == 1 for s in var.type.shape) cond1 = " && ".join( [ "PyArray_ISCONTIGUOUS(%s)" % arr for arr, var in z - if not all(var.broadcastable) + if not all_broadcastable ] ) cond2 = " && ".join( [ "PyArray_ISFORTRAN(%s)" % arr for arr, var in z - if not all(var.broadcastable) + if not all_broadcastable ] ) loop = ( @@ -1256,33 +1257,61 @@ class CAReduce(COp): """ - __props__: Union[ - Tuple[str], Tuple[str, str], Tuple[str, str, str], Tuple[str, str, str, str] - ] = ("scalar_op", "axis") + __props__ = ("scalar_op", "axis", "dtype", "acc_dtype", "upcast_discrete_output") - def __init__(self, scalar_op, axis=None): + def __init__( + self, + scalar_op, + axis=None, + dtype=None, + acc_dtype=None, + upcast_discrete_output=False, + ): """ Parameters ---------- scalar_op - A binary scalar `Op` with only one output. It must be commutative - and associative. + A binary scalar `Op` with only one output. + It must be commutative and associative. 
axis - - The dimension along which we want to reduce - - List of dimensions that we want to reduce - - If ``None``, all dimensions are reduced + - the dimension along which we want to reduce + - list of dimensions that we want to reduce + - if ``None``, all dimensions are reduced + dtype + The dtype of the returned tensor. If ``None``, then we use the default + dtype which is the same as the input array's dtype except when + `upcast_discrete_output` is ``True`` and the following holds: + + - the input dtype is a signed integer of precision < 64 bit, in which + case we use int64 + - the input dtype is an unsigned integer of precision < 64 bit, in + which case we use uint64 + + This default dtype does _not_ depend on the value of `acc_dtype`. + This behavior is similar in spirit to that of NumPy, except that + NumPy uses the default machine integer while we always use 64 bit + integers to avoid platform-dependent behavior. + acc_dtype + The dtype of the internal accumulator. + If ``None`` (default), we use the dtype in the list below, + or the input dtype if its precision is higher: + + - for int dtypes, we use at least int64; + - for uint dtypes, we use at least uint64; + - for float dtypes, we use at least float64; + - for complex dtypes, we use at least complex128. + upcast_discrete_output + See """ if scalar_op.nin not in (-1, 2) or scalar_op.nout != 1: raise NotImplementedError( - "CAReduce only supports binary functions with a single " "output." + "CAReduce only supports binary functions with a single output." ) self.axis = None - self.ufunc_is_vectorized = False self.scalar_op = scalar_op - self.set_ufunc(scalar_op) if axis is not None: if isinstance(axis, (int, np.integer)) or ( @@ -1292,63 +1321,179 @@ def __init__(self, scalar_op, axis=None): else: self.axis = tuple(axis) - def set_ufunc(self, scalar_op): - if hasattr(scalar_op, "nfunc_spec") and hasattr(np, scalar_op.nfunc_spec[0]): - self.ufunc = getattr(np, scalar_op.nfunc_spec[0]) + self.dtype = dtype + self.acc_dtype = acc_dtype + self.upcast_discrete_output = upcast_discrete_output + + @property + def ufunc(self): + if hasattr(self, "_ufunc"): + return self._ufunc + + if hasattr(self.scalar_op, "nfunc_spec") and hasattr( + np, self.scalar_op.nfunc_spec[0] + ): + self._ufunc = getattr(np, self.scalar_op.nfunc_spec[0]) else: - self.ufunc = np.frompyfunc(scalar_op.impl, 2, 1) - self.ufunc_is_vectorized = True + self._ufunc = np.frompyfunc( + self.scalar_op.impl, 2, 1, identity=self.scalar_op.identity + ) + + return self._ufunc + + def _output_dtype(self, idtype): - def _output_dtype(self, input_dtype): - return input_dtype + if not self.upcast_discrete_output: + return idtype + + dtype = self.dtype + + if dtype == "OLD": + return dict( + int8="int32", + int16="int32", + int32="int64", + uint8="uint32", + uint16="uint32", + uint32="uint64", + ).get(idtype, idtype) + elif dtype is None: + # If input has a discrete dtype, upcast it to 64 + return dict( + bool="int64", + int8="int64", + int16="int64", + int32="int64", + uint8="uint64", + uint16="uint64", + uint32="uint64", + ).get(idtype, idtype) + else: + # The important is that the accumulator dtype does not + # lose precision. Then, the result can be downcasted. 
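The dtype rules folded into `CAReduce` above keep the former `CAReduceDtype` defaults: discrete inputs widen to 64-bit results, while float results keep their dtype and only the accumulator is widened. A quick check through the user-facing reductions, assuming they use these defaults as the docstring describes::

    import aesara.tensor as at

    x = at.vector("x", dtype="int8")
    assert x.sum().dtype == "int64"   # small signed ints upcast to int64

    u = at.vector("u", dtype="uint16")
    assert u.sum().dtype == "uint64"  # small unsigned ints upcast to uint64

    y = at.vector("y", dtype="float32")
    assert y.sum().dtype == "float32"  # floats keep their dtype; only the
                                       # internal accumulator is float64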
+ return dtype + + def _acc_dtype(self, idtype): + acc_dtype = self.acc_dtype + if acc_dtype is None: + return dict( + bool="int64", + int8="int64", + int16="int64", + int32="int64", + uint8="uint64", + uint16="uint64", + uint32="uint64", + float16="float32", + float32="float64", + complex64="complex128", + ).get(idtype, idtype) + elif acc_dtype in continuous_dtypes and idtype in discrete_dtypes: + # Specifying a continuous accumulator for discrete input is OK + return acc_dtype + else: + # The conversion has to be considered an upcast. + upcasted_dtype = upcast(idtype, acc_dtype) + if acc_dtype != upcasted_dtype: + raise TypeError( + f"Cannot build {self} node with input dtype {idtype} " + f"and acc_dtype {acc_dtype}, as precision would be lost. " + "To correct this error, you can:\n" + " - not specify acc_dtype, or\n" + f" - use an acc_dtype at least as precise as {upcasted_dtype}.\n" + ' - specify "dtype" instead of "acc_dtype", so ' + "the reduction will be precise, but the result will " + 'be casted into "dtype" at the end.\n' + "If you are expecting the precision loss, you can " + f'use tensor.cast(..., dtype="{acc_dtype}"), on your input.' + ) + return acc_dtype def make_node(self, input): input = as_tensor_variable(input) inp_dims = input.type.ndim - inp_bdcast = input.type.broadcastable inp_dtype = input.type.dtype + # We need to redefine make_node so that, if self.dtype is None, + # we can infer what dtype should be, and create a node from an Op + # of the appropriate dtype. + dtype = self._output_dtype(inp_dtype) + acc_dtype = self._acc_dtype(inp_dtype) + + assert dtype is not None + assert acc_dtype is not None + axis = self.axis - if axis is None: - axis = list(range(inp_dims)) - copy_op = any(a < 0 for a in axis) # scalar inputs are treated as 1D regarding axis in this `Op` - try: - axis = np.core.numeric.normalize_axis_tuple(axis, ndim=max(1, inp_dims)) - except np.AxisError: - raise np.AxisError(axis, ndim=inp_dims) - - # We can't call self.__class__() as there is a class that - # inherits from CAReduce that doesn't have the same signature - if copy_op: - op = copy(self) - op.set_ufunc(op.scalar_op) - assert len(axis) == len(self.axis) - op.axis = tuple(axis) + if axis is not None: + try: + axis = np.core.numeric.normalize_axis_tuple(axis, ndim=max(1, inp_dims)) + except np.AxisError: + raise np.AxisError(axis, ndim=inp_dims) + + out_shape = tuple( + s for i, s in enumerate(input.type.shape) if i not in axis + ) else: - op = self + out_shape = () - broadcastable = [x for i, x in enumerate(inp_bdcast) if i not in axis] + if ( + (axis is not None and any(a < 0 for a in axis)) + or dtype != self.dtype + or acc_dtype != self.acc_dtype + ): + op = self.clone(axis=axis, dtype=dtype, acc_dtype=acc_dtype) + else: + op = self - output = TensorType(dtype=self._output_dtype(inp_dtype), shape=broadcastable)() + output = TensorType(dtype=dtype, shape=out_shape)() return Apply(op, [input], [output]) - def __getstate__(self): - d = copy(self.__dict__) - d.pop("ufunc", None) - return d + def clone( + self, + axis=None, + dtype=None, + acc_dtype=None, + upcast_discrete_output=None, + **kwargs, + ): + if axis is None: + axis = self.axis + if dtype is None: + dtype = self.dtype + if acc_dtype is None: + acc_dtype = self.acc_dtype + if upcast_discrete_output is None: + upcast_discrete_output = self.upcast_discrete_output - def __setstate__(self, d): - self.__dict__.update(d) - self.set_ufunc(self.scalar_op) + res = type(self)( + self.scalar_op, + axis=axis, + dtype=dtype, + 
acc_dtype=acc_dtype, + upcast_discrete_output=None, + **kwargs, + ) + + return res def __str__(self): prefix = f"{type(self).__name__}{{{self.scalar_op}}}" + extra_params = [] + if self.axis is not None: - axes_str = ", ".join(str(x) for x in self.axis) - return f"{prefix}{{{axes_str}}}" + axis = ", ".join(str(x) for x in self.axis) + extra_params.append(f"axis=[{axis}]") + + if self.acc_dtype: + extra_params.append(f"acc_dtype={self.acc_dtype}") + + extra_params_str = ", ".join(extra_params) + + if extra_params_str: + return f"{prefix}{{{extra_params_str}}}" else: return f"{prefix}" @@ -1356,44 +1501,28 @@ def perform(self, node, inp, out): (input,) = inp (output,) = out axis = self.axis - if axis is None: - axis = list(range(input.ndim)) - if hasattr(self, "acc_dtype") and self.acc_dtype is not None: + out_dtype = node.outputs[0].type.dtype + + if self.acc_dtype is not None: acc_dtype = self.acc_dtype else: - acc_dtype = node.outputs[0].type.dtype - - variable = np.array(input, dtype=acc_dtype) - - if axis: - # Reducing functions built using np.frompyfunc() do not - # support reduction along multiple axes. Hence loop through - # each, otherwise numpy's inbuilt reduction functions - # support reduction along multiple axes directly. - if self.ufunc_is_vectorized: - to_reduce = reversed(sorted(axis)) - for dimension in to_reduce: - variable = self.ufunc.reduce(variable, dimension, dtype=acc_dtype) - else: - variable = self.ufunc.reduce(variable, axis=tuple(axis)) - output[0] = _asarray(variable, dtype=node.outputs[0].type.dtype) - else: - # Force a copy - output[0] = np.array(variable, copy=True, dtype=node.outputs[0].type.dtype) + acc_dtype = out_dtype + + # out_dtype = self.dtype if self.dtype and self.dtype != "OLD" else out_dtype + + input = np.array(input, dtype=acc_dtype) + + out = self.ufunc.reduce(input, axis=axis, dtype=acc_dtype) + + output[0] = _asarray(out, dtype=out_dtype) def infer_shape(self, fgraph, node, shapes): (ishape,) = shapes axis = self.axis if axis is None: return ((),) - return ( - [ - ishape[i] - for (i, b) in enumerate(node.inputs[0].type.broadcastable) - if i not in axis - ], - ) + return ([ishape[i] for i in range(node.inputs[0].type.ndim) if i not in axis],) def _c_all(self, node, name, inames, onames, sub): @@ -1411,14 +1540,14 @@ def _c_all(self, node, name, inames, onames, sub): if acc_dtype is not None: if acc_dtype == "float16": raise MethodNotDefined("no c_code for float16") - acc_type = TensorType(shape=node.outputs[0].broadcastable, dtype=acc_dtype) + acc_type = TensorType(shape=node.outputs[0].type.shape, dtype=acc_dtype) adtype = acc_type.dtype_specs()[1] else: adtype = odtype axis = self.axis if axis is None: - axis = list(range(len(input.type.broadcastable))) + axis = list(range(input.type.ndim)) if len(axis) == 0: # The acc_dtype is never a downcast compared to the input dtype @@ -1592,176 +1721,6 @@ def c_code_cache_version_apply(self, node): return () -class CAReduceDtype(CAReduce): - """A subclass of `CAReduce` that accepts an additional output "dtype" parameter. - - It also accepts an optional `acc_dtype`, which specifies the dtype that - will be used for the accumulation. The accumulation will be done using an - array of dtype `acc_dtype`, then it will be cast into `dtype` and returned. - - If no `dtype` is provided, one will be inferred so as not to lose - too much precision. 
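``perform`` above now calls ``self.ufunc.reduce(input, axis=axis, ...)`` directly even for Python-level ufuncs, because the ``ufunc`` property builds them with ``np.frompyfunc(..., identity=...)``; with an identity, multi-axis reduction works without the old per-axis loop. A NumPy-only illustration (requires a NumPy recent enough to accept the ``identity`` keyword)::

    import numpy as np

    # Without an identity, an object ufunc cannot reduce over several axes at
    # once; supplying one lifts that restriction.
    add = np.frompyfunc(lambda a, b: a + b, 2, 1, identity=0)

    x = np.arange(12).reshape(3, 4)
    print(add.reduce(x, axis=(0, 1)))  # 66, same as x.sum()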
- - """ - - __props__: Union[Tuple[str, str, str], Tuple[str, str, str, str]] = ( - "scalar_op", - "axis", - "dtype", - "acc_dtype", - ) - - def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None): - """ - - Parameters - ---------- - scalar_op - A binary scalar `Op` with only one output. - It must be commutative and associative. - axis - * the dimension along which we want to reduce - * list of dimensions that we want to reduce - * if ``None``, all dimensions are reduced - dtype - The dtype of the returned tensor. If ``None``, then we use the default - dtype which is the same as the input array's dtype except when: - - * the input dtype is a signed integer of precision < 64 bit, in which - case we use int64 - * the input dtype is an unsigned integer of precision < 64 bit, in - which case we use uint64 - - This default dtype does _not_ depend on the value of `acc_dtype`. - This behavior is similar in spirit to that of NumPy, except that - NumPy uses the default machine integer while we always use 64 bit - integers to avoid platform-dependent behavior. - acc_dtype - The dtype of the internal accumulator. - If ``None`` (default), we use the dtype in the list below, - or the input dtype if its precision is higher: - - * for int dtypes, we use at least int64; - * for uint dtypes, we use at least uint64; - * for float dtypes, we use at least float64; - * for complex dtypes, we use at least complex128. - - """ - super().__init__(scalar_op, axis=axis) - self.dtype = dtype - self.acc_dtype = acc_dtype - - def __setstate__(self, d): - super().__setstate__(d) - if not hasattr(self, "dtype"): - # This is needed as old pickled will crash otherwise. - # We need to keep the old dtype behavior as the op - # could be in an apply node with a specified dtype. - self.dtype = "OLD" - - if not hasattr(self, "acc_dtype"): - # acc_dtype is not used by any external Op, so we do not - # need to keep the previous behaviour here. - self.acc_dtype = None - - def _output_dtype(self, idtype): - dtype = self.dtype - if dtype == "OLD": - return dict( - int8="int32", - int16="int32", - int32="int64", - uint8="uint32", - uint16="uint32", - uint32="uint64", - ).get(idtype, idtype) - if dtype is None: - # If input has a discrete dtype, upcast it to 64 - return dict( - bool="int64", - int8="int64", - int16="int64", - int32="int64", - uint8="uint64", - uint16="uint64", - uint32="uint64", - ).get(idtype, idtype) - else: - # The important is that the accumulator dtype does not - # lose precision. Then, the result can be downcasted. - return dtype - - def _acc_dtype(self, idtype): - acc_dtype = self.acc_dtype - if acc_dtype is None: - return dict( - bool="int64", - int8="int64", - int16="int64", - int32="int64", - uint8="uint64", - uint16="uint64", - uint32="uint64", - float16="float32", - float32="float64", - complex64="complex128", - ).get(idtype, idtype) - elif acc_dtype in continuous_dtypes and idtype in discrete_dtypes: - # Specifying a continuous accumulator for discrete input is OK - return acc_dtype - else: - # The conversion has to be considered an upcast. - upcasted_dtype = upcast(idtype, acc_dtype) - if acc_dtype != upcasted_dtype: - raise TypeError( - f"Cannot build {self} node with input dtype {idtype} " - f"and acc_dtype {acc_dtype}, as precision would be lost. 
" - "To correct this error, you can:\n" - " - not specify acc_dtype, or\n" - f" - use an acc_dtype at least as precise as {upcasted_dtype}.\n" - ' - specify "dtype" instead of "acc_dtype", so ' - "the reduction will be precise, but the result will " - 'be casted into "dtype" at the end.\n' - "If you are expecting the precision loss, you can " - f'use tensor.cast(..., dtype="{acc_dtype}"), on your input.' - ) - return acc_dtype - - def make_node(self, input): - # We need to redefine make_node so that, if self.dtype is None, - # we can infer what dtype should be, and create a node from an Op - # of the appropriate dtype. - input = as_tensor_variable(input) - dtype = self._output_dtype(input.dtype) - acc_dtype = self._acc_dtype(input.dtype) - - assert dtype is not None - assert acc_dtype is not None - - if dtype == self.dtype and acc_dtype == self.acc_dtype: - # Don't build another instance - op = self - else: - op = copy(self) - op.set_ufunc(self.scalar_op) - op.dtype = dtype - op.acc_dtype = acc_dtype - - assert op.acc_dtype is not None - - # TODO: Why doesn't `make_node` just take these - # automatically-determined values as arguments? - return super(CAReduceDtype, op).make_node(input) - - def __str__(self): - prefix = f"{type(self).__name__}{{{self.scalar_op}}}" - if self.axis is not None: - axis = ", ".join(str(x) for x in self.axis) - return f"{prefix}{{axis=[{axis}], acc_dtype={self.acc_dtype}}}" - else: - return f"{prefix}{{acc_dtype={self.acc_dtype}}}" - - def scalar_elemwise(*symbol, nfunc=None, nin=None, nout=None, symbolname=None): """Replace a symbol definition with an `Elemwise`-wrapped version of the corresponding scalar `Op`. diff --git a/aesara/tensor/extra_ops.py b/aesara/tensor/extra_ops.py index ca3e720339..397ff876d0 100644 --- a/aesara/tensor/extra_ops.py +++ b/aesara/tensor/extra_ops.py @@ -668,19 +668,21 @@ def make_node(self, x, repeats): ) if self.axis is None: - broadcastable = [False] + out_shape = [None] else: try: const_reps = at.get_scalar_constant_value(repeats) except NotScalarConstantError: const_reps = None if const_reps == 1: - broadcastable = x.broadcastable + out_shape = x.type.shape else: - broadcastable = list(x.broadcastable) - broadcastable[self.axis] = False + out_shape = list(x.type.shape) + out_shape[self.axis] = None - out_type = TensorType(x.dtype, broadcastable) + out_type = TensorType( + x.dtype, shape=tuple(1 if s == 1 else None for s in out_shape) + ) return Apply(self, [x, repeats], [out_type()]) @@ -1178,33 +1180,26 @@ def __init__( self.return_inverse = return_inverse self.return_counts = return_counts self.axis = axis - numpy_ver = [int(n) for n in np.__version__.split(".")[:2]] - if self.axis is not None and bool(numpy_ver < [1, 13]): - raise RuntimeError( - "Numpy version = " - + np.__version__ - + f". Option 'axis={axis}' works starting from version 1.13.0." 
- ) def make_node(self, x): x = at.as_tensor_variable(x) self_axis = self.axis if self_axis is None: - broadcastable = [False] + out_shape = (None,) else: if self_axis < 0: - self_axis += len(x.broadcastable) - if self_axis < 0 or self_axis >= len(x.broadcastable): - raise RuntimeError( - "Unique axis `{}` is outside of input ndim = " - "{}.".format(self.axis, len(x.broadcastable)) + self_axis += x.type.ndim + if self_axis < 0 or self_axis >= x.type.ndim: + raise ValueError( + f"Unique axis {self.axis} is outside of input ndim = {x.type.ndim}" ) - broadcastable = [ - b if axis != self_axis else False - for axis, b in enumerate(x.broadcastable) - ] - outputs = [TensorType(shape=broadcastable, dtype=x.dtype)()] - typ = TensorType(shape=[False], dtype="int64") + out_shape = tuple( + s if s == 1 and axis != self_axis else None + for axis, s in enumerate(x.type.shape) + ) + + outputs = [TensorType(dtype=x.dtype, shape=out_shape)()] + typ = TensorType(dtype="int64", shape=(None,)) if self.return_index: outputs.append(typ()) if self.return_inverse: @@ -1310,7 +1305,7 @@ def make_node(self, indices, dims): self, [indices, dims], [ - TensorType(dtype="int64", shape=(False,) * indices.ndim)() + TensorType(dtype="int64", shape=(None,) * indices.type.ndim)() for i in range(at.get_vector_length(dims)) ], ) @@ -1389,7 +1384,7 @@ def make_node(self, *inp): return Apply( self, multi_index + [dims], - [TensorType(dtype="int64", shape=(False,) * multi_index[0].ndim)()], + [TensorType(dtype="int64", shape=(None,) * multi_index[0].type.ndim)()], ) def infer_shape(self, fgraph, node, input_shapes): diff --git a/aesara/tensor/fft.py b/aesara/tensor/fft.py index 0fcdfbdeec..7cc2a9df45 100644 --- a/aesara/tensor/fft.py +++ b/aesara/tensor/fft.py @@ -15,7 +15,7 @@ class RFFTOp(Op): def output_type(self, inp): # add extra dim for real/imag - return TensorType(inp.dtype, shape=[False] * (inp.type.ndim + 1)) + return TensorType(inp.dtype, shape=(None,) * (inp.type.ndim + 1)) def make_node(self, a, s=None): a = as_tensor_variable(a) @@ -76,7 +76,7 @@ class IRFFTOp(Op): def output_type(self, inp): # remove extra dim for real/imag - return TensorType(inp.dtype, shape=[False] * (inp.type.ndim - 1)) + return TensorType(inp.dtype, shape=(None,) * (inp.type.ndim - 1)) def make_node(self, a, s=None): a = as_tensor_variable(a) diff --git a/aesara/tensor/fourier.py b/aesara/tensor/fourier.py index bc069b31e2..2bcc87ba0e 100644 --- a/aesara/tensor/fourier.py +++ b/aesara/tensor/fourier.py @@ -59,27 +59,22 @@ class Fourier(Op): def make_node(self, a, n, axis): a = as_tensor_variable(a) if a.ndim < 1: - raise TypeError( - f"{self.__class__.__name__}: input must be an array, not a scalar" - ) + raise TypeError("Input must be an array, not a scalar") if axis is None: axis = a.ndim - 1 axis = as_tensor_variable(axis) else: axis = as_tensor_variable(axis) if axis.dtype not in integer_dtypes: - raise TypeError( - "%s: index of the transformed axis must be" - " of type integer" % self.__class__.__name__ - ) + raise TypeError("Index of the transformed axis must be of type integer") elif axis.ndim != 0 or ( isinstance(axis, TensorConstant) and (axis.data < 0 or axis.data > a.ndim - 1) ): raise TypeError( - f"{self.__class__.__name__}: index of the transformed axis must be" - " a scalar not smaller than 0 and smaller than" - " dimension of array" + "Index of the transformed axis must be " + "a scalar not smaller than 0 and smaller than " + "dimension of array" ) if n is None: n = a.shape[axis] @@ -88,18 +83,21 @@ def make_node(self, a, n, 
axis): n = as_tensor_variable(n) if n.dtype not in integer_dtypes: raise TypeError( - "%s: length of the transformed axis must be" - " of type integer" % self.__class__.__name__ + "Length of the transformed axis must be of type integer" ) elif n.ndim != 0 or (isinstance(n, TensorConstant) and n.data < 1): raise TypeError( - "%s: length of the transformed axis must be a" - " strictly positive scalar" % self.__class__.__name__ + "Length of the transformed axis must be a strictly positive scalar" ) return Apply( self, [a, n, axis], - [TensorType("complex128", a.type.broadcastable)()], + [ + TensorType( + "complex128", + shape=tuple(1 if s == 1 else None for s in a.type.shape), + )() + ], ) def infer_shape(self, fgraph, node, in_shapes): diff --git a/aesara/tensor/io.py b/aesara/tensor/io.py index ab670b4bb2..4ddafe398e 100644 --- a/aesara/tensor/io.py +++ b/aesara/tensor/io.py @@ -21,11 +21,11 @@ class LoadFromDisk(Op): """ - __props__ = ("dtype", "broadcastable", "mmap_mode") + __props__ = ("dtype", "shape", "mmap_mode") - def __init__(self, dtype, broadcastable, mmap_mode=None): + def __init__(self, dtype, shape, mmap_mode=None): self.dtype = np.dtype(dtype) # turn "float64" into np.float64 - self.broadcastable = broadcastable + self.shape = shape if mmap_mode not in (None, "c"): raise ValueError( "The only supported values for mmap_mode " @@ -36,7 +36,7 @@ def __init__(self, dtype, broadcastable, mmap_mode=None): def make_node(self, path): if isinstance(path, str): path = Constant(Generic(), path) - return Apply(self, [path], [tensor(self.dtype, shape=self.broadcastable)]) + return Apply(self, [path], [tensor(self.dtype, shape=self.shape)]) def perform(self, node, inp, out): path = inp[0] @@ -50,14 +50,14 @@ def perform(self, node, inp, out): out[0][0] = result def __str__(self): - return "Load{{dtype: {}, broadcastable: {}, mmep: {}}}".format( + return "Load{{dtype: {}, shape: {}, mmep: {}}}".format( self.dtype, - self.broadcastable, + self.shape, self.mmap_mode, ) -def load(path, dtype, broadcastable, mmap_mode=None): +def load(path, dtype, shape, mmap_mode=None): """ Load an array from an .npy file. @@ -67,10 +67,8 @@ def load(path, dtype, broadcastable, mmap_mode=None): A Generic symbolic variable, that will contain a string dtype : data-type The data type of the array to be read. - broadcastable - The broadcastable pattern of the loaded array, for instance, - (False,) for a vector, (False, True) for a column, - (False, False) for a matrix. + shape + The static shape information of the loaded array. mmap_mode How the file will be loaded. 
None means that the data will be copied into an array in memory, 'c' means that the file @@ -83,7 +81,7 @@ def load(path, dtype, broadcastable, mmap_mode=None): -------- >>> from aesara import * >>> path = Variable(Generic(), None) - >>> x = tensor.load(path, 'int64', (False,)) + >>> x = tensor.load(path, 'int64', (None,)) >>> y = x*2 >>> fn = function([path], y) >>> fn("stored-array.npy") # doctest: +SKIP @@ -91,7 +89,7 @@ def load(path, dtype, broadcastable, mmap_mode=None): """ - return LoadFromDisk(dtype, broadcastable, mmap_mode)(path) + return LoadFromDisk(dtype, shape, mmap_mode)(path) ########################## @@ -129,7 +127,7 @@ def __init__(self, source, tag, shape, dtype): self.tag = tag self.shape = shape self.dtype = np.dtype(dtype) # turn "float64" into numpy.float64 - self.broadcastable = (False,) * len(shape) + self.static_shape = (None,) * len(shape) def make_node(self): return Apply( @@ -137,7 +135,7 @@ def make_node(self): [], [ Variable(Generic(), None), - tensor(self.dtype, shape=self.broadcastable), + tensor(self.dtype, shape=self.static_shape), ], ) @@ -182,7 +180,7 @@ def make_node(self, request, data): return Apply( self, [request, data], - [tensor(data.dtype, shape=data.broadcastable)], + [tensor(data.dtype, shape=data.type.shape)], ) def perform(self, node, inp, out): diff --git a/aesara/tensor/math.py b/aesara/tensor/math.py index 2b6724aa4b..6d8b321825 100644 --- a/aesara/tensor/math.py +++ b/aesara/tensor/math.py @@ -25,13 +25,7 @@ stack, switch, ) -from aesara.tensor.elemwise import ( - CAReduce, - CAReduceDtype, - DimShuffle, - Elemwise, - scalar_elemwise, -) +from aesara.tensor.elemwise import CAReduce, DimShuffle, Elemwise, scalar_elemwise from aesara.tensor.shape import shape, specify_broadcastable from aesara.tensor.type import ( DenseTensorType, @@ -151,13 +145,15 @@ def make_node(self, x): # We keep the original broadcastable flags for dimensions on which # we do not perform the max / argmax. all_axes = set(self.axis) - broadcastable = [ - b for i, b in enumerate(x.type.broadcastable) if i not in all_axes - ] inputs = [x] + out_shape = tuple( + 1 if s == 1 else None + for i, s in enumerate(x.type.shape) + if i not in all_axes + ) outputs = [ - tensor(x.type.dtype, broadcastable, name="max"), - tensor("int64", broadcastable, name="argmax"), + tensor(x.type.dtype, shape=out_shape, name="max"), + tensor("int64", shape=out_shape, name="argmax"), ] return Apply(self, inputs, outputs) @@ -375,10 +371,8 @@ def make_node(self, x, axis=None): # We keep the original broadcastable flags for dimensions on which # we do not perform the argmax. 
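The reduced output types above are all built from the same rule: a dimension that survives the reduction keeps a static ``1`` only if it is statically known to be length 1, and is reported as unknown otherwise. A standalone sketch of that rule (the helper name is made up for illustration):

    def reduced_static_shape(in_shape, axes):
        """Static shape left after reducing over ``axes`` (no keepdims)."""
        return tuple(
            1 if s == 1 else None for i, s in enumerate(in_shape) if i not in axes
        )

    assert reduced_static_shape((None, 1, 3), {2}) == (None, 1)
    assert reduced_static_shape((5, 1, None), {0, 2}) == (1,)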
- broadcastable = [ - b for i, b in enumerate(x.type.broadcastable) if i not in all_axes - ] - outputs = [tensor("int64", broadcastable, name="argmax")] + out_shape = tuple(s for i, s in enumerate(x.type.shape) if i not in all_axes) + outputs = [tensor("int64", shape=out_shape, name="argmax")] return Apply(self, inputs, outputs) def prepare_node(self, node, storage_map, compute_map, impl): @@ -633,6 +627,10 @@ class Max(NonZeroCAReduce): def __init__(self, axis): super().__init__(aes.scalar_maximum, axis) + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + return type(self)(axis=axis) + class Min(NonZeroCAReduce): nfunc_spec = ("min", 1, 1) @@ -640,6 +638,10 @@ class Min(NonZeroCAReduce): def __init__(self, axis): super().__init__(aes.scalar_minimum, axis) + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + return type(self)(axis=axis) + def max(x, axis=None, keepdims=False): """ @@ -1530,6 +1532,10 @@ def c_code(self, node, name, inames, onames, sub): """ ) + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + return type(self)(axis=axis) + # TODO: implement the grad. When done and tested, you can make this the default # version. @@ -1911,15 +1917,14 @@ def make_node(self, *inputs): "aesara.tensor.dot instead." ) - i_broadcastables = [input.type.broadcastable for input in inputs] - bx, by = i_broadcastables - if len(by) == 2: # y is a matrix - bz = bx[:-1] + by[-1:] - elif len(by) == 1: # y is vector - bz = bx[:-1] + sx, sy = [input.type.shape for input in inputs] + if len(sy) == 2: + sz = sx[:-1] + sy[-1:] + elif len(sy) == 1: + sz = sx[:-1] i_dtypes = [input.type.dtype for input in inputs] - outputs = [tensor(aes.upcast(*i_dtypes), bz)] + outputs = [tensor(aes.upcast(*i_dtypes), shape=sz)] return Apply(self, inputs, outputs) def perform(self, node, inp, out): @@ -2351,7 +2356,6 @@ class All(CAReduce): """ - __props__ = ("axis",) nfunc_spec = ("all", 1, 1) def __init__(self, axis=None): @@ -2377,6 +2381,10 @@ def grad(self, inp, grads): (x,) = inp return [x.zeros_like(config.floatX)] + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + return type(self)(axis=axis) + class Any(CAReduce): """Applies `bitwise or` to all the values of a tensor along the @@ -2384,7 +2392,6 @@ class Any(CAReduce): """ - __props__ = ("axis",) nfunc_spec = ("any", 1, 1) def __init__(self, axis=None): @@ -2410,48 +2417,31 @@ def grad(self, inp, grads): (x,) = inp return [x.zeros_like(config.floatX)] + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + return type(self)(axis=axis) + -class Sum(CAReduceDtype): +class Sum(CAReduce): """ Sums all the values of a tensor along the specified axis(es). - Equivalent to `CAReduceDtype(scalar.add, axis=axis, dtype=dtype)`, + Equivalent to `CAReduce(scalar.add, axis=axis, dtype=dtype)`, with the difference that this defines the gradient of sum wrt its tensor input. - Parameters - ---------- - axis - Axis(es) along which the tensor should be summed - (use None to sum over all axes, and a list or tuple to sum along more - than one axis). - - dtype - The dtype of the internal accumulator and returned - tensor. If None, then we use the default dtype which is the same as the - input tensor's dtype except when: - - the input dtype is a signed integer of precision < 64 bit, in - which case we use int64 - - the input dtype is an unsigned integer of precision < 64 bit, in - which case we use uint64 - This value does not depend on the value of "acc_dtype". 
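The `clone` overrides added above all follow one pattern: rebuild the `Op` from its own constructor arguments, taking overrides from `kwargs` and falling back to the current values, presumably because these subclasses fix `scalar_op` themselves and take different constructor arguments than the base `CAReduce`. A stand-in sketch of the pattern (`MyReduce` is illustrative, not a real class in the codebase):

    class MyReduce:
        """Illustrative stand-in for the reduction ``Op``s above."""

        def __init__(self, axis=None, dtype=None, acc_dtype=None):
            self.axis = axis
            self.dtype = dtype
            self.acc_dtype = acc_dtype

        def clone(self, **kwargs):
            # Fall back to the current parameters for anything not overridden
            axis = kwargs.get("axis", self.axis)
            dtype = kwargs.get("dtype", self.dtype)
            acc_dtype = kwargs.get("acc_dtype", self.acc_dtype)
            return type(self)(axis=axis, dtype=dtype, acc_dtype=acc_dtype)

    op = MyReduce(axis=(0,), dtype="float64")
    assert op.clone(axis=None).dtype == "float64"
    assert op.clone(axis=None).axis is None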
- - acc_dtype - The dtype of the internal accumulator. - If None (default), we use the dtype in the list below, - or the input dtype if its precision is higher: - - for int dtypes, we use at least int64; - - for uint dtypes, we use at least uint64; - - for float dtypes, we use at least float64; - - for complex dtypes, we use at least complex128. - """ - __props__ = ("axis", "dtype", "acc_dtype") nfunc_spec = ("sum", 1, 1) def __init__(self, axis=None, dtype=None, acc_dtype=None): - super().__init__(aes.add, axis=axis, dtype=dtype, acc_dtype=acc_dtype) + super().__init__( + aes.add, + axis=axis, + dtype=dtype, + acc_dtype=acc_dtype, + upcast_discrete_output=True, + ) def __str__(self): name = self.__class__.__name__ @@ -2493,6 +2483,12 @@ def R_op(self, inputs, eval_points): return [None] return self(*eval_points, return_list=True) + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + dtype = kwargs.get("dtype", self.dtype) + acc_dtype = kwargs.get("acc_dtype", self.acc_dtype) + return type(self)(axis=axis, dtype=dtype, acc_dtype=acc_dtype) + def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None): """ @@ -2524,7 +2520,7 @@ def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None): pprint.assign(Sum, printing.FunctionPrinter(["sum"], ["axis"])) -class Prod(CAReduceDtype): +class Prod(CAReduce): """ Multiplies all the values of a tensor along the specified axis(es). @@ -2534,19 +2530,20 @@ class Prod(CAReduceDtype): """ - __props__ = ("axis", "dtype", "acc_dtype") + __props__ = ("scalar_op", "axis", "dtype", "acc_dtype", "no_zeros_in_input") + nfunc_spec = ("prod", 1, 1) def __init__(self, axis=None, dtype=None, acc_dtype=None, no_zeros_in_input=False): - super().__init__(aes.mul, axis=axis, dtype=dtype, acc_dtype=acc_dtype) + super().__init__( + aes.mul, + axis=axis, + dtype=dtype, + acc_dtype=acc_dtype, + upcast_discrete_output=True, + ) self.no_zeros_in_input = no_zeros_in_input - def __setstate__(self, dct): - super().__setstate__(dct) - # Add default value to be able to reload old pickled objects. 
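The dtype rules described in the removed docstring still apply: discrete inputs are upcast to 64-bit results, an explicit ``dtype`` overrides that default, and floating-point inputs keep their dtype while the accumulation is done in a wider type. A doctest-style illustration of the behavior this refactor is intended to preserve:

    >>> import aesara.tensor as at
    >>> x = at.vector(dtype="int8")
    >>> at.sum(x).dtype                          # discrete inputs upcast to 64 bits
    'int64'
    >>> at.sum(x, dtype="int32").dtype           # an explicit ``dtype`` wins
    'int32'
    >>> at.sum(at.vector(dtype="float32")).dtype # floats keep their dtype
    'float32'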
- if "no_zeros_in_input" not in dct: - self.no_zeros_in_input = False - def L_op(self, inp, out, grads): """ The grad of this Op could be very easy, if it is was not for the case @@ -2669,6 +2666,18 @@ def L_op(self, inp, out, grads): def c_code_cache_version(self): return (1,) + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + dtype = kwargs.get("dtype", self.dtype) + acc_dtype = kwargs.get("acc_dtype", self.acc_dtype) + no_zeros_in_input = kwargs.get("no_zeros_in_input", self.no_zeros_in_input) + return type(self)( + axis=axis, + dtype=dtype, + acc_dtype=acc_dtype, + no_zeros_in_input=no_zeros_in_input, + ) + def prod( input, @@ -2737,12 +2746,15 @@ def c_code_cache_version(self): mul_without_zeros = MulWithoutZeros(aes.upcast_out, name="mul_without_zeros") -class ProdWithoutZeros(CAReduceDtype): - - __props__ = ("axis", "dtype", "acc_dtype") - +class ProdWithoutZeros(CAReduce): def __init__(self, axis=None, dtype=None, acc_dtype=None): - super().__init__(mul_without_zeros, axis=axis, dtype=dtype, acc_dtype=acc_dtype) + super().__init__( + mul_without_zeros, + axis=axis, + dtype=dtype, + acc_dtype=acc_dtype, + upcast_discrete_output=True, + ) def grad(self, inp, grads): from aesara.gradient import grad_not_implemented @@ -2758,6 +2770,12 @@ def grad(self, inp, grads): ) return [a_grad] + def clone(self, **kwargs): + axis = kwargs.get("axis", self.axis) + dtype = kwargs.get("dtype", self.dtype) + acc_dtype = kwargs.get("acc_dtype", self.acc_dtype) + return type(self)(axis=axis, dtype=dtype, acc_dtype=acc_dtype) + def any(x, axis=None, keepdims=False): out = Any(axis)(x) diff --git a/aesara/tensor/nnet/abstract_conv.py b/aesara/tensor/nnet/abstract_conv.py index dbfc0b7b69..f27b85620e 100644 --- a/aesara/tensor/nnet/abstract_conv.py +++ b/aesara/tensor/nnet/abstract_conv.py @@ -2492,10 +2492,11 @@ def make_node(self, img, kern): "filters does not match given kshp.", ) - broadcastable = [img.broadcastable[0], kern.broadcastable[0]] + ( - [False] * self.convdim - ) - output = img.type.clone(shape=broadcastable)() + out_shape = ( + 1 if img.type.shape[0] == 1 else None, + 1 if kern.type.shape[0] == 1 else None, + ) + ((None,) * self.convdim) + output = img.type.clone(shape=out_shape)() return Apply(self, [img, kern], [output]) def perform(self, node, inp, out_): @@ -2817,17 +2818,18 @@ def make_node(self, img, topgrad, shape, add_assert_shape=True): shape = as_tensor_variable(shape) if self.unshared: - broadcastable = ( - [topgrad.broadcastable[1]] - + ([False] * self.convdim) - + [img.broadcastable[1]] - + ([False] * self.convdim) + out_shape = ( + (topgrad.type.shape[1],) + + ((None,) * self.convdim) + + (img.type.shape[1],) + + ((None,) * self.convdim) ) else: - broadcastable = [topgrad.broadcastable[1], img.broadcastable[1]] + ( - [False] * self.convdim + out_shape = (topgrad.type.shape[1], img.type.shape[1]) + ( + (None,) * self.convdim ) - output = img.type.clone(shape=broadcastable)() + out_shape = tuple(1 if s == 1 else None for s in out_shape) + output = img.type.clone(shape=out_shape)() return Apply(self, [img, topgrad, shape], [output]) def perform(self, node, inp, out_): @@ -3146,7 +3148,10 @@ def make_node(self, kern, topgrad, shape, add_assert_shape=True): kern = as_tensor_variable(kern) if not isinstance(topgrad, Variable): topgrad = as_tensor_variable(topgrad) - gtype = kern.type.clone(dtype=topgrad.dtype, shape=topgrad.broadcastable) + gtype = kern.type.clone( + dtype=topgrad.dtype, + shape=tuple(1 if s == 1 else None for s in topgrad.type.shape), + ) 
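The convolution `make_node` implementations in this file now assemble their output types from static shapes: batch and channel dimensions stay ``1`` only when the corresponding input dimension is statically 1, and every spatial dimension is reported as unknown. A standalone sketch with made-up shapes:

    convdim = 2                        # e.g. a 2D convolution
    img_shape = (1, 3, None, None)     # (batch, channels, rows, cols)
    kern_shape = (8, 3, 5, 5)          # (filters, channels, rows, cols)

    out_shape = (
        1 if img_shape[0] == 1 else None,    # batch dim follows the image
        1 if kern_shape[0] == 1 else None,   # channel dim follows the filters
    ) + (None,) * convdim

    assert out_shape == (1, None, None, None)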
topgrad = gtype.filter_variable(topgrad) if self.unshared: @@ -3175,15 +3180,13 @@ def make_node(self, kern, topgrad, shape, add_assert_shape=True): shape = as_tensor_variable(shape) if self.num_groups > 1: - broadcastable = [topgrad.type.broadcastable[0], False] + ( - [False] * self.convdim - ) + out_shape = (topgrad.type.shape[0], None) + ((None,) * self.convdim) else: - broadcastable = [ - topgrad.type.broadcastable[0], - kern.type.broadcastable[-self.convdim - 1], - ] + ([False] * self.convdim) - output = kern.type.clone(shape=broadcastable)() + out_shape = (topgrad.type.shape[0], kern.type.shape[-self.convdim - 1]) + ( + (None,) * self.convdim + ) + out_shape = tuple(1 if s == 1 else None for s in out_shape) + output = kern.type.clone(shape=out_shape)() return Apply(self, [kern, topgrad, shape], [output]) def perform(self, node, inp, out_): diff --git a/aesara/tensor/nnet/basic.py b/aesara/tensor/nnet/basic.py index 888350877d..61eaf4584c 100644 --- a/aesara/tensor/nnet/basic.py +++ b/aesara/tensor/nnet/basic.py @@ -507,8 +507,8 @@ def make_node(self, x, b, y_idx): raise ValueError("y_idx must be 1-d tensor of [u]ints", y_idx.type) # TODO: Is this correct? It used to be y, not y_idx - nll = TensorType(x.type.dtype, y_idx.type.broadcastable).make_variable() - # nll = TensorType(x.dtype, y.broadcastable) + out_shape = tuple(1 if s == 1 else None for s in y_idx.type.shape) + nll = TensorType(x.type.dtype, shape=out_shape).make_variable() sm = x.type() am = y_idx.type() return Apply(self, [x, b, y_idx], [nll, sm, am]) @@ -986,7 +986,7 @@ def make_node(self, coding_dist, true_one_of_n): return Apply( self, [_coding_dist, _true_one_of_n], - [TensorType(dtype=_coding_dist.dtype, shape=[False])()], + [TensorType(dtype=_coding_dist.dtype, shape=(None,))()], ) def perform(self, node, inp, out): diff --git a/aesara/tensor/nnet/conv.py b/aesara/tensor/nnet/conv.py index 0dbb0240c1..b0c391cef4 100644 --- a/aesara/tensor/nnet/conv.py +++ b/aesara/tensor/nnet/conv.py @@ -739,10 +739,16 @@ def make_node(self, inputs, kerns): "The image and the kernel must have the same type." 
"inputs({_inputs.dtype}), kerns({_kerns.dtype})" ) - bcastable23 = [self.outshp[0] == 1, self.outshp[1] == 1] + out_shape = ( + _inputs.type.shape[0], + _kerns.type.shape[0], + self.outshp[0], + self.outshp[1], + ) + out_shape = tuple(1 if s == 1 else None for s in out_shape) output = tensor( dtype=_inputs.type.dtype, - shape=[_inputs.broadcastable[0], _kerns.broadcastable[0]] + bcastable23, + shape=out_shape, ) return Apply(self, [_inputs, _kerns], [output]) diff --git a/aesara/tensor/nnet/corr.py b/aesara/tensor/nnet/corr.py index c6758a11e1..e89054d29f 100644 --- a/aesara/tensor/nnet/corr.py +++ b/aesara/tensor/nnet/corr.py @@ -692,14 +692,14 @@ def make_node(self, img, kern): if kern.type.ndim != 4: raise TypeError("kern must be 4D tensor") - broadcastable = [ - img.type.broadcastable[0], - kern.type.broadcastable[0], - False, - False, - ] + out_shape = tuple( + 1 if img.type.shape[0] == 1 else None, + 1 if kern.type.shape[0] == 1 else None, + None, + None, + ) dtype = img.type.dtype - return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()]) + return Apply(self, [img, kern], [TensorType(dtype, shape=out_shape)()]) def infer_shape(self, fgraph, node, input_shape): imshp = input_shape[0] @@ -770,24 +770,25 @@ def make_node(self, img, topgrad, shape=None): ] if self.unshared is True: - broadcastable = [ - topgrad.type.broadcastable[1], - False, - False, - img.type.broadcastable[1], - False, - False, + out_shape = [ + 1 if topgrad.type.shape[1] == 1 else None, + None, + None, + 1 if img.type.shape[1] == 1 else None, + None, + None, ] else: - broadcastable = [ - topgrad.type.broadcastable[1], - img.type.broadcastable[1], - False, - False, + out_shape = [ + 1 if topgrad.type.shape[1] == 1 else None, + 1 if img.type.shape[1] == 1 else None, + None, + None, ] + dtype = img.type.dtype return Apply( - self, [img, topgrad] + height_width, [TensorType(dtype, broadcastable)()] + self, [img, topgrad] + height_width, [TensorType(dtype, shape=out_shape)()] ) def infer_shape(self, fgraph, node, input_shape): @@ -905,17 +906,17 @@ def make_node(self, kern, topgrad, shape=None): ] if self.num_groups > 1: - broadcastable = [topgrad.type.broadcastable[0], False, False, False] + out_shape = [1 if topgrad.type.shape[0] == 1 else None, None, None, None] else: - broadcastable = [ - topgrad.type.broadcastable[0], - kern.type.broadcastable[-3], - False, - False, + out_shape = [ + 1 if topgrad.type.shape[0] == 1 else None, + 1 if kern.type.shape[-3] == 1 else None, + None, + None, ] dtype = kern.type.dtype return Apply( - self, [kern, topgrad] + height_width, [TensorType(dtype, broadcastable)()] + self, [kern, topgrad] + height_width, [TensorType(dtype, shape=out_shape)()] ) def infer_shape(self, fgraph, node, input_shape): diff --git a/aesara/tensor/nnet/corr3d.py b/aesara/tensor/nnet/corr3d.py index dc2585b132..fa9d146777 100644 --- a/aesara/tensor/nnet/corr3d.py +++ b/aesara/tensor/nnet/corr3d.py @@ -631,15 +631,15 @@ def make_node(self, img, kern): if kern.type.ndim != 5: raise TypeError("kern must be 5D tensor") - broadcastable = [ - img.type.broadcastable[0], - kern.type.broadcastable[0], - False, - False, - False, + out_shape = [ + 1 if img.type.shape[0] == 1 else None, + 1 if kern.type.shape[0] == 1 else None, + None, + None, + None, ] dtype = img.type.dtype - return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()]) + return Apply(self, [img, kern], [TensorType(dtype, shape=out_shape)()]) def infer_shape(self, fgraph, node, input_shape): imshp = input_shape[0] @@ -708,18 +708,18 
@@ def make_node(self, img, topgrad, shape=None): as_tensor_variable(shape[2]).astype("int64"), ] - broadcastable = [ - topgrad.type.broadcastable[1], - img.type.broadcastable[1], - False, - False, - False, + out_shape = [ + 1 if topgrad.type.shape[1] == 1 else None, + 1 if img.type.shape[1] == 1 else None, + None, + None, + None, ] dtype = img.type.dtype return Apply( self, [img, topgrad] + height_width_depth, - [TensorType(dtype, broadcastable)()], + [TensorType(dtype, shape=out_shape)()], ) def infer_shape(self, fgraph, node, input_shape): @@ -829,11 +829,17 @@ def make_node(self, kern, topgrad, shape=None): ] if self.num_groups > 1: - broadcastable = [topgrad.type.broadcastable[0], False, False, False, False] + out_shape = [ + 1 if topgrad.type.shape[0] == 1 else None, + None, + None, + None, + None, + ] else: - broadcastable = [ - topgrad.type.broadcastable[0], - kern.type.broadcastable[1], + out_shape = [ + 1 if topgrad.type.shape[0] == 1 else None, + 1 if kern.type.shape[1] == 1 else None, False, False, False, @@ -842,7 +848,7 @@ def make_node(self, kern, topgrad, shape=None): return Apply( self, [kern, topgrad] + height_width_depth, - [TensorType(dtype, broadcastable)()], + [TensorType(dtype, shape=out_shape)()], ) def infer_shape(self, fgraph, node, input_shape): diff --git a/aesara/tensor/random/var.py b/aesara/tensor/random/var.py index 23898c8876..eceecbaa01 100644 --- a/aesara/tensor/random/var.py +++ b/aesara/tensor/random/var.py @@ -18,19 +18,18 @@ def __str__(self): ) -@shared_constructor +@shared_constructor.register(np.random.RandomState) +@shared_constructor.register(np.random.Generator) def randomgen_constructor( value, name=None, strict=False, allow_downcast=None, borrow=False ): - r"""`SharedVariable` Constructor for NumPy's `Generator` and/or `RandomState`.""" + r"""`SharedVariable` constructor for NumPy's `Generator` and/or `RandomState`.""" if isinstance(value, np.random.RandomState): rng_sv_type = RandomStateSharedVariable rng_type = random_state_type elif isinstance(value, np.random.Generator): rng_sv_type = RandomGeneratorSharedVariable rng_type = random_generator_type - else: - raise TypeError() if not borrow: value = copy.deepcopy(value) @@ -38,7 +37,7 @@ def randomgen_constructor( return rng_sv_type( type=rng_type, value=value, - name=name, strict=strict, allow_downcast=allow_downcast, + name=name, ) diff --git a/aesara/tensor/rewriting/basic.py b/aesara/tensor/rewriting/basic.py index 7cb095a346..831324b27b 100644 --- a/aesara/tensor/rewriting/basic.py +++ b/aesara/tensor/rewriting/basic.py @@ -1155,7 +1155,7 @@ def constant_folding(fgraph, node): if isinstance(output.type, DenseTensorType): output_type = TensorType( output.type.dtype, - tuple(s == 1 for s in data.shape), + shape=data.shape, name=output.type.name, ) else: diff --git a/aesara/tensor/rewriting/elemwise.py b/aesara/tensor/rewriting/elemwise.py index e80e871370..91123de506 100644 --- a/aesara/tensor/rewriting/elemwise.py +++ b/aesara/tensor/rewriting/elemwise.py @@ -7,15 +7,21 @@ import aesara import aesara.scalar.basic as aes from aesara import compile +from aesara.compile.mode import get_target_language from aesara.configdefaults import config from aesara.graph.basic import Apply, Constant, io_toposort from aesara.graph.features import ReplaceValidate from aesara.graph.op import compute_test_value, get_test_value -from aesara.graph.rewriting.basic import GraphRewriter, copy_stack_trace, node_rewriter +from aesara.graph.rewriting.basic import ( + GraphRewriter, + copy_stack_trace, + in2out, + 
node_rewriter, +) from aesara.graph.rewriting.db import SequenceDB from aesara.graph.utils import InconsistencyError, MethodNotDefined, TestValueError from aesara.tensor.basic import MakeVector, alloc, cast, get_scalar_constant_value -from aesara.tensor.elemwise import DimShuffle, Elemwise +from aesara.tensor.elemwise import CAReduce, DimShuffle, Elemwise from aesara.tensor.exceptions import NotScalarConstantError from aesara.tensor.rewriting.basic import register_canonicalize, register_specialize from aesara.tensor.shape import shape_padleft @@ -833,9 +839,9 @@ def apply(self, fgraph): callbacks_before = fgraph.execute_callbacks_times.copy() callback_before = fgraph.execute_callbacks_time while did_something: - t0 = time.time() + t0 = time.perf_counter() nodelist = list(fgraph.toposort()) - time_toposort += time.time() - t0 + time_toposort += time.perf_counter() - t0 nodelist.reverse() did_something = False for node in nodelist: @@ -944,3 +950,82 @@ def local_useless_composite(fgraph, node): c = aes.Composite(inputs=comp.inputs, outputs=new_outputs) e = Elemwise(scalar_op=c)(*node.inputs, return_list=True) return dict(zip([node.outputs[i] for i in idx], e)) + + +@node_rewriter([CAReduce]) +def local_careduce_fusion(fgraph, node): + """Fuse a `CAReduce` applied to an `Elemwise`.""" + + (car_input,) = node.inputs + elm_node = car_input.owner + + if elm_node is None or not isinstance(elm_node.op, Elemwise): + return False + + elm_inputs = elm_node.inputs + elm_outputs = elm_node.outputs + + if len(elm_inputs) > 1 or len(elm_outputs) > 1: + # TODO: Implement the multiple inputs case + return False + + if len(fgraph.clients[elm_outputs[0]]) > 1: + return False + + # Don't form the fusion when the target language is Python + elm_scalar_op = elm_node.op.scalar_op + car_scalar_op = node.op.scalar_op + + if get_target_language() == ("py",): + return False + + try: + elm_scalar_op.c_code( + elm_node, + "test_presence_of_c_code", + ["x" for x in elm_inputs], + ["z" for z in elm_outputs], + {"fail": "%(fail)s"}, + ) + + car_scalar_op.c_code( + node, + "test_presence_of_c_code", + ["x" for x in node.inputs], + ["z" for z in node.outputs], + {"fail": "%(fail)s"}, + ) + except (NotImplementedError, MethodNotDefined): + return False + + car_axis = node.op.axis + + scalar_elm_inputs = [ + aes.get_scalar_type(inp.type.dtype).make_variable() for inp in elm_inputs + ] + elm_output = elm_scalar_op(*scalar_elm_inputs) + # This input represents the previous value in the `CAReduce` binary reduction + carried_car_input = elm_output.type() + scalar_fused_outputs = [car_scalar_op(carried_car_input, elm_output)] + + fused_scalar_op = aes.Composite( + inputs=[carried_car_input] + scalar_elm_inputs, outputs=scalar_fused_outputs + ) + + # The fused `Op` needs to look and behave like a `BinaryScalarOp` + # TODO: Generate a new `type` and make this relationship official? 
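Conceptually, `local_careduce_fusion` turns a reduction of an elementwise result, e.g. ``sum(exp(x))``, into a single `CAReduce` whose binary scalar op applies the elementwise function and folds it into the running value in one pass. A NumPy sketch of the fused computation (purely conceptual, not how the generated C code is structured):

    import numpy as np

    x = np.random.default_rng(0).normal(size=10)

    unfused = np.add.reduce(np.exp(x))   # Elemwise(exp) followed by CAReduce(add)

    fused = 0.0                          # identity of the reduction
    for xi in x:                         # one fused binary step per element
        fused = fused + np.exp(xi)

    assert np.isclose(unfused, fused)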
+ fused_scalar_op.identity = car_scalar_op.identity + fused_scalar_op.nin = 2 + fused_scalar_op.nout = 1 + + new_car_op = CAReduce(fused_scalar_op, car_axis) + + return [new_car_op(*elm_inputs)] + + +compile.optdb.register( # type: ignore + "local_careduce_fusion", + in2out(local_careduce_fusion), + "fusion", + position=49, +) diff --git a/aesara/tensor/rewriting/shape.py b/aesara/tensor/rewriting/shape.py index a3b30177f0..87d77b1322 100644 --- a/aesara/tensor/rewriting/shape.py +++ b/aesara/tensor/rewriting/shape.py @@ -364,8 +364,8 @@ def set_shape(self, r, s, override=False): else: shape_vars.append(self.unpack(s[i], r)) assert all( - not hasattr(r.type, "broadcastable") - or not r.type.broadcastable[i] + not hasattr(r.type, "shape") + or r.type.shape[i] != 1 or self.lscalar_one.equals(shape_vars[i]) or self.lscalar_one.equals(extract_constant(shape_vars[i])) for i in range(r.type.ndim) @@ -447,9 +447,9 @@ def update_shape(self, r, other_r): merged_shape.append(other_shape[i]) assert all( ( - not hasattr(r.type, "broadcastable") - or not r.type.broadcastable[i] - and not other_r.type.broadcastable[i] + not hasattr(r.type, "shape") + or r.type.shape[i] != 1 + and other_r.type.shape[i] != 1 ) or self.lscalar_one.equals(merged_shape[i]) or self.lscalar_one.equals( @@ -474,8 +474,8 @@ def set_shape_i(self, r, i, s_i): else: new_shape.append(s_j) assert all( - not hasattr(r.type, "broadcastable") - or not r.type.broadcastable[idx] + not hasattr(r.type, "shape") + or r.type.shape[idx] != 1 or self.lscalar_one.equals(new_shape[idx]) or self.lscalar_one.equals(extract_constant(new_shape[idx])) for idx in range(r.type.ndim) @@ -781,7 +781,11 @@ def f(fgraph, node): # We should try to figure out why we lost the information about this # constant value... but in the meantime, better not apply this # rewrite. - if rval.broadcastable == node.outputs[0].broadcastable: + if rval.type.ndim == node.outputs[0].type.ndim and all( + s1 == s1 + for s1, s2 in zip(rval.type.shape, node.outputs[0].type.shape) + if s1 == 1 or s2 == 1 + ): return [rval] else: return False @@ -797,27 +801,31 @@ def f(fgraph, node): @register_stabilize @node_rewriter([Reshape]) def local_useless_reshape(fgraph, node): - """ - Remove two kinds of useless reshape. + """Remove two kinds of useless `Reshape`. - Remove Reshape when both the input and output have a single dimension. - Remove Reshape when reshaping to the shape of the input. + - Remove `Reshape` when both the input and output have a single dimension. + - Remove `Reshape` when reshaping to the shape of the input. """ - op = node.op - if not isinstance(op, Reshape): - return False - inp = node.inputs[0] output = node.outputs[0] output_shape = node.inputs[1] - if inp.ndim != output.ndim: + if inp.type.ndim != output.type.ndim: return False # Simple case: both input and output have a single dimension. - # This could hide errors if the user provides inconsistent shapes. - if inp.ndim == 1 and output.ndim == 1 and inp.broadcastable == output.broadcastable: + # TODO FIXME XXX: This could hide errors if the user provides inconsistent + # shapes. 
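Several of these rewrites rely on the same compatibility test, used here and in `local_useless_unbroadcast` below: two types are interchangeable when they have the same number of dimensions and agree on every dimension that either side knows to be length 1; unknown dimensions are left to runtime. A standalone sketch of that predicate (the helper name is illustrative):

    def same_broadcast_pattern(shape1, shape2):
        """True if the static shapes agree wherever either one is 1."""
        return len(shape1) == len(shape2) and all(
            s1 == s2 for s1, s2 in zip(shape1, shape2) if s1 == 1 or s2 == 1
        )

    assert same_broadcast_pattern((1, None), (1, 5))        # unknown dims not checked
    assert not same_broadcast_pattern((1, None), (None, None))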
+ if ( + inp.type.ndim == 1 + and output.type.ndim == 1 + and all( + s1 == s2 + for s1, s2 in zip(inp.type.shape, output.type.shape) + if s1 == 1 or s2 == 1 + ) + ): return [inp] # Second case: all the shapes match the input shape @@ -835,8 +843,8 @@ def local_useless_reshape(fgraph, node): shape_feature = getattr(fgraph, "shape_feature", None) nb_m1 = 0 - shape_match = [False] * inp.ndim - for dim in range(inp.ndim): + shape_match = [False] * inp.type.ndim + for dim in range(inp.type.ndim): outshp_i = output_shape_is[dim] # Match Shape_i{dim}(input) if ( @@ -862,9 +870,9 @@ def local_useless_reshape(fgraph, node): shape_match[dim] = True continue - # Match 1 if input.broadcastable[dim] is True + # Match 1 if input.type.shape[dim] == 1 cst_outshp_i = extract_constant(outshp_i, only_process_constants=1) - if inp.broadcastable[dim] and cst_outshp_i == 1: + if inp.type.shape[dim] == 1 and cst_outshp_i == 1: shape_match[dim] = True continue @@ -895,22 +903,18 @@ def local_useless_reshape(fgraph, node): @register_canonicalize @node_rewriter([Reshape]) def local_reshape_to_dimshuffle(fgraph, node): - """ - Broadcastable dimensions in Reshape are replaced with dimshuffle. + r"""Replace broadcastable dimensions in `Reshape` nodes with `DimShuffle`\s. - The goal is to avoid using reshape to add or remove broadcastable - dimensions, but use dimshuffle instead, so dimshuffles can cancel out - or be removed later on. + The goal is to avoid using `Reshape` to add or remove broadcastable + dimensions, and to use `DimShuffle` instead, since `DimShuffle`\s can + cancel out and/or be removed later on. For example: - - reshape(x, (1, n)) --> dimshuffle{x,0}(reshape(x, (n,)) + - reshape(x, (1, n)) -> DimShuffle{x,0}(Reshape(x, (n,)) - reshape(x, (1, m, 1, n, 1, 1)) - --> dimshuffle{x,0,x,1,x,x}(reshape(x, (m, n))) + -> DimShuffle{x,0,x,1,x,x}(Reshape(x, (m, n))) """ op = node.op - if not isinstance(op, Reshape): - return False - inp = node.inputs[0] output = node.outputs[0] output_shape = node.inputs[1] @@ -931,10 +935,15 @@ def local_reshape_to_dimshuffle(fgraph, node): dimshuffle_new_order.append(index) new_output_shape.append(dim) index = index + 1 - if index != output.ndim: + + if index != output.type.ndim: inner = op.__class__(len(new_output_shape))(inp, new_output_shape) copy_stack_trace(output, inner) - new_node = [DimShuffle(inner.type.broadcastable, dimshuffle_new_order)(inner)] + new_node = [ + DimShuffle(tuple(s == 1 for s in inner.type.shape), dimshuffle_new_order)( + inner + ) + ] copy_stack_trace(output, new_node) return new_node @@ -1023,8 +1032,8 @@ def local_Shape_of_SpecifyShape(fgraph, node): @register_useless @register_canonicalize @node_rewriter([Shape_i]) -def local_Shape_i_of_broadcastable(fgraph, node): - """Replace ``shape_i(x, i)`` with ``1`` when ``x.broadcastable[i]`` is ``True``.""" +def local_Shape_i_ground(fgraph, node): + """Replace ``shape_i(x, i)`` with ``s`` when ``x.type.shape[i] == s``.""" if not isinstance(node.op, Shape_i): return False @@ -1034,8 +1043,9 @@ def local_Shape_i_of_broadcastable(fgraph, node): if not isinstance(shape_arg.type, TensorType): return False - if shape_arg.broadcastable[node.op.i]: - return [as_tensor_variable(1, dtype=np.int64)] + s_val = shape_arg.type.shape[node.op.i] + if s_val is not None: + return [as_tensor_variable(s_val, dtype=np.int64)] @register_specialize @@ -1098,10 +1108,9 @@ def local_useless_dimshuffle_in_reshape(fgraph, node): new_order = node.inputs[0].owner.op.new_order inp = node.inputs[0].owner.inputs[0] - broadcastables = 
node.inputs[0].broadcastable new_order_of_nonbroadcast = [] - for i, bd in zip(new_order, broadcastables): - if not bd: + for i, s in zip(new_order, node.inputs[0].type.shape): + if s != 1: new_order_of_nonbroadcast.append(i) no_change_in_order = all( new_order_of_nonbroadcast[i] <= new_order_of_nonbroadcast[i + 1] @@ -1125,7 +1134,11 @@ def local_useless_unbroadcast(fgraph, node): """ if isinstance(node.op, Unbroadcast): x = node.inputs[0] - if x.broadcastable == node.outputs[0].broadcastable: + if x.type.ndim == node.outputs[0].type.ndim and all( + s1 == s2 + for s1, s2 in zip(x.type.shape, node.outputs[0].type.shape) + if s1 == 1 or s2 == 1 + ): # No broadcastable flag was modified # No need to copy over stack trace, # because x should already have a stack trace. diff --git a/aesara/tensor/shape.py b/aesara/tensor/shape.py index f6ed3590c5..bc4e4cebb8 100644 --- a/aesara/tensor/shape.py +++ b/aesara/tensor/shape.py @@ -8,6 +8,7 @@ import aesara from aesara.gradient import DisconnectedType from aesara.graph.basic import Apply, Variable +from aesara.graph.type import HasShape from aesara.link.c.op import COp from aesara.link.c.params_type import ParamsType from aesara.misc.safe_asarray import _asarray @@ -21,6 +22,9 @@ from aesara.tensor.var import TensorConstant, TensorVariable +ShapeValueType = Union[None, np.integer, int, Variable] + + def register_shape_c_code(type, code, version=()): """ Tell Shape Op how to generate C code for an Aesara Type. @@ -158,18 +162,28 @@ def _get_vector_length_Shape(op, var): def shape_tuple(x: TensorVariable) -> Tuple[Variable, ...]: - """Get a tuple of symbolic shape values. + r"""Get a tuple of symbolic shape values. + + This will return `ScalarConstant`\s for static shape values. - This will return a `ScalarConstant` with the value ``1`` wherever - broadcastable is ``True``. """ - one_at = aesara.scalar.ScalarConstant(aesara.scalar.int64, 1) - return tuple( - one_at if getattr(sh, "value", sh) == 1 or bcast else sh - for sh, bcast in zip( - shape(x), getattr(x, "broadcastable", (False,) * x.type.ndim) - ) - ) + if not isinstance(x.type, HasShape): + # We assume/call it a scalar + return () + + res = () + symbolic_shape = shape(x) + static_shape = x.type.shape + for i in range(x.type.ndim): + shape_val = static_shape[i] + + if shape_val is not None: + # TODO: Why not use uint64? + res += (aesara.scalar.ScalarConstant(aesara.scalar.int64, shape_val),) + else: + res += (symbolic_shape[i],) + + return res class Shape_i(COp): @@ -530,13 +544,12 @@ def c_code_cache_version(self): def specify_shape( x: Union[np.ndarray, Number, Variable], - shape: Union[ - int, List[Union[int, Variable]], Tuple[Union[int, Variable]], Variable - ], + shape: Union[ShapeValueType, List[ShapeValueType], Tuple[ShapeValueType]], ): """Specify a fixed shape for a `Variable`. - If a dimension's shape value is ``None``, the size of that dimension is not considered fixed/static at runtime. + If a dimension's shape value is ``None``, the size of that dimension is not + considered fixed/static at runtime. """ if not isinstance(shape, (tuple, list)): @@ -608,28 +621,27 @@ def make_node(self, x, shp): # except when shp is constant and empty # (in this case, shp.dtype does not matter anymore). 
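For reference, this is how the static-shape bookkeeping is expected to surface through `specify_shape` (doctest-style, with the output shown as expected under these changes):

    >>> import aesara.tensor as at
    >>> from aesara.tensor.shape import specify_shape
    >>> x = at.matrix("x")                 # static shape (None, None)
    >>> y = specify_shape(x, (None, 4))    # fix only the second dimension
    >>> y.type.shape
    (None, 4)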
raise TypeError(f"Shape must be integers; got {shp.dtype}") + assert shp.ndim == 1 + if isinstance(shp, TensorConstant): - bcast = [s == 1 for s in shp.data] - return Apply(self, [x, shp], [tensor(x.type.dtype, bcast)]) + out_shape = tuple(int(s) if s >= 0 else None for s in shp.data) else: - bcasts = [False] * self.ndim + out_shape = [None] * self.ndim shp_list = shp_orig if hasattr(shp_orig, "ndim") and shp_orig.ndim == 0: shp_list = [shp_orig] for index in range(self.ndim): y = shp_list[index] y = at.as_tensor_variable(y) - # Try to see if we can infer that y has a constant value of 1. - # If so, that dimension should be broadcastable. try: - bcasts[index] = ( - hasattr(y, "get_scalar_constant_value") - and y.get_scalar_constant_value() == 1 - ) + s_val = at.get_scalar_constant_value(y).item() + if s_val >= 0: + out_shape[index] = s_val except NotScalarConstantError: pass - return Apply(self, [x, shp], [tensor(x.type.dtype, bcasts)]) + + return Apply(self, [x, shp], [tensor(x.type.dtype, shape=out_shape)]) def perform(self, node, inp, out_, params): x, shp = inp @@ -769,7 +781,7 @@ def c_code(self, node, name, inputs, outputs, sub): def reshape(x, newshape, ndim=None): if ndim is None: newshape = at.as_tensor_variable(newshape) - if newshape.ndim != 1: + if newshape.type.ndim != 1: raise TypeError( "New shape in reshape must be a vector or a list/tuple of" f" scalar. Got {newshape} after conversion to a vector." @@ -894,8 +906,7 @@ def shape_padaxis(t, axis): def specify_broadcastable(x, *axes): - """ - Specify the input as being broadcastable in the specified axes. + """Specify the input as being broadcastable in the specified axes. For example, specify_broadcastable(x, 0) will make the first dimension of x broadcastable. When performing the function, if the length of @@ -924,7 +935,7 @@ def specify_broadcastable(x, *axes): if max(axes) >= x.type.ndim: raise ValueError("Trying to specify broadcastable of non-existent dimension") - shape_info = [1 if i in axes else None for i in range(len(x.type.shape))] + shape_info = [1 if i in axes else s for i, s in enumerate(x.type.shape)] return specify_shape(x, shape_info) diff --git a/aesara/tensor/sharedvar.py b/aesara/tensor/sharedvar.py index 76d9f3148b..86de487247 100644 --- a/aesara/tensor/sharedvar.py +++ b/aesara/tensor/sharedvar.py @@ -1,4 +1,3 @@ -import traceback import warnings import numpy as np @@ -20,9 +19,25 @@ def load_shared_variable(val): return tensor_constructor(val) -# _tensor_py_operators is first to have its version of __{gt,ge,lt,le}__ class TensorSharedVariable(_tensor_py_operators, SharedVariable): - pass + def zero(self, borrow: bool = False): + r"""Set the values of a shared variable to 0. + + Parameters + ---------- + borrow + ``True`` to modify the value of a shared variable directly by using + its previous value. Potentially this can cause problems regarding + to the aliased memory. + + Changes done with this function will be visible to all functions using + this `SharedVariable`. + + """ + if borrow: + self.container.value[...] = 0 + else: + self.container.value = 0 * self.container.value @_get_vector_length.register(TensorSharedVariable) @@ -30,7 +45,7 @@ def _get_vector_length_TensorSharedVariable(var_inst, var): return len(var.get_value(borrow=True)) -@shared_constructor +@shared_constructor.register(np.ndarray) def tensor_constructor( value, name=None, @@ -41,8 +56,7 @@ def tensor_constructor( target="cpu", broadcastable=None, ): - """ - SharedVariable Constructor for TensorType. 
+ r"""`SharedVariable` constructor for `TensorType`\s. Notes ----- @@ -61,41 +75,36 @@ def tensor_constructor( if target != "cpu": raise TypeError("not for cpu") - if not isinstance(value, np.ndarray): - raise TypeError() - - # if no shape is given, then the default is to assume that - # the value might be resized in any dimension in the future. - # + # If no shape is given, then the default is to assume that the value might + # be resized in any dimension in the future. if shape is None: - shape = (False,) * len(value.shape) + shape = (None,) * value.ndim + type = TensorType(value.dtype, shape=shape) + return TensorSharedVariable( type=type, value=np.array(value, copy=(not borrow)), - name=name, strict=strict, allow_downcast=allow_downcast, + name=name, ) -# TensorSharedVariable brings in the tensor operators, is not ideal, but works -# as long as we don't do purely scalar-scalar operations -# _tensor_py_operators is first to have its version of __{gt,ge,lt,le}__ -# -# N.B. THERE IS ANOTHER CLASS CALLED ScalarSharedVariable in the -# aesara.scalar.sharedvar file. It is not registered as a shared_constructor, -# this one is. -class ScalarSharedVariable(_tensor_py_operators, SharedVariable): +class ScalarSharedVariable(TensorSharedVariable): pass -@shared_constructor +@shared_constructor.register(np.number) +@shared_constructor.register(float) +@shared_constructor.register(int) +@shared_constructor.register(complex) def scalar_constructor( value, name=None, strict=False, allow_downcast=None, borrow=False, target="cpu" ): - """ - SharedVariable constructor for scalar values. Default: int64 or float64. + """`SharedVariable` constructor for scalar values. + + Default: int64 or float64. Notes ----- @@ -109,28 +118,22 @@ def scalar_constructor( if target != "cpu": raise TypeError("not for cpu") - if not isinstance(value, (np.number, float, int, complex)): - raise TypeError() try: dtype = value.dtype - except Exception: + except AttributeError: dtype = np.asarray(value).dtype dtype = str(dtype) value = _asarray(value, dtype=dtype) - tensor_type = TensorType(dtype=str(value.dtype), shape=[]) + tensor_type = TensorType(dtype=str(value.dtype), shape=()) - try: - # Do not pass the dtype to asarray because we want this to fail if - # strict is True and the types do not match. - rval = ScalarSharedVariable( - type=tensor_type, - value=np.array(value, copy=True), - name=name, - strict=strict, - allow_downcast=allow_downcast, - ) - return rval - except Exception: - traceback.print_exc() - raise + # Do not pass the dtype to asarray because we want this to fail if + # strict is True and the types do not match. 
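With the constructors registered on the `shared_constructor` dispatcher rather than signalling "not mine" by raising `TypeError`, the user-facing behavior of `aesara.shared` is meant to stay the same:

    >>> import numpy as np
    >>> import aesara
    >>> aesara.shared(np.zeros((2, 3))).type.shape   # dispatched to tensor_constructor
    (None, None)
    >>> aesara.shared(4.0).type.shape                # dispatched to scalar_constructor
    ()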
+ rval = ScalarSharedVariable( + type=tensor_type, + value=np.array(value, copy=True), + name=name, + strict=strict, + allow_downcast=allow_downcast, + ) + return rval diff --git a/aesara/tensor/signal/pool.py b/aesara/tensor/signal/pool.py index bb46501244..543896a022 100755 --- a/aesara/tensor/signal/pool.py +++ b/aesara/tensor/signal/pool.py @@ -542,8 +542,10 @@ def make_node(self, x, ws, stride=None, pad=None): if pad.dtype not in int_dtypes: raise TypeError("Padding parameters must be ints.") # If the input shape are broadcastable we can have 0 in the output shape - broad = x.broadcastable[:-nd] + (False,) * nd - out = TensorType(x.dtype, broad) + out_shape = tuple( + 1 if s == 1 else None for s in x.type.shape[:-nd] + (None,) * nd + ) + out = TensorType(x.dtype, shape=out_shape) return Apply(self, [x, ws, stride, pad], [out()]) def perform(self, node, inp, out, params): @@ -2208,8 +2210,10 @@ def make_node(self, x, eval_point, ws, stride=None, pad=None): if not pad.dtype.startswith("int"): raise TypeError("Padding parameters must be ints.") # If the input shape are broadcastable we can have 0 in the output shape - broad = x.broadcastable[:-nd] + (False,) * nd - out = TensorType(eval_point.dtype, broad) + out_shape = tuple( + 1 if s == 1 else None for s in x.type.shape[:-nd] + (None,) * nd + ) + out = TensorType(eval_point.dtype, shape=out_shape) return Apply(self, [x, eval_point, ws, stride, pad], [out()]) def perform(self, node, inp, out, params): diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index 8bcd353a72..f8b265d8d6 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -1,9 +1,10 @@ import logging import warnings -from typing import Union +from typing import TYPE_CHECKING, Union import numpy as np import scipy.linalg +from typing_extensions import Literal import aesara.tensor from aesara.graph.basic import Apply @@ -11,10 +12,15 @@ from aesara.tensor import as_tensor_variable from aesara.tensor import basic as at from aesara.tensor import math as atm +from aesara.tensor.shape import reshape from aesara.tensor.type import matrix, tensor, vector from aesara.tensor.var import TensorVariable +if TYPE_CHECKING: + from aesara.tensor import TensorLike + + logger = logging.getLogger(__name__) @@ -122,73 +128,6 @@ def conjugate_solve_triangular(outer, inner): cholesky = Cholesky() -class CholeskyGrad(Op): - """""" - - __props__ = ("lower", "destructive") - - def __init__(self, lower=True): - self.lower = lower - self.destructive = False - - def make_node(self, x, l, dz): - x = as_tensor_variable(x) - l = as_tensor_variable(l) - dz = as_tensor_variable(dz) - assert x.ndim == 2 - assert l.ndim == 2 - assert dz.ndim == 2 - assert ( - l.owner.op.lower == self.lower - ), "lower/upper mismatch between Cholesky op and CholeskyGrad op" - return Apply(self, [x, l, dz], [x.type()]) - - def perform(self, node, inputs, outputs): - """ - Implements the "reverse-mode" gradient [#]_ for the - Cholesky factorization of a positive-definite matrix. - - References - ---------- - .. [#] S. P. Smith. "Differentiation of the Cholesky Algorithm". - Journal of Computational and Graphical Statistics, - Vol. 4, No. 2 (Jun.,1995), pp. 
134-147 - http://www.jstor.org/stable/1390762 - - """ - x = inputs[0] - L = inputs[1] - dz = inputs[2] - dx = outputs[0] - N = x.shape[0] - if self.lower: - F = np.tril(dz) - for k in range(N - 1, -1, -1): - for j in range(k + 1, N): - for i in range(j, N): - F[i, k] -= F[i, j] * L[j, k] - F[j, k] -= F[i, j] * L[i, k] - for j in range(k + 1, N): - F[j, k] /= L[k, k] - F[k, k] -= L[j, k] * F[j, k] - F[k, k] /= 2 * L[k, k] - else: - F = np.triu(dz) - for k in range(N - 1, -1, -1): - for j in range(k + 1, N): - for i in range(j, N): - F[k, i] -= F[j, i] * L[k, j] - F[k, j] -= F[j, i] * L[k, i] - for j in range(k + 1, N): - F[k, j] /= L[k, k] - F[k, k] -= L[k, j] * F[k, j] - F[k, k] /= 2 * L[k, k] - dx[0] = F - - def infer_shape(self, fgraph, node, shapes): - return [shapes[0]] - - class CholeskySolve(Op): __props__ = ("lower", "check_finite") @@ -215,7 +154,7 @@ def make_node(self, C, b): o_dtype = scipy.linalg.solve( np.eye(1).astype(C.dtype), np.eye(1).astype(b.dtype) ).dtype - x = tensor(shape=b.broadcastable, dtype=o_dtype) + x = tensor(dtype=o_dtype, shape=b.type.shape) return Apply(self, [C, b], [x]) def perform(self, node, inputs, output_storage): @@ -292,7 +231,7 @@ def make_node(self, A, b): o_dtype = scipy.linalg.solve( np.eye(1).astype(A.dtype), np.eye(1).astype(b.dtype) ).dtype - x = tensor(shape=b.broadcastable, dtype=o_dtype) + x = tensor(dtype=o_dtype, shape=b.type.shape) return Apply(self, [A, b], [x]) def infer_shape(self, fgraph, node, shapes): @@ -735,6 +674,159 @@ def perform(self, node, inputs, outputs): expm = Expm() + +class SolveContinuousLyapunov(Op): + __props__ = () + + def make_node(self, A, B): + A = as_tensor_variable(A) + B = as_tensor_variable(B) + + out_dtype = aesara.scalar.upcast(A.dtype, B.dtype) + X = aesara.tensor.matrix(dtype=out_dtype) + + return aesara.graph.basic.Apply(self, [A, B], [X]) + + def perform(self, node, inputs, output_storage): + (A, B) = inputs + X = output_storage[0] + + X[0] = scipy.linalg.solve_continuous_lyapunov(A, B) + + def infer_shape(self, fgraph, node, shapes): + return [shapes[0]] + + def grad(self, inputs, output_grads): + # Gradient computations come from Kao and Hennequin (2020), https://arxiv.org/pdf/2011.11430.pdf + # Note that they write the equation as AX + XA.H + Q = 0, while scipy uses AX + XA^H = Q, + # so minor adjustments need to be made. 
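For context, SciPy's `solve_continuous_lyapunov(A, Q)`, which `perform` above delegates to, returns the `X` satisfying `A X + X A^H = Q`; the gradient expressions below follow Kao and Hennequin adjusted to that convention. A quick numerical check (the `-4 * I` shift only makes the random example matrix stable):

    import numpy as np
    import scipy.linalg

    rng = np.random.default_rng(0)
    A = rng.normal(size=(4, 4)) - 4 * np.eye(4)   # eigenvalues pushed into the left half-plane
    Q = rng.normal(size=(4, 4))
    Q = Q + Q.T                                   # symmetric right-hand side

    X = scipy.linalg.solve_continuous_lyapunov(A, Q)
    assert np.allclose(A @ X + X @ A.conj().T, Q)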
+ A, Q = inputs + (dX,) = output_grads + + X = self(A, Q) + S = self(A.conj().T, -dX) # Eq 31, adjusted + + A_bar = S.dot(X.conj().T) + S.conj().T.dot(X) + Q_bar = -S # Eq 29, adjusted + + return [A_bar, Q_bar] + + +class BilinearSolveDiscreteLyapunov(Op): + def make_node(self, A, B): + A = as_tensor_variable(A) + B = as_tensor_variable(B) + + out_dtype = aesara.scalar.upcast(A.dtype, B.dtype) + X = aesara.tensor.matrix(dtype=out_dtype) + + return aesara.graph.basic.Apply(self, [A, B], [X]) + + def perform(self, node, inputs, output_storage): + (A, B) = inputs + X = output_storage[0] + + X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method="bilinear") + + def infer_shape(self, fgraph, node, shapes): + return [shapes[0]] + + def grad(self, inputs, output_grads): + # Gradient computations come from Kao and Hennequin (2020), https://arxiv.org/pdf/2011.11430.pdf + A, Q = inputs + (dX,) = output_grads + + X = self(A, Q) + + # Eq 41, note that it is not written as a proper Lyapunov equation + S = self(A.conj().T, dX) + + A_bar = aesara.tensor.linalg.matrix_dot( + S, A, X.conj().T + ) + aesara.tensor.linalg.matrix_dot(S.conj().T, A, X) + Q_bar = S + return [A_bar, Q_bar] + + +_solve_continuous_lyapunov = SolveContinuousLyapunov() +_solve_bilinear_direct_lyapunov = BilinearSolveDiscreteLyapunov() + + +def iscomplexobj(x): + type_ = x.type + dtype = type_.dtype + return "complex" in dtype + + +def _direct_solve_discrete_lyapunov(A: "TensorLike", Q: "TensorLike") -> TensorVariable: + A_ = as_tensor_variable(A) + Q_ = as_tensor_variable(Q) + + if "complex" in A_.type.dtype: + AA = kron(A_, A_.conj()) + else: + AA = kron(A_, A_) + + X = solve(at.eye(AA.shape[0]) - AA, Q_.ravel()) + return reshape(X, Q_.shape) + + +def solve_discrete_lyapunov( + A: "TensorLike", Q: "TensorLike", method: Literal["direct", "bilinear"] = "direct" +) -> TensorVariable: + """Solve the discrete Lyapunov equation :math:`A X A^H - X = Q`. + + Parameters + ---------- + A + Square matrix of shape N x N; must have the same shape as Q + Q + Square matrix of shape N x N; must have the same shape as A + method + Solver method used, one of ``"direct"`` or ``"bilinear"``. ``"direct"`` + solves the problem directly via matrix inversion. This has a pure + Aesara implementation and can thus be cross-compiled to supported + backends, and should be preferred when ``N`` is not large. The direct + method scales poorly with the size of ``N``, and the bilinear can be + used in these cases. + + Returns + ------- + Square matrix of shape ``N x N``, representing the solution to the + Lyapunov equation + + """ + if method not in ["direct", "bilinear"]: + raise ValueError( + f'Parameter "method" must be one of "direct" or "bilinear", found {method}' + ) + + if method == "direct": + return _direct_solve_discrete_lyapunov(A, Q) + if method == "bilinear": + return _solve_bilinear_direct_lyapunov(A, Q) + + +def solve_continuous_lyapunov(A: "TensorLike", Q: "TensorLike") -> TensorVariable: + """Solve the continuous Lyapunov equation :math:`A X + X A^H + Q = 0`. + + Parameters + ---------- + A + Square matrix of shape ``N x N``; must have the same shape as `Q`. + Q + Square matrix of shape ``N x N``; must have the same shape as `A`. 
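The direct solver above relies on the vectorization identity `ravel(A X A^H) = (A ⊗ conj(A)) ravel(X)` for row-major ravel, so the discrete equation `A X A^H - X + Q = 0` reduces to a single linear solve. A NumPy sketch of the same computation, checked against SciPy (the random data is only for illustration):

    import numpy as np
    import scipy.linalg

    rng = np.random.default_rng(0)
    A = 0.3 * rng.normal(size=(3, 3))   # keep the spectral radius well below 1
    Q = rng.normal(size=(3, 3))
    Q = Q + Q.T

    AA = np.kron(A, A.conj())           # A.conj() == A for real inputs
    X = np.linalg.solve(np.eye(AA.shape[0]) - AA, Q.ravel()).reshape(Q.shape)

    assert np.allclose(A @ X @ A.conj().T - X + Q, 0)
    assert np.allclose(X, scipy.linalg.solve_discrete_lyapunov(A, Q))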
+ + Returns + ------- + Square matrix of shape ``N x N``, representing the solution to the + Lyapunov equation + + """ + + return _solve_continuous_lyapunov(A, Q) + + __all__ = [ "cholesky", "solve", diff --git a/aesara/tensor/sort.py b/aesara/tensor/sort.py index 20bd23fcfc..9d3f870dcf 100644 --- a/aesara/tensor/sort.py +++ b/aesara/tensor/sort.py @@ -414,9 +414,13 @@ def make_node(self, inp, kth): _check_tensor_is_scalar(kth) outs = [] if self.return_values: - outs.append(inp.type()) + outs.append( + TensorType(dtype=inp.type.dtype, shape=(None,) * inp.type.ndim)() + ) if self.return_indices: - outs.append(TensorType(dtype=self.idx_dtype, shape=inp.type.shape)()) + outs.append( + TensorType(dtype=self.idx_dtype, shape=(None,) * inp.type.ndim)() + ) return Apply(self, [inp, kth], outs) def perform(self, node, inputs, output_storage): diff --git a/aesara/tensor/subtensor.py b/aesara/tensor/subtensor.py index f9abfaf784..4260fb444c 100644 --- a/aesara/tensor/subtensor.py +++ b/aesara/tensor/subtensor.py @@ -724,10 +724,11 @@ def make_node(self, x, *inputs): padded = get_constant_idx( self.idx_list, (None,) + inputs, allow_partial=True ) + [slice(None, None, None)] * (x.type.ndim - len(idx_list)) - broadcastable = [] - for i, (p, bc) in enumerate(zip(padded, x.type.broadcastable)): + + out_shape = [] + for i, (p, s) in enumerate(zip(padded, x.type.shape)): if isinstance(p, slice): - if bc: + if s == 1: start = p.start try: start = get_scalar_constant_value(start) @@ -741,15 +742,15 @@ def make_node(self, x, *inputs): isinstance(p.stop, (int, np.integer, np.ndarray)) and p.stop > start ): - broadcastable.append(True) + out_shape.append(1) continue - broadcastable.append(False) + out_shape.append(None) return Apply( self, (x,) + inputs, - [tensor(dtype=x.type.dtype, shape=broadcastable)], + [tensor(dtype=x.type.dtype, shape=out_shape)], ) def perform(self, node, inputs, out_): @@ -1948,8 +1949,9 @@ def make_node(self, x, ilist): raise TypeError("index must be vector") if x_.type.ndim == 0: raise TypeError("cannot index into a scalar") - bcast = (ilist_.broadcastable[0],) + x_.broadcastable[1:] - return Apply(self, [x_, ilist_], [TensorType(dtype=x.dtype, shape=bcast)()]) + out_shape = (ilist_.type.shape[0],) + x_.type.shape[1:] + out_shape = tuple(1 if s == 1 else None for s in out_shape) + return Apply(self, [x_, ilist_], [TensorType(dtype=x.dtype, shape=out_shape)()]) def perform(self, node, inp, out_): x, i = inp @@ -2551,17 +2553,14 @@ def make_node(self, x, *index): x = as_tensor_variable(x) index = tuple(map(as_index_variable, index)) - # We only want the broadcast information, and we don't need recursive - # `Subtensor` calls, so we create a fake symbolic shape tuple and - # identify the broadcast dimensions from the shape result of this - # entire subtensor operation. + # We create a fake symbolic shape tuple and identify the broadcast + # dimensions from the shape result of this entire subtensor operation. 
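+        # For example (illustrative only): a static input shape like (1, None)
+        # is mapped to the fake shape ``(1, <int64 scalar>)``; entries of the
+        # resulting indexed shape that are constants are kept as static
+        # lengths, while symbolic entries are recorded as unknown (``None``).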
with config.change_flags(compute_test_value="off"): fake_shape = tuple( - tensor(dtype="int64", shape=()) if not bcast else 1 - for bcast in x.broadcastable + tensor(dtype="int64", shape=()) if s != 1 else 1 for s in x.type.shape ) - bcast_index = tuple( + fake_index = tuple( chain.from_iterable( aesara.tensor.basic.nonzero(idx) if getattr(idx, "ndim", 0) > 0 @@ -2571,15 +2570,15 @@ def make_node(self, x, *index): ) ) - bcast = [ - getattr(i, "value", i) == 1 - for i in indexed_result_shape(fake_shape, bcast_index) - ] + out_shape = tuple( + i.value if isinstance(i, Constant) else None + for i in indexed_result_shape(fake_shape, fake_index) + ) return Apply( self, (x,) + index, - [tensor(dtype=x.type.dtype, shape=bcast)], + [tensor(dtype=x.type.dtype, shape=out_shape)], ) def R_op(self, inputs, eval_points): @@ -2682,7 +2681,12 @@ def make_node(self, x, y, *inputs): return Apply( self, (x, y) + tuple(new_inputs), - [tensor(dtype=x.type.dtype, shape=x.type.broadcastable)], + [ + tensor( + dtype=x.type.dtype, + shape=tuple(1 if s == 1 else None for s in x.type.shape), + ) + ], ) def perform(self, node, inputs, out_): diff --git a/aesara/tensor/type.py b/aesara/tensor/type.py index df6fafa983..5890b6e22e 100644 --- a/aesara/tensor/type.py +++ b/aesara/tensor/type.py @@ -1,6 +1,6 @@ import logging import warnings -from typing import Iterable, Optional, Tuple, Union +from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Union import numpy as np @@ -15,6 +15,12 @@ from aesara.utils import apply_across_args +if TYPE_CHECKING: + from numpy.typing import DTypeLike + + from aesara.tensor.var import TensorVariable + + _logger = logging.getLogger("aesara.tensor.type") @@ -380,7 +386,18 @@ def __str__(self): if self.name: return self.name else: - return f"TensorType({self.dtype}, {self.shape})" + + def shape_str(s): + if s is None: + return "?" 
+ else: + return str(s) + + formatted_shape = ", ".join([shape_str(s) for s in self.shape]) + if len(self.shape) == 1: + formatted_shape += "," + + return f"TensorType({self.dtype}, ({formatted_shape}))" def __repr__(self): return str(self) @@ -805,14 +822,14 @@ def scalar(name=None, dtype=None): float_scalar_types = float_types complex_scalar_types = complex_types -cvector = TensorType("complex64", (False,)) -zvector = TensorType("complex128", (False,)) -fvector = TensorType("float32", (False,)) -dvector = TensorType("float64", (False,)) -bvector = TensorType("int8", (False,)) -wvector = TensorType("int16", (False,)) -ivector = TensorType("int32", (False,)) -lvector = TensorType("int64", (False,)) +cvector = TensorType("complex64", shape=(None,)) +zvector = TensorType("complex128", shape=(None,)) +fvector = TensorType("float32", shape=(None,)) +dvector = TensorType("float64", shape=(None,)) +bvector = TensorType("int8", shape=(None,)) +wvector = TensorType("int16", shape=(None,)) +ivector = TensorType("int32", shape=(None,)) +lvector = TensorType("int64", shape=(None,)) def vector(name=None, dtype=None): @@ -828,7 +845,7 @@ def vector(name=None, dtype=None): """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False,)) + type = TensorType(dtype, shape=(None,)) return type(name) @@ -840,14 +857,14 @@ def vector(name=None, dtype=None): float_vector_types = fvector, dvector complex_vector_types = cvector, zvector -cmatrix = TensorType("complex64", (False, False)) -zmatrix = TensorType("complex128", (False, False)) -fmatrix = TensorType("float32", (False, False)) -dmatrix = TensorType("float64", (False, False)) -bmatrix = TensorType("int8", (False, False)) -wmatrix = TensorType("int16", (False, False)) -imatrix = TensorType("int32", (False, False)) -lmatrix = TensorType("int64", (False, False)) +cmatrix = TensorType("complex64", shape=(None, None)) +zmatrix = TensorType("complex128", shape=(None, None)) +fmatrix = TensorType("float32", shape=(None, None)) +dmatrix = TensorType("float64", shape=(None, None)) +bmatrix = TensorType("int8", shape=(None, None)) +wmatrix = TensorType("int16", shape=(None, None)) +imatrix = TensorType("int32", shape=(None, None)) +lmatrix = TensorType("int64", shape=(None, None)) def matrix(name=None, dtype=None): @@ -863,7 +880,7 @@ def matrix(name=None, dtype=None): """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False, False)) + type = TensorType(dtype, shape=(None, None)) return type(name) @@ -875,18 +892,18 @@ def matrix(name=None, dtype=None): float_matrix_types = fmatrix, dmatrix complex_matrix_types = cmatrix, zmatrix -crow = TensorType("complex64", (True, False)) -zrow = TensorType("complex128", (True, False)) -frow = TensorType("float32", (True, False)) -drow = TensorType("float64", (True, False)) -brow = TensorType("int8", (True, False)) -wrow = TensorType("int16", (True, False)) -irow = TensorType("int32", (True, False)) -lrow = TensorType("int64", (True, False)) +crow = TensorType("complex64", shape=(1, None)) +zrow = TensorType("complex128", shape=(1, None)) +frow = TensorType("float32", shape=(1, None)) +drow = TensorType("float64", shape=(1, None)) +brow = TensorType("int8", shape=(1, None)) +wrow = TensorType("int16", shape=(1, None)) +irow = TensorType("int32", shape=(1, None)) +lrow = TensorType("int64", shape=(1, None)) def row(name=None, dtype=None): - """Return a symbolic row variable (ndim=2, shape=[True,False]). + """Return a symbolic row variable (i.e. shape ``(1, None)``). 
Parameters ---------- @@ -898,65 +915,69 @@ def row(name=None, dtype=None): """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (True, False)) + type = TensorType(dtype, shape=(1, None)) return type(name) rows, frows, drows, irows, lrows = apply_across_args(row, frow, drow, irow, lrow) -ccol = TensorType("complex64", (False, True)) -zcol = TensorType("complex128", (False, True)) -fcol = TensorType("float32", (False, True)) -dcol = TensorType("float64", (False, True)) -bcol = TensorType("int8", (False, True)) -wcol = TensorType("int16", (False, True)) -icol = TensorType("int32", (False, True)) -lcol = TensorType("int64", (False, True)) +ccol = TensorType("complex64", shape=(None, 1)) +zcol = TensorType("complex128", shape=(None, 1)) +fcol = TensorType("float32", shape=(None, 1)) +dcol = TensorType("float64", shape=(None, 1)) +bcol = TensorType("int8", shape=(None, 1)) +wcol = TensorType("int16", shape=(None, 1)) +icol = TensorType("int32", shape=(None, 1)) +lcol = TensorType("int64", shape=(None, 1)) -def col(name=None, dtype=None): - """Return a symbolic column variable (ndim=2, shape=[False,True]). +def col( + name: Optional[str] = None, dtype: Optional["DTypeLike"] = None +) -> "TensorVariable": + """Return a symbolic column variable (i.e. shape ``(None, 1)``). Parameters ---------- - dtype : numeric - None means to use aesara.config.floatX. name A name to attach to this variable. + dtype + ``None`` means to use `aesara.config.floatX`. """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False, True)) + type = TensorType(dtype, shape=(None, 1)) return type(name) cols, fcols, dcols, icols, lcols = apply_across_args(col, fcol, dcol, icol, lcol) -ctensor3 = TensorType("complex64", ((False,) * 3)) -ztensor3 = TensorType("complex128", ((False,) * 3)) -ftensor3 = TensorType("float32", ((False,) * 3)) -dtensor3 = TensorType("float64", ((False,) * 3)) -btensor3 = TensorType("int8", ((False,) * 3)) -wtensor3 = TensorType("int16", ((False,) * 3)) -itensor3 = TensorType("int32", ((False,) * 3)) -ltensor3 = TensorType("int64", ((False,) * 3)) +ctensor3 = TensorType("complex64", shape=((None,) * 3)) +ztensor3 = TensorType("complex128", shape=((None,) * 3)) +ftensor3 = TensorType("float32", shape=((None,) * 3)) +dtensor3 = TensorType("float64", shape=((None,) * 3)) +btensor3 = TensorType("int8", shape=((None,) * 3)) +wtensor3 = TensorType("int16", shape=((None,) * 3)) +itensor3 = TensorType("int32", shape=((None,) * 3)) +ltensor3 = TensorType("int64", shape=((None,) * 3)) -def tensor3(name=None, dtype=None): - """Return a symbolic 3-D variable. +def tensor3( + name: Optional[str] = None, dtype: Optional["DTypeLike"] = None +) -> "TensorVariable": + """Return a symbolic 3D variable. Parameters ---------- - dtype: numeric type - None means to use aesara.config.floatX. name A name to attach to this variable. + dtype + ``None`` means to use `aesara.config.floatX`. 
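+
+    Examples
+    --------
+    A small sketch of the resulting variable's static shape (the dtype is
+    determined by ``aesara.config.floatX``):
+
+    >>> x = tensor3("x")
+    >>> x.type.shape
+    (None, None, None)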
""" if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False, False, False)) + type = TensorType(dtype, shape=(None, None, None)) return type(name) @@ -964,30 +985,32 @@ def tensor3(name=None, dtype=None): tensor3, ftensor3, dtensor3, itensor3, ltensor3 ) -ctensor4 = TensorType("complex64", ((False,) * 4)) -ztensor4 = TensorType("complex128", ((False,) * 4)) -ftensor4 = TensorType("float32", ((False,) * 4)) -dtensor4 = TensorType("float64", ((False,) * 4)) -btensor4 = TensorType("int8", ((False,) * 4)) -wtensor4 = TensorType("int16", ((False,) * 4)) -itensor4 = TensorType("int32", ((False,) * 4)) -ltensor4 = TensorType("int64", ((False,) * 4)) +ctensor4 = TensorType("complex64", shape=((None,) * 4)) +ztensor4 = TensorType("complex128", shape=((None,) * 4)) +ftensor4 = TensorType("float32", shape=((None,) * 4)) +dtensor4 = TensorType("float64", shape=((None,) * 4)) +btensor4 = TensorType("int8", shape=((None,) * 4)) +wtensor4 = TensorType("int16", shape=((None,) * 4)) +itensor4 = TensorType("int32", shape=((None,) * 4)) +ltensor4 = TensorType("int64", shape=((None,) * 4)) -def tensor4(name=None, dtype=None): - """Return a symbolic 4-D variable. +def tensor4( + name: Optional[str] = None, dtype: Optional["DTypeLike"] = None +) -> "TensorVariable": + """Return a symbolic 4D variable. Parameters ---------- - dtype: numeric type - None means to use aesara.config.floatX. name A name to attach to this variable. + dtype + ``None`` means to use `aesara.config.floatX`. """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False, False, False, False)) + type = TensorType(dtype, shape=(None, None, None, None)) return type(name) @@ -995,30 +1018,32 @@ def tensor4(name=None, dtype=None): tensor4, ftensor4, dtensor4, itensor4, ltensor4 ) -ctensor5 = TensorType("complex64", ((False,) * 5)) -ztensor5 = TensorType("complex128", ((False,) * 5)) -ftensor5 = TensorType("float32", ((False,) * 5)) -dtensor5 = TensorType("float64", ((False,) * 5)) -btensor5 = TensorType("int8", ((False,) * 5)) -wtensor5 = TensorType("int16", ((False,) * 5)) -itensor5 = TensorType("int32", ((False,) * 5)) -ltensor5 = TensorType("int64", ((False,) * 5)) +ctensor5 = TensorType("complex64", shape=((None,) * 5)) +ztensor5 = TensorType("complex128", shape=((None,) * 5)) +ftensor5 = TensorType("float32", shape=((None,) * 5)) +dtensor5 = TensorType("float64", shape=((None,) * 5)) +btensor5 = TensorType("int8", shape=((None,) * 5)) +wtensor5 = TensorType("int16", shape=((None,) * 5)) +itensor5 = TensorType("int32", shape=((None,) * 5)) +ltensor5 = TensorType("int64", shape=((None,) * 5)) -def tensor5(name=None, dtype=None): - """Return a symbolic 5-D variable. +def tensor5( + name: Optional[str] = None, dtype: Optional["DTypeLike"] = None +) -> "TensorVariable": + """Return a symbolic 5D variable. Parameters ---------- - dtype: numeric type - None means to use aesara.config.floatX. name A name to attach to this variable. + dtype + ``None`` means to use `aesara.config.floatX`. 
""" if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False, False, False, False, False)) + type = TensorType(dtype, shape=(None, None, None, None, None)) return type(name) @@ -1026,30 +1051,32 @@ def tensor5(name=None, dtype=None): tensor5, ftensor5, dtensor5, itensor5, ltensor5 ) -ctensor6 = TensorType("complex64", ((False,) * 6)) -ztensor6 = TensorType("complex128", ((False,) * 6)) -ftensor6 = TensorType("float32", ((False,) * 6)) -dtensor6 = TensorType("float64", ((False,) * 6)) -btensor6 = TensorType("int8", ((False,) * 6)) -wtensor6 = TensorType("int16", ((False,) * 6)) -itensor6 = TensorType("int32", ((False,) * 6)) -ltensor6 = TensorType("int64", ((False,) * 6)) +ctensor6 = TensorType("complex64", shape=((None,) * 6)) +ztensor6 = TensorType("complex128", shape=((None,) * 6)) +ftensor6 = TensorType("float32", shape=((None,) * 6)) +dtensor6 = TensorType("float64", shape=((None,) * 6)) +btensor6 = TensorType("int8", shape=((None,) * 6)) +wtensor6 = TensorType("int16", shape=((None,) * 6)) +itensor6 = TensorType("int32", shape=((None,) * 6)) +ltensor6 = TensorType("int64", shape=((None,) * 6)) -def tensor6(name=None, dtype=None): - """Return a symbolic 6-D variable. +def tensor6( + name: Optional[str] = None, dtype: Optional["DTypeLike"] = None +) -> "TensorVariable": + """Return a symbolic 6D variable. Parameters ---------- - dtype: numeric type - None means to use aesara.config.floatX. name A name to attach to this variable. + dtype + ``None`` means to use `aesara.config.floatX`. """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False,) * 6) + type = TensorType(dtype, shape=(None,) * 6) return type(name) @@ -1057,30 +1084,32 @@ def tensor6(name=None, dtype=None): tensor6, ftensor6, dtensor6, itensor6, ltensor6 ) -ctensor7 = TensorType("complex64", ((False,) * 7)) -ztensor7 = TensorType("complex128", ((False,) * 7)) -ftensor7 = TensorType("float32", ((False,) * 7)) -dtensor7 = TensorType("float64", ((False,) * 7)) -btensor7 = TensorType("int8", ((False,) * 7)) -wtensor7 = TensorType("int16", ((False,) * 7)) -itensor7 = TensorType("int32", ((False,) * 7)) -ltensor7 = TensorType("int64", ((False,) * 7)) +ctensor7 = TensorType("complex64", shape=((None,) * 7)) +ztensor7 = TensorType("complex128", shape=((None,) * 7)) +ftensor7 = TensorType("float32", shape=((None,) * 7)) +dtensor7 = TensorType("float64", shape=((None,) * 7)) +btensor7 = TensorType("int8", shape=((None,) * 7)) +wtensor7 = TensorType("int16", shape=((None,) * 7)) +itensor7 = TensorType("int32", shape=((None,) * 7)) +ltensor7 = TensorType("int64", shape=((None,) * 7)) -def tensor7(name=None, dtype=None): +def tensor7( + name: Optional[str] = None, dtype: Optional["DTypeLike"] = None +) -> "TensorVariable": """Return a symbolic 7-D variable. Parameters ---------- - dtype: numeric type - None means to use aesara.config.floatX. name A name to attach to this variable. + dtype + ``None`` means to use `aesara.config.floatX`. """ if dtype is None: dtype = config.floatX - type = TensorType(dtype, (False,) * 7) + type = TensorType(dtype, shape=(None,) * 7) return type(name) diff --git a/doc/extending/creating_a_c_op.rst b/doc/extending/creating_a_c_op.rst index 3c0a56736b..423d84c7f9 100644 --- a/doc/extending/creating_a_c_op.rst +++ b/doc/extending/creating_a_c_op.rst @@ -618,7 +618,7 @@ C code. 
# Create an output variable of the same type as x output_var = aesara.tensor.type.TensorType( dtype=aesara.scalar.upcast(x.dtype, y.dtype), - shape=[False])() + shape=(None,))() return Apply(self, [x, y], [output_var]) @@ -767,7 +767,7 @@ The new :class:`Op` is defined inside a Python file with the following code : # Create an output variable of the same type as x output_var = aesara.tensor.type.TensorType( dtype=aesara.scalar.upcast(x.dtype, y.dtype), - shape=[False])() + shape=(None,))() return Apply(self, [x, y], [output_var]) diff --git a/doc/extending/creating_a_numba_jax_op.rst b/doc/extending/creating_a_numba_jax_op.rst index 108d3c8494..cff9a645f7 100644 --- a/doc/extending/creating_a_numba_jax_op.rst +++ b/doc/extending/creating_a_numba_jax_op.rst @@ -30,7 +30,7 @@ For example, the :class:`Eye`\ :class:`Op` current has an :meth:`Op.make_node` a return Apply( self, [n, m, k], - [TensorType(dtype=self.dtype, shape=(False, False))()], + [TensorType(dtype=self.dtype, shape=(None, None))()], ) @@ -83,7 +83,7 @@ Here's an example for :class:`IfElse`: return res if n_outs > 1 else res[0] -Step 3: Register the function with the `jax_funcify` dispatcher +Step 3: Register the function with the `_jax_funcify` dispatcher --------------------------------------------------------------- With the Aesara `Op` replicated in JAX, we’ll need to register the @@ -91,7 +91,7 @@ function with the Aesara JAX `Linker`. This is done through the use of `singledispatch`. If you don't know how `singledispatch` works, see the `Python documentation `_. -The relevant dispatch functions created by `singledispatch` are :func:`aesara.link.numba.dispatch.numba_funcify` and +The relevant dispatch functions created by `singledispatch` are :func:`aesara.link.numba.dispatch.basic._numba_funcify` and :func:`aesara.link.jax.dispatch.jax_funcify`. 
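+
+In outline, a registration pairs an `Op` class with a function that returns a
+JAX-compatible callable. The following is only a sketch using a toy
+``DoubleOp`` (not an existing Aesara class):
+
+.. code-block:: python
+
+    import jax.numpy as jnp
+
+    from aesara.graph.basic import Apply
+    from aesara.graph.op import Op
+    from aesara.link.jax.dispatch import jax_funcify
+    from aesara.tensor import as_tensor_variable
+
+
+    class DoubleOp(Op):
+        """A toy `Op` that doubles its input (illustration only)."""
+
+        def make_node(self, x):
+            x = as_tensor_variable(x)
+            return Apply(self, [x], [x.type()])
+
+        def perform(self, node, inputs, outputs):
+            outputs[0][0] = 2 * inputs[0]
+
+
+    @jax_funcify.register(DoubleOp)
+    def jax_funcify_DoubleOp(op, **kwargs):
+        def double(x):
+            return 2 * jnp.asarray(x)
+
+        return double
+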
Here’s an example for the `Eye`\ `Op`: diff --git a/doc/extending/extending_aesara_solution_1.py b/doc/extending/extending_aesara_solution_1.py index d232756ebd..f30562a9e9 100755 --- a/doc/extending/extending_aesara_solution_1.py +++ b/doc/extending/extending_aesara_solution_1.py @@ -16,9 +16,9 @@ class ProdOp(Op): def make_node(self, x, y): x = at.as_tensor_variable(x) y = at.as_tensor_variable(y) - outdim = x.ndim + outdim = x.type.ndim output = TensorType( - dtype=aesara.scalar.upcast(x.dtype, y.dtype), shape=[False] * outdim + dtype=aesara.scalar.upcast(x.dtype, y.dtype), shape=(None,) * outdim )() return Apply(self, inputs=[x, y], outputs=[output]) @@ -41,12 +41,12 @@ class SumDiffOp(Op): def make_node(self, x, y): x = at.as_tensor_variable(x) y = at.as_tensor_variable(y) - outdim = x.ndim + outdim = x.type.ndim output1 = TensorType( - dtype=aesara.scalar.upcast(x.dtype, y.dtype), shape=[False] * outdim + dtype=aesara.scalar.upcast(x.dtype, y.dtype), shape=(None,) * outdim )() output2 = TensorType( - dtype=aesara.scalar.upcast(x.dtype, y.dtype), shape=[False] * outdim + dtype=aesara.scalar.upcast(x.dtype, y.dtype), shape=(None,) * outdim )() return Apply(self, inputs=[x, y], outputs=[output1, output2]) diff --git a/doc/extending/graphstructures.rst b/doc/extending/graphstructures.rst index 41f16504f5..2b28eec2da 100644 --- a/doc/extending/graphstructures.rst +++ b/doc/extending/graphstructures.rst @@ -217,7 +217,7 @@ For example, :ref:`aesara.tensor.irow ` is an instance o >>> from aesara.tensor import irow >>> irow() - + As the string print-out shows, `irow` specifies the following information about the :class:`Variable`\s it constructs: diff --git a/doc/extending/type.rst b/doc/extending/type.rst index 39accfc687..473af43e90 100644 --- a/doc/extending/type.rst +++ b/doc/extending/type.rst @@ -90,7 +90,7 @@ For example, let's say we have two :class:`Variable`\s with the following >>> from aesara.tensor.type import TensorType >>> v1 = TensorType("float64", (2, None))() >>> v1.type -TensorType(float64, (2, None)) +TensorType(float64, (2, ?)) >>> v2 = TensorType("float64", (2, 1))() >>> v2.type TensorType(float64, (2, 1)) @@ -145,7 +145,7 @@ SpecifyShape.0 >>> import aesara >>> aesara.dprint(v3, print_type=True) SpecifyShape [id A] - | [id B] + | [id B] |TensorConstant{2} [id C] |TensorConstant{1} [id D] diff --git a/doc/library/compile/shared.rst b/doc/library/compile/shared.rst index 0cd4a74b12..d7d0d90c1a 100644 --- a/doc/library/compile/shared.rst +++ b/doc/library/compile/shared.rst @@ -13,7 +13,7 @@ .. class:: SharedVariable - Variable with Storage that is shared between functions that it appears in. + Variable with storage that is shared between the compiled functions that it appears in. These variables are meant to be created by registered *shared constructors* (see :func:`shared_constructor`). @@ -68,7 +68,6 @@ A container to use for this SharedVariable when it is an implicit function parameter. - :type: class:`Container` .. autofunction:: shared @@ -76,10 +75,10 @@ Append `ctor` to the list of shared constructors (see :func:`shared`). - Each registered constructor ``ctor`` will be called like this: + Each registered constructor `ctor` will be called like this: .. code-block:: python ctor(value, name=name, strict=strict, **kwargs) - If it do not support given value, it must raise a TypeError. + If it do not support given value, it must raise a `TypeError`. 
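+
+    For example, a hypothetical constructor that only accepts 1-d ``float64``
+    NumPy arrays could be registered like this (a sketch; the names are
+    illustrative):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        from aesara.compile.sharedvalue import SharedVariable, shared_constructor
+        from aesara.tensor.type import TensorType
+
+        def float64_vector_constructor(value, name=None, strict=False, **kwargs):
+            if not (
+                isinstance(value, np.ndarray)
+                and value.dtype == np.float64
+                and value.ndim == 1
+            ):
+                raise TypeError("only 1-d float64 ndarrays are supported")
+            return SharedVariable(
+                name=name,
+                type=TensorType(dtype="float64", shape=(None,)),
+                value=value,
+                strict=strict,
+            )
+
+        shared_constructor(float64_vector_constructor)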
diff --git a/doc/library/scan.rst b/doc/library/scan.rst index 0abcbcd6ed..86b59d119f 100644 --- a/doc/library/scan.rst +++ b/doc/library/scan.rst @@ -406,7 +406,7 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the Traceback (most recent call last): ... MissingInputError: An input of the graph, used to compute - DimShuffle{1,0}(), was not provided and + DimShuffle{1,0}(), was not provided and not given a value.Use the Aesara flag exception_verbosity='high',for more information on this error. diff --git a/doc/library/tensor/basic.rst b/doc/library/tensor/basic.rst index b9e52e6684..153e731f63 100644 --- a/doc/library/tensor/basic.rst +++ b/doc/library/tensor/basic.rst @@ -409,7 +409,7 @@ them perfectly, but a `dscalar` otherwise. broadcast over the middle dimension of a 3-dimensional tensor when adding them together, we would define it like this: - >>> middle_broadcaster = TensorType('complex64', [False, True, False]) + >>> middle_broadcaster = TensorType('complex64', shape=(None, 1, None)) .. attribute:: ndim diff --git a/doc/tutorial/debug_faq.rst b/doc/tutorial/debug_faq.rst index 7e4d6a12e9..5bb53d7eb3 100644 --- a/doc/tutorial/debug_faq.rst +++ b/doc/tutorial/debug_faq.rst @@ -44,8 +44,8 @@ Running the code above we see: Traceback (most recent call last): ... ValueError: Input dimension mismatch. (input[0].shape[0] = 3, input[1].shape[0] = 2) - Apply node that caused the error: Elemwise{add,no_inplace}(, , ) - Inputs types: [TensorType(float64, (None,)), TensorType(float64, (None,)), TensorType(float64, (None,))] + Apply node that caused the error: Elemwise{add,no_inplace}(, , ) + Inputs types: [TensorType(float64, (?,)), TensorType(float64, (?,)), TensorType(float64, (?,))] Inputs shapes: [(3,), (2,), (2,)] Inputs strides: [(8,), (8,), (8,)] Inputs scalar values: ['not scalar', 'not scalar', 'not scalar'] @@ -73,11 +73,11 @@ message becomes : z = z + y Debugprint of the apply node: - Elemwise{add,no_inplace} [id A] '' - |Elemwise{add,no_inplace} [id B] '' - | | [id C] - | | [id C] - | [id D] + Elemwise{add,no_inplace} [id A] '' + |Elemwise{add,no_inplace} [id B] '' + | | [id C] + | | [id C] + | [id D] We can here see that the error can be traced back to the line ``z = z + y``. For this example, using ``optimizer=fast_compile`` worked. If it did not, @@ -145,18 +145,18 @@ Running the above code generates the following error message: outputs = self.vm() ValueError: Shape mismatch: x has 10 cols (and 5 rows) but y has 20 rows (and 10 cols) Apply node that caused the error: Dot22(x, DimShuffle{1,0}.0) - Inputs types: [TensorType(float64, (None, None)), TensorType(float64, (None, None))] + Inputs types: [TensorType(float64, (?, ?)), TensorType(float64, (?, ?))] Inputs shapes: [(5, 10), (20, 10)] Inputs strides: [(80, 8), (8, 160)] Inputs scalar values: ['not scalar', 'not scalar'] Debugprint of the apply node: - Dot22 [id A] '' - |x [id B] - |DimShuffle{1,0} [id C] '' - |Flatten{2} [id D] '' - |DimShuffle{2,0,1} [id E] '' - |W1 [id F] + Dot22 [id A] '' + |x [id B] + |DimShuffle{1,0} [id C] '' + |Flatten{2} [id D] '' + |DimShuffle{2,0,1} [id E] '' + |W1 [id F] HINT: Re-running with most Aesara optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Aesara flags 'optimizer=fast_compile'. If that does not work, Aesara optimization can be disabled with 'optimizer=None'. @@ -483,7 +483,7 @@ Consider this example script (``ex.py``): ValueError: Input dimension mismatch. 
(input[0].shape[0] = 3, input[1].shape[0] = 5) Apply node that caused the error: Elemwise{mul,no_inplace}(a, b) Toposort index: 0 - Inputs types: [TensorType(float64, (None, None)), TensorType(float64, (None, None))] + Inputs types: [TensorType(float64, (?, ?)), TensorType(float64, (?, ?))] Inputs shapes: [(3, 4), (5, 5)] Inputs strides: [(32, 8), (40, 8)] Inputs values: ['not shown', 'not shown'] diff --git a/environment-arm.yml b/environment-arm.yml new file mode 100644 index 0000000000..6c42774eb1 --- /dev/null +++ b/environment-arm.yml @@ -0,0 +1,47 @@ +# To use: +# +# $ conda env create -f environment.yml # `mamba` works too for this command +# $ conda activate aesara-dev +# +name: aesara-dev +channels: + - conda-forge +dependencies: + - python + - compilers + - numpy>=1.17.0 + - scipy>=0.14 + - filelock + - etuples + - logical-unification + - miniKanren + - cons + # Non-Intel BLAS + - nomkl + - openblas + - libblas=*=*openblas + # numba backend + - numba>=0.55.2 + - llvmlite>=0.38.1 + - numba-scipy + # For testing + - coveralls + - diff-cover + - pytest + - pytest-cov + - pytest-xdist + # For building docs + - sphinx>=1.3 + - sphinx_rtd_theme + - pygments + - pydot + - ipython + # developer tools + - pre-commit + - packaging + - typing_extensions + # optional + - sympy + - cython + - jax + - jaxlib diff --git a/environment.yml b/environment.yml index 3bffffc618..388ce4f30f 100644 --- a/environment.yml +++ b/environment.yml @@ -35,13 +35,6 @@ dependencies: - pygments - pydot - ipython - # code style - - black - - isort - # For linting - - flake8 - - pep8 - - pyflakes # developer tools - pre-commit - packaging diff --git a/tests/compile/function/test_pfunc.py b/tests/compile/function/test_pfunc.py index 5ded264858..7fb1d02f93 100644 --- a/tests/compile/function/test_pfunc.py +++ b/tests/compile/function/test_pfunc.py @@ -36,6 +36,22 @@ def data_of(s): class TestPfunc: + def test_errors(self): + a = lscalar() + b = shared(1) + + with pytest.raises(TypeError): + pfunc({a}, a + b) + + with pytest.raises(TypeError): + pfunc([a], a + b, no_default_updates=1) + + with pytest.raises(TypeError): + pfunc([a], a + b, updates=[{b, a}]) + + with pytest.raises(TypeError): + pfunc([a], a + b, updates=[(1, b)]) + def test_doc(self): # Ensure the code given in pfunc.txt works as expected @@ -432,7 +448,8 @@ def test_default_updates(self): f() assert x.get_value() == 1 - del x.default_update + x.default_update = None + f() assert x.get_value() == 2 diff --git a/tests/compile/test_builders.py b/tests/compile/test_builders.py index b770121134..0ca4cabf53 100644 --- a/tests/compile/test_builders.py +++ b/tests/compile/test_builders.py @@ -580,10 +580,10 @@ def test_debugprint(): OpFromGraph{inline=False} [id A] >Elemwise{add,no_inplace} [id E] - > |*0- [id F] + > |*0- [id F] > |Elemwise{mul,no_inplace} [id G] - > |*1- [id H] - > |*2- [id I] + > |*1- [id H] + > |*2- [id I] """ for truth, out in zip(exp_res.split("\n"), lines): diff --git a/tests/compile/test_debugmode.py b/tests/compile/test_debugmode.py index c9c59a463f..2bfc45a444 100644 --- a/tests/compile/test_debugmode.py +++ b/tests/compile/test_debugmode.py @@ -716,8 +716,8 @@ def make_node(self, v): v = at.as_tensor_variable(v) assert v.type.ndim == 1 type_class = type(v.type) - out_r_type = type_class(dtype=v.dtype, shape=(True, False)) - out_c_type = type_class(dtype=v.dtype, shape=(False, True)) + out_r_type = type_class(dtype=v.dtype, shape=(1, None)) + out_c_type = type_class(dtype=v.dtype, shape=(None, 1)) return Apply(self, [v], 
[out_r_type(), out_c_type()]) def perform(self, node, inp, out): diff --git a/tests/compile/test_mode.py b/tests/compile/test_mode.py index 0c19dc3edc..ebe577c58f 100644 --- a/tests/compile/test_mode.py +++ b/tests/compile/test_mode.py @@ -1,9 +1,20 @@ +import copy + +import pytest + from aesara.compile.function import function -from aesara.compile.mode import AddFeatureOptimizer, Mode +from aesara.compile.mode import ( + AddFeatureOptimizer, + Mode, + get_default_mode, + get_target_language, +) +from aesara.configdefaults import config from aesara.graph.features import NoOutputFromInplace from aesara.graph.rewriting.db import RewriteDatabaseQuery, SequenceDB +from aesara.link.basic import LocalLinker from aesara.tensor.math import dot, tanh -from aesara.tensor.type import matrix +from aesara.tensor.type import matrix, vector def test_Mode_basic(): @@ -48,3 +59,86 @@ def test_including(): new_mode = mode.including("fast_compile") assert set(new_mode._optimizer.include) == {"merge", "fast_compile"} + + +class TestBunchOfModes: + def test_modes(self): + # this is a quick test after the LazyLinker branch merge + # to check that all the current modes can still be used. + linker_classes_involved = [] + + predef_modes = ["FAST_COMPILE", "FAST_RUN", "DEBUG_MODE"] + + # Linkers to use with regular Mode + if config.cxx: + linkers = ["py", "c|py", "c|py_nogc", "vm", "vm_nogc", "cvm", "cvm_nogc"] + else: + linkers = ["py", "c|py", "c|py_nogc", "vm", "vm_nogc"] + modes = predef_modes + [Mode(linker, "fast_run") for linker in linkers] + + for mode in modes: + x = matrix() + y = vector() + f = function([x, y], x + y, mode=mode) + # test that it runs something + f([[1, 2], [3, 4]], [5, 6]) + linker_classes_involved.append(f.maker.mode.linker.__class__) + # print 'MODE:', mode, f.maker.mode.linker, 'stop' + + # regression check: + # there should be + # - `VMLinker` + # - OpWiseCLinker (FAST_RUN) + # - PerformLinker (FAST_COMPILE) + # - DebugMode's Linker (DEBUG_MODE) + assert 4 == len(set(linker_classes_involved)) + + +class TestOldModesProblem: + def test_modes(self): + # Then, build a mode with the same linker, and a modified optimizer + default_mode = get_default_mode() + modified_mode = default_mode.including("specialize") + + # The following line used to fail, with Python 2.4, in July 2012, + # because an fgraph was associated to the default linker + copy.deepcopy(modified_mode) + + # More straightforward test + linker = get_default_mode().linker + assert not hasattr(linker, "fgraph") or linker.fgraph is None + + +def test_get_target_language(): + with config.change_flags(mode=Mode(linker="py")): + res = get_target_language() + assert res == ("py",) + + res = get_target_language(Mode(linker="py")) + assert res == ("py",) + + res = get_target_language(Mode(linker="c")) + assert res == ("c",) + + res = get_target_language(Mode(linker="c|py")) + assert res == ("c", "py") + + res = get_target_language(Mode(linker="vm")) + assert res == ("c", "py") + + with config.change_flags(cxx=""): + res = get_target_language(Mode(linker="vm")) + assert res == ("py",) + + res = get_target_language(Mode(linker="jax")) + assert res == ("jax",) + + res = get_target_language(Mode(linker="numba")) + assert res == ("numba",) + + class MyLinker(LocalLinker): + pass + + test_mode = Mode(linker=MyLinker()) + with pytest.raises(Exception): + get_target_language(test_mode) diff --git a/tests/compile/test_modes.py b/tests/compile/test_modes.py deleted file mode 100644 index e46ced22ce..0000000000 --- a/tests/compile/test_modes.py 
+++ /dev/null @@ -1,58 +0,0 @@ -""" -Test compilation modes -""" - -import copy - -from aesara.compile.function import function -from aesara.compile.mode import Mode, get_default_mode -from aesara.configdefaults import config -from aesara.tensor.type import matrix, vector - - -class TestBunchOfModes: - def test_modes(self): - # this is a quick test after the LazyLinker branch merge - # to check that all the current modes can still be used. - linker_classes_involved = [] - - predef_modes = ["FAST_COMPILE", "FAST_RUN", "DEBUG_MODE"] - - # Linkers to use with regular Mode - if config.cxx: - linkers = ["py", "c|py", "c|py_nogc", "vm", "vm_nogc", "cvm", "cvm_nogc"] - else: - linkers = ["py", "c|py", "c|py_nogc", "vm", "vm_nogc"] - modes = predef_modes + [Mode(linker, "fast_run") for linker in linkers] - - for mode in modes: - x = matrix() - y = vector() - f = function([x, y], x + y, mode=mode) - # test that it runs something - f([[1, 2], [3, 4]], [5, 6]) - linker_classes_involved.append(f.maker.mode.linker.__class__) - # print 'MODE:', mode, f.maker.mode.linker, 'stop' - - # regression check: - # there should be - # - `VMLinker` - # - OpWiseCLinker (FAST_RUN) - # - PerformLinker (FAST_COMPILE) - # - DebugMode's Linker (DEBUG_MODE) - assert 4 == len(set(linker_classes_involved)) - - -class TestOldModesProblem: - def test_modes(self): - # Then, build a mode with the same linker, and a modified optimizer - default_mode = get_default_mode() - modified_mode = default_mode.including("specialize") - - # The following line used to fail, with Python 2.4, in July 2012, - # because an fgraph was associated to the default linker - copy.deepcopy(modified_mode) - - # More straightforward test - linker = get_default_mode().linker - assert not hasattr(linker, "fgraph") or linker.fgraph is None diff --git a/tests/compile/test_shared.py b/tests/compile/test_shared.py index 49058a7fee..714d30ab77 100644 --- a/tests/compile/test_shared.py +++ b/tests/compile/test_shared.py @@ -36,11 +36,11 @@ def test_ctors(self): # test tensor constructor b = shared(np.zeros((5, 5), dtype="int32")) - assert b.type == TensorType("int32", shape=[False, False]) + assert b.type == TensorType("int32", shape=(None, None)) b = shared(np.random.random((4, 5))) - assert b.type == TensorType("float64", shape=[False, False]) + assert b.type == TensorType("float64", shape=(None, None)) b = shared(np.random.random((5, 1, 2))) - assert b.type == TensorType("float64", shape=[False, False, False]) + assert b.type == TensorType("float64", shape=(None, None, None)) assert shared([]).type == generic @@ -67,7 +67,7 @@ def test_create_numpy_strict_false(self): # so creation should work SharedVariable( name="u", - type=TensorType(shape=[False], dtype="float64"), + type=TensorType(dtype="float64", shape=(None,)), value=np.asarray([1.0, 2.0]), strict=False, ) @@ -76,7 +76,7 @@ def test_create_numpy_strict_false(self): # so creation should work SharedVariable( name="u", - type=TensorType(shape=[False], dtype="float64"), + type=TensorType(dtype="float64", shape=(None,)), value=[1.0, 2.0], strict=False, ) @@ -85,7 +85,7 @@ def test_create_numpy_strict_false(self): # so creation should work SharedVariable( name="u", - type=TensorType(shape=[False], dtype="float64"), + type=TensorType(dtype="float64", shape=(None,)), value=[1, 2], # different dtype and not a numpy array strict=False, ) @@ -95,7 +95,7 @@ def test_create_numpy_strict_false(self): try: SharedVariable( name="u", - type=TensorType(shape=[False], dtype="float64"), + 
type=TensorType(dtype="float64", shape=(None,)), value=dict(), # not an array by any stretch strict=False, ) @@ -109,7 +109,7 @@ def test_use_numpy_strict_false(self): # so creation should work u = SharedVariable( name="u", - type=TensorType(shape=[False], dtype="float64"), + type=TensorType(dtype="float64", shape=(None,)), value=np.asarray([1.0, 2.0]), strict=False, ) diff --git a/tests/graph/rewriting/test_unify.py b/tests/graph/rewriting/test_unify.py index 6ce1284794..899e4e3166 100644 --- a/tests/graph/rewriting/test_unify.py +++ b/tests/graph/rewriting/test_unify.py @@ -72,7 +72,7 @@ def test_cons(): assert car(op1) == CustomOp assert cdr(op1) == (1,) - tt1 = TensorType("float32", [True, False]) + tt1 = TensorType("float32", shape=(1, None)) assert car(tt1) == TensorType assert cdr(tt1) == ("float32", (1, None)) @@ -247,8 +247,8 @@ def test_unify_Constant(): def test_unify_Type(): - t1 = TensorType(np.float64, (True, False)) - t2 = TensorType(np.float64, (True, False)) + t1 = TensorType(np.float64, shape=(1, None)) + t2 = TensorType(np.float64, shape=(1, None)) # `Type`, `Type` s = unify(t1, t2) diff --git a/tests/graph/test_basic.py b/tests/graph/test_basic.py index 9bbd282dd4..91085168d1 100644 --- a/tests/graph/test_basic.py +++ b/tests/graph/test_basic.py @@ -354,6 +354,12 @@ def test_clone(self): assert r1.auto_name == "auto_" + str(autoname_id) assert r2.auto_name == "auto_" + str(autoname_id + 1) + assert r1.name is None and r1.name is r2.name + + r3_name = "r3" + r3 = r1.clone(name=r3_name) + assert r3.name == r3_name + def test_equal_computations(): diff --git a/tests/link/c/test_params_type.py b/tests/link/c/test_params_type.py index 7053c054c7..65ba3e6bc1 100644 --- a/tests/link/c/test_params_type.py +++ b/tests/link/c/test_params_type.py @@ -12,7 +12,7 @@ from tests import unittest_tools as utt -tensor_type_0d = TensorType("float64", tuple()) +tensor_type_0d = TensorType("float64", shape=tuple()) scalar_type = ScalarType("float64") generic_type = Generic() @@ -127,15 +127,15 @@ class TestParamsType: def test_hash_and_eq_params(self): wp1 = ParamsType( a=Generic(), - array=TensorType("int64", (False,)), + array=TensorType("int64", shape=(None,)), floatting=ScalarType("float64"), - npy_scalar=TensorType("float64", tuple()), + npy_scalar=TensorType("float64", shape=tuple()), ) wp2 = ParamsType( a=Generic(), - array=TensorType("int64", (False,)), + array=TensorType("int64", shape=(None,)), floatting=ScalarType("float64"), - npy_scalar=TensorType("float64", tuple()), + npy_scalar=TensorType("float64", shape=tuple()), ) w1 = Params( wp1, @@ -157,9 +157,9 @@ def test_hash_and_eq_params(self): # Changing attributes names only (a -> other_name). 
wp2_other = ParamsType( other_name=Generic(), - array=TensorType("int64", (False,)), + array=TensorType("int64", shape=(None,)), floatting=ScalarType("float64"), - npy_scalar=TensorType("float64", tuple()), + npy_scalar=TensorType("float64", shape=tuple()), ) w2 = Params( wp2_other, @@ -190,13 +190,13 @@ def test_hash_and_eq_params(self): def test_hash_and_eq_params_type(self): w1 = ParamsType( - a1=TensorType("int64", (False, False)), - a2=TensorType("int64", (False, True, False, False, True)), + a1=TensorType("int64", shape=(None, None)), + a2=TensorType("int64", shape=(None, 1, None, None, 1)), a3=Generic(), ) w2 = ParamsType( - a1=TensorType("int64", (False, False)), - a2=TensorType("int64", (False, True, False, False, True)), + a1=TensorType("int64", shape=(None, None)), + a2=TensorType("int64", shape=(None, 1, None, None, 1)), a3=Generic(), ) assert w1 == w2 @@ -205,24 +205,24 @@ def test_hash_and_eq_params_type(self): assert w1.name == w2.name # Changing attributes names only. w2 = ParamsType( - a1=TensorType("int64", (False, False)), + a1=TensorType("int64", shape=(None, None)), other_name=TensorType( - "int64", (False, True, False, False, True) + "int64", shape=(None, 1, None, None, 1) ), # a2 -> other_name a3=Generic(), ) assert w1 != w2 # Changing attributes types only. w2 = ParamsType( - a1=TensorType("int64", (False, False)), + a1=TensorType("int64", shape=(None, None)), a2=Generic(), # changing class a3=Generic(), ) assert w1 != w2 # Changing attributes types characteristics only. w2 = ParamsType( - a1=TensorType("int64", (False, True)), # changing broadcasting - a2=TensorType("int64", (False, True, False, False, True)), + a1=TensorType("int64", shape=(None, 1)), # changing broadcasting + a2=TensorType("int64", shape=(None, 1, None, None, 1)), a3=Generic(), ) assert w1 != w2 @@ -239,8 +239,8 @@ def test_params_type_filtering(self): random_tensor = np.random.normal(size=size_tensor5).reshape(shape_tensor5) w = ParamsType( - a1=TensorType("int32", (False, False)), - a2=TensorType("float64", (False, False, False, False, False)), + a1=TensorType("int32", shape=(None, None)), + a2=TensorType("float64", shape=(None, None, None, None, None)), a3=Generic(), ) diff --git a/tests/link/c/test_type.py b/tests/link/c/test_type.py index aedf00c9a7..603dfb28d3 100644 --- a/tests/link/c/test_type.py +++ b/tests/link/c/test_type.py @@ -44,7 +44,7 @@ class GetOp(COp): __props__ = () def make_node(self, c): - return Apply(self, [c], [TensorType("float32", (False,))()]) + return Apply(self, [c], [TensorType("float32", shape=(None,))()]) def c_support_code(self, **kwargs): return """ @@ -73,7 +73,7 @@ def perform(self, *args, **kwargs): not aesara.config.cxx, reason="G++ not available, so we need to skip this test." 
) def test_cdata(): - i = TensorType("float32", (False,))() + i = TensorType("float32", shape=(None,))() c = ProdOp()(i) i2 = GetOp()(c) mode = None diff --git a/tests/link/jax/test_elemwise.py b/tests/link/jax/test_elemwise.py index 4f6a7343ad..49acc6b807 100644 --- a/tests/link/jax/test_elemwise.py +++ b/tests/link/jax/test_elemwise.py @@ -24,12 +24,12 @@ def test_jax_Dimshuffle(): x_fg = FunctionGraph([a_at], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0], [3.0, 4.0]].astype(config.floatX)]) - a_at = tensor(dtype=config.floatX, shape=[False, True]) + a_at = tensor(dtype=config.floatX, shape=(None, 1)) x = a_at.dimshuffle((0,)) x_fg = FunctionGraph([a_at], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0, 3.0, 4.0]].astype(config.floatX)]) - a_at = tensor(dtype=config.floatX, shape=[False, True]) + a_at = tensor(dtype=config.floatX, shape=(None, 1)) x = at_elemwise.DimShuffle([False, True], (0,))(a_at) x_fg = FunctionGraph([a_at], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0, 3.0, 4.0]].astype(config.floatX)]) diff --git a/tests/link/jax/test_tensor_basic.py b/tests/link/jax/test_tensor_basic.py index ef9738e0cd..696991ebad 100644 --- a/tests/link/jax/test_tensor_basic.py +++ b/tests/link/jax/test_tensor_basic.py @@ -52,15 +52,22 @@ def test_jax_MakeVector(): compare_jax_and_py(x_fg, []) -@pytest.mark.xfail(reason="jax.numpy.arange requires concrete inputs") +def test_arange(): + out = at.arange(1, 10, 2) + fgraph = FunctionGraph([], [out]) + compare_jax_and_py(fgraph, []) + + def test_arange_nonconcrete(): + """JAX cannot JIT-compile `jax.numpy.arange` when arguments are not concrete values.""" a = scalar("a") a.tag.test_value = 10 - out = at.arange(a) - fgraph = FunctionGraph([a], [out]) - compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) + + with pytest.raises(NotImplementedError): + fgraph = FunctionGraph([a], [out]) + compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) def test_jax_Join(): diff --git a/tests/link/numba/test_basic.py b/tests/link/numba/test_basic.py index 29f07649bf..69192cd895 100644 --- a/tests/link/numba/test_basic.py +++ b/tests/link/numba/test_basic.py @@ -24,9 +24,10 @@ from aesara.graph.type import Type from aesara.ifelse import ifelse from aesara.link.numba.dispatch import basic as numba_basic -from aesara.link.numba.dispatch import numba_typify +from aesara.link.numba.dispatch import numba_const_convert from aesara.link.numba.linker import NumbaLinker from aesara.raise_op import assert_op +from aesara.sparse.type import SparseTensorType from aesara.tensor import blas from aesara.tensor import subtensor as at_subtensor from aesara.tensor.elemwise import Elemwise @@ -246,26 +247,21 @@ def assert_fn(x, y): @pytest.mark.parametrize( - "v, expected, force_scalar, not_implemented", + "v, expected, force_scalar", [ - (MyType(), None, False, True), - (aes.float32, numba.types.float32, False, False), - (at.fscalar, numba.types.Array(numba.types.float32, 0, "A"), False, False), - (at.fscalar, numba.types.float32, True, False), - (at.lvector, numba.types.int64[:], False, False), - (at.dmatrix, numba.types.float64[:, :], False, False), - (at.dmatrix, numba.types.float64, True, False), + (MyType(), numba.types.pyobject, False), + (SparseTensorType("csc", dtype=np.float64), numba.types.pyobject, False), + (aes.float32, numba.types.float32, False), + (at.fscalar, numba.types.Array(numba.types.float32, 0, "A"), False), + (at.fscalar, numba.types.float32, True), + (at.lvector, numba.types.int64[:], False), + (at.dmatrix, 
numba.types.float64[:, :], False), + (at.dmatrix, numba.types.float64, True), ], ) -def test_get_numba_type(v, expected, force_scalar, not_implemented): - cm = ( - contextlib.suppress() - if not not_implemented - else pytest.raises(NotImplementedError) - ) - with cm: - res = numba_basic.get_numba_type(v, force_scalar=force_scalar) - assert res == expected +def test_get_numba_type(v, expected, force_scalar): + res = numba_basic.get_numba_type(v, force_scalar=force_scalar) + assert res == expected @pytest.mark.parametrize( @@ -315,7 +311,7 @@ def test_create_numba_signature(v, expected, force_scalar): [ ( np.random.RandomState(1), - numba_typify, + numba_const_convert, lambda x, y: np.all(x.get_state()[1] == y.get_state()[1]), ) ], diff --git a/tests/link/numba/test_elemwise.py b/tests/link/numba/test_elemwise.py index d5457a4733..1302624b48 100644 --- a/tests/link/numba/test_elemwise.py +++ b/tests/link/numba/test_elemwise.py @@ -140,7 +140,7 @@ def test_Elemwise(inputs, input_vals, output_fn, exc): # `{'drop': [1], 'shuffle': [2, 0], 'augment': [0, 2, 4]}` ( set_test_value( - at.tensor(config.floatX, [False, True, False], name="a"), + at.tensor(config.floatX, shape=(None, 1, None), name="a"), np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=config.floatX), ), ("x", 2, "x", 0, "x"), @@ -149,21 +149,21 @@ def test_Elemwise(inputs, input_vals, output_fn, exc): # `{'drop': [1], 'shuffle': [0], 'augment': []}` ( set_test_value( - at.tensor(config.floatX, [False, True], name="a"), + at.tensor(config.floatX, shape=(None, 1), name="a"), np.array([[1.0], [2.0], [3.0], [4.0]], dtype=config.floatX), ), (0,), ), ( set_test_value( - at.tensor(config.floatX, [False, True], name="a"), + at.tensor(config.floatX, shape=(None, 1), name="a"), np.array([[1.0], [2.0], [3.0], [4.0]], dtype=config.floatX), ), (0,), ), ( set_test_value( - at.tensor(config.floatX, [True, True, True], name="a"), + at.tensor(config.floatX, shape=(1, 1, 1), name="a"), np.array([[[1.0]]], dtype=config.floatX), ), (), diff --git a/tests/link/numba/test_random.py b/tests/link/numba/test_random.py index b859919829..6f554d297f 100644 --- a/tests/link/numba/test_random.py +++ b/tests/link/numba/test_random.py @@ -270,7 +270,7 @@ np.array([[1, 2], [3, 4]], dtype=np.float64), ), set_test_value( - at.tensor("float64", [True, False, False]), + at.tensor("float64", shape=(1, None, None)), np.eye(2)[None, ...], ), ], diff --git a/tests/link/numba/test_sparse.py b/tests/link/numba/test_sparse.py new file mode 100644 index 0000000000..af49752f3b --- /dev/null +++ b/tests/link/numba/test_sparse.py @@ -0,0 +1,40 @@ +import numba +import numpy as np +import scipy as sp + +# Load Numba customizations +import aesara.link.numba.dispatch.sparse # noqa: F401 + + +def test_sparse_unboxing(): + @numba.njit + def test_unboxing(x, y): + return x.shape, y.shape + + x_val = sp.sparse.csr_matrix(np.eye(100)) + y_val = sp.sparse.csc_matrix(np.eye(101)) + + res = test_unboxing(x_val, y_val) + + assert res == (x_val.shape, y_val.shape) + + +def test_sparse_boxing(): + @numba.njit + def test_boxing(x, y): + return x, y + + x_val = sp.sparse.csr_matrix(np.eye(100)) + y_val = sp.sparse.csc_matrix(np.eye(101)) + + res_x_val, res_y_val = test_boxing(x_val, y_val) + + assert np.array_equal(res_x_val.data, x_val.data) + assert np.array_equal(res_x_val.indices, x_val.indices) + assert np.array_equal(res_x_val.indptr, x_val.indptr) + assert res_x_val.shape == x_val.shape + + assert np.array_equal(res_y_val.data, y_val.data) + assert np.array_equal(res_y_val.indices, 
y_val.indices) + assert np.array_equal(res_y_val.indptr, y_val.indptr) + assert res_y_val.shape == y_val.shape diff --git a/tests/link/test_vm.py b/tests/link/test_vm.py index b684630f55..08d816459d 100644 --- a/tests/link/test_vm.py +++ b/tests/link/test_vm.py @@ -113,12 +113,12 @@ def time_numpy(): x = np.asarray([2.0, 3.0], dtype=config.floatX) numpy_version(x, steps_a) - t0 = time.time() + t0 = time.perf_counter() # print numpy_version(x, steps_a) - t1 = time.time() - t2 = time.time() + t1 = time.perf_counter() + t2 = time.perf_counter() # print numpy_version(x, steps_b) - t3 = time.time() + t3 = time.perf_counter() t_a = t1 - t0 t_b = t3 - t2 @@ -135,15 +135,15 @@ def time_linker(name, linker): f_b = function([x], b, mode=Mode(optimizer=None, linker=linker())) f_a([2.0, 3.0]) - t0 = time.time() + t0 = time.perf_counter() f_a([2.0, 3.0]) - t1 = time.time() + t1 = time.perf_counter() f_b([2.0, 3.0]) - t2 = time.time() + t2 = time.perf_counter() f_b([2.0, 3.0]) - t3 = time.time() + t3 = time.perf_counter() t_a = t1 - t0 t_b = t3 - t2 @@ -185,15 +185,15 @@ def build_graph(x, depth=5): f_b = function([x], b, mode=Mode(optimizer=None, linker=linker)) f_a([2.0]) - t0 = time.time() + t0 = time.perf_counter() f_a([2.0]) - t1 = time.time() + t1 = time.perf_counter() f_b([2.0]) - t2 = time.time() + t2 = time.perf_counter() f_b([2.0]) - t3 = time.time() + t3 = time.perf_counter() t_a = t1 - t0 t_b = t3 - t2 diff --git a/tests/sandbox/test_rng_mrg.py b/tests/sandbox/test_rng_mrg.py index 4d46a924de..9ee543bf28 100644 --- a/tests/sandbox/test_rng_mrg.py +++ b/tests/sandbox/test_rng_mrg.py @@ -199,10 +199,10 @@ def check_basics( avg_var = 0.0 for i in range(steps): - t0 = time.time() + t0 = time.perf_counter() ival = f(*inputs) assert ival.shape == sample_size - dt += time.time() - t0 + dt += time.perf_counter() - t0 ival = np.asarray(ival) if i == 0: mean = np.array(ival, copy=True) @@ -733,11 +733,11 @@ def basic_multinomialtest( avg_pvals = np.zeros(target_pvals.shape, dtype=config.floatX) for i in range(steps): - t0 = time.time() + t0 = time.perf_counter() ival = f() assert ival.shape == sample_size assert np.all(np.sum(ival, axis=1) == n_samples) - dt += time.time() - t0 + dt += time.perf_counter() - t0 avg_pvals += ival avg_pvals /= steps * n_samples diff --git a/tests/scalar/test_basic.py b/tests/scalar/test_basic.py index bde31d1ee2..744e100601 100644 --- a/tests/scalar/test_basic.py +++ b/tests/scalar/test_basic.py @@ -2,6 +2,7 @@ import pytest import aesara +import aesara.tensor as at import tests.unittest_tools as utt from aesara.compile.mode import Mode from aesara.graph.fg import FunctionGraph @@ -130,11 +131,16 @@ def test_flatten(self): def test_with_constants(self): x, y, z = floats("xyz") e = mul(add(70.0, y), true_div(x, y)) - C = Composite([x, y], [e]) - c = C.make_node(x, y) - assert "70.0" in c.op.c_code(c, "dummy", ["x", "y"], ["z"], dict(id=0)) - # print c.c_code(['x', 'y'], ['z'], dict(id = 0)) - g = FunctionGraph([x, y], [c.out]) + comp_op = Composite([x, y], [e]) + comp_node = comp_op.make_node(x, y) + + c_code = comp_node.op.c_code(comp_node, "dummy", ["x", "y"], ["z"], dict(id=0)) + assert "70.0" in c_code + + # Make sure caching of the c_code template works + assert hasattr(comp_node.op, "_c_code") + + g = FunctionGraph([x, y], [comp_node.out]) fn = make_function(DualLinker().accept(g)) assert fn(1.0, 2.0) == 36.0 @@ -174,24 +180,35 @@ def test_composite_printing(self): "*1::1, *1::2, *1::3, *1::4, *1::5, *1::6, *1::7)" ) - def test_make_node_continue_graph(self): - # 
This is a test for a bug (now fixed) that disabled the - # local_gpu_elemwise_0 optimization and printed an - # optimization warning on the terminal. - - # We test that Composite.make_node accept as inputs Variable - # some that represent existing computation. - - si0 = aesara.scalar.int8() - si1 = aesara.scalar.int8() - si2 = aesara.scalar.float32() - sout = (si0 * si1) / si2 - sop = aesara.scalar.Composite([si0, si1, si2], [sout]) - si0 = aesara.scalar.int8() - si1 = aesara.scalar.int8() - si2 = aesara.scalar.float32() - si3 = aesara.scalar.float32() - sop.make_node(si0 * si3, si1, si2) + def test_non_scalar_error(self): + x = float32("x") + comp_op = Composite([x], [(at.zeros((2,)) + x).sum()]) + + with pytest.raises(TypeError, match=".*exclusively.*ScalarOp.*"): + comp_op.fgraph + + def test_multi_out_perform(self): + from aesara.graph.basic import Apply + from aesara.scalar.basic import ScalarOp + + class MultiOutOp(ScalarOp): + def make_node(self, x): + return Apply(self, [x], [x.type(), x.type()]) + + def perform(self, node, inputs, outputs): + outputs[1][0] = outputs[0][0] = inputs[0] + + def c_code(self, *args): + return "dummy" + + x = float32("x") + comp_op = Composite([x], MultiOutOp()(x)) + + y, z = comp_op(x) + + fn = aesara.function([x], [y, z], mode=Mode("py", None)) + + assert fn(1.0) == [1.0, 1.0] class TestLogical: diff --git a/tests/scan/test_basic.py b/tests/scan/test_basic.py index b4af67d572..7148accb1d 100644 --- a/tests/scan/test_basic.py +++ b/tests/scan/test_basic.py @@ -282,10 +282,10 @@ def inner_fn(x): n_steps=4, ) - assert not hasattr(inner_rng, "default_update") - assert hasattr(inner_inner_rng, "default_update") - assert hasattr(y, "default_update") - assert hasattr(z_rng, "default_update") + assert inner_rng is None + assert inner_inner_rng.default_update is not None + assert y.default_update is not None + assert z_rng.default_update is not None out_fn = function([], out, mode=Mode(optimizer=None)) res, z_res = out_fn() @@ -586,10 +586,6 @@ def f_rnn_shared(u_t, x_tm1, tmp_W_in, tmp_W): assert np.allclose(aesara_values, v_out) def test_oinp_iinp_iout_oout_mappings(self): - """ - Test the mapping produces by - ScanOp.get_oinp_iinp_iout_oout_mappings() - """ rng = RandomStream(123) diff --git a/tests/scan/test_printing.py b/tests/scan/test_printing.py index 8ff4175147..f02a37bf6b 100644 --- a/tests/scan/test_printing.py +++ b/tests/scan/test_printing.py @@ -58,8 +58,8 @@ def test_debugprint_sitsot(): for{cpu,scan_fn} [id C] (outer_out_sit_sot-0) >Elemwise{mul,no_inplace} [id W] (inner_out_sit_sot-0) - > |*0- [id X] -> [id E] (inner_in_sit_sot-0) - > |*1- [id Y] -> [id M] (inner_in_non_seqs-0)""" + > |*0- [id X] -> [id E] (inner_in_sit_sot-0) + > |*1- [id Y] -> [id M] (inner_in_non_seqs-0)""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip() @@ -113,8 +113,8 @@ def test_debugprint_sitsot_no_extra_info(): for{cpu,scan_fn} [id C] >Elemwise{mul,no_inplace} [id W] - > |*0- [id X] -> [id E] - > |*1- [id Y] -> [id M]""" + > |*0- [id X] -> [id E] + > |*1- [id Y] -> [id M]""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip() @@ -264,7 +264,7 @@ def compute_A_k(A, k): > | | | | | | | |Unbroadcast{0} [id BL] > | | | | | | | |InplaceDimShuffle{x,0} [id BM] > | | | | | | | |Elemwise{second,no_inplace} [id BN] - > | | | | | | | |*2- [id BO] -> [id W] (inner_in_non_seqs-0) + > | | | | | | | |*2- [id BO] -> [id W] (inner_in_non_seqs-0) > | | | | | | | |InplaceDimShuffle{x} [id BP] > | 
| | | | | | |TensorConstant{1.0} [id BQ] > | | | | | | |ScalarConstant{0} [id BR] @@ -275,7 +275,7 @@ def compute_A_k(A, k): > | | | | |Unbroadcast{0} [id BL] > | | | | |ScalarFromTensor [id BV] > | | | | |Subtensor{int64} [id BJ] - > | | | |*2- [id BO] -> [id W] (inner_in_non_seqs-0) (outer_in_non_seqs-0) + > | | | |*2- [id BO] -> [id W] (inner_in_non_seqs-0) (outer_in_non_seqs-0) > | | |ScalarConstant{1} [id BW] > | |ScalarConstant{-1} [id BX] > |InplaceDimShuffle{x} [id BY] @@ -283,8 +283,8 @@ def compute_A_k(A, k): for{cpu,scan_fn} [id BE] (outer_out_sit_sot-0) >Elemwise{mul,no_inplace} [id CA] (inner_out_sit_sot-0) - > |*0- [id CB] -> [id BG] (inner_in_sit_sot-0) - > |*1- [id CC] -> [id BO] (inner_in_non_seqs-0)""" + > |*0- [id CB] -> [id BG] (inner_in_sit_sot-0) + > |*1- [id CC] -> [id BO] (inner_in_non_seqs-0)""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip() @@ -334,7 +334,7 @@ def compute_A_k(A, k): for{cpu,scan_fn} [id E] (outer_out_nit_sot-0) -*0- [id Y] -> [id U] (inner_in_seqs-0) -*1- [id Z] -> [id W] (inner_in_seqs-1) - -*2- [id BA] -> [id C] (inner_in_non_seqs-0) + -*2- [id BA] -> [id C] (inner_in_non_seqs-0) -*3- [id BB] -> [id B] (inner_in_non_seqs-1) >Elemwise{mul,no_inplace} [id BC] (inner_out_nit_sot-0) > |InplaceDimShuffle{x} [id BD] @@ -353,7 +353,7 @@ def compute_A_k(A, k): > | | | | | | | |Unbroadcast{0} [id BN] > | | | | | | | |InplaceDimShuffle{x,0} [id BO] > | | | | | | | |Elemwise{second,no_inplace} [id BP] - > | | | | | | | |*2- [id BA] (inner_in_non_seqs-0) + > | | | | | | | |*2- [id BA] (inner_in_non_seqs-0) > | | | | | | | |InplaceDimShuffle{x} [id BQ] > | | | | | | | |TensorConstant{1.0} [id BR] > | | | | | | |ScalarConstant{0} [id BS] @@ -364,18 +364,18 @@ def compute_A_k(A, k): > | | | | |Unbroadcast{0} [id BN] > | | | | |ScalarFromTensor [id BW] > | | | | |Subtensor{int64} [id BL] - > | | | |*2- [id BA] (inner_in_non_seqs-0) (outer_in_non_seqs-0) + > | | | |*2- [id BA] (inner_in_non_seqs-0) (outer_in_non_seqs-0) > | | |ScalarConstant{1} [id BX] > | |ScalarConstant{-1} [id BY] > |InplaceDimShuffle{x} [id BZ] > |*1- [id Z] (inner_in_seqs-1) for{cpu,scan_fn} [id BH] (outer_out_sit_sot-0) - -*0- [id CA] -> [id BI] (inner_in_sit_sot-0) - -*1- [id CB] -> [id BA] (inner_in_non_seqs-0) + -*0- [id CA] -> [id BI] (inner_in_sit_sot-0) + -*1- [id CB] -> [id BA] (inner_in_non_seqs-0) >Elemwise{mul,no_inplace} [id CC] (inner_out_sit_sot-0) - > |*0- [id CA] (inner_in_sit_sot-0) - > |*1- [id CB] (inner_in_non_seqs-0)""" + > |*0- [id CA] (inner_in_sit_sot-0) + > |*1- [id CB] (inner_in_non_seqs-0)""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip() @@ -413,7 +413,7 @@ def fn(a_m2, a_m1, b_m2, b_m1): | | | | |Subtensor{int64} [id H] | | | | |Shape [id I] | | | | | |Subtensor{:int64:} [id J] - | | | | | | [id K] + | | | | | | [id K] | | | | | |ScalarConstant{2} [id L] | | | | |ScalarConstant{0} [id M] | | | |Subtensor{:int64:} [id J] @@ -426,7 +426,7 @@ def fn(a_m2, a_m1, b_m2, b_m1): | | | |Subtensor{int64} [id R] | | | |Shape [id S] | | | | |Subtensor{:int64:} [id T] - | | | | | [id U] + | | | | | [id U] | | | | |ScalarConstant{2} [id V] | | | |ScalarConstant{0} [id W] | | |Subtensor{:int64:} [id T] @@ -562,19 +562,19 @@ def test_debugprint_mitmot(): for{cpu,grad_of_scan_fn}.1 [id B] (outer_out_sit_sot-0) >Elemwise{add,no_inplace} [id CM] (inner_out_mit_mot-0-0) > |Elemwise{mul} [id CN] - > | |*2- [id CO] -> [id BL] (inner_in_mit_mot-0-0) - > | |*5- [id CP] -> [id P] 
(inner_in_non_seqs-0) - > |*3- [id CQ] -> [id BL] (inner_in_mit_mot-0-1) + > | |*2- [id CO] -> [id BL] (inner_in_mit_mot-0-0) + > | |*5- [id CP] -> [id P] (inner_in_non_seqs-0) + > |*3- [id CQ] -> [id BL] (inner_in_mit_mot-0-1) >Elemwise{add,no_inplace} [id CR] (inner_out_sit_sot-0) > |Elemwise{mul} [id CS] - > | |*2- [id CO] -> [id BL] (inner_in_mit_mot-0-0) - > | |*0- [id CT] -> [id Z] (inner_in_seqs-0) - > |*4- [id CU] -> [id CE] (inner_in_sit_sot-0) + > | |*2- [id CO] -> [id BL] (inner_in_mit_mot-0-0) + > | |*0- [id CT] -> [id Z] (inner_in_seqs-0) + > |*4- [id CU] -> [id CE] (inner_in_sit_sot-0) for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) >Elemwise{mul,no_inplace} [id CV] (inner_out_sit_sot-0) - > |*0- [id CT] -> [id H] (inner_in_sit_sot-0) - > |*1- [id CW] -> [id P] (inner_in_non_seqs-0)""" + > |*0- [id CT] -> [id H] (inner_in_sit_sot-0) + > |*1- [id CW] -> [id P] (inner_in_non_seqs-0)""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip() diff --git a/tests/sparse/sandbox/test_sp.py b/tests/sparse/sandbox/test_sp.py index 6094a4d94c..0bd7119048 100644 --- a/tests/sparse/sandbox/test_sp.py +++ b/tests/sparse/sandbox/test_sp.py @@ -64,14 +64,14 @@ def test_convolution(self): fulloutshp = np.array(imshp) - np.array(kshp) + 1 else: fulloutshp = np.array(imshp) + np.array(kshp) - 1 - ntime1 = time.time() + ntime1 = time.perf_counter() refout = np.zeros((bsize,) + tuple(fulloutshp) + (nkern,)) for b in range(bsize): for n in range(nkern): refout[b, ..., n] = convolve2d( img2d[b, :, :], filtersflipped[n, ...], conv_mode ) - ntot += time.time() - ntime1 + ntot += time.perf_counter() - ntime1 # need to flatten images bench1 = refout[:, 0 :: ss[0], 0 :: ss[1], :].reshape( @@ -81,9 +81,9 @@ def test_convolution(self): # swap the last two dimensions (output needs to be nkern x outshp) bench1 = np.swapaxes(bench1, 1, 2) - ttime1 = time.time() + ttime1 = time.perf_counter() out1 = f(filters, biasvals, img1d) - ttot += time.time() - ttime1 + ttot += time.perf_counter() - ttime1 temp = bench1.flatten() - out1.flatten() assert (temp < 1e-5).all() diff --git a/tests/sparse/test_basic.py b/tests/sparse/test_basic.py index a3fd87ec47..839385bc64 100644 --- a/tests/sparse/test_basic.py +++ b/tests/sparse/test_basic.py @@ -693,7 +693,7 @@ def fn(m): def test_err(self): for ndim in [1, 3]: - t = TensorType(dtype=config.floatX, shape=(False,) * ndim)() + t = TensorType(dtype=config.floatX, shape=(None,) * ndim)() v = ivector() sub = t[v] @@ -1084,7 +1084,7 @@ def test_todense(self): @staticmethod def check_format_ndim(format, ndim): - x = tensor(dtype=config.floatX, shape=([False] * ndim), name="x") + x = tensor(dtype=config.floatX, shape=(None,) * ndim, name="x") s = SparseFromDense(format)(x) s_m = -s @@ -1171,7 +1171,7 @@ def test_csm_sparser(self): for format in ("csc", "csr"): for dtype in ("float32", "float64"): - x = tensor(dtype=dtype, shape=(False,)) + x = tensor(dtype=dtype, shape=(None,)) y = ivector() z = ivector() s = ivector() @@ -1224,7 +1224,7 @@ def test_csm(self): for format in ("csc", "csr"): for dtype in ("float32", "float64"): - x = tensor(dtype=dtype, shape=(False,)) + x = tensor(dtype=dtype, shape=(None,)) y = ivector() z = ivector() s = ivector() @@ -1334,7 +1334,7 @@ def test_opt_unpack(self): return # - kerns = TensorType(dtype="int64", shape=[False])("kerns") + kerns = TensorType(dtype="int64", shape=(None,))("kerns") spmat = sp.sparse.lil_matrix((4, 6), dtype="int64") for i in range(5): # set non-zeros in random locations (row x, col 
y) @@ -1343,7 +1343,7 @@ def test_opt_unpack(self): spmat[x, y] = np.random.random() * 10 spmat = sp.sparse.csc_matrix(spmat) - images = TensorType(dtype="float32", shape=[False, False])("images") + images = TensorType(dtype="float32", shape=(None, None))("images") cscmat = CSC(kerns, spmat.indices[: spmat.size], spmat.indptr, spmat.shape) f = aesara.function([kerns, images], structured_dot(cscmat, images.T)) @@ -1418,11 +1418,11 @@ def test_csc_correct_output_faster_than_scipy(self): aesara_times = [] scipy_times = [] for i in range(5): - t0 = time.time() + t0 = time.perf_counter() aesara_result = f(spmat, mat) - t1 = time.time() + t1 = time.perf_counter() scipy_result = spmat * mat - t2 = time.time() + t2 = time.perf_counter() aesara_times.append(t1 - t0) scipy_times.append(t2 - t1) @@ -1463,11 +1463,11 @@ def test_csr_correct_output_faster_than_scipy(self): ]: spmat = sp.sparse.csr_matrix(random_lil((M, N), sparse_dtype, nnz)) mat = np.asarray(np.random.standard_normal((N, K)), dense_dtype) - t0 = time.time() + t0 = time.perf_counter() aesara_result = f(spmat, mat) - t1 = time.time() + t1 = time.perf_counter() scipy_result = spmat * mat - t2 = time.time() + t2 = time.perf_counter() aesara_time = t1 - t0 scipy_time = t2 - t1 @@ -3275,7 +3275,7 @@ def test_op_sd(self): variable, data = sparse_random_inputs( format, shape=(10, 10), out_dtype=dtype, n=2, p=0.1 ) - variable[1] = TensorType(dtype=dtype, shape=(False, False))() + variable[1] = TensorType(dtype=dtype, shape=(None, None))() data[1] = data[1].toarray() f = aesara.function(variable, self.op(*variable)) diff --git a/tests/sparse/test_sharedvar.py b/tests/sparse/test_sharedvar.py new file mode 100644 index 0000000000..a7df62d291 --- /dev/null +++ b/tests/sparse/test_sharedvar.py @@ -0,0 +1,15 @@ +import numpy as np +import scipy as sp + +import aesara +from aesara.sparse.sharedvar import SparseTensorSharedVariable + + +def test_shared_basic(): + x = aesara.shared( + sp.sparse.csr_matrix(np.eye(100), dtype=np.float64), name="blah", borrow=True + ) + + assert isinstance(x, SparseTensorSharedVariable) + assert x.format == "csr" + assert x.dtype == "float64" diff --git a/tests/sparse/test_sparse.py b/tests/sparse/test_sparse.py deleted file mode 100644 index 115d6c9233..0000000000 --- a/tests/sparse/test_sparse.py +++ /dev/null @@ -1,14 +0,0 @@ -import numpy as np -import pytest - -import aesara -from aesara.compile import SharedVariable - - -sp = pytest.importorskip("scipy", minversion="0.7.0") - - -def test_shared_basic(): - x = aesara.shared(sp.sparse.csr_matrix(np.eye(100)), name="blah", borrow=True) - - assert isinstance(x, SharedVariable) diff --git a/tests/tensor/nnet/speed_test_conv.py b/tests/tensor/nnet/speed_test_conv.py index 0a413c9848..fd76afca37 100644 --- a/tests/tensor/nnet/speed_test_conv.py +++ b/tests/tensor/nnet/speed_test_conv.py @@ -39,7 +39,7 @@ def flip(kern, kshp): global_rng = np.random.default_rng(3423489) -dmatrix4 = TensorType("float64", (False, False, False, False)) +dmatrix4 = TensorType("float64", shape=(None, None, None, None)) def exec_multilayer_conv_nnet_old( @@ -99,7 +99,7 @@ def exec_multilayer_conv_nnet_old( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)) ) - time1 = time.time() + time1 = time.perf_counter() outval = np.zeros(np.r_[bsize, outshp]) if validate: # causes an atexit problem @@ -119,7 +119,7 @@ def exec_multilayer_conv_nnet_old( outval[b, n, ...] 
+= _convolve2d( imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0 )[0 :: ss[0], 0 :: ss[1]] - ntot += time.time() - time1 + ntot += time.perf_counter() - time1 # ConvOp if unroll_patch and not unroll_patch_size: @@ -149,18 +149,18 @@ def exec_multilayer_conv_nnet_old( propup2 = function([inputs4, kerns4], conv_op) propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py")) - time1 = time.time() + time1 = time.perf_counter() for i in range(repeat): hidval2_ = propup2(imgval, w_flip) hidval2 = hidval2_ # [:,:,0::ss[0],0::ss[1]] - tctot += time.time() - time1 + tctot += time.perf_counter() - time1 if conv_op_py: - time1 = time.time() + time1 = time.perf_counter() for i in range(repeat): hidval3_ = propup3(imgval, w_flip) hidval3 = hidval3_ # [:,:,0::ss[0],0::ss[1]] - tpytot += time.time() - time1 + tpytot += time.perf_counter() - time1 assert (np.abs(hidval2 - hidval3) < 1e-5).all() else: tpytot += 0 @@ -223,7 +223,7 @@ def exec_multilayer_conv_nnet( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)) ) - time1 = time.time() + time1 = time.perf_counter() # outval = np.zeros(np.r_[bsize, outshp]) # ConvOp @@ -253,10 +253,10 @@ def exec_multilayer_conv_nnet( # ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) propup2 = function([inputs4, kerns4], conv_op) - time1 = time.time() + time1 = time.perf_counter() for i in range(repeat): propup2(imgval, w_flip) - tctot += time.time() - time1 + tctot += time.perf_counter() - time1 imshp = tuple(outshp) # imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) diff --git a/tests/tensor/nnet/test_abstract_conv.py b/tests/tensor/nnet/test_abstract_conv.py index 31a3df7aa3..70d78d976a 100644 --- a/tests/tensor/nnet/test_abstract_conv.py +++ b/tests/tensor/nnet/test_abstract_conv.py @@ -2529,7 +2529,7 @@ def setup_method(self): self.ref_mode = "FAST_RUN" def test_fwd(self): - tensor6 = TensorType(config.floatX, (False,) * 6) + tensor6 = TensorType(config.floatX, shape=(None,) * 6) img_sym = tensor4("img") kern_sym = tensor6("kern") ref_kern_sym = tensor4("ref_kern") @@ -2652,7 +2652,7 @@ def conv_gradweight(inputs_val, output_val): utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1) def test_gradinput(self): - tensor6 = TensorType(config.floatX, (False,) * 6) + tensor6 = TensorType(config.floatX, shape=(None,) * 6) kern_sym = tensor6("kern") top_sym = tensor4("top") ref_kern_sym = tensor4("ref_kern") diff --git a/tests/tensor/nnet/test_batchnorm.py b/tests/tensor/nnet/test_batchnorm.py index b5c57d6117..751375d87a 100644 --- a/tests/tensor/nnet/test_batchnorm.py +++ b/tests/tensor/nnet/test_batchnorm.py @@ -495,7 +495,7 @@ def test_batch_normalization_train_broadcast(): params_dimshuffle[axis] = i # construct non-broadcasted parameter variables - param_type = TensorType(x.dtype, (False,) * len(non_bc_axes)) + param_type = TensorType(x.dtype, shape=(None,) * len(non_bc_axes)) scale, bias, running_mean, running_var = ( param_type(n) for n in ("scale", "bias", "running_mean", "running_var") ) diff --git a/tests/tensor/nnet/test_conv.py b/tests/tensor/nnet/test_conv.py index 2f780fa5c8..621726fa10 100644 --- a/tests/tensor/nnet/test_conv.py +++ b/tests/tensor/nnet/test_conv.py @@ -615,9 +615,9 @@ def speed(self): ) ) aesara_conv = aesara.function([], output, mode=mode) - t1 = time.time() + t1 = time.perf_counter() aesara_conv.vm(n_calls=n_calls) - t2 = time.time() + t2 = time.perf_counter() print(t2 - t1, end=" ") print() diff --git a/tests/tensor/nnet/test_conv3d2d.py b/tests/tensor/nnet/test_conv3d2d.py 
index f717bc17f0..c286684b55 100644 --- a/tests/tensor/nnet/test_conv3d2d.py +++ b/tests/tensor/nnet/test_conv3d2d.py @@ -127,9 +127,9 @@ def test_conv3d(border_mode): np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32") ) - # t0 = time.time() + # t0 = time.perf_counter() pyres = pyconv3d(signals, filters, border_mode) - # print(time.time() - t0) + # print(time.perf_counter() - t0) s_signals = shared(signals) s_filters = shared(filters) @@ -146,9 +146,9 @@ def test_conv3d(border_mode): newconv3d = aesara.function([], [], updates={s_output: out}, mode=mode) check_diagonal_subtensor_view_traces(newconv3d) - # t0 = time.time() + # t0 = time.perf_counter() newconv3d() - # print(time.time() - t0) + # print(time.perf_counter() - t0) utt.assert_allclose(pyres, s_output.get_value(borrow=True)) gsignals, gfilters = aesara.grad(out.sum(), [s_signals, s_filters]) gnewconv3d = aesara.function( @@ -160,9 +160,9 @@ def test_conv3d(border_mode): ) check_diagonal_subtensor_view_traces(gnewconv3d) - # t0 = time.time() + # t0 = time.perf_counter() gnewconv3d() - # print("grad", time.time() - t0) + # print("grad", time.perf_counter() - t0) Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5 Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2 @@ -189,9 +189,9 @@ def test_conv3d(border_mode): np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32") ) - # t0 = time.time() + # t0 = time.perf_counter() pyres = pyconv3d(signals, filters, border_mode) - # print(time.time() - t0) + # print(time.perf_counter() - t0) s_signals = shared(signals) s_filters = shared(filters) @@ -207,9 +207,9 @@ def test_conv3d(border_mode): newconv3d = aesara.function([], [], updates={s_output: out}, mode=mode) - # t0 = time.time() + # t0 = time.perf_counter() newconv3d() - # print(time.time() - t0) + # print(time.perf_counter() - t0) utt.assert_allclose(pyres, s_output.get_value(borrow=True)) gsignals, gfilters = aesara.grad(out.sum(), [s_signals, s_filters]) gnewconv3d = aesara.function( @@ -220,9 +220,9 @@ def test_conv3d(border_mode): name="grad", ) - # t0 = time.time() + # t0 = time.perf_counter() gnewconv3d() - # print("grad", time.time() - t0) + # print("grad", time.perf_counter() - t0) Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5 Nf, Tf, C, Hf, Wf = 4, 1, 3, 2, 2 diff --git a/tests/tensor/random/test_basic.py b/tests/tensor/random/test_basic.py index 42104a293f..69a5e6bc40 100644 --- a/tests/tensor/random/test_basic.py +++ b/tests/tensor/random/test_basic.py @@ -607,7 +607,7 @@ def test_mvnormal_ShapeFeature(): assert M_at in graph_inputs([s2]) # Test broadcasted shapes - mean = tensor(config.floatX, [True, False]) + mean = tensor(config.floatX, shape=(1, None)) mean.tag.test_value = np.array([[0, 1, 2]], dtype=config.floatX) test_covar = np.diag(np.array([1, 10, 100], dtype=config.floatX)) diff --git a/tests/tensor/random/test_op.py b/tests/tensor/random/test_op.py index a645b8a474..8e18dcec65 100644 --- a/tests/tensor/random/test_op.py +++ b/tests/tensor/random/test_op.py @@ -125,9 +125,9 @@ def test_RandomVariable_basics(): def test_RandomVariable_bcast(): rv = RandomVariable("normal", 0, [0, 0], config.floatX, inplace=True) - mu = tensor(config.floatX, [True, False, False]) + mu = tensor(config.floatX, shape=(1, None, None)) mu.tag.test_value = np.zeros((1, 2, 3)).astype(config.floatX) - sd = tensor(config.floatX, [False, False]) + sd = tensor(config.floatX, shape=(None, None)) sd.tag.test_value = np.ones((2, 3)).astype(config.floatX) s1 = iscalar() @@ -160,14 +160,14 @@ def test_RandomVariable_bcast_specify_shape(): s3 = 
Assert("testing")(s3, eq(s1, 1)) size = specify_shape(at.as_tensor([s1, s3, s2, s2, s1]), (5,)) - mu = tensor(config.floatX, [False, False, True]) + mu = tensor(config.floatX, shape=(None, None, 1)) mu.tag.test_value = np.random.normal(size=(2, 2, 1)).astype(config.floatX) - std = tensor(config.floatX, [False, True, True]) + std = tensor(config.floatX, shape=(None, 1, 1)) std.tag.test_value = np.ones((2, 1, 1)).astype(config.floatX) res = rv(mu, std, size=size) - assert res.broadcastable == (True, False, False, False, True) + assert res.type.shape == (1, None, None, None, 1) def test_RandomVariable_floatX(): diff --git a/tests/tensor/random/test_utils.py b/tests/tensor/random/test_utils.py index 18a7650147..bce218c27f 100644 --- a/tests/tensor/random/test_utils.py +++ b/tests/tensor/random/test_utils.py @@ -69,7 +69,7 @@ def test_broadcast_params(): # Try it in Aesara with config.change_flags(compute_test_value="raise"): - mean = tensor(config.floatX, [False, True]) + mean = tensor(config.floatX, shape=(None, 1)) mean.tag.test_value = np.array([[0], [10], [100]], dtype=config.floatX) cov = matrix() cov.tag.test_value = np.diag(np.array([1e-6], dtype=config.floatX)) diff --git a/tests/tensor/rewriting/test_basic.py b/tests/tensor/rewriting/test_basic.py index b835fe79ec..9bc2caad5a 100644 --- a/tests/tensor/rewriting/test_basic.py +++ b/tests/tensor/rewriting/test_basic.py @@ -614,8 +614,8 @@ def test_eq(self): f2 = function([x], eq(x, x), mode=self.mode) assert np.all(f2(vx) == np.ones((5, 4))) topo2 = f2.maker.fgraph.toposort() - # Shape_i{1}(), - # Shape_i{0}(), Alloc([[1]], Shape_i{0}.0, + # Shape_i{1}(), + # Shape_i{0}(), Alloc([[1]], Shape_i{0}.0, # Shape_i{1}.0 assert len(topo2) == 3 assert isinstance(topo2[-1].op, Alloc) @@ -1693,8 +1693,8 @@ def verify_op_count(f, count, cls): ], ) def test_basic(self, expr, x_shape, y_shape): - x = at.tensor("int64", (False,) * len(x_shape), name="x") - y = at.tensor("int64", (False,) * len(y_shape), name="y") + x = at.tensor("int64", shape=(None,) * len(x_shape), name="x") + y = at.tensor("int64", shape=(None,) * len(y_shape), name="y") z = expr(x, y) z_opt = aesara.function( @@ -1872,7 +1872,7 @@ def test_multi_input_single_alloc(self): def test_misc(self): x = row(dtype=self.dtype) - y = tensor(dtype=self.dtype, shape=(False, False, True)) + y = tensor(dtype=self.dtype, shape=(None, None, 1)) out = at.alloc(x, 5, 5).dimshuffle(0, 1, "x") + y func = function([y, x], out, mode=self.fast_run_mode) diff --git a/tests/tensor/rewriting/test_elemwise.py b/tests/tensor/rewriting/test_elemwise.py index cfb9b6a61d..d4d5c58cdf 100644 --- a/tests/tensor/rewriting/test_elemwise.py +++ b/tests/tensor/rewriting/test_elemwise.py @@ -71,9 +71,9 @@ def ds(x, y): def inputs(xbc=(0, 0), ybc=(0, 0), zbc=(0, 0)): - x = TensorType(shape=xbc, dtype="float64")("x") - y = TensorType(shape=ybc, dtype="float64")("y") - z = TensorType(shape=zbc, dtype="float64")("z") + x = TensorType(dtype="float64", shape=xbc)("x") + y = TensorType(dtype="float64", shape=ybc)("y") + z = TensorType(dtype="float64", shape=zbc)("z") return x, y, z @@ -82,6 +82,7 @@ def test_double_transpose(self): x, y, z = inputs() e = ds(ds(x, (1, 0)), (1, 0)) g = FunctionGraph([x], [e]) + # TODO FIXME: Construct these graphs and compare them. 
assert ( str(g) == "FunctionGraph(InplaceDimShuffle{1,0}(InplaceDimShuffle{1,0}(x)))" ) @@ -93,6 +94,7 @@ def test_merge2(self): x, y, z = inputs() e = ds(ds(x, (1, "x", 0)), (2, 0, "x", 1)) g = FunctionGraph([x], [e]) + # TODO FIXME: Construct these graphs and compare them. assert ( str(g) == "FunctionGraph(InplaceDimShuffle{2,0,x,1}(InplaceDimShuffle{1,x,0}(x)))" @@ -106,6 +108,7 @@ def test_elim3(self): x, y, z = inputs() e = ds(ds(ds(x, (0, "x", 1)), (2, 0, "x", 1)), (1, 0)) g = FunctionGraph([x], [e]) + # TODO FIXME: Construct these graphs and compare them. assert str(g) == ( "FunctionGraph(InplaceDimShuffle{1,0}(InplaceDimShuffle{2,0,x,1}" "(InplaceDimShuffle{0,x,1}(x))))" @@ -119,6 +122,7 @@ def test_lift(self): e = x + y + z g = FunctionGraph([x, y, z], [e]) + # TODO FIXME: Construct these graphs and compare them. # It does not really matter if the DimShuffles are inplace # or not. init_str_g_inplace = ( @@ -149,13 +153,14 @@ def test_recursive_lift(self): m = matrix(dtype="float64") out = ((v + 42) * (m + 84)).T g = FunctionGraph([v, m], [out]) + # TODO FIXME: Construct these graphs and compare them. init_str_g = ( "FunctionGraph(InplaceDimShuffle{1,0}(Elemwise{mul,no_inplace}" "(InplaceDimShuffle{x,0}(Elemwise{add,no_inplace}" - "(, " + "(, " "InplaceDimShuffle{x}(TensorConstant{42}))), " "Elemwise{add,no_inplace}" - "(, " + "(, " "InplaceDimShuffle{x,x}(TensorConstant{84})))))" ) assert str(g) == init_str_g @@ -163,10 +168,10 @@ def test_recursive_lift(self): new_g = FunctionGraph(g.inputs, [new_out]) rewrite_str_g = ( "FunctionGraph(Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}" - "(InplaceDimShuffle{0,x}(), " + "(InplaceDimShuffle{0,x}(), " "InplaceDimShuffle{x,x}(TensorConstant{42})), " "Elemwise{add,no_inplace}(InplaceDimShuffle{1,0}" - "(), " + "(), " "InplaceDimShuffle{x,x}(TensorConstant{84}))))" ) assert str(new_g) == rewrite_str_g @@ -177,6 +182,7 @@ def test_useless_dimshuffle(self): x, _, _ = inputs() e = ds(x, (0, 1)) g = FunctionGraph([x], [e]) + # TODO FIXME: Construct these graphs and compare them. assert str(g) == "FunctionGraph(InplaceDimShuffle{0,1}(x))" dimshuffle_lift.rewrite(g) assert str(g) == "FunctionGraph(x)" @@ -191,6 +197,7 @@ def test_dimshuffle_on_broadcastable(self): ds_z = ds(z, (2, 1, 0)) # useful ds_u = ds(u, ("x")) # useful g = FunctionGraph([x, y, z, u], [ds_x, ds_y, ds_z, ds_u]) + # TODO FIXME: Construct these graphs and compare them. assert ( str(g) == "FunctionGraph(InplaceDimShuffle{0,x}(x), InplaceDimShuffle{2,1,0}(y), InplaceDimShuffle{2,1,0}(z), InplaceDimShuffle{x}(TensorConstant{1}))" @@ -205,10 +212,10 @@ def test_dimshuffle_on_broadcastable(self): def test_local_useless_dimshuffle_in_reshape(): - vec = TensorType(shape=(False,), dtype="float64")("vector") - mat = TensorType(shape=(False, False), dtype="float64")("mat") - row = TensorType(shape=(True, False), dtype="float64")("row") - col = TensorType(shape=(False, True), dtype="float64")("col") + vec = TensorType(dtype="float64", shape=(None,))("vector") + mat = TensorType(dtype="float64", shape=(None, None))("mat") + row = TensorType(dtype="float64", shape=(1, None))("row") + col = TensorType(dtype="float64", shape=(None, 1))("col") reshape_dimshuffle_vector = reshape(vec.dimshuffle("x", 0), vec.shape) reshape_dimshuffle_mat = reshape(mat.dimshuffle("x", 0, "x", 1), mat.shape) @@ -225,6 +232,7 @@ def test_local_useless_dimshuffle_in_reshape(): ], ) + # TODO FIXME: Construct these graphs and compare them. 
assert str(g) == ( "FunctionGraph(Reshape{1}(InplaceDimShuffle{x,0}(vector), Shape(vector)), " "Reshape{2}(InplaceDimShuffle{x,0,x,1}(mat), Shape(mat)), " @@ -270,12 +278,12 @@ def my_init(dtype="float64", num=0): return np.zeros((5, 5), dtype=dtype) + num fw, fx, fy, fz = [ - tensor(dtype="float32", shape=[False] * 2, name=n) for n in "wxyz" + tensor(dtype="float32", shape=(None,) * 2, name=n) for n in "wxyz" ] dw, dx, dy, dz = [ - tensor(dtype="float64", shape=[False] * 2, name=n) for n in "wxyz" + tensor(dtype="float64", shape=(None,) * 2, name=n) for n in "wxyz" ] - ix, iy, iz = [tensor(dtype="int32", shape=[False] * 2, name=n) for n in "xyz"] + ix, iy, iz = [tensor(dtype="int32", shape=(None,) * 2, name=n) for n in "xyz"] fv = fvector("v") fs = fscalar("s") fwv = my_init("float32", 1) @@ -1105,6 +1113,86 @@ def test_test_values(self, test_value): f.maker.fgraph.outputs[0].tag.test_value, np.c_[[2.0]] ) + @pytest.mark.parametrize("linker", ["cvm", "py"]) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1), (0, 1, 2)]) + def test_CAReduce_single_input(self, linker, axis): + """Make sure that `CAReduce` and `Elemwise` fusions work with a single input.""" + + mode = Mode(linker=linker) + mode._optimizer = mode._optimizer.including( + "local_careduce_fusion", + "canonicalize", + "inplace", + ) + + x = tensor("floatX", shape=(None, None, None), name="x") + out = exp(x).sum(axis=axis) + + out_fn = function([x], out, mode=mode) + + if linker != "py": + (out_node,) = out_fn.maker.fgraph.toposort() + assert isinstance(getattr(out_node.op, "scalar_op"), aes.basic.Composite) + + rng = np.random.default_rng(2320) + x_val = rng.random((4, 3, 2), dtype=config.floatX) + + exp_res = np.exp(x_val).sum(axis=axis) + + out_val = out_fn(x_val) + assert out_val.shape == exp_res.shape + assert np.allclose(out_val, exp_res) + else: + out_nodes = out_fn.maker.fgraph.toposort() + assert not any( + isinstance(out_node.op.scalar_op, aes.basic.Composite) + for out_node in out_nodes + if hasattr(out_node.op, "scalar_op") + ) + + # `Elemwise`s with more than one client shouldn't be rewritten + x = tensor("floatX", shape=(None, None, None), name="x") + exp_x = exp(x) + out = exp_x.sum(axis=axis) + exp(x) + + out_fn = function([x], out, mode=mode) + out_nodes = out_fn.maker.fgraph.toposort() + assert not any( + isinstance(out_node.op.scalar_op, aes.basic.Composite) + for out_node in out_nodes + if hasattr(out_node.op, "scalar_op") + ) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("linker", ["cvm", "py"]) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1), (0, 1, 2)]) + def test_CAReduce_multiple_inputs(self, linker, axis): + """Make sure that `CAReduce` and `Elemwise` fusions work with multiple inputs.""" + + mode = Mode(linker=linker) + mode._optimizer = mode._optimizer.including( + "local_careduce_fusion", + "canonicalize", + "inplace", + ) + + x = tensor("floatX", shape=(None, None, None), name="x") + y = tensor("floatX", shape=(None, None, None), name="y") + out = (x + y).sum(axis=axis) + + out_fn = function([x, y], out, mode=mode) + (out_node,) = out_fn.maker.fgraph.toposort() + + assert isinstance(getattr(out_node.op, "scalar_op"), aes.basic.Composite) + + rng = np.random.default_rng(2320) + x_val = rng.random((4, 3, 2), dtype=config.floatX) + y_val = rng.random((4, 3, 2), dtype=config.floatX) + exp_res = (x_val + y_val).sum(axis=axis) + out_val = out_fn(x_val, y_val) + assert out_val.shape == exp_res.shape + assert np.allclose(out_val, exp_res) + class 
TimesN(aes.basic.UnaryScalarOp): """ diff --git a/tests/tensor/rewriting/test_math.py b/tests/tensor/rewriting/test_math.py index aaad958556..80e7ea5c45 100644 --- a/tests/tensor/rewriting/test_math.py +++ b/tests/tensor/rewriting/test_math.py @@ -161,9 +161,9 @@ def rewrite(g, level="fast_run"): def inputs(xbc=(0, 0), ybc=(0, 0), zbc=(0, 0)): - x = TensorType(shape=xbc, dtype="float64")("x") - y = TensorType(shape=ybc, dtype="float64")("y") - z = TensorType(shape=zbc, dtype="float64")("z") + x = TensorType(dtype="float64", shape=xbc)("x") + y = TensorType(dtype="float64", shape=ybc)("y") + z = TensorType(dtype="float64", shape=zbc)("z") return x, y, z @@ -959,11 +959,11 @@ def test_canonicalize_nan(self): def test_mismatching_types(self): a = at.as_tensor([[0.0]], dtype=np.float64) - b = tensor("float64", (None,)).dimshuffle("x", 0) + b = tensor("float64", shape=(None,)).dimshuffle("x", 0) z = add(a, b) # Construct a node with the wrong output `Type` z = Apply( - z.owner.op, z.owner.inputs, [tensor("float64", (None, None))] + z.owner.op, z.owner.inputs, [tensor("float64", shape=(None, None))] ).outputs[0] z_rewritten = rewrite_graph( @@ -1098,13 +1098,13 @@ def my_init(shp, dtype="float64", num=0): return ret fw, fx, fy, fz = [ - tensor(dtype="float32", shape=[False] * len(shp), name=n) for n in "wxyz" + tensor(dtype="float32", shape=(None,) * len(shp), name=n) for n in "wxyz" ] dw, dx, dy, dz = [ - tensor(dtype="float64", shape=[False] * len(shp), name=n) for n in "wxyz" + tensor(dtype="float64", shape=(None,) * len(shp), name=n) for n in "wxyz" ] ix, iy, iz = [ - tensor(dtype="int32", shape=[False] * len(shp), name=n) for n in "xyz" + tensor(dtype="int32", shape=(None,) * len(shp), name=n) for n in "xyz" ] fv = fvector("v") fs = fscalar("s") @@ -1739,15 +1739,15 @@ def my_init(shp, dtype="float64", num=0): f = function(list(sym_inputs), g, mode=mode) for x in range(nb_repeat): out = f(*val_inputs) - t1 = time.time() + t1 = time.perf_counter() else: out = shared_fn(np.zeros(shp, dtype=out_dtype), "out") assert out.dtype == g.dtype f = function(sym_inputs, [], updates=[(out, g)], mode=mode) - t0 = time.time() + t0 = time.perf_counter() for x in range(nb_repeat): f(*val_inputs) - t1 = time.time() + t1 = time.perf_counter() out = out.get_value() times[id] = t1 - t0 @@ -2327,11 +2327,11 @@ def speed_local_pow_specialize_range(): f1 = function([v], v**i, mode=mode) f2 = function([v], v**i, mode=mode_without_pow_rewrite) assert len(f1.maker.fgraph.toposort()) == 1 - t1 = time.time() + t1 = time.perf_counter() f1(val) - t2 = time.time() + t2 = time.perf_counter() f2(val) - t3 = time.time() + t3 = time.perf_counter() print(i, t2 - t1, t3 - t2, t2 - t1 < t3 - t2) if not t2 - t1 < t3 - t2: print("WARNING WE ARE SLOWER") @@ -2339,11 +2339,11 @@ def speed_local_pow_specialize_range(): f1 = function([v], v**i, mode=mode) f2 = function([v], v**i, mode=mode_without_pow_rewrite) assert len(f1.maker.fgraph.toposort()) == 1 - t1 = time.time() + t1 = time.perf_counter() f1(val) - t2 = time.time() + t2 = time.perf_counter() f2(val) - t3 = time.time() + t3 = time.perf_counter() print(i, t2 - t1, t3 - t2, t2 - t1 < t3 - t2) if not t2 - t1 < t3 - t2: print("WARNING WE ARE SLOWER") @@ -3113,11 +3113,11 @@ def speed_local_log_erfc(self): f2 = function([x], log(erfc(x)), mode=mode) print(f1.maker.fgraph.toposort()) print(f2.maker.fgraph.toposort()) - t0 = time.time() + t0 = time.perf_counter() f1(val) - t1 = time.time() + t1 = time.perf_counter() f2(val) - t2 = time.time() + t2 = time.perf_counter() print(t1 - 
t0, t2 - t1) @@ -3558,7 +3558,7 @@ def test_local_reduce_broadcast_all_0(self): at_max, at_min, ]: - x = TensorType("int64", (True, True, True))() + x = TensorType("int64", shape=(1, 1, 1))() f = function([x], [fct(x)], mode=self.mode) assert not any( isinstance(node.op, CAReduce) for node in f.maker.fgraph.toposort() @@ -3573,7 +3573,7 @@ def test_local_reduce_broadcast_all_1(self): at_max, at_min, ]: - x = TensorType("int64", (True, True))() + x = TensorType("int64", shape=(1, 1))() f = function([x], [fct(x, axis=[0, 1])], mode=self.mode) assert not any( isinstance(node.op, CAReduce) for node in f.maker.fgraph.toposort() @@ -3588,7 +3588,7 @@ def test_local_reduce_broadcast_some_0(self): at_max, at_min, ]: - x = TensorType("int64", (True, False, True))() + x = TensorType("int64", shape=(1, None, 1))() f = function([x], [fct(x, axis=[0, 1])], mode=self.mode) order = f.maker.fgraph.toposort() @@ -3613,7 +3613,7 @@ def test_local_reduce_broadcast_some_1(self): at_max, at_min, ]: - x = TensorType("int64", (True, True, True))() + x = TensorType("int64", shape=(1, 1, 1))() f = function([x], [fct(x, axis=[0, 2])], mode=self.mode) assert not any( isinstance(node.op, CAReduce) for node in f.maker.fgraph.toposort() @@ -4097,7 +4097,7 @@ def test_local_log_sum_exp_maximum(): check_max_log_sum_exp(x, axis=2, dimshuffle_op=transpose_op) # If the sum is performed with keepdims=True - x = TensorType(dtype="floatX", shape=(False, True, False))("x") + x = TensorType(dtype="floatX", shape=(None, 1, None))("x") sum_keepdims_op = x.sum(axis=(0, 1), keepdims=True).owner.op check_max_log_sum_exp(x, axis=(0, 1), dimshuffle_op=sum_keepdims_op) diff --git a/tests/tensor/rewriting/test_shape.py b/tests/tensor/rewriting/test_shape.py index 09dc0585d0..c7281dd80c 100644 --- a/tests/tensor/rewriting/test_shape.py +++ b/tests/tensor/rewriting/test_shape.py @@ -9,7 +9,7 @@ from aesara.compile.mode import get_default_mode, get_mode from aesara.compile.ops import deep_copy_op from aesara.configdefaults import config -from aesara.graph.basic import Apply, Variable +from aesara.graph.basic import Apply, Variable, equal_computations from aesara.graph.fg import FunctionGraph from aesara.graph.op import Op from aesara.graph.rewriting.basic import check_stack_trace, node_rewriter, out2in @@ -324,7 +324,7 @@ class TestLocalReshapeToDimshuffle: def setup_method(self): self.rng = np.random.default_rng(utt.fetch_seed()) - def test_1(self): + def test_basic(self): reshape_lift = out2in(local_reshape_to_dimshuffle) useless_reshape = out2in(local_useless_reshape) x = shared(self.rng.standard_normal((4,))) @@ -332,27 +332,27 @@ def test_1(self): reshape_x = reshape(x, (1, 4)) reshape_y = reshape(y, (1, 5, 1, 6, 1, 1)) - g = FunctionGraph([x, y], [reshape_x, reshape_y]) - assert str(g) == ( - "FunctionGraph(Reshape{2}" - "(, " - "TensorConstant{[1 4]}), " - "Reshape{6}" - "(, " - "TensorConstant{[1 5 1 6 1 1]}))" + g = FunctionGraph([x, y], [reshape_x, reshape_y], clone=False) + + assert equal_computations( + g.outputs, + [ + Reshape(2)(x, as_tensor_variable((1, 4), ndim=1)), + Reshape(6)(y, as_tensor_variable((1, 5, 1, 6, 1, 1), ndim=1)), + ], ) reshape_lift.rewrite(g) useless_reshape.rewrite(g) - assert str(g) == ( - "FunctionGraph(InplaceDimShuffle{x,0}" - "(), " - "InplaceDimShuffle{x,0,x,1,x,x}" - "(Reshape{2}(, " - "TensorConstant{[5 6]})))" + + exp_x = SpecifyShape()(x, 4).dimshuffle("x", 0) + assert equal_computations([g.outputs[0]], [exp_x]) + + exp_y = Reshape(2)(y, as_tensor_variable((5, 6), ndim=1)).dimshuffle( + "x", 0, 
"x", 1, "x", "x" ) + assert equal_computations([g.outputs[1]], [exp_y]) - # Check stacktrace was copied over correctly after the rewrite was applied assert check_stack_trace(g, ops_to_check=(DimShuffle, Reshape)) @@ -493,15 +493,15 @@ def test_local_Shape_of_SpecifyShape_partial(s1): assert not any(isinstance(apply.op, SpecifyShape) for apply in fgraph.apply_nodes) -def test_local_Shape_i_of_broadcastable(): - x = tensor(np.float64, [False, True]) +def test_local_Shape_i_ground(): + x = tensor(np.float64, shape=(None, 2)) s = Shape_i(1)(x) fgraph = FunctionGraph(outputs=[s], clone=False) _ = rewrite_graph(fgraph, clone=False) assert x not in fgraph.variables - assert fgraph.outputs[0].data == 1 + assert fgraph.outputs[0].data == 2 # A test for a non-`TensorType` class MyType(Type): diff --git a/tests/tensor/rewriting/test_subtensor.py b/tests/tensor/rewriting/test_subtensor.py index 754dfc6995..8aab0aa35a 100644 --- a/tests/tensor/rewriting/test_subtensor.py +++ b/tests/tensor/rewriting/test_subtensor.py @@ -86,7 +86,7 @@ def test_local_replace_AdvancedSubtensor(indices, is_none): X_val = np.random.normal(size=(4, 4, 4)) - X = tensor(np.float64, [False, False, False], name="X") + X = tensor(np.float64, shape=(None, None, None), name="X") X.tag.test_value = X_val Y = X[indices] @@ -1858,7 +1858,10 @@ def test_local_subtensor_of_alloc(): # DebugMode should detect if something goes wrong. # test shape combination of odd and event shape. for s in [(3, 5), (4, 6), (3, 8), (4, 7), (1, 5), (5, 1)]: - x = tensor(dtype=config.floatX, shape=(s[0] == 1, s[1] == 1)) + x = tensor( + dtype=config.floatX, + shape=(1 if s[0] == 1 else None, 1 if s[1] == 1 else None), + ) xval = np.zeros(s, dtype=config.floatX) yval = np.arange(s[1], dtype=config.floatX) @@ -1902,7 +1905,7 @@ def test_local_subtensor_of_alloc(): def test_local_subtensor_shape_constant(): - x = tensor(np.float64, [True, False]).shape[0] + x = tensor(np.float64, shape=(1, None)).shape[0] (res,) = local_subtensor_shape_constant.transform(None, x.owner) assert isinstance(res, Constant) assert res.data == 1 @@ -1912,21 +1915,21 @@ def test_local_subtensor_shape_constant(): assert isinstance(res, Constant) assert res.data == 1 - x = _shape(tensor(np.float64, [True, False]))[lscalar()] + x = _shape(tensor(np.float64, shape=(1, None)))[lscalar()] assert not local_subtensor_shape_constant.transform(None, x.owner) - x = _shape(tensor(np.float64, [True, False]))[0:] + x = _shape(tensor(np.float64, shape=(1, None)))[0:] assert not local_subtensor_shape_constant.transform(None, x.owner) - x = _shape(tensor(np.float64, [True, False]))[lscalar() :] + x = _shape(tensor(np.float64, shape=(1, None)))[lscalar() :] assert not local_subtensor_shape_constant.transform(None, x.owner) - x = _shape(tensor(np.float64, [True, True]))[1:] + x = _shape(tensor(np.float64, shape=(1, 1)))[1:] (res,) = local_subtensor_shape_constant.transform(None, x.owner) assert isinstance(res, Constant) assert np.array_equal(res.data, [1]) - x = _shape(tensor(np.float64, [False, True, True]))[1:] + x = _shape(tensor(np.float64, shape=(None, 1, 1)))[1:] (res,) = local_subtensor_shape_constant.transform(None, x.owner) assert isinstance(res, Constant) assert np.array_equal(res.data, [1, 1]) diff --git a/tests/tensor/rewriting/test_uncanonicalize.py b/tests/tensor/rewriting/test_uncanonicalize.py index 0f0bcd8534..b4de1e3866 100644 --- a/tests/tensor/rewriting/test_uncanonicalize.py +++ b/tests/tensor/rewriting/test_uncanonicalize.py @@ -192,7 +192,7 @@ def 
test_local_dimshuffle_subtensor(): assert not all(isinstance(x, DimShuffle) for x in topo) # Test dimshuffle remove dimensions the subtensor don't "see". - x = tensor(shape=(False, True, False), dtype="float64") + x = tensor(dtype="float64", shape=(None, 1, None)) out = x[i].dimshuffle(1) g = FunctionGraph([x, i], [out]) @@ -203,7 +203,7 @@ def test_local_dimshuffle_subtensor(): # Test dimshuffle remove dimensions the subtensor don't "see" but # have in between dimensions. - x = tensor(shape=(False, True, False, True), dtype="float64") + x = tensor(dtype="float64", shape=(None, 1, None, 1)) out = x[i].dimshuffle(1) f = aesara.function([x, i], out) diff --git a/tests/tensor/signal/test_conv.py b/tests/tensor/signal/test_conv.py index 62cfbb0cc1..8217989002 100644 --- a/tests/tensor/signal/test_conv.py +++ b/tests/tensor/signal/test_conv.py @@ -16,8 +16,8 @@ def validate(self, image_shape, filter_shape, out_dim, verify_grad=True): image_dim = len(image_shape) filter_dim = len(filter_shape) - input = TensorType("float64", [False] * image_dim)() - filters = TensorType("float64", [False] * filter_dim)() + input = TensorType("float64", shape=(None,) * image_dim)() + filters = TensorType("float64", shape=(None,) * filter_dim)() bsize = image_shape[0] if image_dim != 3: diff --git a/tests/tensor/signal/test_pool.py b/tests/tensor/signal/test_pool.py index 4539090236..c3318cb38e 100644 --- a/tests/tensor/signal/test_pool.py +++ b/tests/tensor/signal/test_pool.py @@ -1122,7 +1122,7 @@ def test_max_pool_2d_6D(self): rng = np.random.default_rng(utt.fetch_seed()) maxpoolshps = [(3, 2)] imval = rng.random((2, 1, 1, 1, 3, 4)) - images = TensorType("float64", [False] * 6)() + images = TensorType("float64", shape=(None,) * 6)() for maxpoolshp, ignore_border, mode in product( maxpoolshps, @@ -1204,7 +1204,7 @@ def test_infer_shape(self): warn=False, ) # checking with broadcastable input - image = tensor(dtype="float64", shape=(False, False, True, True)) + image = tensor(dtype="float64", shape=(None, None, 1, 1)) image_val = rng.random((4, 6, 1, 1)) self._compile_and_check( [image], diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index 9c123f4c3a..a38c4fa1da 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -458,10 +458,10 @@ def test_make_vector_fail(self): res = MakeVector("int32")(a, b) res = MakeVector()(a) - assert res.broadcastable == (True,) + assert res.type.shape == (1,) res = MakeVector()() - assert res.broadcastable == (False,) + assert res.type.shape == (0,) def test_infer_shape(self): adscal = dscalar() @@ -511,7 +511,7 @@ def perform(self, *args, **kwargs): def test_constant(): - int8_vector_type = TensorType(dtype="int8", shape=(False,)) + int8_vector_type = TensorType(dtype="int8", shape=(None,)) # Make sure we return a `TensorConstant` unchanged x = TensorConstant(int8_vector_type, [1, 2]) @@ -575,17 +575,17 @@ def test_list(self): as_tensor_variable(bad_apply_var) def test_ndim_strip_leading_broadcastable(self): - x = TensorType(config.floatX, (True, False))("x") + x = TensorType(config.floatX, shape=(1, None))("x") x = as_tensor_variable(x, ndim=1) assert x.ndim == 1 def test_ndim_all_broadcastable(self): - x = TensorType(config.floatX, (True, True))("x") + x = TensorType(config.floatX, shape=(1, 1))("x") res = as_tensor_variable(x, ndim=0) assert res.ndim == 0 def test_ndim_incompatible(self): - x = TensorType(config.floatX, (True, False))("x") + x = TensorType(config.floatX, shape=(1, None))("x") with pytest.raises(ValueError, match="^Tensor 
of type.*"): as_tensor_variable(x, ndim=0) @@ -661,7 +661,7 @@ def test_constant_identity(self): assert x_scalar is a_scalar x_vector = TensorConstant( - TensorType(dtype="int8", shape=(False,)), + TensorType(dtype="int8", shape=(None,)), np.array([1, 2], dtype="int8"), ) a_vector = as_tensor_variable(x_vector) @@ -975,7 +975,7 @@ class TestNonzero: @config.change_flags(compute_test_value="raise") def test_nonzero(self): def check(m): - m_symb = tensor(dtype=m.dtype, shape=(False,) * m.ndim) + m_symb = tensor(dtype=m.dtype, shape=(None,) * m.ndim) m_symb.tag.test_value = m res_tuple_at = nonzero(m_symb, return_matrix=False) @@ -1004,7 +1004,7 @@ def check(m): @config.change_flags(compute_test_value="raise") def test_flatnonzero(self): def check(m): - m_symb = tensor(dtype=m.dtype, shape=(False,) * m.ndim) + m_symb = tensor(dtype=m.dtype, shape=(None,) * m.ndim) m_symb.tag.test_value = m res_at = flatnonzero(m_symb) @@ -1033,7 +1033,7 @@ def check(m): @config.change_flags(compute_test_value="raise") def test_nonzero_values(self): def check(m): - m_symb = tensor(dtype=m.dtype, shape=(False,) * m.ndim) + m_symb = tensor(dtype=m.dtype, shape=(None,) * m.ndim) m_symb.tag.test_value = m res_at = nonzero_values(m_symb) @@ -1177,6 +1177,8 @@ def test_get_vector_length(): # Test `Alloc`s assert 3 == get_vector_length(alloc(0, 3)) + assert 5 == get_vector_length(tensor(np.float64, shape=(5,))) + class TestJoinAndSplit: # Split is tested by each verify_grad method. @@ -1660,21 +1662,21 @@ def test_broadcastable_flag_assignment_mixed_otheraxes(self): a_val = rng.random((1, 4, 1)).astype(self.floatX) b_val = rng.random((1, 3, 1)).astype(self.floatX) - a = self.shared(a_val, shape=(False, False, True)) - b = self.shared(b_val, shape=(True, False, True)) + a = self.shared(a_val, shape=(None, None, 1)) + b = self.shared(b_val, shape=(1, None, 1)) c = self.join_op(1, a, b) - assert c.type.broadcastable[0] and c.type.broadcastable[2] - assert not c.type.broadcastable[1] + assert c.type.shape[0] == 1 and c.type.shape[2] == 1 + assert c.type.shape[1] != 1 # Opt can remplace the int by an Aesara constant c = self.join_op(constant(1), a, b) - assert c.type.broadcastable[0] and c.type.broadcastable[2] - assert not c.type.broadcastable[1] + assert c.type.shape[0] == 1 and c.type.shape[2] == 1 + assert c.type.shape[1] != 1 # In case futur opt insert other useless stuff c = self.join_op(cast(constant(1), dtype="int32"), a, b) - assert c.type.broadcastable[0] and c.type.broadcastable[2] - assert not c.type.broadcastable[1] + assert c.type.shape[0] == 1 and c.type.shape[2] == 1 + assert c.type.shape[1] != 1 f = function([], c, mode=self.mode) topo = f.maker.fgraph.toposort() @@ -1698,10 +1700,10 @@ def test_broadcastable_flag_assignment_mixed_thisaxes(self): a_val = rng.random((2, 4, 1)).astype(self.floatX) b_val = rng.random((1, 4, 1)).astype(self.floatX) - a = self.shared(a_val, shape=(False, False, True)) - b = self.shared(b_val, shape=(True, False, True)) + a = self.shared(a_val, shape=(None, None, 1)) + b = self.shared(b_val, shape=(1, None, 1)) c = self.join_op(0, a, b) - assert not c.type.broadcastable[0] + assert c.type.shape[0] != 1 f = function([], c, mode=self.mode) topo = f.maker.fgraph.toposort() @@ -1715,8 +1717,8 @@ def test_broadcastable_flag_assignment_mixed_thisaxes(self): # We can't set the value| with pytest.raises(TypeError): b.set_value(rng.random((3, 4, 1)).astype(self.floatX)) - a = TensorType(dtype=self.floatX, shape=[False, False, True])() - b = TensorType(dtype=self.floatX, shape=[True, 
False, True])() + a = TensorType(dtype=self.floatX, shape=(None, None, 1))() + b = TensorType(dtype=self.floatX, shape=(1, None, 1))() c = self.join_op(0, a, b) f = function([a, b], c, mode=self.mode) bad_b_val = rng.random((3, 4, 1)).astype(self.floatX) @@ -1731,10 +1733,10 @@ def test_broadcastable_flags_all_broadcastable_on_joinaxis(self): a_val = rng.random((1, 4, 1)).astype(self.floatX) b_val = rng.random((1, 4, 1)).astype(self.floatX) - a = self.shared(a_val, shape=(True, False, True)) - b = self.shared(b_val, shape=(True, False, True)) + a = self.shared(a_val, shape=(1, None, 1)) + b = self.shared(b_val, shape=(1, None, 1)) c = self.join_op(0, a, b) - assert not c.type.broadcastable[0] + assert c.type.shape[0] != 1 f = function([], c, mode=self.mode) topo = f.maker.fgraph.toposort() @@ -1750,11 +1752,11 @@ def test_broadcastable_single_input_broadcastable_dimension(self): # single-input join. rng = np.random.default_rng(seed=utt.fetch_seed()) a_val = rng.random((1, 4, 1)).astype(self.floatX) - a = self.shared(a_val, shape=(True, False, True)) + a = self.shared(a_val, shape=(1, None, 1)) b = self.join_op(0, a) - assert b.type.broadcastable[0] - assert b.type.broadcastable[2] - assert not b.type.broadcastable[1] + assert b.type.shape[0] == 1 + assert b.type.shape[2] == 1 + assert b.type.shape[1] != 1 f = function([], b, mode=self.mode) topo = f.maker.fgraph.toposort() @@ -1774,29 +1776,19 @@ def test_broadcastable_single_input_broadcastable_dimension(self): def test_broadcastable_flags_many_dims_and_inputs(self): # Test that the right broadcastable flags get set for a join # with many inputs and many input dimensions. - a = TensorType( - dtype=self.floatX, shape=[True, False, True, False, False, False] - )() - b = TensorType( - dtype=self.floatX, shape=[True, True, True, False, False, False] - )() - c = TensorType( - dtype=self.floatX, shape=[True, False, False, False, False, False] - )() - d = TensorType( - dtype=self.floatX, shape=[True, False, True, True, False, True] - )() - e = TensorType( - dtype=self.floatX, shape=[True, False, True, False, False, True] - )() + a = TensorType(dtype=self.floatX, shape=(1, None, 1, None, None, None))() + b = TensorType(dtype=self.floatX, shape=(1, 1, 1, None, None, None))() + c = TensorType(dtype=self.floatX, shape=(1, None, None, None, None, None))() + d = TensorType(dtype=self.floatX, shape=(1, None, 1, 1, None, 1))() + e = TensorType(dtype=self.floatX, shape=(1, None, 1, None, None, 1))() f = self.join_op(0, a, b, c, d, e) - fb = f.type.broadcastable + fb = tuple(s == 1 for s in f.type.shape) assert not fb[0] and fb[1] and fb[2] and fb[3] and not fb[4] and fb[5] g = self.join_op(1, a, b, c, d, e) - gb = g.type.broadcastable + gb = tuple(s == 1 for s in g.type.shape) assert gb[0] and not gb[1] and gb[2] and gb[3] and not gb[4] and gb[5] h = self.join_op(4, a, b, c, d, e) - hb = h.type.broadcastable + hb = tuple(s == 1 for s in h.type.shape) assert hb[0] and hb[1] and hb[2] and hb[3] and not hb[4] and hb[5] f = function([a, b, c, d, e], f, mode=self.mode) @@ -1881,8 +1873,8 @@ def get_mat(s1, s2): def test_rebroadcast(self): # Regression test for a crash that used to happen when rebroadcasting. - x = TensorType(self.floatX, [False, False, True])() - u = TensorType(self.floatX, [False, False, True])() + x = TensorType(self.floatX, shape=(None, None, 1))() + u = TensorType(self.floatX, shape=(None, None, 1))() # This line used to crash. 
at.concatenate([x, -u], axis=2) @@ -1989,8 +1981,8 @@ def test_TensorFromScalar(): s = aes.constant(56) t = tensor_from_scalar(s) assert t.owner.op is tensor_from_scalar - assert t.type.broadcastable == (), t.type.broadcastable - assert t.type.ndim == 0, t.type.ndim + assert t.type.shape == () + assert t.type.ndim == 0 assert t.type.dtype == s.type.dtype v = eval_outputs([t]) @@ -2118,7 +2110,7 @@ def test_flatten_ndim2(): def test_flatten_ndim2_of_3(): - a = TensorType("float64", (False, False, False))() + a = TensorType("float64", shape=(None, None, None))() c = flatten(a, 2) f = inplace_func([a], c) a_val = _asarray([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], dtype="float64") @@ -2135,25 +2127,25 @@ def test_flatten_broadcastable(): # Ensure that the broadcastable pattern of the output is coherent with # that of the input - inp = TensorType("float64", (False, False, False, False))() + inp = TensorType("float64", shape=(None, None, None, None))() out = flatten(inp, ndim=2) - assert out.broadcastable == (False, False) + assert out.type.shape == (None, None) - inp = TensorType("float64", (False, False, False, True))() + inp = TensorType("float64", shape=(None, None, None, 1))() out = flatten(inp, ndim=2) - assert out.broadcastable == (False, False) + assert out.type.shape == (None, None) - inp = TensorType("float64", (False, True, False, True))() + inp = TensorType("float64", shape=(None, 1, None, 1))() out = flatten(inp, ndim=2) - assert out.broadcastable == (False, False) + assert out.type.shape == (None, None) - inp = TensorType("float64", (False, True, True, True))() + inp = TensorType("float64", shape=(None, 1, 1, 1))() out = flatten(inp, ndim=2) - assert out.broadcastable == (False, True) + assert out.type.shape == (None, 1) - inp = TensorType("float64", (True, False, True, True))() + inp = TensorType("float64", shape=(1, None, 1, 1))() out = flatten(inp, ndim=3) - assert out.broadcastable == (True, False, True) + assert out.type.shape == (1, None, 1) def test_flatten_ndim_invalid(): @@ -2946,10 +2938,10 @@ def permute_fixed(s_input): def test_3b_2(self): # Test permute_row_elements on a more complex broadcasting pattern: - # input.type.broadcastable = (False, True, False), - # p.type.broadcastable = (False, False). + # input.type.shape = (None, 1, None), + # p.type.shape = (None, None). - input = TensorType("floatX", (False, True, False))() + input = TensorType("floatX", shape=(None, 1, None))() p = imatrix() out = permute_row_elements(input, p) permute = function([input, p], out) @@ -3185,7 +3177,7 @@ def test_too_big(self): def test_len(): for shape_ in [(5,), (3, 4), (7, 4, 6)]: - x = tensor(dtype="floatX", shape=(False,) * len(shape_)) + x = tensor(dtype="floatX", shape=(None,) * len(shape_)) with pytest.raises(TypeError): len(x) @@ -3327,7 +3319,7 @@ def test_shape_i(self): assert get_scalar_constant_value(s) == 3 s = Shape_i(1)(c) assert get_scalar_constant_value(s) == 4 - d = aesara.shared(np.random.standard_normal((1, 1)), shape=(True, True)) + d = aesara.shared(np.random.standard_normal((1, 1)), shape=(1, 1)) f = ScalarFromTensor()(Shape_i(0)(d)) assert get_scalar_constant_value(f) == 1 @@ -3532,7 +3524,7 @@ def _generator(self): for d in range(1, dims + 1): # Create a TensorType of the same dimensions as # as the data we want to test. 
- x = TensorType(dtype=config.floatX, shape=(False,) * d)("x") + x = TensorType(dtype=config.floatX, shape=(None,) * d)("x") # Make a slice of the test data that has the # dimensions we need by doing xv[0,...,0] @@ -4054,7 +4046,7 @@ def test_broadcasted(self): B = np.asarray(np.random.random((4, 1)), dtype="float32") for m in self.modes: f = function([a, b], choose(a, b, mode=m)) - assert choose(a, b, mode=m).broadcastable[0] + assert choose(a, b, mode=m).type.shape[0] == 1 t_c = f(A, B) n_c = np.choose(A, B, mode=m) assert np.allclose(t_c, n_c) @@ -4117,8 +4109,10 @@ def test_infer_shape(self): ((4,), (1,)), ((1,), (1,)), ]: - a = tensor(dtype="int32", shape=[n == 1 for n in shp1]) - c = tensor(dtype="float32", shape=[n == 1 for n in shp2]) + a = tensor(dtype="int32", shape=tuple(1 if s == 1 else None for s in shp1)) + c = tensor( + dtype="float32", shape=tuple(1 if s == 1 else None for s in shp2) + ) A = np.asarray(np.random.random(shp1) * shp2[0], dtype="int32") C = np.asarray(np.random.random(shp2) * shp2[0], dtype="float32") self._compile_and_check( @@ -4268,8 +4262,12 @@ def test_take_along_axis(self, shape, axis, samples): indices_size[axis or 0] = samples indices = rng.integers(low=0, high=shape[axis or 0], size=indices_size) - arr_in = at.tensor(config.floatX, [s == 1 for s in arr.shape]) - indices_in = at.tensor(np.int64, [s == 1 for s in indices.shape]) + arr_in = at.tensor( + config.floatX, shape=tuple(1 if s == 1 else None for s in arr.shape) + ) + indices_in = at.tensor( + np.int64, shape=tuple(1 if s == 1 else None for s in indices.shape) + ) out = at.take_along_axis(arr_in, indices_in, axis) @@ -4280,12 +4278,12 @@ def test_take_along_axis(self, shape, axis, samples): ) def test_ndim_dtype_failures(self): - arr = at.tensor(config.floatX, [False] * 2) - indices = at.tensor(np.int64, [False] * 3) + arr = at.tensor(config.floatX, shape=(None,) * 2) + indices = at.tensor(np.int64, shape=(None,) * 3) with pytest.raises(ValueError): at.take_along_axis(arr, indices) - indices = at.tensor(np.float64, [False] * 2) + indices = at.tensor(np.float64, shape=(None,) * 2) with pytest.raises(IndexError): at.take_along_axis(arr, indices) diff --git a/tests/tensor/test_blas.py b/tests/tensor/test_blas.py index 96de2667ab..9c15a017c7 100644 --- a/tests/tensor/test_blas.py +++ b/tests/tensor/test_blas.py @@ -1469,7 +1469,7 @@ def test_gemv_broadcast(self): v2 = shared(v2_orig) m = shared( np.array(rng.uniform(size=(1, 2)), dtype="float32"), - shape=(True, False), + shape=(1, None), ) o = aesara.tensor.dot(m, v1) f = function([], o + v2, mode=mode_blas_opt) @@ -1779,20 +1779,20 @@ class TestDgemv(BaseGemv, unittest_tools.OptimizationTestMixin): class TestGerMakeNode: def setup_method(self): - self.iv = tensor(dtype="int32", shape=(False,)) - self.fv = tensor(dtype="float32", shape=(False,)) - self.fv1 = tensor(dtype="float32", shape=(True,)) - self.dv = tensor(dtype="float64", shape=(False,)) - self.dv1 = tensor(dtype="float64", shape=(True,)) - self.cv = tensor(dtype="complex64", shape=(False,)) - self.zv = tensor(dtype="complex128", shape=(False,)) - - self.fv_2 = tensor(dtype="float32", shape=(False,)) - self.fv1_2 = tensor(dtype="float32", shape=(True,)) - self.dv_2 = tensor(dtype="float64", shape=(False,)) - self.dv1_2 = tensor(dtype="float64", shape=(True,)) - self.cv_2 = tensor(dtype="complex64", shape=(False,)) - self.zv_2 = tensor(dtype="complex128", shape=(False,)) + self.iv = tensor(dtype="int32", shape=(None,)) + self.fv = tensor(dtype="float32", shape=(None,)) + self.fv1 = 
tensor(dtype="float32", shape=(1,)) + self.dv = tensor(dtype="float64", shape=(None,)) + self.dv1 = tensor(dtype="float64", shape=(1,)) + self.cv = tensor(dtype="complex64", shape=(None,)) + self.zv = tensor(dtype="complex128", shape=(None,)) + + self.fv_2 = tensor(dtype="float32", shape=(None,)) + self.fv1_2 = tensor(dtype="float32", shape=(1,)) + self.dv_2 = tensor(dtype="float64", shape=(None,)) + self.dv1_2 = tensor(dtype="float64", shape=(1,)) + self.cv_2 = tensor(dtype="complex64", shape=(None,)) + self.zv_2 = tensor(dtype="complex128", shape=(None,)) self.fm = fmatrix() self.dm = dmatrix() @@ -1866,10 +1866,10 @@ def setup_method(self): self.mode = aesara.compile.get_default_mode().including("fast_run") self.mode = self.mode.excluding("c_blas", "scipy_blas") dtype = self.dtype = "float64" # optimization isn't dtype-dependent - self.A = tensor(dtype=dtype, shape=(False, False)) + self.A = tensor(dtype=dtype, shape=(None, None)) self.a = tensor(dtype=dtype, shape=()) - self.x = tensor(dtype=dtype, shape=(False,)) - self.y = tensor(dtype=dtype, shape=(False,)) + self.x = tensor(dtype=dtype, shape=(None,)) + self.y = tensor(dtype=dtype, shape=(None,)) self.ger = ger self.ger_destructive = ger_destructive self.gemm = gemm_no_inplace @@ -2000,9 +2000,9 @@ def given_dtype(self, dtype, M, N, *, destructive=True): # test corner case shape and dtype rng = np.random.default_rng(unittest_tools.fetch_seed()) - A = tensor(dtype=dtype, shape=(False, False)) - x = tensor(dtype=dtype, shape=(False,)) - y = tensor(dtype=dtype, shape=(False,)) + A = tensor(dtype=dtype, shape=(None, None)) + x = tensor(dtype=dtype, shape=(None,)) + y = tensor(dtype=dtype, shape=(None,)) f = self.function([A, x, y], A + 0.1 * outer(x, y)) self.assertFunctionContains( diff --git a/tests/tensor/test_blas_c.py b/tests/tensor/test_blas_c.py index af6c5f761d..4205badb0b 100644 --- a/tests/tensor/test_blas_c.py +++ b/tests/tensor/test_blas_c.py @@ -41,10 +41,10 @@ def manual_setup_method(self, dtype="float64"): # This tests can run even when aesara.config.blas__ldflags is empty. 
self.dtype = dtype self.mode = aesara.compile.get_default_mode().including("fast_run") - self.A = tensor(dtype=dtype, shape=(False, False)) + self.A = tensor(dtype=dtype, shape=(None, None)) self.a = tensor(dtype=dtype, shape=()) - self.x = tensor(dtype=dtype, shape=(False,)) - self.y = tensor(dtype=dtype, shape=(False,)) + self.x = tensor(dtype=dtype, shape=(None,)) + self.y = tensor(dtype=dtype, shape=(None,)) self.Aval = np.ones((2, 3), dtype=dtype) self.xval = np.asarray([1, 2], dtype=dtype) self.yval = np.asarray([1.5, 2.7, 3.9], dtype=dtype) @@ -131,12 +131,12 @@ def setup_method(self): self.dtype = dtype self.mode = aesara.compile.get_default_mode().including("fast_run") # matrix - self.A = tensor(dtype=dtype, shape=(False, False)) + self.A = tensor(dtype=dtype, shape=(None, None)) self.Aval = np.ones((2, 3), dtype=dtype) # vector - self.x = tensor(dtype=dtype, shape=(False,)) - self.y = tensor(dtype=dtype, shape=(False,)) + self.x = tensor(dtype=dtype, shape=(None,)) + self.y = tensor(dtype=dtype, shape=(None,)) self.xval = np.asarray([1, 2], dtype=dtype) self.yval = np.asarray([1.5, 2.7, 3.9], dtype=dtype) diff --git a/tests/tensor/test_blas_scipy.py b/tests/tensor/test_blas_scipy.py index 25fe5316a7..2d81d87bb6 100644 --- a/tests/tensor/test_blas_scipy.py +++ b/tests/tensor/test_blas_scipy.py @@ -17,10 +17,10 @@ def setup_method(self): self.mode = self.mode.including("fast_run") self.mode = self.mode.excluding("c_blas") # c_blas trumps scipy Ops dtype = self.dtype = "float64" # optimization isn't dtype-dependent - self.A = tensor(dtype=dtype, shape=(False, False)) + self.A = tensor(dtype=dtype, shape=(None, None)) self.a = tensor(dtype=dtype, shape=()) - self.x = tensor(dtype=dtype, shape=(False,)) - self.y = tensor(dtype=dtype, shape=(False,)) + self.x = tensor(dtype=dtype, shape=(None,)) + self.y = tensor(dtype=dtype, shape=(None,)) self.Aval = np.ones((2, 3), dtype=dtype) self.xval = np.asarray([1, 2], dtype=dtype) self.yval = np.asarray([1.5, 2.7, 3.9], dtype=dtype) diff --git a/tests/tensor/test_casting.py b/tests/tensor/test_casting.py index e7f4e63fc5..3477897179 100644 --- a/tests/tensor/test_casting.py +++ b/tests/tensor/test_casting.py @@ -75,7 +75,7 @@ def test_illegal(self): ), ) def test_basic(self, type1, type2, converter): - x = TensorType(dtype=type1, shape=(False,))() + x = TensorType(dtype=type1, shape=(None,))() y = converter(x) f = function([In(x, strict=True)], y) a = np.arange(10, dtype=type1) @@ -86,8 +86,8 @@ def test_convert_to_complex(self): val64 = np.ones(3, dtype="complex64") + 0.5j val128 = np.ones(3, dtype="complex128") + 0.5j - vec64 = TensorType("complex64", (False,))() - vec128 = TensorType("complex128", (False,))() + vec64 = TensorType("complex64", shape=(None,))() + vec128 = TensorType("complex128", shape=(None,))() f = function([vec64], _convert_to_complex128(vec64)) # we need to compare with the same type. 
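The hunks above and below repeat one mechanical change: the broadcastable-flags spelling of `TensorType`/`tensor` (e.g. `shape=(False, True)`) becomes the static-shape spelling, where `None` marks an unknown extent and `1` marks a broadcastable dimension, and assertions on `.broadcastable` become assertions on `.type.shape`. A minimal sketch of the equivalence, assuming an Aesara version whose `TensorType` accepts the `shape` keyword (as these tests do); the variable name is illustrative only:

    from aesara.tensor.type import TensorType

    # Old spelling (removed throughout this diff): broadcastable flags,
    # where True means a broadcastable (length-1) dimension.
    # x = TensorType("float64", (False, True))("x")

    # New spelling: static shape, where None means "unknown extent"
    # and 1 means "broadcastable".
    x = TensorType("float64", shape=(None, 1))("x")

    assert x.type.shape == (None, 1)
    assert x.broadcastable == (False, True)

The timing hunks make a similarly mechanical swap from `time.time()` to `time.perf_counter()`, which is monotonic and has higher resolution, so it is better suited to measuring the short intervals these benchmarks record.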
diff --git a/tests/tensor/test_elemwise.py b/tests/tensor/test_elemwise.py index 6bd514f277..435cd5b216 100644 --- a/tests/tensor/test_elemwise.py +++ b/tests/tensor/test_elemwise.py @@ -17,7 +17,7 @@ from aesara.link.c.basic import CLinker, OpWiseCLinker from aesara.tensor import as_tensor_variable from aesara.tensor.basic import second -from aesara.tensor.elemwise import CAReduce, CAReduceDtype, DimShuffle, Elemwise +from aesara.tensor.elemwise import CAReduce, DimShuffle, Elemwise from aesara.tensor.exceptions import ShapeError from aesara.tensor.math import all as at_all from aesara.tensor.math import any as at_any @@ -56,13 +56,14 @@ def with_linker(self, linker): ((1, 1, 1), (), ()), ((1,), ("x", "x"), (1, 1)), ]: - ib = [(entry == 1) for entry in xsh] - x = self.type(self.dtype, ib)("x") + i_shape = [entry if entry == 1 else None for entry in xsh] + ib = [entry == 1 for entry in i_shape] + x = self.type(self.dtype, shape=i_shape)("x") e = self.op(ib, shuffle)(x) f = aesara.function([x], e, mode=Mode(linker=linker)) assert f(np.ones(xsh, dtype=self.dtype)).shape == zsh # test that DimShuffle.infer_shape work correctly - x = self.type(self.dtype, ib)("x") + x = self.type(self.dtype, shape=i_shape)("x") e = self.op(ib, shuffle)(x) f = aesara.function( [x], e.shape, mode=Mode(linker=linker), on_unused_input="ignore" @@ -71,13 +72,13 @@ def with_linker(self, linker): # Test when we drop a axis that is not broadcastable ib = [False, True, False] - x = self.type(self.dtype, ib)("x") + x = self.type(self.dtype, shape=(None, 1, None))("x") with pytest.raises(ValueError): self.op(ib, shuffle) # Test when we drop a axis that don't have shape 1 ib = [True, True, False] - x = self.type(self.dtype, ib)("x") + x = self.type(self.dtype, shape=(1, 1, None))("x") e = self.op(ib, (1, 2))(x) f = aesara.function([x], e.shape, mode=Mode(linker=linker)) with pytest.raises(TypeError): @@ -86,7 +87,7 @@ def with_linker(self, linker): # Test that we can't take a dimensions multiple time xsh, shuffle, zsh = ((1, 1, 4), (0, 1, 2, 0), (1, 4)) ib = [False, True, False] - x = self.type(self.dtype, ib)("x") + x = self.type(self.dtype, shape=(None, 1, None))("x") with pytest.raises(ValueError): DimShuffle(ib, shuffle) @@ -111,8 +112,9 @@ def test_infer_shape(self): ((1, 1, 1), ()), ((1,), ("x", "x")), ]: + i_shape = [entry if entry == 1 else None for entry in xsh] ib = [(entry == 1) for entry in xsh] - adtens = self.type(self.dtype, ib)("x") + adtens = self.type(self.dtype, shape=i_shape)("x") adtens_val = np.ones(xsh, dtype=self.dtype) self._compile_and_check( [adtens], @@ -234,11 +236,11 @@ def with_linker(self, linker, op, type, rand_val): # type shape provided by Aesara was broadcastable/non-broadcastable x_type = type( aesara.config.floatX, - broadcastable=[(entry == 1) for entry in xsh], + shape=tuple(s if s == 1 else None for s in xsh), ) y_type = type( aesara.config.floatX, - broadcastable=[(entry == 1) for entry in ysh], + shape=tuple(s if s == 1 else None for s in ysh), ) else: x_type = type(aesara.config.floatX, shape=[None for _ in xsh]) @@ -285,11 +287,11 @@ def with_linker_inplace(self, linker, op, type, rand_val): # type shape provided by Aesara was broadcastable/non-broadcastable x_type = type( aesara.config.floatX, - broadcastable=[(entry == 1) for entry in xsh], + shape=tuple(s if s == 1 else None for s in xsh), ) y_type = type( aesara.config.floatX, - broadcastable=[(entry == 1) for entry in ysh], + shape=tuple(s if s == 1 else None for s in ysh), ) else: x_type = type(aesara.config.floatX, 
shape=[None for _ in xsh]) @@ -349,8 +351,8 @@ def test_fill(self): [self.type, self.ctype], [self.rand_val, self.rand_cval], ): - x = t(aesara.config.floatX, (False, False))("x") - y = t(aesara.config.floatX, (True, True))("y") + x = t(aesara.config.floatX, shape=(None, None))("x") + y = t(aesara.config.floatX, shape=(1, 1))("y") e = op(aes.Second(aes.transfer_type(0)), {0: 0})(x, y) f = make_function(linker().accept(FunctionGraph([x, y], [e]))) xv = rval((5, 5)) @@ -363,11 +365,10 @@ def test_fill_var(self): x.fill(3) def test_fill_grad(self): - # Fix bug reported at - # https://groups.google.com/d/topic/theano-users/nQshB8gUA6k/discussion - x = TensorType(config.floatX, (False, True, False))("x") - y = TensorType(config.floatX, (False, True, False))("y") + x = TensorType(config.floatX, shape=(None, 1, None))("x") + y = TensorType(config.floatX, shape=(None, 1, None))("y") e = second(x, y) + # TODO FIXME: Make this a real test and assert something here! aesara.grad(e.sum(), y) @pytest.mark.skipif( @@ -380,8 +381,8 @@ def test_weird_strides(self): [self.type, self.ctype], [self.rand_val, self.rand_cval], ): - x = t(aesara.config.floatX, (False,) * 5)("x") - y = t(aesara.config.floatX, (False,) * 5)("y") + x = t(aesara.config.floatX, shape=(None,) * 5)("x") + y = t(aesara.config.floatX, shape=(None,) * 5)("y") e = op(aes.add)(x, y) f = make_function(linker().accept(FunctionGraph([x, y], [e]))) xv = rval((2, 2, 2, 2, 2)) @@ -399,7 +400,7 @@ def test_same_inputs(self): [self.type, self.ctype], [self.rand_val, self.rand_cval], ): - x = t(aesara.config.floatX, (False,) * 2)("x") + x = t(aesara.config.floatX, shape=(None,) * 2)("x") e = op(aes.add)(x, x) f = make_function(linker().accept(FunctionGraph([x], [e]))) xv = rval((2, 2)) @@ -440,7 +441,9 @@ def with_mode( for xsh, tosum in self.cases: if dtype == "floatX": dtype = aesara.config.floatX - x = self.type(dtype, [(entry == 1) for entry in xsh])("x") + x = self.type( + dtype, shape=tuple(entry if entry == 1 else None for entry in xsh) + )("x") d = {} if pre_scalar_op is not None: d = {"pre_scalar_op": pre_scalar_op} @@ -529,26 +532,20 @@ def with_mode( for axis in reversed(sorted(tosum)): zv = np.bitwise_xor.reduce(zv, axis) else: - raise Exception( + raise NotImplementedError( f"Test for CAReduce with scalar_op {scalar_op} not implemented" ) if test_nan: - try: - assert self.type.values_eq(f(xv), zv), (f(xv), zv) - except NotImplementedError: - # GpuCAReduce don't implement all cases when size is 0 - assert xv.size == 0 + assert self.type.values_eq(f(xv), zv), (f(xv), zv) else: - try: - f_xv = f(xv) - assert f_xv.shape == zv.shape, (f_xv, zv) - utt.assert_allclose(zv, f_xv) - except NotImplementedError: - # GpuCAReduce don't implement all cases when size is 0 - assert xv.size == 0 - - x = self.type(dtype, [(entry == 1) for entry in xsh])("x") + f_xv = f(xv) + assert f_xv.shape == zv.shape, (f_xv, zv) + utt.assert_allclose(zv, f_xv) + + x = self.type( + dtype, shape=tuple(entry if entry == 1 else None for entry in xsh) + )("x") if tensor_op is None: e = self.op(scalar_op, axis=tosum)(x) else: @@ -560,11 +557,7 @@ def with_mode( scalar_op in [aes.scalar_maximum, aes.scalar_minimum] and (xsh == () or np.prod(xsh) == 0) ): - try: - assert all(f(xv) == zv.shape) - except NotImplementedError: - # GpuCAReduce don't implement all cases when size is 0 - assert xv.size == 0 + assert all(f(xv) == zv.shape) def test_perform_noopt(self): self.with_mode(Mode(linker="py", optimizer=None), aes.add, dtype="floatX") @@ -653,7 +646,9 @@ def 
test_infer_shape(self, dtype=None, pre_scalar_op=None): if dtype is None: dtype = aesara.config.floatX for xsh, tosum in self.cases: - x = self.type(dtype, [(entry == 1) for entry in xsh])("x") + x = self.type( + dtype, shape=tuple(entry if entry == 1 else None for entry in xsh) + )("x") if pre_scalar_op is not None: x = pre_scalar_op(x) if tosum is None: @@ -676,12 +671,12 @@ def test_str(self): op = CAReduce(aes.add, axis=None) assert str(op) == "CAReduce{add}" op = CAReduce(aes.add, axis=(1,)) - assert str(op) == "CAReduce{add}{1}" + assert str(op) == "CAReduce{add}{axis=[1]}" - op = CAReduceDtype(aes.add, axis=None, acc_dtype="float64") - assert str(op) == "CAReduceDtype{add}{acc_dtype=float64}" - op = CAReduceDtype(aes.add, axis=(1,), acc_dtype="float64") - assert str(op) == "CAReduceDtype{add}{axis=[1], acc_dtype=float64}" + op = CAReduce(aes.add, axis=None, acc_dtype="float64") + assert str(op) == "CAReduce{add}{acc_dtype=float64}" + op = CAReduce(aes.add, axis=(1,), acc_dtype="float64") + assert str(op) == "CAReduce{add}{axis=[1], acc_dtype=float64}" def test_repeated_axis(self): x = vector("x") @@ -749,8 +744,12 @@ def test_infer_shape(self): ((2, 3, 4, 1), (2, 3, 4, 5)), ]: dtype = aesara.config.floatX - t_left = TensorType(dtype, [(entry == 1) for entry in s_left])() - t_right = TensorType(dtype, [(entry == 1) for entry in s_right])() + t_left = TensorType( + dtype, shape=tuple(entry if entry == 1 else None for entry in s_left) + )() + t_right = TensorType( + dtype, shape=tuple(entry if entry == 1 else None for entry in s_right) + )() t_left_val = np.zeros(s_left, dtype=dtype) t_right_val = np.zeros(s_right, dtype=dtype) self._compile_and_check( @@ -857,7 +856,7 @@ def test_shape_types(self): def test_static_shape_unary(self): x = tensor("float64", shape=(None, 0, 1, 5)) - exp(x).type.shape == (None, 0, 1, 5) + assert exp(x).type.shape == (None, 0, 1, 5) def test_static_shape_binary(self): x = tensor("float64", shape=(None, 5)) diff --git a/tests/tensor/test_extra_ops.py b/tests/tensor/test_extra_ops.py index 9c88453420..f56e5b0358 100644 --- a/tests/tensor/test_extra_ops.py +++ b/tests/tensor/test_extra_ops.py @@ -337,10 +337,10 @@ def test_perform(self, axis, n): @pytest.mark.parametrize( "x_type", ( - at.TensorType("float64", (None, None)), - at.TensorType("float64", (None, 30)), - at.TensorType("float64", (10, None)), - at.TensorType("float64", (10, 30)), + at.TensorType("float64", shape=(None, None)), + at.TensorType("float64", shape=(None, 30)), + at.TensorType("float64", shape=(10, None)), + at.TensorType("float64", shape=(10, 30)), ), ) @pytest.mark.parametrize("axis", (-2, -1, 0, 1)) @@ -363,19 +363,19 @@ def setup_method(self): self.op = squeeze @pytest.mark.parametrize( - "shape, broadcast", + "shape, var_shape", zip( [(1, 3), (1, 2, 3), (1, 5, 1, 1, 6)], [ - [True, False], - [True, False, False], - [True, False, True, True, False], + [1, None], + [1, None, None], + [1, None, 1, 1, None], ], ), ) - def test_op(self, shape, broadcast): + def test_op(self, shape, var_shape): data = np.random.random(size=shape).astype(config.floatX) - variable = TensorType(config.floatX, broadcast)() + variable = TensorType(config.floatX, shape=var_shape)() f = aesara.function([variable], self.op(variable)) @@ -386,19 +386,19 @@ def test_op(self, shape, broadcast): assert np.allclose(tested, expected) @pytest.mark.parametrize( - "shape, broadcast", + "shape, var_shape", zip( [(1, 3), (1, 2, 3), (1, 5, 1, 1, 6)], [ - [True, False], - [True, False, False], - [True, False, True, True, 
False], + [1, None], + [1, None, None], + [1, None, 1, 1, None], ], ), ) - def test_infer_shape(self, shape, broadcast): + def test_infer_shape(self, shape, var_shape): data = np.random.random(size=shape).astype(config.floatX) - variable = TensorType(config.floatX, broadcast)() + variable = TensorType(config.floatX, shape=var_shape)() self._compile_and_check( [variable], [self.op(variable)], [data], DimShuffle, warn=False @@ -420,20 +420,20 @@ def test_grad(self, shape, broadcast): utt.verify_grad(self.op, [data]) @pytest.mark.parametrize( - "shape, broadcast", + "shape, var_shape", zip( [(1, 3), (1, 2, 3), (1, 5, 1, 1, 6)], [ - [True, False], - [True, False, False], - [True, False, True, True, False], + [1, None], + [1, None, None], + [1, None, 1, 1, None], ], ), ) - def test_var_interface(self, shape, broadcast): + def test_var_interface(self, shape, var_shape): # same as test_op, but use a_aesara_var.squeeze. data = np.random.random(size=shape).astype(config.floatX) - variable = TensorType(config.floatX, broadcast)() + variable = TensorType(config.floatX, shape=var_shape)() f = aesara.function([variable], variable.squeeze()) @@ -444,29 +444,29 @@ def test_var_interface(self, shape, broadcast): assert np.allclose(tested, expected) def test_axis(self): - variable = TensorType(config.floatX, [False, True, False])() + variable = TensorType(config.floatX, shape=(None, 1, None))() res = squeeze(variable, axis=1) assert res.broadcastable == (False, False) - variable = TensorType(config.floatX, [False, True, False])() + variable = TensorType(config.floatX, shape=(None, 1, None))() res = squeeze(variable, axis=(1,)) assert res.broadcastable == (False, False) - variable = TensorType(config.floatX, [False, True, False, True])() + variable = TensorType(config.floatX, shape=(None, 1, None, 1))() res = squeeze(variable, axis=(1, 3)) assert res.broadcastable == (False, False) - variable = TensorType(config.floatX, [True, False, True, False, True])() + variable = TensorType(config.floatX, shape=(1, None, 1, None, 1))() res = squeeze(variable, axis=(0, -1)) assert res.broadcastable == (False, True, False) def test_invalid_axis(self): # Test that trying to squeeze a non broadcastable dimension raises error - variable = TensorType(config.floatX, [True, False])() + variable = TensorType(config.floatX, shape=(1, None))() with pytest.raises( ValueError, match="Cannot drop a non-broadcastable dimension" ): @@ -540,7 +540,7 @@ def setup_method(self): def test_basic(self, ndim, dtype): rng = np.random.default_rng(4282) - x = TensorType(config.floatX, [False] * ndim)() + x = TensorType(config.floatX, (None,) * ndim)() a = rng.random((10,) * ndim).astype(config.floatX) for axis in self._possible_axis(ndim): @@ -579,7 +579,7 @@ def test_basic(self, ndim, dtype): ) # check when r is aesara tensortype that broadcastable is (True,) - r_var = TensorType(shape=(True,), dtype=dtype)() + r_var = TensorType(dtype=dtype, shape=(1,))() r = rng.integers(1, 6, size=(1,)).astype(dtype) f = aesara.function([x, r_var], repeat(x, r_var, axis=axis)) assert np.allclose(np.repeat(a, r[0], axis=axis), f(a, r)) @@ -593,7 +593,7 @@ def test_basic(self, ndim, dtype): def test_infer_shape(self, ndim, dtype): rng = np.random.default_rng(4282) - x = TensorType(config.floatX, [False] * ndim)() + x = TensorType(config.floatX, shape=(None,) * ndim)() shp = (np.arange(ndim) + 1) * 3 a = rng.random(shp).astype(config.floatX) @@ -635,7 +635,7 @@ def test_grad(self, ndim): utt.verify_grad(lambda x: Repeat(axis=axis)(x, 3), [a]) def 
test_broadcastable(self): - x = TensorType(config.floatX, [False, True, False])() + x = TensorType(config.floatX, shape=(None, 1, None))() r = Repeat(axis=1)(x, 2) assert r.broadcastable == (False, False, False) r = Repeat(axis=1)(x, 1) @@ -1333,7 +1333,7 @@ def test_gradient(self, fn, input_dims): def test_infer_shape(self): rng = np.random.default_rng(43) - a = tensor(config.floatX, [False, True, False]) + a = tensor(config.floatX, shape=(None, 1, None)) shape = list(a.shape) out = self.op(a, shape) @@ -1344,7 +1344,7 @@ def test_infer_shape(self): self.op_class, ) - a = tensor(config.floatX, [False, True, False]) + a = tensor(config.floatX, shape=(None, 1, None)) shape = [iscalar() for i in range(4)] self._compile_and_check( [a] + shape, diff --git a/tests/tensor/test_gc.py b/tests/tensor/test_gc.py index ced09d6724..4883cfbdc4 100644 --- a/tests/tensor/test_gc.py +++ b/tests/tensor/test_gc.py @@ -112,13 +112,13 @@ def test_merge_opt_runtime(): for i in range(50): r = r + r / 10 - t = time.time() + t = time.perf_counter() aesara.function([x], r, mode="FAST_COMPILE") # FAST_RUN does in-place optimizer which requires a lot of # toposorting, which is actually pretty slow at the moment. This # test was designed to test MergeOptimizer... so I'm leaving # toposort optimizations for a later date. - dt = time.time() - t + dt = time.perf_counter() - t # it should never take longer than 5 seconds to compile this graph assert dt < 5.0, dt diff --git a/tests/tensor/test_io.py b/tests/tensor/test_io.py index addc0a54bf..64cad51b0d 100644 --- a/tests/tensor/test_io.py +++ b/tests/tensor/test_io.py @@ -20,7 +20,7 @@ def test_basic(self): path = Variable(Generic(), None) # Not specifying mmap_mode defaults to None, and the data is # copied into main memory - x = load(path, "int32", (False,)) + x = load(path, "int32", (None,)) y = x * 2 fn = function([path], y) assert (fn(self.filename) == (self.data * 2)).all() @@ -32,14 +32,14 @@ def test_invalid_modes(self): path = Variable(Generic(), None) for mmap_mode in ("r+", "r", "w+", "toto"): with pytest.raises(ValueError): - load(path, "int32", (False,), mmap_mode) + load(path, "int32", (None,), mmap_mode) - def test1(self): + def test_copy_on_write(self): path = Variable(Generic(), None) # 'c' means "copy-on-write", which allow the array to be overwritten # by an inplace Op in the graph, without modifying the underlying # file. - x = load(path, "int32", (False,), "c") + x = load(path, "int32", (None,), "c") # x ** 2 has been chosen because it will work inplace. 
y = (x**2).sum() fn = function([path], y) @@ -49,7 +49,7 @@ def test1(self): def test_memmap(self): path = Variable(Generic(), None) - x = load(path, "int32", (False,), mmap_mode="c") + x = load(path, "int32", (None,), mmap_mode="c") fn = function([path], x) assert type(fn(self.filename)) == np.core.memmap diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py index 71114a03dd..2d1d12ddf2 100644 --- a/tests/tensor/test_math.py +++ b/tests/tensor/test_math.py @@ -562,7 +562,7 @@ def test_maximum_minimum_grad(): def test_py_c_match(): - a = TensorType(dtype="int8", shape=(False,))() + a = TensorType(dtype="int8", shape=(None,))() f = function([a], arccos(a), mode="DebugMode") # This can fail in DebugMode f(np.asarray([1, 0, -1], dtype="int8")) @@ -1460,8 +1460,8 @@ class TestOuter: def test_outer(self): for m in range(4): for n in range(4): - x = tensor(dtype="floatX", shape=(False,) * m) - y = tensor(dtype="floatX", shape=(False,) * n) + x = tensor(dtype="floatX", shape=(None,) * m) + y = tensor(dtype="floatX", shape=(None,) * n) s1 = self.rng.integers(1, 10, m) s2 = self.rng.integers(1, 10, n) v1 = np.asarray(self.rng.random(s1)).astype(config.floatX) @@ -1927,21 +1927,21 @@ def is_super_shape(var1, var2): for dtype0 in ("float32", "float64", "complex64"): for dtype1 in ("float32", "complex64", "complex128"): for bc0 in ( - (True,), - (False,), - (True, True), - (True, False), - (False, True), - (False, False), + (1,), + (None,), + (1, 1), + (1, None), + (None, 1), + (None, None), ): x = TensorType(dtype=dtype0, shape=bc0)() for bc1 in ( - (True,), - (False,), - (True, True), - (True, False), - (False, True), - (False, False), + (1,), + (None,), + (1, 1), + (1, None), + (None, 1), + (None, None), ): y = TensorType(dtype=dtype1, shape=bc1)() @@ -2117,7 +2117,7 @@ def test_scalar0(self): def test_broadcastable1(self): rng = np.random.default_rng(seed=utt.fetch_seed()) - x = TensorType(dtype=config.floatX, shape=(True, False, False))("x") + x = TensorType(dtype=config.floatX, shape=(1, None, None))("x") y = tensor3("y") z = tensordot(x, y) assert z.broadcastable == (True, False) @@ -2129,7 +2129,7 @@ def test_broadcastable1(self): def test_broadcastable2(self): rng = np.random.default_rng(seed=utt.fetch_seed()) - x = TensorType(dtype=config.floatX, shape=(True, False, False))("x") + x = TensorType(dtype=config.floatX, shape=(1, None, None))("x") y = tensor3("y") axes = [[2, 1], [0, 1]] z = tensordot(x, y, axes=axes) @@ -2156,7 +2156,7 @@ def test_smallest(): def test_var(): - a = TensorType(dtype="float64", shape=[False, False, False])() + a = TensorType(dtype="float64", shape=(None, None, None))() f = function([a], var(a)) a_val = np.arange(6).reshape(1, 2, 3) @@ -2206,7 +2206,7 @@ def test_var(): class TestSum: def test_sum_overflow(self): # Ensure that overflow errors are a little bit harder to get - a = TensorType(dtype="int8", shape=[False])() + a = TensorType(dtype="int8", shape=(None,))() f = function([a], at_sum(a)) assert f([1] * 300) == 300 @@ -3262,7 +3262,7 @@ def test_grad_useless_sum(): mode = get_default_mode().including("canonicalize") mode.check_isfinite = False - x = TensorType(config.floatX, (True,))("x") + x = TensorType(config.floatX, shape=(1,))("x") l = log(1.0 - sigmoid(x))[0] g = grad(l, x) @@ -3287,8 +3287,8 @@ def test_tanh_grad_broadcast(): # FIXME: This is not a real test. # This crashed in the past. 
- x = tensor(dtype="float32", shape=(True, False, False, False)) - y = tensor(dtype="float32", shape=(True, True, False, False)) + x = tensor(dtype="float32", shape=(1, None, None, None)) + y = tensor(dtype="float32", shape=(1, 1, None, None)) # TODO FIXME: This is a bad test grad(tanh(x).sum(), x) diff --git a/tests/tensor/test_shape.py b/tests/tensor/test_shape.py index 1db5d510ec..e93829d6d2 100644 --- a/tests/tensor/test_shape.py +++ b/tests/tensor/test_shape.py @@ -9,6 +9,7 @@ from aesara.graph.fg import FunctionGraph from aesara.graph.type import Type from aesara.misc.safe_asarray import _asarray +from aesara.scalar.basic import ScalarConstant from aesara.tensor import as_tensor_variable, get_vector_length, row from aesara.tensor.basic import MakeVector, constant from aesara.tensor.elemwise import DimShuffle, Elemwise @@ -22,6 +23,7 @@ reshape, shape, shape_i, + shape_tuple, specify_broadcastable, specify_shape, unbroadcast, @@ -46,19 +48,20 @@ from aesara.tensor.var import TensorVariable from aesara.typed_list import make_list from tests import unittest_tools as utt +from tests.graph.utils import MyType2 from tests.tensor.utils import eval_outputs, random from tests.test_rop import RopLopChecker def test_shape_basic(): s = shape([]) - assert s.type.broadcastable == (True,) + assert s.type.shape == (1,) s = shape([10]) - assert s.type.broadcastable == (True,) + assert s.type.shape == (1,) s = shape(lscalar()) - assert s.type.broadcastable == (False,) + assert s.type.shape == (0,) class MyType(Type): def filter(self, *args, **kwargs): @@ -68,7 +71,7 @@ def __eq__(self, other): return isinstance(other, MyType) and other.thingy == self.thingy s = shape(Variable(MyType(), None)) - assert s.type.broadcastable == (False,) + assert s.type.shape == (None,) s = shape(np.array(1)) assert np.array_equal(eval_outputs([s]), []) @@ -116,15 +119,14 @@ def test_basics(self): b = dmatrix() d = dmatrix() - # basic to 1 dim(without list) - c = reshape(b, as_tensor_variable(6), ndim=1) - f = self.function([b], c) - b_val1 = np.asarray([[0, 1, 2], [3, 4, 5]]) c_val1 = np.asarray([0, 1, 2, 3, 4, 5]) b_val2 = b_val1.T c_val2 = np.asarray([0, 3, 1, 4, 2, 5]) + # basic to 1 dim(without list) + c = reshape(b, as_tensor_variable(6), ndim=1) + f = self.function([b], c) f_out1 = f(b_val1) f_out2 = f(b_val2) assert np.array_equal(f_out1, c_val1), (f_out1, c_val1) @@ -188,10 +190,10 @@ def just_vals(v): f(np.asarray([[0, 1, 2], [3, 4, 5]])), np.asarray([[[0], [1], [2]], [[3], [4], [5]]]), ) - assert f.maker.fgraph.toposort()[-1].outputs[0].type.broadcastable == ( - False, - False, - True, + assert f.maker.fgraph.toposort()[-1].outputs[0].type.shape == ( + None, + None, + 1, ) # test broadcast flag for constant value of 1 if it cannot be @@ -202,10 +204,10 @@ def just_vals(v): f(np.asarray([[0, 1, 2], [3, 4, 5]])), np.asarray([[[0], [1]], [[2], [3]], [[4], [5]]]), ) - assert f.maker.fgraph.toposort()[-1].outputs[0].type.broadcastable == ( - False, - False, - True, + assert f.maker.fgraph.toposort()[-1].outputs[0].type.shape == ( + None, + None, + 1, ) def test_m1(self): @@ -657,3 +659,18 @@ def test_basic(self): Unbroadcast, warn=False, ) + + +def test_shape_tuple(): + + x = Variable(MyType2(), None, None) + assert shape_tuple(x) == () + + x = tensor(np.float64, shape=(1, 2, None)) + res = shape_tuple(x) + assert isinstance(res, tuple) + assert isinstance(res[0], ScalarConstant) + assert res[0].data == 1 + assert isinstance(res[1], ScalarConstant) + assert res[1].data == 2 + assert not isinstance(res[2], 
ScalarConstant) diff --git a/tests/tensor/test_sharedvar.py b/tests/tensor/test_sharedvar.py index 7dba717209..65e5e9ec82 100644 --- a/tests/tensor/test_sharedvar.py +++ b/tests/tensor/test_sharedvar.py @@ -10,6 +10,7 @@ from aesara.tensor import get_vector_length from aesara.tensor.basic import MakeVector from aesara.tensor.shape import Shape_i, specify_shape +from aesara.tensor.sharedvar import ScalarSharedVariable, TensorSharedVariable from tests import unittest_tools as utt @@ -513,7 +514,7 @@ def test_specify_shape_inplace(self): ) topo = f.maker.fgraph.toposort() f() - # [Gemm{inplace}(, 0.01, , , 2e-06)] + # [Gemm{inplace}(, 0.01, , , 2e-06)] if aesara.config.mode != "FAST_COMPILE": assert ( sum( @@ -649,19 +650,33 @@ class TestSharedOptions: pass +def test_tensor_shared_zero(): + shared_val = np.array([1.0, 3.0], dtype=np.float32) + res = aesara.shared(value=shared_val, borrow=True) + assert isinstance(res, TensorSharedVariable) + assert res.get_value(borrow=True) is shared_val + + res.zero(borrow=True) + new_shared_val = res.get_value(borrow=True) + assert new_shared_val is shared_val + assert np.array_equal(new_shared_val, np.zeros((2,), dtype=np.float32)) + + res.set_value(shared_val, borrow=True) + + res.zero(borrow=False) + new_shared_val = res.get_value(borrow=True) + assert new_shared_val is not shared_val + assert np.array_equal(new_shared_val, np.zeros((2,), dtype=np.float32)) + + def test_scalar_shared_options(): - # Simple test to make sure we do not loose that fonctionality. - aesara.shared(value=0.0, name="lk", borrow=True) - aesara.shared(value=np.float32(0.0), name="lk", borrow=True) + res = aesara.shared(value=np.float32(0.0), name="lk", borrow=True) + assert isinstance(res, ScalarSharedVariable) + assert res.type.dtype == "float32" + assert res.name == "lk" + assert res.type.shape == () def test_get_vector_length(): x = aesara.shared(np.array((2, 3, 4, 5))) assert get_vector_length(x) == 4 - - -def test_deprecated_kwargs(): - with pytest.warns(DeprecationWarning, match=".*broadcastable.*"): - res = aesara.shared(np.array([[1.0]]), broadcastable=(True, False)) - - assert res.type.shape == (1, None) diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py index 13acf1febc..073766365e 100644 --- a/tests/tensor/test_slinalg.py +++ b/tests/tensor/test_slinalg.py @@ -2,7 +2,6 @@ import itertools import numpy as np -import numpy.linalg import pytest import scipy @@ -12,7 +11,6 @@ from aesara.configdefaults import config from aesara.tensor.slinalg import ( Cholesky, - CholeskyGrad, CholeskySolve, Solve, SolveBase, @@ -23,6 +21,8 @@ expm, kron, solve, + solve_continuous_lyapunov, + solve_discrete_lyapunov, solve_triangular, ) from aesara.tensor.type import dmatrix, matrix, tensor, vector @@ -121,22 +121,17 @@ def test_cholesky_grad_indef(): @pytest.mark.slow -def test_cholesky_and_cholesky_grad_shape(): +def test_cholesky_shape(): rng = np.random.default_rng(utt.fetch_seed()) x = matrix() for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)): f_chol = aesara.function([x], l.shape) - g = aesara.gradient.grad(l.sum(), x) - f_cholgrad = aesara.function([x], g.shape) topo_chol = f_chol.maker.fgraph.toposort() - topo_cholgrad = f_cholgrad.maker.fgraph.toposort() if config.mode != "FAST_COMPILE": assert sum(node.op.__class__ == Cholesky for node in topo_chol) == 0 - assert sum(node.op.__class__ == CholeskyGrad for node in topo_cholgrad) == 0 for shp in [2, 3, 5]: m = np.cov(rng.standard_normal((shp, shp + 10))).astype(config.floatX) 
np.testing.assert_equal(f_chol(m), (shp, shp)) - np.testing.assert_equal(f_cholgrad(m), (shp, shp)) def test_eigvalsh(): @@ -178,13 +173,13 @@ class TestSolveBase(utt.InferShapeTester): [ (vector, matrix, "`A` must be a matrix.*"), ( - functools.partial(tensor, dtype="floatX", shape=(False,) * 3), + functools.partial(tensor, dtype="floatX", shape=(None,) * 3), matrix, "`A` must be a matrix.*", ), ( matrix, - functools.partial(tensor, dtype="floatX", shape=(False,) * 3), + functools.partial(tensor, dtype="floatX", shape=(None,) * 3), "`b` must be a matrix or a vector.*", ), ], @@ -514,7 +509,6 @@ def test_expm_grad_3(): class TestKron(utt.InferShapeTester): - rng = np.random.default_rng(43) def setup_method(self): @@ -523,12 +517,12 @@ def setup_method(self): def test_perform(self): for shp0 in [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]: - x = tensor(dtype="floatX", shape=(False,) * len(shp0)) + x = tensor(dtype="floatX", shape=(None,) * len(shp0)) a = np.asarray(self.rng.random(shp0)).astype(config.floatX) for shp1 in [(6,), (6, 7), (6, 7, 8), (6, 7, 8, 9)]: if len(shp0) + len(shp1) == 2: continue - y = tensor(dtype="floatX", shape=(False,) * len(shp1)) + y = tensor(dtype="floatX", shape=(None,) * len(shp1)) f = function([x, y], kron(x, y)) b = self.rng.random(shp1).astype(config.floatX) out = f(a, b) @@ -542,13 +536,77 @@ def test_perform(self): def test_numpy_2d(self): for shp0 in [(2, 3)]: - x = tensor(dtype="floatX", shape=(False,) * len(shp0)) + x = tensor(dtype="floatX", shape=(None,) * len(shp0)) a = np.asarray(self.rng.random(shp0)).astype(config.floatX) for shp1 in [(6, 7)]: if len(shp0) + len(shp1) == 2: continue - y = tensor(dtype="floatX", shape=(False,) * len(shp1)) + y = tensor(dtype="floatX", shape=(None,) * len(shp1)) f = function([x, y], kron(x, y)) b = self.rng.random(shp1).astype(config.floatX) out = f(a, b) assert np.allclose(out, np.kron(a, b)) + + +def test_solve_discrete_lyapunov_via_direct_real(): + N = 5 + rng = np.random.default_rng(utt.fetch_seed()) + a = at.dmatrix() + q = at.dmatrix() + f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")]) + + A = rng.normal(size=(N, N)) + Q = rng.normal(size=(N, N)) + + X = f(A, Q) + assert np.allclose(A @ X @ A.T - X + Q, 0.0) + + utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng) + + +def test_solve_discrete_lyapunov_via_direct_complex(): + N = 5 + rng = np.random.default_rng(utt.fetch_seed()) + a = at.zmatrix() + q = at.zmatrix() + f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")]) + + A = rng.normal(size=(N, N)) + rng.normal(size=(N, N)) * 1j + Q = rng.normal(size=(N, N)) + X = f(A, Q) + assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0) + + # TODO: the .conj() method currently does not have a gradient; add this test when gradients are implemented. 
+ # utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng) + + +def test_solve_discrete_lyapunov_via_bilinear(): + N = 5 + rng = np.random.default_rng(utt.fetch_seed()) + a = at.dmatrix() + q = at.dmatrix() + f = function([a, q], [solve_discrete_lyapunov(a, q, method="bilinear")]) + + A = rng.normal(size=(N, N)) + Q = rng.normal(size=(N, N)) + + X = f(A, Q) + assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0) + + utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng) + + +def test_solve_continuous_lyapunov(): + N = 5 + rng = np.random.default_rng(utt.fetch_seed()) + a = at.dmatrix() + q = at.dmatrix() + f = function([a, q], [solve_continuous_lyapunov(a, q)]) + + A = rng.normal(size=(N, N)) + Q = rng.normal(size=(N, N)) + X = f(A, Q) + + assert np.allclose(A @ X + X @ A.conj().T, Q) + + utt.verify_grad(solve_continuous_lyapunov, pt=[A, Q], rng=rng) diff --git a/tests/tensor/test_sort.py b/tests/tensor/test_sort.py index 0b82e6e979..8a3e3b23e3 100644 --- a/tests/tensor/test_sort.py +++ b/tests/tensor/test_sort.py @@ -459,7 +459,7 @@ def test_argtopk_nd(self, shp, k_, dtype, sorted, idx_dtype): if k == 0: continue - x = tensor(name="x", shape=(False,) * len(shp), dtype=dtype) + x = tensor(name="x", shape=(None,) * len(shp), dtype=dtype) y = argtopk(x, k, axis=axis, sorted=sorted, idx_dtype=idx_dtype) fn = aesara.function([x], y, mode=self.mode) assert any( @@ -515,7 +515,7 @@ def test_combined_infer_shape(self, shp, k_): if k == 0: continue - x = tensor(name="x", shape=(False,) * len(shp), dtype=aesara.config.floatX) + x = tensor(name="x", shape=(None,) * len(shp), dtype=aesara.config.floatX) yv, yi = topk_and_argtopk(x, k, axis=axis, sorted=False, idx_dtype="int32") size = reduce(int.__mul__, shp) xval = gen_unique_vector(size, aesara.config.floatX).reshape(shp) diff --git a/tests/tensor/test_subtensor.py b/tests/tensor/test_subtensor.py index 1af4e50ccd..1e6a3e99da 100644 --- a/tests/tensor/test_subtensor.py +++ b/tests/tensor/test_subtensor.py @@ -465,7 +465,7 @@ def test_ok_elem_2(self): def test_ok_row(self): n = self.shared(np.arange(6, dtype=self.dtype).reshape((2, 3))) t = n[1] - assert not any(n.type.broadcastable) + assert not any(s == 1 for s in n.type.shape) assert isinstance(t.owner.op, Subtensor) tval = self.eval_output_and_check(t) assert tval.shape == (3,) @@ -475,7 +475,7 @@ def test_ok_col(self): n = self.shared(np.arange(6, dtype=self.dtype).reshape((2, 3))) t = n[:, 0] assert isinstance(t.owner.op, Subtensor) - assert not any(n.type.broadcastable) + assert not any(s == 1 for s in n.type.shape) tval = self.eval_output_and_check(t) assert tval.shape == (2,) assert np.all(tval == [0, 3]) @@ -877,7 +877,7 @@ def test_err_bound_list(self): def test_adv_sub1_broadcast(self): v = np.arange(3, dtype=self.dtype).reshape((1, 3)) - n = self.shared(v * 5, shape=(True, False)) + n = self.shared(v * 5, shape=(1, None)) idx = lvector() t = n[idx] @@ -960,8 +960,8 @@ def test_adv_sub1_idx_broadcast(self): # The idx can be a broadcastable vector. 
ones = np.ones((4, 3), dtype=self.dtype) n = self.shared(ones * 5) - idx = TensorType(dtype="int64", shape=(True,))() - assert idx.type.broadcastable == (True,) + idx = TensorType(dtype="int64", shape=(1,))() + assert idx.type.shape == (1,) t = n[idx] f = self.function([idx], t, op=AdvancedSubtensor1) @@ -1167,7 +1167,7 @@ def test_advanced1_inc_and_set(self): # We create a new one every time in order not to # have duplicated variables in the function's inputs data_var = TensorType( - shape=[False] * data_n_dims, dtype=self.dtype + shape=(None,) * data_n_dims, dtype=self.dtype )() # Symbolic variable with rows to be incremented. idx_var = vector(dtype="int64") @@ -1190,7 +1190,7 @@ def test_advanced1_inc_and_set(self): idx_num = idx_num.astype("int64") # Symbolic variable with increment value. inc_var = TensorType( - shape=[False] * inc_n_dims, dtype=self.dtype + shape=(None,) * inc_n_dims, dtype=self.dtype )() # Trick for the case where `inc_shape` is the same as # `data_shape`: what we actually want is the first @@ -1715,7 +1715,7 @@ def test_advinc_subtensor(self, inplace): def check(idx, y_val, x_val, true): x = self.shared(x_val, name="x") - y = tensor(dtype="float32", shape=(False,) * len(y_val.shape), name="y") + y = tensor(dtype="float32", shape=(None,) * len(y_val.shape), name="y") sym_idx = [at.as_tensor_variable(ix) for ix in idx] expr = AdvancedIncSubtensor(inplace=inplace)(x, y, *sym_idx) f = aesara.function( @@ -1773,15 +1773,17 @@ def test_index_into_vec_w_vec(self): def test_index_into_vec_w_matrix(self): a = self.v[self.ix2] assert a.dtype == self.v.dtype, (a.dtype, self.v.dtype) - assert a.broadcastable == self.ix2.broadcastable, ( - a.broadcastable, - self.ix2.broadcastable, + assert a.type.ndim == self.ix2.type.ndim + assert all( + s1 == s2 + for s1, s2 in zip(a.type.shape, self.ix2.type.shape) + if s1 == 1 or s2 == 1 ) def test_index_into_mat_w_row(self): a = self.m[self.ixr] assert a.dtype == self.m.dtype, (a.dtype, self.m.dtype) - assert a.broadcastable == (True, False, False) + assert a.type.shape == (1, None, None) def test_index_w_int_and_vec(self): # like test_ok_list, but with a single index on the first one @@ -1879,7 +1881,10 @@ def test_inc_adv_subtensor_w_2vec(self, ignore_duplicates): subt = self.m[self.ix1, self.ix12] a = inc_subtensor(subt, subt, ignore_duplicates=ignore_duplicates) - typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable) + typ = TensorType( + self.m.type.dtype, + shape=tuple(1 if s == 1 else None for s in self.ix2.type.shape), + ) assert a.type == typ f = aesara.function( @@ -2071,7 +2076,7 @@ def test_adv_grouped(self): def test_grad(self): ones = np.ones((1, 3), dtype=self.dtype) - n = self.shared(ones * 5, shape=(True, False)) + n = self.shared(ones * 5, shape=(1, None)) idx = lvector() idx2 = lvector() t = n[idx, idx2] @@ -2444,7 +2449,7 @@ def test_AdvancedSubtensor_bool(self): ) abs_res = n[~isinf(n)] - assert abs_res.broadcastable == (False,) + assert abs_res.type.shape == (None,) @config.change_flags(compute_test_value="raise") @@ -2465,9 +2470,7 @@ def idx_as_tensor(x): def bcast_shape_tuple(x): if not hasattr(x, "shape"): return x - return tuple( - s if not bcast else 1 for s, bcast in zip(tuple(x.shape), x.broadcastable) - ) + return tuple(s if ss != 1 else 1 for s, ss in zip(tuple(x.shape), x.type.shape)) test_idx = np.ix_(np.array([True, True]), np.array([True]), np.array([True, True])) diff --git a/tests/tensor/test_type.py b/tests/tensor/test_type.py index 3c8a5194ef..3df380dd03 100644 --- 
a/tests/tensor/test_type.py +++ b/tests/tensor/test_type.py @@ -25,30 +25,34 @@ def test_numpy_dtype(dtype, exp_dtype): def test_in_same_class(): - test_type = TensorType(config.floatX, [False, False]) - test_type2 = TensorType(config.floatX, [False, True]) + test_type = TensorType(config.floatX, shape=(None, None)) + test_type2 = TensorType(config.floatX, shape=(None, 1)) assert test_type.in_same_class(test_type) assert not test_type.in_same_class(test_type2) + test_type = TensorType(config.floatX, shape=()) + test_type2 = TensorType(config.floatX, shape=(None,)) + assert not test_type.in_same_class(test_type2) + def test_is_super(): - test_type = TensorType(config.floatX, [False, False]) - test_type2 = TensorType(config.floatX, [False, True]) + test_type = TensorType(config.floatX, shape=(None, None)) + test_type2 = TensorType(config.floatX, shape=(None, 1)) assert test_type.is_super(test_type) assert test_type.is_super(test_type2) assert not test_type2.is_super(test_type) - test_type3 = TensorType(config.floatX, [False, False, False]) + test_type3 = TensorType(config.floatX, shape=(None, None, None)) assert not test_type3.is_super(test_type) def test_convert_variable(): - test_type = TensorType(config.floatX, [False, False]) + test_type = TensorType(config.floatX, shape=(None, None)) test_var = test_type() - test_type2 = TensorType(config.floatX, [True, False]) + test_type2 = TensorType(config.floatX, shape=(1, None)) test_var2 = test_type2() res = test_type.convert_variable(test_var) @@ -60,7 +64,7 @@ def test_convert_variable(): res = test_type2.convert_variable(test_var) assert res.type == test_type2 - test_type3 = TensorType(config.floatX, [True, False, True]) + test_type3 = TensorType(config.floatX, shape=(1, None, 1)) test_var3 = test_type3() res = test_type2.convert_variable(test_var3) @@ -84,12 +88,12 @@ def test_convert_variable_mixed_specificity(): def test_filter_variable(): - test_type = TensorType(config.floatX, []) + test_type = TensorType(config.floatX, shape=()) with pytest.raises(TypeError): test_type.filter(test_type()) - test_type = TensorType(config.floatX, [True, False]) + test_type = TensorType(config.floatX, shape=(1, None)) with pytest.raises(TypeError): test_type.filter(np.empty((0, 1), dtype=config.floatX)) @@ -103,7 +107,7 @@ def test_filter_variable(): test_type.filter_checks_isfinite = True test_type.filter(np.full((1, 2), np.inf, dtype=config.floatX)) - test_type2 = TensorType(config.floatX, [False, False]) + test_type2 = TensorType(config.floatX, shape=(None, None)) test_var = test_type() test_var2 = test_type2() @@ -120,7 +124,7 @@ def test_filter_variable(): def test_filter_strict(): - test_type = TensorType(config.floatX, []) + test_type = TensorType(config.floatX, shape=()) with pytest.raises(TypeError): test_type.filter(1, strict=True) @@ -131,7 +135,7 @@ def test_filter_strict(): def test_filter_ndarray_subclass(): """Make sure `TensorType.filter` can handle NumPy `ndarray` subclasses.""" - test_type = TensorType(config.floatX, [False]) + test_type = TensorType(config.floatX, shape=(None,)) class MyNdarray(np.ndarray): pass @@ -147,7 +151,7 @@ class MyNdarray(np.ndarray): def test_filter_float_subclass(): """Make sure `TensorType.filter` can handle `float` subclasses.""" with config.change_flags(floatX="float64"): - test_type = TensorType("float64", shape=[]) + test_type = TensorType("float64", shape=()) nan = np.array([np.nan], dtype="float64")[0] assert isinstance(nan, float) and not isinstance(nan, np.ndarray) @@ -157,7 +161,7 @@ def 
test_filter_float_subclass(): with config.change_flags(floatX="float32"): # Try again, except this time `nan` isn't a `float` - test_type = TensorType("float32", shape=[]) + test_type = TensorType("float32", shape=()) nan = np.array([np.nan], dtype="float32")[0] assert isinstance(nan, np.floating) and not isinstance(nan, np.ndarray) @@ -173,7 +177,7 @@ def test_filter_memmap(): filename = path.join(mkdtemp(), "newfile.dat") fp = np.memmap(filename, dtype=config.floatX, mode="w+", shape=(3, 4)) - test_type = TensorType(config.floatX, [False, False]) + test_type = TensorType(config.floatX, shape=(None, None)) res = test_type.filter(fp) assert res is fp @@ -219,25 +223,25 @@ def test_tensor_values_eq_approx(): def test_fixed_shape_basic(): - t1 = TensorType("float64", (1, 1)) + t1 = TensorType("float64", shape=(1, 1)) assert t1.shape == (1, 1) assert t1.broadcastable == (True, True) - t1 = TensorType("float64", (0,)) + t1 = TensorType("float64", shape=(0,)) assert t1.shape == (0,) assert t1.broadcastable == (False,) - t1 = TensorType("float64", (False, False)) + t1 = TensorType("float64", shape=(None, None)) assert t1.shape == (None, None) assert t1.broadcastable == (False, False) - t1 = TensorType("float64", (2, 3)) + t1 = TensorType("float64", shape=(2, 3)) assert t1.shape == (2, 3) assert t1.broadcastable == (False, False) assert str(t1) == "TensorType(float64, (2, 3))" - t1 = TensorType("float64", (1,)) + t1 = TensorType("float64", shape=(1,)) assert t1.shape == (1,) assert t1.broadcastable == (True,) @@ -256,13 +260,13 @@ def test_fixed_shape_clone(): t2 = t1.clone(dtype="float32", shape=(2, 4)) assert t2.shape == (2, 4) - t2 = t1.clone(dtype="float32", shape=(False, False)) + t2 = t1.clone(dtype="float32", shape=(None, None)) assert t2.shape == (None, None) def test_fixed_shape_comparisons(): - t1 = TensorType("float64", (True, True)) - t2 = TensorType("float64", (1, 1)) + t1 = TensorType("float64", shape=(1, 1)) + t2 = TensorType("float64", shape=(1, 1)) assert t1 == t2 assert t1.is_super(t2) @@ -270,19 +274,19 @@ def test_fixed_shape_comparisons(): assert hash(t1) == hash(t2) - t3 = TensorType("float64", (True, False)) - t4 = TensorType("float64", (1, 2)) + t3 = TensorType("float64", shape=(1, None)) + t4 = TensorType("float64", shape=(1, 2)) assert t3 != t4 - t1 = TensorType("float64", (True, True)) - t2 = TensorType("float64", ()) + t1 = TensorType("float64", shape=(1, 1)) + t2 = TensorType("float64", shape=()) assert t1 != t2 def test_fixed_shape_convert_variable(): # These are equivalent types - t1 = TensorType("float64", (True, True)) - t2 = TensorType("float64", (1, 1)) + t1 = TensorType("float64", shape=(1, 1)) + t2 = TensorType("float64", shape=(1, 1)) assert t1 == t2 assert t1.shape == t2.shape @@ -298,13 +302,13 @@ def test_fixed_shape_convert_variable(): res = t2.convert_variable(t1_var) assert res is t1_var - t3 = TensorType("float64", (False, True)) + t3 = TensorType("float64", shape=(None, 1)) t3_var = t3() res = t2.convert_variable(t3_var) assert isinstance(res.owner.op, SpecifyShape) - t3 = TensorType("float64", (False, False)) - t4 = TensorType("float64", (3, 2)) + t3 = TensorType("float64", shape=(None, None)) + t4 = TensorType("float64", shape=(3, 2)) t4_var = t4() assert t3.shape == (None, None) res = t3.convert_variable(t4_var) diff --git a/tests/tensor/test_var.py b/tests/tensor/test_var.py index 05127cb3d2..f7d1a4ddc5 100644 --- a/tests/tensor/test_var.py +++ b/tests/tensor/test_var.py @@ -155,18 +155,18 @@ def test__getitem__Subtensor(): def 
test__getitem__AdvancedSubtensor_bool(): x = matrix("x") - i = TensorType("bool", (False, False))("i") + i = TensorType("bool", shape=(None, None))("i") z = x[i] op_types = [type(node.op) for node in aesara.graph.basic.io_toposort([x, i], [z])] assert op_types[-1] == AdvancedSubtensor - i = TensorType("bool", (False,))("i") + i = TensorType("bool", shape=(None,))("i") z = x[:, i] op_types = [type(node.op) for node in aesara.graph.basic.io_toposort([x, i], [z])] assert op_types[-1] == AdvancedSubtensor - i = TensorType("bool", (False,))("i") + i = TensorType("bool", shape=(None,))("i") z = x[..., i] op_types = [type(node.op) for node in aesara.graph.basic.io_toposort([x, i], [z])] assert op_types[-1] == AdvancedSubtensor @@ -244,23 +244,25 @@ def test__getitem__newaxis(x, indices, new_order): def test_fixed_shape_variable_basic(): - x = TensorVariable(TensorType("int64", (4,)), None) + x = TensorVariable(TensorType("int64", shape=(4,)), None) assert isinstance(x.shape, Constant) assert np.array_equal(x.shape.data, (4,)) - x = TensorConstant(TensorType("int64", (False, False)), np.array([[1, 2], [2, 3]])) + x = TensorConstant( + TensorType("int64", shape=(None, None)), np.array([[1, 2], [2, 3]]) + ) assert x.type.shape == (2, 2) with pytest.raises(ValueError): - TensorConstant(TensorType("int64", (True, False)), np.array([[1, 2], [2, 3]])) + TensorConstant(TensorType("int64", shape=(1, None)), np.array([[1, 2], [2, 3]])) def test_get_vector_length(): - x = TensorVariable(TensorType("int64", (4,)), None) + x = TensorVariable(TensorType("int64", shape=(4,)), None) res = get_vector_length(x) assert res == 4 - x = TensorVariable(TensorType("int64", (None,)), None) + x = TensorVariable(TensorType("int64", shape=(None,)), None) with pytest.raises(ValueError): get_vector_length(x) diff --git a/tests/tensor/utils.py b/tests/tensor/utils.py index c59c99d99d..d92b9b066d 100644 --- a/tests/tensor/utils.py +++ b/tests/tensor/utils.py @@ -449,7 +449,9 @@ def test_good(self): inputrs = [ TensorType( dtype=input.dtype, - shape=[shape_elem == 1 for shape_elem in input.shape], + shape=tuple( + 1 if shape_elem == 1 else None for shape_elem in input.shape + ), )() for input in inputs ] @@ -611,7 +613,9 @@ def test_grad_none(self): inputrs = [ TensorType( dtype=input.dtype, - shape=[shape_elem == 1 for shape_elem in input.shape], + shape=tuple( + 1 if shape_elem == 1 else None for shape_elem in input.shape + ), )() for input in inputs ] @@ -632,8 +636,10 @@ def test_grad_none(self): dtype = config.floatX else: dtype = str(out.dtype) - bcast = [shape_elem == 1 for shape_elem in out.shape] - var = TensorType(dtype=dtype, shape=bcast)() + out_shape = tuple( + 1 if shape_elem == 1 else None for shape_elem in out.shape + ) + var = TensorType(dtype=dtype, shape=out_shape)() out_grad_vars.append(var) try: diff --git a/tests/test_printing.py b/tests/test_printing.py index db7d4cd3c6..414be0b555 100644 --- a/tests/test_printing.py +++ b/tests/test_printing.py @@ -282,7 +282,7 @@ def test_debugprint(): | | |B | |TensorConstant{1.0} | |B - | | + | | | |TensorConstant{0.0} |D """ @@ -306,9 +306,9 @@ def test_debugprint_id_type(): exp_res = f"""Elemwise{{add,no_inplace}} [id {e_at.auto_name}] |dot [id {d_at.auto_name}] - | | [id {b_at.auto_name}] - | | [id {a_at.auto_name}] - | [id {a_at.auto_name}] + | | [id {b_at.auto_name}] + | | [id {a_at.auto_name}] + | [id {a_at.auto_name}] """ assert [l.strip() for l in s.split("\n")] == [ @@ -319,7 +319,7 @@ def test_debugprint_id_type(): def test_pprint(): x = dvector() y = x[1] - 
assert pp(y) == "[1]" + assert pp(y) == "[1]" def test_debugprint_inner_graph(): diff --git a/tests/typed_list/test_basic.py b/tests/typed_list/test_basic.py index 554f120843..52b4d94012 100644 --- a/tests/typed_list/test_basic.py +++ b/tests/typed_list/test_basic.py @@ -57,7 +57,7 @@ class TestGetItem: def test_sanity_check_slice(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicSlice = SliceType()() @@ -75,7 +75,7 @@ def test_sanity_check_slice(self): def test_sanity_check_single(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicScalar = scalar(dtype="int64") @@ -90,7 +90,7 @@ def test_sanity_check_single(self): def test_interface(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicScalar = scalar(dtype="int64") @@ -110,7 +110,7 @@ def test_interface(self): def test_wrong_input(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicMatrix = matrix() @@ -119,7 +119,7 @@ def test_wrong_input(self): def test_constant_input(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() z = GetItem()(mySymbolicMatricesList, 0) @@ -140,7 +140,7 @@ def test_constant_input(self): class TestAppend: def test_inplace(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() myMatrix = matrix() @@ -156,7 +156,7 @@ def test_inplace(self): def test_sanity_check(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() myMatrix = matrix() @@ -172,7 +172,7 @@ def test_sanity_check(self): def test_interfaces(self): mySymbolicMatricesList = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() myMatrix = matrix() @@ -190,10 +190,10 @@ def test_interfaces(self): class TestExtend: def test_inplace(self): mySymbolicMatricesList1 = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicMatricesList2 = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() z = Extend(True)(mySymbolicMatricesList1, mySymbolicMatricesList2) @@ -210,10 +210,10 @@ def test_inplace(self): def test_sanity_check(self): mySymbolicMatricesList1 = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicMatricesList2 = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() z = Extend()(mySymbolicMatricesList1, mySymbolicMatricesList2) @@ -228,10 +228,10 @@ def test_sanity_check(self): def test_interface(self): mySymbolicMatricesList1 = TypedListType( - TensorType(aesara.config.floatX, (False, False)) + TensorType(aesara.config.floatX, shape=(None, None)) )() mySymbolicMatricesList2 = TypedListType( - TensorType(aesara.config.floatX, 
(False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = mySymbolicMatricesList1.extend(mySymbolicMatricesList2)
@@ -248,7 +248,7 @@ def test_interface(self):
 class TestInsert:
     def test_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
         myScalar = scalar(dtype="int64")
@@ -267,7 +267,7 @@ def test_inplace(self):
 
     def test_sanity_check(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
         myScalar = scalar(dtype="int64")
@@ -284,7 +284,7 @@ def test_sanity_check(self):
 
     def test_interface(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
         myScalar = scalar(dtype="int64")
@@ -303,7 +303,7 @@ def test_interface(self):
 class TestRemove:
     def test_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -319,7 +319,7 @@ def test_inplace(self):
 
     def test_sanity_check(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -335,7 +335,7 @@ def test_sanity_check(self):
 
     def test_interface(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -353,7 +353,7 @@ def test_interface(self):
 class TestReverse:
     def test_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Reverse(True)(mySymbolicMatricesList)
@@ -368,7 +368,7 @@ def test_inplace(self):
 
     def test_sanity_check(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Reverse()(mySymbolicMatricesList)
@@ -383,7 +383,7 @@ def test_sanity_check(self):
 
     def test_interface(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = mySymbolicMatricesList.reverse()
@@ -400,7 +400,7 @@ def test_interface(self):
 class TestIndex:
     def test_sanity_check(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -416,7 +416,7 @@ def test_sanity_check(self):
 
     def test_interface(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -432,10 +432,10 @@ def test_interface(self):
 
     def test_non_tensor_type(self):
         mySymbolicNestedMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False)), 1
+            TensorType(aesara.config.floatX, shape=(None, None)), 1
         )()
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Index()(mySymbolicNestedMatricesList, mySymbolicMatricesList)
@@ -468,7 +468,7 @@ def test_sparse(self):
 class TestCount:
     def test_sanity_check(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -484,7 +484,7 @@ def test_sanity_check(self):
 
     def test_interface(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         myMatrix = matrix()
 
@@ -500,10 +500,10 @@ def test_interface(self):
 
     def test_non_tensor_type(self):
         mySymbolicNestedMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False)), 1
+            TensorType(aesara.config.floatX, shape=(None, None)), 1
         )()
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Count()(mySymbolicNestedMatricesList, mySymbolicMatricesList)
@@ -536,7 +536,7 @@ def test_sparse(self):
 class TestLength:
     def test_sanity_check(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Length()(mySymbolicMatricesList)
@@ -549,7 +549,7 @@ def test_sanity_check(self):
 
     def test_interface(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = mySymbolicMatricesList.__len__()
diff --git a/tests/typed_list/test_rewriting.py b/tests/typed_list/test_rewriting.py
index 167424cfb8..9e1244e63f 100644
--- a/tests/typed_list/test_rewriting.py
+++ b/tests/typed_list/test_rewriting.py
@@ -13,7 +13,7 @@ class TestInplace:
 
     def test_reverse_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Reverse()(mySymbolicMatricesList)
@@ -36,7 +36,7 @@ def test_reverse_inplace(self):
 
     def test_append_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         mySymbolicMatrix = matrix()
         z = Append()(mySymbolicMatricesList, mySymbolicMatrix)
@@ -62,11 +62,11 @@ def test_append_inplace(self):
 
     def test_extend_inplace(self):
         mySymbolicMatricesList1 = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         mySymbolicMatricesList2 = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         z = Extend()(mySymbolicMatricesList1, mySymbolicMatricesList2)
@@ -91,7 +91,7 @@ def test_extend_inplace(self):
 
     def test_insert_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         mySymbolicIndex = scalar(dtype="int64")
         mySymbolicMatrix = matrix()
@@ -121,7 +121,7 @@ def test_insert_inplace(self):
 
     def test_remove_inplace(self):
         mySymbolicMatricesList = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
         mySymbolicMatrix = matrix()
         z = Remove()(mySymbolicMatricesList, mySymbolicMatrix)
diff --git a/tests/typed_list/test_type.py b/tests/typed_list/test_type.py
index 4ee1b76e02..e0b53dfdb2 100644
--- a/tests/typed_list/test_type.py
+++ b/tests/typed_list/test_type.py
@@ -24,7 +24,7 @@ def test_wrong_input_on_filter(self):
         # specified on creation
 
         # list of matrices
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         with pytest.raises(TypeError):
             myType.filter([4])
@@ -34,7 +34,7 @@ def test_not_a_list_on_filter(self):
         # if no iterable variable is given on input
 
         # list of matrices
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         with pytest.raises(TypeError):
             myType.filter(4)
@@ -45,11 +45,11 @@ def test_type_equality(self):
         # variables
 
         # list of matrices
-        myType1 = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType1 = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
         # list of matrices
-        myType2 = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType2 = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
         # list of scalars
-        myType3 = TypedListType(TensorType(aesara.config.floatX, ()))
+        myType3 = TypedListType(TensorType(aesara.config.floatX, shape=()))
 
         assert myType2 == myType1
         assert myType3 != myType1
@@ -57,7 +57,7 @@ def test_type_equality(self):
 
     def test_filter_sanity_check(self):
         # Simple test on typed list type filter
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         x = random_ranged(-1000, 1000, [100, 100])
 
@@ -68,14 +68,14 @@ def test_intern_filter(self):
         # filtered. If they weren't this code would raise
         # an exception.
 
-        myType = TypedListType(TensorType("float64", (False, False)))
+        myType = TypedListType(TensorType("float64", shape=(None, None)))
 
         x = np.asarray([[4, 5], [4, 5]], dtype="float32")
 
         assert np.array_equal(myType.filter([x]), [x])
 
     def test_load_alot(self):
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         x = random_ranged(-1000, 1000, [10, 10])
         testList = []
@@ -87,7 +87,9 @@ def test_load_alot(self):
 
     def test_basic_nested_list(self):
         # Testing nested list with one level of depth
-        myNestedType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myNestedType = TypedListType(
+            TensorType(aesara.config.floatX, shape=(None, None))
+        )
 
         myType = TypedListType(myNestedType)
 
@@ -98,7 +100,9 @@ def test_basic_nested_list(self):
 
     def test_comparison_different_depth(self):
         # Nested list with different depth aren't the same
-        myNestedType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myNestedType = TypedListType(
+            TensorType(aesara.config.floatX, shape=(None, None))
+        )
 
         myNestedType2 = TypedListType(myNestedType)
 
@@ -110,10 +114,10 @@ def test_nested_list_arg(self):
         # test for the 'depth' optional argument
 
         myNestedType = TypedListType(
-            TensorType(aesara.config.floatX, (False, False)), 3
+            TensorType(aesara.config.floatX, shape=(None, None)), 3
         )
 
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         myManualNestedType = TypedListType(TypedListType(TypedListType(myType)))
 
@@ -122,7 +126,7 @@ def test_nested_list_arg(self):
 
     def test_get_depth(self):
         # test case for get_depth utilitary function
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         myManualNestedType = TypedListType(TypedListType(TypedListType(myType)))
 
@@ -131,7 +135,7 @@ def test_get_depth(self):
 
     def test_comparison_uneven_nested(self):
         # test for comparison between uneven nested list
-        myType = TypedListType(TensorType(aesara.config.floatX, (False, False)))
+        myType = TypedListType(TensorType(aesara.config.floatX, shape=(None, None)))
 
         myManualNestedType1 = TypedListType(TypedListType(TypedListType(myType)))
 
@@ -142,7 +146,7 @@ def test_comparison_uneven_nested(self):
 
     def test_variable_is_Typed_List_variable(self):
         mySymbolicVariable = TypedListType(
-            TensorType(aesara.config.floatX, (False, False))
+            TensorType(aesara.config.floatX, shape=(None, None))
         )()
 
         assert isinstance(mySymbolicVariable, TypedListVariable)