Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance enhancements of conditional logit #81

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/pylogit/choice_calcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,8 +927,9 @@ def calc_fisher_info_matrix(beta,
# Calculate the weights for the sample
if weights is None:
weights = np.ones(design.shape[0])
weights_per_obs =\
np.max(rows_to_obs.toarray() * weights[:, None], axis=0)

M = rows_to_obs.multiply(weights.reshape(-1,1))
weights_per_obs = np.max(M, axis=0).toarray().reshape(-1)

##########
# Get the required matrices
Expand Down
4 changes: 3 additions & 1 deletion src/pylogit/conditional_logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import numpy as np
from scipy.sparse import diags

from pylogit.scipy_utils import identity_matrix

from . import choice_calcs as cc
from . import base_multinomial_cm_v2 as base_mcm
from .estimation import LogitTypeEstimator
Expand Down Expand Up @@ -177,7 +179,7 @@ class MNLEstimator(LogitTypeEstimator):
def set_derivatives(self):
# Pre-calculate the derivative of the transformation vector with
# respect to the vector of systematic utilities
dh_dv = diags(np.ones(self.design.shape[0]), 0, format='csr')
dh_dv = identity_matrix(self.design.shape[0])

# Create a function to calculate dh_dv which will return the
# pre-calculated result when called
Expand Down
5 changes: 3 additions & 2 deletions src/pylogit/mixed_logit_calcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,8 +662,9 @@ def calc_bhhh_hessian_approximation_mixed_logit(params,
# Calculate the weights for the sample
if weights is None:
weights = np.ones(design_3d.shape[0])
weights_per_obs =\
np.max(rows_to_mixers.toarray() * weights[:, None], axis=0)

M = rows_to_mixers.multiply(weights.reshape(-1,1))
weights_per_obs = np.max(M, axis=0).toarray().reshape(-1)
# Calculate the regular probability array. Note the implicit assumption
# that params == index coefficients.
prob_array = general_calc_probabilities(params,
Expand Down
8 changes: 6 additions & 2 deletions src/pylogit/nested_choice_calcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,9 @@ def calc_nested_gradient(orig_nest_coefs,
# Calculate the weights for the sample
if weights is None:
weights = np.ones(design.shape[0])
weights_per_obs = np.max(rows_to_obs.toarray() * weights[:, None], axis=0)

M = rows_to_obs.multiply(weights.reshape(-1,1))
weights_per_obs = np.max(M, axis=0).toarray().reshape(-1)

# Transform the nest coefficients into their "always positive" versions
nest_coefs = naturalize_nest_coefs(orig_nest_coefs)
Expand Down Expand Up @@ -743,7 +745,9 @@ def calc_bhhh_hessian_approximation(orig_nest_coefs,
# Calculate the weights for the sample
if weights is None:
weights = np.ones(design.shape[0])
weights_per_obs = np.max(rows_to_obs.toarray() * weights[:, None], axis=0)

M = rows_to_obs.multiply(weights.reshape(-1,1))
weights_per_obs = np.max(M, axis=0).toarray().reshape(-1)

# Transform the nest coefficients into their "always positive" versions
nest_coefs = naturalize_nest_coefs(orig_nest_coefs)
Expand Down
145 changes: 145 additions & 0 deletions src/pylogit/scipy_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 7 20:05:51 2021

@name: Scipy Utilities
@author: Mathijs van der Vlies
@summary: Contains an efficient implementation of an identity matrix through the
identity_matrix class, which simply performs the identity operation
when multiplied with another matrix without multiplying the individual
elements.
"""
from __future__ import absolute_import
from scipy.sparse import spmatrix
from scipy.sparse.csr import csr_matrix
import numpy as np
from scipy.sparse.sputils import check_shape, isshape


class identity_matrix(spmatrix):
    """Efficient implementation of an identity matrix.

    When multiplied with another matrix `A`, simply passes through and returns `A`
    without multiplying the individual elements.

    Parameters
    ----------
    arg1 : int, 2-tuple or identity_matrix
        If int `n`, then the matrix is n-by-n.
        If a 2-tuple, then it must be of the form `(n, n)` (square matrix), so that
        the matrix becomes n-by-n.
        If of type `identity_matrix`, then its size will be copied over.
    copy : bool, default = False
        Inactive parameter because no data is stored by the object, kept for
        consistency with `spmatrix` interface.
    """

    def __init__(self, arg1, copy=False):
        super().__init__()
        # Accept numpy integer types as well as builtin `int`, since matrix
        # sizes typically come from a numpy array's `.shape` attribute.
        if isinstance(arg1, (int, np.integer)):
            if arg1 < 0:
                raise ValueError(f"The number of rows `n` cannot be negative. "
                                 f"Got a value of {arg1}")
            self.n = int(arg1)
        elif isinstance(arg1, type(self)):
            self.n = arg1.n
        elif isinstance(arg1, tuple):
            if isshape(arg1):
                m, n = check_shape(arg1)
                if m != n:
                    raise ValueError("Only a square shape is valid.")
                self.n = n
            else:
                raise ValueError("Only a square shape is valid.")
        else:
            raise TypeError(f"Invalid input to constructor. Expected an object of "
                            f"type `int`, `tuple`, or {type(self)}")

    def set_shape(self, shape):
        # The shape of an identity matrix is fixed and square, and the object
        # stores no data to reshape, so only the current shape is accepted
        # (a no-op).  Delegating to `spmatrix.set_shape` would attempt an
        # in-place reshape, which cannot work for this class.
        shape = check_shape(shape)
        if shape != (self.n, self.n):
            raise NotImplementedError("The shape of an identity_matrix is "
                                      "fixed and cannot be changed.")

    def get_shape(self):
        return (self.n, self.n)

    shape = property(fget=get_shape, fset=set_shape)

    def getnnz(self, axis=None):
        """Number of stored values: the `n` ones on the diagonal.

        When `axis` is 0 or 1, an array of per-column / per-row counts is
        returned (all ones), matching the scipy.sparse convention.
        """
        if axis is None:
            return self.n
        if axis < 0:
            axis += 2
        if axis not in (0, 1):
            raise ValueError("axis out of bounds")
        # Exactly one stored entry in every row and in every column.
        return np.ones(self.n, dtype=np.intp)

    def __repr__(self):
        return f"<{self.n}x{self.n} identity matrix>"

    def _mul_vector(self, other):
        # I @ v == v: pass the operand through untouched.
        return other

    def _mul_multivector(self, other):
        return other

    def _mul_sparse_matrix(self, other):
        return other

    def copy(self):
        """Return a new `identity_matrix` of the same size.

        There is no underlying data to duplicate, so this simply
        re-creates the object.
        """
        return identity_matrix(self.n)

    def transpose(self, axes=None, copy=False):
        """
        Reverses the dimensions of the sparse matrix.

        Parameters
        ----------
        axes : None, optional
            This argument is in the signature *solely* for NumPy
            compatibility reasons. Do not pass in anything except
            for the default value.
        copy : bool, optional
            Indicates whether or not attributes of `self` should be
            copied whenever possible. The degree to which attributes
            are copied varies depending on the type of sparse matrix
            being used.

        Returns
        -------
        p : `self` with the dimensions reversed.

        See Also
        --------
        numpy.matrix.transpose : NumPy's implementation of 'transpose'
                                 for matrices
        """
        # An identity matrix is symmetric, so its transpose is itself.
        if copy:
            return self.copy()

        return self

    def conj(self, copy=True):
        """Element-wise complex conjugation.

        If the matrix is of non-complex data type and `copy` is False,
        this method does nothing and the data is not copied.

        Parameters
        ----------
        copy : bool, optional
            If True, the result is guaranteed to not share data with self.

        Returns
        -------
        A : The element-wise complex conjugate.

        """
        # The identity matrix is real-valued, so conjugation is the identity
        # operation; reuse `transpose` to honour the `copy` flag.
        return self.transpose(copy=copy)

    def tocsr(self, copy=False):
        """Convert this matrix to Compressed Sparse Row format.

        For this identity matrix, `copy` doesn't affect anything
        because it doesn't store any underlying data.
        """
        # Build the CSR arrays directly: row i holds a single 1.0 at
        # column i, so indptr is simply [0, 1, ..., n].
        data = np.ones(self.n)
        indices = np.arange(self.n)
        indptr = np.arange(self.n + 1)

        return csr_matrix((data, indices, indptr), shape=self.shape, copy=False)

    def __pow__(self, other):
        # Any power of the identity matrix is the identity matrix itself.
        return self.copy()
Loading