Skip to content

Commit

Permalink
Update densifying sparse array (#1263)
Browse files Browse the repository at this point in the history
Switch from `.A` to `.toarray()`
  • Loading branch information
WeilerP authored Jul 11, 2024
1 parent 95e6c96 commit 3fbce6f
Show file tree
Hide file tree
Showing 21 changed files with 94 additions and 77 deletions.
4 changes: 2 additions & 2 deletions scvelo/core/_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def get_df(
df = data

if issparse(df):
df = np.array(df.A)
df = np.array(df.toarray())
if columns is None and hasattr(df, "names"):
columns = df.names

Expand Down Expand Up @@ -426,7 +426,7 @@ def make_dense(
for modality in modalities:
count_data = get_modality(adata=adata, modality=modality)
if issparse(count_data):
set_modality(adata=adata, modality=modality, new_value=count_data.A)
set_modality(adata=adata, modality=modality, new_value=count_data.toarray())

return adata if not inplace else None

Expand Down
5 changes: 4 additions & 1 deletion scvelo/core/_linear_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ def _trim_data(self, data: List) -> List:
data = [data]

data = np.array(
[data_mat.A if issparse(data_mat) else data_mat for data_mat in data]
[
data_mat.toarray() if issparse(data_mat) else data_mat
for data_mat in data
]
)

# TODO: Add explanatory comment
Expand Down
2 changes: 1 addition & 1 deletion scvelo/inference/_metabolic_labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _get_n_neighbors(
rep_X = rep_X[rows, cols]

if sparse_op:
n_neighbors_to_use = np.cumsum(rep_X.A > 0, axis=1)
n_neighbors_to_use = np.cumsum(rep_X.toarray() > 0, axis=1)
else:
n_neighbors_to_use = np.cumsum(rep_X > 0, axis=1)

Expand Down
2 changes: 1 addition & 1 deletion scvelo/plotting/heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def heatmap(
else adata[:, var_names].X
)
if issparse(X):
X = X.A
X = X.toarray()
df = pd.DataFrame(X[np.argsort(time)], columns=var_names)

if n_convolve is not None:
Expand Down
4 changes: 2 additions & 2 deletions scvelo/plotting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
def make_dense(X):
    """Return ``X`` as a dense :class:`numpy.ndarray`.

    Parameters
    ----------
    X
        Sparse container, :class:`numpy.matrix` or array-like.

    Returns
    -------
    numpy.ndarray
        Dense data. Sparse input is densified with ``.toarray()``, a
        ``numpy.matrix`` is flattened to one dimension via ``.A1``, and
        anything else is passed to ``np.array`` unchanged.
    """
    if issparse(X):
        # Densify with `.toarray()` regardless of dimensionality instead of
        # falling back to the `.A` / `.A1` shorthands.
        XA = X.toarray()
    elif isinstance(X, np.matrix):
        XA = X.A1
    else:
        XA = X
    return np.array(XA)
Expand Down Expand Up @@ -799,7 +799,7 @@ def interpret_colorkey(adata, c=None, layer=None, perc=None, use_raw=None):
if adata.raw is None and use_raw:
raise ValueError("AnnData object does not have `raw` counts.")
c = adata.raw.obs_vector(c) if use_raw else adata.obs_vector(c)
c = c.A.flatten() if issparse(c) else c
c = c.toarray().flatten() if issparse(c) else c
elif c in adata.var.keys(): # color by observation key
c = adata.var[c]
elif np.any([var_key in c for var_key in adata.var.keys()]):
Expand Down
2 changes: 1 addition & 1 deletion scvelo/plotting/velocity.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def velocity(
_adata = adata[:, var]
s, u = _adata.layers[skey], _adata.layers[ukey]
if issparse(s):
s, u = s.A, u.A
s, u = s.toarray(), u.toarray()

# spliced/unspliced phase portrait with steady-state estimate
ax = pl.subplot(gs[v * nplts])
Expand Down
2 changes: 1 addition & 1 deletion scvelo/plotting/velocity_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def velocity_graph(

if groups is not None:
if issparse(T):
T = T.A
T = T.toarray()
T[~groups_to_bool(adata, groups, color)] = 0
T = csr_matrix(T)
T.eliminate_zeros()
Expand Down
12 changes: 6 additions & 6 deletions scvelo/preprocessing/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ def moments(
adata.layers["Ms"] = (
csr_matrix.dot(connectivities, csr_matrix(adata.layers["spliced"]))
.astype(np.float32)
.A
.toarray()
)
adata.layers["Mu"] = (
csr_matrix.dot(connectivities, csr_matrix(adata.layers["unspliced"]))
.astype(np.float32)
.A
.toarray()
)
# if renormalize: normalize_per_cell(adata, layers={'Ms', 'Mu'}, enforce=True)

Expand Down Expand Up @@ -130,8 +130,8 @@ def second_order_moments(adata, adjusted=False):
s, u = csr_matrix(adata.layers["spliced"]), csr_matrix(adata.layers["unspliced"])
if s.shape[0] == 1:
s, u = s.T, u.T
Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).A
Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).A
Mss = csr_matrix.dot(connectivities, s.multiply(s)).astype(np.float32).toarray()
Mus = csr_matrix.dot(connectivities, s.multiply(u)).astype(np.float32).toarray()
if adjusted:
Mss = 2 * Mss - adata.layers["Ms"].reshape(Mss.shape)
Mus = 2 * Mus - adata.layers["Mu"].reshape(Mus.shape)
Expand All @@ -157,7 +157,7 @@ def second_order_moments_u(adata):

connectivities = get_connectivities(adata)
u = csr_matrix(adata.layers["unspliced"])
Muu = csr_matrix.dot(connectivities, u.multiply(u)).astype(np.float32).A
Muu = csr_matrix.dot(connectivities, u.multiply(u)).astype(np.float32).toarray()

return Muu

Expand Down Expand Up @@ -222,5 +222,5 @@ def get_moments(
else:
Mx = csr_matrix.dot(connectivities, X)
if issparse(X):
Mx = Mx.astype(np.float32).A
Mx = Mx.astype(np.float32).toarray()
return Mx
4 changes: 2 additions & 2 deletions scvelo/preprocessing/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def fit(self, X, metric="l2", M=16, ef=100, ef_construction=100, random_state=0)
ef_c, ef = max(ef_construction, self.n_neighbors), max(self.n_neighbors, ef)
metric = "l2" if metric == "euclidean" else metric

X = X.A if issparse(X) else X
X = X.toarray() if issparse(X) else X
ns, dim = X.shape

knn = hnswlib.Index(space=metric, dim=dim)
Expand Down Expand Up @@ -564,7 +564,7 @@ def get_duplicate_cells(data):
vals = [val for val, count in Counter(lst).items() if count > 1]
idx_dup = np.where(pd.Series(lst).isin(vals))[0]

X_new = np.array(X[idx_dup].A if issparse(X) else X[idx_dup])
X_new = np.array(X[idx_dup].toarray() if issparse(X) else X[idx_dup])
sorted_idx = np.lexsort(X_new.T)
sorted_data = X_new[sorted_idx, :]

Expand Down
6 changes: 3 additions & 3 deletions scvelo/tools/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
# TODO: Add docstrings
def get_weight(x, y=None, perc=95):
"""TODO."""
xy_norm = np.array(x.A if issparse(x) else x)
xy_norm = np.array(x.toarray() if issparse(x) else x)
if y is not None:
if issparse(y):
y = y.A
y = y.toarray()
xy_norm = xy_norm / np.clip(np.max(xy_norm, axis=0), 1e-3, None)
xy_norm += y / np.clip(np.max(y, axis=0), 1e-3, None)
if isinstance(perc, numbers.Number):
Expand Down Expand Up @@ -84,7 +84,7 @@ def optimize_NxN(x, y, fit_offset=False, perc=None):
perc = perc[1]
weights = get_weight(x, y, perc).astype(bool)
if issparse(weights):
weights = weights.A
weights = weights.toarray()
else:
weights = None

Expand Down
4 changes: 2 additions & 2 deletions scvelo/tools/paga.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def compute_transitions(self):
transitions_conf.eliminate_zeros()

# remove non-confident direct paths if more confident indirect path is found.
T = transitions_conf.A
T = transitions_conf.toarray()
threshold = max(np.nanmin(np.nanmax(T / (T > 0), axis=0)) - 1e-6, 0.01)
T *= T > threshold
for i in range(len(T)):
Expand All @@ -169,7 +169,7 @@ def compute_transitions(self):
T_tmp[np.where(T_num[:, i])[0][0], i] = T_max
from scipy.sparse.csgraph import minimum_spanning_tree

T_tmp = np.abs(minimum_spanning_tree(-T_tmp).A) > 0
T_tmp = np.abs(minimum_spanning_tree(-T_tmp).toarray()) > 0
T = T_tmp * T

transitions_conf = csr_matrix(T)
Expand Down
10 changes: 5 additions & 5 deletions scvelo/tools/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def convert_to_adata(vlm, basis=None):
layers = OrderedDict()
layers["spliced"] = vlm.S_sz.T if hasattr(vlm, "S_sz") else vlm.S.T
layers["unspliced"] = vlm.U_sz.T if hasattr(vlm, "U_sz") else vlm.U.T
if hasattr(vlm, "A") and (vlm.A.T.shape == layers["spliced"].shape):
layers["ambiguous"] = vlm.A.T
if hasattr(vlm, "A") and (vlm.toarray().T.shape == layers["spliced"].shape):
layers["ambiguous"] = vlm.toarray().T

if hasattr(vlm, "velocity"):
layers["velocity"] = vlm.velocity.T
Expand Down Expand Up @@ -109,12 +109,12 @@ def __init__(self, adata, basis=None):
self.S = adata.layers["spliced"].T
self.U = adata.layers["unspliced"].T
self.S = (
np.array(self.S.A, **kwargs)
np.array(self.S.toarray(), **kwargs)
if issparse(self.S)
else np.array(self.S, **kwargs)
)
self.U = (
np.array(self.U.A, **kwargs)
np.array(self.U.toarray(), **kwargs)
if issparse(self.U)
else np.array(self.U, **kwargs)
)
Expand Down Expand Up @@ -146,7 +146,7 @@ def __init__(self, adata, basis=None):
if "ambiguous" in adata.layers.keys():
self.A = np.array(adata.layers["ambiguous"].T)
if issparse(self.A):
self.A = self.A.A
self.A = self.A.toarray()

self.ca = {}
self.ra = {}
Expand Down
4 changes: 2 additions & 2 deletions scvelo/tools/terminal_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def cell_fate(
T = transition_matrix(_adata, self_transitions=self_transitions)
fate = np.linalg.inv(np.eye(_adata.n_obs) - T)
if issparse(T):
fate = fate.A
fate = fate.toarray()
cell_fates = np.array(_adata.obs[groupby][fate.argmax(1)])
if disconnected_groups is not None:
idx = _adata.obs[groupby].isin(disconnected_groups)
Expand Down Expand Up @@ -123,7 +123,7 @@ def cell_origin(
T = transition_matrix(_adata, self_transitions=self_transitions, backward=True)
fate = np.linalg.inv(np.eye(_adata.n_obs) - T)
if issparse(T):
fate = fate.A
fate = fate.toarray()
cell_fates = np.array(_adata.obs[groupby][fate.argmax(1)])
if disconnected_groups is not None:
idx = _adata.obs[groupby].isin(disconnected_groups)
Expand Down
2 changes: 1 addition & 1 deletion scvelo/tools/transition_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def transition_matrix(
graph = graph.multiply(basis_graph)

if self_transitions:
confidence = graph.max(1).A.flatten()
confidence = graph.max(1).toarray().flatten()
ub = np.percentile(confidence, 98)
self_prob = np.clip(ub - confidence, 0, 1)
graph.setdiag(self_prob)
Expand Down
10 changes: 5 additions & 5 deletions scvelo/tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def mean(x, axis=0):
# TODO: Add docstrings
def make_dense(X):
    """Return ``X`` as a dense :class:`numpy.ndarray`, squeezing vectors.

    Parameters
    ----------
    X
        Sparse container or dense array exposing ``ndim`` and ``shape``.

    Returns
    -------
    numpy.ndarray
        Dense data. A two-dimensional input with a single row or a single
        column is reduced to that row/column; other inputs keep their shape.
    """
    # Densify with `.toarray()` regardless of dimensionality instead of
    # falling back to the `.A` / `.A1` shorthands.
    XA = X.toarray() if issparse(X) else X
    if XA.ndim == 2:
        n_rows, n_cols = XA.shape
        if n_rows == 1:
            XA = XA[0]
        elif n_cols == 1:
            XA = XA[:, 0]
    return np.array(XA)
Expand Down Expand Up @@ -327,8 +327,8 @@ def cutoff_small_velocities(
x = adata.layers["spliced"] if use_raw else adata.layers["Ms"]
y = adata.layers["unspliced"] if use_raw else adata.layers["Mu"]

x_max = x.max(0).A[0] if issparse(x) else x.max(0)
y_max = y.max(0).A[0] if issparse(y) else y.max(0)
x_max = x.max(0).toarray()[0] if issparse(x) else x.max(0)
y_max = y.max(0).toarray()[0] if issparse(y) else y.max(0)

xy_norm = x / np.clip(x_max, 1e-3, None) + y / np.clip(y_max, 1e-3, None)
W = xy_norm >= np.percentile(xy_norm, 98, axis=0) * frac_of_max
Expand Down Expand Up @@ -449,9 +449,9 @@ def vcorrcoef(X, y, mode="pearsons", axis=-1):
Which correlation metric to use.
"""
if issparse(X):
X = np.array(X.A)
X = np.array(X.toarray())
if issparse(y):
y = np.array(y.A)
y = np.array(y.toarray())
if axis == 0:
if X.ndim > 1:
X = np.array(X.T)
Expand Down
4 changes: 2 additions & 2 deletions scvelo/tools/velocity_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def velocity_embedding(
T.eliminate_zeros()

densify = adata.n_obs < 1e4
TA = T.A if densify else None
TA = T.toarray() if densify else None

with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand All @@ -179,7 +179,7 @@ def velocity_embedding(
)
delta = T.dot(X[:, vgenes]) - X[:, vgenes]
if issparse(delta):
delta = delta.A
delta = delta.toarray()
cos_proj = (V * delta).sum(1) / l2_norm(delta)
V_emb *= np.clip(cos_proj[:, None] * 10, 0, 1)

Expand Down
6 changes: 3 additions & 3 deletions scvelo/tools/velocity_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ def __init__(
xkey = xkey if xkey in adata.layers.keys() else "spliced"

X = np.array(
adata.layers[xkey].A[:, subset]
adata.layers[xkey].toarray()[:, subset]
if issparse(adata.layers[xkey])
else adata.layers[xkey][:, subset]
)
V = np.array(
adata.layers[vkey].A[:, subset]
adata.layers[vkey].toarray()[:, subset]
if issparse(adata.layers[vkey])
else adata.layers[vkey][:, subset]
)
Expand Down Expand Up @@ -209,7 +209,7 @@ def compute_cosines(
)
self.uncertainties.eliminate_zeros()

confidence = self.graph.max(1).A.flatten()
confidence = self.graph.max(1).toarray().flatten()
self.self_prob = np.clip(np.percentile(confidence, 98) - confidence, 0, 1)

def _compute_cosines(self, obs_idx, queue):
Expand Down
4 changes: 2 additions & 2 deletions tests/core/test_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ def test_cleanup_all(

if dense:
if layer is None:
adata.X = adata.X.A
adata.X = adata.X.toarray()
else:
adata.layers[layer] = adata.layers[layer].A
adata.layers[layer] = adata.layers[layer].toarray()
returned_adata = cleanup(adata=adata, clean="all", inplace=inplace)

if not inplace:
Expand Down
20 changes: 11 additions & 9 deletions tests/preprocessing/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def test_first_moments(

if dense:
if layer is None:
adata.X = adata.X.A
adata.X = adata.X.toarray()
else:
adata.layers[layer] = adata.layers[layer].A
adata.layers[layer] = adata.layers[layer].toarray()

first_order_moment = get_moments(adata=adata, layer=layer, mode=mode)
assert isinstance(first_order_moment, np.ndarray)
Expand All @@ -69,9 +69,9 @@ def test_second_moments(

if dense:
if layer is None:
adata.X = adata.X.A
adata.X = adata.X.toarray()
else:
adata.layers[layer] = adata.layers[layer].A
adata.layers[layer] = adata.layers[layer].toarray()

second_order_moment = get_moments(
adata=adata, layer=layer, mode=mode, second_order=True, centered=False
Expand All @@ -98,9 +98,9 @@ def test_passing_array_for_layer(

if dense:
if layer is None:
adata.X = adata.X.A
adata.X = adata.X.toarray()
else:
adata.layers[layer] = adata.layers[layer].A
adata.layers[layer] = adata.layers[layer].toarray()

if layer is None:
first_order_moment = get_moments(adata=adata, layer=adata.X, mode=mode)
Expand Down Expand Up @@ -181,13 +181,15 @@ def _compare_adatas(self, adata_1, adata_2):
assert set(adata_1.obsp) == {"distances", "connectivities"}
assert issparse(adata_1.obsp["connectivities"])
np.testing.assert_almost_equal(
adata_1.obsp["connectivities"].A,
adata_2.obsp["connectivities"].A,
adata_1.obsp["connectivities"].toarray(),
adata_2.obsp["connectivities"].toarray(),
decimal=4,
)
assert issparse(adata_1.obsp["distances"])
np.testing.assert_almost_equal(
adata_1.obsp["distances"].A, adata_2.obsp["distances"].A, decimal=3
adata_1.obsp["distances"].toarray(),
adata_2.obsp["distances"].toarray(),
decimal=3,
)

# Check `.uns` is unchanged
Expand Down
Loading

0 comments on commit 3fbce6f

Please sign in to comment.