Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add VELOVI preprocess methods #1228

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Preprocessing (pp)
pp.normalize_per_cell
pp.log1p
pp.filter_and_normalize
pp.min_max_scale
pp.filter_genes_r2

**Moments** (across nearest neighbors in PCA space)

Expand Down
6 changes: 6 additions & 0 deletions scvelo/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
filter_and_normalize,
filter_genes,
filter_genes_dispersion,
filter_genes_r2,
log1p,
min_max_scale,
normalize_per_cell,
recipe_velocity,
velovi_preprocess_recipe,
)

__all__ = [
Expand All @@ -20,4 +23,7 @@
"pca",
"recipe_velocity",
"remove_duplicate_cells",
"min_max_scale",
"filter_genes_r2",
"velovi_preprocess_recipe",
]
72 changes: 72 additions & 0 deletions scvelo/preprocessing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from scipy.sparse import issparse
from sklearn.utils import sparsefuncs

from anndata import AnnData

from scvelo import logging as logg
from scvelo.core import get_initial_size, get_size, multiply, set_initial_size, sum

Expand Down Expand Up @@ -731,3 +733,73 @@ def recipe_velocity(
)
moments(adata, n_neighbors=n_neighbors, n_pcs=n_pcs)
return adata if copy else None


def min_max_scale(
    adata: AnnData,
    spliced_layer: str,
    unspliced_layer: str,
    copy: bool = False,
) -> AnnData:
    """Scale the spliced and unspliced layers to the range ``[0, 1]``.

    Each of the two layers is transformed by its own, freshly fitted
    :class:`sklearn.preprocessing.MinMaxScaler` (default settings) and the
    result is stored back under the same layer key.

    Parameters
    ----------
    adata
        Annotated data matrix.
    spliced_layer
        Key in ``adata.layers`` holding the spliced counts.
    unspliced_layer
        Key in ``adata.layers`` holding the unspliced counts.
    copy
        Work on a copy of ``adata`` instead of overwriting its layers.

    Returns
    -------
    The object whose layers were rescaled, i.e. ``adata`` itself or, if
    ``copy=True``, its copy.
    """
    from sklearn.preprocessing import MinMaxScaler

    if copy:
        adata = adata.copy()

    # NOTE(review): scikit-learn documents MinMaxScaler as rejecting sparse
    # input, so both layers are presumably dense here -- confirm with callers.
    for layer_key in (spliced_layer, unspliced_layer):
        # A new scaler per layer: the fitted ranges must not be shared.
        rescaled = MinMaxScaler().fit_transform(adata.layers[layer_key])
        adata.layers[layer_key] = rescaled

    return adata


def filter_genes_r2(adata: AnnData, copy: bool = False) -> AnnData:
    """Filter out genes according to a linear regression fit.

    Runs :func:`scvelo.tools.velocity` in ``"deterministic"`` mode and keeps
    only the genes whose ``velocity_r2`` and ``velocity_gamma`` entries in
    ``adata.var`` are both positive and whose ``velocity_genes`` flag is set.

    Parameters
    ----------
    adata
        Annotated data matrix.
    copy
        Fit the velocities on a copy of ``adata`` rather than writing the fit
        results into ``adata`` itself.

    Returns
    -------
    A view of ``adata`` (or of its copy if ``copy=True``) restricted to the
    retained genes.
    """
    from scvelo.tools import velocity

    adata = adata.copy() if copy else adata
    # `velocity` returns `None` unless it is asked to copy, so it is run in
    # place on our own handle; rebinding `adata` to its return value would
    # leave `None` behind for the default `copy=False`.
    velocity(adata, mode="deterministic")

    positive_fit = np.logical_and(
        adata.var.velocity_r2 > 0, adata.var.velocity_gamma > 0
    )
    adata = adata[:, positive_fit]
    # The flag column is read from the already subsetted object so that the
    # mask length matches the remaining genes.
    adata = adata[:, adata.var.velocity_genes]

    return adata


def velovi_preprocess_recipe(
    adata: AnnData,
    spliced_layer: str = "Ms",
    unspliced_layer: str = "Mu",
    min_max_scale: bool = True,
    filter_on_r2: bool = True,
    copy: bool = False,
) -> AnnData:
    """Preprocess data for use with VELOVI.

    Removes poorly detected genes and min-max scales the data.

    Parameters
    ----------
    adata
        Annotated data matrix.
    spliced_layer
        Name of the spliced layer.
    unspliced_layer
        Name of the unspliced layer.
    min_max_scale
        Min-max scale spliced and unspliced.
    filter_on_r2
        Filter out genes according to linear regression fit.
    copy
        Forwarded as ``copy`` to the scaling and filtering steps.

    Returns
    -------
    Preprocessed adata.
    """
    if min_max_scale:
        # Inside this body the boolean `min_max_scale` argument shadows the
        # module-level function of the same name, so calling the bare name
        # would call a bool. The argument name is public API, hence the
        # function is looked up in the module namespace instead.
        scale_layers = globals()["min_max_scale"]
        adata = scale_layers(adata, spliced_layer, unspliced_layer, copy=copy)
    if filter_on_r2:
        adata = filter_genes_r2(adata, copy=copy)

    return adata