Skip to content

Commit

Permalink
enable downsampling during conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
Benedikt Obermayer committed Aug 3, 2020
1 parent d940893 commit 1db2609
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 0 deletions.
10 changes: 10 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@
History
=======

------
v0.8.5
------

- allow text import with mtx files
- display only most frequent categories
- use `ad.raw` if present
- enable downsampling during conversion
- link to publication

------
v0.8.4
------
Expand Down
1 change: 1 addition & 0 deletions scelvis/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
# Callbacks for the file upload.
callbacks.register_file_upload(app)


# Add redirection for root.
@app_flask.route("/")
def redirect_root():
Expand Down
12 changes: 12 additions & 0 deletions scelvis/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ def _normalize_filter_dge(ad):
return ad

def _write_output(self, ad):
    """Optionally downsample ``ad`` and write it to ``self.args.out_file``.

    If ``self.args.ncells`` is set, that many cells are sampled from ``ad``
    with ``sc.pp.subsample`` before saving (the return value is not used, so
    ``ad`` is expected to be subset in place).  A request for more cells than
    ``ad`` contains keeps every cell instead of failing.

    :param ad: anndata object to (optionally) subsample and save.
    """
    ncells = self.args.ncells
    if ncells:
        if ncells > ad.n_obs:
            # Subsampling cannot return more cells than are present; honour
            # the "[take all]" fallback instead of letting the sampler raise.
            logger.info(
                "Requested %s cells but only %s are present, keeping all cells",
                ncells,
                ad.n_obs,
            )
        else:
            logger.info("Sampling %s cells from anndata object", ncells)
            sc.pp.subsample(ad, n_obs=ncells)
    logger.info("Saving anndata object to %s", self.args.out_file)
    ad.write(self.args.out_file)

Expand Down Expand Up @@ -327,6 +330,9 @@ def _load_expression(self, coords, annotation, markers):
return ad

def _write_output(self, ad):
    """Optionally downsample ``ad`` and write it to ``self.args.out_file``.

    If ``self.args.ncells`` is set, that many cells are sampled from ``ad``
    with ``sc.pp.subsample`` before saving (the return value is not used, so
    ``ad`` is expected to be subset in place).  A request for more cells than
    ``ad`` contains keeps every cell instead of failing.

    :param ad: anndata object to (optionally) subsample and save.
    """
    ncells = self.args.ncells
    if ncells:
        if ncells > ad.n_obs:
            # Subsampling cannot return more cells than are present; honour
            # the "[take all]" fallback instead of letting the sampler raise.
            logger.info(
                "Requested %s cells but only %s are present, keeping all cells",
                ncells,
                ad.n_obs,
            )
        else:
            logger.info("Sampling %s cells from anndata object", ncells)
            sc.pp.subsample(ad, n_obs=ncells)
    logger.info("Saving anndata object to %s", self.args.out_file)
    ad.write(self.args.out_file)

Expand Down Expand Up @@ -393,6 +399,9 @@ def _load_loom(self, markers):
return ad

def _write_output(self, ad):
    """Optionally downsample ``ad`` and write it to ``self.args.out_file``.

    If ``self.args.ncells`` is set, that many cells are sampled from ``ad``
    with ``sc.pp.subsample`` before saving (the return value is not used, so
    ``ad`` is expected to be subset in place).  A request for more cells than
    ``ad`` contains keeps every cell instead of failing.

    :param ad: anndata object to (optionally) subsample and save.
    """
    ncells = self.args.ncells
    if ncells:
        if ncells > ad.n_obs:
            # Subsampling cannot return more cells than are present; honour
            # the "[take all]" fallback instead of letting the sampler raise.
            logger.info(
                "Requested %s cells but only %s are present, keeping all cells",
                ncells,
                ad.n_obs,
            )
        else:
            logger.info("Sampling %s cells from anndata object", ncells)
            sc.pp.subsample(ad, n_obs=ncells)
    logger.info("Saving anndata object to %s", self.args.out_file)
    ad.write(self.args.out_file)

Expand Down Expand Up @@ -492,6 +501,9 @@ def setup_argparse(parser):
type=int,
help="Save top n markers per cluster [10]",
)
parser.add_argument(
"--ncells", dest="ncells", type=int, help="sample ncells cells from object [take all]"
)
parser.add_argument(
"--verbose", default=False, action="store_true", help="Enable verbose output"
)
9 changes: 9 additions & 0 deletions scelvis/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,15 @@ def load_data(data_source, identifier):
numerical_meta.append(col)
else:
categorical_meta.append(col)
# use only the top 40 most frequent categories to avoid legend blow-up
if len(ad.obs[col].unique()) >= 40:
logger.warn("keeping only the top 40 most frequent items in column %s", col)
keep = ad.obs[col].value_counts().index[:39].astype(str)
if "other" in keep:
logger.warn("value 'other' already present in column %s", col)
tmp = ad.obs[col].astype(str)
tmp[~np.isin(tmp, keep)] = "other"
ad.obs[col] = tmp.astype("category")
# add coordinates to obs
coords = {}
for k in ad.obsm.keys():
Expand Down

0 comments on commit 1db2609

Please sign in to comment.