Skip to content

Commit

Permalink
fixup! Add pca-beta process
Browse files Browse the repository at this point in the history
  • Loading branch information
marcellevstek committed Jun 27, 2024
1 parent 3a7890b commit 95d4750
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 17 deletions.
32 changes: 15 additions & 17 deletions resolwe_bio/processes/clustering/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA as plotPCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from resolwe.process import (
Expand Down Expand Up @@ -41,13 +41,12 @@ def get_pca(expressions=pd.DataFrame(), gene_labels=[], error=None):
all_components = [[], []]
all_explained_variance_ratios = [0.0, 0.0]
else:
pca = plotPCA(n_components=2, whiten=True)
pca = PCA(n_components=2, whiten=True)
pca_expressions = pca.fit_transform(expressions.transpose())

coordinates = [
t[:2].tolist() if len(t) > 1 else [t[0], 0.0] for t in pca_expressions
]
print(coordinates)
all_components = [
component_top_factors(component=component, allgenes_array=gene_labels)
for component in pca.components_
Expand Down Expand Up @@ -85,11 +84,8 @@ def save_pca(result={}, sample_ids=[], max_size=10):
"all_explained_variance_ratios": result["all_explained_variance_ratios"],
}

if output_fn:
with open(output_fn, "w") as outfile:
json.dump(data, outfile, separators=(",", ":"), allow_nan=False)
else:
print(json.dumps(data, separators=(",", ":"), allow_nan=False))
with open(output_fn, "w") as outfile:
json.dump(data, outfile, separators=(",", ":"), allow_nan=False)


def read_csv(fname):
Expand Down Expand Up @@ -182,16 +178,16 @@ def run_pca(
save_pca(result, sample_ids)


class PCA(Process):
"""Principal component analysis process."""
class PrinicipalComponentAnalysis(Process):
"""Principal component analysis process (beta)."""

slug = "pca-beta"
name = "PCA"
name = "Principal component analysis (beta)"
requirements = {
"expression-engine": "jinja",
"executor": {
"docker": {
"image": "public.ecr.aws/genialis/resolwebio/rnaseq:6.0.0",
"image": "public.ecr.aws/genialis/resolwebio/common:4.1.1",
},
},
"resources": {
Expand All @@ -201,7 +197,7 @@ class PCA(Process):
},
}
data_name = "{{ alignment|name|default('?') }}"
version = "1.0.0"
version = "0.1.0"
process_type = "data:pca"
category = "Enrichment and Clustering"
scheduling_class = SchedulingClass.INTERACTIVE
Expand All @@ -218,7 +214,8 @@ class Input:
label="Gene subset",
required=False,
placeholder="new gene id, e.g. ENSG00000185982 (ENSEMBL database)",
description="Specify at least two genes or leave this field empty.",
description="Specify at least two genes or leave this field empty. "
"To subset your analysis to only a subset of genes, specify them here.",
)
source = StringField(
label="Gene ID database of selected genes",
Expand All @@ -243,17 +240,18 @@ class Input:
default=True,
description="Filter the input expression matrix to remove genes "
"with low expression values. Only genes with 80th percentile above "
"read count of 100 are retained.",
"read count of 100 are retained. "
"This option only works if raw counts are available.",
)
log2 = BooleanField(
label="Log-transform expressions",
default=True,
description="Transform expressions with log2(x + 1) before PCA.",
description="Transform expressions with log2(x + 1) before performing PCA.",
)
standard_scaler = BooleanField(
label="Transform input data using StandardScaler",
default=True,
description="Apply the StandardScaler transformation before PCA.",
description="Apply the StandardScaler transformation before performing PCA.",
)

class Output:
Expand Down
Binary file added resolwe_bio/tests/files/pca_plot_beta_4.json.gz
Binary file not shown.
33 changes: 33 additions & 0 deletions resolwe_bio/tests/processes/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,39 @@ def test_beta_pca(self):
self.assertAlmostEqualGeneric(test_json["components"], saved_json["components"])
self.assertEqual(len(pca.process_warning), 0)

with self.preparation_stage():
expression_1 = self.prepare_expression(
f_exp="exp_1_tpm.tab.gz",
f_type="TPM",
source="DICTYBASE",
species="Dictyostelium discoideum",
)
expression_2 = self.prepare_expression(
f_exp="exp_2_tpm.tab.gz",
f_type="TPM",
source="DICTYBASE",
species="Dictyostelium discoideum",
)

inputs = {
"exps": [expression_1.pk, expression_2.pk],
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca-beta", inputs)
saved_json, test_json = self.get_json(
"pca_plot_beta_4.json.gz", pca.output["pca"]
)
self.assertAlmostEqualGeneric(
test_json["flot"]["data"], saved_json["flot"]["data"]
)
self.assertAlmostEqualGeneric(
test_json["explained_variance_ratios"],
saved_json["explained_variance_ratios"],
)
self.assertAlmostEqualGeneric(test_json["components"], saved_json["components"])
self.assertEqual(len(pca.process_warning), 0)

@with_resolwe_host
@tag_process("pca")
def test_pca(self):
Expand Down

0 comments on commit 95d4750

Please sign in to comment.