Skip to content

Commit

Permalink
Update the pca process with the functionality from the deprecated pca-beta process
Browse files Browse the repository at this point in the history
  • Loading branch information
jkokosar committed Oct 6, 2024
1 parent d1097d8 commit 4f57d8d
Show file tree
Hide file tree
Showing 18 changed files with 15 additions and 426 deletions.
2 changes: 2 additions & 0 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Changed
- Optimize resource usage in processes ``bbduk-single``, ``bbduk-paired``,
``upload-fastq-single``, ``upload-fastq-paired``,
``files-to-fastq-single`` and ``files-to-fastq-paired``
- Update the ``pca`` process with the functionality from the
deprecated ``pca-beta`` process

Fixed
-----
Expand Down
6 changes: 3 additions & 3 deletions resolwe_bio/processes/clustering/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ def run_pca(
class PrinicipalComponentAnalysis(Process):
"""Principal component analysis process (beta)."""

slug = "pca-beta"
name = "Principal component analysis (beta)"
slug = "pca"
name = "Principal component analysis (PCA)"
requirements = {
"expression-engine": "jinja",
"executor": {
Expand All @@ -197,7 +197,7 @@ class PrinicipalComponentAnalysis(Process):
},
}
data_name = "PCA"
version = "0.1.1"
version = "3.0.0"
process_type = "data:pca"
category = "Enrichment and Clustering"
scheduling_class = SchedulingClass.INTERACTIVE
Expand Down
109 changes: 0 additions & 109 deletions resolwe_bio/processes/clustering/pca.yml

This file was deleted.

Binary file removed resolwe_bio/tests/files/clustering_NCBI.tab.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/clustering_NCBI_1.tab.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/clustering_NCBI_2.tab.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/pca_exp_noname1.tab.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/pca_exp_noname2.tab.gz
Binary file not shown.
Binary file modified resolwe_bio/tests/files/pca_plot.json.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/pca_plot2.json.gz
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file removed resolwe_bio/tests/files/pca_plot_beta.json.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/pca_plot_ncbi.json.gz
Binary file not shown.
Binary file removed resolwe_bio/tests/files/pca_plot_single_sample.json.gz
Binary file not shown.
179 changes: 10 additions & 169 deletions resolwe_bio/tests/processes/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def round_elements(element, places=2):

class PcaProcessorTestCase(KBBioProcessTestCase):
@with_resolwe_host
@tag_process("pca-beta")
def test_beta_pca(self):
@tag_process("pca")
def test_pca(self):
with self.preparation_stage():
expression_1 = self.prepare_expression(
f_rc="exp_1_rc.tab.gz",
Expand Down Expand Up @@ -59,10 +59,8 @@ def test_beta_pca(self):
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca-beta", inputs)
saved_json, test_json = self.get_json(
"pca_plot_beta.json.gz", pca.output["pca"]
)
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json("pca_plot.json.gz", pca.output["pca"])
self.assertAlmostEqualGeneric(
round_elements(test_json["flot"]["data"]),
round_elements(saved_json["flot"]["data"]),
Expand All @@ -83,10 +81,8 @@ def test_beta_pca(self):
"DPU_G0067102",
"DPU_G0067112",
] # all non-zero
pca = self.run_process("pca-beta", inputs)
saved_json, test_json = self.get_json(
"pca_plot_beta_2.json.gz", pca.output["pca"]
)
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json("pca_plot_2.json.gz", pca.output["pca"])
self.assertAlmostEqualGeneric(
round_elements(test_json["flot"]["data"]),
round_elements(saved_json["flot"]["data"]),
Expand All @@ -96,10 +92,8 @@ def test_beta_pca(self):

inputs["low_expression_filter"] = False
inputs["standard_scaler"] = False
pca = self.run_process("pca-beta", inputs)
saved_json, test_json = self.get_json(
"pca_plot_beta_3.json.gz", pca.output["pca"]
)
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json("pca_plot_3.json.gz", pca.output["pca"])
self.assertAlmostEqualGeneric(
round_elements(test_json["flot"]["data"]),
round_elements(saved_json["flot"]["data"]),
Expand All @@ -112,164 +106,11 @@ def test_beta_pca(self):
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca-beta", inputs)
saved_json, test_json = self.get_json(
"pca_plot_beta_4.json.gz", pca.output["pca"]
)
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json("pca_plot_4.json.gz", pca.output["pca"])
self.assertAlmostEqualGeneric(
round_elements(test_json["flot"]["data"]),
round_elements(saved_json["flot"]["data"]),
)

self.assertEqual(len(pca.process_warning), 1)

@with_resolwe_host
@tag_process("pca")
def test_pca(self):
with self.preparation_stage():
expression_1 = self.prepare_expression(
f_rc="exp_1_rc.tab.gz",
f_exp="exp_1_tpm.tab.gz",
f_type="TPM",
source="DICTYBASE",
species="Dictyostelium discoideum",
)
expression_2 = self.prepare_expression(
f_rc="exp_2_rc.tab.gz",
f_exp="exp_2_tpm.tab.gz",
f_type="TPM",
source="DICTYBASE",
species="Dictyostelium discoideum",
)
expression_noname_1 = self.prepare_expression(
f_rc="pca_exp_noname1.tab.gz",
f_exp="pca_exp_noname1.tab.gz",
f_type="TPM",
source="DICTYBASE",
species="Dictyostelium discoideum",
)
expression_noname_2 = self.prepare_expression(
f_rc="pca_exp_noname2.tab.gz",
f_exp="pca_exp_noname2.tab.gz",
f_type="TPM",
source="DICTYBASE",
species="Dictyostelium discoideum",
)

inputs = {
"exps": [expression_1.pk, expression_2.pk],
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json("pca_plot.json.gz", pca.output["pca"])
self.assertAlmostEqualGeneric(
test_json["flot"]["data"], saved_json["flot"]["data"]
)
self.assertAlmostEqualGeneric(
test_json["explained_variance_ratios"],
saved_json["explained_variance_ratios"],
)
self.assertAlmostEqualGeneric(test_json["components"], saved_json["components"])
self.assertEqual(len(pca.process_warning), 0)

inputs = {
"exps": [expression_1.pk, expression_2.pk],
"genes": ["DPU_G0067098", "DPU_G0067100", "DPU_G0067104"], # all zero
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json("pca_plot2.json.gz", pca.output["pca"])
self.assertAlmostEqualGeneric(
test_json["flot"]["data"], saved_json["flot"]["data"]
)
self.assertAlmostEqualGeneric(
test_json["explained_variance_ratios"],
saved_json["explained_variance_ratios"],
)
self.assertAlmostEqualGeneric(test_json["components"], saved_json["components"])
self.assertEqual(len(pca.process_warning), 0)

inputs = {
"exps": [expression_1.pk],
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json(
"pca_plot_single_sample.json.gz", pca.output["pca"]
)
self.assertAlmostEqualGeneric(
test_json["flot"]["data"], saved_json["flot"]["data"]
)
self.assertAlmostEqualGeneric(
test_json["explained_variance_ratios"],
saved_json["explained_variance_ratios"],
)
self.assertAlmostEqualGeneric(test_json["components"], saved_json["components"])
self.assertEqual(len(pca.process_warning), 0)

inputs = {
"exps": [
expression_noname_1.pk,
expression_noname_2.pk,
],
"source": "DICTYBASE",
"species": "Dictyostelium discoideum",
}
pca = self.run_process("pca", inputs)

@with_resolwe_host
@tag_process("pca")
def test_pca_ncbi(self):
with self.preparation_stage():
expression_1 = self.prepare_expression(
f_exp="clustering_NCBI.tab.gz",
f_type="rc",
name="Expression",
source="NCBI",
species="Homo sapiens",
)
expression_2 = self.prepare_expression(
f_exp="clustering_NCBI_1.tab.gz",
f_type="rc",
name="Expression",
source="NCBI",
species="Homo sapiens",
)
expression_3 = self.prepare_expression(
f_exp="clustering_NCBI_2.tab.gz",
f_type="rc",
name="Expression",
source="NCBI",
species="Homo sapiens",
)

inputs = {
"exps": [
expression_1.pk,
expression_2.pk,
expression_3.pk,
],
"genes": [
"1",
"503538",
"56934",
"29974",
"2",
"144571",
"3",
"abc",
"lll",
],
"source": "NCBI",
"species": "Homo sapiens",
}
pca = self.run_process("pca", inputs)
saved_json, test_json = self.get_json(
"pca_plot_ncbi.json.gz", pca.output["pca"]
)
self.assertCountEqual(
test_json["zero_gene_symbols"], saved_json["zero_gene_symbols"]
)
Loading

0 comments on commit 4f57d8d

Please sign in to comment.