From 76c633d693b79e69244aec75e4a5e6cd77cefaac Mon Sep 17 00:00:00 2001 From: Roman Joeres Date: Tue, 3 Sep 2024 11:17:32 +0200 Subject: [PATCH] Improvements in documentation --- CHANGELOG.md | 5 + README.md | 6 +- experiments/DTI/split.py | 6 +- experiments/DTI/visualize.py | 3 +- experiments/README.md | 33 +- tests/data/rw_data/taxonomy_Phylum.tsv | 16217 +++++++++++++++++++++++ 6 files changed, 16261 insertions(+), 9 deletions(-) create mode 100644 tests/data/rw_data/taxonomy_Phylum.tsv diff --git a/CHANGELOG.md b/CHANGELOG.md index 52e479c..4b13ebf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ - [ ] Replace GraKel with something "modern" and fully "conda-installable" to make DataSAIL fully conda-installable - [ ] Include [MashMap3](https://github.com/marbl/MashMap) - [ ] Include MASH for amino acid sequences +- [ ] Custom clustering methods ([Issue #25](https://github.com/kalininalab/DataSAIL/issues/25)) + +## v1.0.1 (2024-05-08) till v1.0.7 (2024-06-27) + +- Bug fixes in stratification ## v1.0.0 (2024-04-04) diff --git a/README.md b/README.md index 988a4c2..f2ad83d 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ pip install grakel to install DataSAIL in an already existing environment. Alternatively, one can install DataSAIL-lite from conda. DataSAIL-lite is a version of DataSAIL that does not install all clustering algorithms as the standard DataSAIL. -DataSAIL is available from Python 3.8 and newer. +DataSAIL is available for Python 3.8 and newer. 
## Usage @@ -55,7 +55,7 @@ datasail --e-type P --e-data --e-sim mmseqs --output None: def main(path): split_w_datasail(path, TECHNIQUES["datasail"]) - # split_w_deepchem(path, TECHNIQUES["deepchem"]) - # split_w_lohi(path) - # split_w_graphpart(path) + split_w_deepchem(path, TECHNIQUES["deepchem"]) + split_w_lohi(path) + split_w_graphpart(path) if __name__ == '__main__': diff --git a/experiments/DTI/visualize.py b/experiments/DTI/visualize.py index 0f7a805..9d07aaf 100644 --- a/experiments/DTI/visualize.py +++ b/experiments/DTI/visualize.py @@ -552,6 +552,5 @@ def plot(full_path: Path): if __name__ == '__main__': - # plot(Path(sys.argv[1])) comp_il() - + plot(Path(sys.argv[1])) diff --git a/experiments/README.md b/experiments/README.md index c1bd9ae..7b75031 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -2,4 +2,35 @@ ------------- -blub \ No newline at end of file +For the publication, we have conducted several experiments: + + 1. Splitting of drug-target interaction data, + 2. Splitting of data for Molecular Property Prediction, + 3. Splitting of data with samples belonging to either of two classes for stratified splits, + +and some ablation studies based on the data above. The experiments cover all possible applications of DataSAIL. Each +experiment folder is structured in the same way: + + 1. `split.py`: Contains the code used for splitting using DataSAIL or baseline tools. + 2. `train.py`: Contains the code to train the different models on the split data. + 3. `visualize.py`: Contains the code to visualize the results of the training. + +All can be executed in the same way: + +```shell +python -m experiments..