diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 310ce62..a2a385f 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -21,7 +21,7 @@ jobs:
- name: Build and Commit
uses: sphinx-notes/pages@v2
with:
- requirements_path: ./docs/requirements.txt
+ requirements_path: requirements.txt
- name: Push changes
uses: ad-m/github-push-action@master
with:
diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml
new file mode 100644
index 0000000..ce79ac5
--- /dev/null
+++ b/.github/workflows/sphinx.yml
@@ -0,0 +1,138 @@
+# From: https://github.com/rkdarst/sphinx-actions-test/blob/master/.github/workflows/sphinx-build.yml
+
+name: sphinx
+on: [push, pull_request]
+
+env:
+ DEFAULT_BRANCH: "main"
+ #SPHINXOPTS: "-W --keep-going -T"
+ # ^-- If these SPHINXOPTS are enabled, then be strict about the builds and fail on any warnings
+
+jobs:
+ build-and-deploy:
+ name: Build and gh-pages
+ runs-on: ubuntu-latest
+ steps:
+ # https://github.com/marketplace/actions/checkout
+ - uses: actions/checkout@v2
+ with:
+ fetch-depth: 2
+ lfs: true
+ # https://github.com/marketplace/actions/setup-python
+ # ^-- This gives info on matrix testing.
+      - name: Install Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.9"  # quoted: a bare 3.10 would be parsed as the float 3.1
+ # https://docs.github.com/en/actions/guides/building-and-testing-python#caching-dependencies
+ # ^-- How to set up caching for pip on Ubuntu
+ - name: Cache pip
+ uses: actions/cache@v2
+ with:
+ path: ~/.cache/pip
+ key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
+ restore-keys: |
+ ${{ runner.os }}-pip-
+ ${{ runner.os }}-
+ # https://docs.github.com/en/actions/guides/building-and-testing-python#installing-dependencies
+ # ^-- This gives info on installing dependencies with pip
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+      - name: Debugging information  # reads runner env vars; ref names are untrusted and must not be spliced into the script
+        run: |
+          echo "github.ref: ${GITHUB_REF}"
+          echo "github.event_name: ${GITHUB_EVENT_NAME}"
+          echo "github.head_ref: ${GITHUB_HEAD_REF}"
+          echo "github.base_ref: ${GITHUB_BASE_REF}"
+          set -x
+          git rev-parse --abbrev-ref HEAD
+          git branch
+          git branch -a
+          git remote -v
+          python -V
+          pip list --not-required
+          pip list
+ # Build
+ - uses: ammaraskar/sphinx-problem-matcher@master
+ - name: Build Sphinx docs
+ run: |
+ make dirhtml
+ # This fixes broken copy button icons, as explained in
+ # https://github.com/coderefinery/sphinx-lesson/issues/50
+ # https://github.com/executablebooks/sphinx-copybutton/issues/110
+ # This can be removed once these PRs are accepted (but the
+ # fixes also need to propagate to other themes):
+ # https://github.com/sphinx-doc/sphinx/pull/8524
+ # https://github.com/readthedocs/sphinx_rtd_theme/pull/1025
+ sed -i 's/url_root="#"/url_root=""/' _build/dirhtml/index.html || true
+ # The following supports building all branches and combining on
+ # gh-pages
+
+ # Clone and set up the old gh-pages branch
+ - name: Clone old gh-pages
+ if: ${{ github.event_name == 'push' }}
+ run: |
+ set -x
+ git fetch
+ ( git branch gh-pages remotes/origin/gh-pages && git clone . --branch=gh-pages _gh-pages/ ) || mkdir _gh-pages
+ rm -rf _gh-pages/.git/
+ mkdir -p _gh-pages/branch/
+      # On a push to exactly the default branch (exact ref match, so a branch
+      # such as 'maintenance' is not mistaken for 'main'), deploy to the site root.
+      - name: Copy new build (default branch)
+        if: |
+          github.event_name == 'push' &&
+          github.ref == format('refs/heads/{0}', env.DEFAULT_BRANCH)
+        run: |
+          set -x
+          # Delete everything under _gh-pages/ that is from the
+          # primary branch deployment.  Excludes the other branches'
+          # _gh-pages/branch* paths, and does not delete
+          # _gh-pages itself.
+          find _gh-pages/ -mindepth 1 ! -path '_gh-pages/branch*' -delete
+          rsync -a _build/dirhtml/ _gh-pages/
+      # On a push to any other ref, copy the build to
+      # _gh-pages/branch/$brname (transforming '/' into '--')
+      - name: Copy new build (branch)
+        if: |
+          github.event_name == 'push' &&
+          github.ref != format('refs/heads/{0}', env.DEFAULT_BRANCH)
+        run: |
+          set -x
+          # GITHUB_REF comes from the runner environment, so the ref name is data, never script text.
+          brname="${GITHUB_REF}"
+          brname="${brname##refs/heads/}"
+          brdir=${brname//\//--}  # replace '/' with '--'
+          rm -rf "_gh-pages/branch/${brdir}"
+          rsync -a _build/dirhtml/ "_gh-pages/branch/${brdir}"
+      # Delete every _gh-pages/branch/<dir> that no remote branch maps to.  Names are
+      # compared in '--' form: mapping '--' back to '/' would misread 'a--b' as 'a/b'.
+      - name: Delete old feature branches
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          set -x
+          live=$(git for-each-ref --format='%(refname:lstrip=3)' refs/remotes/origin/ | sed 's|/|--|g')
+          for brdir in $(ls _gh-pages/branch/) ; do
+            if ! grep -qxF -- "$brdir" <<< "$live" ; then
+              echo "Removing $brdir"
+              rm -r "_gh-pages/branch/$brdir/"
+            fi
+          done
+ # Add the .nojekyll file
+ - name: nojekyll
+ if: ${{ github.event_name == 'push' }}
+ run: |
+ touch _gh-pages/.nojekyll
+ # Deploy
+ # https://github.com/peaceiris/actions-gh-pages
+ - name: Deploy
+ uses: peaceiris/actions-gh-pages@v3
+ if: ${{ github.event_name == 'push' }}
+ #if: ${{ success() && github.event_name == 'push' && github.ref == 'refs/heads/$defaultBranch' }}
+ with:
+ publish_branch: gh-pages
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: _gh-pages/
+ force_orphan: true
diff --git a/src/csfdock/.DS_Store b/src/csfdock/.DS_Store
deleted file mode 100644
index b84a05b..0000000
Binary files a/src/csfdock/.DS_Store and /dev/null differ
diff --git a/src/csfdock/DVisualize.py b/src/csfdock/DVisualize.py
deleted file mode 100644
index 72b3f17..0000000
--- a/src/csfdock/DVisualize.py
+++ /dev/null
@@ -1,334 +0,0 @@
-import os
-
-import py3Dmol
-from ipywidgets import Layout, interactive
-
-from rdkit import Chem
-from csfdock.utils import give_id, PDBParse
-from rich.console import Console
-console = Console()
-blue_console = Console(style="white on blue")
-
-
-class DVisualize:
- """Grid bix view of the docking pocket
- Attributes:
- box_center_x (int): Coordinates of box center x-axis.
- box_center_y (int): Coordinates of box center y-axis.
- box_center_z (int): Coordinates of box center z-axis.
- box_size_x (int): Size of x-axis of grid box
- box_size_y (int): Size of x-axis of grid box
- box_size_z (int): Size of x-axis of grid box
- ligand (str): Path of ligand
- protein (str): Path of Receptor
- """
-
- def __init__(self, *args, **kwargs):
- self.grid_box_centers = None
- self.receptor = kwargs.get("protein", None)
- self.ligand = kwargs.get("ligand", None)
- self.box_center_x = kwargs.get("box_center_x")
- self.box_center_y = kwargs.get("box_center_y")
- self.box_center_z = kwargs.get("box_center_z")
- self.box_size_x = kwargs.get("box_size_x", 20)
- self.box_size_y = kwargs.get("box_size_y", 20)
- self.box_size_z = kwargs.get("box_size_z", 20)
- self.prot_color = kwargs.get("prot_color", "spectrum")
- self.lig_color = kwargs.get("lig_color", "red")
- self.membrane = kwargs.get("membrane", None)
- self.save = kwargs.get("save", "False")
- self.bg_color = kwargs.get("bg_color", "white")
- self.mem_color = kwargs.get("mem_color", "blue")
- for arg in args:
- if isinstance(arg, str):
- if self.receptor is None:
- self.receptor = arg
- elif isinstance(arg, list):
- if self.grid_box_centers is None:
- self.grid_box_centers = arg
- self.box_center_x = arg[0]
- self.box_center_y = arg[1]
- self.box_center_z = arg[2]
- else:
- self.grid_box_sizes = arg
- self.box_size_x = arg[0]
- self.box_size_y = arg[1]
- self.box_size_z = arg[2]
-
- def LoadBox(self, *args, **kwargs):
- try:
- self.grid_box_centers
- except AttributeError:
- self.grid_box_centers = None
- for arg in args:
- if isinstance(arg, str):
- if self.receptor is None:
- self.receptor = arg
- elif isinstance(arg, list):
- if self.grid_box_centers is None:
- self.grid_box_centers = arg
- self.box_center_x = arg[0]
- self.box_center_y = arg[1]
- self.box_center_z = arg[2]
- else:
- self.grid_box_sizes = arg
- self.box_size_x = arg[0]
- self.box_size_y = arg[1]
- self.box_size_z = arg[2]
-
- def __rep__(self):
- return f"Complex_Grid: {self.receptor} and {self.ligand}"
-
- def __str__(self):
- return f"Protein: {self.receptor} and \nligand :{self.ligand}"
-
- def __grid_box(self):
- try:
- self.vobj.addBox(
- {
- "center": {
- "x": self.box_center_x,
- "y": self.box_center_y,
- "z": self.box_center_z,
- },
- "dimensions": {
- "w": self.box_size_x,
- "h": self.box_size_y,
- "d": self.box_size_z,
- },
- "color": "blue",
- "opacity": 0.5,
- }
- )
- except Exception as e:
- print("Failed to add Grid")
-
- def LoadLipid(self, *args, verbose=True,native=False, **kwargs):
- lipid = kwargs.get("lipid")
-
- for arg in args:
- lipid = arg
- lipid_path = self.LoadReceptor(
- lipid, key="Lipid", verbose=verbose, native=native
- )
- _, lipid, water, lig = PDBParse(lipid_path)
- self.lipid = lipid_path
- with open("./temp.pdb", "w+") as f:
- for i in lipid:
- print(i, end="", file=f)
- # m = Chem.MolFromPDBFile("./temp.pdb", sanitize=False)
- # print(m)
- try:
- if self.vobj:
- pass
- except AttributeError:
- self.vobj = py3Dmol.view(width=800, height=600)
- lipid_mol = open("./temp.pdb").read()
- self.vobj.addModel(lipid_mol, "pdb")
- self.vobj.setStyle({"lipid_mol": 2}, {"cartoon": {}})
- try:
- os.remove("./temp.pdb")
- except Exception:
- pass
-
- # self.vobj.addModel(lipid, "pdb")
- # self.vobj.setStyle({"model": 3}, {"cartoon": {}})
- # self.vobj.setStyle({"cartoon": {"color": "spectrum"}})
-
- def __complex_view(self):
- mol1 = open(self.receptor, "r").read()
- file_format = "pdb"
- try:
- mol2 = open(self.ligand, "r").read()
- lig_dir, lig_name, lig_file_format = give_id(self.ligand)
- if lig_file_format == "sdf":
- file_format = "sdf"
- self.vobj.addModel(mol2, f"{file_format}")
- self.vobj.setStyle({"model": 1}, {"stick": {}})
- except TypeError as er:
- self.mol_view.setStyle(
- {"resn": f"{self.resn}"}, {"stick": {"colorscheme": self.lig_color}}
- )
- self.vobj.addModel(mol1, "pdb")
- self.vobj.setStyle({"cartoon": {"color": self.prot_color}})
-
- def __visualize_mol(self):
- self.vobj = py3Dmol.view(width=800, height=600)
- self.__grid_box()
- self.__box_view()
- try:
- self.LoadLipid(self.lipid, verbose=False, native=False)
- except AttributeError as er:
- blue_console.print("Lipid maynot be loaded yet")
- try:
- _ = self.bg_color
- except AttributeError:
- self.bg_color= "white"
- self.vobj.setBackgroundColor(self.bg_color)
- self.vobj.rotate(90, {"x": 0, "y": 1, "z": 0}, viewer=(0, 1))
- self.vobj.zoomTo()
- return self.vobj.show()
-
- def ShowMolecules(self, **kwargs):
- """Visualize grid box with protein complex
- Returns:
- py3dmol : 3D Viewer
- """
- self.resn = kwargs.get("resn", "LIG")
-
- grid_obj = interactive(self.__visualize_mol)
- return display(grid_obj)
-
- def __show_ligand(
- self, mol_view_object, mol, resn=None, mol_color="blue", style="stick"
- ):
- _, mol_name, mol_format = give_id(mol)
-
- try:
- mol2 = open(mol, "r").read()
- *_, mol_file_format = give_id(self.ligand)
- mol_view_object.addModel(mol2, f"{mol_file_format}")
- mol_view_object.setStyle({"model": 1}, {"stick": {}})
- except (TypeError, AttributeError) as er:
- print(
- # "Cannot.."
- "Searching name space..."
- )
- mol_view_object.setStyle(
- {"resn": f"{resn}"}, {f"{style}": {"colorscheme": mol_color}}
- )
- # print(er)
- return mol_view_object
-
- def SimpleView(self, **kwargs):
- """3d visualization of pdb
- Args:
- protein (TYPE): protein
- ligand (None, optional): small molecule
- color (str, optional): color of wish, default: grey
- resn (str): Ligand from pdb file.
- Returns:
- TYPE: structure view.
- """
- resn = kwargs.get("resn", "LIG")
- self.bg_color = kwargs.get("bg_color", "white")
- self.prot_color = kwargs.get("prot_color", "spectrum")
- self.lig_color = kwargs.get("lig_color", "red")
- self.save = kwargs.get("save", False)
- self.show_ligand = kwargs.get("show_ligand", True)
- self.show_receptor = kwargs.get("show_receptor", True)
- vobj = py3Dmol.view(width=900, height=500)
- vobj.setBackgroundColor(self.bg_color)
- if self.show_receptor:
- structure_dir, structure_name, structure_format = give_id(self.receptor)
- if structure_format.lower() == "sdf":
- mol = Chem.MolFromMolFile(self.receptor, removeHs=False)
- mol = Chem.MolToMolBlock(mol)
- vobj.addModel(mol, f"{structure_format}")
- self.clean.addModel(mol, f"{structure_format}")
- else:
- vobj.addModel(open(self.receptor).read())
- self.clean = vobj
- vobj.setStyle({"cartoon": {"color": f"{self.prot_color}"}})
-
- if self.show_ligand:
- try:
- self.__show_ligand(vobj, self.ligand, mol_color=self.lig_color)
- except AttributeError as er:
- print("Ligand not yet added to the project...")
- vobj.zoomTo()
- if self.save == True:
- prefix = "image"
- while os.path.exists(f"./images/{prefix}.png"):
- suffix += 1
- name = f"{prefix}{suffix}.png"
- vobj.save_fig(f"./Images/{name}", dpi=600)
- print(f"Successfully saved ./Images/{name} ")
- if self.show_ligand is False and self.show_receptor is False:
- return "Nothing to visualize.."
-
- return vobj.show()
-
- def __box_view(self, **kwargs):
- """3d visualization of pdb
- Args:
- protein (TYPE): protein
- ligand (None, optional): small molecule
- color (str, optional): color of wish, default: grey
- resn (str): Ligand from pdb file.
- Returns:
- TYPE: structure view.
- """
- self.resn = kwargs.get("resn", "LIG")
- self.membrane = kwargs.get("membrane", None)
- self.lig_color = kwargs.get("resn_color", "yellow")
- self.element = kwargs.get("element", None)
- self.save = kwargs.get("save", False)
- self.mem_color = kwargs.get("mem_color", "blue")
- file_format = "pdb"
- try:
- structure_dir, structure_name, structure_format = give_id(self.receptor)
- if structure_format.lower() == "sdf":
- mol = Chem.MolFromMolFile(self.receptor, removeHs=False)
- mol = Chem.MolToMolBlock(mol)
- self.vobj.addModel(mol, f"{structure_format}")
- else:
- self.vobj.addModel(open(self.receptor).read())
-
- self.vobj.setStyle({"cartoon": {"color": "spectrum"}})
-
-
- except Exception as er:
- print("Failed to open protein")
- # try:
- # if self.ligand and self.resn is not None:
- # self.vobj.setStyle(
- # {"resn": f"{self.ligand}"},
- # {"stick": {"colorscheme": self.lig_color}},
- # )
- #
- # # elif self.ligand is not None and self.resn is None:
- # # mol2 = open(self.ligand, 'r').read()
- # # lig_dir, lig_name, lig_file_format = give_id(self.ligand)
- # # if lig_file_format == "sdf":
- # # file_format = "sdf"
- # # self.vobj.addModel(mol2, f"{file_format}")
- # # self.vobj.setStyle(
- # # {'model': 1}, {'stick': {"colorscheme": self.lig_color}}
- # # )
- #
- # # elif self.ligand is None and self.resn is not None:
- # # self.vobj.setStyle(
- # # {"resn": f"{self.resn}"},
- # # {"sphere": {"colorscheme": self.lig_color}},
- # # )
- # #except TypeError as er:
- # # print("failed to open ligand")
- # pass
- try:
- if self.ligand is not None:
- mol2 = open(self.ligand, "r").read()
- lig_dir, lig_name, lig_file_format = give_id(self.ligand)
- if lig_file_format == "sdf":
- file_format = "sdf"
- self.vobj.addModel(mol2, f"{file_format}")
- self.vobj.setStyle({"model": 1}, {"stick": {}})
- else:
- self.vobj.setStyle(
- {"resn": f"{self.resn}", "clickable": True},
- {"stick": {"colorscheme": self.lig_color}},
- )
- except Exception as er:
- pass
-
- self.vobj.zoomTo()
- # self.vobj.setStyle({"clickable": True})
- if self.save is True:
- prefix = "image"
- while os.path.exists(f"./images/{prefix}.png"):
- suffix += 1
- name = f"{prefix}{suffix}.png"
- self.vobj.save_fig(f"./Images/{name}", dpi=600)
- print(f"Successfully saved ./Images/{name} ")
- # return self.vobj.show()
-
diff --git a/src/csfdock/DockingTools.py b/src/csfdock/DockingTools.py
deleted file mode 100644
index c26752d..0000000
--- a/src/csfdock/DockingTools.py
+++ /dev/null
@@ -1,994 +0,0 @@
-# Functions invloved in docking using smina
-
-import itertools
-import os
-import re
-import subprocess
-
-import ipywidgets
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import seaborn as sns
-from IPython.display import HTML, display
-from ipywidgets import (
- FileUpload,
- IntSlider,
- Layout,
- fixed,
- interactive,
- interactive_output,
- widgets,
-)
-from matplotlib.offsetbox import AnchoredText
-from rich.console import Console
-from scipy.special import expit
-from sklearn.datasets import make_classification
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
-from sklearn.model_selection import KFold, train_test_split
-
-from csfdock.utils import give_id
-
-console = Console()
-
-
-def add_hydrogen(list_xyz):
- """A function to add hydrogen atoms using
- openbabel to a list of molecules.
- Creates sub folder "protein_id/poses" in
- input file directory and dump there.
- Args:
- list_xyz (list): Molecules to be added Hydrogen
- Returns:
- xyz: saves xyz in protein_id/poses with suffix "_addHs.xyz"
- """
- # FOR SERVER or use own path..
- OBABEL_PATH = "/share/openbabel-3.1.1/bin/obabel"
- for i in list_xyz:
- dir_name, id, file_format = give_id(i)
- if not os.path.exists(f"{dir_name}/addH/{id[:4]}"):
- os.makedirs(f"{dir_name}/addH/{id[:4]}")
- command = f"{OBABEL_PATH} {i} -O {dir_name}/addH/{id[:4]}/{id}_addHs.xyz -h"
- subprocess.run(command, cwd=f"{dir_name}", shell=True)
- return "Successfully Completed."
-
-
-def rmsd_calculator(reference, poses_list, key=None, nomatch=False, verbose=True):
- """Calculates RMSD between reference and pose using obrms in server.
- Args:
- reference (reference molecule): Reference molecule
- poses_list (test_poses): List of poses to test with the reference.
- key= Any suffix to add to the name of the file.
- Returns:
- Txt file: Writes reference, poses name and RMSD to a log file.
- """
- count = 0
- try:
- for ref in reference:
- _, ref_name, _dir = give_id(ref)
- ref_id = os.path.basename(ref)
- ref_dir = os.path.dirname(ref)
- ref_dir = os.path.dirname(ref_dir)
- for pose in poses_list:
- pose_id = os.path.basename(pose)
- if ref_id[:4].lower() == pose_id[:4].lower() or (nomatch is True):
- command = f"/share/openbabel-3.1.1/bin/obrms {ref} \t {pose}"
- pose_id = os.path.basename(pose)
- rmsd_out = subprocess.run(
- command,
- cwd=f"{ref_dir}",
- capture_output=True,
- text=True,
- shell=True,
- )
- if not os.path.exists(f"{ref_dir}/result/RMSD"):
- os.makedirs(f"{ref_dir}/result/RMSD")
- with open(
- f"{ref_dir}/result/RMSD/{ref_id[:4]}_RMSD_{key}.txt", "a+"
- ) as write_out:
- rmsd_result = f"{pose_id.lower()}\t" + str(rmsd_out.stdout)
- temp_rmsd = f"{ref_name}\t{pose_id.lower()}\t" + str(
- float(rmsd_out.stdout.split()[-1])
- )
- print(rmsd_result, file=write_out)
- count += 1
- else:
- return "No matched header found"
- if verbose:
- print(f"Total of {count} rmsd calculated.")
- return temp_rmsd
- except Exception as er:
- print(er)
-
-
-def rmsd_matrix_prep(rmsd_results, print_it=True, return_df=True, only_best=False):
- default_scoring_ = {}
- ad4_scoring_ = {}
- dkoes_fast_scoring_ = {}
- dkoes_scoring_old_scoring_ = {}
- vina_scoring_ = {}
- vinardo_scoring_ = {}
- custom_scoring_ = {}
- best = {}
- with open(rmsd_results[0], "r") as rmsd_results_read:
- for count, line in enumerate(rmsd_results_read):
- line_info = line.rsplit("/")[-1]
- try:
- key, value = line_info.split(".pdb")
- except Exception as er:
- # print(er)
- pass
- if "_ad4_scoring_" in line:
- ad4_scoring_[key] = value.strip()
- elif "_default_" in key:
- default_scoring_[key] = value.strip()
- elif "_dkoes_fast_" in key:
- dkoes_fast_scoring_[key] = value.strip()
- elif "_dkoes_scoring_old_" in key:
- dkoes_scoring_old_scoring_[key] = value.strip()
- elif "_vina_" in key:
- vina_scoring_[key] = value.strip()
- elif "_vinardo_" in key:
- vinardo_scoring_[key] = value.strip()
- else:
- custom_scoring_[key] = value.strip()
- # print(count)
- try:
- ad4_df = pd.DataFrame.from_dict(
- ad4_scoring_, orient="index", columns=(["ad4_scoring"])
- )
- default_df = pd.DataFrame.from_dict(
- default_scoring_, orient="index", columns=(["default_scoring"])
- )
- dkoes_fast_df = pd.DataFrame.from_dict(
- dkoes_fast_scoring_, orient="index", columns=(["dkoes_fast_scoring"])
- )
- dkoes_scoring_old_df = pd.DataFrame.from_dict(
- dkoes_fast_scoring_, orient="index", columns=(["dkoes_fast_scoring"])
- )
- vina_df = pd.DataFrame.from_dict(
- vina_scoring_, orient="index", columns=(["vina_scoring"])
- )
- vinardo_df = pd.DataFrame.from_dict(
- vinardo_scoring_, orient="index", columns=(["vinardo_scoring"])
- )
- custom_df = pd.DataFrame.from_dict(
- custom_scoring_, orient="index", columns=(["custom_scoring"])
- )
- return_df = pd.concat(
- [
- ad4_df,
- default_df,
- dkoes_fast_df,
- dkoes_scoring_old_df,
- vina_df,
- vinardo_df,
- custom_df,
- ]
- )
- except Exception as er:
- print(f"{er}\n error in data frame")
- if print_it:
- try:
- ad4_best = min(ad4_scoring_.items(), key=lambda x: x[1])
- print("===========BEST RMSD===================")
- best[ad4_best[0]] = ad4_best[1]
- print(f"{ad4_best[0]} : {ad4_best[1]}")
- default_best = min(default_scoring_.items(), key=lambda x: x[1])
- best[default_best[0]] = default_best[1]
- print(f"{default_best[0]} : {default_best[1]}")
- dkoes_fast_best = min(dkoes_fast_scoring_.items(), key=lambda x: x[1])
- best[dkoes_fast_best[0]] = dkoes_fast_best[1]
- print(f"{dkoes_fast_best[0]} : {dkoes_fast_best[1]}")
- dkoes_scoring_old_best = min(
- dkoes_scoring_old_scoring_.items(), key=lambda x: x[1]
- )
- best[dkoes_scoring_old_best[0]] = dkoes_scoring_old_best[1]
- print(f"{dkoes_scoring_old_best[0]} :{dkoes_scoring_old_best[1]}")
- vina_best = min(vina_scoring_.items(), key=lambda x: x[1])
- best[vina_best[0]] = vina_best[1]
- print(f"{vina_best[0]} : {vina_best[1]}")
- vinardo_best = min(vinardo_scoring_.items(), key=lambda x: x[1])
- best[vinardo_best[0]] = vinardo_best[1]
- print(f"{vinardo_best[0]} : {vinardo_best[-1]}")
- custom_best = min(custom_scoring_.items(), key=lambda x: x[1])
- best[custom_best[0]] = custom_best[1]
- print(f"{custom_best[0]} : {custom_best[1]}")
- except Exception as er:
- print(er)
- if only_best:
- return best
- if return_df is True:
- return return_df
- else:
- return (
- default_scoring_,
- ad4_scoring_,
- dkoes_fast_scoring_,
- dkoes_scoring_old_scoring_,
- vina_scoring_,
- vinardo_scoring_,
- custom_scoring_,
- )
-
-
-def conformer_split(filenames, target):
- for i in filenames:
- file_dir, file_id, file_format = give_id(i)
- if not os.path.exists(f"{file_dir}/poses"):
- os.makedirs(f"{file_dir}/poses")
- command = (
- f"/share/openbabel-3.1.1/bin/obabel {i} -o{target} -O"
- f" ./poses/{file_id}_.{target} -m"
- )
- subprocess.run(command, cwd=f"{file_dir}", shell=True)
- return "Successfully completed."
-
-
-def smina_histogram(sorted_RES, save=False):
- """Creates histogram of docking from smina output
- Args:
- sorted_RES (list): Sorted list of affinity values from smina output.
- save (bool, optional): Save plot in ./image/smina_histogram.png
- """
- name, benergy = zip(*sorted_RES.items())
- benergy = np.array((benergy), dtype=np.float32)
- mean = benergy.mean()
- best = benergy.min()
- worst = benergy.max()
- fig, axs = plt.subplots(1, sharey=False, sharex=False, tight_layout=True)
- axs.add_artist(
- AnchoredText(
- f"Total: {len(name)}\nMean: {mean:.2f}\nBest: {best:.2f}\nWorst:"
- f" {worst:.2f}",
- loc=1,
- )
- )
- axs.hist(benergy)
- axs.yaxis.set_label_text("Number of Datasets")
- axs.xaxis.set_label_text("Binding Energy Range")
- axs.set_title("Distribution of binding energy")
- if save:
- if not os.path.exists("./images"):
- os.makedirs("./images")
- plt.savefig("./images/smina_histogram.png", dpi=600)
- plt.show()
-
-
-def smina_monitor(smina_output_monitor, plot=False, save=False):
- """Display smina process while enter smina stdout
- Args:
- smina_output_monitor (stdout): stdout from qstat/qsub
- Returns:
- dict: Displays result in jupyter
- """
- RES = {}
- count = 0
- for i in smina_output_monitor:
- algo_dir, algo_name, algo_format = give_id(i)
- if algo_format.lower() == "pdb":
- with open(i, "r") as read_smina:
- for line in read_smina:
- if line[:5] == "MODEL":
- number = line[5:].strip()
- elif "REMARK" in line:
- energy = float(line.rsplit(" ")[-1].strip())
- RES[f"{algo_name}_{number}"] = f"{energy}"
- count += 1
- elif algo_format.lower() == "sdf":
- pattern_id = r"^[a-zA-Z]\S"
- pattern_affinity = r"^>\s<[a-zA-Z]+>"
- with open(i, "r") as read_smina:
- write_affinity = False
- for line in read_smina:
- if write_affinity:
- energy = float(line.strip())
- # print(energy)
- RES[f"{algo_name}_{number}"] = f"{energy}"
- count += 1
- write_affinity = False
- if re.match(pattern_id, line):
- number = line.strip()
- elif re.match(pattern_affinity, line):
- write_affinity = True
- else:
- return "File format not supported yet. ['pdb', 'sdf']"
-
- print(f"Total number of poses generated: {count}")
- sorted_RES = dict(sorted(RES.items(), key=lambda x: x[1]))
- print("_____________Detail list________________\n")
- for key, value in sorted_RES.items():
- print(key, ":", value)
- if plot:
- smina_histogram(sorted_RES, save=save)
-
- return sorted_RES
-
-
-def auc_plot(model, X_test, y_test, save=False):
- """Plot AUC plot from model, and X_text(data point) y_test(label).
- Args:
- model (sklearn obj)_: Model object from sklearn trained model.
- X_test (pd.DataFrame): Data point for test.
- y_test (pd.DataFrame): Label for the data point.
- save (bool, optional): Save AUC plot.
- """
- # assert isinstance(model, LinearRegression), f"{model} is not a valid model"
- # assert isinstance(X_test, pd.DataFrame), f"{X_test} is not a DataFrame"
- # assert isinstance(y_test, pd.DataFrame), f"{y_test} is not a DataFrame"
- model_regression_probability = model.predict_proba(X_test)
- model_regression_probability = model_regression_probability[:, 1]
- random_probability = [0 for _ in range(len(y_test))]
- random_auc = roc_auc_score(y_test, random_probability)
- model_auc = roc_auc_score(y_test, model_regression_probability)
- print(f"Random: ROC AUC={random_auc}")
- print(f"Model: ROC AUC={model_auc}")
- random_false_positive_rate, random_true_positive_rate, _ = roc_curve(
- y_test, random_probability
- )
- model_false_positive_rate, model_true_positive_rate, _ = roc_curve(
- y_test, model_regression_probability
- )
- plt.plot(
- random_false_positive_rate,
- random_true_positive_rate,
- linestyle="--",
- label="Random",
- )
- plt.plot(
- model_false_positive_rate,
- model_true_positive_rate,
- marker=".",
- label=f"Model (AUC:{model_auc:.2f})",
- )
- plt.xlabel("False Positive Rate")
- plt.ylabel("True Positive Rate")
- plt.xlim(xmin=0.0)
- plt.ylim(ymin=0.0)
- plt.title("ROC")
- plt.legend()
- if save:
- prefix = "image"
- while os.path.exists(f"./Generated/images/{prefix}.png"):
- suffix += 1
- name = f"{prefix}{suffix}.png"
- plt.savefig("./Generated/images/{name}", dpi=600)
- plt.show()
-
-
-def smina_model_score(
- file_path,
- num_features=3,
- intercept=False,
- tsize=0.3,
- plot_auc=False,
- plot_save=False,
-):
- """Generates regression model using sklearn. Will Print out coefficients
- Args:
- file_path (str): csv/excel file path
- num_features (int, optional): Number of features to use. Default: 3
- intercept (bool, optional): Mean Error
- tsize (float, optional): Percentage of datato use for test. Default 0.3(30%)
- plot_auc (bool, optional): Plot ROC AUC curve
- plot_save (bool, optional): Save ROC AUC plot
- """
- try:
- if isinstance(file_path, str):
- file_dir, file_name, file_format = give_id(file_path)
- # for now | smina result file
- supported_file_format = ["csv", "excel"]
- assert file_format in supported_file_format, (
- "Note: FileType Error: Not supported file format. Use"
- f" {supported_file_format}"
- )
- if file_format == "excel":
- df = pd.read_excel(file_path)
- else:
- df = pd.read_csv(file_path)
-
- except Exception as e:
- print(e)
-
- if isinstance(file_path, pd.DataFrame):
- df = file_path
- try:
- X, y = df.iloc[1:, 1:-2], df.iloc[1:, -1]
- X = pd.DataFrame(X)
- header = X.iloc[:0, :]
- X, y = make_classification(n_features=num_features)
- X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=tsize
- ) # TODO //include K-Fold Test
- model = LogisticRegression(fit_intercept=intercept)
- model.fit(X_train, y_train) # TODOD accept use model input
- weight = model.coef_
- weight = [item for i in weight for item in i]
- console.print("[bold cyan]Model weights are :~[/bold cyan]\n")
- for head, coeff in zip(header, weight):
- print(coeff, head, end="\n")
- model.predict(X_test)
- model.predict_proba(X_test)
- score = model.score(X_test, y_test)
- print(f"\nModel score: {score}")
- except Exception as er:
- print(er)
- if plot_auc:
- auc_plot(model, X_test, y_test, save=plot_save)
- # train_plot(model, X, y, X_test, y_test)
- return model
-
-
-def train_plot(model, X, y, X_test, y_test):
- plt.figure(1, figsize=(4, 3))
- plt.clf()
- print(len(X))
- print(len(y))
- plt.scatter(X.ravel(), y, color="black", zorder=20)
- # plt.scatter(y_test, X_test.iloc[:,0].values)
- loss = expit(X_test * model.coef_ + model.intercept_).ravel()
- plt.plot(X_test, loss, color="red", linewidth=3)
-
- ols = LinearRegression()
- ols.fit(X, y)
- plt.plot(X_test, ols.coef_ * X_test + ols.intercept_, linewidth=1)
- plt.axhline(0.5, color=".5")
-
- plt.ylabel("y")
- plt.xlabel("X")
- plt.xticks(range(-5, 10))
- plt.yticks([0, 0.5, 1])
- plt.ylim(-0.25, 1.25)
- plt.xlim(-4, 10)
- plt.legend(
- ("Logistic Regression Model", "Linear Regression Model"),
- loc="lower right",
- fontsize="small",
- )
- plt.tight_layout()
- plt.show()
-
-
-def input_custom_scoring():
- """GUI window to enter custom scoring function"""
- # initialize some msg and output env
- output_csf = widgets.Output()
- msg_empty_name = "Enter any name for the file."
- warn_empty_name = widgets.HTML(value=f"{msg_empty_name}")
- # save the input custom scoring value
-
- def save_scoring(data):
- output_csf.clear_output()
- msg_confirm_warn = "Please confirm if the values are right."
- information = ipywidgets.widgets.HTML(
- value=f"{msg_confirm_warn}"
- )
- global scoring_data
- global temp_name
- temp_name = file_name.value
- if not temp_name:
- with output_csf:
- display(warn_empty_name)
- else:
- splitted = custom_scoring_area.value.split("\n")
- scoring_data = []
- for split in splitted:
- split = split.strip()
- scoring_data.append(split)
- with output_csf:
- print(f"Entered file name : {temp_name}")
- for line in scoring_data:
- if select == "custom_scoring":
- if len(line.rstrip()) != 0:
- value, item = line.split()
- print(f"{value}\t{item}")
- else:
- print(line)
- display(information)
-
- # writes the save scoring data to a file
- def confirm_scoring(data):
- output_csf.clear_output()
- if not temp_name:
- with output_csf:
- display(warn_empty_name)
- else:
- msg_success = "Confirmed and Saved!"
- information = ipywidgets.widgets.HTML(
- value=f"{msg_success}"
- )
- file_name_save = temp_name
- file_content = scoring_data
- if select == "custom_scoring":
- if not os.path.exists("./Generated/custom_function"):
- os.makedirs("./Generated/custom_function")
- with open(
- f"./Generated/custom_function/{file_name_save}_csf.txt", "w+"
- ) as write_scoring_function:
- for line in file_content:
- if len(line.rstrip()) != 0:
- value, item = line.split()
- print(f"{value}\t{item}", file=write_scoring_function)
-
- info = (
- f"file save at ./Generated/custom_function/{file_name_save}_csf.txt"
- )
- else:
- if not os.path.exists("./Generated/smina_input"):
- os.makedirs("./Generated/smina_input")
- with open(
- f"./Generated/smina_input/{file_name_save}_mconfig.txt", "w+"
- ) as write_config:
- for line in file_content:
- print(f"{line}", file=write_config)
-
- info = (
- f"file save at ./Generated/smina_input/{file_name_save}_mconfig.txt"
- )
- with output_csf:
- print(info)
- display(information)
-
- # def all_clear(data):
- # with output_csf:
- # output_csf.clear_output()
- # #text area to observe all input text
- config_placeholder = (
- "Paste here\n \n Sample config.txt Docking parameters file\n "
- " -------------------------------------\n #Inputs\n receptor ="
- " ./3L6B_prot.pdbqt\n ligand = ./3L6B_lig.pdbqt\n #Outputs\n "
- " out = 3L6B-nowat-Vina.pdbqt\n log = 3L6B-nowat-Vina.log\n "
- " #Box center\n center_x = 4.500\n center_y = -2.944\n "
- " center_z = -5.250\n #Box size\n size_x = 50\n size_y ="
- " 50\n size_z = 50\n #Parameters\n exhaustiveness = 8\n "
- " seed = 123456\n"
- )
- csf_placeholder = (
- " Paste here\n \n Sample format of custom scoring\n "
- " -------------------------------------\n -0.035579 "
- " gauss(o=0,_w=0.5,_c=8)\n -0.005156 gauss(o=3,_w=2,_c=8\n "
- " 0.840245 repulsion(o=0,_c=8)\n -0.035069 "
- " hydrophobic(g=0.5,_b=1.5,_c=8)\n -0.587439 "
- " non_dir_h_bond(g=-0.7,_b=0,_c=8)\n 1.923 num_tors_div\n "
- " -100.0 atom_type_gaussian(t1=Chlorine,t2=Sulfur,o=0,_w=3,_c=8)\n"
- )
-
- def evaluate(selected):
- output_csf.clear_output()
- area_layout = Layout(width="100%", height="400px", flex="row")
- global select
- select = selected
- if selected == "custom_scoring":
- global custom_scoring_area
- custom_scoring_area = widgets.Textarea(
- placeholder=csf_placeholder,
- description="Enter:",
- disabled=False,
- justify_content="space_between",
- continuous_update=True,
- layout=area_layout,
- )
- else:
- custom_scoring_area = widgets.Textarea(
- placeholder=config_placeholder,
- description="Enter:",
- disabled=False,
- justify_content="space_between",
- continuous_update=True,
- layout=area_layout,
- )
- display(custom_scoring_area)
- output_csf.clear_output()
-
- select_option = widgets.RadioButtons(
- options=["custom_scoring", "manual_config"],
- value="custom_scoring",
- description="What:",
- disabled=False,
- )
- ui = widgets.HBox([select_option])
- options = widgets.interactive_output(evaluate, {"selected": select_option})
- instruction = ipywidgets.widgets.HTML(
- "Copy and Paste the scoring"
- " function below and enter"
- )
- display(instruction)
- # buttons widgets
- file_name = widgets.Text(description="Filename:", placeholder="file name ")
- save_button = widgets.Button(description="Save")
- save_button.style.button_color = "lightgreen"
- confirm_button = widgets.Button(description="Confirm")
- confirm_button.style.button_color = "salmon"
- # clear_button = widgets.Button(description="Clear")
- # clear_button.style.button_color = "lightgreen"
- display((widgets.VBox([file_name, ui, options])), output_csf)
- display((widgets.HBox([save_button, confirm_button])))
- save_button.on_click(save_scoring)
- confirm_button.on_click(confirm_scoring)
- # clear_button.on_click(all_clear)
-
-
-def xg_model(X, y):
- from sklearn.datasets import make_classification
-
- num_classes = 3
- X, y = make_classification(n_samples=1000, n_informative=5, n_classes=num_classes)
- dtrain = xgb.DMatrix(data=X, label=y)
- num_parallel_tree = 4
- num_boost_round = 16
- # total number of built trees is num_parallel_tree * num_classes * num_boost_round
-
- # We build a boosted random forest for classification here.
- booster = xgb.train(
- {"num_parallel_tree": 4, "subsample": 0.5, "num_class": 3},
- num_boost_round=num_boost_round,
- dtrain=dtrain,
- )
-
- # This is the sliced model, containing [3, 7) forests
- # step is also supported with some limitations like negative step is invalid.
- sliced: xgb.Booster = booster[3:7]
-
- return [_ for _ in booster]
-
-
-def xgb_boost(file_train, file_test):
- import xgboost as xgb
-
- CURRENT_DIR = os.path.dirname(__file__)
- dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, "file_train"))
- dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, "file_test"))
- param = {
- "objective": "binary:logistic",
- "booster": "gblinear",
- "alpha": 0.0001,
- "lambda": 1,
- }
- watchlist = [(dtest, "eval"), (dtrain, "train")]
- num_round = 4
- bst = xgb.train(param, dtrain, num_round, watchlist)
- preds = bst.predict(dtest)
- labels = dtest.get_label()
- print(
- "error=%f"
- % (
- sum(int(preds[i] > 0.5) != labels[i] for i in range(len(preds)))
- / float(len(preds))
- )
- )
-
-
-def run_smina(dir_name_, config_file_name, **kwargs):
- """ Creates folder within the cwd with the name id and sub\
- folder run where it will write sh.Also creates a dump folder\
- where error and output log will be dumped.
-
- """
- mode = kwargs.get("mode", False)
- log = kwargs.get("log", "log.txt")
- output = kwargs.get("output", "output.sdf")
- local = kwargs.get("local", False)
- cpu_num = kwargs.get("cpu", 2)
- job_name = kwargs.get("job_name", None)
- scoring = kwargs.get("scoring")
- custom = kwargs.get("custom", False)
- enter_output = kwargs.get("enter_output", True)
- enter_log = kwargs.get("enter_log", True)
- cluster = kwargs.get("cluster", None)
- cluster_grp = ["all.q", "gp1", "gp2"]
- if (cluster is not None) and (cluster not in cluster_grp):
- return f"Invalid cluster name. Available cluster names: {cluster_grp}"
- name_id = config_file_name[:4].lower() # FIX
- dir_name_cwd = os.getcwd()
- dir_name = os.path.dirname(dir_name_)
- PATH = kwargs.get("PATH", False)
- dir_name = f"{dir_name}" if PATH else f"{dir_name_cwd}"
-
- if not os.path.exists(f"{dir_name}/Generated/jobs/{name_id}/run"):
- os.makedirs(f"{dir_name}/Generated/jobs/{name_id}/run")
- if local is False:
- SMINA_PATH = "/share/vina/smina"
- with open(
- f"{dir_name}/Generated/jobs/{name_id}/run/{name_id}_SMina.sh", "w"
- ) as out:
- if job_name is None:
- job_name = name_id
- if job_name[0].isdigit():
- job_name = "S" + job_name
- print(f"#$ -N {job_name}", file=out)
- print("#$ -V", file=out)
- print("#$ -S /bin/bash", file=out)
- if cluster is not None:
- print(f"#$ -q {cluster}", file=out)
- print(f"#$ -pe {cpu_num}cpu {cpu_num}", file=out)
- if not os.path.exists(f"{dir_name}/Generated/jobs/{name_id}/dump/"):
- os.makedirs(f"{dir_name}/Generated/jobs/{name_id}/dump/")
- print(f"#$ -o {dir_name}/Generated/jobs/{name_id}/dump/", file=out)
- print(f"#$ -e {dir_name}/Generated/jobs/{name_id}/dump/", file=out)
- print("#$ -cwd", file=out)
-
- # Conditional to write log and output
- enter_log = f"--log {log}" if enter_log else ""
- enter_output = f"--out {output}" if enter_output else ""
- if (mode is True) and (custom is False):
- print(
- f"{SMINA_PATH} --config"
- f" {dir_name}/Generated/smina_input/{config_file_name}"
- f" --scoring {scoring} --score_only {enter_log} {enter_output}",
- file=out,
- )
-
- elif (mode is True) and (custom is True):
- print(
- f"{SMINA_PATH} --config"
- f" {dir_name}/Generated/smina_input/{config_file_name}"
- f" --custom_scoring {scoring} --score_only {enter_output}"
- f" {enter_log}",
- file=out,
- )
-
- elif (mode is False) and (custom is False):
- print(
- f"{SMINA_PATH} --config"
- f" {dir_name}/Generated/smina_input/{config_file_name} --scoring"
- f" {scoring} {enter_log} {enter_output} ",
- file=out,
- )
-
- elif (mode is False) and (custom is True):
- print(
- f"{SMINA_PATH} --config"
- f" {dir_name}/Generated/smina_input/{config_file_name}"
- f" --custom_scoring {scoring} {enter_log} {enter_output} ",
- file=out,
- )
-
- command = f"qsub {dir_name}/Generated/jobs/{name_id}/run/{name_id}_SMina.sh"
- else:
- if custom is False:
- command = (
- f"smina --config {dir_name}/Generated/smina_input/{config_file_name}"
- f" --scoring {scoring} {enter_log} {enter_output}"
- )
- else:
- command = (
- f"smina --config {dir_name}/Generated/smina_input/{config_file_name}"
- f" --custom_scoring {scoring} {enter_log} {enter_output}"
- )
-
- subprocess.run(command, cwd=f"{dir_name}/Generated/jobs/{name_id}", shell=True)
-
- return "Succesfully completed."
-
-
-# RECORDS OF ALL SCORING FUNCTION
-# SF = [ "ad4_scoring",
-# "default",
-# "dkoes_fast",
-# "dkoes_scoring",
-# "dkoes_scoring_old",
-# "vina",
-# "vinardo",
-# ]
-
-# CSF = ["custom"] # for custom scoring and pass custom_scoring_file=PATH to the function
-# Otherwise all the other SF will be run but will be calculate with csf
-# for numerous time
-
-
-def smina_run(protein_list, ligand_list, **kwargs):
- """Prepares config file for smina when enter proterin and ligand
- Above sh_run function must be initialezed before in notebook"""
-
- SF = kwargs.get("SF")
- if not isinstance(SF, list):
- return "Supplied SF is not a list"
- cluster = kwargs.get("cluster", None)
- cluster_grp = ["all.q", "gp1", "gp2"]
- if (cluster is not None) and (cluster not in cluster_grp):
- return f"Invalid cluster name. Available cluster names: {cluster_grp}"
- autobox = kwargs.get("autobox", None)
- if isinstance(autobox, list):
- autobox = autobox[0]
- manual_config = kwargs.get("manual_config", None)
- if isinstance(manual_config, list):
- manual_config = manual_config[0]
- run = kwargs.get("run", False)
- mode = kwargs.get("mode", False)
- local = kwargs.get("local", False)
- job_name = kwargs.get("job_name", None)
- NUM_MODES = kwargs.get("num_modes", 10)
- EXHAUSTIVE = kwargs.get("exhaustive", 50)
- ENERGY_RANGE = kwargs.get("energy_range", 10)
- SEED = kwargs.get("seed", None)
- AUTOBOX_PAD = kwargs.get("pad", 4)
- CPU_NUM = kwargs.get("cpu", 8)
- nomatch = kwargs.get("match", False)
- OUT_FORMAT = kwargs.get("out_format", "sdf")
- custom = kwargs.get("custom", False)
-
- # if CUSTOM_SCORE is not None:
- # CSF_FLAG = True
- # else:
- # CSF_FLAG = False
-
- def write_config(
- receptor,
- ligand,
- config_file_name,
- output_file_name,
- log_file_name,
- # scoring=None, # Moved to CLI
- ):
- """Writes config into new files, if already \
- exist append to it."""
-
- # dir_name = os.path.dirname(receptor)
- dir_name = os.getcwd()
-
- if not os.path.exists(f"{dir_name}/Generated/smina_input"):
- os.makedirs(f"{dir_name}/Generated/smina_input/")
- with open(
- f"{dir_name}/Generated/smina_input/{config_file_name}", "w+"
- ) as config_file:
-
- # required config arguments
- # ------------------------
- print(f"receptor = {receptor} ", file=config_file)
- print(f"ligand = {ligand}", file=config_file)
- if autobox is not None:
- print(f"autobox_ligand = {autobox}", file=config_file)
- print(f"autobox_add = {AUTOBOX_PAD}", file=config_file)
-
- # Optionals con # MOVED TO CLI
- # ------------------------------
- print(f"out = {output_file_name}", file=config_file)
- print(f"log = {log_file_name}", file=config_file)
- # print(f"scoring = {scoring}", file=config_file) ## change to run in CLI
-
- # Misc(optional) configs
- # -----------------------------
- # if CUSTOM_SCORE is not None: ## MOVED TO CLI
- # print(f"custom_scoring = {CUSTOM_SCORE}", file=config_file)
- # if SMINA_MODE is not None:
- # print(f"{SMINA_MODE}", file=config_file)
-
- print(f"cpu = {CPU_NUM}", file=config_file)
- if SEED is not None:
- print(f"\n\nseed = {SEED}", file=config_file)
- print(f"exhaustiveness = {EXHAUSTIVE}", file=config_file)
- # if CUSTOM_SCORE is None: ## change to run in CLI
- print(f"num_modes = {NUM_MODES}", file=config_file)
- print(f"energy_range = {ENERGY_RANGE }", file=config_file)
-
- for scoring in SF:
- for protein in protein_list:
- for ligand in ligand_list:
- protein_dir, protein_id, prot_format = give_id(protein)
- ligand_dir, ligand_id, lig_format = give_id(ligand)
- if custom:
- _dir, _name, _format = give_id(scoring)
- scoring = _name
- if protein_id[:4].lower() == ligand_id[:4].lower() or (nomatch == True):
- output_file_name = ligand_id + f"_output_{scoring}.{OUT_FORMAT}"
- log_file_name = ligand_id + f"_log_{scoring}.txt"
- if manual_config is None:
- config_file_name = ligand_id.lower() + f"_config_{scoring}.txt"
- enter_output = True
- enter_log = True
- write_config(
- protein_list[0],
- ligand,
- config_file_name,
- output_file_name,
- log_file_name,
- )
- else:
- (
- manual_config_dir,
- manual_config_name,
- manual_config_format,
- ) = give_id(manual_config)
- config_file_name = (
- f"{manual_config_name}.{manual_config_format}"
- )
- if "output" in open(manual_config).read():
- enter_output = False
- else:
- enter_output = True
- if "log" in open(manual_config).read():
- enter_log = False
- else:
- enter_log = True
- else:
- print("Protein and ligand prefix [4 letter] didnt match")
- if custom:
- scoring = f"{_dir}/{_name}.{_format}"
- if run is True and mode is True:
- run_smina(
- protein_dir,
- config_file_name,
- scoring=scoring,
- mode=mode,
- local=local,
- # cpu=CPU_NUM,
- # job_name=job_name,
- custom=custom,
- log=log_file_name,
- output=output_file_name,
- enter_output=enter_output,
- enter_log=enter_log,
- )
- elif run is True and mode is False:
- run_smina(
- protein_dir,
- config_file_name,
- scoring=scoring,
- local=local,
- cpu=CPU_NUM,
- job_name=job_name,
- custom=custom,
- output=output_file_name,
- log=log_file_name,
- enter_output=enter_output,
- enter_log=enter_log,
- PATH=True,
- cluster=cluster,
- )
- else:
- print(
- "Run command was not passed so only created the"
- " config and sh file but not executed"
- )
- # print(base_name)
- return "Succesfully completed."
-
-
-def view_affinity(sorted_RES, keyword):
- return pd.DataFrame(
- [
- (key, value)
- for key, value in sorted_RES.items()
- if f"{keyword}" in key.lower()
- ],
- columns=["Pose", "Affinity"],
- )
-
-
-def smina_output_df(sorted_RES):
- return pd.DataFrame(
- [(key, value) for key, value in sorted_RES.items()],
- columns=["Pose", "Affinity"],
- )
-
-
-def rmsd_matrix(
- ref, length=2, key="MATRIX", verbose=False, plot=True, save=False, annot=False
-):
- _rmsd_list = itertools.combinations(ref, length)
- cols = ["Reference", "Pose", "RMSD"]
- df = pd.DataFrame(columns=cols)
- for count, i in enumerate(_rmsd_list):
- ref, conf = list([i[0]]), list([i[1]])
- x = rmsd_calculator(ref, conf, nomatch=True, key=key, verbose=False)
- if verbose:
- print(f"{count}. {x}", sep=" ", flush=True)
- x1, x2, x3 = x.split("\t")
- df.loc[count] = [x1, x2, x3]
-
- mdf = df.pivot(index="Reference", columns="Pose", values="RMSD")
- mdf.fillna(0)
- mdf = mdf.astype(float)
- if plot:
- # plt.figure(figsize=[15, 8])
- hmap = sns.heatmap(mdf, annot=annot)
- hmap.set_title("RMSD MATRIX")
- if save:
- if not os.path.exists("./Generated/images/"):
- os.makedirs("./Generated/images/")
- fig = hmap.figure
- image = f"./Generated/images/{key}.jpg"
- fig.savefig(image, dpi=600)
- print(f"Saved ! {image}")
- return mdf
diff --git a/src/csfdock/KinaseModules.py b/src/csfdock/KinaseModules.py
deleted file mode 100644
index a8661e6..0000000
--- a/src/csfdock/KinaseModules.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import numpy as np
-from urllib.request import urlopen
-from PIL import Image
-import pandas as pd
-from typing import Union, Optional
-import io
-import os
-import subprocess
-
-
-# PUBCHEM RELATED
-
-
-class Attributes:
- def __init__(self, CID, format="csv"):
-
- self.CID = CID
- self.format = format
- self.CID_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid"
-
- @property
- def image(self):
- IMAGE_API = f"{self.CID_URL}/{self.CID}/record/png"
- self.image = Image.open(IMAGE_API)
- return self.image
-
- @property
- def description(self):
- DESC_API = f"{self.CID_URL}/{self.CID}/description/XML"
- return pd.read_xml(urlopen(DESC_API).read().decode("utf-8"))
-
- @property
- def formula(self):
- FORMULA_API = (
- f"{self.CID_URL}/{self.CID}/property/MolecularFormula/{self.format}"
- )
- return pd.read_csv(io.StringIO(urlopen(FORMULA_API).read().decode("utf-8")))
-
- @property
- def weight(self):
- MOL_WEIGHT_API = (
- f"{self.CID_URL}/{self.CID}/property/MolecularWeight/{self.format}"
- )
- return pd.read_csv(io.StringIO(urlopen(MOL_WEIGHT_API).read().decode("utf-8")))
-
- @property
- def xlog(self):
- XLOG_API = f"{self.CID_URL}/{self.CID}/property/XLogP/{self.format}"
- return pd.read_csv(io.StringIO(urlopen(XLOG_API).read().decode("utf-8")))
-
- @property
- def smile(self):
- ISO_SMILES_API = (
- f"{self.CID_URL}/{self.CID}/property/IsomericSmiles/{self.format}"
- )
- return pd.read_csv(io.StringIO(urlopen(ISO_SMILES_API).read().decode("utf-8")))
-
- def structure(self, save=False, *args, **kw):
- SDF_API = f"{self.CID_URL}/{self.CID}/SDF"
- self.structure = urlopen(SDF_API).read().decode("utf-8")
-
- if save:
- try:
- _dir = kw.get("dir", None)
- _filename = kw.get("filename", None)
- _smile_save = kw.get("smile_save", False)
- _dir = f"{_dir}" if _dir else "./data/structures"
- _filename = (
- f"{_filename}"
- if _filename
- else "{}".format(self.structure.partition("\n")[0].strip())
- )
-
- if not os.path.isdir(_dir):
- os.makedirs(_dir)
- FileExists = f"{_dir}/{_filename}.sdf"
- if _smile_save:
- with open("./data/structures/smiles.smi", "a+") as f:
- print(self.smile, file=f)
- with open(f"{FileExists}", "w") as w:
- w.write(self.structure)
- w.close()
- return f"save success at {_dir} as {_filename}.sdf"
- except Exception as error:
- print(error)
- return self.structure
-
- # //TODO Method Chaining
-
- # def protein(self):
- # return f"https://pubchem.ncbi.nlm.nih.gov/protein/{self.CID}"
- # def gene(self):
- # return f"https://pubchem.ncbi.nlm.nih.gov/gene/{self.CID}"
-
-
-def download_CID_structures(UNIQUE_ID):
- error_list = []
- print(f"\r=> Calling API and Downloading Structures...")
- for label, content in enumerate(UNIQUE_ID):
- try:
- t = Attributes(content)
- t.structure(dir="./data/structures", smile_save=True, save=True)
- if not os.path.exists(f"./data/structures/{content}.sdf"):
- error_list.append(content)
- except Exception as error:
- print(error)
- error_list.append(content)
- print(
- f"\r=> => Saved Successfully Verified: {label- len(error_list)} OK š Error:"
- f" {len(error_list)}",
- end="",
- flush=True,
- )
- return error_list
-
-
-def concatenate_aid_details(AID: [pd.DataFrame, list], download: bool = False):
-
- print("Time depends on number and size of files...Please be patient... ")
- total = len(AID)
- aid_list = []
- error_aid_list = []
- DATA = AID["aid"] if isinstance(AID, pd.DataFrame) else AID
- for count, _aid in enumerate(DATA):
- try:
- _aid_exp_detail = extract_aid_detail(f"{_aid}", download=download)
- aid_list.append(_aid_exp_detail)
- except Exception as error:
- # print(f"\rš“{error}", sep=' ', end='', flush=True)
- error_aid_list.append(_aid)
- continue
- print(
- f"\rSuccess: {len(aid_list)}/{total} OK š Error:"
- f" {len(error_aid_list)}/{total} š“ "
- f" {'Completed' if {count} != {total} else ''}",
- sep=" ",
- end="",
- flush=True,
- )
-
- print(f"Parsed : {len(aid_list) + len(error_aid_list)} š¦ ")
-
- detail_data_type = {
- "AID": int,
- "Panel Member ID": int,
- "SID": int,
- "CID": int,
- "Bioactivity Outcome": str,
- "Target GI": int,
- "Target GeneID": int,
- "Activity Value [uM]": float,
- "Activity Name": str,
- "Assay Name": str,
- "Bioassay Type": str,
- "PubMed ID": str,
- "RNAi": str,
- }
-
- main_df = pd.DataFrame()
- empty_error = []
- for file in aid_list:
- try:
- main_df = main_df.append(
- pd.read_csv(file, dtype=detail_data_type, engine="python")
- )
- # print(main_df)
- except:
- empty_error.append(file)
- continue
-
- # main_df = pd.concat(
- # [pd.read_csv(file, dtype=detail_data_type, engine="python", quoting=3, error_bad_lines=False) for file in aid_list if pd.read_csv(file).empty == False], ignore_index=True, sort=False)
-
- return main_df, error_aid_list
-
-
-def extract_aid_detail(AID: int, download: bool = False, view: bool = False) -> str:
-
- _API = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/{AID}/concise/CSV"
- if download:
- BASE_DIR = os.getcwd()
- path = "src/Data/Data_Source/PubChem"
- download_path = os.path.join(BASE_DIR, path)
- file = f"{AID}.csv"
- if not os.path.exists(download_path):
- os.makedirs(download_path)
- if not os.path.exists(f"{download_path}/{file}"):
- command = f"wget -q {_API} -O {download_path}/{file}"
- subprocess.run(command, shell=True)
- downloaded_aid = f"{download_path}/{file}"
- print(f"\rDownloaded AID:{AID}.csv ", sep=" ", end="", flush=True)
- if not view:
- return downloaded_aid
- f = urlopen(_API)
- return (f.read().decode("utf-8")), downloaded_aid
- # except Exception as error:
- # print(f"\t š“{AID}_Error: {error}", sep=' ', end='', flush=True)
- # raise ValueError('A very specific bad thing happened with request.')
- # return -1
-
-
-def check_non_downloaded(AID: pd.DataFrame) -> list:
- """AID is the main list of ids to be downloaded"""
-
- total = len(AID)
- downloaded_list = []
- error_aid_list = []
- BASE_DIR = os.getcwd()
- for count, _aid in enumerate(AID["aid"]):
- if os.path.exists(f"{BASE_DIR}/Data/PubChem/{_aid}.csv"):
- downloaded_list.append(_aid)
- else:
- error_aid_list.append(_aid)
- print(
- f"\rSuccessfully Downloaded : {len(downloaded_list)}/{total} OK š Error:"
- f" {len(error_aid_list)}/{total} š“ ",
- sep=" ",
- end="",
- flush=True,
- )
- return error_aid_list
-
-
-def main(input_file: str) -> pd.DataFrame:
- print("started.....")
- df = pd.read_csv(input_file, low_memory=False)
- download_list, error_list = concatenate_aid_details(df, download=True)
-
-
-if __name__ == "__main":
- print("started...")
- main(sys.argv[1])
diff --git a/src/csfdock/MolView.py b/src/csfdock/MolView.py
deleted file mode 100644
index a728237..0000000
--- a/src/csfdock/MolView.py
+++ /dev/null
@@ -1,514 +0,0 @@
-import ipywidgets
-import py3Dmol
-from IPython.display import HTML, display
-from ipywidgets import (
- FileUpload,
- IntSlider,
- fixed,
- interact,
- interactive,
- interactive_output,
- widgets,
- Layout,
-)
-from rich.console import Console
-
-console = Console()
-from rdkit import Chem
-from rdkit.Chem import AllChem
-from csfdock.utils import *
-
-
-class MolView:
- """3D molecular view
- Args:
- mol (AllChem Obj): rdkit return object
- size (tuple, optional): window size to display 3d view
- style (str, optional): "strick | line | ribbon"
- surface (bool, optional): surface view
- opacity (float, optional): opacity of view
- Returns:
- py3dmol: 3d visual
- """
-
- def __init__(self, *args, **kwargs):
- self.molecule = kwargs.get("molecule")
- self.size = kwargs.get("size", (800, 600))
- self.style = kwargs.get("style", "stick")
- self.surface = kwargs.get("surface", False)
- self.opacity = kwargs.get("opacity", 0.5)
- self.conformers_list = []
- self.all_select = False
- self.default_name = False
- self.conformer = None
- # header information and display section
- self.msg_header = "Molecule Visualizer"
- self.msg_header_note = "Use upload option if want to import smiles from a file."
- self.msg_upload_info = (
- "Note: Upload option will copy your data on the server @"
- " ./Generated/smiles folder. "
- )
- self.msg_note = "Note: Use 4letter prefix similar to protein for ligand name"
- self.header = widgets.HTML(
- value=(
- "
{self.msg_header}
"
- )
- )
- self.header.add_class("header_bg")
- self.upload_info = widgets.HTML(
- value=f"{self.msg_upload_info}
{self.msg_note}"
- )
- self.output = widgets.Output()
- self.style_available = ["line", "stick", "sphere", "carton"]
- assert self.style in self.style_available, "Style Not Supported yet"
- # button sections
- self.add_button = widgets.Button(description="Add")
- self.add_button.style.button_color = "lightgreen"
- self.add_button.on_click(self.add_mol)
- self.remove_button = widgets.Button(description="Remove")
- self.remove_button.style.button_color = "salmon"
- self.remove_button.on_click(self.remove_mol)
- self.save_structure = widgets.Button(description="Save")
- self.save_structure.style.button_color = "lightblue"
- self.save_structure.on_click(self.write_mol)
- self.delete_button = widgets.Button(description="Delete")
- self.delete_button.style.button_color = "brown"
- self.delete_button.on_click(self.delete_mol)
- self.caption = widgets.Label(value="File Browser")
- self.upload_button = widgets.FileUpload(
- accept="", multiple=True, continuous_update=True
- )
- self.upload_button.observe(
- self.upload, names=["value", "content", "type", "name", "size"]
- )
- self.default_checkbox = widgets.Checkbox(
- value=False, description="Default names", disabled=False, indent=False
- )
- self.default_checkbox.observe(self.default, names="value")
- self.all_checkbox = widgets.Checkbox(
- value=False, description="Select All", disabled=False, indent=False
- )
- self.all_checkbox.observe(self.all_check, names="value")
- self.prefix_in = widgets.Text(
- placeholder="Enter atleast 4 letter name",
- description="Name: ",
- disable=False,
- )
- self.server_file_selected = widgets.Text(
- description="Selected File: ", disable=False
- )
- self.server_file_selected.on_submit(self.upload)
- self.prefix_in.on_submit(self.prefix_input)
- self.smile_in = widgets.Text(
- placeholder="Enter smile", description="Smile Code: ", disable=False
- )
- self.smile_in.on_submit(self.smile_input)
- self.INPUT_FLAG = False
- # Link accordian and file upload
-
- def __str__(self):
- return f"Total Molecules = {len(self.conformer)} {self.msg_header}"
-
- # widgets function section
- def smile_input(self, smi):
- self.output.clear_output()
- self.prefix_in.layout.visibility = None
- self.default_checkbox.layout.visibility = None
- self.smile_in.layout.visibility = "hidden"
- smile_name = self.smile_in.value if not isinstance(smi, list) else smi
- max = 0 if self.conformer is None else len(smi) - 1
- self.index_slider = IntSlider(
- value=0,
- min=0,
- max=max,
- step=1,
- disable=False,
- continuous_update=True,
- orientation="horizontal",
- layout=Layout(width="100%"),
- )
- smile_obj = interactive(
- self.smi_viewer,
- smile=smile_name,
- style=self.style_available,
- index=self.index_slider,
- )
- # self.smile_in.layout.visibility = "hidden"
- return display(smile_obj)
-
- def prefix_input(self, a):
- self.output.clear_output()
- prefix = self.prefix_in.value
- if len(prefix) < 4:
- with self.output:
- warnings.warn("Should be atleast 4 letter!.")
- exit()
- with self.output:
- print(
- f"Entered name: {prefix} .\nIf structure looks"
- " OK!\nYou can Save Now.\nElse delete it using Remove"
- )
-
- def add_mol(self, x):
- # self.output.clear_output()
- if self.INPUT_FLAG:
- self.molecule = self.conformer
- # TODO: None when called through infunction call.
- try:
- if self.molecule in self.conformers_list:
- with self.output:
- print("Already exist in the list.")
- else:
- self.conformers_list.append(self.molecule)
- with self.output:
- # print(self.conformers_list)
- console.print("Successfully Added!.")
- except:
- with self.output:
- print("Not able to Add")
-
- def remove_mol(self, y):
- self.output.clear_output()
- prefix = self.prefix_in.value
- try:
- self.conformers_list.remove(self.molecule)
- with self.output:
- console.print(" Successfully Remove from the temp list to save.!")
- except:
- with self.output:
- print("Molecule was not found!.")
-
- def delete_mol(self, m):
- # self.output.clear_output()
- prefix = self.prefix_in.value
- try:
- os.remove(f"./Generated/data/{prefix}.sdf")
- with self.output:
- print(f"./Generated/data/{prefix}.sdf Successfully deleted!.")
- except:
- with self.output:
- print(f"./Generated/data/{prefix}.sdf file not found!.")
-
- def write_all_mol(self, suffix, prefix):
- try:
- if all(isinstance(i, str) for i in self.conformer):
- conformers = [self.smile2conf(x) for x in self.conformer]
- elif isinstance(self.conformer, list):
- conformers = self.conformer
- conformers = list(filter(None, conformers))
- for conf in conformers:
- print(
- Chem.MolToMolBlock(conf),
- file=open(f"./Generated/data/{prefix}{suffix}.sdf", "w+"),
- )
- suffix += 1
- with self.output:
- print(f"{len(conformers)} molecules save in ./Generated/data folder. ")
- self.write = False
- except Exception as e:
- with self.output:
- print(
- f"{e}\nCannot write all molecules\n"
- "Presiding str maybe in the smiles code."
- )
-
- def default(self, m):
- # self.output.clear_output()
- self.default_name = m["new"]
- # print(f"Use default name: {default_name}")
-
- def all_check(self, n):
- # self.output.clear_output()
- self.all_select = n["new"]
-
- # Issue : invalid smiles upload saves the default "C"
- # TODO : smiles validity check and warn
- def upload(self, z):
- self.INPUT_FLAG = False
- if isinstance(z, str):
- with open(z, "r") as input_file_path:
- file_content = input_file_path.readlines()
- file_content = [x.strip() for x in file_content]
- input_file_dir, input_file_name, file_format = give_id(z)
- file_detail = f"{input_file_name}.{file_format}"
- self.INPUT_FLAG = True
- self.output = widgets.Output()
- else:
- try:
- file_detail = next(iter(self.upload_button.value))
- file_name, file_format = file_detail.rsplit(".", 1)
- file_content = self.upload_button.data
- file_content = [i.decode("utf-8") for i in file_content]
- file_content = "".join(str(i) for i in file_content)
- file_content = file_content.split()
- except StopIteration as er:
- input_file_dir, input_file_name, file_format = give_id(z.value)
- file_content = open(z.value).readlines()
- file_content = [x.strip() for x in file_content]
- file_detail = f"{input_file_name}.{file_format}"
- # print(file_content)
- # print(file_content)
- SDF = False
- if file_format.lower() == "sdf":
- # print(file_content)
- m = Chem.MolFromMolBlock(file_content)
- smiles = {}
- self.smile_in.layout.visibility = "hidden"
- self.view(m)
- SDF = True
- if self.INPUT_FLAG:
- smiles = file_content
- else:
- try:
- temp = [file_content.split("\r\n")]
- smiles = [
- item
- for subitem in temp
- for item in subitem
- if len(item.rstrip()) is not None
- ]
- except AttributeError:
- smiles = [item for item in file_content]
- # print(temp)
-
- smiles = list(filter(None, smiles))
- if not os.path.exists(f"./Generated/upload/"):
- os.makedirs(f"./Generated/upload/")
- self.all_checkbox.layout.visibility = None
- self.prefix_in.layout.visibility = None
- self.default_checkbox.layout.visibility = None
- try:
- with self.output:
- with open(
- f"./Generated/upload/{file_detail}", "w+"
- ) as server_upload_file:
- if SDF:
- print(file_content, file=server_upload_file)
- return
- if isinstance(smiles, list):
- for smi in smiles:
- print(smi, file=server_upload_file)
- self.conformer = [s for s in smiles]
- # self.conformer = smiles
- self.smile_input(self.conformer)
- print(f"{file_detail} successfully uploaded.")
- except Exception as e:
- print(e)
-
- # todo // override default or use both custom and default..
- def write_mol(self, z):
- try:
- # self.output.clear_output()
- prefix = self.prefix_in.value
- self.write = True
- if len(prefix) < 4 and self.default_name is False:
- with self.output:
- # warnings.warn("Check if name is entered or not and should be atleas 4 letter!.")
- print(
- "You need to add and enter either name or select default name."
- )
- else:
- if not os.path.exists("./Generated/data"):
- os.makedirs("./Generated/data")
- if len(prefix) < 4 and self.default_name is True:
- suffix = 0
- prefix = "small_molecule"
- while os.path.exists(f"./Generated/data/{prefix}.sdf"):
- suffix += 1
- prefix = f"small_molecule{suffix}"
- if self.all_select is False:
- print(
- Chem.MolToMolBlock(self.conformers_list[0]),
- file=open(f"./Generated/data/{prefix}.sdf", "w+"),
- )
- else:
- self.write_all_mol(suffix, prefix)
- elif len(prefix) >= 4 and self.default_name is True:
- suffix = 0
- prefix = f"{prefix}_small_molecule"
- while os.path.exists(f"./Generated/data/{prefix}.sdf"):
- suffix += 1
- prefix = f"{prefix}_small_molecule{suffix}"
- if self.all_select is False:
- suffix += 1
- prefix = f"{prefix}_small_molecule{suffix}"
- print(
- Chem.MolToMolBlock(self.conformers_list[0]),
- file=open(f"./Generated/data/{prefix}.sdf", "w+"),
- )
- self.write = False
- else:
- self.write_all_mol(suffix, prefix)
- if self.write:
- print(
- Chem.MolToMolBlock(self.conformers_list[0]),
- file=open(f"./Generated/data/{prefix}.sdf", "w+"),
- )
- with self.output:
- print(f"{prefix} saved in ./Generated/data/{prefix}.sdf.")
- except Exception as er:
- with self.output:
- print(
- f"{er}\nSorry, check input!Name \nTip: Need to Add/Select All"
- " first."
- )
-
- def display(self):
- """Displays widgets in jupyter notebook"""
- self.output.clear_output()
- display(self.header, self.upload_info)
- file_browser = ServerPath()
- display(widgets.VBox([self.caption, file_browser.accord]))
- display(self.server_file_selected)
- # Link upload and file browser
- display(
- (
- widgets.HBox(
- (
- self.add_button,
- self.remove_button,
- self.save_structure,
- self.delete_button,
- self.upload_button,
- self.all_checkbox,
- )
- )
- )
- )
- # elf.all_checkbox.layout.visibility = "hidden"
- # self.prefix_in.layout.visibility = "hidden"
- # self.default_checkbox.layout.visibility = "hidden"
- display(widgets.HBox([self.prefix_in, self.default_checkbox, self.smile_in]))
- display(self.output)
- # display(widgets.HBox([self.upload_button, self.all_checkbox]))
- # display(self.smile_in, self.output)
- # return self.view(self.molecule)
-
- def view(self, mol):
- """Creates py3dmol view
- Args:
- mol (rdkit obj): mol object from rdkit
- """
- try:
- molecular_block = Chem.MolToMolBlock(mol)
- viewer = py3Dmol.view(width=self.size[0], height=self.size[1])
- viewer.addModel(molecular_block, "mol")
- viewer.setStyle({self.style: {}})
- if self.surface:
- viewer.addSurface(py3Dmol.SAS, {"opacity": self.opacity})
- viewer.zoomTo()
- viewer.show()
- except Exception as er:
- with self.output:
- print(er, "in view section")
-
- def smi_viewer(self, smile, *args, **kwargs):
- """Converts smile to py3dmol view
- Args:
- smile (str): Valid smiles codes
- *args: Description
- **kwargs: style
- Returns:
- TYPE: Description
- """
- self.style = kwargs.get("style", "stick")
- index = kwargs.get("index", 0)
- # self.entered_smiles = kwargs.get("smiles")
- # self.entered_smiles = self.smile_in.value
- try:
- self.molecule = self.smile2conf(smile)
- print("+++++++++++++++++View+++++++++++++++++++++")
- print("Note: Hydrogens are added and MMFF Optimized.")
- # print(f"{Chem.MolToMolBlock(conf)}")
- print("++++++++++++++++++++++++++++++++++++++++++")
- # print(AllChem.EmbedMolecule(conf,randomSeed=0xf00d))
- return self.view(self.molecule)
- except Exception as er:
- with self.output:
- print(er, "pp")
-
- def smile2conf(self, smiles):
- """Convert SMILES to rdkit.Mol with 3D coordinates
- Args:
- smiles (str): smiles code
- Returns:
- AllChem.Mol: 3d mol object for visualization
- """
- try:
- mol = Chem.MolFromSmiles(smiles)
- mol.SetProp("_Name", f"{smiles}")
- if mol is None:
- return
- mol = Chem.AddHs(mol)
- # print(Chem.MolToMolBlock(mol))
- AllChem.EmbedMolecule(mol)
- AllChem.MMFFOptimizeMolecule(mol, maxIters=100)
- return mol
- except Exception as err:
- with self.output:
- print(err)
-
-
-class ServerPath(MolView):
- def __init__(self, start_dir=".", select_file=True):
- super().__init__()
- self.file = None
- self.select_file = select_file
- self.cwd = start_dir
- self.select = ipywidgets.SelectMultiple(
- value=(), rows=10, description="", disabled=False
- )
- self.accord = ipywidgets.Accordion(children=[self.select])
- self.accord.selected_index = None # Start closed (showing path only)
- self.refresh(".")
- self.select.observe(self.on_update, "value")
- # widget 1
-
- def on_update(self, change):
- if len(change["new"]) > 0:
- self.refresh(change["new"][0])
-
- def refresh(self, item):
- path = os.path.abspath(os.path.join(self.cwd, item))
- if os.path.isfile(path):
- if self.select_file:
- self.accord.set_title(0, path)
- self.file = path
- self.accord.selected_index = None
- else:
- self.select.value = ()
- else: # os.path.isdir(path)
- self.file = None
- self.cwd = path
- # ipywidgets list of files and dirs
- keys = ["š.."]
- for item in os.listdir(path):
- if item[0] == ".":
- continue
- elif os.path.isdir(os.path.join(path, item)):
- keys.append("š" + item)
- else:
- keys.append(item)
- # Sort and create list of output values
- keys.sort(key=str.lower, reverse=True)
- value = []
- for k in keys:
- if k[0] == "š":
- value.append(k[1:]) # strip off brackets
- else:
- value.append(k)
- # Update widget
- self.accord.set_title(0, path)
- self.select.options = list(zip(keys, value))
- with self.select.hold_trait_notifications():
- self.select.value = ()
- if self.file is not None:
- # print(self.file)
- self.upload(self.file)
- # self.smile_in.layout.visibility = "hidden"
- # self.prefix_in.layout.visibility = None
- # self.default_checkbox.layout.visibility = None
- # return self.file
diff --git a/src/csfdock/Project.py b/src/csfdock/Project.py
deleted file mode 100644
index dba48fe..0000000
--- a/src/csfdock/Project.py
+++ /dev/null
@@ -1,350 +0,0 @@
-import re
-from collections import Counter
-from os.path import join, splitext
-from rdkit import Chem
-from rich.console import Console
-from rich.table import Table
-
-from csfdock.DVisualize import *
-from csfdock.MolView import *
-from csfdock.utils import get, PDBParse
-
-console = Console()
-blue_console = Console(style="white on blue")
-
-
-class ProjectStart(MolView, DVisualize):
- """Creates a Project in Optimizing Scores and Docking
- Args:
- *args: Description
- **kwargs: Description
- """
-
- def __init__(self, *args, **kwargs):
- self.PROJECT_DIR = kwargs.get("path", os.getcwd())
- super().__init__(*args, **kwargs)
- self.AA = [
- "ALA",
- "ARG",
- "ASN",
- "ASP",
- "CYS",
- "GLN",
- "GLU",
- "GLY",
- "HIS",
- "ILE",
- "LEU",
- "LYS",
- "MET",
- "PHE",
- "PRO",
- "SER",
- "THR",
- "TRP",
- "TYR",
- "VAL",
- ]
-
- def SetFolders(self, *args, **kwargs):
- actual_cwd = os.getcwd()
- self.PROJECT_DIR = kwargs.get("path", os.getcwd())
- for i in args:
- self.PROJECT_DIR = i
- if self.PROJECT_DIR == ".":
- return console.print(f"Project Base Directory: {actual_cwd}")
- if actual_cwd != self.PROJECT_DIR:
- try:
- os.chdir(self.PROJECT_DIR)
- working_dir = self.PROJECT_DIR
- except Exception as err:
- working_dir = f"{actual_cwd}/{self.PROJECT_DIR}"
- os.chdir(working_dir)
- console.print(f"Project Base Directory: {working_dir}")
-
- def ProjectTree(self, *args, **kwargs):
- path = os.getwd() if self.PROJECT_DIR is None else self.PROJECT_DIR
- verbose = kwargs.get("verbose",)
- self.directory_tree(path, verbose=verbose)
-
- def __actual_dir_name(self, path, root=None):
- """helper function for directory tree generation"""
- if root is not None:
- path = os.path.join(root, path)
- result = os.path.basename(path)
- if os.path.islink(path):
- realpath = os.readlink(path)
- result = f"{os.path.basename(path)} -> {realpath}"
- return result
-
- def directory_tree(self, startpath, verbose=True, depth=-1):
- """Tree view of the project directory tree"
- directory_tree(path)
- """
- supported_file_format = {"txt", "pdb",
- "pdbqt", "sdf", "csv", "excel", "pickle"}
- console.print(
- f"Supported File Format :{supported_file_format}", style="bold green")
- table = self.__create_table("bold magenta", "File Type", "Total Files")
- c = Counter(
- [splitext(i)[1][1:] for i in glob(join(startpath, "**"),
- recursive=True) if splitext(i)[1][1:] in supported_file_format]
- )
- console.print("============Details of files====================")
- for ext, count in c.most_common():
- table.add_row(
- f"[bold green]{str(ext)}[/bold green]", f"[red]{str(count)}[/red]"
- )
- console.print(table)
- if verbose:
- console.print("============Directory Tree====================")
- prefix = 0
- if startpath != "/":
- if startpath.endswith("/"):
- startpath = startpath[:-1]
- prefix = len(startpath)
- for root, dirs, files in os.walk(startpath):
- level = root[prefix:].count(os.sep)
- if depth > -1 and level > depth:
- continue
- indent = subindent = ""
- if level > 0:
- indent = "| " * (level - 1) + "|-- "
- subindent = "| " * (level) + "|-- "
- print(
- f"{indent}š{self.__actual_dir_name(root)}/"
- ) # print dir only if symbolic link; otherwise, will be printed as root
- for d in dirs:
- if not d.startswith("."):
- if os.path.islink(os.path.join(root, d)):
- print(
- f"{subindent}š{self.__actual_dir_name(d, root=root)}")
- for f in files:
- _format = f.rsplit(".")[-1]
- if _format in supported_file_format:
- print(f"{subindent}š{self.__actual_dir_name(f, root=root)}")
- else:
- pass
-
- def __receptor_contents_print(self, receptor, receptor_content):
- number_of_residues = []
- number_of_membrane_molecules = []
- number_of_water_molecule = 0
- present_ions = []
- number_of_chains = []
- number_of_ligands = []
- number_of_ligands_atoms = 0
- for index, line in enumerate(receptor_content):
- if line.startswith("ATOM"):
- if line[17:20] in self.AA or line.split()[-1] == "PROA":
- number_of_chains.append(line[21])
- number_of_residues.append(line[22:26])
- elif line.split()[-1] == "MEMB":
- number_of_membrane_molecules.append(line[22:26])
- elif line.split()[-1] == "TIP3" or line[17:20] == "HOH":
- number_of_water_molecule += 1
- elif line.split()[-1] == "HETA":
- number_of_ligands.append(line.split()[3])
- else:
- present_ions.append(line[17:20])
- elif line.startswith("HETATM"):
- if line[17:20] == "HOH":
- number_of_water_molecule += 1
- elif len(line[17:20].strip()) < 3:
- present_ions.append(line[17:20])
- elif len(line[17:20].strip()) == 3:
- # number_of_ligands.append(line.split()[3])
- number_of_ligands.append(line[21])
- number_of_ligands_atoms += 1
- if not present_ions:
- max_number_of_single_ions = 0
- else:
- max_number_of_single_ions = max(
- present_ions, key=present_ions.count)
- types_of_ions = set(present_ions) if present_ions else 0
- number_of_membrane_molecules = (
- number_of_membrane_molecules[-1]
- if len(number_of_membrane_molecules) > 1 and not None
- else 0
- )
- table = self.__create_table("bold blue", "Record", "Counts")
- table.add_row("[bold green]Chains:[/]",
- f" {len(set(number_of_chains))}")
- table.add_row("[bold green]Ligands:[/]",
- f"{len(set(number_of_ligands))}")
- try:
- table.add_row(
- "[bold green]Number of ligand atoms :[/]",
- f"{number_of_ligands.count(max(number_of_ligands, key=number_of_ligands.count))}",
- )
- except ValueError:
- table.add_row("[bold green]Number of ligand atoms :[/]", "0")
-
- table.add_row("[bold green]Protein residues:[/]",
- f"{number_of_residues[-1]}")
-
- try:
- table.add_row(
- "[bold green]Lipids molecules :[/]", f"{number_of_membrane_molecules}"
- )
- except ValueError:
- table.add_row("[bold green]Lipids molecules :[/]", "0")
-
- try:
- table.add_row(
- "[bold green]Water molecules :[/]", f" {number_of_water_molecule}"
- )
- except ValueError:
- table.add_row("[bold green]Water molecules :[/]", "0")
- try:
- table.add_row("[bold green]Ions:[/]", f"{len(present_ions)}")
- table.add_row("[bold green]Ion types :[/]", f"{types_of_ions}")
- except ValueError:
- table.add_row("[bold green]Ions:[/]", "0")
- table.add_row("[bold green]Ion types :[/]", "None")
-
- console.print(f"\nFor[bold red] {receptor}[/]:")
- console.print(table)
-
- def LoadReceptor(self, *args, native=True, verbose=True, **kwargs):
- found_what = kwargs.get("key", "Receptor")
-
- for i in args:
- receptor = i
-
- info = True
- if not os.path.exists(receptor):
- try:
- _receptor = file_search(type="pdb", target=receptor)
- if len(_receptor) == 1:
- console.print(f"{found_what}: [bold]{_receptor[0]}[/bold]")
- info = False
- receptor = _receptor[0]
- elif len(_receptor) > 1:
- print(f"{found_what} {_receptor}")
- _receptor_number = int(
- input(f"Which {found_what} do you like to select: ")
- )
- receptor = _receptor[_receptor_number]
- console.print(f"Select {found_what} : {receptor}")
- else:
- print(f"No {found_what} found in Local directory")
- _download = input(
- f"Would you like to download the {receptor} from RCSB (Y/n)? "
- )
- confirm = ["yes", "y", "YES", "Y"]
- if _download in confirm:
- download_protein_msg = get(receptor)
- # TODO path pass forward
- console.print(download_protein_msg)
- receptor = f"./Generated/{receptor}.pdb"
- else:
- console.print(
- f"š {found_what}: [bold red]{receptor} [/]to process"
- " further.."
- )
- except Exception as er:
- print(er)
-
- if info and (native == False):
- console.print(f"{found_what}: [bold]{receptor}[/bold]")
- info = False
- receptor_content = open(receptor, "r")
- receptor_content = receptor_content.readlines()
- if verbose:
- self.__receptor_contents_print(receptor, receptor_content)
- # console.print(self.receptor)
- if native:
- self.receptor = receptor
- return receptor
-
- def __create_table(self, header_style, arg1, arg2):
- result = Table(show_header=True, header_style=header_style)
- result.add_column(arg1, style="dim", width=40)
- result.add_column(arg2, justify="right")
- return result
-
- def LoadLigand(self, *args, **kwargs):
- for arg in args:
- self.ligand = arg
- *_, _file_format= give_id(self.ligand)
- if _file_format.lower() == "sdf":
- inf = open(f"{self.ligand}", "rb")
- with Chem.ForwardSDMolSupplier(inf) as fsuppl:
- for mol in fsuppl:
- if mol is None:
- continue
- console.print(f"{self.ligand} has {mol.GetNumAtoms()} atoms")
- elif _file_format.lower() == "pdb":
- mol = Chem.MolFromPDBFile(self.ligand, sanitize=False)
- console.print(f"{self.ligand} has {mol.GetNumAtoms()} atoms")
- else:
- return "Unknow ligand file format"
-
- self.ligand_export = mol
- return self.ligand_export
-
- def SaveComplex(self, **kwargs):
- ligand = kwargs.get("lig", None)
- receptor = kwargs.get("pro", None)
- lipid = kwargs.get("lipid", None)
- out_file = kwargs.get("out", "complex_out.pdb")
- ligand_mol = Chem.MolToPDBBlock(self.ligand_export, flavor=32)
- # print(out_file)
- *_, structure_format = give_id(self.receptor)
- lipid = lipid if lipid is not None else self.lipid
- receptor = receptor if receptor is not None else self.receptor
- ligand = ligand if ligand is not None else self.ligand
-
- try:
- if structure_format.lower() == "sdf":
- receptor_mol = Chem.MolFromMolFile(receptor, removeHs=False)
- elif structure_format.lower() == "pdb":
- receptor_mol, *_ = PDBParse(receptor)
-
- except ValueError as er:
- console.print(f"{self.receptor} Error : {er}")
- blue_console.print("Not able to parse..")
-
- write_list = [receptor_mol, ligand_mol]
- try:
- _prot, lipid_mol, _tip, _lig = PDBParse(lipid)
- write_list.append(lipid_mol)
-
- except Exception as er:
- blue_console.print(f"Cannot able to parse {lipid}.\n Error: {er}")
- blue_console.print(f"Not writing Lipid in the {out_file}")
-
- self.__write_to_file(write_list, out_file, check=True)
-
- # self.__write_to_file(lipid_mol, out_file)
- # self.__write_to_file(Chem.MolToPDBBlock(self.ligand_export, flavor=32), out_file)
-
- def __write_to_file(self, content, filename, check=False):
- if check:
- count = 0
- _file, _format = filename.rsplit(".")
- while os.path.exists(filename):
- count += 1
- filename = f"{_file}_{count}.{_format}"
- with open(filename, "a+") as f:
- if isinstance(content, list):
- for _ in content:
- for line in _:
- print(line, end="", file=f)
- else:
- for line in content:
- print(line, end="", file=f)
- console.print(f"{filename} Saved Successfully!", style="bold green")
-
- def __call__(self):
- raise TypeError(
- 'Project must be accessed through "instance=ProjectStart()".')
-
- def __str__(self):
- ligand = self.ligand if self.ligand != None else "Not implemented"
- receptor = self.receptor if self.receptor != None else "Not implemented"
- lipid = self.lipid if self.lipid != None else "Not implemented"
- return (
- f"Project : \t\nProtein: {receptor}\t\nligand :{ligand}\t\nLipid : {lipid} "
- )
diff --git a/src/csfdock/Rahul-iikwon.sublime-project b/src/csfdock/Rahul-iikwon.sublime-project
deleted file mode 100644
index 2e5ea0c..0000000
--- a/src/csfdock/Rahul-iikwon.sublime-project
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- "folders":
- [
- {
- "path": "Z:\\home\\lab09\\SPACE\\Rahul-Iikwon"
- }
- ]
-}
diff --git a/src/csfdock/Rahul-iikwon.sublime-workspace b/src/csfdock/Rahul-iikwon.sublime-workspace
deleted file mode 100644
index c1f132c..0000000
--- a/src/csfdock/Rahul-iikwon.sublime-workspace
+++ /dev/null
@@ -1,792 +0,0 @@
-{
- "auto_complete":
- {
- "selected_items":
- [
- [
- "Except",
- "Exception as e:\n\traise ValueError"
- ],
- [
- "Da",
- "DataFrame"
- ],
- [
- "file",
- "file_path"
- ],
- [
- "isins",
- "isinstance(file_path, str):\n\tfile"
- ],
- [
- "kwargs",
- "kwargs.get(\"match\", False)"
- ],
- [
- "aut",
- "autobox[0"
- ],
- [
- "verbose",
- "verbose = False"
- ],
- [
- "Data",
- "DataFrame"
- ],
- [
- "iter",
- "itertools.combinations"
- ],
- [
- "write",
- "write_affinity"
- ],
- [
- "patter",
- "pattern_id"
- ],
- [
- "start",
- "startswith(\"MODEL\"):\n\ts"
- ],
- [
- "elif",
- "elif algo_format.lower()"
- ],
- [
- "manu",
- "manual_config_format"
- ],
- [
- "cluster",
- "cluster=cluster,\n )\n else:"
- ],
- [
- "dir_name",
- "dir_name_cwd"
- ],
- [
- "dir",
- "dir_name"
- ],
- [
- "chec",
- "check_names(manual_"
- ],
- [
- "enter_out",
- "enter_output}\" if enter_output else \"\""
- ],
- [
- "enter",
- "enter_output"
- ],
- [
- "kwaRG",
- "kwargs.get(\""
- ],
- [
- "output",
- "output= output_file_name"
- ],
- [
- "Inva",
- "Invalid cluster name\"\n"
- ],
- [
- "cl",
- "cluster_grp"
- ],
- [
- "CUSTOM_SCORE",
- "CUSTOM_SCORE is not None and"
- ],
- [
- "C",
- "CUSTOM_SCORE"
- ],
- [
- "Gene",
- "Generated"
- ],
- [
- "Ge",
- "Generated/"
- ],
- [
- "file_",
- "file_format = \""
- ],
- [
- "Va",
- "ValueError:\n\ttable.add_row(\"[bold green]"
- ],
- [
- "except",
- "except ValueError:\n\t table.add_row("
- ],
- [
- "add_",
- "add_row"
- ],
- [
- "tab",
- "table.add_"
- ],
- [
- "excep",
- "except ValueError:\n\t"
- ],
- [
- "Mol",
- "MolView"
- ],
- [
- "Ser",
- "ServerPath"
- ],
- [
- "Instacne",
- "isinstance"
- ],
- [
- "Docking",
- "DockingTools"
- ],
- [
- "KinaseMod",
- "KinaseModule import *"
- ],
- [
- "setSty",
- "setStyle"
- ],
- [
- "element",
- "element}\"}, {\"stick\": {\"colorscheme\": self.lig_color}"
- ],
- [
- "Excep",
- "Exception as e:\n\t"
- ],
- [
- "config",
- "config_file_name)\n"
- ],
- [
- "mana",
- "manual_config"
- ],
- [
- "manual",
- "manual_config="
- ],
- [
- "clear",
- "clear_output()"
- ],
- [
- "msg",
- "msg_header"
- ],
- [
- "default",
- "default_checkbox"
- ],
- [
- "sty",
- "style_avaiable"
- ],
- [
- "view_obj",
- "view_object = self.view"
- ],
- [
- "smi2",
- "smi2viewer"
- ],
- [
- "entered",
- "entered_smiles"
- ],
- [
- "interac",
- "interactive"
- ],
- [
- "get",
- "get('size')\n"
- ],
- [
- "__init",
- "__init__"
- ],
- [
- "self",
- "self.box_size_z = 15\nself"
- ],
- [
- "bo",
- "box_size_y"
- ],
- [
- "box",
- "box_size_x"
- ],
- [
- "el",
- "elif key == 'box_size_z':"
- ],
- [
- "box_cen",
- "box_center_z"
- ],
- [
- "box_center",
- "box_center_y"
- ],
- [
- "ligand",
- "ligand = ligand\n"
- ],
- [
- "inte",
- "interactive"
- ]
- ]
- },
- "buffers":
- [
- {
- "file": "Project.py",
- "settings":
- {
- "buffer_size": 10100,
- "encoding": "UTF-8",
- "line_ending": "Unix"
- }
- },
- {
- "file": "DockingTools.py",
- "settings":
- {
- "buffer_size": 37840,
- "encoding": "UTF-8",
- "line_ending": "Windows"
- },
- "undo_stack":
- [
- [
- 6475,
- 1,
- "insert",
- {
- "characters": "\\n"
- },
- "AgAAAOw7AAAAAAAA7TsAAAAAAAAAAAAA7TsAAAAAAADuOwAAAAAAAAAAAAA",
- "AgAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPC/AAAAAAEAAADsOwAAAAAAAOw7AAAAAAAAAAAAAAAA8L8"
- ],
- [
- 6479,
- 1,
- "black",
- null,
- "",
- "AgAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPC/AAAAAAEAAAASOwAAAAAAABI7AAAAAAAAAAAAAAAA8L8"
- ],
- [
- 6486,
- 1,
- "insert",
- {
- "characters": "\\n"
- },
- "AgAAAMg8AAAAAAAAyTwAAAAAAAAAAAAAyTwAAAAAAADKPAAAAAAAAAAAAAA",
- "AgAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPC/AAAAAAEAAADIPAAAAAAAAMg8AAAAAAAAAAAAAAAA8L8"
- ],
- [
- 6490,
- 1,
- "black",
- null,
- "",
- "AgAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPC/AAAAAAEAAACLPAAAAAAAAIs8AAAAAAAAAAAAAAAA8L8"
- ]
- ]
- }
- ],
- "build_system": "",
- "build_system_choices":
- [
- [
- [
- [
- "Packages/Python/Python.sublime-build",
- ""
- ],
- [
- "Packages/Python/Python.sublime-build",
- "Syntax Check"
- ]
- ],
- [
- "Packages/Python/Python.sublime-build",
- "Syntax Check"
- ]
- ]
- ],
- "build_varint": "",
- "command_palette":
- {
- "height": 0.0,
- "last_filter": "",
- "selected_items":
- [
- [
- "doc",
- "AutoDocstring: Current"
- ],
- [
- "install",
- "Package Control: Install Package"
- ],
- [
- "fix",
- "Python Fix Imports"
- ],
- [
- "inde",
- "Indentation: Convert to Spaces"
- ]
- ],
- "width": 0.0
- },
- "console":
- {
- "height": 0.0,
- "history":
- [
- ]
- },
- "distraction_free":
- {
- "menu_visible": true,
- "show_minimap": false,
- "show_open_files": false,
- "show_tabs": false,
- "side_bar_visible": false,
- "status_bar_visible": false
- },
- "expanded_folders":
- [
- "/Z/home/lab09/SPACE/Rahul-Iikwon",
- "/Z/home/lab09/SPACE/Rahul-Iikwon/csfdock"
- ],
- "file_history":
- [
- "/Z/home/lab09/SPACE/Rahul-Iikwon/Generated/jobs/3eml/3eml_ligand_output_dkoes_scoring_old.pdb",
- "/Z/home/lab09/SPACE/Rahul-Iikwon/Generated/jobs/3eml/run/3eml_SMina.sh",
- "/Z/home/lab09/SPACE/Rahul-Iikwon/csfdock/DVisualize.py",
- "/Z/home/lab09/SPACE/Rahul-Iikwon/csfdock/Project.py",
- "/Z/home/lab09/SPACE/Rahul-Iikwon/csfdock/DockingTools.py",
- "/Z/home/lab09/DOCKER/CSF/DATA/Actives/6nzp_active_complete.sdf",
- "/Z/home/lab09/POSTGRES/docker-compose.yml",
- "/Z/home/lab09/DOCKER/CSF/DATA/Inactive/6nzp_decoy_complete.sdf",
- "/Z/home/lab09/DOCKER/CSF/DATA/Inactive/6nzp_decoy_complete_fix.sdf",
- "/Z/home/lab09/DOCKER/CSF/data/Inactive/dude-decoys/decoys/6nzp_all_decoys.smi",
- "/Z/home/lab09/DOCKER/CSF/scores.txt",
- "/Z/home/lab09/DOCKER/6NZP/Generated/data/6nzp_best.pdbqt",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Generated/2rgp.pdb",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/DATA/membrane.pdb",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/utils.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/ar2a_v3.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/MolView.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/ServerPath.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/DVisualize.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/DockingTools.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/__init__.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Scripts/main.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/ar2a_v3.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/DockingTools.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/data/6NZP/Utility.py",
- "/Z/home/lab09/DOCKER/gpu-jupyter/.build/start.sh",
- "/Z/home/lab09/DOCKER/gpu-jupyter/build_push_all.sh",
- "/Z/home/lab09/DOCKER/gpu-jupyter/.build/docker-stacks/base-notebook/start.sh",
- "/Z/home/lab09/DOCKER/gpu-jupyter/.build/docker-stacks/base-notebook/Dockerfile.ppc64le.patch",
- "/Z/home/lab09/DOCKER/gpu-jupyter/.build/docker-stacks/base-notebook/Dockerfile",
- "/Z/home/lab09/DOCKER/gpu-jupyter/.build/Dockerfile",
- "/Z/home/lab09/DOCKER/Rahul-IIkwon/ar2a_v3.py",
- "/Z/home/lab09/DOCKER/Rahul-IIkwon/KinaseModules.py",
- "/Z/home/lab09/DOCKER/Rahul-IIkwon/ar2r.py",
- "/Z/home/lab09/DOCKER/DEV/Kinase_CLassifier/KC/KinaseModules.py",
- "/Z/home/lab09/DOCKER/Rahul-IIkwon/RawData/smiles/Conformer3D_CID_2244.sdf"
- ],
- "find":
- {
- "height": 27.0
- },
- "find_in_files":
- {
- "height": 0.0,
- "where_history":
- [
- ]
- },
- "find_state":
- {
- "case_sensitive": false,
- "find_history":
- [
- "obabel",
- "Nomatch",
- "dir_name_"
- ],
- "highlight": true,
- "in_selection": false,
- "preserve_case": false,
- "regex": false,
- "replace_history":
- [
- "nomatch",
- "dir_name"
- ],
- "reverse": false,
- "scrollbar_highlights": true,
- "show_context": true,
- "use_buffer2": true,
- "use_gitignore": true,
- "whole_word": false,
- "wrap": true
- },
- "groups":
- [
- {
- "sheets":
- [
- {
- "buffer": 0,
- "file": "Project.py",
- "semi_transient": true,
- "settings":
- {
- "buffer_size": 10100,
- "regions":
- {
- },
- "selection":
- [
- [
- 0,
- 0
- ]
- ],
- "settings":
- {
- "bracket_highlighter.busy": false,
- "bracket_highlighter.locations":
- {
- "close":
- {
- },
- "icon":
- {
- },
- "open":
- {
- },
- "unmatched":
- {
- }
- },
- "bracket_highlighter.regions":
- [
- "bh_double_quote",
- "bh_double_quote_center",
- "bh_double_quote_open",
- "bh_double_quote_close",
- "bh_double_quote_content",
- "bh_square",
- "bh_square_center",
- "bh_square_open",
- "bh_square_close",
- "bh_square_content",
- "bh_default",
- "bh_default_center",
- "bh_default_open",
- "bh_default_close",
- "bh_default_content",
- "bh_curly",
- "bh_curly_center",
- "bh_curly_open",
- "bh_curly_close",
- "bh_curly_content",
- "bh_c_define",
- "bh_c_define_center",
- "bh_c_define_open",
- "bh_c_define_close",
- "bh_c_define_content",
- "bh_angle",
- "bh_angle_center",
- "bh_angle_open",
- "bh_angle_close",
- "bh_angle_content",
- "bh_tag",
- "bh_tag_center",
- "bh_tag_open",
- "bh_tag_close",
- "bh_tag_content",
- "bh_unmatched",
- "bh_unmatched_center",
- "bh_unmatched_open",
- "bh_unmatched_close",
- "bh_unmatched_content",
- "bh_round",
- "bh_round_center",
- "bh_round_open",
- "bh_round_close",
- "bh_round_content",
- "bh_regex",
- "bh_regex_center",
- "bh_regex_open",
- "bh_regex_close",
- "bh_regex_content",
- "bh_single_quote",
- "bh_single_quote_center",
- "bh_single_quote_open",
- "bh_single_quote_close",
- "bh_single_quote_content"
- ],
- "syntax": "Packages/Python/Python.sublime-syntax",
- "tab_size": 4,
- "translate_tabs_to_spaces": true
- },
- "translation.x": 0.0,
- "translation.y": 0.0,
- "zoom_level": 1.0
- },
- "stack_index": 1,
- "stack_multiselect": false,
- "type": "text"
- },
- {
- "buffer": 1,
- "file": "DockingTools.py",
- "selected": true,
- "semi_transient": false,
- "settings":
- {
- "buffer_size": 37840,
- "regions":
- {
- },
- "selection":
- [
- [
- 13647,
- 13644
- ]
- ],
- "settings":
- {
- "auto_complete": false,
- "bracket_highlighter.busy": false,
- "bracket_highlighter.locations":
- {
- "close":
- {
- "1":
- [
- 13645,
- 13646
- ]
- },
- "icon":
- {
- "1":
- [
- "Packages/BracketHighlighter/icons/round_bracket.png",
- "region.yellowish"
- ]
- },
- "open":
- {
- "1":
- [
- 13531,
- 13532
- ]
- },
- "unmatched":
- {
- }
- },
- "bracket_highlighter.regions":
- [
- "bh_double_quote",
- "bh_double_quote_center",
- "bh_double_quote_open",
- "bh_double_quote_close",
- "bh_double_quote_content",
- "bh_square",
- "bh_square_center",
- "bh_square_open",
- "bh_square_close",
- "bh_square_content",
- "bh_default",
- "bh_default_center",
- "bh_default_open",
- "bh_default_close",
- "bh_default_content",
- "bh_curly",
- "bh_curly_center",
- "bh_curly_open",
- "bh_curly_close",
- "bh_curly_content",
- "bh_c_define",
- "bh_c_define_center",
- "bh_c_define_open",
- "bh_c_define_close",
- "bh_c_define_content",
- "bh_angle",
- "bh_angle_center",
- "bh_angle_open",
- "bh_angle_close",
- "bh_angle_content",
- "bh_tag",
- "bh_tag_center",
- "bh_tag_open",
- "bh_tag_close",
- "bh_tag_content",
- "bh_unmatched",
- "bh_unmatched_center",
- "bh_unmatched_open",
- "bh_unmatched_close",
- "bh_unmatched_content",
- "bh_round",
- "bh_round_center",
- "bh_round_open",
- "bh_round_close",
- "bh_round_content",
- "bh_regex",
- "bh_regex_center",
- "bh_regex_open",
- "bh_regex_close",
- "bh_regex_content",
- "bh_single_quote",
- "bh_single_quote_center",
- "bh_single_quote_open",
- "bh_single_quote_close",
- "bh_single_quote_content"
- ],
- "syntax": "Packages/Python/Python.sublime-syntax",
- "tab_size": 4,
- "translate_tabs_to_spaces": true
- },
- "translation.x": 0.0,
- "translation.y": 14940.0,
- "zoom_level": 1.0
- },
- "stack_index": 0,
- "stack_multiselect": false,
- "type": "text"
- }
- ]
- }
- ],
- "incremental_find":
- {
- "height": 27.0
- },
- "input":
- {
- "height": 39.0
- },
- "layout":
- {
- "cells":
- [
- [
- 0,
- 0,
- 1,
- 1
- ]
- ],
- "cols":
- [
- 0.0,
- 1.0
- ],
- "rows":
- [
- 0.0,
- 1.0
- ]
- },
- "menu_visible": true,
- "output.black":
- {
- "height": 126.0
- },
- "output.exec":
- {
- "height": 78.0
- },
- "output.find_results":
- {
- "height": 0.0
- },
- "output.mdpopups":
- {
- "height": 0.0
- },
- "pinned_build_system": "",
- "project": "Rahul-iikwon.sublime-project",
- "replace":
- {
- "height": 50.0
- },
- "save_all_on_build": true,
- "select_file":
- {
- "height": 0.0,
- "last_filter": "",
- "selected_items":
- [
- ],
- "width": 0.0
- },
- "select_project":
- {
- "height": 0.0,
- "last_filter": "",
- "selected_items":
- [
- ],
- "width": 0.0
- },
- "select_symbol":
- {
- "height": 0.0,
- "last_filter": "",
- "selected_items":
- [
- ],
- "width": 0.0
- },
- "selected_group": 0,
- "settings":
- {
- },
- "show_minimap": true,
- "show_open_files": true,
- "show_tabs": true,
- "side_bar_visible": true,
- "side_bar_width": 121.0,
- "status_bar_visible": true,
- "template_settings":
- {
- }
-}
diff --git a/src/csfdock/__main__.py b/src/csfdock/__main__.py
deleted file mode 100644
index 96c7bfc..0000000
--- a/src/csfdock/__main__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""Command-line interface."""
-import click
-
-
-@click.command()
-@click.version_option()
-def main() :
- """CsfDock."""
-
-
-if __name__ == "__main__":
- main(prog_name="csfdock") # pragma: no cover
diff --git a/src/csfdock/ar2a_v3.py b/src/csfdock/ar2a_v3.py
deleted file mode 100644
index ade3344..0000000
--- a/src/csfdock/ar2a_v3.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import re
-import sys
-from collections import Counter
-
-import ipywidgets
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import py3Dmol
-from IPython.display import HTML, display
-from ipywidgets import (
- FileUpload,
- IntSlider,
- fixed,
- interactive,
- interactive_output,
- widgets,
- Layout,
-)
-from ipywidgets.embed import embed_minimal_html
-from matplotlib.offsetbox import AnchoredText
-from rdkit import Chem
-from rdkit.Chem import AllChem
-from rich.console import Console
-from rich.table import Table
-
-console = Console()
-# from rich import print
-
-from sklearn.datasets import make_classification
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
-from sklearn.model_selection import KFold, train_test_split
-
-from csfdock.DockingTools import *
-from csfdock.DVisualize import *
-from csfdock.KinaseModules import *
-from csfdock.MolView import *
-from csfdock.Project import *
-from csfdock.utils import *
-from csfdock.xg_mod import *
-
-#
-
-
-def view(structure, ligand=None, color="grey", save=False):
- """3d visualization of pdb
- Args:
- structure (TYPE): Description
- ligand (None, optional): small molecule
- color (str, optional): color of wish, default: grey
- Returns:
- TYPE: structure view.
- """
- structure_dir, structure_name, structure_format = give_id(structure)
- v = py3Dmol.view(width=900, height=500)
- if structure_format.lower() == "sdf":
- mol = Chem.MolFromMolFile(structure, removeHs=False)
- mol = Chem.MolToMolBlock(mol)
- v.addModel(mol, f"{structure_format}")
- else:
- v.addModel(open(structure).read())
- v.setStyle({"cartoon": {"color": f"{color}"}})
- if ligand is not None:
- v.setStyle({"resn": f"{ligand}"}, {"stick": {"colorscheme": "greenCarbon"}})
- v.zoomTo()
- v.show()
- if save:
- prefix = "image"
- while os.path.exists(f"./images/{prefix}.png"):
- suffix += 1
- name = f"{prefix}{suffix}.png"
- v.save_fig(f"./Images/{name}", dpi=600)
- return structure
-
-
-def update_exp_data(new_data):
- """Enter new data to already generated experimental data
- Args:
- new_data (list|dict): New experimental data
- Returns:
- pd.DataFrame: Latest data
- """
- try:
- order_list = ["Elec", "Vdw", "exp"]
- if not new_data:
- return "Enter valid data"
- if isinstance(new_data, list):
- CONFIRMED = input(f"Is the list in order(yes|no)\n{order_list}: ")
- if CONFIRMED.lower() != "yes":
- return "Enter valid order experimental data"
- df = pd.DataFrame(data=new_data)
- df = df.T
- new_columns = {0: "Elec", 1: "Vdw", 2: "exp"}
- df.rename(columns=new_columns, inplace=True)
- else:
- df = pd.DataFrame(data=new_data)
- print(
- "[bold magenta]Staged for updating previous data with[/bold magenta]"
- f" \n{df}\n"
- )
- old_df = pd.read_pickle("./DATA/experimental_data.pickle")
- latest_df = pd.concat([old_df, df], ignore_index=True)
- print(
- "[bold green]Successfully save!! [/bold green]\n\nlatest experimental_data"
- f" :\n {latest_df}"
- )
- latest_df.to_pickle("./DATA/experimental_data.pickle")
- return latest_df
- except Exception as e:
- print(e)
- return
-
-
-def parse_log(file):
- """Parse LIE log file in return delta Vwd and Elec
- Args:
- file (str): Log file path
- Returns:
- pd.DataFrame: Vdw and Elect DataFrame.
- """
- try:
- with open(file, "r") as file:
- info = []
- lines = file.readlines()
- extract = False
- for index, line in enumerate(lines):
- # print(f" {line.strip()}" )
- if line[:6].strip() == "Delta":
- extract = True
- if extract and (
- line[:6].strip() == "Vdw" or line[:7].strip() == "Elec"
- ):
- info.append(line.split())
- except Exception as e:
- print(e)
- df = pd.DataFrame(info)
- df = df.T.reset_index(drop=True)
- df.columns = df.iloc[0]
- df.drop(df.index[0], inplace=True)
- return df
-
-
-def exp_model_score(file_path, num_features=2, intercept=False, tsize=0.3, plot=False):
- """Generates regression model using sklearn. Will Print out coefficients
- Args:
- file_path (str): csv/excel file path
- num_features (int, optional): Number of features to use. Default: 3
- intercept (bool, optional): Mean Error
- tsize (float, optional): Percentage of datato use for test. Default 0.3(30%)
- plot_auc (bool, optional): Plot ROC AUC curve
- plot_save (bool, optional): Save ROC AUC plot
- """
- data, file_name, file_format = give_id(file_path)
- supported_file_format = ["csv", "excel"] # for now | smina result file
- assert (
- file_format in supported_file_format
- ), f"Note: FileType Error: Not supported file format. Use {supported_file_format}"
- try:
- if file_format == "excel":
- df = pd.read_excel(file_path)
- else:
- df = pd.read_csv(file_path)
- print(df)
- X, y = df.iloc[:, :-1], df.iloc[:, -1]
- X = pd.DataFrame(X)
- header = X.iloc[:0, :]
- # print(f"----------\n {X}")
- # X, y = make_classification(n_samples=df.shape()[0],n_features=num_features)
- # TODO //include K-Fold Test
- model = LinearRegression(fit_intercept=intercept)
- model.fit(X, y) # TODOD accept use model input
- print(f"Alpha: {model.coef_[-1]}, Beta= {model.coef_[0]}")
- # weight = model.coef_
- # weight = [item for i in weight for item in i]
- # for head, coeff in zip(header, weight):
- # print(coeff, head, end="\n")
- except Exception as er:
- print(er)
- if plot:
- plt.scatter(X, y, color="black")
- plt.plot(X, y, color="blue", linewidth=3)
- plt.xticks(())
- plt.yticks(())
- plt.show()
- return model
diff --git a/src/csfdock/py.typed b/src/csfdock/py.typed
deleted file mode 100644
index e69de29..0000000
diff --git a/src/csfdock/utils.py b/src/csfdock/utils.py
deleted file mode 100644
index 3f3969f..0000000
--- a/src/csfdock/utils.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# Some often used utilities..
-from glob import glob
-import os
-import subprocess
-
-
-def file_search(type=None, target="*", specific=None):
- """searches files in sub dir
- Args:
- type (str, optional): Search file format
- target (str, optional): Identifier to search
- specific (str, optional): Specific folder to search
- Returns:
- list: Search result
- """
- BASE_DIR = os.getcwd()
- try:
- if specific is None:
- return sorted(glob(f"{BASE_DIR}/**/{target}.{type}", recursive=True))
- else:
- return sorted(
- glob(f"{BASE_DIR}/**/{specific}/{target}.{type}", recursive=True)
- )
- except Exception as error:
- print(f"{error} \n File not found anywhere.")
-
-
-def give_id(input_file):
- """Function to return the main file name excluding "." extension.
- Args:
- file (list): Name of file with "." extension.
- Returns:
- Name: Name without extension.
- """
- file_name = os.path.basename(input_file)
- file_name, file_format = file_name.rsplit(".")
- file_dir = os.path.dirname(input_file)
- return file_dir, file_name, file_format
-
-
-def get(id, molecule="protein", prot_id="", type_="pdb"):
- """Downloads structure from RCSB and save in Generated sub folder.
- Args:
- id (TYPE): PDB ID
- molecule (str, optional): default:Protein or Small Molecule
- prot_id (str, optional): Description
- type_ (str, optional): Structure type to download.
- Returns:
- pdb/sdf/**: 3D coordinate file
- """
- try:
- assert molecule in [
- "protein",
- "ligand",
- ], 'Note: molecule parameter must be either "protein" or "ligand" only'
- if not os.path.exists("./Generated/"):
- os.makedirs("./Generated/")
- if molecule.lower() == "protein":
- assert type_ in [
- "pdb"
- ], "Note: \n Only PDB format supported for protein for now."
- command = (
- f"wget https://files.rcsb.org/download/{id}.{type_} -q -P ./Generated/"
- )
- msg = f"downloading of {id}.{type_}"
- elif molecule.lower() == "ligand":
- command = (
- f"wget -c -O https://files.rcsb.org/ligands/download/{id}_ideal.{type_}"
- f" > ./Generated/{prot_id}_{id}_ligand.{type_}"
- )
- msg = f"downloading of {prot_id}_{id}_ligand.{type_}"
- if os.path.exists(f"./Generated/{id}.{type_}"):
- msg = "but not downloaded as it already exists"
- else:
- subprocess.run(command, shell=True)
- return f"Succcesfully executed {msg} in ./Generated folder."
- except Exception as er:
- print(er)
-
-
-def search_gui():
- output = widgets.Output()
-
- def f(File_type):
- global file_type
- file_type = File_type
-
- def search(file):
- output.clear_output()
- try:
- target = target_in.value
- if len(target) == 0:
- with output:
- print("** Cannot be empty target!")
- else:
- global search_result
- specific = folder_in.value
- search_result = folder_search(
- type=file_type, target=target, specific=specific
- )
- with output:
- print(f"Total files found: {len(search_result)}")
- print(search_result)
- return search_result
- except:
- with output:
- print("something wrong")
-
- usage_information = widgets.HTML(
- "Enter target file type and target name and specify"
- " the folder."
- )
- display(usage_information)
- interact(f, File_type=["pdb", "sdf", "xyz", "txt"])
- target_in = widgets.Text(
- placeholder="Enter name", description="Target: ", disable=False
- )
- folder_in = widgets.Text(
- placeholder="specific folder name(optional)",
- description="Folder: ",
- disable=False,
- )
- search_button = widgets.Button(description="Search")
- search_button.style.button_color = "lightgreen"
- display(widgets.HBox([target_in, folder_in, search_button]), output)
- search_button.on_click(search)
-
-
-def PDBParse(target):
- protein = []
- membrane = []
- tip3 = []
- ligand = []
- with open(target, "r") as target:
- temp = target.readlines()
- for line in temp:
- if line.startswith("ATOM") and line[21:22].strip() == "P":
- protein.append(line)
- elif line.startswith("ATOM") and line[21:22].strip() == "M":
- membrane.append(line)
- elif line.startswith("ATOM") and line[21:22].strip() == "T":
- tip3.append(line)
- elif line.startswith("HETATM"):
- ligand.append(line)
-
- return protein, membrane, tip3, ligand
diff --git a/src/csfdock/xg_mod.py b/src/csfdock/xg_mod.py
deleted file mode 100644
index 8d89c49..0000000
--- a/src/csfdock/xg_mod.py
+++ /dev/null
@@ -1,99 +0,0 @@
-from pandas import read_csv
-from numpy import absolute
-from matplotlib import pyplot
-from numpy import mean
-from numpy import std
-from sklearn.datasets import make_regression
-from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import RepeatedKFold
-from xgboost import XGBRegressor
-from sklearn.datasets import make_classification
-from sklearn.model_selection import RepeatedStratifiedKFold
-from xgboost import XGBClassifier
-from matplotlib import pyplot
-
-
-def ensamble(X, y):
- # define dataset
- X, y = make_regression(
- n_samples=1000, n_features=5, n_informative=5, noise=0.1, random_state=7
- )
- # define the model
- model = XGBRegressor()
- model.fit(X, y)
- # evaluate the model
- cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
- n_scores = cross_val_score(
- model,
- X,
- y,
- scoring="neg_mean_absolute_error",
- cv=cv,
- n_jobs=-1,
- error_score="raise",
- )
- # report performance
- print("MAE: %.3f (%.3f)" % (mean(n_scores), std(n_scores)))
-
-
-def xg(file, ensemble=True, params=None):
-
- dataframe = read_csv(file, header=None)
- data = dataframe.values
- # split data into input and output columns
- X, y = data[1:, 1:-1], data[1:, -1]
- # define model
- model = XGBRegressor()
- model.fit(X, y)
- # define model evaluation method
- cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=10)
- # evaluate model
- scores = cross_val_score(
- model, X, y, scoring="neg_mean_absolute_error", cv=cv, n_jobs=-1
- )
- # force scores to be positive
- scores = absolute(scores)
- print("Mean MAE: %.3f (%.3f)" % (scores.mean(), scores.std()))
-
- if ensemble:
- ensamble(X, y)
- if params is not None:
- xg_param(X, y)
-
-
-# explore xgboost number of trees effect on performance
-def xg_param(X, y):
- def get_dataset():
- X, y = make_classification(
- n_samples=1000,
- n_features=5,
- n_informative=15,
- n_redundant=5,
- random_state=712,
- )
- return X, y
-
- # get a list of models to evaluate
- def get_models():
- trees = [10, 50, 100, 500, 1000, 5000]
- return {str(n): XGBClassifier(n_estimators=n) for n in trees}
-
- # evaluate a give model using cross-validation
- def evaluate_model(model):
- cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
- return cross_val_score(model, X, y, scoring="accuracy", cv=cv, n_jobs=-1)
-
- # define dataset
- X, y = get_dataset()
- # get the models to evaluate
- models = get_models()
- # evaluate the models and store results
- results, names = list(), list()
- for name, model in models.items():
- scores = evaluate_model(model)
- results.append(scores)
- names.append(name)
- print(">%s Accuracy: %.3f[mean] %.3f[std]" % (name, mean(scores), std(scores)))
- # plot model performance for comparison
- pyplot.boxplot(results, labels=names, showmeans=True)
- pyplot.show()