Add an optional QIAGEN (commercial licence) route for downloading the COSMIC
fusion export, Docker image workflows, and dependency/packaging fixes.
The QIAGEN helpers call curl with argument lists (no shell) so credentials
containing shell or URL metacharacters are passed through unchanged.

diff --git a/.github/workflows/docker-image-manual.yml b/.github/workflows/docker-image-manual.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/docker-image-manual.yml
@@ -0,0 +1,23 @@
+name: Docker Image CI [ manual ]
+
+on:
+  workflow_dispatch:
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Login to DockerHub
+      id: docker_login
+      uses: docker/login-action@v1
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_PASSWORD }}
+    - name: Build and push
+      id: docker_build_only
+      uses: docker/build-push-action@v2
+      with:
+        file: Dockerfile
+        push: true
+        tags: clinicalgenomics/fusion-report:2.1.5p5
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,25 @@
+name: Docker Image CI [ master ]
+
+on:
+  push:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Login to DockerHub
+      id: docker_login
+      uses: docker/login-action@v1
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_PASSWORD }}
+    - name: Build and push
+      id: docker_build_only
+      uses: docker/build-push-action@v2
+      with:
+        file: Dockerfile
+        push: true
+        tags: clinicalgenomics/fusion-report:2.1.5p4
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@ tests/sss
 *.log
 .tox/
 .DS_Store
+testfusionreport.sh
+thisismydb/
diff --git a/docs/download.md b/docs/download.md
--- a/docs/download.md
+++ b/docs/download.md
@@ -15,6 +15,16 @@ fusion_report download
     /path/to/db
 ```
 
+For commercial (non-academic) use, download COSMIC through QIAGEN with your QIAGEN credentials:
+
+```bash
+fusion_report download \
+    --cosmic_usr '<QIAGEN username>' \
+    --cosmic_passwd '<QIAGEN password>' \
+    --qiagen \
+    /path/to/db
+```
+
 ## Manual download
 
 ### FusionGDB
diff --git a/fusion_report/app.py b/fusion_report/app.py
index 5d5295b..d5cf04c 100644
--- a/fusion_report/app.py
+++ b/fusion_report/app.py
@@ -189,7 +189,7 @@ def export_results(self, path: str, extension: str) -> None:
     def generate_fusion_list(self, path: str, cutoff: int):
         """
         Helper function that generates file containing list of found fusions and filtered list of
-        fusions. One of these files ise used by FusionInspector to visualize the fusions.
+        fusions. One of these files is used by FusionInspector to visualize the fusions.
         Input for FusionInspector expects list of fusions in format `geneA--geneB\n`.
 
         Returns:
diff --git a/fusion_report/args_builder.py b/fusion_report/args_builder.py
--- a/fusion_report/args_builder.py
+++ b/fusion_report/args_builder.py
@@ -117,7 +117,9 @@ def _cosmic(self, args: Dict[str, Any], parser) -> None:
             which will be used to generate base64 token or the token itself.'''
         )
         for cosmic in args['cosmic']:
-            download_cosmic.add_argument(cosmic['key'], help=cosmic['help'], type=str)
+            # Flag-style options (e.g. --qiagen) carry an "action" and take no value.
+            kwargs = {'action': cosmic['action']} if cosmic.get('action') else {'type': str}
+            download_cosmic.add_argument(cosmic['key'], help=cosmic.get('help'), **kwargs)
 
     def parse(self) -> Namespace:
         """Parse arguments."""
diff --git a/fusion_report/arguments.json b/fusion_report/arguments.json
index 2b1ca12..71c5f7a 100644
--- a/fusion_report/arguments.json
+++ b/fusion_report/arguments.json
@@ -91,6 +91,11 @@
                 {
                     "key": "--cosmic_token",
                     "help": "COSMIC token"
+                },
+                {
+                    "key": "--qiagen",
+                    "help": "Use QIAGEN to download COSMIC db (commercial usage)",
+                    "action": "store_true"
                 }
             ]
         }
diff --git a/fusion_report/common/net.py b/fusion_report/common/net.py
--- a/fusion_report/common/net.py
+++ b/fusion_report/common/net.py
@@ -9,7 +9,8 @@
 import time
 import pandas as pd
 from zipfile import ZipFile
-
+import subprocess
+import json
 from argparse import Namespace
 from typing import List
 
@@ -32,16 +33,107 @@ def get_cosmic_token(params: Namespace):
         if params.cosmic_token is not None:
             return params.cosmic_token
 
-        if (
-            params.cosmic_token is None
-            and (params.cosmic_usr is not None or params.cosmic_passwd is not None)
-        ):
+        if params.cosmic_usr is not None and params.cosmic_passwd is not None:
             return base64.b64encode(
                 f'{params.cosmic_usr}:{params.cosmic_passwd}'.encode()
             ).decode('utf-8')
         else:
             raise DownloadException('COSMIC credentials have not been provided correctly')
 
+    @staticmethod
+    def run_qiagen_cmd(cmd, return_output=False, silent=False):
+        """Run an external command used by the QIAGEN download.
+
+        Args:
+            cmd: Argument list (run without a shell) or, for backward
+                compatibility, a single string run through bash.
+            return_output: Return the stripped stdout (bytes) of the command.
+            silent: Do not log the command before running it.
+
+        Raises:
+            DownloadException: The command exited with a non-zero status.
+        """
+        use_shell = isinstance(cmd, str)
+        executable = '/bin/bash' if use_shell else None
+        if not silent:
+            Logger(__name__).info('Running %s', cmd)
+        try:
+            if return_output:
+                return subprocess.check_output(
+                    cmd, shell=use_shell, executable=executable
+                ).strip()
+            subprocess.check_call(cmd, shell=use_shell, executable=executable)
+        except subprocess.CalledProcessError as ex:
+            # The command line may hold credentials: report only program and status.
+            program = cmd.split()[0] if use_shell else cmd[0]
+            raise DownloadException(
+                f'{program} failed with exit status {ex.returncode}'
+            ) from None
+
+    @staticmethod
+    def get_qiagen_files(token: str, output_path: str):
+        """Save QIAGEN's COSMIC file listing to <output_path>/qiagen_files.tsv."""
+        cmd = [
+            'curl', '-sS', '--fail', '-X', 'GET',
+            '-H', 'Content-Type: application/octet-stream',
+            '-H', f'Authorization: Bearer {token}',
+            '-o', f'{output_path}/qiagen_files.tsv',
+            'https://my.qiagendigitalinsights.com/bbp/data/files/cosmic',
+        ]
+        return Net.run_qiagen_cmd(cmd, True, True)
+
+    @staticmethod
+    def download_qiagen_file(token: str, file_id: str, output_path: str):
+        """Download QIAGEN file `file_id` to <output_path>/<COSMIC file name>."""
+        cmd = [
+            'curl', '-sS', '--fail', '-X', 'GET',
+            '-H', 'Content-Type: application/octet-stream',
+            '-H', f'Authorization: Bearer {token}',
+            '-o', f'{output_path}/{Settings.COSMIC["FILE"]}',
+            'https://my.qiagendigitalinsights.com/bbp/data/download/'
+            f'cosmic-download?name={file_id}',
+        ]
+        Net.run_qiagen_cmd(cmd, True, True)
+
+    @staticmethod
+    def fetch_fusion_file_id(output_path: str):
+        """Return the id of the GRCh38 COSMIC fusion export from the saved listing."""
+        df = pd.read_csv(
+            f'{output_path}/qiagen_files.tsv',
+            names=['file_id', 'file_name', 'genome_draft'], sep='\t'
+        )
+        is_fusion_export = (
+            (df['file_name'] == Settings.COSMIC["FILE"])
+            & (df['genome_draft'] == 'cosmic/GRCh38')
+        )
+        file_ids = df.loc[is_fusion_export, 'file_id'].values
+        if len(file_ids) == 0:
+            raise DownloadException(
+                f'{Settings.COSMIC["FILE"]} is not in the QIAGEN file listing'
+            )
+        return file_ids[0]
+
+    @staticmethod
+    def get_cosmic_qiagen_token(params: Namespace):
+        """Exchange the QIAGEN username and password for an access token."""
+        if params.cosmic_usr is None or params.cosmic_passwd is None:
+            raise DownloadException('COSMIC credentials have not been provided correctly')
+        # --data-urlencode escapes '&', '=' and quotes inside the credentials.
+        cmd = [
+            'curl', '-sS', '--fail', '-X', 'POST',
+            '-H', 'Content-Type: application/x-www-form-urlencoded',
+            '--data-urlencode', 'grant_type=password',
+            '--data-urlencode', 'client_id=603912630-14192122372034111918-SmRwso',
+            '--data-urlencode', f'username={params.cosmic_usr}',
+            '--data-urlencode', f'password={params.cosmic_passwd}',
+            'https://apps.ingenuity.com/qiaoauth/oauth/token',
+        ]
+        token_response = Net.run_qiagen_cmd(cmd, True, True).decode('UTF-8')
+        try:
+            return json.loads(token_response)['access_token']
+        except (ValueError, KeyError):
+            raise DownloadException('QIAGEN did not return an access token') from None
+
     @staticmethod
     def get_large_file(url: str, ignore_ssl: bool = False) -> None:
         """Method for downloading a large file."""
@@ -54,7 +146,9 @@ def get_large_file(url: str, ignore_ssl: bool = False) -> None:
 
         if url.startswith('https') or url.startswith('ftp'):
             try:
-                with urllib.request.urlopen(url, context=ctx) as response:
+                req = urllib.request.Request(url)
+                req.add_header('User-Agent', 'Mozilla/5.0')
+                with urllib.request.urlopen(req, context=ctx) as response:
                     file = url.split('/')[-1].split('?')[0]
                     Logger(__name__).info('Downloading %s', file)
                     # only download if file size doesn't match
@@ -68,8 +162,8 @@ def get_large_file(url: str, ignore_ssl: bool = False) -> None:
             Logger(__name__).error('Downloading resources supports only HTTPS or FTP')
 
     @staticmethod
-    def get_cosmic(token: str, return_err: List[str]) -> None:
-        """Method for download COSMIC database."""
+    def get_cosmic_from_sanger(token: str, return_err: List[str]) -> None:
+        """Download the COSMIC database from the Sanger website."""
 
         # get auth url to download file
         files = []
@@ -96,17 +190,40 @@ def get_cosmic(token: str, return_err: List[str]) -> None:
         except urllib.error.HTTPError as ex:
             return_err.append(f'{Settings.COSMIC["NAME"]}: {ex}')
 
+    @staticmethod
+    def get_cosmic_from_qiagen(token: str, return_err: List[str], outputpath: str) -> None:
+        """Download the COSMIC database from QIAGEN.
+
+        Download and parsing failures are appended to `return_err`.
+        """
+        file: str = Settings.COSMIC["FILE"]
+        files = ['.'.join(file.split('.')[:-1])]
+        archive_path = f'{outputpath}/{file}'
+
+        try:
+            Net.get_qiagen_files(token, outputpath)
+            file_id = Net.fetch_fusion_file_id(outputpath)
+            Net.download_qiagen_file(token, file_id, outputpath)
+
+            with gzip.open(archive_path, 'rb') as archive, open(files[0], 'wb') as out_file:
+                shutil.copyfileobj(archive, out_file)
+
+            db = CosmicDB('.')
+            db.setup(files, delimiter='\t', skip_header=True)
+        except (DownloadException, OSError, ValueError) as ex:
+            return_err.append(f'{Settings.COSMIC["NAME"]}: {ex}')
+
     @staticmethod
     def get_fusiongdb(self, return_err: List[str]) -> None:
         """Method for download FusionGDB database."""
 
-        pool_params = [
-            (f'{Settings.FUSIONGDB["HOSTNAME"]}/{x}', True) for x in Settings.FUSIONGDB["FILES"]
-        ]
-        pool = Pool(Settings.THREAD_NUM)
-        pool.starmap(Net.get_large_file, pool_params)
-        pool.close()
-        pool.join()
+        for file in Settings.FUSIONGDB["FILES"]:
+            try:
+                url: str = f'{Settings.FUSIONGDB["HOSTNAME"]}/{file}'
+                Net.get_large_file(url)
+            except DownloadException as ex:
+                return_err.append(f'FusionGDB: {ex}')
+
         db = FusionGDB('.')
         db.setup(Settings.FUSIONGDB['FILES'], delimiter='\t', skip_header=False)
 
@@ -135,7 +252,7 @@ def get_mitelman(self, return_err: List[str]) -> None:
         url: str = f'{Settings.MITELMAN["HOSTNAME"]}/{Settings.MITELMAN["FILE"]}'
         Net.get_large_file(url)
         with ZipFile(Settings.MITELMAN['FILE'], 'r') as archive:
-            files = [x for x in archive.namelist() if "mitelman_db/MBCA.TXT.DATA" in x]
+            files = [x for x in archive.namelist() if "MBCA.TXT.DATA" in x and "MACOSX" not in x]
             archive.extractall()
 
         db = MitelmanDB('.')
diff --git a/fusion_report/common/template.py b/fusion_report/common/template.py
--- a/fusion_report/common/template.py
+++ b/fusion_report/common/template.py
@@ -4,8 +4,9 @@
 from pathlib import Path
 from typing import Any, Dict
 
-from jinja2 import Environment, FileSystemLoader, Markup
+from jinja2 import Environment, FileSystemLoader
+from markupsafe import Markup
 
 from fusion_report.common.page import Page
 from fusion_report.config import Config
 from fusion_report.settings import Settings
diff --git a/fusion_report/data/schema/FusionGDB2.sql b/fusion_report/data/schema/FusionGDB2.sql
index f07d3bc..7f89b0b 100644
--- a/fusion_report/data/schema/FusionGDB2.sql
+++ b/fusion_report/data/schema/FusionGDB2.sql
@@ -1,3 +1,3 @@
-CREATE TABLE "fusionGDB2" (
-    "Fusions" varchar(50) NOT NULL DEFAULT ''
+CREATE TABLE "fusiongdb2" (
+    "fusions" varchar(50) NOT NULL DEFAULT ''
 );
diff --git a/fusion_report/download.py b/fusion_report/download.py
--- a/fusion_report/download.py
+++ b/fusion_report/download.py
@@ -22,8 +22,11 @@ def __init__(self, params: Namespace):
         self.download_all(params)
 
     def validate(self, params: Namespace) -> None:
         """Method validating required input. In this case COSMIC credentials."""
-        self.cosmic_token = Net.get_cosmic_token(params)
+        if params.qiagen:
+            self.cosmic_token = Net.get_cosmic_qiagen_token(params)
+        else:
+            self.cosmic_token = Net.get_cosmic_token(params)
 
         # making sure output directory exists
         if not os.path.exists(params.output):
@@ -44,7 +47,12 @@ def download_all(self, params: Namespace) -> None:
         Net.get_fusiongdb2(self, return_err)
 
         # COSMIC
-        Net.get_cosmic(self.cosmic_token, return_err)
+        if params.qiagen:
+            Logger(__name__).info('Downloading resources from QIAGEN...')
+            Net.get_cosmic_from_qiagen(self.cosmic_token, return_err, params.output)
+        else:
+            Logger(__name__).info('Downloading resources from SANGER...')
+            Net.get_cosmic_from_sanger(self.cosmic_token, return_err)
 
         if len(return_err) > 0:
             raise DownloadException(return_err)
diff --git a/fusion_report/settings.py b/fusion_report/settings.py
index af0d5fc..b9c7377 100644
--- a/fusion_report/settings.py
+++ b/fusion_report/settings.py
@@ -18,7 +18,7 @@ class Settings:
 
     COSMIC: Dict[str, str] = {
         "NAME": "COSMIC",
-        "HOSTNAME": "https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v95",
+        "HOSTNAME": "https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v98",
         "SCHEMA": "Cosmic.sql",
         "FILE": "CosmicFusionExport.tsv.gz",
     }
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ colorlog
 python-rapidjson
 pyyaml>=4.2b1
-Jinja2>=2.10
-MarkupSafe
+Jinja2>=3.0
+MarkupSafe>=2.1.1
 pandas
 openpyxl
 xlrd >= 1.0.0
diff --git a/setup.py b/setup.py
index 23b76c2..96009fa 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
 setup(
     name='fusion_report',
     version=Settings.VERSION,
-    python_requires='>=3.6.*',
+    python_requires='>=3.6',
     description='Tool for parsing outputs from fusion detection tools.',
     long_description=README,
     author='Martin Proks',