From 7821c59138199c828095b6da83d37a6e41c57cc7 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 12 Sep 2024 17:05:36 +0200 Subject: [PATCH 1/2] {bio}[foss/2023b] BiG-SCAPE v1.1.9, HMMER v3.4, FastTree v2.1.11 --- .../b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb | 66 ++++++++++++++++ .../FastTree-2.1.11-GCCcore-13.2.0.eb | 42 ++++++++++ .../h/HMMER/HMMER-3.4-gompi-2023b.eb | 78 +++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb create mode 100644 easybuild/easyconfigs/f/FastTree/FastTree-2.1.11-GCCcore-13.2.0.eb create mode 100644 easybuild/easyconfigs/h/HMMER/HMMER-3.4-gompi-2023b.eb diff --git a/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb b/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb new file mode 100644 index 00000000000..87ffb021f06 --- /dev/null +++ b/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb @@ -0,0 +1,66 @@ +easyblock = 'PythonPackage' + +name = 'BiG-SCAPE' +version = '1.1.9' + +homepage = 'https://bigscape-corason.secondarymetabolites.org/index.html' +description = """BiG-SCAPE and CORASON provide a set of tools to explore the diversity of biosynthetic gene clusters +(BGCs) across large numbers of genomes, by constructing BGC sequence similarity networks, grouping BGCs into gene +cluster families, and exploring gene cluster diversity linked to enzyme phylogenies.""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +github_account = 'medema-group' +source_urls = [GITHUB_SOURCE] +sources = ['v%(version)s.tar.gz'] +patches = [ + 'BiG-SCAPE-1.1.5_use_env_var_for_html.patch', + 'BiG-SCAPE-1.1.5_use_correct_name_for_FastTree.patch', +] +checksums = [ + {'v1.1.9.tar.gz': 'ef0ddb5b433e0b1467ae5f96037fd6d23ebcba6bc08201d1421eba35d072e534'}, + {'BiG-SCAPE-1.1.5_use_env_var_for_html.patch': '540be22396ab982c2aeaaed4ce5acdb8ccb8ce2b31d36bc69d37be7a29c7c42a'}, + {'BiG-SCAPE-1.1.5_use_correct_name_for_FastTree.patch': 
+ 'e1572e4134c6163a3927ac32bd2a39b7f87cf01109f7913b3c55126e2381a771'}, +] + +dependencies = [ + ('Python', '3.11.5'), + ('SciPy-bundle', '2023.11'), + ('Biopython', '1.84'), + ('scikit-learn', '1.4.0'), + ('networkx', '3.2.1'), + ('HMMER', '3.4'), + ('FastTree', '2.1.11'), +] + +use_pip = True +download_dep_fail = True +sanity_pip_check = True + +options = {'modulename': 'bigscape'} + +sanity_check_paths = { + 'files': ['bin/bigscape'], + 'dirs': ['lib/python%(pyshortver)s/site-packages'], +} + +sanity_check_commands = [ + 'bigscape --help', +] + +modextravars = { + 'BIG_SCAPE_HTML_PATH': '%(installdir)s/lib/python%(pyshortver)s/site-packages/BiG-SCAPE', +} + +modloadmsg = "%(name)s needs processed Pfam database to work properly.\n" +modloadmsg += "For this, download the latest 'Pfam-A.hmm.gz' file from the Pfam website " +modloadmsg += "(http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/), " +modloadmsg += "uncompress it and process it using the `hmmpress` command.\n" +modloadmsg += "For data files, like the domains_color_file.tsv and domain_includelist.txt, " +modloadmsg += "one can set the environment variable BIG_SCAPE_DATA_PATH, if that is not set " +modloadmsg += "it will use the directory where the bigscape command is started from.\n" +modloadmsg += "One can copy the domains_color_file.tsv from " +modloadmsg += "%(installdir)s/lib/python%(pyshortver)s/site-packages/BiG-SCAPE/domains_color_file.tsv\n" + +moduleclass = 'bio' diff --git a/easybuild/easyconfigs/f/FastTree/FastTree-2.1.11-GCCcore-13.2.0.eb b/easybuild/easyconfigs/f/FastTree/FastTree-2.1.11-GCCcore-13.2.0.eb new file mode 100644 index 00000000000..b44229828a4 --- /dev/null +++ b/easybuild/easyconfigs/f/FastTree/FastTree-2.1.11-GCCcore-13.2.0.eb @@ -0,0 +1,42 @@ +# Updated from previous config +# Author: Pavel Grochal (INUITS) +# License: GPLv2 + +easyblock = 'CmdCp' + +name = 'FastTree' +version = '2.1.11' + +homepage = 'http://www.microbesonline.org/fasttree/' +description = """FastTree infers 
approximately-maximum-likelihood phylogenetic trees from alignments of nucleotide + or protein sequences. FastTree can handle alignments with up to a million of sequences in a reasonable amount of + time and memory. """ + +toolchain = {'name': 'GCCcore', 'version': '13.2.0'} +toolchainopts = {'openmp': True} + +# HTTPS cert error: +# hostname 'www.microbesonline.org' doesn't match either of 'genomics.lbl.gov', 'mojave.qb3.berkeley.edu', ... +source_urls = ['http://www.microbesonline.org/fasttree/'] +sources = [{'filename': '%(name)s-%(version)s.c', 'extract_cmd': 'cp %s FastTree.c'}] +checksums = ['9026ae550307374be92913d3098f8d44187d30bea07902b9dcbfb123eaa2050f'] + +builddependencies = [('binutils', '2.40')] + +cmds_map = [('%(name)s-%(version)s.c', '$CC -DOPENMP $CFLAGS $LIBS %%(source)s -o %(name)s')] + +files_to_copy = [(['FastTree'], 'bin')] + +# as FastTree is built with OpenMP, the correct binary is FastTreeMP +# the FastTree binary should normally be built without OpenMP, but let's keep it as is for backward compatibility +# see http://www.microbesonline.org/fasttree/#OpenMP +postinstallcmds = ['cd %(installdir)s/bin && ln -s FastTree FastTreeMP'] + +sanity_check_paths = { + 'files': ['bin/FastTree'], + 'dirs': [], +} + +sanity_check_commands = ['FastTree 2>&1 | grep "FastTree Version %(version)s"'] + +moduleclass = 'bio' diff --git a/easybuild/easyconfigs/h/HMMER/HMMER-3.4-gompi-2023b.eb b/easybuild/easyconfigs/h/HMMER/HMMER-3.4-gompi-2023b.eb new file mode 100644 index 00000000000..bb76dc1e937 --- /dev/null +++ b/easybuild/easyconfigs/h/HMMER/HMMER-3.4-gompi-2023b.eb @@ -0,0 +1,78 @@ +## +# EasyBuild reciPY as per https://github.com/easybuilders/easybuild +# +# Copyright:: Copyright 2012-2014 Uni.Lu/LCSB, NTUA +# Authors:: Nils Christian , +# Fotis Georgatos +# Updated by: Filip Kružík (INUITS) +# License:: MIT/GPL +# $Id$ +# +# This work implements a part of the HPCBIOS project and is a +# component of the policy: +# 
https://hpcbios.readthedocs.org/en/latest/HPCBIOS_2012-94.html +## + +easyblock = 'ConfigureMake' + +name = 'HMMER' +version = '3.4' + +homepage = 'http://hmmer.org/' +description = """HMMER is used for searching sequence databases for homologs + of protein sequences, and for making protein sequence alignments. It + implements methods using probabilistic models called profile hidden Markov + models (profile HMMs). Compared to BLAST, FASTA, and other sequence + alignment and database search tools based on older scoring methodology, + HMMER aims to be significantly more accurate and more able to detect remote + homologs because of the strength of its underlying mathematical models. In the + past, this strength came at significant computational expense, but in the new + HMMER3 project, HMMER is now essentially as fast as BLAST.""" + +toolchain = {'name': 'gompi', 'version': '2023b'} + +source_urls = [ + 'http://eddylab.org/software/hmmer/', + 'http://eddylab.org/software/hmmer%(version_major)s/%(version)s/', +] +sources = [SOURCELOWER_TAR_GZ] +checksums = ['ca70d94fd0cf271bd7063423aabb116d42de533117343a9b27a65c17ff06fbf3'] + +builddependencies = [ + ('Python', '3.11.5'), + ('Perl', '5.38.0'), +] + +# meant to replace /usr/bin/perl shebangs with '/usr/bin/env perl'; NOTE(review): no-op as is (no grep -r, xargs echo) +preconfigopts = "grep '/usr/bin/perl' . 
| cut -f1 -d: | xargs echo sed -i 's@/usr/bin/perl@/usr/bin/env perl@g' && " + +configopts = '--enable-mpi' + +buildopts = ' V=1 ' + +testopts = buildopts +runtest = 'check' + +installopts = ' && cd easel && make install' + +local_bin_files = ['alimask', 'esl-afetch', 'esl-alimanip', 'esl-alimap', 'esl-alimask', + 'esl-alimerge', 'esl-alipid', 'esl-alirev', 'esl-alistat', 'esl-compalign', + 'esl-compstruct', 'esl-construct', 'esl-histplot', 'esl-mask', 'esl-reformat', + 'esl-selectn', 'esl-seqrange', 'esl-seqstat', 'esl-sfetch', 'esl-shuffle', + 'esl-ssdraw', 'esl-translate', 'esl-weight', 'hmmalign', 'hmmbuild', + 'hmmconvert', 'hmmemit', 'hmmfetch', 'hmmlogo', 'hmmpgmd', 'hmmpress', + 'hmmscan', 'hmmsearch', 'hmmsim', 'hmmstat', 'jackhmmer', 'makehmmerdb', + 'nhmmer', 'nhmmscan', 'phmmer'] + +sanity_check_paths = { + 'files': ["bin/%s" % x for x in local_bin_files], + 'dirs': ['bin', 'share'], +} + +sanity_check_commands = [ + "esl-construct -h", + "hmmsearch -h", + "nhmmer -h", +] + +moduleclass = 'bio' From 6f238ee392ff66428d1a5e09dd2bad1874e3c553 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 17 Sep 2024 08:50:48 +0200 Subject: [PATCH 2/2] fix value for $BIG_SCAPE_HTML_PATH for BiG-SCAPE v1.1.9 --- .../easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb b/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb index 87ffb021f06..59f7c1a7cd5 100644 --- a/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb +++ b/easybuild/easyconfigs/b/BiG-SCAPE/BiG-SCAPE-1.1.9-foss-2023b.eb @@ -40,9 +40,11 @@ sanity_pip_check = True options = {'modulename': 'bigscape'} +local_lib_py_bigscape_path = 'lib/python%(pyshortver)s/site-packages/bigscape' + sanity_check_paths = { 'files': ['bin/bigscape'], - 'dirs': ['lib/python%(pyshortver)s/site-packages'], + 'dirs': [local_lib_py_bigscape_path], } sanity_check_commands 
= [ @@ -50,7 +52,7 @@ sanity_check_commands = [ ] modextravars = { - 'BIG_SCAPE_HTML_PATH': '%(installdir)s/lib/python%(pyshortver)s/site-packages/BiG-SCAPE', + 'BIG_SCAPE_HTML_PATH': '%(installdir)s/' + local_lib_py_bigscape_path, } modloadmsg = "%(name)s needs processed Pfam database to work properly.\n"