diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 7f4b2b0..6788f91 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -14,10 +14,6 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, linux-gpu-cuda]
-        fullopt: ["True", "False"]
-        exclude:
-          - os: linux-gpu-cuda
-            fullopt: "False"
     runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@v3
@@ -49,23 +45,19 @@ jobs:
           clang -v
         fi
         which h5c++
-        if [[ "$(uname -s)" == "Linux" && "${{ matrix.fullopt }}" != "False" ]];
+        if [[ "$(uname -s)" == "Linux" ]];
         then
-          # always use PGI when fully optimizing
-          # but never when in compatible/low-optimization mode
+          # install PGI but do not source it
+          # the makefile will do it automatically
           ./scripts/install_hpc_sdk.sh &1) | tr A-Z a-z )
+
+ifeq ($(PLATFORM),Darwin)
+all: api main install test_binaries
+
+else
+
+# Note: important that all_nv is after all_cpu_basic and all_nv_avx2 for tests to work
+all: all_cpu_basic all_nv_avx2 all_nv all_combined test_binaries_nv
+
+all_cpu_basic: api_cpu_basic main_cpu_basic install_cpu_basic
+
+all_nv: api_nv main_nv install_nv
+
+all_nv_avx2: api_nv_avx2 main_nv_avx2 install_nv_avx2
+
+all_combined: api_combined install_combined
+
+endif
+
+# Every variant is built in the same src/ directory (each api_* target starts
+# with a clean), and the targets above rely on their prerequisites running in
+# the listed order. Keep this makefile serial even under `make -j`; the
+# sub-makes started through $(MAKE) may still build in parallel.
+.NOTPARALLEL:
+
+clean:
+	-cd test && $(MAKE) clean
+	-cd src && $(MAKE) clean
+	-cd combined && $(MAKE) clean
+
+########### api
+
 api:
-	cd src && make api
+	cd src && $(MAKE) api
 
+# The *_cpu_basic / *_nv / *_nv_avx2 targets select the build flavour through
+# the exported BUILD_VARIANT and BUILD_FULL_OPTIMIZATION variables, which are
+# read by src/Makefile. The nv flavours first source ./setup_nv_h5.sh.
+# NOTE(review): `source` is a bashism - confirm SHELL is bash for this makefile.
+api_cpu_basic:
+	export BUILD_VARIANT=cpu_basic ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) clean && $(MAKE) api
+
+api_nv:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) clean && $(MAKE) api
+
+api_nv_avx2:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv_avx2 ; export BUILD_FULL_OPTIMIZATION=True ; cd src && $(MAKE) clean && $(MAKE) api
+
+api_combined:
+	cd combined && $(MAKE) api
+
+########### main
+
+main:
+	cd src && $(MAKE) main
+
+main_cpu_basic:
+	export BUILD_VARIANT=cpu_basic ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) main
+
+main_nv:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) main
+
+main_nv_avx2:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv_avx2 ; export BUILD_FULL_OPTIMIZATION=True ; cd src && $(MAKE) main
+
+########### install
+
+install:
+	cd src && $(MAKE) install
+
+install_cpu_basic:
+	export BUILD_VARIANT=cpu_basic ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) install
+
+install_nv:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) install
+
+install_nv_avx2:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv_avx2 ; export BUILD_FULL_OPTIMIZATION=True ; cd src && $(MAKE) install
+
+install_combined:
+	cd combined && $(MAKE) install
+
+########### test
+
 test_binaries:
-	cd src && make test_binaries
-	cd test && make test_binaries
+	cd src && $(MAKE) test_binaries
+	cd test && $(MAKE) test_binaries
 
+test_binaries_nv:
+	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && $(MAKE) test_binaries
+	# use the default compiler for the test subdir as it tests the combined shlib
+	cd test && $(MAKE) test_binaries
+
 test:
-	cd src && make test
-	cd test && make test
+	cd src && $(MAKE) test
+	cd test && $(MAKE) test
 
-install:
-	cd src && make install
-
-clean:
-	-cd test && make clean
-	-cd src && make clean
diff --git a/README.md b/README.md
index 7ef0d28..7b64bed 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,17 @@ To restrict the number of cores used, set:
 
     export OMP_NUM_THREADS=nthreads
 
+## Older CPU support
+
+On Linux platforms, Unifrac will auto-detect the CPU generation, i.e., whether it supports the AVX or AVX2 vector instructions.
+To force the most compatible binary variant, one can set:
+
+    export UNIFRAC_MAX_CPU=basic
+
+To check which binary is used (Unifrac will print it to standard output at runtime), set:
+
+    export UNIFRAC_CPU_INFO=Y
+
 ## GPU support
 
 On Linux platforms, Unifrac will run on a GPU, if one is found.
diff --git a/combined/Makefile b/combined/Makefile
new file mode 100644
index 0000000..413ecf1
--- /dev/null
+++ b/combined/Makefile
@@ -0,0 +1,27 @@
+.PHONY: all api main install clean
+
+all: api install
+
+api: libssu.so
+
+# no-op, just for completeness
+main:
+
+# Install into the active conda environment unless PREFIX is given explicitly
+ifeq ($(PREFIX),)
+	PREFIX := $(CONDA_PREFIX)
+endif
+
+libssu.o: libssu.c ../src/api.hpp
+	$(CC) -c $< -o $@ -fPIC
+
+libssu.so: libssu.o
+	$(CC) -shared -o $@ $< -fPIC -ldl
+
+install: libssu.so
+	rm -f ${PREFIX}/lib//libssu.so; cp libssu.so ${PREFIX}/lib/
+	rm -f ${PREFIX}/bin/ssu; cp ssu ${PREFIX}/bin/
+	rm -f ${PREFIX}/bin/faithpd; cp faithpd ${PREFIX}/bin/
+
+clean:
+	rm -f libssu.o libssu.so
diff --git a/combined/faithpd b/combined/faithpd
new file mode 100755
index 0000000..b6bbf48
--- /dev/null
+++ b/combined/faithpd
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+#default
+FPD=faithpd_nv
+
+# Need at least AVX to support GPUs
+if [ "${UNIFRAC_MAX_CPU}" == "basic" ]; then
+  has_no_avx=1
+else
+  cat /proc/cpuinfo |grep flags |head -1 | grep -q avx
+  has_no_avx=$?
+fi
+
+if [ "${has_no_avx}" -eq 1 ]; then
+  FPD=faithpd_cpu_basic
+else
+
+if [ "${UNIFRAC_MAX_CPU}" == "avx" ]; then
+  has_no_avx2=1
+else
+  cat /proc/cpuinfo |grep flags |head -1 | grep -q avx2
+  has_no_avx2=$?
+fi
+
+if [ "${has_no_avx2}" -eq 1 ]; then
+  FPD=faithpd_nv
+else
+  FPD=faithpd_nv_avx2
+fi # if "${has_no_avx2}" -eq 1
+
+
+fi # if "${has_no_avx}" -eq 1
+
+if [ "${UNIFRAC_CPU_INFO}" == "Y" ]; then
+  echo "INFO (unifrac): Using executable" ${FPD}
+fi
+
+#
+# The selected variant is expected next to this wrapper script
+#
+BASEDIR=$(dirname "$0")
+
+exec "${BASEDIR}/${FPD}" "$@"
+
diff --git a/combined/libssu.c b/combined/libssu.c
new file mode 100644
index 0000000..b3742cd
--- /dev/null
+++ b/combined/libssu.c
@@ -0,0 +1,415 @@
+/*
+ * BSD 3-Clause License
+ *
+ * Copyright (c) 2023, UniFrac development team.
+ * All rights reserved.
+ * + * See LICENSE file for more details + */ + +#include +#include +#include +#include + +#include "../src/api.hpp" + +/* + * Implement wrappers around all the EXTERN functions + * defined in api.hpp. + * + */ + +/*********************************************************************/ + +/* Pick the right libssu implementation */ +static const char *ssu_get_lib_name() { + __builtin_cpu_init (); + bool has_avx = __builtin_cpu_supports ("avx"); + bool has_avx2 = __builtin_cpu_supports ("avx2"); + + const char* env_max_cpu = getenv("UNIFRAC_MAX_CPU"); + + if ((env_max_cpu!=NULL) && (strcmp(env_max_cpu,"basic")==0)) { + has_avx = false; + has_avx2 = false; + } + + const char *ssu = "libssu_nv.so"; + if (has_avx) { + if ((env_max_cpu!=NULL) && (strcmp(env_max_cpu,"avx")==0)) { + has_avx2 = false; + } + if (has_avx2) { + ssu="libssu_nv_avx2.so"; + } else { + ssu="libssu_nv.so"; + } + } else { // no avx + const char* env_gpu_info = getenv("UNIFRAC_GPU_INFO"); + if ((env_gpu_info!=NULL) && (env_gpu_info[0]=='Y')) { + printf("INFO (unifrac): CPU too old, disabling GPU\n"); + } + ssu="libssu_cpu_basic.so"; + } + + const char* env_cpu_info = getenv("UNIFRAC_CPU_INFO"); + if ((env_cpu_info!=NULL) && (env_cpu_info[0]=='Y')) { + printf("INFO (unifrac): Using shared library %s\n",ssu); + } + return ssu; +} + +/*********************************************************************/ + +/* Handle pointing to the approriate libssu implementing the functionality + * Initialized on first use. 
*/ +static void *dl_handle = NULL; + +static void ssu_load(const char *fncname, + void **dl_ptr) { + char *error; + + if (dl_handle==NULL) { + dl_handle = dlopen(ssu_get_lib_name(), RTLD_LAZY); + if (!dl_handle) { + fputs(dlerror(), stderr); + exit(1); + } + } + + *dl_ptr = dlsym(dl_handle, fncname); + if ((error = dlerror()) != NULL) { + fputs(error, stderr); + exit(1); + } +} + +/*********************************************************************/ +/* All the functons below are wrappers + * and each has its own function pointer + * that is initialized on first use */ +/*********************************************************************/ + +static void (*dl_ssu_set_random_seed)(unsigned int) = NULL; +void ssu_set_random_seed(unsigned int new_seed) { + if (dl_ssu_set_random_seed==NULL) ssu_load("ssu_set_random_seed", (void **) &dl_ssu_set_random_seed); + + (*dl_ssu_set_random_seed)(new_seed); +} + +/*********************************************************************/ + +static void (*dl_destroy_mat)(mat_t**) = NULL; +static void (*dl_destroy_mat_full_fp64)(mat_full_fp64_t**) = NULL; +static void (*dl_destroy_mat_full_fp32)(mat_full_fp32_t**) = NULL; +static void (*dl_destroy_partial_mat)(partial_mat_t**) = NULL; +static void (*dl_destroy_partial_dyn_mat)(partial_dyn_mat_t**) = NULL; +static void (*dl_destroy_results_vec)(r_vec**) = NULL; + +void destroy_mat(mat_t** result) { + if (dl_destroy_mat==NULL) ssu_load("destroy_mat", (void **) &dl_destroy_mat); + + (*dl_destroy_mat)(result); +} + +void destroy_mat_full_fp64(mat_full_fp64_t** result) { + if (dl_destroy_mat_full_fp64==NULL) ssu_load("destroy_mat_full_fp64", (void **) &dl_destroy_mat_full_fp64); + + (*dl_destroy_mat_full_fp64)(result); +} + +void destroy_mat_full_fp32(mat_full_fp32_t** result) { + if (dl_destroy_mat_full_fp32==NULL) ssu_load("destroy_mat_full_fp32", (void **) &dl_destroy_mat_full_fp32); + + (*dl_destroy_mat_full_fp32)(result); +} + +void destroy_partial_mat(partial_mat_t** result) { + 
   if (dl_destroy_partial_mat==NULL) ssu_load("destroy_partial_mat", (void **) &dl_destroy_partial_mat);
+
+   (*dl_destroy_partial_mat)(result);
+}
+
+void destroy_partial_dyn_mat(partial_dyn_mat_t** result) {
+   if (dl_destroy_partial_dyn_mat==NULL) ssu_load("destroy_partial_dyn_mat", (void **) &dl_destroy_partial_dyn_mat);
+
+   (*dl_destroy_partial_dyn_mat)(result);
+}
+
+void destroy_results_vec(r_vec** result) {
+   if (dl_destroy_results_vec==NULL) ssu_load("destroy_results_vec", (void **) &dl_destroy_results_vec);
+
+   (*dl_destroy_results_vec)(result);
+}
+
+/*********************************************************************/
+
+static ComputeStatus (*dl_one_off)(const char*, const char*, const char*, bool, double, bool, unsigned int, mat_t**) = NULL; /* filled in by ssu_load on first call */
+ComputeStatus one_off(const char* biom_filename, const char* tree_filename,
+                      const char* unifrac_method, bool variance_adjust, double alpha,
+                      bool bypass_tips, unsigned int n_substeps, mat_t** result) {
+   if (dl_one_off==NULL) ssu_load("one_off", (void **) &dl_one_off);
+
+   return (*dl_one_off)(biom_filename, tree_filename, unifrac_method, variance_adjust, alpha, bypass_tips, n_substeps, result);
+}
+
+/*********************************************************************/
+/* one_off_*inmem*: same forwarding pattern, taking support_biom_t / support_bptree_t inputs */
+static ComputeStatus (*dl_one_off_matrix_inmem_v2)(const support_biom_t *, const support_bptree_t *, const char*, bool, double,
+                                                   bool, unsigned int, unsigned int, bool, const char *, mat_full_fp64_t**) = NULL;
+static ComputeStatus (*dl_one_off_inmem)(const support_biom_t *, const support_bptree_t *, const char*, bool, double,
+                                         bool, unsigned int, mat_full_fp64_t**) = NULL;
+static ComputeStatus (*dl_one_off_matrix_inmem_fp32_v2)(const support_biom_t *, const support_bptree_t *, const char*, bool, double,
+                                                        bool, unsigned int, unsigned int, bool, const char *, mat_full_fp32_t**) = NULL;
+static ComputeStatus (*dl_one_off_inmem_fp32)(const support_biom_t *, const support_bptree_t *, const char*, bool, double,
+                                              bool, unsigned int, mat_full_fp32_t**) = NULL;
+
+ComputeStatus one_off_matrix_inmem_v2(const support_biom_t *table_data, const support_bptree_t *tree_data,
+                                      const char* unifrac_method, bool variance_adjust, double alpha,
+                                      bool bypass_tips, unsigned int n_substeps,
+                                      unsigned int subsample_depth, bool subsample_with_replacement, const char *mmap_dir,
+                                      mat_full_fp64_t** result) {
+   if (dl_one_off_matrix_inmem_v2==NULL) ssu_load("one_off_matrix_inmem_v2", (void **) &dl_one_off_matrix_inmem_v2);
+
+   return (*dl_one_off_matrix_inmem_v2)(table_data, tree_data, unifrac_method, variance_adjust, alpha,
+                                        bypass_tips, n_substeps, subsample_depth, subsample_with_replacement, mmap_dir, result);
+}
+
+ComputeStatus one_off_inmem(const support_biom_t *table_data, const support_bptree_t *tree_data,
+                            const char* unifrac_method, bool variance_adjust, double alpha,
+                            bool bypass_tips, unsigned int n_substeps, mat_full_fp64_t** result) {
+   if (dl_one_off_inmem==NULL) ssu_load("one_off_inmem", (void **) &dl_one_off_inmem);
+
+   return (*dl_one_off_inmem)(table_data, tree_data, unifrac_method, variance_adjust, alpha, bypass_tips, n_substeps, result);
+}
+
+ComputeStatus one_off_matrix_inmem_fp32_v2(const support_biom_t *table_data, const support_bptree_t *tree_data,
+                                           const char* unifrac_method, bool variance_adjust, double alpha,
+                                           bool bypass_tips, unsigned int n_substeps,
+                                           unsigned int subsample_depth, bool subsample_with_replacement, const char *mmap_dir,
+                                           mat_full_fp32_t** result) {
+   if (dl_one_off_matrix_inmem_fp32_v2==NULL) ssu_load("one_off_matrix_inmem_fp32_v2", (void **) &dl_one_off_matrix_inmem_fp32_v2);
+
+   return (*dl_one_off_matrix_inmem_fp32_v2)(table_data, tree_data, unifrac_method, variance_adjust, alpha,
+                                             bypass_tips, n_substeps, subsample_depth, subsample_with_replacement, mmap_dir, result);
+}
+
+ComputeStatus one_off_inmem_fp32(const support_biom_t *table_data, const support_bptree_t *tree_data,
+                                 const char* unifrac_method, bool variance_adjust, double alpha,
+                                 bool bypass_tips, unsigned int n_substeps, mat_full_fp32_t** result) {
+   if (dl_one_off_inmem_fp32==NULL) ssu_load("one_off_inmem_fp32", (void **) &dl_one_off_inmem_fp32);
+
+   return (*dl_one_off_inmem_fp32)(table_data, tree_data, unifrac_method, variance_adjust, alpha, bypass_tips, n_substeps, result);
+}
+
+/*********************************************************************/
+/* one_off_matrix*: same forwarding pattern, taking file names and returning full matrices */
+static ComputeStatus (*dl_one_off_matrix_v2)(const char*, const char*, const char*, bool, double,
+                                             bool, unsigned int, unsigned int, bool, const char *, mat_full_fp64_t**) = NULL;
+static ComputeStatus (*dl_one_off_matrix)(const char*, const char*, const char*, bool, double,
+                                          bool, unsigned int, const char *, mat_full_fp64_t**) = NULL;
+static ComputeStatus (*dl_one_off_matrix_fp32_v2)(const char*, const char*, const char*, bool, double,
+                                                  bool, unsigned int, unsigned int, bool, const char *, mat_full_fp32_t**) = NULL;
+static ComputeStatus (*dl_one_off_matrix_fp32)(const char*, const char*, const char*, bool, double,
+                                               bool, unsigned int, const char *, mat_full_fp32_t**) = NULL;
+
+ComputeStatus one_off_matrix_v2(const char* biom_filename, const char* tree_filename,
+                                const char* unifrac_method, bool variance_adjust, double alpha,
+                                bool bypass_tips, unsigned int n_substeps,
+                                unsigned int subsample_depth, bool subsample_with_replacement, const char *mmap_dir,
+                                mat_full_fp64_t** result) {
+   if (dl_one_off_matrix_v2==NULL) ssu_load("one_off_matrix_v2", (void **) &dl_one_off_matrix_v2);
+
+   return (*dl_one_off_matrix_v2)(biom_filename, tree_filename, unifrac_method, variance_adjust, alpha,
+                                  bypass_tips, n_substeps, subsample_depth, subsample_with_replacement, mmap_dir, result);
+}
+
+ComputeStatus one_off_matrix(const char* biom_filename, const char* tree_filename,
+                             const char* unifrac_method, bool variance_adjust, double alpha,
+                             bool bypass_tips, unsigned int n_substeps,
+                             const char *mmap_dir,
+                             mat_full_fp64_t** result) {
+   if (dl_one_off_matrix==NULL) ssu_load("one_off_matrix", (void **) &dl_one_off_matrix);
+
+   return (*dl_one_off_matrix)(biom_filename, tree_filename, unifrac_method, variance_adjust, alpha,
+                               bypass_tips, n_substeps, mmap_dir, result);
+}
+
+ComputeStatus one_off_matrix_fp32_v2(const char* biom_filename, const char* tree_filename,
+                                     const char* unifrac_method, bool variance_adjust, double alpha,
+                                     bool bypass_tips, unsigned int n_substeps,
+                                     unsigned int subsample_depth, bool subsample_with_replacement, const char *mmap_dir,
+                                     mat_full_fp32_t** result) {
+   if (dl_one_off_matrix_fp32_v2==NULL) ssu_load("one_off_matrix_fp32_v2", (void **) &dl_one_off_matrix_fp32_v2);
+
+   return (*dl_one_off_matrix_fp32_v2)(biom_filename, tree_filename, unifrac_method, variance_adjust, alpha,
+                                       bypass_tips, n_substeps, subsample_depth, subsample_with_replacement, mmap_dir, result);
+}
+
+ComputeStatus one_off_matrix_fp32(const char* biom_filename, const char* tree_filename,
+                                  const char* unifrac_method, bool variance_adjust, double alpha,
+                                  bool bypass_tips, unsigned int n_substeps,
+                                  const char *mmap_dir,
+                                  mat_full_fp32_t** result) {
+   if (dl_one_off_matrix_fp32==NULL) ssu_load("one_off_matrix_fp32", (void **) &dl_one_off_matrix_fp32);
+
+   return (*dl_one_off_matrix_fp32)(biom_filename, tree_filename, unifrac_method, variance_adjust, alpha,
+                                    bypass_tips, n_substeps, mmap_dir, result);
+}
+
+/*********************************************************************/
+
+static ComputeStatus (*dl_faith_pd_one_off)(const char*, const char*, r_vec**) = NULL;
+ComputeStatus faith_pd_one_off(const char* biom_filename, const char* tree_filename,
+                               r_vec** result) {
+   if (dl_faith_pd_one_off==NULL) ssu_load("faith_pd_one_off", (void **) &dl_faith_pd_one_off);
+
+   return (*dl_faith_pd_one_off)(biom_filename, tree_filename, result);
+}
+
+/*********************************************************************/
+/* unifrac_*to_file*: same forwarding pattern; the output file name is passed straight through */
+static ComputeStatus (*dl_unifrac_to_file_v2)(const char*, const char*, const char*, const char*, bool, double,
+                                              bool, unsigned int, const char*, unsigned int, bool,
+                                              unsigned int, unsigned int, const char *, const char *, const char *) = NULL;
+static ComputeStatus (*dl_unifrac_to_file)(const char*, const char*, const char*, const char*, bool, double,
+                                           bool, unsigned int, const char*, unsigned int, const char *) = NULL;
+static ComputeStatus (*dl_unifrac_multi_to_file_v2)(const char*, const char*, const char*, const char*, bool, double,
+                                                    bool, unsigned int, const char*, unsigned int, unsigned int, bool,
+                                                    unsigned int, unsigned int, const char *, const char *, const char *) = NULL;
+
+ComputeStatus unifrac_to_file_v2(const char* biom_filename, const char* tree_filename, const char* out_filename,
+                                 const char* unifrac_method, bool variance_adjust, double alpha,
+                                 bool bypass_tips, unsigned int n_substeps, const char* format,
+                                 unsigned int subsample_depth, bool subsample_with_replacement,
+                                 unsigned int pcoa_dims,
+                                 unsigned int permanova_perms, const char *grouping_filename, const char *grouping_columns,
+                                 const char *mmap_dir){
+   if (dl_unifrac_to_file_v2==NULL) ssu_load("unifrac_to_file_v2", (void **) &dl_unifrac_to_file_v2);
+
+   return (*dl_unifrac_to_file_v2)(biom_filename, tree_filename, out_filename, unifrac_method, variance_adjust, alpha,
+                                   bypass_tips, n_substeps, format, subsample_depth, subsample_with_replacement,
+                                   pcoa_dims, permanova_perms, grouping_filename, grouping_columns, mmap_dir);
+}
+
+ComputeStatus unifrac_to_file(const char* biom_filename, const char* tree_filename, const char* out_filename,
+                              const char* unifrac_method, bool variance_adjust, double alpha,
+                              bool bypass_tips, unsigned int n_substeps, const char* format,
+                              unsigned int pcoa_dims, const char *mmap_dir) {
+   if (dl_unifrac_to_file==NULL) ssu_load("unifrac_to_file", (void **) &dl_unifrac_to_file);
+
+   return (*dl_unifrac_to_file)(biom_filename, tree_filename, out_filename, unifrac_method, variance_adjust, alpha,
+                                bypass_tips, n_substeps, format, pcoa_dims, mmap_dir);
+}
+
+ComputeStatus unifrac_multi_to_file_v2(const char* biom_filename, const char* tree_filename, const char* out_filename,
+                                       const char* unifrac_method, bool variance_adjust, double alpha,
+                                       bool bypass_tips, unsigned int n_substeps, const char* format,
+                                       unsigned int n_subsamples, unsigned int subsample_depth, bool subsample_with_replacement,
+                                       unsigned int pcoa_dims,
+                                       unsigned int permanova_perms, const char *grouping_filename, const char *grouping_columns,
+                                       const char *mmap_dir) {
+   if (dl_unifrac_multi_to_file_v2==NULL) ssu_load("unifrac_multi_to_file_v2", (void **) &dl_unifrac_multi_to_file_v2);
+
+   return (*dl_unifrac_multi_to_file_v2)(biom_filename, tree_filename, out_filename, unifrac_method, variance_adjust, alpha,
+                                         bypass_tips, n_substeps, format, n_subsamples, subsample_depth, subsample_with_replacement,
+                                         pcoa_dims, permanova_perms, grouping_filename, grouping_columns, mmap_dir);
+}
+
+
+/*********************************************************************/
+/* compute_permanova_fp64/fp32: same forwarding pattern; they differ only in double vs float buffers */
+static ComputeStatus (*dl_compute_permanova_fp64)(const char *, unsigned int, const char**, mat_full_fp64_t *, unsigned int, double *, double *) = NULL;
+static ComputeStatus (*dl_compute_permanova_fp32)(const char *, unsigned int, const char**, mat_full_fp32_t *, unsigned int, float *, float *) = NULL;
+
+ComputeStatus compute_permanova_fp64(const char *grouping_filename, unsigned int n_columns, const char* *columns,
+                                     mat_full_fp64_t * result, unsigned int permanova_perms,
+                                     double *fstats, double *pvalues) {
+   if (dl_compute_permanova_fp64==NULL) ssu_load("compute_permanova_fp64", (void **) &dl_compute_permanova_fp64);
+
+   return (*dl_compute_permanova_fp64)(grouping_filename, n_columns, columns, result, permanova_perms, fstats, pvalues);
+}
+
+ComputeStatus compute_permanova_fp32(const char *grouping_filename, unsigned int n_columns, const char* * columns,
+                                     mat_full_fp32_t * result, unsigned int permanova_perms,
+                                     float *fstats, float *pvalues) {
+   if (dl_compute_permanova_fp32==NULL) ssu_load("compute_permanova_fp32", (void **) &dl_compute_permanova_fp32);
+
+   return (*dl_compute_permanova_fp32)(grouping_filename, n_columns, columns, result, permanova_perms, fstats, pvalues);
+}
+
+/*********************************************************************/
+/* write_mat / write_mat_from_matrix / write_vec: same forwarding pattern, returning IOStatus */
+static IOStatus (*dl_write_mat)(const char*, mat_t*) = NULL;
+static IOStatus (*dl_write_mat_from_matrix)(const char*, mat_full_fp64_t*) = NULL;
+static IOStatus (*dl_write_vec)(const char*, r_vec*) = NULL;
+
+IOStatus write_mat(const char* filename, mat_t* result) {
+   if (dl_write_mat==NULL) ssu_load("write_mat", (void **) &dl_write_mat);
+
+   return (*dl_write_mat)(filename, result);
+}
+
+IOStatus write_mat_from_matrix(const char* filename, mat_full_fp64_t* result) {
+   if (dl_write_mat_from_matrix==NULL) ssu_load("write_mat_from_matrix", (void **) &dl_write_mat_from_matrix);
+
+   return (*dl_write_mat_from_matrix)(filename, result);
+}
+
+IOStatus write_vec(const char* filename, r_vec* result) {
+   if (dl_write_vec==NULL) ssu_load("write_vec", (void **) &dl_write_vec);
+
+   return (*dl_write_vec)(filename, result);
+}
+
+/*********************************************************************/
+/* write_mat_from_matrix_hdf5_*: same forwarding pattern for the fp64/fp32 variants, with and without stat arrays */
+static IOStatus (*dl_write_mat_from_matrix_hdf5_fp64_v2)(const char*, mat_full_fp64_t*, unsigned int, int, unsigned int,
+                                                         const char* *, const char**, const double *, const double *, const unsigned int *,
+                                                         const char**, const unsigned int *) = NULL;
+static IOStatus (*dl_write_mat_from_matrix_hdf5_fp64)(const char*, mat_full_fp64_t*, unsigned int, int) = NULL;
+static IOStatus (*dl_write_mat_from_matrix_hdf5_fp32_v2)(const char*, mat_full_fp32_t*, unsigned int, int, unsigned int,
+                                                         const char**, const char**, const float *, const float *, const unsigned int *,
+                                                         const char**, const unsigned int *) = NULL;
+static IOStatus (*dl_write_mat_from_matrix_hdf5_fp32)(const char*, mat_full_fp32_t*, unsigned int, int) = NULL;
+
+IOStatus write_mat_from_matrix_hdf5_fp64_v2(const char* output_filename, mat_full_fp64_t* result,
+                                            unsigned int pcoa_dims, int save_dist,
+                                            unsigned int stat_n_vals,
+                                            const char*  *stat_method_arr,     const char*        *stat_name_arr,
+                                            const double *stat_val_arr,        const double       *stat_pval_arr, const unsigned int *stat_perm_count_arr,
+                                            const char*  *stat_group_name_arr, const unsigned int *stat_group_count_arr) {
+   if (dl_write_mat_from_matrix_hdf5_fp64_v2==NULL) ssu_load("write_mat_from_matrix_hdf5_fp64_v2", (void **) &dl_write_mat_from_matrix_hdf5_fp64_v2);
+
+   return (*dl_write_mat_from_matrix_hdf5_fp64_v2)(output_filename, result, pcoa_dims, save_dist, stat_n_vals,
+                                                   stat_method_arr, stat_name_arr, stat_val_arr, stat_pval_arr, stat_perm_count_arr,
+                                                   stat_group_name_arr, stat_group_count_arr);
+}
+
+IOStatus write_mat_from_matrix_hdf5_fp64(const char* filename, mat_full_fp64_t* result, unsigned int pcoa_dims, int save_dist) {
+   if (dl_write_mat_from_matrix_hdf5_fp64==NULL) ssu_load("write_mat_from_matrix_hdf5_fp64", (void **) &dl_write_mat_from_matrix_hdf5_fp64);
+
+   return (*dl_write_mat_from_matrix_hdf5_fp64)(filename, result, pcoa_dims, save_dist);
+}
+
+IOStatus write_mat_from_matrix_hdf5_fp32_v2(const char* output_filename, mat_full_fp32_t* result,
+                                            unsigned int pcoa_dims, int save_dist,
+                                            unsigned int stat_n_vals,
+                                            const char*  *stat_method_arr,     const char*        *stat_name_arr,
+                                            const float  *stat_val_arr,        const float        *stat_pval_arr, const unsigned int *stat_perm_count_arr,
+                                            const char*  *stat_group_name_arr, const unsigned int *stat_group_count_arr) {
+   if (dl_write_mat_from_matrix_hdf5_fp32_v2==NULL) ssu_load("write_mat_from_matrix_hdf5_fp32_v2", (void **) &dl_write_mat_from_matrix_hdf5_fp32_v2);
+
+   return (*dl_write_mat_from_matrix_hdf5_fp32_v2)(output_filename, result, pcoa_dims, save_dist, stat_n_vals,
+                                                   stat_method_arr, stat_name_arr, stat_val_arr, stat_pval_arr, stat_perm_count_arr,
+                                                   stat_group_name_arr, stat_group_count_arr);
+}
+
+IOStatus write_mat_from_matrix_hdf5_fp32(const char* filename, mat_full_fp32_t* result, unsigned int
pcoa_dims, int save_dist) { + if (dl_write_mat_from_matrix_hdf5_fp32==NULL) ssu_load("write_mat_from_matrix_hdf5_fp32", (void **) &dl_write_mat_from_matrix_hdf5_fp32); + + return (*dl_write_mat_from_matrix_hdf5_fp32)(filename, result, pcoa_dims, save_dist); +} + + diff --git a/combined/ssu b/combined/ssu new file mode 100755 index 0000000..1e8f8b1 --- /dev/null +++ b/combined/ssu @@ -0,0 +1,47 @@ +#!/bin/bash + +#default +SSU=ssu_nv + +# Need at least AVX to support GPUs +if [ "${UNIFRAC_MAX_CPU}" == "basic" ]; then + has_no_avx=1 +else + cat /proc/cpuinfo |grep flags |head -1 | grep -q avx + has_no_avx=$? +fi + +if [ "${has_no_avx}" -eq 1 ]; then + if [ "${UNIFRAC_GPU_INFO}" == "Y" ]; then + echo "INFO (unifrac): CPU too old, disabling GPU" + fi + SSU=ssu_cpu_basic +else + +if [ "${UNIFRAC_MAX_CPU}" == "avx" ]; then + has_no_avx2=1 +else + cat /proc/cpuinfo |grep flags |head -1 | grep -q avx2 + has_no_avx2=$? +fi + +if [ "${has_no_avx2}" -eq 1 ]; then + SSU=ssu_nv +else + SSU=ssu_nv_avx2 +fi # if "${has_no_avx2}" -eq 1 + + +fi # if "${has_no_avx}" -eq 1 + +if [ "${UNIFRAC_CPU_INFO}" == "Y" ]; then + echo "INFO (unifrac): Using executable" ${SSU} +fi + +# +# +# +BASEDIR=$(dirname "$0") + +exec ${BASEDIR}/${SSU} "$@" + diff --git a/src/Makefile b/src/Makefile index 1d5672b..f202e20 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,10 +1,18 @@ -.PHONY: all make api test test_binaries install clean rapi_test +.PHONY: all main api test test_binaries install clean rapi_test CXX := h5c++ PLATFORM := $(shell uname -s) COMPILER := $(shell ($(CXX) -v 2>&1) | tr A-Z a-z ) +ifeq ($(BUILD_VARIANT),) + SSU = ssu + FPD = faithpd +else + SSU = ssu_$(BUILD_VARIANT) + FPD = faithpd_$(BUILD_VARIANT) +endif + ifdef DEBUG ifneq (,$(findstring pgi,$(COMPILER))) OPT = -g @@ -32,7 +40,7 @@ BLASLIB=-llapacke -lcblas ifeq ($(PLATFORM),Darwin) AVX2 := $(shell sysctl -a | grep -c AVX2) - LDDFLAGS = -dynamiclib -install_name @rpath/libssu.so + LDDFLAGS = -dynamiclib -install_name 
@rpath/lib$(SSU).so else AVX2 := $(shell grep "^flags" /proc/cpuinfo | head -n 1 | grep -c avx2) LDDFLAGS = -shared @@ -114,9 +122,9 @@ CPPFLAGS += -Wall -std=c++14 -pedantic -I. $(OPT) -fPIC -L$(PREFIX)/lib all: api main install -main: ssu faithpd +main: $(SSU) $(FPD) -api: libssu.so +api: lib$(SSU).so test_binaries: test_su test_ska test_api @@ -127,14 +135,14 @@ test_ska: test_ska.cpp tree.o tsv.o test_su.cpp biom.o biom_inmem.o biom_subsamp test_api: test_api.cpp tree.o tsv.o test_su.cpp biom.o biom_inmem.o biom_subsampled.o unifrac.o skbio_alt.o api.o $(UNIFRAC_FILES) $(CXX) $(CPPFLAGS) $(EXEFLAGS) test_api.cpp -o test_api tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o skbio_alt.o api.o -llz4 $(BLASLIB) -lpthread -ssu: su.cpp tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES) - $(CXX) $(CPPFLAGS) $(EXEFLAGS) su.cpp -o ssu tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lhdf5_cpp -llz4 $(BLASLIB) -lpthread +$(SSU): su.cpp tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES) + $(CXX) $(CPPFLAGS) $(EXEFLAGS) su.cpp -o $(SSU) tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lhdf5_cpp -llz4 $(BLASLIB) -lpthread -faithpd: faithpd.cpp tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES) - $(CXX) $(CPPFLAGS) $(EXEFLAGS) faithpd.cpp -o faithpd tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lhdf5_cpp -llz4 $(BLASLIB) -lpthread +$(FPD): faithpd.cpp tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES) + $(CXX) $(CPPFLAGS) $(EXEFLAGS) faithpd.cpp -o $(FPD) tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lhdf5_cpp -llz4 $(BLASLIB) 
-lpthread -libssu.so: tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES) - $(CXX) $(LDDFLAGS) -o libssu.so tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lc -llz4 $(BLASLIB) -L$(PREFIX)/lib -noshlib -lhdf5_cpp -lhdf5_hl_cpp -lhdf5_hl -lhdf5 +lib$(SSU).so: tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES) + $(CXX) $(LDDFLAGS) -o lib$(SSU).so tree.o biom.o biom_inmem.o biom_subsampled.o tsv.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lc -llz4 $(BLASLIB) -L$(PREFIX)/lib -noshlib -lhdf5_cpp -lhdf5_hl_cpp -lhdf5_hl -lhdf5 api.o: api.cpp api.hpp unifrac.hpp skbio_alt.hpp biom.hpp biom_inmem.hpp biom_subsampled.hpp tree.hpp tsv.hpp $(CXX) $(CPPFLAGS) api.cpp -c -o api.o -fPIC @@ -153,13 +161,14 @@ test: test_binaries ./test_ska ./test_api -install: libssu.so ssu faithpd - cp libssu.so ${PREFIX}/lib/ - cp ssu faithpd ${PREFIX}/bin/ +install: lib$(SSU).so $(SSU) $(FPD) + rm -f ${PREFIX}/lib//lib$(SSU).so; cp lib$(SSU).so ${PREFIX}/lib/ + rm -f ${PREFIX}/bin/$(SSU); cp $(SSU) ${PREFIX}/bin/ + rm -f ${PREFIX}/bin/$(FPD); cp $(FPD) ${PREFIX}/bin/ mkdir -p ${PREFIX}/include/unifrac - cp task_parameters.hpp ${PREFIX}/include/unifrac/ - cp api.hpp ${PREFIX}/include/unifrac/ - cp status_enum.hpp ${PREFIX}/include/unifrac/ + rm -f ${PREFIX}/include/unifrac/task_parameters.hpp; cp task_parameters.hpp ${PREFIX}/include/unifrac/ + rm -f ${PREFIX}/include/unifrac/api.hpp; cp api.hpp ${PREFIX}/include/unifrac/ + rm -f ${PREFIX}/include/unifrac/status_enum.hpp; cp status_enum.hpp ${PREFIX}/include/unifrac/ rapi_test: main mkdir -p ~/.R @@ -180,5 +189,5 @@ rapi_test: main rm -f *.o clean: - -rm -f *.o ssu faithpd test_su test_ska test_api libssu.so + -rm -f *.o $(SSU) $(FPD) test_su test_ska test_api lib$(SSU).so diff --git a/src/api.hpp b/src/api.hpp index 2f2b899..29f00af 100644 --- a/src/api.hpp +++ b/src/api.hpp @@ 
-11,6 +11,14 @@
 #define EXTERN
 #endif
 
+/*
+ * Keep this header and the runtime dispatcher in sync.
+ * Note: Each function declared EXTERN must both have
+ *       an implementation in api.cpp, AND
+ *       a forwarding wrapper in ../combined/libssu.c
+ * Declarations without EXTERN below have no wrapper in libssu.c.
+ */
+
 #define PARTIAL_MAGIC "SSU-PARTIAL-01"
 #define PARTIAL_MAGIC_V2 0x088ABA02
 
@@ -453,7 +461,7 @@ EXTERN ComputeStatus compute_permanova_fp64(const char *grouping_filename, unsig
  *      okay           : no problems encountered
  *      grouping_missing : the filename for the grouping does not exist or is not valid
  */
-EXTERN ComputeStatus compute_permanova_fp32(const char *grouping_filename, unsigned int n_columns, const char* const* columns,
+EXTERN ComputeStatus compute_permanova_fp32(const char *grouping_filename, unsigned int n_columns, const char* * columns,
                                             mat_full_fp32_t * result, unsigned int permanova_perms,
                                             float *fstats, float *pvalues);
 
@@ -480,7 +488,7 @@ EXTERN IOStatus write_mat(const char* filename, mat_t* result);
  *      write_okay : no problems
  */
 // backwards compatible version, deprecated
-EXTERN IOStatus write_mat_hdf5_fp64(const char* filename, mat_t* result, unsigned int pcoa_dims, int save_dist);
+IOStatus write_mat_hdf5_fp64(const char* filename, mat_t* result, unsigned int pcoa_dims, int save_dist);
 
 /* Write a matrix object using hdf5 format, using fp32 precision
  *
@@ -494,7 +502,7 @@ EXTERN IOStatus write_mat_hdf5_fp64(const char* filename, mat_t* result, unsigne
  *      write_okay : no problems
  */
 // backwards compatible version, deprecated
-EXTERN IOStatus write_mat_hdf5_fp32(const char* filename, mat_t* result, unsigned int pcoa_dims, int save_dist);
+IOStatus write_mat_hdf5_fp32(const char* filename, mat_t* result, unsigned int pcoa_dims, int save_dist);
 
 /* Write a matrix object
  *
@@ -618,7 +626,7 @@ EXTERN IOStatus write_vec(const char* filename, r_vec* result);
  *      unknown_method  : the requested method is unknown.
  */
 
-EXTERN ComputeStatus partial(const char* biom_filename, const char* tree_filename,
+ComputeStatus partial(const char* biom_filename, const char* tree_filename,
                              const char* unifrac_method, bool variance_adjust, double alpha,
                              bool bypass_tips, unsigned int n_substeps, unsigned int stripe_start,
                              unsigned int stripe_stop, partial_mat_t** result);
@@ -664,7 +672,7 @@ EXTERN ComputeStatus partial(const char* biom_filename, const char* tree_filenam
  * ### FOOTER ###
  * <MAGIC_LEN + 1>        : char, e.g., SSU-PARTIAL-01, same as starting magic
  */
-EXTERN IOStatus write_partial(const char* filename, const partial_mat_t* result);
+IOStatus write_partial(const char* filename, const partial_mat_t* result);
 
 /* Read a partial matrix object
  *
@@ -679,7 +687,7 @@ EXTERN IOStatus write_partial(const char* filename, const partial_mat_t* result)
  *      bad_header     : header seems malformed
  *      unexpected_end : format end not found in expected location
  */
-EXTERN IOStatus read_partial(const char* filename, partial_mat_t** result);
+IOStatus read_partial(const char* filename, partial_mat_t** result);
 
 /* Read a partial matrix object header
  *
@@ -694,7 +702,7 @@ EXTERN IOStatus read_partial(const char* filename, partial_mat_t** result);
  *      bad_header     : header seems malformed
  *      unexpected_end : format end not found in expected location
  */
-EXTERN IOStatus read_partial_header(const char* input_filename, partial_dyn_mat_t** result_out);
+IOStatus read_partial_header(const char* input_filename, partial_dyn_mat_t** result_out);
 
 /* Read a stripe of a partial matrix
  *
@@ -710,12 +718,12 @@ EXTERN IOStatus read_partial_header(const char* input_filename, partial_dyn_mat_
  *      bad_header     : header seems malformed
  *      unexpected_end : format end not found in expected location
  */
-EXTERN IOStatus read_partial_one_stripe(partial_dyn_mat_t* result, uint32_t stripe_idx);
+IOStatus read_partial_one_stripe(partial_dyn_mat_t* result, uint32_t stripe_idx);
 
 /*
  * Description TBD
  */
-EXTERN MergeStatus validate_partial(const partial_dyn_mat_t* const * partial_mats, int n_partials);
+MergeStatus validate_partial(const partial_dyn_mat_t* const * partial_mats, int n_partials);
 
 /* Merge partial results
  *
@@ -730,7 +738,7 @@ EXTERN MergeStatus validate_partial(const partial_dyn_mat_t* const * partial_mat
  *      sample_id_consistency : samples described by stripes are inconsistent
  *      square_mismatch       : inconsistency on denotation of square matrix
  */
-EXTERN MergeStatus merge_partial(partial_mat_t** partial_mats, int n_partials, unsigned int dummy, mat_t** result);
+MergeStatus merge_partial(partial_mat_t** partial_mats, int n_partials, unsigned int dummy, mat_t** result);
 
 /* Merge partial results
  *
diff --git a/test/Makefile b/test/Makefile
index 539161d..14f79a3 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -7,11 +7,6 @@ ifeq ($(PREFIX),)
 	PREFIX := $(CONDA_PREFIX)
 endif
 
-ifneq ($(PLATFORM),Darwin)
-	CC := gcc
-	LD := ld
-endif
-
 test: capi_test
 	./capi_test 1
 