From d0fad6547a17abd54baf450f00ad30a9770f50bb Mon Sep 17 00:00:00 2001 From: Joana Cruz Date: Tue, 15 Oct 2024 10:57:21 +0000 Subject: [PATCH] Delete old benchmark system Deleted old benchmark system and replaced with most up to date one which uses googlebench. Changes to CMakeLists were carried accordingly. Deleted section in documentation on legacy benchmark system. --- docs/4-tools/README.md | 79 ------- src/CMakeLists.txt | 2 +- .../CMakeLists.txt | 0 src/libm-benchmarks/Makefile | 153 ------------- src/libm-benchmarks/ProcessData.java | 193 ----------------- src/{benchmarks => libm-benchmarks}/README.md | 2 +- src/libm-benchmarks/bench.h | 69 ------ .../benchmark_callers.hpp | 0 .../benchmark_templates.hpp | 0 src/libm-benchmarks/benchsleef.c | 144 ------------ .../benchsleef.cpp | 0 src/libm-benchmarks/benchsleef128.c | 205 ------------------ src/libm-benchmarks/benchsleef256.c | 183 ---------------- src/libm-benchmarks/benchsleef512.c | 182 ---------------- src/libm-benchmarks/benchsvml.c | 153 ------------- src/libm-benchmarks/benchsvml128.c | 144 ------------ src/libm-benchmarks/benchsvml256.c | 147 ------------- src/libm-benchmarks/benchsvml512.c | 144 ------------ .../gen_input.hpp | 0 src/libm-benchmarks/measure.sh | 17 -- .../type_defs.hpp | 0 21 files changed, 2 insertions(+), 1815 deletions(-) rename src/{benchmarks => libm-benchmarks}/CMakeLists.txt (100%) delete mode 100644 src/libm-benchmarks/Makefile delete mode 100644 src/libm-benchmarks/ProcessData.java rename src/{benchmarks => libm-benchmarks}/README.md (98%) delete mode 100644 src/libm-benchmarks/bench.h rename src/{benchmarks => libm-benchmarks}/benchmark_callers.hpp (100%) rename src/{benchmarks => libm-benchmarks}/benchmark_templates.hpp (100%) delete mode 100644 src/libm-benchmarks/benchsleef.c rename src/{benchmarks => libm-benchmarks}/benchsleef.cpp (100%) delete mode 100644 src/libm-benchmarks/benchsleef128.c delete mode 100644 src/libm-benchmarks/benchsleef256.c delete mode 100644 src/libm-benchmarks/benchsleef512.c delete mode 100644 src/libm-benchmarks/benchsvml.c delete mode 100644 src/libm-benchmarks/benchsvml128.c delete mode 100644 src/libm-benchmarks/benchsvml256.c delete mode 100644 src/libm-benchmarks/benchsvml512.c rename src/{benchmarks => libm-benchmarks}/gen_input.hpp (100%) delete mode 100755 src/libm-benchmarks/measure.sh rename src/{benchmarks => libm-benchmarks}/type_defs.hpp (100%) diff --git a/docs/4-tools/README.md b/docs/4-tools/README.md index 75860f49..517ea0b4 100644 --- a/docs/4-tools/README.md +++ b/docs/4-tools/README.md @@ -92,85 +92,6 @@ In some cases, it is desirable to fix the last few coefficients to values like Finding a set of good parameters is not a straightforward process. -

Legacy Benchmarking tool

- -SLEEF has a tool for measuring and plotting execution time of each function in -the library. It consists of an executable for measurements, a makefile for -driving measurement and plotting, and a couple of scripts. - -In order to start a measurement, you need to first build the executable for -measurement. CMake builds the executable along with the library. Please refer -to [compiling and installing the library](../1-user-guide) for this. - -Then, change directory to `sleef/src/libm-benchmarks/`. You also need to set -the build directory to `BUILDDIR` environment variable. You also need Java -runtime environment. - -```sh -export BUILDDIR=$PATH:`pwd`/../../build -``` - -Type "make measure". After compiling the tools, it will prompt a label for -measurement. After you input a label, measurement begins. After a measurement -finishes, you can repeat measurements under different configurations. If you -want to measure on a different computer, please copy the entire directory on to -that computer and continue measurements. If you have Intel Compiler installed -on your computer, you can type "make measureSVML" to measure the computation -time of SVML functions. - -```sh -make measure -./measure.sh benchsleef - ... - Enter label of measurement(e.g. My desktop PC) : Skylake - Measurement in progress. This may take several minutes. - Sleef_sind2_u10 - Sleef_cosd2_u10 - Sleef_tand2_u10 - Sleef_sincosd2_u10 - ... - Sleef_atanf8_u10 - Sleef_atan2f8_u10 - Sleef_atanf8_u35 - Sleef_atan2f8_u35 - - Now, you can plot the results of measurement by 'make plot'. - You can do another measurement by 'make measure'. - You can start over by 'make restart'. -``` - -Then type `make plot` to generate graphs. -```sh -make plot - javac ProcessData.java - java ProcessData *dptrig*.out - gnuplot script.out - mv output.png trigdp.png - java ProcessData *dpnontrig*.out - gnuplot script.out - mv output.png nontrigdp.png - java ProcessData *sptrig*.out - gnuplot script.out - mv output.png trigsp.png - java ProcessData *spnontrig*.out - gnuplot script.out - mv output.png nontrigsp.png -``` - -You need to have JDK and gnuplot installed on your computer. -Install these with: -```sh -sudo apt install openjdk-19-jdk-headless -``` -and -```sh -sudo apt install gnuplot -``` - -Four graphs are generated : trigdp.png, nontrigdp.png, trigsp.png -and nontrigsp.png. Please see our [benchmark results](../5-performance/) for -an example of generated graphs by this tool. -

Benchmarking tool

This tool uses the [googlebench](https://github.com/google/benchmark) framework to benchmark SLEEF diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b9fe6f96..cbaaf9fc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,7 +8,7 @@ endif() add_subdirectory("common") if (SLEEF_BUILD_BENCH) - add_subdirectory("benchmarks") + add_subdirectory("libm-benchmarks") endif() if (SLEEF_BUILD_DFT) diff --git a/src/benchmarks/CMakeLists.txt b/src/libm-benchmarks/CMakeLists.txt similarity index 100% rename from src/benchmarks/CMakeLists.txt rename to src/libm-benchmarks/CMakeLists.txt diff --git a/src/libm-benchmarks/Makefile b/src/libm-benchmarks/Makefile deleted file mode 100644 index e05046e4..00000000 --- a/src/libm-benchmarks/Makefile +++ /dev/null @@ -1,153 +0,0 @@ -ICCAVAILABLE := $(shell command -v icc 2> /dev/null) -ARCH := $(shell uname -p) - -all : -ifndef BUILDDIR - @echo - @echo Please set the build directory to BUILDDIR environment variable and run make once again. - @echo e.g. export BUILDDIR='`pwd`'/../../build - @echo -else - @echo - @echo You can start measurement by "'"make measure"'". -ifdef ICCAVAILABLE - @echo You can start measurement with SVML by "'"make measureSVML"'". -endif - @echo Then, you can plot the results of measurement by "'"make plot"'". - @echo - @echo You have to install java and gnuplot to do plotting. - @echo Stop all tasks on the computer before starting measurement. - @echo -endif - -benchsvml128_10.o : benchsvml128.c bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml128.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml128_10.o - -benchsvml128_40.o : benchsvml128.c bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml128.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml128_40.o - -benchsvml256_10.o : benchsvml256.c bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml256.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml256_10.o - -benchsvml256_40.o : benchsvml256.c bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml256.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml256_40.o - -benchsvml512_10.o : benchsvml512.c bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml512.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o benchsvml512_10.o - -benchsvml512_40.o : benchsvml512.c bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml512.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o benchsvml512_40.o - - -benchsvml_10 : benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o benchsvml_10 - -benchsvml_40 : benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o bench.h - -command -v icc >/dev/null 2>&1 && icc benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o benchsvml_40 - -# - -ifeq ($(ARCH),aarch64) - -benchsleef : benchsleef.c benchsleef128.o bench.h - $(CC) benchsleef.c benchsleef128.o -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef - -benchsleef128.o : benchsleef128.c bench.h - $(CC) benchsleef128.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c - -else ifeq ($(ARCH),s390x) - -benchsleef : benchsleef.c benchsleef128.o bench.h - $(CC) benchsleef.c benchsleef128.o -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef - -benchsleef128.o : benchsleef128.c bench.h - $(CC) benchsleef128.c -Wall -mzvector -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c - -else ifeq ($(ARCH),ppc64le) - -benchsleef : benchsleef.c benchsleef128.o bench.h - $(CC) benchsleef.c benchsleef128.o -Wall -mcpu=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef - -benchsleef128.o : benchsleef128.c bench.h - $(CC) benchsleef128.c -Wall -mcpu=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c - -else - -benchsleef : benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o bench.h - $(CC) benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef - -benchsleef128.o : benchsleef128.c bench.h - $(CC) benchsleef128.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c - -benchsleef256.o : benchsleef256.c bench.h - $(CC) benchsleef256.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c - -benchsleef512.o : benchsleef512.c bench.h - $(CC) benchsleef512.c -Wall -mavx512f -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c - -endif - -# - -ProcessData.class : ProcessData.java - javac ProcessData.java - -# - -ifndef BUILDDIR -measure : - @echo - @echo Please set the build directory to BUILDDIR environment variable and run make once again. - @echo e.g. export BUILDDIR='`pwd`'/../../build - @echo -else -measure : benchsleef - chmod +x ./measure.sh - LD_LIBRARY_PATH=$(BUILDDIR)/lib ./measure.sh ./benchsleef - @echo - @echo Now, you can plot the results of measurement by "'"make plot"'". - @echo You can do another measurement by "'"make measure"'". -ifdef ICCAVAILABLE - @echo You can start another measurement with SVML by "'"make measureSVML"'". -endif - @echo You can start over by "'"make restart"'". - @echo -endif - -measureSVML : all benchsvml_10 benchsvml_40 - chmod +x ./measure.sh - ./measure.sh ./benchsvml_10 ./benchsvml_40 - @echo - @echo Now, you can plot the results of measurement by "'"make plot"'". - @echo You can do another measurement by "'"make measure"'". -ifdef ICCAVAILABLE - @echo You can start another measurement with SVML by "'"make measureSVML"'". -endif - @echo You can start over by "'"make restart"'". - @echo - -plot : ProcessData.class counter.txt - java ProcessData *dptrig*.out - gnuplot script.out - mv output.png trigdp.png - java ProcessData *dpnontrig*.out - gnuplot script.out - mv output.png nontrigdp.png - java ProcessData *sptrig*.out - gnuplot script.out - mv output.png trigsp.png - java ProcessData *spnontrig*.out - gnuplot script.out - mv output.png nontrigsp.png - @echo - @echo Plotted results are in trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png. - @echo - -clean : - rm -f *~ a.out *.so *.so.* *.a *.s *.o - rm -rf *.dSYM *.dylib - rm -f *.obj *.lib *.dll *.exp *.exe *.stackdump - rm -f *.class *.png benchsleef benchsvml_10 benchsvml_40 *.out counter.txt - -restart : - rm -f *.out counter.txt diff --git a/src/libm-benchmarks/ProcessData.java b/src/libm-benchmarks/ProcessData.java deleted file mode 100644 index 99b67c3f..00000000 --- a/src/libm-benchmarks/ProcessData.java +++ /dev/null @@ -1,193 +0,0 @@ -import java.util.*; -import java.io.*; - -public class ProcessData { - static final int DP = 64, SP = 32; - - static LinkedHashMap funcNameOrder = new LinkedHashMap(); - - static class Key { - final String funcName; - - final int prec, bits; - final ArrayList range = new ArrayList(); - final double ulps; - - Key(String s) { - String[] a = s.split(","); - - funcName = a[0].trim(); - if (funcNameOrder.get(funcName) == null) { - funcNameOrder.put(funcName, funcNameOrder.size()); - } - - prec = - a[1].trim().equals("DP") ? DP : - a[1].trim().equals("SP") ? SP : - 0; - - bits = Integer.parseInt(a[2].trim()); - - int c; - - for(c = 3;;c++) { - if (a[c].trim().endsWith("ulps")) break; - range.add(Double.parseDouble(a[c])); - } - - ulps = Double.parseDouble(a[c].trim().replace("ulps", "")); - } - - public int hashCode() { - int h = funcName.hashCode(); - h ^= prec ^ bits; - return h; - } - - public boolean equals(Object o) { - if (this == o) return true; - Key k = (Key) o; - if (funcName.compareTo(k.funcName) != 0) return false; - if (prec != k.prec) return false; - if (bits != k.bits) return false; - if (range.size() != k.range.size()) return false; - for(int i=0;i { - public int compare(Key d0, Key d1) { - if (d0 == d1) return 0; - if (d0.prec < d1.prec) return 1; - if (d0.prec > d1.prec) return -1; - if (d0.ulps > d1.ulps) return 1; - if (d0.ulps < d1.ulps) return -1; - - int fc = (int)funcNameOrder.get(d0.funcName) - (int)funcNameOrder.get(d1.funcName); - if (fc != 0) return fc; - - if (d0.bits > d1.bits) return 1; - if (d0.bits < d1.bits) return -1; - - if (d0.range.size() > d1.range.size()) return 1; - if (d0.range.size() < d1.range.size()) return -1; - - for(int i=0;i d1.range.get(i)) return 1; - if (d0.range.get(i) < d1.range.get(i)) return -1; - } - - return 0; - } - } - - public static void main(String[] args) throws Exception { - LinkedHashMap> allData = new LinkedHashMap>(); - TreeSet allKeys = new TreeSet(new KeyComparator()); - LinkedHashSet allColumnTitles = new LinkedHashSet(); - double maximum = 0; - - for(int i=0;i v = allData.get(key); - if (v == null) { - v = new LinkedHashMap(); - allData.put(key, v); - } - String[] a = s.split(","); - - double time = Double.parseDouble(a[a.length-1]); - v.put(columnTitle, time); - maximum = Math.max(maximum, time); - } - - lnr.close(); - } - - PrintStream ps = new PrintStream("data.out"); - - for(Key k : allKeys) { - ps.print("\"" + k + "\" "); - - LinkedHashMap v = allData.get(k); - - for(String s : allColumnTitles) { - Double d = v.get(s); - if (d != null) ps.print(d); - if (d == null) ps.print("0"); - ps.print("\t"); - } - ps.println(); - } - - ps.close(); - - ps = new PrintStream("script.out"); - - ps.println("set terminal pngcairo size 1280, 800 font \",10\""); - ps.println("set output \"output.png\""); - - ps.println("color00 = \"#FF5050\";"); // red - ps.println("color01 = \"#0066FF\";"); // blue - ps.println("color02 = \"#00FF00\";"); // green - ps.println("color03 = \"#FF9900\";"); // orange - ps.println("color04 = \"#CC00CC\";"); // purple - ps.println("color05 = \"#880000\";"); // brown - ps.println("color06 = \"#003300\";"); // dark green - ps.println("color07 = \"#000066\";"); // dark blue - - ps.println("set style data histogram"); - ps.println("set style histogram cluster gap 1"); - ps.println("set style fill solid 1.00"); - ps.println("set boxwidth 0.9"); - ps.println("set xtics format \"\""); - ps.println("set xtics rotate by -90"); - ps.println("set grid ytics"); - - ps.println("set ylabel \"Execution time in micro sec.\""); - ps.println("set yrange [0:*]"); - ps.println("set bmargin 24"); - - ps.println("set title \"Single execution time in micro sec.\""); - ps.print("plot"); - - int i = 0; - for(String s : allColumnTitles) { - ps.print("\"data.out\" using " + (i+2) + ":xtic(1) title \"" + s + - "\" linecolor rgb color" + String.format("%02d", i)); - if (i != allColumnTitles.size()-1) ps.print(", "); - i++; - } - ps.println(); - - ps.close(); - } -} diff --git a/src/benchmarks/README.md b/src/libm-benchmarks/README.md similarity index 98% rename from src/benchmarks/README.md rename to src/libm-benchmarks/README.md index 009eb8e5..c2aa539e 100644 --- a/src/benchmarks/README.md +++ b/src/libm-benchmarks/README.md @@ -109,7 +109,7 @@ template <> newtype gen_input (double lo, double hi) ```

Note

This tool can also be built as a standalone project. -From `sleef/src/benchmarks` directory, run: +From `sleef/src/libm-benchmarks` directory, run: ```sh cmake -S . -B build -Dsleef_BINARY_DIR= cmake --build build -j diff --git a/src/libm-benchmarks/bench.h b/src/libm-benchmarks/bench.h deleted file mode 100644 index 130757cf..00000000 --- a/src/libm-benchmarks/bench.h +++ /dev/null @@ -1,69 +0,0 @@ -// ARRAY SIZE: N * veclen -#define N 10000 -// NUM ITERATIONS -#define ITER 1000 -// TOTAL NUMBER OF ITERATIONS -#define NITER (N * ITER) - -#define callFuncSLEEF1_1(funcName, name, xmin, xmax, ulp, arg, type_in, type_out) ({ \ - printf("%s\n", #funcName); \ - uint64_t t0 = Sleef_currentTimeMicros(); \ - for(int j=0;j -#include -#include -#include -#include -#include -#include - -#include "bench.h" - -int veclen = 16; -double *abufdp, *bbufdp; -float *abufsp, *bbufsp; -FILE *fp; - -#if defined(__i386__) || defined(__x86_64__) -void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { - uint32_t a, b, c, d; - __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); - out[0] = a; out[1] = b; out[2] = c; out[3] = d; -} - -int cpuSupportsAVX() { - int32_t reg[4]; - x86CpuID(reg, 1, 0); - return (reg[2] & (1 << 28)) != 0; -} - -int cpuSupportsAVX512F() { - int32_t reg[4]; - x86CpuID(reg, 7, 0); - return (reg[1] & (1 << 16)) != 0; -} -#endif - -void fillDP(double *buf, double min, double max) { - for(int i=0;i= 3) fnBase = argv[2]; - - srandom(time(NULL)); - -#if defined(__i386__) || defined(__x86_64__) - int do128bit = 1; - int do256bit = cpuSupportsAVX(); - int do512bit = cpuSupportsAVX512F(); -#elif defined(__ARM_NEON) || defined(__VSX__) || defined(__VX__) - int do128bit = 1; -#else -#error Unsupported architecture -#endif - - posix_memalign((void **)&abufdp, veclen*sizeof(double), N*veclen*sizeof(double)); - posix_memalign((void **)&bbufdp, veclen*sizeof(double), N*veclen*sizeof(double)); - - abufsp = (float *)abufdp; - bbufsp = (float *)bbufdp; - - sprintf(fn, "%sdptrig.out", fnBase); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do128bit) benchSleef128_DPTrig(); -#if defined(__i386__) || defined(__x86_64__) - if (do256bit) benchSleef256_DPTrig(); - if (do512bit) benchSleef512_DPTrig(); -#endif - - fclose(fp); - - sprintf(fn, "%sdpnontrig.out", fnBase); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do128bit) benchSleef128_DPNontrig(); -#if defined(__i386__) || defined(__x86_64__) - if (do256bit) benchSleef256_DPNontrig(); - if (do512bit) benchSleef512_DPNontrig(); -#endif - - fclose(fp); - - sprintf(fn, "%ssptrig.out", fnBase); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do128bit) benchSleef128_SPTrig(); -#if defined(__i386__) || defined(__x86_64__) - if (do256bit) benchSleef256_SPTrig(); - if (do512bit) benchSleef512_SPTrig(); -#endif - - fclose(fp); - - sprintf(fn, "%sspnontrig.out", fnBase); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do128bit) benchSleef128_SPNontrig(); -#if defined(__i386__) || defined(__x86_64__) - if (do256bit) benchSleef256_SPNontrig(); - if (do512bit) benchSleef512_SPNontrig(); -#endif - - fclose(fp); - - exit(0); -} diff --git a/src/benchmarks/benchsleef.cpp b/src/libm-benchmarks/benchsleef.cpp similarity index 100% rename from src/benchmarks/benchsleef.cpp rename to src/libm-benchmarks/benchsleef.cpp diff --git a/src/libm-benchmarks/benchsleef128.c b/src/libm-benchmarks/benchsleef128.c deleted file mode 100644 index 04a19155..00000000 --- a/src/libm-benchmarks/benchsleef128.c +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include - -void fillDP(double *buf, double min, double max); -void fillSP(float *buf, double min, double max); - -extern char x86BrandString[256], versionString[1024]; -extern int veclen; -extern double *abufdp, *bbufdp; -extern float *abufsp, *bbufsp; -extern FILE *fp; - -#include "bench.h" - -#ifdef __SSE2__ -#if defined(_MSC_VER) -#include -#else -#include -#endif -typedef __m128d vdouble; -typedef __m128 vfloat; -typedef Sleef___m128d_2 vdouble2; -typedef Sleef___m128_2 vfloat2; -#define ENABLED -#elif defined(__ARM_NEON) -#include -typedef float64x2_t vdouble; -typedef float32x4_t vfloat; -typedef Sleef_float64x2_t_2 vdouble2; -typedef Sleef_float32x4_t_2 vfloat2; -#define ENABLED -#elif defined(__VSX__) -#include -typedef __vector double vdouble; -typedef __vector float vfloat; -typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; -typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; - -#define ENABLED -#elif defined(__VX__) -#include -typedef __vector double vdouble; -typedef __vector float vfloat; -typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; -typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; -#define ENABLED -#endif - - -#ifdef ENABLED -void benchSleef128_DPTrig() { - fillDP(abufdp, 0, 6.28); - - callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble, vdouble2); - - fillDP(abufdp, 0, 1e+6); - - callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble, vdouble2); - - fillDP(abufdp, 0, 1e+100); - - callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble, vdouble2); -} - -void benchSleef128_DPNontrig() { - fillDP(abufdp, 0, 1e+300); - - callFuncSLEEF1_1(Sleef_logd2_u10 , "log, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_log10d2_u10, "log10, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_log1pd2_u10, "log1p, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_logd2_u35 , "log, DP, 128", 0, 1e+300, 4.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -700, 700); - - callFuncSLEEF1_1(Sleef_expd2_u10 , "exp, DP, 128", -700, 700, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_exp2d2_u10 , "exp2, DP, 128", -700, 700, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_exp10d2_u10, "exp10, DP, 128", -700, 700, 1.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -30, 30); - fillDP(bbufdp, -30, 30); - - callFuncSLEEF1_2(Sleef_powd2_u10, "pow, DP, 128", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble, vdouble); - - fillDP(abufdp, -1.0, 1.0); - - callFuncSLEEF1_1(Sleef_asind2_u10, "asin, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_acosd2_u10, "acos, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_asind2_u35, "asin, DP, 128", -1.0, 1.0, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_acosd2_u35, "acos, DP, 128", -1.0, 1.0, 4.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -10, 10); - fillDP(bbufdp, -10, 10); - - callFuncSLEEF1_1(Sleef_atand2_u10, "atan, DP, 128", -10, 10, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_2(Sleef_atan2d2_u10, "atan2, DP, 128", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_atand2_u35, "atan, DP, 128", -10, 10, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_2(Sleef_atan2d2_u35, "atan2, DP, 128", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble, vdouble); -} - -void benchSleef128_SPTrig() { - fillSP(abufsp, 0, 6.28); - - callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat, vfloat2); - - callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat, vfloat2); - - fillSP(abufsp, 0, 1e+20); - - callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat, vfloat2); - - callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat, vfloat2); -} - -void benchSleef128_SPNontrig() { - fillSP(abufsp, 0, 1e+38); - - callFuncSLEEF1_1(Sleef_logf4_u10 , "log, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_log10f4_u10, "log10, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log1pf4_u10, "log1p, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_logf4_u35 , "log, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log10f4_u35, "log10, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log1pf4_u35, "log1p, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -100, 100); - - callFuncSLEEF1_1(Sleef_expf4_u10 , "exp, SP, 128", -100, 100, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_exp2f4_u10 , "exp2, SP, 128", -100, 100, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_exp10f4_u10, "exp10, SP, 128", -100, 100, 1.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -30, 30); - fillSP(bbufsp, -30, 30); - - callFuncSLEEF1_2(Sleef_powf4_u10, "pow, SP, 128", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat, vfloat); - - fillSP(abufsp, -1.0, 1.0); - - callFuncSLEEF1_1(Sleef_asinf4_u10, "asin, SP, 128", -1.0, 1, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_acosf4_u10, "acos, SP, 128", -1.0, 1, 1.0, abufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_asinf4_u35, "asin, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_acosf4_u35, "acos, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -10, 10); - fillSP(bbufsp, -10, 10); - - callFuncSLEEF1_1(Sleef_atanf4_u10, "atan, SP, 128", -10, 10, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_2(Sleef_atan2f4_u10, "atan2, SP, 128", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_atanf4_u35, "atan, SP, 128", -10, 10, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_2(Sleef_atan2f4_u35, "atan2, SP, 128", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat, vfloat); -} -#else // #ifdef ENABLED -void benchSleef128_DPTrig() {} -void benchSleef128_DPNontrig() {} -void benchSleef128_SPTrig() {} -void benchSleef128_SPNontrig() {} -#endif // #ifdef ENABLED diff --git a/src/libm-benchmarks/benchsleef256.c b/src/libm-benchmarks/benchsleef256.c deleted file mode 100644 index 4be97901..00000000 --- a/src/libm-benchmarks/benchsleef256.c +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include - -void fillDP(double *buf, double min, double max); -void fillSP(float *buf, double min, double max); - -extern char x86BrandString[256], versionString[1024]; -extern int veclen; -extern double *abufdp, *bbufdp; -extern float *abufsp, *bbufsp; -extern FILE *fp; - -#include "bench.h" - -#ifdef __AVX__ -#if defined(_MSC_VER) -#include -#else -#include -#endif -typedef __m256d vdouble; -typedef __m256 vfloat; -typedef Sleef___m256d_2 vdouble2; -typedef Sleef___m256_2 vfloat2; -#define ENABLED -#endif - -#ifdef ENABLED -void benchSleef256_DPTrig() { - fillDP(abufdp, 0, 6.28); - - callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble, vdouble2); - - fillDP(abufdp, 0, 1e+6); - - callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble, vdouble2); - - fillDP(abufdp, 0, 1e+100); - - callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble, vdouble2); -} - -void benchSleef256_DPNontrig() { - fillDP(abufdp, 0, 1e+300); - - callFuncSLEEF1_1(Sleef_logd4_u10 , "log, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_log10d4_u10, "log10, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_log1pd4_u10, "log1p, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_logd4_u35 , "log, DP, 256", 0, 1e+300, 4.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -700, 700); - - callFuncSLEEF1_1(Sleef_expd4_u10 , "exp, DP, 256", -700, 700, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_exp2d4_u10 , "exp2, DP, 256", -700, 700, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_exp10d4_u10, "exp10, DP, 256", -700, 700, 1.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -30, 30); - fillDP(bbufdp, -30, 30); - - callFuncSLEEF1_2(Sleef_powd4_u10, "pow, DP, 256", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble, vdouble); - - fillDP(abufdp, -1.0, 1.0); - - callFuncSLEEF1_1(Sleef_asind4_u10, "asin, DP, 256", -1.0, 1.0, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_acosd4_u10, "acos, DP, 256", -1.0, 1.0, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_asind4_u35, "asin, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_acosd4_u35, "acos, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -10, 10); - fillDP(bbufdp, -10, 10); - - callFuncSLEEF1_1(Sleef_atand4_u10, "atan, DP, 256", -10, 10, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_2(Sleef_atan2d4_u10, "atan2, DP, 256", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_atand4_u35, "atan, DP, 256", -10, 10, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_2(Sleef_atan2d4_u35, "atan2, DP, 256", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble, vdouble); -} - -void benchSleef256_SPTrig() { - fillSP(abufsp, 0, 6.28); - - callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat, vfloat2); - - callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat, vfloat2); - - fillSP(abufsp, 0, 1e+20); - - callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat, vfloat2); - - callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat, vfloat2); -} - -void benchSleef256_SPNontrig() { - fillSP(abufsp, 0, 1e+38); - - callFuncSLEEF1_1(Sleef_logf8_u10 , "log, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_log10f8_u10, "log10, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log1pf8_u10, "log1p, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_logf8_u35 , "log, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log10f8_u35, "log10, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log1pf8_u35, "log1p, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -100, 100); - - callFuncSLEEF1_1(Sleef_expf8_u10 , "exp, SP, 256", -100, 100, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_exp2f8_u10 , "exp2, SP, 256", -100, 100, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_exp10f8_u10, "exp10, SP, 256", -100, 100, 1.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -30, 30); - fillSP(bbufsp, -30, 30); - - callFuncSLEEF1_2(Sleef_powf8_u10, "pow, SP, 256", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat, vfloat); - - fillSP(abufsp, -1.0, 1.0); - - callFuncSLEEF1_1(Sleef_asinf8_u10, "asin, SP, 256", -1.0, 1, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_acosf8_u10, "acos, SP, 256", -1.0, 1, 1.0, abufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_asinf8_u35, "asin, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_acosf8_u35, "acos, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -10, 10); - fillSP(bbufsp, -10, 10); - - callFuncSLEEF1_1(Sleef_atanf8_u10, "atan, SP, 256", -10, 10, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_2(Sleef_atan2f8_u10, "atan2, SP, 256", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_atanf8_u35, "atan, SP, 256", -10, 10, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_2(Sleef_atan2f8_u35, "atan2, SP, 256", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat, vfloat); -} -#else // #ifdef ENABLED -void zeroupper256() {} -void benchSleef256_DPTrig() {} -void benchSleef256_DPNontrig() {} -void benchSleef256_SPTrig() {} -void benchSleef256_SPNontrig() {} -#endif // #ifdef ENABLED diff --git a/src/libm-benchmarks/benchsleef512.c b/src/libm-benchmarks/benchsleef512.c deleted file mode 100644 index 52903f37..00000000 --- a/src/libm-benchmarks/benchsleef512.c +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include - -void fillDP(double *buf, double min, double max); -void fillSP(float *buf, double min, double max); - -extern char x86BrandString[256], versionString[1024]; -extern int veclen; -extern double *abufdp, *bbufdp; -extern float *abufsp, *bbufsp; -extern FILE *fp; - -#include "bench.h" - -#ifdef __AVX512F__ -#if defined(_MSC_VER) -#include -#else -#include -#endif -typedef __m512d vdouble; -typedef __m512 vfloat; -typedef Sleef___m512d_2 vdouble2; -typedef Sleef___m512_2 vfloat2; -#define ENABLED -#endif - -#ifdef ENABLED -void benchSleef512_DPTrig() { - fillDP(abufdp, 0, 6.28); - - callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 6.28, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 6.28, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble, vdouble2); - - fillDP(abufdp, 0, 1e+6); - - callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble, vdouble2); - - fillDP(abufdp, 0, 1e+100); - - callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble, vdouble2); - - callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble, vdouble2); -} - -void benchSleef512_DPNontrig() { - fillDP(abufdp, 0, 1e+300); - - callFuncSLEEF1_1(Sleef_logd8_u10 , "log, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_log10d8_u10, "log10, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_log1pd8_u10, "log1p, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_logd8_u35 , "log, DP, 512", 0, 1e+300, 4.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -700, 700); - - callFuncSLEEF1_1(Sleef_expd8_u10 , "exp, DP, 512", -700, 700, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_exp2d8_u10 , "exp2, DP, 512", -700, 700, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_exp10d8_u10, "exp10, DP, 512", -700, 700, 1.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -30, 30); - fillDP(bbufdp, -30, 30); - - callFuncSLEEF1_2(Sleef_powd8_u10, "pow, DP, 512", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble, vdouble); - - fillDP(abufdp, -1.0, 1.0); - - callFuncSLEEF1_1(Sleef_asind8_u10, "asin, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_acosd8_u10, "acos, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_asind8_u35, "asin, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_acosd8_u35, "acos, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble, vdouble); - - fillDP(abufdp, -10, 10); - fillDP(bbufdp, -10, 10); - - callFuncSLEEF1_1(Sleef_atand8_u10, "atan, DP, 512", -10, 10, 1.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_2(Sleef_atan2d8_u10, "atan2, DP, 512", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble, vdouble); - callFuncSLEEF1_1(Sleef_atand8_u35, "atan, DP, 512", -10, 10, 4.0, abufdp, vdouble, vdouble); - callFuncSLEEF1_2(Sleef_atan2d8_u35, "atan2, DP, 512", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble, vdouble); -} - -void benchSleef512_SPTrig() { - fillSP(abufsp, 0, 6.28); - - callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 6.28, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat, vfloat2); - - callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 0, 6.28, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat, vfloat2); - - fillSP(abufsp, 0, 1e+20); - - callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat, vfloat2); - - callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat, vfloat2); -} - -void benchSleef512_SPNontrig() { - fillSP(abufsp, 0, 1e+38); - - callFuncSLEEF1_1(Sleef_logf16_u10 , "log, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_log10f16_u10, "log10, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log1pf16_u10, "log1p, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_logf16_u35 , "log, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log10f16_u35, "log10, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - //callFuncSLEEF1_1(Sleef_log1pf16_u35, "log1p, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -100, 100); - - callFuncSLEEF1_1(Sleef_expf16_u10 , "exp, SP, 512", -100, 100, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_exp2f16_u10 , "exp2, SP, 512", -100, 100, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_exp10f16_u10, "exp10, SP, 512", -100, 100, 1.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -30, 30); - fillSP(bbufsp, -30, 30); - - callFuncSLEEF1_2(Sleef_powf16_u10, "pow, SP, 512", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat, vfloat); - - fillSP(abufsp, -1.0, 1.0); - - callFuncSLEEF1_1(Sleef_asinf16_u10, "asin, SP, 512", -1.0, 1, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_acosf16_u10, "acos, SP, 512", -1.0, 1, 1.0, abufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_asinf16_u35, "asin, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_1(Sleef_acosf16_u35, "acos, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat, vfloat); - - fillSP(abufsp, -10, 10); - fillSP(bbufsp, -10, 10); - - callFuncSLEEF1_1(Sleef_atanf16_u10, "atan, SP, 512", -10, 10, 1.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_2(Sleef_atan2f16_u10, "atan2, SP, 512", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat, vfloat); - - callFuncSLEEF1_1(Sleef_atanf16_u35, "atan, SP, 512", -10, 10, 4.0, abufsp, vfloat, vfloat); - callFuncSLEEF1_2(Sleef_atan2f16_u35, "atan2, SP, 512", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat, vfloat); -} -#else // #ifdef ENABLED -void benchSleef512_DPTrig() {} -void benchSleef512_DPNontrig() {} -void benchSleef512_SPTrig() {} -void benchSleef512_SPNontrig() {} -#endif // #ifdef ENABLED diff --git a/src/libm-benchmarks/benchsvml.c b/src/libm-benchmarks/benchsvml.c deleted file mode 100644 index ef04302f..00000000 --- a/src/libm-benchmarks/benchsvml.c +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bench.h" - -int veclen = 16; -int enableLogExp; -double *abufdp, *bbufdp; -float *abufsp, *bbufsp; -FILE *fp; - -#if defined(__i386__) || defined(__x86_64__) -void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { - uint32_t a, b, c, d; - __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); - out[0] = a; out[1] = b; out[2] = c; out[3] = d; -} - -int cpuSupportsAVX() { - int32_t reg[4]; - x86CpuID(reg, 1, 0); - return (reg[2] & (1 << 28)) != 0; -} - -int cpuSupportsAVX512F() { - int32_t reg[4]; - x86CpuID(reg, 7, 0); - return (reg[1] & (1 << 16)) != 0; -} -#endif - -uint64_t Sleef_currentTimeMicros() { - struct timespec tp; - clock_gettime(CLOCK_MONOTONIC, &tp); - return (uint64_t)tp.tv_sec * 1000000LL + ((uint64_t)tp.tv_nsec/1000); -} - -void fillDP(double *buf, double min, double max) { - for(int i=0;i= 3) fnBase = argv[2]; - - srandom(time(NULL)); - -#if defined(__i386__) || defined(__x86_64__) - int do128bit = 1; - int do256bit = cpuSupportsAVX(); - int do512bit = cpuSupportsAVX512F(); -#elif defined(__ARM_NEON) - int do128bit = 1; - int do256bit = 0; - int do512bit = 0; -#else -#error Unsupported architecture -#endif - - posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); - posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); - - abufsp = (float *)abufdp; - bbufsp = (float *)bbufdp; - - enableLogExp = SVMLULP < 2; - - sprintf(fn, "%sdptrig%gulp.out", fnBase, (double)SVMLULP); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do256bit) zeroupper256(); - if (do128bit) benchSVML128_DPTrig(); - if (do256bit) benchSVML256_DPTrig(); - if (do512bit) benchSVML512_DPTrig(); - - fclose(fp); - - sprintf(fn, "%sdpnontrig%gulp.out", fnBase, (double)SVMLULP); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do256bit) zeroupper256(); - if (do128bit) benchSVML128_DPNontrig(); - if (do256bit) benchSVML256_DPNontrig(); - if (do512bit) benchSVML512_DPNontrig(); - - fclose(fp); - - sprintf(fn, "%ssptrig%gulp.out", fnBase, (double)SVMLULP); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do256bit) zeroupper256(); - if (do128bit) benchSVML128_SPTrig(); - if (do256bit) benchSVML256_SPTrig(); - if (do512bit) benchSVML512_SPTrig(); - - fclose(fp); - - sprintf(fn, "%sspnontrig%gulp.out", fnBase, (double)SVMLULP); - fp = fopen(fn, "w"); - fprintf(fp, "%s\n", columnTitle); - - if (do256bit) zeroupper256(); - if (do128bit) benchSVML128_SPNontrig(); - if (do256bit) benchSVML256_SPNontrig(); - if (do512bit) benchSVML512_SPNontrig(); - - fclose(fp); - - exit(0); -} diff --git a/src/libm-benchmarks/benchsvml128.c b/src/libm-benchmarks/benchsvml128.c deleted file mode 100644 index ba4a80ee..00000000 --- a/src/libm-benchmarks/benchsvml128.c +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include -#include - -uint64_t Sleef_currentTimeMicros(); -void fillDP(double *buf, double min, double max); -void fillSP(float *buf, double min, double max); - -extern char x86BrandString[256], versionString[1024]; -extern int veclen; -extern int enableLogExp; -extern double *abufdp, *bbufdp; -extern float *abufsp, *bbufsp; -extern FILE *fp; - -#include "bench.h" - -#ifdef __SSE2__ -typedef __m128d vdouble; -typedef __m128 vfloat; -#define ENABLED -#endif - -#ifdef ENABLED -void benchSVML128_DPTrig() { - fillDP(abufdp, 0, 6.28); - - callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 6.28, abufdp, vdouble); - callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 6.28, abufdp, vdouble); - callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 6.28, abufdp, vdouble); - callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 6.28, abufdp, vdouble); - - fillDP(abufdp, 0, 1e+6); - - callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+6, abufdp, vdouble); - callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+6, abufdp, vdouble); - callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+6, abufdp, vdouble); - callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+6, abufdp, vdouble); - - fillDP(abufdp, 0, 1e+100); - - callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+100, abufdp, vdouble); - callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+100, abufdp, vdouble); - callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+100, abufdp, vdouble); - callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+100, abufdp, vdouble); -} - -void benchSVML128_DPNontrig() { - fillDP(abufdp, 0, 1e+300); - - callFuncSVML1_1(_mm_log_pd , "log, DP, 128", 0, 1e+300, abufdp, vdouble); - - if (enableLogExp) { - callFuncSVML1_1(_mm_log10_pd, "log10, DP, 128", 0, 1e+300, abufdp, vdouble); - callFuncSVML1_1(_mm_log1p_pd, "log1p, DP, 128", 0, 1e+300, abufdp, vdouble); - - fillDP(abufdp, -700, 700); - - callFuncSVML1_1(_mm_exp_pd , "exp, DP, 128", -700, 700, abufdp, vdouble); - callFuncSVML1_1(_mm_exp2_pd , "exp2, DP, 128", -700, 700, abufdp, vdouble); - callFuncSVML1_1(_mm_exp10_pd, "exp10, DP, 128", -700, 700, abufdp, vdouble); - - fillDP(abufdp, -30, 30); - fillDP(bbufdp, -30, 30); - - callFuncSVML1_2(_mm_pow_pd, "pow, DP, 128", -30, 30, -30, 30, abufdp, bbufdp, vdouble); - } - - fillDP(abufdp, -1.0, 1.0); - - callFuncSVML1_1(_mm_asin_pd, "asin, DP, 128", -1.0, 1.0, abufdp, vdouble); - callFuncSVML1_1(_mm_acos_pd, "acos, DP, 128", -1.0, 1.0, abufdp, vdouble); - - fillDP(abufdp, -10, 10); - fillDP(bbufdp, -10, 10); - - callFuncSVML1_1(_mm_atan_pd, "atan, DP, 128", -10, 10, abufdp, vdouble); - callFuncSVML1_2(_mm_atan2_pd, "atan2, DP, 128", -10, 10, -10, 10, abufdp, bbufdp, vdouble); -} - -void benchSVML128_SPTrig() { - fillSP(abufsp, 0, 6.28); - - callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 6.28, abufsp, vfloat); - callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 6.28, abufsp, vfloat); - callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 6.28, abufsp, vfloat); - callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 6.28, abufsp, vfloat); - - fillSP(abufsp, 0, 1e+20); - - callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 1e+20, abufsp, vfloat); - callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 1e+20, abufsp, vfloat); - callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 1e+20, abufsp, vfloat); - callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 1e+20, abufsp, vfloat); -} - -void benchSVML128_SPNontrig() { - fillSP(abufsp, 0, 1e+38); - - callFuncSVML1_1(_mm_log_ps , "log, SP, 128", 0, 1e+38, abufsp, vfloat); - - if (enableLogExp) { - callFuncSVML1_1(_mm_log10_ps, "log10, SP, 128", 0, 1e+38, abufsp, vfloat); - //callFuncSVML1_1(_mm_log1p_ps, "log1p, SP, 128", 0, 1e+38, abufsp, vfloat); - - fillSP(abufsp, -100, 100); - - callFuncSVML1_1(_mm_exp_ps , "exp, SP, 128", -100, 100, abufsp, vfloat); - callFuncSVML1_1(_mm_exp2_ps , "exp2, SP, 128", -100, 100, abufsp, vfloat); - callFuncSVML1_1(_mm_exp10_ps, "exp10, SP, 128", -100, 100, abufsp, vfloat); - - fillSP(abufsp, -30, 30); - fillSP(bbufsp, -30, 30); - - callFuncSVML1_2(_mm_pow_ps, "pow, SP, 128", -30, 30, -30, 30, abufsp, bbufsp, vfloat); - } - - fillSP(abufsp, -1.0, 1.0); - - callFuncSVML1_1(_mm_asin_ps, "asin, SP, 128", -1.0, 1, abufsp, vfloat); - callFuncSVML1_1(_mm_acos_ps, "acos, SP, 128", -1.0, 1, abufsp, vfloat); - - fillSP(abufsp, -10, 10); - fillSP(bbufsp, -10, 10); - - callFuncSVML1_1(_mm_atan_ps, "atan, SP, 128", -10, 10, abufsp, vfloat); - callFuncSVML1_2(_mm_atan2_ps, "atan2, SP, 128", -10, 10, -10, 10, abufsp, bbufsp, vfloat); -} -#else // #ifdef ENABLED -void benchSVML128_DPTrig() {} -void benchSVML128_DPNontrig() {} -void benchSVML128_SPTrig() {} -void benchSVML128_SPNontrig() {} -#endif // #ifdef ENABLED diff --git a/src/libm-benchmarks/benchsvml256.c b/src/libm-benchmarks/benchsvml256.c deleted file mode 100644 index 36261c7d..00000000 --- a/src/libm-benchmarks/benchsvml256.c +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include -#include - -uint64_t Sleef_currentTimeMicros(); -void fillDP(double *buf, double min, double max); -void fillSP(float *buf, double min, double max); - -extern char x86BrandString[256], versionString[1024]; -extern int veclen; -extern int enableLogExp; -extern double *abufdp, *bbufdp; -extern float *abufsp, *bbufsp; -extern FILE *fp; - -#include "bench.h" - -#ifdef __AVX__ -typedef __m256d vdouble; -typedef __m256 vfloat; -#define ENABLED -#endif - -#ifdef ENABLED -void zeroupper256() { _mm256_zeroupper(); } - -void benchSVML256_DPTrig() { - fillDP(abufdp, 0, 6.28); - - callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 6.28, abufdp, vdouble); - callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 6.28, abufdp, vdouble); - callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 6.28, abufdp, vdouble); - callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 6.28, abufdp, vdouble); - - fillDP(abufdp, 0, 1e+6); - - callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+6, abufdp, vdouble); - callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+6, abufdp, vdouble); - callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+6, abufdp, vdouble); - callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+6, abufdp, vdouble); - - fillDP(abufdp, 0, 1e+100); - - callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+100, abufdp, vdouble); - callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+100, abufdp, vdouble); - callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+100, abufdp, vdouble); - callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+100, abufdp, vdouble); -} - -void benchSVML256_DPNontrig() { - fillDP(abufdp, 0, 1e+300); - - callFuncSVML1_1(_mm256_log_pd , "log, DP, 256", 0, 1e+300, abufdp, vdouble); - - if (enableLogExp) { - callFuncSVML1_1(_mm256_log10_pd, "log10, DP, 256", 0, 1e+300, abufdp, vdouble); - callFuncSVML1_1(_mm256_log1p_pd, "log1p, DP, 256", 0, 1e+300, abufdp, vdouble); - - fillDP(abufdp, -700, 700); - - callFuncSVML1_1(_mm256_exp_pd , "exp, DP, 256", -700, 700, abufdp, vdouble); - callFuncSVML1_1(_mm256_exp2_pd , "exp2, DP, 256", -700, 700, abufdp, vdouble); - callFuncSVML1_1(_mm256_exp10_pd, "exp10, DP, 256", -700, 700, abufdp, vdouble); - - fillDP(abufdp, -30, 30); - fillDP(bbufdp, -30, 30); - - callFuncSVML1_2(_mm256_pow_pd, "pow, DP, 256", -30, 30, -30, 30, abufdp, bbufdp, vdouble); - } - - fillDP(abufdp, -1.0, 1.0); - - callFuncSVML1_1(_mm256_asin_pd, "asin, DP, 256", -1.0, 1.0, abufdp, vdouble); - callFuncSVML1_1(_mm256_acos_pd, "acos, DP, 256", -1.0, 1.0, abufdp, vdouble); - - fillDP(abufdp, -10, 10); - fillDP(bbufdp, -10, 10); - - callFuncSVML1_1(_mm256_atan_pd, "atan, DP, 256", -10, 10, abufdp, vdouble); - callFuncSVML1_2(_mm256_atan2_pd, "atan2, DP, 256", -10, 10, -10, 10, abufdp, bbufdp, vdouble); -} - -void benchSVML256_SPTrig() { - fillSP(abufsp, 0, 6.28); - - callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 6.28, abufsp, vfloat); - callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 6.28, abufsp, vfloat); - callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 6.28, abufsp, vfloat); - callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 6.28, abufsp, vfloat); - - fillSP(abufsp, 0, 1e+20); - - callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 1e+20, abufsp, vfloat); - callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 1e+20, abufsp, vfloat); - callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 1e+20, abufsp, vfloat); - callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 1e+20, abufsp, vfloat); -} - -void benchSVML256_SPNontrig() { - fillSP(abufsp, 0, 1e+38); - - callFuncSVML1_1(_mm256_log_ps , "log, SP, 256", 0, 1e+38, abufsp, vfloat); - - if (enableLogExp) { - callFuncSVML1_1(_mm256_log10_ps, "log10, SP, 256", 0, 1e+38, abufsp, vfloat); - //callFuncSVML1_1(_mm256_log1p_ps, "log1p, SP, 256", 0, 1e+38, abufsp, vfloat); - - fillSP(abufsp, -100, 100); - - callFuncSVML1_1(_mm256_exp_ps , "exp, SP, 256", -100, 100, abufsp, vfloat); - callFuncSVML1_1(_mm256_exp2_ps , "exp2, SP, 256", -100, 100, abufsp, vfloat); - callFuncSVML1_1(_mm256_exp10_ps, "exp10, SP, 256", -100, 100, abufsp, vfloat); - - fillSP(abufsp, -30, 30); - fillSP(bbufsp, -30, 30); - - callFuncSVML1_2(_mm256_pow_ps, "pow, SP, 256", -30, 30, -30, 30, abufsp, bbufsp, vfloat); - } - - fillSP(abufsp, -1.0, 1.0); - - callFuncSVML1_1(_mm256_asin_ps, "asin, SP, 256", -1.0, 1, abufsp, vfloat); - callFuncSVML1_1(_mm256_acos_ps, "acos, SP, 256", -1.0, 1, abufsp, vfloat); - - fillSP(abufsp, -10, 10); - fillSP(bbufsp, -10, 10); - - callFuncSVML1_1(_mm256_atan_ps, "atan, SP, 256", -10, 10, abufsp, vfloat); - callFuncSVML1_2(_mm256_atan2_ps, "atan2, SP, 256", -10, 10, -10, 10, abufsp, bbufsp, vfloat); -} -#else // #ifdef ENABLED -void zeroupper256() {} -void benchSVML256_DPTrig() {} -void benchSVML256_DPNontrig() {} -void benchSVML256_SPTrig() {} -void benchSVML256_SPNontrig() {} -#endif // #ifdef ENABLED diff --git a/src/libm-benchmarks/benchsvml512.c b/src/libm-benchmarks/benchsvml512.c deleted file mode 100644 index 7ece237f..00000000 --- a/src/libm-benchmarks/benchsvml512.c +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include -#include - -uint64_t Sleef_currentTimeMicros(); -void fillDP(double *buf, double min, double max); -void fillSP(float *buf, double min, double max); - -extern char x86BrandString[256], versionString[1024]; -extern int veclen; -extern int enableLogExp; -extern double *abufdp, *bbufdp; -extern float *abufsp, *bbufsp; -extern FILE *fp; - -#include "bench.h" - -#ifdef __AVX512F__ -typedef __m512d vdouble; -typedef __m512 vfloat; -#define ENABLED -#endif - -#ifdef ENABLED -void benchSVML512_DPTrig() { - fillDP(abufdp, 0, 6.28); - - callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 6.28, abufdp, vdouble); - callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 6.28, abufdp, vdouble); - callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 6.28, abufdp, vdouble); - callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 6.28, abufdp, vdouble); - - fillDP(abufdp, 0, 1e+6); - - callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+6, abufdp, vdouble); - callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+6, abufdp, vdouble); - callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+6, abufdp, vdouble); - callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+6, abufdp, vdouble); - - fillDP(abufdp, 0, 1e+100); - - callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+100, abufdp, vdouble); - callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+100, abufdp, vdouble); - callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+100, abufdp, vdouble); - callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+100, abufdp, vdouble); -} - -void benchSVML512_DPNontrig() { - fillDP(abufdp, 0, 1e+300); - - callFuncSVML1_1(_mm512_log_pd , "log, DP, 512", 0, 1e+300, abufdp, vdouble); - - if (enableLogExp) { - callFuncSVML1_1(_mm512_log10_pd, "log10, DP, 512", 0, 1e+300, abufdp, vdouble); - callFuncSVML1_1(_mm512_log1p_pd, "log1p, DP, 512", 0, 1e+300, abufdp, vdouble); - - fillDP(abufdp, -700, 700); - - callFuncSVML1_1(_mm512_exp_pd , "exp, DP, 512", -700, 700, abufdp, vdouble); - callFuncSVML1_1(_mm512_exp2_pd , "exp2, DP, 512", -700, 700, abufdp, vdouble); - callFuncSVML1_1(_mm512_exp10_pd, "exp10, DP, 512", -700, 700, abufdp, vdouble); - - fillDP(abufdp, -30, 30); - fillDP(bbufdp, -30, 30); - - callFuncSVML1_2(_mm512_pow_pd, "pow, DP, 512", -30, 30, -30, 30, abufdp, bbufdp, vdouble); - } - - fillDP(abufdp, -1.0, 1.0); - - callFuncSVML1_1(_mm512_asin_pd, "asin, DP, 512", -1.0, 1.0, abufdp, vdouble); - callFuncSVML1_1(_mm512_acos_pd, "acos, DP, 512", -1.0, 1.0, abufdp, vdouble); - - fillDP(abufdp, -10, 10); - fillDP(bbufdp, -10, 10); - - callFuncSVML1_1(_mm512_atan_pd, "atan, DP, 512", -10, 10, abufdp, vdouble); - callFuncSVML1_2(_mm512_atan2_pd, "atan2, DP, 512", -10, 10, -10, 10, abufdp, bbufdp, vdouble); -} - -void benchSVML512_SPTrig() { - fillSP(abufsp, 0, 6.28); - - callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 6.28, abufsp, vfloat); - callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 6.28, abufsp, vfloat); - callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 6.28, abufsp, vfloat); - callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 6.28, abufsp, vfloat); - - fillSP(abufsp, 0, 1e+20); - - callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 1e+20, abufsp, vfloat); - callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 1e+20, abufsp, vfloat); - callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 1e+20, abufsp, vfloat); - callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 1e+20, abufsp, vfloat); -} - -void benchSVML512_SPNontrig() { - fillSP(abufsp, 0, 1e+38); - - callFuncSVML1_1(_mm512_log_ps , "log, SP, 512", 0, 1e+38, abufsp, vfloat); - - if (enableLogExp) { - callFuncSVML1_1(_mm512_log10_ps, "log10, SP, 512", 0, 1e+38, abufsp, vfloat); - //callFuncSVML1_1(_mm512_log1p_ps, "log1p, SP, 512", 0, 1e+38, abufsp, vfloat); - - fillSP(abufsp, -100, 100); - - callFuncSVML1_1(_mm512_exp_ps , "exp, SP, 512", -100, 100, abufsp, vfloat); - callFuncSVML1_1(_mm512_exp2_ps , "exp2, SP, 512", -100, 100, abufsp, vfloat); - callFuncSVML1_1(_mm512_exp10_ps, "exp10, SP, 512", -100, 100, abufsp, vfloat); - - fillSP(abufsp, -30, 30); - fillSP(bbufsp, -30, 30); - - callFuncSVML1_2(_mm512_pow_ps, "pow, SP, 512", -30, 30, -30, 30, abufsp, bbufsp, vfloat); - } - - fillSP(abufsp, -1.0, 1.0); - - callFuncSVML1_1(_mm512_asin_ps, "asin, SP, 512", -1.0, 1, abufsp, vfloat); - callFuncSVML1_1(_mm512_acos_ps, "acos, SP, 512", -1.0, 1, abufsp, vfloat); - - fillSP(abufsp, -10, 10); - fillSP(bbufsp, -10, 10); - - callFuncSVML1_1(_mm512_atan_ps, "atan, SP, 512", -10, 10, abufsp, vfloat); - callFuncSVML1_2(_mm512_atan2_ps, "atan2, SP, 512", -10, 10, -10, 10, abufsp, bbufsp, vfloat); -} -#else // #ifdef ENABLED -void benchSVML512_DPTrig() {} -void benchSVML512_DPNontrig() {} -void benchSVML512_SPTrig() {} -void benchSVML512_SPNontrig() {} -#endif // #ifdef ENABLED diff --git a/src/benchmarks/gen_input.hpp b/src/libm-benchmarks/gen_input.hpp similarity index 100% rename from src/benchmarks/gen_input.hpp rename to src/libm-benchmarks/gen_input.hpp diff --git a/src/libm-benchmarks/measure.sh b/src/libm-benchmarks/measure.sh deleted file mode 100755 index 4b0590c3..00000000 --- a/src/libm-benchmarks/measure.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -echo -read -p "Enter label of measurement(e.g. My desktop PC) : " label - -if [ -f counter.txt ] -then - counter=`cat counter.txt` -else - counter=0 -fi - -echo Measurement in progress. This may take several minutes. -for i in $*; do - $i "$label" $counter -done -counter=$((counter+1)) -echo $counter > counter.txt diff --git a/src/benchmarks/type_defs.hpp b/src/libm-benchmarks/type_defs.hpp similarity index 100% rename from src/benchmarks/type_defs.hpp rename to src/libm-benchmarks/type_defs.hpp