diff --git a/simdpp/detail/insn/f_ceil.h b/simdpp/detail/insn/f_ceil.h index f339e90b..e22acf96 100644 --- a/simdpp/detail/insn/f_ceil.h +++ b/simdpp/detail/insn/f_ceil.h @@ -80,7 +80,11 @@ float32x8 i_ceil(const float32x8& a) static SIMDPP_INL float32<16> i_ceil(const float32<16>& a) { - return _mm512_ceil_ps(a.native()); +#if SIMDPP_USE_SVML + return _mm512_ceil_ps(a.native()); +#else + return _mm512_roundscale_ps(a.native(), (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); +#endif } #endif @@ -151,7 +155,11 @@ float64x4 i_ceil(const float64x4& a) static SIMDPP_INL float64<8> i_ceil(const float64<8>& a) { - return _mm512_ceil_pd(a.native()); +#if SIMDPP_USE_SVML + return _mm512_ceil_pd(a.native()); +#else + return _mm512_roundscale_pd(a.native(), (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); +#endif } #endif diff --git a/simdpp/detail/insn/f_floor.h b/simdpp/detail/insn/f_floor.h index d73f304f..499ade15 100644 --- a/simdpp/detail/insn/f_floor.h +++ b/simdpp/detail/insn/f_floor.h @@ -77,7 +77,11 @@ float32x8 i_floor(const float32x8& a) static SIMDPP_INL float32<16> i_floor(const float32<16>& a) { - return _mm512_floor_ps(a.native()); +#if SIMDPP_USE_SVML + return _mm512_floor_ps(a.native()); +#else + return _mm512_roundscale_ps(a.native(), (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); +#endif } #endif @@ -148,7 +152,11 @@ float64x4 i_floor(const float64x4& a) static SIMDPP_INL float64<8> i_floor(const float64<8>& a) { - return _mm512_floor_pd(a.native()); +#if SIMDPP_USE_SVML + return _mm512_floor_pd(a.native()); +#else + return _mm512_roundscale_pd(a.native(), (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); +#endif } #endif diff --git a/simdpp/setup_arch.h b/simdpp/setup_arch.h index a644aa94..0ae07c35 100644 --- a/simdpp/setup_arch.h +++ b/simdpp/setup_arch.h @@ -91,6 +91,11 @@ #else #define SIMDPP_USE_AVX512VL 0 #endif +#if SIMDPP_ARCH_PP_USE_SVML +#define SIMDPP_USE_SVML 1 +#else +#define SIMDPP_USE_SVML 0 +#endif #if SIMDPP_ARCH_PP_USE_NEON #define SIMDPP_USE_NEON 1 #else