diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h index e33a0e78..00fdaf93 100644 --- a/src/arch/helperavx.h +++ b/src/arch/helperavx.h @@ -110,7 +110,7 @@ static INLINE int vavailability_i(int name) { #endif // #if !defined(SLEEF_GENHEADER) -static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); @@ -516,10 +516,10 @@ static INLINE float vcast_f_vf(vfloat v) { #endif // -#define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) -#define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) -#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) -#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) +#define PNMASK _mm256_set_pd( -0.0, +0.0, -0.0, +0.0 ) +#define NPMASK _mm256_set_pd( +0.0, -0.0, +0.0, -0.0 ) +#define PNMASKf _mm256_set_ps( -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f ) +#define NPMASKf _mm256_set_ps( +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f ) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } diff --git a/src/arch/helperavx2.h b/src/arch/helperavx2.h index 47fcdc60..38c21b4b 100644 --- a/src/arch/helperavx2.h +++ b/src/arch/helperavx2.h @@ -96,7 +96,7 @@ static INLINE int vavailability_i(int name) { #endif // #if !defined(SLEEF_GENHEADER) -static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); @@ -392,10 +392,10 @@ static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm2 // -#define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) -#define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) -#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) -#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) +#define PNMASK _mm256_set_pd( -0.0, +0.0, -0.0, +0.0 ) +#define NPMASK _mm256_set_pd( +0.0, -0.0, +0.0, -0.0 ) +#define PNMASKf _mm256_set_ps( -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f ) +#define NPMASKf _mm256_set_ps( +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f ) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } diff --git a/src/arch/helperavx2_128.h b/src/arch/helperavx2_128.h index 5233db1b..97258cd8 100644 --- a/src/arch/helperavx2_128.h +++ b/src/arch/helperavx2_128.h @@ -96,7 +96,7 @@ static INLINE int vavailability_i(int name) { #endif // #if !defined(SLEEF_GENHEADER) -static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } @@ -371,10 +371,10 @@ static INLINE float vcast_f_vf(vfloat v) { // -#define PNMASK ((vdouble) { +0.0, -0.0 }) -#define NPMASK ((vdouble) { -0.0, +0.0 }) -#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) -#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) +#define PNMASK _mm_set_pd( -0.0, +0.0 ) +#define NPMASK _mm_set_pd( +0.0, -0.0 ) +#define PNMASKf _mm_set_ps( -0.0f, +0.0f, -0.0f, +0.0f ) +#define NPMASKf _mm_set_ps( +0.0f, -0.0f, +0.0f, -0.0f ) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } diff --git a/src/arch/helperavx512f.h b/src/arch/helperavx512f.h index b98f4006..dcf8ead7 100644 --- a/src/arch/helperavx512f.h +++ b/src/arch/helperavx512f.h @@ -102,7 +102,7 @@ static INLINE int vavailability_i(int name) { #endif // #if !defined(SLEEF_GENHEADER) -static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); } #ifdef __INTEL_COMPILER static INLINE int vtestallones_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0xff; } diff --git a/src/arch/helperneon32.h b/src/arch/helperneon32.h index 042cad40..70462db7 100644 --- a/src/arch/helperneon32.h +++ b/src/arch/helperneon32.h @@ -257,10 +257,10 @@ static INLINE int vavailability_i(int name) { } -static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32(__builtin_assume_aligned(ptr, 16)); } +static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32((const float32_t*)__builtin_assume_aligned(ptr, 16)); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); } -static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(__builtin_assume_aligned(ptr, 16), v); } +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32((float32_t*)__builtin_assume_aligned(ptr, 16), v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h index 833f5f9b..ae98add9 100644 --- a/src/arch/helpersse2.h +++ b/src/arch/helpersse2.h @@ -124,7 +124,7 @@ static INLINE int vavailability_i(int name) { #endif // #if !defined(SLEEF_GENHEADER) -static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch((const char *)ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } @@ -420,10 +420,10 @@ static INLINE float vcast_f_vf(vfloat v) { // -#define PNMASK ((vdouble) { +0.0, -0.0 }) -#define NPMASK ((vdouble) { -0.0, +0.0 }) -#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) -#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) +#define PNMASK _mm_set_pd( -0.0, +0.0 ) +#define NPMASK _mm_set_pd( +0.0, -0.0 ) +#define PNMASKf _mm_set_ps( -0.0f, +0.0f, -0.0f, +0.0f ) +#define NPMASKf _mm_set_ps( +0.0f, -0.0f, +0.0f, -0.0f ) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); }