From d94996b22f19a749138d35105ca6db7746c15b1b Mon Sep 17 00:00:00 2001
From: Naoki Shibata
Date: Wed, 16 Oct 2024 11:47:51 +0900
Subject: [PATCH] This patch fixes the following two problems:

* On aarch32, cbrt(4) was returned instead of M_PI/2.
* Due to a bug in gcc-13, sign of atanf(+-0) was wrong.

This patch also adds testing for the first problem.
The second problem is detected by the existing tester.
---
 src/libm-tester/tester.c | 2 ++
 src/libm/sleefsimdsp.c   | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/libm-tester/tester.c b/src/libm-tester/tester.c
index 03f5d099..5e2cf2ac 100644
--- a/src/libm-tester/tester.c
+++ b/src/libm-tester/tester.c
@@ -4825,6 +4825,8 @@ void do_test() {
   fprintf(stderr, "atanf : ");
   for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5);
   for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5);
+  checkAccuracy_f(mpfr_atan, child_atanf, +INFINITY, 3.5);
+  checkAccuracy_f(mpfr_atan, child_atanf, -INFINITY, 3.5);
   showResult(success);
 
   //
diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c
index cf08a2e1..2600f9db 100644
--- a/src/libm/sleefsimdsp.c
+++ b/src/libm/sleefsimdsp.c
@@ -853,6 +853,9 @@ EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) {
   if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f))))) {
     q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI))));
     u = vcast_vf_vi2(q);
+#if (defined(ENABLE_PUREC_SCALAR) || defined(ENABLE_PURECFMA_SCALAR)) && !defined(__clang__) && __GNUC__ == 13
+    if (q == 0) u = 0;
+#endif
     x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), x);
     x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x);
     x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x);
@@ -908,6 +911,9 @@ EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) {
 
   q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI))));
   u = vcast_vf_vi2(q);
+#if (defined(ENABLE_PUREC_SCALAR) || defined(ENABLE_PURECFMA_SCALAR)) && !defined(__clang__) && __GNUC__ == 13
+  if (q == 0) u = 0;
+#endif
   x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
   x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x);
   x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x);
@@ -1770,7 +1776,7 @@ EXPORT CONST VECTOR_CC vfloat xatanf(vfloat d) {
   t = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(t)));
 
 #if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
-  t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.5874010519681994747517056f), d), t);
+  t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.570796326794896557998982), d), t);
 #endif
 
   return t;