Allow customizable emin instead of default emin=1-emax

This makes it possible to make the fp8-e4m3 format fully OCP compliant: the OCP specification defines it with emax = 8 and emin = -6.
north-numerical-computing · Apr 30, 2024 · e3b5672 · e3b5672
1 parent 296e8cb
commit e3b5672
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 1 deletion.
diff --git a/mex/cpfloat.c b/mex/cpfloat.c
@@ -78,6 +78,7 @@ void mexFunction(int nlhs,
                  !strcmp(fpopts->format, "E4M3")) {
         fpopts->precision = 4;
         fpopts->emax = 8;
+        fpopts->emin = -6;
       } else if (!strcmp(fpopts->format, "q52") ||
                  !strcmp(fpopts->format, "fp8-e5m2") ||
                  !strcmp(fpopts->format, "E5M2")) {

diff --git a/src/cpfloat_definitions.h b/src/cpfloat_definitions.h
@@ -202,6 +202,19 @@ typedef struct {
    * exponent is larger than the maximum allowed by the storage format.
    */
   cpfloat_exponent_t emax;
+  /**
+   * @brief Minimum exponent of target format.
+   *
+   * @details The minimum values allowed are -126 and -1022 if the storage format
+   * is `float` or `double`, respectively. Smaller values are increased to the
+   * minimum allowed value without warning. This field is ignored unless
+   * `explim` is set to `CPFLOAT_EXPRANGE_TARG`.
+   *
+   * The validation functions cpfloatf_validate_optstruct() and
+   * cpfloat_validate_optstruct() return an error code if the required minimum
+   * exponent is smaller than the minimum allowed by the storage format.
+   */
+  cpfloat_exponent_t emin;
   /**
    * @brief Support for subnormal numbers in target format.
    *

diff --git a/src/cpfloat_template.h b/src/cpfloat_template.h
@@ -106,6 +106,7 @@ optstruct *init_optstruct() {
   fpopts->bitseed = NULL;
   fpopts->randseedf = NULL;
   fpopts->randseed = NULL;
+  fpopts->emin = -99999;
   return fpopts;
 }
 
@@ -279,6 +280,10 @@ static inline int VALIDATE_INPUT(const optstruct *fpopts) {
   if (fpopts->flip != CPFLOAT_NO_SOFTERR && (fpopts->p > 1 || fpopts->p < 0))
     return 5;
 
+  /* Return -6 if emin is invalid (either nonnegative or too small). */
+  if (fpopts->emin < DEFEMIN || fpopts->emin >= 0)
+    return -6;
+
   /* Return 0 or warning value. */
   return retval;
 }
@@ -304,7 +309,14 @@ static inline FPPARAMS COMPUTE_GLOBAL_PARAMS(const optstruct *fpopts,
   }
 
   /* Derived floating point parameters. */
-  int emin = 1-emax;
+  int emin = fpopts->emin;
+  /* If emin still holds the -99999 "unset" sentinel, use the default 1-emax. */
+  if (emin == -99999)
+    emin = 1-emax;
+  if (emin < DEFEMIN) {
+    emax = DEFEMIN;
+    *retval = -6;
+  }
   FPTYPE xmin = ldexp(1., emin);              /* Smallest pos. normal. */
   FPTYPE xmins = ldexp(1., emin-precision+1); /* Smallest pos. subnormal. */
   FPTYPE ftzthreshold = (fpopts->subnormal == CPFLOAT_SUBN_USE) ? xmins : xmin;