Skip to content

Commit

Permalink
Allow customizable emin instead of default emin=1-emax
Browse files Browse the repository at this point in the history
This makes it possible for fp8-e4m3 to be fully OCP compliant, since OCP specifies that format with emax = 8 and emin = -6.
  • Loading branch information
mmikaitis committed Apr 30, 2024
1 parent 296e8cb commit e3b5672
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
1 change: 1 addition & 0 deletions mex/cpfloat.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void mexFunction(int nlhs,
!strcmp(fpopts->format, "E4M3")) {
fpopts->precision = 4;
fpopts->emax = 8;
fpopts->emin = -6;
} else if (!strcmp(fpopts->format, "q52") ||
!strcmp(fpopts->format, "fp8-e5m2") ||
!strcmp(fpopts->format, "E5M2")) {
Expand Down
13 changes: 13 additions & 0 deletions src/cpfloat_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,19 @@ typedef struct {
* exponent is larger than the maximum allowed by the storage format.
*/
cpfloat_exponent_t emax;
/**
* @brief Minimum exponent of target format.
*
* @details The minimum values allowed are -126 and -1022 if the storage format
* is `float` or `double`, respectively. Smaller values are increased to the
* minimum allowed value without warning. This field is ignored unless
* `explim` is set to `CPFLOAT_EXPRANGE_TARG`.
*
* The validation functions cpfloatf_validate_optstruct() and
* cpfloat_validate_optstruct() return an error code if the required minimum
* exponent is smaller than the minimum allowed by the storage format.
*/
cpfloat_exponent_t emin;
/**
* @brief Support for subnormal numbers in target format.
*
Expand Down
14 changes: 13 additions & 1 deletion src/cpfloat_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ optstruct *init_optstruct() {
fpopts->bitseed = NULL;
fpopts->randseedf = NULL;
fpopts->randseed = NULL;
fpopts->emin = -99999;
return fpopts;
}

Expand Down Expand Up @@ -279,6 +280,10 @@ static inline int VALIDATE_INPUT(const optstruct *fpopts) {
if (fpopts->flip != CPFLOAT_NO_SOFTERR && (fpopts->p > 1 || fpopts->p < 0))
return 5;

/* Return -6 if emin is invalid (either nonnegative or too small). */
if (fpopts->emin < DEFEMIN || fpopts->emin >= 0)
return -6;

/* Return 0 or warning value. */
return retval;
}
Expand All @@ -304,7 +309,14 @@ static inline FPPARAMS COMPUTE_GLOBAL_PARAMS(const optstruct *fpopts,
}

/* Derived floating point parameters. */
int emin = 1-emax;
int emin = fpopts->emin;
/* If emin is not set by user, set it to the default 1-emax. */
if (emin == -99999)
emin = 1-emax;
if (emin < DEFEMIN) {
emax = DEFEMIN;
*retval = -6;
}
FPTYPE xmin = ldexp(1., emin); /* Smallest pos. normal. */
FPTYPE xmins = ldexp(1., emin-precision+1); /* Smallest pos. subnormal. */
FPTYPE ftzthreshold = (fpopts->subnormal == CPFLOAT_SUBN_USE) ? xmins : xmin;
Expand Down

0 comments on commit e3b5672

Please sign in to comment.