diff --git a/mex/cpfloat.c b/mex/cpfloat.c
index efc831e..15a717c 100644
--- a/mex/cpfloat.c
+++ b/mex/cpfloat.c
@@ -41,11 +41,15 @@ void mexFunction(int nlhs,
     fpopts->precision = 11;
     fpopts->emin = -14;
     fpopts->emax = 15;
-    fpopts->subnormal = CPFLOAT_SUBN_USE;
     fpopts->explim = CPFLOAT_EXPRANGE_TARG;
+    fpopts->infinity = CPFLOAT_INF_USE;
     fpopts->round = CPFLOAT_RND_NE;
-    fpopts->flip = CPFLOAT_NO_SOFTERR;
+    fpopts->saturation = CPFLOAT_SAT_NO;
+    fpopts->subnormal = CPFLOAT_SUBN_USE;
+
+    fpopts->flip = CPFLOAT_SOFTERR_NO;
     fpopts->p = 0.5;
+
     fpopts->bitseed = NULL;
     fpopts->randseedf = NULL;
     fpopts->randseed = NULL;
@@ -54,6 +58,7 @@ void mexFunction(int nlhs,
   /* Parse second argument and populate fpopts structure. */
   if (nrhs > 1) {
     bool is_subn_rnd_default = false;
+    bool is_inf_no_default = false;
     if(!mxIsEmpty(prhs[1]) && !mxIsStruct(prhs[1])) {
       mexErrMsgIdAndTxt("cpfloat:invalidstruct",
                         "Second argument must be a struct.");
@@ -62,7 +67,7 @@ void mexFunction(int nlhs,
 
       if (tmp != NULL) {
         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
-          /* Use default format, for compatibility with chop. */
+          /* Set default format, for compatibility with chop. */
           strcpy(fpopts->format, "h");
         else if (mxGetClassID(tmp) == mxCHAR_CLASS)
           strcpy(fpopts->format, mxArrayToString(tmp));
@@ -80,6 +85,7 @@ void mexFunction(int nlhs,
         fpopts->precision = 4;
         fpopts->emin = -6;
         fpopts->emax = 8;
+        is_inf_no_default = true;
       } else if (!strcmp(fpopts->format, "q52") ||
                  !strcmp(fpopts->format, "fp8-e5m2") ||
                  !strcmp(fpopts->format, "E5M2")) {
@@ -134,6 +140,7 @@ void mexFunction(int nlhs,
         mexErrMsgIdAndTxt("cpfloat:invalidformat",
                           "Invalid floating-point format specified.");
       }
+
       /* Set default values to be compatible with MATLAB chop. */
       tmp = mxGetField(prhs[1], 0, "subnormal");
       if (tmp != NULL) {
@@ -144,9 +151,8 @@ void mexFunction(int nlhs,
       } else {
         if (is_subn_rnd_default)
           fpopts->subnormal = CPFLOAT_SUBN_RND; /* Default for bfloat16. */
-        else
-          fpopts->subnormal = CPFLOAT_SUBN_USE;
       }
+
       tmp = mxGetField(prhs[1], 0, "explim");
       if (tmp != NULL) {
         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
@@ -154,6 +160,18 @@ void mexFunction(int nlhs,
         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
           fpopts->explim = *((double *)mxGetData(tmp));
       }
+
+      tmp = mxGetField(prhs[1], 0, "infinity");
+      if (tmp != NULL) {
+        if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
+          fpopts->infinity = CPFLOAT_INF_USE;
+        else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
+          fpopts->infinity = *((double *)mxGetData(tmp));
+      } else {
+        if (is_inf_no_default)
+          fpopts->infinity = CPFLOAT_INF_NO; /* Default for E4M5. */
+      }
+
       tmp = mxGetField(prhs[1], 0, "round");
       if (tmp != NULL) {
         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
@@ -161,10 +179,32 @@ void mexFunction(int nlhs,
         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
           fpopts->round = *((double *)mxGetData(tmp));
       }
+
+      tmp = mxGetField(prhs[1], 0, "saturation");
+      if (tmp != NULL) {
+        if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
+          fpopts->saturation = CPFLOAT_SAT_NO;
+        else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
+          fpopts->saturation = *((double *)mxGetData(tmp));
+      }
+
+      tmp = mxGetField(prhs[1], 0, "subnormal");
+      if (tmp != NULL) {
+        if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
+          fpopts->subnormal = CPFLOAT_SUBN_USE;
+        else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
+          fpopts->subnormal = *((double *)mxGetData(tmp));
+      } else {
+        if (is_subn_rnd_default)
+          fpopts->subnormal = CPFLOAT_SUBN_RND; /* Default for bfloat16. */
+        else
+          fpopts->subnormal = CPFLOAT_SUBN_USE;
+      }
+
       tmp = mxGetField(prhs[1], 0, "flip");
       if (tmp != NULL) {
         if (mxGetM(tmp) == 0 && mxGetN(tmp) == 0)
-          fpopts->flip = CPFLOAT_NO_SOFTERR;
+          fpopts->flip = CPFLOAT_SOFTERR_NO;
         else if (mxGetClassID(tmp) == mxDOUBLE_CLASS)
           fpopts->flip = *((double *)mxGetData(tmp));
       }
@@ -288,10 +328,11 @@ void mexFunction(int nlhs,
 
   /* Allocate and return second output. */
   if (nlhs > 1) {
-    const char* field_names[] = {"format", "params", "subnormal", "round",
-                                 "flip", "p", "explim"};
+    const char* field_names[] = {"format", "params", "explim", "infinity",
+                                 "round", "saturation", "subnormal",
+                                 "flip", "p"};
     mwSize dims[2] = {1, 1};
-    plhs[1] = mxCreateStructArray(2, dims, 7, field_names);
+    plhs[1] = mxCreateStructArray(2, dims, 9, field_names);
     mxSetFieldByNumber(plhs[1], 0, 0, mxCreateString(fpopts->format));
 
     mxArray *outparams = mxCreateDoubleMatrix(1,3,mxREAL);
@@ -301,30 +342,40 @@ void mexFunction(int nlhs,
     outparamsptr[2] = fpopts->emax;
     mxSetFieldByNumber(plhs[1], 0, 1, outparams);
 
-    mxArray *outsubnormal = mxCreateDoubleMatrix(1,1,mxREAL);
-    double *outsubnormalptr = mxGetData(outsubnormal);
-    outsubnormalptr[0] = fpopts->subnormal;
-    mxSetFieldByNumber(plhs[1], 0, 2, outsubnormal);
+    mxArray *outexplim = mxCreateDoubleMatrix(1, 1, mxREAL);
+    double *outexplimptr = mxGetData(outexplim);
+    outexplimptr[0] = fpopts->explim;
+    mxSetFieldByNumber(plhs[1], 0, 2, outexplim);
+
+    mxArray *outinfinity = mxCreateDoubleMatrix(1, 1, mxREAL);
+    double *outinfinityptr = mxGetData(outinfinity);
+    outinfinityptr[0] = fpopts->infinity;
+    mxSetFieldByNumber(plhs[1], 0, 3, outinfinity);
 
     mxArray *outround = mxCreateDoubleMatrix(1,1,mxREAL);
     double *outroundptr = mxGetData(outround);
     outroundptr[0] = fpopts->round;
-    mxSetFieldByNumber(plhs[1], 0, 3, outround);
+    mxSetFieldByNumber(plhs[1], 0, 4, outround);
+
+    mxArray *outsaturation = mxCreateDoubleMatrix(1,1,mxREAL);
+    double *outsaturationptr = mxGetData(outsaturation);
+    outsaturationptr[0] = fpopts->saturation;
+    mxSetFieldByNumber(plhs[1], 0, 5, outsaturation);
+
+    mxArray *outsubnormal = mxCreateDoubleMatrix(1,1,mxREAL);
+    double *outsubnormalptr = mxGetData(outsubnormal);
+    outsubnormalptr[0] = fpopts->subnormal;
+    mxSetFieldByNumber(plhs[1], 0, 6, outsubnormal);
 
     mxArray *outflip = mxCreateDoubleMatrix(1,1,mxREAL);
     double *outflipptr = mxGetData(outflip);
     outflipptr[0] = fpopts->flip;
-    mxSetFieldByNumber(plhs[1], 0, 4, outflip);
+    mxSetFieldByNumber(plhs[1], 0, 7, outflip);
 
     mxArray *outp = mxCreateDoubleMatrix(1,1,mxREAL);
     double *outpptr = mxGetData(outp);
     outpptr[0] = fpopts->p;
-    mxSetFieldByNumber(plhs[1], 0, 5, outp);
-
-    mxArray *outexplim = mxCreateDoubleMatrix(1,1,mxREAL);
-    double *outexplimptr = mxGetData(outexplim);
-    outexplimptr[0] = fpopts->explim;
-    mxSetFieldByNumber(plhs[1], 0, 6, outexplim);
+    mxSetFieldByNumber(plhs[1], 0, 8, outp);
 
   }
   if (nlhs > 2)
diff --git a/mex/cpfloat.m b/mex/cpfloat.m
index 4f1dfa3..3e27dea 100644
--- a/mex/cpfloat.m
+++ b/mex/cpfloat.m
@@ -39,17 +39,17 @@
 %     the target format, respectively. The default value of this field is
 %     the vector [11,-14,15].
 %
-%   * The scalar FPOPTS.subnormal specifies the support for subnormal numbers.
-%     The target floating-point format will not support subnormal numbers if
-%     this field is set to 0, and will support them otherwise. The default value
-%     for this field is 0 if the target format is 'bfloat16' and 1 otherwise.
-%
 %   * The scalar FPOPTS.explim specifies the support for an extended exponent
 %     range. The target floating-point format will have the exponent range of
 %     the storage format ('single' or 'double', depending on the class of X) if
 %     this field is set to 0, and the exponent range of the format specified in
 %     FPOPTS.format otherwise. The default value for this field is 1.
 %
+%   * The scalar FPOPTS.infinity specifies whether infinities are supported. The
+%     target floating-point format will support infinities if this field is set
+%     to 1, and they will be replaced by NaNs otherwise. The default value for
+%     this field is 0 if the target format is 'E4M3' and 1 otherwise.
+%
 %   * The scalar FPOPTS.round specifies the rounding mode. Possible values are:
 %       -1 for round-to-nearest with ties-to-away;
 %        0 for round-to-nearest with ties-to-zero;
@@ -63,6 +63,16 @@
 %      Any other value results in no rounding. The default value for this field
 %      is 1.
 %
+%   * The scalar FPOPTS.saturation specifies whether saturation arithmetic is in
+%     use. On overflow, the target floating-point format will use the largest
+%     representable floating-point if this field is set to 0, and infinity
+%     otherwise. The default value for this field is 0.
+
+%   * The scalar FPOPTS.subnormal specifies the support for subnormal numbers.
+%     The target floating-point format will not support subnormal numbers if
+%     this field is set to 0, and will support them otherwise. The default value
+%     for this field is 0 if the target format is 'bfloat16' and 1 otherwise.
+%
 %   * The scalar FPOPTS.flip specifies whether the function should simulate the
 %     occurrence of a single bit flip striking the floating-point representation
 %     of elements of Y. Possible values are:
diff --git a/src/cpfloat_binary32.h b/src/cpfloat_binary32.h
index 3ecbe83..9ce1843 100644
--- a/src/cpfloat_binary32.h
+++ b/src/cpfloat_binary32.h
@@ -289,8 +289,8 @@ static inline int cpf_fmaf(float *X, const float *A, const float *B,
 #define INTSUFFIX  U
 
 #define DEFPREC   24
-#define DEFEMAX  127
 #define DEFEMIN -126
+#define DEFEMAX  127
 #define NLEADBITS  9
 #define NBITS     32
 #define FULLMASK 0xFFFFFFFFU
diff --git a/src/cpfloat_binary64.h b/src/cpfloat_binary64.h
index 5df0a6b..fa57f63 100644
--- a/src/cpfloat_binary64.h
+++ b/src/cpfloat_binary64.h
@@ -289,8 +289,8 @@ static inline int cpf_fma(double *X, const double *A, const double *B,
 #define INTTYPE uint64_t
 #define INTSUFFIX ULL
 #define DEFPREC 53
-#define DEFEMAX 1023
 #define DEFEMIN -1022
+#define DEFEMAX 1023
 #define NLEADBITS 12
 #define NBITS 64
 #define FULLMASK 0xFFFFFFFFFFFFFFFFULL
diff --git a/src/cpfloat_definitions.h b/src/cpfloat_definitions.h
index 78bfc7b..9493f4d 100644
--- a/src/cpfloat_definitions.h
+++ b/src/cpfloat_definitions.h
@@ -8,10 +8,12 @@
  * @details This file includes all the external header files used by CPFloat,
  * defines the enumerated types
  *
- * + @ref cpfloat_subnormal_t,
  * + @ref cpfloat_explim_t,
+ * + @ref cpfloat_infinity_t,
  * + @ref cpfloat_rounding_t,
+ * + @ref cpfloat_saturation_t,
  * + @ref cpfloat_softerr_t,
+ * + @ref cpfloat_subnormal_t,
  *
  * and the structured data type @ref optstruct. It is not necessary to include
  * this file in order to use CPFloat, as it is already included by @ref
@@ -52,16 +54,6 @@ typedef unsigned int cpfloat_precision_t;
  */
 typedef int cpfloat_exponent_t;
 
-/**
- * @brief Subnormal support modes available in CPFloat.
- */
-typedef enum {
-  /** Round subnormal numbers using current rounding mode. */
-  CPFLOAT_SUBN_RND = 0,
-  /** Support storage of subnormal numbers. */
-  CPFLOAT_SUBN_USE = 1
-} cpfloat_subnormal_t;
-
 /**
  * @brief Extended exponent range modes available in CPFloat.
  */
@@ -72,6 +64,16 @@ typedef enum {
   CPFLOAT_EXPRANGE_TARG = 1
 } cpfloat_explim_t;
 
+/**
+ * @brief Infinity support modes available in CPFloat.
+ */
+typedef enum {
+  /** Use infinities in target format. */
+  CPFLOAT_INF_NO = 0,
+  /** Replace infinities with NaNs in target format. */
+  CPFLOAT_INF_USE = 1,
+} cpfloat_infinity_t;
+
 /**
  * @brief Rounding modes available in CPFloat.
  */
@@ -98,19 +100,38 @@ typedef enum {
   CPFLOAT_NO_RND =  8,
 } cpfloat_rounding_t;
 
+/**
+ * @brief Saturation modes available in CPFloat.
+ */
+typedef enum {
+  /** Use standard arithmetic. */
+  CPFLOAT_SAT_NO = 0,
+  /** Use saturation arithmetic. */
+  CPFLOAT_SAT_USE = 1,
+} cpfloat_saturation_t;
+
 /**
  * @brief Soft fault simulation modes available in CPFloat.
  */
 typedef enum {
   /** Do not introduce soft errors. */
-  CPFLOAT_NO_SOFTERR = 0,
+  CPFLOAT_SOFTERR_NO = 0,
   /** Soft errors in fraction of target-format floating-point representation.*/
-  CPFLOAT_FRAC_SOFTERR = 1,
+  CPFLOAT_SOFTERR_FRAC = 1,
   /** Soft errors anywhere in target-format floating-point representation. */
-  CPFLOAT_FP_SOFTERR = 2
-
+  CPFLOAT_SOFTERR_FP = 2
 } cpfloat_softerr_t;
 
+/**
+ * @brief Subnormal support modes available in CPFloat.
+ */
+typedef enum {
+  /** Round subnormal numbers according to the current rounding mode. */
+  CPFLOAT_SUBN_RND = 0,
+  /** Support storage of subnormal numbers. */
+  CPFLOAT_SUBN_USE = 1
+} cpfloat_subnormal_t;
+
 /** @cond */
 #ifdef PCG_VARIANTS_H_INCLUDED
 #define CPFLOAT_BITSEEDTYPE pcg32_random_t
@@ -157,12 +178,12 @@ typedef struct {
    * Possible values are:
    * + `q43`, `e4m3`, `E4M3` for E4M3 (4-bit exponent, 4-bit significand);
    * + `q52`, `e5m2`, `E5M2` for E5M2 (5-bit exponent, 2-bit significand);
-   * + `b`, `bf16`, and `bfloat16` for bfloat16;
-   * + `h`, `fp16`, `binary16`, and `half` for binary16;
-   * + `t`, `tf32`, and `TensorFloat-32`, for TensorFloat-32;
-   * + `s`, `fp32`, `binary32`, and `single` for binary32;
-   * + `d`, `fp64`, `binary64`, and `double` for binary64; and
-   * + `custom`, and `c` for a format specified using `precision` and `emax`.
+   * + `b`, `bf16`, `bfloat16` for bfloat16;
+   * + `h`, `fp16`, `binary16`, `half` for binary16;
+   * + `t`, `tf32`, `TensorFloat-32`, for TensorFloat-32;
+   * + `s`, `fp32`, `binary32`, `single` for binary32;
+   * + `d`, `fp64`, `binary64`, `double` for binary64; and
+   * + `custom`, `c` for a format specifying `precision`, `emin`, and `emax`.
    *
    * The validation functions cpfloatf_validate_optstruct() and
    * cpfloat_validate_optstruct() return a warning code if this field is not set
@@ -179,8 +200,8 @@ typedef struct {
    * number of digits of precision for `float` and `double` cannot exceed 11 and
    * 25, respectively, when using stochastic rounding, and cannot exceed 23 and
    * 52, respectively, for other rounding modes. The C implementation does not
-   * have any such restrictions, but note that using larger values can cause
-   * double rounding.
+   * have any such restrictions, but using larger values can cause double
+   * rounding.
    *
    * The validation functions cpfloatf_validate_optstruct() and
    * cpfloat_validate_optstruct() return an error code if the required number of
@@ -189,6 +210,19 @@ typedef struct {
    * by the MEX interface.
    */
   cpfloat_precision_t precision;
+  /**
+   * @brief Minimum exponent of target format.
+   *
+   * @details The minimum values allowed are -126 and -1022 if the storage
+   * format is `float` or `double`, respectively. If a smaller value is chosen,
+   * it is changed to the minimum allowed value without warning. This field is
+   * ignored unless `explim` is set to `CPFLOAT_EXPRANGE_TARG`.
+   *
+   * The validation functions cpfloatf_validate_optstruct() and
+   * cpfloat_validate_optstruct() return an error code if the required minimum
+   * exponent is smaller than the minimum allowed by the storage format.
+   */
+  cpfloat_exponent_t emin;
   /**
    * @brief Maximum exponent of target format.
    *
@@ -202,27 +236,6 @@ typedef struct {
    * exponent is larger than the maximum allowed by the storage format.
    */
   cpfloat_exponent_t emax;
-  /**
-   * @brief Minimum exponent of target format.
-   *
-   * @details The minimum values allowed are -126 and -1022 if the storage format
-   * is `float` or `double`, respectively. If a smaller value is chosen, it is
-   * changed to the minimum allowed value without warning. This field is ignored
-   * unless `explim` is set to `CPFLOAT_EXPRANGE_TARG`.
-   *
-   * The validation functions cpfloatf_validate_optstruct() and
-   * cpfloat_validate_optstruct() return an error code if the required minimum
-   * exponent is smaller than the minimum allowed by the storage format.
-   */
-  cpfloat_exponent_t emin;
-  /**
-   * @brief Support for subnormal numbers in target format.
-   *
-   * @details Subnormal numbers are supported if this field is set to
-   * `CPFLOAT_SUBN_USE` and rounded to a normal number using the current
-   * rounding mode if it is set to `CPFLOAT_SUBN_RND`.
-   */
-  cpfloat_subnormal_t subnormal;
   /**
    * @brief Support for extended exponents in target format.
    *
@@ -232,6 +245,14 @@ typedef struct {
    * `CPFLOAT_EXPRANGE_STOR`.
    */
   cpfloat_explim_t explim;
+  /**
+   * @brief Support for infinities in target format.
+   *
+   * @details If this field is set to `CPFLOAT_INF_USE`, the target format
+   * supports signed infinities. If the field is set to `CPFLOAT_INF_NO`,
+   * infinities are replaced with a quiet NaN.
+   */
+  cpfloat_infinity_t infinity;
   /**
    * @brief Rounding mode to be used for the conversion.
    *
@@ -257,15 +278,36 @@ typedef struct {
    * those in the list above is specified.
    */
   cpfloat_rounding_t round;
+  /**
+   * @brief Support for saturation arithmetic in target format.
+   *
+   * @details If this field is set to `CPFLOAT_SAT_USE`, numbers too large to be
+   * represented in the target format are clamped to the largest floating-point
+   * number of appropriate sign. If this field is set to `CPFLOAT_SAT_NO`,
+   * numbers that are too large to be represented are rounded to either the
+   * largest normal value of appropriate sign or the closest infinity according
+   * to the current rounding mode.
+   */
+  cpfloat_saturation_t saturation;
+  /**
+   * @brief Support for subnormal numbers in target format.
+   *
+   * @details Subnormal numbers are supported if this field is set to
+   * `CPFLOAT_SUBN_USE` and rounded to a normal number according to the current
+   * rounding mode if it is set to `CPFLOAT_SUBN_RND`.
+   */
+  cpfloat_subnormal_t subnormal;
+
+  /* Bit flips. */
   /**
    * @brief Support for soft errors.
    *
-   * @details If this field is not set to `CPFLOAT_NO_SOFTERR`, a single bit
+   * @details If this field is not set to `CPFLOAT_SOFTERR_NO`, a single bit
    * flip is introduced in the binary floating-point representation of the
    * rounded result with probability `p`. The bit flip can strike only the
    * target-format fraction (significand without the implicit bit) if this field
-   * is set to `CPFLOAT_FRAC_SOFTERR` and any bit in the target-format
-   * representation if it is set to `CPFLOAT_FP_SOFTERR`.
+   * is set to `CPFLOAT_SOFTERR_FRAC` and any bit in the target-format
+   * representation if it is set to `CPFLOAT_SOFTERR_FP`.
    */
   cpfloat_softerr_t flip;
   /**
@@ -274,14 +316,16 @@ typedef struct {
    * @details The probability of flipping a single bit in the binary
    * floating-point representation or in the fraction (significand without the
    * implicit bit) of a number after rounding. This field is ignored if `flip`
-   * is set to `CPFLOAT_NO_SOFTERR`.
+   * is set to `CPFLOAT_SOFTERR_NO`.
    *
    * The validation functions cpfloatf_validate_optstruct() and
    * cpfloat_validate_optstruct() return an error code if `flip` is set to
-   * `CPFLOAT_FP_SOFTERR` or `CPFLOAT_FRAC_SOFTERR` and this field does not
+   * `CPFLOAT_FP_SOFTERR` or `CPFLOAT_SOFTERR_FRAC` and this field does not
    * contain a number in the interval [0,1].
    */
   double p;
+
+  /* Internal: state of pseudo-random number generator. */
   /**
    * @brief Internal state of pseudo-random number generator for single bits.
    *
diff --git a/src/cpfloat_template.h b/src/cpfloat_template.h
index e0d80b4..7695e87 100644
--- a/src/cpfloat_template.h
+++ b/src/cpfloat_template.h
@@ -158,11 +158,14 @@ typedef union {
 #define FPPARAMS ADDSUFFIXTO(fpparams)
 typedef struct {
   cpfloat_precision_t precision;
-  cpfloat_exponent_t emax;
   cpfloat_exponent_t emin;
-  cpfloat_subnormal_t subnormal;
+  cpfloat_exponent_t emax;
+  cpfloat_infinity_t infinity;
   cpfloat_rounding_t round;
+  cpfloat_saturation_t saturation;
+  cpfloat_subnormal_t subnormal;
   FPTYPE ftzthreshold;
+  FPTYPE ofvalue;
   FPTYPE xmin;
   FPTYPE xmax;
   FPTYPE xbnd;
@@ -277,7 +280,7 @@ static inline int VALIDATE_INPUT(const optstruct *fpopts) {
     retval = -4;
 
   /* Return 5 if p is required but is not a valid probability. */
-  if (fpopts->flip != CPFLOAT_NO_SOFTERR && (fpopts->p > 1 || fpopts->p < 0))
+  if (fpopts->flip != CPFLOAT_SOFTERR_NO && (fpopts->p > 1 || fpopts->p < 0))
     return 5;
 
   /* Return 0 or warning value. */
@@ -292,40 +295,49 @@ static inline FPPARAMS COMPUTE_GLOBAL_PARAMS(const optstruct *fpopts,
   /* Actual precision and exponent range. */
   *retval = 0;
   cpfloat_precision_t precision = fpopts->precision;
-  cpfloat_exponent_t emax = fpopts->explim == CPFLOAT_EXPRANGE_TARG ?
-    fpopts->emax :
-    DEFEMAX;
   cpfloat_exponent_t emin = fpopts->explim == CPFLOAT_EXPRANGE_TARG ?
     fpopts->emin :
     DEFEMIN;
+  cpfloat_exponent_t emax = fpopts->explim == CPFLOAT_EXPRANGE_TARG ?
+    fpopts->emax :
+    DEFEMAX;
 
   if (precision > DEFPREC) {
     precision = DEFPREC;
     *retval = 1;
   }
 
-  if (emax > DEFEMAX) {
-    emax = DEFEMAX;
+  if (emin < DEFEMIN) {
+    emin = DEFEMIN;
     *retval = 2;
   }
 
-  if (emin < DEFEMIN) {
-    emin = DEFEMIN;
+  if (emax > DEFEMAX) {
+    emax = DEFEMAX;
     *retval = 2;
   }
 
   FPTYPE xmin = ldexp(1., emin);              /* Smallest pos. normal. */
   FPTYPE xmins = ldexp(1., emin-precision+1); /* Smallest pos. subnormal. */
   FPTYPE ftzthreshold = (fpopts->subnormal == CPFLOAT_SUBN_USE) ? xmins : xmin;
+
   FPTYPE xmax = ldexp(1., emax) * (2-ldexp(1., 1-precision));
   FPTYPE xbnd = ldexp(1., emax) * (2-ldexp(1., -precision));
+  /*
+   * Here, fpopts->saturation takes precedence over fpopts->infinity. Therefore,
+   * when saturation arithmetic is used, infinities are not produced even when
+   * the target format supports them.
+   */
+  FPTYPE ofvalue = (fpopts->saturation == CPFLOAT_SAT_USE) ? xmax :
+    (fpopts->infinity == CPFLOAT_INF_USE ? INFINITY : NAN);
 
   /* Bitmasks. */
   INTTYPE leadmask = FULLMASK << (DEFPREC-precision); /* To keep. */
   INTTYPE trailmask = leadmask ^ FULLMASK;            /* To discard. */
 
-  FPPARAMS params = {precision, emax, emin, fpopts->subnormal, fpopts->round,
-                     ftzthreshold, xmin, xmax, xbnd,
+  FPPARAMS params = {precision, emin, emax, fpopts->infinity, fpopts->round,
+                     fpopts->saturation, fpopts->subnormal,
+                     ftzthreshold, ofvalue, xmin, xmax, xbnd,
                      leadmask, trailmask, NULL, NULL};
 
   return params;
@@ -401,7 +413,7 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
     else                                                                       \
       *(x) = FPOF(SIGN(y) | INTOFCONST(p->ftzthreshold));                      \
   } else if (ABS(y) >= p->xbnd) { /* Overflow */                               \
-    *(x) = FPOF(SIGN(y) | INTOFCONST(INFINITY));                               \
+    *(x) = FPOF(SIGN(y) | INTOFCONST(p->ofvalue));                             \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
     *(x) = FPOF((INTOF(y) +                                                    \
@@ -425,7 +437,7 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
     else                                                                       \
       *(x) = FPOF(SIGN(y));                                                    \
   } else if (ABS(y) > p->xbnd) { /* Overflow */                                \
-    *(x) = FPOF(SIGN(y) | INTOFCONST(INFINITY));                               \
+    *(x) = FPOF(SIGN(y) | INTOFCONST(p->ofvalue));                             \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
     *(x) = FPOF((INTOF(y) + (lp->trailmask>>1)) & lp->leadmask);               \
@@ -450,7 +462,7 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
     else                                                                       \
       *(x) = FPOF(SIGN(y) | INTOFCONST(p->ftzthreshold));                      \
   } else if (ABS(y) >= p->xbnd) { /* Overflow */                               \
-    *(x) = FPOF(SIGN(y) | INTOFCONST(INFINITY));                               \
+    *(x) = FPOF(SIGN(y) | INTOFCONST(p->ofvalue));                             \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
     INTTYPE LSB = ((INTOF(y) >> (DEFPREC-lp->precision)) & INTCONST(1))        \
@@ -477,11 +489,11 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
     *(x) = *(y) > 0 ? p->ftzthreshold : 0;                                     \
   } else if (ABS(y) > p->xmax) { /* Overflow */                                \
     if (*(y) > p->xmax)                                                        \
-      *(x) = INFINITY;                                                         \
-    else if (*(y) < -p->xmax && *(y) != -INFINITY)                             \
+      *(x) = p->ofvalue;                                                       \
+    else if (*(y) < -p->xmax && *(y) != -p->ofvalue)                           \
       *(x) = -p->xmax;                                                         \
     else /* *(y) == -INFINITY */                                               \
-      *(x) = -INFINITY;                                                        \
+      *(x) = -p->ofvalue;                                                      \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
     if (SIGN(y) == 0) /* Add ulp if x is positive.  */                         \
@@ -509,11 +521,11 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
     *(x) = *(y) >= 0 ? 0 : -p->ftzthreshold;                                   \
   } else if (ABS(y) > p->xmax) { /* Overflow */                                \
     if (*(y) < -p->xmax)                                                       \
-      *(x) = -INFINITY;                                                        \
-    else if (*(y) > p->xmax && *(y) != INFINITY)                               \
+      *(x) = -p->ofvalue;                                                      \
+    else if (*(y) > p->xmax && *(y) != p->ofvalue)                             \
       *(x) = p->xmax;                                                          \
     else /* *(y) == INFINITY */                                                \
-      *(x) = INFINITY;                                                         \
+      *(x) = p->ofvalue;                                                       \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
     if (SIGN(y)) /* Subtract ulp if x is positive. */                          \
@@ -532,7 +544,7 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
 #define RD_TWD_ZERO_SCALAR_OTHER_EXP(x, y, p, lp)                              \
   if (ABS(y) < p->ftzthreshold) { /* Underflow */                              \
     *(x) = FPOF(SIGN(y));                                                      \
-  } else if (ABS(y) > p->xmax && ABS(y) != INFINITY) { /* Overflow */          \
+  } else if (ABS(y) > p->xmax && ABS(y) != p->ofvalue) { /* Overflow */        \
     *(x) = FPOF(SIGN(y) | INTOFCONST(p->xmax));                                \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
@@ -581,7 +593,7 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
     *(x) = *(y);                                                               \
   }                                                                            \
   if (ABS(x) >= p->xbnd) /* Overflow */                                        \
-    *(x) = FPOF(SIGN(y) | INTOFCONST(INFINITY));
+    *(x) = FPOF(SIGN(y) | INTOFCONST(p->ofvalue));
 
 /* Stochastic rounding with equal probabilities. */
 #define RS_EQUI_SCALAR(x, y, p, lp)                                            \
@@ -589,9 +601,9 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
   if (ABS(y) < p->ftzthreshold && *(y) != 0) {         /* Underflow */         \
     randombit = GENBIT(p->BITSEED);                                            \
     *(x) = FPOF(SIGN(y) | INTOFCONST(randombit ? p->ftzthreshold : 0));        \
-  } else if (ABS(y) > p->xmax && ABS(y) != INFINITY) { /* Overflow */          \
+  } else if (ABS(y) > p->xmax && ABS(y) != p->ofvalue) { /* Overflow */        \
     randombit = GENBIT(p->BITSEED);                                            \
-    *(x) = FPOF(SIGN(y) | INTOFCONST(randombit ? INFINITY : p->xmax));         \
+    *(x) = FPOF(SIGN(y) | INTOFCONST(randombit ? p->ofvalue : p->xmax));       \
   } else if ((INTOF(y) & lp->trailmask)) { /* Not exactly representable. */    \
     randombit = GENBIT(p->BITSEED);                                            \
     *(x) = FPOF(INTOF(y) & lp->leadmask);                                      \
@@ -604,7 +616,7 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
 #define RO_SCALAR(x, y, p, lp)                                                 \
   if (ABS(y) < p->ftzthreshold && *(y) != 0) {         /* Underflow */         \
     *(x) =  FPOF(SIGN(y) | INTOFCONST(p->ftzthreshold));                       \
-  } else if (ABS(y) > p->xmax && ABS(y) != INFINITY) { /* Overflow */          \
+  } else if (ABS(y) > p->xmax && ABS(y) != p->ofvalue) { /* Overflow */        \
     *(x) = FPOF(SIGN(y) | INTOFCONST(p->xmax));                                \
   } else {                                                                     \
     UPDATE_LOCAL_PARAMS(y, p, lp);                                             \
@@ -651,7 +663,9 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
                                  numelem, p, lp)                               \
   PARALLEL_STRING(PARALLEL)                                                    \
   {                                                                            \
-    if (p->emax == DEFEMAX) {                                                  \
+    if (p->emax == DEFEMAX                                                     \
+        && p->saturation == CPFLOAT_SAT_NO                                     \
+        && p->infinity == CPFLOAT_INF_USE) {                                   \
       FOR_STRING(PARALLEL)                                                     \
       for (size_t i=0; i<numelem; i++) {                                       \
         DEPARENTHESIZE_MAYBE(PREPROC)                                          \
@@ -783,10 +797,10 @@ static inline void UPDATE_LOCAL_PARAMS(const FPTYPE *A,
 #define ADD_BITFLIP_CODE_NO_BITFLIP(PARALLEL_TYPE, X)
 #define ADD_BITFLIP_CODE_INTRODUCE_BITFLIP(PARALLEL_TYPE, X)                   \
   /* Introduce bit flips anywhere in the binary representation. */             \
-  if (fpopts->flip != CPFLOAT_NO_SOFTERR) {                                    \
+  if (fpopts->flip != CPFLOAT_SOFTERR_NO) {                                    \
     PRNG_RAND_INIT                                                             \
     size_t flippos;                                                            \
-    const size_t totbits = fpopts->flip == CPFLOAT_FP_SOFTERR ?                \
+    const size_t totbits = fpopts->flip == CPFLOAT_SOFTERR_FP ?                \
                              NBITS : DEFPREC;                                  \
     FOR_STRING(PARALLEL_TYPE)                                                  \
     for (size_t i = 0; i < numelem; i++) {                                     \
@@ -1207,8 +1221,8 @@ GENERATE_UNIVARIATE_MATH_H(lgamma, lgamma)
    int temp = ADDSUFFIXTO(ilogb)(*(A+i));                                      \
    if (temp == FP_ILOGB0 || temp == FP_ILOGBNAN || temp == INT_MAX) {          \
      params.precision = DEFPREC-1;                                             \
-     params.emax = DEFEMAX;                                                    \
      params.emin = DEFEMIN;                                                    \
+     params.emax = DEFEMAX;                                                    \
    } else {                                                                    \
      params.precision = temp + 1;                                              \
      params.ftzthreshold = 1.0;                                                \
@@ -1490,8 +1504,8 @@ GENERATE_TRIVARIATE_MATH_H(fma, fma)
 #undef INTSUFFIX
 
 #undef DEFPREC
-#undef DEFEMAX
 #undef DEFEMIN
+#undef DEFEMAX
 #undef NLEADBITS
 #undef NBITS
 #undef FULLMASK
diff --git a/test/cpfloat_test.m b/test/cpfloat_test.m
index abef1f8..d44b4fd 100644
--- a/test/cpfloat_test.m
+++ b/test/cpfloat_test.m
@@ -20,46 +20,68 @@
   pi_h = 6432*uh; % fp16(pi)
 
   % Check handling of defaults and persistent variable.
-  fp.format = 'bfloat16'; [c,options] = cpfloat(pi,fp);
+  fp.format = 'bfloat16';
+  [~,options] = cpfloat(pi,fp);
   assert_eq(fp.format,options.format)
   assert_eq(options.subnormal,0)
 
-  fp.format = []; [c,options] = cpfloat(pi,fp);
+  fp.format = [];
+  [~,options] = cpfloat(pi,fp);
   assert_eq(options.format,'h')  % Check default;
 
-  fp.subnormal = 0; [c,options] = cpfloat(pi,fp);
-  assert_eq(options.subnormal,0)
+  fp.explim = [];
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.explim,1)  % Check default.
 
-  fp.subnormal = []; [c,options] = cpfloat(pi,fp);
-  assert_eq(options.subnormal,1)  % Check default;
+  fp.explim = 0;
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.explim,0)  % Check no default.
 
-  fp.round = []; [c,options] = cpfloat(pi,fp);
+  fp.round = [];
+  [~,options] = cpfloat(pi,fp);
   assert_eq(options.round,1)  % Check default.
 
-  fp.flip = []; [c,options] = cpfloat(pi,fp);
-  assert_eq(options.flip,0)  % Check no default.
+  fp.saturation = 1;
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.saturation,1)
 
-  fp.explim = []; [c,options] = cpfloat(pi,fp);
-  assert_eq(options.explim,1)  % Check default.
+  fp.saturation = [];
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.saturation,0)  % Check default;
 
-  fp.explim = 0; [c,options] = cpfloat(pi,fp);
-  assert_eq(options.explim,0)  % Check no default.
+  fp.subnormal = 0;
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.subnormal,0)
+
+  fp.subnormal = [];
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.subnormal,1)  % Check default;
+
+  fp.flip = [];
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.flip,0)  % Check no default.
 
   clear cpfloat fp options
-  fp.flip = 1; [~,options] = cpfloat([],fp);
+  fp.flip = 1;
+  [~,options] = cpfloat([],fp);
   assert_eq(options.format,'h')
   assert_eq(options.round,1)
+  assert_eq(options.saturation,0)
   assert_eq(options.subnormal,1)
 
   clear cpfloat fp options
   % check all default options
-  fp.format = []; fp.subnormal = [];
-  fp.round = []; fp.flip = [];
+  fp.format = [];
+  fp.round = [];
+  fp.saturation = [];
+  fp.subnormal = [];
+  fp.flip = [];
   fp.p = [];
-  [c,options] = cpfloat(pi,fp);
+  [~,options] = cpfloat(pi,fp);
   assert_eq(options.format,'h')
-  assert_eq(options.subnormal,1)
   assert_eq(options.round,1)
+  assert_eq(options.saturation,0)
+  assert_eq(options.subnormal,1)
   assert_eq(options.flip,0)
   assert_eq(options.p,0.5)
   % % Takes different path from previous test since fpopts exists.
@@ -71,18 +93,22 @@
   clear cpfloat fp
   fp.flip = 1; fp.format = 'd';
   c = ones(8,1);
-  d = cpfloat(c,fp); assert_eq(norm(d-c,1)>0,true);
-  d = cpfloat(c',fp); assert_eq(norm(d-c',1)>0,true);
+  d = cpfloat(c,fp);
+  assert_eq(norm(d-c,1)>0,true);
+  d = cpfloat(c',fp);
+  assert_eq(norm(d-c',1)>0,true);
   fp.p = 0; % No bits flipped.
-  d = cpfloat(c,fp); assert_eq(d,d);
+  d = cpfloat(c,fp);
+  assert_eq(d,d);
   fp.p = 1; % All bits flipped.
-  d = cpfloat(c,fp); assert_eq(all(d ~= c),true);
+  d = cpfloat(c,fp);
+  assert_eq(all(d ~= c),true);
 
   clear cpfloat
   [~,fp] = cpfloat;
   assert_eq(fp.subnormal,1)
   assert_eq(fp.format,'h')
-  [c,options] = cpfloat(pi);
+  [~,options] = cpfloat(pi);
   assert_eq(options.format,'h')
   assert_eq(options.subnormal,1)
   assert_eq(options.round,1)
@@ -91,7 +117,7 @@
 
   clear fp
   fp.format = 'd';
-  [c,options] = cpfloat(pi,fp);
+  [~,options] = cpfloat(pi,fp);
   assert_eq(options.format,'d')
   assert_eq(options.subnormal,1)
   assert_eq(options.params, [53 -1022 1023])
@@ -101,7 +127,20 @@
   assert_eq(fp.params, [53 -1022 1023])
 
   clear fp
-  fp.format = 'bfloat16'; [c,options] = cpfloat(pi,fp);
+  fp.format = 'E4M3';
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.format,'E4M3')
+  assert_eq(options.infinity,0)
+  assert_eq(options.params, [4 -6 8])
+  [~,fp] = cpfloat;
+  assert_eq(fp.format,'E4M3')
+  assert_eq(fp.infinity,0)
+  assert_eq(fp.params, [4 -6 8])
+
+
+  clear fp
+  fp.format = 'bfloat16';
+  [~,options] = cpfloat(pi,fp);
   assert_eq(options.format,'bfloat16')
   assert_eq(options.subnormal,0)
   assert_eq(options.params, [8 -126 127])
@@ -114,7 +153,8 @@
   [~,fp] = cpfloat;
   fp.format = 'b';
   fp = rmfield(fp, 'params');
-  [c,options] = cpfloat(pi,fp);
+  [~,options] = cpfloat(pi,fp);
+  assert_eq(options.saturation,0) % No saturation if that field was empty.
   assert_eq(options.subnormal,1) % No subnormals only if that field was empty.
 
   % Check these usages do not give an error.
@@ -134,7 +174,8 @@
   B = A + randn(size(A))*1e-12;
   C = cpfloat(B,options);
   assert_eq(A,C);
-  A2 = hilb(6); C = cpfloat(A2);
+  A2 = hilb(6);
+  C = cpfloat(A2);
 
   options.format = 'c';
   options.params = [8 -126 127];  % bfloat16
@@ -152,7 +193,7 @@
   [X1,opt] = cpfloat(A,options);
   [X2,opt2] = cpfloat(A,options2);
   assert_eq(X1,X2)
-  % assert_eq(cpfloat(A,options),cpfloat(A,options2));
+  assert_eq(cpfloat(A,opt),cpfloat(A,opt2));
 
   % Row vector
   clear options
@@ -185,14 +226,14 @@
     elseif i == 2
       % Half precision tests.
       [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('half');
-      options.format = 'h'
+      options.format = 'h';
     elseif i == 3
       % Quarter precision tests.
       [u,xmins,xmin,xmax,p,emins,emin,emax] = float_params('q43');
       options.format = 'E4M3';
       % Modification for OCP compliant q43.
       emin = -6; % Previously thought to be 1-emax=-7.
-      emax = 8; % Previously thought to be 7
+      emax = 8;  % Previously thought to be 7
       emins = emin + 1 - p; % Exponent of smallest subnormal number.
       xmins = 2^emins;
       xmin = 2^emin;
@@ -282,7 +323,22 @@
       assert_eq(c,x)
     end
 
+    % Saturation tests.
+    options.saturation = 1;
+    for j = 1:6
+      options.round = j;
+      x = inf;
+      c = cpfloat(x,options);
+      assert_eq(c,xmax)
+      c = cpfloat(-x,options);
+      assert_eq(c,-xmax)
+    end
+
     % Infinities tests.
+    [~,fpopts] = cpfloat;
+    prev_infinity = fpopts.infinity;
+    options.infinity = 1;
+    options.saturation = 0;
     for j = 1:6
       options.round = j;
       x = inf;
@@ -345,18 +401,21 @@
     end
 
     options.subnormal = 1;
-    c = cpfloat(xmin,options); assert_eq(c,xmin)
+    c = cpfloat(xmin,options);
+    assert_eq(c,xmin)
     x = [xmins xmin/2 xmin 0 xmax 2*xmax 1-delta/5 1+delta/4];
     c = cpfloat(x,options);
     c_expected = [x(1:5) inf 1 1];
     assert_eq(c,c_expected)
 
     options.subnormal = 0;
-    c = cpfloat(xmin,options); assert_eq(c,xmin)
+    c = cpfloat(xmin,options);
+    assert_eq(c,xmin)
     x = [xmins xmin/2 xmin 0 xmax 2*xmax 1-delta/5 1+delta/4];
     c = cpfloat(x,options);
     c_expected = [0 0 x(3:5) inf 1 1];
     assert_eq(c,c_expected)
+    options.infinity = prev_infinity;
 
     % Smallest normal number and spacing between the subnormal numbers.
     y = xmin; delta = xmin*2^(1-p);
@@ -409,12 +468,24 @@
 
     % Do not limit exponent.
     options.explim = 0;
-    x = xmin/2;  c = cpfloat(x,options); assert_eq(c,x)
-    x = -xmin/2;  c = cpfloat(x,options); assert_eq(c,x)
-    x = xmax*2;  c = cpfloat(x,options); assert_eq(c,x)
-    x = -xmax*2;  c = cpfloat(x,options); assert_eq(c,x)
-    x = xmins/2; c = cpfloat(x,options); assert_eq(c,x)
-    x = -xmins/2; c = cpfloat(x,options); assert_eq(c,x)
+    x = xmin/2;
+    c = cpfloat(x,options);
+    assert_eq(c,x)
+    x = -xmin/2;
+    c = cpfloat(x,options);
+    assert_eq(c,x)
+    x = xmax*2;
+    c = cpfloat(x,options);
+    assert_eq(c,x)
+    x = -xmax*2;
+    c = cpfloat(x,options);
+    assert_eq(c,x)
+    x = xmins/2;
+    c = cpfloat(x,options);
+    assert_eq(c,x)
+    x = -xmins/2;
+    c = cpfloat(x,options);
+    assert_eq(c,x)
     A = [pi -pi; pi -pi];
     C = cpfloat(A,options);
     options.explim = 1;
@@ -498,21 +569,32 @@
 
   % Test rounding with CHOPFAST versus native rounding.
   options.format = 's';
-  m = 100; y = zeros(3,n); z = y;
+  m = 100;
+  y = zeros(3,n);
+  z = y;
   for i = 1:m
     x = randn;
-    options.round = 2; y(i,1) = cpfloat(x,options);
-    options.round = 3; y(i,2) = cpfloat(x,options);
-    options.round = 4; y(i,3) = cpfloat(x,options);
+    options.round = 2;
+    y(i,1) = cpfloat(x,options);
+    options.round = 3;
+    y(i,2) = cpfloat(x,options);
+    options.round = 4;
+    y(i,3) = cpfloat(x,options);
     if usingoctave
-      fesetround(inf); z(i,1) = single(x);
-      fesetround(-inf); z(i,2) = single(x);
-      fesetround(0); z(i,3) = single(x);
+      fesetround(inf);
+      z(i,1) = single(x);
+      fesetround(-inf);
+      z(i,2) = single(x);
+      fesetround(0);
+      z(i,3) = single(x);
     else
       % Use undocumented function to set rounding mode in MATLAB.
-      feature('setround',inf), z(i,1) = single(x);
-      feature('setround',-inf), z(i,2) = single(x);
-      feature('setround',0), z(i,3) = single(x);
+      feature('setround',inf);
+      z(i,1) = single(x);
+      feature('setround',-inf);
+      z(i,2) = single(x);
+      feature('setround',0);
+      z(i,3) = single(x);
     end
   end
   assert_eq(y,z)
@@ -533,9 +615,15 @@
   c = cpfloat(x,options);
   assert_eq(c,[0 0 x(3:4)])
 
-  options.format = 'd'; options.subnormal = 0; cpfloat([],options);
-  a = cpfloat(pi); assert_eq(a,pi)
-  options.format = 'd'; options.subnormal = 1; cpfloat([],options);
+  options.format = 'd';
+  options.subnormal = 0;
+  cpfloat([],options);
+  a = cpfloat(pi);
+  assert_eq(a,pi)
+
+  options.format = 'd';
+  options.subnormal = 1;
+  cpfloat([],options);
   a = cpfloat(pi); assert_eq(a,pi)
 
   x = pi^2;
@@ -563,8 +651,10 @@
   yd = cpfloat(pd);
   assert_eq(double(ys),yd)
 
-  options.format = 'h'; options.round = 2;
-  as = single(rand(n,1)); ad = double(as);
+  options.format = 'h';
+  options.round = 2;
+  as = single(rand(n,1));
+  ad = double(as);
   delta = single(rand(n,1));
   cd = cpfloat(ad + 1e-5*double(delta),options);
   cs = cpfloat(as + 1e-5*delta,options);
@@ -581,21 +671,21 @@
   % Test base 2 logarithm
   options.format = 'h';
   options.round = 4;
-  x = single(2^-3 * (sum(2.^(-[0:23]))));
-  assert_eq(cpfloat(x,options), single(2^-3 * (sum(2.^(-[0:10])))))
+  x = single(2^-3 * (sum(2.^(-(0:23)))));
+  assert_eq(cpfloat(x,options), single(2^-3 * (sum(2.^(-(0:10))))))
 
-  x = 2^-3 * (sum(2.^(-[0:52])));
-  assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-[0:10]))))
+  x = 2^-3 * (sum(2.^(-(0:52))));
+  assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-(0:10)))))
 
   options.format = 's';
-  x = single(2^-3 * (sum(2.^(-[0:23]))));
+  x = single(2^-3 * (sum(2.^(-(0:23)))));
   assert_eq(cpfloat(x,options), x)
 
-  x = 2^-3 * (sum(2.^(-[0:52])));
-  assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-[0:23]))))
+  x = 2^-3 * (sum(2.^(-(0:52))));
+  assert_eq(cpfloat(x,options), 2^-3 * (sum(2.^(-(0:23)))))
 
   options.format = 'd';
-  x = 2^-3 * (sum(2.^(-[0:52])));
+  x = 2^-3 * (sum(2.^(-(0:52))));
   assert_eq(cpfloat(x,options), x)
 
   options.round = 1;
diff --git a/test/cpfloat_test.ts b/test/cpfloat_test.ts
index be83030..fb3d791 100644
--- a/test/cpfloat_test.ts
+++ b/test/cpfloat_test.ts
@@ -53,10 +53,14 @@ optstruct *fpopts;
 
 void fpopts_setup(void) {
   fpopts = malloc(sizeof(optstruct));
-  fpopts->flip = 0;
-  fpopts->p = 0.5;
   fpopts->round = 1;
   fpopts->explim = CPFLOAT_EXPRANGE_STOR;
+  fpopts->infinity = CPFLOAT_INF_USE;
+  fpopts->saturation = CPFLOAT_SAT_NO;
+
+  fpopts->flip = 0;
+  fpopts->p = 0.5;
+
   fpopts->bitseed = NULL;
   fpopts->randseedf = NULL;
   fpopts->randseed = NULL;
@@ -163,7 +167,7 @@ void init_intarray_rounding_double(uint64_t *x, size_t n,
                                    uint64_t first, uint64_t step) {
   x[0] = first + 1ul;
   x[1] = first + (step >> 1);
-  for (size_t i = 2; i < n; i+=3) {
+  for (size_t i = 2; i < n; i += 3) {
     first += step;
     x[i] = first - 1ul;
     if (i+1 < n)
@@ -178,7 +182,7 @@ void init_intarray_rounding_float(uint32_t *x, size_t n,
                                   uint32_t first, uint32_t step) {
   x[0] = first + 1u;
   x[1] = first + (step >> 1);
-  for (size_t i = 2; i < n; i+=3) {
+  for (size_t i = 2; i < n; i += 3) {
     first += step;
     x[i] = first - 1u;
     if (i+1 < n)
@@ -209,13 +213,13 @@ void init_fparray_rounding_double(double *x, size_t n,
                                   double first, double step) {
   x[0] = FPOFd(INTOFd(first) + 1lu);
   x[1] = first + step/2;
-  for (size_t i = 2; i < n; i+=3) {
+  for (size_t i = 2; i < n; i += 3) {
     first += step;
     x[i] = FPOFd(INTOFd(first) - 1lu);
     if (i+1 < n)
-        x[i+1] = FPOFd(INTOFd(first) + 1lu);
+      x[i+1] = FPOFd(INTOFd(first) + 1lu);
     if (i+2 < n)
-        x[i+2] = first + step/2;
+      x[i+2] = first + step/2;
   }
 }
 
@@ -224,13 +228,13 @@ void init_fparray_rounding_float(float *x, size_t n,
                                  float first, float step) {
   x[0] = FPOFf(INTOFf(first) + 1lu);
   x[1] = first + step/2;
-  for (size_t i = 2; i < n; i+=3) {
+  for (size_t i = 2; i < n; i += 3) {
     first += step;
     x[i] = FPOFf(INTOFf(first) - 1lu);
     if (i+1 < n)
-        x[i+1] = FPOFf(INTOFf(first) + 1lu);
+      x[i+1] = FPOFf(INTOFf(first) + 1lu);
     if (i+2 < n)
-        x[i+2] = first + step/2;
+      x[i+2] = first + step/2;
   }
 }
 
@@ -368,7 +372,7 @@ void check_equality_double_long_long(double *x, long long *y, size_t n) {
 static inline
 void check_equality_float(float *x, float *y, size_t n) {
   for (size_t j = 0; j < n; j++) {
-        if (!nan_safe_compare_float(x[j], y[j])) {
+    if (!nan_safe_compare_float(x[j], y[j])) {
       printf("FLOAT\n");
       printf("***\nj = %ld\nx  = %23.15e [%X]\ny = %23.15e\n",
              j, x[j], *(uint32_t *)(x + j), (float)y[j]);
@@ -458,12 +462,12 @@ void check_array_float(float *y, float *x, float *ref,
              "pr = %d\n"
              "e = %d\n"
              "s = %d\n",
-          j,
-          x[j], * (uint32_t *)(x+j),
-          y[j], * (uint32_t *)(y+j),
-          ref[j], * (uint32_t *)(ref+j),
-          fpopts->round, fpopts->precision,
-          fpopts->emax, fpopts->subnormal);
+             j,
+             x[j], * (uint32_t *)(x+j),
+             y[j], * (uint32_t *)(y+j),
+             ref[j], * (uint32_t *)(ref+j),
+             fpopts->round, fpopts->precision,
+             fpopts->emax, fpopts->subnormal);
     }
     ck_assert(nan_safe_compare_float(y[j], ref[j]));
   }
@@ -593,7 +597,8 @@ void select_tests_det_double(double *y, double *x, double *ref,
     fpopts->round = round;
     for (format = minformat; format <= maxformat; format++) {
       fpopts->precision = precision[format];
-      fpopts->emax = emax[format]; fpopts->emin = emin[format];
+      fpopts->emax = emax[format];
+      fpopts->emin = emin[format];
       for (subnormal = minsubnormal; subnormal <= maxsubnormal; subnormal++) {
         fpopts->subnormal = subnormal;
         for (explim = minexplim; explim <= maxexplim; explim++) {
@@ -625,7 +630,8 @@ void select_tests_det_float(float *y, float *x, float *ref,
     fpopts->round = round;
     for (format = minformat; format <= maxformat; format++) {
       fpopts->precision = precision[format];
-      fpopts->emax = emax[format]; fpopts->emin = emin[format];
+      fpopts->emax = emax[format];
+      fpopts->emin = emin[format];
       for (subnormal = minsubnormal; subnormal <= maxsubnormal; subnormal++) {
         fpopts->subnormal = subnormal;
         for (explim = minexplim; explim <= maxexplim; explim++) {
@@ -657,7 +663,8 @@ void select_tests_stoc_double(double *tmpin, double *tmpout,
   fpopts->round = mode;
   for (format = minformat; format <= maxformat; format++) {
     fpopts->precision = precision[format];
-    fpopts->emax = emax[format]; fpopts->emin = emin[format];
+    fpopts->emax = emax[format];
+    fpopts->emin = emin[format];
     for (subnormal = minsubnormal; subnormal <= maxsubnormal; subnormal++) {
       fpopts->subnormal = subnormal;
       for (explim = minexplim; explim <= maxexplim; explim++) {
@@ -692,7 +699,8 @@ void select_tests_stoc_float(float *tmpin, float *tmpout,
   fpopts->round = mode;
   for (format = minformat; format <= maxformat; format++) {
     fpopts->precision = precision[format];
-    fpopts->emax = emax[format]; fpopts->emin = emin[format];
+    fpopts->emax = emax[format];
+    fpopts->emin = emin[format];
     for (subnormal = minsubnormal; subnormal <= maxsubnormal; subnormal++) {
       fpopts->subnormal = subnormal;
       for (explim = minexplim; explim <= maxexplim; explim++) {
@@ -709,25 +717,25 @@ void select_tests_stoc_float(float *tmpin, float *tmpout,
 
 double *alloc_init_array_double(double *x, size_t n) {
   double *y = malloc(n * sizeof(*y));
-  for (size_t i=0; i<n; i++)
+  for (size_t i = 0; i < n; i++)
     y[i] = x[i];
   return y;
 }
 
 float *alloc_init_array_float(float *x, size_t n) {
   float *y = malloc(n * sizeof(*y));
-  for (size_t i=0; i<n; i++)
+  for (size_t i = 0; i < n; i++)
     y[i] = x[i];
   return y;
 }
 
 void copy_array_double(double *y, double *x, size_t n) {
-  for (size_t i=0; i<n; i++)
+  for (size_t i = 0; i < n; i++)
     y[i] = x[i];
 }
 
 void copy_array_float(float *y, float *x, size_t n) {
-  for (size_t i=0; i<n; i++)
+  for (size_t i = 0; i < n; i++)
     y[i] = x[i];
 }
 
@@ -761,13 +769,13 @@ void free_array_float(float *y, size_t mode) {
 
 void reset_array_double(double *y, size_t n, size_t mode) {
   if (mode == 1)
-    for (size_t i=0; i<n; i++)
+    for (size_t i = 0; i < n; i++)
       y[i] = NAN;
 }
 
 void reset_array_float(float *y, size_t n, size_t mode) {
   if (mode == 1)
-    for (size_t i=0; i<n; i++)
+    for (size_t i = 0; i < n; i++)
       y[i] = NAN;
 }
 
@@ -901,7 +909,7 @@ void test_trivariate_arith_op_float(float* xd,
 
 #test no_rounding_special_values
 printf("1a. No rounding: special values\n");
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   /* Infinity and non-a-number. */
   size_t n = 4;
   double xd [] = {0, -0, inf_double(), -inf_double(), nan_double()};
@@ -924,10 +932,11 @@ for (size_t mode=1; mode<3; mode++) {
 /* Exactly representable subnormals. */
 #test no_rounding_subnormal_numbers
 printf("1b. No rounding: subnormal numbers\n");
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     size_t n = ldexp(1.,fpopts->precision-1) - 1; // number of subnormals
     double *xd = malloc(n * sizeof(*xd));
     init_fparray_double(xd, n, minsubnormal(fpopts), minsubnormal(fpopts));
@@ -962,10 +971,11 @@ for (size_t mode=1; mode<3; mode++) {
 /* Exactly representable normals. */
 #test no_rounding_normal_numbers
 printf("1c. No rounding: normal numbers\n");
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     size_t n = ldexp(1., fpopts->precision-1) * 2 * fpopts->emax;
     uint64_t *xd = malloc(n * sizeof(*xd));
     init_intarray_double(xd, n, intminnormal_double(fpopts),
@@ -1014,12 +1024,13 @@ double *refxmind = malloc(n * sizeof(*refxmind));
 float *xf = malloc(n * sizeof(*xf));
 float *refzerof = calloc(n, sizeof(*refzerof));
 float *refxminf = malloc(n * sizeof(*refxminf));
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   double *yd = allocate_array_double(xd, n, mode);
   float *yf = allocate_array_float(xf, n, mode);
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     double xmin = minnormal(fpopts);
     double snxmin = minsubnormal(fpopts);
     double halfxmin = xmin / 2;
@@ -1164,12 +1175,13 @@ double *refxmind = malloc(n * sizeof(*refxmind));
 float *xf = malloc(n * sizeof(*xf));
 float *refzerof = calloc(n, sizeof(*refzerof));
 float *refxminf = malloc(n * sizeof(*refxminf));
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   double *yd = allocate_array_double(xd, n, mode);
   float *yf = allocate_array_float(xf, n, mode);
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     double xmin = minsubnormal(fpopts);
     double halfxmin = xmin / 2;
     double xd_imm [] = {nextafter(0, INFINITY),
@@ -1308,145 +1320,165 @@ double *refinfd = malloc(n * sizeof(*refinfd));
 float *xf = malloc(n * sizeof(*xf));
 float *refxmaxf = malloc(n * sizeof(*refxmaxf));
 float *refinff = malloc(n * sizeof(*refinff));
-for (size_t mode=1; mode<3; mode++) {
-  double *yd = allocate_array_double(xd, n, mode);
-  float *yf = allocate_array_float(xf, n, mode);
-  for (size_t i = 0; i < nformats; i++) {
-    fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
-    double xmax = maxnormal(fpopts);
-    double xbound = maxbound(fpopts);
-    double xd_imm [] = {nextafter(xmax, INFINITY),
-                        nextafter(xbound, 0),
-                    xbound,
-                    nextafter(xbound, INFINITY)};
-    double refd [] = {xmax, xmax, inf_double(), inf_double()};
-    for (size_t j = 0; j < n; j++) {
-      refxmaxd[j] = xmax;
-      refinfd[j] = inf_double();
-    }
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refd, n, fpopts, -1, -1, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refd, n, fpopts, 1, 1, i, i, -1, 1);
-    refd[2] = xmax;
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refd, n, fpopts, 0, 0, i, i, -1, 1);
-    refd[2] = inf_double();
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refinfd, n, fpopts, 2, 2, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 3, 4, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 7, 7, i, i, -1, 1);
-    csign_intarray_double((uint64_t *)xd_imm, n);
-    csign_intarray_double((uint64_t *)refd, n);
-    csign_intarray_double((uint64_t *)refxmaxd, n);
-    csign_intarray_double((uint64_t *)refinfd, n);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refd, n, fpopts, -1, -1, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refd, n, fpopts, 1, 1, i, i, -1, 1);
-    refd[2] = -xmax;
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refd, n, fpopts, 0, 0, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 2, 2, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refinfd, n, fpopts, 3, 3, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 4, 4, i, i, -1, 1);
-    reset_array_double(yd, n, mode);
-    copy_array_double(xd, (double *)&xd_imm, n);
-    select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 7, 7, i, i, -1, 1);
+for (size_t infinity = 0; infinity < 2; infinity++) {
+  fpopts->infinity = infinity;
+  for (size_t saturation = 0; saturation < 2; saturation++) {
+    fpopts->saturation = saturation;
+    for (size_t mode = 1; mode < 3; mode++) {
+      double *yd = allocate_array_double(xd, n, mode);
+      float *yf = allocate_array_float(xf, n, mode);
+      for (size_t i = 0; i < nformats; i++) {
+        fpopts->precision = precision[i];
+        fpopts->emax = emax[i];
+        fpopts->emin = emin[i];
+
+        double xmax = maxnormal(fpopts);
+        double xbound = maxbound(fpopts);
+
+        double of_value = saturation == 1 ? xmax :
+          (infinity == 1 ? inf_double() : NAN);
+
+        double xd_imm [] = {nextafter(xmax, INFINITY),
+                            nextafter(xbound, 0),
+                            xbound,
+                            nextafter(xbound, INFINITY)};
+        double refd [] = {xmax, xmax, of_value, of_value};
+        for (size_t j = 0; j < n; j++) {
+          refxmaxd[j] = xmax;
+          refinfd[j] = of_value;
+        }
 
-    float xf_imm [] = {nextafterf(xmax, INFINITY),
-                       nextafterf(xbound, 0),
-                   xbound,
-                   nextafterf(xbound, INFINITY)};
-    float reff [] = {xmax, xmax, inf_float(), inf_float()};
-    for (size_t j = 0; j < n; j++) {
-      refxmaxf[j] = xmax;
-      refinff[j] = inf_float();
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refd, n, fpopts, -1, -1, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refd, n, fpopts, 1, 1, i, i, -1, 1);
+        refd[2] = xmax;
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refd, n, fpopts, 0, 0, i, i, -1, 1);
+        refd[2] = of_value;
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refinfd, n, fpopts, 2, 2, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 3, 4, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 7, 7, i, i, -1, 1);
+        csign_intarray_double((uint64_t *)xd_imm, n);
+        csign_intarray_double((uint64_t *)refd, n);
+        csign_intarray_double((uint64_t *)refxmaxd, n);
+        csign_intarray_double((uint64_t *)refinfd, n);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refd, n, fpopts, -1, -1, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refd, n, fpopts, 1, 1, i, i, -1, 1);
+        refd[2] = -xmax;
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refd, n, fpopts, 0, 0, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 2, 2, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refinfd, n, fpopts, 3, 3, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 4, 4, i, i, -1, 1);
+        reset_array_double(yd, n, mode);
+        copy_array_double(xd, (double *)&xd_imm, n);
+        select_tests_det_double(yd, xd, refxmaxd, n, fpopts, 7, 7, i, i, -1, 1);
+
+        float of_valuef = saturation == 1 ? xmax :
+          (infinity == 1 ? inf_float() : NAN);
+        float xf_imm [] = {nextafterf(xmax, INFINITY),
+                           nextafterf(xbound, 0),
+                           xbound,
+                           nextafterf(xbound, INFINITY)};
+        float reff [] = {xmax, xmax, of_valuef, of_valuef};
+        for (size_t j = 0; j < n; j++) {
+          refxmaxf[j] = xmax;
+          refinff[j] = of_valuef;
+        }
+
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, reff, n, fpopts, -1, -1, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, reff, n, fpopts, 1, 1, i, i, -1, 1);
+        reff[2] = xmax;
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, reff, n, fpopts, 0, 0, i, i, -1, 1);
+        reff[2] = of_valuef;
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refinff, n, fpopts, 2, 2, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 3, 4, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 7, 7, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        csign_intarray_float((uint32_t *)xf_imm, n);
+        csign_intarray_float((uint32_t *)reff, n);
+        csign_intarray_float((uint32_t *)refxmaxf, n);
+        csign_intarray_float((uint32_t *)refinff, n);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, reff, n, fpopts, -1, -1, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, reff, n, fpopts, 1, 1, i, i, -1, 1);
+        reff[2] = -xmax;
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, reff, n, fpopts, 0, 0, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 2, 2, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refinff, n, fpopts, 3, 3, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 4, 4, i, i, -1, 1);
+        reset_array_float(yf, n, mode);
+        copy_array_float(xf, (float *)&xf_imm, n);
+        select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 7, 7, i, i, -1, 1);
+      }
+      free_array_double(yd, mode);
+      free_array_float(yf, mode);
     }
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, reff, n, fpopts, -1, -1, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, reff, n, fpopts, 1, 1, i, i, -1, 1);
-    reff[2] = xmax;
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, reff, n, fpopts, 0, 0, i, i, -1, 1);
-    reff[2] = inf_float();
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refinff, n, fpopts, 2, 2, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 3, 4, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 7, 7, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    csign_intarray_float((uint32_t *)xf_imm, n);
-    csign_intarray_float((uint32_t *)reff, n);
-    csign_intarray_float((uint32_t *)refxmaxf, n);
-    csign_intarray_float((uint32_t *)refinff, n);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, reff, n, fpopts, -1, -1, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, reff, n, fpopts, 1, 1, i, i, -1, 1);
-    reff[2] = -xmax;
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, reff, n, fpopts, 0, 0, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 2, 2, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refinff, n, fpopts, 3, 3, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 4, 4, i, i, -1, 1);
-    reset_array_float(yf, n, mode);
-    copy_array_float(xf, (float *)&xf_imm, n);
-    select_tests_det_float(yf, xf, refxmaxf, n, fpopts, 7, 7, i, i, -1, 1);
   }
-  free_array_double(yd, mode);
-  free_array_float(yf, mode);
-}
-free(xd);
-free(refxmaxd);
-free(refinfd);
-free(xf);
-free(refxmaxf);
-free(refinff);
+  free(xd);
+  free(refxmaxd);
+  free(refinfd);
+  free(xf);
+  free(refxmaxf);
+  free(refinff);
+ }
+fpopts->saturation = CPFLOAT_SAT_NO;
+fpopts->infinity = CPFLOAT_INF_USE;
+
 
 /* Rounding of numbers in the subnormal range. */
 #test deterministic_rounding_subnormal_numbers
 printf("2d. Deterministic rounding: subnormal numbers\n");
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     size_t n = 3 * ldexp(1.,fpopts->precision-1) - 1;
     double *xd_imm = malloc(n * sizeof(*xd_imm));
     double *xd = malloc(n * sizeof(*xd));
@@ -1472,7 +1504,7 @@ for (size_t mode=1; mode<3; mode++) {
                             -1, -1, i, i, 1, 1);
     csign_intarray_double((uint64_t *)xd_imm, n);
     csign_intarray_double((uint64_t *)refd, n);
-    for (size_t j = 1; j < n; j+=3)
+    for (size_t j = 1; j < n; j += 3)
       refd[j] -= stepd;
     reset_array_double(yd, n, mode);
     copy_array_double(xd, xd_imm, n);
@@ -1488,7 +1520,7 @@ for (size_t mode=1; mode<3; mode++) {
     csign_intarray_double((uint64_t *)refd, n);
     reset_array_double(yd, n, mode);
     copy_array_double(xd, xd_imm, n);
-    for (size_t j = 1; j < n; j+=6)
+    for (size_t j = 1; j < n; j += 6)
       refd[j] += stepd;
     reset_array_double(yd, n, mode);
     copy_array_double(xd, xd_imm, n);
@@ -1573,7 +1605,7 @@ for (size_t mode=1; mode<3; mode++) {
                            -1, -1, i, i, 1, 1);
     csign_intarray_float((uint32_t *)xf_imm, n);
     csign_intarray_float((uint32_t *)reff, n);
-    for (size_t j = 1; j < n; j+=3)
+    for (size_t j = 1; j < n; j += 3)
       reff[j] -= stepf;
     reset_array_float(yf, n, mode);
     copy_array_float(xf, xf_imm, n);
@@ -1587,7 +1619,7 @@ for (size_t mode=1; mode<3; mode++) {
                            0, 0, i, i, 1, 1);
     csign_intarray_float((uint32_t *)xf_imm, n);
     csign_intarray_float((uint32_t *)reff, n);
-    for (size_t j = 1; j < n; j+=6)
+    for (size_t j = 1; j < n; j += 6)
       reff[j] += stepf;
     reset_array_float(yf, n, mode);
     copy_array_float(xf, xf_imm, n);
@@ -1655,10 +1687,11 @@ for (size_t mode=1; mode<3; mode++) {
 /* Rounding of numbers in the normal range. */
 #test deterministic_rounding_normal_numbers
 printf("2e. Deterministic rounding: normal numbers\n");
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     size_t n = 3 * (ldexp(1., fpopts->precision-1) * 2 * fpopts->emax - 1);
     uint64_t *xd_imm = malloc(n * sizeof(*xd_imm));
     uint64_t *refd = malloc(n * sizeof(*refd));
@@ -1684,7 +1717,7 @@ for (size_t mode=1; mode<3; mode++) {
                             -1, -1, i, i, -1, -1);
     csign_intarray_double(xd_imm, n);
     csign_intarray_double(refd, n);
-    for (size_t j = 1; j < n; j+=3)
+    for (size_t j = 1; j < n; j += 3)
       refd[j] -= stepd;
     reset_array_double(yd, n, mode);
     copy_array_double(xd, (double *)xd_imm, n);
@@ -1698,7 +1731,7 @@ for (size_t mode=1; mode<3; mode++) {
                             0, 0, i, i, -1, -1);
     csign_intarray_double(xd_imm, n);
     csign_intarray_double(refd, n);
-    for (size_t j = 4; j < n; j+=6)
+    for (size_t j = 4; j < n; j += 6)
       refd[j] += stepd;
     reset_array_double(yd, n, mode);
     copy_array_double(xd, (double *)xd_imm, n);
@@ -1785,7 +1818,7 @@ for (size_t mode=1; mode<3; mode++) {
                            -1, -1, i, i, -1, -1);
     csign_intarray_float(xf_imm, n);
     csign_intarray_float(reff, n);
-    for (size_t j = 1; j < n; j+=3)
+    for (size_t j = 1; j < n; j += 3)
       reff[j] -= stepf;
     reset_array_float(yf, n, mode);
     copy_array_float(xf, (float *)xf_imm, n);
@@ -1799,7 +1832,7 @@ for (size_t mode=1; mode<3; mode++) {
                            0, 0, i, i, -1, -1);
     csign_intarray_float(xf_imm, n);
     csign_intarray_float(reff, n);
-    for (size_t j = 4; j < n; j+=6)
+    for (size_t j = 4; j < n; j += 6)
       reff[j] += stepf;
     reset_array_float(yf, n, mode);
     copy_array_float(xf, (float *)xf_imm, n);
@@ -1878,11 +1911,12 @@ double *tmpdout = malloc(NREPS * sizeof(*tmpdout));
 float *tmpfin = malloc(NREPS * sizeof(*tmpfin));
 float *tmpfout = malloc(NREPS * sizeof(*tmpfout));
 double proundequi = 0.50;
-for (size_t mode=1; mode<3; mode++) {
+for (size_t mode = 1; mode < 3; mode++) {
   for(subnormal = 0; subnormal <= 1; subnormal++) {
     for(size_t i = 0; i < nformats; i++) {
       fpopts->precision = precision[i];
-      fpopts->emax = emax[i]; fpopts->emin = emin[i];
+      fpopts->emax = emax[i];
+      fpopts->emin = emin[i];
       double xmin;
       if (subnormal)
         xmin = minsubnormal(fpopts);
@@ -1929,11 +1963,12 @@ double *tmpdout = malloc(NREPS * sizeof(*tmpdout));
 float *tmpfin = malloc(NREPS * sizeof(*tmpfin));
 float *tmpfout = malloc(NREPS * sizeof(*tmpfout));
 double proundequi = 0.50;
-for (size_t mode=1; mode<3; mode++ ) {
+for (size_t mode = 1; mode < 3; mode++ ) {
   for(subnormal = 0; subnormal <= 1; subnormal++) {
     for(i = 0; i < nformats; i++) {
       fpopts->precision = precision[i];
-      fpopts->emax = emax[i]; fpopts->emin = emin[i];
+      fpopts->emax = emax[i];
+      fpopts->emin = emin[i];
       double xmax = maxnormal(fpopts);
       double xbound = maxbound(fpopts);
       double *yd =  malloc(n * sizeof(*yd));
@@ -2009,10 +2044,11 @@ double *tmpdout = malloc(NREPS * sizeof(*tmpdout));
 float *tmpfin = malloc(NREPS * sizeof(*tmpfin));
 float *tmpfout = malloc(NREPS * sizeof(*tmpfout));
 double proundequi = 0.50;
-for (size_t mode=1; mode<3; mode++ ) {
+for (size_t mode = 1; mode < 3; mode++ ) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
     size_t n = 3 * ldexp(1.,fpopts->precision-1) - 1;
     double *xd = malloc(n * sizeof(*xd));
     double stepd = minsubnormal(fpopts);
@@ -2061,12 +2097,13 @@ double *tmpdout = malloc(NREPS * sizeof(*tmpdout));
 float *tmpfin = malloc(NREPS * sizeof(*tmpfin));
 float *tmpfout = malloc(NREPS * sizeof(*tmpfout));
 double proundequi = 0.50;
-for (size_t mode=1; mode<3; mode++ ) {
+for (size_t mode = 1; mode < 3; mode++ ) {
   for (subnormal = 0; subnormal <= 1; subnormal++) {
     for (explim = 0; explim <= 1; explim++) {
       for (size_t i = 0; i < nformats; i++) {
         fpopts->precision = precision[i];
-        fpopts->emax = emax[i]; fpopts->emin = emin[i];
+        fpopts->emax = emax[i];
+        fpopts->emin = emin[i];
 
         size_t n = 3 * (ldexp(1., fpopts->precision-1) * 2 * fpopts->emax - 1);
         uint64_t *xd = malloc(n * sizeof(*xd));
@@ -2139,7 +2176,8 @@ size_t n_roundings = 7;
 
 for (size_t i = 0; i < nformats; i++) {
   fpopts->precision = precision[i];
-  fpopts->emax = emax[i]; fpopts->emin = emin[i];
+  fpopts->emax = emax[i];
+  fpopts->emin = emin[i];
   for (size_t i = 0; i < n_roundings; i++) {
     fpopts->round = det_roundings[i];
 
@@ -2272,7 +2310,8 @@ float *reff = malloc(n * sizeof(* xf));
 
 for (size_t i = 0; i < nformats; i++) {
   fpopts->precision = precision[i];
-  fpopts->emax = emax[i]; fpopts->emin = emin[i];
+  fpopts->emax = emax[i];
+  fpopts->emin = emin[i];
   for (size_t i = 0; i < n_roundings; i++) {
     fpopts->round = det_roundings[i];
     // Univariate functions.
@@ -2413,7 +2452,8 @@ const int resd_fpclassify [] = {FP_NORMAL, FP_NORMAL,FP_NORMAL, FP_SUBNORMAL,
 
 for (size_t i = 0; i < nformats; i++) {
   fpopts->precision = precision[i];
-  fpopts->emax = emax[i]; fpopts->emin = emin[i];
+  fpopts->emax = emax[i];
+  fpopts->emin = emin[i];
   double xmind = minnormal(fpopts);
   double xminds = minsubnormal(fpopts);
   double ad[] = {-2., -1, -xmind, -xminds, xminds, xmind, 1., CONST_SQRT2,
@@ -2505,7 +2545,8 @@ const int resf_fpclassify [] = {FP_NORMAL, FP_NORMAL,FP_NORMAL,
 
 for (size_t i = 0; i < nformats; i++) {
   fpopts->precision = precision[i];
-  fpopts->emax = emax[i]; fpopts->emin = emin[i];
+  fpopts->emax = emax[i];
+  fpopts->emin = emin[i];
   float xminf = minnormal(fpopts);
   float xminfs = minsubnormal(fpopts);
   float af[] = { -2., -1, -xminf, xminf, 1., CONST_SQRT2,
@@ -2588,10 +2629,11 @@ printf("4c. Next and previous floating-point number\n");
 fpopts->explim = CPFLOAT_EXPRANGE_TARG;
 fpopts->subnormal = CPFLOAT_SUBN_USE;
 // Subnormals
-for (size_t mode=2; mode<3; mode++) {
+for (size_t mode = 2; mode < 3; mode++) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
 
     size_t n = (ldexp(1.,fpopts->precision-1) - 1) + 2;
     double *ad = malloc(n * sizeof(*ad));
@@ -2688,10 +2730,11 @@ for (size_t mode=2; mode<3; mode++) {
 }
 
 // Normals
-for (size_t mode=2; mode<3; mode++) {
+for (size_t mode = 2; mode < 3; mode++) {
   for (size_t i = 0; i < nformats; i++) {
     fpopts->precision = precision[i];
-    fpopts->emax = emax[i]; fpopts->emin = emin[i];
+    fpopts->emax = emax[i];
+    fpopts->emin = emin[i];
 
     size_t n = ldexp(1., fpopts->precision-1) * 2 * fpopts->emax + 2;
     double *ad = malloc(n * sizeof(*ad));
@@ -2972,7 +3015,8 @@ check_equality_double_long_long(rd7, xll, n-3);
 printf("4d. Arithmetic operations on large arrays\n");
 size_t i = 0;
 fpopts->precision = precision[i];
-fpopts->emax = emax[i]; fpopts->emin = emin[i];
+fpopts->emax = emax[i];
+fpopts->emin = emin[i];
 size_t n = 3 * (ldexp(1., fpopts->precision-1) * (ldexp(1., 5) - 2) - 1) - 2;
 double *ad = malloc(n * sizeof(*ad));
 double *bd = malloc(n * sizeof(*bd));