Corrections to integer overflow handling in sum()

- Fixes #361: When na.rm = TRUE, exclude NaN from the sum - Use Java 1.8 exact math to trap integer overflow - Determine return type / error first, before beginning computation. In some cases, the behavior does not exactly match that of GNU R 3.4.2, but I believe Renjin's behavior is now more consistent have submitted a bug report and patch to GNU R: https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17372
bedatadriven · Jan 9, 2018 · ef9d981 · ef9d981
1 parent dd71919
commit ef9d981
Show file tree

Hide file tree

Showing 13 changed files with 2,442 additions and 91 deletions.
diff --git a/core/src/main/java/org/renjin/primitives/Ops.java b/core/src/main/java/org/renjin/primitives/Ops.java
@@ -50,18 +50,11 @@ public static double plus(double x, double y) {
   @DataParallel(PreserveAttributeStyle.ALL)
   public static int plus(@Cast(CastStyle.EXPLICIT) int a, @Cast(CastStyle.EXPLICIT) int b) {
     // check for integer overflow
-    int r = a + b;
-    boolean bLTr = b < r;
-    if (a > 0) {
-      if (bLTr) {
-        return r;
-      }
-    } else {
-      if (!bLTr) {
-        return r;
-      }
+    try {
+      return Math.addExact(a, b);
+    } catch(ArithmeticException e) {
+      return IntVector.NA;
     }
-    return IntVector.NA;
   }
 
   @Builtin("+")
@@ -114,10 +107,9 @@ public static int minus(@Cast(CastStyle.EXPLICIT) int x) {
   @Builtin("-")
   @DataParallel(PreserveAttributeStyle.ALL)
   public static int minus(@Cast(CastStyle.EXPLICIT) int a, @Cast(CastStyle.EXPLICIT) int b) {
-    int r = a - b;
-    if ((a < 0 == b < 0) || (a < 0 == r < 0)) {
-      return r;
-    } else {
+    try {
+      return Math.subtractExact(a, b);
+    } catch (ArithmeticException e) {
       return IntVector.NA;
     }
   }
@@ -193,10 +185,9 @@ public static double multiply(double x, double y) {
   @Builtin("*")
   @DataParallel(PreserveAttributeStyle.ALL)
   public static int multiply(@Cast(CastStyle.EXPLICIT) int x, @Cast(CastStyle.EXPLICIT) int y) {
-    long l = (long) x * (long) y;
-    if (!(l < Integer.MIN_VALUE || l > Integer.MAX_VALUE)) {
-      return (int) l;
-    } else {
+    try {
+      return Math.multiplyExact(x, y);
+    } catch (ArithmeticException e) {
       return IntVector.NA;
     }
   }

diff --git a/core/src/main/java/org/renjin/primitives/Summary.java b/core/src/main/java/org/renjin/primitives/Summary.java
@@ -453,10 +453,38 @@ private static Complex complexProduct(ListVector arguments, boolean removeNA) {
   @GroupGeneric
   public static SEXP sum(@Current Context context, @ArgumentList ListVector arguments,
                          @NamedFlag("na.rm") boolean removeNA) throws IOException {
-    double realSum = 0;
+
+
+    // Check the return type first
     boolean haveDouble = false;
-    double imaginarySum = 0;
     boolean haveComplex = false;
+    for (SEXP argument : arguments) {
+      if(argument instanceof DoubleVector) {
+        haveDouble = true;
+      } else if(argument instanceof ComplexVector) {
+        haveComplex = true;
+      } else if(argument instanceof IntVector || argument instanceof LogicalVector || argument instanceof Null) {
+        // NOOP
+      } else {
+        // This is a difference between Renjin and GNU R that seems worth defending:
+        // we exit early if there is an invalid argument. Otherwise the exception can be silenced by
+        // an integer overflow or an NA value in an earlier argument which seem capricious.
+        throw new EvalException("invalid 'type' (" + argument.getTypeName() + ") of argument");
+      }
+    }
+
+    SEXP naValue;
+    if(haveComplex) {
+      naValue = ComplexArrayVector.NA_VECTOR;
+    } else if(haveDouble) {
+      naValue = DoubleArrayVector.NA_VECTOR;
+    } else {
+      naValue = IntArrayVector.NA_VECTOR;
+    }
+
+    double realSum = 0;
+    int intSum = 0;
+    double imaginarySum = 0;
 
     if(arguments.length() == 1 && arguments.get(0) instanceof DoubleVector && !removeNA) {
       DoubleVector argument = (DoubleVector) arguments.get(0);
@@ -465,34 +493,53 @@ public static SEXP sum(@Current Context context, @ArgumentList ListVector argume
       }
     }
 
-    for(SEXP argument : arguments) {
-      if(argument instanceof IntVector || argument instanceof LogicalVector) {
-        AtomicVector vector = (AtomicVector)argument;
-        for(int i=0;i!=argument.length();++i) {
-          if(vector.isElementNA(i)) {
-            if(!removeNA) {
-              return haveDouble ? new DoubleArrayVector(DoubleVector.NA) : new IntArrayVector(IntVector.NA);
+    argumentLoop: for (SEXP argument : arguments) {
+      if (argument instanceof IntVector || argument instanceof LogicalVector) {
+        AtomicVector vector = (AtomicVector) argument;
+        for (int i = 0; i != argument.length(); ++i) {
+          int element = vector.getElementAsInt(i);
+          if (IntVector.isNA(element)) {
+            if (!removeNA) {
+              return naValue;
             }
           } else {
-            int element = vector.getElementAsInt(i);
-            realSum += element;
+            try {
+              intSum = Math.addExact(intSum, element);
+            } catch(ArithmeticException e) {
+              // Overflow
+              context.warn("Integer overflow - use sum(as.numeric(.))");
+              return naValue;
+            }
           }
         }
-      } else if(argument instanceof DoubleVector) {
-        DoubleVector vector = (DoubleVector)argument;
-        haveDouble = true;
-        for(int i=0;i!=vector.length();++i) {
-          if(vector.isElementNA(i)) {
-            if(!removeNA) {
-              return new DoubleArrayVector(DoubleVector.NA);
+      } else if (argument instanceof DoubleVector) {
+        DoubleVector vector = (DoubleVector) argument;
+        for (int i = 0; i != vector.length(); ++i) {
+          double doubleValue = vector.getElementAsDouble(i);
+          if (Double.isNaN(doubleValue)) {
+            if (DoubleVector.isNA(doubleValue)) {
+              // If this is a "missing" value NA, then we can abort
+              // all calculation now.
+              if (!removeNA) {
+                return naValue;
+              }
+            } else {
+              // Otherwise for normal NaNs, we can stop processing
+              // only if there are no complex arguments.
+              if (!removeNA) {
+                if (haveComplex) {
+                  break;
+                } else {
+                  return new DoubleArrayVector(Double.NaN);
+                }
+              }
             }
           } else {
-            realSum += vector.getElementAsDouble(i);
+            realSum += doubleValue;
           }
         }
-      } else if(argument instanceof ComplexVector) {
+      } else if (argument instanceof ComplexVector) {
         ComplexVector vector = (ComplexVector) argument;
-        haveComplex = true;
         for (int i = 0; i != vector.length(); ++i) {
           if (vector.isElementNA(i)) {
             if (!removeNA) {
@@ -504,26 +551,16 @@ public static SEXP sum(@Current Context context, @ArgumentList ListVector argume
             imaginarySum += z.getImaginary();
           }
         }
-
-      } else if(argument == Null.INSTANCE) {
-        // NOOP
-
-      } else {
-        throw new EvalException("invalid 'type' (" + argument.getTypeName() + ") of argument");
       }
     }
-    if(haveComplex) {
-      return new ComplexArrayVector(ComplexVector.complex(realSum, imaginarySum));
+    if (haveComplex) {
+      return new ComplexArrayVector(ComplexVector.complex(realSum + intSum, imaginarySum));
 
-    } else if(haveDouble) {
-      return new DoubleArrayVector(realSum);
+    } else if (haveDouble) {
+      return new DoubleArrayVector(realSum + intSum);
 
     } else {
-      if(realSum < Integer.MIN_VALUE || realSum > Integer.MAX_VALUE) {
-        context.warn("Integer overflow - use sum(as.numeric(.))");
-        return new IntArrayVector(IntVector.NA);
-      }
-      return new IntArrayVector((int)realSum);
+      return new IntArrayVector(intSum);
     }
   }
 

diff --git a/core/src/main/java/org/renjin/primitives/sequence/RepDoubleVector.java b/core/src/main/java/org/renjin/primitives/sequence/RepDoubleVector.java
@@ -51,7 +51,7 @@ private RepDoubleVector(double constant, int length) {
 
   public static DoubleVector createConstantVector(double constant, int length) {
     if (length <= 0) {
-      return DoubleVector.EMPTY;
+      return DoubleArrayVector.EMPTY;
     }
     if (length < LENGTH_THRESHOLD) {
       double array[] = new double[length];

diff --git a/core/src/main/java/org/renjin/sexp/ComplexArrayVector.java b/core/src/main/java/org/renjin/sexp/ComplexArrayVector.java
@@ -26,6 +26,12 @@
 
 public class ComplexArrayVector extends ComplexVector {
 
+  public static final ComplexVector NA_VECTOR = new ComplexArrayVector(ComplexVector.NA);
+  public static final ComplexVector EMPTY = new ComplexArrayVector();
+
+  public static final ComplexVector NAMED_EMPTY = new ComplexArrayVector(new Complex[0],
+      AttributeMap.builder().setNames(StringVector.EMPTY).build());
+
   private final double[] values;
 
   public ComplexArrayVector(Complex... values) {

diff --git a/core/src/main/java/org/renjin/sexp/ComplexVector.java b/core/src/main/java/org/renjin/sexp/ComplexVector.java
@@ -27,10 +27,6 @@
 
 public abstract class ComplexVector extends AbstractAtomicVector implements Iterable<Complex> {
   public static final String TYPE_NAME = "complex";
-  public static final ComplexVector EMPTY = new ComplexArrayVector();
-
-  public static final ComplexVector NAMED_EMPTY = new ComplexArrayVector(new Complex[0], 
-      AttributeMap.builder().setNames(StringVector.EMPTY).build());
 
   public static final Complex NA = new Complex(DoubleVector.NA, DoubleVector.NA);
   public static final Complex NaN = new Complex(DoubleVector.NaN, DoubleVector.NaN);

diff --git a/core/src/main/java/org/renjin/sexp/DoubleArrayVector.java b/core/src/main/java/org/renjin/sexp/DoubleArrayVector.java
@@ -26,9 +26,14 @@
 
 public final class DoubleArrayVector extends DoubleVector {
 
-  public static DoubleArrayVector ZERO = new DoubleArrayVector(0);
+  public static final DoubleArrayVector ZERO = new DoubleArrayVector(0);
+
+  public static final DoubleArrayVector ONE = new DoubleArrayVector(1);
+
+  public static final DoubleArrayVector EMPTY = new DoubleArrayVector();
+
+  public static final DoubleArrayVector NA_VECTOR = new DoubleArrayVector(DoubleVector.NA);
 
-  public static DoubleArrayVector ONE = new DoubleArrayVector(1);
 
   private double[] values;
 

diff --git a/core/src/main/java/org/renjin/sexp/DoubleVector.java b/core/src/main/java/org/renjin/sexp/DoubleVector.java
@@ -63,7 +63,6 @@ public abstract class DoubleVector extends AbstractAtomicVector implements Itera
   public static final double NaN = Double.NaN;
   public static final double EPSILON = 2.220446e-16;
 
-  public static final DoubleVector EMPTY = new DoubleArrayVector();
   public static final int NA_PAYLOAD = 1954;
 
   protected DoubleVector(AttributeMap attributes) {

diff --git a/core/src/main/java/org/renjin/sexp/IntArrayVector.java b/core/src/main/java/org/renjin/sexp/IntArrayVector.java
@@ -24,6 +24,8 @@
 
 public class IntArrayVector extends IntVector {
 
+  public static final IntArrayVector NA_VECTOR = new IntArrayVector(IntVector.NA);
+
   private int[] values;
 
   private IntArrayVector(AttributeMap attributes) {
@@ -118,7 +120,7 @@ public Builder(int initialSize, int initialCapacity) {
       }
       values = new int[initialCapacity];
       size = initialSize;
-      Arrays.fill(values, NA);
+      Arrays.fill(values, IntVector.NA);
     }
 
     public Builder(int initialSize) {