From 91e8747e59304d5b7e78b336bd5cee6d6a8e54f9 Mon Sep 17 00:00:00 2001
From: Kyle Kavanagh <kdkavanagh@gmail.com>
Date: Mon, 2 Sep 2019 16:27:14 -0500
Subject: [PATCH 1/3] Add bit64 support

---
 src/python.cpp | 55 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 10 deletions(-)
diff --git a/src/python.cpp b/src/python.cpp
index 28c4a4b14..4336a0e33 100644
--- a/src/python.cpp
+++ b/src/python.cpp
@@ -201,17 +201,18 @@ int narrow_array_typenum(int typenum) {
   case NPY_SHORT:
   case NPY_USHORT:
   case NPY_INT:
+  case NPY_LONG:
     typenum = NPY_LONG;
     break;
     // double
   case NPY_UINT:
   case NPY_ULONG:
   case NPY_ULONGLONG:
-  case NPY_LONG:
   case NPY_LONGLONG:
   case NPY_HALF:
   case NPY_FLOAT:
   case NPY_DOUBLE:
+
     typenum = NPY_DOUBLE;
     break;
 
@@ -618,9 +619,18 @@ SEXP py_to_r(PyObject* x, bool convert) {
       return LogicalVector::create(x == Py_True);
 
     // integer
-    else if (scalarType == INTSXP)
-      return IntegerVector::create(PyInt_AsLong(x));
-
+    else if (scalarType == INTSXP) {
+      long val = PyInt_AsLong(x);
+      if(val > std::numeric_limits<int>::max()) {
+        Rcpp::NumericVector vec(1);
+        std::memcpy(&(vec[0]), &(val), sizeof(double));
+        vec.attr("class") = "integer64";
+        return vec;
+      }
+      else{
+        return IntegerVector::create(val);
+      }
+    }
     // double
     else if (scalarType == REALSXP)
       return NumericVector::create(PyFloat_AsDouble(x));
@@ -653,9 +663,24 @@ SEXP py_to_r(PyObject* x, bool convert) {
       return vec;
     } else if (scalarType == INTSXP) {
       Rcpp::IntegerVector vec(len);
-      for (Py_ssize_t i = 0; i<len; i++)
-        vec[i] = PyInt_AsLong(PyList_GetItem(x, i));
+      for (Py_ssize_t i = 0; i<len; i++) {
+        long num = PyInt_AsLong(PyList_GetItem(x, i));
+        if(num > std::numeric_limits<int>::max()) {
+          //We need to start over an interpret as 64 bit int
+          Rcpp::NumericVector nVec(len);
+          long long* res_ptr  = (long long*) dataptr(nVec);
+          for (Py_ssize_t j = 0; j<len; j++) {
+            res_ptr[j] = PyInt_AsLong(PyList_GetItem(x, j));;
+          }
+          nVec.attr("class") = "integer64";
+          return nVec;
+          break;
+        } else {
+          vec[i] = num;
+        }
+      }
       return vec;
+
     } else if (scalarType == CPLXSXP) {
       Rcpp::ComplexVector vec(len);
       for (Py_ssize_t i = 0; i<len; i++) {
@@ -769,8 +794,17 @@ SEXP py_to_r(PyObject* x, bool convert) {
       case NPY_LONG: {
         npy_long* pData = (npy_long*)PyArray_DATA(array);
         rArray = Rf_allocArray(INTSXP, dimsVector);
-        for (int i=0; i<len; i++)
+        for (int i=0; i<len; i++) {
+          if(pData[i] > std::numeric_limits<int>::max()) {
+            Rcpp::NumericVector nVec(len);
+            //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/
+            std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double));
+            nVec.attr("class") = "integer64";
+            nVec.attr("dim") = dimsVector;
+            return nVec;
+          }
           INTEGER(rArray)[i] = pData[i];
+        }
         break;
       }
       case NPY_DOUBLE: {
@@ -1083,7 +1117,7 @@ PyObject* r_to_py_cpp(RObject x, bool convert) {
         typenum = NPY_INT;
       data = &(INTEGER(sexp)[0]);
     } else if (type == REALSXP) {
-      typenum = NPY_DOUBLE;
+      typenum = x.inherits("integer64") ? NPY_LONG : NPY_DOUBLE;
       data = &(REAL(sexp)[0]);
     } else if (type == LGLSXP) {
       typenum = NPY_BOOL;
@@ -1178,15 +1212,16 @@ PyObject* r_to_py_cpp(RObject x, bool convert) {
 
   // numeric (pass length 1 vectors as scalars, otherwise pass list)
   } else if (type == REALSXP) {
+    bool isInt64=x.inherits("integer64");
     if (LENGTH(sexp) == 1) {
       double value = REAL(sexp)[0];
-      return PyFloat_FromDouble(value);
+      return isInt64 ? PyInt_FromLong(reinterpret_cast<long&>(value)) : PyFloat_FromDouble(value);
     } else {
       PyObjectPtr list(PyList_New(LENGTH(sexp)));
       for (R_xlen_t i = 0; i<LENGTH(sexp); i++) {
         double value = REAL(sexp)[i];
         // NOTE: reference to added value is "stolen" by the list
-        int res = PyList_SetItem(list, i, PyFloat_FromDouble(value));
+        int res = PyList_SetItem(list, i, isInt64 ? PyInt_FromLong(reinterpret_cast<long&>(value)) : PyFloat_FromDouble(value));
         if (res != 0)
           stop(py_fetch_error());
       }

From 4a697bb42a1adf0555a8fd9021e25108e90c6b10 Mon Sep 17 00:00:00 2001
From: Kyle Kavanagh <kdkavanagh@gmail.com>
Date: Mon, 2 Sep 2019 16:51:21 -0500
Subject: [PATCH 2/3] Update tests and add dep on bit64

---
 DESCRIPTION                         |  1 +
 tests/testthat/test-python-numpy.R  | 14 +++++++++++++-
 tests/testthat/test-python-pandas.R |  7 +++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 96bf1c255..e3d99ba09 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -34,6 +34,7 @@ Imports:
     graphics,
     jsonlite,
     Rcpp (>= 0.12.7),
+    bit64,
     Matrix,
     methods
 Suggests:
diff --git a/tests/testthat/test-python-numpy.R b/tests/testthat/test-python-numpy.R
index 3ac8553cb..0bc0ee8ac 100644
--- a/tests/testthat/test-python-numpy.R
+++ b/tests/testthat/test-python-numpy.R
@@ -29,10 +29,22 @@ test_that("Character arrays are handled correctly", {
   expect_equal(a1, py_to_r(r_to_py(a1)))
 })
 
+
+test_that("Long integer types are converted to bit64", {
+  skip_if_no_numpy()
+  np <- import("numpy", convert = FALSE)
+  dtypes <- c(np$int64, np$long)
+  require(bit64)
+  lapply(dtypes, function(dtype) {
+    a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype)
+    expect_equal(class(py_to_r(a1)), "integer64")
+  })
+})
+
 test_that("Long integer types are converted to R numeric", {
   skip_if_no_numpy()
   np <- import("numpy", convert = FALSE)
-  dtypes <- c(np$int64, np$uint32, np$uint64, np$long, np$longlong)
+  dtypes <- c(np$uint32, np$uint64, np$longlong)
   lapply(dtypes, function(dtype) {
     a1 <- np$array(c(1L:30L), dtype = dtype)
     expect_equal(class(as.vector(py_to_r(a1))), "numeric")
diff --git a/tests/testthat/test-python-pandas.R b/tests/testthat/test-python-pandas.R
index 9a091b712..5cfc5953b 100644
--- a/tests/testthat/test-python-pandas.R
+++ b/tests/testthat/test-python-pandas.R
@@ -125,3 +125,10 @@ test_that("single-row data.frames with rownames can be converted", {
   expect_equal(c(before), c(after))
 
 })
+
+test_that("Large ints are handled correctly", {
+  skip_if_no_pandas()
+  require(bit64)
+  A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786")))
+  expect_equal(A, py_to_r(r_to_py(A)))
+})

From 791bd6636c6e629090b791f0741259e4df51da45 Mon Sep 17 00:00:00 2001
From: "Kyle D. Kavanagh" <kdkavanagh@gmail.com>
Date: Sat, 15 Feb 2020 13:43:02 -0600
Subject: [PATCH 3/3] Add options to configure bit64 usage

---
 src/libpython.h                     |   7 ++
 src/python.cpp                      | 125 +++++++++++++++++++++-------
 tests/testthat/test-python-numpy.R  |  22 ++++-
 tests/testthat/test-python-pandas.R |   8 ++
 4 files changed, 130 insertions(+), 32 deletions(-)

diff --git a/src/libpython.h b/src/libpython.h
index 3c7a00000..7370c2ad8 100644
--- a/src/libpython.h
+++ b/src/libpython.h
@@ -282,6 +282,12 @@ LIBPYTHON_EXTERN PyObject* (*PyInt_FromLong)(long);
 LIBPYTHON_EXTERN long (*PyInt_AsLong)(PyObject *);
 LIBPYTHON_EXTERN PyObject* (*PyLong_FromLong)(long);
 LIBPYTHON_EXTERN long (*PyLong_AsLong)(PyObject *);
+LIBPYTHON_EXTERN PyObject* (*PyLong_FromUnsignedLong)(unsigned long);  // CPython's parameter is unsigned long
+LIBPYTHON_EXTERN unsigned long (*PyLong_AsUnsignedLong)(PyObject *);
+LIBPYTHON_EXTERN long (*PyLong_AsLongAndOverflow)(PyObject *, int*);
+LIBPYTHON_EXTERN PyObject* (*PyInt_FromUnsignedLong)(unsigned long);  // mirrors the PyLong_ declaration; NOTE(review): confirm the symbol this is bound to
+LIBPYTHON_EXTERN unsigned long (*PyInt_AsUnsignedLong)(PyObject *);
+LIBPYTHON_EXTERN long (*PyInt_AsLongAndOverflow)(PyObject *, int*);
 
 LIBPYTHON_EXTERN PyObject* (*PyBool_FromLong)(long);
 
@@ -360,6 +366,7 @@ typedef struct tagPyArrayObject {
 
 typedef unsigned char npy_bool;
 typedef long npy_long;
+typedef unsigned long npy_ulong;
 typedef double npy_double;
 typedef struct { double real, imag; } npy_cdouble;
 typedef npy_cdouble npy_complex128;
diff --git a/src/python.cpp b/src/python.cpp
index 4336a0e33..f8bb77ffa 100644
--- a/src/python.cpp
+++ b/src/python.cpp
@@ -57,6 +57,27 @@ std::wstring s_python_v3;
 std::string s_pythonhome;
 std::wstring s_pythonhome_v3;
 
+const std::string CONFIG_LONG_AS_BIT64="reticulate.long_as_bit64";
+const std::string CONFIG_ULONG_AS_BIT64="reticulate.ulong_as_bit64";
+
+template<typename T>
+T getConfig(std::string config, T defValue) {  // value of the R option named `config`, or defValue
+  Environment base = Environment::base_env();   // base environment, obtained without a by-name lookup
+  Function getOption = base["getOption"];
+  SEXP s = getOption(config, defValue);         // getOption() is also handed defValue as its default
+  if(s == R_NilValue) {                         // an R NULL is R_NilValue, never a C null pointer
+    return defValue;
+  }
+  return as<T>(s);                              // convert the R value to the requested C++ type
+}
+
+bool convertLongToBit64() {  // reads the "reticulate.long_as_bit64" R option, passing false as the default
+  return getConfig<bool>(CONFIG_LONG_AS_BIT64, false);
+}
+
+bool convertULongToBit64() {  // reads the "reticulate.ulong_as_bit64" R option, passing false as the default
+  return getConfig<bool>(CONFIG_ULONG_AS_BIT64, false);
+}
 
 
 // helper to convert std::string to std::wstring
@@ -201,18 +222,24 @@ int narrow_array_typenum(int typenum) {
   case NPY_SHORT:
   case NPY_USHORT:
   case NPY_INT:
-  case NPY_LONG:
     typenum = NPY_LONG;
     break;
-    // double
-  case NPY_UINT:
+
+  case NPY_LONG:
+  case NPY_LONGLONG:
+    typenum = convertLongToBit64() ? NPY_LONG : NPY_DOUBLE;
+    break;
+
   case NPY_ULONG:
   case NPY_ULONGLONG:
-  case NPY_LONGLONG:
+    typenum = convertULongToBit64() ? NPY_ULONG : NPY_DOUBLE;
+    break;
+
+    // double
+  case NPY_UINT:
   case NPY_HALF:
   case NPY_FLOAT:
   case NPY_DOUBLE:
-
     typenum = NPY_DOUBLE;
     break;
 
@@ -238,12 +265,24 @@ int narrow_array_typenum(int typenum) {
   return typenum;
 }
 
+int typenum(PyArrayObject* array) {  // type number reported by PyArray_TYPE for this array
+  return PyArray_TYPE(array);
+}
+
+int typenum(PyArray_Descr* descr) {  // type number stored in the descriptor's type_num field
+  return descr->type_num;
+}
+
+int typenum(int typeenum) {  // identity overload: the argument is returned unchanged
+  return typeenum;
+}
+
 int narrow_array_typenum(PyArrayObject* array) {
-  return narrow_array_typenum(PyArray_TYPE(array));
+  return narrow_array_typenum(typenum(array));  // narrow the array's own type number
 }
 
 int narrow_array_typenum(PyArray_Descr* descr) {
-  return narrow_array_typenum(descr->type_num);
+  return narrow_array_typenum(typenum(descr));  // descriptor overload, not the int pass-through
 }
 
 bool is_numpy_str(PyObject* x) {
@@ -603,10 +642,8 @@ bool py_is_callable(PyObjectRef x) {
     return py_is_callable(x.get());
 }
 
-
 // convert a python object to an R object
 SEXP py_to_r(PyObject* x, bool convert) {
-
   // NULL for Python None
   if (py_is_none(x))
     return R_NilValue;
@@ -620,8 +657,8 @@ SEXP py_to_r(PyObject* x, bool convert) {
 
     // integer
     else if (scalarType == INTSXP) {
-      long val = PyInt_AsLong(x);
-      if(val > std::numeric_limits<int>::max()) {
+      long val = PyLong_AsLong(x);
+      if((val > std::numeric_limits<int>::max() || val < std::numeric_limits<int>::min()) && convertLongToBit64()) {
         Rcpp::NumericVector vec(1);
         std::memcpy(&(vec[0]), &(val), sizeof(double));
         vec.attr("class") = "integer64";
@@ -664,8 +701,8 @@ SEXP py_to_r(PyObject* x, bool convert) {
     } else if (scalarType == INTSXP) {
       Rcpp::IntegerVector vec(len);
       for (Py_ssize_t i = 0; i<len; i++) {
-        long num = PyInt_AsLong(PyList_GetItem(x, i));
-        if(num > std::numeric_limits<int>::max()) {
+        long num = PyLong_AsLong(PyList_GetItem(x, i));
+        if((num > std::numeric_limits<int>::max() || num < std::numeric_limits<int>::min()) && convertLongToBit64()) {
           //We need to start over an interpret as 64 bit int
           Rcpp::NumericVector nVec(len);
           long long* res_ptr  = (long long*) dataptr(nVec);
@@ -674,7 +711,6 @@ SEXP py_to_r(PyObject* x, bool convert) {
           }
           nVec.attr("class") = "integer64";
           return nVec;
-          break;
         } else {
           vec[i] = num;
         }
@@ -770,6 +806,7 @@ SEXP py_to_r(PyObject* x, bool convert) {
     }
 
     // determine the target type of the array
+    int oriType = typenum(array);
     int typenum = narrow_array_typenum(array);
 
     // cast it to a fortran array (PyArray_CastToType steals the descr)
@@ -792,19 +829,32 @@ SEXP py_to_r(PyObject* x, bool convert) {
         break;
       }
       case NPY_LONG: {
-        npy_long* pData = (npy_long*)PyArray_DATA(array);
-        rArray = Rf_allocArray(INTSXP, dimsVector);
-        for (int i=0; i<len; i++) {
-          if(pData[i] > std::numeric_limits<int>::max()) {
-            Rcpp::NumericVector nVec(len);
-            //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/
-            std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double));
-            nVec.attr("class") = "integer64";
-            nVec.attr("dim") = dimsVector;
-            return nVec;
+        if((oriType == NPY_LONG || oriType == NPY_LONGLONG) && convertLongToBit64()) {
+          Rcpp::NumericVector nVec(len);
+          //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/
+          npy_ulong* pData = (npy_ulong*)PyArray_DATA(array);
+          std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double));
+          nVec.attr("class") = "integer64";
+          nVec.attr("dim") = dimsVector;
+          rArray = nVec;
+        } else {
+          npy_long* pData = (npy_long*)PyArray_DATA(array);
+          rArray = Rf_allocArray(INTSXP, dimsVector);
+          for (int i=0; i<len; i++) {
+            INTEGER(rArray)[i] = pData[i];
           }
-          INTEGER(rArray)[i] = pData[i];
         }
+
+        break;
+      }
+      case NPY_ULONG: {
+        npy_ulong* pData = (npy_ulong*)PyArray_DATA(array);
+        Rcpp::NumericVector nVec(len);
+        //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/
+        std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double));
+        nVec.attr("class") = CharacterVector::create("integer64", "np.ulong");
+        nVec.attr("dim") = dimsVector;
+        rArray = nVec;
         break;
       }
       case NPY_DOUBLE: {
@@ -1117,7 +1167,7 @@ PyObject* r_to_py_cpp(RObject x, bool convert) {
         typenum = NPY_INT;
       data = &(INTEGER(sexp)[0]);
     } else if (type == REALSXP) {
-      typenum = x.inherits("integer64") ? NPY_LONG : NPY_DOUBLE;
+      typenum = x.inherits("integer64") ? (x.inherits("np.ulong") ? NPY_ULONG : NPY_LONG) : NPY_DOUBLE;
       data = &(REAL(sexp)[0]);
     } else if (type == LGLSXP) {
       typenum = NPY_BOOL;
@@ -1212,16 +1262,33 @@ PyObject* r_to_py_cpp(RObject x, bool convert) {
 
   // numeric (pass length 1 vectors as scalars, otherwise pass list)
   } else if (type == REALSXP) {
-    bool isInt64=x.inherits("integer64");
+    bool isBit64  = x.inherits("integer64");
+    bool isUnsigned = x.inherits("np.ulong");
+    bool isInt64 = isBit64 && !isUnsigned;
+    bool isUint64 = isBit64 && isUnsigned;
     if (LENGTH(sexp) == 1) {
       double value = REAL(sexp)[0];
-      return isInt64 ? PyInt_FromLong(reinterpret_cast<long&>(value)) : PyFloat_FromDouble(value);
+      if(isInt64) {
+        return PyInt_FromLong(reinterpret_cast<long&>(value));
+      } else if(isUint64) {
+        return PyLong_FromUnsignedLong(reinterpret_cast<unsigned long&>(value));
+      } else {
+        return PyFloat_FromDouble(value);
+      }
     } else {
       PyObjectPtr list(PyList_New(LENGTH(sexp)));
       for (R_xlen_t i = 0; i<LENGTH(sexp); i++) {
         double value = REAL(sexp)[i];
         // NOTE: reference to added value is "stolen" by the list
-        int res = PyList_SetItem(list, i, isInt64 ? PyInt_FromLong(reinterpret_cast<long&>(value)) : PyFloat_FromDouble(value));
+        PyObject* obj;
+        if(isInt64) {
+          obj = PyInt_FromLong(reinterpret_cast<long&>(value));
+        } else if(isUint64) {
+          obj = PyLong_FromUnsignedLong(reinterpret_cast<unsigned long&>(value));
+        } else {
+          obj = PyFloat_FromDouble(value);
+        }
+        int res = PyList_SetItem(list, i, obj);
         if (res != 0)
           stop(py_fetch_error());
       }
diff --git a/tests/testthat/test-python-numpy.R b/tests/testthat/test-python-numpy.R
index 0bc0ee8ac..7bd1c86f5 100644
--- a/tests/testthat/test-python-numpy.R
+++ b/tests/testthat/test-python-numpy.R
@@ -33,18 +33,33 @@ test_that("Character arrays are handled correctly", {
 test_that("Long integer types are converted to bit64", {
   skip_if_no_numpy()
   np <- import("numpy", convert = FALSE)
-  dtypes <- c(np$int64, np$long)
+  dtypes <- c(np$int64, np$long, np$uint64, np$longlong, np$ulonglong)
   require(bit64)
+  #First run with the default mode
   lapply(dtypes, function(dtype) {
     a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype)
-    expect_equal(class(py_to_r(a1)), "integer64")
+    expect_equal(class(as.vector(py_to_r(a1))), "numeric")
+  })
+
+  #Now with the options set (options() hands back the previous values)
+  old_opts <- options(reticulate.long_as_bit64=TRUE,
+                      reticulate.ulong_as_bit64=TRUE)
+  lapply(dtypes, function(dtype) {
+    a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype)
+    res = c('integer64')
+    if(dtype == np$uint64 || dtype == np$ulonglong) {
+      res = c(res, 'np.ulong')
+    }
+    expect_setequal(class(py_to_r(a1)), res)
   })
+  #Put back whatever the options held before this test instead of forcing them off
+  options(old_opts)
 })
 
 test_that("Long integer types are converted to R numeric", {
   skip_if_no_numpy()
   np <- import("numpy", convert = FALSE)
-  dtypes <- c(np$uint32, np$uint64, np$longlong)
+  dtypes <- c(np$uint32)
   lapply(dtypes, function(dtype) {
     a1 <- np$array(c(1L:30L), dtype = dtype)
     expect_equal(class(as.vector(py_to_r(a1))), "numeric")
@@ -95,3 +110,4 @@ test_that("boolean matrices are converted appropriately", {
   A <- matrix(TRUE, nrow = 2, ncol = 2)
   expect_equal(A, py_to_r(r_to_py(A)))
 })
+
diff --git a/tests/testthat/test-python-pandas.R b/tests/testthat/test-python-pandas.R
index 5cfc5953b..2ee8e019e 100644
--- a/tests/testthat/test-python-pandas.R
+++ b/tests/testthat/test-python-pandas.R
@@ -129,6 +129,14 @@ test_that("single-row data.frames with rownames can be converted", {
 test_that("Large ints are handled correctly", {
   skip_if_no_pandas()
   require(bit64)
+
+  options(reticulate.long_as_bit64=TRUE)
+  options(reticulate.ulong_as_bit64=TRUE)
+  A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786")))
+  expect_equal(A, py_to_r(r_to_py(A)))
+
+  options(reticulate.long_as_bit64=FALSE)
+  options(reticulate.ulong_as_bit64=FALSE)
   A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786")))
   expect_equal(A, py_to_r(r_to_py(A)))
 })