From 91e8747e59304d5b7e78b336bd5cee6d6a8e54f9 Mon Sep 17 00:00:00 2001 From: Kyle Kavanagh Date: Mon, 2 Sep 2019 16:27:14 -0500 Subject: [PATCH 1/3] Add bit64 support --- src/python.cpp | 55 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/src/python.cpp b/src/python.cpp index 28c4a4b14..4336a0e33 100644 --- a/src/python.cpp +++ b/src/python.cpp @@ -201,17 +201,18 @@ int narrow_array_typenum(int typenum) { case NPY_SHORT: case NPY_USHORT: case NPY_INT: + case NPY_LONG: typenum = NPY_LONG; break; // double case NPY_UINT: case NPY_ULONG: case NPY_ULONGLONG: - case NPY_LONG: case NPY_LONGLONG: case NPY_HALF: case NPY_FLOAT: case NPY_DOUBLE: + typenum = NPY_DOUBLE; break; @@ -618,9 +619,18 @@ SEXP py_to_r(PyObject* x, bool convert) { return LogicalVector::create(x == Py_True); // integer - else if (scalarType == INTSXP) - return IntegerVector::create(PyInt_AsLong(x)); - + else if (scalarType == INTSXP) { + long val = PyInt_AsLong(x); + if(val > std::numeric_limits::max()) { + Rcpp::NumericVector vec(1); + std::memcpy(&(vec[0]), &(val), sizeof(double)); + vec.attr("class") = "integer64"; + return vec; + } + else{ + return IntegerVector::create(val); + } + } // double else if (scalarType == REALSXP) return NumericVector::create(PyFloat_AsDouble(x)); @@ -653,9 +663,24 @@ SEXP py_to_r(PyObject* x, bool convert) { return vec; } else if (scalarType == INTSXP) { Rcpp::IntegerVector vec(len); - for (Py_ssize_t i = 0; i std::numeric_limits::max()) { + //We need to start over an interpret as 64 bit int + Rcpp::NumericVector nVec(len); + long long* res_ptr = (long long*) dataptr(nVec); + for (Py_ssize_t j = 0; j std::numeric_limits::max()) { + Rcpp::NumericVector nVec(len); + //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/ + std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double)); + nVec.attr("class") = "integer64"; + nVec.attr("dim") = dimsVector; + return nVec; + } INTEGER(rArray)[i] = pData[i]; + } break; } case NPY_DOUBLE: { @@ -1083,7 +1117,7 @@ PyObject* r_to_py_cpp(RObject x, bool convert) { typenum = NPY_INT; data = &(INTEGER(sexp)[0]); } else if (type == REALSXP) { - typenum = NPY_DOUBLE; + typenum = x.inherits("integer64") ? NPY_LONG : NPY_DOUBLE; data = &(REAL(sexp)[0]); } else if (type == LGLSXP) { typenum = NPY_BOOL; @@ -1178,15 +1212,16 @@ PyObject* r_to_py_cpp(RObject x, bool convert) { // numeric (pass length 1 vectors as scalars, otherwise pass list) } else if (type == REALSXP) { + bool isInt64=x.inherits("integer64"); if (LENGTH(sexp) == 1) { double value = REAL(sexp)[0]; - return PyFloat_FromDouble(value); + return isInt64 ? PyInt_FromLong(reinterpret_cast(value)) : PyFloat_FromDouble(value); } else { PyObjectPtr list(PyList_New(LENGTH(sexp))); for (R_xlen_t i = 0; i(value)) : PyFloat_FromDouble(value)); if (res != 0) stop(py_fetch_error()); } From 4a697bb42a1adf0555a8fd9021e25108e90c6b10 Mon Sep 17 00:00:00 2001 From: Kyle Kavanagh Date: Mon, 2 Sep 2019 16:51:21 -0500 Subject: [PATCH 2/3] Update tests and add dep on bit64 --- DESCRIPTION | 1 + tests/testthat/test-python-numpy.R | 14 +++++++++++++- tests/testthat/test-python-pandas.R | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 96bf1c255..e3d99ba09 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Imports: graphics, jsonlite, Rcpp (>= 0.12.7), + bit64, Matrix, methods Suggests: diff --git a/tests/testthat/test-python-numpy.R b/tests/testthat/test-python-numpy.R index 3ac8553cb..0bc0ee8ac 100644 --- a/tests/testthat/test-python-numpy.R +++ b/tests/testthat/test-python-numpy.R @@ -29,10 +29,22 @@ test_that("Character arrays are handled correctly", { expect_equal(a1, py_to_r(r_to_py(a1))) }) + +test_that("Long integer types are converted to bit64", { + skip_if_no_numpy() + np <- import("numpy", convert = FALSE) + dtypes <- c(np$int64, np$long) + require(bit64) + lapply(dtypes, function(dtype) { + a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype) + expect_equal(class(py_to_r(a1)), "integer64") + }) +}) + test_that("Long integer types are converted to R numeric", { skip_if_no_numpy() np <- import("numpy", convert = FALSE) - dtypes <- c(np$int64, np$uint32, np$uint64, np$long, np$longlong) + dtypes <- c(np$uint32, np$uint64, np$longlong) lapply(dtypes, function(dtype) { a1 <- np$array(c(1L:30L), dtype = dtype) expect_equal(class(as.vector(py_to_r(a1))), "numeric") diff --git a/tests/testthat/test-python-pandas.R b/tests/testthat/test-python-pandas.R index 9a091b712..5cfc5953b 100644 --- a/tests/testthat/test-python-pandas.R +++ b/tests/testthat/test-python-pandas.R @@ -125,3 +125,10 @@ test_that("single-row data.frames with rownames can be converted", { expect_equal(c(before), c(after)) }) + +test_that("Large ints are handled correctly", { + skip_if_no_pandas() + require(bit64) + A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786"))) + expect_equal(A, py_to_r(r_to_py(A))) +}) From 791bd6636c6e629090b791f0741259e4df51da45 Mon Sep 17 00:00:00 2001 From: "Kyle D. Kavanagh" Date: Sat, 15 Feb 2020 13:43:02 -0600 Subject: [PATCH 3/3] Add options to configure bit64 usage --- src/libpython.h | 7 ++ src/python.cpp | 125 +++++++++++++++++++++------- tests/testthat/test-python-numpy.R | 22 ++++- tests/testthat/test-python-pandas.R | 8 ++ 4 files changed, 130 insertions(+), 32 deletions(-) diff --git a/src/libpython.h b/src/libpython.h index 3c7a00000..7370c2ad8 100644 --- a/src/libpython.h +++ b/src/libpython.h @@ -282,6 +282,12 @@ LIBPYTHON_EXTERN PyObject* (*PyInt_FromLong)(long); LIBPYTHON_EXTERN long (*PyInt_AsLong)(PyObject *); LIBPYTHON_EXTERN PyObject* (*PyLong_FromLong)(long); LIBPYTHON_EXTERN long (*PyLong_AsLong)(PyObject *); +LIBPYTHON_EXTERN PyObject* (*PyLong_FromUnsignedLong)(long); +LIBPYTHON_EXTERN unsigned long (*PyLong_AsUnsignedLong)(PyObject *); +LIBPYTHON_EXTERN long (*PyLong_AsLongAndOverflow)(PyObject *, int*); +LIBPYTHON_EXTERN PyObject* (*PyInt_FromUnsignedLong)(long); +LIBPYTHON_EXTERN unsigned long (*PyInt_AsUnsignedLong)(PyObject *); +LIBPYTHON_EXTERN long (*PyInt_AsLongAndOverflow)(PyObject *, int*); LIBPYTHON_EXTERN PyObject* (*PyBool_FromLong)(long); @@ -360,6 +366,7 @@ typedef struct tagPyArrayObject { typedef unsigned char npy_bool; typedef long npy_long; +typedef unsigned long npy_ulong; typedef double npy_double; typedef struct { double real, imag; } npy_cdouble; typedef npy_cdouble npy_complex128; diff --git a/src/python.cpp b/src/python.cpp index 4336a0e33..f8bb77ffa 100644 --- a/src/python.cpp +++ b/src/python.cpp @@ -57,6 +57,27 @@ std::wstring s_python_v3; std::string s_pythonhome; std::wstring s_pythonhome_v3; +const std::string CONFIG_LONG_AS_BIT64="reticulate.long_as_bit64"; +const std::string CONFIG_ULONG_AS_BIT64="reticulate.ulong_as_bit64"; + +template +T getConfig(std::string config, T defValue) { + Environment base( "package:base" ) ; + Function getOption = base["getOption"]; + SEXP s = getOption(config, defValue); + if(s == NULL) { + return defValue; + } + return as(s); +} + +bool convertLongToBit64() { + return getConfig(CONFIG_LONG_AS_BIT64, false); +} + +bool convertULongToBit64() { + return getConfig(CONFIG_ULONG_AS_BIT64, false); +} // helper to convert std::string to std::wstring @@ -201,18 +222,24 @@ int narrow_array_typenum(int typenum) { case NPY_SHORT: case NPY_USHORT: case NPY_INT: - case NPY_LONG: typenum = NPY_LONG; break; - // double - case NPY_UINT: + + case NPY_LONG: + case NPY_LONGLONG: + typenum = convertLongToBit64() ? NPY_LONG : NPY_DOUBLE; + break; + case NPY_ULONG: case NPY_ULONGLONG: - case NPY_LONGLONG: + typenum = convertULongToBit64() ? NPY_ULONG : NPY_DOUBLE; + break; + + // double + case NPY_UINT: case NPY_HALF: case NPY_FLOAT: case NPY_DOUBLE: - typenum = NPY_DOUBLE; break; @@ -238,12 +265,24 @@ int narrow_array_typenum(int typenum) { return typenum; } +int typenum(PyArrayObject* array) { + return PyArray_TYPE(array); +} + +int typenum(PyArray_Descr* descr) { + return descr->type_num; +} + +int typenum(int typeenum) { + return typeenum; +} + int narrow_array_typenum(PyArrayObject* array) { - return narrow_array_typenum(PyArray_TYPE(array)); + return narrow_array_typenum(typenum(array)); } int narrow_array_typenum(PyArray_Descr* descr) { - return narrow_array_typenum(descr->type_num); + return narrow_array_typenum(typenum(descr->type_num)); } bool is_numpy_str(PyObject* x) { @@ -603,10 +642,8 @@ bool py_is_callable(PyObjectRef x) { return py_is_callable(x.get()); } - // convert a python object to an R object SEXP py_to_r(PyObject* x, bool convert) { - // NULL for Python None if (py_is_none(x)) return R_NilValue; @@ -620,8 +657,8 @@ SEXP py_to_r(PyObject* x, bool convert) { // integer else if (scalarType == INTSXP) { - long val = PyInt_AsLong(x); - if(val > std::numeric_limits::max()) { + long val = PyLong_AsLong(x); + if((val > std::numeric_limits::max() || val < std::numeric_limits::min()) && convertLongToBit64()) { Rcpp::NumericVector vec(1); std::memcpy(&(vec[0]), &(val), sizeof(double)); vec.attr("class") = "integer64"; @@ -664,8 +701,8 @@ SEXP py_to_r(PyObject* x, bool convert) { } else if (scalarType == INTSXP) { Rcpp::IntegerVector vec(len); for (Py_ssize_t i = 0; i std::numeric_limits::max()) { + long num = PyLong_AsLong(PyList_GetItem(x, i)); + if((num > std::numeric_limits::max() || num < std::numeric_limits::min()) && convertLongToBit64()) { //We need to start over an interpret as 64 bit int Rcpp::NumericVector nVec(len); long long* res_ptr = (long long*) dataptr(nVec); @@ -674,7 +711,6 @@ SEXP py_to_r(PyObject* x, bool convert) { } nVec.attr("class") = "integer64"; return nVec; - break; } else { vec[i] = num; } @@ -770,6 +806,7 @@ SEXP py_to_r(PyObject* x, bool convert) { } // determine the target type of the array + int oriType = typenum(array); int typenum = narrow_array_typenum(array); // cast it to a fortran array (PyArray_CastToType steals the descr) @@ -792,19 +829,32 @@ SEXP py_to_r(PyObject* x, bool convert) { break; } case NPY_LONG: { - npy_long* pData = (npy_long*)PyArray_DATA(array); - rArray = Rf_allocArray(INTSXP, dimsVector); - for (int i=0; i std::numeric_limits::max()) { - Rcpp::NumericVector nVec(len); - //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/ - std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double)); - nVec.attr("class") = "integer64"; - nVec.attr("dim") = dimsVector; - return nVec; + if((oriType == NPY_LONG || oriType == NPY_LONGLONG) && convertLongToBit64()) { + Rcpp::NumericVector nVec(len); + //Inspired by https://gallery.rcpp.org/articles/creating-integer64-and-nanotime-vectors/ + npy_ulong* pData = (npy_ulong*)PyArray_DATA(array); + std::memcpy(&(nVec[0]), &(pData[0]), len * sizeof(double)); + nVec.attr("class") = "integer64"; + nVec.attr("dim") = dimsVector; + rArray = nVec; + } else { + npy_long* pData = (npy_long*)PyArray_DATA(array); + rArray = Rf_allocArray(INTSXP, dimsVector); + for (int i=0; i(value)) : PyFloat_FromDouble(value); + if(isInt64) { + return PyInt_FromLong(reinterpret_cast(value)); + } else if(isUint64) { + return PyLong_FromUnsignedLong(reinterpret_cast(value)); + } else { + return PyFloat_FromDouble(value); + } } else { PyObjectPtr list(PyList_New(LENGTH(sexp))); for (R_xlen_t i = 0; i(value)) : PyFloat_FromDouble(value)); + PyObject* obj; + if(isInt64) { + obj = PyInt_FromLong(reinterpret_cast(value)); + } else if(isUint64) { + obj = PyLong_FromUnsignedLong(reinterpret_cast(value)); + } else { + obj = PyFloat_FromDouble(value); + } + int res = PyList_SetItem(list, i, obj); if (res != 0) stop(py_fetch_error()); } diff --git a/tests/testthat/test-python-numpy.R b/tests/testthat/test-python-numpy.R index 0bc0ee8ac..7bd1c86f5 100644 --- a/tests/testthat/test-python-numpy.R +++ b/tests/testthat/test-python-numpy.R @@ -33,18 +33,33 @@ test_that("Character arrays are handled correctly", { test_that("Long integer types are converted to bit64", { skip_if_no_numpy() np <- import("numpy", convert = FALSE) - dtypes <- c(np$int64, np$long) + dtypes <- c(np$int64, np$long, np$uint64, np$longlong, np$ulonglong) require(bit64) + #First run with the default mode lapply(dtypes, function(dtype) { a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype) - expect_equal(class(py_to_r(a1)), "integer64") + expect_equal(class(as.vector(py_to_r(a1))), "numeric") + }) + + #Now with the options set + options(reticulate.long_as_bit64=TRUE) + options(reticulate.ulong_as_bit64=TRUE) + lapply(dtypes, function(dtype) { + a1 <- np$array(c(as.integer64("12345"), as.integer64("1567447722123456786")), dtype = dtype) + res = c('integer64') + if(dtype == np$uint64 || dtype == np$ulonglong) { + res = c(res, 'np.ulong') + } + expect_setequal(class(py_to_r(a1)), res) }) + options(reticulate.long_as_bit64=F) + options(reticulate.ulong_as_bit64=F) }) test_that("Long integer types are converted to R numeric", { skip_if_no_numpy() np <- import("numpy", convert = FALSE) - dtypes <- c(np$uint32, np$uint64, np$longlong) + dtypes <- c(np$uint32) lapply(dtypes, function(dtype) { a1 <- np$array(c(1L:30L), dtype = dtype) expect_equal(class(as.vector(py_to_r(a1))), "numeric") @@ -95,3 +110,4 @@ test_that("boolean matrices are converted appropriately", { A <- matrix(TRUE, nrow = 2, ncol = 2) expect_equal(A, py_to_r(r_to_py(A))) }) + diff --git a/tests/testthat/test-python-pandas.R b/tests/testthat/test-python-pandas.R index 5cfc5953b..2ee8e019e 100644 --- a/tests/testthat/test-python-pandas.R +++ b/tests/testthat/test-python-pandas.R @@ -129,6 +129,14 @@ test_that("single-row data.frames with rownames can be converted", { test_that("Large ints are handled correctly", { skip_if_no_pandas() require(bit64) + + options(reticulate.long_as_bit64=TRUE) + options(reticulate.ulong_as_bit64=TRUE) + A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786"))) + expect_equal(A, py_to_r(r_to_py(A))) + + options(reticulate.long_as_bit64=F) + options(reticulate.ulong_as_bit64=F) A <- data.frame(val=c(as.integer64("1567447722123456785"), as.integer64("1567447722123456786"))) expect_equal(A, py_to_r(r_to_py(A))) })