Skip to content

Commit

Permalink
fix: more fixes for R tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi committed Apr 14, 2024
1 parent 33e73e1 commit bcf5e2e
Show file tree
Hide file tree
Showing 26 changed files with 189 additions and 139 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,5 @@ Collate:
'zzz.R'
Config/rextendr/version: 0.3.1
VignetteBuilder: knitr
Config/polars/LibVersion: 0.38.2
Config/polars/LibVersion: 0.39.0
Config/polars/RustToolchainVersion: nightly-2024-03-28
11 changes: 4 additions & 7 deletions R/expr__array.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,11 @@ ExprArr_unique = function(maintain_order = FALSE) .pr$Expr$arr_unique(self, main
#'
#' This allows extracting only one value per array.
#'
#' @inherit ExprList_get return
#' @param index An Expr or something coercible to an Expr, that must return a
#' single index. Values are 0-indexed (so index 0 would return the first item
#' of every sub-array) and negative values start from the end (index `-1`
#' returns the last item). If the index is out of bounds, it will return a
#' `null`. Strings are parsed as column names.
#'
#' @return Expr
#' @aliases arr_get
#' returns the last item).
#' @examples
#' df = pl$DataFrame(
#' values = list(c(1, 2), c(3, 4), c(NA_real_, 6)),
Expand All @@ -156,8 +153,8 @@ ExprArr_unique = function(maintain_order = FALSE) .pr$Expr$arr_unique(self, main
#' val_minus_1 = pl$col("values")$arr$get(-1),
#' val_oob = pl$col("values")$arr$get(10)
#' )
ExprArr_get = function(index) {
.pr$Expr$arr_get(self, index) |>
ExprArr_get = function(index, ..., null_on_oob = TRUE) {
.pr$Expr$arr_get(self, index, null_on_oob) |>
unwrap("in $arr$get():")
}

Expand Down
28 changes: 18 additions & 10 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -1377,16 +1377,13 @@ Expr_mode = use_extendr_wrapper
#'
#' Sort this column. If used in a groupby context, the groups are sorted.
#'
#' @param ... Ignored
#' @param descending Sort in descending order. When sorting by multiple columns,
#' can be specified per column by passing a vector of booleans.
#' @param nulls_last If `TRUE`, place null values last.
#' @inheritParams Series_sort
#' @return Expr
#' @examples
#' pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$
#' with_columns(sorted = pl$col("a")$sort())
Expr_sort = function(..., descending = FALSE, nulls_last = FALSE) {
.pr$Expr$sort(self, descending, nulls_last)
.pr$Expr$sort_with(self, descending, nulls_last)
}

#' Top k values
Expand Down Expand Up @@ -1478,14 +1475,17 @@ Expr_search_sorted = function(element) {
.pr$Expr$search_sorted(self, wrap_e(element))
}

# TODO: rewrite `by` to `...` <https://github.com/pola-rs/r-polars/pull/997>
#' Sort Expr by order of others
#'
#' Sort this column by the ordering of another column, or multiple other columns.
#' If used in a groupby context, the groups are sorted.
#'
#' @param by One expression or a list of expressions and/or strings (interpreted
#' as column names).
#' @inheritParams Expr_sort
#' @param maintain_order A logical to indicate whether the order should be maintained
#' if elements are equal.
#' @inheritParams Series_sort
#' @return Expr
#' @examples
#' df = pl$DataFrame(
Expand All @@ -1511,12 +1511,19 @@ Expr_search_sorted = function(element) {
#' df$with_columns(
#' sorted = pl$col("group")$sort_by(pl$col("value1")$sort(descending = TRUE))
#' )
Expr_sort_by = function(by, descending = FALSE) {
Expr_sort_by = function(
by, ..., descending = FALSE,
nulls_last = FALSE,
multithreaded = TRUE,
maintain_order = FALSE) {
.pr$Expr$sort_by(
self,
wrap_elist_result(by, str_to_lit = FALSE),
result(descending)
) |> unwrap("in $sort_by:")
descending,
nulls_last,
maintain_order,
multithreaded
) |> unwrap("in $sort_by():")
}

#' Gather values by index
Expand Down Expand Up @@ -3143,6 +3150,7 @@ Expr_cumulative_eval = function(expr, min_periods = 1L, parallel = FALSE) {
#' This enables downstream code to use fast paths for sorted arrays. WARNING:
#' this doesn't check whether the data is actually sorted; you have to ensure
#' that yourself.
#' @param ... Ignored.
#' @param descending Sort the columns in descending order.
#' @return Expr
#' @examples
Expand All @@ -3154,7 +3162,7 @@ Expr_cumulative_eval = function(expr, min_periods = 1L, parallel = FALSE) {
#' s2 = pl$select(pl$lit(c(1, 3, 2, 4))$set_sorted()$alias("a"))$get_column("a")
#' s2$sort()
#' s2$flags # returns TRUE while it's not actually sorted
Expr_set_sorted = function(descending = FALSE) {
Expr_set_sorted = function(..., descending = FALSE) {
self$map_batches(\(s) {
.pr$Series$set_sorted_mut(s, descending) # use private to bypass mut protection
s
Expand Down
16 changes: 10 additions & 6 deletions R/expr__list.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,12 @@ ExprList_concat = function(other) {
#' @param index An Expr or something coercible to an Expr, that must return a
#' single index. Values are 0-indexed (so index 0 would return the first item
#' of every sublist) and negative values start from the end (index `-1`
#' returns the last item). If the index is out of bounds, it will return a
#' `null`. Strings are parsed as column names.
#'
#' @return Expr
#' @aliases list_get
#' returns the last item).
#' @param ... Ignored.
#' @param null_on_oob A logical to determine the behavior if an index is out of bounds:
#' - `TRUE` (default): set as `null`
#' - `FALSE`: raise an error
#' @return [Expr][Expr_class]
#' @examples
#' df = pl$DataFrame(
#' values = list(c(2, 2, NA), c(1, 2, 3), NA_real_, NULL),
Expand All @@ -128,7 +129,10 @@ ExprList_concat = function(other) {
#' val_minus_1 = pl$col("values")$list$get(-1),
#' val_oob = pl$col("values")$list$get(10)
#' )
ExprList_get = function(index) .pr$Expr$list_get(self, wrap_e(index, str_to_lit = FALSE))
ExprList_get = function(index, ..., null_on_oob = TRUE) {
.pr$Expr$list_get(self, index, null_on_oob) |>
unwrap("in $list$get():")
}

#' Get several values by index in a list
#'
Expand Down
2 changes: 1 addition & 1 deletion R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ RPolarsExpr$to_physical <- function() .Call(wrap__RPolarsExpr__to_physical, self

RPolarsExpr$cast <- function(data_type, strict) .Call(wrap__RPolarsExpr__cast, self, data_type, strict)

RPolarsExpr$sort <- function(descending, nulls_last) .Call(wrap__RPolarsExpr__sort, self, descending, nulls_last)
RPolarsExpr$sort_with <- function(descending, nulls_last) .Call(wrap__RPolarsExpr__sort_with, self, descending, nulls_last)

RPolarsExpr$arg_sort <- function(descending, nulls_last) .Call(wrap__RPolarsExpr__arg_sort, self, descending, nulls_last)

Expand Down
11 changes: 6 additions & 5 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -1296,15 +1296,15 @@ LazyFrame_join = function(
}


#' Sort a LazyFrame
#' @description Sort by one or more Expressions.
#' Sort the LazyFrame by the given columns
#'
#' @inheritParams Series_sort
#' @param by Column(s) to sort by. Can be character vector of column names,
#' a list of Expr(s) or a list with a mix of Expr(s) and column names.
#' @param ... More columns to sort by as above but provided one Expr per argument.
#' @param descending Logical. Sort in descending order (default is `FALSE`). This must be
#' either of length 1 or a logical vector of the same length as the number of
#' Expr(s) specified in `by` and `...`.
#' @param nulls_last Logical. Place `NULL`s at the end? Default is `FALSE`.
#' @param maintain_order Whether the order should be maintained if elements are
#' equal. If `TRUE`, streaming is not possible and performance might be worse
#' since this requires a stable sort.
Expand All @@ -1326,10 +1326,11 @@ LazyFrame_sort = function(
...,
descending = FALSE,
nulls_last = FALSE,
maintain_order = FALSE) {
maintain_order = FALSE,
multithreaded = TRUE) {
.pr$LazyFrame$sort_by_exprs(
self, unpack_list(by, .context = "in $sort():"), err_on_named_args(...),
descending, nulls_last, maintain_order
descending, nulls_last, maintain_order, multithreaded
) |>
unwrap("in $sort():")
}
Expand Down
35 changes: 20 additions & 15 deletions R/series__series.R
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ Series_is_sorted = function(descending = FALSE) {
#' Set a sorted flag on a Series
#'
#' @inheritParams Expr_set_sorted
#' @param in_place If `TRUE`, this will set the flag mutably and return NULL.
#' @param in_place If `TRUE`, this will set the flag mutably and return `NULL`.
#' Remember to use `options(polars.strictly_immutable = FALSE)` before using
#' this parameter, otherwise an error will occur. If `FALSE` (default), it will
#' return a cloned Series with the flag.
Expand All @@ -886,46 +886,51 @@ Series_is_sorted = function(descending = FALSE) {
#' @examples
#' s = as_polars_series(1:4)$set_sorted()
#' s$flags
Series_set_sorted = function(descending = FALSE, in_place = FALSE) {
if (in_place && polars_options()$strictly_immutable) {
stop(paste(
Series_set_sorted = function(..., descending = FALSE, in_place = FALSE) {
if (isTRUE(in_place) && polars_options()$strictly_immutable) {
Err_plain(
"Using `in_place = TRUE` in `set_sorted()` breaks immutability. To enable mutable features run:\n",
"`options(polars.strictly_immutable = FALSE)`"
))
) |>
unwrap("in $set_sorted():")
}

if (!in_place) {
if (!isTRUE(in_place)) {
self = self$clone()
}

.pr$Series$set_sorted_mut(self, descending)
if (in_place) invisible(NULL) else invisible(self)
if (isTRUE(in_place)) invisible(NULL) else invisible(self)
}


#' Sort a Series
#'
#' @param descending Sort in descending order.
#' @inheritParams Expr_sort
#' @inheritParams Series_set_sorted
#'
#' @param descending A logical. If `TRUE`, sort in descending order.
#' @param nulls_last A logical. If `TRUE`, place `null` values last instead of first.
#' @param multithreaded A logical. If `TRUE`, sort using multiple threads.
#' @return [Series][Series_class]
#'
#' @examples
#' as_polars_series(c(1.5, NA, 1, NaN, Inf, -Inf))$sort()
#' as_polars_series(c(1.5, NA, 1, NaN, Inf, -Inf))$sort(nulls_last = TRUE)
Series_sort = function(..., descending = FALSE, nulls_last = FALSE, in_place = FALSE) {
Series_sort = function(
..., descending = FALSE, nulls_last = FALSE, multithreaded = TRUE,
in_place = FALSE) {
uw = \(res) unwrap(res, "in $sort():")
if (isTRUE(in_place) && polars_options()$strictly_immutable) {
stop(paste(
Err_plain(
"in place sort breaks immutability, to enable mutable features run:\n",
"`options(polars.strictly_immutable = FALSE)`"
))
) |>
uw()
}
if (!isTRUE(in_place)) {
self = self$clone()
}

.pr$Series$sort(self, descending, nulls_last)
.pr$Series$sort(self, descending, nulls_last, multithreaded) |>
uw()
}

#' Convert Series to DataFrame
Expand Down
4 changes: 2 additions & 2 deletions man/DataFrame_sort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions man/ExprArr_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions man/ExprArr_sort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 11 additions & 5 deletions man/ExprList_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions man/Expr_arg_sort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/Expr_set_sorted.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions man/Expr_sort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bcf5e2e

Please sign in to comment.