Skip to content

Commit

Permalink
WIP feat: <DataFrame>$partition_by() [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi committed Mar 9, 2024
1 parent e9d96ac commit fa79a0b
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 2 deletions.
24 changes: 24 additions & 0 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -2093,3 +2093,27 @@ DataFrame_group_by_dynamic = function(
by, start_by, check_sorted
)
}


# TODO: support selectors
#' Split a DataFrame into multiple DataFrames
#'
#' The columns named in `...` are resolved to a character vector of column
#' names, and the DataFrame is then partitioned on those columns on the Rust
#' side. An error is raised when no column is selected.
#'
#' @param ... Characters of column names to group by. They are resolved
#' against this DataFrame's schema through [`pl$col()`][pl_col].
#' @param maintain_order If `TRUE`, the order of the rows will be preserved.
#' @param include_key Logical, forwarded to the Rust-side `include_keys`
#' argument. Presumably controls whether the partitioning columns are kept in
#' the resulting DataFrames (TODO: confirm against the polars documentation).
#' @return A list of DataFrames.
DataFrame_partition_by = function(
    ...,
    maintain_order = TRUE,
    include_key = TRUE) {
  # Every failure below is reported with the same method context.
  unwrap_here = function(res) unwrap(res, "in $partition_by():")

  # Resolve the dots into plain column names.
  by_res = result(dots_to_colnames(self, ...))
  by = unwrap_here(by_res)

  # Partitioning on zero columns is meaningless, so fail early.
  if (length(by) == 0L) {
    unwrap_here(Err_plain("There is no column to partition by."))
  }

  parts_res = .pr$DataFrame$partition_by(self, by, maintain_order, include_key)
  unwrap_here(parts_res)
}
10 changes: 10 additions & 0 deletions R/dotdotdot.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,13 @@ unpack_bool_expr_result = function(...) {
}
})
}


#' Convert dots to a character vector of column names
#'
#' The selection is evaluated against a DataFrame built from the schema of
#' `.df` only, and the column names of that selection are returned.
#'
#' @param .df [RPolarsDataFrame]
#' @param ... Arguments to pass to [`pl$col()`][pl_col]
#' @param .call Call passed to `unwrap()` as its `call` argument when the
#' selection fails. Defaults to `sys.call(1L)`.
#' @noRd
dots_to_colnames = function(.df, ..., .call = sys.call(1L)) {
  colnames_res = result(
    pl$DataFrame(schema = .df$schema)$select(pl$col(...))$columns
  )
  unwrap(colnames_res, call = .call)
}
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ RPolarsDataFrame$to_struct <- function(name) .Call(wrap__RPolarsDataFrame__to_st

# NOTE(review): this file looks like generated extendr glue (R/extendr-wrappers.R);
# hand-written comments here may be lost when the wrappers are regenerated.

# Forwards `self` and `names` to the native `unnest` routine.
RPolarsDataFrame$unnest <- function(names) .Call(wrap__RPolarsDataFrame__unnest, self, names)

# Forwards `self`, `by`, `maintain_order` and `include_keys` to the native `partition_by` routine.
RPolarsDataFrame$partition_by <- function(by, maintain_order, include_keys) .Call(wrap__RPolarsDataFrame__partition_by, self, by, maintain_order, include_keys)

# Forwards `self` and `stream_ptr` to the native `export_stream` routine; the result is returned invisibly.
RPolarsDataFrame$export_stream <- function(stream_ptr) invisible(.Call(wrap__RPolarsDataFrame__export_stream, self, stream_ptr))

# Forwards only `rbr` to the native `from_arrow_record_batches` routine (no `self` is passed).
RPolarsDataFrame$from_arrow_record_batches <- function(rbr) .Call(wrap__RPolarsDataFrame__from_arrow_record_batches, rbr)
Expand Down
16 changes: 16 additions & 0 deletions man/DataFrame_partition_by.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/rust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "r-polars"
version = "0.38.0"
version = "0.38.1"
edition = "2021"
rust-version = "1.74.1"
publish = false
Expand Down
20 changes: 20 additions & 0 deletions src/rust/src/rdataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,26 @@ impl RPolarsDataFrame {
self.lazy().unnest(names)?.collect()
}

pub fn partition_by(
&self,
by: Robj,
maintain_order: Robj,
include_keys: Robj,
) -> RResult<List> {
let by = robj_to!(Vec, String, by)?;
let maintain_order = robj_to!(bool, maintain_order)?;
let include_keys = robj_to!(bool, include_keys)?;
let out = if maintain_order {
self.0.clone().partition_by_stable(by, include_keys)
} else {
self.0.partition_by(by, include_keys)
}
.map_err(polars_to_rpolars_err)?;

let vec = unsafe { std::mem::transmute::<Vec<pl::DataFrame>, Vec<RPolarsDataFrame>>(out) };
Ok(List::from_values(vec))
}

pub fn export_stream(&self, stream_ptr: &str) {
let schema = self.0.schema().to_arrow(false);
let data_type = ArrowDataType::Struct(schema.fields);
Expand Down

0 comments on commit fa79a0b

Please sign in to comment.