diff --git a/__tests__/dataframe.test.ts b/__tests__/dataframe.test.ts index 66effa91..9539fa07 100644 --- a/__tests__/dataframe.test.ts +++ b/__tests__/dataframe.test.ts @@ -756,7 +756,7 @@ describe("dataframe", () => { expect(actual.columns).toEqual(["foo_new", "bar_new", "ham_new"]); }); test("replaceAtIdx", () => { - const actual = pl.DataFrame({ + const actual: pl.DataFrame = pl.DataFrame({ foo: [1, 2, 3], bar: [6, 7, 8], ham: ["a", "b", "c"], @@ -1315,101 +1315,109 @@ describe("dataframe", () => { expect(actual).toFrameEqual(expected); }); test("pivot", () => { - let df = pl.DataFrame({ - a: pl.Series([1, 2, 3]).cast(pl.Int32), - b: pl - .Series([ - [1, 1], - [2, 2], - [3, 3], - ]) - .cast(pl.List(pl.Int32)), - }); - - let expected = pl - .DataFrame({ + { + const df = pl.DataFrame({ a: pl.Series([1, 2, 3]).cast(pl.Int32), - "1": pl.Series([[1, 1], null, null]).cast(pl.List(pl.Int32)), - "2": pl.Series([null, [2, 2], null]).cast(pl.List(pl.Int32)), - "3": pl.Series([null, null, [3, 3]]).cast(pl.List(pl.Int32)), - }) - .select("a", "1", "2", "3"); + b: pl + .Series([ + [1, 1], + [2, 2], + [3, 3], + ]) + .cast(pl.List(pl.Int32)), + }); - let actual = df.pivot("b", { - index: "a", - on: "a", - aggregateFunc: "first", - sortColumns: true, - }); + const expected = pl + .DataFrame({ + a: pl.Series([1, 2, 3]).cast(pl.Int32), + "1": pl.Series([[1, 1], null, null]).cast(pl.List(pl.Int32)), + "2": pl.Series([null, [2, 2], null]).cast(pl.List(pl.Int32)), + "3": pl.Series([null, null, [3, 3]]).cast(pl.List(pl.Int32)), + }) + .select("a", "1", "2", "3"); - expect(actual).toFrameEqual(expected, true); + const actual = df.pivot("b", { + index: "a", + on: "a", + aggregateFunc: "first", + sortColumns: true, + }); - df = pl.DataFrame({ - a: ["beep", "bop"], - b: ["a", "b"], - c: ["s", "f"], - d: [7, 8], - e: ["x", "y"], - }); - actual = df.pivot(["a", "e"], { - index: "b", - on: ["b"], - aggregateFunc: "first", - separator: "|", - maintainOrder: true, - }); + 
expect(actual).toFrameEqual(expected, true); + } - expected = pl.DataFrame({ - b: ["a", "b"], - "a|a": ["beep", null], - "a|b": [null, "bop"], - "e|a": ["x", null], - "e|b": [null, "y"], - }); - expect(actual).toFrameEqual(expected, true); + { + const df = pl.DataFrame({ + a: ["beep", "bop"], + b: ["a", "b"], + c: ["s", "f"], + d: [7, 8], + e: ["x", "y"], + }); + const actual = df.pivot(["a", "e"], { + index: "b", + on: ["b"], + aggregateFunc: "first", + separator: "|", + maintainOrder: true, + }); - df = pl.DataFrame({ - foo: ["A", "A", "B", "B", "C"], - N: [1, 2, 2, 4, 2], - bar: ["k", "l", "m", "n", "o"], - }); - actual = df.pivot(["N"], { - index: "foo", - on: "bar", - aggregateFunc: "first", - }); - expected = pl.DataFrame({ - foo: ["A", "B", "C"], - k: [1, null, null], - l: [2, null, null], - m: [null, 2, null], - n: [null, 4, null], - o: [null, null, 2], - }); - expect(actual).toFrameEqual(expected, true); + const expected = pl.DataFrame({ + b: ["a", "b"], + "a|a": ["beep", null], + "a|b": [null, "bop"], + "e|a": ["x", null], + "e|b": [null, "y"], + }); + expect(actual).toFrameEqual(expected, true); + } + { + const df = pl.DataFrame({ + foo: ["A", "A", "B", "B", "C"], + N: [1, 2, 2, 4, 2], + bar: ["k", "l", "m", "n", "o"], + }); + const actual = df.pivot(["N"], { + index: "foo", + on: "bar", + aggregateFunc: "first", + }); - df = pl.DataFrame({ - ix: [1, 1, 2, 2, 1, 2], - col: ["a", "a", "a", "a", "b", "b"], - foo: [0, 1, 2, 2, 7, 1], - bar: [0, 2, 0, 0, 9, 4], - }); + const expected = pl.DataFrame({ + foo: ["A", "B", "C"], + k: [1, null, null], + l: [2, null, null], + m: [null, 2, null], + n: [null, 4, null], + o: [null, null, 2], + }); - actual = df.pivot(["foo", "bar"], { - index: "ix", - on: "col", - aggregateFunc: "sum", - separator: "/", - }); + expect(actual).toFrameEqual(expected, true); + } + { + const df = pl.DataFrame({ + ix: [1, 1, 2, 2, 1, 2], + col: ["a", "a", "a", "a", "b", "b"], + foo: [0, 1, 2, 2, 7, 1], + bar: [0, 2, 0, 0, 9, 4], + }); - 
expected = pl.DataFrame({ - ix: [1, 2], - "foo/a": [1, 4], - "foo/b": [7, 1], - "bar/a": [2, 0], - "bar/b": [9, 4], - }); - expect(actual).toFrameEqual(expected, true); + const actual = df.pivot(["foo", "bar"], { + index: "ix", + on: "col", + aggregateFunc: "sum", + separator: "/", + }); + + const expected = pl.DataFrame({ + ix: [1, 2], + "foo/a": [1, 4], + "foo/b": [7, 1], + "bar/a": [2, 0], + "bar/b": [9, 4], + }); + expect(actual).toFrameEqual(expected, true); + } }); }); describe("join", () => { diff --git a/__tests__/expr.test.ts b/__tests__/expr.test.ts index 49cd9ba8..0218be1c 100644 --- a/__tests__/expr.test.ts +++ b/__tests__/expr.test.ts @@ -1103,30 +1103,36 @@ describe("expr.str", () => { json: [{ a: 1, b: true }, "null", { a: 2, b: false }], }); expect(actual).toFrameEqual(expected); - let s = pl.Series(["[1, 2, 3]", null, "[4, 5, 6]"]); - let dtype = pl.List(pl.Int64); - const expSeries = pl.Series([[1, 2, 3], null, [4, 5, 6]]); - expect(s.str.jsonDecode()).toSeriesEqual(expSeries); - expect(s.str.jsonDecode(dtype)).toSeriesEqual(expSeries); - dtype = pl.Struct([ - new pl.Field("a", pl.Int64), - new pl.Field("b", pl.Bool), - ]); - s = pl.Series("json", [ - '{"a":1, "b": true}', - '{"a": null, "b": null }', - '{"a":2, "b": false}', - ]); - expect(s.str.jsonDecode().as("json")).toSeriesEqual( - expected.getColumn("json"), - ); - expect(s.str.jsonDecode(dtype).as("json")).toSeriesEqual( - expected.getColumn("json"), - ); - s = pl.Series("col_a", [], pl.Utf8); - const exp = pl.Series("col_a", []).cast(pl.List(pl.Int64)); - dtype = pl.List(pl.Int64); - expect(s.str.jsonDecode(dtype).as("col_a")).toSeriesEqual(exp); + { + const s = pl.Series(["[1, 2, 3]", null, "[4, 5, 6]"]); + const dtype = pl.List(pl.Int64); + const expSeries = pl.Series([[1, 2, 3], null, [4, 5, 6]]); + expect(s.str.jsonDecode()).toSeriesEqual(expSeries); + expect(s.str.jsonDecode(dtype)).toSeriesEqual(expSeries); + } + { + const dtype = pl.Struct([ + new pl.Field("a", pl.Int64), + new 
pl.Field("b", pl.Bool), + ]); + const s = pl.Series("json", [ + '{"a":1, "b": true}', + '{"a": null, "b": null }', + '{"a":2, "b": false}', + ]); + expect(s.str.jsonDecode().as("json")).toSeriesEqual( + expected.getColumn("json"), + ); + expect(s.str.jsonDecode(dtype).as("json")).toSeriesEqual( + expected.getColumn("json"), + ); + } + { + const s = pl.Series("col_a", [], pl.Utf8); + const exp = pl.Series("col_a", []).cast(pl.List(pl.Int64)); + const dtype = pl.List(pl.Int64); + expect(s.str.jsonDecode(dtype).as("col_a")).toSeriesEqual(exp); + } }); test("jsonPathMatch", () => { const df = pl.DataFrame({ @@ -1206,68 +1212,82 @@ describe("expr.str", () => { }); test("expr.replace", () => { const df = pl.DataFrame({ a: [1, 2, 2, 3], b: ["a", "b", "c", "d"] }); - let actual = df.withColumns(pl.col("a").replace(2, 100).alias("replaced")); - let expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [1, 100, 100, 3], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl - .col("a") - .replaceStrict([2, 3], [100, 200], -1, pl.Float64) - .alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [-1, 100, 100, 200], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl.col("b").replaceStrict("a", "c", "e", pl.Utf8).alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: ["c", "e", "e", "e"], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl - .col("b") - .replaceStrict(["a", "b"], ["c", "d"], "e", pl.Utf8) - .alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: ["c", "d", "e", "e"], - }); - expect(actual).toFrameEqual(expected); + { + const actual = df.withColumns( + pl.col("a").replace(2, 100).alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + 
replaced: [1, 100, 100, 3], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl + .col("a") + .replaceStrict([2, 3], [100, 200], -1, pl.Float64) + .alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: [-1, 100, 100, 200], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl.col("b").replaceStrict("a", "c", "e", pl.Utf8).alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: ["c", "e", "e", "e"], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl + .col("b") + .replaceStrict(["a", "b"], ["c", "d"], "e", pl.Utf8) + .alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: ["c", "d", "e", "e"], + }); + expect(actual).toFrameEqual(expected); + } const mapping = { 2: 100, 3: 200 }; - actual = df.withColumns( - pl - .col("a") - .replaceStrict({ old: mapping, default_: -1, returnDtype: pl.Int64 }) - .alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [-1, 100, 100, 200], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl.col("a").replace({ old: mapping }).alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [1, 100, 100, 200], - }); - expect(actual).toFrameEqual(expected); + { + const actual = df.withColumns( + pl + .col("a") + .replaceStrict({ old: mapping, default_: -1, returnDtype: pl.Int64 }) + .alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: [-1, 100, 100, 200], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl.col("a").replace({ old: mapping }).alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 
3], + b: ["a", "b", "c", "d"], + replaced: [1, 100, 100, 200], + }); + expect(actual).toFrameEqual(expected); + } }); test("slice", () => { const df = pl.DataFrame({ @@ -2196,69 +2216,76 @@ describe("Round", () => { describe("EWM", () => { test("ewmMean", () => { - let s = pl.Series("s", [2, 5, 3]); - let df = pl.DataFrame([s]); + const s = pl.Series("s", [2, 5, 3]); + const df = pl.DataFrame([s]); let expected = pl.DataFrame({ s, ewmMean: [2.0, 4.0, 3.4285714285714284] }); - - let seriesActual = df.getColumn("s").ewmMean().rename("ewmMean"); - let actual = df.withColumn(col("s").ewmMean().as("ewmMean")); - - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); - - seriesActual = df - .getColumn("s") - .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) - .rename("ewmMean"); - actual = df.withColumn( - col("s") + { + const seriesActual = df.getColumn("s").ewmMean().rename("ewmMean"); + const actual = df.withColumn(col("s").ewmMean().as("ewmMean")); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const seriesActual = df + .getColumn("s") .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) - .as("ewmMean"), - ); - - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + .rename("ewmMean"); + const actual = df.withColumn( + col("s") + .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .as("ewmMean"), + ); - seriesActual = df - .getColumn("s") - .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) - .rename("ewmMean"); - actual = df.withColumn( - col("s") + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const seriesActual = df + .getColumn("s") .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) - .as("ewmMean"), - ); - - expected = pl.DataFrame({ s, ewmMean: [2.0, 3.5, 3.25] }); - 
expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); - - seriesActual = df - .getColumn("s") - .ewmMean(0.5, false, 1, true) - .rename("ewmMean"); - actual = df.withColumn(col("s").ewmMean(0.5, false, 1, true).as("ewmMean")); + .rename("ewmMean"); + const actual = df.withColumn( + col("s") + .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) + .as("ewmMean"), + ); - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + expected = pl.DataFrame({ s, ewmMean: [2.0, 3.5, 3.25] }); + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const seriesActual = df + .getColumn("s") + .ewmMean(0.5, false, 1, true) + .rename("ewmMean"); + const actual = df.withColumn( + col("s").ewmMean(0.5, false, 1, true).as("ewmMean"), + ); - s = pl.Series("a", [2, 3, 5, 7, 4]); - df = pl.DataFrame([s]); + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const s = pl.Series("a", [2, 3, 5, 7, 4]); + const df = pl.DataFrame([s]); - seriesActual = df - .getColumn("a") - .ewmMean({ adjust: true, minPeriods: 2, ignoreNulls: true }) - .round(5) - .rename("ewmMean"); - actual = df.withColumn( - col("a") + const seriesActual = df + .getColumn("a") .ewmMean({ adjust: true, minPeriods: 2, ignoreNulls: true }) .round(5) - .as("ewmMean"), - ); + .rename("ewmMean"); + const actual = df.withColumn( + col("a") + .ewmMean({ adjust: true, minPeriods: 2, ignoreNulls: true }) + .round(5) + .as("ewmMean"), + ); - expected = pl.DataFrame({ ewmMean: [null, 2.66667, 4, 5.6, 4.77419], s }); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + expected = pl.DataFrame({ ewmMean: [null, 2.66667, 4, 5.6, 4.77419], s }); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } }); test("ewmStd", () => { diff --git 
a/__tests__/series.test.ts b/__tests__/series.test.ts index 4db2ab2d..3c7b368d 100644 --- a/__tests__/series.test.ts +++ b/__tests__/series.test.ts @@ -573,50 +573,62 @@ describe("series", () => { expect(() => pl.Series("dt", [null], pl.Date).describe()).toThrow( "Invalid operation: describe is not supported for DataType(Date)", ); - let actual = pl.Series([true, false, true]).describe(); - let expected = pl.DataFrame({ - statistic: ["sum", "null_count", "count"], - value: [false, null, null], - }); - - expect(actual).toFrameEqual(expected); - actual = pl.Series(["a", "b", "c", null]).describe(); - expected = pl.DataFrame({ - statistic: ["unique", "null_count", "count"], - value: [4, 1, 4], - }); - expect(actual).toFrameEqual(expected); + { + const actual = pl.Series([true, false, true]).describe(); + const expected = pl.DataFrame({ + statistic: ["sum", "null_count", "count"], + value: [false, null, null], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl.Series(["a", "b", "c", null]).describe(); + const expected = pl.DataFrame({ + statistic: ["unique", "null_count", "count"], + value: [4, 1, 4], + }); + expect(actual).toFrameEqual(expected); + } }); it("series:valueCounts", () => { - let actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true); - let expected = pl.DataFrame({ - a: [2, 1, 3], - count: [2, 1, 1], - }); - expect(actual).toFrameEqual(expected); - - actual = pl - .Series("a", [1, 2, 2, 3]) - .valueCounts(true, true, undefined, true); - expected = pl.DataFrame({ - a: [2, 1, 3], - proportion: [0.5, 0.25, 0.25], - }); - expect(actual).toFrameEqual(expected); - - actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true, true, "foo", false); - expected = pl.DataFrame({ - a: [2, 1, 3], - foo: [2, 1, 1], - }); - expect(actual).toFrameEqual(expected); - - actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true, true, "foo", true); - expected = pl.DataFrame({ - a: [2, 1, 3], - foo: [0.5, 0.25, 0.25], - }); - 
expect(actual).toFrameEqual(expected); + { + const actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true); + const expected = pl.DataFrame({ + a: [2, 1, 3], + count: [2, 1, 1], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl + .Series("a", [1, 2, 2, 3]) + .valueCounts(true, true, undefined, true); + const expected = pl.DataFrame({ + a: [2, 1, 3], + proportion: [0.5, 0.25, 0.25], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl + .Series("a", [1, 2, 2, 3]) + .valueCounts(true, true, "foo", false); + const expected = pl.DataFrame({ + a: [2, 1, 3], + foo: [2, 1, 1], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl + .Series("a", [1, 2, 2, 3]) + .valueCounts(true, true, "foo", true); + const expected = pl.DataFrame({ + a: [2, 1, 3], + foo: [0.5, 0.25, 0.25], + }); + expect(actual).toFrameEqual(expected); + } }); it("set: expected matches actual", () => { const expected = pl.Series([99, 2, 3]); @@ -702,18 +714,22 @@ describe("series", () => { }); test("toDummies", () => { const s = pl.Series("a", [1, 2, 3]); - let actual = s.toDummies(); - let expected = pl.DataFrame( - { "a_1.0": [1, 0, 0], "a_2.0": [0, 1, 0], "a_3.0": [0, 0, 1] }, - { schema: { "a_1.0": pl.UInt8, "a_2.0": pl.UInt8, "a_3.0": pl.UInt8 } }, - ); - expect(actual).toFrameEqual(expected); - actual = s.toDummies(":", true); - expected = pl.DataFrame( - { "a:2.0": [0, 1, 0], "a:3.0": [0, 0, 1] }, - { schema: { "a:2.0": pl.UInt8, "a:3.0": pl.UInt8 } }, - ); - expect(actual).toFrameEqual(expected); + { + const actual = s.toDummies(); + const expected = pl.DataFrame( + { "a_1.0": [1, 0, 0], "a_2.0": [0, 1, 0], "a_3.0": [0, 0, 1] }, + { schema: { "a_1.0": pl.UInt8, "a_2.0": pl.UInt8, "a_3.0": pl.UInt8 } }, + ); + expect(actual).toFrameEqual(expected); + } + { + const actual = s.toDummies(":", true); + const expected = pl.DataFrame( + { "a:2.0": [0, 1, 0], "a:3.0": [0, 0, 1] }, + { schema: { "a:2.0": pl.UInt8, "a:3.0": pl.UInt8 } }, + 
); + expect(actual).toFrameEqual(expected); + } }); }); describe("comparators & math", () => { diff --git a/polars/dataframe.ts b/polars/dataframe.ts index 842242ac..b1bba96a 100644 --- a/polars/dataframe.ts +++ b/polars/dataframe.ts @@ -20,12 +20,13 @@ import type { WriteParquetOptions, } from "./types"; -import { DataType } from "./datatypes"; +import { type DTypeToJs, DataType, type JsToDtype } from "./datatypes"; import { type ColumnSelection, type ColumnsOrExpr, type ExprOrString, + type Simplify, type ValueOrArray, columnOrColumns, columnOrColumnsStrict, @@ -254,10 +255,10 @@ interface WriteMethods { * ╰─────┴─────┴─────╯ * ``` */ -export interface DataFrame - extends Arithmetic, - Sample, - Arithmetic, +export interface DataFrame = any> + extends Arithmetic>, + Sample>, + Arithmetic>, WriteMethods, Serialize, GroupByOps { @@ -274,7 +275,7 @@ export interface DataFrame /** * Very cheap deep clone. */ - clone(): DataFrame; + clone(): DataFrame; /** * __Summary statistics for a DataFrame.__ * @@ -321,7 +322,18 @@ export interface DataFrame * ... "ham": ['a', 'b', 'c'], * ... "apple": ['a', 'b', 'c'] * ... 
}); - * > console.log(df.drop(['ham', 'apple']).toString()); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // apple: pl.Series; + * // }> + * > const df2 = df.drop(['ham', 'apple']); + * // df2: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // }> + * > console.log(df2.toString()); * shape: (3, 2) * ╭─────┬─────╮ * │ foo ┆ bar │ @@ -336,9 +348,14 @@ export interface DataFrame * ╰─────┴─────╯ * ``` */ - drop(name: string): DataFrame; - drop(names: string[]): DataFrame; - drop(name: string, ...names: string[]): DataFrame; + drop(name: U): DataFrame>>; + drop( + names: U, + ): DataFrame>>; + drop( + name: U, + ...names: V + ): DataFrame>>; /** * __Return a new DataFrame where the null values are dropped.__ * @@ -364,9 +381,9 @@ export interface DataFrame * └─────┴─────┴─────┘ * ``` */ - dropNulls(column: string): DataFrame; - dropNulls(columns: string[]): DataFrame; - dropNulls(...columns: string[]): DataFrame; + dropNulls(column: keyof T): DataFrame; + dropNulls(columns: (keyof T)[]): DataFrame; + dropNulls(...columns: (keyof T)[]): DataFrame; /** * __Explode `DataFrame` to long format by exploding a column with Lists.__ * ___ @@ -518,7 +535,7 @@ export interface DataFrame * 2 * ``` */ - findIdxByName(name: string): number; + findIdxByName(name: keyof T): number; /** * __Apply a horizontal reduction on a DataFrame.__ * @@ -604,12 +621,46 @@ export interface DataFrame frameEqual(other: DataFrame, nullEqual: boolean): boolean; /** * Get a single column as Series by name. + * + * --- + * @example + * ``` + * > const df = pl.DataFrame({ + * ... foo: [1, 2, 3], + * ... bar: [6, null, 8], + * ... ham: ["a", "b", "c"], + * ... 
}); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> + * > const column = df.getColumn("foo"); + * // column: pl.Series + * ``` */ + getColumn(name: U): T[U]; getColumn(name: string): Series; /** * Get the DataFrame as an Array of Series. + * --- + * @example + * ``` + * > const df = pl.DataFrame({ + * ... foo: [1, 2, 3], + * ... bar: [6, null, 8], + * ... ham: ["a", "b", "c"], + * ... }); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> + * > const columns = df.getColumns(); + * // columns: (pl.Series | pl.Series | pl.Series)[] + * ``` */ - getColumns(): Array; + getColumns(): T[keyof T][]; /** * Start a groupby operation. * ___ @@ -656,7 +707,7 @@ export interface DataFrame * ╰─────┴─────┴─────╯ * ``` */ - head(length?: number): DataFrame; + head(length?: number): DataFrame; /** * Return a new DataFrame grown horizontally by stacking multiple Series to it. * @param columns - array of Series or DataFrame to stack @@ -668,8 +719,20 @@ export interface DataFrame * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> * > const x = pl.Series("apple", [10, 20, 30]) - * > df.hStack([x]) + * // x: pl.Series + * > df.hstack([x]) + * // pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // apple: pl.Series; + * // }> * shape: (3, 4) * ╭─────┬─────┬─────┬───────╮ * │ foo ┆ bar ┆ ham ┆ apple │ @@ -684,6 +747,12 @@ export interface DataFrame * ╰─────┴─────┴─────┴───────╯ * ``` */ + hstack = any>( + columns: DataFrame, + ): DataFrame>; + hstack( + columns: U, + ): DataFrame>; hstack(columns: Array | DataFrame): DataFrame; hstack(columns: Array | DataFrame, inPlace?: boolean): void; /** @@ -1160,6 +1229,11 @@ export interface DataFrame * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... 
}); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> * > df.rename({"foo": "apple"}); * ╭───────┬─────┬─────╮ * │ apple ┆ bar ┆ ham │ @@ -1174,21 +1248,38 @@ export interface DataFrame * ╰───────┴─────┴─────╯ * ``` */ + rename>>( + mapping: U, + ): DataFrame<{ [K in keyof T as U[K] extends string ? U[K] : K]: T[K] }>; rename(mapping: Record): DataFrame; /** * Replace a column at an index location. + * + * @warning TypeScript cannot encode type mutation, + * so the static type of the DataFrame will be incorrect after this call. Cast the DataFrame to the correct type manually. * ___ * @param index - Column index * @param newColumn - New column to insert * @example * ``` - * > const df = pl.DataFrame({ + * > const df: pl.DataFrame = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... }); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> * > const x = pl.Series("apple", [10, 20, 30]); + * // x: pl.Series * > df.replaceAtIdx(0, x); + * // df: pl.DataFrame<{ + * // foo: pl.Series; <- notice how the type is still the same! + * // bar: pl.Series; + * // ham: pl.Series; + * // }> * shape: (3, 3) * ╭───────┬─────┬─────╮ * │ apple ┆ bar ┆ ham │ @@ -1223,7 +1314,28 @@ export interface DataFrame * Convert columnar data to rows as arrays */ rows(): Array>; - get schema(): Record; + /** + * @example + * ``` + * > const df: pl.DataFrame = pl.DataFrame({ + * ... "foo": [1, 2, 3], + * ... "bar": [6, 7, 8], + * ... "ham": ['a', 'b', 'c'] + * ... }); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> + * > df.schema + * // { + * // foo: Float64; + * // bar: Float64; + * // ham: Utf8; + * // } + * ``` + */ + get schema(): { [K in keyof T]: T[K]["dtype"] }; /** * Select columns from this DataFrame. * ___ @@ -1235,7 +1347,15 @@ export interface DataFrame * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c'] * ... 
}); + * // df: pl.DataFrame<{ + * // foo: pl.Series; + * // bar: pl.Series; + * // ham: pl.Series; + * // }> * > df.select('foo'); + * // pl.DataFrame<{ + * // foo: pl.Series; + * // }> * shape: (3, 1) * ┌─────┐ * │ foo │ @@ -1250,7 +1370,8 @@ export interface DataFrame * └─────┘ * ``` */ - select(...columns: ExprOrString[]): DataFrame; + select(...columns: U[]): DataFrame<{ [P in U]: T[P] }>; + select(...columns: ExprOrString[]): DataFrame; /** * Shift the values by a given period and fill the parts that will be empty due to this operation * with `Nones`. @@ -1291,8 +1412,8 @@ export interface DataFrame * └──────┴──────┴──────┘ * ``` */ - shift(periods: number): DataFrame; - shift({ periods }: { periods: number }): DataFrame; + shift(periods: number): DataFrame; + shift({ periods }: { periods: number }): DataFrame; /** * Shift the values by a given period and fill the parts that will be empty due to this operation * with the result of the `fill_value` expression. @@ -1523,7 +1644,7 @@ export interface DataFrame * ``` * @category IO */ - toObject(): Record; + toObject(): { [K in keyof T]: DTypeToJs[] }; /** * @deprecated *since 0.4.0* use {@link writeIPC} @@ -1758,7 +1879,16 @@ export interface DataFrame * @param existingName * @param newName */ + withColumnRenamed( + existingName: Existing, + replacement: New, + ): DataFrame<{ [K in keyof T as K extends Existing ? New : K]: T[K] }>; withColumnRenamed(existing: string, replacement: string): DataFrame; + + withColumnRenamed(opts: { + existingName: Existing; + replacement: New; + }): DataFrame<{ [K in keyof T as K extends Existing ? New : K]: T[K] }>; withColumnRenamed(opts: { existing: string; replacement: string }): DataFrame; /** * Add a column at index 0 that counts the rows. 
@@ -1787,7 +1917,7 @@ export interface DataFrame Or combine them: - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds - By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). + By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". Parameters @@ -2697,6 +2827,14 @@ export const _DataFrame = (_df: any): DataFrame => { }); }; +interface DataFrameOptions { + columns?: any[]; + orient?: "row" | "col"; + schema?: Record; + schemaOverrides?: Record; + inferSchemaLength?: number; +} + /** * DataFrame constructor */ @@ -2746,16 +2884,21 @@ export interface DataFrameConstructor extends Deserialize { * ╰─────┴─────╯ * ``` */ - ( - data: any, - options?: { - columns?: any[]; - orient?: "row" | "col"; - schema?: Record; - schemaOverrides?: Record; - inferSchemaLength?: number; - }, - ): DataFrame; + >( + data: T1, + options?: DataFrameOptions, + ): DataFrame<{ + [K in T1[number] as K["name"]]: K; + }>; + >>( + data: T2, + options?: DataFrameOptions, + ): DataFrame<{ + [K in keyof T2]: K extends string + ? Series, K> + : never; + }>; + (data: any, options?: DataFrameOptions): DataFrame; isDataFrame(arg: any): arg is DataFrame; } diff --git a/polars/datatypes/datatype.ts b/polars/datatypes/datatype.ts index 1294c631..d3c38cb0 100644 --- a/polars/datatypes/datatype.ts +++ b/polars/datatypes/datatype.ts @@ -1,6 +1,7 @@ import { Field } from "./field"; export abstract class DataType { + declare readonly __dtype: string; get variant() { return this.constructor.name as DataTypeName; } @@ -17,72 +18,72 @@ export abstract class DataType { } /** Null type */ - public static get Null(): DataType { + public static get Null() { return new Null(); } /** `true` and `false`. 
*/ - public static get Bool(): DataType { + public static get Bool() { return new Bool(); } /** An `i8` */ - public static get Int8(): DataType { + public static get Int8() { return new Int8(); } /** An `i16` */ - public static get Int16(): DataType { + public static get Int16() { return new Int16(); } /** An `i32` */ - public static get Int32(): DataType { + public static get Int32() { return new Int32(); } /** An `i64` */ - public static get Int64(): DataType { + public static get Int64() { return new Int64(); } /** An `u8` */ - public static get UInt8(): DataType { + public static get UInt8() { return new UInt8(); } /** An `u16` */ - public static get UInt16(): DataType { + public static get UInt16() { return new UInt16(); } /** An `u32` */ - public static get UInt32(): DataType { + public static get UInt32() { return new UInt32(); } /** An `u64` */ - public static get UInt64(): DataType { + public static get UInt64() { return new UInt64(); } /** A `f32` */ - public static get Float32(): DataType { + public static get Float32() { return new Float32(); } /** A `f64` */ - public static get Float64(): DataType { + public static get Float64() { return new Float64(); } - public static get Date(): DataType { + public static get Date() { return new Date(); } /** Time of day type */ - public static get Time(): DataType { + public static get Time() { return new Time(); } /** Type for wrapping arbitrary JS objects */ - public static get Object(): DataType { + public static get Object() { return new Object_(); } /** A categorical encoding of a set of strings */ - public static get Categorical(): DataType { + public static get Categorical() { return new Categorical(); } /** Decimal type */ - public static Decimal(precision?: number, scale?: number): DataType { + public static Decimal(precision?: number, scale?: number) { return new Decimal(precision, scale); } /** @@ -93,7 +94,7 @@ export abstract class DataType { public static Datetime( timeUnit?: TimeUnit | "ms" | "ns" | 
"us", timeZone: string | null | undefined = null, - ): DataType { + ) { return new Datetime(timeUnit ?? "ms", timeZone); } @@ -103,7 +104,7 @@ export abstract class DataType { * @param inner The `DataType` of values within the list * */ - public static List(inner: DataType): DataType { + public static List(inner: DataType) { return new List(inner); } @@ -112,7 +113,7 @@ export abstract class DataType { * This is called `Array` in other polars implementations, but `Array` is widely used in JS, so we use `FixedSizeList` instead. * */ - public static FixedSizeList(inner: DataType, listSize: number): DataType { + public static FixedSizeList(inner: DataType, listSize: number) { return new FixedSizeList(inner, listSize); } /** @@ -120,17 +121,15 @@ export abstract class DataType { */ public static Struct(fields: Field[]): DataType; public static Struct(fields: { [key: string]: DataType }): DataType; - public static Struct( - fields: Field[] | { [key: string]: DataType }, - ): DataType { + public static Struct(fields: Field[] | { [key: string]: DataType }) { return new Struct(fields); } /** A variable-length UTF-8 encoded string whose offsets are represented as `i64`. 
*/ - public static get Utf8(): DataType { + public static get Utf8() { return new Utf8(); } - public static get String(): DataType { + public static get String() { return new String(); } @@ -165,29 +164,68 @@ export abstract class DataType { } } -export class Null extends DataType {} -export class Bool extends DataType {} -export class Int8 extends DataType {} -export class Int16 extends DataType {} -export class Int32 extends DataType {} -export class Int64 extends DataType {} -export class UInt8 extends DataType {} -export class UInt16 extends DataType {} -export class UInt32 extends DataType {} -export class UInt64 extends DataType {} -export class Float32 extends DataType {} -export class Float64 extends DataType {} +export class Null extends DataType { + declare __dtype: "Null"; +} + +export class Bool extends DataType { + declare __dtype: "Bool"; +} +export class Int8 extends DataType { + declare __dtype: "Int8"; +} +export class Int16 extends DataType { + declare __dtype: "Int16"; +} +export class Int32 extends DataType { + declare __dtype: "Int32"; +} +export class Int64 extends DataType { + declare __dtype: "Int64"; +} +export class UInt8 extends DataType { + declare __dtype: "UInt8"; +} +export class UInt16 extends DataType { + declare __dtype: "UInt16"; +} +export class UInt32 extends DataType { + declare __dtype: "UInt32"; +} +export class UInt64 extends DataType { + declare __dtype: "UInt64"; +} +export class Float32 extends DataType { + declare __dtype: "Float32"; +} +export class Float64 extends DataType { + declare __dtype: "Float64"; +} + // biome-ignore lint/suspicious/noShadowRestrictedNames: -export class Date extends DataType {} -export class Time extends DataType {} -export class Object_ extends DataType {} -export class Utf8 extends DataType {} +export class Date extends DataType { + declare __dtype: "Date"; +} +export class Time extends DataType { + declare __dtype: "Time"; +} +export class Object_ extends DataType { + declare __dtype: 
"Object"; +} +export class Utf8 extends DataType { + declare __dtype: "Utf8"; +} // biome-ignore lint/suspicious/noShadowRestrictedNames: -export class String extends DataType {} +export class String extends DataType { + declare __dtype: "String"; +} -export class Categorical extends DataType {} +export class Categorical extends DataType { + declare __dtype: "Categorical"; +} export class Decimal extends DataType { + declare __dtype: "Decimal"; private precision: number | null; private scale: number | null; constructor(precision?: number, scale?: number) { @@ -224,6 +262,7 @@ export class Decimal extends DataType { * Datetime type */ export class Datetime extends DataType { + declare __dtype: "Datetime"; constructor( private timeUnit: TimeUnit | "ms" | "ns" | "us" = "ms", private timeZone?: string | null, @@ -246,6 +285,7 @@ export class Datetime extends DataType { } export class List extends DataType { + declare __dtype: "List"; constructor(protected __inner: DataType) { super(); } @@ -261,6 +301,7 @@ export class List extends DataType { } export class FixedSizeList extends DataType { + declare __dtype: "FixedSizeList"; constructor( protected __inner: DataType, protected listSize: number, @@ -294,6 +335,7 @@ export class FixedSizeList extends DataType { } export class Struct extends DataType { + declare __dtype: "Struct"; private fields: Field[]; constructor( diff --git a/polars/lazy/expr/string.ts b/polars/lazy/expr/string.ts index 239fa4d8..c31426fd 100644 --- a/polars/lazy/expr/string.ts +++ b/polars/lazy/expr/string.ts @@ -427,13 +427,11 @@ export const ExprStringFunctions = (_expr: any): StringNamespace => { dtype: DataType.Date | DataType.Datetime | typeof DataType.Datetime, format?: string, ) { - if (!(dtype instanceof DataType)) { - dtype = dtype(); - } - if (dtype.equals(DataType.Date)) { + const dt = dtype instanceof DataType ? 
dtype : dtype(); + if (dt.equals(DataType.Date)) { return wrap("strToDate", format, false, false, false); } - if (dtype.equals(DataType.Datetime("ms"))) { + if (dt.equals(DataType.Datetime("ms"))) { return wrap( "strToDatetime", format, diff --git a/polars/lazy/functions.ts b/polars/lazy/functions.ts index 2166feb4..35988b8d 100644 --- a/polars/lazy/functions.ts +++ b/polars/lazy/functions.ts @@ -178,8 +178,8 @@ export function lit(value: any): Expr { * > .filter(pl.col("foo").lt(pl.intRange(0, 100))) * > .collect() * ``` - * - * + * + * * Generate an index column by using `intRange` in conjunction with :func:`len`. * ``` * df = pl.DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]}) @@ -232,7 +232,7 @@ export function intRange( opts: any, end?, step = 1 as number | Expr, - dtype = DataType.Int64, + dtype: DataType = DataType.Int64, eager?, ): Series | Expr { // @deprecated since 0.15.0 @@ -512,7 +512,7 @@ export function head(column: Series | ExprOrString, n?): Series | Expr { } return exprToLitOrExpr(column, false).head(n); } -/** Return the number of elements in the column. +/** Return the number of elements in the column. This is similar to `COUNT(*)` in SQL. @return Expr - Expression of data type :class:`UInt32`. diff --git a/polars/series/index.ts b/polars/series/index.ts index 23ea50a7..375616b9 100644 --- a/polars/series/index.ts +++ b/polars/series/index.ts @@ -31,7 +31,7 @@ const inspect = Symbol.for("nodejs.util.inspect.custom"); /** * A Series represents a single column in a polars DataFrame. 
*/ -export interface Series +export interface Series extends ArrayLike, Rolling>, Arithmetic>, @@ -42,7 +42,7 @@ export interface Series EwmOps>, Serialize { inner(): any; - name: string; + name: Name; dtype: T; str: StringNamespace; lst: ListNamespace; @@ -51,20 +51,20 @@ export interface Series [inspect](): string; [Symbol.iterator](): IterableIterator>; // inner(): JsSeries - bitand(other: Series): Series; - bitor(other: Series): Series; - bitxor(other: Series): Series; + bitand(other: Series): Series; + bitor(other: Series): Series; + bitxor(other: Series): Series; /** * Take absolute values */ - abs(): Series; + abs(): Series; /** * __Rename this Series.__ * * @param name - new name * @see {@link rename} */ - alias(name: string): Series; + alias(name: U): Series; /** * __Append a Series to this one.__ * ___ @@ -119,11 +119,11 @@ export interface Series /** * Get index values where Boolean Series evaluate True. */ - argTrue(): Series; + argTrue(): Series; /** * Get unique index as Series. */ - argUnique(): Series; + argUnique(): Series; /** * Get the index values that would sort this Series. * ___ @@ -133,27 +133,28 @@ export interface Series * @param nullsLast - Place null values last instead of first. * @return {SeriesType} indexes - Indexes that can be used to sort this array. */ - argSort(): Series; - argSort(descending?: boolean, nullsLast?: boolean): Series; + argSort(): Series; + argSort(descending?: boolean, nullsLast?: boolean): Series; argSort({ descending, nullsLast, - }: { descending?: boolean; nullsLast?: boolean }): Series; + }: { descending?: boolean; nullsLast?: boolean }): Series; argSort({ reverse, // deprecated nullsLast, - }: { reverse?: boolean; nullsLast?: boolean }): Series; + }: { reverse?: boolean; nullsLast?: boolean }): Series; /** * __Rename this Series.__ * * @param name - new name * @see {@link rename} {@link alias} */ - as(name: string): Series; + as(name: string): Series; /** * Cast between data types. 
*/ - cast(dtype: U, strict?: boolean): Series; + cast(dtype: U, strict?: boolean): Series; + cast(dtype: DataType, strict?: boolean): Series; /** * Get the length of each individual chunk */ @@ -161,8 +162,8 @@ export interface Series /** * Cheap deep clones. */ - clone(): Series; - concat(other: Series): Series; + clone(): Series; + concat(other: Series): Series; /** * __Quick summary statistics of a series. __ @@ -216,14 +217,14 @@ export interface Series * @param n - number of slots to shift * @param nullBehavior - `'ignore' | 'drop'` */ - diff(n: number, nullBehavior: "ignore" | "drop"): Series; + diff(n: number, nullBehavior: "ignore" | "drop"): Series; diff({ n, nullBehavior, }: { n: number; nullBehavior: "ignore" | "drop"; - }): Series; + }): Series; /** * Compute the dot/inner product between two Series * ___ @@ -239,7 +240,7 @@ export interface Series /** * Create a new Series that copies data from this Series without null values. */ - dropNulls(): Series; + dropNulls(): Series; /** * __Explode a list or utf8 Series.__ * @@ -1122,7 +1123,7 @@ export interface Series * ``` */ toObject(): { - name: string; + name: Name; datatype: DtypeToJsName; values: DTypeToJs[]; }; @@ -1871,7 +1872,7 @@ export function _Series(_s: any): Series { }, }; - return new Proxy(series, { + return new Proxy(series as unknown as Series, { get: (target, prop, receiver) => { if (typeof prop !== "symbol" && !Number.isNaN(Number(prop))) { return target.get(Number(prop)); @@ -1908,7 +1909,7 @@ export interface SeriesConstructor extends Deserialize { * ] * ``` */ - (values: ArrayLike): Series>; + (values: ArrayLike): Series>; (values: any): Series; /** * Create a new named series @@ -1926,15 +1927,15 @@ export interface SeriesConstructor extends Deserialize { * ] * ``` */ - ( - name: string, - values: ArrayLike, - ): Series>; - ( - name: string, - values: ArrayLike>, + ( + name: Name, + values: ArrayLike, + ): Series, Name>; + ( + name: Name, + values: ArrayLike>, dtype?: T2, - ): 
Series; + ): Series; (name: string, values: any[], dtype?): Series; /** @@ -1943,10 +1944,10 @@ export interface SeriesConstructor extends Deserialize { */ from(arrayLike: ArrayLike): Series>; from(arrayLike: ArrayLike): Series; - from( - name: string, + from( + name: Name, arrayLike: ArrayLike, - ): Series>; + ): Series, Name>; from(name: string, arrayLike: ArrayLike): Series; /** * Returns a new Series from a set of elements. diff --git a/polars/utils.ts b/polars/utils.ts index 70fe781a..bf2b53db 100644 --- a/polars/utils.ts +++ b/polars/utils.ts @@ -77,3 +77,5 @@ export const regexToString = (r: string | RegExp): string => { }; export const INSPECT_SYMBOL = Symbol.for("nodejs.util.inspect.custom"); + +export type Simplify = { [K in keyof T]: T[K] } & {};