Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improvement: prevent column headers from extra reloads #2796

Merged
merged 2 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions frontend/src/components/data-table/__test__/columns.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { expect, test } from "vitest";
import { uniformSample } from "../uniformSample";
import { UrlDetector } from "../url-detector";
import { render } from "@testing-library/react";
import { inferFieldTypes } from "../columns";

test("uniformSample", () => {
const items = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"];
Expand Down Expand Up @@ -31,3 +32,89 @@ test("UrlDetector renders URLs as hyperlinks", () => {
expect(link).toBeTruthy();
expect(link?.href).toBe("https://example.com/");
});

test("inferFieldTypes", () => {
const data = [
{
a: 1,
b: "foo",
c: null,
d: { mime: "text/csv" },
e: [1, 2, 3],
f: true,
g: false,
h: new Date(),
},
];
const fieldTypes = inferFieldTypes(data);
expect(fieldTypes).toMatchInlineSnapshot(`
{
"a": [
"number",
"number",
],
"b": [
"string",
"string",
],
"c": [
"unknown",
"unknown",
],
"d": [
"unknown",
"unknown",
],
"e": [
"unknown",
"unknown",
],
"f": [
"boolean",
"boolean",
],
"g": [
"boolean",
"boolean",
],
"h": [
"datetime",
"datetime",
],
}
`);
});

test("inferFieldTypes with nulls", () => {
const data = [{ a: 1, b: null }];
const fieldTypes = inferFieldTypes(data);
expect(fieldTypes).toMatchInlineSnapshot(`
{
"a": [
"number",
"number",
],
"b": [
"unknown",
"unknown",
],
}
`);
});

test("inferFieldTypes with mimetypes", () => {
const data = [{ a: { mime: "text/csv" }, b: { mime: "image/png" } }];
const fieldTypes = inferFieldTypes(data);
expect(fieldTypes).toMatchInlineSnapshot(`
{
"a": [
"unknown",
"unknown",
],
"b": [
"unknown",
"unknown",
],
}
`);
});
23 changes: 16 additions & 7 deletions frontend/src/components/data-table/chart-spec-model.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { TopLevelFacetedUnitSpec } from "@/plugins/impl/data-explorer/queri
import { mint, orange, slate } from "@radix-ui/colors";
import type { ColumnHeaderSummary, FieldTypes } from "./types";
import { asURL } from "@/utils/url";
import { parseCsvData } from "@/plugins/impl/vega/loader";

const MAX_BAR_HEIGHT = 24; // px
const MAX_BAR_WIDTH = 28; // px
Expand Down Expand Up @@ -33,6 +34,16 @@ export class ColumnChartSpecModel<T> {
includeCharts: boolean;
},
) {
// Support CSV data as a string
const isCsv =
typeof this.data === "string" &&
!this.data.startsWith("./@file") &&
!this.data.startsWith("/@file") &&
!this.data.startsWith("data:text/csv");
if (isCsv) {
this.data = parseCsvData(this.data) as T[];
}

this.columnSummaries = new Map(summaries.map((s) => [s.column, s]));
}

Expand All @@ -48,14 +59,12 @@ export class ColumnChartSpecModel<T> {
if (!this.data) {
return null;
}
if (typeof this.data !== "string") {
return null;
}

const base: Omit<TopLevelFacetedUnitSpec, "mark"> = {
data: {
url: asURL(this.data).href,
} as TopLevelFacetedUnitSpec["data"],
data: (typeof this.data === "string"
? {
url: asURL(this.data).href,
}
: { values: this.data }) as TopLevelFacetedUnitSpec["data"],
background: "transparent",
config: {
view: {
Expand Down
95 changes: 49 additions & 46 deletions frontend/src/components/data-table/columns.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,46 @@ import {
} from "./column-header";
import { Checkbox } from "../ui/checkbox";
import { MimeCell } from "./mime-cell";
import { uniformSample } from "./uniformSample";
import type { DataType } from "@/core/kernel/messages";
import { TableColumnSummary } from "./column-summary";
import type { FilterType } from "./filters";
import type { FieldTypesWithExternalType } from "./types";
import { UrlDetector } from "./url-detector";
import { Arrays } from "@/utils/arrays";
import { cn } from "@/utils/cn";
import { Objects } from "@/utils/objects";
import { uniformSample } from "./uniformSample";

interface ColumnInfo {
key: string;
type: "primitive" | "mime";
function inferDataType(value: unknown): DataType {
if (typeof value === "string") {
return "string";
}
if (typeof value === "number") {
return "number";
}
if (value instanceof Date) {
return "datetime";
}
if (typeof value === "boolean") {
return "boolean";
}
if (value == null) {
return "unknown";
}
return "unknown";
}

function getColumnInfo<T>(items: T[]): ColumnInfo[] {
export function inferFieldTypes<T>(items: T[]): FieldTypesWithExternalType {
// No items
if (items.length === 0) {
return Arrays.EMPTY;
return {};
}

// Not an object
if (typeof items[0] !== "object") {
return Arrays.EMPTY;
return {};
}

const keys = new Map<string, ColumnInfo>();
const fieldTypes: FieldTypesWithExternalType = {};

// This can be slow for large datasets,
// so only sample 10 evenly distributed rows
Expand All @@ -44,98 +58,90 @@ function getColumnInfo<T>(items: T[]): ColumnInfo[] {
// We will be a bit defensive and assume values are not homogeneous.
// If any is a mimetype, then we will treat it as a mimetype (i.e. not sortable)
Object.entries(item as object).forEach(([key, value], idx) => {
const currentValue = keys.get(key);
const currentValue = fieldTypes[key];
if (!currentValue) {
// Set for the first time
keys.set(key, {
key,
type: isPrimitiveOrNullish(value) ? "primitive" : "mime",
});
const dtype = inferDataType(value);
fieldTypes[key] = [dtype, dtype];
}
// If we have a value, and it is a primitive, we could possibly upgrade it to a mime
if (
currentValue &&
currentValue.type === "primitive" &&
!isPrimitiveOrNullish(value)
) {
keys.set(key, {
key,
type: "mime",
});

// If its not null, override the type
if (value != null) {
// This can be lossy as we infer take the last seen type
const dtype = inferDataType(value);
fieldTypes[key] = [dtype, dtype];
}
});
});

return [...keys.values()];
return fieldTypes;
}

export const NAMELESS_COLUMN_PREFIX = "__m_column__";

export function generateColumns<T>({
items,
rowHeaders,
selection,
fieldTypes,
textJustifyColumns,
wrappedColumns,
}: {
items: T[];
rowHeaders: string[];
selection: "single" | "multi" | null;
fieldTypes?: FieldTypesWithExternalType;
fieldTypes: FieldTypesWithExternalType;
textJustifyColumns?: Record<string, "left" | "center" | "right">;
wrappedColumns?: string[];
}): Array<ColumnDef<T>> {
const columnInfo = getColumnInfo(items);
const rowHeadersSet = new Set(rowHeaders);

const columns = columnInfo.map(
(info, idx): ColumnDef<T> => ({
id: info.key || `${NAMELESS_COLUMN_PREFIX}${idx}`,
const columns = Objects.entries(fieldTypes).map(
([key, types], idx): ColumnDef<T> => ({
id: key || `${NAMELESS_COLUMN_PREFIX}${idx}`,
// Use an accessorFn instead of an accessorKey because column names
// may have periods in them ...
// https://github.com/TanStack/table/issues/1671
accessorFn: (row) => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return (row as any)[info.key];
return (row as any)[key];
},

header: ({ column }) => {
const dtype = column.columnDef.meta?.dtype;
const headerWithType = (
<div className="flex flex-col">
<span className="font-bold">{info.key}</span>
<span className="font-bold">{key}</span>
{dtype && (
<span className="text-xs text-muted-foreground">{dtype}</span>
)}
</div>
);

// Row headers have no summaries
if (rowHeadersSet.has(info.key)) {
if (rowHeadersSet.has(key)) {
return (
<DataTableColumnHeader header={headerWithType} column={column} />
);
}

return (
<DataTableColumnHeaderWithSummary
key={key}
header={headerWithType}
column={column}
summary={<TableColumnSummary columnId={info.key} />}
summary={<TableColumnSummary columnId={key} />}
/>
);
},

cell: ({ column, renderValue, getValue }) => {
// Row headers are bold
if (rowHeadersSet.has(info.key)) {
if (rowHeadersSet.has(key)) {
return <b>{String(renderValue())}</b>;
}

const value = getValue();
const justify = textJustifyColumns?.[info.key];
const wrapped = wrappedColumns?.includes(info.key);
const justify = textJustifyColumns?.[key];
const wrapped = wrappedColumns?.includes(key);

const format = column.getColumnFormatting?.();
if (format) {
Expand Down Expand Up @@ -166,16 +172,13 @@ export function generateColumns<T>({
</div>
);
},
// Only enable sorting for primitive types and non-row headers
enableSorting: info.type === "primitive" && !rowHeadersSet.has(info.key),
// Remove any default filtering
filterFn: undefined,
meta: {
type: info.type,
rowHeader: rowHeadersSet.has(info.key),
filterType: getFilterTypeForFieldType(fieldTypes?.[info.key]?.[0]),
dtype: fieldTypes?.[info.key]?.[1],
dataType: fieldTypes?.[info.key]?.[0],
rowHeader: rowHeadersSet.has(key),
filterType: getFilterTypeForFieldType(types[0]),
dtype: types[1],
dataType: types[0],
},
}),
);
Expand Down
1 change: 0 additions & 1 deletion frontend/src/components/data-table/filters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import type { RowData } from "@tanstack/react-table";
declare module "@tanstack/react-table" {
//allows us to define custom properties for our columns
interface ColumnMeta<TData extends RowData, TValue> {
type?: "primitive" | "mime";
rowHeader?: boolean;
dtype?: string;
dataType?: DataType;
Expand Down
3 changes: 1 addition & 2 deletions frontend/src/components/datasets/icons.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import {
CalendarIcon,
HashIcon,
TypeIcon,
ListOrderedIcon,
type LucideIcon,
CalendarClockIcon,
ClockIcon,
Expand All @@ -23,6 +22,6 @@ export const DATA_TYPE_ICON: Record<DataType, LucideIcon> = {
datetime: CalendarClockIcon,
number: HashIcon,
string: TypeIcon,
integer: ListOrderedIcon,
integer: HashIcon,
unknown: CurlyBracesIcon,
};
12 changes: 8 additions & 4 deletions frontend/src/components/editor/file-tree/renderers.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
/* Copyright 2024 Marimo. All rights reserved. */
import { generateColumns } from "@/components/data-table/columns";
import {
generateColumns,
inferFieldTypes,
} from "@/components/data-table/columns";
import { DataTable } from "@/components/data-table/data-table";
import { parseCsvData } from "@/plugins/impl/vega/loader";
import { Objects } from "@/utils/objects";
Expand All @@ -17,14 +20,15 @@ export const CsvViewer: React.FC<{ contents: string }> = ({ contents }) => {
pageIndex: 0,
pageSize: PAGE_SIZE,
});
const fieldTypes = useMemo(() => inferFieldTypes(data), [data]);
const columns = useMemo(
() =>
generateColumns({
items: data,
generateColumns<object>({
rowHeaders: Arrays.EMPTY,
selection: null,
fieldTypes,
}),
[data],
[fieldTypes],
);

return (
Expand Down
Loading
Loading