From 6063e8ff07a1b54a70e789b07dd5bddedb3bde9b Mon Sep 17 00:00:00 2001 From: Qi Zhang Date: Thu, 21 Mar 2024 13:34:39 +0800 Subject: [PATCH] update delta lake --- Cargo.lock | 1099 ++++++++++++++++++++++++++++--- Cargo.toml | 3 +- src/lib.rs | 37 +- src/offsets.rs | 30 +- src/writer.rs | 53 +- tests/delta_partitions_tests.rs | 23 +- tests/helpers/mod.rs | 4 +- 7 files changed, 1075 insertions(+), 174 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f8c08c..acf04c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -172,13 +178,24 @@ dependencies = [ "zerocopy 0.6.6", ] +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "arrow" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "ahash", "arrow-arith", "arrow-array", "arrow-buffer", @@ -196,9 +213,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies 
= [ "arrow-array", "arrow-buffer", @@ -211,15 +228,16 @@ dependencies = [ [[package]] name = "arrow-array" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ "ahash", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", + "chrono-tz", "half", "hashbrown", "num 0.4.1", @@ -227,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half", @@ -238,9 +256,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ "arrow-array", "arrow-buffer", @@ -249,6 +267,7 @@ dependencies = [ "arrow-select", "base64 0.21.5", "chrono", + "comfy-table", "half", "lexical-core", "num 0.4.1", @@ -256,9 +275,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" dependencies = [ "arrow-array", "arrow-buffer", @@ -275,9 +294,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", @@ -287,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,13 +316,14 @@ dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", + "lz4_flex", ] [[package]] name = "arrow-json" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" dependencies = [ "arrow-array", "arrow-buffer", @@ -321,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,9 +356,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash", "arrow-array", @@ -351,18 +371,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" dependencies = [ "serde", ] [[package]] name = "arrow-select" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash", "arrow-array", @@ -374,9 +394,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -399,6 +419,24 @@ dependencies = [ "futures-core", ] +[[package]] +name = "async-compression" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + [[package]] name = "async-lock" version = "3.2.0" @@ -433,6 +471,323 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "aws-config" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4084d18094aec9f79d509f4cb6ccf6b613c5037e32f32e74312e52b836e366" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", 
+ "aws-types", + "bytes", + "fastrand 2.0.1", + "hex", + "http 0.2.11", + "hyper", + "ring 0.17.7", + "time 0.3.31", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa8587ae17c8e967e4b05a62d495be2fb7701bec52a97f7acfe8a29f938384c8" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-runtime" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13dc54b4b49f8288532334bba8f87386a40571c47c37b1304979b556dc613c8" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand 2.0.1", + "http 0.2.11", + "http-body", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid 1.6.1", +] + +[[package]] +name = "aws-sdk-dynamodb" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc684cff4b0b7b156ad61aedd1766167af2aa0c4f65d6e8a04e75e726539c322" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand 2.0.1", + "http 0.2.11", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5cc34f5925899739a3f125bd3f7d37d081234a3df218feb9c9d337fd4c70e72" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.11", + "once_cell", + "regex-lite", + "tracing", +] + 
+[[package]] +name = "aws-sdk-ssooidc" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7327cddd32b1a6f2aaeaadb1336b671a7975e96a999d3b1bcf5aa47932dc6ddb" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.11", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c11981cdb80e8e205e22beb6630a8bdec380a1256bd29efaab34aaebd07cfb9" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "http 0.2.11", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d6f29688a4be9895c0ba8bef861ad0c0dac5c15e9618b9b7a6c233990fc263" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac 0.12.1", + "http 0.2.11", + "http 1.1.0", + "once_cell", + "percent-encoding", + "sha2 0.10.8", + "time 0.3.31", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26ea8fa03025b2face2b3038a63525a10891e3d8829901d502e5384a0d8cd46" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f10fa66956f01540051b0aa7ad54574640f748f9839e843442d99b970d3aff9" 
+dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.11", + "http-body", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec81002d883e5a7fd2bb063d6fb51c4999eb55d404f4fff3dd878bf4733b9f01" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand 2.0.1", + "h2", + "http 0.2.11", + "http-body", + "hyper", + "hyper-rustls 0.24.2", + "once_cell", + "pin-project-lite", + "pin-utils", + "rustls 0.21.10", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9acb931e0adaf5132de878f1398d83f8677f90ba70f01f65ff87f6d7244be1c5" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.11", + "http 1.1.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe14dceea1e70101d38fbf2a99e6a34159477c0fb95e68e05c66bd7ae4c3729" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.11", + "http-body", + "itoa", + 
"num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time 0.3.31", + "tokio", + "tokio-util 0.7.10", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "872c68cf019c0e4afc5de7753c4f7288ce4b71663212771bf5e4542eb9346ca9" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dbf2f3da841a8930f159163175cf6a3d16ddde517c1b0fba7aa776822800f40" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 0.2.11", + "rustc_version", + "tracing", +] + [[package]] name = "azure_core" version = "0.18.0" @@ -560,6 +915,16 @@ version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -572,6 +937,28 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "blake3" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if 1.0.0", + "constant_time_eq", +] + [[package]] name = 
"block-buffer" version = "0.9.0" @@ -635,6 +1022,37 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cc" version = "1.0.83" @@ -659,9 +1077,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", @@ -669,7 +1087,29 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.48.5", + "windows-targets 0.52.0", +] + +[[package]] +name = "chrono-tz" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf 0.11.2", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf 0.11.2", + "phf_codegen 0.11.2", ] [[package]] @@ -714,6 +1154,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "comfy-table" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +dependencies = [ + "strum 0.25.0", + "strum_macros 0.25.3", + "unicode-width", +] + [[package]] name = "concurrent-queue" version = "2.4.0" @@ -743,6 +1194,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation" version = "0.9.4" @@ -846,34 +1303,251 @@ dependencies = [ ] [[package]] -name = "csv-core" -version = "0.1.11" +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "ct-logs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1a816186fa68d9e426e3cb4ae4dff1fcd8e4a2c34b781bf7a822574a0d0aac8" +dependencies = [ + "sct 0.6.1", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if 1.0.0", + "hashbrown", + "lock_api 0.4.11", + "once_cell", + "parking_lot_core 0.9.9", +] + +[[package]] +name = "datafusion" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4328f5467f76d890fe3f924362dbc3a838c6a733f762b32d87f9e0b7bef5fb49" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown", + "indexmap", + "itertools 0.12.0", + "log", + "num_cpus", + "object_store", + "parking_lot 0.12.1", + "parquet", + "pin-project-lite", + "rand 0.8.5", + "sqlparser", + "tempfile", + "tokio", + "tokio-util 0.7.10", + "url", + "uuid 1.6.1", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-common" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29a7752143b446db4a2cccd9a6517293c6b97e8c39e520ca43ccd07135a4f7e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "libc", + "num_cpus", + "object_store", + "parquet", + "sqlparser", +] + +[[package]] +name = "datafusion-execution" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d447650af16e138c31237f53ddaef6dd4f92f0e2d3f2f35d190e16c214ca496" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown", + "log", + "object_store", + "parking_lot 0.12.1", + "rand 0.8.5", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8d19598e48a498850fb79f97a9719b1f95e7deb64a7a06f93f313e8fa1d524b" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "datafusion-common", + "paste", + "sqlparser", + "strum 0.25.0", + "strum_macros 0.25.3", +] + +[[package]] +name = "datafusion-optimizer" +version = "35.0.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b7feb0391f1fc75575acb95b74bfd276903dc37a5409fcebe160bc7ddff2010" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown", + "itertools 0.12.0", + "log", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e911bca609c89a54e8f014777449d8290327414d3e10c57a3e3c2122e38878d0" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "base64 0.21.5", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-expr", + "half", + "hashbrown", + "hex", + "indexmap", + "itertools 0.12.0", + "log", + "md-5 0.10.6", + "paste", + "petgraph", + "rand 0.8.5", + "regex", + "sha2 0.10.8", + "unicode-segmentation", + "uuid 1.6.1", +] + +[[package]] +name = "datafusion-physical-plan" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "e96b546b8a02e9c2ab35ac6420d511f12a4701950c1eb2e568c122b4fefb0be3" dependencies = [ - "memchr", + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "half", + "hashbrown", + "indexmap", + "itertools 0.12.0", + "log", + "once_cell", + "parking_lot 0.12.1", + "pin-project-lite", + "rand 0.8.5", + "tokio", + "uuid 1.6.1", ] [[package]] -name = "ct-logs" -version = "0.8.0" +name = "datafusion-proto" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1a816186fa68d9e426e3cb4ae4dff1fcd8e4a2c34b781bf7a822574a0d0aac8" +checksum = 
"5742f993d1812d6bb3cdc4ce2a0aa99e10f6dc0daa11dd69b0ff57f2d8e7518c" dependencies = [ - "sct 0.6.1", + "arrow", + "chrono", + "datafusion", + "datafusion-common", + "datafusion-expr", + "object_store", + "prost", ] [[package]] -name = "dashmap" -version = "5.5.3" +name = "datafusion-sql" +version = "35.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +checksum = "2d18d36f260bbbd63aafdb55339213a23d540d3419810575850ef0a798a6b768" dependencies = [ - "cfg-if 1.0.0", - "hashbrown", - "lock_api 0.4.11", - "once_cell", - "parking_lot_core 0.9.9", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "log", + "sqlparser", ] [[package]] @@ -889,9 +1563,14 @@ dependencies = [ [[package]] name = "deltalake-aws" version = "0.1.0" -source = "git+https://github.com/delta-io/delta-rs?branch=main#9264edea89a2fc1c35f4a6b9faab125748ff3651" +source = "git+https://github.com/delta-io/delta-rs?branch=main#abafd2d0cb8dde32ffa990dc30fb97a5581688ec" dependencies = [ "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-dynamodb", + "aws-sdk-sts", + "aws-smithy-runtime-api", "backoff", "bytes", "deltalake-core", @@ -900,10 +1579,6 @@ dependencies = [ "maplit", "object_store", "regex", - "rusoto_core", - "rusoto_credential", - "rusoto_dynamodb", - "rusoto_sts", "thiserror", "tokio", "tracing", @@ -914,7 +1589,7 @@ dependencies = [ [[package]] name = "deltalake-azure" version = "0.1.0" -source = "git+https://github.com/delta-io/delta-rs?branch=main#9264edea89a2fc1c35f4a6b9faab125748ff3651" +source = "git+https://github.com/delta-io/delta-rs?branch=main#abafd2d0cb8dde32ffa990dc30fb97a5581688ec" dependencies = [ "async-trait", "bytes", @@ -931,14 +1606,16 @@ dependencies = [ [[package]] name = "deltalake-core" -version = "0.17.0" -source = "git+https://github.com/delta-io/delta-rs?branch=main#9264edea89a2fc1c35f4a6b9faab125748ff3651" +version = "0.17.1" +source = 
"git+https://github.com/delta-io/delta-rs?branch=main#abafd2d0cb8dde32ffa990dc30fb97a5581688ec" dependencies = [ "arrow", "arrow-arith", "arrow-array", "arrow-buffer", "arrow-cast", + "arrow-ipc", + "arrow-json", "arrow-ord", "arrow-row", "arrow-schema", @@ -948,10 +1625,18 @@ dependencies = [ "cfg-if 1.0.0", "chrono", "dashmap", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-proto", + "datafusion-sql", "either", "errno", "fix-hidden-lifetime-bug", "futures", + "hashbrown", + "indexmap", "itertools 0.12.0", "lazy_static", "libc", @@ -964,11 +1649,13 @@ dependencies = [ "parking_lot 0.12.1", "parquet", "percent-encoding", + "pin-project-lite", "rand 0.8.5", "regex", "roaring", "serde", "serde_json", + "sqlparser", "thiserror", "tokio", "tracing", @@ -1184,6 +1871,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "flatbuffers" version = "23.5.26" @@ -1378,6 +2071,12 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "h2" version = "0.3.22" @@ -1389,7 +2088,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.11", "indexmap", "slab", "tokio", @@ -1413,6 +2112,10 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "heck" @@ -1482,6 +2185,17 @@ 
dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.6" @@ -1489,7 +2203,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.11", "pin-project-lite", ] @@ -1542,7 +2256,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", + "http 0.2.11", "http-body", "httparse", "httpdate", @@ -1566,7 +2280,7 @@ dependencies = [ "hyper", "log", "rustls 0.19.1", - "rustls-native-certs", + "rustls-native-certs 0.5.0", "tokio", "tokio-rustls 0.22.0", "webpki", @@ -1579,9 +2293,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", + "http 0.2.11", "hyper", + "log", "rustls 0.21.10", + "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.1", ] @@ -1634,9 +2350,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.1.0" +version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", "hashbrown", @@ -1965,6 +2681,17 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "maplit" version = "1.0.2" @@ -1994,6 +2721,16 @@ dependencies = [ "opaque-debug", ] +[[package]] 
+name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if 1.0.0", + "digest 0.10.7", +] + [[package]] name = "memchr" version = "2.7.1" @@ -2215,9 +2952,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" +checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" dependencies = [ "async-trait", "base64 0.21.5", @@ -2226,7 +2963,7 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools 0.11.0", + "itertools 0.12.0", "parking_lot 0.12.1", "percent-encoding", "quick-xml", @@ -2317,6 +3054,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" + [[package]] name = "parking" version = "2.2.0" @@ -2372,9 +3115,9 @@ dependencies = [ [[package]] name = "parquet" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" +checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" dependencies = [ "ahash", "arrow-array", @@ -2390,6 +3133,7 @@ dependencies = [ "chrono", "flate2", "futures", + "half", "hashbrown", "lz4_flex", "num 0.4.1", @@ -2405,6 +3149,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.14" @@ -2417,13 +3170,32 @@ version = 
"2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "petgraph" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "phf" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" dependencies = [ - "phf_shared", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared 0.11.2", ] [[package]] @@ -2432,8 +3204,18 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", ] [[package]] @@ -2442,10 +3224,20 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" dependencies = [ - "phf_shared", + "phf_shared 0.8.0", "rand 0.7.3", ] +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand 0.8.5", +] + [[package]] name 
= "phf_shared" version = "0.8.0" @@ -2455,6 +3247,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.3" @@ -2524,6 +3325,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" +dependencies = [ + "anyhow", + "itertools 0.11.0", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "psl" version = "2.1.13" @@ -2724,6 +3548,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -2742,7 +3572,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", + "http 0.2.11", "http-body", "hyper", "hyper-rustls 0.24.2", @@ -2756,6 +3586,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls 0.21.10", + "rustls-native-certs 0.6.3", "rustls-pemfile", "serde", "serde_json", @@ -2771,7 +3602,6 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots", "winreg", ] @@ -2838,7 +3668,7 @@ dependencies = [ "bytes", "crc32fast", "futures", - "http", + "http 0.2.11", "hyper", "hyper-rustls 0.22.1", "lazy_static", @@ -2910,10 +3740,10 @@ dependencies = [ "futures", "hex", "hmac 0.11.0", - 
"http", + "http 0.2.11", "hyper", "log", - "md-5", + "md-5 0.9.1", "percent-encoding", "pin-project-lite", "rusoto_credential", @@ -2923,21 +3753,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "rusoto_sts" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7edd42473ac006fd54105f619e480b0a94136e7f53cf3fb73541363678fd92" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "rusoto_core", - "serde_urlencoded", - "xml-rs", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -3003,6 +3818,18 @@ dependencies = [ "security-framework", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -3411,6 +4238,27 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "sqlparser" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -3435,6 +4283,15 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +[[package]] +name = "strum" +version = "0.25.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] + [[package]] name = "strum_macros" version = "0.20.1" @@ -3460,6 +4317,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.48", +] + [[package]] name = "subtle" version = "2.4.1" @@ -3794,7 +4664,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "static_assertions", ] @@ -3851,6 +4721,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + [[package]] name = "untrusted" version = "0.7.1" @@ -3884,6 +4760,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8parse" version = "0.2.1" @@ -3932,8 +4814,8 @@ dependencies = [ "json-pointer", "jsonway", "percent-encoding", - "phf", - "phf_codegen", + "phf 0.8.0", + "phf_codegen 0.8.0", "regex", "serde", "serde_json", @@ -3954,6 +4836,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "waker-fn" version = "1.1.1" @@ -4096,12 +4984,6 @@ dependencies = [ "untrusted 0.7.1", ] -[[package]] -name = "webpki-roots" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" - [[package]] name = "winapi" version = "0.3.9" @@ -4299,6 +5181,21 @@ version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fcb9cbac069e033553e8bb871be2fbdffcab578eb25bd0f7c508cedc6dcd75a" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "z85" version = "3.0.5" diff --git a/Cargo.toml b/Cargo.toml index 978f3a8..b3e6f55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,8 @@ uuid = { version = "0.8", features = ["serde", "v4"] } url = "2.3" #deltalake = { version = "0.16.5", features = ["arrow", "json", "parquet"], optional = true } -deltalake-core = { git = "https://github.com/delta-io/delta-rs", branch = "main", features = ["json"]} +# datafusion feature is required for writer version 2 +deltalake-core = { git = "https://github.com/delta-io/delta-rs", branch = "main", features = ["json", "datafusion"]} deltalake-aws = { git = "https://github.com/delta-io/delta-rs", branch = "main", optional = true } deltalake-azure = { git = "https://github.com/delta-io/delta-rs", 
branch = "main", optional = true } diff --git a/src/lib.rs b/src/lib.rs index f3111aa..ff4cfa7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ extern crate strum_macros; extern crate serde_json; use coercions::CoercionTree; +use deltalake_core::operations::transaction::TableReference; use deltalake_core::protocol::DeltaOperation; use deltalake_core::protocol::OutputMode; use deltalake_core::{DeltaTable, DeltaTableError}; @@ -945,10 +946,7 @@ impl IngestProcessor { return Err(IngestError::ConflictingOffsets); } - if self - .delta_writer - .update_schema(self.table.state.delta_metadata().unwrap())? - { + if self.delta_writer.update_schema(&self.table)? { info!("Table schema has been updated"); // Update the coercion tree to reflect the new schema let coercion_tree = coercions::create_coercion_tree(self.table.schema().unwrap()); @@ -965,19 +963,20 @@ impl IngestProcessor { .duration_since(std::time::UNIX_EPOCH) .expect("Time went backwards") .as_millis() as i64; - match deltalake_core::operations::transaction::commit( - self.table.log_store().clone().as_ref(), - &actions, - DeltaOperation::StreamingUpdate { - output_mode: OutputMode::Append, - query_id: self.opts.app_id.clone(), - epoch_id, - }, - &self.table.state, - None, - ) - .await - { + let commit = deltalake_core::operations::transaction::CommitBuilder::default() + .with_actions(actions.clone()) + .build( + self.table.state.as_ref().map(|s| s as &dyn TableReference), + self.table.log_store().clone(), + DeltaOperation::StreamingUpdate { + output_mode: OutputMode::Append, + query_id: self.opts.app_id.clone(), + epoch_id, + }, + ) + .map_err(DeltaTableError::from)? 
+ .await; + match commit { Ok(v) => { /*if v != version { return Err(IngestError::UnexpectedVersionMismatch { @@ -990,7 +989,7 @@ impl IngestProcessor { self.delta_partition_offsets.insert(*p, Some(*o)); } if self.opts.write_checkpoints { - try_create_checkpoint(&mut self.table, v).await?; + try_create_checkpoint(&mut self.table, v.version).await?; } record_write_lag( self.topic.as_str(), @@ -998,7 +997,7 @@ impl IngestProcessor { &partition_offsets, &self.ingest_metrics, )?; - return Ok(v); + return Ok(v.version); } Err(e) => match e { DeltaTableError::VersionAlreadyExists(_) => { diff --git a/src/offsets.rs b/src/offsets.rs index 97adbe0..66d8c6c 100644 --- a/src/offsets.rs +++ b/src/offsets.rs @@ -1,6 +1,7 @@ use crate::delta_helpers::*; use crate::{DataTypeOffset, DataTypePartition}; use deltalake_core::kernel::Action; +use deltalake_core::operations::transaction::TableReference; use deltalake_core::protocol::DeltaOperation; use deltalake_core::protocol::OutputMode; use deltalake_core::{DeltaTable, DeltaTableError}; @@ -115,23 +116,24 @@ async fn commit_partition_offsets( .as_millis() as i64; table.update().await?; - match deltalake_core::operations::transaction::commit( - table.log_store().clone().as_ref(), - &actions, - DeltaOperation::StreamingUpdate { - output_mode: OutputMode::Complete, - query_id: app_id, - epoch_id, - }, - &table.state, - None, - ) - .await - { + let commit = deltalake_core::operations::transaction::CommitBuilder::default() + .with_actions(actions) + .build( + table.state.as_ref().map(|s| s as &dyn TableReference), + table.log_store().clone(), + DeltaOperation::StreamingUpdate { + output_mode: OutputMode::Complete, + query_id: app_id, + epoch_id, + }, + ) + .map_err(DeltaTableError::from)? 
+ .await; + match commit { Ok(v) => { info!( "Delta version {} completed with new txn offsets {}.", - v, offsets_as_str + v.version, offsets_as_str ); Ok(()) } diff --git a/src/writer.rs b/src/writer.rs index bcb7657..61ee078 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -12,7 +12,6 @@ use deltalake_core::arrow::{ json::reader::ReaderBuilder, record_batch::*, }; -use deltalake_core::parquet::format::FileMetaData; use deltalake_core::parquet::{ arrow::ArrowWriter, basic::{Compression, LogicalType}, @@ -27,9 +26,9 @@ use deltalake_core::{ kernel::{Action, Add, Schema}, protocol::{ColumnCountStat, ColumnValueStat, Stats}, storage::ObjectStoreRef, - table::DeltaTableMetaData, DeltaTable, DeltaTableError, ObjectStoreError, }; +use deltalake_core::{operations::transaction::TableReference, parquet::format::FileMetaData}; use log::{error, info, warn}; use serde_json::{Number, Value}; use std::collections::HashMap; @@ -367,11 +366,10 @@ impl DataWriter { /// Retrieves the latest schema from table, compares to the current and updates if changed. /// When schema is updated then `true` is returned which signals the caller that parquet /// created file or arrow batch should be revisited. 
- pub fn update_schema( - &mut self, - metadata: &DeltaTableMetaData, - ) -> Result<bool, Box<DataWriterError>> { - let schema: ArrowSchema = <ArrowSchema as TryFrom<&Schema>>::try_from(&metadata.schema)?; + pub fn update_schema(&mut self, table: &DeltaTable) -> Result<bool, Box<DataWriterError>> { + let metadata = table.metadata().unwrap(); + let schema: ArrowSchema = + <ArrowSchema as TryFrom<&Schema>>::try_from(&table.schema().unwrap())?; let schema_updated = self.arrow_schema_ref.as_ref() != &schema || self.partition_columns != metadata.partition_columns; @@ -585,20 +583,20 @@ impl DataWriter { self.write(values).await?; let mut adds = self.write_parquet_files(&table.table_uri()).await?; let actions = adds.drain(..).map(Action::Add).collect(); - let version = deltalake_core::operations::transaction::commit( - table.log_store().clone().as_ref(), - &actions, - DeltaOperation::Write { - mode: SaveMode::Append, - partition_by: Some(self.partition_columns.clone()), - predicate: None, - }, - &table.state, - None, - ) - .await?; - - Ok(version) + let commit = deltalake_core::operations::transaction::CommitBuilder::default() + .with_actions(actions) + .build( + table.state.as_ref().map(|s| s as &dyn TableReference), + table.log_store().clone(), + DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: Some(self.partition_columns.clone()), + predicate: None, + }, + ) + .map_err(DeltaTableError::from)?
+ .await?; + Ok(commit.version) } } @@ -1058,7 +1056,6 @@ fn create_add( path, size, partition_values: partition_values.to_owned(), - partition_values_parsed: None, modification_time, data_change: true, stats: Some(stats_string), @@ -1244,12 +1241,12 @@ mod tests { let timestamp = producer.get("timestamp").unwrap().as_value().unwrap(); assert_eq!(0, timestamp); } - ("some_int", ColumnCountStat::Value(v)) => assert_eq!(100, *v), - ("some_bool", ColumnCountStat::Value(v)) => assert_eq!(100, *v), - ("some_string", ColumnCountStat::Value(v)) => assert_eq!(100, *v), - ("some_list", ColumnCountStat::Value(v)) => assert_eq!(100, *v), - ("some_nested_list", ColumnCountStat::Value(v)) => assert_eq!(0, *v), - ("date", ColumnCountStat::Value(v)) => assert_eq!(0, *v), + ("some_int", ColumnCountStat::Value(v)) => assert_eq!(100, v), + ("some_bool", ColumnCountStat::Value(v)) => assert_eq!(100, v), + ("some_string", ColumnCountStat::Value(v)) => assert_eq!(100, v), + ("some_list", ColumnCountStat::Value(v)) => assert_eq!(100, v), + ("some_nested_list", ColumnCountStat::Value(v)) => assert_eq!(0, v), + ("date", ColumnCountStat::Value(v)) => assert_eq!(0, v), _ => assert!(false, "Key should not be present"), } } diff --git a/tests/delta_partitions_tests.rs b/tests/delta_partitions_tests.rs index 0e872a2..f262ba7 100644 --- a/tests/delta_partitions_tests.rs +++ b/tests/delta_partitions_tests.rs @@ -2,7 +2,9 @@ mod helpers; use deltalake_core::kernel::{Action, Add}; +use deltalake_core::operations::transaction::TableReference; use deltalake_core::protocol::{DeltaOperation, SaveMode}; +use deltalake_core::DeltaTableError; use kafka_delta_ingest::writer::*; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -102,15 +104,18 @@ async fn test_delta_partitions() { predicate: None, }; - let version = deltalake_core::operations::transaction::commit( - table.log_store().clone().as_ref(), - &result.iter().cloned().map(Action::Add).collect(), - operation, - &table.state, - 
None, - ) - .await - .expect("Failed to create transaction"); + let version = deltalake_core::operations::transaction::CommitBuilder::default() + .with_actions(result.iter().cloned().map(Action::Add).collect()) + .build( + table.state.as_ref().map(|s| s as &dyn TableReference), + table.log_store().clone(), + operation, + ) + .map_err(DeltaTableError::from) + .unwrap() + .await + .expect("Failed to create transaction") + .version; deltalake_core::checkpoints::create_checkpoint(&table) .await diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index 737b6fc..b3deebd 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -110,7 +110,7 @@ pub async fn send_bytes(producer: &FutureProducer, topic: &str, bytes: &Vec) // TODO Research whether it's possible to read parquet data from bytes but not from file pub async fn read_files_from_store(table: &DeltaTable) -> Vec { let s3 = table.object_store().clone(); - let paths = table.get_files_iter(); + let paths = table.get_files_iter().unwrap(); let tmp = format!(".test-{}.tmp", Uuid::new_v4()); let mut list = Vec::new(); @@ -423,7 +423,7 @@ pub async fn read_table_content_at_version_as_jsons(table_uri: &str, version: i6 async fn json_listify_table_content(table: DeltaTable, store: ObjectStoreRef) -> Vec { let tmp = format!(".test-{}.tmp", Uuid::new_v4()); let mut list = Vec::new(); - for file in table.get_files_iter() { + for file in table.get_files_iter().unwrap() { let get_result = store.get(&file).await.unwrap(); let bytes = get_result.bytes().await.unwrap(); let mut file = File::create(&tmp).unwrap();