diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleMerger.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleMerger.hxx index 74f5c5165d7bf..934b900ec62c6 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleMerger.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleMerger.hxx @@ -86,6 +86,7 @@ struct RNTupleMergeOptions { */ // clang-format on class RNTupleMerger final { + std::unique_ptr fDestination; std::unique_ptr fPageAlloc; std::optional fTaskGroup; @@ -97,11 +98,11 @@ class RNTupleMerger final { std::span extraDstColumns, RNTupleMergeData &mergeData); public: - RNTupleMerger(); + /// Creates a RNTupleMerger with the given destination. + explicit RNTupleMerger(std::unique_ptr destination); /// Merge a given set of sources into the destination. - RResult Merge(std::span sources, RPageSink &destination, - const RNTupleMergeOptions &mergeOpts = RNTupleMergeOptions()); + RResult Merge(std::span sources, const RNTupleMergeOptions &mergeOpts = RNTupleMergeOptions()); }; // end of class RNTupleMerger diff --git a/tree/ntuple/v7/src/RNTupleMerger.cxx b/tree/ntuple/v7/src/RNTupleMerger.cxx index 0852e27bf1767..48a8ab1db6ef2 100644 --- a/tree/ntuple/v7/src/RNTupleMerger.cxx +++ b/tree/ntuple/v7/src/RNTupleMerger.cxx @@ -215,7 +215,7 @@ try { } // Now merge - RNTupleMerger merger; + RNTupleMerger merger{std::move(destination)}; RNTupleMergeOptions mergerOpts; mergerOpts.fCompressionSettings = *compression; mergerOpts.fExtraVerbose = extraVerbose; @@ -225,7 +225,7 @@ try { if (auto errBehavior = ParseOptionErrBehavior(mergeInfo->fOptions)) { mergerOpts.fErrBehavior = *errBehavior; } - merger.Merge(sourcePtrs, *destination, mergerOpts).ThrowOnError(); + merger.Merge(sourcePtrs, mergerOpts).ThrowOnError(); // Provide the caller with a merged anchor object (even though we've already // written it). @@ -928,23 +928,28 @@ GatherColumnInfos(const RDescriptorsComparison &descCmp, const RNTupleDescriptor return res; } -RNTupleMerger::RNTupleMerger() +RNTupleMerger::RNTupleMerger(std::unique_ptr destination) // TODO(gparolini): consider using an arena allocator instead, since we know the precise lifetime // of the RNTuples we are going to handle (e.g. we can reset the arena at every source) - : fPageAlloc(std::make_unique()) + : fDestination(std::move(destination)), fPageAlloc(std::make_unique()) { + R__ASSERT(fDestination); + #ifdef R__USE_IMT if (ROOT::IsImplicitMTEnabled()) fTaskGroup = TTaskGroup(); #endif } -ROOT::RResult -RNTupleMerger::Merge(std::span sources, RPageSink &destination, const RNTupleMergeOptions &mergeOptsIn) +ROOT::RResult RNTupleMerger::Merge(std::span sources, const RNTupleMergeOptions &mergeOptsIn) { RNTupleMergeOptions mergeOpts = mergeOptsIn; + + assert(fDestination); + + // Set compression settings if unset and verify it's compatible with the sink { - const auto dstCompSettings = destination.GetWriteOptions().GetCompression(); + const auto dstCompSettings = fDestination->GetWriteOptions().GetCompression(); if (!mergeOpts.fCompressionSettings) { mergeOpts.fCompressionSettings = dstCompSettings; } else if (*mergeOpts.fCompressionSettings != dstCompSettings) { @@ -955,7 +960,7 @@ RNTupleMerger::Merge(std::span sources, RPageSink &destination, c } } - RNTupleMergeData mergeData{sources, destination, mergeOpts}; + RNTupleMergeData mergeData{sources, *fDestination, mergeOpts}; std::unique_ptr model; // used to initialize the schema of the output RNTuple @@ -976,20 +981,20 @@ RNTupleMerger::Merge(std::span sources, RPageSink &destination, c mergeData.fSrcDescriptor = &srcDescriptor.GetRef(); // Create sink from the input model if not initialized - if (!destination.IsInitialized()) { + if (!fDestination->IsInitialized()) { auto opts = RNTupleDescriptor::RCreateModelOptions(); opts.fReconstructProjections = true; model = srcDescriptor->CreateModel(opts); - destination.Init(*model); + fDestination->Init(*model); } for (const auto &extraTypeInfoDesc : srcDescriptor->GetExtraTypeInfoIterable()) - destination.UpdateExtraTypeInfo(extraTypeInfoDesc); + fDestination->UpdateExtraTypeInfo(extraTypeInfoDesc); auto descCmpRes = CompareDescriptorStructure(mergeData.fDstDescriptor, srcDescriptor.GetRef()); if (!descCmpRes) { SKIP_OR_ABORT( - std::string("Source RNTuple will be skipped due to incompatible schema with the destination:\n") + + std::string("Source RNTuple will be skipped due to incompatible schema with the fDestination:\n") + descCmpRes.GetError()->GetReport()); } auto descCmp = descCmpRes.Unwrap(); @@ -1011,7 +1016,7 @@ RNTupleMerger::Merge(std::span sources, RPageSink &destination, c ExtendDestinationModel(descCmp.fExtraSrcFields, *model, mergeData, descCmp.fCommonFields); } else if (mergeOpts.fMergingMode == ENTupleMergingMode::kStrict) { // If the current source has extra fields and we're in Strict mode, error - std::string msg = "Source RNTuple has extra fields that the destination RNTuple doesn't have:"; + std::string msg = "Source RNTuple has extra fields that the fDestination RNTuple doesn't have:"; for (const auto *field : descCmp.fExtraSrcFields) { msg += "\n " + field->GetFieldName() + " : " + field->GetTypeName(); } @@ -1025,8 +1030,8 @@ RNTupleMerger::Merge(std::span sources, RPageSink &destination, c } // end loop over sources // Commit the output - destination.CommitClusterGroup(); - destination.CommitDataset(); + fDestination->CommitClusterGroup(); + fDestination->CommitDataset(); return RResult::Success(); } diff --git a/tree/ntuple/v7/test/ntuple_checksum.cxx b/tree/ntuple/v7/test/ntuple_checksum.cxx index c469aa4b63d7c..78294a0033973 100644 --- a/tree/ntuple/v7/test/ntuple_checksum.cxx +++ b/tree/ntuple/v7/test/ntuple_checksum.cxx @@ -147,9 +147,9 @@ TEST(RNTupleChecksum, Merge) } auto destination = std::make_unique("ntpl", fileGuard3.GetPath(), options); - RNTupleMerger merger; + RNTupleMerger merger{std::move(destination)}; try { - merger.Merge(sourcePtrs, *destination); + merger.Merge(sourcePtrs); FAIL() << "merging should fail due to checksum error"; } catch (const ROOT::RException &e) { EXPECT_THAT(e.what(), testing::HasSubstr("page checksum")); diff --git a/tree/ntuple/v7/test/ntuple_merger.cxx b/tree/ntuple/v7/test/ntuple_merger.cxx index 669c8c5c69e8a..948a09506d0f6 100644 --- a/tree/ntuple/v7/test/ntuple_merger.cxx +++ b/tree/ntuple/v7/test/ntuple_merger.cxx @@ -133,24 +133,26 @@ TEST(RNTupleMerger, MergeSymmetric) } // Now Merge the inputs - RNTupleMerger merger; RNTupleMergeOptions opts; { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kFilter; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kUnion; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kStrict; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } } @@ -238,12 +240,12 @@ TEST(RNTupleMerger, MergeAsymmetric1) // Now Merge the inputs // We expect this to fail in Filter and Strict mode since the fields between the sources do NOT match - RNTupleMerger merger; RNTupleMergeOptions opts; { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kFilter; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_FALSE(res); if (res.GetError()) { EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("missing the following field")); @@ -252,7 +254,8 @@ TEST(RNTupleMerger, MergeAsymmetric1) { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kStrict; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_FALSE(res); if (res.GetError()) { EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("missing the following field")); @@ -261,7 +264,8 @@ TEST(RNTupleMerger, MergeAsymmetric1) { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kUnion; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } } @@ -308,12 +312,12 @@ TEST(RNTupleMerger, MergeAsymmetric2) // Now Merge the inputs // We expect this to fail in Filter and Strict mode since the fields between the sources do NOT match - RNTupleMerger merger; RNTupleMergeOptions opts; { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kFilter; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_FALSE(res); if (res.GetError()) { EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("missing the following field")); @@ -322,7 +326,8 @@ TEST(RNTupleMerger, MergeAsymmetric2) { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kStrict; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_FALSE(res); if (res.GetError()) { EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("missing the following field")); @@ -331,7 +336,8 @@ TEST(RNTupleMerger, MergeAsymmetric2) { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kUnion; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } } @@ -378,12 +384,12 @@ TEST(RNTupleMerger, MergeAsymmetric3) // Now Merge the inputs // We expect this to succeed except in all modes except Strict. - RNTupleMerger merger; RNTupleMergeOptions opts; { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kStrict; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_FALSE(res); if (res.GetError()) { EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("Source RNTuple has extra fields")); @@ -392,13 +398,15 @@ TEST(RNTupleMerger, MergeAsymmetric3) { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kFilter; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } { auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); opts.fMergingMode = ENTupleMergingMode::kUnion; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } } @@ -463,10 +471,10 @@ TEST(RNTupleMerger, MergeVector) auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), opts); // Now Merge the inputs - RNTupleMerger merger; RNTupleMergeOptions mopts; mopts.fMergingMode = mmode; - auto res = merger.Merge(sourcePtrs, *destination, mopts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, mopts); EXPECT_TRUE(bool(res)); } @@ -558,14 +566,14 @@ TEST(RNTupleMerger, MergeInconsistentTypes) // Create the output auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), RNTupleWriteOptions()); + RNTupleMerger merger{std::move(destination)}; // Now Merge the inputs // We expect this to fail since the fields between the sources do NOT match for (const auto mmode : {ENTupleMergingMode::kFilter, ENTupleMergingMode::kStrict, ENTupleMergingMode::kUnion}) { - RNTupleMerger merger; RNTupleMergeOptions opts; opts.fMergingMode = mmode; - auto res = merger.Merge(sourcePtrs, *destination, opts); + auto res = merger.Merge(sourcePtrs, opts); EXPECT_FALSE(res); if (res.GetError()) { EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("type incompatible")); @@ -877,19 +885,30 @@ TEST(RNTupleMerger, ChangeCompression) auto destinationUncomp = std::make_unique("ntuple", fileGuardOutUncomp.GetPath(), writeOpts); writeOpts.SetEnablePageChecksums(false); - RNTupleMerger merger; auto opts = RNTupleMergeOptions{}; opts.fCompressionSettings = kNewComp; - // This should fail because we specified a different compression than the sink - auto res = merger.Merge(sourcePtrs, *destinationDifferentComp, opts); - EXPECT_FALSE(bool(res)); - res = merger.Merge(sourcePtrs, *destinationChecksum, opts); - EXPECT_TRUE(bool(res)); - res = merger.Merge(sourcePtrs, *destinationNoChecksum, opts); - EXPECT_TRUE(bool(res)); - opts.fCompressionSettings = 0; - res = merger.Merge(sourcePtrs, *destinationUncomp, opts); - EXPECT_TRUE(bool(res)); + { + RNTupleMerger merger{std::move(destinationDifferentComp)}; + // This should fail because we specified a different compression than the sink + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_FALSE(bool(res)); + } + { + RNTupleMerger merger{std::move(destinationChecksum)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } + { + RNTupleMerger merger{std::move(destinationNoChecksum)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } + { + opts.fCompressionSettings = 0; + RNTupleMerger merger{std::move(destinationUncomp)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } } // Check that compression is the right one @@ -940,18 +959,29 @@ TEST(RNTupleMerger, ChangeCompressionMixed) auto destinationUncomp = std::make_unique("ntuple", fileGuardOutUncomp.GetPath(), writeOpts); writeOpts.SetEnablePageChecksums(false); - RNTupleMerger merger; auto opts = RNTupleMergeOptions{}; - auto res = merger.Merge(sourcePtrs, *destinationChecksum, opts); - EXPECT_TRUE(bool(res)); - res = merger.Merge(sourcePtrs, *destinationNoChecksum, opts); - EXPECT_TRUE(bool(res)); - opts.fCompressionSettings = 101; - res = merger.Merge(sourcePtrs, *destinationDifferentComp, opts); - EXPECT_TRUE(bool(res)); - opts.fCompressionSettings = 0; - res = merger.Merge(sourcePtrs, *destinationUncomp, opts); - EXPECT_TRUE(bool(res)); + { + RNTupleMerger merger{std::move(destinationChecksum)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } + { + RNTupleMerger merger{std::move(destinationNoChecksum)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } + { + opts.fCompressionSettings = 101; + RNTupleMerger merger{std::move(destinationDifferentComp)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } + { + opts.fCompressionSettings = 0; + RNTupleMerger merger{std::move(destinationUncomp)}; + auto res = merger.Merge(sourcePtrs, opts); + EXPECT_TRUE(bool(res)); + } } // Check that compression is the right one @@ -1015,11 +1045,11 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), wopts); // Now Merge the inputs - RNTupleMerger merger; auto opts = RNTupleMergeOptions{}; opts.fCompressionSettings = 0; opts.fMergingMode = ENTupleMergingMode::kUnion; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } @@ -1094,7 +1124,6 @@ TEST(RNTupleMerger, MergeCompression) } // Now Merge the inputs - RNTupleMerger merger; RNTupleMergeOptions opts; { auto wopts = RNTupleWriteOptions(); @@ -1102,7 +1131,8 @@ TEST(RNTupleMerger, MergeCompression) auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), wopts); opts.fMergingMode = ENTupleMergingMode::kUnion; opts.fCompressionSettings = kOutCompSettings; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } } @@ -1169,14 +1199,14 @@ TEST(RNTupleMerger, DifferentCompatibleRepresentations) auto sourcePtrs2 = sourcePtrs; // Now Merge the inputs. Do both with and without compression change - RNTupleMerger merger; { auto wopts = RNTupleWriteOptions(); wopts.SetCompression(0); auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), wopts); auto opts = RNTupleMergeOptions(); opts.fCompressionSettings = 0; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); // TODO(gparolini): we want to support this in the future EXPECT_FALSE(bool(res)); if (res.GetError()) { @@ -1186,7 +1216,8 @@ TEST(RNTupleMerger, DifferentCompatibleRepresentations) } { auto destination = std::make_unique("ntuple", fileGuard4.GetPath(), RNTupleWriteOptions()); - auto res = merger.Merge(sourcePtrs, *destination); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs); // TODO(gparolini): we want to support this in the future EXPECT_FALSE(bool(res)); if (res.GetError()) { @@ -1232,12 +1263,12 @@ TEST(RNTupleMerger, MultipleRepresentations) auto sourcePtrs2 = sourcePtrs; - RNTupleMerger merger; { auto destination = std::make_unique("ntuple", fileGuard2.GetPath(), RNTupleWriteOptions()); auto opts = RNTupleMergeOptions(); opts.fCompressionSettings = 0; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); // TODO(gparolini): we want to support this in the future // XXX: this currently fails because of a mismatch in the number of columns of dst vs src. // Is this correct? Anyway the situation will likely change once we properly support different representation @@ -1291,14 +1322,14 @@ TEST(RNTupleMerger, Double32) auto sourcePtrs2 = sourcePtrs; // Now Merge the inputs. Do both with and without compression change - RNTupleMerger merger; { auto wopts = RNTupleWriteOptions(); wopts.SetCompression(0); auto destination = std::make_unique("ntuple", fileGuard3.GetPath(), wopts); auto opts = RNTupleMergeOptions(); opts.fCompressionSettings = 0; - auto res = merger.Merge(sourcePtrs, *destination, opts); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs, opts); EXPECT_TRUE(bool(res)); } { @@ -1316,7 +1347,8 @@ TEST(RNTupleMerger, Double32) } { auto destination = std::make_unique("ntuple", fileGuard4.GetPath(), RNTupleWriteOptions()); - auto res = merger.Merge(sourcePtrs, *destination); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs); EXPECT_TRUE(bool(res)); } { @@ -1365,8 +1397,8 @@ TEST(RNTupleMerger, MergeProjectedFields) // Now Merge the inputs auto destination = std::make_unique("ntuple", fileGuard2.GetPath(), RNTupleWriteOptions()); - RNTupleMerger merger; - auto res = merger.Merge(sourcePtrs, *destination); + RNTupleMerger merger{std::move(destination)}; + auto res = merger.Merge(sourcePtrs); EXPECT_TRUE(bool(res)); } diff --git a/tree/ntuple/v7/test/rfield_streamer.cxx b/tree/ntuple/v7/test/rfield_streamer.cxx index 5b1b7d1ae917f..534b9aabbe0d6 100644 --- a/tree/ntuple/v7/test/rfield_streamer.cxx +++ b/tree/ntuple/v7/test/rfield_streamer.cxx @@ -193,8 +193,8 @@ TEST(RField, StreamerMerge) std::vector sourcePtrs{sources[0].get(), sources[1].get()}; auto destination = std::make_unique("ntpl", fileGuard3.GetPath(), RNTupleWriteOptions()); - RNTupleMerger merger; - EXPECT_NO_THROW(merger.Merge(sourcePtrs, *destination)); + RNTupleMerger merger{std::move(destination)}; + EXPECT_NO_THROW(merger.Merge(sourcePtrs)); } auto reader = RNTupleReader::Open("ntpl", fileGuard3.GetPath());