diff --git a/src/mold.h b/src/mold.h index 71add0e1fc..600170abb3 100644 --- a/src/mold.h +++ b/src/mold.h @@ -568,6 +568,9 @@ class OutputSection : public Chunk { std::unique_ptr> reloc_sec; std::vector> abs_rels; Atomic sh_flags; + + // Used only by create_output_sections() + std::vector *>> members_vec; }; template diff --git a/src/passes.cc b/src/passes.cc index 97f7e62b0c..5a5de0b403 100644 --- a/src/passes.cc +++ b/src/passes.cc @@ -548,6 +548,9 @@ static bool is_relro(OutputSection &osec) { } // Create output sections for input sections. +// +// Since one output section could contain millions of input sections, +// we need to do it efficiently. template void create_output_sections(Context &ctx) { Timer t(ctx, "create_output_sections"); @@ -556,18 +559,14 @@ void create_output_sections(Context &ctx) { OutputSectionKey::Hash>; MapType map; std::shared_mutex mu; - i64 size = ctx.osec_pool.size(); bool ctors_in_init_array = has_ctors_and_init_array(ctx); + tbb::enumerable_thread_specific caches; // Instantiate output sections tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { // Make a per-thread cache of the main map to avoid lock contention. // It makes a noticeable difference if we have millions of input sections. - MapType cache; - { - std::shared_lock lock(mu); - cache = map; - } + MapType &cache = caches.local(); for (std::unique_ptr> &isec : file->sections) { if (!isec || !isec->is_alive) @@ -620,22 +619,27 @@ void create_output_sections(Context &ctx) { } }); + // Add input sections to output sections + for (std::unique_ptr> &osec : ctx.osec_pool) + osec->members_vec.resize(ctx.objs.size()); + + tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) { + for (std::unique_ptr> &isec : ctx.objs[i]->sections) + if (isec && isec->output_section) + isec->output_section->members_vec[i].push_back(isec.get()); + }); + for (std::unique_ptr> &osec : ctx.osec_pool) { osec->shdr.sh_flags = osec->sh_flags; osec->is_relro = is_relro(*osec); + osec->members = flatten(osec->members_vec); + osec->members_vec = {}; } - // Add input sections to output sections - std::vector *> chunks; - for (i64 i = size; i < ctx.osec_pool.size(); i++) - chunks.push_back(ctx.osec_pool[i].get()); - - for (ObjectFile *file : ctx.objs) - for (std::unique_ptr> &isec : file->sections) - if (isec && isec->is_alive) - isec->output_section->members.push_back(isec.get()); - // Add output sections and mergeable sections to ctx.chunks + std::vector *> chunks; + for (std::unique_ptr> &osec : ctx.osec_pool) + chunks.push_back(osec.get()); for (std::unique_ptr> &osec : ctx.merged_sections) chunks.push_back(osec.get());