Skip to content

Commit

Permalink
Implement GNU jobserver posix client support
Browse files Browse the repository at this point in the history
The core principle of a jobserver is simple:
before starting a new job (edge in ninja-speak),
a token must be acquired from an external entity as approval.

Once a job is finished, the token is returned to represent a free job slot.
In the case of GNU Make, this external entity is the parent process
which has executed Ninja and is managing the load capacity for
all subprocesses which it has spawned. Introducing client support
for this model allows Ninja to give load capacity management
to its parent process, allowing it to control the number of
subprocesses that Ninja spawns at any given time.

This functionality is desirable when Ninja is part of a bigger build,
such as Yocto/OpenEmbedded, OpenWrt/Linux, Buildroot, and Android.
Here, multiple compile jobs are executed in parallel
in order to maximize CPU utilization, but if each compile job in Ninja
uses all available cores, the system is overloaded.

This implementation instantiates the client in the NinjaMain class
and passes pointers to the Jobserver class into other classes.
All tokens are returned whenever the CommandRunner aborts,
and the current number of tokens compared to the current number
of running subprocesses controls the available load capacity,
used to determine how many new tokens to attempt to acquire
in order to try to start another job for each loop to find work.

Calls to functions are excluded from Windows builds
pending Windows-specific support for the jobserver.

Co-authored-by: Martin Hundebøll <martin@geanix.com>
Co-developed-by: Martin Hundebøll <martin@geanix.com>
Signed-off-by: Martin Hundebøll <martin@geanix.com>
Signed-off-by: Michael Pratt <mcpratt@pm.me>
  • Loading branch information
mcprat and hundeboll committed Aug 30, 2024
1 parent dcefb83 commit 947e29f
Show file tree
Hide file tree
Showing 9 changed files with 356 additions and 40 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ if(WIN32)
# errors by telling windows.h to not define those two.
add_compile_definitions(NOMINMAX)
else()
target_sources(libninja PRIVATE src/subprocess-posix.cc)
target_sources(libninja PRIVATE
src/jobserver-posix.cc
src/subprocess-posix.cc
)
if(CMAKE_SYSTEM_NAME STREQUAL "OS400" OR CMAKE_SYSTEM_NAME STREQUAL "AIX")
target_sources(libninja PRIVATE src/getopt.c)
# Build getopt.c, which can be compiled as either C or C++, as C++
Expand Down
1 change: 1 addition & 0 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ def has_re2c() -> bool:
objs += cxx('minidump-win32', variables=cxxvariables)
objs += cc('getopt')
else:
objs += cxx('jobserver-posix')
objs += cxx('subprocess-posix')
if platform.is_aix():
objs += cc('getopt')
Expand Down
59 changes: 53 additions & 6 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,26 @@ Edge* Plan::FindWork() {
if (ready_.empty())
return NULL;

// TODO: jobserver client support for Windows
#ifndef _WIN32
// Only initiate work if the jobserver can acquire a token.
if (builder_ && builder_->jobserver_ &&
builder_->jobserver_->Enabled() &&
!builder_->jobserver_->Acquire()) {
return NULL;
}
#endif

Edge* work = ready_.top();
ready_.pop();

// TODO: jobserver client support for Windows
#ifndef _WIN32
// Mark this edge as using a job token to be released when finished.
if (builder_ && builder_->jobserver_)
work->has_job_token_ = builder_->jobserver_->Enabled();
#endif

return work;
}

Expand Down Expand Up @@ -201,6 +219,16 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) {
edge->pool()->EdgeFinished(*edge);
edge->pool()->RetrieveReadyEdges(&ready_);

// TODO: jobserver client support for Windows
#ifndef _WIN32
// If jobserver is used, return the token for this job.
if (builder_ && builder_->jobserver_ &&
edge->has_job_token_) {
builder_->jobserver_->Release();
edge->has_job_token_ = false;
}
#endif

// The rest of this function only applies to successful commands.
if (result != kEdgeSucceeded)
return true;
Expand Down Expand Up @@ -594,7 +622,9 @@ void Plan::Dump() const {
}

struct RealCommandRunner : public CommandRunner {
explicit RealCommandRunner(const BuildConfig& config) : config_(config) {}
explicit RealCommandRunner(const BuildConfig& config, Jobserver* jobserver) :
config_(config), jobserver_(jobserver) {}

virtual ~RealCommandRunner() {}
virtual size_t CanRunMore() const;
virtual bool StartCommand(Edge* edge);
Expand All @@ -603,6 +633,7 @@ struct RealCommandRunner : public CommandRunner {
virtual void Abort();

const BuildConfig& config_;
Jobserver* jobserver_;
SubprocessSet subprocs_;
map<const Subprocess*, Edge*> subproc_to_edge_;
};
Expand All @@ -617,6 +648,10 @@ vector<Edge*> RealCommandRunner::GetActiveEdges() {

/// Abort the command runner: clear the set of tracked subprocesses and,
/// on non-Windows builds, clear the jobserver client so that the job tokens
/// it holds are returned.
void RealCommandRunner::Abort() {
  subprocs_.Clear();
  // TODO: jobserver client support for Windows
#ifndef _WIN32
  // The jobserver pointer is treated as optional elsewhere in this file
  // (Plan::FindWork and Plan::EdgeFinished null-check it before use), so
  // guard here as well instead of dereferencing unconditionally.
  if (jobserver_)
    jobserver_->Clear();
#endif
}

size_t RealCommandRunner::CanRunMore() const {
Expand All @@ -631,6 +666,18 @@ size_t RealCommandRunner::CanRunMore() const {
capacity = load_capacity;
}

// TODO: jobserver client support for Windows
#ifndef _WIN32
int job_tokens = jobserver_->Tokens();

// When initialized, behave as if the implicit token is acquired already.
// Otherwise, this happens after a token is released but before it is replaced,
// so the base capacity is represented by job_tokens + 1 when positive.
// Add an extra loop on capacity for each job in order to get an extra token.
if (job_tokens)
capacity = abs(job_tokens) - subproc_number + 2;
#endif

if (capacity < 0)
capacity = 0;

Expand Down Expand Up @@ -670,10 +717,10 @@ bool RealCommandRunner::WaitForCommand(Result* result) {
return true;
}

Builder::Builder(State* state, const BuildConfig& config, BuildLog* build_log,
DepsLog* deps_log, DiskInterface* disk_interface,
Status* status, int64_t start_time_millis)
: state_(state), config_(config), plan_(this), status_(status),
Builder::Builder(State* state, const BuildConfig& config, Jobserver* jobserver,
BuildLog* build_log, DepsLog* deps_log, DiskInterface* disk_interface,
Status* status, int64_t start_time_millis) : state_(state),
config_(config), jobserver_(jobserver), plan_(this), status_(status),
start_time_millis_(start_time_millis), disk_interface_(disk_interface),
explanations_(g_explaining ? new Explanations() : nullptr),
scan_(state, build_log, deps_log, disk_interface,
Expand Down Expand Up @@ -778,7 +825,7 @@ bool Builder::Build(string* err) {
if (config_.dry_run)
command_runner_.reset(new DryRunCommandRunner);
else
command_runner_.reset(new RealCommandRunner(config_));
command_runner_.reset(new RealCommandRunner(config_, jobserver_));
}

// We are about to start the build process.
Expand Down
8 changes: 5 additions & 3 deletions src/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "depfile_parser.h"
#include "exit_status.h"
#include "graph.h"
#include "jobserver.h"
#include "util.h" // int64_t

struct BuildLog;
Expand Down Expand Up @@ -187,9 +188,9 @@ struct BuildConfig {

/// Builder wraps the build process: starting commands, updating status.
struct Builder {
Builder(State* state, const BuildConfig& config, BuildLog* build_log,
DepsLog* deps_log, DiskInterface* disk_interface, Status* status,
int64_t start_time_millis);
Builder(State* state, const BuildConfig& config, Jobserver* jobserver,
BuildLog* build_log, DepsLog* deps_log, DiskInterface* disk_interface,
Status* status, int64_t start_time_millis);
~Builder();

/// Clean up after interrupted commands by deleting output files.
Expand Down Expand Up @@ -224,6 +225,7 @@ struct Builder {

State* state_;
const BuildConfig& config_;
Jobserver* jobserver_;
Plan plan_;
std::unique_ptr<CommandRunner> command_runner_;
Status* status_;
Expand Down
Loading

0 comments on commit 947e29f

Please sign in to comment.