Skip to content

Commit

Permalink
[Cgroups] Add cgroups stats
Browse files Browse the repository at this point in the history
  • Loading branch information
apattidb committed Oct 12, 2023
1 parent 436f1ac commit 084d0ae
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
import com.google.devtools.build.lib.server.FailureDetails.TestAction;
import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code;
import com.google.devtools.build.lib.shell.TerminationStatus;
import com.google.devtools.build.lib.util.Fingerprint;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.util.io.OutErr;
Expand Down Expand Up @@ -323,6 +324,7 @@ protected TestCase parseTestResult(Path resultFile) {
protected void processTestOutput(
ActionExecutionContext actionExecutionContext,
TestResultData testResultData,
TerminationStatus ts,
String testName,
Path testLog)
throws IOException {
Expand Down Expand Up @@ -354,9 +356,10 @@ protected void processTestOutput(
.getEventHandler()
.handle(Event.of(EventKind.CANCELLED, null, testName));
} else {
String message = String.format("%s (%s) (see %s)", testName, ts.toShortString(), testLog);
actionExecutionContext
.getEventHandler()
.handle(Event.of(EventKind.FAIL, null, testName + " (see " + testLog + ")"));
.handle(Event.of(EventKind.FAIL, null, message));
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/exec/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/buildeventstream/proto:build_event_stream_java_proto",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/shell",
"//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util/io",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import com.google.devtools.build.lib.server.FailureDetails.Execution.Code;
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
import com.google.devtools.build.lib.server.FailureDetails.TestAction;
import com.google.devtools.build.lib.shell.TerminationStatus;
import com.google.devtools.build.lib.util.Pair;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.vfs.FileStatus;
Expand Down Expand Up @@ -278,7 +279,11 @@ private StandaloneFailedAttemptResult processTestAttempt(
.post(
TestAttempt.forExecutedTestResult(
action, data, attemptId, testOutputs, result.executionInfo(), isLastAttempt));
processTestOutput(actionExecutionContext, data, action.getTestName(), renamedTestLog);
TerminationStatus ts = TerminationStatus.builder()
.setWaitResponse(result.spawnResults().get(0).exitCode())
.setTimedOut(data.getStatus() == BlazeTestStatus.TIMEOUT)
.build();
processTestOutput(actionExecutionContext, data, ts, action.getTestName(), renamedTestLog);
return new StandaloneFailedAttemptResult(data);
}

Expand Down
22 changes: 18 additions & 4 deletions src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.profiler;

import static com.google.devtools.build.lib.profiler.ProfilerTask.SANDBOX_CPU_INFO;
import static com.google.devtools.build.lib.profiler.ProfilerTask.SANDBOX_MEMORY_INFO;
import static com.google.devtools.build.lib.profiler.ProfilerTask.TASK_COUNT;

import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -970,12 +972,24 @@ private void writeTask(JsonWriter writer, TaskData data) throws IOException {
writer.setIndent(" ");
writer.beginObject();
writer.setIndent("");
if (data.type == null) {
writer.setIndent(" ");
if (data.type == SANDBOX_CPU_INFO || data.type == SANDBOX_MEMORY_INFO) {
writer.name("cat").value("sandbox info");
writer.name("name").value(data.type.description);
writer.name("args");
writer.beginObject();
for (String stat : data.description.split("\n")) {
String [] pair = stat.split(" ", 2);
writer.name(pair[0]).value(pair[1]);
}
writer.endObject();
} else {
writer.name("cat").value(data.type.description);
if (data.type == null) {
writer.setIndent(" ");
} else {
writer.name("cat").value(data.type.description);
}
writer.name("name").value(data.description);
}
writer.name("name").value(data.description);
writer.name("ph").value(eventType);
writer
.name("ts")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public enum ProfilerTask {
SYSTEM_NETWORK_UP_USAGE("Network Up usage (total)"),
SYSTEM_NETWORK_DOWN_USAGE("Network Down usage (total)"),
WORKERS_MEMORY_USAGE("Workers memory usage"),
SANDBOX_CPU_INFO("CPU stats (Sandbox)"),
SANDBOX_MEMORY_INFO("Memory stats (Sandbox)"),
SYSTEM_LOAD_AVERAGE("System load average"),
STARLARK_PARSER("Starlark Parser", Threshold.FIFTY_MILLIS),
STARLARK_USER_FN("Starlark user function call", Threshold.FIFTY_MILLIS),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.google.devtools.build.lib.exec.local.LocalExecutionOptions;
import com.google.devtools.build.lib.exec.local.PosixLocalEnvProvider;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import com.google.devtools.build.lib.runtime.CommandEnvironment;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxInputs;
Expand All @@ -59,6 +60,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;

Expand All @@ -70,6 +72,7 @@ final class LinuxSandboxedSpawnRunner extends AbstractSandboxSpawnRunner {
private static final AtomicBoolean warnedAboutNonHermeticTmp = new AtomicBoolean();

private static final AtomicBoolean warnedAboutUnsupportedModificationCheck = new AtomicBoolean();
private ConcurrentHashMap<Integer, Optional<VirtualCGroup>> cgroups;

/**
* Returns whether the linux sandbox is supported on the local machine by running a small command
Expand Down Expand Up @@ -172,12 +175,17 @@ private static boolean computeIsSupported(CommandEnvironment cmdEnv, Path linuxS
this.localEnvProvider = new PosixLocalEnvProvider(cmdEnv.getClientEnv());
this.treeDeleter = treeDeleter;
this.reporter = cmdEnv.getReporter();
this.cgroups = new ConcurrentHashMap<>();
}

private VirtualCGroup getCgroup(Spawn spawn, SpawnExecutionContext context) throws ExecException, IOException {
private Optional<VirtualCGroup> getCgroup(Spawn spawn, SpawnExecutionContext context) throws ExecException, IOException {
if (spawn.getExecutionInfo().get(ExecutionRequirements.NO_SUPPORTS_CGROUPS) != null) {
return null;
}
if (cgroups.containsKey(context.getId())) {
return cgroups.get(context.getId());
}

SandboxOptions sandboxOptions = getSandboxOptions();

VirtualCGroup cgroup = null;
Expand Down Expand Up @@ -252,7 +260,9 @@ private VirtualCGroup getCgroup(Spawn spawn, SpawnExecutionContext context) thro
cgroup.cpu().setCpus(cpuLimit);
}

return cgroup;
cgroups.put(context.getId(), Optional.ofNullable(cgroup));

return cgroups.get(context.getId());
}

@Override
Expand Down Expand Up @@ -326,10 +336,10 @@ protected SandboxedSpawn prepareSpawn(Spawn spawn, SpawnExecutionContext context
.setKillDelay(timeoutKillDelay);


VirtualCGroup cgroup = getCgroup(spawn, context);
if (cgroup != null) {
Optional<VirtualCGroup> cgroup = getCgroup(spawn, context);
if (cgroup.isPresent()) {
commandLineBuilder.setCgroupsDirs(
cgroup.paths().stream()
cgroup.get().paths().stream()
.map(p -> fileSystem.getPath(p.toString()))
.collect(ImmutableSet.toImmutableSet()));
}
Expand Down Expand Up @@ -528,6 +538,27 @@ public void verifyPostCondition(
if (getSandboxOptions().useHermetic) {
checkForConcurrentModifications(context);
}
Optional<VirtualCGroup> cgroup = cgroups.remove(context.getId());
if (cgroup == null || cgroup.isEmpty()) {
return;
}
long now = System.nanoTime();
if (cgroup.get().cpu() != null) {
String stats = cgroup.get().cpu().getStats();
Profiler.instance().logEventAtTime(now, ProfilerTask.SANDBOX_CPU_INFO, stats);
}
if (cgroup.get().memory() != null) {
Long kills = cgroup.get().memory().oomKills();
Long limit = cgroup.get().memory().getMaxBytes();
Long usage = cgroup.get().memory().maxUsage();

StringBuilder stats = new StringBuilder(cgroup.get().memory().getStats());
if (usage > 0) stats.append("max_usage_in_bytes").append(" ").append(usage).append("\n");
if (limit > 0) stats.append("limit_in_bytes").append(" ").append(limit).append("\n");
if (kills > 0) stats.append("oom_kills").append(" ").append(kills).append("\n");

Profiler.instance().logEventAtTime(now, ProfilerTask.SANDBOX_MEMORY_INFO, stats.toString());
}
}

private void checkForConcurrentModifications(SpawnExecutionContext context)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.stream.Collectors;

public interface Controller {
default boolean isLegacy() throws IOException {
Expand All @@ -33,9 +36,30 @@ protected FailureDetails.FailureDetail getFailureDetail(String message) {

Path getPath() throws IOException;

Path statFile() throws IOException;

/**
 * Reads the raw contents of this controller's stat file.
 *
 * @return the stat file contents, or an empty string when {@link #statFile()} yields
 *     {@code null} or the file does not exist
 * @throws IOException if resolving or reading the stat file fails
 */
default String getStats() throws IOException {
  // Resolve the path once rather than re-evaluating statFile() for every check, and use
  // Files.exists so the check also works for paths not backed by the default filesystem.
  Path stat = statFile();
  if (stat == null || !Files.exists(stat)) {
    return "";
  }
  return Files.readString(stat);
}

/** Controller view of the memory subsystem: limit, OOM-kill count, peak usage and stats. */
interface Memory extends Controller {
  /** Sets the memory limit, in bytes. */
  void setMaxBytes(long bytes) throws IOException;

  /** Returns the configured memory limit, in bytes. */
  long getMaxBytes() throws IOException;

  /** Returns the OOM-kill counter reported by the implementation. */
  long oomKills() throws IOException;

  /**
   * Returns the peak memory usage in bytes. {@link #getStats()} treats a non-positive value as
   * "not available".
   */
  long maxUsage() throws IOException;

  /**
   * Returns the controller stats as newline-separated {@code "key value"} lines, with a
   * {@code max_usage_in_bytes} line appended when {@link #maxUsage()} is positive.
   *
   * @throws IOException if the stat file or the usage counter cannot be read
   */
  default String getStats() throws IOException {
    String stats = Controller.super.getStats();
    long maxUsage = this.maxUsage();
    if (maxUsage <= 0) {
      return stats;
    }
    // Terminate the extra line with a newline so anything appended afterwards starts on its
    // own line, and skip the separator when there are no base stats so no blank line is emitted.
    String extraLine = "max_usage_in_bytes " + maxUsage + "\n";
    String base = stats.strip();
    return base.isEmpty() ? extraLine : base + "\n" + extraLine;
  }
}
interface Cpu extends Controller {
void setCpus(float cpus) throws IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.stream.Collectors;

public class LegacyCpu implements Controller.Cpu {
private final Path path;
Expand All @@ -20,6 +22,11 @@ public Path getPath() {
return path;
}

/** Returns the {@code cpu.stat} entry resolved against this controller's path. */
@Override
public Path statFile() throws IOException {
  final Path cpuStat = path.resolve("cpu.stat");
  return cpuStat;
}

@Override
public void setCpus(float cpus) throws IOException {
int quota = Math.round(cpus * period);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class LegacyMemory implements Controller.Memory {
private final Path path;
Expand All @@ -14,6 +17,11 @@ public Path getPath() {
return path;
}

/** Returns the {@code memory.stat} entry resolved against this controller's path. */
@Override
public Path statFile() throws IOException {
  final Path memoryStat = path.resolve("memory.stat");
  return memoryStat;
}

/**
 * Creates a memory controller rooted at {@code path}. The path is stored as-is; the controller's
 * files (e.g. {@code memory.stat}, {@code memory.limit_in_bytes}) are resolved against it.
 */
public LegacyMemory(Path path) {
this.path = path;
}
Expand All @@ -27,4 +35,19 @@ public void setMaxBytes(long bytes) throws IOException {
/**
 * Reads {@code memory.limit_in_bytes} and parses its trimmed contents as a {@code long}.
 *
 * @throws IOException if the file cannot be read
 */
public long getMaxBytes() throws IOException {
  final Path limitFile = path.resolve("memory.limit_in_bytes");
  final String rawLimit = Files.readString(limitFile);
  return Long.parseLong(rawLimit.trim());
}

/**
 * Returns the value of the {@code oom_kill} line in {@code memory.oom_control}, or -1 when the
 * file has no such line.
 *
 * @throws IOException if the file cannot be read
 */
@Override
public long oomKills() throws IOException {
  // The trailing space keeps other keys that merely begin with "oom_kill" from matching.
  final String key = "oom_kill ";
  return Files.readAllLines(getPath().resolve("memory.oom_control")).stream()
      .filter(entry -> entry.startsWith(key))
      .findFirst()
      .map(entry -> Long.parseLong(entry.substring(key.length())))
      .orElse(-1L);
}

/**
 * Reads {@code memory.max_usage_in_bytes} and parses its trimmed contents as a {@code long}.
 *
 * @throws IOException if the file cannot be read
 */
@Override
public long maxUsage() throws IOException {
  final Path usageFile = path.resolve("memory.max_usage_in_bytes");
  final String rawUsage = Files.readString(usageFile);
  return Long.parseLong(rawUsage.trim());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ public Path getPath() {
return path;
}

/** Returns the {@code cpu.stat} entry resolved against this controller's path. */
@Override
public Path statFile() throws IOException {
  final Path cpuStat = path.resolve("cpu.stat");
  return cpuStat;
}

@Override
public void setCpus(float cpus) throws IOException {
int period = 1000_000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.stream.Collectors;

public class UnifiedMemory implements Controller.Memory {
private final Path path;
Expand All @@ -17,6 +19,11 @@ public Path getPath() {
return path;
}

/** Returns the {@code memory.stat} entry resolved against this controller's path. */
@Override
public Path statFile() throws IOException {
  final Path memoryStat = path.resolve("memory.stat");
  return memoryStat;
}

@Override
public void setMaxBytes(long bytes) throws IOException {
Files.writeString(path.resolve("memory.max"), Long.toString(bytes));
Expand All @@ -26,4 +33,25 @@ public void setMaxBytes(long bytes) throws IOException {
/**
 * Reads the memory limit from {@code memory.max}.
 *
 * @return the limit in bytes, or {@link Long#MAX_VALUE} when the file holds the literal
 *     {@code max} (no limit configured)
 * @throws IOException if the file cannot be read
 */
public long getMaxBytes() throws IOException {
  String raw = Files.readString(path.resolve("memory.max")).trim();
  // cgroup v2 writes "max" rather than a number when the group is unlimited; parsing that
  // directly would throw NumberFormatException.
  if (raw.equals("max")) {
    return Long.MAX_VALUE;
  }
  return Long.parseLong(raw);
}

/**
 * Returns the value of the {@code oom_kill} line in {@code memory.events}, or -1 when the file
 * has no such line.
 *
 * @throws IOException if the file cannot be read
 */
@Override
public long oomKills() throws IOException {
  // The trailing space keeps other keys that merely begin with "oom_kill" from matching.
  final String key = "oom_kill ";
  for (String entry : Files.readAllLines(getPath().resolve("memory.events"))) {
    if (!entry.startsWith(key)) {
      continue;
    }
    return Long.parseLong(entry.substring(key.length()));
  }
  return -1;
}

/**
 * Returns the peak memory usage recorded in {@code memory.peak}.
 *
 * @return the peak usage in bytes, or -1 when {@code memory.peak} does not exist
 * @throws IOException if the file cannot be read
 */
@Override
public long maxUsage() throws IOException {
  // This file has been added relatively recently, so it might not exist.
  // Return -1 in that case, to signal its absence.
  // Ref. https://github.com/torvalds/linux/commit/8e20d4b332660a32e842e20c34cfc3b3456bc6dc
  Path peak = path.resolve("memory.peak");
  if (!Files.exists(peak)) {
    return -1;
  }
  // Read the peak counter itself; memory.max is the configured limit, not the usage.
  return Long.parseLong(Files.readString(peak).trim());
}
}

0 comments on commit 084d0ae

Please sign in to comment.