From d9ca7243eb6fc8a52bbfb3212472d2712374dbe1 Mon Sep 17 00:00:00 2001 From: Alessandro Patti Date: Wed, 27 Sep 2023 12:36:32 +0200 Subject: [PATCH] [Cgroups] Add cgroups stats --- .../build/lib/analysis/test/TestStrategy.java | 5 ++- .../com/google/devtools/build/lib/exec/BUILD | 1 + .../lib/exec/StandaloneTestStrategy.java | 7 +++- .../devtools/build/lib/profiler/Profiler.java | 22 ++++++++-- .../build/lib/profiler/ProfilerTask.java | 2 + .../sandbox/LinuxSandboxedSpawnRunner.java | 41 ++++++++++++++++--- .../build/lib/sandbox/cgroups/Controller.java | 24 +++++++++++ .../lib/sandbox/cgroups/v1/LegacyCpu.java | 7 ++++ .../lib/sandbox/cgroups/v1/LegacyMemory.java | 23 +++++++++++ .../lib/sandbox/cgroups/v2/UnifiedCpu.java | 5 +++ .../lib/sandbox/cgroups/v2/UnifiedMemory.java | 28 +++++++++++++ 11 files changed, 154 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/analysis/test/TestStrategy.java b/src/main/java/com/google/devtools/build/lib/analysis/test/TestStrategy.java index 01da1f738e15db..58c2afe3acc366 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/test/TestStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/test/TestStrategy.java @@ -43,6 +43,7 @@ import com.google.devtools.build.lib.server.FailureDetails.FailureDetail; import com.google.devtools.build.lib.server.FailureDetails.TestAction; import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code; +import com.google.devtools.build.lib.shell.TerminationStatus; import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.util.OS; import com.google.devtools.build.lib.util.io.OutErr; @@ -323,6 +324,7 @@ protected TestCase parseTestResult(Path resultFile) { protected void processTestOutput( ActionExecutionContext actionExecutionContext, TestResultData testResultData, + TerminationStatus ts, String testName, Path testLog) throws IOException { @@ -354,9 +356,10 @@ protected void 
processTestOutput( .getEventHandler() .handle(Event.of(EventKind.CANCELLED, null, testName)); } else { + String message = String.format("%s (%s) (see %s)", testName, ts.toShortString(), testLog); actionExecutionContext .getEventHandler() - .handle(Event.of(EventKind.FAIL, null, testName + " (see " + testLog + ")")); + .handle(Event.of(EventKind.FAIL, null, message)); } } } diff --git a/src/main/java/com/google/devtools/build/lib/exec/BUILD b/src/main/java/com/google/devtools/build/lib/exec/BUILD index 86868c5c6acacd..cd3bfe4497238c 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/BUILD +++ b/src/main/java/com/google/devtools/build/lib/exec/BUILD @@ -356,6 +356,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/buildeventstream/proto:build_event_stream_java_proto", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", "//src/main/java/com/google/devtools/build/lib/events", + "//src/main/java/com/google/devtools/build/lib/shell", "//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/com/google/devtools/build/lib/util/io", diff --git a/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java index 029ce7ae6ff136..98f238d1f589b3 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java @@ -52,6 +52,7 @@ import com.google.devtools.build.lib.server.FailureDetails.Execution.Code; import com.google.devtools.build.lib.server.FailureDetails.FailureDetail; import com.google.devtools.build.lib.server.FailureDetails.TestAction; +import com.google.devtools.build.lib.shell.TerminationStatus; import com.google.devtools.build.lib.util.Pair; import com.google.devtools.build.lib.util.io.FileOutErr; import 
com.google.devtools.build.lib.vfs.FileStatus; @@ -278,7 +279,11 @@ private StandaloneFailedAttemptResult processTestAttempt( .post( TestAttempt.forExecutedTestResult( action, data, attemptId, testOutputs, result.executionInfo(), isLastAttempt)); - processTestOutput(actionExecutionContext, data, action.getTestName(), renamedTestLog); + TerminationStatus ts = TerminationStatus.builder() + .setWaitResponse(result.spawnResults().get(0).exitCode()) + .setTimedOut(data.getStatus() == BlazeTestStatus.TIMEOUT) + .build(); + processTestOutput(actionExecutionContext, data, ts, action.getTestName(), renamedTestLog); return new StandaloneFailedAttemptResult(data); } diff --git a/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java b/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java index 774228810aa40b..527847586cf728 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java @@ -13,6 +13,8 @@ // limitations under the License. 
package com.google.devtools.build.lib.profiler; +import static com.google.devtools.build.lib.profiler.ProfilerTask.SANDBOX_CPU_INFO; +import static com.google.devtools.build.lib.profiler.ProfilerTask.SANDBOX_MEMORY_INFO; import static com.google.devtools.build.lib.profiler.ProfilerTask.TASK_COUNT; import com.google.common.annotations.VisibleForTesting; @@ -970,12 +972,27 @@ private void writeTask(JsonWriter writer, TaskData data) throws IOException { writer.setIndent(" "); writer.beginObject(); writer.setIndent(""); - if (data.type == null) { - writer.setIndent(" "); + if (data.type == SANDBOX_CPU_INFO || data.type == SANDBOX_MEMORY_INFO) { + writer.name("cat").value("sandbox info"); + writer.name("name").value(data.type.description); + writer.name("args"); + writer.beginObject(); + for (String stat : data.description.split("\n")) { + String[] pair = stat.split(" ", 2); + // A blank or value-less line (e.g. from an empty stats string) has no pair[1]; skip it. + if (pair.length == 2) { + writer.name(pair[0]).value(pair[1]); + } + } + writer.endObject(); } else { - writer.name("cat").value(data.type.description); + if (data.type == null) { + writer.setIndent(" "); + } else { + writer.name("cat").value(data.type.description); + } + writer.name("name").value(data.description); } - writer.name("name").value(data.description); writer.name("ph").value(eventType); writer .name("ts") diff --git a/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java b/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java index 0fe41e99c0b8ab..75c6ca272ecdf3 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java @@ -73,6 +73,8 @@ public enum ProfilerTask { SYSTEM_NETWORK_UP_USAGE("Network Up usage (total)"), SYSTEM_NETWORK_DOWN_USAGE("Network Down usage (total)"), WORKERS_MEMORY_USAGE("Workers memory usage"), + SANDBOX_CPU_INFO("CPU stats (Sandbox)"), + SANDBOX_MEMORY_INFO("Memory stats (Sandbox)"), SYSTEM_LOAD_AVERAGE("System load average"), STARLARK_PARSER("Starlark 
Parser", Threshold.FIFTY_MILLIS), STARLARK_USER_FN("Starlark user function call", Threshold.FIFTY_MILLIS), diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunner.java index 0cc917dc1c8663..65596df3db484f 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunner.java @@ -37,6 +37,7 @@ import com.google.devtools.build.lib.exec.local.LocalExecutionOptions; import com.google.devtools.build.lib.exec.local.PosixLocalEnvProvider; import com.google.devtools.build.lib.profiler.Profiler; +import com.google.devtools.build.lib.profiler.ProfilerTask; import com.google.devtools.build.lib.profiler.SilentCloseable; import com.google.devtools.build.lib.runtime.CommandEnvironment; import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxInputs; @@ -59,6 +60,7 @@ import java.util.Map; import java.util.Optional; import java.util.SortedMap; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; @@ -70,6 +72,7 @@ final class LinuxSandboxedSpawnRunner extends AbstractSandboxSpawnRunner { private static final AtomicBoolean warnedAboutNonHermeticTmp = new AtomicBoolean(); private static final AtomicBoolean warnedAboutUnsupportedModificationCheck = new AtomicBoolean(); + private ConcurrentHashMap<Integer, Optional<VirtualCGroup>> cgroups; /** * Returns whether the linux sandbox is supported on the local machine by running a small command @@ -172,12 +175,17 @@ private static boolean computeIsSupported(CommandEnvironment cmdEnv, Path linuxS this.localEnvProvider = new PosixLocalEnvProvider(cmdEnv.getClientEnv()); this.treeDeleter = treeDeleter; this.reporter = cmdEnv.getReporter(); + this.cgroups = new ConcurrentHashMap<>(); } - private VirtualCGroup getCgroup(Spawn spawn, SpawnExecutionContext 
context) throws ExecException, IOException { + private Optional<VirtualCGroup> getCgroup(Spawn spawn, SpawnExecutionContext context) throws ExecException, IOException { if (spawn.getExecutionInfo().get(ExecutionRequirements.NO_SUPPORTS_CGROUPS) != null) { - return null; + return Optional.empty(); } + if (cgroups.containsKey(context.getId())) { + return cgroups.get(context.getId()); + } + SandboxOptions sandboxOptions = getSandboxOptions(); VirtualCGroup cgroup = null; @@ -252,7 +260,9 @@ private VirtualCGroup getCgroup(Spawn spawn, SpawnExecutionContext context) thro cgroup.cpu().setCpus(cpuLimit); } - return cgroup; + cgroups.put(context.getId(), Optional.ofNullable(cgroup)); + + return cgroups.get(context.getId()); } @Override @@ -326,10 +336,10 @@ protected SandboxedSpawn prepareSpawn(Spawn spawn, SpawnExecutionContext context .setKillDelay(timeoutKillDelay); - VirtualCGroup cgroup = getCgroup(spawn, context); - if (cgroup != null) { + Optional<VirtualCGroup> cgroup = getCgroup(spawn, context); + if (cgroup.isPresent()) { commandLineBuilder.setCgroupsDirs( - cgroup.paths().stream() + cgroup.get().paths().stream() .map(p -> fileSystem.getPath(p.toString())) .collect(ImmutableSet.toImmutableSet())); } @@ -528,6 +538,27 @@ public void verifyPostCondition( if (getSandboxOptions().useHermetic) { checkForConcurrentModifications(context); } + Optional<VirtualCGroup> cgroup = cgroups.remove(context.getId()); + if (cgroup == null || cgroup.isEmpty()) { + return; + } + long now = System.nanoTime(); + if (cgroup.get().cpu() != null) { + String stats = cgroup.get().cpu().getStats(); + Profiler.instance().logEventAtTime(now, ProfilerTask.SANDBOX_CPU_INFO, stats); + } + if (cgroup.get().memory() != null) { + long kills = cgroup.get().memory().oomKills(); + long limit = cgroup.get().memory().getMaxBytes(); + + StringBuilder stats = new StringBuilder(cgroup.get().memory().getStats()); + // Memory#getStats() already reports max_usage_in_bytes; only terminate its last line before appending. + if (stats.length() > 0 && stats.charAt(stats.length() - 1) != '\n') stats.append("\n"); + if (limit > 0) 
stats.append("limit_in_bytes").append(" ").append(limit).append("\n"); + if (kills > 0) stats.append("oom_kills").append(" ").append(kills).append("\n"); + + Profiler.instance().logEventAtTime(now, ProfilerTask.SANDBOX_MEMORY_INFO, stats.toString()); + } } private void checkForConcurrentModifications(SpawnExecutionContext context) diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/Controller.java b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/Controller.java index 23aa9ed2151e87..53bc73d295fafb 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/Controller.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/Controller.java @@ -8,7 +8,10 @@ import java.lang.reflect.InvocationHandler; import java.lang.reflect.Method; import java.lang.reflect.Proxy; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; +import java.util.stream.Collectors; public interface Controller { default boolean isLegacy() throws IOException { @@ -33,9 +36,30 @@ protected FailureDetails.FailureDetail getFailureDetail(String message) { Path getPath() throws IOException; + Path statFile() throws IOException; + + default String getStats() throws IOException { + if (statFile() != null && statFile().toFile().exists()) { + return Files.readString(statFile()); + } + return ""; + } + interface Memory extends Controller { void setMaxBytes(long bytes) throws IOException; long getMaxBytes() throws IOException; + long oomKills() throws IOException; + + long maxUsage() throws IOException; + + default String getStats() throws IOException { + String stats = Controller.super.getStats(); + long maxUsage = this.maxUsage(); + if (maxUsage > 0) { + return stats.strip() + "\n" + "max_usage_in_bytes " + maxUsage; + } + return stats; + } } interface Cpu extends Controller { void setCpus(float cpus) throws IOException; diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyCpu.java 
b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyCpu.java index cdcff1643ba070..a6b2e0b39a3809 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyCpu.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyCpu.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; +import java.util.stream.Collectors; public class LegacyCpu implements Controller.Cpu { private final Path path; @@ -20,6 +22,11 @@ public Path getPath() { return path; } + @Override + public Path statFile() throws IOException { + return path.resolve("cpu.stat"); + } + @Override public void setCpus(float cpus) throws IOException { int quota = Math.round(cpus * period); diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyMemory.java b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyMemory.java index 873f728ccfde19..7691f493461422 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyMemory.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v1/LegacyMemory.java @@ -5,6 +5,9 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; public class LegacyMemory implements Controller.Memory { private final Path path; @@ -14,6 +17,11 @@ public Path getPath() { return path; } + @Override + public Path statFile() throws IOException { + return path.resolve("memory.stat"); + } + public LegacyMemory(Path path) { this.path = path; } @@ -27,4 +35,19 @@ public void setMaxBytes(long bytes) throws IOException { public long getMaxBytes() throws IOException { return Long.parseLong(Files.readString(path.resolve("memory.limit_in_bytes")).trim()); } + + @Override + public long oomKills() throws IOException { + for (String line: 
Files.readAllLines(getPath().resolve("memory.oom_control"))) { + if (line.startsWith("oom_kill ")) { + return Long.parseLong(line.substring(line.indexOf(" ") + 1)); + } + } + return -1; + } + + @Override + public long maxUsage() throws IOException { + return Long.parseLong(Files.readString(path.resolve("memory.max_usage_in_bytes")).trim()); + } } diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedCpu.java b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedCpu.java index a3711f845c80cb..bf95d9841e46f7 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedCpu.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedCpu.java @@ -17,6 +17,11 @@ public Path getPath() { return path; } + @Override + public Path statFile() throws IOException { + return path.resolve("cpu.stat"); + } + @Override public void setCpus(float cpus) throws IOException { int period = 1000_000; diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedMemory.java b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedMemory.java index 3e6e23e1d7e9bb..1982395ee6d157 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedMemory.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/cgroups/v2/UnifiedMemory.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; +import java.util.stream.Collectors; public class UnifiedMemory implements Controller.Memory { private final Path path; @@ -17,6 +19,11 @@ public Path getPath() { return path; } + @Override + public Path statFile() throws IOException { + return path.resolve("memory.stat"); + } + @Override public void setMaxBytes(long bytes) throws IOException { Files.writeString(path.resolve("memory.max"), Long.toString(bytes)); @@ -26,4 +33,25 @@ public void setMaxBytes(long bytes) throws IOException { public 
long getMaxBytes() throws IOException { return Long.parseLong(Files.readString(path.resolve("memory.max")).trim()); } + + @Override + public long oomKills() throws IOException { + for (String line: Files.readAllLines(getPath().resolve("memory.events"))) { + if (line.startsWith("oom_kill ")) { + return Long.parseLong(line.substring(line.indexOf(" ") + 1)); + } + } + return -1; + } + + @Override + public long maxUsage() throws IOException { + // memory.peak has been added relatively recently, so it might not exist. + // Return -1 in that case, to signal its absence. + // Ref. https://github.com/torvalds/linux/commit/8e20d4b332660a32e842e20c34cfc3b3456bc6dc + if (path.resolve("memory.peak").toFile().exists()) { + return Long.parseLong(Files.readString(path.resolve("memory.peak")).trim()); + } + return -1; + } }