Skip to content

Commit

Permalink
🐑 Significantly reduce cluster startup time
Browse files (browse the repository at this point in the history)
This change implements NativeLink's new worker-init system. This lets us
skip building nativelink from source, reducing the startup time for the
LRE workflow from ~10-15 minutes to ~1 minute.

Fixes #9
  • Loading branch information
aaronmondal committed Jun 26, 2024
1 parent f755350 commit cc2b8a2
Show file tree
Hide file tree
Showing 11 changed files with 270 additions and 2,349 deletions.
2 changes: 1 addition & 1 deletion .bazelversion
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8.0.0-pre.20240422.4
8.0.0-pre.20240607.2
21 changes: 11 additions & 10 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@ module(
compatibility_level = 0,
)

bazel_dep(name = "bazel_skylib", version = "1.6.1", dev_dependency = True)
bazel_dep(name = "bazel_skylib", version = "1.7.1", dev_dependency = True)

git_override(
module_name = "rules_ll",
# Note: Keep this commit in sync with the one in flake.nix.
commit = "5ac0546db310da08d44f14271066e0b159611c25",
commit = "a8a26cc78c43896b179caaa263857bd6c88a1da9",
remote = "https://github.com/eomii/rules_ll",
)

rules_ll_dependencies = use_extension(
"@rules_ll//ll:init.bzl",
"rules_ll_dependencies",
)
use_repo(
rules_ll_dependencies,
"local-remote-execution",
bazel_dep(name = "local-remote-execution", version = "0")
archive_override(
module_name = "local-remote-execution",
integrity = "sha256-nWJ6XlV9YT1MSbLur6ygZ7IDaqHInm9mtwUmjRYqLQo=",
strip_prefix = "nativelink-2adda2475eed578d610a66b98f965922656061af/local-remote-execution",
urls = [
"https://github.com/TraceMachina/nativelink/archive/2adda2475eed578d610a66b98f965922656061af.zip",
],
)

# Note: The pipe-through mechanism from rules_ll makes this actually LLVM~19.
Expand Down Expand Up @@ -51,4 +52,4 @@ use_repo(
)

# Documentation.
bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True)
bazel_dep(name = "stardoc", version = "0.7.0", dev_dependency = True)
2,467 changes: 189 additions & 2,278 deletions MODULE.bazel.lock

Large diffs are not rendered by default.

7 changes: 1 addition & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,7 @@ setup.

The setup may take a minute to boot up. When running the command the first time
it might take some time to build NativeLink from source inside the cluster
pipelines. Once the command finishes you can invoke the following command to run
a local dashboard:

```bash
kubectl -n kube-system port-forward svc/hubble-ui 8080:80
```
pipelines.

Visit <http://localhost:8080/?namespace=default> to view the cluster topology.

Expand Down
64 changes: 32 additions & 32 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
};
nativelink = {
# Note: Keep this commit in sync with the LRE commit in `MODULE.bazel`.
url = "github:TraceMachina/nativelink/v0.4.0";
url = "github:TraceMachina/nativelink/2adda2475eed578d610a66b98f965922656061af";

# This repository provides the autogenerated LRE toolchains which are
# dependent on the nixpkgs version in the nativelink repository. To keep
Expand All @@ -37,16 +37,16 @@
inputs = {
flake-utils.follows = "flake-utils";
flake-parts.follows = "flake-parts";
pre-commit-hooks.follows = "pre-commit-hooks";
git-hooks.follows = "pre-commit-hooks";
};
};
rules_ll = {
url = "github:eomii/rules_ll/5ac0546db310da08d44f14271066e0b159611c25";
url = "github:eomii/rules_ll/a8a26cc78c43896b179caaa263857bd6c88a1da9";
inputs = {
nixpkgs.follows = "nixpkgs";
flake-utils.follows = "flake-utils";
flake-parts.follows = "flake-parts";
pre-commit-hooks.follows = "pre-commit-hooks";
git-hooks.follows = "pre-commit-hooks";
nativelink.follows = "nativelink";
};
};
Expand Down
7 changes: 2 additions & 5 deletions local-remote-execution/create-worker.nix
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
{
# pkgs,
nativelink
, buildImage
{ buildImage
, self
, runCommand
, runtimeShell
Expand Down Expand Up @@ -89,7 +86,7 @@ buildImage {
mkEnvSymlink
(buildEnv {
name = "${image.imageName}-buildEnv";
paths = [ nativelink coreutils bash ];
paths = [ coreutils bash ];
pathsToLink = [ "/bin" ];
})
];
Expand Down
3 changes: 3 additions & 0 deletions local-remote-execution/lre-kill-the-mojo.nix
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@ writeShellScriptBin "lre-kill-the-mojo" ''
${docker}/bin/docker container stop kind-registry \
| ${findutils}/bin/xargs docker rm
${docker}/bin/docker container stop kind-loadbalancer \
| ${findutils}/bin/xargs docker rm
''
18 changes: 10 additions & 8 deletions local-remote-execution/lre-mojo-cluster.nix
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

let
# The specific commit to use
nativelinkCommit = "75105df746c626da76f74e412764e6755296a8ab";
nativelinkCommit = "9ba43236cf61737cd9561a1657ee50686b459966";

# Base URL for GitHub access
githubBaseUrl = "github:TraceMachina/nativelink/";
Expand All @@ -24,26 +24,27 @@ writeShellScriptBin "lre-mojo-cluster" ''
# Wait for the gateway to be ready
${kubectl}/bin/kubectl wait --for=condition=Programmed --timeout=60s \
gateway eventlistener
gateway el-gateway
# Allow an additional grace period for potential routes to set themselves up.
# TODO(aaronmondal): Find a better solution.
sleep 10
# Retrieve the event listener address
EVENTLISTENER=''$(${kubectl}/bin/kubectl get gtw eventlistener \
-o=jsonpath='{.status.addresses[0].value}')
# POST requests to the event listener
${curl}/bin/curl -v \
-H 'Content-Type: application/json' \
-d '{"flakeOutput": "${githubBaseUrl}${nativelinkCommit}#image"}' \
http://"''${EVENTLISTENER}":8080
localhost:8082/eventlistener
${curl}/bin/curl -v \
-H 'Content-Type: application/json' \
-d '{"flakeOutput": "${githubBaseUrl}${nativelinkCommit}#nativelink-worker-init"}' \
localhost:8082/eventlistener
${curl}/bin/curl -v \
-H 'Content-Type: application/json' \
-d '{"flakeOutput": "./src_root#nativelink-worker-lre-mojo"}' \
http://"''${EVENTLISTENER}":8080
localhost:8082/eventlistener
# Wait for PipelineRuns to start
until ${kubectl}/bin/kubectl get pipelinerun \
Expand Down Expand Up @@ -75,6 +76,7 @@ writeShellScriptBin "lre-mojo-cluster" ''
# Use kustomize to set images
cd "''${KUSTOMIZE_DIR}" && ${kustomize}/bin/kustomize edit set image \
nativelink=localhost:5001/nativelink:"''$(${nix}/bin/nix eval ${githubBaseUrl}${nativelinkCommit}#image.imageTag --raw)" \
nativelink-worker-init=localhost:5001/nativelink-worker-init:"''$(${nix}/bin/nix eval ${githubBaseUrl}${nativelinkCommit}#nativelink-worker-init.imageTag --raw)" \
nativelink-worker-lre-mojo=localhost:5001/nativelink-worker-lre-mojo:"''$(${nix}/bin/nix eval .#nativelink-worker-lre-mojo.imageTag --raw)"
# Apply the configuration
Expand Down
14 changes: 13 additions & 1 deletion local-remote-execution/worker-lre-mojo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ spec:
# yamllint disable rule:line-length
args:
- |
git config --global --add safe.directory "*"
NATIVELINK_WORKER_PLATFORM=docker://lre-mojo:$(nix eval /mnt/src_root#lre-mojo.imageTag --raw) &&
printf '#!/bin/sh\nexport NATIVELINK_WORKER_PLATFORM=%s\nexec "$@"' "$NATIVELINK_WORKER_PLATFORM" > /entrypoint/entrypoint.sh &&
chmod +x /entrypoint/entrypoint.sh
Expand All @@ -41,6 +42,13 @@ spec:
mountPath: /entrypoint
- name: mnt
mountPath: /mnt
- name: nativelink-worker-init
# This image will be edited by kustomize.
image: nativelink-worker-init
args: ["/shared/nativelink"]
volumeMounts:
- name: shared
mountPath: /shared
containers:
- name: nativelink-worker-lre-mojo
# This image will be edited by kustomize.
Expand All @@ -58,9 +66,13 @@ spec:
subPath: worker.json
- name: entrypoint
mountPath: /entrypoint
- name: shared
mountPath: /shared
command: ["/entrypoint/entrypoint.sh"]
args: ["/bin/nativelink", "/worker.json"]
args: ["/shared/nativelink", "/worker.json"]
volumes:
- name: shared
emptyDir: {}
- name: entrypoint
emptyDir: {}
- name: worker-config
Expand Down
8 changes: 4 additions & 4 deletions lre-bazel.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
}:

writeShellScriptBin "lre-bazel" ''
EXECUTOR=$(${kubectl}/bin/kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}')
CACHE=$(${kubectl}/bin/kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}')
EXECUTOR=$(${kubectl}/bin/kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}')
CACHE=$(${kubectl}/bin/kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}')
if [[
"$1" == "build" ||
Expand All @@ -18,8 +18,8 @@ writeShellScriptBin "lre-bazel" ''
${bazel}/bin/bazel $1 \
--remote_timeout=600 \
--remote_instance_name=main \
--remote_cache=grpc://''${CACHE}:50051 \
--remote_executor=grpc://''${EXECUTOR}:50052 \
--remote_cache=grpc://''${CACHE} \
--remote_executor=grpc://''${EXECUTOR} \
--strategy=TestRunner=local \
''${@:2}
else
Expand Down

0 comments on commit cc2b8a2

Please sign in to comment.