From 9835f6b24649cb95f3aa295cfb7e8d5282d6c6ce Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 23 Oct 2023 08:26:23 -0700 Subject: [PATCH] airbyte-ci: better gradle caching (#31535) Co-authored-by: postamar --- .../actions/run-dagger-pipeline/action.yml | 8 ++ .../workflows/connectors_nightly_build.yml | 2 + .github/workflows/connectors_tests.yml | 4 + .github/workflows/publish_connectors.yml | 4 + airbyte-ci/connectors/pipelines/README.md | 1 + .../connectors/build_image/commands.py | 2 + .../connectors/bump_version/commands.py | 2 + .../airbyte_ci/connectors/context.py | 20 ++- .../migrate_to_base_image/commands.py | 2 + .../airbyte_ci/connectors/publish/commands.py | 2 + .../airbyte_ci/connectors/publish/context.py | 4 + .../airbyte_ci/connectors/test/commands.py | 2 + .../connectors/upgrade_base_image/commands.py | 2 + .../pipelines/airbyte_ci/steps/gradle.py | 122 ++++++++++++------ .../pipelines/pipelines/cli/airbyte_ci.py | 6 + .../pipelines/dagger/actions/secrets.py | 2 +- .../connectors/pipelines/pipelines/hacks.py | 17 --- .../connectors/pipelines/pyproject.toml | 2 +- build.gradle | 1 - settings.gradle | 12 +- 20 files changed, 150 insertions(+), 67 deletions(-) diff --git a/.github/actions/run-dagger-pipeline/action.yml b/.github/actions/run-dagger-pipeline/action.yml index 8d413a9c658f..afb0d8e69c1a 100644 --- a/.github/actions/run-dagger-pipeline/action.yml +++ b/.github/actions/run-dagger-pipeline/action.yml @@ -63,6 +63,12 @@ inputs: ci_job_key: description: "CI job key" required: false + s3_build_cache_access_key_id: + description: "Gradle S3 Build Cache AWS access key ID" + required: false + s3_build_cache_secret_key: + description: "Gradle S3 Build Cache AWS secret key" + required: false runs: using: "composite" steps: @@ -120,4 +126,6 @@ runs: SPEC_CACHE_GCS_CREDENTIALS: ${{ inputs.spec_cache_gcs_credentials }} DOCKER_HUB_USERNAME: ${{ inputs.docker_hub_username }} DOCKER_HUB_PASSWORD: ${{ inputs.docker_hub_password }} + S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ inputs.s3_build_cache_access_key_id }} + S3_BUILD_CACHE_SECRET_KEY: ${{ inputs.s3_build_cache_secret_key }} CI: "True" diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index e34fd9837dd5..c7f7eb7dddf5 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -41,4 +41,6 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} git_branch: ${{ steps.extract_branch.outputs.branch }} github_token: ${{ secrets.GITHUB_TOKEN }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ inputs.test-connectors-options || '--concurrency=8 --support-level=certified' }} test" diff --git a/.github/workflows/connectors_tests.yml b/.github/workflows/connectors_tests.yml index b5567e9d6ec9..610e4fc94ad1 100644 --- a/.github/workflows/connectors_tests.yml +++ b/.github/workflows/connectors_tests.yml @@ -63,6 +63,8 @@ jobs: git_branch: ${{ steps.extract_branch.outputs.branch }} git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }} github_token: ${{ env.PAT }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ github.event.inputs.test-connectors-options }} test" - name: Test connectors [PULL REQUESTS] if: github.event_name == 'pull_request' @@ -76,4 
+78,6 @@ jobs: git_branch: ${{ github.head_ref }} git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }} github_token: ${{ env.PAT }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors --modified test" diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml index 44a7426548b9..5fdc8dfcde60 100644 --- a/.github/workflows/publish_connectors.yml +++ b/.github/workflows/publish_connectors.yml @@ -40,6 +40,8 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} slack_webhook_url: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }} spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors --concurrency=1 --execute-timeout=3600 --metadata-changes-only publish --main-release" - name: Publish connectors [manual] @@ -57,6 +59,8 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} slack_webhook_url: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }} spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ github.event.inputs.connectors-options }} publish ${{ github.event.inputs.publish-options }}" set-instatus-incident-on-failure: diff --git a/airbyte-ci/connectors/pipelines/README.md b/airbyte-ci/connectors/pipelines/README.md index d14af9d3f6cd..37ca60014196 100644 --- a/airbyte-ci/connectors/pipelines/README.md +++ b/airbyte-ci/connectors/pipelines/README.md @@ -398,6 +398,7 @@ This command runs the Python tests for a airbyte-ci poetry package. ## Changelog | Version | PR | Description | | ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| 2.2.4 | [#31535](https://github.com/airbytehq/airbyte/pull/31535) | Improve gradle caching when building java connectors. | | 2.2.3 | [#31688](https://github.com/airbytehq/airbyte/pull/31688) | Fix failing `CheckBaseImageUse` step when not running on PR. | | 2.2.2 | [#31659](https://github.com/airbytehq/airbyte/pull/31659) | Support builds on x86_64 platform | | 2.2.1 | [#31653](https://github.com/airbytehq/airbyte/pull/31653) | Fix CheckBaseImageIsUsed failing on non certified connectors. 
| diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py index 0cdf469c9d9c..7c24e6766b2c 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py @@ -40,6 +40,8 @@ def build(ctx: click.Context, use_host_gradle_dist_tar: bool) -> bool: use_local_cdk=ctx.obj.get("use_local_cdk"), open_report_in_browser=ctx.obj.get("open_report_in_browser"), use_host_gradle_dist_tar=use_host_gradle_dist_tar, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py index 1da52905c82d..72d69ddba4cf 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py @@ -41,6 +41,8 @@ def bump_version( ci_git_user=ctx.obj["ci_git_user"], ci_github_access_token=ctx.obj["ci_github_access_token"], open_report_in_browser=False, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py index 1c7dc0615509..188a0b581ef7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py @@ -11,7 +11,7 @@ import yaml from anyio import Path from asyncer import asyncify -from dagger import Directory +from dagger import Directory, Secret from github import PullRequest from pipelines.airbyte_ci.connectors.reports import ConnectorReport from pipelines.dagger.actions import secrets @@ -57,6 +57,8 @@ def __init__( open_report_in_browser: bool = True, docker_hub_username: Optional[str] = None, docker_hub_password: Optional[str] = None, + s3_build_cache_access_key_id: Optional[str] = None, + s3_build_cache_secret_key: Optional[str] = None, ): """Initialize a connector context. @@ -82,6 +84,8 @@ def __init__( open_report_in_browser (bool, optional): Open HTML report in browser window. Defaults to True. docker_hub_username (Optional[str], optional): Docker Hub username to use to read registries. Defaults to None. docker_hub_password (Optional[str], optional): Docker Hub password to use to read registries. Defaults to None. + s3_build_cache_access_key_id (Optional[str], optional): Gradle S3 Build Cache credentials. Defaults to None. + s3_build_cache_secret_key (Optional[str], optional): Gradle S3 Build Cache credentials. Defaults to None. 
""" self.pipeline_name = pipeline_name @@ -101,6 +105,8 @@ def __init__( self.open_report_in_browser = open_report_in_browser self.docker_hub_username = docker_hub_username self.docker_hub_password = docker_hub_password + self.s3_build_cache_access_key_id = s3_build_cache_access_key_id + self.s3_build_cache_secret_key = s3_build_cache_secret_key super().__init__( pipeline_name=pipeline_name, @@ -121,6 +127,18 @@ def __init__( open_report_in_browser=open_report_in_browser, ) + @property + def s3_build_cache_access_key_id_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id: + return self.dagger_client.set_secret("s3_build_cache_access_key_id", self.s3_build_cache_access_key_id) + return None + + @property + def s3_build_cache_secret_key_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id and self.s3_build_cache_secret_key: + return self.dagger_client.set_secret("s3_build_cache_secret_key", self.s3_build_cache_secret_key) + return None + @property def modified_files(self): return self.connector.modified_files diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py index b57afc0e0005..430a9e22a304 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py @@ -58,6 +58,8 @@ def migrate_to_base_image( open_report_in_browser=False, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py index a13e4507eeab..adc8be6f101e 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py @@ -116,6 +116,8 @@ def publish( ci_context=ctx.obj.get("ci_context"), ci_gcs_credentials=ctx.obj["ci_gcs_credentials"], pull_request=ctx.obj.get("pull_request"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py index 8c15ff3cb32c..932c7e3a030f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py @@ -39,6 +39,8 @@ def __init__( ci_context: Optional[str] = None, ci_gcs_credentials: str = None, pull_request: PullRequest = None, + s3_build_cache_access_key_id: Optional[str] = None, + s3_build_cache_secret_key: Optional[str] = None, ): self.pre_release = pre_release self.spec_cache_bucket_name = spec_cache_bucket_name @@ -66,6 +68,8 @@ def __init__( should_save_report=True, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + 
s3_build_cache_access_key_id=s3_build_cache_access_key_id, + s3_build_cache_secret_key=s3_build_cache_secret_key, ) @property diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py index 8ed006d81143..041ae7e5a259 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py @@ -80,6 +80,8 @@ def test( fast_tests_only=fast_tests_only, code_tests_only=code_tests_only, use_local_cdk=ctx.obj.get("use_local_cdk"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py index 7c857bf617a4..634ff0ff99d9 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py @@ -50,6 +50,8 @@ def upgrade_base_image(ctx: click.Context, set_if_not_exists: bool, docker_hub_u open_report_in_browser=False, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 924fc8807fcf..b2b383f59ab7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -26,7 +26,10 @@ class GradleTask(Step, ABC): mount_connector_secrets (bool): Whether to mount connector secrets. """ - DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--scan", "--build-cache", "--console=plain") + DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--no-watch-fs", "--scan", "--build-cache", "--console=plain") + LOCAL_MAVEN_REPOSITORY_PATH = "/root/.m2" + GRADLE_DEP_CACHE_PATH = "/root/gradle-cache" + GRADLE_HOME_PATH = "/root/.gradle" gradle_task_name: ClassVar[str] bind_to_docker_host: ClassVar[bool] = False @@ -36,11 +39,9 @@ def __init__(self, context: PipelineContext) -> None: super().__init__(context) @property - def connector_java_build_cache(self) -> CacheVolume: - # TODO: remove this once we finish the project to boost source-postgres CI performance. - # We should use a static gradle-cache volume name. 
- cache_volume_name = hacks.get_gradle_cache_volume_name(self.context, self.logger) - return self.context.dagger_client.cache_volume(cache_volume_name) + def dependency_cache_volume(self) -> CacheVolume: + """This cache volume is for sharing gradle dependencies (jars and poms) across all pipeline runs.""" + return self.context.dagger_client.cache_volume("gradle-dependency-cache") @property def build_include(self) -> List[str]: @@ -56,15 +57,8 @@ def build_include(self) -> List[str]: for dependency_directory in self.context.connector.get_local_dependency_paths(with_test_dependencies=True) ] - def _get_gradle_command(self, task: str) -> List[str]: - return sh_dash_c( - [ - # The gradle command is chained in between a couple of rsyncs which load from- and store to the cache volume. - "(rsync -a --stats /root/gradle-cache/ /root/.gradle || true)", - f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} {task}", - "(rsync -a --stats /root/.gradle/ /root/gradle-cache || true)", - ] - ) + def _get_gradle_command(self, task: str, *args) -> str: + return f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS + args)} {task}" async def _run(self) -> StepResult: include = [ @@ -85,7 +79,6 @@ async def _run(self) -> StepResult: "tools/lib/lib.sh", "tools/gradle/codestyle", "pyproject.toml", - "airbyte-cdk/java/airbyte-cdk/**", ] + self.build_include yum_packages_to_install = [ @@ -97,12 +90,17 @@ async def _run(self) -> StepResult: "rsync", # required for gradle cache synchronization. ] - # Define a gradle container which will be cached and re-used for all tasks. - # We should do our best to cram any generic & expensive layers in here. - gradle_container = ( + # Common base container. + gradle_container_base = ( self.dagger_client.container() # Use a linux+jdk base image with long-term support, such as amazoncorretto. .from_(AMAZONCORRETTO_IMAGE) + # Mount the dependency cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. + .with_mounted_cache(self.GRADLE_DEP_CACHE_PATH, self.dependency_cache_volume, sharing=CacheSharingMode.LOCKED) + # Set GRADLE_HOME to the directory which will be rsync-ed with the gradle cache volume. + .with_env_variable("GRADLE_HOME", self.GRADLE_HOME_PATH) + # Same for GRADLE_USER_HOME. + .with_env_variable("GRADLE_USER_HOME", self.GRADLE_HOME_PATH) # Install a bunch of packages as early as possible. .with_exec( sh_dash_c( @@ -120,36 +118,73 @@ async def _run(self) -> StepResult: ] ) ) - # Set GRADLE_HOME and GRADLE_USER_HOME to the directory which will be rsync-ed with the gradle cache volume. - .with_env_variable("GRADLE_HOME", "/root/.gradle") - .with_env_variable("GRADLE_USER_HOME", "/root/.gradle") # Set RUN_IN_AIRBYTE_CI to tell gradle how to configure its build cache. # This is consumed by settings.gradle in the repo root. .with_env_variable("RUN_IN_AIRBYTE_CI", "1") + # Disable the Ryuk container because it needs privileged docker access which it can't have. + .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") + # Set the current working directory. + .with_workdir("/airbyte") # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) - # Mount the gradle cache volume. - # We deliberately don't mount it at $GRADLE_HOME, instead we load it there and store it from there using rsync. - # This is because the volume is accessed concurrently by all GradleTask instances. 
- # Hence, why we synchronize the writes by setting the `sharing` parameter to LOCKED. - .with_mounted_cache("/root/gradle-cache", self.connector_java_build_cache, sharing=CacheSharingMode.LOCKED) - # Mount the parts of the repo which interest us in /airbyte. - .with_workdir("/airbyte") + ) + + # Augment the base container with S3 build cache secrets when available. + if self.context.s3_build_cache_access_key_id: + gradle_container_base = gradle_container_base.with_secret_variable( + "S3_BUILD_CACHE_ACCESS_KEY_ID", self.context.s3_build_cache_access_key_id_secret + ) + if self.context.s3_build_cache_secret_key: + gradle_container_base = gradle_container_base.with_secret_variable( + "S3_BUILD_CACHE_SECRET_KEY", self.context.s3_build_cache_secret_key_secret + ) + + # Running a gradle task like "help" with these arguments will trigger updating all dependencies. + # When the cache is cold, this downloads many gigabytes of jars and poms from all over the internet. + warm_dependency_cache_args = ["--write-verification-metadata", "sha256", "--dry-run"] + if self.context.is_local: + # When running locally, this dependency update is slower and less useful than within a CI runner. Skip it. + warm_dependency_cache_args = ["--dry-run"] + + # Mount the whole git repo to update the cache volume contents and build the CDK. + with_whole_git_repo = ( + gradle_container_base + # Mount the whole repo. + .with_directory("/airbyte", self.context.get_repo_dir(".")) + # Update the cache in place by executing a gradle task which will update all dependencies and build the CDK. + .with_exec( + sh_dash_c( + [ + # Ensure that the .m2 directory exists. + f"mkdir -p {self.LOCAL_MAVEN_REPOSITORY_PATH}", + # Load from the cache volume. + f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", + # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. + self._get_gradle_command("help", *warm_dependency_cache_args), + # Build the CDK and publish it to the local maven repository. + self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), + # Store to the cache volume. + f"(rsync -a --stats {self.GRADLE_HOME_PATH}/ {self.GRADLE_DEP_CACHE_PATH} || true)", + ] + ) + ) + ) + + # Mount only the code needed to build the connector. + gradle_container = ( + gradle_container_base + # Copy the local maven repository and force evaluation of `with_whole_git_repo` container. + .with_directory(self.LOCAL_MAVEN_REPOSITORY_PATH, await with_whole_git_repo.directory(self.LOCAL_MAVEN_REPOSITORY_PATH)) + # Mount the connector-agnostic whitelisted files in the git repo. .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) + # Mount the sources for the connector and its dependencies in the git repo. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) - # Disable the Ryuk container because it needs privileged docker access that does not work: - .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") - # Run gradle once to populate the container's local maven repository. - # This step is useful also to serve as a basic sanity check and to warm the gradle cache. - # This will download gradle itself, a bunch of poms and jars, compile the gradle plugins, configure tasks, etc. - .with_exec(self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded")) ) # From this point on, we add layers which are task-dependent. 
if self.mount_connector_secrets: - gradle_container = gradle_container.with_( - await secrets.mounted_connector_secrets(self.context, f"{self.context.connector.code_directory}/secrets") - ) + secrets_dir = f"{self.context.connector.code_directory}/secrets" + gradle_container = gradle_container.with_(await secrets.mounted_connector_secrets(self.context, secrets_dir)) if self.bind_to_docker_host: # If this GradleTask subclass needs docker, then install it and bind it to the existing global docker host container. gradle_container = pipelines.dagger.actions.system.docker.with_bound_docker_host(self.context, gradle_container) @@ -158,5 +193,14 @@ async def _run(self) -> StepResult: # Run the gradle task that we actually care about. connector_task = f":airbyte-integrations:connectors:{self.context.connector.technical_name}:{self.gradle_task_name}" - gradle_container = gradle_container.with_exec(self._get_gradle_command(connector_task)) + gradle_container = gradle_container.with_exec( + sh_dash_c( + [ + # Warm the gradle cache. + f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", + # Run the gradle task. + self._get_gradle_command(connector_task, f"-Ds3BuildCachePrefix={self.context.connector.technical_name}"), + ] + ) + ) return await self.get_step_result(gradle_container) diff --git a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py index 7da72b02ee66..5256351700e8 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py +++ b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py @@ -173,6 +173,8 @@ def get_modified_files( envvar="GCP_GSM_CREDENTIALS", ) @click.option("--ci-job-key", envvar="CI_JOB_KEY", type=str) +@click.option("--s3-build-cache-access-key-id", envvar="S3_BUILD_CACHE_ACCESS_KEY_ID", type=str) +@click.option("--s3-build-cache-secret-key", envvar="S3_BUILD_CACHE_SECRET_KEY", type=str) @click.option("--show-dagger-logs/--hide-dagger-logs", default=False, type=bool) @click.pass_context @track_command @@ -191,6 +193,8 @@ def airbyte_ci( ci_report_bucket_name: str, ci_gcs_credentials: str, ci_job_key: str, + s3_build_cache_access_key_id: str, + s3_build_cache_secret_key: str, show_dagger_logs: bool, ): # noqa D103 ctx.ensure_object(dict) @@ -209,6 +213,8 @@ def airbyte_ci( ctx.obj["ci_git_user"] = ci_git_user ctx.obj["ci_github_access_token"] = ci_github_access_token ctx.obj["ci_job_key"] = ci_job_key + ctx.obj["s3_build_cache_access_key_id"] = s3_build_cache_access_key_id + ctx.obj["s3_build_cache_secret_key"] = s3_build_cache_secret_key ctx.obj["pipeline_start_timestamp"] = pipeline_start_timestamp ctx.obj["show_dagger_logs"] = show_dagger_logs diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py index b497832c9181..1d1385403603 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py @@ -6,7 +6,7 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING, Callable, Optional from dagger import Container, Secret from pipelines.helpers.utils import get_file_contents, get_secret_host_variable diff --git a/airbyte-ci/connectors/pipelines/pipelines/hacks.py b/airbyte-ci/connectors/pipelines/pipelines/hacks.py index 4cdac0926b1d..1dcbbbdb1647 100644 --- 
a/airbyte-ci/connectors/pipelines/pipelines/hacks.py +++ b/airbyte-ci/connectors/pipelines/pipelines/hacks.py @@ -108,20 +108,3 @@ def get_cachebuster(context: ConnectorContext, logger: Logger) -> str: ) return str(context.pipeline_start_timestamp) return "0" - - -def get_gradle_cache_volume_name(context: ConnectorContext, logger: Logger) -> str: - """ - This function will return a semi-static gradle cache volume name for connectors in CONNECTORS_WITHOUT_CACHING and a static value for all other connectors. - By semi-static I mean that the gradle cache volume name will change on each pipeline execution but will be the same for all the steps of the pipeline. - This hack is useful to collect unbiased metrics on the CI speed for connectors in CONNECTORS_WITHOUT_CACHING: it guarantees that the gradle cache volume will be empty on each pipeline execution and no remote caching is used. - - Returns: - str: The gradle cache volume name. - """ - if context.connector.technical_name in CONNECTORS_WITHOUT_CACHING: - logger.warning( - f"Getting a fresh gradle cache volume name for {context.connector.technical_name} to not use remote caching. Only used in the context of the CI performance improvements project for {context.connector.technical_name}." - ) - return f"gradle-cache-{context.pipeline_start_timestamp}" - return "gradle-cache" diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index c7499c11b299..4b2ff0ba8ef6 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "2.2.3" +version = "2.2.4" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] diff --git a/build.gradle b/build.gradle index 54a66dd407ff..bee172871803 100644 --- a/build.gradle +++ b/build.gradle @@ -37,7 +37,6 @@ ext { version = System.getenv("VERSION") ?: env.VERSION image_tag = System.getenv("VERSION") ?: 'dev' skipSlowTests = (System.getProperty('skipSlowTests', 'false') != 'false') - } // Pyenv support. try { diff --git a/settings.gradle b/settings.gradle index 903962ab182c..c9b9aaf27b13 100644 --- a/settings.gradle +++ b/settings.gradle @@ -146,13 +146,11 @@ if (isCiServer || isAirbyteCI) { enabled = isAirbyteCI } remote(com.github.burrunan.s3cache.AwsS3BuildCache) { - region = 'us-east-2' - bucket = 'airbyte-buildcache' - prefix = 'cache/' - push = isCiServer - enabled = isCiServer && !isAirbyteCI - // Credentials will be taken from S3_BUILD_CACHE_... environment variables - // anonymous access will be used if environment variables are missing + region = 'us-west-2' // close to dagger runners + bucket = 'ab-ci-cache' + prefix = "${System.getProperty('s3BuildCachePrefix', 'connectors')}-ci-cache/" + push = isAirbyteCI + enabled = System.getenv().containsKey("S3_BUILD_CACHE_ACCESS_KEY_ID") } } }
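
Reviewer notes on the caching design, appended for context.

The core of this change is the load/run/store pattern in GradleTask._run: the shared Dagger cache volume is mounted at a staging path (GRADLE_DEP_CACHE_PATH) rather than at $GRADLE_HOME, because Gradle does not expect concurrent modifications of its home directory. Each task rsyncs the cache contents in before invoking Gradle and back out afterwards. Below is a minimal standalone sketch of that pattern, assuming the Dagger Python SDK; the image tag, cache key, and function name are illustrative stand-ins, not values from the pipeline code.

    import anyio
    import dagger

    async def run_gradle_task(task: str) -> str:
        async with dagger.Connection(dagger.Config()) as client:
            # One volume shared by all pipeline runs; LOCKED serializes concurrent mounts.
            dep_cache = client.cache_volume("gradle-dependency-cache")
            container = (
                client.container()
                .from_("amazoncorretto:17")  # stand-in for AMAZONCORRETTO_IMAGE
                .with_exec(["yum", "install", "-y", "rsync"])
                # Stage the cache away from $GRADLE_HOME; Gradle doesn't tolerate
                # concurrent writers to a live Gradle home.
                .with_mounted_cache("/root/gradle-cache", dep_cache, sharing=dagger.CacheSharingMode.LOCKED)
                .with_env_variable("GRADLE_HOME", "/root/.gradle")
                .with_env_variable("GRADLE_USER_HOME", "/root/.gradle")
                .with_directory("/airbyte", client.host().directory("."))
                .with_workdir("/airbyte")
                .with_exec(
                    [
                        "sh",
                        "-c",
                        # Load the cache, run the task, store the cache back.
                        # '|| true' keeps a cache-sync hiccup from failing the build.
                        "(rsync -a /root/gradle-cache/ /root/.gradle || true) && "
                        f"./gradlew --no-daemon --build-cache {task} && "
                        "(rsync -a /root/.gradle/ /root/gradle-cache || true)",
                    ]
                )
            )
            return await container.stdout()

    if __name__ == "__main__":
        print(anyio.run(run_gradle_task, "help"))

The same idea explains the two-phase container construction in the patch: with_whole_git_repo mounts the entire repo once to warm the dependency cache and publish the CDK to the local maven repository, and the per-task container then copies only that maven repository plus the whitelisted files, keeping the task-dependent layers small and cacheable.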
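The credential plumbing is intentionally thin: the GitHub workflows pass the SELF_RUNNER_AWS_* secrets to the composite action, which exports them as S3_BUILD_CACHE_ACCESS_KEY_ID / S3_BUILD_CACHE_SECRET_KEY; the CLI reads those env vars into the click context; the connector context wraps them as Dagger secrets; and the AwsS3BuildCache remote configured in settings.gradle picks them up from the container environment (the removed comment in settings.gradle already documented the S3_BUILD_CACHE_... convention). A sketch of the last hop, again assuming the Dagger Python SDK; the helper name is hypothetical and merely mirrors the gradle_container_base augmentation above:

    from typing import Optional

    import dagger

    def with_s3_build_cache_credentials(
        client: dagger.Client,
        container: dagger.Container,
        access_key_id: Optional[str],
        secret_key: Optional[str],
    ) -> dagger.Container:
        # Hypothetical helper. Without credentials the remote cache stays
        # disabled, per settings.gradle:
        #   enabled = System.getenv().containsKey("S3_BUILD_CACHE_ACCESS_KEY_ID")
        if access_key_id:
            container = container.with_secret_variable(
                "S3_BUILD_CACHE_ACCESS_KEY_ID",
                client.set_secret("s3_build_cache_access_key_id", access_key_id),
            )
            if secret_key:
                container = container.with_secret_variable(
                    "S3_BUILD_CACHE_SECRET_KEY",
                    client.set_secret("s3_build_cache_secret_key", secret_key),
                )
        return container

Note also that the -Ds3BuildCachePrefix=<technical name> flag in the final gradle invocation scopes remote cache entries per connector, matching the prefix template in settings.gradle.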