diff --git a/.github/operator-upgrade/action.yaml b/.github/operator-upgrade/action.yaml
new file mode 100644
index 00000000..9038373a
--- /dev/null
+++ b/.github/operator-upgrade/action.yaml
@@ -0,0 +1,20 @@
+name: Operator Upgrade Test
+description: Runs operator upgrade scenario
+runs:
+  using: composite
+  steps:
+    - name: Checkout source
+      uses: actions/checkout@v3
+
+    - name: Install all tools
+      uses: ./.github/tools-cache
+
+    - uses: ./.github/compute-version
+      id: version
+
+    - name: Run Operator Upgrade
+      shell: bash
+      run: |
+        ./tests/run-e2e.sh --ci
+      env:
+        VERSION: ${{ steps.version.outputs.version }}
diff --git a/.github/workflows/pr-checks.yaml b/.github/workflows/pr-checks.yaml
index bb892dbd..d697e66a 100644
--- a/.github/workflows/pr-checks.yaml
+++ b/.github/workflows/pr-checks.yaml
@@ -125,8 +125,62 @@ jobs:
           version: ${{ steps.version.outputs.version }}
           additional_tags: ${{ steps.additional_tags.outputs.result }}
 
+  operator-upgrade:
+    needs: [build-images, bundle]
+    env:
+      KIND_VERSION: "0.15.0"
+      GO111MODULE: "on"
+      OPERATOR_IMAGE: "quay.io/sustainable_computing_io/kepler-operator"
+      KUBECONFIG: /tmp/.kube/config
+      KIND_WORKER_NODES: 2
+
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Install Go
+        uses: actions/setup-go@main
+        with:
+          go-version-file: go.mod
+
+      - name: Install all tools
+        uses: ./.github/tools-cache
+
+      - name: use kepler action for kind cluster build
+        uses: sustainable-computing-io/kepler-action@v0.0.5
+        with:
+          ebpfprovider: libbpf
+          cluster_provider: kind
+        env:
+          PROMETHEUS_ENABLE: "false"
+
+      - name: Ensure cluster is able to run OLM bundles
+        run: make cluster-prereqs
+
+      - name: Run operator upgrade test
+        uses: ./.github/operator-upgrade
+
+      - name: Capture cluster state
+        if: always()
+        shell: bash
+        run: |
+          # Capture apiserver state
+          # TODO: enable this when we have oc installed as part of `make tools`
+          # oc adm inspect node --dest-dir cluster-state || true
+          # oc adm inspect -A statefulset --dest-dir cluster-state || true
+          # oc adm inspect -A deployment --dest-dir cluster-state || true
+          # oc adm inspect -A ns --dest-dir cluster-state || true
+          cp -r tmp/e2e cluster-state/ || true
+
+      - name: Archive production artifacts
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: cluster-state
+          path: cluster-state
+
   e2e:
-    needs: [docs, golangci, fmt, vulnerability_detect, escapes_detect]
+    needs: [operator-upgrade]
     env:
       KIND_VERSION: "0.15.0"
       GO111MODULE: "on"
@@ -172,7 +226,7 @@ jobs:
 
       - name: Run e2e tests
         run: |
-          ./tests/run-e2e.sh --ci
+          ./tests/run-e2e.sh --ci --no-upgrade
         env:
           VERSION: ${{ steps.version.outputs.version }}
 
diff --git a/tests/run-e2e.sh b/tests/run-e2e.sh
index 1fa7bcab..f6ecc5fa 100755
--- a/tests/run-e2e.sh
+++ b/tests/run-e2e.sh
@@ -13,6 +13,7 @@ declare -r OPERATOR="kepler-operator"
 declare -r OLM_CATALOG="kepler-operator-catalog"
 declare -r VERSION=${VERSION:-"0.0.0-e2e"}
 declare -r OPERATOR_DEPLOY_YAML="config/manager/manager.yaml"
+declare -r KEPLER_CR="config/samples/kepler.system_v1alpha1_kepler.yaml"
 declare -r OPERATOR_CSV="bundle/manifests/$OPERATOR.clusterserviceversion.yaml"
 declare -r OPERATOR_DEPLOY_NAME="kepler-operator-controller"
 declare -r OPERATOR_RELEASED_BUNDLE="quay.io/sustainable_computing_io/$OPERATOR-bundle"
@@ -96,6 +97,14 @@ gather_olm() {
 
 run_bundle_upgrade() {
 	header "Running Bundle Upgrade"
+	# Prepare the cluster and publish the candidate bundle. Load/build/push
+	# failures abort the test: upgrading to a stale bundle proves nothing.
+	prune_images_if_ci
+	kind_load_images || return 1
+	delete_olm_subscription || true
+	build_bundle || return 1
+	push_bundle || return 1
+
 	local -i ret=0
 	local replaced_version=""
 
@@ -113,6 +122,12 @@ run_bundle_upgrade() {
 		return $ret
 	}
 
+	# Create Kepler before upgrading so there is an instance to verify afterwards.
+	info "Creating a new Kepler CR"
+	run kubectl apply -f "$KEPLER_CR"
+
+	wait_for_kepler || return 1
+
 	info "Running Upgrade to new bundle"
 	run operator-sdk run bundle-upgrade "$BUNDLE_IMG" \
 		--namespace "$OPERATORS_NS" --use-http || {
@@ -121,8 +136,53 @@ run_bundle_upgrade() {
 		gather_olm || true
 		fail "Running Bundle Upgrade failed"
 		return $ret
+	}
+
+	# Return failures explicitly so the checks below are enforced whether or
+	# not errexit is active in the calling context.
+	wait_for_operator "$OPERATORS_NS" || return 1
+	wait_for_kepler || return 1
+	wait_until 10 10 "kepler images to be up to date" check_images || return 1
+
+	return 0
+}
+
+# wait_for_kepler waits for the "kepler" CR to report its exporter Available
+# condition as "True"; it returns 1 if that wait fails.
+wait_for_kepler() {
+	header "Waiting for Kepler to be ready"
+	wait_until 10 10 "kepler to be available" condition_check "True" kubectl get kepler kepler \
+		-o jsonpath="{.status.exporter.conditions[?(@.type=='Available')].status}" || {
+		fail "Kepler is not ready"
+		return 1
+	}
+	ok "Kepler is ready"
+	return 0
+}
+
+# check_images compares the exporter image set on the KeplerInternal objects
+# with the related image listed in the operator CSV; returns 1 on mismatch.
+# NOTE(review): assumes one KeplerInternal and one relatedImages entry - confirm.
+check_images() {
+	header "Checking Kepler Images"
+	local actual_image=""
+	local expected_image=""
+	actual_image=$(kubectl get keplerinternals -o \
+		jsonpath="{.items[*].spec.exporter.deployment.image}")
+	# Quote the yq expression so the shell cannot glob-expand "[]".
+	expected_image=$(yq -r '.spec.relatedImages[].image' "$OPERATOR_CSV")
+
+	# Two empty lookups compare equal; never report that as up to date.
+	[[ -n "$actual_image" && -n "$expected_image" ]] || {
+		fail "Could not determine Kepler images"
+		return 1
+	}
+	[[ "$actual_image" == "$expected_image" ]] || {
+		fail "Kepler images are not up to date"
+		return 1
 	}
 
+	ok "Kepler images are up to date"
 	return 0
 }
 
@@ -371,12 +431,7 @@ deploy_operator() {
 	delete_olm_subscription || true
 	build_bundle
 	push_bundle
-
-	if $NO_UPGRADE; then
-		run_bundle
-	else
-		run_bundle_upgrade
-	fi
+	run_bundle
 	wait_for_operator "$OPERATORS_NS"
 	prune_images_if_ci
 }
@@ -427,8 +482,7 @@ reject_invalid() {
 	{
 		# NOTE: || true ignores pipefail so that non-zero exit code will not be reported
 		# when kubectl apply fails (as expected)
-		sed -e "s|name: kepler$|name: $invalid_kepler|g" \
-			config/samples/kepler.system_v1alpha1_kepler.yaml |
+		sed -e "s|name: kepler$|name: $invalid_kepler|g" "$KEPLER_CR" |
 			kubectl apply -f- 2>&1 || true
 
 	} | tee /dev/stderr |
@@ -475,6 +529,24 @@ print_config() {
 	line 50
 }
 
+# deploy_and_run_e2e restarts the operator (when $NO_DEPLOY is true) or deploys
+# it, then runs the e2e suite with the given arguments and returns the suite's
+# exit code.
+deploy_and_run_e2e() {
+	if $NO_DEPLOY; then
+		restart_operator || die "restarting operator failed 🤕"
+	else
+		deploy_operator
+	fi
+
+	local -i ret=0
+	run_e2e "$@" || ret=$?
+
+	info "e2e test - exit code: $ret"
+	line 50 heavy
+	return $ret
+}
+
 main() {
 	export PATH="$LOCAL_BIN:$PATH"
 	parse_args "$@" || die "parse args failed"
@@ -491,19 +563,13 @@ main() {
 	init_logs_dir
 	print_config
 
-	if $NO_DEPLOY; then
-		restart_operator || die "restarting operator failed 🤕"
+	# Call the scenario as a plain command: main then returns its real exit
+	# code and errexit (if enabled) is not suppressed inside the callee.
+	if $NO_UPGRADE; then
+		deploy_and_run_e2e "$@"
 	else
-		deploy_operator
+		run_bundle_upgrade
 	fi
-
-	local -i ret=0
-	run_e2e "$@" || ret=$?
-
-	info "e2e test - exit code: $ret"
-	line 50 heavy
-
-	return $ret
 }
 
 main "$@"