-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: collect log from provisioning rosa HCP
- Loading branch information
Showing
2 changed files
with
217 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Tekton Task: Rosa HCP | ||
|
||
Please read/update [rosa-hcp tasks docs](../../../docs/qe-available-tasks/rosa.md) from docs folder. |
214 changes: 214 additions & 0 deletions
214
tasks/rosa/hosted-cp/rosa-hcp-provision/0.2/rosa-hcp-provision.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
apiVersion: tekton.dev/v1 | ||
kind: Task | ||
metadata: | ||
name: rosa-hcp-provision | ||
spec: | ||
description: | | ||
The `rosa-hcp-provision` task automates the creation and provisioning of an ephemeral OpenShift cluster using Red Hat OpenShift on AWS (ROSA) with Hosted Control Planes (HCP). | ||
The task takes several parameters, including the OpenShift version, AWS machine type, and cluster name, to configure and deploy the cluster on AWS. | ||
It uses credentials stored in a Kubernetes secret for authentication and configuration of AWS and ROSA. | ||
Once the cluster is provisioned, the task outputs a login command to access the newly created cluster, which can be used in subsequent pipeline steps. | ||
results: | ||
- name: ocp-login-command | ||
description: Command to log in to the newly ephemeral OpenShift cluster. | ||
params: | ||
- name: ocp-version | ||
type: string | ||
description: The version of the OpenShift Container Platform (OCP) to deploy. This will be used to fetch the corresponding HCP version for deployment. | ||
- name: cluster-name | ||
type: string | ||
description: The unique name of the OpenShift cluster to be created. | ||
- name: machine-type | ||
type: string | ||
description: The AWS EC2 instance type to be used for the worker nodes of the OpenShift cluster (e.g., m5.xlarge). | ||
- name: replicas | ||
type: string | ||
description: The number of worker nodes to provision in the cluster. Defaults to 3 worker nodes. | ||
default: '3' | ||
- name: konflux-test-infra-secret | ||
type: string | ||
description: The name of the Kubernetes secret that contains AWS and ROSA configuration credentials needed for cluster provisioning. | ||
- name: cloud-credential-key | ||
type: string | ||
description: The key within the secret where AWS ROSA configurations (e.g., credentials, roles) are stored. | ||
- name: oci-container | ||
type: string | ||
description: The ORAS container registry URI where artifacts will be stored. | ||
volumes: | ||
- name: konflux-test-infra-volume | ||
secret: | ||
secretName: "$(params.konflux-test-infra-secret)" | ||
steps: | ||
- name: provision | ||
image: quay.io/konflux-qe-incubator/konflux-qe-tools:latest | ||
onError: continue | ||
volumeMounts: | ||
- name: konflux-test-infra-volume | ||
mountPath: /usr/local/konflux-test-infra | ||
workingDir: /workspace/cluster-provision | ||
env: | ||
- name: CLUSTER_NAME | ||
value: "$(params.cluster-name)" | ||
- name: OCP_VERSION | ||
value: "$(params.ocp-version)" | ||
- name: MACHINE_TYPE | ||
value: "$(params.machine-type)" | ||
script: | | ||
set -o errexit | ||
set -o nounset | ||
set -o pipefail | ||
export ROSA_TOKEN AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY BILLING_ACCOUNT_ID AWS_OIDC_CONFIG_ID OPERATOR_ROLES_PREFIX \ | ||
SUBNET_IDS INSTALL_ROLE_ARN SUPPORT_ROLE_ARN WORKER_ROLE_ARN REGION | ||
ROSA_TOKEN=$(jq -r '.aws["rosa-hcp"]["rosa-token"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
AWS_ACCESS_KEY_ID=$(jq -r '.aws["access-key-id"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
AWS_SECRET_ACCESS_KEY=$(jq -r '.aws["access-key-secret"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
BILLING_ACCOUNT_ID=$(jq -r '.aws["aws-account-id"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
AWS_OIDC_CONFIG_ID=$(jq -r '.aws["rosa-hcp"]["aws-oidc-config-id"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
OPERATOR_ROLES_PREFIX=$(jq -r '.aws["rosa-hcp"]["operator-roles-prefix"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
SUBNET_IDS=$(jq -r '.aws["rosa-hcp"]["subnets-ids"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
INSTALL_ROLE_ARN=$(jq -r '.aws["rosa-hcp"]["install-role-arn"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
SUPPORT_ROLE_ARN=$(jq -r '.aws["rosa-hcp"]["support-role-arn"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
WORKER_ROLE_ARN=$(jq -r '.aws["rosa-hcp"]["worker-role-arn"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
REGION=$(jq -r '.aws["region"]' /usr/local/konflux-test-infra/$(params.cloud-credential-key)) | ||
main() { | ||
config_aws_creds() { | ||
printf "INFO: Configure AWS Credentials...\n" | ||
aws configure set aws_access_key_id "$AWS_ACCESS_KEY_ID" | ||
aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY" | ||
aws configure set region "$REGION" | ||
} | ||
print_debug_info() { | ||
printf "INFO: Print debug info......\n" | ||
rosa --region "$REGION" describe cluster --cluster="$CLUSTER_NAME" | ||
} | ||
# Even the cluster is shown ready on ocm side, and the cluster operators are available, some of the cluster operators are still progressing. | ||
check_clusteroperators() { | ||
local STATUS_LOG="co_status.log" | ||
local max_attempts=10 | ||
local attempt | ||
echo "[INFO] Checking cluster operators' status..." | ||
# retrying to get clusteroperator. Makes sense in case master nodes are not ready | ||
for attempt in $(seq 1 $max_attempts); do | ||
echo "[INFO] Attempt $attempt/$max_attempts" | ||
if kubectl get clusteroperators -A > "$STATUS_LOG" 2>&1; then | ||
cat "$STATUS_LOG" | ||
echo "[INFO] Cluster operators are accessible." | ||
break | ||
fi | ||
echo "[INFO] Attempt $attempt failed, retrying in 10 seconds..." | ||
sleep 10 | ||
done | ||
if [ $attempt -eq $max_attempts ]; then | ||
echo "[ERROR] All attempts to access cluster operators failed. Check $STATUS_LOG for details." | ||
cat "$STATUS_LOG" | ||
return 1 | ||
fi | ||
echo "[INFO] Waiting for cluster operators to be in 'Progressing=false' state..." | ||
kubectl wait clusteroperators --all --for=condition=Progressing=false --timeout=60m > "$STATUS_LOG" 2>&1 | ||
cat "$STATUS_LOG" | ||
} | ||
get_hcp_full_version() { | ||
rosa_output=$(rosa list version --channel-group stable --region "$REGION" --hosted-cp -o json) | ||
raw_id=$(echo "$rosa_output" | jq -r "[.[].raw_id | select(startswith(\"$OCP_VERSION\"))] | max") | ||
HCP_FULL_VERSION="$raw_id" | ||
if [ -z "$HCP_FULL_VERSION" ]; then | ||
echo "Failed to get the HCP full version of $OCP_VERSION" >&2 | ||
exit 1 | ||
fi | ||
} | ||
deploy_cluster() { | ||
printf "INFO: Log in to your Red Hat account...\n" | ||
config_aws_creds | ||
rosa login --token="$ROSA_TOKEN" | ||
printf "INFO: Create ROSA with HCP cluster...\n" | ||
get_hcp_full_version | ||
rosa create cluster --cluster-name "$CLUSTER_NAME" \ | ||
--sts --mode=auto --oidc-config-id "$AWS_OIDC_CONFIG_ID" \ | ||
--operator-roles-prefix "$OPERATOR_ROLES_PREFIX" --region "$REGION" --version "$HCP_FULL_VERSION" \ | ||
--role-arn "$INSTALL_ROLE_ARN" \ | ||
--support-role-arn "$SUPPORT_ROLE_ARN" \ | ||
--worker-iam-role "$WORKER_ROLE_ARN" \ | ||
--compute-machine-type "$MACHINE_TYPE" \ | ||
--subnet-ids="$SUBNET_IDS" \ | ||
--billing-account "$BILLING_ACCOUNT_ID" \ | ||
--replicas $(params.replicas) \ | ||
--tags konflux-ci:true,creation-date:$(date -u +"%Y-%m-%d"),cluster-type:rosa-hcp \ | ||
--hosted-cp -y | ||
printf "INFO: Track the progress of the cluster creation...\n" | ||
rosa logs install --cluster="$CLUSTER_NAME" --region "$REGION" --watch | ||
printf "INFO: ROSA with HCP cluster is ready, create a cluster admin account for accessing the cluster\n" | ||
admin_output="$(rosa create admin --region "$REGION" --cluster="$CLUSTER_NAME")" | ||
# Get the admin account credentials and API server URL | ||
admin_user="$(echo "$admin_output" | grep -oP '(?<=--username ).*(?= --password)')" | ||
admin_pass="$(echo "$admin_output" | grep -oP '(?<=--password ).*')" | ||
api_url="$(echo "$admin_output" | grep -oP '(?<=oc login ).*(?= --username)')" | ||
printf "INFO: Storing login command...\n" | ||
echo "oc login $api_url --username $admin_user --password $admin_pass" > $(results.ocp-login-command.path) | ||
# Use the admin account to login to the cluster in a loop until the account is active. | ||
printf "INFO: Check if it's able to login to OCP cluster...\n" | ||
max_retries=10 | ||
retries=0 | ||
while ! oc login "$api_url" --username "$admin_user" --password "$admin_pass" >/dev/null 2>&1; do | ||
if [ "$retries" -eq "$max_retries" ]; then | ||
echo "ERROR: Failed to login the cluster." >&2 | ||
print_debug_info | ||
exit 1 | ||
fi | ||
sleep 60 | ||
retries=$((retries + 1)) | ||
echo "Retried $retries times..." | ||
done | ||
#Workaround: Check if apiserver is ready by calling kubectl get nodes | ||
printf "INFO: Check if apiserver is ready...\n" | ||
if ! timeout 300s bash -c "while ! kubectl get nodes >/dev/null 2>/dev/null; do printf '.'; sleep 10; done"; then | ||
echo "ERROR: API server is not ready" >&2 | ||
exit 1 | ||
fi | ||
check_clusteroperators | ||
} | ||
deploy_cluster | ||
} | ||
main 2>&1 | tee cluster-provision.log | ||
- name: secure-push-oci | ||
ref: | ||
resolver: git | ||
params: | ||
- name: url | ||
value: https://github.com/konflux-ci/tekton-integration-catalog.git | ||
- name: revision | ||
value: main | ||
- name: pathInRepo | ||
value: stepactions/secure-push-oci/0.1/secure-push-oci.yaml | ||
params: | ||
- name: workdir-path | ||
value: /workspace/cluster-provision | ||
- name: oci-ref | ||
value: $(params.oci-container) | ||
- name: credentials-volume-name | ||
value: konflux-test-infra-volume | ||
- name: fail-if-any-step-failed | ||
ref: | ||
resolver: git | ||
params: | ||
- name: url | ||
value: https://github.com/konflux-ci/tekton-integration-catalog.git | ||
- name: revision | ||
value: main | ||
- name: pathInRepo | ||
value: stepactions/fail-if-any-step-failed/0.1/fail-if-any-step-failed.yaml |