Skip to content

Commit

Permalink
PENG-2372 patch the demo cluster to install the agents from the Snap Store
Browse files Browse the repository at this point in the history
  • Loading branch information
matheushent committed Sep 27, 2024
1 parent 4bcf7b4 commit a3f10d1
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 57 deletions.
38 changes: 3 additions & 35 deletions democluster/user-data
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ packages:
- mpich
- python3-venv
- apptainer
- snap:
- [vantage-agent, --channel=stable]
- [jobbergate-agent, --channel=stable]
bootcmd:
- mkdir /run/packer_backup
- mkdir /run/packer_backup/etc
Expand Down Expand Up @@ -390,38 +393,3 @@ runcmd:
EOF

- systemctl daemon-reload

# create agents' venv
- /usr/bin/python3 -m venv /srv/jobbergate-agent-venv
- /srv/jobbergate-agent-venv/bin/pip install -U pip
- /srv/jobbergate-agent-venv/bin/pip install jobbergate-agent
- |
cat <<EOF > /srv/jobbergate-agent-venv/.env
JOBBERGATE_AGENT_X_SLURM_USER_NAME=root
JOBBERGATE_AGENT_BASE_API_URL=https://apis.@ENVIRONMENT@vantagehpc.io
JOBBERGATE_AGENT_OIDC_DOMAIN=auth.@ENVIRONMENT@vantagehpc.io/realms/vantage
JOBBERGATE_AGENT_OIDC_AUDIENCE=https://apis.vantagehpc.io
JOBBERGATE_AGENT_OIDC_CLIENT_ID=@CLIENT_ID@
JOBBERGATE_AGENT_OIDC_CLIENT_SECRET=@CLIENT_SECRET@
JOBBERGATE_AGENT_TASK_JOBS_INTERVAL_SECONDS=30
JOBBERGATE_AGENT_TASK_SELF_UPDATE_INTERVAL_SECONDS=30
EOF
chown root:root /srv/jobbergate-agent-venv/.env
chmod 0644 /srv/jobbergate-agent-venv/.env

- /usr/bin/python3 -m venv /srv/vantage-agent-venv
- /srv/vantage-agent-venv/bin/pip install -U pip
- /srv/vantage-agent-venv/bin/pip install vantage-agent
- |
cat <<EOF > /srv/vantage-agent-venv/.env
VANTAGE_AGENT_BASE_API_URL=https://apis.@ENVIRONMENT@vantagehpc.io
VANTAGE_AGENT_OIDC_DOMAIN=auth.@ENVIRONMENT@vantagehpc.io/realms/vantage
VANTAGE_AGENT_OIDC_CLIENT_ID=@CLIENT_ID@
VANTAGE_AGENT_OIDC_CLIENT_SECRET=@CLIENT_SECRET@
VANTAGE_AGENT_TASK_JOBS_INTERVAL_SECONDS=30
VANTAGE_AGENT_TASK_SELF_UPDATE_INTERVAL_SECONDS=30
EOF
chown root:root /srv/vantage-agent-venv/.env
chmod 0644 /srv/vantage-agent-venv/.env

- systemctl daemon-reload
39 changes: 17 additions & 22 deletions public-scripts/deploy-democluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ launch_instance () {

# Derive the environment-specific settings from ENV.
# When ENV is unset or empty, ENVIRONMENT is the empty string and the
# API/OIDC hosts have no environment label; otherwise ENV is inserted as a
# subdomain label (and ENVIRONMENT carries it with a trailing dot).
# ENV is quoted and given an empty default so the test is always a
# well-formed `[ -z STRING ]`, even when ENV contains whitespace or
# test operators, or when the script runs under `set -u`.
if [ -z "${ENV:-}" ]; then
  ENVIRONMENT=""
  BASE_API_URL="https://apis.vantagehpc.io"
  OIDC_DOMAIN="auth.vantagehpc.io/realms/vantage"
else
  ENVIRONMENT="${ENV}."
  BASE_API_URL="https://apis.${ENV}.vantagehpc.io"
  OIDC_DOMAIN="auth.${ENV}.vantagehpc.io/realms/vantage"
fi

# Create the cloud-init file and launch the demo cluster instance.
Expand All @@ -91,34 +93,27 @@ runcmd:
REAL_MEMORY=\$(free -m | grep -oP '\\d+' | head -n 1)
sed -i "s|@REAL_MEMORY@|\$REAL_MEMORY|g" /etc/slurm/slurm.conf
- |
sed -i "s|@CLIENT_ID@|$CLIENT_ID|g" /srv/jobbergate-agent-venv/.env
sed -i "s|@CLIENT_SECRET@|$CLIENT_SECRET|g" /srv/jobbergate-agent-venv/.env
sed -i "s|@ENVIRONMENT@|$ENVIRONMENT|g" /srv/jobbergate-agent-venv/.env
- |
sed -i "s|@CLIENT_ID@|$CLIENT_ID|g" /srv/vantage-agent-venv/.env
sed -i "s|@CLIENT_SECRET@|$CLIENT_SECRET|g" /srv/vantage-agent-venv/.env
sed -i "s|@ENVIRONMENT@|$ENVIRONMENT|g" /srv/vantage-agent-venv/.env
- systemctl start slurmrestd
- systemctl restart slurmdbd
- systemctl restart slurmd
- sleep 30
- systemctl restart slurmctld
- scontrol update NodeName=\$(hostname) State=RESUME
- systemctl start jobbergate-agent
- systemctl start vantage-agent
- snap set vantage-agent base-api-url=$BASE_API_URL
- snap set vantage-agent oidc-client-id=$CLIENT_ID
- snap set vantage-agent oidc-client-secret=$CLIENT_SECRET
- snap set vantage-agent task-jobs-interval-seconds=30
- snap set vantage-agent task-self-update-interval-seconds=30
- snap set jobbergate-agent base-api-url=$BASE_API_URL
- snap set jobbergate-agent oidc-client-id=$CLIENT_ID
- snap set jobbergate-agent oidc-client-secret=$CLIENT_SECRET
- snap set jobbergate-agent task-jobs-interval-seconds=30
- snap set jobbergate-agent task-self-update-interval-seconds=30
- snap set jobbergate-agent x-slurm-user-name=root
- snap start vantage-agent.start
- snap start jobbergate-agent.start
EOF

# Optional version pin for jobbergate-agent: when JG_VERSION is non-empty,
# append three entries to the generated cloud-init file that stop the
# service, install that exact version into the agent's venv, and start the
# service again.
if [ -n "${JG_VERSION}" ]; then
  {
    echo " - systemctl stop jobbergate-agent"
    echo " - /srv/jobbergate-agent-venv/bin/pip install -U jobbergate-agent==$JG_VERSION"
    echo " - systemctl start jobbergate-agent"
  } >> /tmp/cloud-init.yaml
fi
# Same optional pin for vantage-agent, driven by VTG_VERSION.
if [ -n "${VTG_VERSION}" ]; then
  {
    echo " - systemctl stop vantage-agent"
    echo " - /srv/vantage-agent-venv/bin/pip install -U vantage-agent==$VTG_VERSION"
    echo " - systemctl start vantage-agent"
  } >> /tmp/cloud-init.yaml
fi
mkdir -p $HOME/democluster/tmp

cat /tmp/cloud-init.yaml | multipass launch -c$(nproc) \
Expand Down

0 comments on commit a3f10d1

Please sign in to comment.