-
Notifications
You must be signed in to change notification settings - Fork 149
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fork accelerator builds to use the EL + kickstart build (#2398)
* Fork accelerator builds to use the EL + kickstart build More native than the derivative images we were building * Update forked path Forgot to save this file the first time * Cleanup preview image builds, update nvidia version ref * Remove old preview builds using derivative images * Adjust EL builds for Rocky to use "latest" instead of "550" * Update workflow name Missed a reference to 550 * Update image descriptions Not LTS, but just latest * Fix repo status Deprecated, not used
- Loading branch information
Showing
5 changed files
with
323 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
261 changes: 261 additions & 0 deletions
261
...kflows/image_build/enterprise_linux/kickstart/rocky_linux_8_optimized_gcp_accelerator.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,261 @@ | ||
# rocky-linux-8-optimized-gcp-options.cfg | ||
|
||
### Anaconda installer configuration. | ||
# Install in text mode. | ||
text --non-interactive | ||
# The install tree is the Rocky SIG/Cloud "cloud-kernel" repository. kernel and | ||
# kernel-core are excluded from BaseOS so they are not pulled from there. | ||
url --url="https://dl.rockylinux.org/pub/sig/8/cloud/x86_64/cloud-kernel" | ||
repo --name=BaseOS --baseurl="https://dl.rockylinux.org/pub/rocky/8/BaseOS/x86_64/os" --excludepkgs="kernel,kernel-core" | ||
repo --name=AppStream --baseurl="https://dl.rockylinux.org/pub/rocky/8/AppStream/x86_64/os" | ||
repo --name=PowerTools --baseurl="https://dl.rockylinux.org/pub/rocky/8/PowerTools/x86_64/os" | ||
# Power the machine off once the installation has finished. | ||
poweroff | ||
|
||
# Network configuration | ||
# Use DHCP; "link" lets the installer pick the device instead of naming one. | ||
network --bootproto=dhcp --device=link | ||
|
||
### Installed system configuration. | ||
firewall --enabled | ||
services --enabled=sshd,rngd --disabled=sshd-keygen@ | ||
skipx | ||
# Hardware clock in UTC, timezone UTC, time synced from the metadata server. | ||
timezone --utc UTC --ntpservers=metadata.google.internal | ||
# Lock the root account; "*" is not a usable password hash. | ||
rootpw --iscrypted --lock * | ||
firstboot --disabled | ||
# Locked placeholder account; the main %post script deletes it with userdel. | ||
user --name=gce --lock | ||
|
||
### Disk configuration. | ||
# Disk configuration is done by including a separate file with disk configuration, otherwise anaconda will try to validate that the disk exists before we configure udev rules. | ||
%pre --interpreter=/usr/bin/bash | ||
# Copy the GCE disk-naming udev rule and its google_nvme_id helper from | ||
# /run/install/isodir into the installer environment. | ||
cp /run/install/isodir/65-gce-disk-naming.rules /etc/udev/rules.d/ | ||
cp /run/install/isodir/google_nvme_id /usr/lib/udev/ | ||
chmod +x /usr/lib/udev/google_nvme_id | ||
# Wait for coldplug events from boot to settle, or we won't generate new events for the reload/trigger | ||
udevadm settle | ||
# Load the new rule and replay device events so it is applied to existing | ||
# disks (presumably this creates the google-* by-id links used below). | ||
udevadm control --reload | ||
udevadm trigger --settle | ||
# Append the bootloader and partitioning commands to /tmp/disk-config (tee also | ||
# echoes them to stdout). Everything up to EOM is kickstart text, not shell. | ||
tee -a /tmp/disk-config << EOM | ||
# build_installer.py will replace with the id of the install disk to avoid race conditions | ||
bootloader --boot-drive=/dev/disk/by-id/google-el-install-disk --timeout=0 --append="net.ifnames=0 biosdevname=0 scsi_mod.use_blk_mq=Y" | ||
# EFI partitioning, creates a GPT partitioned disk. | ||
clearpart --drives=/dev/disk/by-id/google-el-install-disk --all | ||
part /boot/efi --size=200 --fstype=efi --ondrive=/dev/disk/by-id/google-el-install-disk | ||
part / --size=100 --grow --ondrive=/dev/disk/by-id/google-el-install-disk --label=root --fstype=xfs | ||
EOM | ||
%end | ||
# Pull in the disk layout written by the %pre script above. | ||
%include /tmp/disk-config | ||
|
||
# packages.cfg | ||
# Contains a list of packages to be installed, or not, on all flavors. | ||
# The %packages command begins the package selection section of kickstart. | ||
# Packages can be specified by group, or package name. @Base and @Core are | ||
# always selected by default so they do not need to be specified. | ||
# A leading "-" marks a package to be excluded from the installation. | ||
|
||
%packages | ||
acpid | ||
dhcp-client | ||
dnf-automatic | ||
net-tools | ||
openssh-server | ||
python3 | ||
rng-tools | ||
tar | ||
vim | ||
-subscription-manager | ||
-alsa-utils | ||
-b43-fwcutter | ||
-dmraid | ||
-eject | ||
-gpm | ||
-irqbalance | ||
-microcode_ctl | ||
-smartmontools | ||
-aic94xx-firmware | ||
-atmel-firmware | ||
-b43-openfwwf | ||
-bfa-firmware | ||
-ipw2100-firmware | ||
-ipw2200-firmware | ||
-ivtv-firmware | ||
-iwl100-firmware | ||
-iwl1000-firmware | ||
-iwl3945-firmware | ||
-iwl4965-firmware | ||
-iwl5000-firmware | ||
-iwl5150-firmware | ||
-iwl6000-firmware | ||
-iwl6000g2a-firmware | ||
-iwl6050-firmware | ||
-kernel-firmware | ||
-libertas-usb8388-firmware | ||
-ql2100-firmware | ||
-ql2200-firmware | ||
-ql23xx-firmware | ||
-ql2400-firmware | ||
-ql2500-firmware | ||
-rt61pci-firmware | ||
-rt73usb-firmware | ||
-xorg-x11-drv-ati-firmware | ||
-zd1211-firmware | ||
%end | ||
|
||
%post | ||
# Write the dnf repository definitions for the installed system. Each heredoc | ||
# is appended verbatim to the named .repo file (tee also echoes it to stdout). | ||
# The second gpgkey URL is a continuation line and must stay indented, or it | ||
# is not read as part of the gpgkey value. | ||
tee -a /etc/yum.repos.d/google-cloud.repo << EOM | ||
[google-compute-engine] | ||
name=Google Compute Engine | ||
baseurl=https://packages.cloud.google.com/yum/repos/google-compute-engine-el8-x86_64-stable | ||
enabled=1 | ||
gpgcheck=1 | ||
repo_gpgcheck=0 | ||
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg | ||
       https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg | ||
EOM | ||
tee -a /etc/yum.repos.d/google-cloud.repo << EOM | ||
[google-cloud-sdk] | ||
name=Google Cloud SDK | ||
baseurl=https://packages.cloud.google.com/yum/repos/cloud-sdk-el8-x86_64 | ||
enabled=1 | ||
gpgcheck=1 | ||
repo_gpgcheck=0 | ||
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg | ||
       https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg | ||
EOM | ||
|
||
# Rocky Linux Cloud Kernel repo. | ||
# priority=-1 ranks these repos ahead of repos using the default priority. | ||
tee -a /etc/yum.repos.d/Rocky-CloudKernel.repo << EOM | ||
[cloud-kernel] | ||
name=Rocky Linux 8 - Cloud Kernel | ||
baseurl=https://dl.rockylinux.org/pub/sig/8/cloud/x86_64/cloud-kernel | ||
enabled=1 | ||
gpgcheck=1 | ||
gpgkey=https://dl.rockylinux.org/pub/sig/8/cloud/x86_64/cloud-kernel/RPM-GPG-KEY-Rocky-SIG-Cloud | ||
priority=-1 | ||
EOM | ||
tee -a /etc/yum.repos.d/Rocky-CloudKernel.repo << EOM | ||
[cloud-kernel-source] | ||
name=Rocky Linux 8 - Cloud Kernel Source | ||
baseurl=https://dl.rockylinux.org/pub/sig/8/cloud/source/cloud-kernel | ||
enabled=0 | ||
gpgcheck=1 | ||
gpgkey=https://dl.rockylinux.org/pub/sig/8/cloud/x86_64/cloud-kernel/RPM-GPG-KEY-Rocky-SIG-Cloud | ||
priority=-1 | ||
EOM | ||
# Be sure we don't get kernels from the BaseOS repo | ||
# The exclude line is appended to the end of the file, so it applies to the | ||
# last section defined in Rocky-BaseOS.repo. | ||
tee -a /etc/yum.repos.d/Rocky-BaseOS.repo << EOM | ||
exclude=kernel* | ||
EOM | ||
%end | ||
# Google Compute Engine kickstart config for Enterprise Linux 8. | ||
# Error handler: report the failure on the serial console, then halt. | ||
%onerror | ||
echo "Build Failed!" > /dev/ttyS0 | ||
shutdown -h now | ||
%end | ||
|
||
%post --erroronfail | ||
# Main configuration script for the installed system. | ||
# NOTE(review): there is no `set -e`, so the script's exit status is that of | ||
# its last command; earlier failures will not trip --erroronfail. Confirm this | ||
# is intended. | ||
# Trace every command and send all stdout/stderr to the serial console. | ||
set -x | ||
exec &> /dev/ttyS0 | ||
# Delete the dummy user account. | ||
userdel -r gce | ||
|
||
# Import all RPM GPG keys. | ||
# Download the Google and Rocky SIG/Cloud keys next to the distribution keys, | ||
# then import everything found in /etc/pki/rpm-gpg. | ||
curl -o /etc/pki/rpm-gpg/google-rpm-package-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg | ||
curl -o /etc/pki/rpm-gpg/google-key.gpg https://packages.cloud.google.com/yum/doc/yum-key.gpg | ||
curl -o /etc/pki/rpm-gpg/RPM-GPG-KEY-Rocky-SIG-Cloud https://dl.rockylinux.org/pub/sig/8/cloud/x86_64/cloud-kernel/RPM-GPG-KEY-Rocky-SIG-Cloud | ||
rpm --import /etc/pki/rpm-gpg/* | ||
|
||
# Configure the network for GCE. | ||
# Given that GCE users typically control the firewall at the network API level, | ||
# we want to leave the standard Linux firewall setup enabled but all-open. | ||
firewall-offline-cmd --set-default-zone=trusted | ||
|
||
# Append a shorter retry interval to the dhclient configuration. The lines up | ||
# to EOL are written to the file as-is. | ||
cat >>/etc/dhcp/dhclient.conf <<EOL | ||
# Set the dhclient retry interval to 10 seconds instead of 5 minutes. | ||
retry 10; | ||
EOL | ||
|
||
# Set google-compute-engine config for EL8. | ||
# The lines up to EOL are appended to instance_configs.cfg.distro as-is. | ||
cat >>/etc/default/instance_configs.cfg.distro << EOL | ||
# Disable boto plugin setup. | ||
[InstanceSetup] | ||
set_boto_config = false | ||
EOL | ||
|
||
# Install GCE guest packages. | ||
dnf install -y google-compute-engine google-osconfig-agent gce-disk-expand | ||
|
||
# Install the Cloud SDK package. | ||
dnf install -y google-cloud-cli | ||
|
||
# Install Accelerator components: nvidia and mellanox drivers | ||
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo | ||
# Toolchain plus the kernel and matching headers the drivers are built against. | ||
dnf install -y gcc make kernel-devel kernel | ||
# Create the marker file if it does not already exist. | ||
test -f /var/tmp/kernel-upgrade-done || sh -c 'touch /var/tmp/kernel-upgrade-done' | ||
# This script runs chrooted while the installer's own kernel is booted, so | ||
# $(uname -r) does not name the kernel installed above. Build against the | ||
# newest installed kernel-devel instead. | ||
accel_kver="$(rpm -q --qf '%{VERSION}-%{RELEASE}.%{ARCH}\n' kernel-devel | sort -V | tail -n 1)" | ||
curl -L -o nvidia.run https://us.download.nvidia.com/tesla/550.90.12/NVIDIA-Linux-x86_64-550.90.12.run | ||
chmod +x ./nvidia.run | ||
# DKMS - not suitable for prod | ||
# -s runs the installer silently; --kernel-name selects the non-running target | ||
# kernel and --kernel-source-path points at its headers. | ||
./nvidia.run -s --kernel-name="${accel_kver}" --kernel-source-path="/usr/src/kernels/${accel_kver}/" | ||
# The installer is only needed at build time; do not ship it in the image. | ||
rm -f ./nvidia.run | ||
# Build dependencies for the Mellanox OFED installer. | ||
dnf install -y createrepo gdb-headless libtool autoconf rpm-build kernel-rpm-macros patch automake wget lsof tk gcc-gfortran tcl pciutils | ||
wget https://content.mellanox.com/ofed/MLNX_OFED-23.10-3.2.2.0/MLNX_OFED_LINUX-23.10-3.2.2.0-rhel8.9-x86_64.tgz | ||
tar xf MLNX_OFED_LINUX-23.10-3.2.2.0-rhel8.9-x86_64.tgz | ||
cd MLNX_OFED_LINUX-23.10-3.2.2.0-rhel8.9-x86_64 | ||
# NOTE(review): mlnxofedinstall presumably also defaults to the running kernel; | ||
# consider passing the target kernel explicitly - confirm against MLNX_OFED docs. | ||
./mlnxofedinstall --guest --force --skip-distro-check --add-kernel-support | ||
cd .. | ||
# Remove the unpacked tree and the tarball so they do not end up in the image. | ||
rm -rf MLNX_OFED_LINUX-23.10-3.2.2.0-rhel8.9-x86_64 MLNX_OFED_LINUX-23.10-3.2.2.0-rhel8.9-x86_64.tgz | ||
|
||
# Copy the kickstart file from /run/install/ks.cfg to /tmp/anaconda-ks.cfg | ||
# (presumably so it can be collected with the build logs - confirm). | ||
cp /run/install/ks.cfg /tmp/anaconda-ks.cfg | ||
|
||
# Remove files which shouldn't make it into the image. It's possible these files | ||
# will not exist. | ||
rm -f /etc/boto.cfg /etc/udev/rules.d/70-persistent-net.rules | ||
|
||
# Remove ens4 config from installer. | ||
rm -f /etc/sysconfig/network-scripts/ifcfg-ens4 | ||
|
||
# Disable password authentication by default. | ||
sed -i -e '/^PasswordAuthentication /s/ yes$/ no/' /etc/ssh/sshd_config | ||
|
||
# Set ServerAliveInterval and ClientAliveInterval to prevent SSH | ||
# disconnections. The pattern match is tuned to each source config file. | ||
# The $'...' quoting syntax tells the shell to expand escape characters. | ||
# First drop any existing tab-indented ServerAliveInterval line, then add one | ||
# after the "Host *" line; on the server side rewrite the whole | ||
# ClientAliveInterval line. | ||
sed -i -e $'/^\tServerAliveInterval/d' /etc/ssh/ssh_config | ||
sed -i -e $'/^Host \\*$/a \\\tServerAliveInterval 420' /etc/ssh/ssh_config | ||
sed -i -e '/ClientAliveInterval/s/^.*/ClientAliveInterval 420/' /etc/ssh/sshd_config | ||
|
||
# Disable root login via SSH by default. | ||
sed -i -e '/PermitRootLogin yes/s/^.*/PermitRootLogin no/' /etc/ssh/sshd_config | ||
|
||
# Update all packages. | ||
dnf -y update | ||
|
||
# Make changes to dnf automatic.conf | ||
# Apply updates for security (RHEL) by default. NOTE this will not work in CentOS. | ||
sed -i 's/upgrade_type =.*/upgrade_type = security/' /etc/dnf/automatic.conf | ||
sed -i 's/apply_updates =.*/apply_updates = yes/' /etc/dnf/automatic.conf | ||
# Enable the DNF automatic timer service. | ||
systemctl enable dnf-automatic.timer | ||
|
||
# Clean up these repos - we don't want to continue updating with them. | ||
# Depending which repos are used in build, one or more of these files will not | ||
# exist. | ||
rm -f /etc/yum.repos.d/google-cloud-unstable.repo \ | ||
/etc/yum.repos.d/google-cloud-staging.repo | ||
|
||
# Clean up the cache for smaller images. | ||
dnf clean all | ||
rm -fr /var/cache/dnf/* | ||
|
||
# Blacklist the floppy module. | ||
echo "blacklist floppy" > /etc/modprobe.d/blacklist-floppy.conf | ||
restorecon /etc/modprobe.d/blacklist-floppy.conf | ||
|
||
# Generate initramfs for an installed kernel instead of the running kernel. | ||
# kver is the most recently modified entry in /lib/modules (ls -t sorts by | ||
# mtime, not by version). | ||
kver="$(ls -t /lib/modules | head -n1)" | ||
dracut -f --kver="${kver}" | ||
|
||
# Fix selinux contexts on /etc/resolv.conf. | ||
restorecon /etc/resolv.conf | ||
%end | ||
|
||
# Cleanup. | ||
# Runs outside the chroot with output logged to the serial console; the | ||
# installed system's /tmp is reached as /mnt/sysimage/tmp and is emptied. | ||
%post --nochroot --log=/dev/ttyS0 | ||
set -x | ||
rm -Rf /mnt/sysimage/tmp/* | ||
%end |
Oops, something went wrong.