Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

Commit

Permalink
PR #1832: ch-fromhost: tidy
Browse files Browse the repository at this point in the history
  • Loading branch information
reidpr authored Feb 7, 2024
1 parent 48c8b36 commit 0ff0c5c
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 84 deletions.
157 changes: 81 additions & 76 deletions bin/ch-fromhost
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
# source:destination pairs separated by newlines, then walk through them and
# copy them into the image.
#
# The colon separator is to avoid the difficulty of iterating through a sequence
# of pairs with no arrays or structures in POSIX sh. We could avoid it by
# taking action immediately upon encountering each file in the argument list,
# but that would (a) yield a half-injected image for basic errors like
# The colon separator is to avoid the difficulty of iterating through a
# sequence of pairs with no arrays or structures in POSIX sh. We could avoid
# it by taking action immediately upon encountering each file in the argument
# list, but that would (a) yield a half-injected image for basic errors like
# misspellings on the command line and (b) would require the image to be first
# on the command line, which seems awkward.
#
Expand Down Expand Up @@ -62,8 +62,8 @@ Destination within image:
Options:
--print-fi print inferred destination for libfabric provider(s)
--print-cray-fi print inferred destination for libfabric replacement
--print-fi print inferred destination for libfabric provider(s)
--print-lib print inferred destination for shared libraries
--no-ldconfig don’t run ldconfig even if we injected shared libraries
-h, --help print this help and exit
Expand Down Expand Up @@ -93,10 +93,6 @@ print_fi_dest=
print_lib_dest=
no_ldconfig=

# Log the single message argument ($1) through DEBUG, passing ' %s\n' as the
# leading argument. NOTE(review): presumably DEBUG treats that first argument
# as a printf-style format so the message comes out indented -- confirm
# against DEBUG's definition, which is not visible here.
debug_indent () {
DEBUG ' %s\n' "$1"
}

# Guard: fail through FATAL when a required value is empty.
#
#   $1  label that is interpolated into the error message
#   $2  value to check; the empty string (or a missing argument) is rejected
#
# When $2 is non-empty nothing is printed and the status is zero.
ensure_nonempty () {
    if [ -z "$2" ]; then
        FATAL -- "$1 must not be empty"
    fi
}
Expand Down Expand Up @@ -128,7 +124,7 @@ enqueue_file () {
old_ifs="$IFS"
IFS="$newline"
d="${dest:-$2}"
DEBUG "enqueue file(s)"
VERBOSE "enqueue file(s)"
for f in $1; do
case $f in
*:*)
Expand All @@ -139,16 +135,16 @@ enqueue_file () {
case $f in
*libfabric.so)
if ldd "$f" | grep libcxi > /dev/null 2>&1; then
debug_indent "cray libfabric: ${f}"
DEBUG "cray libfabric: ${f}"
cray_fi_found=yes
host_libfabric=$f
else
debug_indent "libfabric: ${f}"
DEBUG "libfabric: ${f}"
lib_found=yes
fi
;;
*-fi.so)
debug_indent "libfabric shared provider: ${f}"
DEBUG "libfabric shared provider: ${f}"
fi_prov_found=yes
# Providers, like Cray's libgnix-fi.so, link against paths that
# need to be bind-mounted at run-time. Some of these paths need
Expand All @@ -159,13 +155,14 @@ enqueue_file () {
ld=$(dirname "$(readlink -f "$l")")
# Avoid duplicates and host libfabric.so.
if [ "$(echo "$ld_conf" | grep -c "$ld")" -eq 0 ] \
&& [ "$(echo "$ld" | grep -c "libfabric.so")" -eq 0 ]; then
&& [ "$(echo "$ld" | grep -c "libfabric.so")" -eq 0 ]; \
then
enqueue_ldconf "$ld"
fi
done
;;
*)
debug_indent "shared library: ${f}"
DEBUG "shared library: ${f}"
lib_found=yes
;;
esac
Expand Down Expand Up @@ -297,27 +294,27 @@ fi
if [ -n "$cray_fi_found" ]; then
# There is no Slingshot provider CXI; to leverage slingshot we need to
# replace the image libfabric.so with Cray's.
DEBUG "searching image for inferred libfabric destiation"
VERBOSE "searching image for inferred libfabric destiation"
img_libfabric=$(find "$image" -name "libfabric.so")
[ -n "$img_libfabric" ] || FATAL "libfabric.so not found in $image"
debug_indent "found $img_libfabric"
DEBUG "found $img_libfabric"
if [ "$(echo "$img_libfabric" | wc -l)" -ne 1 ]; then
warn 'found more than one libfabric.so'
fi
img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@")
cray_fi_dest=$(dirname "/$img_libfabric_path")

# Since cray's libfabric isn't a standard provider, to use slingshot we must
# also add any missing linked libraries from the host.
DEBUG "adding cray libfabric libraries"
# Since cray's libfabric isn't a standard provider, to use slingshot we
# must also add any missing linked libraries from the host.
VERBOSE "adding cray libfabric libraries"
ldds=$(ldd "$host_libfabric" 2>&1 | grep lib | awk '{print $3}' | sort -u)
for l in $ldds; do
# Do not replace any libraries found in the image, experimentation has
# shown this to be problematic. Perhaps revisit in the future. For now,
# both MPICH and OpenMPI examples work with this conservative approach.
file_found=$(find "${image}" -name "$(basename "$l")")
if [ -n "$file_found" ]; then
debug_indent "skipping $l"
DEBUG "skipping $l"
continue
fi
enqueue_file "$l"
Expand All @@ -333,33 +330,37 @@ if [ -n "$lib_found" ]; then
# We want to put the libraries in the first directory that ldconfig
# searches, so that we can override (or overwrite) any of the same library
# that may already be in the image.
DEBUG "asking ldconfig for inferred shared library destination"
# "ldconfig -Nv" gives some pointless warnings on stderr even if
# successful; we don't want to show those to users. However, we don't want
# to simply pipe stderr to /dev/null because this hides real errors. Thus,
# use the following abomination to pipe stdout and stderr to *separate
# grep commands*. See: https://stackoverflow.com/a/31151808
VERBOSE "asking ldconfig for inferred shared library destination"
# "ldconfig -Nv" gives pointless warnings on stderr even if successful; we
# don't want to show those to users (unless -vv or higher). However, we
# don't want to simply pipe stderr to /dev/null because this hides real
# errors. Thus, use the following abomination to pipe stdout and stderr to
# *separate grep commands*. See: https://stackoverflow.com/a/31151808
if [ "$log_level" -lt 2 ]; then # VERBOSE or lower
stderr_filter='(^|dynamic linker, ignoring|given more than once|No such file or directory)$'
else # DEBUG or higher
stderr_filter=weird_al_yankovic_will_not_appear_in_ldconfig_output
fi
lib_dest=$( { "${ch_bin}/ch-run" "$image" -- /sbin/ldconfig -Nv \
2>&1 1>&3 3>&- | grep -Ev '(^|dynamic linker, ignoring|given more than once)$' ; } \
2>&1 1>&3 3>&- | grep -Ev "$stderr_filter" ; } \
3>&1 1>&2 | grep -E '^/' | cut -d: -f1 | head -1 )
[ -n "$lib_dest" ] || FATAL 'empty path from ldconfig'
[ -z "${lib_dest%%/*}" ] || FATAL "bad path from ldconfig: ${lib_dest}"
DEBUG "inferred shared library destination: ${image}/${lib_dest}"
VERBOSE "inferred shared library destination: ${image}/${lib_dest}"
fi

if [ -n "$fi_prov_found" ]; then
# The libfabric provider can be specified with FI_PROVIDER. The path to
# search for shared providers can be specified with FI_PROVIDER_PATH
# (undocumented). This complicates the inferred destination because these
# variables can be inherited from the host or explicitly set in the image's
# /ch/environment
# file.
# variables can be inherited from the host or explicitly set in the
# image's /ch/environment file.
#
# For simplicity, the inferred injection destination is always the
# 'libfabric' directory at the path where libfabric.so is found. If it does
# not exist, create it. Warn if FI_PROVIDER_PATH or FI_PROVIDER is found
# in the the image's /ch/environment file.
DEBUG "searching ${image} for libfabric shared provider destination"
# 'libfabric' directory at the path where libfabric.so is found. If it
# does not exist, create it. Warn if FI_PROVIDER_PATH or FI_PROVIDER is
# found in the image's /ch/environment file.
VERBOSE "searching ${image} for libfabric shared provider destination"
ch_env_p=$(grep -E '^FI_PROVIDER_PATH=' "${image}/ch/environment") \
|| true # avoid -e exit
ch_env_p=${ch_env_p##*=}
Expand All @@ -368,11 +369,11 @@ if [ -n "$fi_prov_found" ]; then
fi
img_libfabric=$(find "$image" -name 'libfabric.so')
img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@")
debug_indent "found: ${image}${img_libfabric_path}"
DEBUG "found: ${image}${img_libfabric_path}"
fi_prov_dest=$(dirname "/${img_libfabric_path}")
fi_prov_dest="${fi_prov_dest}/libfabric"
queue_mkdir "$fi_prov_dest"
DEBUG "inferred provider destination: $fi_prov_dest"
VERBOSE "inferred provider destination: $fi_prov_dest"
fi

if [ -n "$print_lib_dest" ]; then
Expand All @@ -394,70 +395,71 @@ if [ -f /etc/opt/cray/release/cle-release ]; then
queue_mkdir /var/lib/hugetlbfs
# UGNI
if [ ! -L /etc/opt/cray/release/cle-release ]; then
# ALPS libraries require the contents of this directory to be present at
# the same path as the host. Create the mount point here, then ch-run
# bind-mounts it later.
# ALPS libraries require the contents of this directory to be present
# at the same path as the host. Create the mount point here, then
# ch-run bind-mounts it later.
queue_mkdir /var/opt/cray/alps/spool

# The cray-ugni provider will link against cray's libwlm_detect so. Create
# the mount point for ch-run.
# The cray-ugni provider will link against cray's libwlm_detect.so.
# Create the mount point for ch-run.
queue_mkdir /opt/cray/wlm_detect

# libwlm_detect.so requires file(s) to present at the same path as the host.
# Create mount point for ch-run.
# libwlm_detect.so requires file(s) to be present at the same path as the
# host. Create mount point for ch-run.
queue_mkdir /etc/opt/cray/wlm_detect

# OFI uGNI provider, libgnix-fi.so, links against the Cray host's
# libxpmem, libudreg, libalpsutil, libalpslli, and libugni; create mount
# points for ch-run to use later.
# OFI uGNI provider, libgnix-fi.so, links against the Cray host's
# libxpmem, libudreg, libalpsutil, libalpslli, and libugni; create
# mount points for ch-run to use later.
queue_mkdir /opt/cray/udreg
queue_mkdir /opt/cray/xpmem
queue_mkdir /opt/cray/ugni
queue_mkdir /opt/cray/alps
fi
# CXI (slingshot)
if [ -f /opt/cray/etc/release/cos ]; then
# Newer Cray Shasta environments require the contents of this directory
# to be present at the same path as the host. Create mount points for
# ch-run to use later.
# Newer Cray Shasta environments require the contents of this
# directory to be present at the same path as the host. Create mount
# points for ch-run to use later.
queue_mkdir /var/spool/slurmd
fi
fi

[ "$inject_files" ] || FATAL "empty file list"

DEBUG "injecting into image: ${image}"
VERBOSE "injecting into image: ${image}"

old_ifs="$IFS"
IFS="$newline"

# Process unlink list.
for u in $inject_unlinks; do
debug_indent "rm -f ${image}${u}"
DEBUG "deleting: ${image}${u}"
rm -f "${image}${u}"
done

# Process bind-mount destination targets.
for d in $inject_mkdirs; do
debug_indent "mkdir -p ${image}${d}"
DEBUG "mkdir: ${image}${d}"
mkdir -p "${image}${d}"
done

# Process ldconfig targets.
if [ "$fi_prov_found" ] || [ "$cray_fi_found" ]; then
if [ ! -f "${image}/etc/ld.so.conf" ]; then
debug_indent "touch ${image}/etc/ld.so.conf"
DEBUG "creating empty ld.so.conf"
touch "${image}/etc/ld.so.conf"
fi
if ! grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" > /dev/null 2>&1; then
debug_indent "echo 'include ld.so.conf.d/*.conf' >> ${image}/etc/ld.so.conf"
if ! grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" \
> /dev/null 2>&1; then
DEBUG "ld.so.conf: adding 'include ld.so.conf.d/*.conf'"
echo 'include ld.so.conf.d/*.conf' >> "${image}/etc/ld.so.conf"
fi
# Prepare image ch-ofi.conf.
printf '' > "${image}/etc/ld.so.conf.d/ch-ofi.conf"
# add ofi dso provider ld library dirs.
for c in $ld_conf; do
debug_indent "echo '$c' >> ${image}/etc/ld.so.conf.d/ch-ofi.conf"
DEBUG "ld.so.conf: adding ${c}"
echo "$c" >> "${image}/etc/ld.so.conf.d/ch-ofi.conf"
done
fi
Expand All @@ -476,17 +478,17 @@ for file in $inject_files; do
if ldd "$f" | grep libcxi > /dev/null 2>&1; then
d=$cray_fi_dest
fi
;;
;;
*-fi.so)
d=$fi_prov_dest
;;
;;
*)
d=$lib_dest
;;
;;
esac
infer=" (inferred)"
fi
debug_indent "${f} -> ${d}${infer}"
VERBOSE "${f} -> ${d}${infer}"
[ "$d" ] || FATAL "no destination for: ${f}"
[ -z "${d%%/*}" ] || FATAL "not an absolute path: ${d}"
[ -d "${image}${d}" ] || FATAL "not a directory: ${image}${d}"
Expand All @@ -503,28 +505,31 @@ done
IFS="$old_ifs"

if [ -z "$no_ldconfig" ] \
&& { [ "$lib_found" ] || [ "$fi_prov_found" ] || [ "$cray_fi_found" ] ;} then
DEBUG "running ldconfig"
debug_indent "${ch_bin}/ch-run -w $image -- /sbin/ldconfig"
"${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null || FATAL 'ldconfig error'
&& { [ "$lib_found" ] \
|| [ "$fi_prov_found" ] \
|| [ "$cray_fi_found" ] ;} then
VERBOSE "running ldconfig"
"${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null \
|| FATAL 'ldconfig error'
if [ -n "$fi_prov_found" ] || [ -n "$cray_fi_found" ]; then
DEBUG "validating ldconfig cache"
VERBOSE "validating ldconfig cache"
for file in $inject_files; do
f="$(basename "${file%%:*}")"
f=$("${ch_bin}/ch-run" "$image" -- find / \
-not \( -path /proc -prune \) \
-not \( -path /dev -prune \) \
-not \( -path /tmp -prune \) \
-not \( -path /sys -prune \) \
-not \( -path /var/opt/cray -prune \) \
-not \( -path /etc/opt/cray -prune \) \
-name "$f")
f=$( "${ch_bin}/ch-run" "$image" \
-- find / \
-not \( -path /proc -prune \) \
-not \( -path /dev -prune \) \
-not \( -path /tmp -prune \) \
-not \( -path /sys -prune \) \
-not \( -path /var/opt/cray -prune \) \
-not \( -path /etc/opt/cray -prune \) \
-name "$f")
if [ "$("${ch_bin}/ch-run" "$image" -- ldd "$f" | grep -c 'not found ')" -ne 0 ]; then
FATAL "ldconfig: '${ch_bin}/ch-run $image -- ldd $f' failed"
fi
done
fi
else
DEBUG "not running ldconfig"
VERBOSE "not running ldconfig"
fi
echo 'done'
Loading

0 comments on commit 0ff0c5c

Please sign in to comment.