From 3cf1d4c8a6c498fd4c004ca7042a4bd92fa8528f Mon Sep 17 00:00:00 2001
From: Praveen <66662436+pveleneni@users.noreply.github.com>
Date: Wed, 11 Oct 2023 18:06:53 +0530
Subject: [PATCH] #0 feat: add flink dockerfiles

---
 stubs/docker/apache-flink-plugins/Dockerfile  |  14 ++
 stubs/docker/apache-flink/Dockerfile          |  95 +++++++++++
 .../docker/apache-flink/docker-entrypoint.sh  | 157 ++++++++++++++++++
 3 files changed, 266 insertions(+)
 create mode 100644 stubs/docker/apache-flink-plugins/Dockerfile
 create mode 100644 stubs/docker/apache-flink/Dockerfile
 create mode 100755 stubs/docker/apache-flink/docker-entrypoint.sh

diff --git a/stubs/docker/apache-flink-plugins/Dockerfile b/stubs/docker/apache-flink-plugins/Dockerfile
new file mode 100644
index 00000000..1351c9c2
--- /dev/null
+++ b/stubs/docker/apache-flink-plugins/Dockerfile
@@ -0,0 +1,14 @@
+FROM sanketikahub/flink:1.15.2-scala_2.12-jdk-11-source
+USER flink
+# Downloads are chained with && so a failed wget aborts the build: `wget -O` leaves an empty file behind on error, which a `;`-chained mv would still install.
+RUN mkdir -p "$FLINK_HOME/plugins/s3-fs-presto" "$FLINK_HOME/plugins/gs-fs-hadoop"
+RUN wget -nv -O flink-streaming-scala_2.12-1.15.2.jar "https://repo1.maven.org/maven2/org/apache/flink/flink-streaming-scala_2.12/1.15.2/flink-streaming-scala_2.12-1.15.2.jar" && \
+    mv flink-streaming-scala_2.12-1.15.2.jar "$FLINK_HOME/lib/"
+# COPY flink-shaded-hadoop2-uber-2.8.3-1.8.3.jar $FLINK_HOME/lib/
+# COPY flink-s3-fs-hadoop-1.15.2.jar $FLINK_HOME/lib/
+RUN wget -nv -O flink-azure-fs-hadoop-1.15.2.jar "https://repo1.maven.org/maven2/org/apache/flink/flink-azure-fs-hadoop/1.15.2/flink-azure-fs-hadoop-1.15.2.jar" && \
+    mv flink-azure-fs-hadoop-1.15.2.jar "$FLINK_HOME/lib/"
+RUN wget -nv -O flink-s3-fs-presto-1.15.2.jar "https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-presto/1.15.2/flink-s3-fs-presto-1.15.2.jar" && \
+    mv flink-s3-fs-presto-1.15.2.jar "$FLINK_HOME/plugins/s3-fs-presto"
+RUN wget -nv -O flink-gs-fs-hadoop-1.15.2.jar "https://repo1.maven.org/maven2/org/apache/flink/flink-gs-fs-hadoop/1.15.2/flink-gs-fs-hadoop-1.15.2.jar" && \
+    mv flink-gs-fs-hadoop-1.15.2.jar "$FLINK_HOME/plugins/gs-fs-hadoop"
diff --git a/stubs/docker/apache-flink/Dockerfile b/stubs/docker/apache-flink/Dockerfile
new file mode 100644
index 00000000..4380813d
--- /dev/null
+++ b/stubs/docker/apache-flink/Dockerfile
@@ -0,0 +1,95 @@
+# FLINK SOURCE LINK - https://github.com/apache/flink-docker/blob/4794f9425513fb4c0b55ec1efd629e8eb7e5d8c5/1.15/scala_2.12-java11-ubuntu/Dockerfile
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+FROM --platform=linux/x86_64 eclipse-temurin:11.0.20.1_1-jdk-focal
+
+# Install dependencies
+RUN set -ex; \
+  apt-get update; \
+  apt-get -y install gpg libsnappy1v5 gettext-base libjemalloc-dev; \
+  rm -rf /var/lib/apt/lists/*
+
+# Grab gosu for easy step-down from root
+ENV GOSU_VERSION=1.11
+RUN set -ex; \
+  wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)"; \
+  wget -nv -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc"; \
+  export GNUPGHOME="$(mktemp -d)"; \
+  for server in ha.pool.sks-keyservers.net $(shuf -e \
+                          hkp://p80.pool.sks-keyservers.net:80 \
+                          keyserver.ubuntu.com \
+                          hkp://keyserver.ubuntu.com:80 \
+                          pgp.mit.edu) ; do \
+      gpg --batch --keyserver "$server" --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 && break || : ; \
+  done && \
+  gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu; \
+  gpgconf --kill all; \
+  rm -rf "$GNUPGHOME" /usr/local/bin/gosu.asc; \
+  chmod +x /usr/local/bin/gosu; \
+  gosu nobody true
+
+# Configure Flink version (fetched from archive.apache.org, which keeps every release; the mirror/dist URLs only carry current releases)
+ENV FLINK_TGZ_URL=https://archive.apache.org/dist/flink/flink-1.15.2/flink-1.15.2-bin-scala_2.12.tgz \
+    FLINK_ASC_URL=https://archive.apache.org/dist/flink/flink-1.15.2/flink-1.15.2-bin-scala_2.12.tgz.asc \
+    GPG_KEY=0F79F2AFB2351BC29678544591F9C1EC125FD8DB \
+    CHECK_GPG=true
+
+# Prepare environment
+ENV FLINK_HOME=/opt/flink
+ENV PATH=$FLINK_HOME/bin:$PATH
+RUN groupadd --system --gid=9999 flink && \
+    useradd --system --home-dir $FLINK_HOME --uid=9999 --gid=flink flink
+WORKDIR $FLINK_HOME
+
+# Install Flink
+RUN set -ex; \
+  wget -nv -O flink.tgz "$FLINK_TGZ_URL"; \
+  \
+  if [ "$CHECK_GPG" = "true" ]; then \
+    wget -nv -O flink.tgz.asc "$FLINK_ASC_URL"; \
+    export GNUPGHOME="$(mktemp -d)"; \
+    for server in ha.pool.sks-keyservers.net $(shuf -e \
+                            hkp://p80.pool.sks-keyservers.net:80 \
+                            keyserver.ubuntu.com \
+                            hkp://keyserver.ubuntu.com:80 \
+                            pgp.mit.edu) ; do \
+        gpg --batch --keyserver "$server" --recv-keys "$GPG_KEY" && break || : ; \
+    done && \
+    gpg --batch --verify flink.tgz.asc flink.tgz; \
+    gpgconf --kill all; \
+    rm -rf "$GNUPGHOME" flink.tgz.asc; \
+  fi; \
+  \
+  tar -xf flink.tgz --strip-components=1; \
+  rm flink.tgz; \
+  \
+  chown -R flink:flink .; \
+  \
+  # Replace default REST/RPC endpoint bind address to use the container's network interface \
+  sed -i 's/rest.address: localhost/rest.address: 0.0.0.0/g' $FLINK_HOME/conf/flink-conf.yaml; \
+  sed -i 's/rest.bind-address: localhost/rest.bind-address: 0.0.0.0/g' $FLINK_HOME/conf/flink-conf.yaml; \
+  sed -i 's/jobmanager.bind-host: localhost/jobmanager.bind-host: 0.0.0.0/g' $FLINK_HOME/conf/flink-conf.yaml; \
+  sed -i 's/taskmanager.bind-host: localhost/taskmanager.bind-host: 0.0.0.0/g' $FLINK_HOME/conf/flink-conf.yaml; \
+  sed -i '/taskmanager.host: localhost/d' $FLINK_HOME/conf/flink-conf.yaml;
+
+# Configure container (docker-entrypoint.sh is committed with mode 100755 so the exec-form ENTRYPOINT can run it)
+COPY docker-entrypoint.sh /
+ENTRYPOINT ["/docker-entrypoint.sh"]
+EXPOSE 6123 8081
+CMD ["help"]
diff --git a/stubs/docker/apache-flink/docker-entrypoint.sh b/stubs/docker/apache-flink/docker-entrypoint.sh
new file mode 100755
index 00000000..8b0350e2
--- /dev/null
+++ b/stubs/docker/apache-flink/docker-entrypoint.sh
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+COMMAND_STANDALONE="standalone-job"
+COMMAND_HISTORY_SERVER="history-server"
+
+# If unspecified, the hostname of the container is taken as the JobManager address
+JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)}
+CONF_FILE="${FLINK_HOME}/conf/flink-conf.yaml"
+
+# Print the command prefix used to step down to the flink user when running as root (prints nothing otherwise).
+drop_privs_cmd() {
+    if [ "$(id -u)" != 0 ]; then
+        # Don't need to drop privs if EUID != 0
+        return
+    elif [ -x /sbin/su-exec ]; then
+        # Alpine
+        echo su-exec flink
+    else
+        # Others
+        echo gosu flink
+    fi
+}
+
+# Symlink each ';'-separated jar named in ENABLE_BUILT_IN_PLUGINS from opt/ into its own plugins/ subdirectory.
+copy_plugins_if_required() {
+  if [ -z "$ENABLE_BUILT_IN_PLUGINS" ]; then
+    return 0
+  fi
+
+  echo "Enabling required built-in plugins"
+  for target_plugin in $(echo "$ENABLE_BUILT_IN_PLUGINS" | tr ';' ' '); do
+    echo "Linking ${target_plugin} to plugin directory"
+    plugin_name=${target_plugin%.jar}
+
+    mkdir -p "${FLINK_HOME}/plugins/${plugin_name}"
+    if [ ! -e "${FLINK_HOME}/opt/${target_plugin}" ]; then
+      echo "Plugin ${target_plugin} does not exist. Exiting."
+      exit 1
+    else
+      ln -fs "${FLINK_HOME}/opt/${target_plugin}" "${FLINK_HOME}/plugins/${plugin_name}"
+      echo "Successfully enabled ${target_plugin}"
+    fi
+  done
+}
+
+# Set "<option>: <value>" ($1, $2) in the Flink config file, replacing an existing entry or appending a new one.
+set_config_option() {
+  local option=$1
+  local value=$2
+
+  # escape periods for usage in regular expressions
+  local escaped_option=$(echo "${option}" | sed -e "s/\./\\\./g")
+
+  # either override an existing entry, or append a new one
+  if grep -E "^${escaped_option}:.*" "${CONF_FILE}" > /dev/null; then
+    sed -i -e "s/${escaped_option}:.*/$option: $value/g" "${CONF_FILE}"
+  else
+    echo "${option}: ${value}" >> "${CONF_FILE}"
+  fi
+}
+
+# Write the container-derived settings and any FLINK_PROPERTIES into the config file, then expand env vars in it.
+prepare_configuration() {
+    set_config_option jobmanager.rpc.address "${JOB_MANAGER_RPC_ADDRESS}"
+    set_config_option blob.server.port 6124
+    set_config_option query.server.port 6125
+
+    if [ -n "${TASK_MANAGER_NUMBER_OF_TASK_SLOTS}" ]; then
+        set_config_option taskmanager.numberOfTaskSlots "${TASK_MANAGER_NUMBER_OF_TASK_SLOTS}"
+    fi
+
+    if [ -n "${FLINK_PROPERTIES}" ]; then
+        echo "${FLINK_PROPERTIES}" >> "${CONF_FILE}"
+    fi
+    envsubst < "${CONF_FILE}" > "${CONF_FILE}.tmp" && mv "${CONF_FILE}.tmp" "${CONF_FILE}"
+}
+
+# Preload jemalloc via LD_PRELOAD unless DISABLE_JEMALLOC is set to a value other than "false".
+maybe_enable_jemalloc() {
+    if [ "${DISABLE_JEMALLOC:-false}" == "false" ]; then
+        JEMALLOC_PATH="/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so"
+        JEMALLOC_FALLBACK="/usr/lib/x86_64-linux-gnu/libjemalloc.so"
+        if [ -f "$JEMALLOC_PATH" ]; then
+            export LD_PRELOAD=$LD_PRELOAD:$JEMALLOC_PATH
+        elif [ -f "$JEMALLOC_FALLBACK" ]; then
+            export LD_PRELOAD=$LD_PRELOAD:$JEMALLOC_FALLBACK
+        else
+            if [ "$JEMALLOC_PATH" = "$JEMALLOC_FALLBACK" ]; then
+                MSG_PATH=$JEMALLOC_PATH
+            else
+                MSG_PATH="$JEMALLOC_PATH and $JEMALLOC_FALLBACK"
+            fi
+            echo "WARNING: attempted to load jemalloc from $MSG_PATH but the library couldn't be found. glibc will be used instead."
+        fi
+    fi
+}
+
+maybe_enable_jemalloc
+
+copy_plugins_if_required
+
+prepare_configuration
+
+args=("$@")
+if [ "$1" = "help" ]; then
+    printf "Usage: %s (jobmanager|%s|taskmanager|%s)\n" "$(basename "$0")" "${COMMAND_STANDALONE}" "${COMMAND_HISTORY_SERVER}"
+    printf "    Or %s help\n\n" "$(basename "$0")"
+    printf "By default, Flink image adopts jemalloc as default memory allocator. This behavior can be disabled by setting the 'DISABLE_JEMALLOC' environment variable to 'true'.\n"
+    exit 0
+elif [ "$1" = "jobmanager" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting Job Manager"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/jobmanager.sh" start-foreground "${args[@]}"
+elif [ "$1" = "${COMMAND_STANDALONE}" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting Job Manager"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/standalone-job.sh" start-foreground "${args[@]}"
+elif [ "$1" = "${COMMAND_HISTORY_SERVER}" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting History Server"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/historyserver.sh" start-foreground "${args[@]}"
+elif [ "$1" = "taskmanager" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting Task Manager"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/taskmanager.sh" start-foreground "${args[@]}"
+fi
+
+args=("${args[@]}")
+
+# Running command in pass-through mode
+exec $(drop_privs_cmd) "${args[@]}"