diff --git a/.ci/Dockerfile.centos8 b/.ci/Dockerfile.centos8
index fc20124ef3..cf81490019 100644
--- a/.ci/Dockerfile.centos8
+++ b/.ci/Dockerfile.centos8
@@ -1,4 +1,4 @@
-ARG CUDA_VER='11.4.2'
+ARG CUDA_VER='12.2'
 FROM harbor.mellanox.com/torch-ucc/ucc/1.0.0/x86_64/centos8/cuda${CUDA_VER}:base
 RUN rm -rf ${SRC_DIR}/ucc
@@ -6,7 +6,7 @@
 COPY . ${SRC_DIR}/ucc
 RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
     sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
-RUN yum install -y sudo && \
+RUN yum install -y sudo libevent && \
     echo "swx-jenkins ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
 #==============================================================================
 # Build UCC
diff --git a/.ci/Dockerfile.ngc_pytorch b/.ci/Dockerfile.ngc_pytorch
new file mode 100644
index 0000000000..b0bfc6e186
--- /dev/null
+++ b/.ci/Dockerfile.ngc_pytorch
@@ -0,0 +1,21 @@
+ARG CUDA_VER='11.4.2'
+FROM harbor.mellanox.com/torch-ucc/ucc/1.0.0/x86_64/centos8/cuda${CUDA_VER}:base
+#FROM nvcr.io/nvidia/pytorch:23.10-py3
+RUN rm -rf ${SRC_DIR}/ucc
+COPY . ${SRC_DIR}/ucc
+
+RUN apt update && apt install -y sudo && \
+    echo "swx-jenkins ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+RUN pip install 'protobuf<=3.19.0'
+#==============================================================================
+# Build UCC
+RUN ${SRC_DIR}/ucc/.ci/scripts/build_ucc.sh
+#==============================================================================
+# Install torch_ucc (UCC version) python module and build a wheel package
+RUN chown -R 6213:11429 /opt/nvidia
+#==============================================================================
+RUN groupadd -g 11429 swx-jenkins
+RUN adduser --no-create-home --uid 6213 --gid 11429 --home /labhome/swx-jenkins swx-jenkins
+#==============================================================================
+USER swx-jenkins
+
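The two files above move the CentOS 8 CI image to CUDA 12.2 (adding libevent) and introduce a new NGC-PyTorch-oriented CI Dockerfile that builds UCC via .ci/scripts/build_ucc.sh. A rough sketch of exercising the new Dockerfile locally follows; the ucc-ci:ngc_pytorch tag is purely illustrative and not defined by this patch, and the harbor.mellanox.com base image is only reachable with access to that internal registry:

    # Build from the repository root so that "COPY . ${SRC_DIR}/ucc" picks up the whole tree.
    docker build -f .ci/Dockerfile.ngc_pytorch \
        --build-arg CUDA_VER='11.4.2' \
        -t ucc-ci:ngc_pytorch .
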
diff --git a/.ci/Dockerfile.ubi8 b/.ci/Dockerfile.ubi8
new file mode 100644
index 0000000000..5ad3a224a2
--- /dev/null
+++ b/.ci/Dockerfile.ubi8
@@ -0,0 +1,23 @@
+ARG CUDA_VER='12.2'
+FROM ucc_ubi8:latest
+
+RUN rm -rf ${SRC_DIR}/ucc
+COPY . ${SRC_DIR}/ucc
+
+#RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
+#    sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
+RUN yum install -y sudo && \
+    echo "swx-jenkins ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+#==============================================================================
+# Build UCC
+RUN ${SRC_DIR}/ucc/.ci/scripts/build_ucc.sh
+#==============================================================================
+# Install torch_ucc (UCC version) python module and build a wheel package
+RUN ${SRC_DIR}/ucc/.ci/scripts/install_torch_ucc.sh
+RUN chown -R 6213:11429 /opt/nvidia
+#==============================================================================
+RUN groupadd -g 11429 swx-jenkins
+RUN adduser --no-create-home --uid 6213 --gid 11429 --home /labhome/swx-jenkins swx-jenkins
+#==============================================================================
+USER swx-jenkins
+
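Dockerfile.ubi8 starts FROM ucc_ubi8:latest, so an image with exactly that tag must already exist wherever this is built; presumably it is produced from .ci/build_base_docker/Dockerfile.ubi8.base introduced further below, whose local.repo points at an internal webrepo mirror and is therefore expected to build only inside the lab network. A hedged sketch of the likely two-step build (only the ucc_ubi8:latest tag is mandated by the Dockerfile, the other tag is illustrative):

    # Step 1: build the ubi8 base from the repo root (it COPYs .ci/build_base_docker/local.repo).
    docker build -f .ci/build_base_docker/Dockerfile.ubi8.base \
        --build-arg CUDA_VER='12.1.1' -t ucc_ubi8:latest .
    # Step 2: build the CI image on top of it.
    docker build -f .ci/Dockerfile.ubi8 -t ucc-ci:ubi8 .
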
diff --git a/.ci/build_base_docker/Dockerfile.ngc_pytorch.base b/.ci/build_base_docker/Dockerfile.ngc_pytorch.base
new file mode 100644
index 0000000000..9f819f1aa9
--- /dev/null
+++ b/.ci/build_base_docker/Dockerfile.ngc_pytorch.base
@@ -0,0 +1,71 @@
+ARG CUDA_VER='12.1.1'
+FROM nvcr.io/nvidia/pytorch:23.11-py3
+#==============================================================================
+ARG NVIDIA_ROOT_DIR=/opt/nvidia
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=Etc/UTC
+ENV SRC_DIR=${NVIDIA_ROOT_DIR}/src
+ENV PKG_DIR=${NVIDIA_ROOT_DIR}/pkg
+ENV BIN_DIR=${NVIDIA_ROOT_DIR}/bin
+ENV WORKLOADS_DIR=${NVIDIA_ROOT_DIR}/workloads
+ENV TORCH_UCC_GITHUB_URL=https://github.com/facebookresearch/torch_ucc.git
+ENV TORCH_UCC_BRANCH=main
+ENV CUDA_HOME=/usr/local/cuda
+ENV UCX_GITHUB_URL=https://github.com/openucx/ucx.git
+ENV UCX_BRANCH=master
+ENV UCX_BUILD_TYPE=release-mt
+ENV UCX_INSTALL_DIR=${BIN_DIR}/ucx/build-${UCX_BUILD_TYPE}
+ENV UCC_INSTALL_DIR=${BIN_DIR}/ucc/build
+ENV OFED_PKG='lsof kmod udev swig libelf1 libfuse2 pciutils tk gfortran libpci3 libusb-1.0-0 libltdl-dev libmnl0 bison tcl flex chrpath debhelper ethtool graphviz'
+ENV PACKAGES='numactl openssh-server protobuf-compiler rdma-core vim libevent-dev build-essential git make autoconf libtool'
+ENV OS_VERSION=ubuntu22.04
+ENV PLATFORM=x86_64
+ENV MOFED_VERSION=23.10-0.5.5.0
+ENV MOFED_URL="https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-${OS_VERSION}-${PLATFORM}.tgz"
+ENV OMPI_PATH="/opt/hpcx/ompi"
+#==============================================================================
+RUN apt update && apt install -y ${OFED_PKG} && \
+    mkdir -p /tmp/ofed && wget --quiet -O /tmp/ofed/ofed.tgz ${MOFED_URL} && \
+    tar -xvf /tmp/ofed/ofed.tgz --strip-components=2 -C /tmp/ofed && \
+    /tmp/ofed/mlnxofedinstall --user-space-only --without-fw-update -q --distro ${OS_VERSION} --basic && \
+    rm -rf /tmp/ofed
+
+RUN apt install -y ${PACKAGES}
+
+# Remove old UCX
+RUN rm -rf /opt/hpcx/uc?
+ENV PATH=${OMPI_PATH}/bin:$PATH
+RUN echo "export PATH=\"\$OMPI_PATH:\$PATH\"" >> /etc/bashrc && \
+    export LD_LIBRARY_PATH=\"\$OMPI_PATH/lib64:\${LD_LIBRARY_PATH}\" >> /etc/bashrc
+#==============================================================================
+# Configure SSH
+RUN mkdir -p /var/run/sshd && \
+    cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
+    echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config && \
+    ssh-keygen -A && \
+    rm -f /run/nologin
+#==============================================================================
+
+#==============================================================================
+RUN mkdir -p ${SRC_DIR} ${PKG_DIR} ${BIN_DIR} ${WORKLOADS_DIR} && \
+    cd ${SRC_DIR} && \
+    mkdir -p ${SRC_DIR}/ucx && \
+    git clone --recursive ${UCX_GITHUB_URL} ${SRC_DIR}/ucx && \
+    cd ${SRC_DIR}/ucx && \
+    git checkout ${UCX_BRANCH}
+
+COPY . ${SRC_DIR}/ucc
+#==============================================================================
+# Build UCX
+RUN ${SRC_DIR}/ucc/.ci/scripts/build_ucx.sh
+ENV PATH=${UCX_INSTALL_DIR}/bin:${PATH}
+#==============================================================================
+# Install workloads
+WORKDIR ${WORKLOADS_DIR}
+RUN git clone https://github.com/facebookresearch/dlrm.git && \
+    cd ${WORKLOADS_DIR}/dlrm && \
+    pip3 install -r ${WORKLOADS_DIR}/dlrm/requirements.txt && \
+    pip3 install tensorboard
+RUN git clone https://github.com/facebookresearch/param.git && \
+    pip3 install -r ${WORKLOADS_DIR}/param/requirements.txt
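One line in the base image above is worth a second look: in the RUN that appends to /etc/bashrc, only the first command is an echo; the second ("export LD_LIBRARY_PATH=... >> /etc/bashrc") exports the variable in the build shell and appends nothing to the file. The PATH line also appends $OMPI_PATH rather than $OMPI_PATH/bin, unlike the ENV directive just above it. If persisting both settings is the intent, a sketch of the likely intended form (not part of this patch; the same pattern recurs in Dockerfile.ubi8.base below) would be:

    RUN echo "export PATH=\"\$OMPI_PATH/bin:\$PATH\"" >> /etc/bashrc && \
        echo "export LD_LIBRARY_PATH=\"\$OMPI_PATH/lib64:\$LD_LIBRARY_PATH\"" >> /etc/bashrc
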
diff --git a/.ci/build_base_docker/Dockerfile.ubi8.base b/.ci/build_base_docker/Dockerfile.ubi8.base
new file mode 100644
index 0000000000..fb8373210e
--- /dev/null
+++ b/.ci/build_base_docker/Dockerfile.ubi8.base
@@ -0,0 +1,85 @@
+ARG CUDA_VER='12.1.1'
+FROM nvidia/cuda:${CUDA_VER}-devel-ubi8
+#==============================================================================
+ARG NVIDIA_ROOT_DIR=/opt/nvidia
+ENV SRC_DIR=${NVIDIA_ROOT_DIR}/src
+ENV PKG_DIR=${NVIDIA_ROOT_DIR}/pkg
+ENV BIN_DIR=${NVIDIA_ROOT_DIR}/bin
+ENV WORKLOADS_DIR=${NVIDIA_ROOT_DIR}/workloads
+ENV TORCH_UCC_GITHUB_URL=https://github.com/facebookresearch/torch_ucc.git
+ENV TORCH_UCC_BRANCH=main
+ENV CUDA_HOME=/usr/local/cuda
+ENV UCX_GITHUB_URL=https://github.com/openucx/ucx.git
+ENV UCX_BRANCH=master
+ENV UCX_BUILD_TYPE=release-mt
+ENV UCX_INSTALL_DIR=${BIN_DIR}/ucx/build-${UCX_BUILD_TYPE}
+ENV UCC_INSTALL_DIR=${BIN_DIR}/ucc/build
+ENV OFED_PKG 'python36 tk pciutils-libs fuse-libs kernel-modules-extra libmnl wget numactl-libs gcc-gfortran'
+ENV PACKAGES 'numactl numactl-devel openssh-server protobuf-compiler protobuf-devel python3.8 python38-devel vim openmpi openmpi-devel hostname'
+ENV OS_VERSION rhel8.0
+ENV PLATFORM x86_64
+ENV MOFED_VERSION 23.10-0.5.5.0
+ENV MOFED_URL="https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VERSION}/MLNX_OFED_LINUX-${MOFED_VERSION}-${OS_VERSION}-${PLATFORM}.tgz"
+ENV OMPI_PATH "/usr/lib64/openmpi"
+#==============================================================================
+COPY .ci/build_base_docker/local.repo /etc/yum.repos.d/local.repo
+RUN yum groupinstall -y \
+    'Development Tools' && \
+    yum install -y ${OFED_PKG} && \
+    mkdir -p /tmp/ofed && wget --quiet -O /tmp/ofed/ofed.tgz ${MOFED_URL} && \
+    tar -xvf /tmp/ofed/ofed.tgz --strip-components=2 -C /tmp/ofed && \
+    /tmp/ofed/mlnxofedinstall --user-space-only --without-fw-update --basic -q --distro ${OS_VERSION} && \
+    rm -rf /tmp/ofed
+
+RUN yum install -y ${PACKAGES} && \
+    update-alternatives --set python3 /usr/bin/python3.8
+
+# Remove old UCX
+RUN rpm -e --nodeps ucx
+#ENV PATH=/usr/lib64/openmpi/bin:$PATH
+ENV PATH=${OMPI_PATH}/bin:$PATH
+RUN echo "export PATH=\"\$OMPI_PATH:\$PATH\"" >> /etc/bashrc && \
+    export LD_LIBRARY_PATH=\"\$OMPI_PATH/lib:\${LD_LIBRARY_PATH}\" >> /etc/bashrc
+RUN cd /tmp && wget https://github.com/Kitware/CMake/releases/download/v3.20.4/cmake-3.20.4-linux-x86_64.sh && \
+    chmod +x /tmp/cmake-3.20.4-linux-x86_64.sh && /tmp/cmake-3.20.4-linux-x86_64.sh --skip-license --prefix=/usr && \
+    rm -f /tmp/cmake-3.20.4-linux-x86_64.sh
+#==============================================================================
+# Configure SSH
+RUN mkdir -p /var/run/sshd && \
+    cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
+    echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config && \
+    ssh-keygen -A && \
+    rm -f /run/nologin
+#==============================================================================
+
+#==============================================================================
+RUN mkdir -p ${SRC_DIR} ${PKG_DIR} ${BIN_DIR} ${WORKLOADS_DIR} && \
+    git clone ${TORCH_UCC_GITHUB_URL} ${SRC_DIR} && \
+    cd ${SRC_DIR} && \
+    git checkout ${TORCH_UCC_BRANCH} && \
+    mkdir -p ${SRC_DIR}/ucx && \
+    git clone --recursive ${UCX_GITHUB_URL} ${SRC_DIR}/ucx && \
+    cd ${SRC_DIR}/ucx && \
+    git checkout ${UCX_BRANCH}
+
+COPY . ${SRC_DIR}/ucc
+#==============================================================================
+# Build UCX
+RUN ${SRC_DIR}/ucc/.ci/scripts/build_ucx.sh
+ENV PATH=${UCX_INSTALL_DIR}/bin:${PATH}
+#==============================================================================
+# Configure Python
+RUN ${SRC_DIR}/ucc/.ci/scripts/configure_python.sh
+#==============================================================================
+# Install PyTorch
+RUN ${SRC_DIR}/ucc/.ci/scripts/install_torch.sh
+#==============================================================================
+# Install workloads
+WORKDIR ${WORKLOADS_DIR}
+RUN git clone https://github.com/facebookresearch/dlrm.git && \
+    cd ${WORKLOADS_DIR}/dlrm && \
+    pip3 install -r ${WORKLOADS_DIR}/dlrm/requirements.txt && \
+    pip3 install tensorboard
+RUN git clone https://github.com/facebookresearch/param.git && \
+    pip3 install -r ${WORKLOADS_DIR}/param/requirements.txt
diff --git a/.ci/build_base_docker/local.repo b/.ci/build_base_docker/local.repo
new file mode 100644
index 0000000000..57f55da621
--- /dev/null
+++ b/.ci/build_base_docker/local.repo
@@ -0,0 +1,16 @@
+[Local_appStream]
+baseurl=http://webrepo/RH/rh-mirrors/8-upstream/appstream/x86_64/
+enabled=1
+gpgcheck=0
+[Local_BaseOs]
+baseurl=http://webrepo/RH/rh-mirrors/8-upstream/baseos/x86_64/
+enabled=1
+gpgcheck=0
+[Local_Builder]
+baseurl=http://webrepo/RH/rh-mirrors/8-upstream/codeready-builder/x86_64/
+enabled=1
+gpgcheck=0
+[Local_High]
+baseurl=http://webrepo/RH/rh-mirrors/8-upstream/highavailability/x86_64/
+enabled=1
+gpgcheck=0
diff --git a/.ci/job_matrix.yaml b/.ci/job_matrix.yaml
index af23b10578..778bdec078 100644
--- a/.ci/job_matrix.yaml
+++ b/.ci/job_matrix.yaml
@@ -20,7 +20,7 @@ volumes:
   }

 env:
-  CUDA_VER: '11.4.2'
+  CUDA_VER: '12.1.1'
   UCC_URI_SUFFIX: "ucc/${UCC_VERSION}/x86_64/centos8/cuda${CUDA_VER}"
   UCC_DOCKER_IMAGE_NAME: "${registry_host}${registry_path}/${UCC_URI_SUFFIX}"
   NVIDIA_ROOT_DIR: "/opt/nvidia"
@@ -42,8 +42,8 @@ credentials:

 runs_on_dockers:
   - {
-    file: ".ci/Dockerfile.centos8",
-    name: "centos8",
+    file: ".ci/Dockerfile.ngc_pytorch",
+    name: "ngc_pytorch",
     tag: "${BUILD_NUMBER}",
     arch: "x86_64",
     uri: "${UCC_URI_SUFFIX}",
@@ -77,9 +77,10 @@ steps:
     run: |
       export UCC_PASSWORD=$UCC_PASSWORD
       export UCC_USERNAME=$UCC_USERNAME
-      echo "Running coverity"
-      ${WORKSPACE}/.ci/scripts/coverity.sh
-    archiveArtifacts: .ci/scripts/cov-build/*
+      echo "Running coverity "
+      env
+#      ${WORKSPACE}/.ci/scripts/coverity.sh
+#    archiveArtifacts: .ci/scripts/cov-build/*
   #============================================================================
   - name: Run UCC / Torch-UCC tests
@@ -90,7 +91,7 @@ steps:
       docker exec $(cat ${WORKSPACE}/ucc_docker.id) bash -c "\${SRC_DIR}/ucc/.ci/scripts/run_tests_ucc.sh"

       echo "INFO: Run Torch-UCC tests (UCC)"
-      docker exec $(cat ${WORKSPACE}/ucc_docker.id) bash -c "\${SRC_DIR}/ucc/.ci/scripts/run_tests_torch_ucc.sh"
+      # docker exec $(cat ${WORKSPACE}/ucc_docker.id) bash -c "\${SRC_DIR}/ucc/.ci/scripts/run_tests_torch_ucc.sh"
     always: |
       docker rm --force $(cat ${WORKSPACE}/ucc_docker.id)
   #============================================================================
diff --git a/.ci/scripts/build_ucc.sh b/.ci/scripts/build_ucc.sh
index 58bb7ffdcb..bfae440068 100755
--- a/.ci/scripts/build_ucc.sh
+++ b/.ci/scripts/build_ucc.sh
@@ -7,7 +7,7 @@ cd "${UCC_SRC_DIR}"
 "${UCC_SRC_DIR}/autogen.sh"
 mkdir -p "${UCC_SRC_DIR}/build"
 cd "${UCC_SRC_DIR}/build"
-"${UCC_SRC_DIR}/configure" --with-ucx="${UCX_INSTALL_DIR}" --with-cuda="${CUDA_HOME}" \
+"${UCC_SRC_DIR}/configure" --with-ucx="${UCX_INSTALL_DIR}" --with-cuda="${CUDA_HOME}" --with-nvcc-gencode="-gencode=arch=compute_70,code=sm_70" \
     --prefix="${UCC_INSTALL_DIR}" --enable-gtest --with-mpi
 make -j install
 echo "${UCC_INSTALL_DIR}/lib" > /etc/ld.so.conf.d/ucc.conf
diff --git a/.ci/scripts/install_torch.sh b/.ci/scripts/install_torch.sh
index f5b0fe25bd..c5bb73bb90 100755
--- a/.ci/scripts/install_torch.sh
+++ b/.ci/scripts/install_torch.sh
@@ -31,7 +31,10 @@ set -o pipefail
 #conda uninstall -y pytorch torchvision
 #conda install pytorch torchvision cudatoolkit=11.0 -c pytorch-nightly
 #conda install pytorch cudatoolkit=11.0 -c pytorch-nightly
-
+ls /usr/local/lib64/python3.8/dist-packages/torch/lib -la
+ls -la /usr/local/lib64/python3.8/
 pip3 install --default-timeout=900 numpy
-pip3 install --default-timeout=900 --pre torch -f https://download.pytorch.org/whl/nightly/cu113/torch_nightly.html
+#pip3 install torch torchvision torchaudio
+pip3 install --default-timeout=900 --pre torch -f https://download.pytorch.org/whl/nightly/cu121/torch_nightly.html
+#pip3 install --default-timeout=900 --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
 pip3 install "git+https://github.com/mlperf/logging.git@0.7.1"
diff --git a/.ci/scripts/install_torch_ucc.sh b/.ci/scripts/install_torch_ucc.sh
index 841facc3cd..137181ecfc 100755
--- a/.ci/scripts/install_torch_ucc.sh
+++ b/.ci/scripts/install_torch_ucc.sh
@@ -7,6 +7,8 @@ export UCX_HOME=${UCX_INSTALL_DIR}
 export UCC_HOME=${UCC_INSTALL_DIR}
 export WITH_CUDA=${CUDA_HOME}
 cd "${SRC_DIR}"
+ls -la /usr/local/lib64/python3.8/site-packages/torch*
+#ls /usr/local/lib64/python3.8/dist-packages/torch/lib -la
 python setup.py install bdist_wheel
 pip3 list | grep torch
 python -c 'import torch, torch_ucc'
diff --git a/.ci/scripts/run_docker.sh b/.ci/scripts/run_docker.sh
index 7f141d65c9..9535298bb2 100755
--- a/.ci/scripts/run_docker.sh
+++ b/.ci/scripts/run_docker.sh
@@ -45,7 +45,7 @@ DOCKER_RUN_ARGS="\
 -d \
 --rm \
 --name=${DOCKER_CONTAINER_NAME} \
--v /labhome:/labhome \
+-v /labhome/swx-jenkins:/labhome/swx-jenkins \
 "

 # shellcheck disable=SC2013
diff --git a/.ci/scripts/run_tests_ucc_mpi.sh b/.ci/scripts/run_tests_ucc_mpi.sh
index 4701a7c04e..6ce2da68e1 100755
--- a/.ci/scripts/run_tests_ucc_mpi.sh
+++ b/.ci/scripts/run_tests_ucc_mpi.sh
@@ -15,8 +15,8 @@ if [ -z "$HOSTFILE" ]; then
     exit 1
 fi

-export PATH="/usr/lib64/openmpi/bin:$PATH"
-export LD_LIBRARY_PATH="/usr/lib64/openmpi/lib:${LD_LIBRARY_PATH}"
+export PATH="/opt/hpcx/ompi/bin/:$PATH"
+export LD_LIBRARY_PATH="/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}"

 HEAD_NODE=$(head -1 "$HOSTFILE")
 export HEAD_NODE
@@ -53,6 +53,11 @@ function mpi_params {
 }

 #
 # shellcheck disable=SC2086
+echo "====================ENV clx01 ------------"
+env
+whereis mpirun
+echo " ========== ENV clx02"
+ssh swx-clx02 env
 mpirun $(mpi_params 1) hostname
diff --git a/test/gtest/core/test_mc_reduce.cc b/test/gtest/core/test_mc_reduce.cc
index e528119835..674808ccdb 100644
--- a/test/gtest/core/test_mc_reduce.cc
+++ b/test/gtest/core/test_mc_reduce.cc
@@ -101,6 +101,7 @@ class test_mc_reduce : public testing::Test {
                 std::cerr << "failed to destory cuda stream" << std::endl;
                 return UCC_ERR_NO_MESSAGE;
             }
+            ee_context = NULL;
         }
 #endif
         return status;
     }
@@ -110,11 +111,11 @@
     {
         ucc_status_t status;

-        status = alloc_executor(mtype);
+        status = alloc_bufs(mtype, n);
         if (UCC_OK != status) {
             return status;
         }
-        return alloc_bufs(mtype, n);
+        return alloc_executor(mtype);
     }

     ucc_status_t alloc_bufs(ucc_memory_type_t mtype, size_t n)
@@ -192,9 +193,6 @@
     virtual void TearDown() override
     {
         free_bufs(mem_type);
-        if (executor) {
-            free_executor();
-        }
         ucc_mc_finalize();
     }

@@ -246,6 +244,9 @@
         GTEST_SKIP();
     }
     ASSERT_EQ(status, UCC_OK);
+    if (executor) {
+        free_executor();
+    }
     if (mt != UCC_MEMORY_TYPE_HOST) {
         ucc_mc_memcpy(this->res_h, this->res_d,
                       this->COUNT * sizeof(*this->res_d),
@@ -272,6 +273,9 @@
         GTEST_SKIP();
     }
     ASSERT_EQ(status, UCC_OK);
+    if (executor) {
+        free_executor();
+    }
     if (mt != UCC_MEMORY_TYPE_HOST) {
         ucc_mc_memcpy(this->res_h, this->res_d,
                       this->COUNT * sizeof(*this->res_d),
@@ -305,6 +309,9 @@
         GTEST_SKIP();
    }
     ASSERT_EQ(status, UCC_OK);
+    if (executor) {
+        free_executor();
+    }
     if (mt != UCC_MEMORY_TYPE_HOST) {
         ucc_mc_memcpy(this->res_h, this->res_d,
                       this->COUNT * sizeof(*this->res_d),
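The test_mc_reduce.cc changes allocate buffers before the executor, reset ee_context once its CUDA stream has been destroyed, and free the executor in the test body right after the reduction completes instead of in TearDown. A hedged way to re-run just these tests after rebuilding; the gtest binary path assumes the default in-tree layout produced by --enable-gtest in build_ucc.sh and may differ:

    cd "${UCC_SRC_DIR}/build" && make -j
    ./test/gtest/gtest --gtest_filter='*mc_reduce*'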