diff --git a/include/DataFrame/DataFrameMLVisitors.h b/include/DataFrame/DataFrameMLVisitors.h
index eb7191bb..7f080352 100644
--- a/include/DataFrame/DataFrameMLVisitors.h
+++ b/include/DataFrame/DataFrameMLVisitors.h
@@ -170,6 +170,7 @@ struct  KMeansVisitor  {
             std::array<double, K>   counts { 0.0 };
 
             // Find assignments.
+            //
             for (size_type point = 0; point < col_s; ++point) [[likely]]  {
                 const value_type    &value = *(column_begin + point);
 
@@ -286,7 +287,10 @@ struct  KMeansVisitor  {
                 return ((x - y) * (x - y));
             },
         seed_t seed = seed_t(-1))
-        : iter_num_(num_of_iter), cc_(calc_clusters), seed_(seed), dfunc_(f) {  }
+        : iter_num_(num_of_iter),
+          cc_(calc_clusters),
+          seed_(seed),
+          dfunc_(f) {  }
 };
 
 // ----------------------------------------------------------------------------
@@ -360,8 +364,8 @@ struct  AffinityPropVisitor  {
                     for (size_type jj = 0; jj < csize; ++jj)  {
                         if (jj ^ j) [[likely]]   {
                             const double    value =
-                                simil[(i * csize) + jj - ((i * (i + 1)) >> 1)] +
-                                avail[jj * csize + i];
+                               simil[(i * csize) + jj - ((i * (i + 1)) >> 1)] +
+                               avail[jj * csize + i];
 
                             if (value > max_diff)
                                 max_diff = value;
@@ -425,6 +429,7 @@ struct  AffinityPropVisitor  {
                 const H &column_begin, const H &column_end)  {
 
         GET_COL_SIZE
+
         const vec_t<double> simil =
             std::move(get_similarity_(column_begin, col_s));
         vec_t<double>       avail;
@@ -513,29 +518,59 @@ struct  FastFourierTransVisitor  {
 
     using cplx_t = typename result_type::value_type;
 
-    static inline result_type convolve_(result_type xvec, result_type yvec)  {
+    static inline result_type
+    convolve_(result_type xvec, result_type yvec, long thread_level)  {
+
+        transform_(xvec, false, thread_level);
+        transform_(yvec, false, thread_level);
 
-        transform_(xvec, false);
-        transform_(yvec, false);
+        const real_t    col_s = real_t(xvec.size());
 
-        std::transform(xvec.begin(), xvec.end(),
-                       yvec.begin(),
-                       xvec.begin(),
-                       std::multiplies<cplx_t>());
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    size_type(col_s),
+                    [&xvec, &yvec](auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            xvec[i] *= yvec[i];
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(xvec.begin(), xvec.end(),
+                           yvec.begin(),
+                           xvec.begin(),
+                           std::multiplies<cplx_t>());
+        }
 
-        transform_(xvec, true);
+        transform_(xvec, true, thread_level);
 
-        const real_t    col_s  = real_t(xvec.size());
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    size_type(col_s),
+                    [&xvec, col_s](auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            xvec[i] /= col_s;
+                    });
 
-        std::transform(xvec.begin(), xvec.end(),
-                       xvec.begin(),
-                       [col_s] (const cplx_t &v) -> cplx_t  {
-                           return (v / col_s);
-                       });
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(xvec.begin(), xvec.end(),
+                           xvec.begin(),
+                           [col_s] (const cplx_t &v) -> cplx_t  {
+                               return (v / col_s);
+                           });
+        }
         return (xvec);
     }
 
-    static inline size_type reverse_bits_(size_type val, size_type width)  {
+    static inline size_type
+    reverse_bits_(size_type val, size_type width)  {
 
         size_type   result { 0 };
 
@@ -544,11 +579,14 @@ struct  FastFourierTransVisitor  {
         return (result);
     }
 
-    static inline void fft_radix2_(result_type &column, bool reverse)  {
+    static inline void
+    fft_radix2_(result_type &column, bool reverse, long thread_level)  {
 
         const size_type col_s { column.size() };
-        size_type       levels { 0 };  // Compute levels = floor(log2(col_s))
+        size_type       levels { 0 };
 
+        // Compute levels = floor(log2(col_s))
+        //
         for (size_type i = col_s; i > 1; i >>= 1) [[likely]]
             levels += 1;
 
@@ -559,9 +597,26 @@ struct  FastFourierTransVisitor  {
             { (reverse ? real_t(2) : -real_t(2)) * real_t(M_PI) };
         result_type     exp_table (half_col_s);
 
-        for (size_type i = 0; i < half_col_s; i++) [[likely]]
-            exp_table[i] =
-                std::polar(real_t(1), two_pi * real_t(i) / real_t(col_s));
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    half_col_s,
+                    [&exp_table, two_pi, col_s]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            exp_table[i] =
+                                std::polar(real_t(1),
+                                           two_pi * real_t(i) / real_t(col_s));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            for (size_type i = 0; i < half_col_s; i++) [[likely]]
+                exp_table[i] =
+                    std::polar(real_t(1), two_pi * real_t(i) / real_t(col_s));
+        }
 
         // Bit-reversed addressing permutation
         //
@@ -590,7 +645,8 @@ struct  FastFourierTransVisitor  {
         }
     }
 
-    static inline void fft_bluestein_(result_type &column, bool reverse)  {
+    static inline void
+    fft_bluestein_(result_type &column, bool reverse, long thread_level)  {
 
         const size_type col_s { column.size() };
 
@@ -600,10 +656,29 @@ struct  FastFourierTransVisitor  {
         const size_type col_s_2 { col_s * 2 };
         const real_t    pi { reverse ? real_t(M_PI) : -real_t(M_PI) };
 
-        for (size_type i = 0; i < col_s; i++) [[likely]]  {
-            const real_t    sq = real_t((i * i) % col_s_2);
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&exp_table, pi, col_s, col_s_2]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]  {
+                            const real_t    sq = real_t((i * i) % col_s_2);
+
+                            exp_table[i] =
+                                std::polar(real_t(1), pi * sq / real_t(col_s));
+                        }
+                    });
 
-            exp_table[i] = std::polar(real_t(1), pi * sq / real_t(col_s));
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            for (size_type i = 0; i < col_s; i++) [[likely]]  {
+                const real_t    sq = real_t((i * i) % col_s_2);
+
+                exp_table[i] = std::polar(real_t(1), pi * sq / real_t(col_s));
+            }
         }
 
         // Find a power of 2 convolution length m such that m >= col_s * 2 + 1
@@ -616,72 +691,165 @@ struct  FastFourierTransVisitor  {
         //
         result_type xvec (m, cplx_t(0, 0));
 
-        for (size_type i = 0; i < col_s; i++) [[likely]]
-            xvec[i] = column[i] * exp_table[i];
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&exp_table, &xvec, &column]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            xvec[i] = column[i] * exp_table[i];
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            for (size_type i = 0; i < col_s; i++) [[likely]]
+                xvec[i] = column[i] * exp_table[i];
+        }
 
         result_type yvec(m, cplx_t(0, 0));
 
         yvec[0] = exp_table[0];
-        for (size_type i = 1; i < col_s; i++) [[likely]]
-            yvec[i] = yvec[m - i] = std::conj(exp_table[i]);
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(1),
+                    col_s,
+                    [&exp_table, &yvec, m]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            yvec[i] = yvec[m - i] = std::conj(exp_table[i]);
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            for (size_type i = 1; i < col_s; i++) [[likely]]
+                yvec[i] = yvec[m - i] = std::conj(exp_table[i]);
+        }
 
         // Convolution
         //
-        const result_type   conv (convolve_(std::move(xvec), std::move(yvec)));
+        const result_type   conv (convolve_(std::move(xvec),
+                                            std::move(yvec),
+                                            thread_level));
 
         // Postprocessing
         //
-        std::transform(exp_table.begin(), exp_table.end(),
-                       conv.begin(), column.begin(),
-                       std::multiplies<cplx_t>());
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    exp_table.size(),
+                    [&exp_table, &conv, &column]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            column[i] = exp_table[i] * conv[i];
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(exp_table.begin(), exp_table.end(),
+                           conv.begin(), column.begin(),
+                           std::multiplies<cplx_t>());
+        }
     }
 
-    static inline void transform_(result_type &column, bool reverse)  {
+    static inline void
+    transform_(result_type &column, bool reverse, long thread_level)  {
 
         const size_type col_s { column.size() };
 
         if (col_s == 0) [[unlikely]]
             return;
         if ((col_s & (col_s - 1)) == 0)  // Is power of 2
-            fft_radix2_(column, reverse);
+            fft_radix2_(column, reverse, thread_level);
         else  // More complicated algorithm for arbitrary sizes
-            fft_bluestein_(column, reverse);
+            fft_bluestein_(column, reverse, thread_level);
     }
 
-    static inline void itransform_(result_type &column)  {
+    static inline void
+    itransform_(result_type &column, long thread_level)  {
+
+        const size_type col_s { column.size() };
 
         // Conjugate the complex numbers
         //
-        std::transform(column.begin(), column.end(),
-                       column.begin(),
-                       [] (const cplx_t &v) -> cplx_t  {
-                           return (std::conj(v));
-                       });
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]  {
+                            auto    &val = column[i];
+
+                            val = std::conj(val);
+                        }
+                    });
 
-        const size_type col_s { column.size() };
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column.begin(), column.end(),
+                           column.begin(),
+                           [] (const cplx_t &v) -> cplx_t  {
+                               return (std::conj(v));
+                           });
+        }
 
         // Forward fft
         //
         if ((col_s & (col_s - 1)) == 0)  // Is power of 2
-            fft_radix2_(column, false);
+            fft_radix2_(column, false, thread_level);
         else  // More complicated algorithm for arbitrary sizes
-            fft_bluestein_(column, false);
+            fft_bluestein_(column, false, thread_level);
 
         // Conjugate the complex numbers again
+        // Then scale the numbers
         //
-        std::transform(column.begin(), column.end(),
-                       column.begin(),
-                       [] (const cplx_t &v) -> cplx_t  {
-                           return (std::conj(v));
-                       });
-
-        // Scale the numbers
-        //
-        std::transform(column.begin(), column.end(),
-                       column.begin(),
-                       [col_s] (const cplx_t &v) -> cplx_t  {
-                           return (v / real_t(col_s));
-                       });
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]  {
+                            auto    &val = column[i];
+
+                            val = std::conj(val);
+                        }
+                    });
+
+            for (auto &fut : futures)  fut.get();
+            futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column, col_s]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i) [[likely]]
+                            column[i] /= real_t(col_s);
+                    });
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column.begin(), column.end(),
+                           column.begin(),
+                           [] (const cplx_t &v) -> cplx_t  {
+                               return (std::conj(v));
+                           });
+            std::transform(column.begin(), column.end(),
+                           column.begin(),
+                           [col_s] (const cplx_t &v) -> cplx_t  {
+                               return (v / real_t(col_s));
+                           });
+        }
     }
 
 public:
@@ -692,25 +860,56 @@ struct  FastFourierTransVisitor  {
                 const H &column_begin, const H &column_end)  {
 
         GET_COL_SIZE
+
         result_type result (col_s);
 
-        if constexpr (is_complex<T>::value)  {
-            std::transform(column_begin, column_end,
-                           result.begin(),
-                           [] (T v) -> cplx_t  { return (v); });
+        if (thread_level_ > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            std::vector<std::future<void>>  futures;
+
+            if constexpr (is_complex<T>::value)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, &result]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i) [[likely]]
+                                result[i]= *(column_begin + i);
+                        });
+            }
+            else  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, &result]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i) [[likely]]
+                                result[i] =
+                                    std::complex<T>(*(column_begin + i), 0);
+                        });
+            }
+            for (auto &fut : futures)  fut.get();
         }
         else  {
-            std::transform(column_begin, column_end,
-                           result.begin(),
-                           [] (T v) -> cplx_t  {
-                               return (std::complex<T>(v, 0));
-                           });
+            if constexpr (is_complex<T>::value)  {
+                std::transform(column_begin, column_end,
+                               result.begin(),
+                               [] (T v) -> cplx_t  { return (v); });
+            }
+            else  {
+                std::transform(column_begin, column_end,
+                               result.begin(),
+                               [] (T v) -> cplx_t  {
+                                   return (std::complex<T>(v, 0));
+                               });
+            }
         }
 
         if (inverse_)
-            itransform_(result);
+            itransform_(result, thread_level_);
         else
-            transform_(result, false);
+            transform_(result, false, thread_level_);
         result_.swap(result);
     }
 
@@ -733,9 +932,29 @@ struct  FastFourierTransVisitor  {
     get_magnitude()  {
 
         if (magnitude_.empty())  {
-            magnitude_.reserve(result_.size());
-            for (const auto &citer : result_) [[likely]]
-                magnitude_.push_back(std::sqrt(std::norm(citer)));
+            const size_type col_s = result_.size();
+
+            if (thread_level_ > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+                magnitude_.resize(col_s);
+
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)
+                                this->magnitude_[i] =
+                                    std::sqrt(std::norm(this->result_[i]));
+                        });
+
+                for (auto &fut : futures)  fut.get();
+            }
+            else  {
+                magnitude_.reserve(col_s);
+                for (const auto &citer : result_) [[likely]]
+                    magnitude_.push_back(std::sqrt(std::norm(citer)));
+            }
         }
         return (magnitude_);
     }
@@ -749,19 +968,41 @@ struct  FastFourierTransVisitor  {
     get_angle()  {
 
         if (angle_.empty())  {
-            angle_.reserve(result_.size());
-            for (const auto &citer : result_) [[likely]]
-                angle_.push_back(std::arg(citer));
+            const size_type col_s = result_.size();
+
+            if (thread_level_ > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+                angle_.resize(col_s);
+
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)
+                                this->angle_[i] = std::arg(this->result_[i]);
+                        });
+
+                for (auto &fut : futures)  fut.get();
+            }
+            else  {
+                angle_.reserve(col_s);
+                for (const auto &citer : result_) [[likely]]
+                    angle_.push_back(std::arg(citer));
+            }
         }
         return (angle_);
     }
 
     explicit
-    FastFourierTransVisitor(bool inverse = false) : inverse_(inverse)  {   }
+    FastFourierTransVisitor(bool inverse = false)
+        : inverse_(inverse),
+          thread_level_(ThreadGranularity::get_thread_level())  {   }
 
 private:
 
     const bool      inverse_;
+    const long      thread_level_;
     result_type     result_ {  };
     vec_t<real_t>   magnitude_ {  };
     vec_t<real_t>   angle_ {  };
@@ -793,15 +1034,37 @@ struct  EntropyVisitor  {
 
         result_type result = std::move(sum_v.get_result());
 
-        std::transform(column_begin, column_end,
-                       result.begin(),
-                       result.begin(),
-                       [this](auto c, auto r) -> value_type  {
-                           const value_type    val = c / r;
+        if (ThreadGranularity::get_thread_level() > 2 &&
+            result.size() >= ThreadPool::MUL_THR_THHOLD)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        result.size(),
+                        [&column_begin, &result, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                value_type          &r = result[i];
+                                const value_type    val =
+                                    *(column_begin + i) / r;
+
+                                r = -val * std::log(val) /
+                                    std::log(this->log_base_);
+                            }
+                        });
+
+                for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result.begin(),
+                           result.begin(),
+                           [this](auto c, auto r) -> value_type  {
+                               const value_type    val = c / r;
 
-                           return (-val * std::log(val) /
-                                   std::log(this->log_base_));
-                       });
+                               return (-val * std::log(val) /
+                                       std::log(this->log_base_));
+                           });
+        }
 
         sum_v.pre();
         sum_v (idx_begin, idx_end,  // the idx iterators are unused
@@ -886,8 +1149,8 @@ struct  ImpurityVisitor  {
                     table.erase(find_ret);
 
                 auto    insert_ret =
-                    table.insert(std::pair(*(column_begin + (roll_end - 1)),
-                                           0));
+                    table.insert(
+                        std::pair(*(column_begin + (roll_end - 1)), 0));
 
                 insert_ret.first->second += 1.0;
                 return (true);
@@ -964,72 +1227,199 @@ struct  SigmoidVisitor  {
 private:
 
     template <forward_iterator H>
-    inline void logistic_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           return (1.0 / (1.0 + std::exp(-val)));
-                       });
+    inline void logistic_(const H &column_begin, const H &column_end,
+                          size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)
+                           this->result_[i] =
+                               T(1) / (T(1) + std::exp(-*(column_begin + i)));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                               return (T(1) / (T(1) + std::exp(-val)));
+                           });
+        }
     }
     template <forward_iterator H>
-    inline void algebraic_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           return (1.0 / std::sqrt(1.0 + std::pow(val, 2.0)));
-                       });
+    inline void algebraic_(const H &column_begin, const H &column_end,
+                           size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)
+                           this->result_[i] =
+                               T(1) /
+                               std::sqrt(T(1) +
+                                         std::pow(*(column_begin + i), T(2)));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                           return (T(1) /
+                                   std::sqrt(T(1) + std::pow(val, T(2))));
+                           });
+        }
     }
     template <forward_iterator H>
-    inline void hyperbolic_tan_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           return (std::tanh(val));
-                       });
+    inline void hyperbolic_tan_(const H &column_begin, const H &column_end,
+                                size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)
+                            this->result_[i] = std::tanh(*(column_begin + i));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                               return (std::tanh(val));
+                           });
+        }
     }
     template <forward_iterator H>
-    inline void arc_tan_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           return (std::atan(val));
-                       });
+    inline void arc_tan_(const H &column_begin, const H &column_end,
+                         size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)
+                            this->result_[i] = std::atan(*(column_begin + i));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                               return (std::atan(val));
+                           });
+        }
     }
     template <forward_iterator H>
-    inline void error_function_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           return (std::erf(val));
-                       });
+    inline void error_function_(const H &column_begin, const H &column_end,
+                                size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)
+                            this->result_[i] = std::erf(*(column_begin + i));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                               return (std::erf(val));
+                           });
+        }
     }
     template <forward_iterator H>
-    inline void gudermannian_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           return (std::atan(std::sinh(val)));
-                       });
+    inline void gudermannian_(const H &column_begin, const H &column_end,
+                              size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)
+                            this->result_[i] =
+                                std::atan(std::sinh(*(column_begin + i)));
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                               return (std::atan(std::sinh(val)));
+                           });
+        }
     }
     template <forward_iterator H>
-    inline void smoothstep_(const H &column_begin, const H &column_end)  {
-
-        std::transform(column_begin, column_end,
-                       std::back_inserter(result_),
-                       [](auto val) -> value_type  {
-                           if (val <= 0.0)
-                               return (0.0);
-                           else if (val >= 1.0)
-                               return (1.0);
-                           else
-                               return (val * val * (3.0 - 2.0 * val));
-                       });
+    inline void smoothstep_(const H &column_begin, const H &column_end,
+                            size_type col_s, long thread_level)  {
+
+        if (thread_level > 2 && col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&column_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)  {
+                            const value_type    val = *(column_begin + i);
+
+                            if (val <= 0)
+                                this->result_[i] = 0;
+                            else if (val >= T(1))
+                                this->result_[i] = T(1);
+                            else
+                                this->result_[i] =
+                                    val * val * (T(3) - T(2) * val);
+                        }
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            std::transform(column_begin, column_end,
+                           result_.begin(),
+                           [](auto val) -> value_type  {
+                               if (val <= 0)
+                                   return (0);
+                               else if (val >= T(1))
+                                   return (T(1));
+                               else
+                                   return (val * val * (T(3) - T(2) * val));
+                           });
+        }
     }
 
 public:
@@ -1039,21 +1429,25 @@ struct  SigmoidVisitor  {
     operator() (const K &, const K &,
                 const H &column_begin, const H &column_end)  {
 
-        result_.reserve(std::distance(column_begin, column_end));
+        GET_COL_SIZE2
+
+        const auto  thread_level = ThreadGranularity::get_thread_level();
+
+        result_.resize(std::distance(column_begin, column_end));
         if (sigmoid_type_ == sigmoid_type::logistic)
-            logistic_(column_begin, column_end);
+            logistic_(column_begin, column_end, col_s, thread_level);
         else if (sigmoid_type_ == sigmoid_type::algebraic)
-            algebraic_(column_begin, column_end);
+            algebraic_(column_begin, column_end, col_s, thread_level);
         else if (sigmoid_type_ == sigmoid_type::hyperbolic_tan)
-            hyperbolic_tan_(column_begin, column_end);
+            hyperbolic_tan_(column_begin, column_end, col_s, thread_level);
         else if (sigmoid_type_ == sigmoid_type::arc_tan)
-            arc_tan_(column_begin, column_end);
+            arc_tan_(column_begin, column_end, col_s, thread_level);
         else if (sigmoid_type_ == sigmoid_type::error_function)
-            error_function_(column_begin, column_end);
+            error_function_(column_begin, column_end, col_s, thread_level);
         else if (sigmoid_type_ == sigmoid_type::gudermannian)
-            gudermannian_(column_begin, column_end);
+            gudermannian_(column_begin, column_end, col_s, thread_level);
         else if (sigmoid_type_ == sigmoid_type::smoothstep)
-            smoothstep_(column_begin, column_end);
+            smoothstep_(column_begin, column_end, col_s, thread_level);
     }
 
     OBO_PORT_OPT
@@ -1096,70 +1490,205 @@ struct  RectifyVisitor  {
 
         GET_COL_SIZE2
 
-        result_.reserve(col_s);
-        if (rtype_ == rectify_type::ReLU)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void  {
-                              this->result_.push_back(std::max(T(0), v));
-                          });
-        }
-        else if (rtype_ == rectify_type::param_ReLU)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void  {
-                              this->result_.push_back(
-                                  std::max(v * this->param_, v));
-                          });
-        }
-        else if (rtype_ == rectify_type::GeLU)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void  {
-                              this->result_.push_back(
-                                  v * this->standard_normal_dist_(v));
-                          });
-        }
-        else if (rtype_ == rectify_type::SiLU)  {
-            sigm_v<T, I, A> sigm(sigmoid_type::logistic);
-
-            sigm.pre();
-            sigm(idx_begin, idx_end, column_begin, column_end);
-            sigm.post();
+        if (ThreadGranularity::get_thread_level() > 2 &&
+            col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            std::vector<std::future<void>>  futures;
+
+            result_.resize(col_s);
+            if (rtype_ == rectify_type::ReLU)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)
+                                this->result_[i] =
+                                    std::max(T(0), *(column_begin + i));
+                        });
+            }
+            else if (rtype_ == rectify_type::param_ReLU)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    v = *(column_begin + i);
+
+                                this->result_[i] =
+                                    std::max(v * this->param_, v);
+                            }
+                        });
+            }
+            else if (rtype_ == rectify_type::GeLU)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    v = *(column_begin + i);
+
+                                this->result_[i] =
+                                    v * this->standard_normal_dist_(v);
+                            }
+                        });
+            }
+            else if (rtype_ == rectify_type::SiLU)  {
+                sigm_v<T, I, A> sigm(sigmoid_type::logistic);
+
+                sigm.pre();
+                sigm(idx_begin, idx_end, column_begin, column_end);
+                sigm.post();
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, &sigm = std::as_const(sigm), this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    col = *(column_begin + i);
+                                const value_type    sig = sigm.get_result()[i];
+
+                                this->result_[i] = col * sig;
+                            }
+                        });
+            }
+            else if (rtype_ == rectify_type::softplus)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    v = *(column_begin + i);
+
+                                this->result_[i] = softp_(v, this->param_);
+                            }
+                        });
+            }
+            else if (rtype_ == rectify_type::elu)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    v = *(column_begin + i);
+
+                                if (v > 0)
+                                    this->result_[i] = v;
+                                else
+                                    this->result_[i] =
+                                        this->param_ * (std::exp(v) - T(1));
+                            }
+                        });
+            }
+            else if (rtype_ == rectify_type::mish)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    v = *(column_begin + i);
+
+                                this->result_[i] =
+                                    v * std::tanh(softp_(v, this->param_));
+                            }
+                        });
+            }
+            else if (rtype_ == rectify_type::metallic_mean)  {
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&column_begin, this]
+                        (auto begin, auto end) -> void  {
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    v = *(column_begin + i);
+
+                                this->result_[i] =
+                                    (v + std::sqrt(v * v + T(4))) / T(2);
+                            }
+                        });
+            }
 
-            std::transform(column_begin, column_end,
-                           sigm.get_result().begin(),
-                           std::back_inserter(result_),
-                           [](auto col, auto sig) -> value_type  {
-                               return (col * sig);
-                           });
-        }
-        else if (rtype_ == rectify_type::softplus)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void  {
-                              this->result_.push_back(softp_(v, this->param_));
-                          });
+            for (auto &fut : futures)  fut.get();
         }
-        else if (rtype_ == rectify_type::elu)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void   {
-                              if (v > 0)
-                                  this->result_.push_back(v);
-                              else
+        else  {
+            result_.reserve(col_s);
+            if (rtype_ == rectify_type::ReLU)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void  {
+                                  this->result_.push_back(std::max(T(0), v));
+                              });
+            }
+            else if (rtype_ == rectify_type::param_ReLU)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void  {
                                   this->result_.push_back(
-                                      this->param_ * (std::exp(v) - T(1)));
-                          });
-        }
-        else if (rtype_ == rectify_type::mish)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void  {
-                              this->result_.push_back(
-                                  v * std::tanh(softp_(v, this->param_)));
-                          });
-        }
-        else if (rtype_ == rectify_type::metallic_mean)  {
-            std::for_each(column_begin, column_end,
-                          [this](const value_type &v) -> void  {
-                              this->result_.push_back(
-                                  (v + std::sqrt(v * v + T(4))) / T(2));
-                          });
+                                      std::max(v * this->param_, v));
+                              });
+            }
+            else if (rtype_ == rectify_type::GeLU)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void  {
+                                  this->result_.push_back(
+                                      v * this->standard_normal_dist_(v));
+                              });
+            }
+            else if (rtype_ == rectify_type::SiLU)  {
+                sigm_v<T, I, A> sigm(sigmoid_type::logistic);
+
+                sigm.pre();
+                sigm(idx_begin, idx_end, column_begin, column_end);
+                sigm.post();
+
+                std::transform(column_begin, column_end,
+                               sigm.get_result().begin(),
+                               std::back_inserter(result_),
+                               [](auto col, auto sig) -> value_type  {
+                                   return (col * sig);
+                               });
+            }
+            else if (rtype_ == rectify_type::softplus)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void  {
+                                  this->result_.push_back(
+                                      softp_(v, this->param_));
+                              });
+            }
+            else if (rtype_ == rectify_type::elu)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void   {
+                                  if (v > 0)
+                                      this->result_.push_back(v);
+                                  else
+                                      this->result_.push_back(
+                                          this->param_ * (std::exp(v) - T(1)));
+                              });
+            }
+            else if (rtype_ == rectify_type::mish)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void  {
+                                  this->result_.push_back(
+                                      v * std::tanh(softp_(v, this->param_)));
+                              });
+            }
+            else if (rtype_ == rectify_type::metallic_mean)  {
+                std::for_each(column_begin, column_end,
+                              [this](const value_type &v) -> void  {
+                                  this->result_.push_back(
+                                      (v + std::sqrt(v * v + T(4))) / T(2));
+                              });
+            }
         }
     }
 
@@ -1223,13 +1752,35 @@ struct  PolicyLearningLossVisitor  {
 
         // Negative Log Likelihood
         //
-        result_.reserve(col_s);
-        std::transform(action_prob_begin, action_prob_end,
-                       reward_begin,
-                       std::back_inserter(result_),
-                       [](const T &ap, const T &r) -> T  {
-                           return (-std::log(ap) * r);
-                       });
+        if (ThreadGranularity::get_thread_level() > 2 &&
+            col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            result_.resize(col_s);
+
+            auto    futures =
+                ThreadGranularity::thr_pool_.parallel_loop(
+                    size_type(0),
+                    col_s,
+                    [&action_prob_begin, &reward_begin, this]
+                    (auto begin, auto end) -> void  {
+                        for (size_type i = begin; i < end; ++i)  {
+                            const value_type    ap = *(action_prob_begin + i);
+                            const value_type    r = *(reward_begin + i);
+
+                            this->result_[i] = -std::log(ap) * r;
+                        }
+                    });
+
+            for (auto &fut : futures)  fut.get();
+        }
+        else  {
+            result_.reserve(col_s);
+            std::transform(action_prob_begin, action_prob_end,
+                           reward_begin,
+                           std::back_inserter(result_),
+                           [](const T &ap, const T &r) -> T  {
+                               return (-std::log(ap) * r);
+                           });
+        }
     }
 
     DEFINE_PRE_POST
@@ -1245,7 +1796,7 @@ struct  PolicyLearningLossVisitor  {
 template<typename T, typename I = unsigned long, std::size_t A = 0>
 using plloss_v = PolicyLearningLossVisitor<T, I, A>;
 
-// -----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
 
 template<arithmetic T, typename I = unsigned long>
 struct  LossFunctionVisitor  {
@@ -1264,82 +1815,10 @@ struct  LossFunctionVisitor  {
 
         assert((col_s == size_type(std::distance(model_begin, model_end))));
 
-        if (lft_ == loss_function_type::kullback_leibler)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return (a * std::log(a / m));
-                                      });
-        }
-        else if (lft_ == loss_function_type::mean_abs_error)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return (std::fabs(a - m));
-                                      });
-            result_ /= col_s;
-        }
-        else if (lft_ == loss_function_type::mean_sqr_error)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          const T   val = a - m;
-
-                                          return (val * val);
-                                      });
-            result_ /= col_s;
-        }
-        else if (lft_ == loss_function_type::mean_sqr_log_error)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          const T   val = std::log(T(1) + a) -
-                                                          std::log(T(1) + m);
-
-                                          return (val * val);
-                                      });
-            result_ /= col_s;
-        }
-        else if (lft_ == loss_function_type::cross_entropy)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return (a * std::log(m));
-                                      });
-            result_ = -(result_ / col_s);
-        }
-        else if (lft_ == loss_function_type::binary_cross_entropy)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return (-(a * std::log(m)) +
-                                                  (1 - a) * std::log(1 - m));
-                                      });
-            result_ /= col_s;
-        }
-        else if (lft_ == loss_function_type::categorical_hinge)  {
-            const result_type   neg =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return ((T(1) - a) * m);
-                                      });
-            const result_type   pos =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return (a * m);
-                                      });
-
-            result_ = std::max(neg - pos + T(1), T(0));;
-        }
-        else if (lft_ == loss_function_type::cosine_similarity)  {
+        // The linear and parallel versions on this type are the same.
+        // So, I am taking it out of the if-else chain
+        //
+        if (lft_ == loss_function_type::cosine_similarity)  {
             DotProdVisitor<T, I>    dot_v;
 
             dot_v.pre();
@@ -1364,15 +1843,291 @@ struct  LossFunctionVisitor  {
             const result_type   m_mag = std::sqrt(dot_v.get_result());
 
             result_ = dot_prod / (a_mag * m_mag);
+            return;
+        }
+
+        if (ThreadGranularity::get_thread_level() > 2 &&
+            col_s >= ThreadPool::MUL_THR_THHOLD)  {
+            std::vector<std::future<void>>  futures;
+            if (lft_ == loss_function_type::kullback_leibler)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += a * std::log(a / m);
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+            }
+            else if (lft_ == loss_function_type::mean_abs_error)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += std::fabs(a - m);
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::mean_sqr_error)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    val =
+                                    *(actual_begin + i) - *(model_begin + i);
+
+                                sum += val * val;
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::mean_sqr_log_error)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    val =
+                                    std::log(T(1) + *(actual_begin + i)) -
+                                    std::log(T(1) + *(model_begin + i));
+
+                                sum += val * val;
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::cross_entropy)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += a * std::log(m);
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+                result_ = -(result_ / col_s);
+            }
+            else if (lft_ == loss_function_type::binary_cross_entropy)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += -(a * std::log(m)) +
+                                       (1 - a) * std::log(1 - m);
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::categorical_hinge)  {
+                auto        futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += (T(1) - a) * m;
+                            }
+                            return (sum);
+                        });
+                value_type  neg { 0 };
+
+                for (auto &fut : futures)  neg += fut.get();
+                futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += a * m;
+                            }
+                            return (sum);
+                        });
+
+                value_type  pos { 0 };
+
+                for (auto &fut : futures)  pos += fut.get();
+                result_ = std::max(neg - pos + T(1), T(0));;
+            }
+            else if (lft_ == loss_function_type::log_cosh)  {
+                auto    futures =
+                    ThreadGranularity::thr_pool_.parallel_loop(
+                        size_type(0),
+                        col_s,
+                        [&actual_begin, &model_begin]
+                        (auto begin, auto end) -> value_type  {
+                            value_type  sum { 0 };
+
+                            for (size_type i = begin; i < end; ++i)  {
+                                const value_type    a = *(actual_begin + i);
+                                const value_type    m = *(model_begin + i);
+
+                                sum += std::log(std::cosh(m - a));
+                            }
+                            return (sum);
+                        });
+
+                for (auto &fut : futures)  result_ += fut.get();
+                result_ /= col_s;
+            }
         }
-        else if (lft_ == loss_function_type::log_cosh)  {
-            result_ =
-                std::transform_reduce(actual_begin, actual_end,
-                                      model_begin, T(0), std::plus { },
-                                      [](const T &a, const T &m) -> T  {
-                                          return (std::log(std::cosh(m - a)));
-                                      });
-            result_ /= col_s;
+        else  {
+            if (lft_ == loss_function_type::kullback_leibler)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              return (a * std::log(a / m));
+                                          });
+            }
+            else if (lft_ == loss_function_type::mean_abs_error)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T {
+                                              return (std::fabs(a - m));
+                                          });
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::mean_sqr_error)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              const T   val = a - m;
+
+                                              return (val * val);
+                                          });
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::mean_sqr_log_error)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              const T   val =
+                                                  std::log(T(1) + a) -
+                                                  std::log(T(1) + m);
+
+                                              return (val * val);
+                                          });
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::cross_entropy)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              return (a * std::log(m));
+                                          });
+                result_ = -(result_ / col_s);
+            }
+            else if (lft_ == loss_function_type::binary_cross_entropy)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              return (
+                                                  -(a * std::log(m)) +
+                                                  (1 - a) * std::log(1 - m));
+                                          });
+                result_ /= col_s;
+            }
+            else if (lft_ == loss_function_type::categorical_hinge)  {
+                const result_type   neg =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              return ((T(1) - a) * m);
+                                          });
+                const result_type   pos =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              return (a * m);
+                                          });
+
+                result_ = std::max(neg - pos + T(1), T(0));;
+            }
+            else if (lft_ == loss_function_type::log_cosh)  {
+                result_ =
+                    std::transform_reduce(actual_begin, actual_end,
+                                          model_begin, T(0), std::plus { },
+                                          [](const T &a, const T &m) -> T  {
+                                              return (
+                                                  std::log(std::cosh(m - a)));
+                                          });
+                result_ /= col_s;
+            }
         }
     }
 
@@ -1395,7 +2150,7 @@ using loss_v = LossFunctionVisitor<T, I>;
 
 } // namespace hmdf
 
-// -----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
 
 // Local Variables:
 // mode:C++
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 80e440fe..7c4ebd4b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -41,6 +41,9 @@ endif()
 
 add_executable(dataframe_tester_3 dataframe_tester_3.cc)
 target_link_libraries(dataframe_tester_3 PRIVATE DataFrame)
+target_compile_options(dataframe_tester_3
+    PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/bigobj>
+)
 add_test(NAME dataframe_tester_3 COMMAND dataframe_tester_3)
 hmdf_target_data_files(dataframe_tester_3
     DATA_FILES
@@ -55,6 +58,9 @@ add_test(NAME dataframe_tester_schema COMMAND dataframe_tester_schema)
 
 add_executable(vectors_tester vectors_tester.cc)
 target_link_libraries(vectors_tester PRIVATE DataFrame)
+target_compile_options(vectors_tester
+    PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/bigobj>
+)
 add_test(NAME vectors_tester COMMAND vectors_tester)
 if(MSVC)
     # For some unknown reason to me, this test sigfaults in AppVeyor