From 92ef2523935c67c6928c992bab3e05b8ff1be902 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Fri, 3 Jan 2025 13:30:21 +0800 Subject: [PATCH] Optimize CPU and Memory performance for Resize linear mode parser Revise implemenation based on reviewer comments. Update performance comparison accordingly. +-------+--------------+----------------+----------------+-------------+ | n_dim | out_elements | New t-CPU (us) | Old t-CPU (us) | t-CPU Ratio | +-------+--------------+----------------+----------------+-------------+ | 4 | 786432 | 120405 | 1494350 | 0.0806 | | 4 | 1572864 | 282763 | 3826060 | 0.0739 | | 4 | 3145728 | 650957 | 7941436 | 0.0820 | | 4 | 6291456 | 1304652 | 14869059 | 0.0877 | | 4 | 12582912 | 2608523 | 29432326 | 0.0886 | | 4 | 25165824 | 5175560 | 58848631 | 0.0879 | | 4 | 50331648 | 10486676 | 118005802 | 0.0889 | | 4 | 100663296 | 21141464 | OOM Kill | N/A | +-------+--------------+----------------+----------------+-------------+ Signed-off-by: Colin Xu --- src/onnx/parse_resize.cpp | 49 ++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/onnx/parse_resize.cpp b/src/onnx/parse_resize.cpp index bb4254c55f1..1cf61db6739 100644 --- a/src/onnx/parse_resize.cpp +++ b/src/onnx/parse_resize.cpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -40,7 +41,7 @@ namespace onnx { * Output: vector contains the result of space index. * * From vvv_ind: - * layer-1: size of 1st dimension, caller will pass as n_dim + * layer-1: size of 1st dimension, caller will pass as n_bits * layer-2: hardcode to 2 by caller * layer-3: a vector of out_elements (caller pass) integers. * vvv_ind = { @@ -53,7 +54,7 @@ namespace onnx { * * To Compose a series of vector of indices, which will further be used to get space index from * the input shape. - * indices{} has (2^n_dim) * out_elements members, each member is a vector of n_dim indices. + * indices{} has (2^n_bits) * out_elements members, each member is a vector of n_bits indices. * indices = { * {...}, * {...}, @@ -77,15 +78,15 @@ namespace onnx { * {1} * }; * - * For each number within [0, (2^n_dim)) (outer loop), map each bit (inner loop, MSB to LSB) - * to n_dim, and pick A|B, C|D, E|F, G|H based on bit 0|1. - * i.e. 0110b -> A'D'F'G' - * Transpose A to A' and repeat for each elements within A' (medium loop). - * Use the new crafted vector of n_dim indices, get the mapping from shape in_s. + * Outer loop: + * Iterate all values within range [0, (2^n_bits)) and maps to bitset for inner loop (MSB to LSB). + * Middle loop: + * Transform all elements in layer-3: take indices from inner loop to get index from input shape, + append to vec_ind. + * Inner loop: + * Compose a vector of indices by iterating all layer-1 using current bitset from current element. * - * Outer loop: loop all values within range [0, (2^n_dim)) - * Medium loop: loop all elements within layer-3, range [0, m_elements) - * Inner loop: loop all bits of the value of current outer loop + * i.e. val = 6 -> bitset 0110b -> indices: pick each value from A'D'F'G' -> in_s.index(indices) */ static std::vector @@ -96,20 +97,26 @@ calc_neighbor_points(const std::vector>>& v std::size_t m_elements = vvv_ind[0][0].size(); std::vector vec_ind; + if(n_bits >= std::numeric_limits::digits) + { + throw std::runtime_error("Shape dimension " + std::to_string(n_bits) + " exceeds " + + std::to_string(std::numeric_limits::digits)); + } + for(std::size_t val = 0; val < (std::size_t{1} << n_bits); val++) { + std::bitset::digits> bits_val = val; std::vector indices(n_bits); - for(std::size_t i_element = 0; i_element < m_elements; i_element++) - { - std::size_t bits_val = val; - indices.clear(); - for(std::size_t dim = 0; dim < n_bits; dim++) - { - indices.push_back(vvv_ind[dim][bits_val & std::size_t{1}][i_element]); - bits_val >>= std::size_t{1}; - } - vec_ind.push_back(in_s.index(indices)); - } + transform( + range(m_elements), std::back_inserter(vec_ind), [&](const std::size_t& i_element) { + transform(vvv_ind, + range(n_bits), + indices.begin(), + [&](const auto& vv_ind, const std::size_t& bit) { + return vv_ind[bits_val[bit]][i_element]; + }); + return in_s.index(indices); + }); } return vec_ind;