Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorization #146

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions perf/perf_skeleton.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
// Test(s) for the Thinning function
//


PERF_TEST_P(Size_Only, Thinning, testing::Values(MAT_SIZES))
{
Size sz = GetParam();
Expand Down Expand Up @@ -190,3 +191,4 @@ TEST(CompleteColorSpace, ConvertColor_fpt)
ASSERT_LT(cv::countNonZero(diff), 7565);
// ASSERT_EQ(0, cv::countNonZero(diff));
}

66 changes: 52 additions & 14 deletions src/convertcolor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
dst.create(sz, CV_8UC1);

#ifdef HAVE_SSE
// __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
// __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
// __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
// __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
// __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
// __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
__m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
__m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
__m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
__m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
__m128i ssse3_red_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_2 = _mm_set_epi8(15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
Expand All @@ -100,7 +100,7 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
__m128i bias = _mm_set1_epi16(128);
__m128i zero = _mm_setzero_si128();
#endif

int k;
for (int y = 0; y < sz.height; y++)
{
const uchar *psrc = src.ptr<uchar>(y);
Expand All @@ -120,23 +120,61 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
_mm_shuffle_epi8(chunk1, ssse3_red_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_red_indices_2));

/* ??? */

__m128i gray_packed; // Initialize it properly
__m128i green = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_green_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_green_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_green_indices_2));

__m128i blue = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_blue_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_blue_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_blue_indices_2));

__m128i big_red_1 = _mm_unpacklo_epi8(red, zero);
__m128i big_red_2 = _mm_unpackhi_epi8(red, zero);
__m128i big_green_1 = _mm_unpacklo_epi8(green, zero);
__m128i big_green_2 = _mm_unpackhi_epi8(green, zero);
__m128i big_blue_1 = _mm_unpacklo_epi8(blue, zero);
__m128i big_blue_2 = _mm_unpackhi_epi8(blue, zero);

__m128i red_summand_1 = _mm_mullo_epi16(red_coeff, big_red_1);
__m128i red_summand_2 = _mm_mullo_epi16(red_coeff, big_red_2);
__m128i green_summand_1 = _mm_mullo_epi16(green_coeff, big_green_1);
__m128i green_summand_2 = _mm_mullo_epi16(green_coeff, big_green_2);
__m128i blue_summand_1 = _mm_mullo_epi16(blue_coeff, big_blue_1);
__m128i blue_summand_2 = _mm_mullo_epi16(blue_coeff, big_blue_2);

__m128i first_oper_1 = _mm_add_epi16(zero, red_summand_1);
__m128i second_oper_1 = _mm_add_epi16(first_oper_1, green_summand_1);
__m128i third_oper_1 = _mm_add_epi16(second_oper_1, blue_summand_1);
__m128i first_oper_2 = _mm_add_epi16(zero, red_summand_2);
__m128i second_oper_2 = _mm_add_epi16(first_oper_2, green_summand_2);
__m128i third_oper_2 = _mm_add_epi16(second_oper_2, blue_summand_2);

__m128i gray_packed_1 = _mm_add_epi16(third_oper_1, bias);
__m128i gray_packed_2 = _mm_add_epi16(third_oper_2, bias);

__m128i gray_packed_shift_1 = _mm_srli_epi16(gray_packed_1, 8);
__m128i gray_packed_shift_2 = _mm_srli_epi16(gray_packed_2, 8);

__m128i gray_packed = _mm_packus_epi16(gray_packed_shift_1, gray_packed_shift_2);

_mm_storeu_si128((__m128i*)(pdst + x), gray_packed);
}
#endif

// Process leftover pixels
for (; x < sz.width; x++)
int shift = 16;

short rw = (short)(0.2126 * (1 << shift) + 0.5);
short gw = (short)(0.7152 * (1 << shift) + 0.5);
short bw = (short)(0.0722 * (1 << shift) + 0.5);

for (; x < sz.width; x++)
{
float color = 0.2126 * psrc[3 * x + 2] + 0.7152 * psrc[3 * x + 1] + 0.0722 * psrc[3 * x];
pdst[x] = (int)(color + 0.5);
pdst[x] = (rw * psrc[3 * x + 2] + gw * psrc[3 * x + 1] + bw * psrc[3 * x] + (1<<(shift-1))) >> shift;
}
}

// ! Remove this before writing your optimizations !
ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
// ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
// ! Remove this before writing your optimizations !
}
48 changes: 26 additions & 22 deletions src/resize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ void ImageResize(const cv::Mat &src, cv::Mat &dst, const cv::Size sz)
( (x1 == x2) ? (int)(q11 * (y2 - y) + q22 * (y - y1)) :
( (y1 == y2) ? (int)(q11 * (x2 - x) + q22 * (x - x1)) :
(int)(q11 * (x2 - x) * (y2 - y) + q21 * (x - x1) * (y2 - y) + q12 * (x2 - x) * (y - y1) + q22 * (x - x1) * (y - y1))));
ptr_dst[col] = (temp < 0) ? 0 : ((temp > 255) ? 255 : (uchar)temp);

ptr_dst[col] = (temp < 0) ? 0 : ((temp > 255) ? 255 : (uchar)temp);
}
}
}
Expand All @@ -50,39 +51,42 @@ void ImageResize_optimized(const cv::Mat &src, cv::Mat &dst, const cv::Size sz)
cv::Size sz_src = src.size();
dst.create(sz, src.type());

const int src_rows = src.rows;
const int src_cols = src.cols;
int src_rows = src.rows;
int src_cols = src.cols;

const int dst_rows = sz.height;
const int dst_cols = sz.width;
int dst_rows = sz.height;
int dst_cols = sz.width;
float xscale = (float)sz_src.width / sz.width;
float yscale = (float)sz_src.height / sz.height;

for (int row = 0; row < dst_rows; row++)
{
uchar *ptr_dst = dst.ptr<uchar>(row);

for (int col = 0; col < dst_cols; col++)
{
const float x = (((float)col) + .5f) * sz_src.width / sz.width - .5f;
const float y = (((float)row) + .5f) * sz_src.height / sz.height - .5f;
float x = (((float)col) + .5f) * xscale - .5f;
float y = (((float)row) + .5f) * yscale - .5f;

const int ix = (int)floor(x);
const int iy = (int)floor(y);
int ix = (x > 0) ? (int)x : (int)floor(x);
int iy = (y > 0) ? (int)y : (int)floor(y);

const int x1 = (ix < 0) ? 0 : ((ix >= src_cols) ? src_cols - 1 : ix);
const int x2 = (ix < 0) ? 0 : ((ix >= src_cols - 1) ? src_cols - 1 : ix + 1);
const int y1 = (iy < 0) ? 0 : ((iy >= src_rows) ? src_rows - 1 : iy);
const int y2 = (iy < 0) ? 0 : ((iy >= src_rows - 1) ? src_rows - 1 : iy + 1);
int x1 = ix;
int x2 = (ix >= src_cols - 1) ? src_cols - 1 : ix + 1;
int y1 = iy;
int y2 = (iy >= src_rows - 1) ? src_rows - 1 : iy + 1;

const uchar q11 = src.at<uchar>(y1, x1);
const uchar q12 = src.at<uchar>(y2, x1);
const uchar q21 = src.at<uchar>(y1, x2);
const uchar q22 = src.at<uchar>(y2, x2);
int q11 = src.at<uchar>(y1, x1);
int q12 = src.at<uchar>(y2, x1);
int q21 = src.at<uchar>(y1, x2);
int q22 = src.at<uchar>(y2, x2);

const int temp = ((x1 == x2) && (y1 == y2)) ? (int)q11 :
( (x1 == x2) ? (int)(q11 * (y2 - y) + q22 * (y - y1)) :
( (y1 == y2) ? (int)(q11 * (x2 - x) + q22 * (x - x1)) :
(int)(q11 * (x2 - x) * (y2 - y) + q21 * (x - x1) * (y2 - y) + q12 * (x2 - x) * (y - y1) + q22 * (x - x1) * (y - y1))));
ptr_dst[col] = (temp < 0) ? 0 : ((temp > 255) ? 255 : (uchar)temp);
int temp = ((x1 == x2) && (y1 == y2)) ? q11 :
( (x1 == x2) ? (q11 * (y2 - y) + q22 * (y - y1)) :
( (y1 == y2) ? (q11 * (x2 - x) + q22 * (x - x1)) :
(q11 * (x2 - x) * (y2 - y) + q21 * (x - x1) * (y2 - y) + q12 * (x2 - x) * (y - y1) + q22 * (x - x1) * (y - y1))));

ptr_dst[col] = (uchar)temp;
}
}
}
3 changes: 2 additions & 1 deletion src/thinning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void GuoHallThinning(const cv::Mat& src, cv::Mat& dst)

static void GuoHallIteration_optimized(cv::Mat& im, int iter)
{
cv::Mat marker = cv::Mat::zeros(im.size(), CV_8UC1);
cv::Mat marker = cv::Mat::zeros(im.size(), CV_8UC1);

for (int i = 1; i < im.rows-1; i++)
{
Expand Down Expand Up @@ -99,6 +99,7 @@ void GuoHallThinning_optimized(const cv::Mat& src, cv::Mat& dst)
cv::Mat prev = cv::Mat::zeros(src.size(), CV_8UC1);
cv::Mat diff;


do
{
GuoHallIteration_optimized(dst, 0);
Expand Down