Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simd optimization #145

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
48bade0
Add dummy test
mabean Feb 1, 2016
c5f5148
Add size test for BGR2GRAY + GuoHall
mabean Feb 1, 2016
387743a
Add size test for ImageResize
mabean Feb 1, 2016
cf92dbd
Add color test for ImageResize
mabean Feb 1, 2016
cc8e6e0
Add color test for bgr2gray
mabean Feb 1, 2016
2f5cf2d
Add correct work test for guohall
mabean Feb 1, 2016
c612d90
Merge branch 'add-new-google-tests'
mabean Feb 2, 2016
80b0939
Add perf test
mabean Feb 3, 2016
521f04e
fix merge conflict
mabean Feb 3, 2016
1f0e199
Merge https://github.com/itseez-academy/itseez-ws-2016-practice
mabean Feb 3, 2016
57a834d
Merge branch 'profiling-and-benchmarking'
mabean Feb 3, 2016
e3946ee
first step of optimization
mabean Feb 3, 2016
d8b4bb4
Current version of step 2
mabean Feb 3, 2016
990041a
second version of second step optimization
mabean Feb 4, 2016
ea69950
minor fix for second version
mabean Feb 4, 2016
0c8a9e3
fix merge problem
mabean Feb 4, 2016
20fb505
Merge https://github.com/itseez-academy/itseez-ws-2016-practice
mabean Feb 4, 2016
f4c878b
2 steps of the optimization
mabean Feb 4, 2016
71973ac
floor optimization in resize func
mabean Feb 4, 2016
9f5a4d9
deleting STL using for Travis
mabean Feb 4, 2016
3b1760b
fixes for perf_tests and optimization improve
mabean Feb 4, 2016
7e1b4fb
fix merge conflict in resize.cpp
mabean Feb 4, 2016
ca5b171
fix > >
mabean Feb 4, 2016
ff3de85
2.7 boost version
mabean Feb 4, 2016
5444d98
Working version of optimizations of GuoHall and Resize
mabean Feb 4, 2016
35c93d6
fix merge conflict in perf_skeleton
mabean Feb 5, 2016
b8f76fe
fix another merge conflict
mabean Feb 6, 2016
081c87c
Try to fix perf_skeleton
mabean Feb 6, 2016
86e3107
Merge branch 'master' of https://github.com/itseez-academy/itseez-ws-…
mabean Feb 6, 2016
0d93736
SIMD optimized version
mabean Feb 6, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 52 additions & 52 deletions perf/perf_skeleton.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,32 @@ using std::tr1::get;

typedef perf::TestBaseWithParam<Size> Size_Only;

// Performance test for ImageResize_optimized, run once per size listed in MAT_SIZES.
// In addition to timing, the optimized output is compared against the output of
// ImageResize on the same input, allowing a per-pixel difference of at most 1.
PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
{
Size sz = GetParam();
// Destination size: width divided by 1.7 and height by 1.4; the floating-point
// quotients are converted to Size's coordinate type on construction.
Size sz_to(sz.width / 1.7, sz.height / 1.4);

cv::Mat src(sz, CV_8UC1);
cv::Mat dst(Size(sz_to), CV_8UC1);
cv::Mat gold(Size(sz_to), CV_8UC1);
// Register src as the test input and dst as the test output with the perf framework.
declare.in(src, WARMUP_RNG).out(dst);

// Fixed seed so every run sees the same input data.
// NOTE(review): cv::RNG::fill takes a distribution type as its second argument, not a
// matrix type; this presumably works only because CV_8UC1 and RNG::UNIFORM are both 0 — confirm.
cv::RNG rng(234231412);
rng.fill(src, CV_8UC1, 0, 255);

// Reference result from the non-optimized implementation (computed once, outside the timed loop).
ImageResize(src, gold, sz_to);

// Timed region: only the optimized implementation is measured.
TEST_CYCLE()
{
ImageResize_optimized(src, dst, sz_to);
}

// Correctness check: zero out absolute differences that do not exceed 1
// (THRESH_TOZERO with threshold 1), then require that nothing is left.
cv::Mat diff; cv::absdiff(dst, gold, diff);
cv::threshold(diff, diff, 1, 0, cv::THRESH_TOZERO);
ASSERT_EQ(0, cv::countNonZero(diff));

SANITY_CHECK(dst);
}
//PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
//{
// Size sz = GetParam();
// Size sz_to(sz.width / 1.7, sz.height / 1.4);
//
// cv::Mat src(sz, CV_8UC1);
// cv::Mat dst(Size(sz_to), CV_8UC1);
// cv::Mat gold(Size(sz_to), CV_8UC1);
// declare.in(src, WARMUP_RNG).out(dst);
//
// cv::RNG rng(234231412);
// rng.fill(src, CV_8UC1, 0, 255);
//
// ImageResize(src, gold, sz_to);
//
// TEST_CYCLE()
// {
// ImageResize_optimized(src, dst, sz_to);
// }
//
// cv::Mat diff; cv::absdiff(dst, gold, diff);
// cv::threshold(diff, diff, 1, 0, cv::THRESH_TOZERO);
// ASSERT_EQ(0, cv::countNonZero(diff));
//
// SANITY_CHECK(dst);
//}

//
// Test(s) for the skeletonize function
Expand All @@ -77,31 +77,31 @@ PERF_TEST_P(Size_Only, ImageResize, testing::Values(MAT_SIZES))
// Test(s) for the Thinning function
//

// Performance test for GuoHallThinning_optimized, run once per size listed in MAT_SIZES.
// In addition to timing, the optimized output must match the output of GuoHallThinning
// on the same input exactly (no differing pixels).
PERF_TEST_P(Size_Only, Thinning, testing::Values(MAT_SIZES))
{
Size sz = GetParam();

cv::Mat image(sz, CV_8UC1);
// The same matrix is registered as both the test input and the test output.
declare.in(image, WARMUP_RNG).out(image);
// NOTE(review): presumably raises the framework's time budget for this test (value 40) — confirm the unit.
declare.time(40);

// Fixed seed so every run sees the same input data.
// NOTE(review): cv::RNG::fill takes a distribution type as its second argument, not a
// matrix type; this presumably works only because CV_8UC1 and RNG::UNIFORM are both 0 — confirm.
cv::RNG rng(234231412);
rng.fill(image, CV_8UC1, 0, 255);
// Binarize in place with inversion: pixels above 240 become 0, all others become 255.
cv::threshold(image, image, 240, 255, cv::THRESH_BINARY_INV);

// Reference result from the non-optimized implementation (computed once, outside the timed loop).
cv::Mat gold; GuoHallThinning(image, gold);

cv::Mat thinned_image;
// Timed region: only the optimized implementation is measured.
TEST_CYCLE()
{
GuoHallThinning_optimized(image, thinned_image);
}

// Correctness check: the optimized result must be pixel-identical to the reference.
cv::Mat diff; cv::absdiff(thinned_image, gold, diff);
ASSERT_EQ(0, cv::countNonZero(diff));

// NOTE(review): the sanity check is applied to the input `image`, not to `thinned_image`;
// confirm whether the thinned output was meant to be checked instead.
SANITY_CHECK(image);
}
//PERF_TEST_P(Size_Only, Thinning, testing::Values(MAT_SIZES))
//{
// Size sz = GetParam();
//
// cv::Mat image(sz, CV_8UC1);
// declare.in(image, WARMUP_RNG).out(image);
// declare.time(40);
//
// cv::RNG rng(234231412);
// rng.fill(image, CV_8UC1, 0, 255);
// cv::threshold(image, image, 240, 255, cv::THRESH_BINARY_INV);
//
// cv::Mat gold; GuoHallThinning(image, gold);
//
// cv::Mat thinned_image;
// TEST_CYCLE()
// {
// GuoHallThinning_optimized(image, thinned_image);
// }
//
// cv::Mat diff; cv::absdiff(thinned_image, gold, diff);
// ASSERT_EQ(0, cv::countNonZero(diff));
//
// SANITY_CHECK(image);
//}

PERF_TEST_P(Size_Only, ConvertColor_fpt, testing::Values(MAT_SIZES))
{
Expand Down Expand Up @@ -189,4 +189,4 @@ TEST(CompleteColorSpace, ConvertColor_fpt)
EXPECT_EQ(0, cv::countNonZero(diff1));
ASSERT_LT(cv::countNonZero(diff), 7565);
// ASSERT_EQ(0, cv::countNonZero(diff));
}
}
47 changes: 38 additions & 9 deletions src/convertcolor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
dst.create(sz, CV_8UC1);

#ifdef HAVE_SSE
// __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
// __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
// __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
// __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
// __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
// __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
__m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
__m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
__m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
__m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
__m128i ssse3_red_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_2 = _mm_set_epi8(15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
Expand All @@ -101,6 +101,7 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
__m128i zero = _mm_setzero_si128();
#endif


for (int y = 0; y < sz.height; y++)
{
const uchar *psrc = src.ptr<uchar>(y);
Expand All @@ -119,10 +120,37 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
__m128i red = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_red_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_red_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_red_indices_2));
__m128i green = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_green_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_green_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_green_indices_2));
__m128i blue = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_blue_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_blue_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_blue_indices_2));

__m128i red8_1 = _mm_unpacklo_epi8(red,zero);
__m128i red8_2 = _mm_unpackhi_epi8(red,zero);

__m128i green8_1 = _mm_unpacklo_epi8(green,zero);
__m128i green8_2 = _mm_unpackhi_epi8(green,zero);

__m128i blue8_1 = _mm_unpacklo_epi8(blue,zero);
__m128i blue8_2 = _mm_unpackhi_epi8(blue,zero);

__m128i bluecomp1 = _mm_mullo_epi16(blue8_1,blue_coeff);
__m128i greencomp1 = _mm_mullo_epi16(green8_1,green_coeff);
__m128i redcomp1 = _mm_mullo_epi16(red8_1,red_coeff);

/* ??? */
__m128i bluecomp2 = _mm_mullo_epi16(blue8_2,blue_coeff);
__m128i greencomp2 = _mm_mullo_epi16(green8_2,green_coeff);
__m128i redcomp2 = _mm_mullo_epi16(red8_2,red_coeff);

__m128i gray_packed; // Initialize it properly
__m128i graypack1 = _mm_add_epi16( _mm_add_epi16( _mm_add_epi16(greencomp1,bluecomp1), redcomp1), bias);
__m128i graypack2 = _mm_add_epi16( _mm_add_epi16( _mm_add_epi16(greencomp2,bluecomp2), redcomp2), bias);

__m128i gp1s = _mm_srli_epi16(graypack1,8);
__m128i gp2s = _mm_srli_epi16(graypack2,8);

__m128i gray_packed = _mm_packus_epi16(gp1s,gp2s); // Initialize it properly

_mm_storeu_si128((__m128i*)(pdst + x), gray_packed);
}
Expand All @@ -131,12 +159,13 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
// Process leftover pixels
for (; x < sz.width; x++)
{

float color = 0.2126 * psrc[3 * x + 2] + 0.7152 * psrc[3 * x + 1] + 0.0722 * psrc[3 * x];
pdst[x] = (int)(color + 0.5);
}
}

// ! Remove this before writing your optimizations !
ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
//ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
// ! Remove this before writing your optimizations !
}
124 changes: 85 additions & 39 deletions src/resize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,43 +46,89 @@ void ImageResize(const cv::Mat &src, cv::Mat &dst, const cv::Size sz)

// Resizes the single-channel 8-bit image `src` into `dst` (created here with size `sz`
// and the type of `src`) by bilinear interpolation of the four neighbouring source pixels.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers. Per the hunk
// summary ("85 additions & 39 deletions") the first body below (up to and including its
// row/column loop nest) is the implementation being removed, and everything after it is
// the replacement. The two bodies redeclare the same locals (sz_src, src_rows, ...), so
// they cannot be compiled together as shown.
void ImageResize_optimized(const cv::Mat &src, cv::Mat &dst, const cv::Size sz)
{
// ---- Removed body: per-pixel coordinate mapping with floor() and full clamping ----
CV_Assert(CV_8UC1 == src.type());
cv::Size sz_src = src.size();
dst.create(sz, src.type());

const int src_rows = src.rows;
const int src_cols = src.cols;

const int dst_rows = sz.height;
const int dst_cols = sz.width;

for (int row = 0; row < dst_rows; row++)
{
uchar *ptr_dst = dst.ptr<uchar>(row);

for (int col = 0; col < dst_cols; col++)
{
// Map the centre of the destination pixel to source coordinates.
const float x = (((float)col) + .5f) * sz_src.width / sz.width - .5f;
const float y = (((float)row) + .5f) * sz_src.height / sz.height - .5f;

const int ix = (int)floor(x);
const int iy = (int)floor(y);

// Clamp the two neighbour indices per axis to the valid source range.
const int x1 = (ix < 0) ? 0 : ((ix >= src_cols) ? src_cols - 1 : ix);
const int x2 = (ix < 0) ? 0 : ((ix >= src_cols - 1) ? src_cols - 1 : ix + 1);
const int y1 = (iy < 0) ? 0 : ((iy >= src_rows) ? src_rows - 1 : iy);
const int y2 = (iy < 0) ? 0 : ((iy >= src_rows - 1) ? src_rows - 1 : iy + 1);

const uchar q11 = src.at<uchar>(y1, x1);
const uchar q12 = src.at<uchar>(y2, x1);
const uchar q21 = src.at<uchar>(y1, x2);
const uchar q22 = src.at<uchar>(y2, x2);

// When clamping collapses the neighbours on one or both axes, interpolate along
// the remaining axis only (or take the single pixel); otherwise use all four.
const int temp = ((x1 == x2) && (y1 == y2)) ? (int)q11 :
( (x1 == x2) ? (int)(q11 * (y2 - y) + q22 * (y - y1)) :
( (y1 == y2) ? (int)(q11 * (x2 - x) + q22 * (x - x1)) :
(int)(q11 * (x2 - x) * (y2 - y) + q21 * (x - x1) * (y2 - y) + q12 * (x2 - x) * (y - y1) + q22 * (x - x1) * (y - y1))));
// Saturate to the 8-bit range before storing.
ptr_dst[col] = (temp < 0) ? 0 : ((temp > 255) ? 255 : (uchar)temp);
}
}
// ---- Added body: hoisted scale factors plus an unclamped fast path for downscaling ----
CV_Assert(CV_8UC1 == src.type());
cv::Size sz_src = src.size();
dst.create(sz, src.type());

const int src_rows = src.rows;
const int src_cols = src.cols;

const int dst_rows = sz.height;
const int dst_cols = sz.width;

// Loop-invariant parts of the coordinate mapping:
// (col + .5) * xscale - .5 is rewritten as col * xscale + x0 (likewise for rows).
const float xscale = (float)sz_src.width / sz.width;
const float x0 = .5f * xscale - .5f;

const float yscale = (float)sz_src.height / sz.height;
const float y0 = .5f * yscale - .5f;
// Fast path: destination is no larger than the source in both dimensions, so both
// scales are >= 1, x0/y0 are >= 0 and the mapped coordinates are never negative.
if (dst_cols <= src_cols && dst_rows <= src_rows)
{
for (int row = 0; row < dst_rows; row++)
{
uchar *ptr_dst = dst.ptr<uchar>(row);

for (int col = 0; col < dst_cols; col++)
{
const float x = col * xscale + x0;
const float y = row * yscale + y0;


// Coordinates are non-negative here, so truncation gives the same result as floor().
const int ix = (int)x;
const int iy = (int)y;

// No clamping on this path.
// NOTE(review): if dst_cols == src_cols (xscale == 1) the last column yields
// ix == src_cols - 1 and x2 == src_cols, one past the last valid column; the same
// holds for rows when dst_rows == src_rows. Confirm callers never pass an equal
// dimension, or restore the upper clamp.
const int x1 = ix;
const int x2 = ix + 1;
const int y1 = iy;
const int y2 = iy + 1;

const uchar q11 = src.at<uchar>(y1, x1);
const uchar q12 = src.at<uchar>(y2, x1);
const uchar q21 = src.at<uchar>(y1, x2);
const uchar q22 = src.at<uchar>(y2, x2);

// x2 == x1 + 1 and y2 == y1 + 1 always hold on this path, so only the final
// four-pixel branch of this expression can be taken.
const int temp = ((x1 == x2) && (y1 == y2)) ? (int)q11 :
( (x1 == x2) ? (int)(q11 * (y2 - y) + q22 * (y - y1)) :
( (y1 == y2) ? (int)(q11 * (x2 - x) + q22 * (x - x1)) :
(int)(q11 * (x2 - x) * (y2 - y) + q21 * (x - x1) * (y2 - y) + q12 * (x2 - x) * (y - y1) + q22 * (x - x1) * (y - y1))));
// Unlike the removed body, the value is cast without saturating to [0, 255].
ptr_dst[col] =(uchar)temp;


}
}
}
// General path (destination larger than the source in at least one dimension):
// coordinates may be negative, so neighbour indices are clamped as in the removed body.
else
for (int row = 0; row < dst_rows; row++)
{
uchar *ptr_dst = dst.ptr<uchar>(row);

for (int col = 0; col < dst_cols; col++)
{
const float x = col * xscale + x0;
const float y = row * yscale + y0;


// Truncation equals floor() for positive values; floor() is only called otherwise.
const int ix = x > 0 ? (int)x : (int)floor(x);
const int iy = y > 0 ? (int)y : (int)floor(y);

const int x1 = (ix < 0) ? 0 : ((ix >= src_cols) ? src_cols - 1 : ix);

const int x2 = (ix < 0) ? 0 : ((ix >= src_cols - 1) ? src_cols - 1 : ix + 1);
const int y1 = (iy < 0) ? 0 : ((iy >= src_rows) ? src_rows - 1 : iy);
const int y2 = (iy < 0) ? 0 : ((iy >= src_rows - 1) ? src_rows - 1 : iy + 1);

const uchar q11 = src.at<uchar>(y1, x1);
const uchar q12 = src.at<uchar>(y2, x1);
const uchar q21 = src.at<uchar>(y1, x2);
const uchar q22 = src.at<uchar>(y2, x2);

// Same degenerate-case handling as the removed body when clamping collapses an axis.
const int temp = ((x1 == x2) && (y1 == y2)) ? (int)q11 :
( (x1 == x2) ? (int)(q11 * (y2 - y) + q22 * (y - y1)) :
( (y1 == y2) ? (int)(q11 * (x2 - x) + q22 * (x - x1)) :
(int)(q11 * (x2 - x) * (y2 - y) + q21 * (x - x1) * (y2 - y) + q12 * (x2 - x) * (y - y1) + q22 * (x - x1) * (y - y1))));
// Unlike the removed body, the value is cast without saturating to [0, 255].
ptr_dst[col] =(uchar)temp;


}
}
}

10 changes: 10 additions & 0 deletions src/skeleton_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,38 @@ void skeletonize(const cv::Mat &input, cv::Mat &output, bool save_images)
TE(imwrite_0);

// Convert to grayscale
TS(bgr2gray);
cv::Mat gray_image;
ConvertColor_BGR2GRAY_BT709(input, gray_image);
if (save_images) cv::imwrite("1-convertcolor.png", gray_image);
TE(bgr2gray);

// Downscale input image
TS(imageresize);
cv::Mat small_image;
cv::Size small_size(input.cols / 1.5, input.rows / 1.5);
ImageResize(gray_image, small_image, small_size);
if (save_images) cv::imwrite("2-resize.png", small_image);
TE(imageresize);

// Binarization and inversion
TS(threshold);
cv::threshold(small_image, small_image, 128, 255, cv::THRESH_BINARY_INV);
if (save_images) cv::imwrite("3-threshold.png", small_image);
TE(threshold);

// Thinning
TS(guohall);
cv::Mat thinned_image;
GuoHallThinning(small_image, thinned_image);
if (save_images) cv::imwrite("4-thinning.png", thinned_image);
TE(guohall);

// Back inversion
TS(imwrite_1);
output = 255 - thinned_image;
if (save_images) cv::imwrite("5-output.png", output);
TE(imwrite_1);

TE(total);
}
Loading