Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intrinsics #151

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 68 additions & 17 deletions src/convertcolor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ void ConvertColor_BGR2GRAY_BT709_fpt(const cv::Mat& src, cv::Mat& dst)
cv::Size sz = src.size();
dst.create(sz, CV_8UC1);

int shift = 16;
int bias = 0;
int shift = 8;

unsigned rw = (unsigned)(0.2126 * (1 << shift) + 0.5);
unsigned gw = (unsigned)(0.7152 * (1 << shift) + 0.5);
Expand All @@ -70,10 +69,14 @@ void ConvertColor_BGR2GRAY_BT709_fpt(const cv::Mat& src, cv::Mat& dst)
const cv::Vec3b *psrc = src.ptr<cv::Vec3b>(y);
uchar *pdst = dst.ptr<uchar>(y);

for (int x = 0; x < sz.width; x++)

for (int x = 0; x < sz.width; x++)
{
pdst[x] = (rw * psrc[x][2] + gw * psrc[x][1] + bw * psrc[x][0] + (1<<(shift-1)) + bias) >> shift;
pdst[x] = (rw * psrc[x][2] + gw * psrc[x][1] + bw * psrc[x][0] + (1<<(shift-1))) >> shift;
}



}
}

Expand All @@ -84,15 +87,17 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
dst.create(sz, CV_8UC1);

#ifdef HAVE_SSE
// __m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
// __m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
// __m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
// __m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
// __m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
// __m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
__m128i ssse3_red_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_2 = _mm_set_epi8(15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_blue_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0);
__m128i ssse3_blue_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1);
__m128i ssse3_blue_indices_2 = _mm_set_epi8(13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_green_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1);
__m128i ssse3_green_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1);
__m128i ssse3_green_indices_2 = _mm_set_epi8(14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2);
__m128i ssse3_red_indices_1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1);
__m128i ssse3_red_indices_2 = _mm_set_epi8(15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);



__m128i red_coeff = _mm_set1_epi16(54);
__m128i green_coeff = _mm_set1_epi16(183);
Expand All @@ -112,18 +117,64 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
// Here is 16 times unrolled loop for vector processing
for (; x <= sz.width - 16; x += 16)
{
__m128i chunk0 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*0));
// Load three 128-bit chunks of source data
__m128i chunk0 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*0));
__m128i chunk1 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*1));
__m128i chunk2 = _mm_loadu_si128((const __m128i*)(psrc + x*3 + 16*2));

__m128i red = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_red_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_red_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_red_indices_2));

/* ??? */
__m128i green = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_green_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_green_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_green_indices_2));

__m128i blue = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(chunk0, ssse3_blue_indices_0),
_mm_shuffle_epi8(chunk1, ssse3_blue_indices_1)),
_mm_shuffle_epi8(chunk2, ssse3_blue_indices_2));

//2
__m128i redlo = _mm_unpacklo_epi8(red, zero);
__m128i redhi =_mm_unpackhi_epi8(red, zero);

__m128i greenlo =_mm_unpacklo_epi8(green, zero);
__m128i greenhi =_mm_unpackhi_epi8(green, zero);

__m128i bluelo =_mm_unpacklo_epi8(blue, zero);
__m128i bluehi =_mm_unpackhi_epi8(blue, zero);


__m128i gray_packed_log = _mm_mullo_epi16(green_coeff, greenlo);
__m128i gray_packed_lor = _mm_mullo_epi16(red_coeff, redlo);
__m128i gray_packed_lob = _mm_mullo_epi16(blue_coeff, bluelo);

//__m128i gray_packed_lo1 = _mm_add_epi16(gray_packed_log, gray_packed_lor );
//__m128i gray_packed_lo2 = _mm_add_epi16(gray_packed_lo1, gray_packed_lob );
//__m128i gray_packed_lo = _mm_add_epi16(gray_packed_lo2, bias);
__m128i gray_packed_lo1= _mm_add_epi16(_mm_add_epi16(gray_packed_log, gray_packed_lor), gray_packed_lob);
__m128i gray_packed_lo = _mm_add_epi16(gray_packed_lo1, bias);


__m128i gray_packed_hig = _mm_mullo_epi16(green_coeff, greenhi);
__m128i gray_packed_hir = _mm_mullo_epi16(red_coeff, redhi);
__m128i gray_packed_hib = _mm_mullo_epi16(blue_coeff, bluehi);

//__m128i gray_packed_hi1 = _mm_add_epi16(gray_packed_hig, gray_packed_hir );
//__m128i gray_packed_hi2 = _mm_add_epi16(gray_packed_hi1, gray_packed_hib );
//__m128i gray_packed_hi = _mm_add_epi16(gray_packed_hi2, bias);
__m128i gray_packed_hi1= _mm_add_epi16(_mm_add_epi16(gray_packed_hig, gray_packed_hir), gray_packed_hib);
__m128i gray_packed_hi = _mm_add_epi16(gray_packed_hi1, bias);

//3
gray_packed_lo=_mm_srli_epi16(gray_packed_lo, 8);
gray_packed_hi=_mm_srli_epi16(gray_packed_hi, 8);

__m128i gray_packed; // Initialize it properly
//4
__m128i gray_packed;
gray_packed = _mm_packus_epi16(gray_packed_lo, gray_packed_hi);

//5
_mm_storeu_si128((__m128i*)(pdst + x), gray_packed);
}
#endif
Expand All @@ -137,6 +188,6 @@ void ConvertColor_BGR2GRAY_BT709_simd(const cv::Mat& src, cv::Mat& dst)
}

// ! Remove this before writing your optimizations !
ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
//ConvertColor_BGR2GRAY_BT709_fpt(src, dst);
// ! Remove this before writing your optimizations !
}
40 changes: 22 additions & 18 deletions src/thinning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,28 @@ static void GuoHallIteration(cv::Mat& im, int iter)
{
for (int j = 1; j < im.cols-1; j++)
{
uchar p2 = im.at<uchar>(i-1, j);
uchar p3 = im.at<uchar>(i-1, j+1);
uchar p4 = im.at<uchar>(i, j+1);
uchar p5 = im.at<uchar>(i+1, j+1);
uchar p6 = im.at<uchar>(i+1, j);
uchar p7 = im.at<uchar>(i+1, j-1);
uchar p8 = im.at<uchar>(i, j-1);
uchar p9 = im.at<uchar>(i-1, j-1);

int C = (!p2 & (p3 | p4)) + (!p4 & (p5 | p6)) +
(!p6 & (p7 | p8)) + (!p8 & (p9 | p2));
int N1 = (p9 | p2) + (p3 | p4) + (p5 | p6) + (p7 | p8);
int N2 = (p2 | p3) + (p4 | p5) + (p6 | p7) + (p8 | p9);
int N = N1 < N2 ? N1 : N2;
int m = iter == 0 ? ((p6 | p7 | !p9) & p8) : ((p2 | p3 | !p5) & p4);

if (C == 1 && (N >= 2 && N <= 3) & (m == 0))
marker.at<uchar>(i,j) = 1;
uchar p1 = im.at<uchar>(i, j);
if (p1!=0)
{
uchar p2 = im.at<uchar>(i-1, j);
uchar p3 = im.at<uchar>(i-1, j+1);
uchar p4 = im.at<uchar>(i, j+1);
uchar p5 = im.at<uchar>(i+1, j+1);
uchar p6 = im.at<uchar>(i+1, j);
uchar p7 = im.at<uchar>(i+1, j-1);
uchar p8 = im.at<uchar>(i, j-1);
uchar p9 = im.at<uchar>(i-1, j-1);

int C = (!p2 & (p3 | p4)) + (!p4 & (p5 | p6)) +
(!p6 & (p7 | p8)) + (!p8 & (p9 | p2));
int N1 = (p9 | p2) + (p3 | p4) + (p5 | p6) + (p7 | p8);
int N2 = (p2 | p3) + (p4 | p5) + (p6 | p7) + (p8 | p9);
int N = N1 < N2 ? N1 : N2;
int m = iter == 0 ? ((p6 | p7 | !p9) & p8) : ((p2 | p3 | !p5) & p4);

if (C == 1 && (N >= 2 && N <= 3) & (m == 0))
marker.at<uchar>(i,j) = 1;
}
}
}

Expand Down