Skip to content

Commit

Permalink
*: separate ColumnsCommon (#9710)
Browse files Browse the repository at this point in the history
ref #6233

Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com>
  • Loading branch information
Lloyd-Pottiger authored Dec 10, 2024
1 parent a3dee48 commit b2cdb6a
Show file tree
Hide file tree
Showing 42 changed files with 361 additions and 249 deletions.
3 changes: 2 additions & 1 deletion dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,13 @@ check_then_add_sources_compile_flag (
TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT
"${TIFLASH_COMPILER_ARCH_HASWELL_FLAG}"
src/Columns/ColumnString.cpp
src/Columns/ColumnsCommon.cpp
src/Columns/ColumnVector.cpp
src/Columns/ColumnDecimal.cpp
src/Columns/ColumnArray.cpp
src/Columns/ColumnNullable.cpp
src/Columns/ColumnFixedString.cpp
src/Columns/countBytesInFilter.cpp
src/Columns/ColumnUtil.cpp
src/DataTypes/DataTypeString.cpp
src/Interpreters/Join.cpp
src/IO/Compression/EncodingUtil.cpp
Expand Down
12 changes: 6 additions & 6 deletions dbms/src/AggregateFunctions/AggregateFunctionCount.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/VarInt.h>
#include <IO/WriteHelpers.h>
Expand Down Expand Up @@ -112,7 +112,7 @@ class AggregateFunctionCount final
}

/// May be used for optimization.
void addDelta(AggregateDataPtr __restrict place, UInt64 x) const { data(place).count += x; }
static void addDelta(AggregateDataPtr __restrict place, UInt64 x) { data(place).count += x; }

const char * getHeaderFilePath() const override { return __FILE__; }
};
Expand All @@ -123,7 +123,7 @@ class AggregateFunctionCountNotNullUnary final
: public IAggregateFunctionDataHelper<AggregateFunctionCountData, AggregateFunctionCountNotNullUnary>
{
public:
AggregateFunctionCountNotNullUnary(const DataTypePtr & argument)
explicit AggregateFunctionCountNotNullUnary(const DataTypePtr & argument)
{
if (!argument->isNullable())
throw Exception(
Expand Down Expand Up @@ -202,7 +202,7 @@ class AggregateFunctionCountNotNullVariadic final
: public IAggregateFunctionDataHelper<AggregateFunctionCountData, AggregateFunctionCountNotNullVariadic>
{
public:
AggregateFunctionCountNotNullVariadic(const DataTypes & arguments)
explicit AggregateFunctionCountNotNullVariadic(const DataTypes & arguments)
{
number_of_arguments = arguments.size();

Expand All @@ -214,7 +214,7 @@ class AggregateFunctionCountNotNullVariadic final
if (number_of_arguments > MAX_ARGS)
throw Exception(
"Maximum number of arguments for aggregate function with Nullable types is "
+ toString(size_t(MAX_ARGS)),
+ toString(static_cast<size_t>(MAX_ARGS)),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

for (size_t i = 0; i < number_of_arguments; ++i)
Expand Down Expand Up @@ -262,7 +262,7 @@ class AggregateFunctionCountNotNullVariadic final
MAX_ARGS = 8
};
size_t number_of_arguments = 0;
std::array<char, MAX_ARGS> is_nullable; /// Plain array is better than std::vector due to one indirection less.
std::array<char, MAX_ARGS> is_nullable{}; /// Plain array is better than std::vector due to one indirection less.
};

} // namespace DB
2 changes: 1 addition & 1 deletion dbms/src/AggregateFunctions/AggregateFunctionNull.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnAggregateFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#include <AggregateFunctions/AggregateFunctionState.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
Expand Down
3 changes: 1 addition & 2 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand All @@ -30,7 +30,6 @@
#include <IO/WriteHelpers.h>
#include <string.h> // memcpy

#include <memory>

namespace DB
{
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnConst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <Common/typeid_cast.h>
#include <IO/WriteHelpers.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnFunction.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Functions/IFunction.h>
#include <Interpreters/ExpressionActions.h>
#include <fmt/format.h>
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

#include <Columns/ColumnString.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <DataStreams/ColumnGathererStream.h>
#include <TiDB/Collation/CollatorUtils.h>
Expand Down
105 changes: 105 additions & 0 deletions dbms/src/Columns/ColumnUtil.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Columns/ColumnUtil.h>
#include <common/mem_utils_opt.h>

#if defined(__aarch64__) && defined(__ARM_NEON)
#include <arm_neon.h>
#elif defined(__SSE2__) || defined(__AVX2__) || defined(__AVX512F__) && defined(__AVX512BW__)
#include <immintrin.h>
#endif

#ifdef TIFLASH_ENABLE_AVX_SUPPORT
ASSERT_USE_AVX2_COMPILE_FLAG
#endif


namespace DB
{

/// Packs 64 consecutive filter bytes into a 64-bit mask.
/// Bit i of the result is set iff bytes64[i] is non-zero (byte 0 maps to the least significant bit).
/// Exactly 64 bytes starting at `bytes64` are read; no alignment is required (unaligned loads are used).
/// Every branch below first builds `res` with a bit set for each ZERO byte and the final `~res`
/// flips it into the "non-zero" mask that is returned.
UInt64 ToBits64(const UInt8 * bytes64)
{
#if defined(__AVX512F__) && defined(__AVX512BW__)
// One 512-bit load covers all 64 bytes; testn(v, v) flags the lanes where (v & v) == 0, i.e. zero bytes.
const __m512i vbytes = _mm512_loadu_si512(reinterpret_cast<const void *>(bytes64));
UInt64 res = _mm512_testn_epi8_mask(vbytes, vbytes);
#elif defined(__AVX2__)
// Two 32-byte halves compared against an all-zero block; the second half's mask goes to the upper bits.
// NOTE(review): presumably get_block32_cmp_eq_mask returns a 32-bit byte-equality movemask and
// BLOCK32_SIZE == 32 -- confirm in common/mem_utils_opt.h.
const auto check_block = _mm256_setzero_si256();
uint64_t mask0 = mem_utils::details::get_block32_cmp_eq_mask(bytes64, check_block);
uint64_t mask1
= mem_utils::details::get_block32_cmp_eq_mask(bytes64 + mem_utils::details::BLOCK32_SIZE, check_block);
auto res = mask0 | (mask1 << mem_utils::details::BLOCK32_SIZE);
#elif defined(__SSE2__)
// Four 16-byte compares against zero; each 16-bit movemask is shifted into its slot of the 64-bit result.
const auto zero16 = _mm_setzero_si128();
UInt64 res = static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
<< 16)
| (static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
<< 32)
| (static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
<< 48);
#elif defined(__aarch64__) && defined(__ARM_NEON)
// NEON has no movemask: give every byte within a group of 8 its own bit weight (1, 2, 4, ... 0x80) ...
const uint8x16_t bitmask
= {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
const auto * src = reinterpret_cast<const unsigned char *>(bytes64);
// ... compare each 16-byte quarter with zero (0xFF where the byte is zero) ...
const uint8x16_t p0 = vceqzq_u8(vld1q_u8(src));
const uint8x16_t p1 = vceqzq_u8(vld1q_u8(src + 16));
const uint8x16_t p2 = vceqzq_u8(vld1q_u8(src + 32));
const uint8x16_t p3 = vceqzq_u8(vld1q_u8(src + 48));
// ... keep only the weight bit of the bytes that compared equal to zero ...
uint8x16_t t0 = vandq_u8(p0, bitmask);
uint8x16_t t1 = vandq_u8(p1, bitmask);
uint8x16_t t2 = vandq_u8(p2, bitmask);
uint8x16_t t3 = vandq_u8(p3, bitmask);
// ... and run three rounds of pairwise adds so each group of 8 weighted bytes collapses into one
// mask byte. The weights are distinct bits, so the additions never carry.
uint8x16_t sum0 = vpaddq_u8(t0, t1);
uint8x16_t sum1 = vpaddq_u8(t2, t3);
sum0 = vpaddq_u8(sum0, sum1);
sum0 = vpaddq_u8(sum0, sum0);
// The low 64-bit lane now holds the 8 mask bytes in source order.
UInt64 res = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
#else
// Portable fallback: test the bytes one at a time.
UInt64 res = 0;
for (size_t i = 0; i < 64; ++i)
res |= static_cast<UInt64>(0 == bytes64[i]) << i;
#endif
// `res` marks zero bytes; invert it so that set bits mark non-zero bytes.
return ~res;
}

/// Tells whether the set bits of `mask` form one contiguous run starting at bit 0 (the first row).
/// If mask is a number of this kind: [0]*[1]+ the function returns the length of the cluster of 1s,
/// i.e. how many leading elements can be copied in one go.
/// Returns 0 when no bit is set, and the special value 0xFF when the set bits are not such a prefix.
/// Note: callers are still expected to skip empty masks; the zero case is handled only so that
/// the function never evaluates an out-of-range shift.
UInt8 prefixToCopy(UInt64 mask)
{
    static constexpr UInt64 all_match = 0xFFFFFFFFFFFFFFFFULL;
    /// std::countl_zero(0) is 64 and shifting a 64-bit value by 64 bits is undefined behaviour,
    /// so an empty mask must never reach the shift below.
    if (mask == 0)
        return 0;
    if (mask == all_match)
        return 64;
    /// std::countl_zero counts from the most significant bit of mask, corresponding to the tail of the original filter.
    /// If only the tail of the original filter is zero, we can copy the prefix directly.
    /// The length of the zero tail is `leading_zeroes`, so the length of the prefix to copy is 64 - #(leading zeroes).
    const UInt64 leading_zeroes = std::countl_zero(mask);
    /// `all_match >> leading_zeroes` is exactly the value with the low (64 - leading_zeroes) bits set.
    if (mask == (all_match >> leading_zeroes))
        return static_cast<UInt8>(64 - leading_zeroes);
    return 0xFF;
}

UInt8 suffixToCopy(UInt64 mask)
{
const auto prefix_to_copy = prefixToCopy(~mask);
return prefix_to_copy >= 64 ? prefix_to_copy : 64 - prefix_to_copy;
}

} // namespace DB
33 changes: 33 additions & 0 deletions dbms/src/Columns/ColumnUtil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/types.h>

namespace DB
{

/// Packs the 64 bytes starting at `bytes64` into a 64-bit mask:
/// bit i is set iff bytes64[i] is non-zero (byte 0 maps to the least significant bit).
UInt64 ToBits64(const UInt8 * bytes64);

/// NOTE(review): presumably the number of filter bytes handled per ToBits64 call
/// (64 matches the width of the mask it returns) -- confirm against the callers.
constexpr size_t FILTER_SIMD_BYTES = 64;

/// If mask is a number of this kind: [0]*[1]+ function returns the length of the cluster of 1s.
/// Otherwise it returns the special value: 0xFF.
/// Note: mask must be non-zero.
UInt8 prefixToCopy(UInt64 mask);

/// Counterpart of prefixToCopy for the most significant bits:
/// if mask is a number of this kind: [1]+[0]+ function returns the length of the cluster of 1s.
/// For masks whose set bits do not form such a suffix it returns the special value: 0xFF.
/// NOTE(review): results for an all-zero or all-one mask are not described here -- check the definition.
UInt8 suffixToCopy(UInt64 mask);

} // namespace DB
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnVector.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/IColumnDummy.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

#pragma once

#include <Columns/ColumnsCommon.h>
#include <Columns/IColumn.h>
#include <Columns/countBytesInFilter.h>
#include <Common/Arena.h>


Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/VirtualColumnUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/VirtualColumnUtils.h>
#include <Columns/filterColumn.h>
#include <Common/typeid_cast.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/Context.h>
Expand Down
Loading

0 comments on commit b2cdb6a

Please sign in to comment.