diff --git a/.gitignore b/.gitignore index fabf7c7..01f0b74 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,5 @@ $RECYCLE.BIN/ # Mac desktop service store files .DS_Store +*.db-shm +*.db-wal diff --git a/README.md b/README.md index 9517c91..ffe0b64 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Libmorton v0.2.5 +# Libmorton v0.2.6 [![Build Status](https://travis-ci.org/Forceflow/libmorton.svg?branch=master)](https://travis-ci.org/Forceflow/libmorton) [![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://opensource.org/licenses/MIT) [![Donate](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.me/forceflow) * Libmorton is a **C++ header-only library** with methods to efficiently encode/decode 64, 32 and 16-bit Morton codes and coordinates, in 2D and 3D. *Morton order* is also known as *Z-order* or *[the Z-order curve](https://en.wikipedia.org/wiki/Z-order_curve)*. @@ -21,13 +21,16 @@ inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uin inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z); +## Installation +No installation is required (just download the headers and include them), but I was informed libmorton is packaged for [Microsoft's VCPKG system](https://github.com/Microsoft/vcpkg) as well, if you want a more controlled environment to install C++ packages in. + ## Instruction sets In the standard case, libmorton only uses operations that are supported on pretty much any CPU you can throw it at. If you know you're compiling for a specific architecture, you might gain a speed boost in encoding/decoding operations by enabling implementations for a specific instruction set. Libmorton ships with support for: - * **BMI2 instruction set**: Intel Haswell CPU's and newer. Define `__BMI2__` before including `morton.h`. This is definitely a faster method when compared to the standard case. 
+ * **BMI2 instruction set**: Intel: Haswell CPU's and newer. AMD: Ryzen CPU's and newer. Define `__BMI2__` before including `morton.h`. This is definitely a faster method when compared to the standard case. * **AVX512 instruction set (experimental)**: Intel Ice Lake CPU's and newer. Uses `_mm512_bitshuffle_epi64_mask`. Define `__AVX512BITALG__` before including `morton.h`. For more info on performance, see [this PR](https://github.com/Forceflow/libmorton/pull/40). - -## Installation -No installation is required (just download the headers and include them), but I was informed libmorton is packaged for [Microsoft's VCPKG system](https://github.com/Microsoft/vcpkg) as well, if you want a more controlled environment to install C++ packages in. + +When using MSVC, these options can be found under _Project Properties -> C/C++ -> Code Generation -> Enable Enhanced Instruction Set_ (MSVC has no separate BMI2 switch: select AVX2, i.e. `/arch:AVX2`, and libmorton will treat the resulting `__AVX2__` define as BMI2 support). +When using GCC (version 9.0 or higher), you can use `-march=haswell` (or `-march=znver2`) for BMI2 support and `-march=icelake-client` for AVX512 support. ## Testing The `test` folder contains tools I use to test correctness and performance of the libmorton implementation. You can regard them as unit tests. This section is under heavy re-writing, but might contain some useful code for advanced usage. 
diff --git a/libmorton/include/morton.h b/libmorton/include/morton.h index c6c0c97..8bdff74 100644 --- a/libmorton/include/morton.h +++ b/libmorton/include/morton.h @@ -9,11 +9,9 @@ #include "morton2D.h" #include "morton3D.h" -#if defined(__BMI2__) || defined(__AVX2__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) #include "morton_BMI.h" -#endif - -#if defined(__AVX512BITALG__) +#elif defined(__AVX512BITALG__) #include "morton_AVX512BITALG.h" #endif @@ -22,31 +20,31 @@ namespace libmorton { //----------------------------------------------------------------------------------------------- // ENCODING -#if defined(__AVX512BITALG__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { - return m2D_e_BITALG(x, y); + return m2D_e_BMI(x, y); } inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { - return m2D_e_BITALG(x, y); + return m2D_e_BMI(x, y); } inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { - return m3D_e_BITALG(x, y, z); + return m3D_e_BMI(x, y, z); } inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { - return m3D_e_BITALG(x, y, z); + return m3D_e_BMI(x, y, z); } -#elif defined(__BMI2__) || defined(__AVX2__) +#elif defined(__AVX512BITALG__) inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { - return m2D_e_BMI(x, y); + return m2D_e_BITALG(x, y); } inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { - return m2D_e_BMI(x, y); + return m2D_e_BITALG(x, y); } inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { - return m3D_e_BMI(x, y, z); + return m3D_e_BITALG(x, y, z); } inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const 
uint_fast32_t z) { - return m3D_e_BMI(x, y, z); + return m3D_e_BITALG(x, y, z); } #else inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { @@ -64,32 +62,33 @@ namespace libmorton { #endif // DECODING -#if defined(__AVX512BITALG__) + +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { - m2D_d_BITALG(morton, x, y); + m2D_d_BMI(morton, x, y); } inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { - m2D_d_BITALG(morton, x, y); + m2D_d_BMI(morton, x, y); } inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { - m3D_d_BITALG(morton, x, y, z); + m3D_d_BMI(morton, x, y, z); } inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { - m3D_d_BITALG(morton, x, y, z); + m3D_d_BMI(morton, x, y, z); } -#elif defined(__BMI2__) || defined(__AVX2__) +#elif defined(__AVX512BITALG__) inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { - m2D_d_BMI(morton, x, y); + m2D_d_BITALG(morton, x, y); } inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { - m2D_d_BMI(morton, x, y); + m2D_d_BITALG(morton, x, y); } inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { - m3D_d_BMI(morton, x, y, z); + m3D_d_BITALG(morton, x, y, z); } inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { - m3D_d_BMI(morton, x, y, z); - } + m3D_d_BITALG(morton, x, y, z); +} #else inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { m2D_d_sLUT(morton, x, y); diff --git a/libmorton/include/morton_AVX512BITALG.h b/libmorton/include/morton_AVX512BITALG.h index 
d9124f3..d33b7df 100644 --- a/libmorton/include/morton_AVX512BITALG.h +++ b/libmorton/include/morton_AVX512BITALG.h @@ -1,4 +1,5 @@ #pragma once +#if defined(__AVX512BITALG__) #include #include @@ -218,4 +219,5 @@ namespace libmorton { inline void m3D_d_BITALG(const morton m, coord& x, coord& y, coord& z) { bitalg_detail::bitunzip3D(m, x, y, z); } -} \ No newline at end of file +} +#endif \ No newline at end of file diff --git a/libmorton/include/morton_BMI.h b/libmorton/include/morton_BMI.h index 6b0222f..3c06227 100644 --- a/libmorton/include/morton_BMI.h +++ b/libmorton/include/morton_BMI.h @@ -1,5 +1,5 @@ #pragma once -#if defined(__BMI2__) || defined(__AVX2__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) #include #include diff --git a/test/libmorton_test.cpp b/test/libmorton_test.cpp index f6414c6..0533751 100644 --- a/test/libmorton_test.cpp +++ b/test/libmorton_test.cpp @@ -145,7 +145,7 @@ void registerFunctions() { f3D_32_decode.push_back(decode_3D_32_wrapper("LUT Shifted ET", &m3D_d_sLUT_ET)); // Register 3D BMI intrinsics if available -#if defined(__BMI2__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) f3D_64_encode.push_back(encode_3D_64_wrapper("BMI2 instruction set", &m3D_e_BMI)); f3D_32_encode.push_back(encode_3D_32_wrapper("BMI2 instruction set", &m3D_e_BMI)); f3D_64_decode.push_back(decode_3D_64_wrapper("BMI2 Instruction set", &m3D_d_BMI)); diff --git a/test/libmorton_test.h b/test/libmorton_test.h index 0fcc0ab..d96e1bc 100644 --- a/test/libmorton_test.h +++ b/test/libmorton_test.h @@ -22,6 +22,14 @@ #include "morton_LUT_generators.h" #include "../libmorton/include/morton2D.h" #include "../libmorton/include/morton3D.h" +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) +#include "morton_BMI.h" +#endif +#if defined(__AVX512BITALG__) +#include "morton_AVX512BITALG.h" +#endif + +// Load main morton include file (should be unnecessary) #include "../libmorton/include/morton.h" using std::string; 
diff --git a/test/makefile b/test/makefile index e05db36..2cc4a59 100644 --- a/test/makefile +++ b/test/makefile @@ -14,3 +14,6 @@ avx512: clean: rm -f libmorton_test libmorton_test_bmi2 libmorton_test_avx512 + +zen2: + $(CXX) $(CFLAGS) -march=znver2 libmorton_test.cpp -o libmorton_test_zen2