From 5754a58b8d8a26396d91dca10080b3ab2ad09508 Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Tue, 10 Nov 2020 15:08:12 +0100 Subject: [PATCH 01/10] Ignore VS db files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index fabf7c7..01f0b74 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,5 @@ $RECYCLE.BIN/ # Mac desktop service store files .DS_Store +*.db-shm +*.db-wal From cccf9d7414699632523d62a8ae6cbc081a34aa60 Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Tue, 10 Nov 2020 15:08:43 +0100 Subject: [PATCH 02/10] Possible fix for #54 --- libmorton/include/morton.h | 2 +- libmorton/include/morton_BMI.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libmorton/include/morton.h b/libmorton/include/morton.h index c6c0c97..5c04797 100644 --- a/libmorton/include/morton.h +++ b/libmorton/include/morton.h @@ -9,7 +9,7 @@ #include "morton2D.h" #include "morton3D.h" -#if defined(__BMI2__) || defined(__AVX2__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) #include "morton_BMI.h" #endif diff --git a/libmorton/include/morton_BMI.h b/libmorton/include/morton_BMI.h index 6b0222f..3c06227 100644 --- a/libmorton/include/morton_BMI.h +++ b/libmorton/include/morton_BMI.h @@ -1,5 +1,5 @@ #pragma once -#if defined(__BMI2__) || defined(__AVX2__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) #include #include From d7244f8bfa7865d4f1e47bb2a9ff2f81df91827b Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Tue, 10 Nov 2020 23:52:47 +0100 Subject: [PATCH 03/10] Additional #ifdef fixes --- libmorton/include/morton.h | 10 ++++------ test/libmorton_test.cpp | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/libmorton/include/morton.h b/libmorton/include/morton.h index 5c04797..03796dc 100644 --- a/libmorton/include/morton.h +++ b/libmorton/include/morton.h @@ -9,12 +9,10 @@ #include "morton2D.h" #include "morton3D.h" -#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) -#include "morton_BMI.h" -#endif - #if defined(__AVX512BITALG__) #include "morton_AVX512BITALG.h" +#elif defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) +#include "morton_BMI.h" #endif namespace libmorton { @@ -35,7 +33,7 @@ namespace libmorton { inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { return m3D_e_BITALG(x, y, z); } -#elif defined(__BMI2__) || defined(__AVX2__) +#elif defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { return m2D_e_BMI(x, y); } @@ -77,7 +75,7 @@ namespace libmorton { inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { m3D_d_BITALG(morton, x, y, z); } -#elif defined(__BMI2__) || defined(__AVX2__) +#elif defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { m2D_d_BMI(morton, x, y); } diff --git a/test/libmorton_test.cpp b/test/libmorton_test.cpp index f6414c6..0533751 100644 --- a/test/libmorton_test.cpp +++ b/test/libmorton_test.cpp @@ -145,7 +145,7 @@ void registerFunctions() { f3D_32_decode.push_back(decode_3D_32_wrapper("LUT Shifted ET", &m3D_d_sLUT_ET)); // Register 3D BMI intrinsics if available -#if defined(__BMI2__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) f3D_64_encode.push_back(encode_3D_64_wrapper("BMI2 instruction set", &m3D_e_BMI)); f3D_32_encode.push_back(encode_3D_32_wrapper("BMI2 instruction set", &m3D_e_BMI)); f3D_64_decode.push_back(decode_3D_64_wrapper("BMI2 Instruction set", &m3D_d_BMI)); From 858db290c06ef4d59a425cbc6ad548bd425d503d Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Tue, 10 Nov 2020 23:58:10 +0100 Subject: [PATCH 04/10] Added include guards to AVX512 header --- libmorton/include/morton_AVX512BITALG.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libmorton/include/morton_AVX512BITALG.h b/libmorton/include/morton_AVX512BITALG.h index d9124f3..d33b7df 100644 --- a/libmorton/include/morton_AVX512BITALG.h +++ b/libmorton/include/morton_AVX512BITALG.h @@ -1,4 +1,5 @@ #pragma once +#if defined(__AVX512BITALG__) #include #include @@ -218,4 +219,5 @@ namespace libmorton { inline void m3D_d_BITALG(const morton m, coord& x, coord& y, coord& z) { bitalg_detail::bitunzip3D(m, x, y, z); } -} \ No newline at end of file +} +#endif \ No newline at end of file From 4ee708a07f4d04a0fe649ac48ef4263c37daf6fc Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Tue, 10 Nov 2020 23:58:28 +0100 Subject: [PATCH 05/10] Switched preference for intrinsices (BMI2 preferred over AVX512) --- libmorton/include/morton.h | 49 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/libmorton/include/morton.h b/libmorton/include/morton.h index 03796dc..8bdff74 100644 --- a/libmorton/include/morton.h +++ b/libmorton/include/morton.h @@ -9,10 +9,10 @@ #include "morton2D.h" #include "morton3D.h" -#if defined(__AVX512BITALG__) -#include "morton_AVX512BITALG.h" -#elif defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) #include "morton_BMI.h" +#elif defined(__AVX512BITALG__) +#include "morton_AVX512BITALG.h" #endif namespace libmorton { @@ -20,31 +20,31 @@ namespace libmorton { //----------------------------------------------------------------------------------------------- // ENCODING -#if defined(__AVX512BITALG__) +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { - return m2D_e_BITALG(x, y); + return m2D_e_BMI(x, y); } inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { - return m2D_e_BITALG(x, y); + return m2D_e_BMI(x, y); } inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { - return m3D_e_BITALG(x, y, z); + return m3D_e_BMI(x, y, z); } inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { - return m3D_e_BITALG(x, y, z); + return m3D_e_BMI(x, y, z); } -#elif defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) +#elif defined(__AVX512BITALG__) inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { - return m2D_e_BMI(x, y); + return m2D_e_BITALG(x, y); } inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) { - return m2D_e_BMI(x, y); + return m2D_e_BITALG(x, y); } inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) { - return m3D_e_BMI(x, y, z); + return m3D_e_BITALG(x, y, z); } inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) { - return m3D_e_BMI(x, y, z); + return m3D_e_BITALG(x, y, z); } #else inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) { @@ -62,32 +62,33 @@ namespace libmorton { #endif // DECODING -#if defined(__AVX512BITALG__) + +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { - m2D_d_BITALG(morton, x, y); + m2D_d_BMI(morton, x, y); } inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { - m2D_d_BITALG(morton, x, y); + m2D_d_BMI(morton, x, y); } inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { - m3D_d_BITALG(morton, x, y, z); + m3D_d_BMI(morton, x, y, z); } inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { - m3D_d_BITALG(morton, x, y, z); + m3D_d_BMI(morton, x, y, z); } -#elif defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) +#elif defined(__AVX512BITALG__) inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { - m2D_d_BMI(morton, x, y); + m2D_d_BITALG(morton, x, y); } inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) { - m2D_d_BMI(morton, x, y); + m2D_d_BITALG(morton, x, y); } inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) { - m3D_d_BMI(morton, x, y, z); + m3D_d_BITALG(morton, x, y, z); } inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) { - m3D_d_BMI(morton, x, y, z); - } + m3D_d_BITALG(morton, x, y, z); +} #else inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) { m2D_d_sLUT(morton, x, y); From aeae472428b298715a871695afc7c93f76120e24 Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Wed, 11 Nov 2020 12:37:50 +0100 Subject: [PATCH 06/10] Test suite compilation fix --- test/libmorton_test.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/libmorton_test.h b/test/libmorton_test.h index 0fcc0ab..140283c 100644 --- a/test/libmorton_test.h +++ b/test/libmorton_test.h @@ -22,6 +22,13 @@ #include "morton_LUT_generators.h" #include "../libmorton/include/morton2D.h" #include "../libmorton/include/morton3D.h" +#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) +#include "morton_BMI.h" +#elif defined(__AVX512BITALG__) +#include "morton_AVX512BITALG.h" +#endif + +// Load main morton include file (should be unnecessary) #include "../libmorton/include/morton.h" using std::string; From 3352d4e58147448d39fb6208e6baca633cad4a3b Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Wed, 11 Nov 2020 13:12:40 +0100 Subject: [PATCH 07/10] Load AVX512 even if BMI2 is detected, for test suite --- test/libmorton_test.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/libmorton_test.h b/test/libmorton_test.h index 140283c..d96e1bc 100644 --- a/test/libmorton_test.h +++ b/test/libmorton_test.h @@ -24,7 +24,8 @@ #include "../libmorton/include/morton3D.h" #if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER)) #include "morton_BMI.h" -#elif defined(__AVX512BITALG__) +#endif +#if defined(__AVX512BITALG__) #include "morton_AVX512BITALG.h" #endif From 112ac1dfa8b2a4832b54808ad82b8f9a605934a3 Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Wed, 11 Nov 2020 16:03:43 +0100 Subject: [PATCH 08/10] Version bump --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9517c91..d746233 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Libmorton v0.2.5 +# Libmorton v0.2.6 [![Build Status](https://travis-ci.org/Forceflow/libmorton.svg?branch=master)](https://travis-ci.org/Forceflow/libmorton) [![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://opensource.org/licenses/MIT) [![Donate](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.me/forceflow) * Libmorton is a **C++ header-only library** with methods to efficiently encode/decode 64, 32 and 16-bit Morton codes and coordinates, in 2D and 3D. *Morton order* is also known as *Z-order* or *[the Z-order curve](https://en.wikipedia.org/wiki/Z-order_curve)*. From 90d00a8912c1a62e8eaee603eafc9281e00827f0 Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Wed, 11 Nov 2020 16:11:25 +0100 Subject: [PATCH 09/10] Update README.md --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d746233..ffe0b64 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,16 @@ inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uin inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z); +## Installation +No installation is required (just download the headers and include them), but I was informed libmorton is packaged for [Microsoft's VCPKG system](https://github.com/Microsoft/vcpkg) as well, if you want a more controlled environment to install C++ packages in. + ## Instruction sets In the standard case, libmorton only uses operations that are supported on pretty much any CPU you can throw it at. If you know you're compiling for a specific architecture, you might gain a speed boost in encoding/decoding operations by enabling implementations for a specific instruction set. Libmorton ships with support for: - * **BMI2 instruction set**: Intel Haswell CPU's and newer. Define `__BMI2__` before including `morton.h`. This is definitely a faster method when compared to the standard case. + * **BMI2 instruction set**: Intel: Haswell CPU's and newer. AMD: Ryzen CPU's and newer. Define `__BMI2__` before including `morton.h`. This is definitely a faster method when compared to the standard case. * **AVX512 instruction set (experimental)**: Intel Ice Lake CPU's and newer. Uses `_mm512_bitshuffle_epi64_mask`. Define `__AVX512BITALG__` before including `morton.h`. For more info on performance, see [this PR](https://github.com/Forceflow/libmorton/pull/40). - -## Installation -No installation is required (just download the headers and include them), but I was informed libmorton is packaged for [Microsoft's VCPKG system](https://github.com/Microsoft/vcpkg) as well, if you want a more controlled environment to install C++ packages in. + +When using MSVC, these options can be found under _Project Properties -> Code Generation -> Enable Enhanced Instruction set_. +When using GCC (version 9.0 or higher), you can use `-march=haswell` (or `-march=znver2`) for BMI2 support and `-march=icelake-client` for AVX512 support. ## Testing The `test` folder contains tools I use to test correctness and performance of the libmorton implementation. You can regard them as unit tests. This section is under heavy re-writing, but might contain some useful code for advanced usage. From e152c13b146fd26501637dc7654e927e46058a16 Mon Sep 17 00:00:00 2001 From: Jeroen Baert <3607063+Forceflow@users.noreply.github.com> Date: Wed, 11 Nov 2020 16:13:13 +0100 Subject: [PATCH 10/10] Added Ryzen build to makefile --- test/makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/makefile b/test/makefile index e05db36..2cc4a59 100644 --- a/test/makefile +++ b/test/makefile @@ -14,3 +14,6 @@ avx512: clean: rm -f libmorton_test libmorton_test_bmi2 libmorton_test_avx512 + +zen2: + $(CXX) $(CFLAGS) -march=znver2 libmorton_test.cpp -o libmorton_test_zen2