diff --git a/Cargo.toml b/Cargo.toml
index 9d0809e..a607016 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,8 +13,7 @@ categories = ["game-engines", "multimedia::images"]
 edition = "2018"
 
 [dependencies]
-#simdeez = "1.0.8"
-simdeez = {path = "../simdeez"}
+simdeez = "2.0.0-dev3"
 
 [dev-dependencies]
 criterion = "0.4.0"
diff --git a/bisect.log b/bisect.log
deleted file mode 100644
index f13df8f..0000000
--- a/bisect.log
+++ /dev/null
@@ -1,15 +0,0 @@
-git bisect start
-# good: [3a4f3e6f79608616b6ee186dc665b601d015dc1e] Merge pull request #36 from Linus789/master
-git bisect good 3a4f3e6f79608616b6ee186dc665b601d015dc1e
-# bad: [56d69326cd8e194089ffcb95569860ed5e593257] chore: remove dead code
-git bisect bad 56d69326cd8e194089ffcb95569860ed5e593257
-# good: [69cc7c11a598d014092c1f8c636762e8495b130e] patch: example; ridge 'complete'
-git bisect good 69cc7c11a598d014092c1f8c636762e8495b130e
-# good: [92ecff15f174d5ed624914eb64ac91d2974a47b4] patch: example; extract noise_2d_to_frames
-git bisect good 92ecff15f174d5ed624914eb64ac91d2974a47b4
-# bad: [9a8153a5687233e2c0f622ce0509c776a4e9f151] patch: move Cell & Cell2 to its own files
-git bisect bad 9a8153a5687233e2c0f622ce0509c776a4e9f151
-# bad: [9a8153a5687233e2c0f622ce0509c776a4e9f151] patch: move Cell & Cell2 to its own files
-git bisect bad 9a8153a5687233e2c0f622ce0509c776a4e9f151
-# bad: [9a8153a5687233e2c0f622ce0509c776a4e9f151] patch: move Cell & Cell2 to its own files
-git bisect bad 9a8153a5687233e2c0f622ce0509c776a4e9f151
diff --git a/generate_intrinsics.py b/generate_intrinsics.py
deleted file mode 100755
index 122f11f..0000000
--- a/generate_intrinsics.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/env python3
-
-from typing import List
-import os
-
-noise_types = {
-    'cellular': ['2', '3'],
-    'cellular2': ['2', '3'],
-    'ridge': ['1', '2', '3', '4'],
-    'fbm': ['1', '2', '3', '4'],
-    'turbulence': ['1', '2', '3', '4'],
-    'gradient': ['1', '2', '3', '4']
-    }
-float_types = [
-    '32',
-    '64'
-    ]
-intrinsics = [
-    'avx2',
-    'scalar',
-    'sse2',
-    'sse41'
-    ]
-
-def generate_intrinsic_tests()-> List[str]:
-    codes = [
-"""
-use core::arch::x86_64::__m256;
-use simdnoise::intrinsics::{avx2, scalar, sse2, sse41};
-use simdnoise::{NoiseType, TurbulenceSettings, RidgeSettings, FbmSettings, CellularSettings, Cellular2Settings, GradientSettings, SimplexSettings, Settings, NoiseDimensions, CellDistanceFunction, CellReturnType, Cell2ReturnType};
-
-mod helpers;
-use helpers::{BIN_PATH, read_from_file_f32, save_to_file_f32, read_from_file_f64, save_to_file_f64};
-"""
-    ]
-    dim_lookup = {
-        '1': 'width: 64,',
-        '2': 'width: 64, height: 32,',
-        '3': 'width: 64, height: 32, depth: 16,',
-        '4': 'width: 64, height: 32, depth: 16, time: 8,',
-    }
-    cell_options = {
-        'cellular': ( "", ["CellValue", "Distance"]),
-        'cellular2': ("2", ["Distance2", "Distance2Add", "Distance2Sub", "Distance2Mul", "Distance2Div"]),
-    }
-
-    for noise_type, dimensions in noise_types.items():
-        options = {"normal": ""}
-        if noise_type in ['fbm', 'turbulence', 'ridge']:
-            options = {"normal": f"""
-        .with_lacunarity(0.5)
-        .with_gain(2.0)
-        .with_octaves(5)
-        """}
-        elif noise_type in cell_options:
-            (count, dist_ret) = cell_options[noise_type]
-            for dist in ["Euclidean", "Manhattan", "Natural"]:
-                for ret in dist_ret:
-                    title = f"{dist.lower()}_{ret.lower()}"
-                    option = f"""
-                        .with_distance_function(CellDistanceFunction::{dist})
-                        .with_return_type(Cell{count}ReturnType::{ret})
-                        """
-                    options[title] = option
-        for (postfix, option) in options.items():
-            for dimension in dimensions:
-                dims = dim_lookup[dimension]
-                for intrinsic in intrinsics:
-                    for float_type in float_types:
-                        if float_type == '64' and noise_type in ['cellular', 'cellular2']:
-                            # we skip these due to overflow errors
-                            continue
-                        variant = ["", f"_{float_type}"][float_type!="32"]
-                        fn_name = f"intrinsic_{noise_type}_{dimension}_{intrinsic}_{float_type}_{postfix}"
-                        enabled = ""
-                        if intrinsic == "sse41":
-                            enabled = "#[target_feature(enable = \"sse4.1\")]"
-                        elif intrinsic != "scalar":
-                            enabled = f"#[target_feature(enable = \"{intrinsic}\")]"
-                        block = f"""
-{enabled}
-unsafe fn do_{fn_name}() -> Vec<f{float_type}>{{
-    let dims = NoiseDimensions {{
-        {dims}
-        ..NoiseDimensions::default({dimension})
-    }};
-
-    let noise_type = {noise_type.capitalize()}Settings::default(dims)
-        .with_seed(1337)
-        {option}
-        .wrap();
-    let (noise, _min, _max) = {intrinsic}::get_{dimension}d_noise{variant}(&noise_type);
-    noise
-}}
-
-#[test]
-fn test_{fn_name} () {{
-    let file_name = format!(
-        "{{}}/{{}}_{{}}_{{}}_{{}}_{{}}_{{}}.bin",
-        BIN_PATH, "intrinsics", "{noise_type}", "{float_type}", "{dimension}d", "{intrinsic}", "{postfix}"
-    );
-    unsafe {{
-        let noise = do_{fn_name}();
-        //save_to_file_f{float_type}(&file_name, noise.as_slice()).unwrap();
-        let expected = read_from_file_f{float_type}(&file_name).unwrap();
-        assert_eq!(expected, noise);
-    }}
-}}
-"""
-                        codes.append(block)
-    return codes
-
-def main() :
-    codes = generate_intrinsic_tests()
-    file_name = "tests/intrinsics.rs"
-    with open(file_name, "w") as file_h:
-        source = "\n".join(codes)
-        file_h.write(source)
-    os.system(f"rustfmt {file_name}");
-
-if __name__ == '__main__':
-    main()
diff --git a/src/intrinsics/avx2.rs b/src/intrinsics/avx2.rs
index efb476d..a2ff66d 100644
--- a/src/intrinsics/avx2.rs
+++ b/src/intrinsics/avx2.rs
@@ -18,10 +18,12 @@ use crate::noise::simplex_32;
 use crate::noise::simplex_64;
 use crate::noise::turbulence_32;
 use crate::noise::turbulence_64;
+use crate::noise_helpers_32;
+use crate::noise_helpers_64;
 use crate::shared::scale_noise;
 use crate::{CellDistanceFunction, CellReturnType, DimensionalBeing, NoiseType};
 
-use simdeez::avx2::{Avx2, F32x8, F64x4};
+use simdeez::{SimdTransmuteF32, SimdTransmuteF64};
 
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
@@ -30,681 +32,344 @@ use std::arch::x86_64::*;
 
 use std::f32;
 
-/// Get a single value of 2d cellular/voroni noise
-#[target_feature(enable = "avx2")]
-pub unsafe fn cellular_2d(
-    x: __m256,
-    y: __m256,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m256,
-    seed: i32,
-) -> __m256 {
-    cell_32::cellular_2d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        distance_function,
-        return_type,
-        F32x8(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-#[target_feature(enable = "avx2")]
-pub unsafe fn cellular_3d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m256,
-    seed: i32,
-) -> __m256 {
-    cell_32::cellular_3d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        distance_function,
-        return_type,
-        F32x8(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 2d cellular/voroni noise
-#[target_feature(enable = "avx2")]
-pub unsafe fn cellular_2d_f64(
-    x: __m256d,
-    y: __m256d,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m256d,
-    seed: i64,
-) -> __m256d {
-    cell_64::cellular_2d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        distance_function,
-        return_type,
-        F64x4(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-#[target_feature(enable = "avx2")]
-pub unsafe fn cellular_3d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m256d,
-    seed: i64,
-) -> __m256d {
-    cell_64::cellular_3d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        distance_function,
-        return_type,
-        F64x4(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_1d(x: __m256, seed: i32) -> __m256 {
-    simplex_32::simplex_1d::<Avx2>(F32x8(x), seed).0
-}
-
-/// Get a single value of 1d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_1d(
-    x: __m256,
-    lacunarity: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    fbm_32::fbm_1d::<Avx2>(F32x8(x), F32x8(lacunarity), F32x8(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_1d(
-    x: __m256,
-    lacunarity: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    ridge_32::ridge_1d::<Avx2>(F32x8(x), F32x8(lacunarity), F32x8(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_1d(
-    x: __m256,
-    lacunarity: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    turbulence_32::turbulence_1d::<Avx2>(F32x8(x), F32x8(lacunarity), F32x8(gain), octaves, seed).0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_1d_f64(x: __m256d, seed: i64) -> __m256d {
-    simplex_64::simplex_1d::<Avx2>(F64x4(x), seed).0
-}
-
-/// Get a single value of 1d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_1d_f64(
-    x: __m256d,
-    lacunarity: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    fbm_64::fbm_1d::<Avx2>(F64x4(x), F64x4(lacunarity), F64x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_1d_f64(
-    x: __m256d,
-    lacunarity: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    ridge_64::ridge_1d::<Avx2>(F64x4(x), F64x4(lacunarity), F64x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_1d_f64(
-    x: __m256d,
-    lacunarity: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    turbulence_64::turbulence_1d::<Avx2>(F64x4(x), F64x4(lacunarity), F64x4(gain), octaves, seed).0
-}
-
-/// Gets a width sized block of 1d noise, unscaled.
-/// `start_x` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_1d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_1d_noise::<Avx2>(noise_type)
-}
-pub unsafe fn get_1d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_1d_noise_f64::<Avx2>(noise_type)
-}
-
-/// Gets a width sized block of scaled 2d noise
-/// `start_x` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_1d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_1d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Avx2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_2d(x: __m256, y: __m256, seed: i32) -> __m256 {
-    simplex_32::simplex_2d::<Avx2>(F32x8(x), F32x8(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_2d(
-    x: __m256,
-    y: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    fbm_32::fbm_2d::<Avx2>(F32x8(x), F32x8(y), F32x8(lac), F32x8(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_2d(
-    x: __m256,
-    y: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    ridge_32::ridge_2d::<Avx2>(F32x8(x), F32x8(y), F32x8(lac), F32x8(gain), octaves, seed).0
-}
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_2d(
-    x: __m256,
-    y: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    turbulence_32::turbulence_2d::<Avx2>(F32x8(x), F32x8(y), F32x8(lac), F32x8(gain), octaves, seed)
-        .0
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_2d_f64(x: __m256d, y: __m256d, seed: i64) -> __m256d {
-    simplex_64::simplex_2d::<Avx2>(F64x4(x), F64x4(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_2d_f64(
-    x: __m256d,
-    y: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    fbm_64::fbm_2d::<Avx2>(F64x4(x), F64x4(y), F64x4(lac), F64x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_2d_f64(
-    x: __m256d,
-    y: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    ridge_64::ridge_2d::<Avx2>(F64x4(x), F64x4(y), F64x4(lac), F64x4(gain), octaves, seed).0
-}
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_2d_f64(
-    x: __m256d,
-    y: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    turbulence_64::turbulence_2d::<Avx2>(F64x4(x), F64x4(y), F64x4(lac), F64x4(gain), octaves, seed)
-        .0
-}
-/// Gets a width X height sized block of 2d noise, unscaled.
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_2d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_2d_noise::<Avx2>(noise_type)
-}
-pub unsafe fn get_2d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_2d_noise_f64::<Avx2>(noise_type)
-}
-
-/// Gets a width X height sized block of scaled 2d noise
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_2d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_2d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Avx2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 3d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_3d(x: __m256, y: __m256, z: __m256, seed: i32) -> __m256 {
-    simplex_32::simplex_3d::<Avx2>(F32x8(x), F32x8(y), F32x8(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_3d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    fbm_32::fbm_3d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        F32x8(lac),
-        F32x8(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_3d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    ridge_32::ridge_3d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        F32x8(lac),
-        F32x8(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_3d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    turbulence_32::turbulence_3d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        F32x8(lac),
-        F32x8(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_3d_f64(x: __m256d, y: __m256d, z: __m256d, seed: i64) -> __m256d {
-    simplex_64::simplex_3d::<Avx2>(F64x4(x), F64x4(y), F64x4(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_3d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    fbm_64::fbm_3d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        F64x4(lac),
-        F64x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_3d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    ridge_64::ridge_3d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        F64x4(lac),
-        F64x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_3d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    turbulence_64::turbulence_3d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        F64x4(lac),
-        F64x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth sized block of 3d noise, unscaled,
-/// `start_x`,`start_y` and `start_z` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_3d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_3d_noise::<Avx2>(noise_type)
-}
-pub unsafe fn get_3d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_3d_noise_f64::<Avx2>(noise_type)
-}
-
-/// Gets a width X height X depth sized block of scaled 3d noise
-/// `start_x`, `start_y` and `start_z` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_3d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_3d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Avx2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_4d(x: __m256, y: __m256, z: __m256, w: __m256, seed: i32) -> __m256 {
-    simplex_32::simplex_4d::<Avx2>(F32x8(x), F32x8(y), F32x8(z), F32x8(w), seed).0
-}
-/// Get a single value of 4d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_4d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    w: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    fbm_32::fbm_4d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        F32x8(w),
-        F32x8(lac),
-        F32x8(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_4d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    w: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    ridge_32::ridge_4d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        F32x8(w),
-        F32x8(lac),
-        F32x8(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_4d(
-    x: __m256,
-    y: __m256,
-    z: __m256,
-    w: __m256,
-    lac: __m256,
-    gain: __m256,
-    octaves: u8,
-    seed: i32,
-) -> __m256 {
-    turbulence_32::turbulence_4d::<Avx2>(
-        F32x8(x),
-        F32x8(y),
-        F32x8(z),
-        F32x8(w),
-        F32x8(lac),
-        F32x8(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "avx2")]
-pub unsafe fn simplex_4d_f64(x: __m256d, y: __m256d, z: __m256d, w: __m256d, seed: i64) -> __m256d {
-    simplex_64::simplex_4d::<Avx2>(F64x4(x), F64x4(y), F64x4(z), F64x4(w), seed).0
-}
-/// Get a single value of 4d fractal brownian motion.
-#[target_feature(enable = "avx2")]
-pub unsafe fn fbm_4d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    w: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    fbm_64::fbm_4d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        F64x4(w),
-        F64x4(lac),
-        F64x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-#[target_feature(enable = "avx2")]
-pub unsafe fn ridge_4d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    w: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    ridge_64::ridge_4d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        F64x4(w),
-        F64x4(lac),
-        F64x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-#[target_feature(enable = "avx2")]
-pub unsafe fn turbulence_4d_f64(
-    x: __m256d,
-    y: __m256d,
-    z: __m256d,
-    w: __m256d,
-    lac: __m256d,
-    gain: __m256d,
-    octaves: u8,
-    seed: i64,
-) -> __m256d {
-    turbulence_64::turbulence_4d::<Avx2>(
-        F64x4(x),
-        F64x4(y),
-        F64x4(z),
-        F64x4(w),
-        F64x4(lac),
-        F64x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth x time sized block of 4d noise, unscaled,
-/// `start_*` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_4d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_4d_noise::<Avx2>(noise_type)
-}
-pub unsafe fn get_4d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_4d_noise_f64::<Avx2>(noise_type)
-}
-
-/// Gets a width X height X depth X time sized block of scaled 4d noise
-/// `start_*` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "avx2")]
-pub unsafe fn get_4d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_4d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Avx2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
+cellular!(
+    "2d",
+    cellular_2d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    cell_32,
+    try_transmute_avx2
+);
+cellular!(
+    "3d",
+    cellular_3d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    cell_32,
+    try_transmute_avx2
+);
+cellular!(
+    "2d",
+    cellular_2d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    cell_64,
+    try_transmute_avx2
+);
+cellular!(
+    "3d",
+    cellular_3d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    cell_64,
+    try_transmute_avx2
+);
+
+simplex!(
+    "1d",
+    simplex_1d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    simplex_32,
+    try_transmute_avx2
+);
+simplex!(
+    "2d",
+    simplex_2d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    simplex_32,
+    try_transmute_avx2
+);
+simplex!(
+    "3d",
+    simplex_3d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    simplex_32,
+    try_transmute_avx2
+);
+simplex!(
+    "4d",
+    simplex_4d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    simplex_32,
+    try_transmute_avx2
+);
+simplex!(
+    "1d",
+    simplex_1d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    simplex_64,
+    try_transmute_avx2
+);
+simplex!(
+    "2d",
+    simplex_2d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    simplex_64,
+    try_transmute_avx2
+);
+simplex!(
+    "3d",
+    simplex_3d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    simplex_64,
+    try_transmute_avx2
+);
+simplex!(
+    "4d",
+    simplex_4d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    simplex_64,
+    try_transmute_avx2
+);
+
+fbm!(
+    "1d",
+    fbm_1d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    fbm_32,
+    try_transmute_avx2
+);
+fbm!(
+    "2d",
+    fbm_2d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    fbm_32,
+    try_transmute_avx2
+);
+fbm!(
+    "3d",
+    fbm_3d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    fbm_32,
+    try_transmute_avx2
+);
+fbm!(
+    "4d",
+    fbm_4d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    fbm_32,
+    try_transmute_avx2
+);
+fbm!(
+    "1d",
+    fbm_1d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    fbm_64,
+    try_transmute_avx2
+);
+fbm!(
+    "2d",
+    fbm_2d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    fbm_64,
+    try_transmute_avx2
+);
+fbm!(
+    "3d",
+    fbm_3d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    fbm_64,
+    try_transmute_avx2
+);
+fbm!(
+    "4d",
+    fbm_4d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    fbm_64,
+    try_transmute_avx2
+);
+
+ridge!(
+    "1d",
+    ridge_1d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    ridge_32,
+    try_transmute_avx2
+);
+ridge!(
+    "2d",
+    ridge_2d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    ridge_32,
+    try_transmute_avx2
+);
+ridge!(
+    "3d",
+    ridge_3d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    ridge_32,
+    try_transmute_avx2
+);
+ridge!(
+    "4d",
+    ridge_4d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    ridge_32,
+    try_transmute_avx2
+);
+ridge!(
+    "1d",
+    ridge_1d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    ridge_64,
+    try_transmute_avx2
+);
+ridge!(
+    "2d",
+    ridge_2d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    ridge_64,
+    try_transmute_avx2
+);
+ridge!(
+    "3d",
+    ridge_3d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    ridge_64,
+    try_transmute_avx2
+);
+ridge!(
+    "4d",
+    ridge_4d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    ridge_64,
+    try_transmute_avx2
+);
+
+turbulence!(
+    "1d",
+    turbulence_1d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    turbulence_32,
+    try_transmute_avx2
+);
+turbulence!(
+    "2d",
+    turbulence_2d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    turbulence_32,
+    try_transmute_avx2
+);
+turbulence!(
+    "3d",
+    turbulence_3d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    turbulence_32,
+    try_transmute_avx2
+);
+turbulence!(
+    "4d",
+    turbulence_4d,
+    __m256,
+    SimdTransmuteF32::try_transmute_from_avx2,
+    i32,
+    turbulence_32,
+    try_transmute_avx2
+);
+turbulence!(
+    "1d",
+    turbulence_1d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    turbulence_64,
+    try_transmute_avx2
+);
+turbulence!(
+    "2d",
+    turbulence_2d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    turbulence_64,
+    try_transmute_avx2
+);
+turbulence!(
+    "3d",
+    turbulence_3d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    turbulence_64,
+    try_transmute_avx2
+);
+turbulence!(
+    "4d",
+    turbulence_4d_f64,
+    __m256d,
+    SimdTransmuteF64::try_transmute_from_avx2,
+    i64,
+    turbulence_64,
+    try_transmute_avx2
+);
+
+get_noise!(get_1d_noise, get_1d_noise, f32, noise_helpers_32);
+get_noise!(get_2d_noise, get_2d_noise, f32, noise_helpers_32);
+get_noise!(get_3d_noise, get_3d_noise, f32, noise_helpers_32);
+get_noise!(get_4d_noise, get_4d_noise, f32, noise_helpers_32);
+get_noise!(get_1d_noise, get_1d_noise_64, f64, noise_helpers_64);
+get_noise!(get_2d_noise, get_2d_noise_64, f64, noise_helpers_64);
+get_noise!(get_3d_noise, get_3d_noise_64, f64, noise_helpers_64);
+get_noise!(get_4d_noise, get_4d_noise_64, f64, noise_helpers_64);
+get_noise_scaled!(get_1d_noise, get_1d_scaled_noise, f32);
+get_noise_scaled!(get_2d_noise, get_2d_scaled_noise, f32);
+get_noise_scaled!(get_3d_noise, get_3d_scaled_noise, f32);
+get_noise_scaled!(get_4d_noise, get_4d_scaled_noise, f32);
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index e2871bd..3869dc3 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -1,10 +1,497 @@
-pub mod scalar;
+macro_rules! cellular {
+    ("2d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 2d cellular/Voronoi noise
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            distance_function: CellDistanceFunction,
+            return_type: CellReturnType,
+            jitter: $f_type,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::cellular_2d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                distance_function,
+                return_type,
+                $transmute_from(jitter),
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("3d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 3d cellular/Voronoi noise
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            distance_function: CellDistanceFunction,
+            return_type: CellReturnType,
+            jitter: $f_type,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::cellular_3d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                distance_function,
+                return_type,
+                $transmute_from(jitter),
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+}
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-pub mod sse2;
+macro_rules! simplex {
+    ("1d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 1d simplex noise, results are not scaled.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(x: $f_type, seed: $seed_type) -> $f_type {
+            $mod::simplex_1d::<S>($transmute_from(x), seed).$transmute_to()
+        }
+    };
+    ("2d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 2d simplex noise, results are not scaled.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::simplex_2d::<S>($transmute_from(x), $transmute_from(y), seed).$transmute_to()
+        }
+    };
+    ("3d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 3d simplex noise, results are not scaled.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::simplex_3d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("4d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 4d simplex noise, results are not scaled.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            w: $f_type,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::simplex_4d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(w),
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+}
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-pub mod sse41;
+macro_rules! fbm {
+    ("1d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 1d fractal brownian motion.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::fbm_1d::<S>(
+                $transmute_from(x),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("2d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 2d fractal brownian motion.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::fbm_2d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("3d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 3d fractal brownian motion.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::fbm_3d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("4d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 4d fractal brownian motion.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            w: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::fbm_4d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(w),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+}
+macro_rules! ridge {
+    ("1d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 1d ridge noise.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::ridge_1d::<S>(
+                $transmute_from(x),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("2d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 2d ridge noise.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::ridge_2d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("3d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 3d ridge noise.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::ridge_3d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("4d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 4d ridge noise.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            w: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::ridge_4d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(w),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+}
+
+macro_rules! turbulence {
+    ("1d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 1d turbulence.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::turbulence_1d::<S>(
+                $transmute_from(x),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("2d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 2d turbulence.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::turbulence_2d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("3d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 3d turbulence.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::turbulence_3d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+    ("4d", $fn_name: ident, $f_type: ty, $transmute_from: path, $seed_type: ty, $mod: ident, $transmute_to: ident) => {
+        #[cfg(any(
+            target_feature = "sse2",
+            target_feature = "sse4.1",
+            target_feature = "avx2"
+        ))]
+        /// Get a single value of 4d turbulence.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            x: $f_type,
+            y: $f_type,
+            z: $f_type,
+            w: $f_type,
+            lacunarity: $f_type,
+            gain: $f_type,
+            octaves: u8,
+            seed: $seed_type,
+        ) -> $f_type {
+            $mod::turbulence_4d::<S>(
+                $transmute_from(x),
+                $transmute_from(y),
+                $transmute_from(z),
+                $transmute_from(w),
+                $transmute_from(lacunarity),
+                $transmute_from(gain),
+                octaves,
+                seed,
+            )
+            .$transmute_to()
+        }
+    };
+}
+
+macro_rules! get_noise {
+    ($call: ident, $fn_name: ident, $f_type: ty, $mod: ident) => {
+        /// Gets a block of unscaled noise for the given `noise_type`.
+        /// Coordinate offsets such as `start_x` are not arguments here; presumably
+        /// they are configured on `noise_type` (confirm against `NoiseType`).
+        /// Returns the noise values together with their `min` and `max`, so you can
+        /// scale and transform the noise as you see fit in a single pass.
+        pub unsafe fn $fn_name<S: simdeez::Simd>(
+            noise_type: &NoiseType,
+        ) -> (Vec<$f_type>, $f_type, $f_type) {
+            $mod::$call::<S>(noise_type)
+        }
+    };
+}
+macro_rules! get_noise_scaled {
+    ($call: ident, $fn_name: ident, $f_type: ty) => {
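+        /// Gets a block of scaled noise for the given `noise_type`.
+        /// Coordinate offsets such as `start_x` are not arguments here; presumably they
+        /// are configured on `noise_type` (confirm against `NoiseType`). The noise is
+        /// scaled to the `min`/`max` range reported by `noise_type.get_dimensions()`.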
+        pub unsafe fn $fn_name<S: simdeez::Simd>(noise_type: &NoiseType) -> Vec<$f_type> {
+            let (mut noise, min, max) = $call::<S>(noise_type);
+            let dim = noise_type.get_dimensions();
+            scale_noise::<S>(dim.min, dim.max, min, max, &mut noise);
+            noise
+        }
+    };
+}
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 pub mod avx2;
+pub mod scalar;
+pub mod sse2;
+pub mod sse41;
diff --git a/src/intrinsics/scalar.rs b/src/intrinsics/scalar.rs
index 7001867..7f1e4d2 100644
--- a/src/intrinsics/scalar.rs
+++ b/src/intrinsics/scalar.rs
@@ -12,615 +12,353 @@ use crate::noise::simplex_32;
 use crate::noise::simplex_64;
 use crate::noise::turbulence_32;
 use crate::noise::turbulence_64;
+use crate::noise_helpers_32;
+use crate::noise_helpers_64;
 use crate::{CellDistanceFunction, CellReturnType, DimensionalBeing, NoiseType};
 
 use crate::shared::scale_noise;
 
-use simdeez::scalar::{F32x1, F64x1, Scalar};
+use simdeez::{SimdTransmuteF32, SimdTransmuteF64};
 
 use std::f32;
 
-/// Get a single value of 2d cellular/voroni noise
-
-pub unsafe fn cellular_2d(
-    x: f32,
-    y: f32,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: f32,
-    seed: i32,
-) -> f32 {
-    cell_32::cellular_2d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        distance_function,
-        return_type,
-        F32x1(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-
-pub unsafe fn cellular_3d(
-    x: f32,
-    y: f32,
-    z: f32,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: f32,
-    seed: i32,
-) -> f32 {
-    cell_32::cellular_3d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        distance_function,
-        return_type,
-        F32x1(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 2d cellular/voroni noise
-
-pub unsafe fn cellular_2d_f64(
-    x: f64,
-    y: f64,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: f64,
-    seed: i64,
-) -> f64 {
-    cell_64::cellular_2d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        distance_function,
-        return_type,
-        F64x1(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-
-pub unsafe fn cellular_3d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: f64,
-    seed: i64,
-) -> f64 {
-    cell_64::cellular_3d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        distance_function,
-        return_type,
-        F64x1(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-pub unsafe fn simplex_1d(x: f32, seed: i32) -> f32 {
-    simplex_32::simplex_1d::<Scalar>(F32x1(x), seed).0
-}
-
-/// Get a single value of 1d fractal brownian motion.
-
-pub unsafe fn fbm_1d(x: f32, lacunarity: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    fbm_32::fbm_1d::<Scalar>(F32x1(x), F32x1(lacunarity), F32x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-
-pub unsafe fn ridge_1d(x: f32, lacunarity: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    ridge_32::ridge_1d::<Scalar>(F32x1(x), F32x1(lacunarity), F32x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-
-pub unsafe fn turbulence_1d(x: f32, lacunarity: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    turbulence_32::turbulence_1d::<Scalar>(F32x1(x), F32x1(lacunarity), F32x1(gain), octaves, seed)
-        .0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-pub unsafe fn simplex_1d_f64(x: f64, seed: i64) -> f64 {
-    simplex_64::simplex_1d::<Scalar>(F64x1(x), seed).0
-}
-
-/// Get a single value of 1d fractal brownian motion.
-
-pub unsafe fn fbm_1d_f64(x: f64, lacunarity: f64, gain: f64, octaves: u8, seed: i64) -> f64 {
-    fbm_64::fbm_1d::<Scalar>(F64x1(x), F64x1(lacunarity), F64x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-
-pub unsafe fn ridge_1d_f64(x: f64, lacunarity: f64, gain: f64, octaves: u8, seed: i64) -> f64 {
-    simplex_ridge_64::ridge_1d::<Scalar>(F64x1(x), F64x1(lacunarity), F64x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-
-pub unsafe fn turbulence_1d_f64(x: f64, lacunarity: f64, gain: f64, octaves: u8, seed: i64) -> f64 {
-    turbulence_64::turbulence_1d::<Scalar>(F64x1(x), F64x1(lacunarity), F64x1(gain), octaves, seed)
-        .0
-}
-
-/// Gets a width sized block of 1d noise, unscaled.
-/// `start_x` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-
-pub unsafe fn get_1d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_1d_noise::<Scalar>(noise_type)
-}
-pub unsafe fn get_1d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_1d_noise_f64::<Scalar>(noise_type)
-}
-
-/// Gets a width sized block of scaled 2d noise
-/// `start_x` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-
-pub unsafe fn get_1d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_1d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Scalar>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-
-pub unsafe fn simplex_2d(x: f32, y: f32, seed: i32) -> f32 {
-    simplex_32::simplex_2d::<Scalar>(F32x1(x), F32x1(y), seed).0
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-
-pub unsafe fn simplex_2d_f64(x: f64, y: f64, seed: i64) -> f64 {
-    simplex_64::simplex_2d::<Scalar>(F64x1(x), F64x1(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-
-pub unsafe fn fbm_2d(x: f32, y: f32, lac: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    fbm_32::fbm_2d::<Scalar>(F32x1(x), F32x1(y), F32x1(lac), F32x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-
-pub unsafe fn fbm_2d_f64(x: f64, y: f64, lac: f64, gain: f64, octaves: u8, seed: i64) -> f64 {
-    fbm_64::fbm_2d::<Scalar>(F64x1(x), F64x1(y), F64x1(lac), F64x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-
-pub unsafe fn ridge_2d(x: f32, y: f32, lac: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    ridge_32::ridge_2d::<Scalar>(F32x1(x), F32x1(y), F32x1(lac), F32x1(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-
-pub unsafe fn ridge_2d_f64(x: f64, y: f64, lac: f64, gain: f64, octaves: u8, seed: i64) -> f64 {
-    simplex_ridge_64::ridge_2d::<Scalar>(F64x1(x), F64x1(y), F64x1(lac), F64x1(gain), octaves, seed)
-        .0
-}
-
-/// Get a single value of 2d turbulence.
-
-pub unsafe fn turbulence_2d(x: f32, y: f32, lac: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    turbulence_32::turbulence_2d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 2d turbulence.
-
-pub unsafe fn turbulence_2d_f64(
-    x: f64,
-    y: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    turbulence_64::turbulence_2d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height sized block of 2d noise, unscaled.
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-
-pub unsafe fn get_2d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_2d_noise::<Scalar>(noise_type)
-}
-pub unsafe fn get_2d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_2d_noise_f64::<Scalar>(noise_type)
-}
-
-/// Gets a width X height sized block of scaled 2d noise
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-
-pub unsafe fn get_2d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_2d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Scalar>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 3d simplex noise, results
-/// are not scaled.
-
-pub unsafe fn simplex_3d(x: f32, y: f32, z: f32, seed: i32) -> f32 {
-    simplex_32::simplex_3d::<Scalar>(F32x1(x), F32x1(y), F32x1(z), seed).0
-}
-
-pub unsafe fn simplex_3d_f64(x: f64, y: f64, z: f64, seed: i64) -> f64 {
-    simplex_64::simplex_3d::<Scalar>(F64x1(x), F64x1(y), F64x1(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-
-pub unsafe fn fbm_3d(x: f32, y: f32, z: f32, lac: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    fbm_32::fbm_3d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-
-pub unsafe fn fbm_3d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    fbm_64::fbm_3d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-
-pub unsafe fn ridge_3d(x: f32, y: f32, z: f32, lac: f32, gain: f32, octaves: u8, seed: i32) -> f32 {
-    ridge_32::ridge_3d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-
-pub unsafe fn ridge_3d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    simplex_ridge_64::ridge_3d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-pub unsafe fn turbulence_3d(
-    x: f32,
-    y: f32,
-    z: f32,
-    lac: f32,
-    gain: f32,
-    octaves: u8,
-    seed: i32,
-) -> f32 {
-    turbulence_32::turbulence_3d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-pub unsafe fn turbulence_3d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    turbulence_64::turbulence_3d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth sized block of 3d noise, unscaled,
-/// `start_x`,`start_y` and `start_z` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-pub unsafe fn get_3d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_3d_noise::<Scalar>(noise_type)
-}
-pub unsafe fn get_3d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_3d_noise_f64::<Scalar>(noise_type)
-}
-
-/// Gets a width X height X depth sized block of scaled 3d noise
-/// `start_x`, `start_y` and `start_z` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-pub unsafe fn get_3d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_3d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Scalar>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-
-pub unsafe fn simplex_4d(x: f32, y: f32, z: f32, w: f32, seed: i32) -> f32 {
-    simplex_32::simplex_4d::<Scalar>(F32x1(x), F32x1(y), F32x1(z), F32x1(w), seed).0
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-
-pub unsafe fn simplex_4d_f64(x: f64, y: f64, z: f64, w: f64, seed: i64) -> f64 {
-    simplex_64::simplex_4d::<Scalar>(F64x1(x), F64x1(y), F64x1(z), F64x1(w), seed).0
-}
-
-/// Get a single value of 4d fractal brownian motion.
-
-pub unsafe fn fbm_4d(
-    x: f32,
-    y: f32,
-    z: f32,
-    w: f32,
-    lac: f32,
-    gain: f32,
-    octaves: u8,
-    seed: i32,
-) -> f32 {
-    fbm_32::fbm_4d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        F32x1(w),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d fractal brownian motion.
-
-pub unsafe fn fbm_4d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    w: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    fbm_64::fbm_4d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        F64x1(w),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-
-pub unsafe fn ridge_4d(
-    x: f32,
-    y: f32,
-    z: f32,
-    w: f32,
-    lac: f32,
-    gain: f32,
-    octaves: u8,
-    seed: i32,
-) -> f32 {
-    ridge_32::ridge_4d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        F32x1(w),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-
-pub unsafe fn ridge_4d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    w: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    simplex_ridge_64::ridge_4d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        F64x1(w),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-
-pub unsafe fn turbulence_4d(
-    x: f32,
-    y: f32,
-    z: f32,
-    w: f32,
-    lac: f32,
-    gain: f32,
-    octaves: u8,
-    seed: i32,
-) -> f32 {
-    turbulence_32::turbulence_4d::<Scalar>(
-        F32x1(x),
-        F32x1(y),
-        F32x1(z),
-        F32x1(w),
-        F32x1(lac),
-        F32x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-
-pub unsafe fn turbulence_4d_f64(
-    x: f64,
-    y: f64,
-    z: f64,
-    w: f64,
-    lac: f64,
-    gain: f64,
-    octaves: u8,
-    seed: i64,
-) -> f64 {
-    turbulence_64::turbulence_4d::<Scalar>(
-        F64x1(x),
-        F64x1(y),
-        F64x1(z),
-        F64x1(w),
-        F64x1(lac),
-        F64x1(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth x time sized block of 4d noise, unscaled,
-/// `start_*` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-
-pub unsafe fn get_4d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_4d_noise::<Scalar>(noise_type)
-}
-pub unsafe fn get_4d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_4d_noise_f64::<Scalar>(noise_type)
-}
-
-/// Gets a width X height X depth X time sized block of scaled 4d noise
-/// `start_*` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-
-pub unsafe fn get_4d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_4d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Scalar>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
+cellular!(
+    "2d",
+    cellular_2d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    cell_32,
+    try_transmute_scalar
+);
+cellular!(
+    "3d",
+    cellular_3d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    cell_32,
+    try_transmute_scalar
+);
+cellular!(
+    "2d",
+    cellular_2d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    cell_64,
+    try_transmute_scalar
+);
+cellular!(
+    "3d",
+    cellular_3d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    cell_64,
+    try_transmute_scalar
+);
+
+simplex!(
+    "1d",
+    simplex_1d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    simplex_32,
+    try_transmute_scalar
+);
+simplex!(
+    "2d",
+    simplex_2d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    simplex_32,
+    try_transmute_scalar
+);
+simplex!(
+    "3d",
+    simplex_3d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    simplex_32,
+    try_transmute_scalar
+);
+simplex!(
+    "4d",
+    simplex_4d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    simplex_32,
+    try_transmute_scalar
+);
+simplex!(
+    "1d",
+    simplex_1d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_64,
+    try_transmute_scalar
+);
+simplex!(
+    "2d",
+    simplex_2d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_64,
+    try_transmute_scalar
+);
+simplex!(
+    "3d",
+    simplex_3d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_64,
+    try_transmute_scalar
+);
+simplex!(
+    "4d",
+    simplex_4d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_64,
+    try_transmute_scalar
+);
+fbm!(
+    "1d",
+    fbm_1d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    fbm_32,
+    try_transmute_scalar
+);
+fbm!(
+    "2d",
+    fbm_2d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    fbm_32,
+    try_transmute_scalar
+);
+fbm!(
+    "3d",
+    fbm_3d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    fbm_32,
+    try_transmute_scalar
+);
+fbm!(
+    "4d",
+    fbm_4d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    fbm_32,
+    try_transmute_scalar
+);
+fbm!(
+    "1d",
+    fbm_1d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    fbm_64,
+    try_transmute_scalar
+);
+fbm!(
+    "2d",
+    fbm_2d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    fbm_64,
+    try_transmute_scalar
+);
+fbm!(
+    "3d",
+    fbm_3d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    fbm_64,
+    try_transmute_scalar
+);
+fbm!(
+    "4d",
+    fbm_4d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    fbm_64,
+    try_transmute_scalar
+);
+
+ridge!(
+    "1d",
+    ridge_1d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    ridge_32,
+    try_transmute_scalar
+);
+ridge!(
+    "2d",
+    ridge_2d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    ridge_32,
+    try_transmute_scalar
+);
+ridge!(
+    "3d",
+    ridge_3d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    ridge_32,
+    try_transmute_scalar
+);
+ridge!(
+    "4d",
+    ridge_4d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    ridge_32,
+    try_transmute_scalar
+);
+ridge!(
+    "1d",
+    ridge_1d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_ridge_64,
+    try_transmute_scalar
+);
+ridge!(
+    "2d",
+    ridge_2d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_ridge_64,
+    try_transmute_scalar
+);
+ridge!(
+    "3d",
+    ridge_3d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_ridge_64,
+    try_transmute_scalar
+);
+ridge!(
+    "4d",
+    ridge_4d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    simplex_ridge_64,
+    try_transmute_scalar
+);
+
+turbulence!(
+    "1d",
+    turbulence_1d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    turbulence_32,
+    try_transmute_scalar
+);
+turbulence!(
+    "2d",
+    turbulence_2d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    turbulence_32,
+    try_transmute_scalar
+);
+turbulence!(
+    "3d",
+    turbulence_3d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    turbulence_32,
+    try_transmute_scalar
+);
+turbulence!(
+    "4d",
+    turbulence_4d,
+    f32,
+    SimdTransmuteF32::try_transmute_from_scalar,
+    i32,
+    turbulence_32,
+    try_transmute_scalar
+);
+turbulence!(
+    "1d",
+    turbulence_1d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    turbulence_64,
+    try_transmute_scalar
+);
+turbulence!(
+    "2d",
+    turbulence_2d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    turbulence_64,
+    try_transmute_scalar
+);
+turbulence!(
+    "3d",
+    turbulence_3d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    turbulence_64,
+    try_transmute_scalar
+);
+turbulence!(
+    "4d",
+    turbulence_4d_f64,
+    f64,
+    SimdTransmuteF64::try_transmute_from_scalar,
+    i64,
+    turbulence_64,
+    try_transmute_scalar
+);
+
+get_noise!(get_1d_noise, get_1d_noise, f32, noise_helpers_32);
+get_noise!(get_2d_noise, get_2d_noise, f32, noise_helpers_32);
+get_noise!(get_3d_noise, get_3d_noise, f32, noise_helpers_32);
+get_noise!(get_4d_noise, get_4d_noise, f32, noise_helpers_32);
+get_noise!(get_1d_noise, get_1d_noise_64, f64, noise_helpers_64);
+get_noise!(get_2d_noise, get_2d_noise_64, f64, noise_helpers_64);
+get_noise!(get_3d_noise, get_3d_noise_64, f64, noise_helpers_64);
+get_noise!(get_4d_noise, get_4d_noise_64, f64, noise_helpers_64);
+get_noise_scaled!(get_1d_noise, get_1d_scaled_noise, f32);
+get_noise_scaled!(get_2d_noise, get_2d_scaled_noise, f32);
+get_noise_scaled!(get_3d_noise, get_3d_scaled_noise, f32);
+get_noise_scaled!(get_4d_noise, get_4d_scaled_noise, f32);
diff --git a/src/intrinsics/sse2.rs b/src/intrinsics/sse2.rs
index e15a2e8..f72d565 100644
--- a/src/intrinsics/sse2.rs
+++ b/src/intrinsics/sse2.rs
@@ -16,11 +16,13 @@ use crate::noise::simplex_32;
 use crate::noise::simplex_64;
 use crate::noise::turbulence_32;
 use crate::noise::turbulence_64;
+use crate::noise_helpers_32;
+use crate::noise_helpers_64;
 use crate::{CellDistanceFunction, CellReturnType, DimensionalBeing, NoiseType};
 
 use crate::shared::scale_noise;
 
-use simdeez::sse2::{F32x4, F64x2, Sse2};
+use simdeez::{SimdTransmuteF32, SimdTransmuteF64};
 
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
@@ -29,680 +31,344 @@ use std::arch::x86_64::*;
 
 use std::f32;
 
-/// Get a single value of 2d cellular/voroni noise
-#[target_feature(enable = "sse2")]
-pub unsafe fn cellular_2d(
-    x: __m128,
-    y: __m128,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128,
-    seed: i32,
-) -> __m128 {
-    cell_32::cellular_2d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        distance_function,
-        return_type,
-        F32x4(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-#[target_feature(enable = "sse2")]
-pub unsafe fn cellular_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128,
-    seed: i32,
-) -> __m128 {
-    cell_32::cellular_3d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        distance_function,
-        return_type,
-        F32x4(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 2d cellular/voroni noise
-#[target_feature(enable = "sse2")]
-pub unsafe fn cellular_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128d,
-    seed: i64,
-) -> __m128d {
-    cell_64::cellular_2d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        distance_function,
-        return_type,
-        F64x2(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-#[target_feature(enable = "sse2")]
-pub unsafe fn cellular_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128d,
-    seed: i64,
-) -> __m128d {
-    cell_64::cellular_3d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        distance_function,
-        return_type,
-        F64x2(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_1d(x: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_1d::<Sse2>(F32x4(x), seed).0
-}
-/// Get a single value of 1d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_1d(
-    x: __m128,
-    lacunarity: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_1d::<Sse2>(F32x4(x), F32x4(lacunarity), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_1d(
-    x: __m128,
-    lacunarity: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_1d::<Sse2>(F32x4(x), F32x4(lacunarity), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_1d(
-    x: __m128,
-    lacunarity: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_1d::<Sse2>(F32x4(x), F32x4(lacunarity), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_1d_f64(x: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_1d::<Sse2>(F64x2(x), seed).0
-}
-/// Get a single value of 1d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_1d_f64(
-    x: __m128d,
-    lacunarity: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_1d::<Sse2>(F64x2(x), F64x2(lacunarity), F64x2(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_1d_f64(
-    x: __m128d,
-    lacunarity: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_1d::<Sse2>(F64x2(x), F64x2(lacunarity), F64x2(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_1d_f64(
-    x: __m128d,
-    lacunarity: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_1d::<Sse2>(F64x2(x), F64x2(lacunarity), F64x2(gain), octaves, seed).0
-}
-
-/// Gets a width sized block of 1d noise, unscaled.
-/// `start_x` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_1d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_1d_noise::<Sse2>(noise_type)
-}
-pub unsafe fn get_1d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_1d_noise_f64::<Sse2>(noise_type)
-}
-
-/// Gets a width sized block of scaled 2d noise
-/// `start_x` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_1d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_1d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_2d(x: __m128, y: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_2d::<Sse2>(F32x4(x), F32x4(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_2d(
-    x: __m128,
-    y: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_2d::<Sse2>(F32x4(x), F32x4(y), F32x4(lac), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_2d(
-    x: __m128,
-    y: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_2d::<Sse2>(F32x4(x), F32x4(y), F32x4(lac), F32x4(gain), octaves, seed).0
-}
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_2d(
-    x: __m128,
-    y: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_2d::<Sse2>(F32x4(x), F32x4(y), F32x4(lac), F32x4(gain), octaves, seed)
-        .0
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_2d_f64(x: __m128d, y: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_2d::<Sse2>(F64x2(x), F64x2(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_2d::<Sse2>(F64x2(x), F64x2(y), F64x2(lac), F64x2(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_2d::<Sse2>(F64x2(x), F64x2(y), F64x2(lac), F64x2(gain), octaves, seed).0
-}
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_2d::<Sse2>(F64x2(x), F64x2(y), F64x2(lac), F64x2(gain), octaves, seed)
-        .0
-}
-
-/// Gets a width X height sized block of 2d noise, unscaled.
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_2d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_2d_noise::<Sse2>(noise_type)
-}
-pub unsafe fn get_2d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_2d_noise_f64::<Sse2>(noise_type)
-}
-
-/// Gets a width X height sized block of scaled 2d noise
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_2d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_2d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 3d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_3d(x: __m128, y: __m128, z: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_3d::<Sse2>(F32x4(x), F32x4(y), F32x4(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_3d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_3d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_3d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_3d_f64(x: __m128d, y: __m128d, z: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_3d::<Sse2>(F64x2(x), F64x2(y), F64x2(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_3d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_3d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_3d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth sized block of 3d noise, unscaled,
-/// `start_x`,`start_y` and `start_z` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_3d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_3d_noise::<Sse2>(noise_type)
-}
-pub unsafe fn get_3d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_3d_noise_f64::<Sse2>(noise_type)
-}
-
-/// Gets a width X height X depth sized block of scaled 3d noise
-/// `start_x`, `start_y` and `start_z` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_3d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_3d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_4d(x: __m128, y: __m128, z: __m128, w: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_4d::<Sse2>(F32x4(x), F32x4(y), F32x4(z), F32x4(w), seed).0
-}
-/// Get a single value of 4d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_4d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    w: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_4d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(w),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_4d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    w: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_4d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(w),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_4d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    w: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_4d::<Sse2>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(w),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse2")]
-pub unsafe fn simplex_4d_f64(x: __m128d, y: __m128d, z: __m128d, w: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_4d::<Sse2>(F64x2(x), F64x2(y), F64x2(z), F64x2(w), seed).0
-}
-/// Get a single value of 4d fractal brownian motion.
-#[target_feature(enable = "sse2")]
-pub unsafe fn fbm_4d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    w: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_4d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(w),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-#[target_feature(enable = "sse2")]
-pub unsafe fn ridge_4d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    w: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_4d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(w),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-#[target_feature(enable = "sse2")]
-pub unsafe fn turbulence_4d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    w: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_4d::<Sse2>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(w),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth x time sized block of 4d noise, unscaled,
-/// `start_*` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_4d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_4d_noise::<Sse2>(noise_type)
-}
-pub unsafe fn get_4d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_4d_noise_f64::<Sse2>(noise_type)
-}
-
-/// Gets a width X height X depth X time sized block of scaled 4d noise
-/// `start_*` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_4d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_4d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse2>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
+cellular!(
+    "2d",
+    cellular_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    cell_32,
+    try_transmute_sse2
+);
+cellular!(
+    "3d",
+    cellular_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    cell_32,
+    try_transmute_sse2
+);
+cellular!(
+    "2d",
+    cellular_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    cell_64,
+    try_transmute_sse2
+);
+cellular!(
+    "3d",
+    cellular_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    cell_64,
+    try_transmute_sse2
+);
+
+simplex!(
+    "1d",
+    simplex_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    simplex_32,
+    try_transmute_sse2
+);
+simplex!(
+    "2d",
+    simplex_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    simplex_32,
+    try_transmute_sse2
+);
+simplex!(
+    "3d",
+    simplex_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    simplex_32,
+    try_transmute_sse2
+);
+simplex!(
+    "4d",
+    simplex_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    simplex_32,
+    try_transmute_sse2
+);
+simplex!(
+    "1d",
+    simplex_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    simplex_64,
+    try_transmute_sse2
+);
+simplex!(
+    "2d",
+    simplex_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    simplex_64,
+    try_transmute_sse2
+);
+simplex!(
+    "3d",
+    simplex_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    simplex_64,
+    try_transmute_sse2
+);
+simplex!(
+    "4d",
+    simplex_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    simplex_64,
+    try_transmute_sse2
+);
+
+fbm!(
+    "1d",
+    fbm_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    fbm_32,
+    try_transmute_sse2
+);
+fbm!(
+    "2d",
+    fbm_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    fbm_32,
+    try_transmute_sse2
+);
+fbm!(
+    "3d",
+    fbm_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    fbm_32,
+    try_transmute_sse2
+);
+fbm!(
+    "4d",
+    fbm_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    fbm_32,
+    try_transmute_sse2
+);
+fbm!(
+    "1d",
+    fbm_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    fbm_64,
+    try_transmute_sse2
+);
+fbm!(
+    "2d",
+    fbm_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    fbm_64,
+    try_transmute_sse2
+);
+fbm!(
+    "3d",
+    fbm_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    fbm_64,
+    try_transmute_sse2
+);
+fbm!(
+    "4d",
+    fbm_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    fbm_64,
+    try_transmute_sse2
+);
+
+ridge!(
+    "1d",
+    ridge_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    ridge_32,
+    try_transmute_sse2
+);
+ridge!(
+    "2d",
+    ridge_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    ridge_32,
+    try_transmute_sse2
+);
+ridge!(
+    "3d",
+    ridge_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    ridge_32,
+    try_transmute_sse2
+);
+ridge!(
+    "4d",
+    ridge_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    ridge_32,
+    try_transmute_sse2
+);
+ridge!(
+    "1d",
+    ridge_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    ridge_64,
+    try_transmute_sse2
+);
+ridge!(
+    "2d",
+    ridge_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    ridge_64,
+    try_transmute_sse2
+);
+ridge!(
+    "3d",
+    ridge_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    ridge_64,
+    try_transmute_sse2
+);
+ridge!(
+    "4d",
+    ridge_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    ridge_64,
+    try_transmute_sse2
+);
+
+turbulence!(
+    "1d",
+    turbulence_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    turbulence_32,
+    try_transmute_sse2
+);
+turbulence!(
+    "2d",
+    turbulence_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    turbulence_32,
+    try_transmute_sse2
+);
+turbulence!(
+    "3d",
+    turbulence_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    turbulence_32,
+    try_transmute_sse2
+);
+turbulence!(
+    "4d",
+    turbulence_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse2,
+    i32,
+    turbulence_32,
+    try_transmute_sse2
+);
+turbulence!(
+    "1d",
+    turbulence_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    turbulence_64,
+    try_transmute_sse2
+);
+turbulence!(
+    "2d",
+    turbulence_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    turbulence_64,
+    try_transmute_sse2
+);
+turbulence!(
+    "3d",
+    turbulence_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    turbulence_64,
+    try_transmute_sse2
+);
+turbulence!(
+    "4d",
+    turbulence_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse2,
+    i64,
+    turbulence_64,
+    try_transmute_sse2
+);
+
+get_noise!(get_1d_noise, get_1d_noise, f32, noise_helpers_32);
+get_noise!(get_2d_noise, get_2d_noise, f32, noise_helpers_32);
+get_noise!(get_3d_noise, get_3d_noise, f32, noise_helpers_32);
+get_noise!(get_4d_noise, get_4d_noise, f32, noise_helpers_32);
+get_noise!(get_1d_noise, get_1d_noise_64, f64, noise_helpers_64);
+get_noise!(get_2d_noise, get_2d_noise_64, f64, noise_helpers_64);
+get_noise!(get_3d_noise, get_3d_noise_64, f64, noise_helpers_64);
+get_noise!(get_4d_noise, get_4d_noise_64, f64, noise_helpers_64);
+get_noise_scaled!(get_1d_noise, get_1d_scaled_noise, f32);
+get_noise_scaled!(get_2d_noise, get_2d_scaled_noise, f32);
+get_noise_scaled!(get_3d_noise, get_3d_scaled_noise, f32);
+get_noise_scaled!(get_4d_noise, get_4d_scaled_noise, f32);
diff --git a/src/intrinsics/sse41.rs b/src/intrinsics/sse41.rs
index 21c164c..5b0008c 100644
--- a/src/intrinsics/sse41.rs
+++ b/src/intrinsics/sse41.rs
@@ -16,11 +16,13 @@ use crate::noise::simplex_32;
 use crate::noise::simplex_64;
 use crate::noise::turbulence_32;
 use crate::noise::turbulence_64;
+use crate::noise_helpers_32;
+use crate::noise_helpers_64;
 use crate::{CellDistanceFunction, CellReturnType, DimensionalBeing, NoiseType};
 
 use crate::shared::scale_noise;
 
-use simdeez::sse41::{F32x4, F64x2, Sse41};
+use simdeez::{SimdTransmuteF32, SimdTransmuteF64};
 
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
@@ -29,696 +31,335 @@ use std::arch::x86_64::*;
 
 use std::f32;
 
-/// Get a single value of 2d cellular/voroni noise
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn cellular_2d(
-    x: __m128,
-    y: __m128,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128,
-    seed: i32,
-) -> __m128 {
-    cell_32::cellular_2d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        distance_function,
-        return_type,
-        F32x4(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn cellular_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128,
-    seed: i32,
-) -> __m128 {
-    cell_32::cellular_3d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        distance_function,
-        return_type,
-        F32x4(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 2d cellular/voroni noise
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn cellular_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128d,
-    seed: i64,
-) -> __m128d {
-    cell_64::cellular_2d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        distance_function,
-        return_type,
-        F64x2(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d cellular/voroni noise
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn cellular_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    distance_function: CellDistanceFunction,
-    return_type: CellReturnType,
-    jitter: __m128d,
-    seed: i64,
-) -> __m128d {
-    cell_64::cellular_3d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        distance_function,
-        return_type,
-        F64x2(jitter),
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_1d(x: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_1d::<Sse41>(F32x4(x), seed).0
-}
-
-/// Get a single value of 1d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_1d(
-    x: __m128,
-    lacunarity: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_1d::<Sse41>(F32x4(x), F32x4(lacunarity), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_1d(
-    x: __m128,
-    lacunarity: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_1d::<Sse41>(F32x4(x), F32x4(lacunarity), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_1d(
-    x: __m128,
-    lacunarity: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_1d::<Sse41>(F32x4(x), F32x4(lacunarity), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 1d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_1d_f64(x: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_1d::<Sse41>(F64x2(x), seed).0
-}
-
-/// Get a single value of 1d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_1d_f64(
-    x: __m128d,
-    lacunarity: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_1d::<Sse41>(F64x2(x), F64x2(lacunarity), F64x2(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_1d_f64(
-    x: __m128d,
-    lacunarity: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_1d::<Sse41>(F64x2(x), F64x2(lacunarity), F64x2(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_1d_f64(
-    x: __m128d,
-    lacunarity: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_1d::<Sse41>(F64x2(x), F64x2(lacunarity), F64x2(gain), octaves, seed).0
-}
-
-/// Gets a width sized block of 1d noise, unscaled.
-/// `start_x` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_1d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_1d_noise::<Sse41>(noise_type)
-}
-pub unsafe fn get_1d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_1d_noise_f64::<Sse41>(noise_type)
-}
-
-/// Gets a width sized block of scaled 2d noise
-/// `start_x` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_1d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_1d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse41>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_2d(x: __m128, y: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_2d::<Sse41>(F32x4(x), F32x4(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_2d(
-    x: __m128,
-    y: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_2d::<Sse41>(F32x4(x), F32x4(y), F32x4(lac), F32x4(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_2d(
-    x: __m128,
-    y: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_2d::<Sse41>(F32x4(x), F32x4(y), F32x4(lac), F32x4(gain), octaves, seed).0
-}
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_2d(
-    x: __m128,
-    y: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_2d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 2d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_2d_f64(x: __m128d, y: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_2d::<Sse41>(F64x2(x), F64x2(y), seed).0
-}
-
-/// Get a single value of 2d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_2d::<Sse41>(F64x2(x), F64x2(y), F64x2(lac), F64x2(gain), octaves, seed).0
-}
-
-/// Get a single value of 2d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_2d::<Sse41>(F64x2(x), F64x2(y), F64x2(lac), F64x2(gain), octaves, seed).0
-}
-/// Get a single value of 2d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_2d_f64(
-    x: __m128d,
-    y: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_2d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height sized block of 2d noise, unscaled.
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_2d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_2d_noise::<Sse41>(noise_type)
-}
-pub unsafe fn get_2d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_2d_noise_f64::<Sse41>(noise_type)
-}
-
-/// Gets a width X height sized block of scaled 2d noise
-/// `start_x` and `start_y` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_2d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_2d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse41>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 3d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_3d(x: __m128, y: __m128, z: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_3d::<Sse41>(F32x4(x), F32x4(y), F32x4(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_3d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_3d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_3d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_3d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d simplex_64 noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_3d_f64(x: __m128d, y: __m128d, z: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_3d::<Sse41>(F64x2(x), F64x2(y), F64x2(z), seed).0
-}
-
-/// Get a single value of 3d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_3d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_3d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 3d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_3d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_3d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth sized block of 3d noise, unscaled,
-/// `start_x`,`start_y` and `start_z` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_3d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_3d_noise::<Sse41>(noise_type)
-}
-pub unsafe fn get_3d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_3d_noise_f64::<Sse41>(noise_type)
-}
-
-/// Gets a width X height X depth sized block of scaled 3d noise
-/// `start_x`, `start_y` and `start_z` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_3d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_3d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse41>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_4d(x: __m128, y: __m128, z: __m128, w: __m128, seed: i32) -> __m128 {
-    simplex_32::simplex_4d::<Sse41>(F32x4(x), F32x4(y), F32x4(z), F32x4(w), seed).0
-}
-/// Get a single value of 4d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_4d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    w: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    fbm_32::fbm_4d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(w),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_4d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    w: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    ridge_32::ridge_4d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(w),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_4d(
-    x: __m128,
-    y: __m128,
-    z: __m128,
-    w: __m128,
-    lac: __m128,
-    gain: __m128,
-    octaves: u8,
-    seed: i32,
-) -> __m128 {
-    turbulence_32::turbulence_4d::<Sse41>(
-        F32x4(x),
-        F32x4(y),
-        F32x4(z),
-        F32x4(w),
-        F32x4(lac),
-        F32x4(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d simplex noise, results
-/// are not scaled.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn simplex_4d_f64(x: __m128d, y: __m128d, z: __m128d, w: __m128d, seed: i64) -> __m128d {
-    simplex_64::simplex_4d::<Sse41>(F64x2(x), F64x2(y), F64x2(z), F64x2(w), seed).0
-}
-/// Get a single value of 4d fractal brownian motion.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn fbm_4d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    w: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    fbm_64::fbm_4d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(w),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d ridge noise.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn ridge_4d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    w: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    ridge_64::ridge_4d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(w),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Get a single value of 4d turbulence.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn turbulence_4d_f64(
-    x: __m128d,
-    y: __m128d,
-    z: __m128d,
-    w: __m128d,
-    lac: __m128d,
-    gain: __m128d,
-    octaves: u8,
-    seed: i64,
-) -> __m128d {
-    turbulence_64::turbulence_4d::<Sse41>(
-        F64x2(x),
-        F64x2(y),
-        F64x2(z),
-        F64x2(w),
-        F64x2(lac),
-        F64x2(gain),
-        octaves,
-        seed,
-    )
-    .0
-}
-
-/// Gets a width X height X depth x time sized block of 4d noise, unscaled,
-/// `start_*` can be used to provide an offset in the
-/// coordinates. Results are unscaled, 'min' and 'max' noise values
-/// are returned so you can scale and transform the noise as you see fit
-/// in a single pass.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_4d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
-    crate::noise_helpers_32::get_4d_noise::<Sse41>(noise_type)
-}
-pub unsafe fn get_4d_noise_64(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
-    crate::noise_helpers_64::get_4d_noise_f64::<Sse41>(noise_type)
-}
-
-/// Gets a width X height X depth X time sized block of scaled 4d noise
-/// `start_*` can be used to provide an offset in the
-/// coordinates.
-/// `scaled_min` and `scaled_max` specify the range you want the noise scaled to.
-#[target_feature(enable = "sse4.1")]
-pub unsafe fn get_4d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
-    let (mut noise, min, max) = get_4d_noise(noise_type);
-    let dim = noise_type.get_dimensions();
-    scale_noise::<Sse41>(dim.min, dim.max, min, max, &mut noise);
-    noise
-}
+cellular!(
+    "2d",
+    cellular_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    cell_32,
+    try_transmute_sse41
+);
+cellular!(
+    "3d",
+    cellular_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    cell_32,
+    try_transmute_sse41
+);
+cellular!(
+    "2d",
+    cellular_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    cell_64,
+    try_transmute_sse41
+);
+cellular!(
+    "3d",
+    cellular_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    cell_64,
+    try_transmute_sse41
+);
+
+simplex!(
+    "1d",
+    simplex_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    simplex_32,
+    try_transmute_sse41
+);
+simplex!(
+    "2d",
+    simplex_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    simplex_32,
+    try_transmute_sse41
+);
+simplex!(
+    "3d",
+    simplex_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    simplex_32,
+    try_transmute_sse41
+);
+simplex!(
+    "4d",
+    simplex_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    simplex_32,
+    try_transmute_sse41
+);
+simplex!(
+    "2d",
+    simplex_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    simplex_64,
+    try_transmute_sse41
+);
+simplex!(
+    "3d",
+    simplex_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    simplex_64,
+    try_transmute_sse41
+);
+simplex!(
+    "4d",
+    simplex_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    simplex_64,
+    try_transmute_sse41
+);
+
+fbm!(
+    "1d",
+    fbm_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    fbm_32,
+    try_transmute_sse41
+);
+fbm!(
+    "2d",
+    fbm_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    fbm_32,
+    try_transmute_sse41
+);
+fbm!(
+    "3d",
+    fbm_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    fbm_32,
+    try_transmute_sse41
+);
+fbm!(
+    "4d",
+    fbm_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    fbm_32,
+    try_transmute_sse41
+);
+fbm!(
+    "1d",
+    fbm_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    fbm_64,
+    try_transmute_sse41
+);
+fbm!(
+    "2d",
+    fbm_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    fbm_64,
+    try_transmute_sse41
+);
+fbm!(
+    "3d",
+    fbm_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    fbm_64,
+    try_transmute_sse41
+);
+fbm!(
+    "4d",
+    fbm_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    fbm_64,
+    try_transmute_sse41
+);
+
+ridge!(
+    "1d",
+    ridge_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    ridge_32,
+    try_transmute_sse41
+);
+ridge!(
+    "2d",
+    ridge_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    ridge_32,
+    try_transmute_sse41
+);
+ridge!(
+    "3d",
+    ridge_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    ridge_32,
+    try_transmute_sse41
+);
+ridge!(
+    "4d",
+    ridge_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    ridge_32,
+    try_transmute_sse41
+);
+ridge!(
+    "1d",
+    ridge_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    ridge_64,
+    try_transmute_sse41
+);
+ridge!(
+    "2d",
+    ridge_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    ridge_64,
+    try_transmute_sse41
+);
+ridge!(
+    "3d",
+    ridge_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    ridge_64,
+    try_transmute_sse41
+);
+ridge!(
+    "4d",
+    ridge_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    ridge_64,
+    try_transmute_sse41
+);
+
+turbulence!(
+    "1d",
+    turbulence_1d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    turbulence_32,
+    try_transmute_sse41
+);
+turbulence!(
+    "2d",
+    turbulence_2d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    turbulence_32,
+    try_transmute_sse41
+);
+turbulence!(
+    "3d",
+    turbulence_3d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    turbulence_32,
+    try_transmute_sse41
+);
+turbulence!(
+    "4d",
+    turbulence_4d,
+    __m128,
+    SimdTransmuteF32::try_transmute_from_sse41,
+    i32,
+    turbulence_32,
+    try_transmute_sse41
+);
+turbulence!(
+    "1d",
+    turbulence_1d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    turbulence_64,
+    try_transmute_sse41
+);
+turbulence!(
+    "2d",
+    turbulence_2d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    turbulence_64,
+    try_transmute_sse41
+);
+turbulence!(
+    "3d",
+    turbulence_3d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    turbulence_64,
+    try_transmute_sse41
+);
+turbulence!(
+    "4d",
+    turbulence_4d_f64,
+    __m128d,
+    SimdTransmuteF64::try_transmute_from_sse41,
+    i64,
+    turbulence_64,
+    try_transmute_sse41
+);
+
+get_noise!(get_1d_noise, get_1d_noise, f32, noise_helpers_32);
+get_noise!(get_2d_noise, get_2d_noise, f32, noise_helpers_32);
+get_noise!(get_3d_noise, get_3d_noise, f32, noise_helpers_32);
+get_noise!(get_4d_noise, get_4d_noise, f32, noise_helpers_32);
+get_noise!(get_1d_noise, get_1d_noise_64, f64, noise_helpers_64);
+get_noise!(get_2d_noise, get_2d_noise_64, f64, noise_helpers_64);
+get_noise!(get_3d_noise, get_3d_noise_64, f64, noise_helpers_64);
+get_noise!(get_4d_noise, get_4d_noise_64, f64, noise_helpers_64);
+get_noise_scaled!(get_1d_noise, get_1d_scaled_noise, f32);
+get_noise_scaled!(get_2d_noise, get_2d_scaled_noise, f32);
+get_noise_scaled!(get_3d_noise, get_3d_scaled_noise, f32);
+get_noise_scaled!(get_4d_noise, get_4d_scaled_noise, f32);
diff --git a/src/lib.rs b/src/lib.rs
index a828876..b1aadba 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -63,7 +63,7 @@
 //!
 //! // get a block of noise with the sse41 version, using the above settings
 //! unsafe {
-//!     let (noise,min,max) = simdnoise::intrinsics::sse41::get_3d_noise(&noise_setting);
+//!     let (noise,min,max) = simdnoise::intrinsics::sse41::get_3d_noise::<simdeez::Sse41>(&noise_setting);
 //! }
 //!
 //! // send your own SIMD x,y values to the noise functions directly
@@ -72,7 +72,7 @@
 //!   let x = _mm_set1_ps(5.0);
 //!   let y = _mm_set1_ps(10.0);
 //!   let seed = 42;
-//!   let f : __m128 = simdnoise::intrinsics::sse2::simplex_2d(x,y,seed);
+//!   let f : __m128 = simdnoise::intrinsics::sse2::simplex_2d::<simdeez::Sse2>(x,y,seed);
 //!
 //!   // avx2 turbulence
 //!   let x = _mm256_set1_ps(5.0);
@@ -80,13 +80,12 @@
 //!   let lacunarity = _mm256_set1_ps(0.5);
 //!   let gain = _mm256_set1_ps(2.0);
 //!   let octaves = 3;
-//!   let f_turbulence : __m256 = simdnoise::intrinsics::avx2::turbulence_2d(x,y,lacunarity,gain,octaves,seed);
+//!   let f_turbulence : __m256 = simdnoise::intrinsics::avx2::turbulence_2d::<simdeez::Avx2>(x,y,lacunarity,gain,octaves,seed);
 //!
 //! }
 //! ```
 
 extern crate simdeez;
-
 mod dimensional_being;
 pub mod intrinsics;
 pub mod noise;
@@ -97,6 +96,9 @@ mod noise_helpers_64;
 mod noise_type;
 mod shared;
 
+use shared::get_scaled_noise;
+use simdeez::prelude::*;
+
 use dimensional_being::DimensionalBeing;
 pub use noise::cell2_return_type::Cell2ReturnType;
 pub use noise::cell_distance_function::CellDistanceFunction;
@@ -105,181 +107,53 @@ pub use noise_builder::NoiseBuilder;
 pub use noise_dimensions::NoiseDimensions;
 pub use noise_type::NoiseType;
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_1d_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_1d_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_1d_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_1d_noise($setting) }
-        } else {
-            unsafe { scalar::get_1d_noise($setting) }
-        }
-    };
-}
-
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_1d_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_1d_noise($setting) }
-    };
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_2d_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_2d_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_2d_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_2d_noise($setting) }
-        } else {
-            unsafe { scalar::get_2d_noise($setting) }
-        }
-    };
-}
-
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_2d_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_2d_noise($setting) }
-    };
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_3d_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_3d_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_3d_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_3d_noise($setting) }
-        } else {
-            unsafe { scalar::get_3d_noise($setting) }
-        }
-    };
-}
-
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_3d_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_3d_noise($setting) }
-    };
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_4d_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_4d_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_4d_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_4d_noise($setting) }
-        } else {
-            unsafe { scalar::get_4d_noise($setting) }
-        }
-    };
-}
-
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_4d_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_4d_noise($setting) }
-    };
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_1d_scaled_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_1d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_1d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_1d_scaled_noise($setting) }
-        } else {
-            unsafe { scalar::get_1d_scaled_noise($setting) }
-        }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_1d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
+        noise_helpers_32::get_1d_noise::<S>(noise_type)
+    }
+);
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_1d_scaled_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_1d_scaled_noise($setting) }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_2d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
+        noise_helpers_32::get_2d_noise::<S>(noise_type)
+    }
+);
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_2d_scaled_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_2d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_2d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_2d_scaled_noise($setting) }
-        } else {
-            unsafe { scalar::get_2d_scaled_noise($setting) }
-        }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_3d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
+        noise_helpers_32::get_3d_noise::<S>(noise_type)
+    }
+);
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_2d_scaled_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_2d_scaled_noise($setting) }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_4d_noise(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
+        noise_helpers_32::get_4d_noise::<S>(noise_type)
+    }
+);
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_3d_scaled_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_3d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_3d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_3d_scaled_noise($setting) }
-        } else {
-            unsafe { scalar::get_3d_scaled_noise($setting) }
-        }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_1d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
+        unsafe { get_scaled_noise::<S, _>(noise_type, get_1d_noise) }
+    }
+);
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_3d_scaled_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_3d_scaled_noise($setting) }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_2d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
+        unsafe { get_scaled_noise::<S, _>(noise_type, get_2d_noise) }
+    }
+);
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-macro_rules! get_4d_scaled_noise {
-    ($setting:expr) => {
-        if is_x86_feature_detected!("avx2") {
-            unsafe { avx2::get_4d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse4.1") {
-            unsafe { sse41::get_4d_scaled_noise($setting) }
-        } else if is_x86_feature_detected!("sse2") {
-            unsafe { sse2::get_4d_scaled_noise($setting) }
-        } else {
-            unsafe { scalar::get_4d_scaled_noise($setting) }
-        }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_3d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
+        unsafe { get_scaled_noise::<S, _>(noise_type, get_3d_noise) }
+    }
+);
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-macro_rules! get_4d_scaled_noise {
-    ($setting:expr) => {
-        unsafe { scalar::get_4d_scaled_noise($setting) }
-    };
-}
+simd_runtime_generate!(
+    pub fn get_4d_scaled_noise(noise_type: &NoiseType) -> Vec<f32> {
+        unsafe { get_scaled_noise::<S, _>(noise_type, get_4d_noise) }
+    }
+);
 
 mod settings;
 pub use settings::{
@@ -287,6 +161,7 @@ pub use settings::{
     SimplexSettings, TurbulenceSettings,
 };
 
+/*
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -300,7 +175,39 @@ mod tests {
 
     #[test]
     fn small_dimensions() {
-        let _ = NoiseBuilder::gradient_2d(3, 2).generate();
+        let (scalar_gradient, scalar_w, scalar_h) =
+            NoiseBuilder::gradient_2d(3, 2).generate::<simdeez::Scalar>();
+        #[cfg(target_feature = "sse2")]
+        let (sse2_gradient, sse2_w, sse2_h) =
+            NoiseBuilder::gradient_2d(3, 2).generate::<simdeez::Sse2>();
+        #[cfg(target_feature = "sse4.1")]
+        let (sse41_gradient, sse41_w, sse41_h) =
+            NoiseBuilder::gradient_2d(3, 2).generate::<simdeez::Sse41>();
+        #[cfg(target_feature = "avx2")]
+        let (avx2_gradient, avx2_w, avx2_h) =
+            NoiseBuilder::gradient_2d(3, 2).generate::<simdeez::Avx2>();
+        for i in 0..scalar_gradient.len() {
+            #[cfg(target_feature = "sse2")]
+            {
+                assert_eq!(scalar_w, sse2_w);
+                assert_eq!(scalar_h, sse2_h);
+                assert_delta!(scalar_gradient[i], sse2_gradient[i], 0.1);
+            }
+
+            #[cfg(target_feature = "sse4.1")]
+            {
+                assert_eq!(scalar_w, sse41_w);
+                assert_eq!(scalar_h, sse41_h);
+                assert_delta!(scalar_gradient[i], sse41_gradient[i], 0.1);
+            }
+
+            #[cfg(target_feature = "avx2")]
+            {
+                assert_eq!(scalar_w, avx2_w);
+                assert_eq!(scalar_h, avx2_h);
+                assert_delta!(scalar_gradient[i], avx2_gradient[i], 0.1);
+            }
+        }
     }
 
     #[test]
@@ -308,14 +215,28 @@ mod tests {
     fn consistency_4d() {
         let noise_setting = NoiseBuilder::fbm_4d(10, 10, 10, 10).wrap();
         let scalar_noise = unsafe { scalar::get_4d_scaled_noise(&noise_setting) };
-        let sse2_noise = unsafe { sse2::get_4d_scaled_noise(&noise_setting) };
-        let sse41_noise = unsafe { sse41::get_4d_scaled_noise(&noise_setting) };
-        let avx2_noise = unsafe { avx2::get_4d_scaled_noise(&noise_setting) };
+
+        #[cfg(target_feature = "sse2")]
+        let sse2_noise =
+            unsafe { sse2::get_4d_scaled_noise::<simdeez::Sse2>(&noise_setting) };
+
+        #[cfg(target_feature = "sse4.1")]
+        let sse41_noise =
+            unsafe { sse41::get_4d_scaled_noise::<simdeez::Sse41>(&noise_setting) };
+
+        #[cfg(target_feature = "avx2")]
+        let avx2_noise =
+            unsafe { avx2::get_4d_scaled_noise::<simdeez::Avx2>(&noise_setting) };
 
         for i in 0..scalar_noise.len() {
+            #[cfg(target_feature = "sse2")]
             assert_delta!(scalar_noise[i], sse2_noise[i], 0.1);
-            assert_delta!(sse2_noise[i], sse41_noise[i], 0.1);
-            assert_delta!(sse41_noise[i], avx2_noise[i], 0.1);
+
+            #[cfg(target_feature = "sse4.1")]
+            assert_delta!(scalar_noise[i], sse41_noise[i], 0.1);
+
+            #[cfg(target_feature = "avx2")]
+            assert_delta!(scalar_noise[i], avx2_noise[i], 0.1);
         }
     }
 
@@ -324,14 +245,28 @@ mod tests {
     fn consistency_3d() {
         let noise_setting = NoiseBuilder::fbm_3d(23, 23, 23).wrap();
         let scalar_noise = unsafe { scalar::get_3d_scaled_noise(&noise_setting) };
-        let sse2_noise = unsafe { sse2::get_3d_scaled_noise(&noise_setting) };
-        let sse41_noise = unsafe { sse41::get_3d_scaled_noise(&noise_setting) };
-        let avx2_noise = unsafe { avx2::get_3d_scaled_noise(&noise_setting) };
+
+        #[cfg(target_feature = "sse2")]
+        let sse2_noise =
+            unsafe { sse2::get_3d_scaled_noise::<simdeez::Sse2>(&noise_setting) };
+
+        #[cfg(target_feature = "sse4.1")]
+        let sse41_noise =
+            unsafe { sse41::get_3d_scaled_noise::<simdeez::Sse41>(&noise_setting) };
+
+        #[cfg(target_feature = "avx2")]
+        let avx2_noise =
+            unsafe { avx2::get_3d_scaled_noise::<simdeez::Avx2>(&noise_setting) };
 
         for i in 0..scalar_noise.len() {
+            #[cfg(target_feature = "sse2")]
             assert_delta!(scalar_noise[i], sse2_noise[i], 0.1);
-            assert_delta!(sse2_noise[i], sse41_noise[i], 0.1);
-            assert_delta!(sse41_noise[i], avx2_noise[i], 0.1);
+
+            #[cfg(target_feature = "sse4.1")]
+            assert_delta!(scalar_noise[i], sse41_noise[i], 0.1);
+
+            #[cfg(target_feature = "avx2")]
+            assert_delta!(scalar_noise[i], avx2_noise[i], 0.1);
         }
     }
 
@@ -340,14 +275,28 @@ mod tests {
     fn consistency_2d() {
         let noise_setting = NoiseBuilder::fbm_2d(233, 233).wrap();
         let scalar_noise = unsafe { scalar::get_2d_scaled_noise(&noise_setting) };
-        let sse2_noise = unsafe { sse2::get_2d_scaled_noise(&noise_setting) };
-        let sse41_noise = unsafe { sse41::get_2d_scaled_noise(&noise_setting) };
-        let avx2_noise = unsafe { avx2::get_2d_scaled_noise(&noise_setting) };
+
+        #[cfg(target_feature = "sse2")]
+        let sse2_noise =
+            unsafe { sse2::get_2d_scaled_noise::<simdeez::Sse2>(&noise_setting) };
+
+        #[cfg(target_feature = "sse4.1")]
+        let sse41_noise =
+            unsafe { sse41::get_2d_scaled_noise::<simdeez::Sse41>(&noise_setting) };
+
+        #[cfg(target_feature = "avx2")]
+        let avx2_noise =
+            unsafe { avx2::get_2d_scaled_noise::<simdeez::Avx2>(&noise_setting) };
 
         for i in 0..scalar_noise.len() {
+            #[cfg(target_feature = "sse2")]
             assert_delta!(scalar_noise[i], sse2_noise[i], 0.1);
-            assert_delta!(sse2_noise[i], sse41_noise[i], 0.1);
-            assert_delta!(sse41_noise[i], avx2_noise[i], 0.1);
+
+            #[cfg(target_feature = "sse4.1")]
+            assert_delta!(scalar_noise[i], sse41_noise[i], 0.1);
+
+            #[cfg(target_feature = "avx2")]
+            assert_delta!(scalar_noise[i], avx2_noise[i], 0.1);
         }
     }
 
@@ -356,14 +305,28 @@ mod tests {
     fn consistency_1d() {
         let noise_setting = NoiseBuilder::fbm_1d(1000).wrap();
         let scalar_noise = unsafe { scalar::get_1d_scaled_noise(&noise_setting) };
-        let sse2_noise = unsafe { sse2::get_1d_scaled_noise(&noise_setting) };
-        let sse41_noise = unsafe { sse41::get_1d_scaled_noise(&noise_setting) };
-        let avx2_noise = unsafe { avx2::get_1d_scaled_noise(&noise_setting) };
+
+        #[cfg(target_feature = "sse2")]
+        let sse2_noise =
+            unsafe { sse2::get_1d_scaled_noise::<simdeez::Sse2>(&noise_setting) };
+
+        #[cfg(target_feature = "sse4.1")]
+        let sse41_noise =
+            unsafe { sse41::get_1d_scaled_noise::<simdeez::Sse41>(&noise_setting) };
+
+        #[cfg(target_feature = "avx2")]
+        let avx2_noise =
+            unsafe { avx2::get_1d_scaled_noise::<simdeez::Avx2>(&noise_setting) };
 
         for i in 0..scalar_noise.len() {
+            #[cfg(target_feature = "sse2")]
             assert_delta!(scalar_noise[i], sse2_noise[i], 0.1);
-            assert_delta!(sse2_noise[i], sse41_noise[i], 0.1);
-            assert_delta!(sse41_noise[i], avx2_noise[i], 0.1);
+
+            #[cfg(target_feature = "sse4.1")]
+            assert_delta!(scalar_noise[i], sse41_noise[i], 0.1);
+
+            #[cfg(target_feature = "avx2")]
+            assert_delta!(scalar_noise[i], avx2_noise[i], 0.1);
         }
     }
 
@@ -372,13 +335,24 @@ mod tests {
     fn cell_consistency_2d() {
         let noise_setting = NoiseBuilder::cellular_2d(100, 100).wrap();
         let scalar = unsafe { scalar::get_2d_scaled_noise(&noise_setting) };
-        let sse2 = unsafe { sse2::get_2d_scaled_noise(&noise_setting) };
-        let sse41 = unsafe { sse41::get_2d_scaled_noise(&noise_setting) };
-        let avx2 = unsafe { avx2::get_2d_scaled_noise(&noise_setting) };
+
+        #[cfg(target_feature = "sse2")]
+        let sse2 = unsafe { sse2::get_2d_scaled_noise::<simdeez::Sse2>(&noise_setting) };
+
+        #[cfg(target_feature = "sse4.1")]
+        let sse41 = unsafe { sse41::get_2d_scaled_noise::<simdeez::Sse41>(&noise_setting) };
+
+        #[cfg(target_feature = "avx2")]
+        let avx2 = unsafe { avx2::get_2d_scaled_noise::<simdeez::Avx2>(&noise_setting) };
         for i in 0..scalar.len() {
+            #[cfg(target_feature = "sse2")]
             assert_delta!(scalar[i], sse2[i], 0.1);
-            assert_delta!(sse2[i], sse41[i], 0.1);
-            assert_delta!(sse41[i], avx2[i], 0.1);
+
+            #[cfg(target_feature = "sse4.1")]
+            assert_delta!(scalar[i], sse41[i], 0.1);
+
+            #[cfg(target_feature = "avx2")]
+            assert_delta!(scalar[i], avx2[i], 0.1);
         }
     }
 
@@ -387,13 +361,24 @@ mod tests {
     fn cell_consistency_3d() {
         let noise_setting = NoiseBuilder::cellular2_3d(32, 32, 32).wrap();
         let scalar = unsafe { scalar::get_3d_scaled_noise(&noise_setting) };
-        let sse2 = unsafe { sse2::get_3d_scaled_noise(&noise_setting) };
-        let sse41 = unsafe { sse41::get_3d_scaled_noise(&noise_setting) };
-        let avx2 = unsafe { avx2::get_3d_scaled_noise(&noise_setting) };
+        #[cfg(target_feature = "sse2")]
+        let sse2 = unsafe { sse2::get_3d_scaled_noise::<simdeez::Sse2>(&noise_setting) };
+
+        #[cfg(target_feature = "sse4.1")]
+        let sse41 = unsafe { sse41::get_3d_scaled_noise::<simdeez::Sse41>(&noise_setting) };
+
+        #[cfg(target_feature = "avx2")]
+        let avx2 = unsafe { avx2::get_3d_scaled_noise::<simdeez::Avx2>(&noise_setting) };
+
         for i in 0..scalar.len() {
-            //            assert_delta!(scalar[i], sse2[i], 0.1);
-            assert_delta!(sse2[i], sse41[i], 0.1);
-            assert_delta!(sse41[i], avx2[i], 0.1);
+            //#[cfg(target_feature = "sse2")]
+            //assert_delta!(scalar[i], sse2[i], 0.1);
+            #[cfg(target_feature = "sse4.1")]
+            assert_delta!(scalar[i], sse41[i], 0.1);
+
+            #[cfg(target_feature = "avx2")]
+            assert_delta!(scalar[i], avx2[i], 0.1);
         }
     }
 }
+*/
diff --git a/src/noise/cell2_32.rs b/src/noise/cell2_32.rs
index 4956d1d..427c6c3 100644
--- a/src/noise/cell2_32.rs
+++ b/src/noise/cell2_32.rs
@@ -1,10 +1,10 @@
 use super::cellular_32::{hash_2d, hash_3d, BIT_10_MASK_32, X_PRIME_32, Y_PRIME_32, Z_PRIME_32};
 use crate::{Cell2ReturnType, CellDistanceFunction};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn cellular2_2d<S: Simd>(
+pub fn cellular2_2d<S: Simd>(
     x: S::Vf32,
     y: S::Vf32,
     distance_function: CellDistanceFunction,
@@ -14,73 +14,62 @@ pub unsafe fn cellular2_2d<S: Simd>(
     index1: usize,
     seed: i32,
 ) -> S::Vf32 {
-    let mut distance: [S::Vf32; 4] = [S::set1_ps(999999.0); 4];
+    let mut distance: [S::Vf32; 4] = [S::Vf32::set1(999999.0); 4];
 
-    let mut xc = S::sub_epi32(S::cvtps_epi32(x), S::set1_epi32(1));
-    let mut yc_base = S::sub_epi32(S::cvtps_epi32(y), S::set1_epi32(1));
+    let mut xc = x.cast_i32() - S::Vi32::set1(1);
+    let mut yc_base = y.cast_i32() - S::Vi32::set1(1);
 
-    let mut xcf = S::sub_ps(S::cvtepi32_ps(xc), x);
-    let ycf_base = S::sub_ps(S::cvtepi32_ps(yc_base), y);
+    let mut xcf = xc.cast_f32() - x;
+    let ycf_base = yc_base.cast_f32() - y;
 
-    xc = S::mullo_epi32(xc, S::set1_epi32(X_PRIME_32));
-    yc_base = S::mullo_epi32(yc_base, S::set1_epi32(Y_PRIME_32));
+    xc = xc * S::Vi32::set1(X_PRIME_32);
+    yc_base = yc_base * S::Vi32::set1(Y_PRIME_32);
 
     for _x in 0..3 {
         let mut ycf = ycf_base;
         let mut yc = yc_base;
         for _y in 0..3 {
             let hash = hash_2d::<S>(seed, xc, yc);
-            let mut xd = S::sub_ps(
-                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                S::set1_ps(511.5),
-            );
-            let mut yd = S::sub_ps(
-                S::cvtepi32_ps(S::and_epi32(
-                    S::srai_epi32(hash, 10),
-                    S::set1_epi32(BIT_10_MASK_32),
-                )),
-                S::set1_ps(511.5),
-            );
-            let inv_mag = S::mul_ps(
-                jitter,
-                S::rsqrt_ps(S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd))),
-            );
-            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
+            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32() - S::Vf32::set1(511.5);
+            let mut yd =
+                ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32() - S::Vf32::set1(511.5);
+            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+            xd = (xd * inv_mag) + xcf;
+            yd = (yd * inv_mag) + ycf;
 
             let new_distance = match distance_function {
-                CellDistanceFunction::Euclidean => S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd)),
-                CellDistanceFunction::Manhattan => S::add_ps(S::abs_ps(xd), S::abs_ps(yd)),
+                CellDistanceFunction::Euclidean => (xd * xd) + (yd * yd),
+                CellDistanceFunction::Manhattan => xd.abs() + yd.abs(),
                 CellDistanceFunction::Natural => {
-                    let euc = S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd));
-                    let man = S::add_ps(S::abs_ps(xd), S::abs_ps(yd));
-                    S::add_ps(euc, man)
+                    let euc = (xd * xd) + (yd * yd);
+                    let man = xd.abs() + yd.abs();
+                    euc + man
                 }
             };
             let mut i = index1;
             while i > 0 {
-                distance[i] = S::max_ps(S::min_ps(distance[i], new_distance), distance[i - 1]);
-                distance[0] = S::min_ps(distance[0], new_distance);
+                distance[i] = distance[i].min(new_distance).max(distance[i - 1]);
+                distance[0] = distance[0].min(new_distance);
                 i -= 1;
             }
-            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+            ycf = ycf + S::Vf32::set1(1.0);
+            yc = yc + S::Vi32::set1(Y_PRIME_32);
         }
-        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+        xcf = xcf + S::Vf32::set1(1.0);
+        xc = xc + S::Vi32::set1(X_PRIME_32);
     }
 
     match return_type {
         Cell2ReturnType::Distance2 => distance[index1],
-        Cell2ReturnType::Distance2Add => S::add_ps(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Sub => S::sub_ps(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Mul => S::mul_ps(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Div => S::div_ps(distance[index0], distance[index1]),
+        Cell2ReturnType::Distance2Add => distance[index0] + distance[index1],
+        Cell2ReturnType::Distance2Sub => distance[index0] - distance[index1],
+        Cell2ReturnType::Distance2Mul => distance[index0] * distance[index1],
+        Cell2ReturnType::Distance2Div => distance[index0] / distance[index1],
     }
 }
 
 #[inline(always)]
-pub unsafe fn cellular2_3d<S: Simd>(
+pub fn cellular2_3d<S: Simd>(
     x: S::Vf32,
     y: S::Vf32,
     z: S::Vf32,
@@ -91,19 +80,19 @@ pub unsafe fn cellular2_3d<S: Simd>(
     index1: usize,
     seed: i32,
 ) -> S::Vf32 {
-    let mut distance: [S::Vf32; 4] = [S::set1_ps(999999.0); 4];
+    let mut distance: [S::Vf32; 4] = [S::Vf32::set1(999999.0); 4];
 
-    let mut xc = S::sub_epi32(S::cvtps_epi32(x), S::set1_epi32(1));
-    let mut yc_base = S::sub_epi32(S::cvtps_epi32(y), S::set1_epi32(1));
-    let mut zc_base = S::sub_epi32(S::cvtps_epi32(z), S::set1_epi32(1));
+    let mut xc = x.cast_i32() - S::Vi32::set1(1);
+    let mut yc_base = y.cast_i32() - S::Vi32::set1(1);
+    let mut zc_base = z.cast_i32() - S::Vi32::set1(1);
 
-    let mut xcf = S::sub_ps(S::cvtepi32_ps(xc), x);
-    let ycf_base = S::sub_ps(S::cvtepi32_ps(yc_base), y);
-    let zcf_base = S::sub_ps(S::cvtepi32_ps(zc_base), z);
+    let mut xcf = xc.cast_f32() - x;
+    let ycf_base = yc_base.cast_f32() - y;
+    let zcf_base = zc_base.cast_f32() - z;
 
-    xc = S::mullo_epi32(xc, S::set1_epi32(X_PRIME_32));
-    yc_base = S::mullo_epi32(yc_base, S::set1_epi32(Y_PRIME_32));
-    zc_base = S::mullo_epi32(zc_base, S::set1_epi32(Z_PRIME_32));
+    xc = xc * S::Vi32::set1(X_PRIME_32);
+    yc_base = yc_base * S::Vi32::set1(Y_PRIME_32);
+    zc_base = zc_base * S::Vi32::set1(Z_PRIME_32);
 
     for _x in 0..3 {
         let mut ycf = ycf_base;
@@ -113,73 +102,47 @@ pub unsafe fn cellular2_3d<S: Simd>(
             let mut zc = zc_base;
             for _z in 0..3 {
                 let hash = hash_3d::<S>(seed, xc, yc, zc);
-                let mut xd = S::sub_ps(
-                    S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                    S::set1_ps(511.5),
-                );
-                let mut yd = S::sub_ps(
-                    S::cvtepi32_ps(S::and_epi32(
-                        S::srai_epi32(hash, 10),
-                        S::set1_epi32(BIT_10_MASK_32),
-                    )),
-                    S::set1_ps(511.5),
-                );
-                let mut zd = S::sub_ps(
-                    S::cvtepi32_ps(S::and_epi32(
-                        S::srai_epi32(hash, 20),
-                        S::set1_epi32(BIT_10_MASK_32),
-                    )),
-                    S::set1_ps(511.5),
-                );
-                let inv_mag = S::mul_ps(
-                    jitter,
-                    S::rsqrt_ps(S::add_ps(
-                        S::mul_ps(xd, xd),
-                        S::add_ps(S::mul_ps(yd, yd), S::mul_ps(zd, zd)),
-                    )),
-                );
-                xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
-                zd = S::add_ps(S::mul_ps(zd, inv_mag), zcf);
+                let mut xd =
+                    (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32() - S::Vf32::set1(511.5);
+                let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                    - S::Vf32::set1(511.5);
+                let mut zd = ((hash >> 20) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                    - S::Vf32::set1(511.5);
+                let inv_mag = jitter * ((xd * xd) + ((yd * yd) + (zd * zd))).rsqrt();
+                xd = (xd * inv_mag) + xcf;
+                yd = (yd * inv_mag) + ycf;
+                zd = (zd * inv_mag) + zcf;
 
                 let new_distance = match distance_function {
-                    CellDistanceFunction::Euclidean => S::add_ps(
-                        S::mul_ps(xd, xd),
-                        S::add_ps(S::mul_ps(yd, yd), S::mul_ps(zd, zd)),
-                    ),
-                    CellDistanceFunction::Manhattan => {
-                        S::add_ps(S::add_ps(S::abs_ps(xd), S::abs_ps(yd)), S::abs_ps(zd))
-                    }
+                    CellDistanceFunction::Euclidean => (xd * xd) + ((yd * yd) + (zd * zd)),
+                    CellDistanceFunction::Manhattan => (xd.abs() + yd.abs()) + zd.abs(),
                     CellDistanceFunction::Natural => {
-                        let euc = S::add_ps(
-                            S::mul_ps(xd, xd),
-                            S::add_ps(S::mul_ps(yd, yd), S::mul_ps(zd, zd)),
-                        );
-                        let man = S::add_ps(S::add_ps(S::abs_ps(xd), S::abs_ps(yd)), S::abs_ps(zd));
-                        S::add_ps(euc, man)
+                        let euc = (xd * xd) + ((yd * yd) + (zd * zd));
+                        let man = (xd.abs() + yd.abs()) + zd.abs();
+                        euc + man
                     }
                 };
                 let mut i = index1;
                 while i > 0 {
-                    distance[i] = S::max_ps(S::min_ps(distance[i], new_distance), distance[i - 1]);
-                    distance[0] = S::min_ps(distance[0], new_distance);
+                    distance[i] = distance[i].min(new_distance).max(distance[i - 1]);
+                    distance[0] = distance[0].min(new_distance);
                     i -= 1;
                 }
-                zcf = S::add_ps(ycf, S::set1_ps(1.0));
-                zc = S::add_epi32(yc, S::set1_epi32(Z_PRIME_32));
+                zcf = zcf + S::Vf32::set1(1.0); // advance to the next cell along z
+                zc = zc + S::Vi32::set1(Z_PRIME_32);
             }
-            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+            ycf = ycf + S::Vf32::set1(1.0);
+            yc = yc + S::Vi32::set1(Y_PRIME_32);
         }
-        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+        xcf = xcf + S::Vf32::set1(1.0);
+        xc = xc + S::Vi32::set1(X_PRIME_32);
     }
 
     match return_type {
         Cell2ReturnType::Distance2 => distance[index1],
-        Cell2ReturnType::Distance2Add => S::add_ps(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Sub => S::sub_ps(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Mul => S::mul_ps(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Div => S::div_ps(distance[index0], distance[index1]),
+        Cell2ReturnType::Distance2Add => distance[index0] + distance[index1],
+        Cell2ReturnType::Distance2Sub => distance[index0] - distance[index1],
+        Cell2ReturnType::Distance2Mul => distance[index0] * distance[index1],
+        Cell2ReturnType::Distance2Div => distance[index0] / distance[index1],
     }
 }
diff --git a/src/noise/cell2_64.rs b/src/noise/cell2_64.rs
index 2c6fe08..863c1c2 100644
--- a/src/noise/cell2_64.rs
+++ b/src/noise/cell2_64.rs
@@ -2,10 +2,10 @@ use super::cellular_32::{BIT_10_MASK_64, X_PRIME_64, Y_PRIME_64, Z_PRIME_64};
 use super::cellular_64::{hash_2d, hash_3d};
 use crate::{Cell2ReturnType, CellDistanceFunction};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn cellular2_2d<S: Simd>(
+pub fn cellular2_2d<S: Simd>(
     x: S::Vf64,
     y: S::Vf64,
     distance_function: CellDistanceFunction,
@@ -15,73 +15,62 @@ pub unsafe fn cellular2_2d<S: Simd>(
     index1: usize,
     seed: i64,
 ) -> S::Vf64 {
-    let mut distance: [S::Vf64; 4] = [S::set1_pd(999999.0); 4];
+    let mut distance: [S::Vf64; 4] = [S::Vf64::set1(999999.0); 4];
 
-    let mut xc = S::sub_epi64(S::cvtpd_epi64(x), S::set1_epi64(1));
-    let mut yc_base = S::sub_epi64(S::cvtpd_epi64(y), S::set1_epi64(1));
+    let mut xc = x.cast_i64() - S::Vi64::set1(1);
+    let mut yc_base = y.cast_i64() - S::Vi64::set1(1);
 
-    let mut xcf = S::sub_pd(S::cvtepi64_pd(xc), x);
-    let ycf_base = S::sub_pd(S::cvtepi64_pd(yc_base), y);
+    let mut xcf = xc.cast_f64() - x;
+    let ycf_base = yc_base.cast_f64() - y;
 
-    xc = S::mullo_epi64(xc, S::set1_epi64(X_PRIME_64));
-    yc_base = S::mullo_epi64(yc_base, S::set1_epi64(Y_PRIME_64));
+    xc = xc * S::Vi64::set1(X_PRIME_64);
+    yc_base = yc_base * S::Vi64::set1(Y_PRIME_64);
 
     for _x in 0..3 {
         let mut ycf = ycf_base;
         let mut yc = yc_base;
         for _y in 0..3 {
             let hash = hash_2d::<S>(seed, xc, yc);
-            let mut xd = S::sub_pd(
-                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                S::set1_pd(511.5),
-            );
-            let mut yd = S::sub_pd(
-                S::cvtepi64_pd(S::and_epi64(
-                    S::srai_epi64(hash, 10),
-                    S::set1_epi64(BIT_10_MASK_64),
-                )),
-                S::set1_pd(511.5),
-            );
-            let inv_mag = S::mul_pd(
-                jitter,
-                S::rsqrt_pd(S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd))),
-            );
-            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
+            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64() - S::Vf64::set1(511.5);
+            let mut yd =
+                ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64() - S::Vf64::set1(511.5);
+            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+            xd = (xd * inv_mag) + xcf;
+            yd = (yd * inv_mag) + ycf;
 
             let new_distance = match distance_function {
-                CellDistanceFunction::Euclidean => S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd)),
-                CellDistanceFunction::Manhattan => S::add_pd(S::abs_pd(xd), S::abs_pd(yd)),
+                CellDistanceFunction::Euclidean => (xd * xd) + (yd * yd),
+                CellDistanceFunction::Manhattan => xd.abs() + yd.abs(),
                 CellDistanceFunction::Natural => {
-                    let euc = S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd));
-                    let man = S::add_pd(S::abs_pd(xd), S::abs_pd(yd));
-                    S::add_pd(euc, man)
+                    let euc = (xd * xd) + (yd * yd);
+                    let man = xd.abs() + yd.abs();
+                    euc + man
                 }
             };
             let mut i = index1;
             while i > 0 {
-                distance[i] = S::max_pd(S::min_pd(distance[i], new_distance), distance[i - 1]);
-                distance[0] = S::min_pd(distance[0], new_distance);
+                distance[i] = distance[i].min(new_distance).max(distance[i - 1]);
+                distance[0] = distance[0].min(new_distance);
                 i -= 1;
             }
-            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+            ycf = ycf + S::Vf64::set1(1.0);
+            yc = yc + S::Vi64::set1(Y_PRIME_64);
         }
-        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+        xcf = xcf + S::Vf64::set1(1.0);
+        xc = xc + S::Vi64::set1(X_PRIME_64);
     }
 
     match return_type {
         Cell2ReturnType::Distance2 => distance[index1],
-        Cell2ReturnType::Distance2Add => S::add_pd(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Sub => S::sub_pd(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Mul => S::mul_pd(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Div => S::div_pd(distance[index0], distance[index1]),
+        Cell2ReturnType::Distance2Add => distance[index0] + distance[index1],
+        Cell2ReturnType::Distance2Sub => distance[index0] - distance[index1],
+        Cell2ReturnType::Distance2Mul => distance[index0] * distance[index1],
+        Cell2ReturnType::Distance2Div => distance[index0] / distance[index1],
     }
 }
 
 #[inline(always)]
-pub unsafe fn cellular2_3d<S: Simd>(
+pub fn cellular2_3d<S: Simd>(
     x: S::Vf64,
     y: S::Vf64,
     z: S::Vf64,
@@ -92,19 +81,19 @@ pub unsafe fn cellular2_3d<S: Simd>(
     index1: usize,
     seed: i64,
 ) -> S::Vf64 {
-    let mut distance: [S::Vf64; 4] = [S::set1_pd(999999.0); 4];
+    let mut distance: [S::Vf64; 4] = [S::Vf64::set1(999999.0); 4];
 
-    let mut xc = S::sub_epi64(S::cvtpd_epi64(x), S::set1_epi64(1));
-    let mut yc_base = S::sub_epi64(S::cvtpd_epi64(y), S::set1_epi64(1));
-    let mut zc_base = S::sub_epi64(S::cvtpd_epi64(z), S::set1_epi64(1));
+    let mut xc = x.cast_i64() - S::Vi64::set1(1);
+    let mut yc_base = y.cast_i64() - S::Vi64::set1(1);
+    let mut zc_base = z.cast_i64() - S::Vi64::set1(1);
 
-    let mut xcf = S::sub_pd(S::cvtepi64_pd(xc), x);
-    let ycf_base = S::sub_pd(S::cvtepi64_pd(yc_base), y);
-    let zcf_base = S::sub_pd(S::cvtepi64_pd(zc_base), z);
+    let mut xcf = xc.cast_f64() - x;
+    let ycf_base = yc_base.cast_f64() - y;
+    let zcf_base = zc_base.cast_f64() - z;
 
-    xc = S::mullo_epi64(xc, S::set1_epi64(X_PRIME_64));
-    yc_base = S::mullo_epi64(yc_base, S::set1_epi64(Y_PRIME_64));
-    zc_base = S::mullo_epi64(zc_base, S::set1_epi64(Z_PRIME_64));
+    xc = xc * S::Vi64::set1(X_PRIME_64);
+    yc_base = yc_base * S::Vi64::set1(Y_PRIME_64);
+    zc_base = zc_base * S::Vi64::set1(Z_PRIME_64);
 
     for _x in 0..3 {
         let mut ycf = ycf_base;
@@ -114,73 +103,47 @@ pub unsafe fn cellular2_3d<S: Simd>(
             let mut zc = zc_base;
             for _z in 0..3 {
                 let hash = hash_3d::<S>(seed, xc, yc, zc);
-                let mut xd = S::sub_pd(
-                    S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                    S::set1_pd(511.5),
-                );
-                let mut yd = S::sub_pd(
-                    S::cvtepi64_pd(S::and_epi64(
-                        S::srai_epi64(hash, 10),
-                        S::set1_epi64(BIT_10_MASK_64),
-                    )),
-                    S::set1_pd(511.5),
-                );
-                let mut zd = S::sub_pd(
-                    S::cvtepi64_pd(S::and_epi64(
-                        S::srai_epi64(hash, 20),
-                        S::set1_epi64(BIT_10_MASK_64),
-                    )),
-                    S::set1_pd(511.5),
-                );
-                let inv_mag = S::mul_pd(
-                    jitter,
-                    S::rsqrt_pd(S::add_pd(
-                        S::mul_pd(xd, xd),
-                        S::add_pd(S::mul_pd(yd, yd), S::mul_pd(zd, zd)),
-                    )),
-                );
-                xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
-                zd = S::add_pd(S::mul_pd(zd, inv_mag), zcf);
+                let mut xd =
+                    (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64() - S::Vf64::set1(511.5);
+                let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                    - S::Vf64::set1(511.5);
+                let mut zd = ((hash >> 20) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                    - S::Vf64::set1(511.5);
+                let inv_mag = jitter * ((xd * xd) + ((yd * yd) + (zd * zd))).rsqrt();
+                xd = (xd * inv_mag) + xcf;
+                yd = (yd * inv_mag) + ycf;
+                zd = (zd * inv_mag) + zcf;
 
                 let new_distance = match distance_function {
-                    CellDistanceFunction::Euclidean => S::add_pd(
-                        S::mul_pd(xd, xd),
-                        S::add_pd(S::mul_pd(yd, yd), S::mul_pd(zd, zd)),
-                    ),
-                    CellDistanceFunction::Manhattan => {
-                        S::add_pd(S::add_pd(S::abs_pd(xd), S::abs_pd(yd)), S::abs_pd(zd))
-                    }
+                    CellDistanceFunction::Euclidean => (xd * xd) + ((yd * yd) + (zd * zd)),
+                    CellDistanceFunction::Manhattan => xd.abs() + yd.abs() + zd.abs(),
                     CellDistanceFunction::Natural => {
-                        let euc = S::add_pd(
-                            S::mul_pd(xd, xd),
-                            S::add_pd(S::mul_pd(yd, yd), S::mul_pd(zd, zd)),
-                        );
-                        let man = S::add_pd(S::add_pd(S::abs_pd(xd), S::abs_pd(yd)), S::abs_pd(zd));
-                        S::add_pd(euc, man)
+                        let euc = (xd * xd) + ((yd * yd) + (zd * zd));
+                        let man = (xd.abs() + yd.abs()) + zd.abs();
+                        euc + man
                     }
                 };
                 let mut i = index1;
                 while i > 0 {
-                    distance[i] = S::max_pd(S::min_pd(distance[i], new_distance), distance[i - 1]);
-                    distance[0] = S::min_pd(distance[0], new_distance);
+                    distance[i] = distance[i].min(new_distance).max(distance[i - 1]);
+                    distance[0] = distance[0].min(new_distance);
                     i -= 1;
                 }
-                zcf = S::add_pd(ycf, S::set1_pd(1.0));
-                zc = S::add_epi64(yc, S::set1_epi64(Z_PRIME_64));
+                zcf = zcf + S::Vf64::set1(1.0); // advance to the next cell along z
+                zc = zc + S::Vi64::set1(Z_PRIME_64);
             }
-            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+            ycf = ycf + S::Vf64::set1(1.0);
+            yc = yc + S::Vi64::set1(Y_PRIME_64);
         }
-        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+        xcf = xcf + S::Vf64::set1(1.0);
+        xc = xc + S::Vi64::set1(X_PRIME_64);
     }
 
     match return_type {
         Cell2ReturnType::Distance2 => distance[index1],
-        Cell2ReturnType::Distance2Add => S::add_pd(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Sub => S::sub_pd(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Mul => S::mul_pd(distance[index0], distance[index1]),
-        Cell2ReturnType::Distance2Div => S::div_pd(distance[index0], distance[index1]),
+        Cell2ReturnType::Distance2Add => distance[index0] + distance[index1],
+        Cell2ReturnType::Distance2Sub => distance[index0] - distance[index1],
+        Cell2ReturnType::Distance2Mul => distance[index0] * distance[index1],
+        Cell2ReturnType::Distance2Div => distance[index0] / distance[index1],
     }
 }
diff --git a/src/noise/cell_32.rs b/src/noise/cell_32.rs
index f8235d3..460a3be 100644
--- a/src/noise/cell_32.rs
+++ b/src/noise/cell_32.rs
@@ -3,10 +3,10 @@ use super::cellular_32::{
 };
 use crate::{CellDistanceFunction, CellReturnType};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn cellular_2d<S: Simd>(
+pub fn cellular_2d<S: Simd>(
     x: S::Vf32,
     y: S::Vf32,
     distance_function: CellDistanceFunction,
@@ -14,15 +14,15 @@ pub unsafe fn cellular_2d<S: Simd>(
     jitter: S::Vf32,
     seed: i32,
 ) -> S::Vf32 {
-    let mut distance = S::set1_ps(999999.0);
-    let mut xc = S::sub_epi32(S::cvtps_epi32(x), S::set1_epi32(1));
-    let mut yc_base = S::sub_epi32(S::cvtps_epi32(y), S::set1_epi32(1));
+    let mut distance = S::Vf32::set1(999999.0);
+    let mut xc = x.cast_i32() - S::Vi32::set1(1);
+    let mut yc_base = y.cast_i32() - S::Vi32::set1(1);
 
-    let mut xcf = S::sub_ps(S::cvtepi32_ps(xc), x);
-    let ycf_base = S::sub_ps(S::cvtepi32_ps(yc_base), y);
+    let mut xcf = xc.cast_f32() - x;
+    let ycf_base = yc_base.cast_f32() - y;
 
-    xc = S::mullo_epi32(xc, S::set1_epi32(X_PRIME_32));
-    yc_base = S::mullo_epi32(yc_base, S::set1_epi32(Y_PRIME_32));
+    xc = xc * S::Vi32::set1(X_PRIME_32);
+    yc_base = yc_base * S::Vi32::set1(Y_PRIME_32);
     match return_type {
         CellReturnType::Distance => {
             match distance_function {
@@ -32,31 +32,23 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                                S::set1_ps(511.5),
-                            );
-                            let mut yd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(
-                                    S::srai_epi32(hash, 10),
-                                    S::set1_epi32(BIT_10_MASK_32),
-                                )),
-                                S::set1_ps(511.5),
-                            );
-                            let mut xd2 = S::mul_ps(xd, xd);
-                            let inv_mag =
-                                S::mul_ps(jitter, S::rsqrt_ps(S::add_ps(xd2, S::mul_ps(yd, yd))));
-                            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
-                            xd2 = S::mul_ps(xd, xd);
-                            let new_distance = S::add_ps(xd2, S::mul_ps(yd, yd));
-                            distance = S::min_ps(new_distance, distance);
+                            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut xd2 = xd * xd;
+                            let inv_mag = jitter * (xd2 + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
+                            xd2 = xd * xd;
+                            let new_distance = xd2 + (yd * yd);
+                            distance = new_distance.min(distance);
 
-                            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-                            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+                            ycf = ycf + S::Vf32::set1(1.0);
+                            yc = yc + S::Vi32::set1(Y_PRIME_32);
                         }
-                        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-                        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+                        xcf = xcf + S::Vf32::set1(1.0);
+                        xc = xc + S::Vi32::set1(X_PRIME_32);
                     }
                 }
                 CellDistanceFunction::Manhattan => {
@@ -65,32 +57,22 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                                S::set1_ps(511.5),
-                            );
-                            let mut yd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(
-                                    S::srai_epi32(hash, 10),
-                                    S::set1_epi32(BIT_10_MASK_32),
-                                )),
-                                S::set1_ps(511.5),
-                            );
-                            let inv_mag = S::mul_ps(
-                                jitter,
-                                S::rsqrt_ps(S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd))),
-                            );
-                            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_distance = S::add_ps(S::abs_ps(xd), S::abs_ps(yd));
-                            distance = S::min_ps(new_distance, distance);
+                            let new_distance = xd.abs() + yd.abs();
+                            distance = new_distance.min(distance);
 
-                            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-                            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+                            ycf = ycf + S::Vf32::set1(1.0);
+                            yc = yc + S::Vi32::set1(Y_PRIME_32);
                         }
-                        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-                        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+                        xcf = xcf + S::Vf32::set1(1.0);
+                        xc = xc + S::Vi32::set1(X_PRIME_32);
                     }
                 }
                 CellDistanceFunction::Natural => {
@@ -99,43 +81,33 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                                S::set1_ps(511.5),
-                            );
-                            let mut yd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(
-                                    S::srai_epi32(hash, 10),
-                                    S::set1_epi32(BIT_10_MASK_32),
-                                )),
-                                S::set1_ps(511.5),
-                            );
-                            let inv_mag = S::mul_ps(
-                                jitter,
-                                S::rsqrt_ps(S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd))),
-                            );
-                            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
                             let new_distance = {
-                                let euc = S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd));
-                                let man = S::add_ps(S::abs_ps(xd), S::abs_ps(yd));
-                                S::add_ps(euc, man)
+                                let euc = (xd * xd) + (yd * yd);
+                                let man = xd.abs() + yd.abs();
+                                euc + man
                             };
-                            distance = S::min_ps(new_distance, distance);
+                            distance = new_distance.min(distance);
 
-                            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-                            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+                            ycf = ycf + S::Vf32::set1(1.0);
+                            yc = yc + S::Vi32::set1(Y_PRIME_32);
                         }
-                        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-                        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+                        xcf = xcf + S::Vf32::set1(1.0);
+                        xc = xc + S::Vi32::set1(X_PRIME_32);
                     }
                 }
             }
             distance
         }
         CellReturnType::CellValue => {
-            let mut cell_value = S::setzero_ps();
+            let mut cell_value = S::Vf32::zeroes();
             match distance_function {
                 CellDistanceFunction::Euclidean => {
                     for _x in 0..3 {
@@ -143,36 +115,25 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                                S::set1_ps(511.5),
-                            );
-                            let mut yd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(
-                                    S::srai_epi32(hash, 10),
-                                    S::set1_epi32(BIT_10_MASK_32),
-                                )),
-                                S::set1_ps(511.5),
-                            );
-                            let inv_mag = S::mul_ps(
-                                jitter,
-                                S::rsqrt_ps(S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd))),
-                            );
-                            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_cell_value =
-                                S::mul_ps(S::set1_ps(HASH_2_FLOAT_32), S::cvtepi32_ps(hash));
-                            let new_distance = S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd));
-                            let closer = S::cmplt_ps(new_distance, distance);
-                            distance = S::min_ps(new_distance, distance);
-                            cell_value = S::blendv_ps(cell_value, new_cell_value, closer);
+                            let new_cell_value = S::Vf32::set1(HASH_2_FLOAT_32) * hash.cast_f32();
+                            let new_distance = (xd * xd) + (yd * yd);
+                            let closer = new_distance.cmp_lt(distance);
+                            distance = new_distance.min(distance);
+                            cell_value = closer.blendv(cell_value, new_cell_value);
 
-                            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-                            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+                            ycf = ycf + S::Vf32::set1(1.0);
+                            yc = yc + S::Vi32::set1(Y_PRIME_32);
                         }
-                        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-                        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+                        xcf = xcf + S::Vf32::set1(1.0);
+                        xc = xc + S::Vi32::set1(X_PRIME_32);
                     }
                 }
                 CellDistanceFunction::Manhattan => {
@@ -181,36 +142,25 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                                S::set1_ps(511.5),
-                            );
-                            let mut yd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(
-                                    S::srai_epi32(hash, 10),
-                                    S::set1_epi32(BIT_10_MASK_32),
-                                )),
-                                S::set1_ps(511.5),
-                            );
-                            let inv_mag = S::mul_ps(
-                                jitter,
-                                S::rsqrt_ps(S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd))),
-                            );
-                            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_cell_value =
-                                S::mul_ps(S::set1_ps(HASH_2_FLOAT_32), S::cvtepi32_ps(hash));
-                            let new_distance = S::add_ps(S::abs_ps(xd), S::abs_ps(yd));
-                            let closer = S::cmplt_ps(new_distance, distance);
-                            distance = S::min_ps(new_distance, distance);
-                            cell_value = S::blendv_ps(cell_value, new_cell_value, closer);
+                            let new_cell_value = S::Vf32::set1(HASH_2_FLOAT_32) * hash.cast_f32();
+                            let new_distance = xd.abs() + yd.abs();
+                            let closer = new_distance.cmp_lt(distance);
+                            distance = new_distance.min(distance);
+                            cell_value = closer.blendv(cell_value, new_cell_value);
 
-                            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-                            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+                            ycf = ycf + S::Vf32::set1(1.0);
+                            yc = yc + S::Vi32::set1(Y_PRIME_32);
                         }
-                        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-                        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+                        xcf = xcf + S::Vf32::set1(1.0);
+                        xc = xc + S::Vi32::set1(X_PRIME_32);
                     }
                 }
                 CellDistanceFunction::Natural => {
@@ -219,40 +169,29 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                                S::set1_ps(511.5),
-                            );
-                            let mut yd = S::sub_ps(
-                                S::cvtepi32_ps(S::and_epi32(
-                                    S::srai_epi32(hash, 10),
-                                    S::set1_epi32(BIT_10_MASK_32),
-                                )),
-                                S::set1_ps(511.5),
-                            );
-                            let inv_mag = S::mul_ps(
-                                jitter,
-                                S::rsqrt_ps(S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd))),
-                            );
-                            xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                            yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                                - S::Vf32::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_cell_value =
-                                S::mul_ps(S::set1_ps(HASH_2_FLOAT_32), S::cvtepi32_ps(hash));
+                            let new_cell_value = S::Vf32::set1(HASH_2_FLOAT_32) * hash.cast_f32();
                             let new_distance = {
-                                let euc = S::add_ps(S::mul_ps(xd, xd), S::mul_ps(yd, yd));
-                                let man = S::add_ps(S::abs_ps(xd), S::abs_ps(yd));
-                                S::add_ps(euc, man)
+                                let euc = (xd * xd) + (yd * yd);
+                                let man = xd.abs() + yd.abs();
+                                euc + man
                             };
-                            let closer = S::cmplt_ps(new_distance, distance);
-                            distance = S::min_ps(new_distance, distance);
-                            cell_value = S::blendv_ps(cell_value, new_cell_value, closer);
+                            let closer = new_distance.cmp_lt(distance);
+                            distance = new_distance.min(distance);
+                            cell_value = closer.blendv(cell_value, new_cell_value);
 
-                            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-                            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+                            ycf = ycf + S::Vf32::set1(1.0);
+                            yc = yc + S::Vi32::set1(Y_PRIME_32);
                         }
-                        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-                        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+                        xcf = xcf + S::Vf32::set1(1.0);
+                        xc = xc + S::Vi32::set1(X_PRIME_32);
                     }
                 }
             }
@@ -262,7 +201,7 @@ pub unsafe fn cellular_2d<S: Simd>(
 }
 
 #[inline(always)]
-pub unsafe fn cellular_3d<S: Simd>(
+pub fn cellular_3d<S: Simd>(
     x: S::Vf32,
     y: S::Vf32,
     z: S::Vf32,
@@ -271,20 +210,20 @@ pub unsafe fn cellular_3d<S: Simd>(
     jitter: S::Vf32,
     seed: i32,
 ) -> S::Vf32 {
-    let mut distance = S::set1_ps(999999.0);
-    let mut cell_value = S::setzero_ps();
+    let mut distance = S::Vf32::set1(999999.0);
+    let mut cell_value = S::Vf32::zeroes();
 
-    let mut xc = S::sub_epi32(S::cvtps_epi32(x), S::set1_epi32(1));
-    let mut yc_base = S::sub_epi32(S::cvtps_epi32(y), S::set1_epi32(1));
-    let mut zc_base = S::sub_epi32(S::cvtps_epi32(z), S::set1_epi32(1));
+    let mut xc = x.cast_i32() - S::Vi32::set1(1);
+    let mut yc_base = y.cast_i32() - S::Vi32::set1(1);
+    let mut zc_base = z.cast_i32() - S::Vi32::set1(1);
 
-    let mut xcf = S::sub_ps(S::cvtepi32_ps(xc), x);
-    let ycf_base = S::sub_ps(S::cvtepi32_ps(yc_base), y);
-    let zcf_base = S::sub_ps(S::cvtepi32_ps(zc_base), z);
+    let mut xcf = xc.cast_f32() - x;
+    let ycf_base = yc_base.cast_f32() - y;
+    let zcf_base = zc_base.cast_f32() - z;
 
-    xc = S::mullo_epi32(xc, S::set1_epi32(X_PRIME_32));
-    yc_base = S::mullo_epi32(yc_base, S::set1_epi32(Y_PRIME_32));
-    zc_base = S::mullo_epi32(zc_base, S::set1_epi32(Z_PRIME_32));
+    xc = xc * S::Vi32::set1(X_PRIME_32);
+    yc_base = yc_base * S::Vi32::set1(Y_PRIME_32);
+    zc_base = zc_base * S::Vi32::set1(Z_PRIME_32);
 
     for _x in 0..3 {
         let mut ycf = ycf_base;
@@ -294,64 +233,38 @@ pub unsafe fn cellular_3d<S: Simd>(
             let mut zc = zc_base;
             for _z in 0..3 {
                 let hash = hash_3d::<S>(seed, xc, yc, zc);
-                let mut xd = S::sub_ps(
-                    S::cvtepi32_ps(S::and_epi32(hash, S::set1_epi32(BIT_10_MASK_32))),
-                    S::set1_ps(511.5),
-                );
-                let mut yd = S::sub_ps(
-                    S::cvtepi32_ps(S::and_epi32(
-                        S::srai_epi32(hash, 10),
-                        S::set1_epi32(BIT_10_MASK_32),
-                    )),
-                    S::set1_ps(511.5),
-                );
-                let mut zd = S::sub_ps(
-                    S::cvtepi32_ps(S::and_epi32(
-                        S::srai_epi32(hash, 20),
-                        S::set1_epi32(BIT_10_MASK_32),
-                    )),
-                    S::set1_ps(511.5),
-                );
-                let inv_mag = S::mul_ps(
-                    jitter,
-                    S::rsqrt_ps(S::add_ps(
-                        S::mul_ps(xd, xd),
-                        S::add_ps(S::mul_ps(yd, yd), S::mul_ps(zd, zd)),
-                    )),
-                );
-                xd = S::add_ps(S::mul_ps(xd, inv_mag), xcf);
-                yd = S::add_ps(S::mul_ps(yd, inv_mag), ycf);
-                zd = S::add_ps(S::mul_ps(zd, inv_mag), zcf);
+                let mut xd =
+                    (hash & S::Vi32::set1(BIT_10_MASK_32)).cast_f32() - S::Vf32::set1(511.5);
+                let mut yd = ((hash >> 10) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                    - S::Vf32::set1(511.5);
+                let mut zd = ((hash >> 20) & S::Vi32::set1(BIT_10_MASK_32)).cast_f32()
+                    - S::Vf32::set1(511.5);
+                let inv_mag = jitter * ((xd * xd) + ((yd * yd) + (zd * zd))).rsqrt();
+                xd = (xd * inv_mag) + xcf;
+                yd = (yd * inv_mag) + ycf;
+                zd = (zd * inv_mag) + zcf;
 
-                let new_cell_value = S::mul_ps(S::set1_ps(HASH_2_FLOAT_32), S::cvtepi32_ps(hash));
+                let new_cell_value = S::Vf32::set1(HASH_2_FLOAT_32) * hash.cast_f32();
                 let new_distance = match distance_function {
-                    CellDistanceFunction::Euclidean => S::add_ps(
-                        S::mul_ps(xd, xd),
-                        S::add_ps(S::mul_ps(yd, yd), S::mul_ps(zd, zd)),
-                    ),
-                    CellDistanceFunction::Manhattan => {
-                        S::add_ps(S::add_ps(S::abs_ps(xd), S::abs_ps(yd)), S::abs_ps(zd))
-                    }
+                    CellDistanceFunction::Euclidean => (xd * xd) + ((yd * yd) + (zd * zd)),
+                    CellDistanceFunction::Manhattan => (xd.abs() + yd.abs()) + zd.abs(),
                     CellDistanceFunction::Natural => {
-                        let euc = S::add_ps(
-                            S::mul_ps(xd, xd),
-                            S::add_ps(S::mul_ps(yd, yd), S::mul_ps(zd, zd)),
-                        );
-                        let man = S::add_ps(S::add_ps(S::abs_ps(xd), S::abs_ps(yd)), S::abs_ps(zd));
-                        S::add_ps(euc, man)
+                        let euc = (xd * xd) + ((yd * yd) + (zd * zd));
+                        let man = xd.abs() + yd.abs() + zd.abs();
+                        euc + man
                     }
                 };
-                let closer = S::cmplt_ps(new_distance, distance);
-                distance = S::min_ps(new_distance, distance);
-                cell_value = S::blendv_ps(cell_value, new_cell_value, closer);
-                zcf = S::add_ps(ycf, S::set1_ps(1.0));
-                zc = S::add_epi32(yc, S::set1_epi32(Z_PRIME_32));
+                let closer = new_distance.cmp_lt(distance);
+                distance = new_distance.min(distance);
+                cell_value = closer.blendv(cell_value, new_cell_value);
+                zcf = zcf + S::Vf32::set1(1.0);
+                zc = zc + S::Vi32::set1(Z_PRIME_32);
             }
-            ycf = S::add_ps(ycf, S::set1_ps(1.0));
-            yc = S::add_epi32(yc, S::set1_epi32(Y_PRIME_32));
+            ycf = ycf + S::Vf32::set1(1.0);
+            yc = yc + S::Vi32::set1(Y_PRIME_32);
         }
-        xcf = S::add_ps(xcf, S::set1_ps(1.0));
-        xc = S::add_epi32(xc, S::set1_epi32(X_PRIME_32));
+        xcf = xcf + S::Vf32::set1(1.0);
+        xc = xc + S::Vi32::set1(X_PRIME_32);
     }
 
     match return_type {
diff --git a/src/noise/cell_64.rs b/src/noise/cell_64.rs
index 805e059..88ae2ce 100644
--- a/src/noise/cell_64.rs
+++ b/src/noise/cell_64.rs
@@ -2,10 +2,10 @@ use super::cellular_32::{BIT_10_MASK_64, HASH_2_FLOAT_64, X_PRIME_64, Y_PRIME_64
 use super::cellular_64::{hash_2d, hash_3d};
 use crate::{CellDistanceFunction, CellReturnType};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn cellular_2d<S: Simd>(
+pub fn cellular_2d<S: Simd>(
     x: S::Vf64,
     y: S::Vf64,
     distance_function: CellDistanceFunction,
@@ -13,15 +13,15 @@ pub unsafe fn cellular_2d<S: Simd>(
     jitter: S::Vf64,
     seed: i64,
 ) -> S::Vf64 {
-    let mut distance = S::set1_pd(999999.0);
-    let mut xc = S::sub_epi64(S::cvtpd_epi64(x), S::set1_epi64(1));
-    let mut yc_base = S::sub_epi64(S::cvtpd_epi64(y), S::set1_epi64(1));
+    let mut distance = S::Vf64::set1(999999.0);
+    let mut xc = x.cast_i64() - S::Vi64::set1(1);
+    let mut yc_base = y.cast_i64() - S::Vi64::set1(1);
 
-    let mut xcf = S::sub_pd(S::cvtepi64_pd(xc), x);
-    let ycf_base = S::sub_pd(S::cvtepi64_pd(yc_base), y);
+    let mut xcf = xc.cast_f64() - x;
+    let ycf_base = yc_base.cast_f64() - y;
 
-    xc = S::mullo_epi64(xc, S::set1_epi64(X_PRIME_64));
-    yc_base = S::mullo_epi64(yc_base, S::set1_epi64(Y_PRIME_64));
+    xc = xc * S::Vi64::set1(X_PRIME_64);
+    yc_base = yc_base * S::Vi64::set1(Y_PRIME_64);
     match return_type {
         CellReturnType::Distance => {
             match distance_function {
@@ -31,31 +31,23 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                                S::set1_pd(511.5),
-                            );
-                            let mut yd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(
-                                    S::srai_epi64(hash, 10),
-                                    S::set1_epi64(BIT_10_MASK_64),
-                                )),
-                                S::set1_pd(511.5),
-                            );
-                            let mut xd2 = S::mul_pd(xd, xd);
-                            let inv_mag =
-                                S::mul_pd(jitter, S::rsqrt_pd(S::add_pd(xd2, S::mul_pd(yd, yd))));
-                            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
-                            xd2 = S::mul_pd(xd, xd);
-                            let new_distance = S::add_pd(xd2, S::mul_pd(yd, yd));
-                            distance = S::min_pd(new_distance, distance);
+                            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut xd2 = xd * xd;
+                            let inv_mag = jitter * (xd2 + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
+                            xd2 = xd * xd;
+                            let new_distance = xd2 + (yd * yd);
+                            distance = new_distance.min(distance);
 
-                            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-                            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+                            ycf = ycf + S::Vf64::set1(1.0);
+                            yc = yc + S::Vi64::set1(Y_PRIME_64);
                         }
-                        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-                        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+                        xcf = xcf + S::Vf64::set1(1.0);
+                        xc = xc + S::Vi64::set1(X_PRIME_64);
                     }
                 }
                 CellDistanceFunction::Manhattan => {
@@ -64,32 +56,22 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                                S::set1_pd(511.5),
-                            );
-                            let mut yd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(
-                                    S::srai_epi64(hash, 10),
-                                    S::set1_epi64(BIT_10_MASK_64),
-                                )),
-                                S::set1_pd(511.5),
-                            );
-                            let inv_mag = S::mul_pd(
-                                jitter,
-                                S::rsqrt_pd(S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd))),
-                            );
-                            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_distance = S::add_pd(S::abs_pd(xd), S::abs_pd(yd));
-                            distance = S::min_pd(new_distance, distance);
+                            let new_distance = xd.abs() + yd.abs();
+                            distance = new_distance.min(distance);
 
-                            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-                            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+                            ycf = ycf + S::Vf64::set1(1.0);
+                            yc = yc + S::Vi64::set1(Y_PRIME_64);
                         }
-                        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-                        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+                        xcf = xcf + S::Vf64::set1(1.0);
+                        xc = xc + S::Vi64::set1(X_PRIME_64);
                     }
                 }
                 CellDistanceFunction::Natural => {
@@ -98,43 +80,33 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                                S::set1_pd(511.5),
-                            );
-                            let mut yd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(
-                                    S::srai_epi64(hash, 10),
-                                    S::set1_epi64(BIT_10_MASK_64),
-                                )),
-                                S::set1_pd(511.5),
-                            );
-                            let inv_mag = S::mul_pd(
-                                jitter,
-                                S::rsqrt_pd(S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd))),
-                            );
-                            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
                             let new_distance = {
-                                let euc = S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd));
-                                let man = S::add_pd(S::abs_pd(xd), S::abs_pd(yd));
-                                S::add_pd(euc, man)
+                                let euc = (xd * xd) + (yd * yd);
+                                let man = xd.abs() + yd.abs();
+                                euc + man
                             };
-                            distance = S::min_pd(new_distance, distance);
+                            distance = new_distance.min(distance);
 
-                            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-                            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+                            ycf = ycf + S::Vf64::set1(1.0);
+                            yc = yc + S::Vi64::set1(Y_PRIME_64);
                         }
-                        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-                        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+                        xcf = xcf + S::Vf64::set1(1.0);
+                        xc = xc + S::Vi64::set1(X_PRIME_64);
                     }
                 }
             }
             distance
         }
         CellReturnType::CellValue => {
-            let mut cell_value = S::setzero_pd();
+            let mut cell_value = S::Vf64::zeroes();
             match distance_function {
                 CellDistanceFunction::Euclidean => {
                     for _x in 0..3 {
@@ -142,36 +114,25 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                                S::set1_pd(511.5),
-                            );
-                            let mut yd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(
-                                    S::srai_epi64(hash, 10),
-                                    S::set1_epi64(BIT_10_MASK_64),
-                                )),
-                                S::set1_pd(511.5),
-                            );
-                            let inv_mag = S::mul_pd(
-                                jitter,
-                                S::rsqrt_pd(S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd))),
-                            );
-                            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_cell_value =
-                                S::mul_pd(S::set1_pd(HASH_2_FLOAT_64), S::cvtepi64_pd(hash));
-                            let new_distance = S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd));
-                            let closer = S::cmplt_pd(new_distance, distance);
-                            distance = S::min_pd(new_distance, distance);
-                            cell_value = S::blendv_pd(cell_value, new_cell_value, closer);
+                            let new_cell_value = S::Vf64::set1(HASH_2_FLOAT_64) * hash.cast_f64();
+                            let new_distance = (xd * xd) + (yd * yd);
+                            let closer = new_distance.cmp_lt(distance);
+                            distance = new_distance.min(distance);
+                            cell_value = closer.blendv(cell_value, new_cell_value);
 
-                            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-                            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+                            ycf = ycf + S::Vf64::set1(1.0);
+                            yc = yc + S::Vi64::set1(Y_PRIME_64);
                         }
-                        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-                        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+                        xcf = xcf + S::Vf64::set1(1.0);
+                        xc = xc + S::Vi64::set1(X_PRIME_64);
                     }
                 }
                 CellDistanceFunction::Manhattan => {
@@ -180,36 +141,25 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                                S::set1_pd(511.5),
-                            );
-                            let mut yd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(
-                                    S::srai_epi64(hash, 10),
-                                    S::set1_epi64(BIT_10_MASK_64),
-                                )),
-                                S::set1_pd(511.5),
-                            );
-                            let inv_mag = S::mul_pd(
-                                jitter,
-                                S::rsqrt_pd(S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd))),
-                            );
-                            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_cell_value =
-                                S::mul_pd(S::set1_pd(HASH_2_FLOAT_64), S::cvtepi64_pd(hash));
-                            let new_distance = S::add_pd(S::abs_pd(xd), S::abs_pd(yd));
-                            let closer = S::cmplt_pd(new_distance, distance);
-                            distance = S::min_pd(new_distance, distance);
-                            cell_value = S::blendv_pd(cell_value, new_cell_value, closer);
+                            let new_cell_value = S::Vf64::set1(HASH_2_FLOAT_64) * hash.cast_f64();
+                            let new_distance = xd.abs() + yd.abs();
+                            let closer = new_distance.cmp_lt(distance);
+                            distance = new_distance.min(distance);
+                            cell_value = closer.blendv(cell_value, new_cell_value);
 
-                            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-                            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+                            ycf = ycf + S::Vf64::set1(1.0);
+                            yc = yc + S::Vi64::set1(Y_PRIME_64);
                         }
-                        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-                        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+                        xcf = xcf + S::Vf64::set1(1.0);
+                        xc = xc + S::Vi64::set1(X_PRIME_64);
                     }
                 }
                 CellDistanceFunction::Natural => {
@@ -218,40 +168,29 @@ pub unsafe fn cellular_2d<S: Simd>(
                         let mut yc = yc_base;
                         for _y in 0..3 {
                             let hash = hash_2d::<S>(seed, xc, yc);
-                            let mut xd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                                S::set1_pd(511.5),
-                            );
-                            let mut yd = S::sub_pd(
-                                S::cvtepi64_pd(S::and_epi64(
-                                    S::srai_epi64(hash, 10),
-                                    S::set1_epi64(BIT_10_MASK_64),
-                                )),
-                                S::set1_pd(511.5),
-                            );
-                            let inv_mag = S::mul_pd(
-                                jitter,
-                                S::rsqrt_pd(S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd))),
-                            );
-                            xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                            yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
+                            let mut xd = (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                                - S::Vf64::set1(511.5);
+                            let inv_mag = jitter * ((xd * xd) + (yd * yd)).rsqrt();
+                            xd = (xd * inv_mag) + xcf;
+                            yd = (yd * inv_mag) + ycf;
 
-                            let new_cell_value =
-                                S::mul_pd(S::set1_pd(HASH_2_FLOAT_64), S::cvtepi64_pd(hash));
+                            let new_cell_value = S::Vf64::set1(HASH_2_FLOAT_64) * hash.cast_f64();
                             let new_distance = {
-                                let euc = S::add_pd(S::mul_pd(xd, xd), S::mul_pd(yd, yd));
-                                let man = S::add_pd(S::abs_pd(xd), S::abs_pd(yd));
-                                S::add_pd(euc, man)
+                                let euc = (xd * xd) + (yd * yd);
+                                let man = xd.abs() + yd.abs();
+                                euc + man
                             };
-                            let closer = S::cmplt_pd(new_distance, distance);
-                            distance = S::min_pd(new_distance, distance);
-                            cell_value = S::blendv_pd(cell_value, new_cell_value, closer);
+                            let closer = new_distance.cmp_lt(distance);
+                            distance = new_distance.min(distance);
+                            cell_value = closer.blendv(cell_value, new_cell_value);
 
-                            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-                            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+                            ycf = ycf + S::Vf64::set1(1.0);
+                            yc = yc + S::Vi64::set1(Y_PRIME_64);
                         }
-                        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-                        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+                        xcf = xcf + S::Vf64::set1(1.0);
+                        xc = xc + S::Vi64::set1(X_PRIME_64);
                     }
                 }
             }
@@ -261,7 +200,7 @@ pub unsafe fn cellular_2d<S: Simd>(
 }
 
 #[inline(always)]
-pub unsafe fn cellular_3d<S: Simd>(
+pub fn cellular_3d<S: Simd>(
     x: S::Vf64,
     y: S::Vf64,
     z: S::Vf64,
@@ -270,20 +209,20 @@ pub unsafe fn cellular_3d<S: Simd>(
     jitter: S::Vf64,
     seed: i64,
 ) -> S::Vf64 {
-    let mut distance = S::set1_pd(999999.0);
-    let mut cell_value = S::setzero_pd();
+    let mut distance = S::Vf64::set1(999999.0);
+    let mut cell_value = S::Vf64::zeroes();
 
-    let mut xc = S::sub_epi64(S::cvtpd_epi64(x), S::set1_epi64(1));
-    let mut yc_base = S::sub_epi64(S::cvtpd_epi64(y), S::set1_epi64(1));
-    let mut zc_base = S::sub_epi64(S::cvtpd_epi64(z), S::set1_epi64(1));
+    let mut xc = x.cast_i64() - S::Vi64::set1(1);
+    let mut yc_base = y.cast_i64() - S::Vi64::set1(1);
+    let mut zc_base = z.cast_i64() - S::Vi64::set1(1);
 
-    let mut xcf = S::sub_pd(S::cvtepi64_pd(xc), x);
-    let ycf_base = S::sub_pd(S::cvtepi64_pd(yc_base), y);
-    let zcf_base = S::sub_pd(S::cvtepi64_pd(zc_base), z);
+    let mut xcf = xc.cast_f64() - x;
+    let ycf_base = yc_base.cast_f64() - y;
+    let zcf_base = zc_base.cast_f64() - z;
 
-    xc = S::mullo_epi64(xc, S::set1_epi64(X_PRIME_64));
-    yc_base = S::mullo_epi64(yc_base, S::set1_epi64(Y_PRIME_64));
-    zc_base = S::mullo_epi64(zc_base, S::set1_epi64(Z_PRIME_64));
+    xc = xc * S::Vi64::set1(X_PRIME_64);
+    yc_base = yc_base * S::Vi64::set1(Y_PRIME_64);
+    zc_base = zc_base * S::Vi64::set1(Z_PRIME_64);
 
     for _x in 0..3 {
         let mut ycf = ycf_base;
@@ -293,64 +232,38 @@ pub unsafe fn cellular_3d<S: Simd>(
             let mut zc = zc_base;
             for _z in 0..3 {
                 let hash = hash_3d::<S>(seed, xc, yc, zc);
-                let mut xd = S::sub_pd(
-                    S::cvtepi64_pd(S::and_epi64(hash, S::set1_epi64(BIT_10_MASK_64))),
-                    S::set1_pd(511.5),
-                );
-                let mut yd = S::sub_pd(
-                    S::cvtepi64_pd(S::and_epi64(
-                        S::srai_epi64(hash, 10),
-                        S::set1_epi64(BIT_10_MASK_64),
-                    )),
-                    S::set1_pd(511.5),
-                );
-                let mut zd = S::sub_pd(
-                    S::cvtepi64_pd(S::and_epi64(
-                        S::srai_epi64(hash, 20),
-                        S::set1_epi64(BIT_10_MASK_64),
-                    )),
-                    S::set1_pd(511.5),
-                );
-                let inv_mag = S::mul_pd(
-                    jitter,
-                    S::rsqrt_pd(S::add_pd(
-                        S::mul_pd(xd, xd),
-                        S::add_pd(S::mul_pd(yd, yd), S::mul_pd(zd, zd)),
-                    )),
-                );
-                xd = S::add_pd(S::mul_pd(xd, inv_mag), xcf);
-                yd = S::add_pd(S::mul_pd(yd, inv_mag), ycf);
-                zd = S::add_pd(S::mul_pd(zd, inv_mag), zcf);
+                let mut xd =
+                    (hash & S::Vi64::set1(BIT_10_MASK_64)).cast_f64() - S::Vf64::set1(511.5);
+                let mut yd = ((hash >> 10) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                    - S::Vf64::set1(511.5);
+                let mut zd = ((hash >> 20) & S::Vi64::set1(BIT_10_MASK_64)).cast_f64()
+                    - S::Vf64::set1(511.5);
+                let inv_mag = jitter * ((xd * xd) + ((yd * yd) + (zd * zd))).rsqrt();
+                xd = (xd * inv_mag) + xcf;
+                yd = (yd * inv_mag) + ycf;
+                zd = (zd * inv_mag) + zcf;
 
-                let new_cell_value = S::mul_pd(S::set1_pd(HASH_2_FLOAT_64), S::cvtepi64_pd(hash));
+                let new_cell_value = S::Vf64::set1(HASH_2_FLOAT_64) * hash.cast_f64();
                 let new_distance = match distance_function {
-                    CellDistanceFunction::Euclidean => S::add_pd(
-                        S::mul_pd(xd, xd),
-                        S::add_pd(S::mul_pd(yd, yd), S::mul_pd(zd, zd)),
-                    ),
-                    CellDistanceFunction::Manhattan => {
-                        S::add_pd(S::add_pd(S::abs_pd(xd), S::abs_pd(yd)), S::abs_pd(zd))
-                    }
+                    CellDistanceFunction::Euclidean => (xd * xd) + (yd * yd) + (zd * zd),
+                    CellDistanceFunction::Manhattan => xd.abs() + yd.abs() + zd.abs(),
                     CellDistanceFunction::Natural => {
-                        let euc = S::add_pd(
-                            S::mul_pd(xd, xd),
-                            S::add_pd(S::mul_pd(yd, yd), S::mul_pd(zd, zd)),
-                        );
-                        let man = S::add_pd(S::add_pd(S::abs_pd(xd), S::abs_pd(yd)), S::abs_pd(zd));
-                        S::add_pd(euc, man)
+                        let euc = (xd * xd) + (yd * yd) + (zd * zd);
+                        let man = xd.abs() + yd.abs() + zd.abs();
+                        euc + man
                     }
                 };
-                let closer = S::cmplt_pd(new_distance, distance);
-                distance = S::min_pd(new_distance, distance);
-                cell_value = S::blendv_pd(cell_value, new_cell_value, closer);
-                zcf = S::add_pd(ycf, S::set1_pd(1.0));
-                zc = S::add_epi64(yc, S::set1_epi64(Z_PRIME_64));
+                let closer = new_distance.cmp_lt(distance);
+                distance = new_distance.min(distance);
+                cell_value = closer.blendv(cell_value, new_cell_value);
+                zcf = zcf + S::Vf64::set1(1.0); // step the z offset itself, not from ycf
+                zc = zc + S::Vi64::set1(Z_PRIME_64); // step the z cell from zc, not yc
             }
-            ycf = S::add_pd(ycf, S::set1_pd(1.0));
-            yc = S::add_epi64(yc, S::set1_epi64(Y_PRIME_64));
+            ycf = ycf + S::Vf64::set1(1.0);
+            yc = yc + S::Vi64::set1(Y_PRIME_64);
         }
-        xcf = S::add_pd(xcf, S::set1_pd(1.0));
-        xc = S::add_epi64(xc, S::set1_epi64(X_PRIME_64));
+        xcf = xcf + S::Vf64::set1(1.0);
+        xc = xc + S::Vi64::set1(X_PRIME_64);
     }
 
     match return_type {
diff --git a/src/noise/cellular_32.rs b/src/noise/cellular_32.rs
index d4363e7..c886631 100644
--- a/src/noise/cellular_32.rs
+++ b/src/noise/cellular_32.rs
@@ -5,7 +5,7 @@
 
 use std::f32;
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 pub const BIT_10_MASK_32: i32 = 1023;
 pub const BIT_10_MASK_64: i64 = 1023;
@@ -25,22 +25,16 @@ pub const Z_PRIME_32: i32 = 6971;
 pub const Z_PRIME_64: i64 = 6971;
 
 #[inline(always)]
-pub unsafe fn hash_2d<S: Simd>(seed: i32, x: S::Vi32, y: S::Vi32) -> S::Vi32 {
-    let mut hash = S::xor_epi32(x, S::set1_epi32(seed));
-    hash = S::xor_epi32(y, hash);
-    S::mullo_epi32(
-        S::mullo_epi32(S::mullo_epi32(hash, hash), S::set1_epi32(60493)),
-        hash,
-    )
+pub fn hash_2d<S: Simd>(seed: i32, x: S::Vi32, y: S::Vi32) -> S::Vi32 {
+    let mut hash = x ^ S::Vi32::set1(seed); // XOR each lane of x with the seed
+    hash = y ^ hash; // fold y in
+    ((hash * hash) * S::Vi32::set1(60493)) * hash // per lane: hash * hash * 60493 * hash
 }
 
 #[inline(always)]
-pub unsafe fn hash_3d<S: Simd>(seed: i32, x: S::Vi32, y: S::Vi32, z: S::Vi32) -> S::Vi32 {
-    let mut hash = S::xor_epi32(x, S::set1_epi32(seed));
-    hash = S::xor_epi32(y, hash);
-    hash = S::xor_epi32(z, hash);
-    S::mullo_epi32(
-        S::mullo_epi32(S::mullo_epi32(hash, hash), S::set1_epi32(60493)),
-        hash,
-    )
+pub fn hash_3d<S: Simd>(seed: i32, x: S::Vi32, y: S::Vi32, z: S::Vi32) -> S::Vi32 {
+    let mut hash = x ^ S::Vi32::set1(seed); // XOR each lane of x with the seed
+    hash = y ^ hash; // fold y in
+    hash = z ^ hash; // fold z in
+    ((hash * hash) * S::Vi32::set1(60493)) * hash // per lane: hash * hash * 60493 * hash
 }
diff --git a/src/noise/cellular_64.rs b/src/noise/cellular_64.rs
index 8709027..54f4688 100644
--- a/src/noise/cellular_64.rs
+++ b/src/noise/cellular_64.rs
@@ -1,22 +1,16 @@
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn hash_2d<S: Simd>(seed: i64, x: S::Vi64, y: S::Vi64) -> S::Vi64 {
-    let mut hash = S::xor_epi64(x, S::set1_epi64(seed));
-    hash = S::xor_epi64(y, hash);
-    S::mullo_epi64(
-        S::mullo_epi64(S::mullo_epi64(hash, hash), S::set1_epi64(60493)),
-        hash,
-    )
+pub fn hash_2d<S: Simd>(seed: i64, x: S::Vi64, y: S::Vi64) -> S::Vi64 {
+    let mut hash = x ^ S::Vi64::set1(seed); // XOR each lane of x with the seed
+    hash = y ^ hash; // fold y in
+    ((hash * hash) * S::Vi64::set1(60493)) * hash // per lane: hash * hash * 60493 * hash
 }
 
 #[inline(always)]
-pub unsafe fn hash_3d<S: Simd>(seed: i64, x: S::Vi64, y: S::Vi64, z: S::Vi64) -> S::Vi64 {
-    let mut hash = S::xor_epi64(x, S::set1_epi64(seed));
-    hash = S::xor_epi64(y, hash);
-    hash = S::xor_epi64(z, hash);
-    S::mullo_epi64(
-        S::mullo_epi64(S::mullo_epi64(hash, hash), S::set1_epi64(60493)),
-        hash,
-    )
+pub fn hash_3d<S: Simd>(seed: i64, x: S::Vi64, y: S::Vi64, z: S::Vi64) -> S::Vi64 {
+    let mut hash = x ^ S::Vi64::set1(seed); // XOR each lane of x with the seed
+    hash = y ^ hash; // fold y in
+    hash = z ^ hash; // fold z in
+    ((hash * hash) * S::Vi64::set1(60493)) * hash // per lane: hash * hash * 60493 * hash
 }
diff --git a/src/noise/fbm_32.rs b/src/noise/fbm_32.rs
index dbfad8f..fe6e16a 100644
--- a/src/noise/fbm_32.rs
+++ b/src/noise/fbm_32.rs
@@ -1,29 +1,29 @@
 use crate::noise::simplex_32::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn fbm_1d<S: Simd>(
+pub fn fbm_1d<S: Simd>(
     mut x: S::Vf32,
     lacunarity: S::Vf32,
     gain: S::Vf32,
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut amp = S::set1_ps(1.0);
+    let mut amp = S::Vf32::set1(1.0); // the first octave is added unscaled
     let mut result = simplex_1d::<S>(x, seed);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lacunarity);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(result, simplex_1d::<S>(x, seed));
+        x = x * lacunarity; // rescale the sample position for this octave
+        amp = amp * gain; // rescale this octave's weight
+        result = result + (simplex_1d::<S>(x, seed) * amp); // weight the octave by amp
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn fbm_2d<S: Simd>(
+pub fn fbm_2d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     lac: S::Vf32,
@@ -32,20 +32,20 @@ pub unsafe fn fbm_2d<S: Simd>(
     seed: i32,
 ) -> S::Vf32 {
     let mut result = simplex_2d::<S>(x, y, seed);
-    let mut amp = S::set1_ps(1.0);
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(S::mul_ps(simplex_2d::<S>(x, y, seed), amp), result);
+        x = x * lac;
+        y = y * lac;
+        amp = amp * gain;
+        result = (simplex_2d::<S>(x, y, seed) * amp) + result;
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn fbm_3d<S: Simd>(
+pub fn fbm_3d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     mut z: S::Vf32,
@@ -55,21 +55,21 @@ pub unsafe fn fbm_3d<S: Simd>(
     seed: i32,
 ) -> S::Vf32 {
     let mut result = simplex_3d::<S>(x, y, z, seed);
-    let mut amp = S::set1_ps(1.0);
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        z = S::mul_ps(z, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(S::mul_ps(simplex_3d::<S>(x, y, z, seed), amp), result);
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        amp = amp * gain;
+        result = (simplex_3d::<S>(x, y, z, seed) * amp) + result;
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn fbm_4d<S: Simd>(
+pub fn fbm_4d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     mut z: S::Vf32,
@@ -80,15 +80,15 @@ pub unsafe fn fbm_4d<S: Simd>(
     seed: i32,
 ) -> S::Vf32 {
     let mut result = simplex_4d::<S>(x, y, z, w, seed);
-    let mut amp = S::set1_ps(1.0);
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        z = S::mul_ps(z, lac);
-        w = S::mul_ps(w, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(result, S::mul_ps(simplex_4d::<S>(x, y, z, w, seed), amp));
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        w = w * lac;
+        amp = amp * gain;
+        result = result + (simplex_4d::<S>(x, y, z, w, seed) * amp);
     }
 
     result
diff --git a/src/noise/fbm_64.rs b/src/noise/fbm_64.rs
index 4818262..7ead9cc 100644
--- a/src/noise/fbm_64.rs
+++ b/src/noise/fbm_64.rs
@@ -1,29 +1,29 @@
 use crate::noise::simplex_64::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn fbm_1d<S: Simd>(
+pub fn fbm_1d<S: Simd>(
     mut x: S::Vf64,
     lacunarity: S::Vf64,
     gain: S::Vf64,
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut amp = S::set1_pd(1.0);
+    let mut amp = S::Vf64::set1(1.0); // the first octave is added unscaled
     let mut result = simplex_1d::<S>(x, seed);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lacunarity);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(result, simplex_1d::<S>(x, seed));
+        x = x * lacunarity; // rescale the sample position for this octave
+        amp = amp * gain; // rescale this octave's weight
+        result = result + (simplex_1d::<S>(x, seed) * amp); // weight the octave by amp
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn fbm_2d<S: Simd>(
+pub fn fbm_2d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     lac: S::Vf64,
@@ -32,20 +32,20 @@ pub unsafe fn fbm_2d<S: Simd>(
     seed: i64,
 ) -> S::Vf64 {
     let mut result = simplex_2d::<S>(x, y, seed);
-    let mut amp = S::set1_pd(1.0);
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(S::mul_pd(simplex_2d::<S>(x, y, seed), amp), result);
+        x = x * lac;
+        y = y * lac;
+        amp = amp * gain;
+        result = (simplex_2d::<S>(x, y, seed) * amp) + result;
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn fbm_3d<S: Simd>(
+pub fn fbm_3d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     mut z: S::Vf64,
@@ -55,19 +55,19 @@ pub unsafe fn fbm_3d<S: Simd>(
     seed: i64,
 ) -> S::Vf64 {
     let mut result = simplex_3d::<S>(x, y, z, seed);
-    let mut amp = S::set1_pd(1.0);
+    let mut amp = S::Vf64::set1(1.0);
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        z = S::mul_pd(z, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(S::mul_pd(simplex_3d::<S>(x, y, z, seed), amp), result);
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        amp = amp * gain;
+        result = (simplex_3d::<S>(x, y, z, seed) * amp) + result;
     }
     result
 }
 
 #[inline(always)]
-pub unsafe fn fbm_4d<S: Simd>(
+pub fn fbm_4d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     mut z: S::Vf64,
@@ -78,15 +78,15 @@ pub unsafe fn fbm_4d<S: Simd>(
     seed: i64,
 ) -> S::Vf64 {
     let mut result = simplex_4d::<S>(x, y, z, w, seed);
-    let mut amp = S::set1_pd(1.0);
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        z = S::mul_pd(z, lac);
-        w = S::mul_pd(w, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(result, S::mul_pd(simplex_4d::<S>(x, y, z, w, seed), amp));
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        w = w * lac;
+        amp = amp * gain;
+        result = result + (simplex_4d::<S>(x, y, z, w, seed) * amp);
     }
 
     result
diff --git a/src/noise/gradient_32.rs b/src/noise/gradient_32.rs
index af6765e..fa364a6 100644
--- a/src/noise/gradient_32.rs
+++ b/src/noise/gradient_32.rs
@@ -1,21 +1,18 @@
 use crate::noise::hash3d_32::hash3d;
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 /// Generates a random integer gradient in ±7 inclusive
 ///
 /// This differs from Gustavson's well-known implementation in that gradients can be zero, and the
 /// maximum gradient is 7 rather than 8.
 #[inline(always)]
-pub unsafe fn grad1<S: Simd>(seed: i32, hash: S::Vi32) -> S::Vf32 {
-    let h = S::and_epi32(S::xor_epi32(S::set1_epi32(seed), hash), S::set1_epi32(15));
-    let v = S::cvtepi32_ps(S::and_epi32(h, S::set1_epi32(7)));
+pub fn grad1<S: Simd>(seed: i32, hash: S::Vi32) -> S::Vf32 {
+    let h = (S::Vi32::set1(seed) ^ hash) & S::Vi32::set1(15); // low 4 bits of seed ^ hash
+    let v = (h & S::Vi32::set1(7)).cast_f32(); // low 3 bits of h as the magnitude
 
-    let h_and_8 = S::castepi32_ps(S::cmpeq_epi32(
-        S::setzero_epi32(),
-        S::and_epi32(h, S::set1_epi32(8)),
-    ));
-    S::blendv_ps(S::sub_ps(S::setzero_ps(), v), v, h_and_8)
+    let h_and_8 = ((h & S::Vi32::set1(8)).cmp_eq(S::Vi32::zeroes())).bitcast_f32(); // h & 8 == 0
+    h_and_8.blendv(S::Vf32::zeroes() - v, v) // mask picks between -v and v per lane
 }
 
 /// Generates a random gradient vector where one component is ±1 and the other is ±2.
@@ -23,38 +20,28 @@ pub unsafe fn grad1<S: Simd>(seed: i32, hash: S::Vi32) -> S::Vf32 {
 /// This differs from Gustavson's gradients by having a constant magnitude, providing results that
 /// are more consistent between directions.
 #[inline(always)]
-pub unsafe fn grad2<S: Simd>(seed: i32, hash: S::Vi32) -> [S::Vf32; 2] {
-    let h = S::and_epi32(S::xor_epi32(hash, S::set1_epi32(seed)), S::set1_epi32(7));
-    let mask = S::castepi32_ps(S::cmpgt_epi32(S::set1_epi32(4), h));
-    let x_magnitude = S::blendv_ps(S::set1_ps(2.0), S::set1_ps(1.0), mask);
-    let y_magnitude = S::blendv_ps(S::set1_ps(1.0), S::set1_ps(2.0), mask);
+pub fn grad2<S: Simd>(seed: i32, hash: S::Vi32) -> [S::Vf32; 2] {
+    let h = (hash ^ S::Vi32::set1(seed)) & S::Vi32::set1(7); // low 3 bits of hash ^ seed
+    let mask = (S::Vi32::set1(4).cmp_gt(h)).bitcast_f32(); // lanes where 4 > h
+    let x_magnitude = mask.blendv(S::Vf32::set1(2.0), S::Vf32::set1(1.0)); // 1.0 or 2.0
+    let y_magnitude = mask.blendv(S::Vf32::set1(1.0), S::Vf32::set1(2.0)); // the other one
 
-    let h_and_1 = S::castepi32_ps(S::cmpeq_epi32(
-        S::setzero_epi32(),
-        S::and_epi32(h, S::set1_epi32(1)),
-    ));
-    let h_and_2 = S::castepi32_ps(S::cmpeq_epi32(
-        S::setzero_epi32(),
-        S::and_epi32(h, S::set1_epi32(2)),
-    ));
+    let h_and_1 = ((h & S::Vi32::set1(1)).cmp_eq(S::Vi32::zeroes())).bitcast_f32(); // h & 1 == 0
+    let h_and_2 = ((h & S::Vi32::set1(2)).cmp_eq(S::Vi32::zeroes())).bitcast_f32(); // h & 2 == 0
 
-    let gx = S::blendv_ps(
-        S::sub_ps(S::setzero_ps(), x_magnitude),
-        x_magnitude,
-        S::blendv_ps(h_and_2, h_and_1, mask),
-    );
-    let gy = S::blendv_ps(
-        S::sub_ps(S::setzero_ps(), y_magnitude),
-        y_magnitude,
-        S::blendv_ps(h_and_1, h_and_2, mask),
-    );
+    let gx = mask
+        .blendv(h_and_2, h_and_1) // choose which bit test drives the sign of x
+        .blendv(S::Vf32::zeroes() - x_magnitude, x_magnitude); // -x_magnitude or x_magnitude
+    let gy = mask
+        .blendv(h_and_1, h_and_2) // y uses the bit test that x did not
+        .blendv(S::Vf32::zeroes() - y_magnitude, y_magnitude); // -y_magnitude or y_magnitude
     [gx, gy]
 }
 
 /// Generates a random gradient vector from the origin towards the midpoint of an edge of a
 /// double-unit cube and computes its dot product with [x, y, z]
 #[inline(always)]
-pub unsafe fn grad3d_dot<S: Simd>(
+pub fn grad3d_dot<S: Simd>(
     seed: i32,
     i: S::Vi32,
     j: S::Vi32,
@@ -64,9 +51,9 @@ pub unsafe fn grad3d_dot<S: Simd>(
     z: S::Vf32,
 ) -> S::Vf32 {
     let h = hash3d::<S>(seed, i, j, k);
-    let u = S::blendv_ps(y, x, h.l8);
-    let v = S::blendv_ps(S::blendv_ps(z, x, h.h12_or_14), y, h.l4);
-    let result = S::add_ps(S::xor_ps(u, h.h1), S::xor_ps(v, h.h2));
+    let u = h.l8.blendv(y, x);
+    let v = h.l4.blendv(h.h12_or_14.blendv(z, x), y);
+    let result = (u ^ h.h1) + (v ^ h.h2);
     debug_assert_eq!(
         result[0],
         {
@@ -82,17 +69,17 @@ pub unsafe fn grad3d_dot<S: Simd>(
 ///
 /// This is a separate function because it's slower than `grad3d_dot` and only needed when computing
 /// derivatives.
-pub unsafe fn grad3d<S: Simd>(seed: i32, i: S::Vi32, j: S::Vi32, k: S::Vi32) -> [S::Vf32; 3] {
+pub fn grad3d<S: Simd>(seed: i32, i: S::Vi32, j: S::Vi32, k: S::Vi32) -> [S::Vf32; 3] {
     let h = hash3d::<S>(seed, i, j, k);
 
-    let first = S::set1_ps(1.0) | h.h1;
-    let mut gx = S::and_ps(h.l8, first);
-    let mut gy = S::andnot_ps(h.l8, first);
+    let first = S::Vf32::set1(1.0) | h.h1;
+    let mut gx = h.l8 & first;
+    let mut gy = first.and_not(h.l8);
 
-    let second = S::set1_ps(1.0) | h.h2;
-    gy = S::blendv_ps(gy, second, h.l4);
-    gx = S::blendv_ps(gx, second, S::andnot_ps(h.l4, h.h12_or_14));
-    let gz = S::andnot_ps(h.h12_or_14 | h.l4, second);
+    let second = S::Vf32::set1(1.0) | h.h2;
+    gy = h.l4.blendv(gy, second);
+    gx = h.h12_or_14.and_not(h.l4).blendv(gx, second);
+    let gz = second.and_not(h.h12_or_14 | h.l4);
     debug_assert_eq!(
         gx[0].abs() + gy[0].abs() + gz[0].abs(),
         2.0,
@@ -102,7 +89,7 @@ pub unsafe fn grad3d<S: Simd>(seed: i32, i: S::Vi32, j: S::Vi32, k: S::Vi32) ->
 }
 
 #[inline(always)]
-pub unsafe fn grad4<S: Simd>(
+pub fn grad4<S: Simd>(
     seed: i32,
     hash: S::Vi32,
     x: S::Vf32,
@@ -110,32 +97,19 @@ pub unsafe fn grad4<S: Simd>(
     z: S::Vf32,
     t: S::Vf32,
 ) -> S::Vf32 {
-    let h = S::and_epi32(S::xor_epi32(S::set1_epi32(seed), hash), S::set1_epi32(31));
-    let mut mask = S::castepi32_ps(S::cmpgt_epi32(S::set1_epi32(24), h));
-    let u = S::blendv_ps(y, x, mask);
-    mask = S::castepi32_ps(S::cmpgt_epi32(S::set1_epi32(16), h));
-    let v = S::blendv_ps(z, y, mask);
-    mask = S::castepi32_ps(S::cmpgt_epi32(S::set1_epi32(8), h));
-    let w = S::blendv_ps(t, z, mask);
+    let h = (S::Vi32::set1(seed) ^ hash) & S::Vi32::set1(31);
+    let mut mask = (S::Vi32::set1(24).cmp_gt(h)).bitcast_f32();
+    let u = mask.blendv(y, x);
+    mask = (S::Vi32::set1(16).cmp_gt(h)).bitcast_f32();
+    let v = mask.blendv(z, y);
+    mask = (S::Vi32::set1(8).cmp_gt(h)).bitcast_f32();
+    let w = mask.blendv(t, z);
 
-    let h_and_1 = S::castepi32_ps(S::cmpeq_epi32(
-        S::setzero_epi32(),
-        S::and_epi32(h, S::set1_epi32(1)),
-    ));
-    let h_and_2 = S::castepi32_ps(S::cmpeq_epi32(
-        S::setzero_epi32(),
-        S::and_epi32(h, S::set1_epi32(2)),
-    ));
-    let h_and_4 = S::castepi32_ps(S::cmpeq_epi32(
-        S::setzero_epi32(),
-        S::and_epi32(h, S::set1_epi32(4)),
-    ));
+    let h_and_1 = ((h & S::Vi32::set1(1)).cmp_eq(S::Vi32::zeroes())).bitcast_f32();
+    let h_and_2 = ((h & S::Vi32::set1(2)).cmp_eq(S::Vi32::zeroes())).bitcast_f32();
+    let h_and_4 = ((h & S::Vi32::set1(4)).cmp_eq(S::Vi32::zeroes())).bitcast_f32();
 
-    S::add_ps(
-        S::blendv_ps(S::sub_ps(S::setzero_ps(), u), u, h_and_1),
-        S::add_ps(
-            S::blendv_ps(S::sub_ps(S::setzero_ps(), v), v, h_and_2),
-            S::blendv_ps(S::sub_ps(S::setzero_ps(), w), w, h_and_4),
-        ),
-    )
+    h_and_1.blendv(S::Vf32::zeroes() - u, u)
+        + (h_and_2.blendv(S::Vf32::zeroes() - v, v)
+            + h_and_4.blendv(S::Vf32::zeroes() - w, w))
 }
diff --git a/src/noise/gradient_64.rs b/src/noise/gradient_64.rs
index 7f5dafa..044d529 100644
--- a/src/noise/gradient_64.rs
+++ b/src/noise/gradient_64.rs
@@ -1,20 +1,18 @@
 use crate::noise::hash3d_64::hash3d;
-use simdeez::Simd;
+
+use simdeez::prelude::*;
 
 /// Generates a random integer gradient in ±7 inclusive
 ///
 /// This differs from Gustavson's well-known implementation in that gradients can be zero, and the
 /// maximum gradient is 7 rather than 8.
 #[inline(always)]
-pub unsafe fn grad1<S: Simd>(seed: i64, hash: S::Vi64) -> S::Vf64 {
-    let h = S::and_epi64(S::xor_epi64(S::set1_epi64(seed), hash), S::set1_epi64(15));
-    let v = S::cvtepi64_pd(S::and_epi64(h, S::set1_epi64(7)));
+pub fn grad1<S: Simd>(seed: i64, hash: S::Vi64) -> S::Vf64 {
+    let h = (S::Vi64::set1(seed) ^ hash) & S::Vi64::set1(15);
+    let v = (h & S::Vi64::set1(7)).cast_f64();
 
-    let h_and_8 = S::castepi64_pd(S::cmpeq_epi64(
-        S::setzero_epi64(),
-        S::and_epi64(h, S::set1_epi64(8)),
-    ));
-    S::blendv_pd(S::sub_pd(S::setzero_pd(), v), v, h_and_8)
+    let h_and_8 = ((h & S::Vi64::set1(8)).cmp_eq(S::Vi64::zeroes())).bitcast_f64();
+    h_and_8.blendv(S::Vf64::zeroes() - v, v)
 }
 
 /// Generates a random gradient vector where one component is ±1 and the other is ±2.
@@ -22,38 +20,28 @@ pub unsafe fn grad1<S: Simd>(seed: i64, hash: S::Vi64) -> S::Vf64 {
 /// This differs from Gustavson's gradients by having a constant magnitude, providing results that
 /// are more consistent between directions.
 #[inline(always)]
-pub unsafe fn grad2<S: Simd>(seed: i64, hash: S::Vi64) -> [S::Vf64; 2] {
-    let h = S::and_epi64(S::xor_epi64(hash, S::set1_epi64(seed)), S::set1_epi64(7));
-    let mask = S::castepi64_pd(S::cmpgt_epi64(S::set1_epi64(4), h));
-    let x_magnitude = S::blendv_pd(S::set1_pd(2.0), S::set1_pd(1.0), mask);
-    let y_magnitude = S::blendv_pd(S::set1_pd(1.0), S::set1_pd(2.0), mask);
+pub fn grad2<S: Simd>(seed: i64, hash: S::Vi64) -> [S::Vf64; 2] {
+    let h = (hash ^ S::Vi64::set1(seed)) & S::Vi64::set1(7);
+    let mask = (S::Vi64::set1(4).cmp_gt(h)).bitcast_f64();
+    let x_magnitude = mask.blendv(S::Vf64::set1(2.0), S::Vf64::set1(1.0));
+    let y_magnitude = mask.blendv(S::Vf64::set1(1.0), S::Vf64::set1(2.0));
 
-    let h_and_1 = S::castepi64_pd(S::cmpeq_epi64(
-        S::setzero_epi64(),
-        S::and_epi64(h, S::set1_epi64(1)),
-    ));
-    let h_and_2 = S::castepi64_pd(S::cmpeq_epi64(
-        S::setzero_epi64(),
-        S::and_epi64(h, S::set1_epi64(2)),
-    ));
+    let h_and_1 = ((h & S::Vi64::set1(1)).cmp_eq(S::Vi64::zeroes())).bitcast_f64();
+    let h_and_2 = ((h & S::Vi64::set1(2)).cmp_eq(S::Vi64::zeroes())).bitcast_f64();
 
-    let gx = S::blendv_pd(
-        S::sub_pd(S::setzero_pd(), x_magnitude),
-        x_magnitude,
-        S::blendv_pd(h_and_2, h_and_1, mask),
-    );
-    let gy = S::blendv_pd(
-        S::sub_pd(S::setzero_pd(), y_magnitude),
-        y_magnitude,
-        S::blendv_pd(h_and_1, h_and_2, mask),
-    );
+    let gx = mask
+        .blendv(h_and_2, h_and_1)
+        .blendv(S::Vf64::zeroes() - x_magnitude, x_magnitude);
+    let gy = mask
+        .blendv(h_and_1, h_and_2)
+        .blendv(S::Vf64::zeroes() - y_magnitude, y_magnitude);
     [gx, gy]
 }
 
 /// Generates a random gradient vector from the origin towards the midpoint of an edge of a
 /// double-unit cube and computes its dot product with [x, y, z]
 #[inline(always)]
-pub unsafe fn grad3d_dot<S: Simd>(
+pub fn grad3d_dot<S: Simd>(
     seed: i64,
     i: S::Vi64,
     j: S::Vi64,
@@ -63,9 +51,9 @@ pub unsafe fn grad3d_dot<S: Simd>(
     z: S::Vf64,
 ) -> S::Vf64 {
     let h = hash3d::<S>(seed, i, j, k);
-    let u = S::blendv_pd(y, x, h.l8);
-    let v = S::blendv_pd(S::blendv_pd(z, x, h.h12_or_14), y, h.l4);
-    let result = S::add_pd(S::xor_pd(u, h.h1), S::xor_pd(v, h.h2));
+    let u = h.l8.blendv(y, x);
+    let v = h.l4.blendv(h.h12_or_14.blendv(z, x), y);
+    let result = (u ^ h.h1) + (v ^ h.h2);
     debug_assert_eq!(
         result[0],
         {
@@ -81,17 +69,17 @@ pub unsafe fn grad3d_dot<S: Simd>(
 ///
 /// This is a separate function because it's slower than `grad3d_dot` and only needed when computing
 /// derivatives.
-pub unsafe fn grad3d<S: Simd>(seed: i64, i: S::Vi64, j: S::Vi64, k: S::Vi64) -> [S::Vf64; 3] {
+pub fn grad3d<S: Simd>(seed: i64, i: S::Vi64, j: S::Vi64, k: S::Vi64) -> [S::Vf64; 3] {
     let h = hash3d::<S>(seed, i, j, k);
 
-    let first = S::set1_pd(1.0) | h.h1;
-    let mut gx = S::and_pd(h.l8, first);
-    let mut gy = S::andnot_pd(h.l8, first);
+    let first = S::Vf64::set1(1.0) | h.h1;
+    let mut gx = h.l8 & first;
+    let mut gy = first.and_not(h.l8);
 
-    let second = S::set1_pd(1.0) | h.h2;
-    gy = S::blendv_pd(gy, second, h.l4);
-    gx = S::blendv_pd(gx, second, S::andnot_pd(h.l4, h.h12_or_14));
-    let gz = S::andnot_pd(h.h12_or_14 | h.l4, second);
+    let second = S::Vf64::set1(1.0) | h.h2;
+    gy = h.l4.blendv(gy, second);
+    gx = h.h12_or_14.and_not(h.l4).blendv(gx, second);
+    let gz = second.and_not(h.h12_or_14 | h.l4);
     debug_assert_eq!(
         gx[0].abs() + gy[0].abs() + gz[0].abs(),
         2.0,
@@ -101,7 +89,7 @@ pub unsafe fn grad3d<S: Simd>(seed: i64, i: S::Vi64, j: S::Vi64, k: S::Vi64) ->
 }
 
 #[inline(always)]
-pub unsafe fn grad4<S: Simd>(
+pub fn grad4<S: Simd>(
     seed: i64,
     hash: S::Vi64,
     x: S::Vf64,
@@ -109,32 +97,18 @@ pub unsafe fn grad4<S: Simd>(
     z: S::Vf64,
     t: S::Vf64,
 ) -> S::Vf64 {
-    let h = S::and_epi64(S::xor_epi64(S::set1_epi64(seed), hash), S::set1_epi64(31));
-    let mut mask = S::castepi64_pd(S::cmpgt_epi64(S::set1_epi64(24), h));
-    let u = S::blendv_pd(y, x, mask);
-    mask = S::castepi64_pd(S::cmpgt_epi64(S::set1_epi64(16), h));
-    let v = S::blendv_pd(z, y, mask);
-    mask = S::castepi64_pd(S::cmpgt_epi64(S::set1_epi64(8), h));
-    let w = S::blendv_pd(t, z, mask);
+    let h = (S::Vi64::set1(seed) ^ hash) & S::Vi64::set1(31);
+    let mut mask = (S::Vi64::set1(24).cmp_gt(h)).bitcast_f64();
+    let u = mask.blendv(y, x);
+    mask = (S::Vi64::set1(16).cmp_gt(h)).bitcast_f64();
+    let v = mask.blendv(z, y);
+    mask = (S::Vi64::set1(8).cmp_gt(h)).bitcast_f64();
+    let w = mask.blendv(t, z);
 
-    let h_and_1 = S::castepi64_pd(S::cmpeq_epi64(
-        S::setzero_epi64(),
-        S::and_epi64(h, S::set1_epi64(1)),
-    ));
-    let h_and_2 = S::castepi64_pd(S::cmpeq_epi64(
-        S::setzero_epi64(),
-        S::and_epi64(h, S::set1_epi64(2)),
-    ));
-    let h_and_4 = S::castepi64_pd(S::cmpeq_epi64(
-        S::setzero_epi64(),
-        S::and_epi64(h, S::set1_epi64(4)),
-    ));
+    let h_and_1 = ((h & S::Vi64::set1(1)).cmp_eq(S::Vi64::zeroes())).bitcast_f64();
+    let h_and_2 = ((h & S::Vi64::set1(2)).cmp_eq(S::Vi64::zeroes())).bitcast_f64();
+    let h_and_4 = ((h & S::Vi64::set1(4)).cmp_eq(S::Vi64::zeroes())).bitcast_f64();
 
-    S::add_pd(
-        S::blendv_pd(S::sub_pd(S::setzero_pd(), u), u, h_and_1),
-        S::add_pd(
-            S::blendv_pd(S::sub_pd(S::setzero_pd(), v), v, h_and_2),
-            S::blendv_pd(S::sub_pd(S::setzero_pd(), w), w, h_and_4),
-        ),
-    )
+    h_and_1.blendv(S::Vf64::zeroes() - u, u)
+        + (h_and_2.blendv(S::Vf64::zeroes() - v, v) + h_and_4.blendv(S::Vf64::zeroes() - w, w))
 }
diff --git a/src/noise/hash3d_32.rs b/src/noise/hash3d_32.rs
index d91f790..94bca8e 100644
--- a/src/noise/hash3d_32.rs
+++ b/src/noise/hash3d_32.rs
@@ -1,4 +1,4 @@
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 pub struct Hash3d<S: Simd> {
     // Masks guiding dimension selection
@@ -28,24 +28,21 @@ where
 /// Compute hash values used by `grad3d` and `grad3d_dot`
 
 #[inline(always)]
-pub unsafe fn hash3d<S: Simd>(seed: i32, i: S::Vi32, j: S::Vi32, k: S::Vi32) -> Hash3d<S> {
+pub fn hash3d<S: Simd>(seed: i32, i: S::Vi32, j: S::Vi32, k: S::Vi32) -> Hash3d<S> {
     // It seems that this function is inspired by FastNoise-SIMD and Auburn/FastNoise2Simd
     // https://github.com/jackmott/FastNoise-SIMD/blob/31c4a74d649ef4bc93aaabe4bf94fa81e4c0eadc/FastNoise/FastNoise3d.cpp#L348-L353
     //
-    let mut hash = S::xor_epi32(i, S::set1_epi32(seed));
-    hash = S::xor_epi32(j, hash);
-    hash = S::xor_epi32(k, hash);
-    hash = S::mullo_epi32(
-        S::mullo_epi32(S::mullo_epi32(hash, hash), S::set1_epi32(60493)),
-        hash,
-    );
-    hash = S::xor_epi32(S::srai_epi32(hash, 13), hash);
-    let hasha13 = S::and_epi32(hash, S::set1_epi32(13));
+    let mut hash = i ^ S::Vi32::set1(seed);
+    hash = j ^ hash;
+    hash = k ^ hash;
+    hash = ((hash * hash) * S::Vi32::set1(60493)) * hash;
+    hash = (hash >> 13) ^ hash;
+    let hasha13 = hash & S::Vi32::set1(13);
     Hash3d::new(
-        S::castepi32_ps(S::cmplt_epi32(hasha13, S::set1_epi32(8))),
-        S::castepi32_ps(S::cmplt_epi32(hasha13, S::set1_epi32(2))),
-        S::castepi32_ps(S::cmpeq_epi32(S::set1_epi32(12), hasha13)),
-        S::castepi32_ps(S::slli_epi32(hash, 31)),
-        S::castepi32_ps(S::slli_epi32(S::and_epi32(hash, S::set1_epi32(2)), 30)),
+        (hasha13.cmp_lt(S::Vi32::set1(8))).bitcast_f32(),
+        (hasha13.cmp_lt(S::Vi32::set1(2))).bitcast_f32(),
+        (hasha13).cmp_eq(S::Vi32::set1(12)).bitcast_f32(),
+        (hash << 31).bitcast_f32(),
+        ((hash & S::Vi32::set1(2)) << 30).bitcast_f32(),
     )
 }
diff --git a/src/noise/hash3d_64.rs b/src/noise/hash3d_64.rs
index 9535034..da7d4ba 100644
--- a/src/noise/hash3d_64.rs
+++ b/src/noise/hash3d_64.rs
@@ -1,4 +1,4 @@
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 pub struct Hash3d<S: Simd> {
     // Masks guiding dimension selection
@@ -15,6 +15,7 @@ impl<S> Hash3d<S>
 where
     S: Simd,
 {
+    #[allow(dead_code)]
     pub fn new(l8: S::Vf64, l4: S::Vf64, h12_or_14: S::Vf64, h1: S::Vf64, h2: S::Vf64) -> Self {
         Self {
             l8,
@@ -28,25 +29,27 @@ where
 /// Compute hash values used by `grad3d` and `grad3d_dot`
 
 #[inline(always)]
-pub unsafe fn hash3d<S: Simd>(seed: i64, i: S::Vi64, j: S::Vi64, k: S::Vi64) -> Hash3d<S> {
+#[allow(unused_variables)]
+pub fn hash3d<S: Simd>(seed: i64, i: S::Vi64, j: S::Vi64, k: S::Vi64) -> Hash3d<S> {
     // This 64 bit variant is not implemented.
     // The codeblock below is just the 64 bit SIMD instructions with the 32 bit magic numbers.
     // I don't know what values the Hash3d fields should hold or what magic number are needed for the bit shifts.
     unimplemented!();
-    let mut hash = S::xor_epi64(i, S::set1_epi64(seed));
-    hash = S::xor_epi64(j, hash);
-    hash = S::xor_epi64(k, hash);
-    hash = S::mullo_epi64(
-        S::mullo_epi64(S::mullo_epi64(hash, hash), S::set1_epi64(60493)),
-        hash,
+    /*
+    let mut hash = i ^ S::Vi64::set1(seed);
+    hash = j ^ hash;
+    hash = k ^ hash;
+    hash = (
+        ((hash * hash), S::Vi64::set1(60493)) * hash
     );
-    hash = S::xor_epi64(S::srai_epi64(hash, 13), hash);
-    let hasha13 = S::and_epi64(hash, S::set1_epi64(13));
+    hash = (hash >> 13) ^ hash;
+    let hasha13 = (hash & S::Vi64::set1(13));
     Hash3d::new(
-        S::castepi64_pd(S::cmplt_epi64(hasha13, S::set1_epi64(8))),
-        S::castepi64_pd(S::cmplt_epi64(hasha13, S::set1_epi64(2))),
-        S::castepi64_pd(S::cmpeq_epi64(S::set1_epi64(12), hasha13)),
-        S::castepi64_pd(S::slli_epi64(hash, 31)),
-        S::castepi64_pd(S::slli_epi64(S::and_epi64(hash, S::set1_epi64(2)), 30)),
+        hasha13.cmp_lt(S::Vi64::set1(8)).cast_f64(),
+        hasha13.cmp_lt(S::Vi64::set1(2)).cast_f64(),
+        hasha13.cmp_eq( S::Vi64::set1(12)).cast_f64(),
+        S::slli_epi64(hash, 31).cast_f64(),
+        S::slli_epi64((hash & S::Vi64::set1(2)), 30).cast_f64(),
     )
+    */
 }
diff --git a/src/noise/mod.rs b/src/noise/mod.rs
index 48f4590..f7ab2db 100644
--- a/src/noise/mod.rs
+++ b/src/noise/mod.rs
@@ -13,6 +13,7 @@ mod gradient_32;
 mod gradient_64;
 mod hash3d_32;
 mod hash3d_64;
+pub mod ops;
 pub mod ridge_32;
 pub mod ridge_64;
 pub mod simplex_32;
diff --git a/src/noise/ops.rs b/src/noise/ops.rs
new file mode 100644
index 0000000..46727cd
--- /dev/null
+++ b/src/noise/ops.rs
@@ -0,0 +1,25 @@
+use simdeez::prelude::*;
+
+/// # Safety
+/// Every lane of `indices` must be non-negative and less than `arr.len()`.
+#[inline(always)]
+pub unsafe fn gather_32<S: Simd>(arr: &[i32], indices: S::Vi32) -> S::Vi32 {
+    let width = S::Vi32::WIDTH;
+    let mut dst = S::Vi32::zeroes();
+    for i in 0..width {
+        *dst.get_unchecked_mut(i) = *arr.get_unchecked(indices[i] as usize);
+    }
+    dst
+}
+
+/// # Safety
+/// Every lane of `indices` must be non-negative and less than `arr.len()`.
+#[inline(always)]
+pub unsafe fn gather_64<S: Simd>(arr: &[i64], indices: S::Vi64) -> S::Vi64 {
+    let width = S::Vi64::WIDTH;
+    let mut dst = S::Vi64::zeroes();
+    for i in 0..width {
+        *dst.get_unchecked_mut(i) = *arr.get_unchecked(indices[i] as usize);
+    }
+    dst
+}
diff --git a/src/noise/ridge_32.rs b/src/noise/ridge_32.rs
index 01eb3f3..2cd1acd 100644
--- a/src/noise/ridge_32.rs
+++ b/src/noise/ridge_32.rs
@@ -1,32 +1,29 @@
 use crate::noise::simplex_32::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn ridge_1d<S: Simd>(
+pub fn ridge_1d<S: Simd>(
     mut x: S::Vf32,
     lacunarity: S::Vf32,
     gain: S::Vf32,
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut amp = S::set1_ps(1.0);
-    let mut result = S::sub_ps(S::set1_ps(1.0), S::abs_ps(simplex_1d::<S>(x, seed)));
+    let mut amp = S::Vf32::set1(1.0);
+    let mut result = S::Vf32::set1(1.0) - simplex_1d::<S>(x, seed).abs();
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lacunarity);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::sub_ps(S::set1_ps(1.0), S::abs_ps(simplex_1d::<S>(x, seed))),
-        );
+        x = x * lacunarity;
+        amp = amp * gain;
+        result = result + (S::Vf32::set1(1.0) - simplex_1d::<S>(x, seed).abs());
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn ridge_2d<S: Simd>(
+pub fn ridge_2d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     lac: S::Vf32,
@@ -34,24 +31,22 @@ pub unsafe fn ridge_2d<S: Simd>(
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut result = S::sub_ps(S::set1_ps(1.0), S::abs_ps(simplex_2d::<S>(x, y, seed)));
-    let mut amp = S::set1_ps(1.0);
+    let mut result = S::Vf32::set1(1.0) - simplex_2d::<S>(x, y, seed).abs();
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::fnmadd_ps(S::abs_ps(simplex_2d::<S>(x, y, seed)), amp, S::set1_ps(1.0)),
-        );
+        x = x * lac;
+        y = y * lac;
+        amp = amp * gain;
+        result = result
+            + S::Vf32::neg_mul_add(simplex_2d::<S>(x, y, seed).abs(), amp, S::Vf32::set1(1.0));
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn ridge_3d<S: Simd>(
+pub fn ridge_3d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     mut z: S::Vf32,
@@ -60,29 +55,27 @@ pub unsafe fn ridge_3d<S: Simd>(
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut result = S::sub_ps(S::set1_ps(1.0), S::abs_ps(simplex_3d::<S>(x, y, z, seed)));
-    let mut amp = S::set1_ps(1.0);
+    let mut result = S::Vf32::set1(1.0) - simplex_3d::<S>(x, y, z, seed).abs();
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        z = S::mul_ps(z, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::fnmadd_ps(
-                S::abs_ps(simplex_3d::<S>(x, y, z, seed)),
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        amp = amp * gain;
+        result = result
+            + S::Vf32::neg_mul_add(
+                simplex_3d::<S>(x, y, z, seed).abs(),
                 amp,
-                S::set1_ps(1.0),
-            ),
-        );
+                S::Vf32::set1(1.0),
+            );
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn ridge_4d<S: Simd>(
+pub fn ridge_4d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     mut z: S::Vf32,
@@ -92,25 +85,16 @@ pub unsafe fn ridge_4d<S: Simd>(
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut result = S::sub_ps(
-        S::set1_ps(1.0),
-        S::abs_ps(simplex_4d::<S>(x, y, z, w, seed)),
-    );
-    let mut amp = S::set1_ps(1.0);
+    let mut result = S::Vf32::set1(1.0) - simplex_4d::<S>(x, y, z, w, seed).abs();
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        z = S::mul_ps(z, lac);
-        w = S::mul_ps(w, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::sub_ps(
-                S::set1_ps(1.0),
-                S::abs_ps(S::mul_ps(simplex_4d::<S>(x, y, z, w, seed), amp)),
-            ),
-        );
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        w = w * lac;
+        amp = amp * gain;
+        result = result + (S::Vf32::set1(1.0) - (simplex_4d::<S>(x, y, z, w, seed) * amp).abs());
     }
 
     result
diff --git a/src/noise/ridge_64.rs b/src/noise/ridge_64.rs
index 342c72c..716cf7d 100644
--- a/src/noise/ridge_64.rs
+++ b/src/noise/ridge_64.rs
@@ -1,32 +1,29 @@
 use crate::noise::simplex_64::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn ridge_1d<S: Simd>(
+pub fn ridge_1d<S: Simd>(
     mut x: S::Vf64,
     lacunarity: S::Vf64,
     gain: S::Vf64,
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut amp = S::set1_pd(1.0);
-    let mut result = S::sub_pd(S::set1_pd(1.0), S::abs_pd(simplex_1d::<S>(x, seed)));
+    let mut amp = S::Vf64::set1(1.0);
+    let mut result = S::Vf64::set1(1.0) - simplex_1d::<S>(x, seed).abs();
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lacunarity);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::sub_pd(S::set1_pd(1.0), S::abs_pd(simplex_1d::<S>(x, seed))),
-        );
+        x = x * lacunarity;
+        amp = amp * gain;
+        result = result + (S::Vf64::set1(1.0) - simplex_1d::<S>(x, seed).abs());
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn ridge_2d<S: Simd>(
+pub fn ridge_2d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     lac: S::Vf64,
@@ -34,24 +31,22 @@ pub unsafe fn ridge_2d<S: Simd>(
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut result = S::sub_pd(S::set1_pd(1.0), S::abs_pd(simplex_2d::<S>(x, y, seed)));
-    let mut amp = S::set1_pd(1.0);
+    let mut result = S::Vf64::set1(1.0) - simplex_2d::<S>(x, y, seed).abs();
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::fnmadd_pd(S::abs_pd(simplex_2d::<S>(x, y, seed)), amp, S::set1_pd(1.0)),
-        );
+        x = x * lac;
+        y = y * lac;
+        amp = amp * gain;
+        result = result
+            + S::Vf64::neg_mul_add(simplex_2d::<S>(x, y, seed).abs(), amp, S::Vf64::set1(1.0));
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn ridge_3d<S: Simd>(
+pub fn ridge_3d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     mut z: S::Vf64,
@@ -60,29 +55,27 @@ pub unsafe fn ridge_3d<S: Simd>(
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut result = S::sub_pd(S::set1_pd(1.0), S::abs_pd(simplex_3d::<S>(x, y, z, seed)));
-    let mut amp = S::set1_pd(1.0);
+    let mut result = S::Vf64::set1(1.0) - simplex_3d::<S>(x, y, z, seed).abs();
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        z = S::mul_pd(z, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::fnmadd_pd(
-                S::abs_pd(simplex_3d::<S>(x, y, z, seed)),
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        amp = amp * gain;
+        result = result
+            + S::Vf64::neg_mul_add(
+                simplex_3d::<S>(x, y, z, seed).abs(),
                 amp,
-                S::set1_pd(1.0),
-            ),
-        );
+                S::Vf64::set1(1.0),
+            );
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn ridge_4d<S: Simd>(
+pub fn ridge_4d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     mut z: S::Vf64,
@@ -92,25 +85,16 @@ pub unsafe fn ridge_4d<S: Simd>(
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut result = S::sub_pd(
-        S::set1_pd(1.0),
-        S::abs_pd(simplex_4d::<S>(x, y, z, w, seed)),
-    );
-    let mut amp = S::set1_pd(1.0);
+    let mut result = S::Vf64::set1(1.0) - simplex_4d::<S>(x, y, z, w, seed).abs();
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        z = S::mul_pd(z, lac);
-        w = S::mul_pd(w, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::sub_pd(
-                S::set1_pd(1.0),
-                S::abs_pd(S::mul_pd(simplex_4d::<S>(x, y, z, w, seed), amp)),
-            ),
-        );
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        w = w * lac;
+        amp = amp * gain;
+        result = result + (S::Vf64::set1(1.0) - (simplex_4d::<S>(x, y, z, w, seed) * amp).abs());
     }
 
     result
diff --git a/src/noise/simplex_32.rs b/src/noise/simplex_32.rs
index 4343409..77a094c 100644
--- a/src/noise/simplex_32.rs
+++ b/src/noise/simplex_32.rs
@@ -5,8 +5,9 @@
 
 use crate::noise::cellular_32::{X_PRIME_32, Y_PRIME_32, Z_PRIME_32};
 use crate::noise::gradient_32::{grad1, grad2, grad3d, grad3d_dot, grad4};
+use crate::noise::ops::gather_32;
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 use std::f32;
 use std::f64;
@@ -40,7 +41,7 @@ pub const G34_64: f64 = 3.0 * G4_64;
 const G44_32: f32 = 4.0 * G4_32;
 pub const G44_64: f64 = 4.0 * G4_64;
 
-const PERM: [i32; 512] = [
+static PERM: [i32; 512] = [
     151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69,
     142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219,
     203, 117, 35, 11, 32, 57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175,
@@ -68,39 +69,51 @@ const PERM: [i32; 512] = [
     222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180,
 ];
 
+#[inline(always)]
+fn assert_in_perm_range<S: Simd>(values: S::Vi32) {
+    debug_assert!(values
+        .cmp_lt(S::Vi32::set1(PERM.len() as i32))
+        .iter()
+        .all(|is_less_than| is_less_than != 0));
+}
+
 /// Like `simplex_1d`, but also computes the derivative
 #[inline(always)]
-pub unsafe fn simplex_1d_deriv<S: Simd>(x: S::Vf32, seed: i32) -> (S::Vf32, S::Vf32) {
+pub fn simplex_1d_deriv<S: Simd>(x: S::Vf32, seed: i32) -> (S::Vf32, S::Vf32) {
     // Gradients are selected deterministically based on the whole part of `x`
-    let ips = S::fast_floor_ps(x);
-    let mut i0 = S::cvtps_epi32(ips);
-    let i1 = S::and_epi32(S::add_epi32(i0, S::set1_epi32(1)), S::set1_epi32(0xff));
+    let ips = x.fast_floor();
+    let mut i0 = ips.cast_i32();
+    let i1 = (i0 + S::Vi32::set1(1)) & S::Vi32::set1(0xff);
 
     // the fractional part of x, i.e. the distance to the left gradient node. 0 ≤ x0 < 1.
-    let x0 = S::sub_ps(x, ips);
+    let x0 = x - ips;
     // signed distance to the right gradient node
-    let x1 = S::sub_ps(x0, S::set1_ps(1.0));
-
-    i0 = S::and_epi32(i0, S::set1_epi32(0xff));
-    let gi0 = S::i32gather_epi32(&PERM, i0);
-    let gi1 = S::i32gather_epi32(&PERM, i1);
+    let x1 = x0 - S::Vf32::set1(1.0);
+
+    i0 = i0 & S::Vi32::set1(0xff);
+    let (gi0, gi1) = unsafe {
+        // SAFETY: i0 and i1 were just masked with 0xff, so every lane is in 0..=255.
+        let gi0 = gather_32::<S>(&PERM, i0);
+        let gi1 = gather_32::<S>(&PERM, i1);
+        (gi0, gi1)
+    };
 
     // Compute the contribution from the first gradient
-    let x20 = S::mul_ps(x0, x0); // x^2_0
-    let t0 = S::sub_ps(S::set1_ps(1.0), x20); // t_0
-    let t20 = S::mul_ps(t0, t0); // t^2_0
-    let t40 = S::mul_ps(t20, t20); // t^4_0
+    let x20 = x0 * x0; // x^2_0
+    let t0 = S::Vf32::set1(1.0) - x20; // t_0
+    let t20 = t0 * t0; // t^2_0
+    let t40 = t20 * t20; // t^4_0
     let gx0 = grad1::<S>(seed, gi0);
-    let n0 = S::mul_ps(t40, gx0 * x0);
+    let n0 = t40 * (gx0 * x0);
     // n0 = (1 - x0^2)^4 * x0 * grad
 
     // Compute the contribution from the second gradient
-    let x21 = S::mul_ps(x1, x1); // x^2_1
-    let t1 = S::sub_ps(S::set1_ps(1.0), x21); // t_1
-    let t21 = S::mul_ps(t1, t1); // t^2_1
-    let t41 = S::mul_ps(t21, t21); // t^4_1
+    let x21 = x1 * x1; // x^2_1
+    let t1 = S::Vf32::set1(1.0) - x21; // t_1
+    let t21 = t1 * t1; // t^2_1
+    let t41 = t21 * t21; // t^4_1
     let gx1 = grad1::<S>(seed, gi1);
-    let n1 = S::mul_ps(t41, gx1 * x1);
+    let n1 = t41 * (gx1 * x1);
 
     // n0 + n1 =
     //    grad0 * x0 * (1 - x0^2)^4
@@ -115,10 +128,11 @@ pub unsafe fn simplex_1d_deriv<S: Simd>(x: S::Vf32, seed: i32) -> (S::Vf32, S::V
     // allowing us to scale into [-1, 1]
     const SCALE: f32 = 256.0 / (81.0 * 7.0);
 
-    let value = S::add_ps(n0, n1) * S::set1_ps(SCALE);
-    let derivative =
-        ((t20 * t0 * gx0 * x20 + t21 * t1 * gx1 * x21) * S::set1_ps(-8.0) + t40 * gx0 + t41 * gx1)
-            * S::set1_ps(SCALE);
+    let value = (n0 + n1) * S::Vf32::set1(SCALE);
+    let derivative = ((t20 * t0 * gx0 * x20 + t21 * t1 * gx1 * x21) * S::Vf32::set1(-8.0)
+        + t40 * gx0
+        + t41 * gx1)
+        * S::Vf32::set1(SCALE);
     (value, derivative)
 }
 
@@ -126,7 +140,7 @@ pub unsafe fn simplex_1d_deriv<S: Simd>(x: S::Vf32, seed: i32) -> (S::Vf32, S::V
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_1d<S: Simd>(x: S::Vf32, seed: i32) -> S::Vf32 {
+pub fn simplex_1d<S: Simd>(x: S::Vf32, seed: i32) -> S::Vf32 {
     simplex_1d_deriv::<S>(x, seed).0
 }
 
@@ -134,81 +148,77 @@ pub unsafe fn simplex_1d<S: Simd>(x: S::Vf32, seed: i32) -> S::Vf32 {
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_2d<S: Simd>(x: S::Vf32, y: S::Vf32, seed: i32) -> S::Vf32 {
+pub fn simplex_2d<S: Simd>(x: S::Vf32, y: S::Vf32, seed: i32) -> S::Vf32 {
     simplex_2d_deriv::<S>(x, y, seed).0
 }
 
 /// Like `simplex_2d`, but also computes the derivative
 #[inline(always)]
-pub unsafe fn simplex_2d_deriv<S: Simd>(
-    x: S::Vf32,
-    y: S::Vf32,
-    seed: i32,
-) -> (S::Vf32, [S::Vf32; 2]) {
+pub fn simplex_2d_deriv<S: Simd>(x: S::Vf32, y: S::Vf32, seed: i32) -> (S::Vf32, [S::Vf32; 2]) {
     // Skew to distort simplexes with side length sqrt(2)/sqrt(3) until they make up
     // squares
-    let s = S::mul_ps(S::set1_ps(F2_32), S::add_ps(x, y));
-    let ips = S::floor_ps(S::add_ps(x, s));
-    let jps = S::floor_ps(S::add_ps(y, s));
+    let s = S::Vf32::set1(F2_32) * (x + y);
+    let ips = (x + s).floor();
+    let jps = (y + s).floor();
 
     // Integer coordinates for the base vertex of the triangle
-    let i = S::cvtps_epi32(ips);
-    let j = S::cvtps_epi32(jps);
+    let i = ips.cast_i32();
+    let j = jps.cast_i32();
 
-    let t = S::mul_ps(S::cvtepi32_ps(S::add_epi32(i, j)), S::set1_ps(G2_32));
+    let t = (i + j).cast_f32() * S::Vf32::set1(G2_32);
 
     // Unskewed distances to the first point of the enclosing simplex
-    let x0 = S::sub_ps(x, S::sub_ps(ips, t));
-    let y0 = S::sub_ps(y, S::sub_ps(jps, t));
+    let x0 = x - (ips - t);
+    let y0 = y - (jps - t);
 
-    let i1 = S::castps_epi32(S::cmpge_ps(x0, y0));
+    let i1 = (x0.cmp_gte(y0)).bitcast_i32();
 
-    let j1 = S::castps_epi32(S::cmpgt_ps(y0, x0));
+    let j1 = (y0.cmp_gt(x0)).bitcast_i32();
 
     // Distances to the second and third points of the enclosing simplex
-    let x1 = S::add_ps(S::add_ps(x0, S::cvtepi32_ps(i1)), S::set1_ps(G2_32));
-    let y1 = S::add_ps(S::add_ps(y0, S::cvtepi32_ps(j1)), S::set1_ps(G2_32));
-    let x2 = S::add_ps(S::add_ps(x0, S::set1_ps(-1.0)), S::set1_ps(G22_32));
-    let y2 = S::add_ps(S::add_ps(y0, S::set1_ps(-1.0)), S::set1_ps(G22_32));
-
-    let ii = S::and_epi32(i, S::set1_epi32(0xff));
-    let jj = S::and_epi32(j, S::set1_epi32(0xff));
-
-    let gi0 = S::i32gather_epi32(&PERM, S::add_epi32(ii, S::i32gather_epi32(&PERM, jj)));
-
-    let gi1 = S::i32gather_epi32(
-        &PERM,
-        S::add_epi32(
-            S::sub_epi32(ii, i1),
-            S::i32gather_epi32(&PERM, S::sub_epi32(jj, j1)),
-        ),
-    );
-
-    let gi2 = S::i32gather_epi32(
-        &PERM,
-        S::add_epi32(
-            S::sub_epi32(ii, S::set1_epi32(-1)),
-            S::i32gather_epi32(&PERM, S::sub_epi32(jj, S::set1_epi32(-1))),
-        ),
-    );
+    let x1 = (x0 + i1.cast_f32()) + S::Vf32::set1(G2_32);
+    let y1 = (y0 + j1.cast_f32()) + S::Vf32::set1(G2_32);
+    let x2 = (x0 + S::Vf32::set1(-1.0)) + S::Vf32::set1(G22_32);
+    let y2 = (y0 + S::Vf32::set1(-1.0)) + S::Vf32::set1(G22_32);
+
+    let ii = i & S::Vi32::set1(0xff);
+    let jj = j & S::Vi32::set1(0xff);
+
+    let (gi0, gi1, gi2) = unsafe {
+        assert_in_perm_range::<S>(ii);
+        assert_in_perm_range::<S>(jj);
+        assert_in_perm_range::<S>(ii - i1);
+        assert_in_perm_range::<S>(jj - j1);
+        assert_in_perm_range::<S>(ii + 1);
+        assert_in_perm_range::<S>(jj + 1);
+
+        let gi0 = gather_32::<S>(&PERM, ii + gather_32::<S>(&PERM, jj));
+        let gi1 = gather_32::<S>(&PERM, (ii - i1) + gather_32::<S>(&PERM, jj - j1));
+        let gi2 = gather_32::<S>(
+            &PERM,
+            (ii - S::Vi32::set1(-1)) + gather_32::<S>(&PERM, jj - S::Vi32::set1(-1)),
+        );
+
+        (gi0, gi1, gi2)
+    };
 
     // Weights associated with the gradients at each corner
     // These FMA operations are equivalent to: let t = 0.5 - x*x - y*y
-    let mut t0 = S::fnmadd_ps(y0, y0, S::fnmadd_ps(x0, x0, S::set1_ps(0.5)));
-    let mut t1 = S::fnmadd_ps(y1, y1, S::fnmadd_ps(x1, x1, S::set1_ps(0.5)));
-    let mut t2 = S::fnmadd_ps(y2, y2, S::fnmadd_ps(x2, x2, S::set1_ps(0.5)));
+    let mut t0 = S::Vf32::neg_mul_add(y0, y0, S::Vf32::neg_mul_add(x0, x0, S::Vf32::set1(0.5)));
+    let mut t1 = S::Vf32::neg_mul_add(y1, y1, S::Vf32::neg_mul_add(x1, x1, S::Vf32::set1(0.5)));
+    let mut t2 = S::Vf32::neg_mul_add(y2, y2, S::Vf32::neg_mul_add(x2, x2, S::Vf32::set1(0.5)));
 
     // Zero out negative weights
-    t0 &= S::cmpge_ps(t0, S::setzero_ps());
-    t1 &= S::cmpge_ps(t1, S::setzero_ps());
-    t2 &= S::cmpge_ps(t2, S::setzero_ps());
+    t0 &= t0.cmp_gte(S::Vf32::zeroes());
+    t1 &= t1.cmp_gte(S::Vf32::zeroes());
+    t2 &= t2.cmp_gte(S::Vf32::zeroes());
 
-    let t20 = S::mul_ps(t0, t0);
-    let t40 = S::mul_ps(t20, t20);
-    let t21 = S::mul_ps(t1, t1);
-    let t41 = S::mul_ps(t21, t21);
-    let t22 = S::mul_ps(t2, t2);
-    let t42 = S::mul_ps(t22, t22);
+    let t20 = t0 * t0;
+    let t40 = t20 * t20;
+    let t21 = t1 * t1;
+    let t41 = t21 * t21;
+    let t22 = t2 * t2;
+    let t42 = t22 * t22;
 
     let [gx0, gy0] = grad2::<S>(seed, gi0);
     let g0 = gx0 * x0 + gy0 * y0;
@@ -221,8 +231,8 @@ pub unsafe fn simplex_2d_deriv<S: Simd>(
     let n2 = t42 * g2;
 
     // Scaling factor found by numerical approximation
-    let scale = S::set1_ps(45.26450774985561631259);
-    let value = S::add_ps(n0, S::add_ps(n1, n2)) * scale;
+    let scale = S::Vf32::set1(45.26450774985561631259);
+    let value = (n0 + (n1 + n2)) * scale;
     let derivative = {
         let temp0 = t20 * t0 * g0;
         let mut dnoise_dx = temp0 * x0;
@@ -233,8 +243,8 @@ pub unsafe fn simplex_2d_deriv<S: Simd>(
         let temp2 = t22 * t2 * g2;
         dnoise_dx += temp2 * x2;
         dnoise_dy += temp2 * y2;
-        dnoise_dx *= S::set1_ps(-8.0);
-        dnoise_dy *= S::set1_ps(-8.0);
+        dnoise_dx *= S::Vf32::set1(-8.0);
+        dnoise_dy *= S::Vf32::set1(-8.0);
         dnoise_dx += t40 * gx0 + t41 * gx1 + t42 * gx2;
         dnoise_dy += t40 * gy0 + t41 * gy1 + t42 * gy2;
         dnoise_dx *= scale;
@@ -248,98 +258,74 @@ pub unsafe fn simplex_2d_deriv<S: Simd>(
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_3d<S: Simd>(x: S::Vf32, y: S::Vf32, z: S::Vf32, seed: i32) -> S::Vf32 {
+pub fn simplex_3d<S: Simd>(x: S::Vf32, y: S::Vf32, z: S::Vf32, seed: i32) -> S::Vf32 {
     simplex_3d_deriv::<S>(x, y, z, seed).0
 }
 
 /// Like `simplex_3d`, but also computes the derivative
 #[inline(always)]
-pub unsafe fn simplex_3d_deriv<S: Simd>(
+pub fn simplex_3d_deriv<S: Simd>(
     x: S::Vf32,
     y: S::Vf32,
     z: S::Vf32,
     seed: i32,
 ) -> (S::Vf32, [S::Vf32; 3]) {
     // Find skewed simplex grid coordinates associated with the input coordinates
-    let f = S::mul_ps(S::set1_ps(F3_32), S::add_ps(S::add_ps(x, y), z));
-    let mut x0 = S::fast_floor_ps(S::add_ps(x, f));
-    let mut y0 = S::fast_floor_ps(S::add_ps(y, f));
-    let mut z0 = S::fast_floor_ps(S::add_ps(z, f));
+    let f = S::Vf32::set1(F3_32) * ((x + y) + z);
+    let mut x0 = (x + f).fast_floor();
+    let mut y0 = (y + f).fast_floor();
+    let mut z0 = (z + f).fast_floor();
 
     // Integer grid coordinates
-    let i = S::mullo_epi32(S::cvtps_epi32(x0), S::set1_epi32(X_PRIME_32));
-    let j = S::mullo_epi32(S::cvtps_epi32(y0), S::set1_epi32(Y_PRIME_32));
-    let k = S::mullo_epi32(S::cvtps_epi32(z0), S::set1_epi32(Z_PRIME_32));
+    let i = x0.cast_i32() * S::Vi32::set1(X_PRIME_32);
+    let j = y0.cast_i32() * S::Vi32::set1(Y_PRIME_32);
+    let k = z0.cast_i32() * S::Vi32::set1(Z_PRIME_32);
 
     // Compute distance from first simplex vertex to input coordinates
-    let g = S::mul_ps(S::set1_ps(G3_32), S::add_ps(S::add_ps(x0, y0), z0));
-    x0 = S::sub_ps(x, S::sub_ps(x0, g));
-    y0 = S::sub_ps(y, S::sub_ps(y0, g));
-    z0 = S::sub_ps(z, S::sub_ps(z0, g));
+    let g = S::Vf32::set1(G3_32) * ((x0 + y0) + z0);
+    x0 = x - (x0 - g);
+    y0 = y - (y0 - g);
+    z0 = z - (z0 - g);
 
-    let x0_ge_y0 = S::cmpge_ps(x0, y0);
-    let y0_ge_z0 = S::cmpge_ps(y0, z0);
-    let x0_ge_z0 = S::cmpge_ps(x0, z0);
+    let x0_ge_y0 = x0.cmp_gte(y0);
+    let y0_ge_z0 = y0.cmp_gte(z0);
+    let x0_ge_z0 = x0.cmp_gte(z0);
 
     let i1 = x0_ge_y0 & x0_ge_z0;
-    let j1 = S::andnot_ps(x0_ge_y0, y0_ge_z0);
-    let k1 = S::andnot_ps(x0_ge_z0, !y0_ge_z0);
+    let j1 = y0_ge_z0.and_not(x0_ge_y0);
+    let k1 = (!y0_ge_z0).and_not(x0_ge_z0);
 
     let i2 = x0_ge_y0 | x0_ge_z0;
     let j2 = (!x0_ge_y0) | y0_ge_z0;
     let k2 = !(x0_ge_z0 & y0_ge_z0);
 
     // Compute distances from remaining simplex vertices to input coordinates
-    let x1 = S::add_ps(S::sub_ps(x0, i1 & S::set1_ps(1.0)), S::set1_ps(G3_32));
-    let y1 = S::add_ps(S::sub_ps(y0, j1 & S::set1_ps(1.0)), S::set1_ps(G3_32));
-    let z1 = S::add_ps(S::sub_ps(z0, k1 & S::set1_ps(1.0)), S::set1_ps(G3_32));
+    let x1 = x0 - (i1 & S::Vf32::set1(1.0)) + S::Vf32::set1(G3_32);
+    let y1 = y0 - (j1 & S::Vf32::set1(1.0)) + S::Vf32::set1(G3_32);
+    let z1 = z0 - (k1 & S::Vf32::set1(1.0)) + S::Vf32::set1(G3_32);
 
-    let x2 = S::add_ps(S::sub_ps(x0, i2 & S::set1_ps(1.0)), S::set1_ps(F3_32));
-    let y2 = S::add_ps(S::sub_ps(y0, j2 & S::set1_ps(1.0)), S::set1_ps(F3_32));
-    let z2 = S::add_ps(S::sub_ps(z0, k2 & S::set1_ps(1.0)), S::set1_ps(F3_32));
+    let x2 = x0 - (i2 & S::Vf32::set1(1.0)) + S::Vf32::set1(F3_32);
+    let y2 = y0 - (j2 & S::Vf32::set1(1.0)) + S::Vf32::set1(F3_32);
+    let z2 = z0 - (k2 & S::Vf32::set1(1.0)) + S::Vf32::set1(F3_32);
 
-    let x3 = S::add_ps(x0, S::set1_ps(G33_32));
-    let y3 = S::add_ps(y0, S::set1_ps(G33_32));
-    let z3 = S::add_ps(z0, S::set1_ps(G33_32));
+    let x3 = x0 + S::Vf32::set1(G33_32);
+    let y3 = y0 + S::Vf32::set1(G33_32);
+    let z3 = z0 + S::Vf32::set1(G33_32);
 
     // Compute base weight factors associated with each vertex, `0.6 - v . v` where v is the
     // distance to the vertex. Strictly the constant should be 0.5, but 0.6 is thought by Gustavson
     // to give visually better results at the cost of subtle discontinuities.
     //#define SIMDf_NMUL_ADD(a,b,c) = SIMDf_SUB(c, SIMDf_MUL(a,b)
-    let mut t0 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(S::set1_ps(0.6), S::mul_ps(x0, x0)),
-            S::mul_ps(y0, y0),
-        ),
-        S::mul_ps(z0, z0),
-    );
-    let mut t1 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(S::set1_ps(0.6), S::mul_ps(x1, x1)),
-            S::mul_ps(y1, y1),
-        ),
-        S::mul_ps(z1, z1),
-    );
-    let mut t2 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(S::set1_ps(0.6), S::mul_ps(x2, x2)),
-            S::mul_ps(y2, y2),
-        ),
-        S::mul_ps(z2, z2),
-    );
-    let mut t3 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(S::set1_ps(0.6), S::mul_ps(x3, x3)),
-            S::mul_ps(y3, y3),
-        ),
-        S::mul_ps(z3, z3),
-    );
+    let mut t0 = S::Vf32::set1(0.6) - (x0 * x0) - (y0 * y0) - (z0 * z0);
+    let mut t1 = S::Vf32::set1(0.6) - (x1 * x1) - (y1 * y1) - (z1 * z1);
+    let mut t2 = S::Vf32::set1(0.6) - (x2 * x2) - (y2 * y2) - (z2 * z2);
+    let mut t3 = S::Vf32::set1(0.6) - (x3 * x3) - (y3 * y3) - (z3 * z3);
 
     // Zero out negative weights
-    t0 &= S::cmpge_ps(t0, S::setzero_ps());
-    t1 &= S::cmpge_ps(t1, S::setzero_ps());
-    t2 &= S::cmpge_ps(t2, S::setzero_ps());
-    t3 &= S::cmpge_ps(t3, S::setzero_ps());
+    t0 &= t0.cmp_gte(S::Vf32::zeroes());
+    t1 &= t1.cmp_gte(S::Vf32::zeroes());
+    t2 &= t2.cmp_gte(S::Vf32::zeroes());
+    t3 &= t3.cmp_gte(S::Vf32::zeroes());
 
     // Square each weight
     let t20 = t0 * t0;
@@ -359,50 +345,32 @@ pub unsafe fn simplex_3d_deriv<S: Simd>(
     let g0 = grad3d_dot::<S>(seed, i, j, k, x0, y0, z0);
     let v0 = t40 * g0;
 
-    let v1x = S::add_epi32(
-        i,
-        S::and_epi32(S::castps_epi32(i1), S::set1_epi32(X_PRIME_32)),
-    );
-    let v1y = S::add_epi32(
-        j,
-        S::and_epi32(S::castps_epi32(j1), S::set1_epi32(Y_PRIME_32)),
-    );
-    let v1z = S::add_epi32(
-        k,
-        S::and_epi32(S::castps_epi32(k1), S::set1_epi32(Z_PRIME_32)),
-    );
+    let v1x = i + (i1.bitcast_i32() & S::Vi32::set1(X_PRIME_32));
+    let v1y = j + (j1.bitcast_i32() & S::Vi32::set1(Y_PRIME_32));
+    let v1z = k + (k1.bitcast_i32() & S::Vi32::set1(Z_PRIME_32));
     let g1 = grad3d_dot::<S>(seed, v1x, v1y, v1z, x1, y1, z1);
     let v1 = t41 * g1;
 
-    let v2x = S::add_epi32(
-        i,
-        S::and_epi32(S::castps_epi32(i2), S::set1_epi32(X_PRIME_32)),
-    );
-    let v2y = S::add_epi32(
-        j,
-        S::and_epi32(S::castps_epi32(j2), S::set1_epi32(Y_PRIME_32)),
-    );
-    let v2z = S::add_epi32(
-        k,
-        S::and_epi32(S::castps_epi32(k2), S::set1_epi32(Z_PRIME_32)),
-    );
+    let v2x = i + (i2.bitcast_i32() & S::Vi32::set1(X_PRIME_32));
+    let v2y = j + (j2.bitcast_i32() & S::Vi32::set1(Y_PRIME_32));
+    let v2z = k + (k2.bitcast_i32() & S::Vi32::set1(Z_PRIME_32));
     let g2 = grad3d_dot::<S>(seed, v2x, v2y, v2z, x2, y2, z2);
     let v2 = t42 * g2;
 
     //SIMDf v3 = SIMDf_MASK(n3, SIMDf_MUL(SIMDf_MUL(t3, t3), FUNC(GradCoord)(seed, SIMDi_ADD(i, SIMDi_NUM(xPrime)), SIMDi_ADD(j, SIMDi_NUM(yPrime)), SIMDi_ADD(k, SIMDi_NUM(zPrime)), x3, y3, z3)));
-    let v3x = S::add_epi32(i, S::set1_epi32(X_PRIME_32));
-    let v3y = S::add_epi32(j, S::set1_epi32(Y_PRIME_32));
-    let v3z = S::add_epi32(k, S::set1_epi32(Z_PRIME_32));
+    let v3x = i + S::Vi32::set1(X_PRIME_32);
+    let v3y = j + S::Vi32::set1(Y_PRIME_32);
+    let v3z = k + S::Vi32::set1(Z_PRIME_32);
     //define SIMDf_MASK(m,a) SIMDf_AND(SIMDf_CAST_TO_FLOAT(m),a)
     let g3 = grad3d_dot::<S>(seed, v3x, v3y, v3z, x3, y3, z3);
     let v3 = t43 * g3;
 
-    let p1 = S::add_ps(v3, v2);
-    let p2 = S::add_ps(p1, v1);
+    let p1 = v3 + v2;
+    let p2 = p1 + v1;
 
     // Scaling factor found by numerical approximation
-    let scale = S::set1_ps(32.69587493801679);
-    let result = S::add_ps(p2, v0) * scale;
+    let scale = S::Vf32::set1(32.69587493801679);
+    let result = (p2 + v0) * scale;
     let derivative = {
         let temp0 = t20 * t0 * g0;
         let mut dnoise_dx = temp0 * x0;
@@ -420,9 +388,9 @@ pub unsafe fn simplex_3d_deriv<S: Simd>(
         dnoise_dx += temp3 * x3;
         dnoise_dy += temp3 * y3;
         dnoise_dz += temp3 * z3;
-        dnoise_dx *= S::set1_ps(-8.0);
-        dnoise_dy *= S::set1_ps(-8.0);
-        dnoise_dz *= S::set1_ps(-8.0);
+        dnoise_dx *= S::Vf32::set1(-8.0);
+        dnoise_dy *= S::Vf32::set1(-8.0);
+        dnoise_dz *= S::Vf32::set1(-8.0);
         let [gx0, gy0, gz0] = grad3d::<S>(seed, i, j, k);
         let [gx1, gy1, gz1] = grad3d::<S>(seed, v1x, v1y, v1z);
         let [gx2, gy2, gz2] = grad3d::<S>(seed, v2x, v2y, v2z);
@@ -443,226 +411,174 @@ pub unsafe fn simplex_3d_deriv<S: Simd>(
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_4d<S: Simd>(
-    x: S::Vf32,
-    y: S::Vf32,
-    z: S::Vf32,
-    w: S::Vf32,
-    seed: i32,
-) -> S::Vf32 {
+pub fn simplex_4d<S: Simd>(x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32, seed: i32) -> S::Vf32 {
     //
     // Determine which simplex these points lie in, and compute the distance along each axis to each
     // vertex of the simplex
     //
 
-    let s = S::mul_ps(
-        S::set1_ps(F4_32),
-        S::add_ps(x, S::add_ps(y, S::add_ps(z, w))),
-    );
-
-    let ips = S::floor_ps(S::add_ps(x, s));
-    let jps = S::floor_ps(S::add_ps(y, s));
-    let kps = S::floor_ps(S::add_ps(z, s));
-    let lps = S::floor_ps(S::add_ps(w, s));
-
-    let i = S::cvtps_epi32(ips);
-    let j = S::cvtps_epi32(jps);
-    let k = S::cvtps_epi32(kps);
-    let l = S::cvtps_epi32(lps);
-
-    let t = S::mul_ps(
-        S::cvtepi32_ps(S::add_epi32(i, S::add_epi32(j, S::add_epi32(k, l)))),
-        S::set1_ps(G4_32),
-    );
-    let x0 = S::sub_ps(x, S::sub_ps(ips, t));
-    let y0 = S::sub_ps(y, S::sub_ps(jps, t));
-    let z0 = S::sub_ps(z, S::sub_ps(kps, t));
-    let w0 = S::sub_ps(w, S::sub_ps(lps, t));
-
-    let mut rank_x = S::setzero_epi32();
-    let mut rank_y = S::setzero_epi32();
-    let mut rank_z = S::setzero_epi32();
-    let mut rank_w = S::setzero_epi32();
-
-    let cond = S::castps_epi32(S::cmpgt_ps(x0, y0));
-    rank_x = S::add_epi32(rank_x, S::and_epi32(cond, S::set1_epi32(1)));
-    rank_y = S::add_epi32(rank_y, S::andnot_epi32(cond, S::set1_epi32(1)));
-    let cond = S::castps_epi32(S::cmpgt_ps(x0, z0));
-    rank_x = S::add_epi32(rank_x, S::and_epi32(cond, S::set1_epi32(1)));
-    rank_z = S::add_epi32(rank_z, S::andnot_epi32(cond, S::set1_epi32(1)));
-    let cond = S::castps_epi32(S::cmpgt_ps(x0, w0));
-    rank_x = S::add_epi32(rank_x, S::and_epi32(cond, S::set1_epi32(1)));
-    rank_w = S::add_epi32(rank_w, S::andnot_epi32(cond, S::set1_epi32(1)));
-    let cond = S::castps_epi32(S::cmpgt_ps(y0, z0));
-    rank_y = S::add_epi32(rank_y, S::and_epi32(cond, S::set1_epi32(1)));
-    rank_z = S::add_epi32(rank_z, S::andnot_epi32(cond, S::set1_epi32(1)));
-    let cond = S::castps_epi32(S::cmpgt_ps(y0, w0));
-    rank_y = S::add_epi32(rank_y, S::and_epi32(cond, S::set1_epi32(1)));
-    rank_w = S::add_epi32(rank_w, S::andnot_epi32(cond, S::set1_epi32(1)));
-    let cond = S::castps_epi32(S::cmpgt_ps(z0, w0));
-    rank_z = S::add_epi32(rank_z, S::and_epi32(cond, S::set1_epi32(1)));
-    rank_w = S::add_epi32(rank_w, S::andnot_epi32(cond, S::set1_epi32(1)));
-
-    let cond = S::cmpgt_epi32(rank_x, S::set1_epi32(2));
-    let i1 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_y, S::set1_epi32(2));
-    let j1 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_z, S::set1_epi32(2));
-    let k1 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_w, S::set1_epi32(2));
-    let l1 = S::and_epi32(S::set1_epi32(1), cond);
-
-    let cond = S::cmpgt_epi32(rank_x, S::set1_epi32(1));
-    let i2 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_y, S::set1_epi32(1));
-    let j2 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_z, S::set1_epi32(1));
-    let k2 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_w, S::set1_epi32(1));
-    let l2 = S::and_epi32(S::set1_epi32(1), cond);
-
-    let cond = S::cmpgt_epi32(rank_x, S::setzero_epi32());
-    let i3 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_y, S::setzero_epi32());
-    let j3 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_z, S::setzero_epi32());
-    let k3 = S::and_epi32(S::set1_epi32(1), cond);
-    let cond = S::cmpgt_epi32(rank_w, S::setzero_epi32());
-    let l3 = S::and_epi32(S::set1_epi32(1), cond);
-
-    let x1 = S::add_ps(S::sub_ps(x0, S::cvtepi32_ps(i1)), S::set1_ps(G4_32));
-    let y1 = S::add_ps(S::sub_ps(y0, S::cvtepi32_ps(j1)), S::set1_ps(G4_32));
-    let z1 = S::add_ps(S::sub_ps(z0, S::cvtepi32_ps(k1)), S::set1_ps(G4_32));
-    let w1 = S::add_ps(S::sub_ps(w0, S::cvtepi32_ps(l1)), S::set1_ps(G4_32));
-    let x2 = S::add_ps(S::sub_ps(x0, S::cvtepi32_ps(i2)), S::set1_ps(G24_32));
-    let y2 = S::add_ps(S::sub_ps(y0, S::cvtepi32_ps(j2)), S::set1_ps(G24_32));
-    let z2 = S::add_ps(S::sub_ps(z0, S::cvtepi32_ps(k2)), S::set1_ps(G24_32));
-    let w2 = S::add_ps(S::sub_ps(w0, S::cvtepi32_ps(l2)), S::set1_ps(G24_32));
-    let x3 = S::add_ps(S::sub_ps(x0, S::cvtepi32_ps(i3)), S::set1_ps(G34_32));
-    let y3 = S::add_ps(S::sub_ps(y0, S::cvtepi32_ps(j3)), S::set1_ps(G34_32));
-    let z3 = S::add_ps(S::sub_ps(z0, S::cvtepi32_ps(k3)), S::set1_ps(G34_32));
-    let w3 = S::add_ps(S::sub_ps(w0, S::cvtepi32_ps(l3)), S::set1_ps(G34_32));
-    let x4 = S::add_ps(S::sub_ps(x0, S::set1_ps(1.0)), S::set1_ps(G44_32));
-    let y4 = S::add_ps(S::sub_ps(y0, S::set1_ps(1.0)), S::set1_ps(G44_32));
-    let z4 = S::add_ps(S::sub_ps(z0, S::set1_ps(1.0)), S::set1_ps(G44_32));
-    let w4 = S::add_ps(S::sub_ps(w0, S::set1_ps(1.0)), S::set1_ps(G44_32));
-
-    let ii = S::and_epi32(i, S::set1_epi32(0xff));
-    let jj = S::and_epi32(j, S::set1_epi32(0xff));
-    let kk = S::and_epi32(k, S::set1_epi32(0xff));
-    let ll = S::and_epi32(l, S::set1_epi32(0xff));
-
-    let lp = S::i32gather_epi32(&PERM, ll);
-    let kp = S::i32gather_epi32(&PERM, S::add_epi32(kk, lp));
-    let jp = S::i32gather_epi32(&PERM, S::add_epi32(jj, kp));
-    let gi0 = S::i32gather_epi32(&PERM, S::add_epi32(ii, jp));
-
-    let lp = S::i32gather_epi32(&PERM, S::add_epi32(ll, l1));
-    let kp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(kk, k1), lp));
-    let jp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(jj, j1), kp));
-    let gi1 = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(ii, i1), jp));
-
-    let lp = S::i32gather_epi32(&PERM, S::add_epi32(ll, l2));
-    let kp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(kk, k2), lp));
-    let jp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(jj, j2), kp));
-    let gi2 = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(ii, i2), jp));
-
-    let lp = S::i32gather_epi32(&PERM, S::add_epi32(ll, l3));
-    let kp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(kk, k3), lp));
-    let jp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(jj, j3), kp));
-    let gi3 = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(ii, i3), jp));
-
-    let lp = S::i32gather_epi32(&PERM, S::add_epi32(ll, S::set1_epi32(1)));
-    let kp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(kk, S::set1_epi32(1)), lp));
-    let jp = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(jj, S::set1_epi32(1)), kp));
-    let gi4 = S::i32gather_epi32(&PERM, S::add_epi32(S::add_epi32(ii, S::set1_epi32(1)), jp));
+    let s = S::Vf32::set1(F4_32) * (x + y + z + w);
+
+    let ips = (x + s).floor();
+    let jps = (y + s).floor();
+    let kps = (z + s).floor();
+    let lps = (w + s).floor();
+
+    let i = ips.cast_i32();
+    let j = jps.cast_i32();
+    let k = kps.cast_i32();
+    let l = lps.cast_i32();
+
+    let t = (i + j + k + l).cast_f32() * S::Vf32::set1(G4_32);
+    let x0 = x - (ips - t);
+    let y0 = y - (jps - t);
+    let z0 = z - (kps - t);
+    let w0 = w - (lps - t);
+
+    let mut rank_x = S::Vi32::zeroes();
+    let mut rank_y = S::Vi32::zeroes();
+    let mut rank_z = S::Vi32::zeroes();
+    let mut rank_w = S::Vi32::zeroes();
+
+    let cond = (x0.cmp_gt(y0)).bitcast_i32();
+    rank_x = rank_x + (cond & S::Vi32::set1(1));
+    rank_y = rank_y + S::Vi32::set1(1).and_not(cond);
+    let cond = (x0.cmp_gt(z0)).bitcast_i32();
+    rank_x = rank_x + (cond & S::Vi32::set1(1));
+    rank_z = rank_z + S::Vi32::set1(1).and_not(cond);
+    let cond = (x0.cmp_gt(w0)).bitcast_i32();
+    rank_x = rank_x + (cond & S::Vi32::set1(1));
+    rank_w = rank_w + S::Vi32::set1(1).and_not(cond);
+    let cond = (y0.cmp_gt(z0)).bitcast_i32();
+    rank_y = rank_y + (cond & S::Vi32::set1(1));
+    rank_z = rank_z + S::Vi32::set1(1).and_not(cond);
+    let cond = (y0.cmp_gt(w0)).bitcast_i32();
+    rank_y = rank_y + (cond & S::Vi32::set1(1));
+    rank_w = rank_w + S::Vi32::set1(1).and_not(cond);
+    let cond = (z0.cmp_gt(w0)).bitcast_i32();
+    rank_z = rank_z + (cond & S::Vi32::set1(1));
+    rank_w = rank_w + S::Vi32::set1(1).and_not(cond);
+
+    let cond = rank_x.cmp_gt(S::Vi32::set1(2));
+    let i1 = S::Vi32::set1(1) & cond;
+    let cond = rank_y.cmp_gt(S::Vi32::set1(2));
+    let j1 = S::Vi32::set1(1) & cond;
+    let cond = rank_z.cmp_gt(S::Vi32::set1(2));
+    let k1 = S::Vi32::set1(1) & cond;
+    let cond = rank_w.cmp_gt(S::Vi32::set1(2));
+    let l1 = S::Vi32::set1(1) & cond;
+
+    let cond = rank_x.cmp_gt(S::Vi32::set1(1));
+    let i2 = S::Vi32::set1(1) & cond;
+    let cond = rank_y.cmp_gt(S::Vi32::set1(1));
+    let j2 = S::Vi32::set1(1) & cond;
+    let cond = rank_z.cmp_gt(S::Vi32::set1(1));
+    let k2 = S::Vi32::set1(1) & cond;
+    let cond = rank_w.cmp_gt(S::Vi32::set1(1));
+    let l2 = S::Vi32::set1(1) & cond;
+
+    let cond = rank_x.cmp_gt(S::Vi32::zeroes());
+    let i3 = S::Vi32::set1(1) & cond;
+    let cond = rank_y.cmp_gt(S::Vi32::zeroes());
+    let j3 = S::Vi32::set1(1) & cond;
+    let cond = rank_z.cmp_gt(S::Vi32::zeroes());
+    let k3 = S::Vi32::set1(1) & cond;
+    let cond = rank_w.cmp_gt(S::Vi32::zeroes());
+    let l3 = S::Vi32::set1(1) & cond;
+
+    let x1 = x0 - i1.cast_f32() + S::Vf32::set1(G4_32);
+    let y1 = y0 - j1.cast_f32() + S::Vf32::set1(G4_32);
+    let z1 = z0 - k1.cast_f32() + S::Vf32::set1(G4_32);
+    let w1 = w0 - l1.cast_f32() + S::Vf32::set1(G4_32);
+    let x2 = x0 - i2.cast_f32() + S::Vf32::set1(G24_32);
+    let y2 = y0 - j2.cast_f32() + S::Vf32::set1(G24_32);
+    let z2 = z0 - k2.cast_f32() + S::Vf32::set1(G24_32);
+    let w2 = w0 - l2.cast_f32() + S::Vf32::set1(G24_32);
+    let x3 = x0 - i3.cast_f32() + S::Vf32::set1(G34_32);
+    let y3 = y0 - j3.cast_f32() + S::Vf32::set1(G34_32);
+    let z3 = z0 - k3.cast_f32() + S::Vf32::set1(G34_32);
+    let w3 = w0 - l3.cast_f32() + S::Vf32::set1(G34_32);
+    let x4 = x0 - S::Vf32::set1(1.0) + S::Vf32::set1(G44_32);
+    let y4 = y0 - S::Vf32::set1(1.0) + S::Vf32::set1(G44_32);
+    let z4 = z0 - S::Vf32::set1(1.0) + S::Vf32::set1(G44_32);
+    let w4 = w0 - S::Vf32::set1(1.0) + S::Vf32::set1(G44_32);
+
+    let ii = i & S::Vi32::set1(0xff);
+    let jj = j & S::Vi32::set1(0xff);
+    let kk = k & S::Vi32::set1(0xff);
+    let ll = l & S::Vi32::set1(0xff);
+
+    let (gi0, gi1, gi2, gi3, gi4) = unsafe {
+        // Safety: ii, jj, kk, and ll are masked with 0xff, so each is in 0..=255; the i1..l3 offsets are each 0 or 1.
+        // Everything else added to an index was fetched from PERM, which is assumed to only contain elements in 0..=255.
+        let lp = gather_32::<S>(&PERM, ll);
+        let kp = gather_32::<S>(&PERM, kk + lp);
+        let jp = gather_32::<S>(&PERM, jj + kp);
+        let gi0 = gather_32::<S>(&PERM, ii + jp);
+
+        let lp = gather_32::<S>(&PERM, ll + l1);
+        let kp = gather_32::<S>(&PERM, kk + k1 + lp);
+        let jp = gather_32::<S>(&PERM, jj + j1 + kp);
+        let gi1 = gather_32::<S>(&PERM, ii + i1 + jp);
+
+        let lp = gather_32::<S>(&PERM, ll + l2);
+        let kp = gather_32::<S>(&PERM, kk + k2 + lp);
+        let jp = gather_32::<S>(&PERM, jj + j2 + kp);
+        let gi2 = gather_32::<S>(&PERM, ii + i2 + jp);
+
+        let lp = gather_32::<S>(&PERM, ll + l3);
+        let kp = gather_32::<S>(&PERM, kk + k3 + lp);
+        let jp = gather_32::<S>(&PERM, jj + j3 + kp);
+        let gi3 = gather_32::<S>(&PERM, ii + i3 + jp);
+
+        let lp = gather_32::<S>(&PERM, ll + S::Vi32::set1(1));
+        let kp = gather_32::<S>(&PERM, kk + S::Vi32::set1(1) + lp);
+        let jp = gather_32::<S>(&PERM, jj + S::Vi32::set1(1) + kp);
+        let gi4 = gather_32::<S>(&PERM, ii + S::Vi32::set1(1) + jp);
+        (gi0, gi1, gi2, gi3, gi4)
+    };
 
     //
     // Compute base weight factors associated with each vertex
     //
 
-    let t0 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(
-                S::sub_ps(S::set1_ps(0.5), S::mul_ps(x0, x0)),
-                S::mul_ps(y0, y0),
-            ),
-            S::mul_ps(z0, z0),
-        ),
-        S::mul_ps(w0, w0),
-    );
-    let t1 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(
-                S::sub_ps(S::set1_ps(0.5), S::mul_ps(x1, x1)),
-                S::mul_ps(y1, y1),
-            ),
-            S::mul_ps(z1, z1),
-        ),
-        S::mul_ps(w1, w1),
-    );
-    let t2 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(
-                S::sub_ps(S::set1_ps(0.5), S::mul_ps(x2, x2)),
-                S::mul_ps(y2, y2),
-            ),
-            S::mul_ps(z2, z2),
-        ),
-        S::mul_ps(w2, w2),
-    );
-    let t3 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(
-                S::sub_ps(S::set1_ps(0.5), S::mul_ps(x3, x3)),
-                S::mul_ps(y3, y3),
-            ),
-            S::mul_ps(z3, z3),
-        ),
-        S::mul_ps(w3, w3),
-    );
-    let t4 = S::sub_ps(
-        S::sub_ps(
-            S::sub_ps(
-                S::sub_ps(S::set1_ps(0.5), S::mul_ps(x4, x4)),
-                S::mul_ps(y4, y4),
-            ),
-            S::mul_ps(z4, z4),
-        ),
-        S::mul_ps(w4, w4),
-    );
+    let t0 = S::Vf32::set1(0.5) - (x0 * x0) - (y0 * y0) - (z0 * z0) - (w0 * w0);
+    let t1 = S::Vf32::set1(0.5) - (x1 * x1) - (y1 * y1) - (z1 * z1) - (w1 * w1);
+    let t2 = S::Vf32::set1(0.5) - (x2 * x2) - (y2 * y2) - (z2 * z2) - (w2 * w2);
+    let t3 = S::Vf32::set1(0.5) - (x3 * x3) - (y3 * y3) - (z3 * z3) - (w3 * w3);
+    let t4 = S::Vf32::set1(0.5) - (x4 * x4) - (y4 * y4) - (z4 * z4) - (w4 * w4);
     // Cube each weight
-    let mut t0q = S::mul_ps(t0, t0);
-    t0q = S::mul_ps(t0q, t0q);
-    let mut t1q = S::mul_ps(t1, t1);
-    t1q = S::mul_ps(t1q, t1q);
-    let mut t2q = S::mul_ps(t2, t2);
-    t2q = S::mul_ps(t2q, t2q);
-    let mut t3q = S::mul_ps(t3, t3);
-    t3q = S::mul_ps(t3q, t3q);
-    let mut t4q = S::mul_ps(t4, t4);
-    t4q = S::mul_ps(t4q, t4q);
-
-    let mut n0 = S::mul_ps(t0q, grad4::<S>(seed, gi0, x0, y0, z0, w0));
-    let mut n1 = S::mul_ps(t1q, grad4::<S>(seed, gi1, x1, y1, z1, w1));
-    let mut n2 = S::mul_ps(t2q, grad4::<S>(seed, gi2, x2, y2, z2, w2));
-    let mut n3 = S::mul_ps(t3q, grad4::<S>(seed, gi3, x3, y3, z3, w3));
-    let mut n4 = S::mul_ps(t4q, grad4::<S>(seed, gi4, x4, y4, z4, w4));
+    let mut t0q = t0 * t0;
+    t0q = t0q * t0q;
+    let mut t1q = t1 * t1;
+    t1q = t1q * t1q;
+    let mut t2q = t2 * t2;
+    t2q = t2q * t2q;
+    let mut t3q = t3 * t3;
+    t3q = t3q * t3q;
+    let mut t4q = t4 * t4;
+    t4q = t4q * t4q;
+
+    let mut n0 = t0q * grad4::<S>(seed, gi0, x0, y0, z0, w0);
+    let mut n1 = t1q * grad4::<S>(seed, gi1, x1, y1, z1, w1);
+    let mut n2 = t2q * grad4::<S>(seed, gi2, x2, y2, z2, w2);
+    let mut n3 = t3q * grad4::<S>(seed, gi3, x3, y3, z3, w3);
+    let mut n4 = t4q * grad4::<S>(seed, gi4, x4, y4, z4, w4);
 
     // Discard contributions whose base weight factors are negative
-    let mut cond = S::cmplt_ps(t0, S::setzero_ps());
-    n0 = S::andnot_ps(cond, n0);
-    cond = S::cmplt_ps(t1, S::setzero_ps());
-    n1 = S::andnot_ps(cond, n1);
-    cond = S::cmplt_ps(t2, S::setzero_ps());
-    n2 = S::andnot_ps(cond, n2);
-    cond = S::cmplt_ps(t3, S::setzero_ps());
-    n3 = S::andnot_ps(cond, n3);
-    cond = S::cmplt_ps(t4, S::setzero_ps());
-    n4 = S::andnot_ps(cond, n4);
+    let mut cond = t0.cmp_lt(S::Vf32::zeroes());
+    n0 = n0.and_not(cond);
+    cond = t1.cmp_lt(S::Vf32::zeroes());
+    n1 = n1.and_not(cond);
+    cond = t2.cmp_lt(S::Vf32::zeroes());
+    n2 = n2.and_not(cond);
+    cond = t3.cmp_lt(S::Vf32::zeroes());
+    n3 = n3.and_not(cond);
+    cond = t4.cmp_lt(S::Vf32::zeroes());
+    n4 = n4.and_not(cond);
 
     // Scaling factor found by numerical approximation
-    S::add_ps(n0, S::add_ps(n1, S::add_ps(n2, S::add_ps(n3, n4)))) * S::set1_ps(62.77772078955791)
+    (n0 + n1 + n2 + n3 + n4) * S::Vf32::set1(62.77772078955791)
 }
 
 #[cfg(test)]
@@ -676,12 +592,12 @@ mod tests {
     }
 
     #[test]
-    fn simplex_1d_range() {
+    fn test_noise_simplex32_1d_range() {
         for seed in 0..10 {
             let mut min = f32::INFINITY;
             let mut max = -f32::INFINITY;
             for x in 0..1000 {
-                let n = unsafe { simplex_1d::<Scalar>(F32x1(x as f32 / 10.0), seed).0 };
+                let n = simplex_1d::<Scalar>(F32x1(x as f32 / 10.0), seed).0;
                 min = min.min(n);
                 max = max.max(n);
             }
@@ -690,7 +606,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_1d_deriv_sanity() {
+    fn test_noise_simplex32_1d_deriv_sanity() {
         let mut avg_err = 0.0;
         const SEEDS: i32 = 10;
         const POINTS: i32 = 1000;
@@ -699,9 +615,9 @@ mod tests {
                 // Offset a bit so we don't check derivative at lattice points, where it's always zero
                 let center = x as f32 / 10.0 + 0.1234;
                 const H: f32 = 0.01;
-                let n0 = unsafe { simplex_1d::<Scalar>(F32x1(center - H), seed).0 };
-                let (n1, d1) = unsafe { simplex_1d_deriv::<Scalar>(F32x1(center), seed) };
-                let n2 = unsafe { simplex_1d::<Scalar>(F32x1(center + H), seed).0 };
+                let n0 = simplex_1d::<Scalar>(F32x1(center - H), seed).0;
+                let (n1, d1) = simplex_1d_deriv::<Scalar>(F32x1(center), seed);
+                let n2 = simplex_1d::<Scalar>(F32x1(center + H), seed).0;
                 let (n1, d1) = (n1.0, d1.0);
                 avg_err += ((n2 - (n1 + d1 * H)).abs() + (n0 - (n1 - d1 * H)).abs())
                     / (SEEDS * POINTS * 2) as f32;
@@ -711,15 +627,15 @@ mod tests {
     }
 
     #[test]
-    fn simplex_2d_range() {
+    fn test_noise_simplex32_2d_range() {
         for seed in 0..10 {
             let mut min = f32::INFINITY;
             let mut max = -f32::INFINITY;
             for y in 0..10 {
                 for x in 0..100 {
-                    let n = unsafe {
-                        simplex_2d::<Scalar>(F32x1(x as f32 / 10.0), F32x1(y as f32 / 10.0), seed).0
-                    };
+                    let n =
+                        simplex_2d::<Scalar>(F32x1(x as f32 / 10.0), F32x1(y as f32 / 10.0), seed)
+                            .0;
                     min = min.min(n);
                     max = max.max(n);
                 }
@@ -729,7 +645,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_2d_deriv_sanity() {
+    fn test_noise_simplex32_2d_deriv_sanity() {
         let mut avg_err = 0.0;
         const SEEDS: i32 = 10;
         const POINTS: i32 = 10;
@@ -740,22 +656,13 @@ mod tests {
                     let center_x = x as f32 / 10.0 + 0.1234;
                     let center_y = y as f32 / 10.0 + 0.1234;
                     const H: f32 = 0.01;
-                    let (value, d) = unsafe {
-                        simplex_2d_deriv::<Scalar>(F32x1(center_x), F32x1(center_y), seed)
-                    };
+                    let (value, d) =
+                        simplex_2d_deriv::<Scalar>(F32x1(center_x), F32x1(center_y), seed);
                     let (value, d) = (value.0, [d[0].0, d[1].0]);
-                    let left = unsafe {
-                        simplex_2d::<Scalar>(F32x1(center_x - H), F32x1(center_y), seed).0
-                    };
-                    let right = unsafe {
-                        simplex_2d::<Scalar>(F32x1(center_x + H), F32x1(center_y), seed).0
-                    };
-                    let down = unsafe {
-                        simplex_2d::<Scalar>(F32x1(center_x), F32x1(center_y - H), seed).0
-                    };
-                    let up = unsafe {
-                        simplex_2d::<Scalar>(F32x1(center_x), F32x1(center_y + H), seed).0
-                    };
+                    let left = simplex_2d::<Scalar>(F32x1(center_x - H), F32x1(center_y), seed).0;
+                    let right = simplex_2d::<Scalar>(F32x1(center_x + H), F32x1(center_y), seed).0;
+                    let down = simplex_2d::<Scalar>(F32x1(center_x), F32x1(center_y - H), seed).0;
+                    let up = simplex_2d::<Scalar>(F32x1(center_x), F32x1(center_y + H), seed).0;
                     avg_err += ((left - (value - d[0] * H)).abs()
                         + (right - (value + d[0] * H)).abs()
                         + (down - (value - d[1] * H)).abs()
@@ -768,22 +675,20 @@ mod tests {
     }
 
     #[test]
-    fn simplex_3d_range() {
+    fn test_noise_simplex32_3d_range() {
         let mut min = f32::INFINITY;
         let mut max = -f32::INFINITY;
         const SEED: i32 = 0;
         for z in 0..10 {
             for y in 0..10 {
                 for x in 0..10000 {
-                    let n = unsafe {
-                        simplex_3d::<Scalar>(
-                            F32x1(x as f32 / 10.0),
-                            F32x1(y as f32 / 10.0),
-                            F32x1(z as f32 / 10.0),
-                            SEED,
-                        )
-                        .0
-                    };
+                    let n = simplex_3d::<Scalar>(
+                        F32x1(x as f32 / 10.0),
+                        F32x1(y as f32 / 10.0),
+                        F32x1(z as f32 / 10.0),
+                        SEED,
+                    )
+                    .0;
                     min = min.min(n);
                     max = max.max(n);
                 }
@@ -793,7 +698,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_3d_deriv_sanity() {
+    fn test_noise_simplex32_3d_deriv_sanity() {
         let mut avg_err = 0.0;
         const POINTS: i32 = 10;
         const SEED: i32 = 0;
@@ -805,42 +710,34 @@ mod tests {
                     let center_y = y as f32 / 10.0 + 0.1234;
                     let center_z = z as f32 / 10.0 + 0.1234;
                     const H: f32 = 0.01;
-                    let (value, d) = unsafe {
-                        simplex_3d_deriv::<Scalar>(
-                            F32x1(center_x),
-                            F32x1(center_y),
-                            F32x1(center_z),
-                            SEED,
-                        )
-                    };
+                    let (value, d) = simplex_3d_deriv::<Scalar>(
+                        F32x1(center_x),
+                        F32x1(center_y),
+                        F32x1(center_z),
+                        SEED,
+                    );
                     let (value, d) = (value.0, [d[0].0, d[1].0, d[2].0]);
-                    let right = unsafe {
-                        simplex_3d::<Scalar>(
-                            F32x1(center_x + H),
-                            F32x1(center_y),
-                            F32x1(center_z),
-                            SEED,
-                        )
-                        .0
-                    };
-                    let up = unsafe {
-                        simplex_3d::<Scalar>(
-                            F32x1(center_x),
-                            F32x1(center_y + H),
-                            F32x1(center_z),
-                            SEED,
-                        )
-                        .0
-                    };
-                    let forward = unsafe {
-                        simplex_3d::<Scalar>(
-                            F32x1(center_x),
-                            F32x1(center_y),
-                            F32x1(center_z + H),
-                            SEED,
-                        )
-                        .0
-                    };
+                    let right = simplex_3d::<Scalar>(
+                        F32x1(center_x + H),
+                        F32x1(center_y),
+                        F32x1(center_z),
+                        SEED,
+                    )
+                    .0;
+                    let up = simplex_3d::<Scalar>(
+                        F32x1(center_x),
+                        F32x1(center_y + H),
+                        F32x1(center_z),
+                        SEED,
+                    )
+                    .0;
+                    let forward = simplex_3d::<Scalar>(
+                        F32x1(center_x),
+                        F32x1(center_y),
+                        F32x1(center_z + H),
+                        SEED,
+                    )
+                    .0;
                     avg_err += ((right - (value + d[0] * H)).abs()
                         + (up - (value + d[1] * H)).abs()
                         + (forward - (value + d[2] * H)).abs())
@@ -852,7 +749,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_4d_range() {
+    fn test_noise_simplex32_4d_range() {
         let mut min = f32::INFINITY;
         let mut max = -f32::INFINITY;
         const SEED: i32 = 0;
@@ -860,16 +757,14 @@ mod tests {
             for z in 0..10 {
                 for y in 0..10 {
                     for x in 0..1000 {
-                        let n = unsafe {
-                            simplex_4d::<Scalar>(
-                                F32x1(x as f32 / 10.0),
-                                F32x1(y as f32 / 10.0),
-                                F32x1(z as f32 / 10.0),
-                                F32x1(w as f32 / 10.0),
-                                SEED,
-                            )
-                            .0
-                        };
+                        let n = simplex_4d::<Scalar>(
+                            F32x1(x as f32 / 10.0),
+                            F32x1(y as f32 / 10.0),
+                            F32x1(z as f32 / 10.0),
+                            F32x1(w as f32 / 10.0),
+                            SEED,
+                        )
+                        .0;
                         min = min.min(n);
                         max = max.max(n);
                     }
diff --git a/src/noise/simplex_64.rs b/src/noise/simplex_64.rs
index da2c705..3efd3fc 100644
--- a/src/noise/simplex_64.rs
+++ b/src/noise/simplex_64.rs
@@ -1,14 +1,15 @@
 use crate::noise::gradient_64::{grad1, grad2, grad3d, grad4};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 use crate::noise::cellular_32::{X_PRIME_64, Y_PRIME_64, Z_PRIME_64};
 use crate::noise::gradient_64::grad3d_dot;
+use crate::noise::ops::gather_64;
 use crate::noise::simplex_32::{
     F2_64, F3_64, F4_64, G22_64, G24_64, G2_64, G33_64, G34_64, G3_64, G44_64, G4_64,
 };
 
-const PERM64: [i64; 512] = [
+static PERM64: [i64; 512] = [
     151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69,
     142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219,
     203, 117, 35, 11, 32, 57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175,
@@ -36,39 +37,51 @@ const PERM64: [i64; 512] = [
     222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180,
 ];
 
+#[inline(always)]
+fn assert_in_perm_range<S: Simd>(values: S::Vi64) {
+    debug_assert!(values
+        .cmp_lt(S::Vi64::set1(PERM64.len() as i64))
+        .iter()
+        .all(|is_less_than| is_less_than != 0));
+}
+
 /// Like `simplex_1d`, but also computes the derivative
 #[inline(always)]
-pub unsafe fn simplex_1d_deriv<S: Simd>(x: S::Vf64, seed: i64) -> (S::Vf64, S::Vf64) {
+pub fn simplex_1d_deriv<S: Simd>(x: S::Vf64, seed: i64) -> (S::Vf64, S::Vf64) {
     // Gradients are selected deterministically based on the whole part of `x`
-    let ips = S::fast_floor_pd(x);
-    let mut i0 = S::cvtpd_epi64(ips);
-    let i1 = S::and_epi64(S::add_epi64(i0, S::set1_epi64(1)), S::set1_epi64(0xff));
+    let ips = x.fast_floor();
+    let mut i0 = ips.cast_i64();
+    let i1 = (i0 + S::Vi64::set1(1)) & S::Vi64::set1(0xff);
 
     // the fractional part of x, i.e. the distance to the left gradient node. 0 ≤ x0 < 1.
-    let x0 = S::sub_pd(x, ips);
+    let x0 = x - ips;
     // signed distance to the right gradient node
-    let x1 = S::sub_pd(x0, S::set1_pd(1.0));
-
-    i0 = S::and_epi64(i0, S::set1_epi64(0xff));
-    let gi0 = S::i64gather_epi64(&PERM64, i0);
-    let gi1 = S::i64gather_epi64(&PERM64, i1);
+    let x1 = x0 - S::Vf64::set1(1.0);
+
+    i0 = i0 & S::Vi64::set1(0xff);
+    let (gi0, gi1) = unsafe {
+        // Safety: i0 and i1 were both just masked with 0xff, so every lane is in 0..=255.
+        let gi0 = gather_64::<S>(&PERM64, i0);
+        let gi1 = gather_64::<S>(&PERM64, i1);
+        (gi0, gi1)
+    };
 
     // Compute the contribution from the first gradient
-    let x20 = S::mul_pd(x0, x0); // x^2_0
-    let t0 = S::sub_pd(S::set1_pd(1.0), x20); // t_0
-    let t20 = S::mul_pd(t0, t0); // t^2_0
-    let t40 = S::mul_pd(t20, t20); // t^4_0
+    let x20 = x0 * x0; // x^2_0
+    let t0 = S::Vf64::set1(1.0) - x20; // t_0
+    let t20 = t0 * t0; // t^2_0
+    let t40 = t20 * t20; // t^4_0
     let gx0 = grad1::<S>(seed, gi0);
-    let n0 = S::mul_pd(t40, gx0 * x0);
+    let n0 = t40 * gx0 * x0;
     // n0 = (1 - x0^2)^4 * x0 * grad
 
     // Compute the contribution from the second gradient
-    let x21 = S::mul_pd(x1, x1); // x^2_1
-    let t1 = S::sub_pd(S::set1_pd(1.0), x21); // t_1
-    let t21 = S::mul_pd(t1, t1); // t^2_1
-    let t41 = S::mul_pd(t21, t21); // t^4_1
+    let x21 = x1 * x1; // x^2_1
+    let t1 = S::Vf64::set1(1.0) - x21; // t_1
+    let t21 = t1 * t1; // t^2_1
+    let t41 = t21 * t21; // t^4_1
     let gx1 = grad1::<S>(seed, gi1);
-    let n1 = S::mul_pd(t41, gx1 * x1);
+    let n1 = t41 * gx1 * x1;
 
     // n0 + n1 =
     //    grad0 * x0 * (1 - x0^2)^4
@@ -83,10 +96,11 @@ pub unsafe fn simplex_1d_deriv<S: Simd>(x: S::Vf64, seed: i64) -> (S::Vf64, S::V
     // allowing us to scale into [-1, 1]
     const SCALE: f64 = 256.0 / (81.0 * 7.0);
 
-    let value = S::add_pd(n0, n1) * S::set1_pd(SCALE);
-    let derivative =
-        ((t20 * t0 * gx0 * x20 + t21 * t1 * gx1 * x21) * S::set1_pd(-8.0) + t40 * gx0 + t41 * gx1)
-            * S::set1_pd(SCALE);
+    let value = (n0 + n1) * S::Vf64::set1(SCALE);
+    let derivative = ((t20 * t0 * gx0 * x20 + t21 * t1 * gx1 * x21) * S::Vf64::set1(-8.0)
+        + t40 * gx0
+        + t41 * gx1)
+        * S::Vf64::set1(SCALE);
     (value, derivative)
 }
 
@@ -94,7 +108,7 @@ pub unsafe fn simplex_1d_deriv<S: Simd>(x: S::Vf64, seed: i64) -> (S::Vf64, S::V
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_1d<S: Simd>(x: S::Vf64, seed: i64) -> S::Vf64 {
+pub fn simplex_1d<S: Simd>(x: S::Vf64, seed: i64) -> S::Vf64 {
     simplex_1d_deriv::<S>(x, seed).0
 }
 
@@ -102,81 +116,74 @@ pub unsafe fn simplex_1d<S: Simd>(x: S::Vf64, seed: i64) -> S::Vf64 {
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_2d<S: Simd>(x: S::Vf64, y: S::Vf64, seed: i64) -> S::Vf64 {
+pub fn simplex_2d<S: Simd>(x: S::Vf64, y: S::Vf64, seed: i64) -> S::Vf64 {
     simplex_2d_deriv::<S>(x, y, seed).0
 }
 
 /// Like `simplex_2d`, but also computes the derivative
 #[inline(always)]
-pub unsafe fn simplex_2d_deriv<S: Simd>(
-    x: S::Vf64,
-    y: S::Vf64,
-    seed: i64,
-) -> (S::Vf64, [S::Vf64; 2]) {
+pub fn simplex_2d_deriv<S: Simd>(x: S::Vf64, y: S::Vf64, seed: i64) -> (S::Vf64, [S::Vf64; 2]) {
     // Skew to distort simplexes with side length sqrt(2)/sqrt(3) until they make up
     // squares
-    let s = S::mul_pd(S::set1_pd(F2_64), S::add_pd(x, y));
-    let ips = S::floor_pd(S::add_pd(x, s));
-    let jps = S::floor_pd(S::add_pd(y, s));
+    let s = S::Vf64::set1(F2_64) * (x + y);
+    let ips = (x + s).floor();
+    let jps = (y + s).floor();
 
     // Integer coordinates for the base vertex of the triangle
-    let i = S::cvtpd_epi64(ips);
-    let j = S::cvtpd_epi64(jps);
+    let i = ips.cast_i64();
+    let j = jps.cast_i64();
 
-    let t = S::mul_pd(S::cvtepi64_pd(S::add_epi64(i, j)), S::set1_pd(G2_64));
+    let t = (i + j).cast_f64() * S::Vf64::set1(G2_64);
 
     // Unskewed distances to the first point of the enclosing simplex
-    let x0 = S::sub_pd(x, S::sub_pd(ips, t));
-    let y0 = S::sub_pd(y, S::sub_pd(jps, t));
+    let x0 = x - (ips - t);
+    let y0 = y - (jps - t);
 
-    let i1 = S::castpd_epi64(S::cmpge_pd(x0, y0));
+    let i1 = (x0.cmp_gte(y0)).bitcast_i64();
 
-    let j1 = S::castpd_epi64(S::cmpgt_pd(y0, x0));
+    let j1 = (y0.cmp_gt(x0)).bitcast_i64();
 
     // Distances to the second and third points of the enclosing simplex
-    let x1 = S::add_pd(S::add_pd(x0, S::cvtepi64_pd(i1)), S::set1_pd(G2_64));
-    let y1 = S::add_pd(S::add_pd(y0, S::cvtepi64_pd(j1)), S::set1_pd(G2_64));
-    let x2 = S::add_pd(S::add_pd(x0, S::set1_pd(-1.0)), S::set1_pd(G22_64));
-    let y2 = S::add_pd(S::add_pd(y0, S::set1_pd(-1.0)), S::set1_pd(G22_64));
-
-    let ii = S::and_epi64(i, S::set1_epi64(0xff));
-    let jj = S::and_epi64(j, S::set1_epi64(0xff));
-
-    let gi0 = S::i64gather_epi64(&PERM64, S::add_epi64(ii, S::i64gather_epi64(&PERM64, jj)));
-
-    let gi1 = S::i64gather_epi64(
-        &PERM64,
-        S::add_epi64(
-            S::sub_epi64(ii, i1),
-            S::i64gather_epi64(&PERM64, S::sub_epi64(jj, j1)),
-        ),
-    );
-
-    let gi2 = S::i64gather_epi64(
-        &PERM64,
-        S::add_epi64(
-            S::sub_epi64(ii, S::set1_epi64(-1)),
-            S::i64gather_epi64(&PERM64, S::sub_epi64(jj, S::set1_epi64(-1))),
-        ),
-    );
+    let x1 = (x0 + i1.cast_f64()) + S::Vf64::set1(G2_64);
+    let y1 = (y0 + j1.cast_f64()) + S::Vf64::set1(G2_64);
+    let x2 = (x0 + S::Vf64::set1(-1.0)) + S::Vf64::set1(G22_64);
+    let y2 = (y0 + S::Vf64::set1(-1.0)) + S::Vf64::set1(G22_64);
+
+    let ii = i & S::Vi64::set1(0xff);
+    let jj = j & S::Vi64::set1(0xff);
+
+    let (gi0, gi1, gi2) = unsafe {
+        assert_in_perm_range::<S>(ii);
+        assert_in_perm_range::<S>(jj);
+        assert_in_perm_range::<S>(ii - i1);
+        assert_in_perm_range::<S>(jj - j1);
+        assert_in_perm_range::<S>(ii + 1);
+        assert_in_perm_range::<S>(jj + 1);
+
+        let gi0 = gather_64::<S>(&PERM64, ii + gather_64::<S>(&PERM64, jj));
+        let gi1 = gather_64::<S>(&PERM64, (ii - i1) + gather_64::<S>(&PERM64, jj - j1));
+        let gi2 = gather_64::<S>(&PERM64, (ii - -1) + gather_64::<S>(&PERM64, jj - -1));
+
+        (gi0, gi1, gi2)
+    };
 
     // Weights associated with the gradients at each corner
     // These FMA operations are equivalent to: let t = 0.5 - x*x - y*y
-    let mut t0 = S::fnmadd_pd(y0, y0, S::fnmadd_pd(x0, x0, S::set1_pd(0.5)));
-    let mut t1 = S::fnmadd_pd(y1, y1, S::fnmadd_pd(x1, x1, S::set1_pd(0.5)));
-    let mut t2 = S::fnmadd_pd(y2, y2, S::fnmadd_pd(x2, x2, S::set1_pd(0.5)));
+    let mut t0 = S::Vf64::neg_mul_add(y0, y0, S::Vf64::neg_mul_add(x0, x0, S::Vf64::set1(0.5)));
+    let mut t1 = S::Vf64::neg_mul_add(y1, y1, S::Vf64::neg_mul_add(x1, x1, S::Vf64::set1(0.5)));
+    let mut t2 = S::Vf64::neg_mul_add(y2, y2, S::Vf64::neg_mul_add(x2, x2, S::Vf64::set1(0.5)));
 
     // Zero out negative weights
-    t0 &= S::cmpge_pd(t0, S::setzero_pd());
-    t1 &= S::cmpge_pd(t1, S::setzero_pd());
-    t2 &= S::cmpge_pd(t2, S::setzero_pd());
+    t0 &= t0.cmp_gte(S::Vf64::zeroes());
+    t1 &= t1.cmp_gte(S::Vf64::zeroes());
+    t2 &= t2.cmp_gte(S::Vf64::zeroes());
 
-    let t20 = S::mul_pd(t0, t0);
-    let t40 = S::mul_pd(t20, t20);
-    let t21 = S::mul_pd(t1, t1);
-    let t41 = S::mul_pd(t21, t21);
-    let t22 = S::mul_pd(t2, t2);
-    let t42 = S::mul_pd(t22, t22);
+    let t20 = t0 * t0;
+    let t40 = t20 * t20;
+    let t21 = t1 * t1;
+    let t41 = t21 * t21;
+    let t22 = t2 * t2;
+    let t42 = t22 * t22;
 
     let [gx0, gy0] = grad2::<S>(seed, gi0);
     let g0 = gx0 * x0 + gy0 * y0;
@@ -189,8 +196,8 @@ pub unsafe fn simplex_2d_deriv<S: Simd>(
     let n2 = t42 * g2;
 
     // Scaling factor found by numerical approximation
-    let scale = S::set1_pd(45.26450774985561631259);
-    let value = S::add_pd(n0, S::add_pd(n1, n2)) * scale;
+    let scale = S::Vf64::set1(45.26450774985561631259);
+    let value = (n0 + n1 + n2) * scale;
     let derivative = {
         let temp0 = t20 * t0 * g0;
         let mut dnoise_dx = temp0 * x0;
@@ -201,8 +208,8 @@ pub unsafe fn simplex_2d_deriv<S: Simd>(
         let temp2 = t22 * t2 * g2;
         dnoise_dx += temp2 * x2;
         dnoise_dy += temp2 * y2;
-        dnoise_dx *= S::set1_pd(-8.0);
-        dnoise_dy *= S::set1_pd(-8.0);
+        dnoise_dx *= S::Vf64::set1(-8.0);
+        dnoise_dy *= S::Vf64::set1(-8.0);
         dnoise_dx += t40 * gx0 + t41 * gx1 + t42 * gx2;
         dnoise_dy += t40 * gy0 + t41 * gy1 + t42 * gy2;
         dnoise_dx *= scale;
@@ -216,98 +223,74 @@ pub unsafe fn simplex_2d_deriv<S: Simd>(
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_3d<S: Simd>(x: S::Vf64, y: S::Vf64, z: S::Vf64, seed: i64) -> S::Vf64 {
+pub fn simplex_3d<S: Simd>(x: S::Vf64, y: S::Vf64, z: S::Vf64, seed: i64) -> S::Vf64 {
     simplex_3d_deriv::<S>(x, y, z, seed).0
 }
 
 /// Like `simplex_3d`, but also computes the derivative
 #[inline(always)]
-pub unsafe fn simplex_3d_deriv<S: Simd>(
+pub fn simplex_3d_deriv<S: Simd>(
     x: S::Vf64,
     y: S::Vf64,
     z: S::Vf64,
     seed: i64,
 ) -> (S::Vf64, [S::Vf64; 3]) {
     // Find skewed simplex grid coordinates associated with the input coordinates
-    let f = S::mul_pd(S::set1_pd(F3_64), S::add_pd(S::add_pd(x, y), z));
-    let mut x0 = S::fast_floor_pd(S::add_pd(x, f));
-    let mut y0 = S::fast_floor_pd(S::add_pd(y, f));
-    let mut z0 = S::fast_floor_pd(S::add_pd(z, f));
+    let f = S::Vf64::set1(F3_64) * ((x + y) + z);
+    let mut x0 = (x + f).fast_floor();
+    let mut y0 = (y + f).fast_floor();
+    let mut z0 = (z + f).fast_floor();
 
     // Integer grid coordinates
-    let i = S::mullo_epi64(S::cvtpd_epi64(x0), S::set1_epi64(X_PRIME_64));
-    let j = S::mullo_epi64(S::cvtpd_epi64(y0), S::set1_epi64(Y_PRIME_64));
-    let k = S::mullo_epi64(S::cvtpd_epi64(z0), S::set1_epi64(Z_PRIME_64));
+    let i = x0.cast_i64() * S::Vi64::set1(X_PRIME_64);
+    let j = y0.cast_i64() * S::Vi64::set1(Y_PRIME_64);
+    let k = z0.cast_i64() * S::Vi64::set1(Z_PRIME_64);
 
     // Compute distance from first simplex vertex to input coordinates
-    let g = S::mul_pd(S::set1_pd(G3_64), S::add_pd(S::add_pd(x0, y0), z0));
-    x0 = S::sub_pd(x, S::sub_pd(x0, g));
-    y0 = S::sub_pd(y, S::sub_pd(y0, g));
-    z0 = S::sub_pd(z, S::sub_pd(z0, g));
+    let g = S::Vf64::set1(G3_64) * (x0 + y0 + z0);
+    x0 = x - (x0 - g);
+    y0 = y - (y0 - g);
+    z0 = z - (z0 - g);
 
-    let x0_ge_y0 = S::cmpge_pd(x0, y0);
-    let y0_ge_z0 = S::cmpge_pd(y0, z0);
-    let x0_ge_z0 = S::cmpge_pd(x0, z0);
+    let x0_ge_y0 = x0.cmp_gte(y0);
+    let y0_ge_z0 = y0.cmp_gte(z0);
+    let x0_ge_z0 = x0.cmp_gte(z0);
 
     let i1 = x0_ge_y0 & x0_ge_z0;
-    let j1 = S::andnot_pd(x0_ge_y0, y0_ge_z0);
-    let k1 = S::andnot_pd(x0_ge_z0, !y0_ge_z0);
+    let j1 = y0_ge_z0.and_not(x0_ge_y0);
+    let k1 = (!y0_ge_z0).and_not(x0_ge_z0);
 
     let i2 = x0_ge_y0 | x0_ge_z0;
     let j2 = (!x0_ge_y0) | y0_ge_z0;
     let k2 = !(x0_ge_z0 & y0_ge_z0);
 
     // Compute distances from remaining simplex vertices to input coordinates
-    let x1 = S::add_pd(S::sub_pd(x0, i1 & S::set1_pd(1.0)), S::set1_pd(G3_64));
-    let y1 = S::add_pd(S::sub_pd(y0, j1 & S::set1_pd(1.0)), S::set1_pd(G3_64));
-    let z1 = S::add_pd(S::sub_pd(z0, k1 & S::set1_pd(1.0)), S::set1_pd(G3_64));
+    let x1 = x0 - (i1 & S::Vf64::set1(1.0)) + S::Vf64::set1(G3_64);
+    let y1 = y0 - (j1 & S::Vf64::set1(1.0)) + S::Vf64::set1(G3_64);
+    let z1 = z0 - (k1 & S::Vf64::set1(1.0)) + S::Vf64::set1(G3_64);
 
-    let x2 = S::add_pd(S::sub_pd(x0, i2 & S::set1_pd(1.0)), S::set1_pd(F3_64));
-    let y2 = S::add_pd(S::sub_pd(y0, j2 & S::set1_pd(1.0)), S::set1_pd(F3_64));
-    let z2 = S::add_pd(S::sub_pd(z0, k2 & S::set1_pd(1.0)), S::set1_pd(F3_64));
+    let x2 = x0 - (i2 & S::Vf64::set1(1.0)) + S::Vf64::set1(F3_64);
+    let y2 = y0 - (j2 & S::Vf64::set1(1.0)) + S::Vf64::set1(F3_64);
+    let z2 = z0 - (k2 & S::Vf64::set1(1.0)) + S::Vf64::set1(F3_64);
 
-    let x3 = S::add_pd(x0, S::set1_pd(G33_64));
-    let y3 = S::add_pd(y0, S::set1_pd(G33_64));
-    let z3 = S::add_pd(z0, S::set1_pd(G33_64));
+    let x3 = x0 + S::Vf64::set1(G33_64);
+    let y3 = y0 + S::Vf64::set1(G33_64);
+    let z3 = z0 + S::Vf64::set1(G33_64);
 
     // Compute base weight factors associated with each vertex, `0.6 - v . v` where v is the
     // distance to the vertex. Strictly the constant should be 0.5, but 0.6 is thought by Gustavson
     // to give visually better results at the cost of subtle discontinuities.
     //#define SIMDf_NMUL_ADD(a,b,c) = SIMDf_SUB(c, SIMDf_MUL(a,b)
-    let mut t0 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(S::set1_pd(0.6), S::mul_pd(x0, x0)),
-            S::mul_pd(y0, y0),
-        ),
-        S::mul_pd(z0, z0),
-    );
-    let mut t1 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(S::set1_pd(0.6), S::mul_pd(x1, x1)),
-            S::mul_pd(y1, y1),
-        ),
-        S::mul_pd(z1, z1),
-    );
-    let mut t2 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(S::set1_pd(0.6), S::mul_pd(x2, x2)),
-            S::mul_pd(y2, y2),
-        ),
-        S::mul_pd(z2, z2),
-    );
-    let mut t3 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(S::set1_pd(0.6), S::mul_pd(x3, x3)),
-            S::mul_pd(y3, y3),
-        ),
-        S::mul_pd(z3, z3),
-    );
+    let mut t0 = S::Vf64::set1(0.6) - (x0 * x0) - (y0 * y0) - (z0 * z0);
+    let mut t1 = S::Vf64::set1(0.6) - (x1 * x1) - (y1 * y1) - (z1 * z1);
+    let mut t2 = S::Vf64::set1(0.6) - (x2 * x2) - (y2 * y2) - (z2 * z2);
+    let mut t3 = S::Vf64::set1(0.6) - (x3 * x3) - (y3 * y3) - (z3 * z3);
 
     // Zero out negative weights
-    t0 &= S::cmpge_pd(t0, S::setzero_pd());
-    t1 &= S::cmpge_pd(t1, S::setzero_pd());
-    t2 &= S::cmpge_pd(t2, S::setzero_pd());
-    t3 &= S::cmpge_pd(t3, S::setzero_pd());
+    t0 &= t0.cmp_gte(S::Vf64::zeroes());
+    t1 &= t1.cmp_gte(S::Vf64::zeroes());
+    t2 &= t2.cmp_gte(S::Vf64::zeroes());
+    t3 &= t3.cmp_gte(S::Vf64::zeroes());
 
     // Square each weight
     let t20 = t0 * t0;
@@ -327,50 +310,32 @@ pub unsafe fn simplex_3d_deriv<S: Simd>(
     let g0 = grad3d_dot::<S>(seed, i, j, k, x0, y0, z0);
     let v0 = t40 * g0;
 
-    let v1x = S::add_epi64(
-        i,
-        S::and_epi64(S::castpd_epi64(i1), S::set1_epi64(X_PRIME_64)),
-    );
-    let v1y = S::add_epi64(
-        j,
-        S::and_epi64(S::castpd_epi64(j1), S::set1_epi64(Y_PRIME_64)),
-    );
-    let v1z = S::add_epi64(
-        k,
-        S::and_epi64(S::castpd_epi64(k1), S::set1_epi64(Z_PRIME_64)),
-    );
+    let v1x = i + (i1.bitcast_i64() & S::Vi64::set1(X_PRIME_64));
+    let v1y = j + (j1.bitcast_i64() & S::Vi64::set1(Y_PRIME_64));
+    let v1z = k + (k1.bitcast_i64() & S::Vi64::set1(Z_PRIME_64));
     let g1 = grad3d_dot::<S>(seed, v1x, v1y, v1z, x1, y1, z1);
     let v1 = t41 * g1;
 
-    let v2x = S::add_epi64(
-        i,
-        S::and_epi64(S::castpd_epi64(i2), S::set1_epi64(X_PRIME_64)),
-    );
-    let v2y = S::add_epi64(
-        j,
-        S::and_epi64(S::castpd_epi64(j2), S::set1_epi64(Y_PRIME_64)),
-    );
-    let v2z = S::add_epi64(
-        k,
-        S::and_epi64(S::castpd_epi64(k2), S::set1_epi64(Z_PRIME_64)),
-    );
+    let v2x = i + (i2.bitcast_i64() & S::Vi64::set1(X_PRIME_64));
+    let v2y = j + (j2.bitcast_i64() & S::Vi64::set1(Y_PRIME_64));
+    let v2z = k + (k2.bitcast_i64() & S::Vi64::set1(Z_PRIME_64));
     let g2 = grad3d_dot::<S>(seed, v2x, v2y, v2z, x2, y2, z2);
     let v2 = t42 * g2;
 
     //SIMDf v3 = SIMDf_MASK(n3, SIMDf_MUL(SIMDf_MUL(t3, t3), FUNC(GradCoord)(seed, SIMDi_ADD(i, SIMDi_NUM(xPrime)), SIMDi_ADD(j, SIMDi_NUM(yPrime)), SIMDi_ADD(k, SIMDi_NUM(zPrime)), x3, y3, z3)));
-    let v3x = S::add_epi64(i, S::set1_epi64(X_PRIME_64));
-    let v3y = S::add_epi64(j, S::set1_epi64(Y_PRIME_64));
-    let v3z = S::add_epi64(k, S::set1_epi64(Z_PRIME_64));
+    let v3x = i + S::Vi64::set1(X_PRIME_64);
+    let v3y = j + S::Vi64::set1(Y_PRIME_64);
+    let v3z = k + S::Vi64::set1(Z_PRIME_64);
     //define SIMDf_MASK(m,a) SIMDf_AND(SIMDf_CAST_TO_FLOAT(m),a)
     let g3 = grad3d_dot::<S>(seed, v3x, v3y, v3z, x3, y3, z3);
     let v3 = t43 * g3;
 
-    let p1 = S::add_pd(v3, v2);
-    let p2 = S::add_pd(p1, v1);
+    let p1 = v3 + v2;
+    let p2 = p1 + v1;
 
     // Scaling factor found by numerical approximation
-    let scale = S::set1_pd(32.69587493801679);
-    let result = S::add_pd(p2, v0) * scale;
+    let scale = S::Vf64::set1(32.69587493801679);
+    let result = (p2 + v0) * scale;
     let derivative = {
         let temp0 = t20 * t0 * g0;
         let mut dnoise_dx = temp0 * x0;
@@ -388,9 +353,9 @@ pub unsafe fn simplex_3d_deriv<S: Simd>(
         dnoise_dx += temp3 * x3;
         dnoise_dy += temp3 * y3;
         dnoise_dz += temp3 * z3;
-        dnoise_dx *= S::set1_pd(-8.0);
-        dnoise_dy *= S::set1_pd(-8.0);
-        dnoise_dz *= S::set1_pd(-8.0);
+        dnoise_dx *= S::Vf64::set1(-8.0);
+        dnoise_dy *= S::Vf64::set1(-8.0);
+        dnoise_dz *= S::Vf64::set1(-8.0);
         let [gx0, gy0, gz0] = grad3d::<S>(seed, i, j, k);
         let [gx1, gy1, gz1] = grad3d::<S>(seed, v1x, v1y, v1z);
         let [gx2, gy2, gz2] = grad3d::<S>(seed, v2x, v2y, v2z);
@@ -411,225 +376,164 @@ pub unsafe fn simplex_3d_deriv<S: Simd>(
 ///
 /// Produces a value -1 ≤ n ≤ 1.
 #[inline(always)]
-pub unsafe fn simplex_4d<S: Simd>(
-    x: S::Vf64,
-    y: S::Vf64,
-    z: S::Vf64,
-    w: S::Vf64,
-    seed: i64,
-) -> S::Vf64 {
-    let s = S::mul_pd(
-        S::set1_pd(F4_64),
-        S::add_pd(x, S::add_pd(y, S::add_pd(z, w))),
-    );
-
-    let ipd = S::floor_pd(S::add_pd(x, s));
-    let jpd = S::floor_pd(S::add_pd(y, s));
-    let kpd = S::floor_pd(S::add_pd(z, s));
-    let lpd = S::floor_pd(S::add_pd(w, s));
-
-    let i = S::cvtpd_epi64(ipd);
-    let j = S::cvtpd_epi64(jpd);
-    let k = S::cvtpd_epi64(kpd);
-    let l = S::cvtpd_epi64(lpd);
-
-    let t = S::mul_pd(
-        S::cvtepi64_pd(S::add_epi64(i, S::add_epi64(j, S::add_epi64(k, l)))),
-        S::set1_pd(G4_64),
-    );
-    let x0 = S::sub_pd(x, S::sub_pd(ipd, t));
-    let y0 = S::sub_pd(y, S::sub_pd(jpd, t));
-    let z0 = S::sub_pd(z, S::sub_pd(kpd, t));
-    let w0 = S::sub_pd(w, S::sub_pd(lpd, t));
-
-    let mut rank_x = S::setzero_epi64();
-    let mut rank_y = S::setzero_epi64();
-    let mut rank_z = S::setzero_epi64();
-    let mut rank_w = S::setzero_epi64();
-
-    let cond = S::castpd_epi64(S::cmpgt_pd(x0, y0));
-    rank_x = S::add_epi64(rank_x, S::and_epi64(cond, S::set1_epi64(1)));
-    rank_y = S::add_epi64(rank_y, S::andnot_epi64(cond, S::set1_epi64(1)));
-    let cond = S::castpd_epi64(S::cmpgt_pd(x0, z0));
-    rank_x = S::add_epi64(rank_x, S::and_epi64(cond, S::set1_epi64(1)));
-    rank_z = S::add_epi64(rank_z, S::andnot_epi64(cond, S::set1_epi64(1)));
-    let cond = S::castpd_epi64(S::cmpgt_pd(x0, w0));
-    rank_x = S::add_epi64(rank_x, S::and_epi64(cond, S::set1_epi64(1)));
-    rank_w = S::add_epi64(rank_w, S::andnot_epi64(cond, S::set1_epi64(1)));
-    let cond = S::castpd_epi64(S::cmpgt_pd(y0, z0));
-    rank_y = S::add_epi64(rank_y, S::and_epi64(cond, S::set1_epi64(1)));
-    rank_z = S::add_epi64(rank_z, S::andnot_epi64(cond, S::set1_epi64(1)));
-    let cond = S::castpd_epi64(S::cmpgt_pd(y0, w0));
-    rank_y = S::add_epi64(rank_y, S::and_epi64(cond, S::set1_epi64(1)));
-    rank_w = S::add_epi64(rank_w, S::andnot_epi64(cond, S::set1_epi64(1)));
-    let cond = S::castpd_epi64(S::cmpgt_pd(z0, w0));
-    rank_z = S::add_epi64(rank_z, S::and_epi64(cond, S::set1_epi64(1)));
-    rank_w = S::add_epi64(rank_w, S::andnot_epi64(cond, S::set1_epi64(1)));
-
-    let cond = S::cmpgt_epi64(rank_x, S::set1_epi64(2));
-    let i1 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_y, S::set1_epi64(2));
-    let j1 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_z, S::set1_epi64(2));
-    let k1 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_w, S::set1_epi64(2));
-    let l1 = S::and_epi64(S::set1_epi64(1), cond);
-
-    let cond = S::cmpgt_epi64(rank_x, S::set1_epi64(1));
-    let i2 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_y, S::set1_epi64(1));
-    let j2 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_z, S::set1_epi64(1));
-    let k2 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_w, S::set1_epi64(1));
-    let l2 = S::and_epi64(S::set1_epi64(1), cond);
-
-    let cond = S::cmpgt_epi64(rank_x, S::setzero_epi64());
-    let i3 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_y, S::setzero_epi64());
-    let j3 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_z, S::setzero_epi64());
-    let k3 = S::and_epi64(S::set1_epi64(1), cond);
-    let cond = S::cmpgt_epi64(rank_w, S::setzero_epi64());
-    let l3 = S::and_epi64(S::set1_epi64(1), cond);
-
-    let x1 = S::add_pd(S::sub_pd(x0, S::cvtepi64_pd(i1)), S::set1_pd(G4_64));
-    let y1 = S::add_pd(S::sub_pd(y0, S::cvtepi64_pd(j1)), S::set1_pd(G4_64));
-    let z1 = S::add_pd(S::sub_pd(z0, S::cvtepi64_pd(k1)), S::set1_pd(G4_64));
-    let w1 = S::add_pd(S::sub_pd(w0, S::cvtepi64_pd(l1)), S::set1_pd(G4_64));
-    let x2 = S::add_pd(S::sub_pd(x0, S::cvtepi64_pd(i2)), S::set1_pd(G24_64));
-    let y2 = S::add_pd(S::sub_pd(y0, S::cvtepi64_pd(j2)), S::set1_pd(G24_64));
-    let z2 = S::add_pd(S::sub_pd(z0, S::cvtepi64_pd(k2)), S::set1_pd(G24_64));
-    let w2 = S::add_pd(S::sub_pd(w0, S::cvtepi64_pd(l2)), S::set1_pd(G24_64));
-    let x3 = S::add_pd(S::sub_pd(x0, S::cvtepi64_pd(i3)), S::set1_pd(G34_64));
-    let y3 = S::add_pd(S::sub_pd(y0, S::cvtepi64_pd(j3)), S::set1_pd(G34_64));
-    let z3 = S::add_pd(S::sub_pd(z0, S::cvtepi64_pd(k3)), S::set1_pd(G34_64));
-    let w3 = S::add_pd(S::sub_pd(w0, S::cvtepi64_pd(l3)), S::set1_pd(G34_64));
-    let x4 = S::add_pd(S::sub_pd(x0, S::set1_pd(1.0)), S::set1_pd(G44_64));
-    let y4 = S::add_pd(S::sub_pd(y0, S::set1_pd(1.0)), S::set1_pd(G44_64));
-    let z4 = S::add_pd(S::sub_pd(z0, S::set1_pd(1.0)), S::set1_pd(G44_64));
-    let w4 = S::add_pd(S::sub_pd(w0, S::set1_pd(1.0)), S::set1_pd(G44_64));
-
-    let ii = S::and_epi64(i, S::set1_epi64(0xff));
-    let jj = S::and_epi64(j, S::set1_epi64(0xff));
-    let kk = S::and_epi64(k, S::set1_epi64(0xff));
-    let ll = S::and_epi64(l, S::set1_epi64(0xff));
-
-    let lp = S::i64gather_epi64(&PERM64, ll);
-    let kp = S::i64gather_epi64(&PERM64, S::add_epi64(kk, lp));
-    let jp = S::i64gather_epi64(&PERM64, S::add_epi64(jj, kp));
-    let gi0 = S::i64gather_epi64(&PERM64, S::add_epi64(ii, jp));
-
-    let lp = S::i64gather_epi64(&PERM64, S::add_epi64(ll, l1));
-    let kp = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(kk, k1), lp));
-    let jp = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(jj, j1), kp));
-    let gi1 = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(ii, i1), jp));
-
-    let lp = S::i64gather_epi64(&PERM64, S::add_epi64(ll, l2));
-    let kp = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(kk, k2), lp));
-    let jp = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(jj, j2), kp));
-    let gi2 = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(ii, i2), jp));
-
-    let lp = S::i64gather_epi64(&PERM64, S::add_epi64(ll, l3));
-    let kp = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(kk, k3), lp));
-    let jp = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(jj, j3), kp));
-    let gi3 = S::i64gather_epi64(&PERM64, S::add_epi64(S::add_epi64(ii, i3), jp));
-
-    let lp = S::i64gather_epi64(&PERM64, S::add_epi64(ll, S::set1_epi64(1)));
-    let kp = S::i64gather_epi64(
-        &PERM64,
-        S::add_epi64(S::add_epi64(kk, S::set1_epi64(1)), lp),
-    );
-    let jp = S::i64gather_epi64(
-        &PERM64,
-        S::add_epi64(S::add_epi64(jj, S::set1_epi64(1)), kp),
-    );
-    let gi4 = S::i64gather_epi64(
-        &PERM64,
-        S::add_epi64(S::add_epi64(ii, S::set1_epi64(1)), jp),
-    );
-
-    let t0 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(
-                S::sub_pd(S::set1_pd(0.5), S::mul_pd(x0, x0)),
-                S::mul_pd(y0, y0),
-            ),
-            S::mul_pd(z0, z0),
-        ),
-        S::mul_pd(w0, w0),
-    );
-    let t1 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(
-                S::sub_pd(S::set1_pd(0.5), S::mul_pd(x1, x1)),
-                S::mul_pd(y1, y1),
-            ),
-            S::mul_pd(z1, z1),
-        ),
-        S::mul_pd(w1, w1),
-    );
-    let t2 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(
-                S::sub_pd(S::set1_pd(0.5), S::mul_pd(x2, x2)),
-                S::mul_pd(y2, y2),
-            ),
-            S::mul_pd(z2, z2),
-        ),
-        S::mul_pd(w2, w2),
-    );
-    let t3 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(
-                S::sub_pd(S::set1_pd(0.5), S::mul_pd(x3, x3)),
-                S::mul_pd(y3, y3),
-            ),
-            S::mul_pd(z3, z3),
-        ),
-        S::mul_pd(w3, w3),
-    );
-    let t4 = S::sub_pd(
-        S::sub_pd(
-            S::sub_pd(
-                S::sub_pd(S::set1_pd(0.5), S::mul_pd(x4, x4)),
-                S::mul_pd(y4, y4),
-            ),
-            S::mul_pd(z4, z4),
-        ),
-        S::mul_pd(w4, w4),
-    );
+pub fn simplex_4d<S: Simd>(x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64, seed: i64) -> S::Vf64 {
+    let s = S::Vf64::set1(F4_64) * (x + y + z + w);
+
+    let ipd = (x + s).floor();
+    let jpd = (y + s).floor();
+    let kpd = (z + s).floor();
+    let lpd = (w + s).floor();
+
+    let i = ipd.cast_i64();
+    let j = jpd.cast_i64();
+    let k = kpd.cast_i64();
+    let l = lpd.cast_i64();
+
+    let t = (i + j + k + l).cast_f64() * S::Vf64::set1(G4_64);
+    let x0 = x - (ipd - t);
+    let y0 = y - (jpd - t);
+    let z0 = z - (kpd - t);
+    let w0 = w - (lpd - t);
+
+    let mut rank_x = S::Vi64::zeroes();
+    let mut rank_y = S::Vi64::zeroes();
+    let mut rank_z = S::Vi64::zeroes();
+    let mut rank_w = S::Vi64::zeroes();
+
+    let cond = (x0.cmp_gt(y0)).bitcast_i64();
+    rank_x = rank_x + (cond & S::Vi64::set1(1));
+    rank_y = rank_y + S::Vi64::set1(1).and_not(cond);
+    let cond = (x0.cmp_gt(z0)).bitcast_i64();
+    rank_x = rank_x + (cond & S::Vi64::set1(1));
+    rank_z = rank_z + S::Vi64::set1(1).and_not(cond);
+    let cond = (x0.cmp_gt(w0)).bitcast_i64();
+    rank_x = rank_x + (cond & S::Vi64::set1(1));
+    rank_w = rank_w + S::Vi64::set1(1).and_not(cond);
+    let cond = (y0.cmp_gt(z0)).bitcast_i64();
+    rank_y = rank_y + (cond & S::Vi64::set1(1));
+    rank_z = rank_z + S::Vi64::set1(1).and_not(cond);
+    let cond = (y0.cmp_gt(w0)).bitcast_i64();
+    rank_y = rank_y + (cond & S::Vi64::set1(1));
+    rank_w = rank_w + S::Vi64::set1(1).and_not(cond);
+    let cond = (z0.cmp_gt(w0)).bitcast_i64();
+    rank_z = rank_z + (cond & S::Vi64::set1(1));
+    rank_w = rank_w + S::Vi64::set1(1).and_not(cond);
+
+    let cond = rank_x.cmp_gt(S::Vi64::set1(2));
+    let i1 = S::Vi64::set1(1) & cond;
+    let cond = rank_y.cmp_gt(S::Vi64::set1(2));
+    let j1 = S::Vi64::set1(1) & cond;
+    let cond = rank_z.cmp_gt(S::Vi64::set1(2));
+    let k1 = S::Vi64::set1(1) & cond;
+    let cond = rank_w.cmp_gt(S::Vi64::set1(2));
+    let l1 = S::Vi64::set1(1) & cond;
+
+    let cond = rank_x.cmp_gt(S::Vi64::set1(1));
+    let i2 = S::Vi64::set1(1) & cond;
+    let cond = rank_y.cmp_gt(S::Vi64::set1(1));
+    let j2 = S::Vi64::set1(1) & cond;
+    let cond = rank_z.cmp_gt(S::Vi64::set1(1));
+    let k2 = S::Vi64::set1(1) & cond;
+    let cond = rank_w.cmp_gt(S::Vi64::set1(1));
+    let l2 = S::Vi64::set1(1) & cond;
+
+    let cond = rank_x.cmp_gt(S::Vi64::zeroes());
+    let i3 = S::Vi64::set1(1) & cond;
+    let cond = rank_y.cmp_gt(S::Vi64::zeroes());
+    let j3 = S::Vi64::set1(1) & cond;
+    let cond = rank_z.cmp_gt(S::Vi64::zeroes());
+    let k3 = S::Vi64::set1(1) & cond;
+    let cond = rank_w.cmp_gt(S::Vi64::zeroes());
+    let l3 = S::Vi64::set1(1) & cond;
+
+    let x1 = x0 - i1.cast_f64() + S::Vf64::set1(G4_64);
+    let y1 = y0 - j1.cast_f64() + S::Vf64::set1(G4_64);
+    let z1 = z0 - k1.cast_f64() + S::Vf64::set1(G4_64);
+    let w1 = w0 - l1.cast_f64() + S::Vf64::set1(G4_64);
+    let x2 = x0 - i2.cast_f64() + S::Vf64::set1(G24_64);
+    let y2 = y0 - j2.cast_f64() + S::Vf64::set1(G24_64);
+    let z2 = z0 - k2.cast_f64() + S::Vf64::set1(G24_64);
+    let w2 = w0 - l2.cast_f64() + S::Vf64::set1(G24_64);
+    let x3 = x0 - i3.cast_f64() + S::Vf64::set1(G34_64);
+    let y3 = y0 - j3.cast_f64() + S::Vf64::set1(G34_64);
+    let z3 = z0 - k3.cast_f64() + S::Vf64::set1(G34_64);
+    let w3 = w0 - l3.cast_f64() + S::Vf64::set1(G34_64);
+    let x4 = x0 - S::Vf64::set1(1.0) + S::Vf64::set1(G44_64);
+    let y4 = y0 - S::Vf64::set1(1.0) + S::Vf64::set1(G44_64);
+    let z4 = z0 - S::Vf64::set1(1.0) + S::Vf64::set1(G44_64);
+    let w4 = w0 - S::Vf64::set1(1.0) + S::Vf64::set1(G44_64);
+
+    let ii = i & S::Vi64::set1(0xff);
+    let jj = j & S::Vi64::set1(0xff);
+    let kk = k & S::Vi64::set1(0xff);
+    let ll = l & S::Vi64::set1(0xff);
+
+    let (gi0, gi1, gi2, gi3, gi4) = unsafe {
+        // SAFETY: ii, jj, kk and ll are masked with 0xff (0..=255) and the offsets added are 0 or 1; per the original
+        // note, PERM64 entries are also 0..=255, so every index is <= 511 (assumes PERM64.len() >= 512 - confirm).
+        let lp = gather_64::<S>(&PERM64, ll);
+        let kp = gather_64::<S>(&PERM64, kk + lp);
+        let jp = gather_64::<S>(&PERM64, jj + kp);
+        let gi0 = gather_64::<S>(&PERM64, ii + jp);
+
+        let lp = gather_64::<S>(&PERM64, ll + l1);
+        let kp = gather_64::<S>(&PERM64, kk + k1 + lp);
+        let jp = gather_64::<S>(&PERM64, jj + j1 + kp);
+        let gi1 = gather_64::<S>(&PERM64, ii + i1 + jp);
+
+        let lp = gather_64::<S>(&PERM64, ll + l2);
+        let kp = gather_64::<S>(&PERM64, kk + k2 + lp);
+        let jp = gather_64::<S>(&PERM64, jj + j2 + kp);
+        let gi2 = gather_64::<S>(&PERM64, ii + i2 + jp);
+
+        let lp = gather_64::<S>(&PERM64, ll + l3);
+        let kp = gather_64::<S>(&PERM64, kk + k3 + lp);
+        let jp = gather_64::<S>(&PERM64, jj + j3 + kp);
+        let gi3 = gather_64::<S>(&PERM64, ii + i3 + jp);
+
+        let lp = gather_64::<S>(&PERM64, ll + S::Vi64::set1(1));
+        let kp = gather_64::<S>(&PERM64, kk + S::Vi64::set1(1) + lp);
+        let jp = gather_64::<S>(&PERM64, jj + S::Vi64::set1(1) + kp);
+        let gi4 = gather_64::<S>(&PERM64, ii + S::Vi64::set1(1) + jp);
+        (gi0, gi1, gi2, gi3, gi4)
+    };
+
+    let t0 = S::Vf64::set1(0.5) - (x0 * x0) - (y0 * y0) - (z0 * z0) - (w0 * w0);
+    let t1 = S::Vf64::set1(0.5) - (x1 * x1) - (y1 * y1) - (z1 * z1) - (w1 * w1);
+    let t2 = S::Vf64::set1(0.5) - (x2 * x2) - (y2 * y2) - (z2 * z2) - (w2 * w2);
+    let t3 = S::Vf64::set1(0.5) - (x3 * x3) - (y3 * y3) - (z3 * z3) - (w3 * w3);
+    let t4 = S::Vf64::set1(0.5) - (x4 * x4) - (y4 * y4) - (z4 * z4) - (w4 * w4);
     //ti*ti*ti*ti
-    let mut t0q = S::mul_pd(t0, t0);
-    t0q = S::mul_pd(t0q, t0q);
-    let mut t1q = S::mul_pd(t1, t1);
-    t1q = S::mul_pd(t1q, t1q);
-    let mut t2q = S::mul_pd(t2, t2);
-    t2q = S::mul_pd(t2q, t2q);
-    let mut t3q = S::mul_pd(t3, t3);
-    t3q = S::mul_pd(t3q, t3q);
-    let mut t4q = S::mul_pd(t4, t4);
-    t4q = S::mul_pd(t4q, t4q);
-
-    let mut n0 = S::mul_pd(t0q, grad4::<S>(seed, gi0, x0, y0, z0, w0));
-    let mut n1 = S::mul_pd(t1q, grad4::<S>(seed, gi1, x1, y1, z1, w1));
-    let mut n2 = S::mul_pd(t2q, grad4::<S>(seed, gi2, x2, y2, z2, w2));
-    let mut n3 = S::mul_pd(t3q, grad4::<S>(seed, gi3, x3, y3, z3, w3));
-    let mut n4 = S::mul_pd(t4q, grad4::<S>(seed, gi4, x4, y4, z4, w4));
+    let mut t0q = t0 * t0;
+    t0q = t0q * t0q;
+    let mut t1q = t1 * t1;
+    t1q = t1q * t1q;
+    let mut t2q = t2 * t2;
+    t2q = t2q * t2q;
+    let mut t3q = t3 * t3;
+    t3q = t3q * t3q;
+    let mut t4q = t4 * t4;
+    t4q = t4q * t4q;
+
+    let mut n0 = t0q * grad4::<S>(seed, gi0, x0, y0, z0, w0);
+    let mut n1 = t1q * grad4::<S>(seed, gi1, x1, y1, z1, w1);
+    let mut n2 = t2q * grad4::<S>(seed, gi2, x2, y2, z2, w2);
+    let mut n3 = t3q * grad4::<S>(seed, gi3, x3, y3, z3, w3);
+    let mut n4 = t4q * grad4::<S>(seed, gi4, x4, y4, z4, w4);
 
     //if ti < 0 then 0 else ni
-    let mut cond = S::cmplt_pd(t0, S::setzero_pd());
-    n0 = S::andnot_pd(cond, n0);
-    cond = S::cmplt_pd(t1, S::setzero_pd());
-    n1 = S::andnot_pd(cond, n1);
-    cond = S::cmplt_pd(t2, S::setzero_pd());
-    n2 = S::andnot_pd(cond, n2);
-    cond = S::cmplt_pd(t3, S::setzero_pd());
-    n3 = S::andnot_pd(cond, n3);
-    cond = S::cmplt_pd(t4, S::setzero_pd());
-    n4 = S::andnot_pd(cond, n4);
-
-    S::add_pd(n0, S::add_pd(n1, S::add_pd(n2, S::add_pd(n3, n4)))) * S::set1_pd(62.77772078955791)
+    let mut cond = t0.cmp_lt(S::Vf64::zeroes());
+    n0 = n0.and_not(cond);
+    cond = t1.cmp_lt(S::Vf64::zeroes());
+    n1 = n1.and_not(cond);
+    cond = t2.cmp_lt(S::Vf64::zeroes());
+    n2 = n2.and_not(cond);
+    cond = t3.cmp_lt(S::Vf64::zeroes());
+    n3 = n3.and_not(cond);
+    cond = t4.cmp_lt(S::Vf64::zeroes());
+    n4 = n4.and_not(cond);
+
+    (n0 + (n1 + (n2 + (n3 + n4)))) * S::Vf64::set1(62.77772078955791)
 }
 
 #[cfg(test)]
@@ -643,12 +547,12 @@ mod tests {
     }
 
     #[test]
-    fn simplex_1d_range() {
+    fn test_noise_simplex64_1d_range() {
         for seed in 0..10 {
             let mut min = f64::INFINITY;
             let mut max = -f64::INFINITY;
             for x in 0..1000 {
-                let n = unsafe { simplex_1d::<Scalar>(F64x1(x as f64 / 10.0), seed).0 };
+                let n = simplex_1d::<Scalar>(F64x1(x as f64 / 10.0), seed).0;
                 min = min.min(n);
                 max = max.max(n);
             }
@@ -657,7 +561,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_1d_deriv_sanity() {
+    fn test_noise_simplex64_1d_deriv_sanity() {
         let mut avg_err = 0.0;
         const SEEDS: i64 = 10;
         const POINTS: i64 = 1000;
@@ -666,9 +570,9 @@ mod tests {
                 // Offset a bit so we don't check derivative at lattice points, where it's always zero
                 let center = x as f64 / 10.0 + 0.1234;
                 const H: f64 = 0.01;
-                let n0 = unsafe { simplex_1d::<Scalar>(F64x1(center - H), seed).0 };
-                let (n1, d1) = unsafe { simplex_1d_deriv::<Scalar>(F64x1(center), seed) };
-                let n2 = unsafe { simplex_1d::<Scalar>(F64x1(center + H), seed).0 };
+                let n0 = simplex_1d::<Scalar>(F64x1(center - H), seed).0;
+                let (n1, d1) = simplex_1d_deriv::<Scalar>(F64x1(center), seed);
+                let n2 = simplex_1d::<Scalar>(F64x1(center + H), seed).0;
                 let (n1, d1) = (n1.0, d1.0);
                 avg_err += ((n2 - (n1 + d1 * H)).abs() + (n0 - (n1 - d1 * H)).abs())
                     / (SEEDS * POINTS * 2) as f64;
@@ -678,15 +582,15 @@ mod tests {
     }
 
     #[test]
-    fn simplex_2d_range() {
+    fn test_noise_simplex64_2d_range() {
         for seed in 0..10 {
             let mut min = f64::INFINITY;
             let mut max = -f64::INFINITY;
             for y in 0..10 {
                 for x in 0..100 {
-                    let n = unsafe {
-                        simplex_2d::<Scalar>(F64x1(x as f64 / 10.0), F64x1(y as f64 / 10.0), seed).0
-                    };
+                    let n =
+                        simplex_2d::<Scalar>(F64x1(x as f64 / 10.0), F64x1(y as f64 / 10.0), seed)
+                            .0;
                     min = min.min(n);
                     max = max.max(n);
                 }
@@ -696,7 +600,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_2d_deriv_sanity() {
+    fn test_noise_simplex64_2d_deriv_sanity() {
         let mut avg_err = 0.0;
         const SEEDS: i64 = 10;
         const POINTS: i64 = 10;
@@ -707,22 +611,13 @@ mod tests {
                     let center_x = x as f64 / 10.0 + 0.1234;
                     let center_y = y as f64 / 10.0 + 0.1234;
                     const H: f64 = 0.01;
-                    let (value, d) = unsafe {
-                        simplex_2d_deriv::<Scalar>(F64x1(center_x), F64x1(center_y), seed)
-                    };
+                    let (value, d) =
+                        simplex_2d_deriv::<Scalar>(F64x1(center_x), F64x1(center_y), seed);
                     let (value, d) = (value.0, [d[0].0, d[1].0]);
-                    let left = unsafe {
-                        simplex_2d::<Scalar>(F64x1(center_x - H), F64x1(center_y), seed).0
-                    };
-                    let right = unsafe {
-                        simplex_2d::<Scalar>(F64x1(center_x + H), F64x1(center_y), seed).0
-                    };
-                    let down = unsafe {
-                        simplex_2d::<Scalar>(F64x1(center_x), F64x1(center_y - H), seed).0
-                    };
-                    let up = unsafe {
-                        simplex_2d::<Scalar>(F64x1(center_x), F64x1(center_y + H), seed).0
-                    };
+                    let left = simplex_2d::<Scalar>(F64x1(center_x - H), F64x1(center_y), seed).0;
+                    let right = simplex_2d::<Scalar>(F64x1(center_x + H), F64x1(center_y), seed).0;
+                    let down = simplex_2d::<Scalar>(F64x1(center_x), F64x1(center_y - H), seed).0;
+                    let up = simplex_2d::<Scalar>(F64x1(center_x), F64x1(center_y + H), seed).0;
                     avg_err += ((left - (value - d[0] * H)).abs()
                         + (right - (value + d[0] * H)).abs()
                         + (down - (value - d[1] * H)).abs()
@@ -734,24 +629,22 @@ mod tests {
         assert!(avg_err < 1e-3);
     }
 
-    #[ignore]
     #[test]
-    fn simplex_3d_range() {
+    #[should_panic(expected = "not implemented")]
+    fn test_noise_simplex64_3d_range() {
         let mut min = f64::INFINITY;
         let mut max = -f64::INFINITY;
         const SEED: i64 = 0;
         for z in 0..10 {
             for y in 0..10 {
                 for x in 0..10000 {
-                    let n = unsafe {
-                        simplex_3d::<Scalar>(
-                            F64x1(x as f64 / 10.0),
-                            F64x1(y as f64 / 10.0),
-                            F64x1(z as f64 / 10.0),
-                            SEED,
-                        )
-                        .0
-                    };
+                    let n = simplex_3d::<Scalar>(
+                        F64x1(x as f64 / 10.0),
+                        F64x1(y as f64 / 10.0),
+                        F64x1(z as f64 / 10.0),
+                        SEED,
+                    )
+                    .0;
                     min = min.min(n);
                     max = max.max(n);
                 }
@@ -760,9 +653,9 @@ mod tests {
         check_bounds(min, max);
     }
 
-    #[ignore]
     #[test]
-    fn simplex_3d_deriv_sanity() {
+    #[should_panic(expected = "not implemented")]
+    fn test_noise_simplex64_3d_deriv_sanity() {
         let mut avg_err = 0.0;
         const POINTS: i64 = 10;
         const SEED: i64 = 0;
@@ -774,42 +667,34 @@ mod tests {
                     let center_y = y as f64 / 10.0 + 0.1234;
                     let center_z = z as f64 / 10.0 + 0.1234;
                     const H: f64 = 0.01;
-                    let (value, d) = unsafe {
-                        simplex_3d_deriv::<Scalar>(
-                            F64x1(center_x),
-                            F64x1(center_y),
-                            F64x1(center_z),
-                            SEED,
-                        )
-                    };
+                    let (value, d) = simplex_3d_deriv::<Scalar>(
+                        F64x1(center_x),
+                        F64x1(center_y),
+                        F64x1(center_z),
+                        SEED,
+                    );
                     let (value, d) = (value.0, [d[0].0, d[1].0, d[2].0]);
-                    let right = unsafe {
-                        simplex_3d::<Scalar>(
-                            F64x1(center_x + H),
-                            F64x1(center_y),
-                            F64x1(center_z),
-                            SEED,
-                        )
-                        .0
-                    };
-                    let up = unsafe {
-                        simplex_3d::<Scalar>(
-                            F64x1(center_x),
-                            F64x1(center_y + H),
-                            F64x1(center_z),
-                            SEED,
-                        )
-                        .0
-                    };
-                    let forward = unsafe {
-                        simplex_3d::<Scalar>(
-                            F64x1(center_x),
-                            F64x1(center_y),
-                            F64x1(center_z + H),
-                            SEED,
-                        )
-                        .0
-                    };
+                    let right = simplex_3d::<Scalar>(
+                        F64x1(center_x + H),
+                        F64x1(center_y),
+                        F64x1(center_z),
+                        SEED,
+                    )
+                    .0;
+                    let up = simplex_3d::<Scalar>(
+                        F64x1(center_x),
+                        F64x1(center_y + H),
+                        F64x1(center_z),
+                        SEED,
+                    )
+                    .0;
+                    let forward = simplex_3d::<Scalar>(
+                        F64x1(center_x),
+                        F64x1(center_y),
+                        F64x1(center_z + H),
+                        SEED,
+                    )
+                    .0;
                     avg_err += ((right - (value + d[0] * H)).abs()
                         + (up - (value + d[1] * H)).abs()
                         + (forward - (value + d[2] * H)).abs())
@@ -821,7 +706,7 @@ mod tests {
     }
 
     #[test]
-    fn simplex_4d_range() {
+    fn test_noise_simplex64_4d_range() {
         let mut min = f64::INFINITY;
         let mut max = -f64::INFINITY;
         const SEED: i64 = 0;
@@ -829,16 +714,14 @@ mod tests {
             for z in 0..10 {
                 for y in 0..10 {
                     for x in 0..1000 {
-                        let n = unsafe {
-                            simplex_4d::<Scalar>(
-                                F64x1(x as f64 / 10.0),
-                                F64x1(y as f64 / 10.0),
-                                F64x1(z as f64 / 10.0),
-                                F64x1(w as f64 / 10.0),
-                                SEED,
-                            )
-                            .0
-                        };
+                        let n = simplex_4d::<Scalar>(
+                            F64x1(x as f64 / 10.0),
+                            F64x1(y as f64 / 10.0),
+                            F64x1(z as f64 / 10.0),
+                            F64x1(w as f64 / 10.0),
+                            SEED,
+                        )
+                        .0;
                         min = min.min(n);
                         max = max.max(n);
                     }
diff --git a/src/noise/turbulence_32.rs b/src/noise/turbulence_32.rs
index 82d2970..f20388f 100644
--- a/src/noise/turbulence_32.rs
+++ b/src/noise/turbulence_32.rs
@@ -1,29 +1,29 @@
 use crate::noise::simplex_32::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn turbulence_1d<S: Simd>(
+pub fn turbulence_1d<S: Simd>(
     mut x: S::Vf32,
     lacunarity: S::Vf32,
     gain: S::Vf32,
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut amp = S::set1_ps(1.0);
-    let mut result = S::abs_ps(simplex_1d::<S>(x, seed));
+    let mut amp = S::Vf32::set1(1.0);
+    let mut result = simplex_1d::<S>(x, seed).abs();
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lacunarity);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(result, S::abs_ps(simplex_1d::<S>(x, seed)));
+        x = x * lacunarity;
+        amp = amp * gain;
+        result = result + (simplex_1d::<S>(x, seed) * amp).abs(); // apply per-octave amplitude
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn turbulence_2d<S: Simd>(
+pub fn turbulence_2d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     lac: S::Vf32,
@@ -31,25 +31,22 @@ pub unsafe fn turbulence_2d<S: Simd>(
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut result = S::abs_ps(simplex_2d::<S>(x, y, seed));
+    let mut result = simplex_2d::<S>(x, y, seed).abs();
 
-    let mut amp = S::set1_ps(1.0);
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::abs_ps(S::mul_ps(simplex_2d::<S>(x, y, seed), amp)),
-        );
+        x = x * lac;
+        y = y * lac;
+        amp = amp * gain;
+        result = result + (simplex_2d::<S>(x, y, seed) * amp).abs();
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn turbulence_3d<S: Simd>(
+pub fn turbulence_3d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     mut z: S::Vf32,
@@ -58,25 +55,22 @@ pub unsafe fn turbulence_3d<S: Simd>(
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut result = S::abs_ps(simplex_3d::<S>(x, y, z, seed));
-    let mut amp = S::set1_ps(1.0);
+    let mut result = simplex_3d::<S>(x, y, z, seed).abs();
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        z = S::mul_ps(z, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::abs_ps(S::mul_ps(simplex_3d::<S>(x, y, z, seed), amp)),
-        );
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        amp = amp * gain;
+        result = result + (simplex_3d::<S>(x, y, z, seed) * amp).abs();
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn turbulence_4d<S: Simd>(
+pub fn turbulence_4d<S: Simd>(
     mut x: S::Vf32,
     mut y: S::Vf32,
     mut z: S::Vf32,
@@ -86,19 +80,16 @@ pub unsafe fn turbulence_4d<S: Simd>(
     octaves: u8,
     seed: i32,
 ) -> S::Vf32 {
-    let mut result = S::abs_ps(simplex_4d::<S>(x, y, z, w, seed));
-    let mut amp = S::set1_ps(1.0);
+    let mut result = simplex_4d::<S>(x, y, z, w, seed).abs();
+    let mut amp = S::Vf32::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_ps(x, lac);
-        y = S::mul_ps(y, lac);
-        z = S::mul_ps(z, lac);
-        w = S::mul_ps(w, lac);
-        amp = S::mul_ps(amp, gain);
-        result = S::add_ps(
-            result,
-            S::abs_ps(S::mul_ps(simplex_4d::<S>(x, y, z, w, seed), amp)),
-        );
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        w = w * lac;
+        amp = amp * gain;
+        result = result + (simplex_4d::<S>(x, y, z, w, seed) * amp).abs();
     }
 
     result
diff --git a/src/noise/turbulence_64.rs b/src/noise/turbulence_64.rs
index e58eff0..c73d0d9 100644
--- a/src/noise/turbulence_64.rs
+++ b/src/noise/turbulence_64.rs
@@ -1,29 +1,29 @@
 use crate::noise::simplex_64::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
 
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 #[inline(always)]
-pub unsafe fn turbulence_1d<S: Simd>(
+pub fn turbulence_1d<S: Simd>(
     mut x: S::Vf64,
     lacunarity: S::Vf64,
     gain: S::Vf64,
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut amp = S::set1_pd(1.0);
-    let mut result = S::abs_pd(simplex_1d::<S>(x, seed));
+    let mut amp = S::Vf64::set1(1.0);
+    let mut result = simplex_1d::<S>(x, seed).abs();
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lacunarity);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(result, S::abs_pd(simplex_1d::<S>(x, seed)));
+        x = x * lacunarity;
+        amp = amp * gain;
+        result = result + (simplex_1d::<S>(x, seed) * amp).abs(); // apply per-octave amplitude
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn turbulence_2d<S: Simd>(
+pub fn turbulence_2d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     lac: S::Vf64,
@@ -31,25 +31,22 @@ pub unsafe fn turbulence_2d<S: Simd>(
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut result = S::abs_pd(simplex_2d::<S>(x, y, seed));
+    let mut result = simplex_2d::<S>(x, y, seed).abs();
 
-    let mut amp = S::set1_pd(1.0);
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::abs_pd(S::mul_pd(simplex_2d::<S>(x, y, seed), amp)),
-        );
+        x = x * lac;
+        y = y * lac;
+        amp = amp * gain;
+        result = result + (simplex_2d::<S>(x, y, seed) * amp).abs();
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn turbulence_3d<S: Simd>(
+pub fn turbulence_3d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     mut z: S::Vf64,
@@ -58,25 +55,22 @@ pub unsafe fn turbulence_3d<S: Simd>(
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut result = S::abs_pd(simplex_3d::<S>(x, y, z, seed));
-    let mut amp = S::set1_pd(1.0);
+    let mut result = simplex_3d::<S>(x, y, z, seed).abs();
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        z = S::mul_pd(z, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::abs_pd(S::mul_pd(simplex_3d::<S>(x, y, z, seed), amp)),
-        );
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        amp = amp * gain;
+        result = result + (simplex_3d::<S>(x, y, z, seed) * amp).abs();
     }
 
     result
 }
 
 #[inline(always)]
-pub unsafe fn turbulence_4d<S: Simd>(
+pub fn turbulence_4d<S: Simd>(
     mut x: S::Vf64,
     mut y: S::Vf64,
     mut z: S::Vf64,
@@ -86,19 +80,16 @@ pub unsafe fn turbulence_4d<S: Simd>(
     octaves: u8,
     seed: i64,
 ) -> S::Vf64 {
-    let mut result = S::abs_pd(simplex_4d::<S>(x, y, z, w, seed));
-    let mut amp = S::set1_pd(1.0);
+    let mut result = simplex_4d::<S>(x, y, z, w, seed).abs();
+    let mut amp = S::Vf64::set1(1.0);
 
     for _ in 1..octaves {
-        x = S::mul_pd(x, lac);
-        y = S::mul_pd(y, lac);
-        z = S::mul_pd(z, lac);
-        w = S::mul_pd(w, lac);
-        amp = S::mul_pd(amp, gain);
-        result = S::add_pd(
-            result,
-            S::abs_pd(S::mul_pd(simplex_4d::<S>(x, y, z, w, seed), amp)),
-        );
+        x = x * lac;
+        y = y * lac;
+        z = z * lac;
+        w = w * lac;
+        amp = amp * gain;
+        result = result + (simplex_4d::<S>(x, y, z, w, seed) * amp).abs();
     }
 
     result
diff --git a/src/noise_helpers_32.rs b/src/noise_helpers_32.rs
index b9af118..29bb17d 100644
--- a/src/noise_helpers_32.rs
+++ b/src/noise_helpers_32.rs
@@ -1,54 +1,59 @@
 use crate::dimensional_being::DimensionalBeing;
-use crate::NoiseType;
+use crate::{
+    Cellular2Settings, CellularSettings, FbmSettings, GradientSettings, NoiseType, RidgeSettings,
+    Settings, TurbulenceSettings,
+};
 
-use crate::noise::cell2_32::{cellular2_2d, cellular2_3d};
-use crate::noise::cell_32::{cellular_2d, cellular_3d};
-use crate::noise::fbm_32::{fbm_1d, fbm_2d, fbm_3d, fbm_4d};
-use crate::noise::ridge_32::{ridge_1d, ridge_2d, ridge_3d, ridge_4d};
-use crate::noise::simplex_32::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
-use crate::noise::turbulence_32::{turbulence_1d, turbulence_2d, turbulence_3d, turbulence_4d};
-
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 use std::f32;
 
-macro_rules! get_1d_noise_helper_f32 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*) => {
- {
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_ps($Setting.freq_x);
+pub trait Sample32<S: Simd>: DimensionalBeing + Settings { // f32 sampling interface the get_Nd_noise_helper_f32 functions are generic over
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32; // the 1d helper passes x already multiplied by get_freq_x()
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32; // the 2d helper passes x, y pre-scaled by get_freq_x()/get_freq_y()
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32; // as above, with z scaled by get_freq_z()
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32; // as above, with w scaled by get_freq_w()
+}
+
+#[inline(always)]
+unsafe fn get_1d_noise_helper_f32<S: Simd, Settings: Sample32<S>>(
+    settings: Settings,
+) -> (Vec<f32>, f32, f32) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf32::set1(settings.get_freq_x());
     let start_x = dim.x;
     let width = dim.width;
-    let mut min_s = S::set1_ps(f32::MAX);
-    let mut max_s = S::set1_ps(f32::MIN);
+    let mut min_s = S::Vf32::set1(f32::MAX);
+    let mut max_s = S::Vf32::set1(f32::MIN);
 
     let mut min = f32::MAX;
     let mut max = f32::MIN;
 
-    let mut result: Vec<f32> = Vec::with_capacity(width);
-    result.set_len(width);
+    let mut result = Vec::<f32>::with_capacity(width);
+    let result_ptr = result.as_mut_ptr();
     let mut i = 0;
-    let vector_width = S::VF32_WIDTH;
+    let vector_width = S::Vf32::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f32>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f32;
+        x_ptr.add(i).write(start_x + i as f32);
     }
-    let mut x = S::loadu_ps(&x_arr[0]);
+    x_arr.set_len(vector_width);
+    let mut x = S::Vf32::load_from_ptr_unaligned(x_ptr);
     for _ in 0..width / vector_width {
-        let f = $f(S::mul_ps(x, freq_x) $(,$arg)*);
-        max_s = S::max_ps(max_s, f);
-        min_s = S::min_ps(min_s, f);
-        S::storeu_ps(result.get_unchecked_mut(i), f);
+        let f = settings.sample_1d(x * freq_x);
+        max_s = max_s.max(f);
+        min_s = min_s.min(f);
+        f.copy_to_ptr_unaligned(result_ptr.add(i));
         i += vector_width;
-        x = S::add_ps(x, S::set1_ps(vector_width as f32));
+        x = x + S::Vf32::set1(vector_width as f32);
     }
     if remainder != 0 {
-        let f = $f(S::mul_ps(x, freq_x) $(,$arg)*);
+        let f = settings.sample_1d(x * freq_x);
         for j in 0..remainder {
             let n = f[j];
-            *result.get_unchecked_mut(i) = n;
+            result_ptr.add(i).write(n);
             // Note: This is unecessary for large images
             if n < min {
                 min = n;
@@ -59,6 +64,7 @@ macro_rules! get_1d_noise_helper_f32 {
             i += 1;
         }
     }
+    result.set_len(width);
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -68,51 +74,52 @@ macro_rules! get_1d_noise_helper_f32 {
         }
     }
     (result, min, max)
- }
-    }
 }
 
-macro_rules! get_2d_noise_helper_f32 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*)=> {{
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_ps($Setting.freq_x);
-    let freq_y = S::set1_ps($Setting.freq_y);
+#[inline(always)]
+unsafe fn get_2d_noise_helper_f32<S: Simd, Settings: Sample32<S>>(
+    settings: Settings,
+) -> (Vec<f32>, f32, f32) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf32::set1(settings.get_freq_x());
+    let freq_y = S::Vf32::set1(settings.get_freq_y());
     let start_x = dim.x;
     let width = dim.width;
     let start_y = dim.y;
     let height = dim.height;
 
-    let mut min_s = S::set1_ps(f32::MAX);
-    let mut max_s = S::set1_ps(f32::MIN);
+    let mut min_s = S::Vf32::set1(f32::MAX);
+    let mut max_s = S::Vf32::set1(f32::MIN);
     let mut min = f32::MAX;
     let mut max = f32::MIN;
 
-    let mut result = Vec::with_capacity(width * height);
-    result.set_len(width * height);
-    let mut y = S::set1_ps(start_y);
+    let mut result = Vec::<f32>::with_capacity(width * height);
+    let result_ptr = result.as_mut_ptr();
+    let mut y = S::Vf32::set1(start_y);
     let mut i = 0;
-    let vector_width = S::VF32_WIDTH;
+    let vector_width = S::Vf32::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f32>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f32;
+        x_ptr.add(i).write(start_x + i as f32);
     }
+    x_arr.set_len(vector_width);
     for _ in 0..height {
-        let mut x = S::loadu_ps(&x_arr[0]);
+        let mut x = S::Vf32::load_from_ptr_unaligned(x_ptr);
         for _ in 0..width / vector_width {
-            let f = $f(S::mul_ps(x, freq_x), S::mul_ps(y, freq_y) $(,$arg)*);
-            max_s = S::max_ps(max_s, f);
-            min_s = S::min_ps(min_s, f);
-            S::storeu_ps(result.get_unchecked_mut(i), f);
+            let f = settings.sample_2d(x * freq_x, y * freq_y);
+            max_s = max_s.max(f);
+            min_s = min_s.min(f);
+            f.copy_to_ptr_unaligned(result_ptr.add(i));
             i += vector_width;
-            x = S::add_ps(x, S::set1_ps(vector_width as f32));
+            x = x + S::Vf32::set1(vector_width as f32);
         }
         if remainder != 0 {
-            let f = $f(S::mul_ps(x, freq_x), S::mul_ps(y, freq_y) $(,$arg)*);
+            let f = settings.sample_2d(x * freq_x, y * freq_y);
             for j in 0..remainder {
                 let n = f[j];
-                *result.get_unchecked_mut(i) = n;
+                result_ptr.add(i).write(n);
                 if n < min {
                     min = n;
                 }
@@ -122,8 +129,9 @@ macro_rules! get_2d_noise_helper_f32 {
                 i += 1;
             }
         }
-        y = S::add_ps(y, S::set1_ps(1.0));
+        y = y + S::Vf32::set1(1.0);
     }
+    result.set_len(width * height);
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -133,16 +141,16 @@ macro_rules! get_2d_noise_helper_f32 {
         }
     }
     (result, min, max)
-
-}};
 }
 
-macro_rules! get_3d_noise_helper_f32 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*) => {{
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_ps($Setting.freq_x);
-    let freq_y = S::set1_ps($Setting.freq_y);
-    let freq_z = S::set1_ps($Setting.freq_z);
+#[inline(always)]
+unsafe fn get_3d_noise_helper_f32<S: Simd, Settings: Sample32<S>>(
+    settings: Settings,
+) -> (Vec<f32>, f32, f32) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf32::set1(settings.get_freq_x());
+    let freq_y = S::Vf32::set1(settings.get_freq_y());
+    let freq_z = S::Vf32::set1(settings.get_freq_z());
     let start_x = dim.x;
     let width = dim.width;
     let start_y = dim.y;
@@ -150,40 +158,41 @@ macro_rules! get_3d_noise_helper_f32 {
     let start_z = dim.z;
     let depth = dim.depth;
 
-    let mut min_s = S::set1_ps(f32::MAX);
-    let mut max_s = S::set1_ps(f32::MIN);
+    let mut min_s = S::Vf32::set1(f32::MAX);
+    let mut max_s = S::Vf32::set1(f32::MIN);
     let mut min = f32::MAX;
     let mut max = f32::MIN;
 
-    let mut result = Vec::with_capacity(width * height * depth);
-    result.set_len(width * height * depth);
+    let mut result = Vec::<f32>::with_capacity(width * height * depth);
+    let result_ptr = result.as_mut_ptr();
     let mut i = 0;
-    let vector_width = S::VF32_WIDTH;
+    let vector_width = S::Vf32::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f32>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f32;
+        x_ptr.add(i).write(start_x + i as f32);
     }
+    x_arr.set_len(vector_width);
 
-    let mut z = S::set1_ps(start_z);
+    let mut z = S::Vf32::set1(start_z);
     for _ in 0..depth {
-        let mut y = S::set1_ps(start_y);
+        let mut y = S::Vf32::set1(start_y);
         for _ in 0..height {
-            let mut x = S::loadu_ps(&x_arr[0]);
+            let mut x = S::Vf32::load_from_ptr_unaligned(&x_arr[0]);
             for _ in 0..width / vector_width {
-                let f = $f(S::mul_ps(x, freq_x), S::mul_ps(y, freq_y), S::mul_ps(z, freq_z) $(,$arg)*);
-                max_s = S::max_ps(max_s, f);
-                min_s = S::min_ps(min_s, f);
-                S::storeu_ps(result.get_unchecked_mut(i), f);
+                let f = settings.sample_3d(x * freq_x, y * freq_y, z * freq_z);
+                max_s = max_s.max(f);
+                min_s = min_s.min(f);
+                f.copy_to_ptr_unaligned(result_ptr.add(i));
                 i += vector_width;
-                x = S::add_ps(x, S::set1_ps(vector_width as f32));
+                x = x + S::Vf32::set1(vector_width as f32);
             }
             if remainder != 0 {
-            let f = $f(S::mul_ps(x, freq_x), S::mul_ps(y, freq_y), S::mul_ps(z, freq_z) $(,$arg)*);
+                let f = settings.sample_3d(x * freq_x, y * freq_y, z * freq_z);
                 for j in 0..remainder {
                     let n = f[j];
-                    *result.get_unchecked_mut(i) = n;
+                    result_ptr.add(i).write(n);
                     if n < min {
                         min = n;
                     }
@@ -193,10 +202,11 @@ macro_rules! get_3d_noise_helper_f32 {
                     i += 1;
                 }
             }
-            y = S::add_ps(y, S::set1_ps(1.0));
+            y = y + S::Vf32::set1(1.0);
         }
-        z = S::add_ps(z, S::set1_ps(1.0));
+        z = z + S::Vf32::set1(1.0);
     }
+    result.set_len(width * height * depth);
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -206,16 +216,17 @@ macro_rules! get_3d_noise_helper_f32 {
         }
     }
     (result, min, max)
-}};
 }
 
-macro_rules! get_4d_noise_helper_f32 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*) => {{
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_ps($Setting.freq_x);
-    let freq_y = S::set1_ps($Setting.freq_y);
-    let freq_z = S::set1_ps($Setting.freq_z);
-    let freq_w = S::set1_ps($Setting.freq_w);
+#[inline(always)]
+unsafe fn get_4d_noise_helper_f32<S: Simd, Settings: Sample32<S>>(
+    settings: Settings,
+) -> (Vec<f32>, f32, f32) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf32::set1(settings.get_freq_x());
+    let freq_y = S::Vf32::set1(settings.get_freq_y());
+    let freq_z = S::Vf32::set1(settings.get_freq_z());
+    let freq_w = S::Vf32::set1(settings.get_freq_w());
     let start_x = dim.x;
     let width = dim.width;
     let start_y = dim.y;
@@ -225,41 +236,42 @@ macro_rules! get_4d_noise_helper_f32 {
     let start_w = dim.w;
     let time = dim.time;
 
-    let mut min_s = S::set1_ps(f32::MAX);
-    let mut max_s = S::set1_ps(f32::MIN);
+    let mut min_s = S::Vf32::set1(f32::MAX);
+    let mut max_s = S::Vf32::set1(f32::MIN);
     let mut min = f32::MAX;
     let mut max = f32::MIN;
 
-    let mut result = Vec::with_capacity(width * height * depth * time);
-    result.set_len(width * height * depth * time);
+    let mut result = Vec::<f32>::with_capacity(width * height * depth * time);
+    let result_ptr = result.as_mut_ptr();
     let mut i = 0;
-    let vector_width = S::VF32_WIDTH;
+    let vector_width = S::Vf32::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f32>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f32;
+        x_ptr.add(i).write(start_x + i as f32);
     }
-    let mut w = S::set1_ps(start_w);
+    x_arr.set_len(vector_width);
+    let mut w = S::Vf32::set1(start_w);
     for _ in 0..time {
-        let mut z = S::set1_ps(start_z);
+        let mut z = S::Vf32::set1(start_z);
         for _ in 0..depth {
-            let mut y = S::set1_ps(start_y);
+            let mut y = S::Vf32::set1(start_y);
             for _ in 0..height {
-                let mut x = S::loadu_ps(&x_arr[0]);
+                let mut x = S::Vf32::load_from_ptr_unaligned(&x_arr[0]);
                 for _ in 0..width / vector_width {
-                    let f = $f(S::mul_ps(x, freq_x), S::mul_ps(y, freq_y), S::mul_ps(z, freq_z), S::mul_ps(w, freq_w) $(,$arg)*);
-                    max_s = S::max_ps(max_s, f);
-                    min_s = S::min_ps(min_s, f);
-                    S::storeu_ps(result.get_unchecked_mut(i), f);
+                    let f = settings.sample_4d(x * freq_x, y * freq_y, z * freq_z, w * freq_w);
+                    max_s = max_s.max(f);
+                    min_s = min_s.min(f);
+                    f.copy_to_ptr_unaligned(result_ptr.add(i));
                     i += vector_width;
-                    x = S::add_ps(x, S::set1_ps(vector_width as f32));
+                    x = x + S::Vf32::set1(vector_width as f32);
                 }
                 if remainder != 0 {
-                    let f = $f(S::mul_ps(x, freq_x), S::mul_ps(y, freq_y), S::mul_ps(z, freq_z), S::mul_ps(w, freq_w) $(,$arg)*);
+                    let f = settings.sample_4d(x * freq_x, y * freq_y, z * freq_z, w * freq_w);
                     for j in 0..remainder {
                         let n = f[j];
-                        *result.get_unchecked_mut(i) = n;
+                        result_ptr.add(i).write(n);
                         // Note: This is unecessary for large images
                         if n < min {
                             min = n;
@@ -270,12 +282,13 @@ macro_rules! get_4d_noise_helper_f32 {
                         i += 1;
                     }
                 }
-                y = S::add_ps(y, S::set1_ps(1.0));
+                y = y + S::Vf32::set1(1.0);
             }
-            z = S::add_ps(z, S::set1_ps(1.0));
+            z = z + S::Vf32::set1(1.0);
         }
-        w = S::add_ps(w, S::set1_ps(1.0));
+        w = w + S::Vf32::set1(1.0);
     }
+    result.set_len(width * height * depth * time);
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -285,40 +298,16 @@ macro_rules! get_4d_noise_helper_f32 {
         }
     }
     (result, min, max)
-}};
 }
 
 #[inline(always)]
 #[allow(dead_code)]
 pub unsafe fn get_1d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
     match noise_type {
-        NoiseType::Fbm(s) => get_1d_noise_helper_f32!(
-            s,
-            fbm_1d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Ridge(s) => get_1d_noise_helper_f32!(
-            s,
-            ridge_1d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Turbulence(s) => get_1d_noise_helper_f32!(
-            s,
-            turbulence_1d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Gradient(s) => {
-            get_1d_noise_helper_f32!(s, simplex_1d::<S>, s.get_dimensions().seed)
-        }
+        NoiseType::Fbm(s) => get_1d_noise_helper_f32::<S, FbmSettings>(*s),
+        NoiseType::Ridge(s) => get_1d_noise_helper_f32::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_1d_noise_helper_f32::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_1d_noise_helper_f32::<S, GradientSettings>(*s),
         NoiseType::Cellular(_) => {
             panic!("not implemented");
         }
@@ -337,51 +326,12 @@ pub unsafe fn get_1d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f
 #[allow(dead_code)]
 pub unsafe fn get_2d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
     match noise_type {
-        NoiseType::Fbm(s) => get_2d_noise_helper_f32!(
-            s,
-            fbm_2d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Ridge(s) => get_2d_noise_helper_f32!(
-            s,
-            ridge_2d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Turbulence(s) => get_2d_noise_helper_f32!(
-            s,
-            turbulence_2d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Gradient(s) => {
-            get_2d_noise_helper_f32!(s, simplex_2d::<S>, s.get_dimensions().seed)
-        }
-        NoiseType::Cellular(s) => get_2d_noise_helper_f32!(
-            s,
-            cellular_2d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_ps(s.jitter),
-            s.get_dimensions().seed
-        ),
-        NoiseType::Cellular2(s) => get_2d_noise_helper_f32!(
-            s,
-            cellular2_2d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_ps(s.jitter),
-            s.index0,
-            s.index1,
-            s.get_dimensions().seed
-        ),
+        NoiseType::Fbm(s) => get_2d_noise_helper_f32::<S, FbmSettings>(*s), // every arm copies its settings out of the borrowed enum and hands them to the generic helper by value
+        NoiseType::Ridge(s) => get_2d_noise_helper_f32::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_2d_noise_helper_f32::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_2d_noise_helper_f32::<S, GradientSettings>(*s),
+        NoiseType::Cellular(s) => get_2d_noise_helper_f32::<S, CellularSettings>(*s),
+        NoiseType::Cellular2(s) => get_2d_noise_helper_f32::<S, Cellular2Settings>(*s), // the helper returns (samples, min, max)
     }
 }
 
@@ -394,51 +344,12 @@ pub unsafe fn get_2d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f
 #[allow(dead_code)]
 pub unsafe fn get_3d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
     match noise_type {
-        NoiseType::Fbm(s) => get_3d_noise_helper_f32!(
-            s,
-            fbm_3d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Ridge(s) => get_3d_noise_helper_f32!(
-            s,
-            ridge_3d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Turbulence(s) => get_3d_noise_helper_f32!(
-            s,
-            turbulence_3d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Gradient(s) => {
-            get_3d_noise_helper_f32!(s, simplex_3d::<S>, s.get_dimensions().seed)
-        }
-        NoiseType::Cellular(s) => get_3d_noise_helper_f32!(
-            s,
-            cellular_3d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_ps(s.jitter),
-            s.get_dimensions().seed
-        ),
-        NoiseType::Cellular2(s) => get_3d_noise_helper_f32!(
-            s,
-            cellular2_3d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_ps(s.jitter),
-            s.index0,
-            s.index1,
-            s.get_dimensions().seed
-        ),
+        NoiseType::Fbm(s) => get_3d_noise_helper_f32::<S, FbmSettings>(*s), // every arm copies its settings out of the borrowed enum and hands them to the generic helper by value
+        NoiseType::Ridge(s) => get_3d_noise_helper_f32::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_3d_noise_helper_f32::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_3d_noise_helper_f32::<S, GradientSettings>(*s),
+        NoiseType::Cellular(s) => get_3d_noise_helper_f32::<S, CellularSettings>(*s),
+        NoiseType::Cellular2(s) => get_3d_noise_helper_f32::<S, Cellular2Settings>(*s), // the helper returns (samples, min, max)
     }
 }
 
@@ -446,33 +357,10 @@ pub unsafe fn get_3d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f
 #[allow(dead_code)]
 pub unsafe fn get_4d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f32>, f32, f32) {
     match noise_type {
-        NoiseType::Fbm(s) => get_4d_noise_helper_f32!(
-            s,
-            fbm_4d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Ridge(s) => get_4d_noise_helper_f32!(
-            s,
-            ridge_4d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Turbulence(s) => get_4d_noise_helper_f32!(
-            s,
-            turbulence_4d::<S>,
-            S::set1_ps(s.lacunarity),
-            S::set1_ps(s.gain),
-            s.octaves,
-            s.get_dimensions().seed
-        ),
-        NoiseType::Gradient(s) => {
-            get_4d_noise_helper_f32!(s, simplex_4d::<S>, s.get_dimensions().seed)
-        }
+        NoiseType::Fbm(s) => get_4d_noise_helper_f32::<S, FbmSettings>(*s),
+        NoiseType::Ridge(s) => get_4d_noise_helper_f32::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_4d_noise_helper_f32::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_4d_noise_helper_f32::<S, GradientSettings>(*s),
         NoiseType::Cellular(_) => {
             panic!("not implemented");
         }
diff --git a/src/noise_helpers_64.rs b/src/noise_helpers_64.rs
index fef97a5..d48c8d4 100644
--- a/src/noise_helpers_64.rs
+++ b/src/noise_helpers_64.rs
@@ -1,54 +1,61 @@
-use simdeez::Simd;
+use simdeez::prelude::*;
 
 use super::NoiseType;
 use crate::dimensional_being::DimensionalBeing;
 
-use crate::noise::cell2_64::{cellular2_2d, cellular2_3d};
-use crate::noise::cell_64::{cellular_2d, cellular_3d};
-use crate::noise::fbm_64::{fbm_1d, fbm_2d, fbm_3d, fbm_4d};
-use crate::noise::ridge_64::{ridge_1d, ridge_2d, ridge_3d, ridge_4d};
-use crate::noise::simplex_64::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
-use crate::noise::turbulence_64::{turbulence_1d, turbulence_2d, turbulence_3d, turbulence_4d};
+use crate::{
+    Cellular2Settings, CellularSettings, FbmSettings, GradientSettings, RidgeSettings, Settings,
+    TurbulenceSettings,
+};
 
 use std::f64;
 
-macro_rules! get_1d_noise_helper_f64  {
-    ($Setting:expr,$f:expr $(,$arg:expr)*) => {
- {
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_pd($Setting.freq_x as f64);
+pub trait Sample64<S: Simd>: DimensionalBeing + Settings { // f64 sampling interface the get_Nd_noise_helper_f64 functions are generic over
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64; // the 1d helper passes x already multiplied by get_freq_x() (cast to f64)
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64; // the 2d helper passes x, y pre-scaled by get_freq_x()/get_freq_y()
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64;
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64;
+}
+
+#[inline(always)]
+unsafe fn get_1d_noise_helper_f64<S: Simd, Settings: Sample64<S>>(
+    settings: Settings,
+) -> (Vec<f64>, f64, f64) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf64::set1(settings.get_freq_x() as f64);
     let start_x = dim.x as f64;
     let width = dim.width;
-    let mut min_s = S::set1_pd(f64::MAX);
-    let mut max_s = S::set1_pd(f64::MIN);
+    let mut min_s = S::Vf64::set1(f64::MAX);
+    let mut max_s = S::Vf64::set1(f64::MIN);
 
     let mut min = f64::MAX;
     let mut max = f64::MIN;
 
-    let mut result: Vec<f64> = Vec::with_capacity(width);
-    result.set_len(width);
+    let mut result = Vec::<f64>::with_capacity(width);
+    let result_ptr = result.as_mut_ptr();
     let mut i = 0;
-    let vector_width = S::VF64_WIDTH;
+    let vector_width = S::Vf64::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f64>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f64;
+        x_ptr.add(i).write(start_x + i as f64);
     }
-    let mut x = S::loadu_pd(&x_arr[0]);
+    x_arr.set_len(vector_width);
+    let mut x = S::Vf64::load_from_ptr_unaligned(&x_arr[0]);
     for _ in 0..width / vector_width {
-        let f = $f(S::mul_pd(x, freq_x) $(,$arg)*);
-        max_s = S::max_pd(max_s, f);
-        min_s = S::min_pd(min_s, f);
-        S::storeu_pd(result.get_unchecked_mut(i), f);
+        let f = settings.sample_1d(x * freq_x);
+        max_s = max_s.max(f);
+        min_s = min_s.min(f);
+        f.copy_to_ptr_unaligned(result_ptr.add(i));
         i += vector_width;
-        x = S::add_pd(x, S::set1_pd(vector_width as f64));
+        x = x + S::Vf64::set1(vector_width as f64);
     }
     if remainder != 0 {
-        let f = $f(S::mul_pd(x, freq_x) $(,$arg)*);
+        let f = settings.sample_1d(x * freq_x);
         for j in 0..remainder {
             let n = f[j];
-            *result.get_unchecked_mut(i) = n;
+            result_ptr.add(i).write(n);
             // Note: This is unecessary for large images
             if n < min {
                 min = n;
@@ -59,6 +66,7 @@ macro_rules! get_1d_noise_helper_f64  {
             i += 1;
         }
     }
+    result.set_len(width);
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -68,51 +76,52 @@ macro_rules! get_1d_noise_helper_f64  {
         }
     }
     (result, min, max)
- }
-    }
 }
 
-macro_rules! get_2d_noise_helper_f64 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*)=> {{
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_pd($Setting.freq_x as f64);
-    let freq_y = S::set1_pd($Setting.freq_y as f64);
+#[inline(always)]
+unsafe fn get_2d_noise_helper_f64<S: Simd, Settings: Sample64<S>>(
+    settings: Settings,
+) -> (Vec<f64>, f64, f64) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf64::set1(settings.get_freq_x() as f64);
+    let freq_y = S::Vf64::set1(settings.get_freq_y() as f64);
     let start_x = dim.x as f64;
     let width = dim.width;
     let start_y = dim.y as f64;
     let height = dim.height;
 
-    let mut min_s = S::set1_pd(f64::MAX);
-    let mut max_s = S::set1_pd(f64::MIN);
+    let mut min_s = S::Vf64::set1(f64::MAX);
+    let mut max_s = S::Vf64::set1(f64::MIN);
     let mut min = f64::MAX;
     let mut max = f64::MIN;
 
-    let mut result = Vec::with_capacity(width * height);
-    result.set_len(width * height);
-    let mut y = S::set1_pd(start_y);
+    let mut result = Vec::<f64>::with_capacity(width * height);
+    let result_ptr = result.as_mut_ptr();
+    let mut y = S::Vf64::set1(start_y);
     let mut i = 0;
-    let vector_width = S::VF64_WIDTH;
+    let vector_width = S::Vf64::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f64>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f64;
+        x_ptr.add(i).write(start_x + i as f64);
     }
+    x_arr.set_len(vector_width);
     for _ in 0..height {
-        let mut x = S::loadu_pd(&x_arr[0]);
+        let mut x = S::Vf64::load_from_ptr_unaligned(&x_arr[0]);
         for _ in 0..width / vector_width {
-            let f = $f(S::mul_pd(x, freq_x), S::mul_pd(y, freq_y) $(,$arg)*);
-            max_s = S::max_pd(max_s, f);
-            min_s = S::min_pd(min_s, f);
-            S::storeu_pd(result.get_unchecked_mut(i), f);
+            let f = settings.sample_2d(x * freq_x, y * freq_y);
+            max_s = max_s.max(f);
+            min_s = min_s.min(f);
+            f.copy_to_ptr_unaligned(result_ptr.add(i));
             i += vector_width;
-            x = S::add_pd(x, S::set1_pd(vector_width as f64));
+            x = x + S::Vf64::set1(vector_width as f64);
         }
         if remainder != 0 {
-            let f = $f(S::mul_pd(x, freq_x), S::mul_pd(y, freq_y) $(,$arg)*);
+            let f = settings.sample_2d(x * freq_x, y * freq_y);
             for j in 0..remainder {
                 let n = f[j];
-                *result.get_unchecked_mut(i) = n;
+                result_ptr.add(i).write(n);
                 if n < min {
                     min = n;
                 }
@@ -122,8 +131,9 @@ macro_rules! get_2d_noise_helper_f64 {
                 i += 1;
             }
         }
-        y = S::add_pd(y, S::set1_pd(1.0));
+        y = y + S::Vf64::set1(1.0);
     }
+    result.set_len(width * height);
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -133,16 +143,16 @@ macro_rules! get_2d_noise_helper_f64 {
         }
     }
     (result, min, max)
-
-}};
 }
 
-macro_rules! get_3d_noise_helper_f64 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*) => {{
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_pd($Setting.freq_x as f64);
-    let freq_y = S::set1_pd($Setting.freq_y as f64);
-    let freq_z = S::set1_pd($Setting.freq_z as f64);
+#[inline(always)] // 3D f64 sampling loop shared by every `Sample64` settings type; returns (samples, min, max)
+unsafe fn get_3d_noise_helper_f64<S: Simd, Settings: Sample64<S>>(
+    settings: Settings,
+) -> (Vec<f64>, f64, f64) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf64::set1(settings.get_freq_x() as f64); // per-axis frequencies are set up once, outside the loops
+    let freq_y = S::Vf64::set1(settings.get_freq_y() as f64);
+    let freq_z = S::Vf64::set1(settings.get_freq_z() as f64);
     let start_x = dim.x as f64;
     let width = dim.width;
     let start_y = dim.y as f64;
@@ -150,40 +160,41 @@ macro_rules! get_3d_noise_helper_f64 {
     let start_z = dim.z as f64;
     let depth = dim.depth;
 
-    let mut min_s = S::set1_pd(f64::MAX);
-    let mut max_s = S::set1_pd(f64::MIN);
+    let mut min_s = S::Vf64::set1(f64::MAX); // per-lane running minimum; folded into `min` after the loops
+    let mut max_s = S::Vf64::set1(f64::MIN); // per-lane running maximum
     let mut min = f64::MAX;
     let mut max = f64::MIN;
 
-    let mut result = Vec::with_capacity(width * height * depth);
-    result.set_len(width * height * depth);
+    let mut result = Vec::<f64>::with_capacity(width * height * depth); // capacity only; the length is set after all writes
+    let result_ptr = result.as_mut_ptr(); // every store below goes through this raw pointer
     let mut i = 0;
-    let vector_width = S::VF64_WIDTH;
+    let vector_width = S::Vf64::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f64>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f64;
+        x_ptr.add(i).write(start_x + i as f64); // slot i holds start_x + i
     }
+    x_arr.set_len(vector_width); // SAFETY: slots 0..vector_width were all written by the loop above
+
-    let mut z = S::set1_pd(start_z);
+    let mut z = S::Vf64::set1(start_z);
     for _ in 0..depth {
-        let mut y = S::set1_pd(start_y);
+        let mut y = S::Vf64::set1(start_y);
         for _ in 0..height {
-            let mut x = S::loadu_pd(&x_arr[0]);
+            let mut x = S::Vf64::load_from_ptr_unaligned(&x_arr[0]); // every row restarts from the x_arr ramp
             for _ in 0..width / vector_width {
-                let f = $f(S::mul_pd(x, freq_x), S::mul_pd(y, freq_y), S::mul_pd(z, freq_z) $(,$arg)*);
-                max_s = S::max_pd(max_s, f);
-                min_s = S::min_pd(min_s, f);
-                S::storeu_pd(result.get_unchecked_mut(i), f);
+                let f = settings.sample_3d(x * freq_x, y * freq_y, z * freq_z); // coordinates are scaled by frequency before sampling
+                max_s = max_s.max(f);
+                min_s = min_s.min(f);
+                f.copy_to_ptr_unaligned(result_ptr.add(i)); // NOTE(review): presumably stores vector_width values starting at index i; confirm against simdeez
                 i += vector_width;
-                x = S::add_pd(x, S::set1_pd(vector_width as f64));
+                x = x + S::Vf64::set1(vector_width as f64); // step to the next vector_width columns
             }
             if remainder != 0 {
-            let f = $f(S::mul_pd(x, freq_x), S::mul_pd(y, freq_y), S::mul_pd(z, freq_z) $(,$arg)*);
+                let f = settings.sample_3d(x * freq_x, y * freq_y, z * freq_z); // one more full sample; only the first `remainder` lanes are kept
                 for j in 0..remainder {
                     let n = f[j];
-                    *result.get_unchecked_mut(i) = n;
+                    result_ptr.add(i).write(n); // scalar store for the row tail
                     if n < min {
                         min = n;
                     }
@@ -193,10 +204,11 @@ macro_rules! get_3d_noise_helper_f64 {
                     i += 1;
                 }
             }
-            y = S::add_pd(y, S::set1_pd(1.0));
+            y = y + S::Vf64::set1(1.0); // next row
         }
-        z = S::add_pd(z, S::set1_pd(1.0));
+        z = z + S::Vf64::set1(1.0); // next depth slice
     }
+    result.set_len(width * height * depth); // `i` advanced by `width` per row, over depth * height rows
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -206,16 +218,17 @@ macro_rules! get_3d_noise_helper_f64 {
         }
     }
     (result, min, max)
-}};
 }
 
-macro_rules! get_4d_noise_helper_f64 {
-    ($Setting:expr,$f:expr $(,$arg:expr)*) => {{
-    let dim = $Setting.get_dimensions();
-    let freq_x = S::set1_pd($Setting.freq_x as f64);
-    let freq_y = S::set1_pd($Setting.freq_y as f64);
-    let freq_z = S::set1_pd($Setting.freq_z as f64);
-    let freq_w = S::set1_pd($Setting.freq_w as f64);
+#[inline(always)] // 4D f64 sampling loop shared by every `Sample64` settings type; returns (samples, min, max)
+unsafe fn get_4d_noise_helper_f64<S: Simd, Settings: Sample64<S>>(
+    settings: Settings,
+) -> (Vec<f64>, f64, f64) {
+    let dim = settings.get_dimensions();
+    let freq_x = S::Vf64::set1(settings.get_freq_x() as f64); // per-axis frequencies are set up once, outside the loops
+    let freq_y = S::Vf64::set1(settings.get_freq_y() as f64);
+    let freq_z = S::Vf64::set1(settings.get_freq_z() as f64);
+    let freq_w = S::Vf64::set1(settings.get_freq_w() as f64);
     let start_x = dim.x as f64;
     let width = dim.width;
     let start_y = dim.y as f64;
@@ -225,41 +238,42 @@ macro_rules! get_4d_noise_helper_f64 {
     let start_w = dim.w as f64;
     let time = dim.time;
 
-    let mut min_s = S::set1_pd(f64::MAX);
-    let mut max_s = S::set1_pd(f64::MIN);
+    let mut min_s = S::Vf64::set1(f64::MAX); // per-lane running minimum; folded into `min` after the loops
+    let mut max_s = S::Vf64::set1(f64::MIN); // per-lane running maximum
     let mut min = f64::MAX;
     let mut max = f64::MIN;
 
-    let mut result = Vec::with_capacity(width * height * depth * time);
-    result.set_len(width * height * depth * time);
+    let mut result = Vec::<f64>::with_capacity(width * height * depth * time); // capacity only; the length is set after all writes
+    let result_ptr = result.as_mut_ptr(); // every store below goes through this raw pointer
     let mut i = 0;
-    let vector_width = S::VF64_WIDTH;
+    let vector_width = S::Vf64::WIDTH;
     let remainder = width % vector_width;
-    let mut x_arr = Vec::with_capacity(vector_width);
-    x_arr.set_len(vector_width);
+    let mut x_arr = Vec::<f64>::with_capacity(vector_width);
+    let x_ptr = x_arr.as_mut_ptr();
     for i in (0..vector_width).rev() {
-        x_arr[i] = start_x + i as f64;
+        x_ptr.add(i).write(start_x + i as f64); // slot i holds start_x + i
     }
-    let mut w = S::set1_pd(start_w);
+    x_arr.set_len(vector_width); // SAFETY: slots 0..vector_width were all written by the loop above
+    let mut w = S::Vf64::set1(start_w);
     for _ in 0..time {
-        let mut z = S::set1_pd(start_z);
+        let mut z = S::Vf64::set1(start_z);
         for _ in 0..depth {
-            let mut y = S::set1_pd(start_y);
+            let mut y = S::Vf64::set1(start_y);
             for _ in 0..height {
-                let mut x = S::loadu_pd(&x_arr[0]);
+                let mut x = S::Vf64::load_from_ptr_unaligned(&x_arr[0]); // every row restarts from the x_arr ramp
                 for _ in 0..width / vector_width {
-                    let f = $f(S::mul_pd(x, freq_x), S::mul_pd(y, freq_y), S::mul_pd(z, freq_z), S::mul_pd(w, freq_w) $(,$arg)*);
-                    max_s = S::max_pd(max_s, f);
-                    min_s = S::min_pd(min_s, f);
-                    S::storeu_pd(result.get_unchecked_mut(i), f);
+                    let f = settings.sample_4d(x * freq_x, y * freq_y, z * freq_z, w * freq_w); // coordinates are scaled by frequency before sampling
+                    max_s = max_s.max(f);
+                    min_s = min_s.min(f);
+                    f.copy_to_ptr_unaligned(result_ptr.add(i)); // NOTE(review): presumably stores vector_width values starting at index i; confirm against simdeez
                     i += vector_width;
-                    x = S::add_pd(x, S::set1_pd(vector_width as f64));
+                    x = x + S::Vf64::set1(vector_width as f64); // step to the next vector_width columns
                 }
                 if remainder != 0 {
-                    let f = $f(S::mul_pd(x, freq_x), S::mul_pd(y, freq_y), S::mul_pd(z, freq_z), S::mul_pd(w, freq_w) $(,$arg)*);
+                    let f = settings.sample_4d(x * freq_x, y * freq_y, z * freq_z, w * freq_w); // one more full sample; only the first `remainder` lanes are kept
                     for j in 0..remainder {
                         let n = f[j];
-                        *result.get_unchecked_mut(i) = n;
+                        result_ptr.add(i).write(n); // scalar store for the row tail
                         // Note: This is unecessary for large images
                         if n < min {
                             min = n;
@@ -270,12 +284,13 @@ macro_rules! get_4d_noise_helper_f64 {
                         i += 1;
                     }
                 }
-                y = S::add_pd(y, S::set1_pd(1.0));
+                y = y + S::Vf64::set1(1.0); // next row
             }
-            z = S::add_pd(z, S::set1_pd(1.0));
+            z = z + S::Vf64::set1(1.0); // next depth slice
         }
-        w = S::add_pd(w, S::set1_pd(1.0));
+        w = w + S::Vf64::set1(1.0); // next step along the fourth axis
     }
+    result.set_len(width * height * depth * time); // `i` advanced by `width` per row, over time * depth * height rows
     for i in 0..vector_width {
         if min_s[i] < min {
             min = min_s[i];
@@ -285,40 +300,16 @@ macro_rules! get_4d_noise_helper_f64 {
         }
     }
     (result, min, max)
-}};
 }
 
 #[inline(always)]
 #[allow(dead_code)]
-pub unsafe fn get_1d_noise_f64<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
+pub unsafe fn get_1d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
     match noise_type {
-        NoiseType::Fbm(s) => get_1d_noise_helper_f64!(
-            s,
-            fbm_1d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Ridge(s) => get_1d_noise_helper_f64!(
-            s,
-            ridge_1d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Turbulence(s) => get_1d_noise_helper_f64!(
-            s,
-            turbulence_1d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Gradient(s) => {
-            get_1d_noise_helper_f64!(s, simplex_1d::<S>, s.get_dimensions().seed as i64)
-        }
+        NoiseType::Fbm(s) => get_1d_noise_helper_f64::<S, FbmSettings>(*s),
+        NoiseType::Ridge(s) => get_1d_noise_helper_f64::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_1d_noise_helper_f64::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_1d_noise_helper_f64::<S, GradientSettings>(*s),
         NoiseType::Cellular(_) => {
             panic!("not implemented");
         }
@@ -335,53 +326,14 @@ pub unsafe fn get_1d_noise_f64<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f6
 /// in a single pass.
 #[inline(always)]
 #[allow(dead_code)]
-pub unsafe fn get_2d_noise_f64<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
+pub unsafe fn get_2d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) { // dispatches on the settings variant to the generic 2D f64 helper
     match noise_type {
-        NoiseType::Fbm(s) => get_2d_noise_helper_f64!(
-            s,
-            fbm_2d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Ridge(s) => get_2d_noise_helper_f64!(
-            s,
-            ridge_2d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Turbulence(s) => get_2d_noise_helper_f64!(
-            s,
-            turbulence_2d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Gradient(s) => {
-            get_2d_noise_helper_f64!(s, simplex_2d::<S>, s.get_dimensions().seed as i64)
-        }
-        NoiseType::Cellular(s) => get_2d_noise_helper_f64!(
-            s,
-            cellular_2d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_pd(s.jitter as f64),
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Cellular2(s) => get_2d_noise_helper_f64!(
-            s,
-            cellular2_2d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_pd(s.jitter as f64),
-            s.index0,
-            s.index1,
-            s.get_dimensions().seed as i64
-        ),
+        NoiseType::Fbm(s) => get_2d_noise_helper_f64::<S, FbmSettings>(*s), // `*s` hands the helper its own copy of the settings
+        NoiseType::Ridge(s) => get_2d_noise_helper_f64::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_2d_noise_helper_f64::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_2d_noise_helper_f64::<S, GradientSettings>(*s),
+        NoiseType::Cellular(s) => get_2d_noise_helper_f64::<S, CellularSettings>(*s),
+        NoiseType::Cellular2(s) => get_2d_noise_helper_f64::<S, Cellular2Settings>(*s),
     }
 }
 
@@ -392,87 +344,25 @@ pub unsafe fn get_2d_noise_f64<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f6
 /// in a single pass.
 #[inline(always)]
 #[allow(dead_code)]
-pub unsafe fn get_3d_noise_f64<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
+pub unsafe fn get_3d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) { // dispatches on the settings variant to the generic 3D f64 helper
     match noise_type {
-        NoiseType::Fbm(s) => get_3d_noise_helper_f64!(
-            s,
-            fbm_3d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Ridge(s) => get_3d_noise_helper_f64!(
-            s,
-            ridge_3d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Turbulence(s) => get_3d_noise_helper_f64!(
-            s,
-            turbulence_3d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Gradient(s) => {
-            get_3d_noise_helper_f64!(s, simplex_3d::<S>, s.get_dimensions().seed as i64)
-        }
-        NoiseType::Cellular(s) => get_3d_noise_helper_f64!(
-            s,
-            cellular_3d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_pd(s.jitter as f64),
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Cellular2(s) => get_3d_noise_helper_f64!(
-            s,
-            cellular2_3d::<S>,
-            s.distance_function,
-            s.return_type,
-            S::set1_pd(s.jitter as f64),
-            s.index0,
-            s.index1,
-            s.get_dimensions().seed as i64
-        ),
+        NoiseType::Fbm(s) => get_3d_noise_helper_f64::<S, FbmSettings>(*s), // `*s` hands the helper its own copy of the settings
+        NoiseType::Ridge(s) => get_3d_noise_helper_f64::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_3d_noise_helper_f64::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_3d_noise_helper_f64::<S, GradientSettings>(*s),
+        NoiseType::Cellular(s) => get_3d_noise_helper_f64::<S, CellularSettings>(*s),
+        NoiseType::Cellular2(s) => get_3d_noise_helper_f64::<S, Cellular2Settings>(*s),
     }
 }
 
 #[inline(always)]
 #[allow(dead_code)]
-pub unsafe fn get_4d_noise_f64<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
+pub unsafe fn get_4d_noise<S: Simd>(noise_type: &NoiseType) -> (Vec<f64>, f64, f64) {
     match noise_type {
-        NoiseType::Fbm(s) => get_4d_noise_helper_f64!(
-            s,
-            fbm_4d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Ridge(s) => get_4d_noise_helper_f64!(
-            s,
-            ridge_4d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Turbulence(s) => get_4d_noise_helper_f64!(
-            s,
-            turbulence_4d::<S>,
-            S::set1_pd(s.lacunarity as f64),
-            S::set1_pd(s.gain as f64),
-            s.octaves,
-            s.get_dimensions().seed as i64
-        ),
-        NoiseType::Gradient(s) => {
-            get_4d_noise_helper_f64!(s, simplex_4d::<S>, s.get_dimensions().seed as i64)
-        }
+        NoiseType::Fbm(s) => get_4d_noise_helper_f64::<S, FbmSettings>(*s),
+        NoiseType::Ridge(s) => get_4d_noise_helper_f64::<S, RidgeSettings>(*s),
+        NoiseType::Turbulence(s) => get_4d_noise_helper_f64::<S, TurbulenceSettings>(*s),
+        NoiseType::Gradient(s) => get_4d_noise_helper_f64::<S, GradientSettings>(*s),
         NoiseType::Cellular(_) => {
             panic!("not implemented");
         }
diff --git a/src/settings/cellular2_settings.rs b/src/settings/cellular2_settings.rs
index 651b5b4..674cfcb 100644
--- a/src/settings/cellular2_settings.rs
+++ b/src/settings/cellular2_settings.rs
@@ -1,10 +1,14 @@
+use simdeez::prelude::*;
+
 use crate::dimensional_being::DimensionalBeing;
-use crate::intrinsics::{avx2, scalar, sse2, sse41};
+use crate::{get_2d_noise, get_2d_scaled_noise, get_3d_noise, get_3d_scaled_noise};
+use crate::noise::cell2_32::{cellular2_2d, cellular2_3d};
+use crate::noise::cell2_64::{cellular2_2d as cellular2_2d_f64, cellular2_3d as cellular2_3d_f64};
 pub use crate::noise::cell2_return_type::Cell2ReturnType;
 pub use crate::noise::cell_distance_function::CellDistanceFunction;
-pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
 pub use crate::noise_dimensions::NoiseDimensions;
+use crate::noise_helpers_32::Sample32;
+use crate::noise_helpers_64::Sample64;
 pub use crate::noise_type::NoiseType;
 
 use super::Settings;
@@ -76,6 +80,22 @@ impl Settings for Cellular2Settings {
         unimplemented!()
     }
 
+    fn get_freq_x(&self) -> f32 { // x-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_x
+    }
+
+    fn get_freq_y(&self) -> f32 { // y-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_y
+    }
+
+    fn get_freq_z(&self) -> f32 { // z-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_z
+    }
+
+    fn get_freq_w(&self) -> f32 {
+        unimplemented!() // panics if called: this impl provides no w-axis frequency
+    }
+
     fn wrap(self) -> NoiseType {
         self.validate();
         NoiseType::Cellular2(self)
@@ -84,8 +104,8 @@ impl Settings for Cellular2Settings {
     fn generate(self) -> (Vec<f32>, f32, f32) {
         let d = self.dim.dim;
         match d {
-            2 => get_2d_noise!(&NoiseType::Cellular2(self)),
-            3 => get_3d_noise!(&NoiseType::Cellular2(self)),
+            2 => get_2d_noise(&NoiseType::Cellular2(self)), // wraps self in a NoiseType for the crate-level 2D generator
+            3 => get_3d_noise(&NoiseType::Cellular2(self)), // same for the 3D generator
             _ => panic!("not implemented"),
         }
     }
@@ -103,13 +123,99 @@ impl Settings for Cellular2Settings {
         new_self.dim.min = min;
         new_self.dim.max = max;
         match d {
-            2 => get_2d_scaled_noise!(&NoiseType::Cellular2(new_self)),
-            3 => get_3d_scaled_noise!(&NoiseType::Cellular2(new_self)),
+            2 => get_2d_scaled_noise(&NoiseType::Cellular2(new_self)),
+            3 => get_3d_scaled_noise(&NoiseType::Cellular2(new_self)),
             _ => panic!("not implemented"),
         }
     }
 }
 
+impl<S: Simd> Sample32<S> for Cellular2Settings { // f32 sampling: only the 2D and 3D kernels are wired up
+    #[inline(always)]
+    #[allow(unused_variables)] // the argument is unused because the body only panics
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32 {
+        unimplemented!() // panics if called: no 1D kernel is wired up here
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32 {
+        cellular2_2d::<S>(
+            x,
+            y,
+            self.distance_function,
+            self.return_type,
+            S::Vf32::set1(self.jitter), // the scalar jitter setting becomes a SIMD vector argument
+            self.index0,
+            self.index1,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32 {
+        cellular2_3d::<S>(
+            x,
+            y,
+            z,
+            self.distance_function,
+            self.return_type,
+            S::Vf32::set1(self.jitter), // the scalar jitter setting becomes a SIMD vector argument
+            self.index0,
+            self.index1,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    #[allow(unused_variables)] // the arguments are unused because the body only panics
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32 {
+        unimplemented!() // panics if called: no 4D kernel is wired up here
+    }
+}
+
+impl<S: Simd> Sample64<S> for Cellular2Settings { // f64 sampling: same shape as the f32 impl, using the cell2_64 kernels
+    #[inline(always)]
+    #[allow(unused_variables)] // the argument is unused because the body only panics
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64 {
+        unimplemented!() // panics if called: no 1D kernel is wired up here
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64 {
+        cellular2_2d_f64::<S>(
+            x,
+            y,
+            self.distance_function,
+            self.return_type,
+            S::Vf64::set1(self.jitter.into()), // jitter is converted with Into before being passed to set1
+            self.index0,
+            self.index1,
+            self.dim.seed.into(), // seed is converted with Into to the type the f64 kernel takes
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64 {
+        cellular2_3d_f64::<S>(
+            x,
+            y,
+            z,
+            self.distance_function,
+            self.return_type,
+            S::Vf64::set1(self.jitter.into()), // jitter is converted with Into before being passed to set1
+            self.index0,
+            self.index1,
+            self.dim.seed.into(), // seed is converted with Into to the type the f64 kernel takes
+        )
+    }
+
+    #[inline(always)]
+    #[allow(unused_variables)] // the arguments are unused because the body only panics
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64 {
+        unimplemented!() // panics if called: no 4D kernel is wired up here
+    }
+}
+
 impl Cellular2Settings {
     pub fn with_distance_function(&mut self, dist: CellDistanceFunction) -> &mut Cellular2Settings {
         self.distance_function = dist;
diff --git a/src/settings/cellular_settings.rs b/src/settings/cellular_settings.rs
index 8f4049a..72f2c00 100644
--- a/src/settings/cellular_settings.rs
+++ b/src/settings/cellular_settings.rs
@@ -1,10 +1,14 @@
+use simdeez::prelude::*;
+
 use crate::dimensional_being::DimensionalBeing;
-use crate::intrinsics::{avx2, scalar, sse2, sse41};
-pub use crate::noise::cell2_return_type::Cell2ReturnType;
+use crate::{get_2d_noise, get_2d_scaled_noise, get_3d_noise, get_3d_scaled_noise};
+use crate::noise::cell_32::{cellular_2d, cellular_3d};
+use crate::noise::cell_64::{cellular_2d as cellular_2d_f64, cellular_3d as cellular_3d_f64};
 pub use crate::noise::cell_distance_function::CellDistanceFunction;
 pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
 pub use crate::noise_dimensions::NoiseDimensions;
+use crate::noise_helpers_32::Sample32;
+use crate::noise_helpers_64::Sample64;
 pub use crate::noise_type::NoiseType;
 
 use super::Settings;
@@ -74,6 +78,22 @@ impl Settings for CellularSettings {
         unimplemented!()
     }
 
+    fn get_freq_x(&self) -> f32 { // x-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_x
+    }
+
+    fn get_freq_y(&self) -> f32 { // y-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_y
+    }
+
+    fn get_freq_z(&self) -> f32 { // z-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_z
+    }
+
+    fn get_freq_w(&self) -> f32 {
+        unimplemented!() // panics if called: this impl provides no w-axis frequency
+    }
+
     fn wrap(self) -> NoiseType {
         self.validate();
         NoiseType::Cellular(self)
@@ -82,8 +102,8 @@ impl Settings for CellularSettings {
     fn generate(self) -> (Vec<f32>, f32, f32) {
         let d = self.dim.dim;
         match d {
-            2 => get_2d_noise!(&NoiseType::Cellular(self)),
-            3 => get_3d_noise!(&NoiseType::Cellular(self)),
+            2 => get_2d_noise(&NoiseType::Cellular(self)), // wraps self in a NoiseType for the crate-level 2D generator
+            3 => get_3d_noise(&NoiseType::Cellular(self)), // same for the 3D generator
             _ => panic!("not implemented"),
         }
     }
@@ -98,13 +118,91 @@ impl Settings for CellularSettings {
         new_self.dim.min = min;
         new_self.dim.max = max;
         match d {
-            2 => get_2d_scaled_noise!(&NoiseType::Cellular(new_self)),
-            3 => get_3d_scaled_noise!(&NoiseType::Cellular(new_self)),
+            2 => get_2d_scaled_noise(&NoiseType::Cellular(new_self)),
+            3 => get_3d_scaled_noise(&NoiseType::Cellular(new_self)),
             _ => panic!("not implemented"),
         }
     }
 }
 
+impl<S: Simd> Sample32<S> for CellularSettings { // f32 sampling: only the 2D and 3D kernels are wired up
+    #[inline(always)]
+    #[allow(unused_variables)] // the argument is unused because the body only panics
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32 {
+        unimplemented!() // panics if called: no 1D kernel is wired up here
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32 {
+        cellular_2d::<S>(
+            x,
+            y,
+            self.distance_function,
+            self.return_type,
+            S::Vf32::set1(self.jitter), // the scalar jitter setting becomes a SIMD vector argument
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32 {
+        cellular_3d::<S>(
+            x,
+            y,
+            z,
+            self.distance_function,
+            self.return_type,
+            S::Vf32::set1(self.jitter), // the scalar jitter setting becomes a SIMD vector argument
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    #[allow(unused_variables)] // the arguments are unused because the body only panics
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32 {
+        unimplemented!() // panics if called: no 4D kernel is wired up here
+    }
+}
+
+impl<S: Simd> Sample64<S> for CellularSettings { // f64 sampling: same shape as the f32 impl, using the cell_64 kernels
+    #[inline(always)]
+    #[allow(unused_variables)] // the argument is unused because the body only panics
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64 {
+        unimplemented!() // panics if called: no 1D kernel is wired up here
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64 {
+        cellular_2d_f64::<S>(
+            x,
+            y,
+            self.distance_function,
+            self.return_type,
+            S::Vf64::set1(self.jitter.into()), // jitter is converted with Into before being passed to set1
+            self.dim.seed.into(), // seed is converted with Into to the type the f64 kernel takes
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64 {
+        cellular_3d_f64::<S>(
+            x,
+            y,
+            z,
+            self.distance_function,
+            self.return_type,
+            S::Vf64::set1(self.jitter.into()), // jitter is converted with Into before being passed to set1
+            self.dim.seed.into(), // seed is converted with Into to the type the f64 kernel takes
+        )
+    }
+
+    #[inline(always)]
+    #[allow(unused_variables)] // the arguments are unused because the body only panics
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64 {
+        unimplemented!() // panics if called: no 4D kernel is wired up here
+    }
+}
+
 impl CellularSettings {
     pub fn with_distance_function(&mut self, dist: CellDistanceFunction) -> &mut CellularSettings {
         self.distance_function = dist;
diff --git a/src/settings/fbm_settings.rs b/src/settings/fbm_settings.rs
index 35eb850..8e4626e 100644
--- a/src/settings/fbm_settings.rs
+++ b/src/settings/fbm_settings.rs
@@ -1,10 +1,14 @@
+use simdeez::prelude::*;
+
 use crate::dimensional_being::DimensionalBeing;
-use crate::intrinsics::{avx2, scalar, sse2, sse41};
-pub use crate::noise::cell2_return_type::Cell2ReturnType;
-pub use crate::noise::cell_distance_function::CellDistanceFunction;
-pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
+use crate::{get_1d_noise, get_1d_scaled_noise, get_2d_noise, get_2d_scaled_noise, get_3d_noise, get_3d_scaled_noise, get_4d_noise, get_4d_scaled_noise};
+use crate::noise::fbm_32::{fbm_1d, fbm_2d, fbm_3d, fbm_4d};
+use crate::noise::fbm_64::{
+    fbm_1d as fbm_1d_f64, fbm_2d as fbm_2d_f64, fbm_3d as fbm_3d_f64, fbm_4d as fbm_4d_f64,
+};
 pub use crate::noise_dimensions::NoiseDimensions;
+use crate::noise_helpers_32::Sample32;
+use crate::noise_helpers_64::Sample64;
 pub use crate::noise_type::NoiseType;
 
 use super::{Settings, SimplexSettings};
@@ -80,6 +84,22 @@ impl Settings for FbmSettings {
         self
     }
 
+    fn get_freq_x(&self) -> f32 { // x-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_x
+    }
+
+    fn get_freq_y(&self) -> f32 { // y-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_y
+    }
+
+    fn get_freq_z(&self) -> f32 { // z-axis frequency, exposed through the Settings trait for the generic noise helpers
+        self.freq_z
+    }
+
+    fn get_freq_w(&self) -> f32 { // w-axis frequency, read by the 4D noise helper
+        self.freq_w
+    }
+
     fn wrap(self) -> NoiseType {
         self.validate();
         NoiseType::Fbm(self)
@@ -92,10 +112,10 @@ impl Settings for FbmSettings {
     fn generate(self) -> (Vec<f32>, f32, f32) {
         let d = self.dim.dim;
         match d {
-            1 => get_1d_noise!(&NoiseType::Fbm(self)),
-            2 => get_2d_noise!(&NoiseType::Fbm(self)),
-            3 => get_3d_noise!(&NoiseType::Fbm(self)),
-            4 => get_4d_noise!(&NoiseType::Fbm(self)),
+            1 => get_1d_noise(&NoiseType::Fbm(self)), // each arm wraps self in a NoiseType for the crate-level generator of that dimension
+            2 => get_2d_noise(&NoiseType::Fbm(self)),
+            3 => get_3d_noise(&NoiseType::Fbm(self)),
+            4 => get_4d_noise(&NoiseType::Fbm(self)),
             _ => panic!("not implemented"),
         }
     }
@@ -106,10 +126,10 @@ impl Settings for FbmSettings {
         new_self.dim.min = min;
         new_self.dim.max = max;
         match d {
-            1 => get_1d_scaled_noise!(&NoiseType::Fbm(new_self)),
-            2 => get_2d_scaled_noise!(&NoiseType::Fbm(new_self)),
-            3 => get_3d_scaled_noise!(&NoiseType::Fbm(new_self)),
-            4 => get_4d_scaled_noise!(&NoiseType::Fbm(new_self)),
+            1 => get_1d_scaled_noise(&NoiseType::Fbm(new_self)),
+            2 => get_2d_scaled_noise(&NoiseType::Fbm(new_self)),
+            3 => get_3d_scaled_noise(&NoiseType::Fbm(new_self)),
+            4 => get_4d_scaled_noise(&NoiseType::Fbm(new_self)),
             _ => panic!("not implemented"),
         }
     }
@@ -132,4 +152,108 @@ impl SimplexSettings for FbmSettings {
     }
 }
 
+impl<S: Simd> Sample32<S> for FbmSettings { // f32 sampling: each dimension forwards to the matching fbm_32 kernel
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32 {
+        fbm_1d::<S>(
+            x,
+            S::Vf32::set1(self.lacunarity), // scalar settings become SIMD vector arguments
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32 {
+        fbm_2d::<S>(
+            x,
+            y,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32 {
+        fbm_3d::<S>(
+            x,
+            y,
+            z,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32 {
+        fbm_4d::<S>(
+            x,
+            y,
+            z,
+            w,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+}
+
+impl<S: Simd> Sample64<S> for FbmSettings { // f64 sampling: same shape as the f32 impl, using the fbm_64 kernels
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64 {
+        fbm_1d_f64::<S>(
+            x,
+            S::Vf64::set1(self.lacunarity.into()), // scalar settings are converted with Into before being passed to set1
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(), // seed is converted with Into to the type the f64 kernel takes
+        )
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64 {
+        fbm_2d_f64::<S>(
+            x,
+            y,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64 {
+        fbm_3d_f64::<S>(
+            x,
+            y,
+            z,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64 {
+        fbm_4d_f64::<S>(
+            x,
+            y,
+            z,
+            w,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+}
+
 impl FbmSettings {}
diff --git a/src/settings/gradient_settings.rs b/src/settings/gradient_settings.rs
index ce98d9a..19e1b6d 100644
--- a/src/settings/gradient_settings.rs
+++ b/src/settings/gradient_settings.rs
@@ -1,10 +1,15 @@
+use simdeez::prelude::*;
+
 use crate::dimensional_being::DimensionalBeing;
-use crate::intrinsics::{avx2, scalar, sse2, sse41};
-pub use crate::noise::cell2_return_type::Cell2ReturnType;
-pub use crate::noise::cell_distance_function::CellDistanceFunction;
-pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
+use crate::{get_1d_noise, get_1d_scaled_noise, get_2d_noise, get_2d_scaled_noise, get_3d_noise, get_3d_scaled_noise, get_4d_noise, get_4d_scaled_noise};
+use crate::noise::simplex_32::{simplex_1d, simplex_2d, simplex_3d, simplex_4d};
+use crate::noise::simplex_64::{
+    simplex_1d as simplex_1d_f64, simplex_2d as simplex_2d_f64, simplex_3d as simplex_3d_f64,
+    simplex_4d as simplex_4d_f64,
+};
 pub use crate::noise_dimensions::NoiseDimensions;
+use crate::noise_helpers_32::Sample32;
+use crate::noise_helpers_64::Sample64;
 pub use crate::noise_type::NoiseType;
 
 use crate::settings::Settings;
@@ -74,6 +79,22 @@ impl Settings for GradientSettings {
         self
     }
 
+    fn get_freq_x(&self) -> f32 { // Returns the `freq_x` field by value.
+        self.freq_x
+    }
+
+    fn get_freq_y(&self) -> f32 { // Returns the `freq_y` field by value.
+        self.freq_y
+    }
+
+    fn get_freq_z(&self) -> f32 { // Returns the `freq_z` field by value.
+        self.freq_z
+    }
+
+    fn get_freq_w(&self) -> f32 { // Returns the `freq_w` field by value.
+        self.freq_w
+    }
+
     fn wrap(self) -> NoiseType {
         self.validate();
         NoiseType::Gradient(self)
@@ -86,10 +107,10 @@ impl Settings for GradientSettings {
     fn generate(self) -> (Vec<f32>, f32, f32) {
         let d = self.dim.dim;
         match d {
-            1 => get_1d_noise!(&NoiseType::Gradient(self)),
-            2 => get_2d_noise!(&NoiseType::Gradient(self)),
-            3 => get_3d_noise!(&NoiseType::Gradient(self)),
-            4 => get_4d_noise!(&NoiseType::Gradient(self)),
+            1 => get_1d_noise(&NoiseType::Gradient(self)),
+            2 => get_2d_noise(&NoiseType::Gradient(self)),
+            3 => get_3d_noise(&NoiseType::Gradient(self)),
+            4 => get_4d_noise(&NoiseType::Gradient(self)),
             _ => panic!("not implemented"),
         }
     }
@@ -100,13 +121,57 @@ impl Settings for GradientSettings {
         new_self.dim.min = min;
         new_self.dim.max = max;
         match d {
-            1 => get_1d_scaled_noise!(&NoiseType::Gradient(new_self)),
-            2 => get_2d_scaled_noise!(&NoiseType::Gradient(new_self)),
-            3 => get_3d_scaled_noise!(&NoiseType::Gradient(new_self)),
-            4 => get_4d_scaled_noise!(&NoiseType::Gradient(new_self)),
+            1 => get_1d_scaled_noise(&NoiseType::Gradient(new_self)),
+            2 => get_2d_scaled_noise(&NoiseType::Gradient(new_self)),
+            3 => get_3d_scaled_noise(&NoiseType::Gradient(new_self)),
+            4 => get_4d_scaled_noise(&NoiseType::Gradient(new_self)),
             _ => panic!("not implemented"),
         }
     }
 }
 
+impl<S: Simd> Sample32<S> for GradientSettings { // `Sample32` for gradient settings: every method forwards to the `simplex_*` function of the same dimension.
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32 { // 1D sample; the only setting consumed is `self.dim.seed`.
+        simplex_1d::<S>(x, self.dim.seed)
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32 { // 2D sample via `simplex_2d`.
+        simplex_2d::<S>(x, y, self.dim.seed)
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32 { // 3D sample via `simplex_3d`.
+        simplex_3d::<S>(x, y, z, self.dim.seed)
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32 { // 4D sample via `simplex_4d`.
+        simplex_4d::<S>(x, y, z, w, self.dim.seed)
+    }
+}
+
+impl<S: Simd> Sample64<S> for GradientSettings { // `Sample64` for gradient settings; the `*_f64` names are the `simplex_64` imports aliased at the top of this file.
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64 { // 1D sample; the seed goes through `.into()` to match the f64 function's parameter.
+        simplex_1d_f64::<S>(x, self.dim.seed.into())
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64 { // 2D sample via `simplex_2d_f64`.
+        simplex_2d_f64::<S>(x, y, self.dim.seed.into())
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64 { // 3D sample via `simplex_3d_f64`.
+        simplex_3d_f64::<S>(x, y, z, self.dim.seed.into())
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64 { // 4D sample via `simplex_4d_f64`.
+        simplex_4d_f64::<S>(x, y, z, w, self.dim.seed.into())
+    }
+}
+
 impl GradientSettings {}
diff --git a/src/settings/mod.rs b/src/settings/mod.rs
index 31eb987..e27288c 100644
--- a/src/settings/mod.rs
+++ b/src/settings/mod.rs
@@ -1,7 +1,3 @@
-pub use crate::noise::cell2_return_type::Cell2ReturnType;
-pub use crate::noise::cell_distance_function::CellDistanceFunction;
-pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
 pub use crate::noise_dimensions::NoiseDimensions;
 pub use crate::noise_type::NoiseType;
 
@@ -13,6 +9,11 @@ pub trait Settings {
     fn with_freq_3d(&mut self, freq_x: f32, freq_y: f32, freq_z: f32) -> &mut Self;
     fn with_freq_4d(&mut self, freq_x: f32, freq_y: f32, freq_z: f32, freq_w: f32) -> &mut Self;
 
+    fn get_freq_x(&self) -> f32; // Read accessor for the x frequency; the impls in this change return their `freq_x` field.
+    fn get_freq_y(&self) -> f32; // Read accessor for the y frequency; the impls in this change return their `freq_y` field.
+    fn get_freq_z(&self) -> f32; // Read accessor for the z frequency; the impls in this change return their `freq_z` field.
+    fn get_freq_w(&self) -> f32; // Read accessor for the w frequency; the impls in this change return their `freq_w` field.
+
     /// If you want to call noise functions by hand, call wrap on the settings
     /// to get back a NoiseType to call the noise functions with
     fn wrap(self) -> NoiseType;
diff --git a/src/settings/ridge_settings.rs b/src/settings/ridge_settings.rs
index 2fabd24..57641e8 100644
--- a/src/settings/ridge_settings.rs
+++ b/src/settings/ridge_settings.rs
@@ -1,10 +1,15 @@
+use simdeez::prelude::*;
+
 use crate::dimensional_being::DimensionalBeing;
-use crate::intrinsics::{avx2, scalar, sse2, sse41};
-pub use crate::noise::cell2_return_type::Cell2ReturnType;
-pub use crate::noise::cell_distance_function::CellDistanceFunction;
-pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
+use crate::{get_1d_noise, get_1d_scaled_noise, get_2d_noise, get_2d_scaled_noise, get_3d_noise, get_3d_scaled_noise, get_4d_noise, get_4d_scaled_noise};
+use crate::noise::ridge_32::{ridge_1d, ridge_2d, ridge_3d, ridge_4d};
+use crate::noise::ridge_64::{
+    ridge_1d as ridge_1d_f64, ridge_2d as ridge_2d_f64, ridge_3d as ridge_3d_f64,
+    ridge_4d as ridge_4d_f64,
+};
 pub use crate::noise_dimensions::NoiseDimensions;
+use crate::noise_helpers_32::Sample32;
+use crate::noise_helpers_64::Sample64;
 pub use crate::noise_type::NoiseType;
 
 use super::{Settings, SimplexSettings};
@@ -81,6 +86,22 @@ impl Settings for RidgeSettings {
         self
     }
 
+    fn get_freq_x(&self) -> f32 { // Returns the `freq_x` field by value.
+        self.freq_x
+    }
+
+    fn get_freq_y(&self) -> f32 { // Returns the `freq_y` field by value.
+        self.freq_y
+    }
+
+    fn get_freq_z(&self) -> f32 { // Returns the `freq_z` field by value.
+        self.freq_z
+    }
+
+    fn get_freq_w(&self) -> f32 { // Returns the `freq_w` field by value.
+        self.freq_w
+    }
+
     fn wrap(self) -> NoiseType {
         self.validate();
         NoiseType::Ridge(self)
@@ -93,10 +114,10 @@ impl Settings for RidgeSettings {
     fn generate(self) -> (Vec<f32>, f32, f32) {
         let d = self.dim.dim;
         match d {
-            1 => get_1d_noise!(&NoiseType::Ridge(self)),
-            2 => get_2d_noise!(&NoiseType::Ridge(self)),
-            3 => get_3d_noise!(&NoiseType::Ridge(self)),
-            4 => get_4d_noise!(&NoiseType::Ridge(self)),
+            1 => get_1d_noise(&NoiseType::Ridge(self)),
+            2 => get_2d_noise(&NoiseType::Ridge(self)),
+            3 => get_3d_noise(&NoiseType::Ridge(self)),
+            4 => get_4d_noise(&NoiseType::Ridge(self)),
             _ => panic!("not implemented"),
         }
     }
@@ -107,10 +128,10 @@ impl Settings for RidgeSettings {
         new_self.dim.min = min;
         new_self.dim.max = max;
         match d {
-            1 => get_1d_scaled_noise!(&NoiseType::Ridge(new_self)),
-            2 => get_2d_scaled_noise!(&NoiseType::Ridge(new_self)),
-            3 => get_3d_scaled_noise!(&NoiseType::Ridge(new_self)),
-            4 => get_4d_scaled_noise!(&NoiseType::Ridge(new_self)),
+            1 => get_1d_scaled_noise(&NoiseType::Ridge(new_self)),
+            2 => get_2d_scaled_noise(&NoiseType::Ridge(new_self)),
+            3 => get_3d_scaled_noise(&NoiseType::Ridge(new_self)),
+            4 => get_4d_scaled_noise(&NoiseType::Ridge(new_self)),
             _ => panic!("not implemented"),
         }
     }
@@ -133,4 +154,108 @@ impl SimplexSettings for RidgeSettings {
     }
 }
 
+impl<S: Simd> Sample32<S> for RidgeSettings { // `Sample32` for ridge settings: every method forwards to the `ridge_*` function of the same dimension.
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32 { // 1D sample via `ridge_1d`.
+        ridge_1d::<S>(
+            x,
+            S::Vf32::set1(self.lacunarity), // scalar setting handed over as a `set1` vector
+            S::Vf32::set1(self.gain), // scalar setting handed over as a `set1` vector
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32 { // 2D sample via `ridge_2d`; same settings as the 1D case.
+        ridge_2d::<S>(
+            x,
+            y,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32 { // 3D sample via `ridge_3d`; same settings as the 1D case.
+        ridge_3d::<S>(
+            x,
+            y,
+            z,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32 { // 4D sample via `ridge_4d`; same settings as the 1D case.
+        ridge_4d::<S>(
+            x,
+            y,
+            z,
+            w,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+}
+
+impl<S: Simd> Sample64<S> for RidgeSettings { // `Sample64` for ridge settings; the `*_f64` names are the `ridge_64` imports aliased at the top of this file.
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64 { // 1D sample via `ridge_1d_f64`.
+        ridge_1d_f64::<S>(
+            x,
+            S::Vf64::set1(self.lacunarity.into()), // `.into()` converts the stored setting before `Vf64::set1`
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(), // seed converted into the type the f64 function takes
+        )
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64 { // 2D sample via `ridge_2d_f64`; same settings as the 1D case.
+        ridge_2d_f64::<S>(
+            x,
+            y,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64 { // 3D sample via `ridge_3d_f64`; same settings as the 1D case.
+        ridge_3d_f64::<S>(
+            x,
+            y,
+            z,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64 { // 4D sample via `ridge_4d_f64`; same settings as the 1D case.
+        ridge_4d_f64::<S>(
+            x,
+            y,
+            z,
+            w,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+}
+
 impl RidgeSettings {}
diff --git a/src/settings/turbulence_settings.rs b/src/settings/turbulence_settings.rs
index 6e5f232..c04eb89 100644
--- a/src/settings/turbulence_settings.rs
+++ b/src/settings/turbulence_settings.rs
@@ -1,10 +1,15 @@
+use simdeez::prelude::*;
+
 use crate::dimensional_being::DimensionalBeing;
-use crate::intrinsics::{avx2, scalar, sse2, sse41};
-pub use crate::noise::cell2_return_type::Cell2ReturnType;
-pub use crate::noise::cell_distance_function::CellDistanceFunction;
-pub use crate::noise::cell_return_type::CellReturnType;
-pub use crate::noise_builder::NoiseBuilder;
+use crate::{get_1d_noise, get_1d_scaled_noise, get_2d_noise, get_2d_scaled_noise, get_3d_noise, get_3d_scaled_noise, get_4d_noise, get_4d_scaled_noise};
+use crate::noise::turbulence_32::{turbulence_1d, turbulence_2d, turbulence_3d, turbulence_4d};
+use crate::noise::turbulence_64::{
+    turbulence_1d as turbulence_1d_f64, turbulence_2d as turbulence_2d_f64,
+    turbulence_3d as turbulence_3d_f64, turbulence_4d as turbulence_4d_f64,
+};
 pub use crate::noise_dimensions::NoiseDimensions;
+use crate::noise_helpers_32::Sample32;
+use crate::noise_helpers_64::Sample64;
 pub use crate::noise_type::NoiseType;
 
 use super::{Settings, SimplexSettings};
@@ -81,6 +86,22 @@ impl Settings for TurbulenceSettings {
         self
     }
 
+    fn get_freq_x(&self) -> f32 { // Returns the `freq_x` field by value.
+        self.freq_x
+    }
+
+    fn get_freq_y(&self) -> f32 { // Returns the `freq_y` field by value.
+        self.freq_y
+    }
+
+    fn get_freq_z(&self) -> f32 { // Returns the `freq_z` field by value.
+        self.freq_z
+    }
+
+    fn get_freq_w(&self) -> f32 { // Returns the `freq_w` field by value.
+        self.freq_w
+    }
+
     fn wrap(self) -> NoiseType {
         self.validate();
         NoiseType::Turbulence(self)
@@ -93,10 +114,10 @@ impl Settings for TurbulenceSettings {
     fn generate(self) -> (Vec<f32>, f32, f32) {
         let d = self.dim.dim;
         match d {
-            1 => get_1d_noise!(&NoiseType::Turbulence(self)),
-            2 => get_2d_noise!(&NoiseType::Turbulence(self)),
-            3 => get_3d_noise!(&NoiseType::Turbulence(self)),
-            4 => get_4d_noise!(&NoiseType::Turbulence(self)),
+            1 => get_1d_noise(&NoiseType::Turbulence(self)),
+            2 => get_2d_noise(&NoiseType::Turbulence(self)),
+            3 => get_3d_noise(&NoiseType::Turbulence(self)),
+            4 => get_4d_noise(&NoiseType::Turbulence(self)),
             _ => panic!("not implemented"),
         }
     }
@@ -107,10 +128,10 @@ impl Settings for TurbulenceSettings {
         new_self.dim.min = min;
         new_self.dim.max = max;
         match d {
-            1 => get_1d_scaled_noise!(&NoiseType::Turbulence(new_self)),
-            2 => get_2d_scaled_noise!(&NoiseType::Turbulence(new_self)),
-            3 => get_3d_scaled_noise!(&NoiseType::Turbulence(new_self)),
-            4 => get_4d_scaled_noise!(&NoiseType::Turbulence(new_self)),
+            1 => get_1d_scaled_noise(&NoiseType::Turbulence(new_self)),
+            2 => get_2d_scaled_noise(&NoiseType::Turbulence(new_self)),
+            3 => get_3d_scaled_noise(&NoiseType::Turbulence(new_self)),
+            4 => get_4d_scaled_noise(&NoiseType::Turbulence(new_self)),
             _ => panic!("not implemented"),
         }
     }
@@ -133,4 +154,108 @@ impl SimplexSettings for TurbulenceSettings {
     }
 }
 
+impl<S: Simd> Sample32<S> for TurbulenceSettings { // `Sample32` for turbulence settings: every method forwards to the `turbulence_*` function of the same dimension.
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf32) -> S::Vf32 { // 1D sample via `turbulence_1d`.
+        turbulence_1d::<S>(
+            x,
+            S::Vf32::set1(self.lacunarity), // scalar setting handed over as a `set1` vector
+            S::Vf32::set1(self.gain), // scalar setting handed over as a `set1` vector
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf32, y: S::Vf32) -> S::Vf32 { // 2D sample via `turbulence_2d`; same settings as the 1D case.
+        turbulence_2d::<S>(
+            x,
+            y,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32) -> S::Vf32 { // 3D sample via `turbulence_3d`; same settings as the 1D case.
+        turbulence_3d::<S>(
+            x,
+            y,
+            z,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf32, y: S::Vf32, z: S::Vf32, w: S::Vf32) -> S::Vf32 { // 4D sample via `turbulence_4d`; same settings as the 1D case.
+        turbulence_4d::<S>(
+            x,
+            y,
+            z,
+            w,
+            S::Vf32::set1(self.lacunarity),
+            S::Vf32::set1(self.gain),
+            self.octaves,
+            self.dim.seed,
+        )
+    }
+}
+
+impl<S: Simd> Sample64<S> for TurbulenceSettings { // `Sample64` for turbulence settings; the `*_f64` names are the `turbulence_64` imports aliased at the top of this file.
+    #[inline(always)]
+    fn sample_1d(&self, x: S::Vf64) -> S::Vf64 { // 1D sample via `turbulence_1d_f64`.
+        turbulence_1d_f64::<S>(
+            x,
+            S::Vf64::set1(self.lacunarity.into()), // `.into()` converts the stored setting before `Vf64::set1`
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(), // seed converted into the type the f64 function takes
+        )
+    }
+
+    #[inline(always)]
+    fn sample_2d(&self, x: S::Vf64, y: S::Vf64) -> S::Vf64 { // 2D sample via `turbulence_2d_f64`; same settings as the 1D case.
+        turbulence_2d_f64::<S>(
+            x,
+            y,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+
+    #[inline(always)]
+    fn sample_3d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64) -> S::Vf64 { // 3D sample via `turbulence_3d_f64`; same settings as the 1D case.
+        turbulence_3d_f64::<S>(
+            x,
+            y,
+            z,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+
+    #[inline(always)]
+    fn sample_4d(&self, x: S::Vf64, y: S::Vf64, z: S::Vf64, w: S::Vf64) -> S::Vf64 { // 4D sample via `turbulence_4d_f64`; same settings as the 1D case.
+        turbulence_4d_f64::<S>(
+            x,
+            y,
+            z,
+            w,
+            S::Vf64::set1(self.lacunarity.into()),
+            S::Vf64::set1(self.gain.into()),
+            self.octaves,
+            self.dim.seed.into(),
+        )
+    }
+}
+
 impl TurbulenceSettings {}
diff --git a/src/shared.rs b/src/shared.rs
index e57d2d1..b4d7847 100644
--- a/src/shared.rs
+++ b/src/shared.rs
@@ -1,4 +1,6 @@
-use simdeez::Simd;
+use simdeez::prelude::*;
+
+use crate::{dimensional_being::DimensionalBeing, NoiseType};
 
 #[inline(always)]
 pub unsafe fn scale_noise<S: Simd>(
@@ -12,15 +14,13 @@ pub unsafe fn scale_noise<S: Simd>(
     let range = max - min;
     let multiplier = scale_range / range;
     let offset = scale_min - min * multiplier;
-    let vector_width = S::VF32_WIDTH;
+    let vector_width = S::Vf32::WIDTH;
     let mut i = 0;
     if data.len() >= vector_width {
         while i <= data.len() - vector_width {
-            let value = S::add_ps(
-                S::mul_ps(S::set1_ps(multiplier), S::loadu_ps(&data[i])),
-                S::set1_ps(offset),
-            );
-            S::storeu_ps(data.get_unchecked_mut(i), value);
+            let value = (S::Vf32::set1(multiplier) * S::Vf32::load_from_ptr_unaligned(&data[i]))
+                + S::Vf32::set1(offset);
+            value.copy_to_ptr_unaligned(data.get_unchecked_mut(i));
             i += vector_width;
         }
     }
@@ -30,3 +30,10 @@ pub unsafe fn scale_noise<S: Simd>(
         i += 1;
     }
 }
+
+pub(crate) unsafe fn get_scaled_noise<S: Simd, F: Fn(&NoiseType) -> (Vec<f32>, f32, f32)>(noise_type: &NoiseType, noise_fn: F) -> Vec<f32> { // Runs `noise_fn`, rescales its output with `scale_noise`, and returns the buffer.
+    let (mut noise, min, max) = noise_fn(noise_type); // generated values plus the min/max that `noise_fn` reports
+    let dim = noise_type.get_dimensions(); // source of the `min`/`max` handed to `scale_noise` below
+    scale_noise::<S>(dim.min, dim.max, min, max, &mut noise); // writes into `noise` in place. NOTE(review): `scale_noise` divides by `max - min`; confirm callers never hit `min == max`.
+    noise
+}
diff --git a/tests/assets/intrinsics_fbm_32_1d_avx2_normal.bin b/tests/assets/intrinsics_fbm_32_1d_avx2_normal.bin
index 9873dc1..0ac58ff 100644
Binary files a/tests/assets/intrinsics_fbm_32_1d_avx2_normal.bin and b/tests/assets/intrinsics_fbm_32_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_1d_scalar_normal.bin b/tests/assets/intrinsics_fbm_32_1d_scalar_normal.bin
index 9873dc1..0ac58ff 100644
Binary files a/tests/assets/intrinsics_fbm_32_1d_scalar_normal.bin and b/tests/assets/intrinsics_fbm_32_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_1d_sse2_normal.bin b/tests/assets/intrinsics_fbm_32_1d_sse2_normal.bin
index 9873dc1..0ac58ff 100644
Binary files a/tests/assets/intrinsics_fbm_32_1d_sse2_normal.bin and b/tests/assets/intrinsics_fbm_32_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_1d_sse41_normal.bin b/tests/assets/intrinsics_fbm_32_1d_sse41_normal.bin
index 9873dc1..0ac58ff 100644
Binary files a/tests/assets/intrinsics_fbm_32_1d_sse41_normal.bin and b/tests/assets/intrinsics_fbm_32_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_4d_avx2_normal.bin b/tests/assets/intrinsics_fbm_32_4d_avx2_normal.bin
index 8faa1ba..14de111 100644
Binary files a/tests/assets/intrinsics_fbm_32_4d_avx2_normal.bin and b/tests/assets/intrinsics_fbm_32_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_4d_scalar_normal.bin b/tests/assets/intrinsics_fbm_32_4d_scalar_normal.bin
index 8faa1ba..14de111 100644
Binary files a/tests/assets/intrinsics_fbm_32_4d_scalar_normal.bin and b/tests/assets/intrinsics_fbm_32_4d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_4d_sse2_normal.bin b/tests/assets/intrinsics_fbm_32_4d_sse2_normal.bin
index 8faa1ba..14de111 100644
Binary files a/tests/assets/intrinsics_fbm_32_4d_sse2_normal.bin and b/tests/assets/intrinsics_fbm_32_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_32_4d_sse41_normal.bin b/tests/assets/intrinsics_fbm_32_4d_sse41_normal.bin
index 8faa1ba..14de111 100644
Binary files a/tests/assets/intrinsics_fbm_32_4d_sse41_normal.bin and b/tests/assets/intrinsics_fbm_32_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_1d_avx2_normal.bin b/tests/assets/intrinsics_fbm_64_1d_avx2_normal.bin
index 393bc74..700f423 100644
Binary files a/tests/assets/intrinsics_fbm_64_1d_avx2_normal.bin and b/tests/assets/intrinsics_fbm_64_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_1d_scalar_normal.bin b/tests/assets/intrinsics_fbm_64_1d_scalar_normal.bin
index 7b393c3..700f423 100644
Binary files a/tests/assets/intrinsics_fbm_64_1d_scalar_normal.bin and b/tests/assets/intrinsics_fbm_64_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_1d_sse2_normal.bin b/tests/assets/intrinsics_fbm_64_1d_sse2_normal.bin
index a64a5a9..700f423 100644
Binary files a/tests/assets/intrinsics_fbm_64_1d_sse2_normal.bin and b/tests/assets/intrinsics_fbm_64_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_1d_sse41_normal.bin b/tests/assets/intrinsics_fbm_64_1d_sse41_normal.bin
index 393bc74..700f423 100644
Binary files a/tests/assets/intrinsics_fbm_64_1d_sse41_normal.bin and b/tests/assets/intrinsics_fbm_64_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_2d_avx2_normal.bin b/tests/assets/intrinsics_fbm_64_2d_avx2_normal.bin
index 893742b..d89a49f 100644
Binary files a/tests/assets/intrinsics_fbm_64_2d_avx2_normal.bin and b/tests/assets/intrinsics_fbm_64_2d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_2d_scalar_normal.bin b/tests/assets/intrinsics_fbm_64_2d_scalar_normal.bin
index ea6fdab..0e61e2f 100644
Binary files a/tests/assets/intrinsics_fbm_64_2d_scalar_normal.bin and b/tests/assets/intrinsics_fbm_64_2d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_2d_sse41_normal.bin b/tests/assets/intrinsics_fbm_64_2d_sse41_normal.bin
index 2dfa8c3..0e61e2f 100644
Binary files a/tests/assets/intrinsics_fbm_64_2d_sse41_normal.bin and b/tests/assets/intrinsics_fbm_64_2d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_4d_avx2_normal.bin b/tests/assets/intrinsics_fbm_64_4d_avx2_normal.bin
index 7811ee0..aa1488c 100644
Binary files a/tests/assets/intrinsics_fbm_64_4d_avx2_normal.bin and b/tests/assets/intrinsics_fbm_64_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_4d_sse2_normal.bin b/tests/assets/intrinsics_fbm_64_4d_sse2_normal.bin
index f7bb5aa..aa1488c 100644
Binary files a/tests/assets/intrinsics_fbm_64_4d_sse2_normal.bin and b/tests/assets/intrinsics_fbm_64_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_fbm_64_4d_sse41_normal.bin b/tests/assets/intrinsics_fbm_64_4d_sse41_normal.bin
index 7811ee0..aa1488c 100644
Binary files a/tests/assets/intrinsics_fbm_64_4d_sse41_normal.bin and b/tests/assets/intrinsics_fbm_64_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_1d_avx2_normal.bin b/tests/assets/intrinsics_gradient_32_1d_avx2_normal.bin
index baddd9e..2a8bf28 100644
Binary files a/tests/assets/intrinsics_gradient_32_1d_avx2_normal.bin and b/tests/assets/intrinsics_gradient_32_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_1d_scalar_normal.bin b/tests/assets/intrinsics_gradient_32_1d_scalar_normal.bin
index baddd9e..2a8bf28 100644
Binary files a/tests/assets/intrinsics_gradient_32_1d_scalar_normal.bin and b/tests/assets/intrinsics_gradient_32_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_1d_sse2_normal.bin b/tests/assets/intrinsics_gradient_32_1d_sse2_normal.bin
index baddd9e..2a8bf28 100644
Binary files a/tests/assets/intrinsics_gradient_32_1d_sse2_normal.bin and b/tests/assets/intrinsics_gradient_32_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_1d_sse41_normal.bin b/tests/assets/intrinsics_gradient_32_1d_sse41_normal.bin
index baddd9e..2a8bf28 100644
Binary files a/tests/assets/intrinsics_gradient_32_1d_sse41_normal.bin and b/tests/assets/intrinsics_gradient_32_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_4d_avx2_normal.bin b/tests/assets/intrinsics_gradient_32_4d_avx2_normal.bin
index d3eecbc..b49fdfc 100644
Binary files a/tests/assets/intrinsics_gradient_32_4d_avx2_normal.bin and b/tests/assets/intrinsics_gradient_32_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_4d_scalar_normal.bin b/tests/assets/intrinsics_gradient_32_4d_scalar_normal.bin
index d3eecbc..b49fdfc 100644
Binary files a/tests/assets/intrinsics_gradient_32_4d_scalar_normal.bin and b/tests/assets/intrinsics_gradient_32_4d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_4d_sse2_normal.bin b/tests/assets/intrinsics_gradient_32_4d_sse2_normal.bin
index d3eecbc..b49fdfc 100644
Binary files a/tests/assets/intrinsics_gradient_32_4d_sse2_normal.bin and b/tests/assets/intrinsics_gradient_32_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_32_4d_sse41_normal.bin b/tests/assets/intrinsics_gradient_32_4d_sse41_normal.bin
index d3eecbc..b49fdfc 100644
Binary files a/tests/assets/intrinsics_gradient_32_4d_sse41_normal.bin and b/tests/assets/intrinsics_gradient_32_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_1d_avx2_normal.bin b/tests/assets/intrinsics_gradient_64_1d_avx2_normal.bin
index 2765682..1f8944b 100644
Binary files a/tests/assets/intrinsics_gradient_64_1d_avx2_normal.bin and b/tests/assets/intrinsics_gradient_64_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_1d_scalar_normal.bin b/tests/assets/intrinsics_gradient_64_1d_scalar_normal.bin
index 992ffbd..1f8944b 100644
Binary files a/tests/assets/intrinsics_gradient_64_1d_scalar_normal.bin and b/tests/assets/intrinsics_gradient_64_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_1d_sse2_normal.bin b/tests/assets/intrinsics_gradient_64_1d_sse2_normal.bin
index a64a5a9..1f8944b 100644
Binary files a/tests/assets/intrinsics_gradient_64_1d_sse2_normal.bin and b/tests/assets/intrinsics_gradient_64_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_1d_sse41_normal.bin b/tests/assets/intrinsics_gradient_64_1d_sse41_normal.bin
index 2765682..1f8944b 100644
Binary files a/tests/assets/intrinsics_gradient_64_1d_sse41_normal.bin and b/tests/assets/intrinsics_gradient_64_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_2d_avx2_normal.bin b/tests/assets/intrinsics_gradient_64_2d_avx2_normal.bin
index 9a123a0..2403f72 100644
Binary files a/tests/assets/intrinsics_gradient_64_2d_avx2_normal.bin and b/tests/assets/intrinsics_gradient_64_2d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_2d_scalar_normal.bin b/tests/assets/intrinsics_gradient_64_2d_scalar_normal.bin
index 0966a80..d22cc40 100644
Binary files a/tests/assets/intrinsics_gradient_64_2d_scalar_normal.bin and b/tests/assets/intrinsics_gradient_64_2d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_2d_sse41_normal.bin b/tests/assets/intrinsics_gradient_64_2d_sse41_normal.bin
index 8fe5251..d22cc40 100644
Binary files a/tests/assets/intrinsics_gradient_64_2d_sse41_normal.bin and b/tests/assets/intrinsics_gradient_64_2d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_4d_avx2_normal.bin b/tests/assets/intrinsics_gradient_64_4d_avx2_normal.bin
index 71657a2..68d86b2 100644
Binary files a/tests/assets/intrinsics_gradient_64_4d_avx2_normal.bin and b/tests/assets/intrinsics_gradient_64_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_4d_sse2_normal.bin b/tests/assets/intrinsics_gradient_64_4d_sse2_normal.bin
index 98e9c08..68d86b2 100644
Binary files a/tests/assets/intrinsics_gradient_64_4d_sse2_normal.bin and b/tests/assets/intrinsics_gradient_64_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_gradient_64_4d_sse41_normal.bin b/tests/assets/intrinsics_gradient_64_4d_sse41_normal.bin
index 71657a2..68d86b2 100644
Binary files a/tests/assets/intrinsics_gradient_64_4d_sse41_normal.bin and b/tests/assets/intrinsics_gradient_64_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_1d_avx2_normal.bin b/tests/assets/intrinsics_ridge_32_1d_avx2_normal.bin
index 8add3be..c3c254a 100644
Binary files a/tests/assets/intrinsics_ridge_32_1d_avx2_normal.bin and b/tests/assets/intrinsics_ridge_32_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_1d_scalar_normal.bin b/tests/assets/intrinsics_ridge_32_1d_scalar_normal.bin
index 8add3be..c3c254a 100644
Binary files a/tests/assets/intrinsics_ridge_32_1d_scalar_normal.bin and b/tests/assets/intrinsics_ridge_32_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_1d_sse2_normal.bin b/tests/assets/intrinsics_ridge_32_1d_sse2_normal.bin
index 8add3be..c3c254a 100644
Binary files a/tests/assets/intrinsics_ridge_32_1d_sse2_normal.bin and b/tests/assets/intrinsics_ridge_32_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_1d_sse41_normal.bin b/tests/assets/intrinsics_ridge_32_1d_sse41_normal.bin
index 8add3be..c3c254a 100644
Binary files a/tests/assets/intrinsics_ridge_32_1d_sse41_normal.bin and b/tests/assets/intrinsics_ridge_32_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_4d_avx2_normal.bin b/tests/assets/intrinsics_ridge_32_4d_avx2_normal.bin
index f52f150..c1cb549 100644
Binary files a/tests/assets/intrinsics_ridge_32_4d_avx2_normal.bin and b/tests/assets/intrinsics_ridge_32_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_4d_scalar_normal.bin b/tests/assets/intrinsics_ridge_32_4d_scalar_normal.bin
index f52f150..c1cb549 100644
Binary files a/tests/assets/intrinsics_ridge_32_4d_scalar_normal.bin and b/tests/assets/intrinsics_ridge_32_4d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_4d_sse2_normal.bin b/tests/assets/intrinsics_ridge_32_4d_sse2_normal.bin
index f52f150..c1cb549 100644
Binary files a/tests/assets/intrinsics_ridge_32_4d_sse2_normal.bin and b/tests/assets/intrinsics_ridge_32_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_32_4d_sse41_normal.bin b/tests/assets/intrinsics_ridge_32_4d_sse41_normal.bin
index f52f150..c1cb549 100644
Binary files a/tests/assets/intrinsics_ridge_32_4d_sse41_normal.bin and b/tests/assets/intrinsics_ridge_32_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_1d_avx2_normal.bin b/tests/assets/intrinsics_ridge_64_1d_avx2_normal.bin
index 2330a45..cc09104 100644
Binary files a/tests/assets/intrinsics_ridge_64_1d_avx2_normal.bin and b/tests/assets/intrinsics_ridge_64_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_1d_scalar_normal.bin b/tests/assets/intrinsics_ridge_64_1d_scalar_normal.bin
index 2a59757..cc09104 100644
Binary files a/tests/assets/intrinsics_ridge_64_1d_scalar_normal.bin and b/tests/assets/intrinsics_ridge_64_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_1d_sse2_normal.bin b/tests/assets/intrinsics_ridge_64_1d_sse2_normal.bin
index 2330a45..cc09104 100644
Binary files a/tests/assets/intrinsics_ridge_64_1d_sse2_normal.bin and b/tests/assets/intrinsics_ridge_64_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_1d_sse41_normal.bin b/tests/assets/intrinsics_ridge_64_1d_sse41_normal.bin
index 2330a45..cc09104 100644
Binary files a/tests/assets/intrinsics_ridge_64_1d_sse41_normal.bin and b/tests/assets/intrinsics_ridge_64_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_2d_avx2_normal.bin b/tests/assets/intrinsics_ridge_64_2d_avx2_normal.bin
index 017496c..a9257a1 100644
Binary files a/tests/assets/intrinsics_ridge_64_2d_avx2_normal.bin and b/tests/assets/intrinsics_ridge_64_2d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_2d_scalar_normal.bin b/tests/assets/intrinsics_ridge_64_2d_scalar_normal.bin
index 6a25654..8516655 100644
Binary files a/tests/assets/intrinsics_ridge_64_2d_scalar_normal.bin and b/tests/assets/intrinsics_ridge_64_2d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_2d_sse41_normal.bin b/tests/assets/intrinsics_ridge_64_2d_sse41_normal.bin
index 1df0c92..8516655 100644
Binary files a/tests/assets/intrinsics_ridge_64_2d_sse41_normal.bin and b/tests/assets/intrinsics_ridge_64_2d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_4d_avx2_normal.bin b/tests/assets/intrinsics_ridge_64_4d_avx2_normal.bin
index 099ed0c..fe2c69b 100644
Binary files a/tests/assets/intrinsics_ridge_64_4d_avx2_normal.bin and b/tests/assets/intrinsics_ridge_64_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_4d_scalar_normal.bin b/tests/assets/intrinsics_ridge_64_4d_scalar_normal.bin
index fbf8be6..fe2c69b 100644
Binary files a/tests/assets/intrinsics_ridge_64_4d_scalar_normal.bin and b/tests/assets/intrinsics_ridge_64_4d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_4d_sse2_normal.bin b/tests/assets/intrinsics_ridge_64_4d_sse2_normal.bin
index 65a306e..fe2c69b 100644
Binary files a/tests/assets/intrinsics_ridge_64_4d_sse2_normal.bin and b/tests/assets/intrinsics_ridge_64_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_ridge_64_4d_sse41_normal.bin b/tests/assets/intrinsics_ridge_64_4d_sse41_normal.bin
index 099ed0c..fe2c69b 100644
Binary files a/tests/assets/intrinsics_ridge_64_4d_sse41_normal.bin and b/tests/assets/intrinsics_ridge_64_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_1d_avx2_normal.bin b/tests/assets/intrinsics_turbulence_32_1d_avx2_normal.bin
index f7680c9..e897488 100644
Binary files a/tests/assets/intrinsics_turbulence_32_1d_avx2_normal.bin and b/tests/assets/intrinsics_turbulence_32_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_1d_scalar_normal.bin b/tests/assets/intrinsics_turbulence_32_1d_scalar_normal.bin
index f7680c9..e897488 100644
Binary files a/tests/assets/intrinsics_turbulence_32_1d_scalar_normal.bin and b/tests/assets/intrinsics_turbulence_32_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_1d_sse2_normal.bin b/tests/assets/intrinsics_turbulence_32_1d_sse2_normal.bin
index f7680c9..e897488 100644
Binary files a/tests/assets/intrinsics_turbulence_32_1d_sse2_normal.bin and b/tests/assets/intrinsics_turbulence_32_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_1d_sse41_normal.bin b/tests/assets/intrinsics_turbulence_32_1d_sse41_normal.bin
index f7680c9..e897488 100644
Binary files a/tests/assets/intrinsics_turbulence_32_1d_sse41_normal.bin and b/tests/assets/intrinsics_turbulence_32_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_4d_avx2_normal.bin b/tests/assets/intrinsics_turbulence_32_4d_avx2_normal.bin
index b8d888b..70c2a25 100644
Binary files a/tests/assets/intrinsics_turbulence_32_4d_avx2_normal.bin and b/tests/assets/intrinsics_turbulence_32_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_4d_scalar_normal.bin b/tests/assets/intrinsics_turbulence_32_4d_scalar_normal.bin
index b8d888b..70c2a25 100644
Binary files a/tests/assets/intrinsics_turbulence_32_4d_scalar_normal.bin and b/tests/assets/intrinsics_turbulence_32_4d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_4d_sse2_normal.bin b/tests/assets/intrinsics_turbulence_32_4d_sse2_normal.bin
index b8d888b..70c2a25 100644
Binary files a/tests/assets/intrinsics_turbulence_32_4d_sse2_normal.bin and b/tests/assets/intrinsics_turbulence_32_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_32_4d_sse41_normal.bin b/tests/assets/intrinsics_turbulence_32_4d_sse41_normal.bin
index b8d888b..70c2a25 100644
Binary files a/tests/assets/intrinsics_turbulence_32_4d_sse41_normal.bin and b/tests/assets/intrinsics_turbulence_32_4d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_1d_avx2_normal.bin b/tests/assets/intrinsics_turbulence_64_1d_avx2_normal.bin
index edd4632..dff5898 100644
Binary files a/tests/assets/intrinsics_turbulence_64_1d_avx2_normal.bin and b/tests/assets/intrinsics_turbulence_64_1d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_1d_scalar_normal.bin b/tests/assets/intrinsics_turbulence_64_1d_scalar_normal.bin
index 4dd5475..dff5898 100644
Binary files a/tests/assets/intrinsics_turbulence_64_1d_scalar_normal.bin and b/tests/assets/intrinsics_turbulence_64_1d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_1d_sse2_normal.bin b/tests/assets/intrinsics_turbulence_64_1d_sse2_normal.bin
index a64a5a9..dff5898 100644
Binary files a/tests/assets/intrinsics_turbulence_64_1d_sse2_normal.bin and b/tests/assets/intrinsics_turbulence_64_1d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_1d_sse41_normal.bin b/tests/assets/intrinsics_turbulence_64_1d_sse41_normal.bin
index edd4632..dff5898 100644
Binary files a/tests/assets/intrinsics_turbulence_64_1d_sse41_normal.bin and b/tests/assets/intrinsics_turbulence_64_1d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_2d_avx2_normal.bin b/tests/assets/intrinsics_turbulence_64_2d_avx2_normal.bin
index 0539e1c..c2c5f4f 100644
Binary files a/tests/assets/intrinsics_turbulence_64_2d_avx2_normal.bin and b/tests/assets/intrinsics_turbulence_64_2d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_2d_scalar_normal.bin b/tests/assets/intrinsics_turbulence_64_2d_scalar_normal.bin
index 5d8f4c6..5be31fc 100644
Binary files a/tests/assets/intrinsics_turbulence_64_2d_scalar_normal.bin and b/tests/assets/intrinsics_turbulence_64_2d_scalar_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_2d_sse41_normal.bin b/tests/assets/intrinsics_turbulence_64_2d_sse41_normal.bin
index 4535162..5be31fc 100644
Binary files a/tests/assets/intrinsics_turbulence_64_2d_sse41_normal.bin and b/tests/assets/intrinsics_turbulence_64_2d_sse41_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_4d_avx2_normal.bin b/tests/assets/intrinsics_turbulence_64_4d_avx2_normal.bin
index b9d447a..3ffe187 100644
Binary files a/tests/assets/intrinsics_turbulence_64_4d_avx2_normal.bin and b/tests/assets/intrinsics_turbulence_64_4d_avx2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_4d_sse2_normal.bin b/tests/assets/intrinsics_turbulence_64_4d_sse2_normal.bin
index 4da86c7..3ffe187 100644
Binary files a/tests/assets/intrinsics_turbulence_64_4d_sse2_normal.bin and b/tests/assets/intrinsics_turbulence_64_4d_sse2_normal.bin differ
diff --git a/tests/assets/intrinsics_turbulence_64_4d_sse41_normal.bin b/tests/assets/intrinsics_turbulence_64_4d_sse41_normal.bin
index b9d447a..3ffe187 100644
Binary files a/tests/assets/intrinsics_turbulence_64_4d_sse41_normal.bin and b/tests/assets/intrinsics_turbulence_64_4d_sse41_normal.bin differ
diff --git a/tests/assets/noisebuilder_fbm_nooffset_32_1d.bin b/tests/assets/noisebuilder_fbm_nooffset_32_1d.bin
index c751910..d1be62a 100644
Binary files a/tests/assets/noisebuilder_fbm_nooffset_32_1d.bin and b/tests/assets/noisebuilder_fbm_nooffset_32_1d.bin differ
diff --git a/tests/assets/noisebuilder_fbm_nooffset_32_4d.bin b/tests/assets/noisebuilder_fbm_nooffset_32_4d.bin
index f3860e7..926b106 100644
Binary files a/tests/assets/noisebuilder_fbm_nooffset_32_4d.bin and b/tests/assets/noisebuilder_fbm_nooffset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_fbm_offset_32_1d.bin b/tests/assets/noisebuilder_fbm_offset_32_1d.bin
index 4c607d9..c1da1fa 100644
--- a/tests/assets/noisebuilder_fbm_offset_32_1d.bin
+++ b/tests/assets/noisebuilder_fbm_offset_32_1d.bin
@@ -1 +1 @@
-��I��wT�w�^��(i��s��|�HՂ��9���n��as���F��%薿jW��*���E����u�����x���~Ϊ�Sެ�����mm���	B��:i��ie��8���⶿�f��oƷ�-�����������������l��M��a���,��P�������泿}"���V������:����ۯ����4���e��a���sܫ�s$���v��cԩ��>��_���f<��aѧ��u��*��g�¦�(���
\ No newline at end of file
+��I��wT�x�^��(i��s��|�HՂ��9���n��as���F��%薿iW��)���E����u�����y���~Ϊ�Tެ�����mm���	B��:i��ie��8���⶿�f��oƷ�-�����������������l��M��a���,��P�������泿|"���V������:����ۯ����4���e��b���vܫ�s$���v��dԩ��>��`���f<��aѧ��u��*��g�¦�(���
\ No newline at end of file
diff --git a/tests/assets/noisebuilder_fbm_offset_32_4d.bin b/tests/assets/noisebuilder_fbm_offset_32_4d.bin
index ce32829..defba68 100644
Binary files a/tests/assets/noisebuilder_fbm_offset_32_4d.bin and b/tests/assets/noisebuilder_fbm_offset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_gradient_nooffset_32_1d.bin b/tests/assets/noisebuilder_gradient_nooffset_32_1d.bin
index d533e67..ad7e91c 100644
Binary files a/tests/assets/noisebuilder_gradient_nooffset_32_1d.bin and b/tests/assets/noisebuilder_gradient_nooffset_32_1d.bin differ
diff --git a/tests/assets/noisebuilder_gradient_nooffset_32_4d.bin b/tests/assets/noisebuilder_gradient_nooffset_32_4d.bin
index f1ecc70..0aee368 100644
Binary files a/tests/assets/noisebuilder_gradient_nooffset_32_4d.bin and b/tests/assets/noisebuilder_gradient_nooffset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_gradient_offset_32_1d.bin b/tests/assets/noisebuilder_gradient_offset_32_1d.bin
index 7c2ead1..0bc9c57 100644
Binary files a/tests/assets/noisebuilder_gradient_offset_32_1d.bin and b/tests/assets/noisebuilder_gradient_offset_32_1d.bin differ
diff --git a/tests/assets/noisebuilder_gradient_offset_32_4d.bin b/tests/assets/noisebuilder_gradient_offset_32_4d.bin
index 06250d2..3b66ea3 100644
Binary files a/tests/assets/noisebuilder_gradient_offset_32_4d.bin and b/tests/assets/noisebuilder_gradient_offset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_ridge_nooffset_32_1d.bin b/tests/assets/noisebuilder_ridge_nooffset_32_1d.bin
index b7efc2d..149b6be 100644
Binary files a/tests/assets/noisebuilder_ridge_nooffset_32_1d.bin and b/tests/assets/noisebuilder_ridge_nooffset_32_1d.bin differ
diff --git a/tests/assets/noisebuilder_ridge_nooffset_32_4d.bin b/tests/assets/noisebuilder_ridge_nooffset_32_4d.bin
index 4be8075..81d51fa 100644
Binary files a/tests/assets/noisebuilder_ridge_nooffset_32_4d.bin and b/tests/assets/noisebuilder_ridge_nooffset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_ridge_offset_32_1d.bin b/tests/assets/noisebuilder_ridge_offset_32_1d.bin
index 50fe688..30e12e9 100644
--- a/tests/assets/noisebuilder_ridge_offset_32_1d.bin
+++ b/tests/assets/noisebuilder_ridge_offset_32_1d.bin
@@ -1 +1 @@
-/ˆ@q�@� �@�ڂ@���@}o�@]�~@c|@�Hz@PFx@�\v@�t@K�r@�5q@ݰo@"En@��l@C�k@��j@֐i@(�h@J�g@�g@�^f@c�e@KMe@��d@��d@�Ld@�d@j�c@��c@�c@-d@�d@�Id@�~d@P�d@�e@�We@"�e@�f@�nf@��f@3=g@c�g@4h@�|h@��h@Mi@P�i@�j@�mj@��j@�k@�`k@Ѥk@��k@�{k@�j@��i@H,i@ߍh@�h@
\ No newline at end of file
+/ˆ@q�@� �@�ڂ@���@|o�@[�~@c|@�Hz@OFx@�\v@�t@K�r@�5q@ݰo@"En@��l@C�k@��j@אi@(�h@I�g@�g@�^f@c�e@JMe@��d@��d@�Ld@�d@j�c@��c@�c@-d@�d@�Id@�~d@P�d@�e@�We@!�e@�f@�nf@��f@3=g@d�g@5h@�|h@��h@Mi@O�i@�j@�mj@��j@�k@�`k@Фk@��k@�{k@�j@��i@H,i@ߍh@�h@
\ No newline at end of file
diff --git a/tests/assets/noisebuilder_ridge_offset_32_4d.bin b/tests/assets/noisebuilder_ridge_offset_32_4d.bin
index b1a57fc..7f1a8c8 100644
Binary files a/tests/assets/noisebuilder_ridge_offset_32_4d.bin and b/tests/assets/noisebuilder_ridge_offset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_turbulence_nooffset_32_1d.bin b/tests/assets/noisebuilder_turbulence_nooffset_32_1d.bin
index 1aeda00..c69fb20 100644
Binary files a/tests/assets/noisebuilder_turbulence_nooffset_32_1d.bin and b/tests/assets/noisebuilder_turbulence_nooffset_32_1d.bin differ
diff --git a/tests/assets/noisebuilder_turbulence_nooffset_32_4d.bin b/tests/assets/noisebuilder_turbulence_nooffset_32_4d.bin
index c034c81..7f80a84 100644
Binary files a/tests/assets/noisebuilder_turbulence_nooffset_32_4d.bin and b/tests/assets/noisebuilder_turbulence_nooffset_32_4d.bin differ
diff --git a/tests/assets/noisebuilder_turbulence_offset_32_1d.bin b/tests/assets/noisebuilder_turbulence_offset_32_1d.bin
index 57606ca..7346bf4 100644
--- a/tests/assets/noisebuilder_turbulence_offset_32_1d.bin
+++ b/tests/assets/noisebuilder_turbulence_offset_32_1d.bin
@@ -1 +1 @@
-��I?�wT?w�^?�(i?�s?�|?HՂ?�9�?�n�?as�?�F�?%�?jW�?*��?E��?�u�?��?x��?~Ϊ?Sެ?���?mm�?��?	B�?:i�?ie�?8�?��?�f�?oƷ?-�?��?��?���?���?�l�?M�?a��?,�?�P�?���?��?}"�?�V�?���?:��?�ۯ?��?4�?�e�?a��?sܫ?s$�?�v�?cԩ?�>�?_��?f<�?��?乪?D�?p��?B�?���?
\ No newline at end of file
+��I?�wT?x�^?�(i?�s?�|?HՂ?�9�?�n�?as�?�F�?%�?iW�?)��?E��?�u�?��?y��?~Ϊ?Tެ?���?mm�?��?	B�?:i�?ie�?8�?��?�f�?oƷ?-�?��?��?���?���?�l�?M�?a��?,�?�P�?���?��?|"�?�V�?���?:��?�ۯ?��?4�?�e�?b��?vܫ?s$�?�v�?dԩ?�>�?`��?f<�?��?乪?D�?p��?B�?���?
\ No newline at end of file
diff --git a/tests/assets/noisebuilder_turbulence_offset_32_4d.bin b/tests/assets/noisebuilder_turbulence_offset_32_4d.bin
index f71fd93..6f7e68c 100644
Binary files a/tests/assets/noisebuilder_turbulence_offset_32_4d.bin and b/tests/assets/noisebuilder_turbulence_offset_32_4d.bin differ
diff --git a/tests/intrinsics.rs b/tests/intrinsics.rs
index 4d317d4..f5a603f 100644
--- a/tests/intrinsics.rs
+++ b/tests/intrinsics.rs
@@ -1,14 +1,13 @@
-use core::arch::x86_64::__m256;
 use simdnoise::intrinsics::{avx2, scalar, sse2, sse41};
 use simdnoise::{
     Cell2ReturnType, CellDistanceFunction, CellReturnType, Cellular2Settings, CellularSettings,
-    FbmSettings, GradientSettings, NoiseDimensions, NoiseType, RidgeSettings, Settings,
-    SimplexSettings, TurbulenceSettings,
+    FbmSettings, GradientSettings, NoiseDimensions, RidgeSettings, Settings, SimplexSettings,
+    TurbulenceSettings,
 };
 
 mod helpers;
 use helpers::{
-    read_from_file_f32, read_from_file_f64, save_to_file_f32, save_to_file_f64, BIN_PATH,
+    read_from_file_f32, read_from_file_f64, /*save_to_file_f32, save_to_file_f64, */ BIN_PATH,
 };
 
 #[target_feature(enable = "avx2")]
@@ -20,7 +19,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -46,7 +45,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -73,7 +72,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -100,7 +99,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -128,7 +127,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -155,7 +154,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -183,7 +182,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -211,7 +210,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = CellularSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -242,7 +241,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -272,7 +271,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -303,7 +302,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -334,7 +333,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -366,7 +365,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -397,7 +396,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -429,7 +428,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -461,7 +460,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_euclidean_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -492,7 +491,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -522,7 +521,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -553,7 +552,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -584,7 +583,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -616,7 +615,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -647,7 +646,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -679,7 +678,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -711,7 +710,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_euclidean_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -742,7 +741,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -772,7 +771,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -803,7 +802,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -834,7 +833,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -866,7 +865,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -897,7 +896,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -929,7 +928,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -961,7 +960,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_manhattan_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -992,7 +991,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1022,7 +1021,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1053,7 +1052,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1084,7 +1083,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1116,7 +1115,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1147,7 +1146,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1179,7 +1178,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1211,7 +1210,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_manhattan_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1242,7 +1241,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1272,7 +1271,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1303,7 +1302,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1334,7 +1333,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1366,7 +1365,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1397,7 +1396,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1429,7 +1428,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1461,7 +1460,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_natural_cellvalue() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::CellValue)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1492,7 +1491,7 @@ unsafe fn do_intrinsic_cellular_2_avx2_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1522,7 +1521,7 @@ unsafe fn do_intrinsic_cellular_2_scalar_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1553,7 +1552,7 @@ unsafe fn do_intrinsic_cellular_2_sse2_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1584,7 +1583,7 @@ unsafe fn do_intrinsic_cellular_2_sse41_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1616,7 +1615,7 @@ unsafe fn do_intrinsic_cellular_3_avx2_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1647,7 +1646,7 @@ unsafe fn do_intrinsic_cellular_3_scalar_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1679,7 +1678,7 @@ unsafe fn do_intrinsic_cellular_3_sse2_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1711,7 +1710,7 @@ unsafe fn do_intrinsic_cellular_3_sse41_32_natural_distance() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(CellReturnType::Distance)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1738,7 +1737,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1764,7 +1763,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1791,7 +1790,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1818,7 +1817,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1846,7 +1845,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1873,7 +1872,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -1901,7 +1900,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -1929,7 +1928,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = Cellular2Settings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -1960,7 +1959,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -1990,7 +1989,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2021,7 +2020,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2052,7 +2051,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2084,7 +2083,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2115,7 +2114,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2147,7 +2146,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2179,7 +2178,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_euclidean_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2210,7 +2209,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_euclidean_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2240,7 +2239,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_euclidean_distance2add() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2271,7 +2270,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_euclidean_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2302,7 +2301,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_euclidean_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2334,7 +2333,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_euclidean_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2365,7 +2364,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_euclidean_distance2add() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2397,7 +2396,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_euclidean_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2429,7 +2428,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_euclidean_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2460,7 +2459,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_euclidean_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2490,7 +2489,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_euclidean_distance2sub() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2521,7 +2520,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_euclidean_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2552,7 +2551,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_euclidean_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2584,7 +2583,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_euclidean_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2615,7 +2614,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_euclidean_distance2sub() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2647,7 +2646,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_euclidean_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2679,7 +2678,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_euclidean_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2710,7 +2709,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_euclidean_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2740,7 +2739,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_euclidean_distance2mul() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2771,7 +2770,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_euclidean_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2802,7 +2801,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_euclidean_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2834,7 +2833,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_euclidean_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2865,7 +2864,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_euclidean_distance2mul() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -2897,7 +2896,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_euclidean_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -2929,7 +2928,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_euclidean_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -2960,7 +2959,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_euclidean_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -2990,7 +2989,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_euclidean_distance2div() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3021,7 +3020,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_euclidean_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3052,7 +3051,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_euclidean_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3084,7 +3083,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_euclidean_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3115,7 +3114,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_euclidean_distance2div() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3147,7 +3146,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_euclidean_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3179,7 +3178,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_euclidean_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Euclidean)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3210,7 +3209,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3240,7 +3239,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3271,7 +3270,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3302,7 +3301,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3334,7 +3333,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3365,7 +3364,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3397,7 +3396,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3429,7 +3428,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_manhattan_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3460,7 +3459,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_manhattan_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3490,7 +3489,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_manhattan_distance2add() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3521,7 +3520,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_manhattan_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3552,7 +3551,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_manhattan_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3584,7 +3583,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_manhattan_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3615,7 +3614,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_manhattan_distance2add() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3647,7 +3646,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_manhattan_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3679,7 +3678,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_manhattan_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3710,7 +3709,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_manhattan_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3740,7 +3739,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_manhattan_distance2sub() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3771,7 +3770,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_manhattan_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3802,7 +3801,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_manhattan_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3834,7 +3833,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_manhattan_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3865,7 +3864,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_manhattan_distance2sub() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -3897,7 +3896,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_manhattan_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -3929,7 +3928,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_manhattan_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -3960,7 +3959,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_manhattan_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -3990,7 +3989,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_manhattan_distance2mul() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4021,7 +4020,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_manhattan_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4052,7 +4051,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_manhattan_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4084,7 +4083,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_manhattan_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4115,7 +4114,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_manhattan_distance2mul() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4147,7 +4146,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_manhattan_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4179,7 +4178,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_manhattan_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4210,7 +4209,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_manhattan_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4240,7 +4239,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_manhattan_distance2div() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4271,7 +4270,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_manhattan_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4302,7 +4301,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_manhattan_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4334,7 +4333,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_manhattan_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4365,7 +4364,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_manhattan_distance2div() -> Vec<f32
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4397,7 +4396,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_manhattan_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4429,7 +4428,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_manhattan_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Manhattan)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4460,7 +4459,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4490,7 +4489,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4521,7 +4520,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4552,7 +4551,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4584,7 +4583,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4615,7 +4614,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4647,7 +4646,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4679,7 +4678,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_natural_distance2() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4710,7 +4709,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_natural_distance2add() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4740,7 +4739,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_natural_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4771,7 +4770,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_natural_distance2add() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4802,7 +4801,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_natural_distance2add() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4834,7 +4833,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_natural_distance2add() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4865,7 +4864,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_natural_distance2add() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -4897,7 +4896,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_natural_distance2add() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -4929,7 +4928,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_natural_distance2add() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Add)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -4960,7 +4959,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_natural_distance2sub() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -4990,7 +4989,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_natural_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5021,7 +5020,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_natural_distance2sub() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5052,7 +5051,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_natural_distance2sub() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5084,7 +5083,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_natural_distance2sub() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5115,7 +5114,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_natural_distance2sub() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5147,7 +5146,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_natural_distance2sub() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5179,7 +5178,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_natural_distance2sub() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Sub)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5210,7 +5209,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_natural_distance2mul() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5240,7 +5239,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_natural_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5271,7 +5270,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_natural_distance2mul() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5302,7 +5301,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_natural_distance2mul() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5334,7 +5333,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_natural_distance2mul() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5365,7 +5364,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_natural_distance2mul() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5397,7 +5396,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_natural_distance2mul() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5429,7 +5428,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_natural_distance2mul() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Mul)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5460,7 +5459,7 @@ unsafe fn do_intrinsic_cellular2_2_avx2_32_natural_distance2div() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5490,7 +5489,7 @@ unsafe fn do_intrinsic_cellular2_2_scalar_32_natural_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5521,7 +5520,7 @@ unsafe fn do_intrinsic_cellular2_2_sse2_32_natural_distance2div() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5552,7 +5551,7 @@ unsafe fn do_intrinsic_cellular2_2_sse41_32_natural_distance2div() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5584,7 +5583,7 @@ unsafe fn do_intrinsic_cellular2_3_avx2_32_natural_distance2div() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5615,7 +5614,7 @@ unsafe fn do_intrinsic_cellular2_3_scalar_32_natural_distance2div() -> Vec<f32>
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5647,7 +5646,7 @@ unsafe fn do_intrinsic_cellular2_3_sse2_32_natural_distance2div() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5679,7 +5678,7 @@ unsafe fn do_intrinsic_cellular2_3_sse41_32_natural_distance2div() -> Vec<f32> {
         .with_distance_function(CellDistanceFunction::Natural)
         .with_return_type(Cell2ReturnType::Distance2Div)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5710,10 +5709,9 @@ unsafe fn do_intrinsic_ridge_1_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
-
 #[test]
 fn test_intrinsic_ridge_1_avx2_32_normal() {
     let file_name = format!(
@@ -5741,7 +5739,7 @@ unsafe fn do_intrinsic_ridge_1_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5771,7 +5769,7 @@ unsafe fn do_intrinsic_ridge_1_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5801,7 +5799,7 @@ unsafe fn do_intrinsic_ridge_1_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -5832,7 +5830,7 @@ unsafe fn do_intrinsic_ridge_1_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5863,7 +5861,7 @@ unsafe fn do_intrinsic_ridge_1_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -5894,7 +5892,7 @@ unsafe fn do_intrinsic_ridge_1_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5925,7 +5923,7 @@ unsafe fn do_intrinsic_ridge_1_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -5957,7 +5955,7 @@ unsafe fn do_intrinsic_ridge_2_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -5989,7 +5987,7 @@ unsafe fn do_intrinsic_ridge_2_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -6020,7 +6018,7 @@ unsafe fn do_intrinsic_ridge_2_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6051,7 +6049,7 @@ unsafe fn do_intrinsic_ridge_2_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6083,7 +6081,7 @@ unsafe fn do_intrinsic_ridge_2_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -6115,11 +6113,12 @@ unsafe fn do_intrinsic_ridge_2_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
 #[test]
+#[ignore]
 fn test_intrinsic_ridge_2_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -6147,7 +6146,7 @@ unsafe fn do_intrinsic_ridge_2_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6179,7 +6178,7 @@ unsafe fn do_intrinsic_ridge_2_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6212,7 +6211,7 @@ unsafe fn do_intrinsic_ridge_3_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -6245,12 +6244,12 @@ unsafe fn do_intrinsic_ridge_3_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_ridge_3_avx2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -6278,7 +6277,7 @@ unsafe fn do_intrinsic_ridge_3_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6310,12 +6309,12 @@ unsafe fn do_intrinsic_ridge_3_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_ridge_3_scalar_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -6344,7 +6343,7 @@ unsafe fn do_intrinsic_ridge_3_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -6377,12 +6376,12 @@ unsafe fn do_intrinsic_ridge_3_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_ridge_3_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -6411,7 +6410,7 @@ unsafe fn do_intrinsic_ridge_3_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6444,12 +6443,12 @@ unsafe fn do_intrinsic_ridge_3_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_ridge_3_sse41_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -6479,7 +6478,7 @@ unsafe fn do_intrinsic_ridge_4_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -6513,7 +6512,7 @@ unsafe fn do_intrinsic_ridge_4_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -6546,7 +6545,7 @@ unsafe fn do_intrinsic_ridge_4_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6579,7 +6578,7 @@ unsafe fn do_intrinsic_ridge_4_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6613,7 +6612,7 @@ unsafe fn do_intrinsic_ridge_4_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -6647,7 +6646,7 @@ unsafe fn do_intrinsic_ridge_4_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -6681,7 +6680,7 @@ unsafe fn do_intrinsic_ridge_4_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6715,7 +6714,7 @@ unsafe fn do_intrinsic_ridge_4_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6746,7 +6745,7 @@ unsafe fn do_intrinsic_fbm_1_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -6777,7 +6776,7 @@ unsafe fn do_intrinsic_fbm_1_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -6807,7 +6806,7 @@ unsafe fn do_intrinsic_fbm_1_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6837,7 +6836,7 @@ unsafe fn do_intrinsic_fbm_1_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -6868,7 +6867,7 @@ unsafe fn do_intrinsic_fbm_1_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -6899,7 +6898,7 @@ unsafe fn do_intrinsic_fbm_1_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -6930,7 +6929,7 @@ unsafe fn do_intrinsic_fbm_1_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6961,7 +6960,7 @@ unsafe fn do_intrinsic_fbm_1_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -6978,7 +6977,6 @@ fn test_intrinsic_fbm_1_sse41_64_normal() {
         assert_eq!(expected, noise);
     }
 }
-
 #[target_feature(enable = "avx2")]
 unsafe fn do_intrinsic_fbm_2_avx2_32_normal() -> Vec<f32> {
     let dims = NoiseDimensions {
@@ -6993,7 +6991,7 @@ unsafe fn do_intrinsic_fbm_2_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7025,7 +7023,7 @@ unsafe fn do_intrinsic_fbm_2_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7056,7 +7054,7 @@ unsafe fn do_intrinsic_fbm_2_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7087,7 +7085,7 @@ unsafe fn do_intrinsic_fbm_2_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7119,7 +7117,7 @@ unsafe fn do_intrinsic_fbm_2_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -7151,11 +7149,12 @@ unsafe fn do_intrinsic_fbm_2_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
 #[test]
+#[ignore]
 fn test_intrinsic_fbm_2_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -7183,7 +7182,7 @@ unsafe fn do_intrinsic_fbm_2_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -7215,7 +7214,7 @@ unsafe fn do_intrinsic_fbm_2_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -7248,7 +7247,7 @@ unsafe fn do_intrinsic_fbm_3_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7281,12 +7280,12 @@ unsafe fn do_intrinsic_fbm_3_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_fbm_3_avx2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -7314,7 +7313,7 @@ unsafe fn do_intrinsic_fbm_3_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7346,12 +7345,12 @@ unsafe fn do_intrinsic_fbm_3_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_fbm_3_scalar_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -7380,7 +7379,7 @@ unsafe fn do_intrinsic_fbm_3_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -7413,12 +7412,12 @@ unsafe fn do_intrinsic_fbm_3_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_fbm_3_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -7447,7 +7446,7 @@ unsafe fn do_intrinsic_fbm_3_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -7480,12 +7479,12 @@ unsafe fn do_intrinsic_fbm_3_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_fbm_3_sse41_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -7515,7 +7514,7 @@ unsafe fn do_intrinsic_fbm_4_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7549,7 +7548,7 @@ unsafe fn do_intrinsic_fbm_4_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7582,7 +7581,7 @@ unsafe fn do_intrinsic_fbm_4_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7615,7 +7614,7 @@ unsafe fn do_intrinsic_fbm_4_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7649,7 +7648,7 @@ unsafe fn do_intrinsic_fbm_4_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -7683,7 +7682,7 @@ unsafe fn do_intrinsic_fbm_4_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -7717,7 +7716,7 @@ unsafe fn do_intrinsic_fbm_4_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -7751,7 +7750,7 @@ unsafe fn do_intrinsic_fbm_4_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -7782,7 +7781,7 @@ unsafe fn do_intrinsic_turbulence_1_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7813,7 +7812,7 @@ unsafe fn do_intrinsic_turbulence_1_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -7843,7 +7842,7 @@ unsafe fn do_intrinsic_turbulence_1_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7873,7 +7872,7 @@ unsafe fn do_intrinsic_turbulence_1_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -7904,7 +7903,7 @@ unsafe fn do_intrinsic_turbulence_1_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -7935,7 +7934,7 @@ unsafe fn do_intrinsic_turbulence_1_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -7966,7 +7965,7 @@ unsafe fn do_intrinsic_turbulence_1_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -7997,7 +7996,7 @@ unsafe fn do_intrinsic_turbulence_1_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -8029,7 +8028,7 @@ unsafe fn do_intrinsic_turbulence_2_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8061,7 +8060,7 @@ unsafe fn do_intrinsic_turbulence_2_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8092,7 +8091,7 @@ unsafe fn do_intrinsic_turbulence_2_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8123,7 +8122,7 @@ unsafe fn do_intrinsic_turbulence_2_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8155,7 +8154,7 @@ unsafe fn do_intrinsic_turbulence_2_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -8187,11 +8186,12 @@ unsafe fn do_intrinsic_turbulence_2_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
 #[test]
+#[ignore]
 fn test_intrinsic_turbulence_2_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -8219,7 +8219,7 @@ unsafe fn do_intrinsic_turbulence_2_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -8251,7 +8251,7 @@ unsafe fn do_intrinsic_turbulence_2_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -8284,7 +8284,7 @@ unsafe fn do_intrinsic_turbulence_3_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8317,12 +8317,12 @@ unsafe fn do_intrinsic_turbulence_3_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_turbulence_3_avx2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -8350,7 +8350,7 @@ unsafe fn do_intrinsic_turbulence_3_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8382,12 +8382,12 @@ unsafe fn do_intrinsic_turbulence_3_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_turbulence_3_scalar_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -8416,7 +8416,7 @@ unsafe fn do_intrinsic_turbulence_3_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -8449,12 +8449,12 @@ unsafe fn do_intrinsic_turbulence_3_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_turbulence_3_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -8483,12 +8483,12 @@ unsafe fn do_intrinsic_turbulence_3_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
 #[test]
-fn test_intrinsic_turbulence_3_sse41_32_normal() {
+fn test_intrinsic_turbulence_3_sse41_32_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
         BIN_PATH, "intrinsics", "turbulence", "32", "3d", "sse41", "normal"
@@ -8516,12 +8516,12 @@ unsafe fn do_intrinsic_turbulence_3_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_turbulence_3_sse41_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -8551,7 +8551,7 @@ unsafe fn do_intrinsic_turbulence_4_avx2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8585,7 +8585,7 @@ unsafe fn do_intrinsic_turbulence_4_avx2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8618,7 +8618,7 @@ unsafe fn do_intrinsic_turbulence_4_scalar_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8651,7 +8651,7 @@ unsafe fn do_intrinsic_turbulence_4_scalar_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8685,7 +8685,7 @@ unsafe fn do_intrinsic_turbulence_4_sse2_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -8719,7 +8719,7 @@ unsafe fn do_intrinsic_turbulence_4_sse2_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -8753,7 +8753,7 @@ unsafe fn do_intrinsic_turbulence_4_sse41_32_normal() -> Vec<f32> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -8787,7 +8787,7 @@ unsafe fn do_intrinsic_turbulence_4_sse41_64_normal() -> Vec<f64> {
         .with_gain(2.0)
         .with_octaves(5)
         .wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -8813,7 +8813,7 @@ unsafe fn do_intrinsic_gradient_1_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8839,7 +8839,7 @@ unsafe fn do_intrinsic_gradient_1_avx2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_1d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -8864,7 +8864,7 @@ unsafe fn do_intrinsic_gradient_1_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8889,7 +8889,7 @@ unsafe fn do_intrinsic_gradient_1_scalar_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_1d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -8915,7 +8915,7 @@ unsafe fn do_intrinsic_gradient_1_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -8941,7 +8941,7 @@ unsafe fn do_intrinsic_gradient_1_sse2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_1d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -8967,7 +8967,7 @@ unsafe fn do_intrinsic_gradient_1_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -8993,7 +8993,7 @@ unsafe fn do_intrinsic_gradient_1_sse41_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_1d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_1d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -9020,7 +9020,7 @@ unsafe fn do_intrinsic_gradient_2_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -9047,7 +9047,7 @@ unsafe fn do_intrinsic_gradient_2_avx2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_2d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -9073,7 +9073,7 @@ unsafe fn do_intrinsic_gradient_2_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -9099,7 +9099,7 @@ unsafe fn do_intrinsic_gradient_2_scalar_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_2d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -9126,7 +9126,7 @@ unsafe fn do_intrinsic_gradient_2_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -9153,11 +9153,12 @@ unsafe fn do_intrinsic_gradient_2_sse2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_2d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
 #[test]
+#[ignore]
 fn test_intrinsic_gradient_2_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -9180,7 +9181,7 @@ unsafe fn do_intrinsic_gradient_2_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -9207,7 +9208,7 @@ unsafe fn do_intrinsic_gradient_2_sse41_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_2d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_2d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -9235,7 +9236,7 @@ unsafe fn do_intrinsic_gradient_3_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -9263,12 +9264,12 @@ unsafe fn do_intrinsic_gradient_3_avx2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_3d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_gradient_3_avx2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -9291,7 +9292,7 @@ unsafe fn do_intrinsic_gradient_3_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -9318,12 +9319,12 @@ unsafe fn do_intrinsic_gradient_3_scalar_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_3d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_gradient_3_scalar_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -9347,7 +9348,7 @@ unsafe fn do_intrinsic_gradient_3_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -9375,12 +9376,12 @@ unsafe fn do_intrinsic_gradient_3_sse2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_3d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_gradient_3_sse2_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -9404,7 +9405,7 @@ unsafe fn do_intrinsic_gradient_3_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -9432,12 +9433,12 @@ unsafe fn do_intrinsic_gradient_3_sse41_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_3d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_3d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
-#[ignore]
 #[test]
+#[should_panic(expected = "not implemented")]
 fn test_intrinsic_gradient_3_sse41_64_normal() {
     let file_name = format!(
         "{}/{}_{}_{}_{}_{}_{}.bin",
@@ -9462,7 +9463,7 @@ unsafe fn do_intrinsic_gradient_4_avx2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -9491,7 +9492,7 @@ unsafe fn do_intrinsic_gradient_4_avx2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = avx2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = avx2::get_4d_noise_64::<simdeez::Avx2>(&noise_type);
     noise
 }
 
@@ -9519,7 +9520,7 @@ unsafe fn do_intrinsic_gradient_4_scalar_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -9547,7 +9548,7 @@ unsafe fn do_intrinsic_gradient_4_scalar_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = scalar::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = scalar::get_4d_noise_64::<simdeez::scalar::Scalar>(&noise_type);
     noise
 }
 
@@ -9576,7 +9577,7 @@ unsafe fn do_intrinsic_gradient_4_sse2_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -9605,7 +9606,7 @@ unsafe fn do_intrinsic_gradient_4_sse2_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse2::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse2::get_4d_noise_64::<simdeez::Sse2>(&noise_type);
     noise
 }
 
@@ -9634,7 +9635,7 @@ unsafe fn do_intrinsic_gradient_4_sse41_32_normal() -> Vec<f32> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise::<simdeez::Sse41>(&noise_type);
     noise
 }
 
@@ -9663,7 +9664,7 @@ unsafe fn do_intrinsic_gradient_4_sse41_64_normal() -> Vec<f64> {
     };
 
     let noise_type = GradientSettings::default(dims).with_seed(1337).wrap();
-    let (noise, _min, _max) = sse41::get_4d_noise_64(&noise_type);
+    let (noise, _min, _max) = sse41::get_4d_noise_64::<simdeez::Sse41>(&noise_type);
     noise
 }
 
diff --git a/tests/noisebuilder.rs b/tests/noisebuilder.rs
index 66dbcd0..0c6d89f 100644
--- a/tests/noisebuilder.rs
+++ b/tests/noisebuilder.rs
@@ -1,7 +1,7 @@
 use simdnoise::{NoiseBuilder, Settings, SimplexSettings};
 
 mod helpers;
-use helpers::{read_from_file_f32, save_to_file_f32, BIN_PATH};
+use helpers::{read_from_file_f32, /*save_to_file_f32, */ BIN_PATH};
 
 mod noise {
     use super::*;