From 5174060c8e4e8dd48fa4bb7a2714c4d4e497195d Mon Sep 17 00:00:00 2001
From: Simon Weinberger
Date: Sun, 7 Mar 2021 21:32:59 +0100
Subject: [PATCH] (Persistent) Hash array mapped trie

---
 src/Cornucopia.BitOperations/BitOperations.cs |  21 ++
 .../Cornucopia.DataStructures.csproj          |   1 +
 .../Persistent/HamtDictionary.cs              | 116 ++++++++
 .../Persistent/HashArrayMappedTrie.cs         | 265 ++++++++++++++++++
 4 files changed, 403 insertions(+)
 create mode 100644 src/Cornucopia.DataStructures/Persistent/HamtDictionary.cs
 create mode 100644 src/Cornucopia.DataStructures/Persistent/HashArrayMappedTrie.cs

diff --git a/src/Cornucopia.BitOperations/BitOperations.cs b/src/Cornucopia.BitOperations/BitOperations.cs
index f1c50be..43fa494 100644
--- a/src/Cornucopia.BitOperations/BitOperations.cs
+++ b/src/Cornucopia.BitOperations/BitOperations.cs
@@ -32,5 +32,26 @@ public static int Log2(ulong value)
 
             return 32 + Log2(hi);
         }
+
+        /// <summary>
+        /// Counts the bits that are set to 1 in <paramref name="value"/>.
+        /// </summary>
+        /// <param name="value">The 32-bit value to inspect.</param>
+        /// <returns>The number of set bits, between 0 and 32.</returns>
+        public static int PopCount(uint value)
+        {
+            const uint c1 = 0x_55555555u;
+            const uint c2 = 0x_33333333u;
+            const uint c3 = 0x_0F0F0F0Fu;
+            const uint c4 = 0x_01010101u;
+
+            // Parallel (SWAR) bit count: 2-bit sums, then 4-bit sums, then the
+            // per-byte sums are added up into the top byte by the multiplication.
+            value -= (value >> 1) & c1;
+            value = (value & c2) + ((value >> 2) & c2);
+            value = (((value + (value >> 4)) & c3) * c4) >> 24;
+
+            return (int) value;
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Cornucopia.DataStructures/Cornucopia.DataStructures.csproj b/src/Cornucopia.DataStructures/Cornucopia.DataStructures.csproj
index db03b9e..c205e45 100644
--- a/src/Cornucopia.DataStructures/Cornucopia.DataStructures.csproj
+++ b/src/Cornucopia.DataStructures/Cornucopia.DataStructures.csproj
@@ -16,6 +16,7 @@
 runtime; build; native; contentfiles; analyzers; buildtransitive
+
diff --git a/src/Cornucopia.DataStructures/Persistent/HamtDictionary.cs b/src/Cornucopia.DataStructures/Persistent/HamtDictionary.cs
new file mode 100644
index 0000000..6cd784c
--- /dev/null
+++ b/src/Cornucopia.DataStructures/Persistent/HamtDictionary.cs
@@ -0,0 +1,116 @@
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Cornucopia.DataStructures.Persistent
+{
+    /// <summary>
+    /// A dictionary that stores its key/value pairs in a <see cref="HashArrayMappedTrie{T, TComparer}"/>,
+    /// comparing and hashing pairs by key only.
+    /// </summary>
+    /// <typeparam name="TKey">The type of the keys.</typeparam>
+    /// <typeparam name="TValue">The type of the values.</typeparam>
+    public struct HamtDictionary<TKey, TValue>
+    {
+        private HashArrayMappedTrie<KeyValuePair<TKey, TValue>, KeyComparer> _trie;
+
+        /// <summary>
+        /// Creates an empty dictionary.
+        /// </summary>
+        /// <param name="keyComparer">The comparer used to hash keys and test them for equality.</param>
+        public HamtDictionary(IEqualityComparer<TKey> keyComparer)
+        {
+            this._trie = new(new(keyComparer));
+            this.Count = 0;
+        }
+
+        /// <summary>
+        /// Gets the number of keys that were added through the indexer.
+        /// </summary>
+        public int Count { get; private set; }
+
+        /// <summary>
+        /// Determines whether an entry with the given key exists.
+        /// </summary>
+        /// <param name="key">The key to look up.</param>
+        /// <returns><c>true</c> if the key is present; otherwise <c>false</c>.</returns>
+        public bool ContainsKey(TKey key)
+        {
+            return this._trie.Contains(new(key, default!));
+        }
+
+        /// <summary>
+        /// Looks up the value stored for the given key.
+        /// </summary>
+        /// <param name="key">The key to look up.</param>
+        /// <param name="value">Receives the stored value, or the default value if the key is absent.</param>
+        /// <returns><c>true</c> if the key is present; otherwise <c>false</c>.</returns>
+        public bool TryGetValue(TKey key, [MaybeNullWhen(false)] out TValue value)
+        {
+            var pair = new KeyValuePair<TKey, TValue>(key, default!);
+            if (this._trie.TryGetFirst(ref pair))
+            {
+                value = pair.Value;
+                return true;
+            }
+
+            value = default;
+            return false;
+        }
+
+        /// <summary>
+        /// Gets or sets the value stored for the given key.
+        /// </summary>
+        /// <param name="key">The key to look up or add.</param>
+        /// <exception cref="KeyNotFoundException">The getter was called with a key that is absent.</exception>
+        public TValue this[TKey key]
+        {
+            get
+            {
+                var pair = new KeyValuePair<TKey, TValue>(key, default!);
+                if (this._trie.TryGetFirst(ref pair))
+                {
+                    return pair.Value;
+                }
+
+                throw new KeyNotFoundException();
+            }
+            set
+            {
+                var pair = new KeyValuePair<TKey, TValue>(key, value);
+                if (this._trie.Contains(pair))
+                {
+                    // NOTE(review): pairs are compared by key only, so assigning to an
+                    // existing key keeps the old value; the trie has no replace operation.
+                    return;
+                }
+
+                // The trie is immutable: Add returns the updated trie, which has to be stored.
+                this._trie = this._trie.Add(pair);
+                this.Count++;
+            }
+        }
+
+        /// <summary>
+        /// Compares and hashes key/value pairs by their key, ignoring the value.
+        /// </summary>
+        private readonly struct KeyComparer : IEqualityComparer<KeyValuePair<TKey, TValue>>
+        {
+            private readonly IEqualityComparer<TKey> _keyComparer;
+
+            public KeyComparer(IEqualityComparer<TKey> keyComparer)
+            {
+                this._keyComparer = keyComparer;
+            }
+
+            public bool Equals(KeyValuePair<TKey, TValue> x, KeyValuePair<TKey, TValue> y)
+            {
+                return this._keyComparer.Equals(x.Key, y.Key);
+            }
+
+            public int GetHashCode(KeyValuePair<TKey, TValue> obj)
+            {
+                return obj.Key is null ? 0 : this._keyComparer.GetHashCode(obj.Key);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Cornucopia.DataStructures/Persistent/HashArrayMappedTrie.cs b/src/Cornucopia.DataStructures/Persistent/HashArrayMappedTrie.cs
new file mode 100644
index 0000000..1043675
--- /dev/null
+++ b/src/Cornucopia.DataStructures/Persistent/HashArrayMappedTrie.cs
@@ -0,0 +1,265 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+namespace Cornucopia.DataStructures.Persistent
+{
+    /// <summary>
+    /// An immutable hash array mapped trie. Every level consumes five bits of an item's
+    /// hash code; a 32-bit map records which of the 32 slots of a level are occupied and
+    /// the occupied slots are stored densely in an array.
+    /// </summary>
+    /// <typeparam name="T">The type of the stored items. Items are kept as <see cref="object"/>.</typeparam>
+    /// <typeparam name="TComparer">The comparer used to hash items and test them for equality.</typeparam>
+    public readonly struct HashArrayMappedTrie<T, TComparer>
+        where TComparer : struct, IEqualityComparer<T>
+    {
+        // Number of hash bits consumed per trie level.
+        private const int BitsPerLevel = 5;
+
+        private readonly TComparer _comparer;
+        private readonly HashEntry[] _mainArray;
+        private readonly uint _mainMap;
+
+        /// <summary>
+        /// Creates an empty trie.
+        /// </summary>
+        /// <param name="comparer">The comparer used to hash items and test them for equality.</param>
+        public HashArrayMappedTrie(TComparer comparer)
+        {
+            this._comparer = comparer;
+#if NETCOREAPP3_1
+            this._mainArray = Array.Empty<HashEntry>();
+#else
+            this._mainArray = new HashEntry[0];
+#endif
+            this._mainMap = 0;
+        }
+
+        private HashArrayMappedTrie(TComparer comparer, HashEntry[] mainArray, uint mainMap)
+        {
+            this._comparer = comparer;
+            this._mainArray = mainArray;
+            this._mainMap = mainMap;
+        }
+
+        /// <summary>
+        /// Returns a new trie that additionally contains <paramref name="item"/>; this trie is left unchanged.
+        /// </summary>
+        /// <param name="item">The item to add. No check for an already stored equal item is made.</param>
+        /// <returns>The trie including the item.</returns>
+        public HashArrayMappedTrie<T, TComparer> Add(T item)
+        {
+            var hashCode = this.GetHashCode(item);
+            var (map, array) = this.Add(this._mainArray, this._mainMap, item, hashCode, 0);
+            return new(this._comparer, array, map);
+        }
+
+        /// <summary>
+        /// Determines whether the trie contains an item equal to <paramref name="item"/>.
+        /// </summary>
+        /// <param name="item">The item to look for.</param>
+        /// <returns><c>true</c> if an equal item is stored; otherwise <c>false</c>.</returns>
+        public bool Contains(T item)
+        {
+            var hashCode = this.GetHashCode(item);
+            var node = this.Find(hashCode);
+            if (node == null)
+            {
+                return false;
+            }
+
+            if (node is T singleValue)
+            {
+                return this._comparer.Equals(singleValue, item);
+            }
+
+            var array = (T[]) node;
+            foreach (var value in array)
+            {
+                if (this._comparer.Equals(value, item))
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Looks up the first stored item that is equal to <paramref name="item"/>.
+        /// </summary>
+        /// <param name="item">The item to look for; replaced by the stored item when one is found.</param>
+        /// <returns><c>true</c> if an equal item is stored; otherwise <c>false</c>.</returns>
+        public bool TryGetFirst(ref T item)
+        {
+            var hashCode = this.GetHashCode(item);
+            var node = this.Find(hashCode);
+            if (node == null)
+            {
+                return false;
+            }
+
+            if (node is T singleValue)
+            {
+                if (this._comparer.Equals(singleValue, item))
+                {
+                    item = singleValue;
+                    return true;
+                }
+
+                return false;
+            }
+
+            var array = (T[]) node;
+            foreach (var value in array)
+            {
+                if (this._comparer.Equals(value, item))
+                {
+                    item = value;
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        private uint GetHashCode(T item)
+        {
+            return item is null ? 0 : (uint) this._comparer.GetHashCode(item);
+        }
+
+        // Returns the array index of the slot selected by the low five bits of hashCode,
+        // or null if that slot is not set in map.
+        private static int? GetIndex(uint map, uint hashCode)
+        {
+            // The shift count of a 32-bit operand is masked to its low five bits.
+            var bit = 1u << (int) hashCode;
+            if ((map & bit) == 0)
+            {
+                return null;
+            }
+
+            return BitOperations.PopCount(map & (bit - 1));
+        }
+
+        // Marks the slot selected by the low five bits of hashCode as occupied in map and
+        // returns the array index at which its entry has to be inserted.
+        private static int GetInsertIndex(ref uint map, uint hashCode)
+        {
+            var bit = 1u << (int) hashCode;
+            map |= bit;
+            return BitOperations.PopCount(map & (bit - 1));
+        }
+
+        // Adds item below the node described by array/map and returns the copied node.
+        // shift is the number of hash bits already consumed by the levels above.
+        private (uint map, HashEntry[] array) Add(HashEntry[] array, uint map, in T item, uint hashCode, int shift)
+        {
+            var idx = GetIndex(map, hashCode >> shift);
+            if (!idx.HasValue)
+            {
+                var insertIndex = GetInsertIndex(ref map, hashCode >> shift);
+                var newTable = new HashEntry[array.Length + 1];
+                Array.Copy(array, 0, newTable, 0, insertIndex);
+                newTable[insertIndex] = new HashEntry(hashCode, item);
+                Array.Copy(array, insertIndex, newTable, insertIndex + 1, array.Length - insertIndex);
+                return (map, newTable);
+            }
+
+            ref var entry = ref array[idx.Value];
+            if (entry.SubHashTableOrLeaf is HashEntry[] subHashTable)
+            {
+                var newEntry = this.Add(subHashTable, entry.MapOrHash, item, hashCode, shift + BitsPerLevel);
+                var copy = array.ToArray();
+                copy[idx.Value] = new HashEntry(newEntry.map, newEntry.array);
+                return (map, copy);
+            }
+
+            if (entry.SubHashTableOrLeaf is T[] multiLeaf)
+            {
+                if (entry.MapOrHash == hashCode)
+                {
+                    var newEntries = new T[multiLeaf.Length + 1];
+                    Array.Copy(multiLeaf, newEntries, multiLeaf.Length);
+                    newEntries[multiLeaf.Length] = item;
+                    var copy = array.ToArray();
+                    copy[idx.Value] = new HashEntry(hashCode, newEntries);
+                    return (map, copy);
+                }
+            }
+            else
+            {
+                if (entry.MapOrHash == hashCode)
+                {
+                    var newEntries = new[] { (T) entry.SubHashTableOrLeaf!, item };
+                    var copy = array.ToArray();
+                    copy[idx.Value] = new HashEntry(hashCode, newEntries);
+                    return (map, copy);
+                }
+            }
+
+            {
+                // The slot holds a leaf with a different hash code: push it one level down.
+                // Its bit in the new sub map must come from the hash bits of that next level.
+                subHashTable = new[] { entry };
+                var subMap = 1u << (int) (entry.MapOrHash >> (shift + BitsPerLevel));
+                var newEntry = this.Add(subHashTable, subMap, item, hashCode, shift + BitsPerLevel);
+                var copy = array.ToArray();
+                copy[idx.Value] = new HashEntry(newEntry.map, newEntry.array);
+                return (map, copy);
+            }
+        }
+
+        // Walks down the trie along hashCode and returns the leaf stored for exactly that
+        // hash code (a single item or a T[] of colliding items), or null if there is none.
+        private object? Find(uint hashCode)
+        {
+            var array = this._mainArray;
+            var map = this._mainMap;
+            var shift = 0;
+
+            while (true)
+            {
+                // Each level is indexed by its own group of hash bits, mirroring Add.
+                var index = GetIndex(map, hashCode >> shift);
+                if (!index.HasValue)
+                {
+                    return null;
+                }
+
+                var node = array[index.Value];
+                if (node.SubHashTableOrLeaf is HashEntry[] subHashTable)
+                {
+                    array = subHashTable;
+                    map = node.MapOrHash;
+                    shift += BitsPerLevel;
+                    continue;
+                }
+
+                if (node.MapOrHash == hashCode)
+                {
+                    return node.SubHashTableOrLeaf;
+                }
+
+                return null;
+            }
+        }
+
+        // One slot of a trie node: either a leaf (MapOrHash is the hash code and
+        // SubHashTableOrLeaf the item or a T[] of items sharing it) or a sub table
+        // (MapOrHash is its occupancy map and SubHashTableOrLeaf its HashEntry[]).
+        private readonly struct HashEntry
+        {
+            public HashEntry(uint mapOrHash, object? subHashTableOrLeaf)
+            {
+                this.MapOrHash = mapOrHash;
+                this.SubHashTableOrLeaf = subHashTableOrLeaf;
+            }
+
+            public uint MapOrHash { get; }
+            public object? SubHashTableOrLeaf { get; }
+        }
+    }
+}
\ No newline at end of file