diff --git a/BitSliceIndexing/bsi.go b/BitSliceIndexing/bsi.go index de24d444..350f77be 100644 --- a/BitSliceIndexing/bsi.go +++ b/BitSliceIndexing/bsi.go @@ -1,6 +1,7 @@ package roaring import ( + "fmt" "github.com/RoaringBitmap/roaring" "math/bits" "runtime" @@ -28,7 +29,11 @@ type BSI struct { // then the underlying BSI will be automatically sized. func NewBSI(maxValue int64, minValue int64) *BSI { - ba := make([]*roaring.Bitmap, bits.Len64(uint64(maxValue))) + bitsz := bits.Len64(uint64(minValue)) + if bits.Len64(uint64(maxValue)) > bitsz { + bitsz = bits.Len64(uint64(maxValue)) + } + ba := make([]*roaring.Bitmap, bitsz) for i := 0; i < len(ba); i++ { ba[i] = roaring.NewBitmap() } @@ -267,19 +272,42 @@ func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg results.RunOptimize() } + x := e.bsi.BitCount() + startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0 + endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0 + for i := 0; i < len(batch); i++ { cID := batch[i] eq1, eq2 := true, true lt1, lt2, gt1 := false, false, false - for j := e.bsi.BitCount() - 1; j >= 0; j-- { + j := e.bsi.BitCount() - 1 + isNegative := false + if x == 64 { + isNegative = e.bsi.bA[j].Contains(cID) + j-- + } + compStartValue := e.valueOrStart + compEndValue := e.end + if isNegative != startIsNegative { + compStartValue = ^e.valueOrStart + 1 + } + if isNegative != endIsNegative { + compEndValue = ^e.end + 1 + } + for ; j >= 0; j-- { sliceContainsBit := e.bsi.bA[j].Contains(cID) - if uint64(e.valueOrStart)&(1<<uint64(j)) > 0 { + if uint64(compStartValue)&(1<<uint64(j)) > 0 { // BIT in value is SET if !sliceContainsBit { if eq1 { + if (e.op == GT || e.op == GE || e.op == RANGE) && startIsNegative && !isNegative { + gt1 = true + } if e.op == LT || e.op == LE { - lt1 = true + if !startIsNegative || (startIsNegative == isNegative) { + lt1 = true + } } eq1 = false break @@ -289,8 +317,13 @@ func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg // BIT in value is CLEAR 
if sliceContainsBit { if eq1 { + if (e.op == LT || e.op == LE) && isNegative && !startIsNegative { + lt1 = true + } if e.op == GT || e.op == GE || e.op == RANGE { - gt1 = true + if startIsNegative || (startIsNegative == isNegative) { + gt1 = true + } } eq1 = false if e.op != RANGE { @@ -300,23 +333,31 @@ func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg } } - if e.op == RANGE && uint64(e.end)&(1<<uint64(j)) > 0 { + if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 { // BIT in value is SET if !sliceContainsBit { if eq2 { - lt2 = true + if !endIsNegative || (endIsNegative == isNegative) { + lt2 = true + } eq2 = false + if startIsNegative && !endIsNegative { + break + } } } - } else { + } else if e.op == RANGE { // BIT in value is CLEAR if sliceContainsBit { if eq2 { + if isNegative && !endIsNegative { + lt2 = true + } eq2 = false + break } } } - } switch e.op { @@ -325,7 +366,7 @@ func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg results.Add(cID) } case LE: - if eq1 || lt1 { + if lt1 || (eq1 && (!startIsNegative || (startIsNegative && isNegative))) { results.Add(cID) } case EQ: @@ -333,7 +374,7 @@ func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg results.Add(cID) } case GE: - if eq1 || gt1 { + if gt1 || (eq1 && (startIsNegative || (!startIsNegative && !isNegative))) { results.Add(cID) } case GT: @@ -345,9 +386,7 @@ func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg results.Add(cID) } default: - if eq1 { - results.Add(cID) - } + panic(fmt.Sprintf("Unknown operation [%v]", e.op)) } } diff --git a/BitSliceIndexing/bsi_test.go b/BitSliceIndexing/bsi_test.go index 93f21586..d16e78f9 100644 --- a/BitSliceIndexing/bsi_test.go +++ b/BitSliceIndexing/bsi_test.go @@ -1,7 +1,7 @@ package roaring import ( - _ "fmt" + _ "fmt" "github.com/RoaringBitmap/roaring" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -31,8 +31,35 @@ func setup() *BSI { return 
bsi } -func TestEQ(t *testing.T) { +func setupNegativeBoundary() *BSI { + + bsi := NewBSI(5, -5) + // Setup values + for i := int(bsi.MinValue); i <= int(bsi.MaxValue); i++ { + bsi.SetValue(uint64(i), int64(i)) + } + return bsi +} +func setupAllNegative() *BSI { + bsi := NewBSI(-1, -100) + // Setup values + for i := int(bsi.MinValue); i <= int(bsi.MaxValue); i++ { + bsi.SetValue(uint64(i), int64(i)) + } + return bsi +} + +func setupAutoSizeNegativeBoundary() *BSI { + bsi := NewDefaultBSI() + // Setup values + for i := int(-5); i <= int(5); i++ { + bsi.SetValue(uint64(i), int64(i)) + } + return bsi +} + +func TestEQ(t *testing.T) { bsi := setup() eq := bsi.CompareValue(0, EQ, 50, 0, nil) assert.Equal(t, uint64(1), eq.GetCardinality()) @@ -288,3 +315,72 @@ func TestTransposeWithCounts(t *testing.T) { assert.True(t, ok) assert.Equal(t, int64(2), a) } + +func TestRangeAllNegative(t *testing.T) { + bsi := setupAllNegative() + assert.Equal(t, uint64(100), bsi.GetCardinality()) + set := bsi.CompareValue(0, RANGE, -55, -45, nil) + assert.Equal(t, uint64(11), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + val, _ := bsi.GetValue(uint64(i.Next())) + assert.GreaterOrEqual(t, val, int64(-55)) + assert.LessOrEqual(t, val, int64(-45)) + } +} + +func TestSumWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + sum, cnt := bsi.Sum(bsi.GetExistenceBitmap()) + assert.Equal(t, uint64(11), cnt) + assert.Equal(t, int64(0), sum) +} + +func TestGEWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + set := bsi.CompareValue(0, GE, 3, 0, nil) + assert.Equal(t, uint64(3), set.GetCardinality()) + set = bsi.CompareValue(0, GE, -3, 0, nil) + assert.Equal(t, uint64(9), set.GetCardinality()) +} + +func TestLEWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + set := bsi.CompareValue(0, LE, 
-3, 0, nil) + assert.Equal(t, uint64(3), set.GetCardinality()) + set = bsi.CompareValue(0, LE, 3, 0, nil) + assert.Equal(t, uint64(9), set.GetCardinality()) +} + +func TestRangeWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + set := bsi.CompareValue(0, RANGE, -3, 3, nil) + assert.Equal(t, uint64(7), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + val, _ := bsi.GetValue(uint64(i.Next())) + assert.GreaterOrEqual(t, val, int64(-3)) + assert.LessOrEqual(t, val, int64(3)) + } +} + +func TestAutoSizeWithNegative(t *testing.T) { + bsi := setupAutoSizeNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + assert.Equal(t, 64, bsi.BitCount()) + set := bsi.CompareValue(0, RANGE, -3, 3, nil) + assert.Equal(t, uint64(7), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + val, _ := bsi.GetValue(uint64(i.Next())) + assert.GreaterOrEqual(t, val, int64(-3)) + assert.LessOrEqual(t, val, int64(3)) + } +} diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index c7961915..24ae60c6 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -1,6 +1,7 @@ package roaring64 import ( + "fmt" "math/bits" "runtime" "sync" @@ -27,7 +28,11 @@ type BSI struct { // then the underlying BSI will be automatically sized. 
func NewBSI(maxValue int64, minValue int64) *BSI { - ba := make([]*Bitmap, bits.Len64(uint64(maxValue))) + bitsz := bits.Len64(uint64(minValue)) + if bits.Len64(uint64(maxValue)) > bitsz { + bitsz = bits.Len64(uint64(maxValue)) + } + ba := make([]*Bitmap, bitsz) for i := 0; i < len(ba); i++ { ba[i] = NewBitmap() } @@ -266,53 +271,90 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa results.RunOptimize() } + x := e.bsi.BitCount() + startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0 + endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0 + for i := 0; i < len(batch); i++ { cID := batch[i] eq1, eq2 := true, true lt1, lt2, gt1 := false, false, false - for j := e.bsi.BitCount() - 1; j >= 0; j-- { + j := e.bsi.BitCount() - 1 + isNegative := false + if x == 64 { + isNegative = e.bsi.bA[j].Contains(cID) + j-- + } + compStartValue := e.valueOrStart + compEndValue := e.end + if isNegative != startIsNegative { + compStartValue = ^e.valueOrStart + 1 + } + if isNegative != endIsNegative { + compEndValue = ^e.end + 1 + } + for ; j >= 0; j-- { sliceContainsBit := e.bsi.bA[j].Contains(cID) - if uint64(e.valueOrStart)&(1<<uint64(j)) > 0 { + if uint64(compStartValue)&(1<<uint64(j)) > 0 { // BIT in value is SET if !sliceContainsBit { - if eq1 { - if e.op == LT || e.op == LE { - lt1 = true - } - eq1 = false - break - } + if eq1 { + if (e.op == GT || e.op == GE || e.op == RANGE) && startIsNegative && !isNegative { + gt1 = true + } + if e.op == LT || e.op == LE { + if !startIsNegative || (startIsNegative == isNegative) { + lt1 = true + } + } + eq1 = false + break + } } } else { // BIT in value is CLEAR if sliceContainsBit { - if eq1 { - if e.op == GT || e.op == GE || e.op == RANGE { - gt1 = true - } - eq1 = false - if e.op != RANGE { - break - } - } + if eq1 { + if (e.op == LT || e.op == LE) && isNegative && !startIsNegative { + lt1 = true + } + if e.op == GT || e.op == GE || e.op == RANGE { + if startIsNegative || (startIsNegative == isNegative) { + gt1 = true + } + } + eq1 = 
false + if e.op != RANGE { + break + } + } } } - if e.op == RANGE && uint64(e.end)&(1<<uint64(j)) > 0 { - // BIT in value is SET - if !sliceContainsBit { - if eq2 { - lt2 = true - eq2 = false - } - } - } else { + if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 { + // BIT in value is SET + if !sliceContainsBit { + if eq2 { + if !endIsNegative || (endIsNegative == isNegative) { + lt2 = true + } + eq2 = false + if startIsNegative && !endIsNegative { + break + } + } + } + } else if e.op == RANGE { // BIT in value is CLEAR if sliceContainsBit { - if eq2 { - eq2 = false - } + if eq2 { + if isNegative && !endIsNegative { + lt2 = true + } + eq2 = false + break + } } } @@ -324,7 +366,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa results.Add(cID) } case LE: - if eq1 || lt1 { + if lt1 || (eq1 && (!startIsNegative || (startIsNegative && isNegative))) { results.Add(cID) } case EQ: @@ -332,7 +374,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa results.Add(cID) } case GE: - if eq1 || gt1 { + if gt1 || (eq1 && (startIsNegative || (!startIsNegative && !isNegative))) { results.Add(cID) } case GT: @@ -344,9 +386,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa results.Add(cID) } default: - if eq1 { - results.Add(cID) - } + panic(fmt.Sprintf("Unknown operation [%v]", e.op)) } } diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index cdffeb71..12826cc0 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -29,6 +29,34 @@ func setup() *BSI { return bsi } +func setupNegativeBoundary() *BSI { + + bsi := NewBSI(5, -5) + // Setup values + for i := int(bsi.MinValue); i <= int(bsi.MaxValue); i++ { + bsi.SetValue(uint64(i), int64(i)) + } + return bsi +} + +func setupAllNegative() *BSI { + bsi := NewBSI(-1, -100) + // Setup values + for i := int(bsi.MinValue); i <= int(bsi.MaxValue); i++ { + bsi.SetValue(uint64(i), int64(i)) + } + return bsi +} + +func 
setupAutoSizeNegativeBoundary() *BSI { + bsi := NewDefaultBSI() + // Setup values + for i := int(-5); i <= int(5); i++ { + bsi.SetValue(uint64(i), int64(i)) + } + return bsi +} + func TestEQ(t *testing.T) { bsi := setup() @@ -292,3 +320,73 @@ func TestTransposeWithCounts(t *testing.T) { assert.True(t, ok) assert.Equal(t, int64(2), a) } + +func TestRangeAllNegative(t *testing.T) { + bsi := setupAllNegative() + assert.Equal(t, uint64(100), bsi.GetCardinality()) + set := bsi.CompareValue(0, RANGE, -55, -45, nil) + assert.Equal(t, uint64(11), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + val, _ := bsi.GetValue(uint64(i.Next())) + assert.GreaterOrEqual(t, val, int64(-55)) + assert.LessOrEqual(t, val, int64(-45)) + } +} + +func TestSumWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + sum, cnt := bsi.Sum(bsi.GetExistenceBitmap()) + assert.Equal(t, uint64(11), cnt) + assert.Equal(t, int64(0), sum) +} + +func TestGEWithNegative(t *testing.T) { + + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + set := bsi.CompareValue(0, GE, 3, 0, nil) + assert.Equal(t, uint64(3), set.GetCardinality()) + set = bsi.CompareValue(0, GE, -3, 0, nil) + assert.Equal(t, uint64(9), set.GetCardinality()) +} + +func TestLEWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + set := bsi.CompareValue(0, LE, -3, 0, nil) + assert.Equal(t, uint64(3), set.GetCardinality()) + set = bsi.CompareValue(0, LE, 3, 0, nil) + assert.Equal(t, uint64(9), set.GetCardinality()) +} + +func TestRangeWithNegative(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + set := bsi.CompareValue(0, RANGE, -3, 3, nil) + assert.Equal(t, uint64(7), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + val, _ := bsi.GetValue(uint64(i.Next())) + assert.GreaterOrEqual(t, val, int64(-3)) + 
assert.LessOrEqual(t, val, int64(3)) + } +} + +func TestAutoSizeWithNegative(t *testing.T) { + bsi := setupAutoSizeNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + assert.Equal(t, 64, bsi.BitCount()) + set := bsi.CompareValue(0, RANGE, -3, 3, nil) + assert.Equal(t, uint64(7), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + val, _ := bsi.GetValue(uint64(i.Next())) + assert.GreaterOrEqual(t, val, int64(-3)) + assert.LessOrEqual(t, val, int64(3)) + } +}