From 32dbf7174b13f7771e6ba81b7bf869838b723644 Mon Sep 17 00:00:00 2001 From: Jidian Shan Date: Thu, 31 Oct 2024 10:00:55 +0800 Subject: [PATCH 1/2] fix use nil parameter, the MinMaxBig func will throw panic's problem --- roaring64/bsi64.go | 4 ++++ roaring64/bsi64_test.go | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index 255549ab..173adc5f 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -509,6 +509,10 @@ func (b *BSI) MinMaxBig(parallelism int, op Operation, foundSet *Bitmap) *big.In resultsChan := make(chan *big.Int, n) + if foundSet == nil { + foundSet = &b.eBM + } + card := foundSet.GetCardinality() x := card / uint64(n) diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index 01a3492c..5ec68dc3 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -649,6 +649,12 @@ func TestMinMaxWithRandom(t *testing.T) { assert.Equal(t, max, bsi.MinMax(0, MAX, bsi.GetExistenceBitmap())) } +func TestMinMaxWithNilFoundSet(t *testing.T) { + bsi, min, max := setupRandom() + assert.Equal(t, min, bsi.MinMax(0, MIN, nil)) + assert.Equal(t, max, bsi.MinMax(0, MAX, nil)) +} + func TestBSIWriteToReadFrom(t *testing.T) { file, err := ioutil.TempFile("./testdata", "bsi-test") if err != nil { From 75f54f85537619ff43a9edeb303d2932433b4f08 Mon Sep 17 00:00:00 2001 From: Jidian Shan Date: Fri, 1 Nov 2024 10:50:25 +0800 Subject: [PATCH 2/2] fix all nil pointer error in bsi functions --- BitSliceIndexing/bsi.go | 19 +++++++++++--- BitSliceIndexing/bsi_test.go | 25 ++++++++++++++++++ roaring64/bsi64.go | 25 +++++++++++++++--- roaring64/bsi64_test.go | 49 ++++++++++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 6 deletions(-) diff --git a/BitSliceIndexing/bsi.go b/BitSliceIndexing/bsi.go index bda0c93b..5487edbf 100644 --- a/BitSliceIndexing/bsi.go +++ b/BitSliceIndexing/bsi.go @@ -435,6 +435,10 @@ func (b *BSI) MinMax(parallelism int, op Operation, foundSet *roaring.Bitmap) in resultsChan := make(chan int64, n) + if foundSet == nil { + foundSet = b.eBM + } + card := foundSet.GetCardinality() x := card / uint64(n) @@ -545,7 +549,9 @@ func (b *BSI) minOrMax(op Operation, batch []uint32, resultsChan chan int64, wg // Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet // is also returned (for calculating the average). func (b *BSI) Sum(foundSet *roaring.Bitmap) (sum int64, count uint64) { - + if foundSet == nil { + foundSet = b.eBM + } count = foundSet.GetCardinality() var wg sync.WaitGroup for i := 0; i < b.BitCount(); i++ { @@ -569,7 +575,9 @@ func (b *BSI) Transpose() *roaring.Bitmap { // the column IDs in the source (foundSet) as compared with this BSI. This can be useful for // vectoring one set of integers to another. func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *roaring.Bitmap) *roaring.Bitmap { - + if foundSet == nil { + foundSet = b.eBM + } trans := &task{bsi: b} return parallelExecutor(parallelism, trans, transpose, foundSet) } @@ -820,7 +828,9 @@ func (b *BSI) addDigit(foundSet *roaring.Bitmap, i int) { // is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI // contains the number of times a particular value appeared in the input BSI as an integer count. func (b *BSI) TransposeWithCounts(parallelism int, foundSet *roaring.Bitmap) *BSI { - + if foundSet == nil { + foundSet = b.eBM + } return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, true) } @@ -847,6 +857,9 @@ func transposeWithCounts(input *BSI, batch []uint32, resultsChan chan *BSI, wg * // Increment - In-place increment of values in a BSI. Found set select columns for incrementing. func (b *BSI) Increment(foundSet *roaring.Bitmap) { + if foundSet == nil { + foundSet = b.eBM + } b.addDigit(foundSet, 0) b.eBM.Or(foundSet) } diff --git a/BitSliceIndexing/bsi_test.go b/BitSliceIndexing/bsi_test.go index 9af7f6fe..a28a5d4e 100644 --- a/BitSliceIndexing/bsi_test.go +++ b/BitSliceIndexing/bsi_test.go @@ -475,3 +475,28 @@ func TestIssue426(t *testing.T) { fmt.Println(bitmap.ToArray()) assert.Equal(t, uint64(0), bitmap.GetCardinality()) } + +func TestMinMaxWithNil(t *testing.T) { + bsi := setupRandom() + assert.Equal(t, bsi.MinValue, bsi.MinMax(0, MIN, nil)) + assert.Equal(t, bsi.MaxValue, bsi.MinMax(0, MAX, nil)) +} + +func TestSumWithNil(t *testing.T) { + + bsi := setup() + + sum, count := bsi.Sum(bsi.GetExistenceBitmap()) + sumNil, countNil := bsi.Sum(nil) + assert.Equal(t, countNil, count) + assert.Equal(t, sumNil, sum) +} + +func TestTransposeWithCountsNil(t *testing.T) { + bsi := setup() + bsi.SetValue(101, 50) + transposed := bsi.TransposeWithCounts(0, nil) + a, ok := transposed.GetValue(uint64(50)) + assert.True(t, ok) + assert.Equal(t, int64(2), a) +} diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index 173adc5f..a7e4d7fb 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -312,6 +312,13 @@ func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int6 func (b *BSI) CompareBigValue(parallelism int, op Operation, valueOrStart, end *big.Int, foundSet *Bitmap) *Bitmap { + if valueOrStart == nil { + valueOrStart = b.MinMaxBig(parallelism, MIN, &b.eBM) + } + if end == nil { + end = b.MinMaxBig(parallelism, MAX, &b.eBM) + } + comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end} if foundSet == nil { return parallelExecutor(parallelism, comp, compareValue, &b.eBM) @@ -656,7 +663,9 @@ func (b *BSI) Sum(foundSet *Bitmap) (int64, uint64) { // SumBigValues - Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet // is also returned (for calculating the average). This method will sum arbitrarily large values. func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) { - + if foundSet == nil { + foundSet = &b.eBM + } sum = new(big.Int) count = foundSet.GetCardinality() resultsChan := make(chan int64, b.BitCount()) @@ -691,7 +700,9 @@ func (b *BSI) Transpose() *Bitmap { // // TODO: This implementation is functional but not performant, needs to be re-written perhaps using SIMD SSE2 instructions. func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *Bitmap) *Bitmap { - + if foundSet == nil { + foundSet = &b.eBM + } trans := &task{bsi: b} return parallelExecutor(parallelism, trans, transpose, foundSet) } @@ -1009,7 +1020,12 @@ func (b *BSI) addDigit(foundSet *Bitmap, i int) { // is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI // contains the number of times a particular value appeared in the input BSI. func (b *BSI) TransposeWithCounts(parallelism int, foundSet, filterSet *Bitmap) *BSI { - + if foundSet == nil { + foundSet = &b.eBM + } + if filterSet == nil { + filterSet = &b.eBM + } return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, filterSet, true) } @@ -1039,6 +1055,9 @@ func transposeWithCounts(input *BSI, filterSet *Bitmap, batch []uint64, resultsC // Increment - In-place increment of values in a BSI. Found set select columns for incrementing. func (b *BSI) Increment(foundSet *Bitmap) { + if foundSet == nil { + foundSet = &b.eBM + } b.addDigit(foundSet, 0) b.eBM.Or(foundSet) } diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index 5ec68dc3..4e0d2a5b 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -770,3 +770,52 @@ func TestMutatedBsiEquality(t *testing.T) { fresh.SetValue(0, 1) assert.True(t, fresh.Equals(mutated)) } + +func TestSumWithNil(t *testing.T) { + bsi := setupNegativeBoundary() + assert.Equal(t, uint64(11), bsi.GetCardinality()) + sum, cnt := bsi.Sum(nil) + assert.Equal(t, uint64(11), cnt) + assert.Equal(t, int64(0), sum) +} + +func TestTransposeWithCountsNil(t *testing.T) { + bsi := setup() + bsi.SetValue(101, 50) + transposed := bsi.TransposeWithCounts(0, nil, nil) + a, ok := transposed.GetValue(uint64(50)) + assert.True(t, ok) + assert.Equal(t, int64(2), a) + a, ok = transposed.GetValue(uint64(49)) + assert.True(t, ok) + assert.Equal(t, int64(1), a) +} + +func TestRangeNilBig(t *testing.T) { + + bsi := NewDefaultBSI() + + // Populate large timestamp values + for i := 0; i <= 100; i++ { + t := time.Now() + newTime := t.AddDate(1000, 0, 0) // Add 1000 years + secs := int64(newTime.UnixMilli() / 1000) + nano := int32(newTime.Nanosecond()) + bigTime := secondsAndNanosToBigInt(secs, nano) + bsi.SetBigValue(uint64(i), bigTime) + } + + start, _ := bsi.GetBigValue(uint64(45)) // starting value at columnID 45 + end, _ := bsi.GetBigValue(uint64(55)) // ending value at columnID 55 + setStart := bsi.CompareBigValue(0, RANGE, nil, end, nil) + tmpStart := bsi.CompareBigValue(0, RANGE, bsi.MinMaxBig(0, MIN, nil), end, nil) + assert.Equal(t, tmpStart.GetCardinality(), setStart.GetCardinality()) + + setEnd := bsi.CompareBigValue(0, RANGE, start, nil, nil) + tmpEnd := bsi.CompareBigValue(0, RANGE, start, bsi.MinMaxBig(0, MAX, nil), nil) + assert.Equal(t, tmpEnd.GetCardinality(), setEnd.GetCardinality()) + + setAll := bsi.CompareBigValue(0, RANGE, nil, nil, nil) + tmpAll := bsi.CompareBigValue(0, RANGE, bsi.MinMaxBig(0, MIN, nil), bsi.MinMaxBig(0, MAX, nil), nil) + assert.Equal(t, tmpAll.GetCardinality(), setAll.GetCardinality()) +}