diff --git a/src/sketch.cpp b/src/sketch.cpp
index 4809e19d..4f8484a7 100644
--- a/src/sketch.cpp
+++ b/src/sketch.cpp
@@ -91,6 +91,52 @@ void Sketch::zero_contents() {
   reset_sample_state();
 }
 
+#if 1
+// Draw the next sample from this sketch.
+// Throws OutOfSamplesException once sample_idx has reached num_samples.
+// Returns {0, ZERO} if buckets[num_buckets - 1] is empty, {alpha, GOOD} for the
+// first bucket accepted by Bucket_Boruvka::is_good, and {0, FAIL} otherwise.
+SketchSample Sketch::sample() {
+  if (sample_idx >= num_samples) {
+    throw OutOfSamplesException(seed, num_samples, sample_idx);
+  }
+
+  size_t idx = sample_idx++;
+  size_t first_column = idx * cols_per_sample;
+
+  if (Bucket_Boruvka::is_empty(buckets[num_buckets - 1]))
+    return {0, ZERO};  // the "first" bucket is deterministic so if all zero then no edges to return
+
+  if (Bucket_Boruvka::is_good(buckets[num_buckets - 1], checksum_seed()))
+    return {buckets[num_buckets - 1].alpha, GOOD};
+
+  for (size_t col = first_column; col < first_column + cols_per_sample; ++col) {
+    // Coarse pass: starting 4 rows below the highest row, move down in strides
+    // of 4 while the probed bucket is empty. The row bound is tested before
+    // indexing so a negative row is never used as a subscript.
+    int prev = bkt_per_col - 1;
+    int row = prev - 4;
+    while (row > 0 && Bucket_Boruvka::is_empty(buckets[col * bkt_per_col + row])) {
+      prev = row;
+      row -= 4;
+    }
+    // Fine pass: rescan one row at a time from the last position the coarse pass kept.
+    row = prev;
+    while (row > 0 && Bucket_Boruvka::is_empty(buckets[col * bkt_per_col + row])) {
+      --row;
+    }
+
+    // now that we've found a non-zero bucket, check it and the next 5 buckets below it
+    int stop = std::max(row - 5, 0);
+    for (; row >= stop; row--) {
+      if (Bucket_Boruvka::is_good(buckets[col * bkt_per_col + row], checksum_seed()))
+        return {buckets[col * bkt_per_col + row].alpha, GOOD};
+    }
+  }
+  return {0, FAIL};
+}
+#else
+#if 1
 SketchSample Sketch::sample() {
   if (sample_idx >= num_samples) {
     throw OutOfSamplesException(seed, num_samples, sample_idx);
@@ -113,7 +159,7 @@ SketchSample Sketch::sample() {
     }
 
-    // now that we've found a non-zero bucket check next if next 4 buckets good
-    int stop = std::max(row - 4, 0);
+    // now that we've found a non-zero bucket check next if next 5 buckets good
+    int stop = std::max(row - 5, 0);
     for (; row >= stop; row--) {
       if (Bucket_Boruvka::is_good(buckets[col * bkt_per_col + row], checksum_seed()))
         return {buckets[col * bkt_per_col + row].alpha, GOOD};
@@ -121,6 +167,31 @@ SketchSample Sketch::sample() {
   }
   return {0, FAIL};
 }
+#else
+SketchSample Sketch::sample() {
+  if (sample_idx >= num_samples) {
+    throw OutOfSamplesException(seed, num_samples, sample_idx);
+  }
+
+  size_t idx = sample_idx++;
+  size_t first_column = idx * cols_per_sample;
+
+  if (Bucket_Boruvka::is_empty(buckets[num_buckets - 1]))
+    return {0, ZERO};  // the "first" bucket is deterministic so if all zero then no edges to return
+
+  if (Bucket_Boruvka::is_good(buckets[num_buckets - 1], checksum_seed()))
+    return {buckets[num_buckets - 1].alpha, GOOD};
+
+  for (size_t col = first_column; col < first_column + cols_per_sample; ++col) {
+    for (size_t row = 0; row < bkt_per_col; row++) {
+      if (Bucket_Boruvka::is_good(buckets[col * bkt_per_col + row], checksum_seed()))
+        return {buckets[col * bkt_per_col + row].alpha, GOOD};
+    }
+  }
+  return {0, FAIL};
+}
+#endif
+#endif
 
 ExhaustiveSketchSample Sketch::exhaustive_sample() {
   if (sample_idx >= num_samples) {
diff --git a/tools/benchmark/graphcc_bench.cpp b/tools/benchmark/graphcc_bench.cpp
index 4759cc4d..778ea1e7 100644
--- a/tools/benchmark/graphcc_bench.cpp
+++ b/tools/benchmark/graphcc_bench.cpp
@@ -250,7 +250,7 @@ static void BM_Sketch_Update(benchmark::State& state) {
 BENCHMARK(BM_Sketch_Update)->RangeMultiplier(4)->Ranges({{KB << 4, MB << 4}});
 
 // Benchmark the speed of querying sketches
-static constexpr size_t sample_vec_size = MB;
+static constexpr size_t sample_vec_size = size_t(1) << 40;
 
 static void BM_Sketch_Sample(benchmark::State& state) {
   constexpr size_t num_sketches = 400;
@@ -281,14 +281,14 @@ static void BM_Sketch_Sample(benchmark::State& state) {
       benchmark::Counter(state.iterations() * num_sketches, benchmark::Counter::kIsRate);
   state.counters["Successes"] = double(successes) / (state.iterations() * num_sketches);
 }
-BENCHMARK(BM_Sketch_Sample)->RangeMultiplier(4)->Range(1, sample_vec_size / 2);
+BENCHMARK(BM_Sketch_Sample)->RangeMultiplier(4)->Range(1, sample_vec_size / (1 << 18));
 
 static void BM_Sketch_Merge(benchmark::State& state) {
   size_t n = state.range(0);
-  size_t upds = n / 100;
+  size_t upds = n / (1 << 14);
   size_t seed = get_seed();
-  Sketch s1(n, seed);
-  Sketch s2(n, seed);
+  Sketch s1(n, seed, 5);
+  Sketch s2(n, seed, 5);
 
   for (size_t i = 0; i < upds; i++) {
     s1.update(static_cast<vec_t>(concat_pairing_fn(rand() % n, rand() % n)));
@@ -299,7 +299,26 @@ static void BM_Sketch_Merge(benchmark::State& state) {
     s1.merge(s2);
   }
 }
-BENCHMARK(BM_Sketch_Merge)->RangeMultiplier(10)->Range(1e3, 1e6);
+BENCHMARK(BM_Sketch_Merge)->RangeMultiplier(8)->Range(1 << 18, 1 << 30);
+
+// Benchmark Sketch::range_merge on two sketches that each received n / 2^14 random updates
+static void BM_Sketch_RangeMerge(benchmark::State& state) {
+  size_t n = state.range(0);
+  size_t upds = n / (1 << 14);
+  size_t seed = get_seed();
+  Sketch s1(n, seed, 5);
+  Sketch s2(n, seed, 5);
+
+  for (size_t i = 0; i < upds; i++) {
+    s1.update(static_cast<vec_t>(concat_pairing_fn(rand() % n, rand() % n)));
+    s2.update(static_cast<vec_t>(concat_pairing_fn(rand() % n, rand() % n)));
+  }
+
+  for (auto _ : state) {
+    s1.range_merge(s2, 3, 1);
+  }
+}
+BENCHMARK(BM_Sketch_RangeMerge)->RangeMultiplier(8)->Range(1 << 18, 1 << 30);
 
 static void BM_Sketch_Serialize(benchmark::State& state) {
   size_t n = state.range(0);