Skip to content

Commit

Permalink
3 sample options and more benchmarking changes
Browse files Browse the repository at this point in the history
  • Loading branch information
etwest committed Mar 13, 2024
1 parent 2c2bf59 commit 52491cb
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 7 deletions.
66 changes: 65 additions & 1 deletion src/sketch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,45 @@ void Sketch::zero_contents() {
reset_sample_state();
}

#if 1
/**
 * Draw the next sample from this sketch.
 *
 * Each call consumes one sample slot (sample_idx is post-incremented) and
 * inspects the cols_per_sample columns that belong to that slot.
 *
 * @return {0, ZERO}      if the deterministic bucket (buckets[num_buckets - 1]) is empty,
 *         {alpha, GOOD}  if the deterministic bucket or one of the probed buckets is good,
 *         {0, FAIL}      if none of the probed buckets is good.
 * @throws OutOfSamplesException if sample_idx >= num_samples on entry.
 */
SketchSample Sketch::sample() {
  if (sample_idx >= num_samples) {
    throw OutOfSamplesException(seed, num_samples, sample_idx);
  }

  size_t idx = sample_idx++;
  size_t first_column = idx * cols_per_sample;

  if (Bucket_Boruvka::is_empty(buckets[num_buckets - 1]))
    return {0, ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return

  if (Bucket_Boruvka::is_good(buckets[num_buckets - 1], checksum_seed()))
    return {buckets[num_buckets - 1].alpha, GOOD};

  for (size_t col = first_column; col < first_column + cols_per_sample; ++col) {
    const size_t col_start = col * bkt_per_col;

    // Coarse pass: start near the bottom of the column and jump up 4 rows at a
    // time while the probed bucket is empty. The bound is tested BEFORE the
    // bucket is read so that a negative row (reached when the stride steps past
    // row 0, or when the column has fewer than 5 rows) never indexes the array.
    int prev = bkt_per_col - 1;
    int row = prev - 4;
    while (row > 0 && Bucket_Boruvka::is_empty(buckets[col_start + row])) {
      prev = row;
      row -= 4;
    }

    // Fine pass: walk one row at a time from the last row known to lie at or
    // below the first non-empty bucket until a non-empty bucket (or row 0) is hit.
    row = prev;
    while (row > 0 && Bucket_Boruvka::is_empty(buckets[col_start + row])) {
      --row;
    }

    // Starting at the row found above, test it and up to the next 5 rows
    // towards row 0 for a good bucket.
    int stop = std::max(row - 5, 0);
    for (; row >= stop; row--) {
      if (Bucket_Boruvka::is_good(buckets[col_start + row], checksum_seed()))
        return {buckets[col_start + row].alpha, GOOD};
    }
  }
  return {0, FAIL};
}
#else
#if 1
SketchSample Sketch::sample() {
if (sample_idx >= num_samples) {
throw OutOfSamplesException(seed, num_samples, sample_idx);
Expand All @@ -113,14 +152,39 @@ SketchSample Sketch::sample() {
}
// now that we've found a non-zero bucket check next if next 4 buckets good
int stop = std::max(row - 4, 0);
int stop = std::max(row - 5, 0);
for (; row >= stop; row--) {
if (Bucket_Boruvka::is_good(buckets[col * bkt_per_col + row], checksum_seed()))
return {buckets[col * bkt_per_col + row].alpha, GOOD};
}
}
return {0, FAIL};
}
#else
/**
 * Draw the next sample by exhaustively checking every bucket of the columns
 * assigned to this sample slot.
 *
 * @return {0, ZERO} when the deterministic bucket is empty, {alpha, GOOD} for
 *         the first good bucket encountered, otherwise {0, FAIL}.
 * @throws OutOfSamplesException if no sample slots remain.
 */
SketchSample Sketch::sample() {
  if (sample_idx >= num_samples) {
    throw OutOfSamplesException(seed, num_samples, sample_idx);
  }

  const size_t slot = sample_idx++;
  const size_t col_begin = slot * cols_per_sample;
  const size_t col_end = col_begin + cols_per_sample;
  const size_t deterministic = num_buckets - 1;

  // the "first" bucket is deterministic so if all zero then no edges to return
  if (Bucket_Boruvka::is_empty(buckets[deterministic])) {
    return {0, ZERO};
  }
  if (Bucket_Boruvka::is_good(buckets[deterministic], checksum_seed())) {
    return {buckets[deterministic].alpha, GOOD};
  }

  // The buckets of consecutive columns are laid out back to back, so visiting
  // every (col, row) pair in column-major order is one linear sweep.
  size_t pos = col_begin * bkt_per_col;
  const size_t pos_end = col_end * bkt_per_col;
  while (pos < pos_end) {
    if (Bucket_Boruvka::is_good(buckets[pos], checksum_seed())) {
      return {buckets[pos].alpha, GOOD};
    }
    ++pos;
  }
  return {0, FAIL};
}
#endif
#endif

ExhaustiveSketchSample Sketch::exhaustive_sample() {
if (sample_idx >= num_samples) {
Expand Down
30 changes: 24 additions & 6 deletions tools/benchmark/graphcc_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ static void BM_Sketch_Update(benchmark::State& state) {
BENCHMARK(BM_Sketch_Update)->RangeMultiplier(4)->Ranges({{KB << 4, MB << 4}});

// Benchmark the speed of querying sketches
static constexpr size_t sample_vec_size = MB;
static constexpr size_t sample_vec_size = size_t(1) << 40;
static void BM_Sketch_Sample(benchmark::State& state) {
constexpr size_t num_sketches = 400;

Expand Down Expand Up @@ -281,14 +281,14 @@ static void BM_Sketch_Sample(benchmark::State& state) {
benchmark::Counter(state.iterations() * num_sketches, benchmark::Counter::kIsRate);
state.counters["Successes"] = double(successes) / (state.iterations() * num_sketches);
}
BENCHMARK(BM_Sketch_Sample)->RangeMultiplier(4)->Range(1, sample_vec_size / 2);
BENCHMARK(BM_Sketch_Sample)->RangeMultiplier(4)->Range(1, sample_vec_size / (1 << 18));

static void BM_Sketch_Merge(benchmark::State& state) {
size_t n = state.range(0);
size_t upds = n / 100;
size_t upds = n / (1 << 14);
size_t seed = get_seed();
Sketch s1(n, seed);
Sketch s2(n, seed);
Sketch s1(n, seed, 5);
Sketch s2(n, seed, 5);

for (size_t i = 0; i < upds; i++) {
s1.update(static_cast<vec_t>(concat_pairing_fn(rand() % n, rand() % n)));
Expand All @@ -299,7 +299,25 @@ static void BM_Sketch_Merge(benchmark::State& state) {
s1.merge(s2);
}
}
BENCHMARK(BM_Sketch_Merge)->RangeMultiplier(10)->Range(1e3, 1e6);
BENCHMARK(BM_Sketch_Merge)->RangeMultiplier(8)->Range(1 << 18, 1 << 30);

// Benchmark the speed of Sketch::range_merge on two sketches built with the
// same seed. state.range(0) supplies the first Sketch constructor argument and
// one random update is applied to each sketch per 2^14 units of that value.
static void BM_Sketch_RangeMerge(benchmark::State& state) {
  const size_t vec_len = state.range(0);
  const size_t num_updates = vec_len / (1 << 14);
  const size_t sketch_seed = get_seed();

  // NOTE(review): the meaning of the third constructor argument (5) is not
  // visible here -- confirm against the Sketch constructor.
  Sketch dst(vec_len, sketch_seed, 5);
  Sketch src(vec_len, sketch_seed, 5);

  // Populate both sketches with random updates before timing begins.
  for (size_t remaining = num_updates; remaining > 0; --remaining) {
    dst.update(static_cast<vec_t>(concat_pairing_fn(rand() % vec_len, rand() % vec_len)));
    src.update(static_cast<vec_t>(concat_pairing_fn(rand() % vec_len, rand() % vec_len)));
  }

  // Timed region: only the range merge itself is measured.
  for (auto _ : state) {
    dst.range_merge(src, 3, 1);
  }
}
// Run with arguments from 2^18 up to 2^30, growing by a factor of 8 each step.
BENCHMARK(BM_Sketch_RangeMerge)->RangeMultiplier(8)->Range(1 << 18, 1 << 30);

static void BM_Sketch_Serialize(benchmark::State& state) {
size_t n = state.range(0);
Expand Down

0 comments on commit 52491cb

Please sign in to comment.