Skip to content

Commit

Permalink
faster merge code
Browse files Browse the repository at this point in the history
  • Loading branch information
Gillgamesh committed Dec 22, 2024
1 parent 07cd7dd commit f6dcdf6
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 6 deletions.
14 changes: 14 additions & 0 deletions include/bucket_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,20 @@ class BucketBuffer {
// std::cout << std::endl;
// }
}

/**
 * Rearranges the buffered entries in place around a row-index threshold.
 *
 * After the call, every entry whose row_idx is greater than or equal to
 * pivot_value occupies the front of the buffer, and every entry whose
 * row_idx is smaller than pivot_value occupies the back.
 *
 * @param pivot_value  Row-index threshold used to split the entries.
 * @return The number of entries with row_idx >= pivot_value, i.e. the
 *         index at which the "smaller than pivot" group begins.
 */
size_t partition(size_t pivot_value) {
  // Count of entries already placed in the front (row_idx >= pivot) group;
  // it doubles as the slot the next qualifying entry is swapped into.
  size_t front_count = 0;
  size_t scan = 0;
  while (scan < size()) {
    const bool belongs_in_front = !(entries[scan].row_idx < pivot_value);
    if (belongs_in_front) {
      std::swap(entries[scan], entries[front_count]);
      ++front_count;
    }
    ++scan;
  }
  return front_count;
}

bool merge(const BucketBuffer &other) {
// YOU SHOULD ONLY MERGE WITH AN UNDER CAPACITY BUFFER
Expand Down
2 changes: 1 addition & 1 deletion include/cc_sketch_alg.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class CCSketchAlg {
// new Sketch(Sketch::calc_vector_length(num_vertices), seed,
// Sketch::calc_cc_samples(num_vertices, config.get_sketches_factor()));
delta_sketches[i] =
new Sketch(5, seed,
new Sketch(6, seed,
Sketch::calc_cc_samples(num_vertices, config.get_sketches_factor()));
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/sketch.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ class Sketch {
* @return The length of the vector to sketch
*/
static vec_t calc_vector_length(node_id_t num_vertices) {
return ceil(double(num_vertices) * (num_vertices - 1) / 2);
// return num_vertices * 4;
// return ceil(double(num_vertices) * (num_vertices - 1) / 2);
return num_vertices * 2;
// return 50; // round to something thats approx 2^6
// return 3;
// return 15;
Expand Down
20 changes: 17 additions & 3 deletions src/sketch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,15 +194,19 @@ Sketch::~Sketch() {
* backwards until we reach the point where the columns are once again not
* being stored
*/
bucket_buffer.sort_and_compact();
// bucket_buffer.sort_and_compact();
size_t buffer_size = bucket_buffer.size();
// ACTUALLY - we don't need to sort; we just need to partition.
size_t to_keep_sz = bucket_buffer.partition(bkt_per_col);
int i = ((int) buffer_size)-1;
while (i >= 0 && bucket_buffer[i].row_idx < bkt_per_col) {
// while (i >= 0 && bucket_buffer[i].row_idx < bkt_per_col) {
while (i >= 0 && i >= to_keep_sz) {
// update the bucket
get_bucket(bucket_buffer[i].col_idx, bucket_buffer[i].row_idx) ^= bucket_buffer[i].value;
i--;
}
bucket_buffer.entries.resize(i+1);
bucket_buffer.entries.resize(to_keep_sz);
// bucket_buffer.entries.resize(i+1);
// if (buffer_size > 3)
// std::cout << "Injected buffer buckets:" << buffer_size << " to " << i+1 << std::endl;
}
Expand Down Expand Up @@ -392,6 +396,11 @@ void Sketch::merge(const Sketch &other) {
// TODO - when sketches have dynamic sizes, this will require more work,
// i.e. we would want to deal with some depths separately.
bool sufficient_space = bucket_buffer.merge(other.bucket_buffer);
// TODO - make this procedure better; this isn't a great implementation.
if (!sufficient_space) {
inject_buffer_buckets();
sufficient_space = !bucket_buffer.over_capacity();
}
while (!sufficient_space) {
// std::cout << "Merge: Buffer full, reallocating" << std::endl;
// reallocate((bkt_per_col * 8) / 5);
Expand Down Expand Up @@ -485,6 +494,11 @@ void Sketch::range_merge(const Sketch &other, size_t start_sample, size_t n_samp
}
#endif
bool sufficient_space = bucket_buffer.merge(other.bucket_buffer);
// TODO - make this procedure better; this isn't a great implementation.
if (!sufficient_space) {
inject_buffer_buckets();
sufficient_space = !bucket_buffer.over_capacity();
}
while (!sufficient_space) {
// std::cout << "Merge: Buffer full, reallocating" << std::endl;
// reallocate((bkt_per_col * 8) / 5);
Expand Down

0 comments on commit f6dcdf6

Please sign in to comment.