Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce minPoints option #25

Merged
merged 4 commits into from
Feb 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 56 additions & 32 deletions include/supercluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,20 @@ namespace supercluster {
using namespace mapbox::geometry;
using namespace mapbox::feature;

struct Cluster {
class Cluster {
public:
const point<double> pos;
const std::uint32_t num_points;
std::uint32_t id;
std::uint32_t parent_id = 0;
bool visited = false;
std::unique_ptr<property_map> properties{ nullptr };

Cluster(const point<double> pos_, const std::uint32_t num_points_, const std::uint32_t id_)
Cluster(const point<double> &pos_, const std::uint32_t num_points_, const std::uint32_t id_)
: pos(pos_), num_points(num_points_), id(id_) {
}

Cluster(const point<double> pos_,
Cluster(const point<double> &pos_,
const std::uint32_t num_points_,
const std::uint32_t id_,
const property_map &properties_)
Expand Down Expand Up @@ -124,6 +125,7 @@ struct Options {
std::uint8_t maxZoom = 16; // max zoom level to cluster the points on
std::uint16_t radius = 40; // cluster radius in pixels
std::uint16_t extent = 512; // tile extent (radius is calculated relative to it)
std::size_t minPoints = 2; // minimum points to form a cluster

std::function<property_map(const property_map &)> map =
[](const property_map &p) -> property_map { return p; };
Expand All @@ -143,8 +145,8 @@ class Supercluster {
const GeoJSONFeatures features;
const Options options;

Supercluster(const GeoJSONFeatures &features_, const Options options_ = Options())
: features(features_), options(options_) {
Supercluster(const GeoJSONFeatures &features_, Options options_ = Options())
: features(features_), options(std::move(options_)) {

#ifdef DEBUG_TIMER
Timer timer;
Expand Down Expand Up @@ -286,41 +288,63 @@ class Supercluster {

p.visited = true;

auto num_points = p.num_points;
point<double> weight = p.pos * double(num_points);

auto clusterProperties = p.properties ? *p.properties : property_map{};
std::uint32_t id = static_cast<std::uint32_t>((i << 5) + (zoom + 1));

// find all nearby points
const auto num_points_origin = p.num_points;
auto num_points = num_points_origin;
auto cluster_size = previous.clusters.size();
// count the number of points in a potential cluster
previous.tree.within(p.pos.x, p.pos.y, r, [&](const auto &neighbor_id) {
assert(neighbor_id < previous.clusters.size());
auto &b = previous.clusters[neighbor_id];

assert(neighbor_id < cluster_size);
const auto &b = previous.clusters[neighbor_id];
// filter out neighbors that are already processed
if (b.visited) {
return;
}

b.visited = true;
b.parent_id = id;

// accumulate coordinates for calculating weighted center
weight += b.pos * double(b.num_points);
num_points += b.num_points;

if (options_.reduce && b.properties) {
// apply reduce function to update clusterProperites
options_.reduce(clusterProperties, *b.properties);
if (!b.visited) {
num_points += b.num_points;
}
});

if (num_points == 1) {
clusters.emplace_back(weight, 1, p.id, clusterProperties);
} else {
auto clusterProperties = p.properties ? *p.properties : property_map{};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clusterProperties only mutated if num_points >= options_.minPoints and if we have reduce function. Could we share p.properties between clusters if no mutation is required?

if (num_points >= options_.minPoints) { // enough points to form a cluster
point<double> weight = p.pos * double(num_points_origin);
std::uint32_t id = static_cast<std::uint32_t>((i << 5) + (zoom + 1));

// find all nearby points
previous.tree.within(p.pos.x, p.pos.y, r, [&](const auto &neighbor_id) {
assert(neighbor_id < cluster_size);
auto &b = previous.clusters[neighbor_id];

// filter out neighbors that are already processed
if (b.visited) {
return;
}

b.visited = true;
b.parent_id = id;

// accumulate coordinates for calculating weighted center
weight += b.pos * double(b.num_points);

if (options_.reduce && b.properties) {
// apply reduce function to update clusterProperites
options_.reduce(clusterProperties, *b.properties);
}
});
p.parent_id = id;
clusters.emplace_back(weight / double(num_points), num_points, id,
clusterProperties);
} else {
clusters.emplace_back(p.pos, 1, p.id, clusterProperties);
if (num_points > 1) {
previous.tree.within(p.pos.x, p.pos.y, r, [&](const auto &neighbor_id) {
assert(neighbor_id < cluster_size);
auto &b = previous.clusters[neighbor_id];
// filter out neighbors that are already processed
if (b.visited) {
return;
}
b.visited = true;
clusters.emplace_back(b.pos, 1, b.id,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, b.properties can be shared between clusters? Could we change std::unique_ptr<property_map> properties{ nullptr } to std::shared_ptr<const property_map> properties{ nullptr }; and then try sharing properties between clusters if property map is not changed?

b.properties ? *b.properties : property_map{});
});
}
}
}

Expand Down
67 changes: 61 additions & 6 deletions test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@
#include <iostream>
#include <vector>

int main() {
std::FILE *fp = std::fopen("test/fixtures/places.json", "r");
mapbox::feature::feature_collection<double> parseFeatures(const char *filename) {
std::FILE *fp = std::fopen(filename, "r");
char buffer[65536];
rapidjson::FileReadStream is(fp, buffer, sizeof(buffer));

rapidjson::Document d;
d.ParseStream(is);

std::fclose(fp);
const auto &json_features = d["features"];

assert(json_features.IsArray());
mapbox::feature::feature_collection<double> features;
features.reserve(json_features.Size());

Expand All @@ -27,13 +28,41 @@ int main() {
const auto lng = json_coords[0].GetDouble();
const auto lat = json_coords[1].GetDouble();
mapbox::geometry::point<double> point(lng, lat);

mapbox::feature::feature<double> feature{ point };
feature.properties["name"] = std::string((*itr)["properties"]["name"].GetString());
feature.properties["scalerank"] =
std::uint64_t((*itr)["properties"]["scalerank"].GetUint64());
const auto &properties = (*itr)["properties"];
const auto readProperty = [&feature, &properties](const char *key) {
if (properties.HasMember(key)) {
const auto &value = properties[key];
if (value.IsNull())
feature.properties[key] = std::string("null");
if (value.IsString())
feature.properties[key] = std::string(value.GetString());
if (value.IsUint64())
feature.properties[key] = std::uint64_t(value.GetUint64());
if (value.IsDouble())
feature.properties[key] = value.GetDouble();
}
};
readProperty("name");
readProperty("scalerank");
readProperty("lat_y");
readProperty("long_x");
readProperty("region");
readProperty("featureclass");
readProperty("comment");
readProperty("name_alt");
readProperty("subregion");
features.push_back(feature);
}

return features;
}

int main() {
const auto features = parseFeatures("test/fixtures/places.json");

// ----------------------- test 1 ------------------------------------
mapbox::supercluster::Options options;
mapbox::supercluster::Supercluster index(features, options);

Expand Down Expand Up @@ -80,6 +109,7 @@ int main() {
assert(leaves[8].properties["name"].get<std::string>() == "Miquelon");
assert(leaves[9].properties["name"].get<std::string>() == "Cape Bauld");

// ----------------------- test 2 ------------------------------------
mapbox::supercluster::Options options2;
options2.radius = 60;
options2.extent = 256;
Expand All @@ -105,6 +135,8 @@ int main() {
iter2->second.get<std::uint64_t>());
}
};

// ----------------------- test 3 ------------------------------------
mapbox::supercluster::Options options3;
options3.map = map;
options3.reduce = reduce;
Expand All @@ -116,6 +148,7 @@ int main() {
assert(tile3[0].properties.count("sum") != 0);
assert(tile3[0].properties["sum"].get<std::uint64_t>() == 69);

// ----------------------- test 4 ------------------------------------
mapbox::supercluster::Options options4;
options4.radius = 100;
options4.map = map;
Expand All @@ -141,4 +174,26 @@ int main() {
}
}
assert(actualVec1 == expectVec1);

// ----------------------- test 5 ------------------------------------
mapbox::supercluster::Options options5;
options5.minPoints = 5;
mapbox::supercluster::Supercluster index5(features, options5);

mapbox::feature::feature_collection<std::int16_t> tile5 = index5.getTile(0, 0, 0);
assert(tile5.size() == 49);

std::uint64_t num_points1 = 0;
for (auto &f : tile5) {
const auto itr = f.properties.find("cluster");
if (itr != f.properties.end() && itr->second.get<bool>()) {
const auto &point_count = f.properties["point_count"].get<std::uint64_t>();
assert(point_count >= 5);
num_points1 += point_count;
} else {
num_points1 += 1;
}
}

assert(num_points1 == 195);
}