From 6142e43320253d46ab7ba4a83a73db8886327315 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Fri, 6 Jan 2023 01:32:20 +0100 Subject: [PATCH 1/9] Improve s2region_term_indexer: * Add ability to reuse term buffer * Add option for optimize index size if query only points --- src/s2/s2region_term_indexer.cc | 85 +++++++++++++++++++++++---------- src/s2/s2region_term_indexer.h | 35 ++++++++++++-- 2 files changed, 91 insertions(+), 29 deletions(-) diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc index 1b120bbc9..349c88a73 100644 --- a/src/s2/s2region_term_indexer.cc +++ b/src/s2/s2region_term_indexer.cc @@ -129,6 +129,14 @@ string S2RegionTermIndexer::GetTerm(TermType term_type, const S2CellId id, vector S2RegionTermIndexer::GetIndexTerms(const S2Point& point, string_view prefix) { + vector terms; + GetIndexTerms(point, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetIndexTerms(const S2Point& point, + string_view prefix, + vector* terms) { // See the top of this file for an overview of the indexing strategy. // // The last cell generated by this loop is effectively the covering for @@ -139,12 +147,13 @@ vector S2RegionTermIndexer::GetIndexTerms(const S2Point& point, // max_level() != true_max_level() (see S2RegionCoverer::Options). const S2CellId id(point); - vector terms; - for (int level = options_.min_level(); level <= options_.max_level(); - level += options_.level_mod()) { - terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); + int level = options_.min_level(); + if (options_.query_contains_points_only()) { + level = options_.true_max_level(); + } + for (; level <= options_.max_level(); level += options_.level_mod()) { + terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); } - return terms; } vector S2RegionTermIndexer::GetIndexTerms(const S2Region& region, @@ -157,6 +166,13 @@ vector S2RegionTermIndexer::GetIndexTerms(const S2Region& region, vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( const S2CellUnion& covering, string_view prefix) { + vector terms; + GetIndexTermsForCanonicalCovering(covering, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( + const S2CellUnion& covering, string_view prefix, vector* terms) { // See the top of this file for an overview of the indexing strategy. // // Cells in the covering are normally indexed as covering terms. If we are @@ -171,7 +187,6 @@ vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( *coverer_.mutable_options() = options_; S2_CHECK(coverer_.IsCanonical(covering)); } - vector terms; S2CellId prev_id = S2CellId::None(); int true_max_level = options_.true_max_level(); for (S2CellId id : covering) { @@ -181,14 +196,20 @@ vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); + // assume level <= options_.true_max_level() - if (level < true_max_level) { - // Add a covering term for this cell. - terms.push_back(GetTerm(TermType::COVERING, id, prefix)); - } - if (level == true_max_level || !options_.optimize_for_space()) { - // Add an ancestor term for this cell at the constrained level. - terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); + const bool is_max_level_cell = level == true_max_level; + // Add a term for this cell, max_level cell ANCESTOR is optimization + terms->push_back(GetTerm(is_max_level_cell ? TermType::ANCESTOR + : TermType::COVERING, + id, prefix)); + + // If query only contains points, there are no need other terms. + if (options_.query_contains_points_only()) continue; + + if (!options_.optimize_for_space() && !is_max_level_cell) { + // Add an ancestor term for this cell. + terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix)); } // Finally, add ancestor terms for all the ancestors of this cell. while ((level -= options_.level_mod()) >= options_.min_level()) { @@ -197,29 +218,34 @@ vector S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( prev_id.parent(level) == ancestor_id) { break; // We have already processed this cell and its ancestors. } - terms.push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix)); + terms->push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix)); } prev_id = id; } - return terms; } vector S2RegionTermIndexer::GetQueryTerms(const S2Point& point, string_view prefix) { + vector terms; + GetQueryTerms(point, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetQueryTerms(const S2Point& point, + string_view prefix, + vector* terms) { // See the top of this file for an overview of the indexing strategy. const S2CellId id(point); - vector terms; // Recall that all true_max_level() cells are indexed only as ancestor terms. int level = options_.true_max_level(); - terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); - if (options_.index_contains_points_only()) return terms; + terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix)); + if (options_.index_contains_points_only()) return; // Add covering terms for all the ancestor cells. for (; level >= options_.min_level(); level -= options_.level_mod()) { - terms.push_back(GetTerm(TermType::COVERING, id.parent(level), prefix)); + terms->push_back(GetTerm(TermType::COVERING, id.parent(level), prefix)); } - return terms; } vector S2RegionTermIndexer::GetQueryTerms(const S2Region& region, @@ -232,13 +258,20 @@ vector S2RegionTermIndexer::GetQueryTerms(const S2Region& region, vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( const S2CellUnion& covering, string_view prefix) { + vector terms; + GetQueryTermsForCanonicalCovering(covering, prefix, &terms); + return terms; +} + +void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( + const S2CellUnion& covering, string_view prefix, vector* terms) { // See the top of this file for an overview of the indexing strategy. + S2_CHECK(!options_.query_contains_points_only()); if (google::DEBUG_MODE) { *coverer_.mutable_options() = options_; S2_CHECK(coverer_.IsCanonical(covering)); } - vector terms; S2CellId prev_id = S2CellId::None(); int true_max_level = options_.true_max_level(); for (S2CellId id : covering) { @@ -248,9 +281,10 @@ vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); + // assume level <= options_.true_max_level() // Cells in the covering are always queried as ancestor terms. - terms.push_back(GetTerm(TermType::ANCESTOR, id, prefix)); + terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix)); // If the index only contains points, there are no covering terms. if (options_.index_contains_points_only()) continue; @@ -258,8 +292,8 @@ vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( // If we are optimizing for index space rather than query time, cells are // also queried as covering terms (except for true_max_level() cells, // which are indexed and queried as ancestor cells only). - if (options_.optimize_for_space() && level < true_max_level) { - terms.push_back(GetTerm(TermType::COVERING, id, prefix)); + if (options_.optimize_for_space() && level != true_max_level) { + terms->push_back(GetTerm(TermType::COVERING, id, prefix)); } // Finally, add covering terms for all the ancestors of this cell. while ((level -= options_.level_mod()) >= options_.min_level()) { @@ -268,9 +302,8 @@ vector S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( prev_id.parent(level) == ancestor_id) { break; // We have already processed this cell and its ancestors. } - terms.push_back(GetTerm(TermType::COVERING, ancestor_id, prefix)); + terms->push_back(GetTerm(TermType::COVERING, ancestor_id, prefix)); } prev_id = id; } - return terms; } diff --git a/src/s2/s2region_term_indexer.h b/src/s2/s2region_term_indexer.h index 9db405284..6eb0aa8b7 100644 --- a/src/s2/s2region_term_indexer.h +++ b/src/s2/s2region_term_indexer.h @@ -198,8 +198,21 @@ class S2RegionTermIndexer { // this flag if your index consists entirely of points.) // // DEFAULT: false - bool index_contains_points_only() const { return points_only_; } - void set_index_contains_points_only(bool value) { points_only_ = value; } + bool index_contains_points_only() const { return index_points_only_; } + void set_index_contains_points_only(bool value) { index_points_only_ = value; } + + // If your query will only contain points (rather than regions), be sure + // to set this flag. This will generate smaller and faster index that + // are specialized for the points-only case. + // + // With the default quality settings, this flag reduces the number of + // index terms by about a factor of two. (The improvement gets smaller + // as max_cells() is increased, but there is really no reason not to use + // this flag if your query consist entirely of points.) + // + // DEFAULT: false + bool query_contains_points_only() const { return query_points_only_; } + void set_query_contains_points_only(bool value) { query_points_only_ = value; } // If true, the index will be optimized for space rather than for query // time. With the default quality settings, this flag reduces the number @@ -223,7 +236,8 @@ class S2RegionTermIndexer { void set_marker_character(char ch); private: - bool points_only_ = false; + bool index_points_only_ = false; + bool query_points_only_ = false; bool optimize_for_space_ = false; std::string marker_ = std::string(1, '$'); }; @@ -289,6 +303,21 @@ class S2RegionTermIndexer { std::vector GetQueryTermsForCanonicalCovering( const S2CellUnion& covering, absl::string_view prefix); + // Same as above but allows to reuse same buffer for different points or use + // single buffer for multiple points (common case is GeoJson MultiPoint) + void GetIndexTerms(const S2Point& point, absl::string_view prefix, + std::vector* terms); + void GetQueryTerms(const S2Point& point, absl::string_view prefix, + std::vector* terms); + + // Same as above but allows to reuse same buffer for different covering + void GetIndexTermsForCanonicalCovering(const S2CellUnion &covering, + absl::string_view prefix, + std::vector *terms); + void GetQueryTermsForCanonicalCovering(const S2CellUnion &covering, + absl::string_view prefix, + std::vector *terms); + private: enum TermType { ANCESTOR, COVERING }; From 7e6da011b906df4fdf7cc150ba01bb026d2b0817 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Mon, 9 Jan 2023 17:55:33 +0100 Subject: [PATCH 2/9] Fix test --- src/s2/s2region_term_indexer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index f92c245ef..3d903130e 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -85,7 +85,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, // random size. S2Cap cap; vector terms; - if (query_type == QueryType::CAP) { + if (query_type == QueryType::POINT) { cap = S2Cap::FromPoint(S2Testing::RandomPoint()); terms = indexer.GetQueryTerms(cap.center(), ""); } else { From 971346456e045f572c719c41c2e6cf081b65ce3e Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Mon, 9 Jan 2023 17:55:40 +0100 Subject: [PATCH 3/9] Add tests --- src/s2/s2region_term_indexer_test.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index 3d903130e..c4411e26c 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -123,6 +123,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTime) { options.set_max_level(16); options.set_max_cells(20); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) { @@ -132,6 +135,8 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) { options.set_max_level(16); options.set_max_cells(20); TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) { @@ -141,6 +146,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) { options.set_max_level(12); options.set_level_mod(3); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) { @@ -150,6 +158,9 @@ TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) { options.set_max_level(S2CellId::kMaxLevel); // Use leaf cells. options.set_max_cells(8); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) { @@ -161,6 +172,9 @@ TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) { options.set_max_cells(20); options.set_index_contains_points_only(true); TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) { @@ -169,6 +183,9 @@ TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) { options.set_index_contains_points_only(true); // Use default parameter values. TestRandomCaps(options, QueryType::CAP); + TestRandomCaps(options, QueryType::POINT); + options.set_query_contains_points_only(true); + TestRandomCaps(options, QueryType::POINT); } TEST(S2RegionTermIndexer, MarkerCharacter) { From ce3a3454ab18fcca04b8277fc5c7349aac2b90cc Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:38:00 +0100 Subject: [PATCH 4/9] Make test complete --- src/s2/s2region_term_indexer_test.cc | 98 +++++++++++++--------------- 1 file changed, 44 insertions(+), 54 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index c4411e26c..2253e84e4 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -113,79 +113,69 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, static_cast(query_terms) / absl::GetFlag(FLAGS_iters)); } -// We run one test case for each combination of space vs. time optimization, -// and indexing regions vs. only points. +using TestCase = std::tuple; + +class S2RegionTermIndexerTest : public testing::TestWithParam { +protected: + void SetUp() override { + query_type = std::get<0>(GetParam()); + options.set_optimize_for_space(std::get<1>(GetParam())); + options.set_index_contains_points_only(std::get<2>(GetParam())); + options.set_query_contains_points_only(std::get<3>(GetParam())); + if (query_type != QueryType::POINT && + options.query_contains_points_only()) { + GTEST_SKIP() << "Case query_type != QueryType::POINT && " + "options.query_contains_points_only() is invalid."; + } + } -TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTime) { S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(0); // Use face cells. - options.set_max_level(16); - options.set_max_cells(20); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + QueryType query_type{}; +}; + +// We run one test case for each combination: +// QueryType: POINT, CAP +// optimize_for_space: false, true +// index_contains_points_only: false, true +// query_contains_points_only: false, true +// Case QueryType != Cap and query_contains_points_only == true is invalid, +// so we skip it +INSTANTIATE_TEST_CASE_P( + S2RegionTermIndexerTests, S2RegionTermIndexerTest, + testing::Combine(testing::Values(QueryType::POINT, QueryType::CAP), + testing::Bool(), testing::Bool(), testing::Bool())); + +TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) { + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexRegionsQueryPointsOptimizeTime) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(0); // Use face cells. +TEST_P(S2RegionTermIndexerTest, UseFaceCells) { + options.set_min_level(0); options.set_max_level(16); options.set_max_cells(20); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeTimeWithLevelMod) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(6); // Constrain min/max levels. +TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) { + options.set_min_level(6); options.set_max_level(12); options.set_level_mod(3); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexRegionsQueryRegionsOptimizeSpace) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(true); // Optimize for space. +TEST_P(S2RegionTermIndexerTest, UseLeafCells) { options.set_min_level(4); - options.set_max_level(S2CellId::kMaxLevel); // Use leaf cells. + options.set_max_level(S2CellId::kMaxLevel); options.set_max_cells(8); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } -TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeTime) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(false); // Optimize for time. - options.set_min_level(0); // Use face cells. +TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { + options.set_min_level(0); options.set_max_level(S2CellId::kMaxLevel); options.set_level_mod(2); options.set_max_cells(20); - options.set_index_contains_points_only(true); - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); -} - -TEST(S2RegionTermIndexer, IndexPointsQueryRegionsOptimizeSpace) { - S2RegionTermIndexer::Options options; - options.set_optimize_for_space(true); // Optimize for space. - options.set_index_contains_points_only(true); - // Use default parameter values. - TestRandomCaps(options, QueryType::CAP); - TestRandomCaps(options, QueryType::POINT); - options.set_query_contains_points_only(true); - TestRandomCaps(options, QueryType::POINT); + TestRandomCaps(options, query_type); } TEST(S2RegionTermIndexer, MarkerCharacter) { From 4fa417bf533729eb711fb7e60f9eb608913cef2a Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:52:30 +0100 Subject: [PATCH 5/9] Improve doc --- src/s2/s2region_term_indexer.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc index 349c88a73..91ba02fea 100644 --- a/src/s2/s2region_term_indexer.cc +++ b/src/s2/s2region_term_indexer.cc @@ -61,6 +61,12 @@ // never be any document regions larger than the query region. This can // significantly reduce the size of queries. // +// + If the query will contain only points (rather than general regions), then +// we can skip all the ancestor terms mentioned above (except last cell see +// `GetIndexTerms(const S2Point& point...` for details) because there will +// never be any document regions larger than the index region. This can +// significantly reduce the size of index. +// // + If it is more important to optimize index size rather than query speed, // the number of index terms can be reduced by creating ancestor terms only // for the *proper* ancestors of the cells in a document region, and @@ -196,7 +202,7 @@ void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); - // assume level <= options_.true_max_level() + S2_DCHECK_LE(level, options_.true_max_level()); const bool is_max_level_cell = level == true_max_level; // Add a term for this cell, max_level cell ANCESTOR is optimization @@ -281,7 +287,7 @@ void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering( S2_DCHECK_GE(level, options_.min_level()); S2_DCHECK_LE(level, options_.max_level()); S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod()); - // assume level <= options_.true_max_level() + S2_DCHECK_LE(level, options_.true_max_level()); // Cells in the covering are always queried as ancestor terms. terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix)); From b66dbc18b43cc942d9f9cd6ce69242744c5c1eb1 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:05:09 +0100 Subject: [PATCH 6/9] Make better coverage --- src/s2/s2region_term_indexer_test.cc | 44 ++++++++++++++++------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index 2253e84e4..213de67fb 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -45,10 +45,10 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing"); namespace { -enum QueryType { POINT, CAP }; +enum DataType { POINT, CAP }; void TestRandomCaps(const S2RegionTermIndexer::Options& options, - QueryType query_type) { + DataType index_type, DataType query_type) { // This function creates an index consisting either of points (if // options.index_contains_points_only() is true) or S2Caps of random size. // It then executes queries consisting of points (if query_type == POINT) @@ -64,7 +64,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, // of random size (up to a full sphere). S2Cap cap; vector terms; - if (options.index_contains_points_only()) { + if (index_type == DataType::POINT) { cap = S2Cap::FromPoint(S2Testing::RandomPoint()); terms = indexer.GetIndexTerms(cap.center(), ""); } else { @@ -85,7 +85,7 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, // random size. S2Cap cap; vector terms; - if (query_type == QueryType::POINT) { + if (query_type == DataType::POINT) { cap = S2Cap::FromPoint(S2Testing::RandomPoint()); terms = indexer.GetQueryTerms(cap.center(), ""); } else { @@ -113,24 +113,29 @@ void TestRandomCaps(const S2RegionTermIndexer::Options& options, static_cast(query_terms) / absl::GetFlag(FLAGS_iters)); } -using TestCase = std::tuple; +using TestCase = std::tuple; class S2RegionTermIndexerTest : public testing::TestWithParam { protected: void SetUp() override { - query_type = std::get<0>(GetParam()); - options.set_optimize_for_space(std::get<1>(GetParam())); - options.set_index_contains_points_only(std::get<2>(GetParam())); - options.set_query_contains_points_only(std::get<3>(GetParam())); - if (query_type != QueryType::POINT && - options.query_contains_points_only()) { - GTEST_SKIP() << "Case query_type != QueryType::POINT && " + index_type = std::get<0>(GetParam()); + query_type = std::get<1>(GetParam()); + options.set_optimize_for_space(std::get<2>(GetParam())); + options.set_index_contains_points_only(std::get<3>(GetParam())); + options.set_query_contains_points_only(std::get<4>(GetParam())); + if (index_type != DataType::POINT && options.index_contains_points_only()) { + GTEST_SKIP() << "Case index_type != DataType::POINT && " + "options.index_contains_points_only() is invalid."; + } + if (query_type != DataType::POINT && options.query_contains_points_only()) { + GTEST_SKIP() << "Case query_type != DataType::POINT && " "options.query_contains_points_only() is invalid."; } } S2RegionTermIndexer::Options options; - QueryType query_type{}; + DataType index_type{}; + DataType query_type{}; }; // We run one test case for each combination: @@ -142,32 +147,33 @@ class S2RegionTermIndexerTest : public testing::TestWithParam { // so we skip it INSTANTIATE_TEST_CASE_P( S2RegionTermIndexerTests, S2RegionTermIndexerTest, - testing::Combine(testing::Values(QueryType::POINT, QueryType::CAP), + testing::Combine(testing::Values(DataType::POINT, DataType::CAP), + testing::Values(DataType::POINT, DataType::CAP), testing::Bool(), testing::Bool(), testing::Bool())); TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) { - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, UseFaceCells) { options.set_min_level(0); options.set_max_level(16); options.set_max_cells(20); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) { options.set_min_level(6); options.set_max_level(12); options.set_level_mod(3); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, UseLeafCells) { options.set_min_level(4); options.set_max_level(S2CellId::kMaxLevel); options.set_max_cells(8); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { @@ -175,7 +181,7 @@ TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { options.set_max_level(S2CellId::kMaxLevel); options.set_level_mod(2); options.set_max_cells(20); - TestRandomCaps(options, query_type); + TestRandomCaps(options, index_type, query_type); } TEST(S2RegionTermIndexer, MarkerCharacter) { From 960c16fbc45f75fd5a4df1ce7292f6d75e3c28cf Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:11:19 +0100 Subject: [PATCH 7/9] Fix comment --- src/s2/s2region_term_indexer_test.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index 213de67fb..15ba88b55 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -139,12 +139,11 @@ class S2RegionTermIndexerTest : public testing::TestWithParam { }; // We run one test case for each combination: -// QueryType: POINT, CAP +// index_type: POINT, CAP +// query_type: POINT, CAP // optimize_for_space: false, true // index_contains_points_only: false, true // query_contains_points_only: false, true -// Case QueryType != Cap and query_contains_points_only == true is invalid, -// so we skip it INSTANTIATE_TEST_CASE_P( S2RegionTermIndexerTests, S2RegionTermIndexerTest, testing::Combine(testing::Values(DataType::POINT, DataType::CAP), From c920d8f020680c37abc531cf928f1c1db23a71a4 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:22:53 +0100 Subject: [PATCH 8/9] Make better test case names --- src/s2/s2region_term_indexer_test.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index 15ba88b55..001fd0a53 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -26,6 +26,7 @@ #include #include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" #include "s2/base/commandlineflags.h" #include "s2/base/logging.h" @@ -45,7 +46,10 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing"); namespace { -enum DataType { POINT, CAP }; +enum DataType { + POINT = 0, + CAP = 1, +}; void TestRandomCaps(const S2RegionTermIndexer::Options& options, DataType index_type, DataType query_type) { @@ -148,7 +152,13 @@ INSTANTIATE_TEST_CASE_P( S2RegionTermIndexerTests, S2RegionTermIndexerTest, testing::Combine(testing::Values(DataType::POINT, DataType::CAP), testing::Values(DataType::POINT, DataType::CAP), - testing::Bool(), testing::Bool(), testing::Bool())); + testing::Bool(), testing::Bool(), testing::Bool()), + [](const testing::TestParamInfo& info) { + return absl::StrCat( + "Index", std::get<0>(info.param), "Query", std::get<1>(info.param), + "SpaceOpt", std::get<2>(info.param), "IndexOpt", + std::get<3>(info.param), "QueryOpt", std::get<4>(info.param)); + }); TEST_P(S2RegionTermIndexerTest, DefaultParametersValues) { TestRandomCaps(options, index_type, query_type); From a43ee3654e9290f269a2c4c31603629c1c1410ee Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Fri, 13 Jan 2023 12:55:35 +0100 Subject: [PATCH 9/9] Apply review suggestions --- src/s2/s2region_term_indexer.cc | 1 - src/s2/s2region_term_indexer_test.cc | 9 +++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/s2/s2region_term_indexer.cc b/src/s2/s2region_term_indexer.cc index 91ba02fea..1aa13358b 100644 --- a/src/s2/s2region_term_indexer.cc +++ b/src/s2/s2region_term_indexer.cc @@ -210,7 +210,6 @@ void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering( : TermType::COVERING, id, prefix)); - // If query only contains points, there are no need other terms. if (options_.query_contains_points_only()) continue; if (!options_.optimize_for_space() && !is_max_level_cell) { diff --git a/src/s2/s2region_term_indexer_test.cc b/src/s2/s2region_term_indexer_test.cc index 001fd0a53..ea214140e 100644 --- a/src/s2/s2region_term_indexer_test.cc +++ b/src/s2/s2region_term_indexer_test.cc @@ -46,10 +46,7 @@ S2_DEFINE_int32(iters, 400, "number of iterations for testing"); namespace { -enum DataType { - POINT = 0, - CAP = 1, -}; +enum DataType { POINT, CAP }; void TestRandomCaps(const S2RegionTermIndexer::Options& options, DataType index_type, DataType query_type) { @@ -180,12 +177,12 @@ TEST_P(S2RegionTermIndexerTest, ConstrainMinMaxLevels) { TEST_P(S2RegionTermIndexerTest, UseLeafCells) { options.set_min_level(4); - options.set_max_level(S2CellId::kMaxLevel); + options.set_max_level(S2CellId::kMaxLevel); // Use leaf cells. options.set_max_cells(8); TestRandomCaps(options, index_type, query_type); } -TEST_P(S2RegionTermIndexerTest, UseFaceCells2) { +TEST_P(S2RegionTermIndexerTest, UseFaceCellsCustomLevelMode) { options.set_min_level(0); options.set_max_level(S2CellId::kMaxLevel); options.set_level_mod(2);