From de97c030260fbcf68a14ef78e6529180120cc247 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Wed, 9 Nov 2022 23:26:06 +0100 Subject: [PATCH] New version based on intensive round of reviewing together with Johannes In particular, there is no longer a "construction" phase now, at the cost of using a slower hash map (absl::node_hash_map), slightly more space (the hash map and a vector to the strings stored in the hash map), and an indirection when looking up the word for an index (we have to follow the pointer to the actual string stored in the hash map). --- src/engine/Bind.cpp | 29 +++++--- src/engine/CMakeLists.txt | 1 + src/engine/GroupBy.cpp | 10 +-- src/engine/Join.cpp | 17 +---- src/engine/LocalVocab.cpp | 51 ++++++++++++++ src/engine/LocalVocab.h | 69 +++++++++++++++++++ src/engine/QueryExecutionTree.cpp | 11 +-- src/engine/ResultTable.cpp | 41 +---------- src/engine/ResultTable.h | 61 +--------------- src/engine/Sort.cpp | 1 - .../sparqlExpressions/SparqlExpressionTypes.h | 11 +-- .../SparqlExpressionValueGetters.cpp | 8 +-- src/parser/CMakeLists.txt | 2 +- src/parser/data/Variable.cpp | 2 +- test/CMakeLists.txt | 10 +-- test/GroupByTest.cpp | 2 - test/LocalVocabTest.cpp | 10 +-- 17 files changed, 167 insertions(+), 169 deletions(-) create mode 100644 src/engine/LocalVocab.cpp create mode 100644 src/engine/LocalVocab.h diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 10f878bd4d..84f828d527 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -160,20 +160,31 @@ void Bind::computeExpressionBind( *resultType = sparqlExpression::detail::expressionResultTypeToQleverResultType(); - size_t i = 0; - for (auto&& resultValue : resultGenerator) { - output(i, inCols) = - sparqlExpression::detail::constantExpressionResultToId( - resultValue, *(outputResultTable->_localVocab), - isConstant && i > 0); - i++; + if (isConstant) { + auto it = resultGenerator.begin(); + if (it != resultGenerator.end()) { + Id constantId = + sparqlExpression::detail::constantExpressionResultToId( + *it, *(outputResultTable->_localVocab)); + for (size_t i = 0; i < inSize; ++i) { + output(i, inCols) = constantId; + } + } + } else { + size_t i = 0; + for (auto&& resultValue : resultGenerator) { + output(i, inCols) = + sparqlExpression::detail::constantExpressionResultToId( + resultValue, *(outputResultTable->_localVocab)); + i++; + } } } }; - outputResultTable->_localVocab->startConstructionPhase(); + // outputResultTable->_localVocab->startConstructionPhase(); std::visit(visitor, std::move(expressionResult)); - outputResultTable->_localVocab->endConstructionPhase(); + // outputResultTable->_localVocab->endConstructionPhase(); outputResultTable->_idTable = output.moveToDynamic(); } diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 5e2804f190..10cad9c16f 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -10,6 +10,7 @@ add_library(engine ../util/Socket.h Comparators.h ResultTable.h ResultTable.cpp + LocalVocab.h LocalVocab.cpp QueryExecutionContext.h IndexScan.h IndexScan.cpp Join.h Join.cpp diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 7d43de8e8c..f1adfacb19 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -171,7 +171,7 @@ void GroupBy::processGroup( *resultType = sparqlExpression::detail::expressionResultTypeToQleverResultType(); resultEntry = sparqlExpression::detail::constantExpressionResultToId( - singleResult, *(outTable->_localVocab), false); + singleResult, *(outTable->_localVocab)); } else { // This should never happen since aggregates always return constants. AD_FAIL() @@ -235,9 +235,9 @@ void GroupBy::doGroupBy(const IdTable& dynInput, if (groupByCols.empty()) { // The entire input is a single group - outTable->_localVocab->startConstructionPhase(); + // outTable->_localVocab->startConstructionPhase(); processNextBlock(0, input.size()); - outTable->_localVocab->endConstructionPhase(); + // outTable->_localVocab->endConstructionPhase(); *dynResult = result.moveToDynamic(); return; } @@ -251,7 +251,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput, size_t blockStart = 0; auto checkTimeoutAfterNCalls = checkTimeoutAfterNCallsFactory(32000); - outTable->_localVocab->startConstructionPhase(); + // outTable->_localVocab->startConstructionPhase(); for (size_t pos = 1; pos < input.size(); pos++) { checkTimeoutAfterNCalls(currentGroupBlock.size()); bool rowMatchesCurrentBlock = @@ -269,7 +269,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput, } } processNextBlock(blockStart, input.size()); - outTable->_localVocab->endConstructionPhase(); + // outTable->_localVocab->endConstructionPhase(); *dynResult = result.moveToDynamic(); } diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 7c133ae2c5..170e2785ac 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -145,21 +145,10 @@ void Join::computeResult(ResultTable* result) { &result->_idTable); // If only one of the two operands has a local vocab, pass it on. - bool leftLocalVocabEmpty = leftRes->_localVocab->empty(); - bool rightLocalVocabEmpty = rightRes->_localVocab->empty(); - if (!leftLocalVocabEmpty || !rightLocalVocabEmpty) { - if (!leftLocalVocabEmpty && rightLocalVocabEmpty) { - result->_localVocab = std::move(leftRes->_localVocab); - } else if (leftLocalVocabEmpty && !rightLocalVocabEmpty) { - result->_localVocab = std::move(rightRes->_localVocab); - } else { - throw std::runtime_error( - "JOIN of two results, where both have a non-empty vocabulary, is " - "currently not supported"); - } - } + result->_localVocab = LocalVocab::mergeLocalVocabsIfOneIsEmpty( + leftRes->_localVocab, rightRes->_localVocab); - LOG(DEBUG) << "Join result computation done." << endl; + LOG(DEBUG) << "Join result computation done" << endl; } // _____________________________________________________________________________ diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp new file mode 100644 index 0000000000..426db855da --- /dev/null +++ b/src/engine/LocalVocab.cpp @@ -0,0 +1,51 @@ +// Copyright 2022, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Hannah Bast + +#include "engine/LocalVocab.h" + +#include "absl/strings/str_cat.h" +#include "global/Id.h" +#include "global/ValueId.h" + +// _____________________________________________________________________________ +Id LocalVocab::getIdAndAddIfNotContained(const std::string& word) { + // The following code avoids computing the hash for `word` twice in case we + // see it for the first time (note that hashing a string is not cheap). The + // return value of the `insert` operation is a pair, where `result.first` is + // an iterator to the (already existing or newly inserted) key-value pair, and + // `result.second` is a `bool`, which is `true` if and only if the value was + // newly inserted. + auto [keyValuePair, isNewWord] = wordsToIdsMap_.insert({word, nextFreeId_}); + if (isNewWord) { + idsToWordsMap_.push_back(&(keyValuePair->first)); + nextFreeId_ = Id::makeFromLocalVocabIndex( + LocalVocabIndex::make(idsToWordsMap_.size())); + } + return keyValuePair->second; +} + +// _____________________________________________________________________________ +const std::string& LocalVocab::getWord(LocalVocabIndex localVocabIndex) const { + if (localVocabIndex.get() > idsToWordsMap_.size()) { + throw std::runtime_error(absl::StrCat( + "LocalVocab error: request for word with local vocab index ", + localVocabIndex.get(), ", but size of local vocab is only ", + idsToWordsMap_.size(), ", please contact the developers")); + } + return *(idsToWordsMap_[localVocabIndex.get()]); +} + +// _____________________________________________________________________________ +std::shared_ptr LocalVocab::mergeLocalVocabsIfOneIsEmpty( + std::shared_ptr localVocab1, + std::shared_ptr localVocab2) { + bool isLocalVocab1Empty = localVocab1->empty(); + bool isLocalVocab2Empty = localVocab2->empty(); + if (!isLocalVocab1Empty && !isLocalVocab2Empty) { + throw std::runtime_error( + "Merging of two non-empty local vocabularies is currently not " + "supported, please contact the developers"); + } + return !isLocalVocab1Empty ? localVocab1 : localVocab2; +} diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h new file mode 100644 index 0000000000..e7ec5b0a34 --- /dev/null +++ b/src/engine/LocalVocab.h @@ -0,0 +1,69 @@ +// Copyright 2022, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Hannah Bast + +#pragma once + +#include "absl/container/node_hash_map.h" + +// A class for maintaing a local vocabulary with contiguous (local) IDs. This is +// meant for words that are not part of the normal vocabulary (constructed from +// the input data at indexing time). +// +// TODO: This is a first version of this class with basic functionality. Note +// that the local vocabulary used to be a simple `std::vector` +// defined inside of the `ResultTable` class. You gotta start somewhere. +class LocalVocab { + public: + // Create a new, empty local vocabulary. + LocalVocab() = default; + + // Prevent accidental copying of a local vocabulary (it can be quite large), + // but moving it is OK. + // + // TODO: does the default move do the "right" thing, that is, move the hash + // map instead of copying it? + LocalVocab(const LocalVocab&) = delete; + LocalVocab(LocalVocab&&) = default; + + // Get ID of a word in the local vocabulary. If the word was already + // contained, return the already existing ID. If the word was not yet + // contained, add it, and return the new ID. + Id getIdAndAddIfNotContained(const std::string& word); + + // The number of words in the vocabulary. + size_t size() const { return idsToWordsMap_.size(); } + + // Return true if and only if the local vocabulary is empty. + bool empty() const { return idsToWordsMap_.empty(); } + + // Return a const reference to the word. + const std::string& getWord(LocalVocabIndex localVocabIndex) const; + + // Merge two local vocabularies if at least one of them is empty. If both are + // non-empty, throws an exception. + // + // TODO: Eventually, we want to have one local vocab for the whole query to + // which each operation writes (one after the other). Then we don't need a + // merge function anymore. + static std::shared_ptr mergeLocalVocabsIfOneIsEmpty( + std::shared_ptr localVocab1, + std::shared_ptr localVocab2); + + private: + // A map of the words in the local vocabulary to their local IDs. This is a + // node hash map because we need the addresses of the words (which are of type + // `std::string`) to remain stable over their lifetime in the hash map because + // we refer to them in `wordsToIdsMap_` below. + absl::node_hash_map wordsToIdsMap_; + + // A map of the local IDs to the words. Since the IDs are contiguous, we can + // use a `std::vector`. We store pointers to the actual words in + // `wordsToIdsMap_` to avoid storing every word twice. This saves space, but + // costs us an indirection when looking up a word by its ID. + std::vector idsToWordsMap_; + + // The next free local ID (will be incremented by one each time we add a new + // word). + Id nextFreeId_ = Id::makeFromLocalVocabIndex(LocalVocabIndex::make(0)); +}; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index cbd9a85656..8a370ab127 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -333,12 +333,8 @@ QueryExecutionTree::idToStringAndType(Id id, return std::pair{std::move(entity.value()), nullptr}; } case Datatype::LocalVocabIndex: { - auto optionalString = - resultTable.indexToOptionalString(id.getLocalVocabIndex()); - if (!optionalString.has_value()) { - return std::nullopt; - } - return std::pair{optionalString.value(), nullptr}; + return std::pair{ + resultTable._localVocab->getWord(id.getLocalVocabIndex()), nullptr}; } case Datatype::TextRecordIndex: return std::pair{_qec->getIndex().getTextExcerpt(id.getTextRecordIndex()), @@ -453,8 +449,7 @@ ad_utility::streams::stream_generator QueryExecutionTree::generateResults( break; case Datatype::LocalVocabIndex: co_yield escapeFunction( - resultTable->indexToOptionalString(id.getLocalVocabIndex()) - .value_or("")); + resultTable->_localVocab->getWord(id.getLocalVocabIndex())); break; case Datatype::TextRecordIndex: co_yield escapeFunction( diff --git a/src/engine/ResultTable.cpp b/src/engine/ResultTable.cpp index 6d1f7c5610..3a76a30dbe 100644 --- a/src/engine/ResultTable.cpp +++ b/src/engine/ResultTable.cpp @@ -7,46 +7,7 @@ #include -#include "global/Id.h" -#include "global/ValueId.h" - -// _____________________________________________________________________________ -Id LocalVocab::getIdAndAddIfNotContained(const std::string& word) { - if (constructionHasFinished_) { - throw std::runtime_error( - "Invalid use of `LocalVocab`: You must not call " - "`getIdAndAddIfNotContained` after `endConstructionPhase` has been " - "called"); - } - // The following code avoids computing the hash for `word` twice in case we - // see it for the first time (note that hashing a string is not cheap). The - // return value of the `insert` operation is a pair, where `result.first` is - // an iterator to the (already existing or newly inserted) key-value pair, and - // `result.second` is a `bool`, which is `true` if and only if the value was - // newly inserted. - auto result = wordsToIdsMap_.insert( - {word, - Id::makeFromLocalVocabIndex(LocalVocabIndex::make(words_.size()))}); - if (result.second) { - words_.push_back(word); - } - return result.first->second; -} - -// _____________________________________________________________________________ -void LocalVocab::startConstructionPhase() { - if (!words_.empty()) { - throw std::runtime_error( - "Invalid use of `LocalVocab`: `startConstructionPhase` must currently " - "only be called when the vocabulary is still empty"); - } -} - -// _____________________________________________________________________________ -void LocalVocab::endConstructionPhase() { - wordsToIdsMap_.clear(); - constructionHasFinished_ = true; -} +#include "engine/LocalVocab.h" // _____________________________________________________________________________ ResultTable::ResultTable(ad_utility::AllocatorWithLimit allocator) diff --git a/src/engine/ResultTable.h b/src/engine/ResultTable.h index 62c0de8233..b7f38a3819 100644 --- a/src/engine/ResultTable.h +++ b/src/engine/ResultTable.h @@ -12,6 +12,7 @@ #include #include "engine/IdTable.h" +#include "engine/LocalVocab.h" #include "engine/ResultType.h" #include "global/Id.h" #include "global/ValueId.h" @@ -25,59 +26,6 @@ using std::mutex; using std::unique_lock; using std::vector; -// The local vocabulary for a particular result table. It maps the IDs that are -// not part of the normal vocabulary -// -// -// It contains a map from -// (local vocab) ids -class LocalVocab { - public: - // Create a new, empty local vocabulary. - LocalVocab() {} - - // Prevent accidental copying of a local vocabulary. - // TODO: Needed in SparqlExpressionTestHelpers.h:91. - // LocalVocab(const LocalVocab&) = delete; - - // Get ID of a word in the local vocabulary. If the word was already - // contained, return the already existing ID. If the word was not yet - // contained, add it, and return the new ID. - [[maybe_unused]] Id getIdAndAddIfNotContained(const std::string& word); - - // Start the construction of a local vocabulary. This is currently allowed - // only once, when the vocabulary is still empty. - void startConstructionPhase(); - - // Signal that the construction of the local vocabulary is done. This call - // will clear the `wordsToIdsMap_` (to save space) and afterwards, - // `getIdAndAddIfNotContained` can no longer be called. - void endConstructionPhase(); - - // The number of words in the vocabulary. - size_t size() const { return words_.size(); } - - // Return true if and only if the local vocabulary is empty. - bool empty() const { return words_.empty(); } - - // Return a const reference to the i-th word. - const std::string& operator[](size_t i) const { return words_[i]; } - - private: - // The words of the local vocabulary. The index of a word in the `std::vector` - // corresponds to its ID in the local vocabulary. - std::vector words_; - - // Remember which words are already in the vocabulary and with which ID. This - // map is only used during the construction of a local vocabulary and can (and - // should) be cleared when the construction is done (to save space). - ad_utility::HashMap wordsToIdsMap_; - - // Indicator whether the vocabulary is still under construction (only then can - // `getIdAndAddIfNotContained` be called) or done. - bool constructionHasFinished_ = false; -}; - class ResultTable { public: enum Status { IN_PROGRESS = 0, FINISHED = 1, ABORTED = 2 }; @@ -109,13 +57,6 @@ class ResultTable { virtual ~ResultTable(); - std::optional indexToOptionalString(LocalVocabIndex idx) const { - if (idx.get() < _localVocab->size()) { - return (*_localVocab)[idx.get()]; - } - return std::nullopt; - } - size_t size() const; size_t width() const { return _idTable.cols(); } diff --git a/src/engine/Sort.cpp b/src/engine/Sort.cpp index 017368b09f..42935a8cc9 100644 --- a/src/engine/Sort.cpp +++ b/src/engine/Sort.cpp @@ -77,7 +77,6 @@ void Sort::computeResult(ResultTable* result) { result->_resultTypes.insert(result->_resultTypes.end(), subRes->_resultTypes.begin(), subRes->_resultTypes.end()); - // TODO: Shouldn't we use std::move here? result->_localVocab = subRes->_localVocab; result->_idTable.insert(result->_idTable.end(), subRes->_idTable.begin(), subRes->_idTable.end()); diff --git a/src/engine/sparqlExpressions/SparqlExpressionTypes.h b/src/engine/sparqlExpressions/SparqlExpressionTypes.h index 236bc45d61..4b35a7443f 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionTypes.h +++ b/src/engine/sparqlExpressions/SparqlExpressionTypes.h @@ -238,19 +238,10 @@ constexpr static qlever::ResultType expressionResultTypeToQleverResultType() { /// Get Id of constant result of type T. template -Id constantExpressionResultToId(T&& result, LocalVocabT& localVocab, - [[maybe_unused]] bool isRepetitionOfConstant) { +Id constantExpressionResultToId(T&& result, LocalVocabT& localVocab) { static_assert(isConstantResult); if constexpr (ad_utility::isSimilar) { - // TODO: Should we make this more efficient by profiting from the - // knowledge in `isRepetitionOfConstant`? The previous code did this (out of - // neccessity because it just appended words to the local vocab vector). return localVocab.getIdAndAddIfNotContained(std::forward(result)); - // if (!isRepetitionOfConstant) { - // localVocab.addWordNoMatterWhat(std::forward(result)); - // } - // return Id::makeFromLocalVocabIndex( - // LocalVocabIndex::make(localVocab.size() - 1)); } else if constexpr (ad_utility::isSimilar) { return Id::makeFromDouble(result); } else if constexpr (ad_utility::isSimilar) { diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index d407b1a6de..f9048f736e 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -50,9 +50,7 @@ bool EffectiveBooleanValueGetter::operator()(ValueId id, .empty(); } case Datatype::LocalVocabIndex: { - auto index = id.getLocalVocabIndex(); - AD_CHECK(index.get() < context->_localVocab.size()); - return !(context->_localVocab[index.get()].empty()); + return !(context->_localVocab.getWord(id.getLocalVocabIndex()).empty()); } case Datatype::TextRecordIndex: return true; @@ -75,9 +73,7 @@ string StringValueGetter::operator()(Id id, EvaluationContext* context) const { .indexToOptionalString(id.getVocabIndex()) .value_or(""); case Datatype::LocalVocabIndex: { - auto index = id.getLocalVocabIndex().get(); - AD_CHECK(index < context->_localVocab.size()); - return context->_localVocab[index]; + return context->_localVocab.getWord(id.getLocalVocabIndex()); } case Datatype::TextRecordIndex: return context->_qec.getIndex().getTextExcerpt(id.getTextRecordIndex()); diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 9aaf0bac0a..3725ba202e 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -24,5 +24,5 @@ add_library(parser SelectClause.h GraphPatternOperation.cpp GraphPatternOperation.h # The `Variable.cpp` from the subdirectory is linked here because otherwise we get linking errors. GraphPattern.cpp GraphPattern.h ConstructClause.h data/Variable.cpp) -target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2 absl::flat_hash_map util) +target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2 absl::flat_hash_map util engine) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 7e4678e5da..8e86b83f1b 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -45,7 +45,7 @@ Variable::Variable(std::string name) : _name{std::move(name)} { case Datatype::VocabIndex: return qecIndex.idToOptionalString(id).value_or(""); case Datatype::LocalVocabIndex: - return res.indexToOptionalString(id.getLocalVocabIndex()).value_or(""); + return res._localVocab->getWord(id.getLocalVocabIndex()); case Datatype::TextRecordIndex: return qecIndex.getTextExcerpt(id.getTextRecordIndex()); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 55448c9817..5c068e97c4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -61,7 +61,7 @@ endfunction() addLinkAndDiscoverTest(ValueIdComparatorsTest) -addLinkAndDiscoverTest(SparqlParserTest parser sparqlExpressions ${ICU_LIBRARIES}) +addLinkAndDiscoverTest(SparqlParserTest parser engine sparqlExpressions ${ICU_LIBRARIES}) addLinkAndDiscoverTest(StringUtilsTest ${ICU_LIBRARIES}) @@ -139,7 +139,7 @@ addLinkAndDiscoverTest(MinusTest engine) addAndLinkTest(SortPerformanceEstimatorTest SortPerformanceEstimator) #addLinkAndDiscoverTest(SortPerformanceEstimatorTest SortPerformanceEstimator) -addLinkAndDiscoverTest(SparqlAntlrParserTest parser sparqlExpressions) +addLinkAndDiscoverTest(SparqlAntlrParserTest parser sparqlExpressions engine) # The SerializerTest uses temporary files. The tests fail when multiple test # cases are run in parallel. This should be fixed by using distinct filenames @@ -157,7 +157,7 @@ addLinkAndDiscoverTest(SetOfIntervalsTest sparqlExpressions) addLinkAndDiscoverTest(TypeTraitsTest) -addLinkAndDiscoverTest(SparqlExpressionTest sparqlExpressions index) +addLinkAndDiscoverTest(SparqlExpressionTest sparqlExpressions index engine) addLinkAndDiscoverTest(StreamableBodyTest) @@ -226,11 +226,11 @@ addLinkAndDiscoverTest(ValueIdTest absl::flat_hash_set) addLinkAndDiscoverTest(LambdaHelpersTest) -addLinkAndDiscoverTest(ParseExceptionTest parser) +addLinkAndDiscoverTest(ParseExceptionTest parser engine) addLinkAndDiscoverTest(TransparentFunctorsTest) -addLinkAndDiscoverTest(SelectClauseTest parser) +addLinkAndDiscoverTest(SelectClauseTest parser engine) addLinkAndDiscoverTestSerial(RelationalExpressionTest sparqlExpressions index engine) diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index 4b31cb4fcc..6befc14b03 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -104,11 +104,9 @@ TEST_F(GroupByTest, doGroupBy) { // create an input result table with a local vocabulary ResultTable inTable{allocator()}; - inTable._localVocab->startConstructionPhase(); inTable._localVocab->getIdAndAddIfNotContained(""); inTable._localVocab->getIdAndAddIfNotContained(""); inTable._localVocab->getIdAndAddIfNotContained(""); - inTable._localVocab->endConstructionPhase(); IdTable inputData(6, allocator()); // The input data types are diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index 54cdd9165a..c4c2dd07e6 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -13,16 +13,12 @@ TEST(LocalVocab, construction) { }; LocalVocab localVocab; ASSERT_TRUE(localVocab.empty()); - localVocab.startConstructionPhase(); checkId(localVocab.getIdAndAddIfNotContained("bla"), 0); checkId(localVocab.getIdAndAddIfNotContained("blu"), 1); checkId(localVocab.getIdAndAddIfNotContained("bli"), 2); checkId(localVocab.getIdAndAddIfNotContained("bla"), 0); - localVocab.endConstructionPhase(); ASSERT_EQ(localVocab.size(), 3); - ASSERT_EQ(localVocab[0], "bla"); - ASSERT_EQ(localVocab[1], "blu"); - ASSERT_EQ(localVocab[2], "bli"); - ASSERT_THROW(localVocab.getIdAndAddIfNotContained("one more"), - std::runtime_error); + ASSERT_EQ(localVocab.getWord(LocalVocabIndex::make(0)), "bla"); + ASSERT_EQ(localVocab.getWord(LocalVocabIndex::make(1)), "blu"); + ASSERT_EQ(localVocab.getWord(LocalVocabIndex::make(2)), "bli"); }