Skip to content

Commit

Permalink
Make the word2vec getSynonyms method synchronized.
Browse files Browse the repository at this point in the history
This is a quick mitigation to avoid unexpected behaviour if multiple queries
are executed concurrently.

This commit addresses some of the review comments received on PR apache#12169
  • Loading branch information
dantuzi committed Apr 27, 2023
1 parent 1fa2be9 commit 87c0ea3
Showing 1 changed file with 12 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class Word2VecSynonymProvider {
private static final VectorSimilarityFunction SIMILARITY_FUNCTION =
VectorSimilarityFunction.DOT_PRODUCT;
private static final VectorEncoding VECTOR_ENCODING = VectorEncoding.FLOAT32;
private static final int NO_LIMIT_ON_VISITED_NODES = Integer.MAX_VALUE;
private final Word2VecModel word2VecModel;
private final HnswGraph hnswGraph;

Expand All @@ -64,7 +65,16 @@ public Word2VecSynonymProvider(Word2VecModel model) throws IOException {
this.hnswGraph = builder.build(word2VecModel.copy());
}

public List<TermAndBoost> getSynonyms(
/**
* Returns the list of synonyms of a provided term.
* This method is synchronized because it uses the {@link org.apache.lucene.util.hnsw.OnHeapHnswGraph},
* which is not thread-safe.
*
* @param term term to search to find synonyms
* @param maxSynonymsPerTerm limit of synonyms returned
* @param minAcceptedSimilarity lower similarity threshold to consider another term as synonym
*/
public synchronized List<TermAndBoost> getSynonyms(
BytesRef term, int maxSynonymsPerTerm, float minAcceptedSimilarity) throws IOException {

if (term == null) {
Expand All @@ -85,7 +95,7 @@ public List<TermAndBoost> getSynonyms(
SIMILARITY_FUNCTION,
hnswGraph,
null,
word2VecModel.size());
NO_LIMIT_ON_VISITED_NODES);

int size = synonyms.size();
for (int i = 0; i < size; i++) {
Expand Down

0 comments on commit 87c0ea3

Please sign in to comment.