-
Notifications
You must be signed in to change notification settings - Fork 198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Boost tensor fields #300
Merged
Merged
Boost tensor fields #300
Changes from 4 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
a8f00b6
added field score boosters
aryanagarwal9 ec208b2
add tests
aryanagarwal9 4a5a5ac
added error handling
aryanagarwal9 7e0d00c
added tests
aryanagarwal9 fbca383
Merge branch 'mainline' into re-weight-search
pandu-k b14b046
Added validation to boost
pandu-k 232c869
added exclude vectors to search
pandu-k 71cc8a0
Merge branch 'mainline' into re-weight-search
pandu-k c5fcd08
add a test to test boost equation.
wanliAlex c26c7ed
add a test to test boost equation.
wanliAlex 500b718
add a test to test boost equation.
wanliAlex e04388e
add test to different scores
wanliAlex 6c2645e
add test to different scores
wanliAlex b1a23b5
delete print
wanliAlex 1276144
added extra image search test
pandu-k File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from marqo.errors import IndexNotFoundError, InvalidArgError | ||
from marqo.tensor_search import tensor_search | ||
|
||
from tests.marqo_test import MarqoTestCase | ||
|
||
|
||
class TestBoostFieldScores(MarqoTestCase):
    """Tests for the ``boost`` argument of ``tensor_search.search``.

    Every test runs against a freshly created index holding two documents,
    each with a ``Title`` and a ``Description`` field.
    """

    def setUp(self):
        """Recreate the test index and populate it with two documents."""
        self.index_name_1 = "my-test-index-1"
        try:
            tensor_search.delete_index(config=self.config, index_name=self.index_name_1)
        except IndexNotFoundError:
            # The index did not exist yet, so there is nothing to clean up.
            pass

        # Creation happens after (not in a ``finally`` of) the delete attempt,
        # so an unexpected delete failure surfaces on its own instead of
        # being followed by further setup work against a broken backend.
        tensor_search.create_vector_index(
            index_name=self.index_name_1, config=self.config)

        tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=[
            {
                "Title": "The Travels of Marco Polo",
                "Description": "A 13th-century travelogue describing Polo's travels",
                "_id": "article_590"
            },
            {
                "Title": "Extravehicular Mobility Unit (EMU)",
                "Description": "The EMU is a spacesuit that provides environmental protection, "
                               "mobility, life support, and communications for astronauts",
                "_id": "article_591"
            }
        ], auto_refresh=True)

    def tearDown(self) -> None:
        """No per-test cleanup; ``setUp`` deletes any leftover index."""
        pass

    def test_score_is_boosted(self):
        """Boosting ``Title`` should raise the top hit's score above the
        score of the same query run without a boost.
        """
        q = "What is the best outfit to wear on the moon?"

        res = tensor_search.search(
            config=self.config, index_name=self.index_name_1, text=q,
        )
        res_boosted = tensor_search.search(
            config=self.config, index_name=self.index_name_1, text=q, boost={'Title': (5, 1)}
        )

        score = res['hits'][0]['_score']
        score_boosted = res_boosted['hits'][0]['_score']

        self.assertGreater(score_boosted, score)

    def test_boost_empty_dict(self):
        """Passing an empty dict in the boost argument should not affect the score.
        """
        q = "What is the best outfit to wear on the moon?"

        res = tensor_search.search(
            config=self.config, index_name=self.index_name_1, text=q
        )
        res_boosted = tensor_search.search(
            config=self.config, index_name=self.index_name_1, text=q, boost={}
        )

        score = res['hits'][0]['_score']
        score_boosted = res_boosted['hits'][0]['_score']

        self.assertEqual(score_boosted, score)

    def test_different_attributes_searched_and_boosted(self):
        """An error should be raised if the user tries to
        boost a field which is not being searched.
        """
        q = "What is the best outfit to wear on the moon?"

        with self.assertRaises(InvalidArgError) as ctx:
            tensor_search.search(
                config=self.config, index_name=self.index_name_1, text=q,
                searchable_attributes=['Description'], boost={'Title': (0.5, 1)}
            )

        # Checked after the ``with`` block: ``ctx.exception`` is only set once
        # the context manager has exited.
        self.assertIn('Title', str(ctx.exception))

    def test_boost_invalid_fields(self):
        """An error should be raised if the user tries to boost a non-existent field.
        The error message should tell the user which field(s) were unable to be boosted.
        """
        q = "What is the best outfit to wear on the moon?"

        with self.assertRaises(InvalidArgError) as ctx:
            tensor_search.search(
                config=self.config, index_name=self.index_name_1, text=q,
                boost={'Title': (0.2, 1), 'invalid_field_name': (0.5, 1)}
            )

        # Checked after the ``with`` block: ``ctx.exception`` is only set once
        # the context manager has exited.
        self.assertIn('invalid_field_name', str(ctx.exception))
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we test with more documents (10 or so) with varying fields, with varying other params (like pagination)?