-
Notifications
You must be signed in to change notification settings - Fork 138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Random sample in local mode #705
Changes from all commits
f1cecad
e34ff35
c70e80e
11b9c59
c474cf4
911ba0f
f735f9d
41c2fd5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -668,6 +668,19 @@ def dense_query_lookup_from( | |
def no_query_no_prefetch(cls, client: QdrantBase) -> models.QueryResponse: | ||
return client.query_points(collection_name=COLLECTION_NAME, limit=10) | ||
|
||
@classmethod
def random_query(cls, client: QdrantBase) -> models.QueryResponse:
    """Query a random sample of up to 100 points, returned ordered by id.

    Args:
        client: Client on which ``query_points`` is called.

    Returns:
        The ``query_points`` response, with ``points`` sorted in place by
        ``id``.
    """
    result = client.query_points(
        collection_name=COLLECTION_NAME,
        query=models.SampleQuery(sample=models.Sample.RANDOM),
        limit=100,
    )

    # Sort by id so that results can be compared irrespective of the order
    # in which the sample was returned.
    result.points.sort(key=lambda point: point.id)

    return result
|
||
|
||
def group_by_keys():
    """Return a fresh list of the key names used for grouping.

    NOTE(review): these look like payload field names (including the nested
    ``city.name`` path) -- confirm against the callers.
    """
    return ["maybe", "rand_digit", "two_words", "city.name", "maybe_null", "id"]
|
@@ -1341,3 +1354,18 @@ def test_query_group(prefer_grpc): | |
except AssertionError as e: | ||
print(f"\nFailed with filter {query_filter}") | ||
raise e | ||
|
||
|
||
@pytest.mark.parametrize("prefer_grpc", [False, True])
def test_random_sampling(prefer_grpc):
    """Compare random-sample query results between the local and remote clients.

    Both clients are loaded with the same 100 fixture points and then queried
    through ``TestSimpleSearcher.random_query``.
    """
    fixture_points = generate_fixtures(100)

    searcher = TestSimpleSearcher()

    local_client = init_local()
    init_client(local_client, fixture_points)

    remote_client = init_remote(prefer_grpc=prefer_grpc)
    init_client(remote_client, fixture_points)

    # random_query asks for limit=100, i.e. every point in the collection, and
    # sorts the result by id; that is why the two clients can be compared even
    # though the sampling itself is random.
    compare_client_results(local_client, remote_client, searcher.random_query)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are the random implementations between local and server equivalent?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this test we basically sample all the points available in the collection.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We return all points, post-processed to be sorted by ID, just to make sure we can return all of them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
np.random.permutation(self.inv_ids)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but we need the internal ids to filter against the mask, not the external ones