Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove deprecated queries option in query() #272

Merged
merged 3 commits into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 7 additions & 34 deletions pinecone/data/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
FetchResponse,
QueryRequest,
QueryResponse,
QueryVector,
RpcStatus,
ScoredVector,
SingleQueryResults,
Expand All @@ -33,7 +32,6 @@
"FetchResponse",
"QueryRequest",
"QueryResponse",
"QueryVector",
"RpcStatus",
"ScoredVector",
"SingleQueryResults",
Expand All @@ -60,12 +58,8 @@
"async_req",
)

def parse_query_response(response: QueryResponse, unary_query: bool):
if unary_query:
response._data_store.pop("results", None)
else:
response._data_store.pop("matches", None)
response._data_store.pop("namespace", None)
def parse_query_response(response: QueryResponse):
response._data_store.pop("results", None)
return response

class Index():
Expand Down Expand Up @@ -324,7 +318,6 @@ def query(
top_k: int,
vector: Optional[List[float]] = None,
id: Optional[str] = None,
queries: Optional[Union[List[QueryVector], List[Tuple]]] = None,
namespace: Optional[str] = None,
filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
include_values: Optional[bool] = None,
Expand All @@ -351,13 +344,10 @@ def query(
Args:
vector (List[float]): The query vector. This should be the same length as the dimension of the index
being queried. Each `query()` request can contain only one of the parameters
`queries`, `id` or `vector`.. [optional]
`id` or `vector`.. [optional]
id (str): The unique ID of the vector to be used as a query vector.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
queries ([QueryVector]): DEPRECATED. The query vectors.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
`vector` or `id`. [optional]
top_k (int): The number of results to return for each query. Must be an integer greater than 1.
namespace (str): The namespace to fetch vectors from.
If not specified, the default namespace is used. [optional]
Expand All @@ -371,35 +361,19 @@ def query(
sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]}, where the lists each have the same length.
Keyword Args:
Supports OpenAPI client keyword arguments. See pinecone.core.client.models.QueryRequest for more details.
Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
and namespace name.
"""

def _query_transform(item):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗑️

if isinstance(item, QueryVector):
return item
if isinstance(item, tuple):
values, filter = fix_tuple_length(item, 2)
if filter is None:
return QueryVector(values=values, _check_type=_check_type)
else:
return QueryVector(values=values, filter=filter, _check_type=_check_type)
if isinstance(item, Iterable):
return QueryVector(values=item, _check_type=_check_type)
raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}")

_check_type = kwargs.pop("_check_type", False)
queries = list(map(_query_transform, queries)) if queries is not None else None

sparse_vector = self._parse_sparse_values_arg(sparse_vector)
args_dict = self._parse_non_empty_args(
[
("vector", vector),
("id", id),
("queries", queries),
("queries", None),
("top_k", top_k),
("namespace", namespace),
("filter", filter),
Expand All @@ -416,8 +390,7 @@ def _query_transform(item):
),
**{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS},
)
unary_query = True if vector is not None or id else False
return parse_query_response(response, unary_query)
return parse_query_response(response)

@validate_and_convert_errors
def update(
Expand Down
2 changes: 0 additions & 2 deletions pinecone/grpc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@

from pinecone.core.grpc.protos.vector_service_pb2 import (
Vector as GRPCVector,
QueryVector as GRPCQueryVector,
SparseValues as GRPCSparseValues,
Vector,
QueryVector,
SparseValues
)
25 changes: 3 additions & 22 deletions pinecone/grpc/index_grpc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import logging
import numbers
from typing import Optional, Dict, Iterable, Union, List, Tuple, Any, TypedDict, cast
from collections.abc import Mapping
from typing import Optional, Dict, Union, List, Tuple, Any, TypedDict, cast

from google.protobuf import json_format

Expand Down Expand Up @@ -31,7 +29,6 @@
)
from pinecone import Vector as NonGRPCVector
from pinecone.core.grpc.protos.vector_service_pb2_grpc import VectorServiceStub
from pinecone.utils import fix_tuple_length
from .base import GRPCIndexBase
from .future import PineconeGrpcFuture

Expand Down Expand Up @@ -293,7 +290,6 @@ def query(
self,
vector: Optional[List[float]] = None,
id: Optional[str] = None,
queries: Optional[Union[List[GRPCQueryVector], List[Tuple]]] = None,
namespace: Optional[str] = None,
top_k: Optional[int] = None,
filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
Expand Down Expand Up @@ -323,9 +319,6 @@ def query(
id (str): The unique ID of the vector to be used as a query vector.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
queries ([GRPCQueryVector]): DEPRECATED. The query vectors.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
top_k (int): The number of results to return for each query. Must be an integer greater than 1.
namespace (str): The namespace to fetch vectors from.
If not specified, the default namespace is used. [optional]
Expand All @@ -344,18 +337,7 @@ def query(
and namespace name.
"""

def _query_transform(item):
if isinstance(item, GRPCQueryVector):
return item
if isinstance(item, tuple):
values, filter = fix_tuple_length(item, 2)
filter = dict_to_proto_struct(filter)
return GRPCQueryVector(values=values, filter=filter)
if isinstance(item, Iterable):
return GRPCQueryVector(values=item)
raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}")

queries = list(map(_query_transform, queries)) if queries is not None else None
queries = None

if filter is not None:
filter_struct = dict_to_proto_struct(filter)
Expand All @@ -382,8 +364,7 @@ def _query_transform(item):
timeout = kwargs.pop("timeout", None)
response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout)
json_response = json_format.MessageToDict(response)
unary_query = True if vector is not None or id else False
return parse_query_response(json_response, unary_query, _check_type=False)
return parse_query_response(json_response, _check_type=False)

def update(
self,
Expand Down
66 changes: 14 additions & 52 deletions pinecone/grpc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,21 @@

from google.protobuf.struct_pb2 import Struct

def _generate_request_id() -> str:
return str(uuid.uuid4())

from pinecone.core.client.models import (
Vector as _Vector,
Usage,
ScoredVector,
SparseValues,
FetchResponse,
SingleQueryResults,
QueryResponse,
DescribeIndexStatsResponse,
NamespaceSummary,
)

from typing import NamedTuple, Optional

class QueryResponseKwargs(NamedTuple):
check_type: bool
namespace: Optional[str]
matches: Optional[list]
results: Optional[list]
usage: Usage

def _generate_request_id() -> str:
return str(uuid.uuid4())
from typing import Optional

def dict_to_proto_struct(d: Optional[dict]) -> "Struct":
if not d:
Expand Down Expand Up @@ -54,13 +46,11 @@ def parse_fetch_response(response: dict):
metadata=vec.get("metadata", None),
_check_type=False,
)

usage = parse_usage(response)

return FetchResponse(
vectors=vd,
namespace=namespace,
usage=usage,
usage=parse_usage(response),
_check_type=False
)

Expand All @@ -69,26 +59,8 @@ def parse_usage(response):
return Usage(read_units=int(u.get("readUnits", 0)))


def parse_query_response(response: dict, unary_query: bool, _check_type: bool = False):
res = []

# TODO: consider deleting this deprecated case
for match in response.get("results", []):
namespace = match.get("namespace", "")
m = []
if "matches" in match:
for item in match["matches"]:
sc = ScoredVector(
id=item["id"],
score=item.get("score", 0.0),
values=item.get("values", []),
sparse_values=parse_sparse_values(item.get("sparseValues")),
metadata=item.get("metadata", None),
)
m.append(sc)
res.append(SingleQueryResults(matches=m, namespace=namespace))

m = []
def parse_query_response(response: dict, _check_type: bool = False):
matches = []
for item in response.get("matches", []):
sc = ScoredVector(
id=item["id"],
Expand All @@ -98,24 +70,14 @@ def parse_query_response(response: dict, unary_query: bool, _check_type: bool =
metadata=item.get("metadata", None),
_check_type=_check_type,
)
m.append(sc)

if unary_query:
namespace = response.get("namespace", "")
matches = m
results = []
else:
namespace = ""
matches = []
results = res
matches.append(sc)

usage = parse_usage(response)


kw = QueryResponseKwargs(_check_type, namespace, matches, results, usage)
kw_dict = kw._asdict()
kw_dict["_check_type"] = kw.check_type
return QueryResponse(**kw._asdict())
return QueryResponse(
namespace=response.get("namespace", ""),
matches=matches,
usage = parse_usage(response),
_check_type=_check_type
)


def parse_stats_response(response: dict):
Expand Down
1 change: 0 additions & 1 deletion pinecone/grpc/vector_factory_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from pinecone.core.grpc.protos.vector_service_pb2 import (
Vector as GRPCVector,
QueryVector as GRPCQueryVector,
SparseValues as GRPCSparseValues,
)
from pinecone import (
Expand Down
22 changes: 0 additions & 22 deletions tests/unit/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,28 +363,6 @@ def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker):
pinecone.QueryRequest(top_k=10, vector=self.vals1, filter=self.filter1, namespace="ns")
)

def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker):
mocker.patch.object(self.index._vector_api, "query", autospec=True)
self.index.query(top_k=10, queries=[(self.vals1,), (self.vals2,)])
self.index._vector_api.query.assert_called_once_with(
pinecone.QueryRequest(
top_k=10, queries=[pinecone.QueryVector(values=self.vals1), pinecone.QueryVector(values=self.vals2)]
)
)

def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker):
mocker.patch.object(self.index._vector_api, "query", autospec=True)
self.index.query(top_k=10, queries=[(self.vals1, self.filter1), (self.vals2, self.filter2)])
self.index._vector_api.query.assert_called_once_with(
pinecone.QueryRequest(
top_k=10,
queries=[
pinecone.QueryVector(values=self.vals1, filter=self.filter1),
pinecone.QueryVector(values=self.vals2, filter=self.filter2),
],
)
)

def test_query_byVecId_queryByVecId(self, mocker):
mocker.patch.object(self.index._vector_api, "query", autospec=True)
self.index.query(top_k=10, id="vec1", include_metadata=True, include_values=False)
Expand Down
28 changes: 0 additions & 28 deletions tests/unit_grpc/test_grpc_index_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
from pinecone.grpc import GRPCIndex
from pinecone.core.grpc.protos.vector_service_pb2 import (
QueryRequest,
QueryVector,
)
from pinecone.grpc.utils import dict_to_proto_struct


class TestGrpcIndexQuery:
def setup_method(self):
self.config = Config(api_key="test-api-key", host="foo")
Expand All @@ -36,32 +34,6 @@ def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker, vals1, fil
timeout=10,
)

def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker, vals1, vals2):
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True)
self.index.query(top_k=10, queries=[(vals1,), (vals2,)])
self.index._wrap_grpc_call.assert_called_once_with(
self.index.stub.Query,
QueryRequest(
queries=[QueryVector(values=vals1, filter={}), QueryVector(values=vals2, filter={})], top_k=10
),
timeout=None,
)

def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker, vals1, vals2, filter1, filter2):
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True)
self.index.query(top_k=10, queries=[(vals1, filter1), (vals2, filter2)])
self.index._wrap_grpc_call.assert_called_once_with(
self.index.stub.Query,
QueryRequest(
queries=[
QueryVector(values=vals1, filter=dict_to_proto_struct(filter1)),
QueryVector(values=vals2, filter=dict_to_proto_struct(filter2)),
],
top_k=10,
),
timeout=None,
)

def test_query_byVecId_queryByVecId(self, mocker):
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True)
self.index.query(top_k=10, id="vec1", include_metadata=True, include_values=False)
Expand Down
Loading