Skip to content

Commit

Permalink
Merge pull request #1 from mmga-lab/add_qps_measure
Browse files Browse the repository at this point in the history
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
  • Loading branch information
zhuwenxing authored Dec 11, 2024
2 parents 95657dc + dc4153b commit 6560d2f
Show file tree
Hide file tree
Showing 13 changed files with 1,472 additions and 129 deletions.
114 changes: 26 additions & 88 deletions example/milvus_beir_example.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from beir import util
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from pymilvus import MilvusClient
from ranx import Qrels, Run, compare

from milvus_beir.retrieval.search.dense.dense_search import MilvusDenseSearch
from milvus_beir.retrieval.search.hybrid.bm25_hybrid_search import MilvusBM25DenseHybridSearch
Expand All @@ -11,95 +9,35 @@
from milvus_beir.retrieval.search.lexical.multi_match_search import MilvusMultiMatchSearch
from milvus_beir.retrieval.search.sparse.sparse_search import MilvusSparseSearch

# BEIR dataset to benchmark against. "scifact" is small (~5k docs),
# which keeps download and indexing time short for a demo run.
dataset = "scifact"
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
# Download (skipped if cached) and unzip into ./datasets; returns the extracted path.
data_path = util.download_and_unzip(url, "datasets")
# corpus: doc_id -> document fields; queries: query_id -> query text;
# qrels: query_id -> {doc_id: relevance} judgments for the test split.
corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")


print("Corpus:", len(corpus))
print("Queries:", len(queries))

# Single Milvus connection shared by every searcher below.
milvus_client = MilvusClient(uri="http://10.104.20.192:19530")

# (run name for ranx, searcher class) pairs, in the order they are evaluated.
# All searchers share the same collection and batching parameters:
# nq = queries per search batch, nb = documents per insert batch.
_SEARCHERS = [
    ("dense", MilvusDenseSearch),
    ("sparse", MilvusSparseSearch),
    ("bm25_dense_hybrid", MilvusBM25DenseHybridSearch),
    ("sparse_dense", MilvusSparseDenseHybridSearch),
    ("multi_match", MilvusMultiMatchSearch),
    ("bm25", MilvusBM25Search),
]

# run name -> retrieval results, kept for the side-by-side comparison below.
all_results = {}
for run_name, search_cls in _SEARCHERS:
    model = search_cls(milvus_client, collection_name="milvus_beir_demo", nq=100, nb=1000)
    retriever = EvaluateRetrieval(model)
    results = retriever.retrieve(corpus, queries)
    all_results[run_name] = results
    ndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)
    print("NDCG:", ndcg)
    print("MAP:", _map)
    print("Recall:", recall)
    print("Precision:", precision)


# Compare all runs side by side with ranx; run order matches the
# original report layout, not the evaluation order above.
report = compare(
    qrels=Qrels(qrels),
    runs=[
        Run(all_results[name], name=name)
        for name in (
            "dense",
            "sparse",
            "bm25",
            "multi_match",
            "bm25_dense_hybrid",
            "sparse_dense",
        )
    ],
    metrics=["ndcg@10", "map@10", "recall@10", "precision@10"],
)
print(report)
# Milvus connection settings shared by every searcher below.
uri = "http://10.104.26.252:19530"
token = None

# Constructor arguments common to all six searchers: one demo collection,
# nq queries per search batch, nb documents per insert batch.
_common = dict(collection_name="milvus_beir_demo", nq=100, nb=1000)

models = [
    MilvusDenseSearch(uri, token, **_common),
    MilvusSparseSearch(uri, token, **_common),
    MilvusBM25DenseHybridSearch(uri, token, **_common),
    MilvusSparseDenseHybridSearch(uri, token, **_common),
    MilvusMultiMatchSearch(uri, token, **_common),
    MilvusBM25Search(uri, token, **_common),
]

# For each searcher: run BEIR retrieval, report quality metrics, then
# benchmark search throughput.
for model in models:
    retriever = EvaluateRetrieval(model)
    results = retriever.retrieve(corpus, queries)
    ndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)
    print("NDCG:", ndcg)
    print("MAP:", _map)
    print("Recall:", recall)
    print("Precision:", precision)
    # Throughput: top-1000 searches at concurrency 1 and 2, 60 s per level.
    qps = model.measure_search_qps(
        corpus, queries, top_k=1000, concurrency_levels=[1, 2], test_duration=60
    )
    print("QPS:", qps)
Loading

0 comments on commit 6560d2f

Please sign in to comment.