-
Notifications
You must be signed in to change notification settings - Fork 767
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #538 from cmuparlay/main
Add ParlayANN to ann-benchmarks
- Loading branch information
Showing
4 changed files
with
148 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,7 @@ jobs: | |
- onng_ngt | ||
- opensearchknn | ||
- panng_ngt | ||
- parlayann | ||
- pg_embedding | ||
- pgvector | ||
- pgvecto_rs | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
FROM ann-benchmarks

# Build toolchain and libraries required to compile ParlayANN's Python bindings.
# The package-list refresh and the installs live in a single layer so that a
# cached "update" layer can never be paired with a newer "install" layer
# (which leads to 404s on stale package lists). apt-get is used instead of apt
# because apt's command-line interface is not intended for scripted use.
# The git-core PPA is added before the main install so git comes from it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:git-core/ppa \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev \
        clang-format libboost-dev libboost-program-options-dev libmkl-full-dev \
        libcpprest-dev python3.10 \
    && rm -rf /var/lib/apt/lists/*

RUN pip3 install pybind11 numpy

# Pass a different value (--build-arg CACHEBUST=...) to invalidate the cached
# layers below and force a fresh clone and rebuild of ParlayANN.
ARG CACHEBUST=1
RUN git clone -b annbench https://github.com/cmuparlay/ParlayANN.git
RUN cd ParlayANN && git submodule update --init --recursive
RUN cd ParlayANN/python && bash compile.sh

# Makes the compiled bindings importable by the benchmark module.
# NOTE(review): assumes the base image's working directory is /home/app, so
# the clone above lands in /home/app/ParlayANN - confirm against the base image.
ENV PYTHONPATH=$PYTHONPATH:/home/app/ParlayANN/python
WORKDIR /home/app
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
float: | ||
euclidean: | ||
- base_args: ['@metric'] | ||
constructor: ParlayANN | ||
disabled: false | ||
docker_tag: ann-benchmarks-parlayann | ||
module: ann_benchmarks.algorithms.parlayann | ||
name: parlayann | ||
run_groups: | ||
parlay_80: | ||
args: [{alpha: 1.15, R: 80, L: 160, two_pass: True}] | ||
query_args: [[{Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 300}, {Q: 400}, {Q: 600}, {Q: 800}]] | ||
parlay_64: | ||
args: [{alpha: 1.1, R: 64, L: 128, two_pass: True}] | ||
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 300}, {Q: 400}, {Q: 600}, {Q: 800}]] | ||
parlay_40: | ||
args: [{alpha: 1.08, R: 40, L: 80, two_pass: True}] | ||
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}]] | ||
parlay_32_05: | ||
args: [{alpha: 1.05, R: 32, L: 64, two_pass: True}] | ||
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}]] | ||
angular: | ||
- base_args: ['@metric'] | ||
constructor: ParlayANN | ||
disabled: false | ||
docker_tag: ann-benchmarks-parlayann | ||
module: ann_benchmarks.algorithms.parlayann | ||
name: parlayann | ||
run_groups: | ||
parlay_130: | ||
args: [{alpha: .85, R: 130, L: 260, two_pass: True}] | ||
query_args: [[{Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 400}, {Q: 800}]] | ||
parlay_100: | ||
args: [{alpha: .85, R: 100, L: 200, two_pass: True}] | ||
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {limit: 25}, {limit: 30}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 13}, {Q: 14}, {Q: 15}, {Q: 16}, {Q: 17}, {Q: 18}, {Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 400}, {Q: 800}]] | ||
parlay_80: | ||
args: [{alpha: .90, R: 80, L: 160, two_pass: True}] | ||
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {limit: 25}, {limit: 30}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 13}, {Q: 14}, {Q: 15}, {Q: 16}, {Q: 17}, {Q: 18}, {Q: 20}]] | ||
parlay_50: | ||
args: [{alpha: .95, R: 50, L: 100, two_pass: True}] | ||
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {limit: 25}, {limit: 30}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}]] | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
from __future__ import absolute_import | ||
import psutil | ||
import os | ||
import struct | ||
import time | ||
import numpy as np | ||
import wrapper as pann | ||
|
||
from ..base.module import BaseANN | ||
|
||
class ParlayANN(BaseANN):
    """ann-benchmarks adapter around the ParlayANN Vamana graph index.

    Build-time parameters (``R``, ``L``, ``alpha``, ``two_pass``) come from
    ``index_params``; query-time parameters (``Q``, ``limit``) are supplied
    later through :meth:`set_query_arguments`.
    """

    # Query-time defaults, identical to the fallbacks used by
    # set_query_arguments() when a key is absent.
    _DEFAULT_Q = 10
    _DEFAULT_LIMIT = 1000

    def __init__(self, metric, index_params):
        """Store the build configuration.

        Args:
            metric: ann-benchmarks metric name ('euclidean', 'ip' or 'angular').
            index_params: dict of build parameters; recognised keys are
                'R' (default 50), 'L' (default 100), 'alpha' (default 1.15)
                and 'two_pass' (default False).

        Raises:
            ValueError: if ``metric`` is not one of the supported names.
        """
        self.name = f"parlayann_({index_params})"
        self._index_params = index_params
        self._metric = self.translate_dist_fn(metric)

        self.R = int(index_params.get("R", 50))
        self.L = int(index_params.get("L", 100))
        self.alpha = float(index_params.get("alpha", 1.15))
        self.two_pass = bool(index_params.get("two_pass", False))

        # Initialise the query parameters up front so query()/batch_query()
        # do not fail with AttributeError when they are called before
        # set_query_arguments().
        self.Q = self._DEFAULT_Q
        self.limit = self._DEFAULT_LIMIT

    def translate_dist_fn(self, metric):
        """Map an ann-benchmarks metric name to the string passed to the wrapper.

        Raises:
            ValueError: for any metric other than 'euclidean', 'ip', 'angular'.
        """
        if metric == 'euclidean':
            # Spelling is intentional: this exact string is handed to the
            # native wrapper.
            return 'Euclidian'
        if metric in ('ip', 'angular'):
            # NOTE(review): 'angular' is served with max-inner-product search
            # and the vectors are not normalised here; that matches angular
            # ranking only for unit-norm data - confirm with the datasets used.
            return 'mips'
        raise ValueError(f'Invalid metric: {metric!r}')

    def translate_dtype(self, dtype: str):
        """Return 'float' for 'float32'; pass any other dtype name through."""
        return 'float' if dtype == 'float32' else dtype

    def fit(self, X):
        """Write ``X`` to disk, build the index if it is missing, and load it.

        The data file consists of two 32-bit unsigned integers (number of
        points, dimensionality) followed by the points as float32 in row-major
        order.

        Args:
            X: 2-D array of shape (number of points, dimensionality).
        """
        print("Vamana: Starting Fit...")
        index_dir = "indices"
        os.makedirs(index_dir, exist_ok=True)

        data_path = os.path.join(index_dir, "base.bin")
        save_path = os.path.join(index_dir, self.name)
        print("parlayann: Index Stored At: " + save_path)

        nb, dims = X.shape
        # Write the header as real uint32 values and stream the payload as
        # float32. For float32 input this yields the same bytes as packing the
        # header into the float array, but it stays correct for other input
        # dtypes and avoids copying the whole dataset just to prepend 8 bytes.
        with open(data_path, "wb") as data_file:
            np.array([nb, dims], dtype=np.uint32).tofile(data_file)
            np.ascontiguousarray(X, dtype=np.float32).tofile(data_file)

        # NOTE(review): the saved index is keyed only by the build parameters
        # in self.name, so an index left over from a different dataset in the
        # same directory would be reused - confirm this cannot happen.
        if not os.path.exists(save_path):
            print("parlayann: Creating Index")
            start = time.time()
            self.params = pann.build_vamana_index(
                self._metric, "float", data_path, save_path,
                self.R, self.L, self.alpha, self.two_pass,
            )
            end = time.time()
            print("Indexing time: ", end - start)
            print(f"Wrote index to {save_path}")
        self.index = pann.load_index(self._metric, "float", data_path, save_path)
        print("Index loaded")

    def query(self, X, k):
        """Return the wrapper's ``single_search`` result for one query vector.

        ``self.Q`` and ``self.limit`` are forwarded as search parameters; the
        literal ``True`` is passed through as in the wrapper's call signature
        (its meaning is defined by the native wrapper).
        """
        return self.index.single_search(X, k, self.Q, True, self.limit)

    def batch_query(self, X, k):
        """Run ``batch_search`` over all rows of ``X``.

        The neighbours and distances are kept on ``self.res`` and
        ``self.distances``; the neighbours are also returned.
        """
        print("running batch")
        self.res, self.distances = self.index.batch_search(X, k, self.Q, True, self.limit)
        return self.res

    def set_query_arguments(self, query_args):
        """Apply query-time parameters and refresh the reported name.

        Args:
            query_args: dict with optional keys 'Q' and 'limit'. A missing key
                or an explicit ``None`` falls back to 10 and 1000 respectively.
        """
        self.name = f"parlayann_({self._index_params},{query_args})"
        print(query_args)
        limit = query_args.get("limit")
        self.limit = self._DEFAULT_LIMIT if limit is None else limit
        q = query_args.get("Q")
        self.Q = self._DEFAULT_Q if q is None else q