Skip to content

Commit 1cee707

Browse files
committed
Refactoring
Refactor, further delineate responsibilities of client vs. build request, add more abstraction Signed-off-by: owenhalpert <ohalpert@gmail.com>
1 parent 158004c commit 1cee707

15 files changed

+261
-251
lines changed

src/main/java/org/opensearch/knn/index/codec/nativeindex/remote/RemoteIndexBuildStrategy.java

+12-22
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
import org.opensearch.knn.index.KNNSettings;
1414
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategy;
1515
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
16+
import org.opensearch.knn.index.remote.HTTPRemoteBuildRequest;
1617
import org.opensearch.knn.index.remote.RemoteBuildRequest;
18+
import org.opensearch.knn.index.remote.RemoteBuildRequestBuilder;
1719
import org.opensearch.knn.index.remote.RemoteBuildResponse;
1820
import org.opensearch.knn.index.remote.RemoteIndexClient;
19-
import org.opensearch.knn.index.remote.RemoteIndexHTTPClient;
21+
import org.opensearch.knn.index.remote.RemoteIndexClientFactory;
2022
import org.opensearch.repositories.RepositoriesService;
2123
import org.opensearch.repositories.Repository;
2224
import org.opensearch.repositories.RepositoryMissingException;
@@ -27,7 +29,6 @@
2729

2830
import static org.opensearch.knn.index.KNNSettings.KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING;
2931
import static org.opensearch.knn.index.KNNSettings.KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD_SETTING;
30-
import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING;
3132
import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_VECTOR_REPO_SETTING;
3233

3334
/**
@@ -129,15 +130,16 @@ public void buildAndWriteIndex(BuildIndexParams indexInfo) throws IOException {
129130
time_in_millis = stopWatch.stop().totalTime().millis();
130131
log.debug("Repository write took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
131132

133+
// TODO future implementations will set the following two params depending on some setting to denote the protocol
134+
RemoteIndexClient client = RemoteIndexClientFactory.getRemoteIndexClient(RemoteIndexClientFactory.TYPE_HTTP);
135+
RemoteBuildRequest request = RemoteBuildRequestBuilder.builder(HTTPRemoteBuildRequest.class)
136+
.indexSettings(indexSettings)
137+
.indexInfo(indexInfo)
138+
.repositoryMetadata(getRepository().getMetadata())
139+
.blobName(blobName)
140+
.build();
132141
stopWatch = new StopWatch().start();
133-
RemoteIndexClient client = getRemoteIndexClient();
134-
RemoteBuildRequest remoteBuildRequest = client.constructBuildRequest(
135-
indexSettings,
136-
indexInfo,
137-
getRepository().getMetadata(),
138-
blobName
139-
);
140-
RemoteBuildResponse remoteBuildResponse = client.submitVectorBuild(remoteBuildRequest);
142+
RemoteBuildResponse remoteBuildResponse = client.submitVectorBuild(request);
141143
time_in_millis = stopWatch.stop().totalTime().millis();
142144
log.debug("Submit vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
143145

@@ -174,16 +176,4 @@ private BlobStoreRepository getRepository() throws RepositoryMissingException {
174176
assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
175177
return (BlobStoreRepository) repository;
176178
}
177-
178-
/**
179-
* Determine which implementation of RemoteIndexClient to be used by the build strategy
180-
* @return Concrete RemoteIndexClient implementation
181-
*/
182-
private RemoteIndexClient getRemoteIndexClient() {
183-
String endpoint = KNNSettings.state().getSettingValue(KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING.getKey());
184-
if (endpoint == null || endpoint.isEmpty()) {
185-
throw new IllegalArgumentException("No endpoint set for RemoteIndexClient");
186-
}
187-
return RemoteIndexHTTPClient.getInstance();
188-
}
189179
}

src/main/java/org/opensearch/knn/index/engine/KNNEngine.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import com.google.common.collect.ImmutableSet;
99
import org.opensearch.common.ValidationException;
1010
import org.opensearch.knn.index.SpaceType;
11-
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
1211
import org.opensearch.knn.index.engine.faiss.Faiss;
1312
import org.opensearch.knn.index.engine.lucene.Lucene;
1413
import org.opensearch.knn.index.engine.nmslib.Nmslib;
@@ -179,8 +178,8 @@ public KNNLibraryIndexingContext getKNNLibraryIndexingContext(
179178
}
180179

181180
@Override
182-
public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams params) {
183-
return knnLibrary.getRemoteIndexingParameters(params);
181+
public Map<String, Object> getRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
182+
return knnLibrary.getRemoteIndexingParameters(indexInfoParameters);
184183
}
185184

186185
@Override

src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import org.opensearch.common.ValidationException;
99
import org.opensearch.knn.index.SpaceType;
10-
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
1110

1211
import java.util.Collections;
1312
import java.util.List;
@@ -152,9 +151,9 @@ default boolean supportsRemoteIndexBuild() {
152151

153152
/**
154153
* Get the remote build supported index parameter mapping to be sent to the remote build service.
155-
* @param params to parse
154+
* @param indexInfoParameters the index parameters from BuildIndexParams
156155
*/
157-
default Map<String, Object> getRemoteIndexingParameters(BuildIndexParams params) {
156+
default Map<String, Object> getRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
158157
throw new UnsupportedOperationException("This method must be implemented by the implementing class");
159158
}
160159
}

src/main/java/org/opensearch/knn/index/engine/faiss/Faiss.java

+9-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import com.google.common.collect.ImmutableMap;
99
import org.opensearch.knn.common.KNNConstants;
1010
import org.opensearch.knn.index.SpaceType;
11-
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
1211
import org.opensearch.knn.index.engine.KNNMethod;
1312
import org.opensearch.knn.index.engine.KNNMethodConfigContext;
1413
import org.opensearch.knn.index.engine.KNNMethodContext;
@@ -128,20 +127,22 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) {
128127
return spaceType.scoreToDistanceTranslation(score);
129128
}
130129

130+
// TODO refactor to make the index parameter fetching more intelligent and less cumbersome
131131
/**
132132
* Get the parameters that need to be passed to the remote build service for training
133-
* @param indexInfo to parse
133+
*
134+
* @param indexInfoParameters result of indexInfo.getParameters() to parse
134135
* @return Map of parameters to be used as "index_parameters"
135136
*/
136137
@Override
137-
public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams indexInfo) {
138+
public Map<String, Object> getRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
138139
Map<String, Object> indexParameters = new HashMap<>();
139-
String methodName = (String) indexInfo.getParameters().get(NAME);
140+
String methodName = (String) indexInfoParameters.get(NAME);
140141
indexParameters.put(ALGORITHM, methodName);
141-
indexParameters.put(METHOD_PARAMETER_SPACE_TYPE, indexInfo.getParameters().getOrDefault(SPACE_TYPE, INDEX_KNN_DEFAULT_SPACE_TYPE));
142+
indexParameters.put(METHOD_PARAMETER_SPACE_TYPE, indexInfoParameters.getOrDefault(SPACE_TYPE, INDEX_KNN_DEFAULT_SPACE_TYPE));
142143

143-
assert (indexInfo.getParameters().containsKey(PARAMETERS));
144-
Object innerParams = indexInfo.getParameters().get(PARAMETERS);
144+
assert (indexInfoParameters.containsKey(PARAMETERS));
145+
Object innerParams = indexInfoParameters.get(PARAMETERS);
145146
assert (innerParams instanceof Map);
146147
{
147148
Map<String, Object> algorithmParams = new HashMap<>();
@@ -156,7 +157,7 @@ public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams indexInf
156157
METHOD_PARAMETER_EF_SEARCH,
157158
innerMap.getOrDefault(METHOD_PARAMETER_EF_SEARCH, INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH)
158159
);
159-
Object indexDescription = indexInfo.getParameters().get(INDEX_DESCRIPTION_PARAMETER);
160+
Object indexDescription = indexInfoParameters.get(INDEX_DESCRIPTION_PARAMETER);
160161
assert indexDescription instanceof String;
161162
algorithmParams.put(METHOD_PARAMETER_M, getMFromIndexDescription((String) indexDescription));
162163
}

src/main/java/org/opensearch/knn/index/remote/HTTPRemoteBuildRequest.java

+3-9
Original file line numberDiff line numberDiff line change
@@ -10,39 +10,33 @@
1010
import org.opensearch.common.xcontent.json.JsonXContent;
1111
import org.opensearch.core.xcontent.XContentBuilder;
1212
import org.opensearch.index.IndexSettings;
13-
import org.opensearch.knn.index.KNNSettings;
1413
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
1514

1615
import java.io.IOException;
17-
import java.net.URI;
1816

1917
import static org.opensearch.knn.common.KNNConstants.CONTAINER_NAME;
2018
import static org.opensearch.knn.common.KNNConstants.DIMENSION;
2119
import static org.opensearch.knn.common.KNNConstants.DOC_COUNT;
22-
import static org.opensearch.knn.common.KNNConstants.VECTOR_PATH;
2320
import static org.opensearch.knn.common.KNNConstants.DOC_ID_PATH;
2421
import static org.opensearch.knn.common.KNNConstants.INDEX_PARAMETERS;
2522
import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
2623
import static org.opensearch.knn.common.KNNConstants.REPOSITORY_TYPE;
2724
import static org.opensearch.knn.common.KNNConstants.TENANT_ID;
2825
import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;
29-
import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING;
26+
import static org.opensearch.knn.common.KNNConstants.VECTOR_PATH;
3027

3128
/**
32-
* RemoteBuildRequest implementation for HTTP clients that sets the endpoint and offers a JSON conversion.
29+
* HTTP-specific implementation of RemoteBuildRequest.
3330
*/
3431
@Getter
3532
public class HTTPRemoteBuildRequest extends RemoteBuildRequest {
36-
private final URI endpoint;
37-
3833
public HTTPRemoteBuildRequest(
3934
IndexSettings indexSettings,
4035
BuildIndexParams indexInfo,
4136
RepositoryMetadata repositoryMetadata,
4237
String blobName
4338
) throws IOException {
4439
super(indexSettings, indexInfo, repositoryMetadata, blobName);
45-
this.endpoint = URI.create(KNNSettings.state().getSettingValue(KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING.getKey()));
4640
}
4741

4842
public String toJson() throws IOException {
@@ -55,7 +49,7 @@ public String toJson() throws IOException {
5549
builder.field(TENANT_ID, tenantId);
5650
builder.field(DIMENSION, dimension);
5751
builder.field(DOC_COUNT, docCount);
58-
builder.field(VECTOR_DATA_TYPE_FIELD, dataType);
52+
builder.field(VECTOR_DATA_TYPE_FIELD, vectorDataType);
5953
builder.field(KNN_ENGINE, engine);
6054
builder.field(INDEX_PARAMETERS, indexParameters);
6155
builder.endObject();

src/main/java/org/opensearch/knn/index/remote/HTTPRemoteBuildResponse.java

-24
This file was deleted.

src/main/java/org/opensearch/knn/index/remote/RemoteBuildRequest.java

+21-22
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
package org.opensearch.knn.index.remote;
77

8+
import lombok.Getter;
89
import org.opensearch.cluster.ClusterName;
910
import org.opensearch.cluster.metadata.RepositoryMetadata;
10-
import lombok.Getter;
1111
import org.opensearch.index.IndexSettings;
1212
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
1313
import org.opensearch.knn.index.codec.nativeindex.remote.RemoteIndexBuildStrategy;
@@ -20,28 +20,22 @@
2020
import static org.opensearch.knn.common.KNNConstants.BUCKET;
2121
import static org.opensearch.knn.common.KNNConstants.S3;
2222

23+
/**
24+
* Abstract base class for Remote Build Requests.
25+
*/
2326
@Getter
2427
public abstract class RemoteBuildRequest {
25-
public String repositoryType;
26-
public String containerName;
27-
public String vectorPath;
28-
public String docIdPath;
29-
public String tenantId;
30-
public int dimension;
31-
public int docCount;
32-
public String dataType;
33-
public String engine;
34-
public Map<String, Object> indexParameters;
28+
protected String repositoryType;
29+
protected String containerName;
30+
protected String vectorPath;
31+
protected String docIdPath;
32+
protected String tenantId;
33+
protected int dimension;
34+
protected int docCount;
35+
protected String vectorDataType;
36+
protected String engine;
37+
protected Map<String, Object> indexParameters;
3538

36-
/**
37-
* Construct the RemoteBuildRequest object for the index build request.
38-
* Regardless of the Client implementation, the remote build service will need these parameters to build the index.
39-
* This method being included in the interface ensures that the request parameters will be set for each Client implementation to use or transform for its needs.
40-
* @param indexSettings Index settings
41-
* @param indexInfo Index parameters
42-
* @param repositoryMetadata Metadata of the repository containing the index
43-
* @param blobName File name generated by the Build Strategy with a UUID
44-
*/
4539
public RemoteBuildRequest(
4640
IndexSettings indexSettings,
4741
BuildIndexParams indexInfo,
@@ -62,7 +56,7 @@ public RemoteBuildRequest(
6256
KNNCodecUtil.initializeVectorValues(vectorValues);
6357
assert (vectorValues.dimension() > 0);
6458

65-
Map<String, Object> indexParameters = indexInfo.getKnnEngine().getRemoteIndexingParameters(indexInfo);
59+
Map<String, Object> indexParameters = indexInfo.getKnnEngine().getRemoteIndexingParameters(indexInfo.getParameters());
6660

6761
this.repositoryType = repositoryType;
6862
this.containerName = containerName;
@@ -71,8 +65,13 @@ public RemoteBuildRequest(
7165
this.tenantId = indexSettings.getSettings().get(ClusterName.CLUSTER_NAME_SETTING.getKey());
7266
this.dimension = vectorValues.dimension();
7367
this.docCount = indexInfo.getTotalLiveDocs();
74-
this.dataType = vectorDataType;
68+
this.vectorDataType = vectorDataType;
7569
this.engine = indexInfo.getKnnEngine().getName();
7670
this.indexParameters = indexParameters;
7771
}
72+
73+
/**
74+
* Convert the request to JSON format.
75+
*/
76+
public abstract String toJson() throws IOException;
7877
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package org.opensearch.knn.index.remote;
6+
7+
import org.opensearch.cluster.metadata.RepositoryMetadata;
8+
import org.opensearch.index.IndexSettings;
9+
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
10+
11+
import java.io.IOException;
12+
13+
/**
14+
* Generic Builder for constructing RemoteBuildRequest objects per requestType (HTTP, etc).
15+
*/
16+
public class RemoteBuildRequestBuilder<T extends RemoteBuildRequest> {
17+
private IndexSettings indexSettings;
18+
private BuildIndexParams indexInfo;
19+
private RepositoryMetadata repositoryMetadata;
20+
private String blobName;
21+
private final Class<T> requestType;
22+
23+
// Private constructor
24+
private RemoteBuildRequestBuilder(Class<T> requestType) {
25+
this.requestType = requestType;
26+
}
27+
28+
/**
29+
* Static factory method to create a builder instance.
30+
* @param requestType Class type of the RemoteBuildRequest
31+
* @return RemoteBuildRequestBuilder instance
32+
*/
33+
public static <T extends RemoteBuildRequest> RemoteBuildRequestBuilder<T> builder(Class<T> requestType) {
34+
return new RemoteBuildRequestBuilder<>(requestType);
35+
}
36+
37+
public RemoteBuildRequestBuilder<T> indexSettings(IndexSettings indexSettings) {
38+
this.indexSettings = indexSettings;
39+
return this;
40+
}
41+
42+
public RemoteBuildRequestBuilder<T> indexInfo(BuildIndexParams indexInfo) {
43+
this.indexInfo = indexInfo;
44+
return this;
45+
}
46+
47+
public RemoteBuildRequestBuilder<T> repositoryMetadata(RepositoryMetadata repositoryMetadata) {
48+
this.repositoryMetadata = repositoryMetadata;
49+
return this;
50+
}
51+
52+
public RemoteBuildRequestBuilder<T> blobName(String blobName) {
53+
this.blobName = blobName;
54+
return this;
55+
}
56+
57+
public T build() throws IOException {
58+
try {
59+
return requestType.getConstructor(IndexSettings.class, BuildIndexParams.class, RepositoryMetadata.class, String.class)
60+
.newInstance(indexSettings, indexInfo, repositoryMetadata, blobName);
61+
} catch (Exception e) {
62+
throw new IOException("Failed to instantiate RemoteBuildRequest", e);
63+
}
64+
}
65+
}

src/main/java/org/opensearch/knn/index/remote/RemoteBuildResponse.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
package org.opensearch.knn.index.remote;
77

88
/**
9-
* Generic remote build response interface
9+
* Remote build response. Currently, this just contains the jobId from the server.
10+
* In the future, this may be an interface if different clients expect different responses.
1011
*/
11-
public interface RemoteBuildResponse {
12-
String getJobId();
12+
public record RemoteBuildResponse(String jobId) {
1313
}

0 commit comments

Comments
 (0)