Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[serving] allow load model with specified engine #977

Merged
merged 1 commit into from
May 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ private void initModelStore() throws IOException {
modelManager.registerModel(
ModelInfo.inferModelNameFromUrl(url),
url,
null,
configManager.getBatchSize(),
configManager.getMaxBatchDelay(),
configManager.getMaxIdleTime());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,15 @@ private void predict(
throw new ModelNotFoundException("Permission denied: " + modelUrl);
}
}
String engineName = input.getProperty("engine_name", null);

logger.info("Loading model {} from: {}", modelName, modelUrl);

modelManager
.registerModel(
modelName,
modelUrl,
engineName,
ConfigManager.getInstance().getBatchSize(),
ConfigManager.getInstance().getMaxBatchDelay(),
ConfigManager.getInstance().getMaxIdleTime())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,25 @@
*/
public class ManagementRequestHandler extends HttpRequestHandler {

/** HTTP Paramater "synchronous". */
/** HTTP Parameter "synchronous". */
private static final String SYNCHRONOUS_PARAMETER = "synchronous";
/** HTTP Paramater "initial_workers". */
/** HTTP Parameter "initial_workers". */
private static final String INITIAL_WORKERS_PARAMETER = "initial_workers";
/** HTTP Paramater "url". */
/** HTTP Parameter "url". */
private static final String URL_PARAMETER = "url";
/** HTTP Paramater "batch_size". */
/** HTTP Parameter "batch_size". */
private static final String BATCH_SIZE_PARAMETER = "batch_size";
/** HTTP Paramater "model_name". */
/** HTTP Parameter "model_name". */
private static final String MODEL_NAME_PARAMETER = "model_name";
/** HTTP Paramater "max_batch_delay". */
/** HTTP Parameter "engine_name". */
private static final String ENGINE_NAME_PARAMETER = "engine_name";
/** HTTP Parameter "max_batch_delay". */
private static final String MAX_BATCH_DELAY_PARAMETER = "max_batch_delay";
/** HTTP Paramater "max_idle_time". */
/** HTTP Parameter "max_idle_time". */
private static final String MAX_IDLE_TIME__PARAMETER = "max_idle_time";
/** HTTP Paramater "max_worker". */
/** HTTP Parameter "max_worker". */
private static final String MAX_WORKER_PARAMETER = "max_worker";
/** HTTP Paramater "min_worker". */
/** HTTP Parameter "min_worker". */
private static final String MIN_WORKER_PARAMETER = "min_worker";

private static final Pattern PATTERN = Pattern.compile("^/models([/?].*)?");
Expand Down Expand Up @@ -147,6 +149,7 @@ private void handleRegisterModel(final ChannelHandlerContext ctx, QueryStringDec
if (modelName == null || modelName.isEmpty()) {
modelName = ModelInfo.inferModelNameFromUrl(modelUrl);
}
String engineName = NettyUtils.getParameter(decoder, ENGINE_NAME_PARAMETER, null);
int batchSize = NettyUtils.getIntParameter(decoder, BATCH_SIZE_PARAMETER, 1);
int maxBatchDelay = NettyUtils.getIntParameter(decoder, MAX_BATCH_DELAY_PARAMETER, 100);
int maxIdleTime = NettyUtils.getIntParameter(decoder, MAX_IDLE_TIME__PARAMETER, 60);
Expand All @@ -159,7 +162,7 @@ private void handleRegisterModel(final ChannelHandlerContext ctx, QueryStringDec
final ModelManager modelManager = ModelManager.getInstance();
CompletableFuture<ModelInfo> future =
modelManager.registerModel(
modelName, modelUrl, batchSize, maxBatchDelay, maxIdleTime);
modelName, modelUrl, engineName, batchSize, maxBatchDelay, maxIdleTime);
CompletableFuture<Void> f =
future.thenAccept(
modelInfo ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public static ModelManager getInstance() {
*
* @param modelName the name of the model for HTTP endpoint
* @param modelUrl the model url
* @param engineName the name of the engine used to load the model; may be {@code null} when no engine is specified
* @param batchSize the batch size
* @param maxBatchDelay the maximum delay for batching
* @param maxIdleTime the maximum idle time of the worker threads before scaling down.
Expand All @@ -83,6 +84,7 @@ public static ModelManager getInstance() {
public CompletableFuture<ModelInfo> registerModel(
final String modelName,
final String modelUrl,
final String engineName,
final int batchSize,
final int maxBatchDelay,
final int maxIdleTime) {
Expand All @@ -93,6 +95,7 @@ public CompletableFuture<ModelInfo> registerModel(
Criteria.builder()
.setTypes(Input.class, Output.class)
.optModelUrls(modelUrl)
.optEngine(engineName)
.build();
ZooModel<Input, Output> model = ModelZoo.loadModel(criteria);
ModelInfo modelInfo =
Expand Down