Skip to content

Commit

Permalink
[serving] allow load model with specified engine (#977)
Browse files Browse the repository at this point in the history
Change-Id: I0c4113a42dc30e31b334f6bb7ef1e35215a88b6d
  • Loading branch information
frankfliu authored May 24, 2021
1 parent 993071d commit b9eb77f
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ private void initModelStore() throws IOException {
modelManager.registerModel(
ModelInfo.inferModelNameFromUrl(url),
url,
null,
configManager.getBatchSize(),
configManager.getMaxBatchDelay(),
configManager.getMaxIdleTime());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,15 @@ private void predict(
throw new ModelNotFoundException("Permission denied: " + modelUrl);
}
}
String engineName = input.getProperty("engine_name", null);

logger.info("Loading model {} from: {}", modelName, modelUrl);

modelManager
.registerModel(
modelName,
modelUrl,
engineName,
ConfigManager.getInstance().getBatchSize(),
ConfigManager.getInstance().getMaxBatchDelay(),
ConfigManager.getInstance().getMaxIdleTime())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,25 @@
*/
public class ManagementRequestHandler extends HttpRequestHandler {

/** HTTP Paramater "synchronous". */
/** HTTP Parameter "synchronous". */
private static final String SYNCHRONOUS_PARAMETER = "synchronous";
/** HTTP Paramater "initial_workers". */
/** HTTP Parameter "initial_workers". */
private static final String INITIAL_WORKERS_PARAMETER = "initial_workers";
/** HTTP Paramater "url". */
/** HTTP Parameter "url". */
private static final String URL_PARAMETER = "url";
/** HTTP Paramater "batch_size". */
/** HTTP Parameter "batch_size". */
private static final String BATCH_SIZE_PARAMETER = "batch_size";
/** HTTP Paramater "model_name". */
/** HTTP Parameter "model_name". */
private static final String MODEL_NAME_PARAMETER = "model_name";
/** HTTP Paramater "max_batch_delay". */
/** HTTP Parameter "engine_name". */
private static final String ENGINE_NAME_PARAMETER = "engine_name";
/** HTTP Parameter "max_batch_delay". */
private static final String MAX_BATCH_DELAY_PARAMETER = "max_batch_delay";
/** HTTP Paramater "max_idle_time". */
/** HTTP Parameter "max_idle_time". */
private static final String MAX_IDLE_TIME__PARAMETER = "max_idle_time";
/** HTTP Paramater "max_worker". */
/** HTTP Parameter "max_worker". */
private static final String MAX_WORKER_PARAMETER = "max_worker";
/** HTTP Paramater "min_worker". */
/** HTTP Parameter "min_worker". */
private static final String MIN_WORKER_PARAMETER = "min_worker";

private static final Pattern PATTERN = Pattern.compile("^/models([/?].*)?");
Expand Down Expand Up @@ -147,6 +149,7 @@ private void handleRegisterModel(final ChannelHandlerContext ctx, QueryStringDec
if (modelName == null || modelName.isEmpty()) {
modelName = ModelInfo.inferModelNameFromUrl(modelUrl);
}
String engineName = NettyUtils.getParameter(decoder, ENGINE_NAME_PARAMETER, null);
int batchSize = NettyUtils.getIntParameter(decoder, BATCH_SIZE_PARAMETER, 1);
int maxBatchDelay = NettyUtils.getIntParameter(decoder, MAX_BATCH_DELAY_PARAMETER, 100);
int maxIdleTime = NettyUtils.getIntParameter(decoder, MAX_IDLE_TIME__PARAMETER, 60);
Expand All @@ -159,7 +162,7 @@ private void handleRegisterModel(final ChannelHandlerContext ctx, QueryStringDec
final ModelManager modelManager = ModelManager.getInstance();
CompletableFuture<ModelInfo> future =
modelManager.registerModel(
modelName, modelUrl, batchSize, maxBatchDelay, maxIdleTime);
modelName, modelUrl, engineName, batchSize, maxBatchDelay, maxIdleTime);
CompletableFuture<Void> f =
future.thenAccept(
modelInfo ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public static ModelManager getInstance() {
*
* @param modelName the name of the model for HTTP endpoint
* @param modelUrl the model url
* @param engineName the name of the engine used to load the model, or {@code null} if no engine was specified
* @param batchSize the batch size
* @param maxBatchDelay the maximum delay for batching
* @param maxIdleTime the maximum idle time of the worker threads before scaling down.
Expand All @@ -83,6 +84,7 @@ public static ModelManager getInstance() {
public CompletableFuture<ModelInfo> registerModel(
final String modelName,
final String modelUrl,
final String engineName,
final int batchSize,
final int maxBatchDelay,
final int maxIdleTime) {
Expand All @@ -93,6 +95,7 @@ public CompletableFuture<ModelInfo> registerModel(
Criteria.builder()
.setTypes(Input.class, Output.class)
.optModelUrls(modelUrl)
.optEngine(engineName)
.build();
ZooModel<Input, Output> model = ModelZoo.loadModel(criteria);
ModelInfo modelInfo =
Expand Down

0 comments on commit b9eb77f

Please sign in to comment.