diff --git a/build.gradle b/build.gradle index 85c949f0c04..89f64570993 100644 --- a/build.gradle +++ b/build.gradle @@ -83,7 +83,7 @@ configure(javaProjects()) { doFirst { systemProperties System.getProperties() systemProperties.remove("user.dir") - systemProperty "ai.djl.logging.level", "debug" + // systemProperty "ai.djl.logging.level", "debug" systemProperty "org.slf4j.simpleLogger.defaultLogLevel", "debug" systemProperty "org.slf4j.simpleLogger.log.org.mortbay.log", "warn" systemProperty "disableProgressBar", "true" diff --git a/examples/build.gradle b/examples/build.gradle index dd39a606f97..75aabfa1664 100644 --- a/examples/build.gradle +++ b/examples/build.gradle @@ -23,21 +23,6 @@ dependencies { runtimeOnly "ai.djl.mxnet:mxnet-native-auto:${mxnet_version}" } - // DLR, tflite, PaddlePaddle and OnnxRuntime are only used for benchmark - if (System.getProperty("ai.djl.default_engine") == "DLR") { - runtimeOnly project(":dlr:dlr-engine") - runtimeOnly "ai.djl.dlr:dlr-native-auto:${dlr_version}" - } else if (System.getProperty("ai.djl.default_engine") == "TFLite") { - runtimeOnly project(":tflite:tflite-engine") - runtimeOnly "ai.djl.tflite:tflite:tflite-native:${tflite_version}" - } else if (System.getProperty("ai.djl.default_engine") == "PaddlePaddle") { - runtimeOnly project(":paddlepaddle:paddlepaddle-model-zoo") - runtimeOnly "ai.djl.paddlepaddle:paddlepaddle-native-auto:${paddlepaddle_version}" - } else if (System.getProperty("ai.djl.default_engine") == "OnnxRuntime") { - // onnxruntime requires user install libgomp.so.1 manually, exclude from default dependency - runtimeOnly project(":onnxruntime:onnxruntime-engine") - } - testImplementation("org.testng:testng:${testng_version}") { exclude group: "junit", module: "junit" } @@ -61,45 +46,5 @@ task listmodels(type: JavaExec) { classpath = sourceSets.main.runtimeClasspath main = "ai.djl.examples.inference.ListModels" } - -task benchmark(type: JavaExec) { - environment("TF_CPP_MIN_LOG_LEVEL", "1") // turn off TensorFlow print out - List arguments = gradle.startParameter["taskRequests"]["args"].getAt(0) - for (String argument : arguments) { - if (argument.trim().startsWith("--args")) { - String[] line = argument.split("=", 2) - if (line.length == 2) { - line = line[1].split(" "); - if (line.contains("-t")) { - if (System.properties["ai.djl.default_engine"] == "PyTorch") { - System.setProperty("ai.djl.pytorch.num_interop_threads", "1") - System.setProperty("ai.djl.pytorch.num_threads", "1") - } else if (System.properties["ai.djl.default_engine"] == "TensorFlow") { - environment("OMP_NUM_THREADS", "1") - environment("TF_NUM_INTRAOP_THREADS", "1") - } else { - environment("MXNET_ENGINE_TYPE", "NaiveEngine") - environment("OMP_NUM_THREADS", "1") - } - } - break; - } - } - } - - systemProperties System.getProperties() - systemProperties.remove("user.dir") - systemProperty("file.encoding", "UTF-8") - classpath = sourceSets.main.runtimeClasspath - // restrict the jvm heap size for better monitoring benchmark - jvmArgs = ["-Xmx2g"] - if (Boolean.getBoolean("loggc")) { - if (JavaVersion.current() == JavaVersion.VERSION_1_8) { - jvmArgs += ["-XX:+PrintGCTimeStamps", "-Xloggc:build/gc.log"] - } else { - jvmArgs += ["-Xlog:gc*=debug:file=build/gc.log"] - } - } - main = "ai.djl.examples.inference.benchmark.Benchmark" -} tasks.distTar.enabled = false +tasks.distZip.enabled = false diff --git a/docs/development/benchmark_with_djl.md b/extensions/benchmark/README.md similarity index 71% rename from docs/development/benchmark_with_djl.md rename to 
extensions/benchmark/README.md
index 1f1c606ff10..608a3e00b44 100644
--- a/docs/development/benchmark_with_djl.md
+++ b/extensions/benchmark/README.md
@@ -1,6 +1,6 @@
 # Benchmark your DL model
-DJL offers a comprehensive script to benchmark the model on all different 
+DJL offers a comprehensive script to benchmark the model on all different
 platforms for single-thread/multi-thread inference performance.This document will guide you how to run benchmark with DJL.
 ## Prerequisite
@@ -31,13 +31,13 @@ the 4-step instructions for your own model.
 Benchmark on a Tensorflow model from http url with all-ones NDArray input for 10 times:
 ```
-./gradlew benchmark -Dai.djl.default_engine=TensorFlow -Dai.djl.repository.zoo.location=https://storage.googleapis.com/tfhub-modules/tensorflow/resnet_50/classification/1.tar.gz --args='-c 10 -s 1,224,224,3'
+./gradlew benchmark --args='-e TensorFlow -u https://storage.googleapis.com/tfhub-modules/tensorflow/resnet_50/classification/1.tar.gz -c 10 -s 1,224,224,3'
 ```
 Similarly, this is for PyTorch
 ```
-./gradlew benchmark -Dai.djl.default_engine=PyTorch -Dai.djl.repository.zoo.location=https://alpha-djl-demos.s3.amazonaws.com/model/djl-blockrunner/pytorch_resnet18.zip --args='-n traced_resnet18 -c 10 -s 1,3,224,224'
+./gradlew benchmark --args='-e PyTorch -u https://alpha-djl-demos.s3.amazonaws.com/model/djl-blockrunner/pytorch_resnet18.zip -n traced_resnet18 -c 10 -s 1,3,224,224'
 ```
 ### Benchmark from ModelZoo
@@ -47,7 +47,7 @@ Similarly, this is for PyTorch
 Resnet50 image classification model:
 ```
-./gradlew benchmark --args="-c 1 -s 1,3,224,224 -a ai.djl.mxnet:resnet -r {'layers':'50','flavor':'v2','dataset':'imagenet'}"
+./gradlew benchmark --args="-c 2 -s 1,3,224,224 -a ai.djl.mxnet:resnet -r {'layers':'50','flavor':'v2','dataset':'imagenet'}"
 ```
 #### PyTorch
@@ -55,7 +55,7 @@ Resnet50 image classification model:
 SSD object detection model:
 ```
-./gradlew benchmark -Dai.djl.default_engine=PyTorch --args="-c 1 -s 1,3,300,300 -a ai.djl.pytorch:ssd -r {'size':'300','backbone':'resnet50'}"
+./gradlew benchmark --args="-e PyTorch -c 2 -s 1,3,300,300 -a ai.djl.pytorch:ssd -r {'size':'300','backbone':'resnet50'}"
 ```
@@ -69,7 +69,7 @@ To start your benchmarking, we need to make sure we provide the following information
 - Sample input for the model
 - (Optional) Multi-thread benchmark
-The benchmark script located [here](https://github.com/deepjavalibrary/djl/blob/master/examples/src/main/java/ai/djl/examples/inference/benchmark/Benchmark.java).
+The benchmark script is located [here](https://github.com/deepjavalibrary/djl/blob/master/benchmark/src/main/java/ai/djl/benchmark/Benchmark.java).
 Just do the following:
@@ -90,15 +90,16 @@ usage: ./gradlew benchmark --args='[OPTIONS]'
  -a,--artifact-id Model artifact id.
  -c,--iteration Number of total iterations (per thread).
  -d,--duration Duration of the test in minutes.
- -e,--engine-name Engine name.
+ -e,--engine Choose an Engine for the benchmark.
  -h,--help Print this help.
  -l,--delay Delay of incremental threads.
- -n,--model-name Model name.
+ -n,--model-name Specify model file name.
  -o,--output-dir Directory for output logs.
  -p,--model-path Model directory file path.
  -r,--criteria The criteria (json string) used for searching the model.
- -s,--input-shapes Input data shapes for non-CV model.
+ -s,--input-shapes Input data shapes for the model.
  -t,--threads Number of inference threads.
+ -u,--model-url Model archive file URL.
 ```
 ### Step 1: Pick your deep engine
@@ -106,10 +107,15 @@ usage: ./gradlew benchmark --args='[OPTIONS]'
 By default, the above script will use MXNet as the default Engine, but you can always change that by adding the followings:
 ```
--Dai.djl.default_engine=TensorFlow # tensorflow
--Dai.djl.default_engine=PyTorch # pytorch
+--args='-e TensorFlow' # TensorFlow
+--args='-e PyTorch' # PyTorch
+--args='-e MXNet' # Apache MXNet
+--args='-e PaddlePaddle' # PaddlePaddle
+--args='-e OnnxRuntime' # ONNX Runtime
+--args='-e TFLite' # TFLite
+--args='-e DLR' # Neo DLR
+--args='-e XGBoost' # XGBoost
 ```
-to change your default engine.
 ### Step 2: Identify the source of your model
@@ -117,15 +123,19 @@ DJL accept variety of models came from different places.
 #### Remote location
+Use the `--model-url` option to load a model from a URL. The URL must point to an archive file.
+
 The following is a pytorch model
 ```
--Dai.djl.repository.zoo.location=https://alpha-djl-demos.s3.amazonaws.com/model/djl-blockrunner/pytorch_resnet18.zip
+--args='-u https://alpha-djl-demos.s3.amazonaws.com/model/djl-blockrunner/pytorch_resnet18.zip'
 ```
 We would recommend to make model files in a zip for better file tracking.
 #### Local directory
+Use the `--model-path` option to load a model from a local directory or an archive file.
+
 Mac/Linux
 ```
@@ -143,29 +153,12 @@ or
 ```
 If the model file name is different from the parent folder name (or the archive file name), you need
-to specify `-n MODEL_NAME` in the `--args`:
+to specify `--model-name` in the `--args`:
 ```
 --args='-n traced_resnet18'
 ```
-#### DJL Model zoo
-
-You can run `listmodels` to list available models that you can use from different model zoos.
-
-```
-./gradlew listmodels # MXNet models
-./gradlew listmodels -Dai.djl.default_engine=TensorFlow # TensorFlow models
-./gradlew listmodels -Dai.djl.default_engine=PyTorch # PyTorch models
-```
-
-After that, just simply copy the json formatted criteria like `{"layers":"18","flavor":"v1","dataset":"imagenet"}` with the artifact id like `ai.djl.mxnet:resnet:0.0.1`.
-Then, you can just pass the information in the `--args` (remove `0.0.1` at the end):
-
-```
--a ai.djl.mxnet:resnet -r {"layers":"18","flavor":"v1","dataset":"imagenet"}
-
 ### Step 3: Define how many runs you would like to make
 add `-c` inside with a number
@@ -178,7 +171,7 @@ This will run 1000 times inference.
 ### Step 4: Define your model inputs
-The benchmark script support dummy NDArray inputs.
+The benchmark script uses dummy NDArray inputs.
 It will make fake NDArrays (like `NDArray.ones`) to feed in the model for inference.
 If we would like to fake an image:
@@ -212,7 +205,7 @@ For example:
 -s (1)i,(384)f,(384)
 ```
-### Optional Step: multithreading inference
+### Optional Step: multithreading inference
 You can also do multi-threading inference with DJL.
 For example, if you would like to run the inference with 10 threads:
@@ -234,10 +227,10 @@ The above code will create 10 threads with the wait time of 100ms.
 For different purposes, we designed different mode you can play with. Such as the following arg:
 ```
--d 1440
+-d 86400
 ```
-This will ask the benchmark script repeatly running the designed task for 1440 minutes (24 hour).
+This will ask the benchmark script to run the designated task repeatedly for 86400 seconds (24 hours).
 If you would like to make sure DJL is stable in the long run, you can do that.
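To make the dummy inputs described in Step 4 above concrete, here is a minimal, illustrative DJL sketch (not part of this patch; the class and variable names are hypothetical) of what a shape argument such as `-s (1,3,224,224)f` boils down to: an all-ones NDArray of the requested shape and data type, wrapped in an NDList and fed to the model.

```java
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.types.DataType;
import ai.djl.ndarray.types.Shape;

public final class DummyInputSketch {

    public static void main(String[] args) {
        try (NDManager manager = NDManager.newBaseManager()) {
            // "-s (1,3,224,224)f" roughly maps to one float32 array of ones with that shape.
            NDArray image = manager.ones(new Shape(1, 3, 224, 224), DataType.FLOAT32);
            // A multi-input string such as "(1)i,(384)f,(384)" would simply add one array
            // per comma-separated group to the NDList.
            NDList input = new NDList(image);
            System.out.println(input.head().getShape());
        }
    }
}
```

Because the input is synthetic, the reported numbers reflect raw model and engine latency rather than any real pre- or post-processing.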
You can also keep monitoring the DJL memory usages by enable the following flag: diff --git a/extensions/benchmark/build.gradle b/extensions/benchmark/build.gradle new file mode 100644 index 00000000000..97fd325015f --- /dev/null +++ b/extensions/benchmark/build.gradle @@ -0,0 +1,110 @@ +plugins { + id 'application' +} + +dependencies { + implementation "commons-cli:commons-cli:${commons_cli_version}" + implementation "org.apache.logging.log4j:log4j-slf4j-impl:${log4j_slf4j_version}" + implementation project(":model-zoo") + + runtimeOnly project(":pytorch:pytorch-model-zoo") + runtimeOnly "ai.djl.pytorch:pytorch-native-auto:1.8.1" + + // javacpp bug fix https://github.com/bytedeco/javacpp/commit/7f27899578dfa18e22738a3dd49701e1806b464a + runtimeOnly "org.bytedeco:javacpp:1.5.6-SNAPSHOT" + runtimeOnly(project(":tensorflow:tensorflow-model-zoo")) { + exclude group: "org.bytedeco", module: "javacpp" + } + runtimeOnly "ai.djl.tensorflow:tensorflow-native-auto:${tensorflow_version}" + + runtimeOnly project(":mxnet:mxnet-model-zoo") + runtimeOnly "ai.djl.mxnet:mxnet-native-auto:${mxnet_version}" + + runtimeOnly project(":tflite:tflite-engine") + runtimeOnly "ai.djl.tflite:tflite-native-auto:${tflite_version}" + + runtimeOnly project(":paddlepaddle:paddlepaddle-model-zoo") + runtimeOnly "ai.djl.paddlepaddle:paddlepaddle-native-auto:${paddlepaddle_version}" + + // onnxruntime requires user install libgomp.so.1 manually, exclude from default dependency + runtimeOnly project(":onnxruntime:onnxruntime-engine") + + runtimeOnly project(":dlr:dlr-engine") + runtimeOnly "ai.djl.dlr:dlr-native-auto:${dlr_version}" + + runtimeOnly(project(":ml:xgboost")) { + exclude group: "ml.dmlc", module: "xgboost4j_2.12" + } + + testImplementation("org.testng:testng:${testng_version}") { + exclude group: "junit", module: "junit" + } +} + +application { + mainClassName = System.getProperty("main", "ai.djl.benchmark.Benchmark") +} + +run { + environment("TF_CPP_MIN_LOG_LEVEL", "1") // turn off TensorFlow print out + systemProperties System.getProperties() + systemProperties.remove("user.dir") + systemProperty("file.encoding", "UTF-8") +} + +task benchmark(type: JavaExec) { + environment("TF_CPP_MIN_LOG_LEVEL", "1") // turn off TensorFlow print out + List arguments = gradle.startParameter["taskRequests"]["args"].getAt(0) + for (String argument : arguments) { + if (argument.trim().startsWith("--args")) { + String[] line = argument.split("=", 2) + if (line.length == 2) { + line = line[1].split(" ") + if (line.contains("-t")) { + if (System.getProperty("ai.djl.default_engine") == "TensorFlow") { + environment("OMP_NUM_THREADS", "1") + environment("TF_NUM_INTRAOP_THREADS", "1") + } else { + environment("MXNET_ENGINE_TYPE", "NaiveEngine") + environment("OMP_NUM_THREADS", "1") + } + } + break + } + } + } + + systemProperties System.getProperties() + systemProperties.remove("user.dir") + systemProperty("file.encoding", "UTF-8") + classpath = sourceSets.main.runtimeClasspath + // restrict the jvm heap size for better monitoring benchmark + jvmArgs = ["-Xmx2g"] + if (Boolean.getBoolean("loggc")) { + if (JavaVersion.current() == JavaVersion.VERSION_1_8) { + jvmArgs += ["-XX:+PrintGCTimeStamps", "-Xloggc:build/gc.log"] + } else { + jvmArgs += ["-Xlog:gc*=debug:file=build/gc.log"] + } + } + main = "ai.djl.benchmark.Benchmark" +} + +startScripts { + defaultJvmOpts = [] + doLast { + String replacement = 'CLASSPATH=\\$APP_HOME/lib/*\n\n' + + 'if [[ "\\$*" == *-t* || "\\$*" == *--threads* ]]\n' + + 'then\n' + + ' export 
TF_CPP_MIN_LOG_LEVEL=1\n' + + ' export MXNET_ENGINE_TYPE=NaiveEngine\n' + + ' export OMP_NUM_THREADS=1\n' + + ' export TF_NUM_INTRAOP_THREADS=1\n' + + 'fi' + + String text = unixScript.text.replaceAll('CLASSPATH=\\$APP_HOME/lib/.*', replacement) + unixScript.text = text + } +} + +tasks.distZip.enabled = false diff --git a/examples/src/main/java/ai/djl/examples/inference/benchmark/util/AbstractBenchmark.java b/extensions/benchmark/src/main/java/ai/djl/benchmark/AbstractBenchmark.java similarity index 85% rename from examples/src/main/java/ai/djl/examples/inference/benchmark/util/AbstractBenchmark.java rename to extensions/benchmark/src/main/java/ai/djl/benchmark/AbstractBenchmark.java index 211f929c3e2..eeecdef031a 100644 --- a/examples/src/main/java/ai/djl/examples/inference/benchmark/util/AbstractBenchmark.java +++ b/extensions/benchmark/src/main/java/ai/djl/benchmark/AbstractBenchmark.java @@ -10,12 +10,11 @@ * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ -package ai.djl.examples.inference.benchmark.util; +package ai.djl.benchmark; import ai.djl.Device; import ai.djl.ModelException; import ai.djl.engine.Engine; -import ai.djl.examples.inference.benchmark.MultithreadedBenchmark; import ai.djl.metric.Metrics; import ai.djl.ndarray.NDList; import ai.djl.ndarray.types.DataType; @@ -41,7 +40,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Abstract class that encapsulate command line options for example project. */ +/** Abstract benchmark class. */ public abstract class AbstractBenchmark { private static final Logger logger = LoggerFactory.getLogger(AbstractBenchmark.class); @@ -64,52 +63,31 @@ protected abstract float[] predict(Arguments arguments, Metrics metrics, int ite throws IOException, ModelException, TranslateException, ClassNotFoundException; /** - * Returns command line options. - * - *

Child class can override this method and return different command line options. - * - * @return command line options - */ - protected Options getOptions() { - return Arguments.getOptions(); - } - - /** - * Parse command line into arguments. - * - *

Child class can override this method and return extension of {@link Arguments}. - * - * @param cmd list of arguments parsed against a {@link Options} descriptor - * @return parsed arguments - */ - protected Arguments parseArguments(CommandLine cmd) { - return new Arguments(cmd); - } - - /** - * Execute example code. + * Execute benchmark. * * @param args input raw arguments * @return if example execution complete successfully */ public final boolean runBenchmark(String[] args) { - Options options = getOptions(); + Options options = Arguments.getOptions(); try { - DefaultParser parser = new DefaultParser(); - CommandLine cmd = parser.parse(options, args, null, false); - Arguments arguments = parseArguments(cmd); - if (arguments.hasHelp()) { - printHelp("./gradlew benchmark --args='[OPTIONS]'", options); + if (Arguments.hasHelp(args)) { + printHelp("benchmark [-p MODEL-PATH] -s INPUT-SHAPES [OPTIONS]", options); return true; } + DefaultParser parser = new DefaultParser(); + CommandLine cmd = parser.parse(options, args, null, false); + Arguments arguments = new Arguments(cmd); + String engine = arguments.getEngine(); long init = System.nanoTime(); - String version = Engine.getInstance().getVersion(); + String version = Engine.getEngine(engine).getVersion(); long loaded = System.nanoTime(); logger.info( String.format( - "Load library %s in %.3f ms.", version, (loaded - init) / 1_000_000f)); - Duration duration = Duration.ofMinutes(arguments.getDuration()); + "Load %s (%s) in %.3f ms.", + engine, version, (loaded - init) / 1_000_000f)); + Duration duration = Duration.ofSeconds(arguments.getDuration()); if (arguments.getDuration() != 0) { logger.info( "Running {} on: {}, duration: {} minutes.", @@ -219,23 +197,26 @@ public final boolean runBenchmark(String[] args) { metrics.getMetric("Heap").get(1).getValue().longValue(); float heap = metrics.percentile("Heap", 90).getValue().longValue(); float nonHeap = metrics.percentile("NonHeap", 90).getValue().longValue(); - float rssBeforeModel = - metrics.getMetric("rss").get(0).getValue().longValue(); - float rssBeforeInference = - metrics.getMetric("rss").get(1).getValue().longValue(); - float rss = metrics.percentile("rss", 90).getValue().longValue(); - float cpu = metrics.percentile("cpu", 90).getValue().longValue(); int mb = 1024 * 1024; - - logger.info(String.format("cpu P90: %.3f %%", cpu)); logger.info(String.format("heap (base): %.3f MB", heapBeforeModel / mb)); logger.info( String.format("heap (model): %.3f MB", heapBeforeInference / mb)); logger.info(String.format("heap P90: %.3f MB", heap / mb)); logger.info(String.format("nonHeap P90: %.3f MB", nonHeap / mb)); - logger.info(String.format("rss (base): %.3f MB", rssBeforeModel / mb)); - logger.info(String.format("rss (model): %.3f MB", rssBeforeInference / mb)); - logger.info(String.format("rss P90: %.3f MB", rss / mb)); + + if (!System.getProperty("os.name").startsWith("Win")) { + float rssBeforeModel = + metrics.getMetric("rss").get(0).getValue().longValue(); + float rssBeforeInference = + metrics.getMetric("rss").get(1).getValue().longValue(); + float rss = metrics.percentile("rss", 90).getValue().longValue(); + float cpu = metrics.percentile("cpu", 90).getValue().longValue(); + logger.info(String.format("cpu P90: %.3f %%", cpu)); + logger.info(String.format("rss (base): %.3f MB", rssBeforeModel / mb)); + logger.info( + String.format("rss (model): %.3f MB", rssBeforeInference / mb)); + logger.info(String.format("rss P90: %.3f MB", rss / mb)); + } } } 
MemoryTrainingListener.dumpMemoryInfo(metrics, arguments.getOutputDir()); @@ -266,6 +247,7 @@ protected ZooModel loadModel(Arguments arguments, Metrics metrics .setTypes(Void.class, float[].class) .optModelUrls(arguments.getModelUrls()) .optModelName(arguments.getModelName()) + .optEngine(arguments.getEngine()) .optFilters(arguments.getCriteria()) .optArtifactId(artifactId) .optTranslator(translator) diff --git a/examples/src/main/java/ai/djl/examples/inference/benchmark/util/Arguments.java b/extensions/benchmark/src/main/java/ai/djl/benchmark/Arguments.java similarity index 81% rename from examples/src/main/java/ai/djl/examples/inference/benchmark/util/Arguments.java rename to extensions/benchmark/src/main/java/ai/djl/benchmark/Arguments.java index 27c992573cd..85b389de664 100644 --- a/examples/src/main/java/ai/djl/examples/inference/benchmark/util/Arguments.java +++ b/extensions/benchmark/src/main/java/ai/djl/benchmark/Arguments.java @@ -10,7 +10,7 @@ * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ -package ai.djl.examples.inference.benchmark.util; +package ai.djl.benchmark; import ai.djl.engine.Engine; import ai.djl.ndarray.types.DataType; @@ -23,11 +23,13 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; import org.apache.commons.cli.Options; /** A class represents parsed command line arguments. */ @@ -44,28 +46,33 @@ public class Arguments { private int threads; private int delay; private PairList inputShapes; - private boolean help; - public Arguments(CommandLine cmd) { - help = cmd.hasOption("help"); + /** + * Constructs a {@code Arguments} instance. 
+ * + * @param cmd command line options + */ + Arguments(CommandLine cmd) { artifactId = cmd.getOptionValue("artifact-id"); - modelUrls = cmd.getOptionValue("model-path"); - if (modelUrls == null) { - String location = System.getProperty("ai.djl.repository.zoo.location"); - if (location != null) { - modelUrls = location; - } - } else if (!modelUrls.startsWith("http") || !modelUrls.startsWith("file")) { - Path path = Paths.get(modelUrls); + if (cmd.hasOption("model-path")) { + String modelPath = cmd.getOptionValue("model-path"); + Path path = Paths.get(modelPath); try { modelUrls = path.toUri().toURL().toExternalForm(); } catch (IOException e) { throw new IllegalArgumentException("Invalid model-path: " + modelUrls, e); } + } else if (cmd.hasOption("model-url")) { + modelUrls = cmd.getOptionValue("model-url"); + } + if (modelUrls != null) { + artifactId = "ai.djl.localmodelzoo:"; } + modelName = cmd.getOptionValue("model-name"); outputDir = cmd.getOptionValue("output-dir"); inputShapes = new PairList<>(); + if (cmd.hasOption("engine")) { engine = cmd.getOptionValue("engine"); } else { @@ -94,8 +101,9 @@ public Arguments(CommandLine cmd) { if (cmd.hasOption("delay")) { delay = Integer.parseInt(cmd.getOptionValue("delay")); } - if (cmd.hasOption("input-shapes")) { - String shape = cmd.getOptionValue("input-shapes"); + + String shape = cmd.getOptionValue("input-shapes"); + if (shape != null) { if (shape.contains("(")) { Pattern pattern = Pattern.compile("\\((\\s*(\\d+)([,\\s]+\\d+)*\\s*)\\)([sdubilBfS]?)"); @@ -147,44 +155,55 @@ public Arguments(CommandLine cmd) { } } - public static Options getOptions() { + static Options getOptions() { Options options = new Options(); options.addOption( Option.builder("h").longOpt("help").hasArg(false).desc("Print this help.").build()); - options.addOption( + OptionGroup artifactGroup = new OptionGroup(); + artifactGroup.setRequired(true); + artifactGroup.addOption( Option.builder("a") .longOpt("artifact-id") .hasArg() .argName("ARTIFACT-ID") .desc("Model artifact id.") .build()); - options.addOption( + artifactGroup.addOption( Option.builder("p") .longOpt("model-path") .hasArg() .argName("MODEL-PATH") .desc("Model directory file path.") .build()); + artifactGroup.addOption( + Option.builder("u") + .longOpt("model-url") + .hasArg() + .argName("MODEL-URL") + .desc("Model archive file URL.") + .build()); + options.addOptionGroup(artifactGroup); options.addOption( Option.builder("n") .longOpt("model-name") .hasArg() .argName("MODEL-NAME") - .desc("Model name.") + .desc("Specify model file name.") .build()); options.addOption( Option.builder("e") - .longOpt("engine-name") + .longOpt("engine") .hasArg() .argName("ENGINE-NAME") - .desc("Engine name.") + .desc("Choose an Engine for the benchmark.") .build()); options.addOption( Option.builder("s") + .required() .longOpt("input-shapes") .hasArg() .argName("INPUT-SHAPES") - .desc("Input data shapes for non-CV model.") + .desc("Input data shapes for the model.") .build()); options.addOption( Option.builder("d") @@ -231,73 +250,55 @@ public static Options getOptions() { return options; } - public int getDuration() { + static boolean hasHelp(String[] args) { + List list = Arrays.asList(args); + return list.contains("-h") || list.contains("help"); + } + + int getDuration() { return duration; } - public String getModelUrls() { + String getEngine() { + return engine; + } + + String getModelUrls() { return modelUrls; } - public String getModelName() { + String getModelName() { return modelName; } - public String 
getArtifactId() { - if (modelUrls != null) { - return "ai.djl.localmodelzoo:"; - } - - if (artifactId != null) { - return artifactId; - } - - if (inputShapes.isEmpty()) { - inputShapes.add(DataType.FLOAT32, new Shape(1, 3, 224, 224)); - } - - switch (engine) { - case "PyTorch": - return "ai.djl.pytorch:resnet"; - case "TensorFlow": - return "ai.djl.tensorflow:resnet"; - case "MXNet": - default: - return "ai.djl.mxnet:resnet"; - } + String getArtifactId() { + return artifactId; } - public int getIteration() { + int getIteration() { return iteration; } - public int getThreads() { + int getThreads() { return threads; } - public String getOutputDir() { + String getOutputDir() { if (outputDir == null) { outputDir = "build"; } return outputDir; } - public Map getCriteria() { + Map getCriteria() { return criteria; } - public int getDelay() { + int getDelay() { return delay; } - public PairList getInputShapes() { - if (inputShapes.isEmpty()) { - throw new IllegalArgumentException("Input share is required."); - } + PairList getInputShapes() { return inputShapes; } - - public boolean hasHelp() { - return help; - } } diff --git a/examples/src/main/java/ai/djl/examples/inference/benchmark/Benchmark.java b/extensions/benchmark/src/main/java/ai/djl/benchmark/Benchmark.java similarity index 56% rename from examples/src/main/java/ai/djl/examples/inference/benchmark/Benchmark.java rename to extensions/benchmark/src/main/java/ai/djl/benchmark/Benchmark.java index e661f89918c..1db8b56bc3d 100644 --- a/examples/src/main/java/ai/djl/examples/inference/benchmark/Benchmark.java +++ b/extensions/benchmark/src/main/java/ai/djl/benchmark/Benchmark.java @@ -10,11 +10,9 @@ * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ -package ai.djl.examples.inference.benchmark; +package ai.djl.benchmark; import ai.djl.ModelException; -import ai.djl.examples.inference.benchmark.util.AbstractBenchmark; -import ai.djl.examples.inference.benchmark.util.Arguments; import ai.djl.inference.Predictor; import ai.djl.metric.Metrics; import ai.djl.repository.zoo.ZooModel; @@ -22,12 +20,22 @@ import ai.djl.translate.TranslateException; import java.io.IOException; import java.util.Arrays; +import java.util.List; +/** A class runs single threaded benchmark. */ public final class Benchmark extends AbstractBenchmark { + /** + * Main entry point. 
+     *
+     * @param args command line arguments
+     */
     public static void main(String[] args) {
+        List<String> list = Arrays.asList(args);
+        boolean multithreading = list.contains("-t") || list.contains("--threads");
+        configEngines(multithreading);
         boolean success;
-        if (Arrays.asList(args).contains("-t")) {
+        if (multithreading) {
             success = new MultithreadedBenchmark().runBenchmark(args);
         } else {
             success = new Benchmark().runBenchmark(args);
         }
@@ -54,4 +62,27 @@ public float[] predict(Arguments arguments, Metrics metrics, int iteration)
             return predictResult;
         }
     }
+
+    private static void configEngines(boolean multithreading) {
+        if (multithreading) {
+            if (System.getProperty("ai.djl.pytorch.num_interop_threads") == null) {
+                System.setProperty("ai.djl.pytorch.num_interop_threads", "1");
+            }
+            if (System.getProperty("ai.djl.pytorch.num_threads") == null) {
+                System.setProperty("ai.djl.pytorch.num_threads", "1");
+            }
+        }
+        if (System.getProperty("ai.djl.tflite.disable_alternative") == null) {
+            System.setProperty("ai.djl.tflite.disable_alternative", "true");
+        }
+        if (System.getProperty("ai.djl.dlr.disable_alternative") == null) {
+            System.setProperty("ai.djl.dlr.disable_alternative", "true");
+        }
+        if (System.getProperty("ai.djl.paddlepaddle.disable_alternative") == null) {
+            System.setProperty("ai.djl.paddlepaddle.disable_alternative", "true");
+        }
+        if (System.getProperty("ai.djl.onnx.disable_alternative") == null) {
+            System.setProperty("ai.djl.onnx.disable_alternative", "true");
+        }
+    }
 }
diff --git a/examples/src/main/java/ai/djl/examples/inference/benchmark/MultithreadedBenchmark.java b/extensions/benchmark/src/main/java/ai/djl/benchmark/MultithreadedBenchmark.java
similarity index 92%
rename from examples/src/main/java/ai/djl/examples/inference/benchmark/MultithreadedBenchmark.java
rename to extensions/benchmark/src/main/java/ai/djl/benchmark/MultithreadedBenchmark.java
index 9a93e89ca77..0c12ce093ba 100644
--- a/examples/src/main/java/ai/djl/examples/inference/benchmark/MultithreadedBenchmark.java
+++ b/extensions/benchmark/src/main/java/ai/djl/benchmark/MultithreadedBenchmark.java
@@ -10,11 +10,9 @@
  * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
  * and limitations under the License.
  */
-package ai.djl.examples.inference.benchmark;
+package ai.djl.benchmark;
 import ai.djl.ModelException;
-import ai.djl.examples.inference.benchmark.util.AbstractBenchmark;
-import ai.djl.examples.inference.benchmark.util.Arguments;
 import ai.djl.inference.Predictor;
 import ai.djl.metric.Metrics;
 import ai.djl.repository.zoo.ZooModel;
@@ -31,17 +29,11 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+/** A class runs multi-threaded benchmark.
*/ public class MultithreadedBenchmark extends AbstractBenchmark { private static final Logger logger = LoggerFactory.getLogger(MultithreadedBenchmark.class); - public static void main(String[] args) { - if (new MultithreadedBenchmark().runBenchmark(args)) { - System.exit(0); // NOPMD - } - System.exit(-1); // NOPMD - } - /** {@inheritDoc} */ @Override public float[] predict(Arguments arguments, Metrics metrics, int iteration) @@ -54,7 +46,7 @@ public float[] predict(Arguments arguments, Metrics metrics, int iteration) int numOfThreads = arguments.getThreads(); int delay = arguments.getDelay(); AtomicInteger counter = new AtomicInteger(iteration); - logger.info("Multithreaded inference with {} threads.", numOfThreads); + logger.info("Multithreading inference with {} threads.", numOfThreads); List callables = new ArrayList<>(numOfThreads); for (int i = 0; i < numOfThreads; ++i) { diff --git a/examples/src/main/java/ai/djl/examples/inference/benchmark/util/package-info.java b/extensions/benchmark/src/main/java/ai/djl/benchmark/package-info.java similarity index 74% rename from examples/src/main/java/ai/djl/examples/inference/benchmark/util/package-info.java rename to extensions/benchmark/src/main/java/ai/djl/benchmark/package-info.java index 8f3b7bc8aa7..6436a24fe15 100644 --- a/examples/src/main/java/ai/djl/examples/inference/benchmark/util/package-info.java +++ b/extensions/benchmark/src/main/java/ai/djl/benchmark/package-info.java @@ -11,8 +11,5 @@ * and limitations under the License. */ -/** - * Contains utilities used for the inference benchmarking examples within the package {@link - * ai.djl.examples.inference.benchmark}. - */ -package ai.djl.examples.inference.benchmark.util; +/** Contains benchmarking utility classes. */ +package ai.djl.benchmark; diff --git a/extensions/benchmark/src/main/resources/log4j2.xml b/extensions/benchmark/src/main/resources/log4j2.xml new file mode 100644 index 00000000000..cb6d3c6fbee --- /dev/null +++ b/extensions/benchmark/src/main/resources/log4j2.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + diff --git a/examples/src/test/java/ai/djl/examples/inference/BenchmarkTest.java b/extensions/benchmark/src/test/java/ai/djl/benchmark/BenchmarkTest.java similarity index 51% rename from examples/src/test/java/ai/djl/examples/inference/BenchmarkTest.java rename to extensions/benchmark/src/test/java/ai/djl/benchmark/BenchmarkTest.java index 262325082ec..7528a84dd78 100644 --- a/examples/src/test/java/ai/djl/examples/inference/BenchmarkTest.java +++ b/extensions/benchmark/src/test/java/ai/djl/benchmark/BenchmarkTest.java @@ -10,25 +10,47 @@ * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. 
*/ -package ai.djl.examples.inference; +package ai.djl.benchmark; -import ai.djl.examples.inference.benchmark.Benchmark; -import ai.djl.examples.inference.benchmark.MultithreadedBenchmark; import org.testng.annotations.Test; public class BenchmarkTest { @Test - public void testBenchmark() { - String[] args = {"-c", "2", "-r", "{'layers':'18','flavor':'v1'}"}; + public void testHelp() { + String[] args = {"-h"}; new Benchmark().runBenchmark(args); } @Test - public void testMultithreadedBenchmark() { + public void testBenchmark() { String[] args = { - "-c", "2", "-s", "(1,3,224,224)f", "-r", "{'layers':'18','flavor':'v1'}", "-t", "2" + "-a", "resnet", "-s", "1,3,224,224", "-c", "2", "-r", "{'layers':'18','flavor':'v1'}" }; - new MultithreadedBenchmark().runBenchmark(args); + new Benchmark().runBenchmark(args); + } + + @Test + public void testMultithreadedBenchmark() { + System.setProperty("collect-memory", "true"); + try { + String[] args = { + "-a", + "resnet", + "-s", + "(1,3,224,224)f", + "-d", + "1", + "-c", + "2", + "-r", + "{'layers':'18','flavor':'v1'}", + "-t", + "-1" + }; + Benchmark.main(args); + } finally { + System.clearProperty("collect-memory"); + } } } diff --git a/examples/src/main/java/ai/djl/examples/inference/benchmark/package-info.java b/extensions/benchmark/src/test/java/ai/djl/benchmark/package-info.java similarity index 65% rename from examples/src/main/java/ai/djl/examples/inference/benchmark/package-info.java rename to extensions/benchmark/src/test/java/ai/djl/benchmark/package-info.java index 066fcc73e63..fd842219c53 100644 --- a/examples/src/main/java/ai/djl/examples/inference/benchmark/package-info.java +++ b/extensions/benchmark/src/test/java/ai/djl/benchmark/package-info.java @@ -1,5 +1,5 @@ /* - * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance * with the License. A copy of the License is located at @@ -11,9 +11,5 @@ * and limitations under the License. */ -/** - * Contains inference benchmarking examples and code. - * - *

See the inference benchmarking utilities in {@link ai.djl.examples.inference.benchmark.util}. - */ -package ai.djl.examples.inference.benchmark; +/** Contains tests for the benchmark module. */ +package ai.djl.benchmark; diff --git a/integration/src/main/resources/log4j2.xml b/integration/src/main/resources/log4j2.xml index d9cd57d370a..cb6d3c6fbee 100644 --- a/integration/src/main/resources/log4j2.xml +++ b/integration/src/main/resources/log4j2.xml @@ -13,7 +13,7 @@ - + diff --git a/settings.gradle b/settings.gradle index 24224b3cd09..0e2277f74d2 100644 --- a/settings.gradle +++ b/settings.gradle @@ -6,6 +6,7 @@ include ':dlr:dlr-engine' include ':dlr:dlr-native' include ':examples' include ':extensions:aws-ai' +include ':extensions:benchmark' include ':extensions:fasttext' include ':extensions:hadoop' include ':extensions:sentencepiece'
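As a closing illustration of how the pieces above fit together (this sketch is not part of the patch; the class name is hypothetical, `NoopTranslator` is assumed as a simple NDList pass-through, and the URL/model name are the PyTorch resnet18 example from the README), the new `-e`, `-u` and `-n` options roughly map onto DJL's `Criteria` API through the `.optEngine(arguments.getEngine())` call added to `loadModel`:

```java
import ai.djl.inference.Predictor;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.types.DataType;
import ai.djl.ndarray.types.Shape;
import ai.djl.repository.zoo.Criteria;
import ai.djl.repository.zoo.ModelZoo;
import ai.djl.repository.zoo.ZooModel;
import ai.djl.translate.NoopTranslator;

public final class EngineSelectionSketch {

    public static void main(String[] args) throws Exception {
        // "-e PyTorch" -> optEngine, "-u <url>" -> optModelUrls, "-n traced_resnet18" -> optModelName
        Criteria<NDList, NDList> criteria =
                Criteria.builder()
                        .setTypes(NDList.class, NDList.class)
                        .optModelUrls(
                                "https://alpha-djl-demos.s3.amazonaws.com/model/djl-blockrunner/pytorch_resnet18.zip")
                        .optModelName("traced_resnet18")
                        .optEngine("PyTorch")
                        .optTranslator(new NoopTranslator())
                        .build();

        try (ZooModel<NDList, NDList> model = ModelZoo.loadModel(criteria);
                Predictor<NDList, NDList> predictor = model.newPredictor()) {
            // Feed the same kind of all-ones dummy input the benchmark generates.
            NDList input =
                    new NDList(
                            model.getNDManager()
                                    .ones(new Shape(1, 3, 224, 224), DataType.FLOAT32));
            System.out.println(predictor.predict(input).head().getShape());
        }
    }
}
```

This mirrors the new `extensions/benchmark/build.gradle`, which pulls in every engine as a `runtimeOnly` dependency so a single benchmark distribution can be pointed at any engine at run time.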