diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index c31b206d6f60e..03a8959a7d9ff 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -238,7 +238,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             choices=get_args(TaskOption),
             help='The task to use the model for. Each vLLM instance only '
             'supports one task, even if the same model can be used for '
-            'multiple tasks. When the model only supports one task, "auto" '
+            'multiple tasks. When the model only supports one task, ``"auto"`` '
             'can be used to select it; otherwise, you must specify explicitly '
             'which task to use.')
         parser.add_argument(
@@ -250,7 +250,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         parser.add_argument(
             '--skip-tokenizer-init',
             action='store_true',
-            help='Skip initialization of tokenizer and detokenizer')
+            help='Skip initialization of tokenizer and detokenizer.')
         parser.add_argument(
             '--revision',
             type=nullable_str,
@@ -401,7 +401,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         parser.add_argument(
             '--worker-use-ray',
             action='store_true',
-            help='Deprecated, use --distributed-executor-backend=ray.')
+            help='Deprecated, use ``--distributed-executor-backend=ray``.')
         parser.add_argument('--pipeline-parallel-size',
                             '-pp',
                             type=int,
@@ -430,7 +430,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                             choices=[8, 16, 32, 64, 128],
                             help='Token block size for contiguous chunks of '
                             'tokens. This is ignored on neuron devices and '
-                            'set to max-model-len. On CUDA devices, '
+                            'set to ``--max-model-len``. On CUDA devices, '
                             'only block sizes up to 32 are supported. '
                             'On HPU devices, block size defaults to 128.')
 
@@ -439,12 +439,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             action=argparse.BooleanOptionalAction,
             default=EngineArgs.enable_prefix_caching,
             help="Enables automatic prefix caching. "
-            "Use --no-enable-prefix-caching to disable explicitly.",
+            "Use ``--no-enable-prefix-caching`` to disable explicitly.",
         )
         parser.add_argument('--disable-sliding-window',
                             action='store_true',
                             help='Disables sliding window, '
-                            'capping to sliding window size')
+                            'capping to sliding window size.')
         parser.add_argument('--use-v2-block-manager',
                             action='store_true',
                             default=True,
@@ -861,7 +861,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                             "of the provided names. The model name in the model "
                             "field of a response will be the first name in this "
                             "list. If not specified, the model name will be the "
-                            "same as the `--model` argument. Noted that this name(s) "
+                            "same as the ``--model`` argument. Note that this name(s) "
                             "will also be used in `model_name` tag content of "
                             "prometheus metrics, if multiple names provided, metrics "
                             "tag will take the first one.")
@@ -881,7 +881,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             default=None,
             help="Valid choices are " +
             ",".join(ALLOWED_DETAILED_TRACE_MODULES) +
-            ". It makes sense to set this only if --otlp-traces-endpoint is"
+            ". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
             " set. If set, it will collect detailed traces for the specified "
             "modules. This involves use of possibly costly and or blocking "
             "operations and hence might have a performance impact.")
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 22206ef8dbfe6..35445449463e9 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -79,29 +79,29 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
     parser.add_argument("--host",
                         type=nullable_str,
                         default=None,
-                        help="host name")
-    parser.add_argument("--port", type=int, default=8000, help="port number")
+                        help="Host name.")
+    parser.add_argument("--port", type=int, default=8000, help="Port number.")
     parser.add_argument(
         "--uvicorn-log-level",
         type=str,
         default="info",
         choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
-        help="log level for uvicorn")
+        help="Log level for uvicorn.")
     parser.add_argument("--allow-credentials",
                         action="store_true",
-                        help="allow credentials")
+                        help="Allow credentials.")
     parser.add_argument("--allowed-origins",
                         type=json.loads,
                         default=["*"],
-                        help="allowed origins")
+                        help="Allowed origins.")
     parser.add_argument("--allowed-methods",
                         type=json.loads,
                         default=["*"],
-                        help="allowed methods")
+                        help="Allowed methods.")
     parser.add_argument("--allowed-headers",
                         type=json.loads,
                         default=["*"],
-                        help="allowed headers")
+                        help="Allowed headers.")
     parser.add_argument("--api-key",
                         type=nullable_str,
                         default=None,
@@ -115,10 +115,10 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         action=LoRAParserAction,
         help="LoRA module configurations in either 'name=path' format"
         "or JSON format. "
-        "Example (old format): 'name=path' "
+        "Example (old format): ``'name=path'`` "
         "Example (new format): "
-        "'{\"name\": \"name\", \"local_path\": \"path\", "
-        "\"base_model_name\": \"id\"}'")
+        "``{\"name\": \"name\", \"local_path\": \"path\", "
+        "\"base_model_name\": \"id\"}``")
     parser.add_argument(
         "--prompt-adapters",
         type=nullable_str,
@@ -132,7 +132,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                         default=None,
                         help="The file path to the chat template, "
                         "or the template in single-line form "
-                        "for the specified model")
+                        "for the specified model.")
     parser.add_argument(
         '--chat-template-content-format',
         type=str,
@@ -141,38 +141,39 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         help='The format to render message content within a chat template.'
         '\n\n'
         '* "string" will render the content as a string. '
-        'Example: "Hello World"\n'
+        'Example: ``"Hello World"``\n'
         '* "openai" will render the content as a list of dictionaries, '
         'similar to OpenAI schema. '
-        'Example: [{"type": "text", "text": "Hello world!"}]')
+        'Example: ``[{"type": "text", "text": "Hello world!"}]``')
     parser.add_argument("--response-role",
                         type=nullable_str,
                         default="assistant",
                         help="The role name to return if "
-                        "`request.add_generation_prompt=true`.")
+                        "``request.add_generation_prompt=true``.")
     parser.add_argument("--ssl-keyfile",
                         type=nullable_str,
                         default=None,
-                        help="The file path to the SSL key file")
+                        help="The file path to the SSL key file.")
     parser.add_argument("--ssl-certfile",
                         type=nullable_str,
                         default=None,
-                        help="The file path to the SSL cert file")
+                        help="The file path to the SSL cert file.")
     parser.add_argument("--ssl-ca-certs",
                         type=nullable_str,
                         default=None,
-                        help="The CA certificates file")
+                        help="The CA certificates file.")
     parser.add_argument(
         "--ssl-cert-reqs",
         type=int,
         default=int(ssl.CERT_NONE),
-        help="Whether client certificate is required (see stdlib ssl module's)"
+        help="Whether client certificate is required (see stdlib ssl module's documentation)."
     )
     parser.add_argument(
         "--root-path",
         type=nullable_str,
         default=None,
-        help="FastAPI root_path when app is behind a path based routing proxy")
+        help="FastAPI root_path when app is behind a path-based routing proxy."
+    )
     parser.add_argument(
         "--middleware",
         type=nullable_str,
@@ -182,15 +183,15 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         "We accept multiple --middleware arguments. "
         "The value should be an import path. "
         "If a function is provided, vLLM will add it to the server "
-        "using @app.middleware('http'). "
+        "using ``@app.middleware('http')``. "
         "If a class is provided, vLLM will add it to the server "
-        "using app.add_middleware(). ")
+        "using ``app.add_middleware()``. ")
     parser.add_argument(
         "--return-tokens-as-token-ids",
         action="store_true",
-        help="When --max-logprobs is specified, represents single tokens as "
-        "strings of the form 'token_id:{token_id}' so that tokens that "
-        "are not JSON-encodable can be identified.")
+        help="When ``--max-logprobs`` is specified, represents single tokens "
+        "as strings of the form 'token_id:{token_id}' so that tokens "
+        "that are not JSON-encodable can be identified.")
     parser.add_argument(
         "--disable-frontend-multiprocessing",
         action="store_true",
@@ -205,9 +206,8 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         "--enable-auto-tool-choice",
         action="store_true",
         default=False,
-        help=
-        "Enable auto tool choice for supported models. Use --tool-call-parser"
-        " to specify which parser to use")
+        help="Enable auto tool choice for supported models. Use "
+        "``--tool-call-parser`` to specify which parser to use.")
 
     valid_tool_parsers = ToolParserManager.tool_parsers.keys()
     parser.add_argument(
@@ -219,7 +219,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         help=
         "Select the tool call parser depending on the model that you're using."
         " This is used to parse the model-generated tool call into OpenAI API "
-        "format. Required for --enable-auto-tool-choice.")
+        "format. Required for ``--enable-auto-tool-choice``.")
 
     parser.add_argument(
         "--tool-parser-plugin",
@@ -228,7 +228,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         help=
         "Special the tool parser plugin write to parse the model-generated tool"
         " into OpenAI API format, the name register in this plugin can be used "
-        "in --tool-call-parser.")
+        "in ``--tool-call-parser``.")
 
     parser = AsyncEngineArgs.add_cli_args(parser)
 
@@ -243,7 +243,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         "--disable-fastapi-docs",
         action='store_true',
         default=False,
-        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
+        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
    )
     parser.add_argument(
         "--enable-prompt-tokens-details",
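Note for reviewers: the ``--enable-prefix-caching`` hunk above relies on ``argparse.BooleanOptionalAction`` to generate the paired ``--no-enable-prefix-caching`` flag that the updated help text points users to. A minimal standalone sketch of that pattern follows (standard library only, Python 3.9+); the flag name mirrors the vLLM option, but the parser and default here are illustrative, not vLLM's actual EngineArgs wiring.

    import argparse

    parser = argparse.ArgumentParser()
    # BooleanOptionalAction registers the negative form automatically,
    # so the help text can reference --no-enable-prefix-caching without
    # a second add_argument call.
    parser.add_argument(
        "--enable-prefix-caching",
        action=argparse.BooleanOptionalAction,
        default=True,  # illustrative; vLLM uses EngineArgs.enable_prefix_caching
        help="Enables automatic prefix caching. "
        "Use ``--no-enable-prefix-caching`` to disable explicitly.",
    )

    print(parser.parse_args([]))                              # Namespace(enable_prefix_caching=True)
    print(parser.parse_args(["--no-enable-prefix-caching"]))  # Namespace(enable_prefix_caching=False)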