You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
One of the payloads results in a context-length-exceeded error, which is expected, but the functionary pipeline doesn't seem to handle it well.
[2024-11-09 07:37:08] INFO: 127.0.0.1:49194 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
[2024-11-09 07:37:08] ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/home/ubuntu/functionary/functionary/sglang_inference.py", line 466, in v1_chat_generate_completion
ret = await params.tokenizer_manager.generate_request(
File "/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/tokenizer_manager.py", line 161, in generate_request
async for response in self._handle_single_request(obj, request):
File "/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/tokenizer_manager.py", line 323, in _handle_single_request
rid, input_ids = await self._send_single_request(
File "/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/tokenizer_manager.py", line 230, in _send_single_request
self._validate_input_length(input_ids)
File "/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/tokenizer_manager.py", line 434, in _validate_input_length
raise ValueError(
ValueError: The input (10464 tokens) is longer than the model's context length (8192 tokens).
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/.local/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 401, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/home/ubuntu/.local/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/applications.py", line 113, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/middleware/errors.py", line 187, in __call__
raise exc
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/middleware/errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/home/ubuntu/.local/lib/python3.10/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
File "/home/ubuntu/.local/lib/python3.10/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
File "/home/ubuntu/.local/lib/python3.10/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
return await dependant.call(**values)
File "/home/ubuntu/functionary/server_sglang.py", line 175, in openai_v1_chat_completions
return await v1_chat_completions(tokenizer_manager, None, raw_request, served_model)
File "/home/ubuntu/functionary/functionary/sglang_inference.py", line 605, in v1_chat_completions
output, error = await v1_chat_generate_completion(params)
File "/home/ubuntu/functionary/functionary/sglang_inference.py", line 470, in v1_chat_generate_completion
return None, create_error_response(HTTPStatus.BAD_REQUEST, str(e))
TypeError: create_error_response() missing 1 required positional argument: 'param'
The text was updated successfully, but these errors were encountered:
I'm running the following command on a 2x H100 machine.
One of the payloads results in a context-length-exceeded error, which is expected, but the functionary pipeline doesn't seem to handle it well.
The text was updated successfully, but these errors were encountered: