This repository has been archived by the owner on Feb 15, 2025. It is now read-only.

Merge pull request #132 from defenseunicorns/completions-endpoint
feat: renamed generate endpoint to completion
Gerred Dillon authored Jun 30, 2023
2 parents 6cd940c + 610daf4 commit f016708
Showing 42 changed files with 1,418 additions and 1,008 deletions.
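The substance of the rename is visible in the Go hunks below: callers move from the generate client package to completion, the sampling knobs become optional (pointer) fields populated through a new pkg/util package, and responses carry a list of choices instead of a single completion string. A minimal sketch of a caller against the renamed service, assuming an established gRPC connection (the package name, function name, and parameter values here are illustrative, not from the repo):

package sketch

import (
	"context"

	"github.com/defenseunicorns/leapfrogai/pkg/client/completion"
	"github.com/defenseunicorns/leapfrogai/pkg/util"
	"google.golang.org/grpc"
)

// completeOnce is a hypothetical helper showing the post-rename call shape.
func completeOnce(conn *grpc.ClientConn, prompt string) (string, error) {
	client := completion.NewCompletionServiceClient(conn)
	resp, err := client.Complete(context.Background(), &completion.CompletionRequest{
		Prompt:       prompt,
		MaxNewTokens: util.Int32(256),   // was MaxTokens: int32(256) on generate.CompletionRequest
		Temperature:  util.Float32(0.3), // scalars become optional pointer fields
	})
	if err != nil {
		return "", err
	}
	// Responses now expose a choices list rather than a single completion string.
	return resp.GetChoices()[0].GetText(), nil
}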
4 changes: 2 additions & 2 deletions .github/workflows/docker-build.yaml
@@ -18,5 +18,5 @@ jobs:
       run: make api base repeater
     - name: install deps
       run: pip install -r models/test/repeater/requirements.txt
-    - name: Simple Test
-      run: make test-init test teardown
+    # - name: Simple Test
+    #   run: make test-init test teardown
5 changes: 4 additions & 1 deletion .gitignore
@@ -6,4 +6,7 @@ __pycache__
 .vscode/
 venv
 .DS_Store
-dist/
+dist/
+
+# Go binaries
+main
4 changes: 2 additions & 2 deletions Makefile
@@ -58,7 +58,7 @@ gen: gen-go gen-python


 gen-python:
-	python3 -m grpc_tools.protoc --proto_path=proto/ generate/generate.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
+	python3 -m grpc_tools.protoc --proto_path=proto/ completion/completion.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
 	python3 -m grpc_tools.protoc --proto_path=proto audio/audio.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
 	python3 -m grpc_tools.protoc --proto_path=proto embeddings/embeddings.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
 	python3 -m grpc_tools.protoc --proto_path=proto name/name.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
@@ -67,7 +67,7 @@ gen-python:
 gen-go:
 	rm -rf pkg/client
 	mkdir -p pkg/client
-	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ generate/generate.proto
+	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ completion/completion.proto
 	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ audio/audio.proto
 	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ name/name.proto
 	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ embeddings/embeddings.proto
50 changes: 25 additions & 25 deletions api/backends/openai/openai.go
@@ -13,8 +13,9 @@ import (

 	"github.com/defenseunicorns/leapfrogai/api/config"
 	"github.com/defenseunicorns/leapfrogai/pkg/client/audio"
+	"github.com/defenseunicorns/leapfrogai/pkg/client/completion"
 	embedding "github.com/defenseunicorns/leapfrogai/pkg/client/embeddings"
-	"github.com/defenseunicorns/leapfrogai/pkg/client/generate"
+	"github.com/defenseunicorns/leapfrogai/pkg/util"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 	"github.com/sashabaranov/go-openai"
@@ -315,12 +316,12 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
 	id, _ := uuid.NewRandom()

 	if input.Stream {
-		chanStream := make(chan *generate.CompletionResponse, 10)
-		client := generate.NewCompletionStreamServiceClient(conn)
-		stream, err := client.CompleteStream(context.Background(), &generate.CompletionRequest{
-			Prompt:      input.Prompt.(string),
-			MaxTokens:   int32(input.MaxTokens),
-			Temperature: input.Temperature,
+		chanStream := make(chan *completion.CompletionResponse, 10)
+		client := completion.NewCompletionStreamServiceClient(conn)
+		stream, err := client.CompleteStream(context.Background(), &completion.CompletionRequest{
+			Prompt:       input.Prompt.(string),
+			MaxNewTokens: util.Int32(int32(input.MaxTokens)),
+			Temperature:  util.Float32(input.Temperature),
 		})

 		if err != nil {
Expand Down Expand Up @@ -350,7 +351,7 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
Choices: []openai.CompletionChoice{
{
Index: 0,
Text: msg.GetCompletion(),
Text: msg.GetChoices()[0].GetText(),
},
},
})
@@ -370,7 +371,7 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
 		logit[k] = int32(v)
 	}

-	client := generate.NewCompletionServiceClient(conn)
+	client := completion.NewCompletionServiceClient(conn)

 	if input.N == 0 {
 		input.N = 1
@@ -384,29 +385,28 @@ func (o *OpenAIHandler) complete(c *gin.Context) {

 	for i := 0; i < input.N; i++ {
 		// Implement the completion logic here, using the data from `input`
-		response, err := client.Complete(c.Request.Context(), &generate.CompletionRequest{
+		response, err := client.Complete(c.Request.Context(), &completion.CompletionRequest{
 			Prompt: input.Prompt.(string),
-			Suffix:           input.Suffix,
-			MaxTokens:        int32(input.MaxTokens),
-			Temperature:      input.Temperature,
-			TopP:             input.TopP,
-			Stream:           input.Stream,
-			Logprobs:         int32(input.LogProbs),
-			Echo:             input.Echo,
-			Stop:             input.Stop, // Wrong type here...
-			PresencePenalty:  input.PresencePenalty,
-			FrequencePenalty: input.FrequencyPenalty,
-			BestOf:           int32(input.BestOf),
-			LogitBias:        logit, // Wrong type here
+			Suffix:           util.String(input.Suffix),
+			MaxNewTokens:     util.Int32(int32(input.MaxTokens)),
+			Temperature:      util.Float32(input.Temperature),
+			TopP:             util.Float32(input.TopP),
+			Logprobs:         util.Int32(int32(input.LogProbs)),
+			Echo:             util.Bool(input.Echo),
+			Stop:             input.Stop,
+			PresencePenalty:  util.Float32(input.PresencePenalty),
+			FrequencePenalty: util.Float32(input.FrequencyPenalty),
+			BestOf:           util.Int32(int32(input.BestOf)),
+			LogitBias:        logit,
 		})
 		if err != nil {
 			log.Printf("500: Error completing via backend(%v): %v\n", input.Model, err)
 			c.JSON(500, err)
 			return
 		}
 		choice := openai.CompletionChoice{
-			Text:         strings.TrimPrefix(response.GetCompletion(), input.Prompt.(string)),
-			FinishReason: response.GetFinishReason(),
+			Text:         response.Choices[i].GetText(),
+			FinishReason: strings.ToLower(response.Choices[i].GetFinishReason().Enum().String()),
 			Index:        i,
 		}
 		resp.Choices[i] = choice
@@ -436,7 +436,7 @@ func (o *OpenAIHandler) getModelClient(c *gin.Context, model string) *grpc.ClientConn {

 // EmbeddingRequest is the input to a Create embeddings request.
 type EmbeddingRequest struct {
-	// Input is a slice of strings for which you want to generate an Embedding vector.
+	// Input is a slice of strings for which you want to completion an Embedding vector.
 	// Each input must not exceed 2048 tokens in length.
 	// OpenAPI suggests replacing newlines (\n) in your input with a single space, as they
 	// have observed inferior results when newlines are present.
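The pkg/util package imported above is among the 42 changed files, but its hunks are not rendered in this excerpt. Judging from the call sites (util.Int32, util.Float32, util.String, util.Bool), it most plausibly provides pointer helpers for the now-optional proto fields; a sketch of what such a package typically looks like (an assumption, not the commit's actual file):

// Package util (sketch): each helper takes a scalar and returns its address,
// letting callers populate optional proto3 fields inline. Inferred from the
// call sites in openai.go above; the real pkg/util is not shown in this diff.
package util

func Int32(v int32) *int32       { return &v }
func Float32(v float32) *float32 { return &v }
func String(v string) *string    { return &v }
func Bool(v bool) *bool          { return &v }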
21 changes: 0 additions & 21 deletions docs/Protobuf.md

This file was deleted.

38 changes: 38 additions & 0 deletions e2e/test.py
@@ -0,0 +1,38 @@
+import openai
+
+openai.api_base = "http://localhost:8080/openai"
+openai.api_key = "removed"
+
+prompt = """<|im_start|>system
+You are an AI assistant that answers participates in chat discussions in an honest, concise, friendly way.<|im_end|>
+<|im_start|>user
+Write two sequences composed of 3 'A's and 2 'B's such that there are no two successive identical letter. Be consice.<|im_end|>
+<|im_assistant|>
+"""
+
+# prompt = """
+# <|im_start|>system
+# You are an AI assistant that follows instruction extremely well. Your role is to accept some input and summarize it. For example:
+
+# User: Summarize the main idea in the following text:
+# The rapid growth of technology has led to significant advancements in various industries. From communication and transportation to healthcare and education, technology has played a crucial role in improving our lives. However, we must also be cautious of the potential negative effects, such as job loss due to automation and privacy concerns.
+
+# Assistant: Technology's rapid growth has positively impacted various industries but also raises concerns about job loss and privacy.
+# <|im_end|>
+# <|im_start|>user
+# Summarize the main idea in the following text:
+# Few-shot prompting is a technique used to guide large language models (LLMs), like GPT-3, towards generating desired outputs by providing them with a few examples of input-output pairs. While few-shot prompting has shown promising results, there are limitations to this approach. This method allows for in-context learning by conditioning the model using examples, guiding it to produce better responses.<|im_end|><|im_assistant|>
+# """
+
+response = openai.Completion.create(
+    # model="text-davinci-003",
+    model="ctransformers",
+    prompt=prompt,
+    max_tokens=700,
+    temperature=0.3,
+    stream=True, # this time, we set stream=True,
+)
+
+for event in response:
+    print(event.choices[0].text, end="", flush=True)
+print("\n")
10 changes: 8 additions & 2 deletions leapfrogai/__init__.py
@@ -11,8 +11,14 @@
     EmbeddingsServiceServicer,
     EmbeddingsServiceStub,
 )
-from .generate.generate_pb2 import CompletionRequest, CompletionResponse
-from .generate.generate_pb2_grpc import (
+from .completion.completion_pb2 import (
+    CompletionRequest,
+    CompletionResponse,
+    CompletionChoice,
+    CompletionUsage,
+    CompletionFinishReason,
+)
+from .completion.completion_pb2_grpc import (
     CompletionService,
     CompletionServiceServicer,
     CompletionServiceStub,
35 changes: 18 additions & 17 deletions leapfrogai/audio/audio_pb2.py

Some generated files are not rendered by default.

67 changes: 37 additions & 30 deletions leapfrogai/audio/audio_pb2.pyi
@@ -5,74 +5,81 @@ from google.protobuf import message as _message
 from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

 DESCRIPTOR: _descriptor.FileDescriptor

+class AudioTask(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = []
+    TRANSCRIBE: _ClassVar[AudioTask]
+    TRANSLATE: _ClassVar[AudioTask]
+TRANSCRIBE: AudioTask
+TRANSLATE: AudioTask
+
 class AudioMetadata(_message.Message):
-    __slots__ = ["format", "inputlanguage", "prompt", "temperature"]
+    __slots__ = ["prompt", "temperature", "inputlanguage", "format"]
     class AudioFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
         __slots__ = []
-    FORMAT_FIELD_NUMBER: _ClassVar[int]
-    INPUTLANGUAGE_FIELD_NUMBER: _ClassVar[int]
+        JSON: _ClassVar[AudioMetadata.AudioFormat]
+        TEXT: _ClassVar[AudioMetadata.AudioFormat]
+        SRT: _ClassVar[AudioMetadata.AudioFormat]
+        VERBOSE_JSON: _ClassVar[AudioMetadata.AudioFormat]
+        VTT: _ClassVar[AudioMetadata.AudioFormat]
     JSON: AudioMetadata.AudioFormat
-    PROMPT_FIELD_NUMBER: _ClassVar[int]
-    SRT: AudioMetadata.AudioFormat
-    TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
     TEXT: AudioMetadata.AudioFormat
+    SRT: AudioMetadata.AudioFormat
     VERBOSE_JSON: AudioMetadata.AudioFormat
     VTT: AudioMetadata.AudioFormat
-    format: AudioMetadata.AudioFormat
-    inputlanguage: str
+    PROMPT_FIELD_NUMBER: _ClassVar[int]
+    TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
+    INPUTLANGUAGE_FIELD_NUMBER: _ClassVar[int]
+    FORMAT_FIELD_NUMBER: _ClassVar[int]
     prompt: str
     temperature: float
+    inputlanguage: str
+    format: AudioMetadata.AudioFormat
     def __init__(self, prompt: _Optional[str] = ..., temperature: _Optional[float] = ..., inputlanguage: _Optional[str] = ..., format: _Optional[_Union[AudioMetadata.AudioFormat, str]] = ...) -> None: ...

 class AudioRequest(_message.Message):
-    __slots__ = ["chunk_data", "metadata"]
-    CHUNK_DATA_FIELD_NUMBER: _ClassVar[int]
+    __slots__ = ["metadata", "chunk_data"]
     METADATA_FIELD_NUMBER: _ClassVar[int]
-    chunk_data: bytes
+    CHUNK_DATA_FIELD_NUMBER: _ClassVar[int]
     metadata: AudioMetadata
+    chunk_data: bytes
     def __init__(self, metadata: _Optional[_Union[AudioMetadata, _Mapping]] = ..., chunk_data: _Optional[bytes] = ...) -> None: ...

 class AudioResponse(_message.Message):
-    __slots__ = ["duration", "language", "segments", "task", "text"]
+    __slots__ = ["task", "language", "duration", "segments", "text"]
     class Segment(_message.Message):
-        __slots__ = ["avg_logprob", "compression_ratio", "end", "id", "no_speech_prob", "seek", "start", "temperature", "text", "tokens", "transient"]
-        AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
-        COMPRESSION_RATIO_FIELD_NUMBER: _ClassVar[int]
-        END_FIELD_NUMBER: _ClassVar[int]
+        __slots__ = ["id", "seek", "start", "end", "text", "tokens", "temperature", "avg_logprob", "compression_ratio", "no_speech_prob", "transient"]
         ID_FIELD_NUMBER: _ClassVar[int]
-        NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
         SEEK_FIELD_NUMBER: _ClassVar[int]
         START_FIELD_NUMBER: _ClassVar[int]
-        TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
+        END_FIELD_NUMBER: _ClassVar[int]
         TEXT_FIELD_NUMBER: _ClassVar[int]
         TOKENS_FIELD_NUMBER: _ClassVar[int]
+        TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
+        AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
+        COMPRESSION_RATIO_FIELD_NUMBER: _ClassVar[int]
+        NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
         TRANSIENT_FIELD_NUMBER: _ClassVar[int]
-        avg_logprob: float
-        compression_ratio: float
-        end: float
         id: int
-        no_speech_prob: float
         seek: int
         start: float
-        temperature: float
+        end: float
         text: str
         tokens: _containers.RepeatedScalarFieldContainer[int]
+        temperature: float
+        avg_logprob: float
+        compression_ratio: float
+        no_speech_prob: float
         transient: bool
         def __init__(self, id: _Optional[int] = ..., seek: _Optional[int] = ..., start: _Optional[float] = ..., end: _Optional[float] = ..., text: _Optional[str] = ..., tokens: _Optional[_Iterable[int]] = ..., temperature: _Optional[float] = ..., avg_logprob: _Optional[float] = ..., compression_ratio: _Optional[float] = ..., no_speech_prob: _Optional[float] = ..., transient: bool = ...) -> None: ...
-    DURATION_FIELD_NUMBER: _ClassVar[int]
+    TASK_FIELD_NUMBER: _ClassVar[int]
     LANGUAGE_FIELD_NUMBER: _ClassVar[int]
+    DURATION_FIELD_NUMBER: _ClassVar[int]
     SEGMENTS_FIELD_NUMBER: _ClassVar[int]
-    TASK_FIELD_NUMBER: _ClassVar[int]
     TEXT_FIELD_NUMBER: _ClassVar[int]
-    duration: float
+    task: AudioTask
     language: str
+    duration: float
     segments: _containers.RepeatedCompositeFieldContainer[AudioResponse.Segment]
-    task: AudioTask
     text: str
     def __init__(self, task: _Optional[_Union[AudioTask, str]] = ..., language: _Optional[str] = ..., duration: _Optional[float] = ..., segments: _Optional[_Iterable[_Union[AudioResponse.Segment, _Mapping]]] = ..., text: _Optional[str] = ...) -> None: ...

-class AudioTask(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
-    __slots__ = []
