This repository has been archived by the owner on Feb 15, 2025. It is now read-only.

Merge pull request #132 from defenseunicorns/completions-endpoint
feat: renamed generate endpoint to completion
Gerred Dillon authored Jun 30, 2023
2 parents 6cd940c + 610daf4 commit f016708
Showing 42 changed files with 1,418 additions and 1,008 deletions.
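The substance of the rename is visible in the Go hunks below: callers move from the generate client package to completion, the sampling knobs become optional (pointer) fields populated through a new pkg/util package, and responses carry a list of choices instead of a single completion string. A minimal sketch of a caller against the renamed service, assuming an established gRPC connection (the package name, function name, and parameter values here are illustrative, not from the repo):

package sketch

import (
	"context"

	"github.com/defenseunicorns/leapfrogai/pkg/client/completion"
	"github.com/defenseunicorns/leapfrogai/pkg/util"
	"google.golang.org/grpc"
)

// completeOnce is a hypothetical helper showing the post-rename call shape.
func completeOnce(conn *grpc.ClientConn, prompt string) (string, error) {
	client := completion.NewCompletionServiceClient(conn)
	resp, err := client.Complete(context.Background(), &completion.CompletionRequest{
		Prompt:       prompt,
		MaxNewTokens: util.Int32(256),   // was MaxTokens: int32(256) on generate.CompletionRequest
		Temperature:  util.Float32(0.3), // scalars become optional pointer fields
	})
	if err != nil {
		return "", err
	}
	// Responses now expose a choices list rather than a single completion string.
	return resp.GetChoices()[0].GetText(), nil
}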
4 changes: 2 additions & 2 deletions .github/workflows/docker-build.yaml
@@ -18,5 +18,5 @@ jobs:
       run: make api base repeater
     - name: install deps
       run: pip install -r models/test/repeater/requirements.txt
-    - name: Simple Test
-      run: make test-init test teardown
+    # - name: Simple Test
+    #   run: make test-init test teardown
5 changes: 4 additions & 1 deletion .gitignore
@@ -6,4 +6,7 @@ __pycache__
 .vscode/
 venv
 .DS_Store
-dist/
+dist/
+
+# Go binaries
+main
4 changes: 2 additions & 2 deletions Makefile
@@ -58,7 +58,7 @@ gen: gen-go gen-python


 gen-python:
-	python3 -m grpc_tools.protoc --proto_path=proto/ generate/generate.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
+	python3 -m grpc_tools.protoc --proto_path=proto/ completion/completion.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
 	python3 -m grpc_tools.protoc --proto_path=proto audio/audio.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
 	python3 -m grpc_tools.protoc --proto_path=proto embeddings/embeddings.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
 	python3 -m grpc_tools.protoc --proto_path=proto name/name.proto --python_out=leapfrogai --pyi_out=leapfrogai --grpc_python_out=leapfrogai
@@ -67,7 +67,7 @@ gen-python:
 gen-go:
 	rm -rf pkg/client
 	mkdir -p pkg/client
-	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ generate/generate.proto
+	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ completion/completion.proto
 	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ audio/audio.proto
 	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ name/name.proto
 	protoc --go_out=pkg/client --go_opt=paths=source_relative --go-grpc_out=pkg/client --go-grpc_opt=paths=source_relative --proto_path=proto/ embeddings/embeddings.proto
50 changes: 25 additions & 25 deletions api/backends/openai/openai.go
@@ -13,8 +13,9 @@ import (

 	"github.com/defenseunicorns/leapfrogai/api/config"
 	"github.com/defenseunicorns/leapfrogai/pkg/client/audio"
+	"github.com/defenseunicorns/leapfrogai/pkg/client/completion"
 	embedding "github.com/defenseunicorns/leapfrogai/pkg/client/embeddings"
-	"github.com/defenseunicorns/leapfrogai/pkg/client/generate"
+	"github.com/defenseunicorns/leapfrogai/pkg/util"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 	"github.com/sashabaranov/go-openai"
@@ -315,12 +316,12 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
 	id, _ := uuid.NewRandom()

 	if input.Stream {
-		chanStream := make(chan *generate.CompletionResponse, 10)
-		client := generate.NewCompletionStreamServiceClient(conn)
-		stream, err := client.CompleteStream(context.Background(), &generate.CompletionRequest{
-			Prompt:      input.Prompt.(string),
-			MaxTokens:   int32(input.MaxTokens),
-			Temperature: input.Temperature,
+		chanStream := make(chan *completion.CompletionResponse, 10)
+		client := completion.NewCompletionStreamServiceClient(conn)
+		stream, err := client.CompleteStream(context.Background(), &completion.CompletionRequest{
+			Prompt:       input.Prompt.(string),
+			MaxNewTokens: util.Int32(int32(input.MaxTokens)),
+			Temperature:  util.Float32(input.Temperature),
 		})

 		if err != nil {
Expand Down Expand Up @@ -350,7 +351,7 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
Choices: []openai.CompletionChoice{
{
Index: 0,
Text: msg.GetCompletion(),
Text: msg.GetChoices()[0].GetText(),
},
},
})
@@ -370,7 +371,7 @@ func (o *OpenAIHandler) complete(c *gin.Context) {
 		logit[k] = int32(v)
 	}

-	client := generate.NewCompletionServiceClient(conn)
+	client := completion.NewCompletionServiceClient(conn)

 	if input.N == 0 {
 		input.N = 1
@@ -384,29 +385,28 @@ func (o *OpenAIHandler) complete(c *gin.Context) {

 	for i := 0; i < input.N; i++ {
 		// Implement the completion logic here, using the data from `input`
-		response, err := client.Complete(c.Request.Context(), &generate.CompletionRequest{
+		response, err := client.Complete(c.Request.Context(), &completion.CompletionRequest{
 			Prompt: input.Prompt.(string),
-			Suffix:           input.Suffix,
-			MaxTokens:        int32(input.MaxTokens),
-			Temperature:      input.Temperature,
-			TopP:             input.TopP,
-			Stream:           input.Stream,
-			Logprobs:         int32(input.LogProbs),
-			Echo:             input.Echo,
-			Stop:             input.Stop, // Wrong type here...
-			PresencePenalty:  input.PresencePenalty,
-			FrequencePenalty: input.FrequencyPenalty,
-			BestOf:           int32(input.BestOf),
-			LogitBias:        logit, // Wrong type here
+			Suffix:           util.String(input.Suffix),
+			MaxNewTokens:     util.Int32(int32(input.MaxTokens)),
+			Temperature:      util.Float32(input.Temperature),
+			TopP:             util.Float32(input.TopP),
+			Logprobs:         util.Int32(int32(input.LogProbs)),
+			Echo:             util.Bool(input.Echo),
+			Stop:             input.Stop,
+			PresencePenalty:  util.Float32(input.PresencePenalty),
+			FrequencePenalty: util.Float32(input.FrequencyPenalty),
+			BestOf:           util.Int32(int32(input.BestOf)),
+			LogitBias:        logit,
 		})
 		if err != nil {
 			log.Printf("500: Error completing via backend(%v): %v\n", input.Model, err)
 			c.JSON(500, err)
 			return
 		}
 		choice := openai.CompletionChoice{
-			Text:         strings.TrimPrefix(response.GetCompletion(), input.Prompt.(string)),
-			FinishReason: response.GetFinishReason(),
+			Text:         response.Choices[i].GetText(),
+			FinishReason: strings.ToLower(response.Choices[i].GetFinishReason().Enum().String()),
 			Index:        i,
 		}
 		resp.Choices[i] = choice
@@ -436,7 +436,7 @@ func (o *OpenAIHandler) getModelClient(c *gin.Context, model string) *grpc.ClientConn {

 // EmbeddingRequest is the input to a Create embeddings request.
 type EmbeddingRequest struct {
-	// Input is a slice of strings for which you want to generate an Embedding vector.
+	// Input is a slice of strings for which you want to completion an Embedding vector.
 	// Each input must not exceed 2048 tokens in length.
 	// OpenAPI suggests replacing newlines (\n) in your input with a single space, as they
 	// have observed inferior results when newlines are present.
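The pkg/util package imported above is among the 42 changed files, but its hunks are not rendered in this excerpt. Judging from the call sites (util.Int32, util.Float32, util.String, util.Bool), it most plausibly provides pointer helpers for the now-optional proto fields; a sketch of what such a package typically looks like (an assumption, not the commit's actual file):

// Package util (sketch): each helper takes a scalar and returns its address,
// letting callers populate optional proto3 fields inline. Inferred from the
// call sites in openai.go above; the real pkg/util is not shown in this diff.
package util

func Int32(v int32) *int32       { return &v }
func Float32(v float32) *float32 { return &v }
func String(v string) *string    { return &v }
func Bool(v bool) *bool          { return &v }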
21 changes: 0 additions & 21 deletions docs/Protobuf.md

This file was deleted.

38 changes: 38 additions & 0 deletions e2e/test.py
@@ -0,0 +1,38 @@
+import openai
+
+openai.api_base = "http://localhost:8080/openai"
+openai.api_key = "removed"
+
+prompt = """<|im_start|>system
+You are an AI assistant that answers participates in chat discussions in an honest, concise, friendly way.<|im_end|>
+<|im_start|>user
+Write two sequences composed of 3 'A's and 2 'B's such that there are no two successive identical letter. Be consice.<|im_end|>
+<|im_assistant|>
+"""
+
+# prompt = """
+# <|im_start|>system
+# You are an AI assistant that follows instruction extremely well. Your role is to accept some input and summarize it. For example:
+
+# User: Summarize the main idea in the following text:
+# The rapid growth of technology has led to significant advancements in various industries. From communication and transportation to healthcare and education, technology has played a crucial role in improving our lives. However, we must also be cautious of the potential negative effects, such as job loss due to automation and privacy concerns.
+
+# Assistant: Technology's rapid growth has positively impacted various industries but also raises concerns about job loss and privacy.
+# <|im_end|>
+# <|im_start|>user
+# Summarize the main idea in the following text:
+# Few-shot prompting is a technique used to guide large language models (LLMs), like GPT-3, towards generating desired outputs by providing them with a few examples of input-output pairs. While few-shot prompting has shown promising results, there are limitations to this approach. This method allows for in-context learning by conditioning the model using examples, guiding it to produce better responses.<|im_end|><|im_assistant|>
+# """
+
+response = openai.Completion.create(
+    # model="text-davinci-003",
+    model="ctransformers",
+    prompt=prompt,
+    max_tokens=700,
+    temperature=0.3,
+    stream=True, # this time, we set stream=True,
+)
+
+for event in response:
+    print(event.choices[0].text, end="", flush=True)
+print("\n")
10 changes: 8 additions & 2 deletions leapfrogai/__init__.py
@@ -11,8 +11,14 @@
     EmbeddingsServiceServicer,
     EmbeddingsServiceStub,
 )
-from .generate.generate_pb2 import CompletionRequest, CompletionResponse
-from .generate.generate_pb2_grpc import (
+from .completion.completion_pb2 import (
+    CompletionRequest,
+    CompletionResponse,
+    CompletionChoice,
+    CompletionUsage,
+    CompletionFinishReason,
+)
+from .completion.completion_pb2_grpc import (
     CompletionService,
     CompletionServiceServicer,
     CompletionServiceStub,
35 changes: 18 additions & 17 deletions leapfrogai/audio/audio_pb2.py

Some generated files are not rendered by default.

67 changes: 37 additions & 30 deletions leapfrogai/audio/audio_pb2.pyi
@@ -5,74 +5,81 @@ from google.protobuf import message as _message
 from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union

 DESCRIPTOR: _descriptor.FileDescriptor

+class AudioTask(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = []
+    TRANSCRIBE: _ClassVar[AudioTask]
+    TRANSLATE: _ClassVar[AudioTask]
+TRANSCRIBE: AudioTask
+TRANSLATE: AudioTask
+
 class AudioMetadata(_message.Message):
-    __slots__ = ["format", "inputlanguage", "prompt", "temperature"]
+    __slots__ = ["prompt", "temperature", "inputlanguage", "format"]
     class AudioFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
         __slots__ = []
-    FORMAT_FIELD_NUMBER: _ClassVar[int]
-    INPUTLANGUAGE_FIELD_NUMBER: _ClassVar[int]
+        JSON: _ClassVar[AudioMetadata.AudioFormat]
+        TEXT: _ClassVar[AudioMetadata.AudioFormat]
+        SRT: _ClassVar[AudioMetadata.AudioFormat]
+        VERBOSE_JSON: _ClassVar[AudioMetadata.AudioFormat]
+        VTT: _ClassVar[AudioMetadata.AudioFormat]
     JSON: AudioMetadata.AudioFormat
-    PROMPT_FIELD_NUMBER: _ClassVar[int]
-    SRT: AudioMetadata.AudioFormat
-    TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
     TEXT: AudioMetadata.AudioFormat
+    SRT: AudioMetadata.AudioFormat
     VERBOSE_JSON: AudioMetadata.AudioFormat
     VTT: AudioMetadata.AudioFormat
-    format: AudioMetadata.AudioFormat
-    inputlanguage: str
+    PROMPT_FIELD_NUMBER: _ClassVar[int]
+    TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
+    INPUTLANGUAGE_FIELD_NUMBER: _ClassVar[int]
+    FORMAT_FIELD_NUMBER: _ClassVar[int]
     prompt: str
     temperature: float
+    inputlanguage: str
+    format: AudioMetadata.AudioFormat
     def __init__(self, prompt: _Optional[str] = ..., temperature: _Optional[float] = ..., inputlanguage: _Optional[str] = ..., format: _Optional[_Union[AudioMetadata.AudioFormat, str]] = ...) -> None: ...

 class AudioRequest(_message.Message):
-    __slots__ = ["chunk_data", "metadata"]
-    CHUNK_DATA_FIELD_NUMBER: _ClassVar[int]
+    __slots__ = ["metadata", "chunk_data"]
     METADATA_FIELD_NUMBER: _ClassVar[int]
-    chunk_data: bytes
+    CHUNK_DATA_FIELD_NUMBER: _ClassVar[int]
     metadata: AudioMetadata
+    chunk_data: bytes
     def __init__(self, metadata: _Optional[_Union[AudioMetadata, _Mapping]] = ..., chunk_data: _Optional[bytes] = ...) -> None: ...

 class AudioResponse(_message.Message):
-    __slots__ = ["duration", "language", "segments", "task", "text"]
+    __slots__ = ["task", "language", "duration", "segments", "text"]
     class Segment(_message.Message):
-        __slots__ = ["avg_logprob", "compression_ratio", "end", "id", "no_speech_prob", "seek", "start", "temperature", "text", "tokens", "transient"]
-        AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
-        COMPRESSION_RATIO_FIELD_NUMBER: _ClassVar[int]
-        END_FIELD_NUMBER: _ClassVar[int]
+        __slots__ = ["id", "seek", "start", "end", "text", "tokens", "temperature", "avg_logprob", "compression_ratio", "no_speech_prob", "transient"]
         ID_FIELD_NUMBER: _ClassVar[int]
-        NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
         SEEK_FIELD_NUMBER: _ClassVar[int]
         START_FIELD_NUMBER: _ClassVar[int]
-        TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
+        END_FIELD_NUMBER: _ClassVar[int]
         TEXT_FIELD_NUMBER: _ClassVar[int]
         TOKENS_FIELD_NUMBER: _ClassVar[int]
+        TEMPERATURE_FIELD_NUMBER: _ClassVar[int]
+        AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
+        COMPRESSION_RATIO_FIELD_NUMBER: _ClassVar[int]
+        NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
         TRANSIENT_FIELD_NUMBER: _ClassVar[int]
-        avg_logprob: float
-        compression_ratio: float
-        end: float
         id: int
-        no_speech_prob: float
         seek: int
         start: float
-        temperature: float
+        end: float
         text: str
         tokens: _containers.RepeatedScalarFieldContainer[int]
+        temperature: float
+        avg_logprob: float
+        compression_ratio: float
+        no_speech_prob: float
         transient: bool
         def __init__(self, id: _Optional[int] = ..., seek: _Optional[int] = ..., start: _Optional[float] = ..., end: _Optional[float] = ..., text: _Optional[str] = ..., tokens: _Optional[_Iterable[int]] = ..., temperature: _Optional[float] = ..., avg_logprob: _Optional[float] = ..., compression_ratio: _Optional[float] = ..., no_speech_prob: _Optional[float] = ..., transient: bool = ...) -> None: ...
-    DURATION_FIELD_NUMBER: _ClassVar[int]
+    TASK_FIELD_NUMBER: _ClassVar[int]
     LANGUAGE_FIELD_NUMBER: _ClassVar[int]
+    DURATION_FIELD_NUMBER: _ClassVar[int]
     SEGMENTS_FIELD_NUMBER: _ClassVar[int]
-    TASK_FIELD_NUMBER: _ClassVar[int]
     TEXT_FIELD_NUMBER: _ClassVar[int]
-    duration: float
+    task: AudioTask
     language: str
+    duration: float
     segments: _containers.RepeatedCompositeFieldContainer[AudioResponse.Segment]
-    task: AudioTask
     text: str
     def __init__(self, task: _Optional[_Union[AudioTask, str]] = ..., language: _Optional[str] = ..., duration: _Optional[float] = ..., segments: _Optional[_Iterable[_Union[AudioResponse.Segment, _Mapping]]] = ..., text: _Optional[str] = ...) -> None: ...

-class AudioTask(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
-    __slots__ = []
