This PR adds the initial draft of the RAGEngine CRD in Kaito. A RAGEngine CR defines all resources needed to run RAG on top of an LLM inference service. Upon creation of a RAGEngine CR, a new controller creates a deployment that runs a RAG engine instance. The instance exposes HTTP endpoints for both the `index` and `query` services. To convert the input index data into vectors, the instance can either call a public embedding service or run a local embedding model on GPU. It can also connect to a vector DB instance to persist the embedding vectors; by default it uses an in-memory vector DB, so the data is not persisted. The instance uses the `llamaIndex` library to orchestrate the workflow. Once the RAGEngine instance is up and running, users should send questions to the `query` endpoint of the RAG instance instead of the normal `chat` endpoint of the inference service. The RAGEngine is intended to be "standalone": it can use any public inference service or an inference service hosted by a Kaito workspace. The RAG engine instance is designed to retrieve context for prompts from unstructured data (arbitrary index data provided by the users). Retrieving from structured data or search engines is out of scope for now.
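As a rough illustration of the client-side flow described above, the sketch below posts a question to the RAG instance's `query` endpoint instead of the inference service's `chat` endpoint. The service address, the `/query` path, and the JSON field name are assumptions made for illustration; the actual request schema is defined by the RAG engine service, not by this CRD.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical request body; the real field names are defined by the RAG engine service.
	body, _ := json.Marshal(map[string]string{"query": "Summarize the indexed design docs."})

	// Hypothetical in-cluster address of the query service created by the RAG engine.
	resp, err := http.Post("http://ragengine-sample-query/query", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	answer, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(answer))
}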
Showing 7 changed files with 591 additions and 8 deletions.
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

package v1alpha1

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type StorageSpec struct {
	// TODO: add vendor specific APIs for accessing vector DB services here.
}

type RemoteEmbeddingSpec struct {
	// URL points to a publicly available embedding service, such as OpenAI.
	URL string `json:"url"`
	// AccessSecret is the name of the secret that contains the service access token.
	// +optional
	AccessSecret string `json:"accessSecret,omitempty"`
}

type LocalEmbeddingSpec struct {
	// Image is the name of the containerized embedding model image.
	// +optional
	Image string `json:"image,omitempty"`
	// ImagePullSecret is the name of the secret used to pull the embedding model image.
	// +optional
	ImagePullSecret string `json:"imagePullSecret,omitempty"`
	// ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5.
	// When this field is specified, the RAG engine will download the embedding model
	// from the huggingface repository during startup. The embedding model will not persist in local storage.
	// Note that if Image is specified, ModelID should not be specified and vice versa.
	// +optional
	ModelID string `json:"modelID,omitempty"`
	// ModelAccessSecret is the name of the secret that contains the huggingface access token.
	// +optional
	ModelAccessSecret string `json:"modelAccessSecret,omitempty"`
}

type EmbeddingSpec struct {
	// Remote specifies how to generate embeddings for index data using a remote service.
	// Note that either Remote or Local needs to be specified, not both.
	// +optional
	Remote *RemoteEmbeddingSpec `json:"remote,omitempty"`
	// Local specifies how to generate embeddings for index data using a model run locally.
	// +optional
	Local *LocalEmbeddingSpec `json:"local,omitempty"`
}
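// Not part of this PR: a minimal sketch of the exclusivity rules stated in the
// comments above, i.e. exactly one of Remote or Local, and (assuming at least
// one of the two must be set) exactly one of Image or ModelID for a local
// model. The real validation, e.g. an admission webhook, may look different.
func (e *EmbeddingSpec) validEmbedding() bool {
	if (e.Remote == nil) == (e.Local == nil) {
		return false // exactly one of Remote or Local must be specified
	}
	if e.Local != nil && (e.Local.Image == "") == (e.Local.ModelID == "") {
		return false // exactly one of Image or ModelID must be specified for a local model
	}
	return true
}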
type InferenceServiceSpec struct {
	// URL points to a running inference service endpoint which accepts http(s) payload.
	URL string `json:"url"`
	// AccessSecret is the name of the secret that contains the service access token.
	// +optional
	AccessSecret string `json:"accessSecret,omitempty"`
}

type RAGEngineSpec struct {
	// Compute specifies the dedicated GPU resource used by an embedding model running locally, if required.
	// +optional
	Compute *ResourceSpec `json:"compute,omitempty"`
	// Storage specifies how to access the vector database used to save the embedding vectors.
	// If this field is not specified, by default, an in-memory vector DB will be used.
	// The data will not be persisted.
	// +optional
	Storage *StorageSpec `json:"storage,omitempty"`
	// Embedding specifies whether the RAG engine generates embedding vectors using a remote service
	// or using an embedding model running locally.
	Embedding *EmbeddingSpec `json:"embedding"`
	// InferenceService points to the LLM inference service that the RAG engine queries with the retrieved context.
	InferenceService *InferenceServiceSpec `json:"inferenceService"`
	// QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the
	// inference service. If not specified, a default service name will be created by the RAG engine.
	// +optional
	QueryServiceName string `json:"queryServiceName,omitempty"`
	// IndexServiceName is the name of the service which exposes the endpoint for users to input the index data
	// to generate embeddings. If not specified, a default service name will be created by the RAG engine.
	// +optional
	IndexServiceName string `json:"indexServiceName,omitempty"`
}
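// Not part of this PR: an example of how these fields fit together for a local
// huggingface embedding model and an external inference endpoint. The URL and
// secret names are placeholders, not defaults shipped with the RAG engine.
func exampleRAGEngineSpec() *RAGEngineSpec {
	return &RAGEngineSpec{
		Embedding: &EmbeddingSpec{
			Local: &LocalEmbeddingSpec{
				ModelID:           "BAAI/bge-small-en-v1.5", // example from the ModelID comment above
				ModelAccessSecret: "hf-token",               // hypothetical secret name
			},
		},
		InferenceService: &InferenceServiceSpec{
			URL:          "https://my-inference.example.com/v1/chat/completions", // placeholder endpoint
			AccessSecret: "inference-token",                                       // hypothetical secret name
		},
		// Compute, Storage, QueryServiceName, and IndexServiceName are optional;
		// omitting Storage falls back to the in-memory, non-persistent vector DB.
	}
}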
// RAGEngineStatus defines the observed state of RAGEngine
type RAGEngineStatus struct {
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

// RAGEngine is the Schema for the ragengine API
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine
// +kubebuilder:storageversion
type RAGEngine struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec *RAGEngineSpec `json:"spec,omitempty"`

	Status RAGEngineStatus `json:"status,omitempty"`
}

// RAGEngineList contains a list of RAGEngine
// +kubebuilder:object:root=true
type RAGEngineList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []RAGEngine `json:"items"`
}

func init() {
	SchemeBuilder.Register(&RAGEngine{}, &RAGEngineList{})
}
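Outside this file, the new controller reconciling RAGEngine objects would record progress in `Status.Conditions`. Below is a minimal, self-contained sketch using the standard apimachinery condition helper; the condition type and reason strings are assumptions and not part of this commit.

package v1alpha1

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// markReady is a hypothetical helper; the actual controller logic is not part of this commit.
func markReady(ragEngine *RAGEngine) {
	meta.SetStatusCondition(&ragEngine.Status.Conditions, metav1.Condition{
		Type:    "RAGEngineReady", // assumed condition type
		Status:  metav1.ConditionTrue,
		Reason:  "DeploymentRunning", // assumed reason
		Message: "RAG engine deployment is up and serving the index and query endpoints",
	})
}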