This repository has been archived by the owner on Jan 9, 2025. It is now read-only.

feat(mistral): add Mistral AI component (#204)
Because

- We want to integrate Mistral AI's models into our VDP pipeline
platform.

This commit

- Added the Mistral Component, which supports the following tasks:
(a) TASK_TEXT_GENERATION_CHAT models: [open-mixtral-8x22b,
open-mixtral-8x7b, open-mistral-7b, mistral-large-latest,
mistral-small-latest]
(b) TASK_TEXT_EMBEDDINGS

Note that the mistral-medium model will soon be deprecated, according to
this [documentation](https://docs.mistral.ai/getting-started/models/),
so it is better to skip it.

![Screenshot from 2024-07-09
14-13-08](https://github.com/instill-ai/component/assets/54308739/7013a36a-0d0f-4b64-8c4a-37510c8ff9d2)

---------

Co-authored-by: Chang, Hui-Tang <huitang.chang@instill.tech>
namwoam and donch1989 authored Jul 15, 2024
1 parent cba4aac commit 12aaf4f
Showing 12 changed files with 1,071 additions and 0 deletions.
97 changes: 97 additions & 0 deletions ai/mistralai/v0/README.mdx
@@ -0,0 +1,97 @@
---
title: "Mistral"
lang: "en-US"
draft: false
description: "Learn about how to set up a VDP Mistral component https://github.com/instill-ai/instill-core"
---

The Mistral component is an AI component that allows users to connect to the AI models served on the Mistral platform.
It can carry out the following tasks:

- [Text Generation Chat](#text-generation-chat)
- [Text Embeddings](#text-embeddings)



## Release Stage

`Alpha`



## Configuration

The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/ai/mistral/v0/config/definition.json).




## Setup


| Field | Field ID | Type | Note |
| :--- | :--- | :--- | :--- |
| API Key (required) | `api-key` | string | Fill in your Mistral API key. To find your keys, visit the Mistral platform page. |




## Supported Tasks

### Text Generation Chat

Provide text outputs in response to text inputs.


| Input | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Task ID (required) | `task` | string | `TASK_TEXT_GENERATION_CHAT` |
| Model Name (required) | `model-name` | string | The Mistral model to be used |
| Prompt (required) | `prompt` | string | The prompt text |
| System message | `system-message` | string | The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model’s behavior is set using a generic message as "You are a helpful assistant." |
| Prompt Images | `prompt-images` | array[string] | The prompt images (Note: Mistral models are not trained to process images, so any images will be omitted) |
| Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that the System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\} |
| Seed | `seed` | integer | The seed |
| Temperature | `temperature` | number | The temperature for sampling |
| Top K | `top-k` | integer | Integer defining the top tokens considered within the sample operation to create new text (Note: Mistral models do not support top-k sampling) |
| Max new tokens | `max-new-tokens` | integer | The maximum number of tokens for the model to generate |
| Top P | `top-p` | number | Float defining the tokens within the sample operation of text generation. Tokens are added from most probable to least probable until the sum of their probabilities exceeds top-p (default=0.5) |
| Safe | `safe` | boolean | Safe generation mode |
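As a rough illustration of how these input fields fit together, here is a minimal Go sketch that assembles a `TASK_TEXT_GENERATION_CHAT` input using the field IDs from the table above. The model name, parameter values, and messages are purely illustrative assumptions, not an official recipe format:

```go
package main

import "fmt"

// buildChatInput sketches a TASK_TEXT_GENERATION_CHAT input using the
// field IDs from the table above. Model name and parameter values are
// hypothetical examples for illustration only.
func buildChatInput(prompt string) map[string]any {
	return map[string]any{
		"model-name":     "open-mistral-7b",
		"prompt":         prompt,
		"temperature":    0.7,
		"top-p":          0.5,
		"max-new-tokens": 128,
		// Each chat-history entry follows the {"role": ..., "content": ...}
		// shape described in the table; the System Message is ignored
		// whenever this field is populated.
		"chat-history": []map[string]string{
			{"role": "user", "content": "Hello"},
			{"role": "assistant", "content": "Hi! How can I help?"},
		},
	}
}

func main() {
	input := buildChatInput("Summarise the conversation so far.")
	fmt.Println(len(input)) // 6 fields set
}
```

In the component's own tests, such a map is converted with `base.ConvertToStructpb` before execution.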



| Output | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Text | `text` | string | Model Output |
| Usage (optional) | `usage` | object | Token usage on the Mistral platform text generation models |






### Text Embeddings

Turn text into a vector of numbers that capture its meaning, unlocking use cases like semantic search.


| Input | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Task ID (required) | `task` | string | `TASK_TEXT_EMBEDDINGS` |
| Embedding Type (required) | `embedding-type` | string | Specifies the return type of embedding. |
| Model Name (required) | `model-name` | string | The Mistral embed model to be used |
| Text (required) | `text` | string | The text |



| Output | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Embedding | `embedding` | array[number] | Embedding of the input text |
| Usage (optional) | `usage` | object | Token usage on the Mistral platform embedding models |







28 changes: 28 additions & 0 deletions ai/mistralai/v0/assets/mistral.svg
26 changes: 26 additions & 0 deletions ai/mistralai/v0/client.go
@@ -0,0 +1,26 @@
package mistralai

import (
mistralSDK "github.com/gage-technologies/mistral-go"
"go.uber.org/zap"
"google.golang.org/protobuf/types/known/structpb"
)

type MistralClient struct {
sdkClient mistralClientInterface
logger *zap.Logger
}

type mistralClientInterface interface {
Embeddings(model string, input []string) (*mistralSDK.EmbeddingResponse, error)
Chat(model string, messages []mistralSDK.ChatMessage, params *mistralSDK.ChatRequestParams) (*mistralSDK.ChatCompletionResponse, error)
}

func newClient(apiKey string, logger *zap.Logger) MistralClient {
client := mistralSDK.NewMistralClientDefault(apiKey)
return MistralClient{sdkClient: client, logger: logger}
}

func getAPIKey(setup *structpb.Struct) string {
return setup.GetFields()[cfgAPIKey].GetStringValue()
}
167 changes: 167 additions & 0 deletions ai/mistralai/v0/component_test.go
@@ -0,0 +1,167 @@
package mistralai

import (
"context"
"encoding/json"
"fmt"
"testing"

qt "github.com/frankban/quicktest"
mistralSDK "github.com/gage-technologies/mistral-go"
"github.com/instill-ai/component/base"
"go.uber.org/zap"
"google.golang.org/protobuf/types/known/structpb"
)

type MockMistralClient struct {
}

func (m *MockMistralClient) Embeddings(model string, input []string) (*mistralSDK.EmbeddingResponse, error) {
return &mistralSDK.EmbeddingResponse{
ID: "embd-aad6fc62b17349b192ef09225058bc45",
Object: "list",
Data: []mistralSDK.EmbeddingObject{
{
Object: "embedding",
Embedding: []float64{1.0, 2.0, 3.0},
Index: 0,
},
},
Model: model,
Usage: mistralSDK.UsageInfo{
PromptTokens: len(input[0]),
TotalTokens: len(input[0]),
},
}, nil
}

func (m *MockMistralClient) Chat(model string, messages []mistralSDK.ChatMessage, params *mistralSDK.ChatRequestParams) (*mistralSDK.ChatCompletionResponse, error) {
message := fmt.Sprintf("Hello Mistral! message count: %d", len(messages))
promptToken := 0
for _, m := range messages {
promptToken += len(m.Content)
}
return &mistralSDK.ChatCompletionResponse{
ID: "cmpl-e5cc70bb28c444948073e77776eb30ef",
Object: "chat.completion",
Created: 1702256327,
Model: model,
Usage: mistralSDK.UsageInfo{
PromptTokens: promptToken,
CompletionTokens: len(message),
TotalTokens: promptToken + len(message),
},
Choices: []mistralSDK.ChatCompletionResponseChoice{
{
Index: 0,
Message: mistralSDK.ChatMessage{
Role: "assistant",
Content: message,
},
FinishReason: mistralSDK.FinishReasonStop,
},
},
}, nil
}

const (
apiKey = "### MOCK API KEY ###"
)

func TestComponent_Execute(t *testing.T) {
c := qt.New(t)

bc := base.Component{Logger: zap.NewNop()}
connector := Init(bc)

c.Run("ok - supported task", func(c *qt.C) {
task := TextGenerationTask

_, err := connector.CreateExecution(nil, nil, task)
c.Check(err, qt.IsNil)
})
c.Run("ok - supported task", func(c *qt.C) {
task := TextEmbeddingTask

_, err := connector.CreateExecution(nil, nil, task)
c.Check(err, qt.IsNil)
})

c.Run("nok - unsupported task", func(c *qt.C) {
task := "FOOBAR"

_, err := connector.CreateExecution(nil, nil, task)
c.Check(err, qt.ErrorMatches, "unsupported task")
})
}

func TestComponent_Tasks(t *testing.T) {
c := qt.New(t)

bc := base.Component{Logger: zap.NewNop()}
connector := Init(bc)
ctx := context.Background()

chatTc := struct {
input map[string]any
wantResp textGenerationOutput
}{
input: map[string]any{"model-name": "open-mixtral-8x22b", "prompt": "Hello World"},
wantResp: textGenerationOutput{Text: "Hello Mistral! message count: 1", Usage: chatUsage{InputTokens: len("Hello World"), OutputTokens: len("Hello Mistral! message count: 1")}},
}

c.Run("ok - task text generation", func(c *qt.C) {
setup, err := structpb.NewStruct(map[string]any{
"api-key": apiKey,
})
c.Assert(err, qt.IsNil)
e := &execution{
ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TextGenerationTask},
client: MistralClient{sdkClient: &MockMistralClient{}, logger: nil},
}
e.execute = e.taskTextGeneration
exec := &base.ExecutionWrapper{Execution: e}

pbIn, err := base.ConvertToStructpb(chatTc.input)
c.Assert(err, qt.IsNil)

got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn})
c.Assert(err, qt.IsNil)

wantJSON, err := json.Marshal(chatTc.wantResp)
c.Assert(err, qt.IsNil)
c.Check(wantJSON, qt.JSONEquals, got[0].AsMap())
})

embeddingTc := struct {
input map[string]any
wantResp textEmbeddingOutput
}{
input: map[string]any{"model-name": "mistral-embed", "text": "Hello World"},
wantResp: textEmbeddingOutput{Embedding: []float64{1.0, 2.0, 3.0}, Usage: textEmbeddingUsage{Tokens: len("Hello World")}},
}

c.Run("ok - task embedding", func(c *qt.C) {
setup, err := structpb.NewStruct(map[string]any{
"api-key": apiKey,
})
c.Assert(err, qt.IsNil)
e := &execution{
ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TextEmbeddingTask},
client: MistralClient{sdkClient: &MockMistralClient{}, logger: nil},
}
e.execute = e.taskTextEmbedding
exec := &base.ExecutionWrapper{Execution: e}

pbIn, err := base.ConvertToStructpb(embeddingTc.input)
c.Assert(err, qt.IsNil)

got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn})
c.Assert(err, qt.IsNil)

wantJSON, err := json.Marshal(embeddingTc.wantResp)
c.Assert(err, qt.IsNil)
c.Check(wantJSON, qt.JSONEquals, got[0].AsMap())
})

}
20 changes: 20 additions & 0 deletions ai/mistralai/v0/config/definition.json
@@ -0,0 +1,20 @@
{
"availableTasks": [
"TASK_TEXT_GENERATION_CHAT",
"TASK_TEXT_EMBEDDINGS"
],
"custom": false,
"documentationUrl": "https://www.instill.tech/docs/component/ai/mistralai",
"icon": "assets/mistral-ai.svg",
"id": "mistral-ai",
"public": true,
"title": "Mistral AI",
"description": "Connect the AI models served on the Mistral AI Platform",
"type": "COMPONENT_TYPE_AI",
"uid": "5e349d27-b00d-4961-86a3-249c30c06073",
"vendor": "Mistral AI",
"vendorAttributes": {},
"version": "0.1.0",
"sourceUrl": "https://github.com/instill-ai/component/blob/main/ai/mistralai/v0",
"releaseStage": "RELEASE_STAGE_ALPHA"
}
28 changes: 28 additions & 0 deletions ai/mistralai/v0/config/setup.json
@@ -0,0 +1,28 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": true,
"properties": {
"api-key": {
"description": "Fill in your Mistral API key. To find your keys, visit the Mistral AI platform page.",
"instillUpstreamTypes": [
"reference"
],
"instillAcceptFormats": [
"string"
],
"instillSecret": true,
"instillCredential": true,
"instillUIOrder": 0,
"title": "API Key",
"type": "string"
}
},
"required": [
"api-key"
],
"instillEditOnNodeFields": [
"api-key"
],
"title": "Mistral AI Connection",
"type": "object"
}
