Skip to content
This repository has been archived by the owner on Jan 9, 2025. It is now read-only.

Commit

Permalink
feat(artifact): improve artifact component (#289)
Browse files Browse the repository at this point in the history
Because

- ins-5822
  - we want users to be able to upload file by base64 encoded raw text

- ins-5825
  - we want users to be able to upload multiple files with 1 trigger

- other
  - CI keeps failing because of timeout

This commit

- ins-5822
  - judge the file type from file name rather than the file extension

- ins-5825
  - add the task upload files in artifact component

- other
  - increase golangci-lint timeout length
  • Loading branch information
chuang8511 authored Aug 15, 2024
1 parent c0d8d31 commit 44ea196
Show file tree
Hide file tree
Showing 10 changed files with 568 additions and 19 deletions.
1 change: 1 addition & 0 deletions .github/workflows/golangci-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ jobs:
uses: golangci/golangci-lint-action@v3
with:
version: v1.54
args: --timeout=5m
25 changes: 24 additions & 1 deletion data/artifact/v0/README.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The Artifact component is a data component that allows users to manipulate and s
It can carry out the following tasks:

- [Upload File](#upload-file)
- [Upload Files](#upload-files)
- [Get Files Metadata](#get-files-metadata)
- [Get Chunks Metadata](#get-chunks-metadata)
- [Get File In Markdown](#get-file-in-markdown)
Expand Down Expand Up @@ -51,14 +52,36 @@ Upload and process the files into chunks into Catalog

| Output | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| File | `file` | object | Result of uploading files into catalog |
| File | `file` | object | Result of uploading file into catalog |
| Status | `status` | boolean | The status of trigger file processing, if succeeded, return true |






### Upload Files

Upload multiple files into a Catalog and process them into chunks


| Input | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Task ID (required) | `task` | string | `TASK_UPLOAD_FILES` |
| Options (required) | `options` | object | Choose whether to upload the files to an existing catalog or to create a new catalog |



| Output | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Files | `files` | array[object] | Files metadata in catalog |
| Status | `status` | boolean | The status of triggering file processing; returns true only if ALL files succeeded |






### Get Files Metadata

Get the metadata of the files in the catalog
Expand Down
140 changes: 130 additions & 10 deletions data/artifact/v0/artifact_operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package artifact
import (
"context"
"fmt"
"log"
"strings"
"time"

Expand All @@ -13,7 +14,7 @@ import (
"google.golang.org/protobuf/types/known/structpb"
)

type UploadFilesInput struct {
type UploadFileInput struct {
Options UploadData `json:"options"`
}

Expand All @@ -28,11 +29,11 @@ type UploadData struct {
Tags []string `json:"tags"`
}

func (input *UploadFilesInput) isNewCatalog() bool {
func (input *UploadFileInput) isNewCatalog() bool {
return input.Options.Option == "create new catalog"
}

type UploadFilesOutput struct {
type UploadFileOutput struct {
File FileOutput `json:"file"`
Status bool `json:"status"`
}
Expand All @@ -51,9 +52,9 @@ type Connection interface {
Close() error
}

func (e *execution) uploadFiles(input *structpb.Struct) (*structpb.Struct, error) {
func (e *execution) uploadFile(input *structpb.Struct) (*structpb.Struct, error) {

inputStruct := UploadFilesInput{}
inputStruct := UploadFileInput{}

err := base.ConvertFromStructpb(input, &inputStruct)
if err != nil {
Expand All @@ -77,22 +78,27 @@ func (e *execution) uploadFiles(input *structpb.Struct) (*structpb.Struct, error
})

if err != nil {
return nil, fmt.Errorf("failed to create new catalog: %w", err)
if strings.Contains(err.Error(), "knowledge base name already exists") {
log.Println("Catalog already exists, skipping creation")
} else {
return nil, fmt.Errorf("failed to create new catalog: %w", err)
}
}
}

output := UploadFilesOutput{
output := UploadFileOutput{
File: FileOutput{},
}
file := inputStruct.Options.File

contentType, err := util.GetContentTypeFromBase64(file)
fileType, err := util.GetFileType(file, inputStruct.Options.FileName)
if err != nil {
return nil, fmt.Errorf("failed to get content type: %w", err)
return nil, fmt.Errorf("failed to get file type: %w", err)
}
typeString := "FILE_TYPE_" + strings.ToUpper(fileType)

typeString := "FILE_TYPE_" + strings.ToUpper(util.TransformContentTypeToFileExtension(contentType))
content := util.GetFileBase64Content(file)

typePB := artifactPB.FileType_value[typeString]
filePB := &artifactPB.File{
Name: inputStruct.Options.FileName,
Expand Down Expand Up @@ -133,6 +139,120 @@ func (e *execution) uploadFiles(input *structpb.Struct) (*structpb.Struct, error
return base.ConvertToStructpb(output)
}

// UploadFilesInput is the decoded input of the multi-file upload task.
// All settings are nested under the "options" key.
type UploadFilesInput struct {
	// Options carries the catalog selection and the files to upload.
	Options UploadMultipleData `json:"options"`
}

// UploadMultipleData describes one multi-file upload request: which catalog
// to target and the files to put into it.
type UploadMultipleData struct {
	// Option selects the upload mode; the value "create new catalog" makes
	// the upload create the catalog first.
	Option string `json:"option"`
	// Namespace is the namespace ID sent with the catalog requests.
	Namespace string `json:"namespace"`
	// CatalogID identifies the catalog that receives the files.
	CatalogID string `json:"catalog-id"`
	// Files holds the base64 encoded content of each file.
	Files []string `json:"files"`
	// FileNames holds the name of each file, index-aligned with Files.
	FileNames []string `json:"file-names"`
	// Description is the catalog description used when creating a catalog.
	Description string `json:"description"`
	// Tags are the catalog tags used when creating a catalog.
	Tags []string `json:"tags"`
}

// UploadFilesOutput is the result of the multi-file upload task.
type UploadFilesOutput struct {
	// Files lists the metadata of every uploaded file, in upload order.
	Files []FileOutput `json:"files"`
	// Status is true when the request that triggers processing of the
	// uploaded files returned no error.
	Status bool `json:"status"`
}

// uploadFiles uploads several base64 encoded files into one catalog and then
// triggers processing for all of them with a single request.
//
// The input is decoded into UploadFilesInput. Files and FileNames must have
// the same length; entry i of FileNames names entry i of Files. When the
// option is "create new catalog" the catalog is created first; an
// "already exists" response from that call is tolerated and the upload
// continues against the existing catalog.
//
// It returns an UploadFilesOutput converted to a structpb.Struct. An error is
// returned when the input cannot be decoded, the file and name counts differ,
// the catalog cannot be created, a file type cannot be determined, or an
// upload fails. A failure to trigger processing is not returned as an error:
// it is logged and reported through Status=false, with the uploaded files
// still listed in the output.
func (e *execution) uploadFiles(input *structpb.Struct) (*structpb.Struct, error) {
	artifactClient, connection := e.client, e.connection
	// Registered before any validation so the connection is released on every
	// return path, including the early input errors below.
	defer connection.Close()

	inputStruct := UploadFilesInput{}
	if err := base.ConvertFromStructpb(input, &inputStruct); err != nil {
		return nil, fmt.Errorf("failed to convert input to struct: %w", err)
	}
	opts := inputStruct.Options

	if len(opts.Files) != len(opts.FileNames) {
		return nil, fmt.Errorf("number of files and file names do not match")
	}

	// One deadline covers catalog creation, all uploads and the processing trigger.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	ctx = metadata.NewOutgoingContext(ctx, getRequestMetadata(e.SystemVariables))

	if opts.Option == "create new catalog" {
		_, err := artifactClient.CreateCatalog(ctx, &artifactPB.CreateCatalogRequest{
			NamespaceId: opts.Namespace,
			Name:        opts.CatalogID,
			Description: opts.Description,
			Tags:        opts.Tags,
		})
		if err != nil {
			// An existing catalog is fine: fall through and upload into it.
			if !strings.Contains(err.Error(), "knowledge base name already exists") {
				return nil, fmt.Errorf("failed to create new catalog: %w", err)
			}
			log.Println("Catalog already exists, skipping creation")
		}
	}

	// Non-nil, pre-sized slices: the final length is known up front.
	output := UploadFilesOutput{
		Files: make([]FileOutput, 0, len(opts.Files)),
	}
	fileUIDs := make([]string, 0, len(opts.Files))

	for i, file := range opts.Files {
		fileName := opts.FileNames[i]

		fileType, err := util.GetFileType(file, fileName)
		if err != nil {
			return nil, fmt.Errorf("failed to get file type of %q: %w", fileName, err)
		}
		// A type string missing from the enum map yields the zero enum value.
		typeString := "FILE_TYPE_" + strings.ToUpper(fileType)

		uploadRes, err := artifactClient.UploadCatalogFile(ctx, &artifactPB.UploadCatalogFileRequest{
			NamespaceId: opts.Namespace,
			CatalogId:   opts.CatalogID,
			File: &artifactPB.File{
				Name:    fileName,
				Type:    artifactPB.FileType(artifactPB.FileType_value[typeString]),
				Content: util.GetFileBase64Content(file),
			},
		})
		if err != nil {
			return nil, fmt.Errorf("failed to upload file %q: %w", fileName, err)
		}

		uploadedFilePB := uploadRes.File

		fileUIDs = append(fileUIDs, uploadedFilePB.FileUid)
		output.Files = append(output.Files, FileOutput{
			FileUID:    uploadedFilePB.FileUid,
			FileName:   uploadedFilePB.Name,
			FileType:   artifactPB.FileType_name[int32(uploadedFilePB.Type)],
			CreateTime: uploadedFilePB.CreateTime.AsTime().Format(time.RFC3339),
			UpdateTime: uploadedFilePB.UpdateTime.AsTime().Format(time.RFC3339),
			Size:       uploadedFilePB.Size,
			CatalogID:  opts.CatalogID,
		})
	}

	_, err := artifactClient.ProcessCatalogFiles(ctx, &artifactPB.ProcessCatalogFilesRequest{
		FileUids: fileUIDs,
	})
	if err != nil {
		// Best effort: the uploads already succeeded, so report the failure
		// through Status instead of discarding the uploaded file metadata.
		log.Printf("failed to trigger processing of uploaded files: %v", err)
	}
	output.Status = err == nil

	return base.ConvertToStructpb(output)
}

type GetFilesMetadataInput struct {
Namespace string `json:"namespace"`
CatalogID string `json:"catalog-id"`
Expand Down
1 change: 1 addition & 0 deletions data/artifact/v0/config/definition.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"availableTasks": [
"TASK_UPLOAD_FILE",
"TASK_UPLOAD_FILES",
"TASK_GET_FILES_METADATA",
"TASK_GET_CHUNKS_METADATA",
"TASK_GET_FILE_IN_MARKDOWN",
Expand Down
Loading

0 comments on commit 44ea196

Please sign in to comment.