Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cybertron: add example for cybertron embeddings #543

Merged
merged 1 commit into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ cover.cov
vendor/*

embeddings/cybertron/models/*
examples/cybertron-embedding-example/models/*
140 changes: 140 additions & 0 deletions examples/cybertron-embedding-example/cybertron-embedding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package main

import (
"context"
"fmt"
"log"
"os"
"strings"

"github.com/chewxy/math32"
"github.com/google/uuid"
"github.com/tmc/langchaingo/embeddings"
"github.com/tmc/langchaingo/embeddings/cybertron"
"github.com/tmc/langchaingo/schema"
"github.com/tmc/langchaingo/vectorstores"
"github.com/tmc/langchaingo/vectorstores/weaviate"
)

// cosineSimilarity returns the cosine of the angle between the vectors x and y,
// computed as dot(x, y) / (|x| * |y|).
//
// The program is terminated via log.Fatal when the two vectors differ in length.
// If either vector has zero magnitude (which includes empty input) the similarity
// is mathematically undefined; 0 is returned in that case instead of the NaN that
// dividing by a zero norm would otherwise produce.
func cosineSimilarity(x, y []float32) float32 {
	if len(x) != len(y) {
		log.Fatal("x and y have different lengths")
	}

	var dot, nx, ny float32

	// Accumulate the squared norms and the dot product in a single pass.
	for i := range x {
		nx += x[i] * x[i]
		ny += y[i] * y[i]
		dot += x[i] * y[i]
	}

	// A zero squared norm would make the denominator below zero and the
	// result NaN, so report "no similarity" explicitly.
	if nx == 0 || ny == 0 {
		return 0
	}

	return dot / (math32.Sqrt(nx) * math32.Sqrt(ny))
}

// randomIndexName builds a fresh index name made of the literal prefix "Test"
// followed by a newly generated UUID with its dashes stripped out.
func randomIndexName() string {
	const prefix = "Test"

	id := uuid.New().String()
	compact := strings.ReplaceAll(id, "-", "")

	return prefix + compact
}

// exampleInMemory embeds a small fixed set of words with emb and prints the
// cosine similarity of every ordered pair of them to standard output.
//
// The program is terminated via log.Fatalf if embedding fails or if the embedder
// returns a different number of vectors than there are documents.
func exampleInMemory(ctx context.Context, emb embeddings.Embedder) {
	// We're going to create embeddings for the following strings, then calculate the similarity
	// between them using cosine similarity.
	docs := []string{
		"tokyo",
		"japan",
		"potato",
	}

	vecs, err := emb.EmbedDocuments(ctx, docs)
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal would print the label and
		// the error glued together without any space.
		log.Fatalf("embed documents: %v", err)
	}

	// The loops below index vecs by document position, so make sure there is
	// exactly one vector per document before doing so.
	if len(vecs) != len(docs) {
		log.Fatalf("embed documents: got %d vectors for %d documents", len(vecs), len(docs))
	}

	fmt.Println("Similarities:")

	for i := range docs {
		for j := range docs {
			fmt.Printf("%6s ~ %6s = %0.2f\n", docs[i], docs[j], cosineSimilarity(vecs[i], vecs[j]))
		}
	}
}

// exampleWeaviate stores a few documents in a Weaviate vector store that uses emb
// to create embeddings, runs a similarity search for "japan" and prints the
// matches with their scores to standard output.
//
// The connection is configured through the WEAVIATE_SCHEME and WEAVIATE_HOST
// environment variables. When either is unset the example logs a hint and returns
// without doing anything. Any failure while creating the store, adding documents
// or searching terminates the program via log.Fatalf.
func exampleWeaviate(ctx context.Context, emb embeddings.Embedder) {
	scheme := os.Getenv("WEAVIATE_SCHEME")
	host := os.Getenv("WEAVIATE_HOST")

	if scheme == "" || host == "" {
		log.Print("Set WEAVIATE_HOST and WEAVIATE_SCHEME to run the weaviate example")

		return
	}

	// Create a new Weaviate vector store with the Cybertron Embedder to generate embeddings.
	// A random index name is used for every run.
	store, err := weaviate.New(
		weaviate.WithEmbedder(emb),
		weaviate.WithScheme(scheme),
		weaviate.WithHost(host),
		weaviate.WithIndexName(randomIndexName()),
	)
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal would print the label and
		// the error glued together without any space.
		log.Fatalf("create weaviate store: %v", err)
	}

	// Add some documents to the vector store. This will use the Cybertron Embedder to create
	// embeddings for the documents.
	_, err = store.AddDocuments(ctx, []schema.Document{
		{PageContent: "tokyo"},
		{PageContent: "japan"},
		{PageContent: "potato"},
	})
	if err != nil {
		log.Fatalf("add documents: %v", err)
	}

	// Perform a similarity search, returning at most three results with similarity scores of
	// at least 0.8. This again uses the Cybertron Embedder to create an embedding for the
	// search query.
	matches, err := store.SimilaritySearch(ctx, "japan", 3,
		vectorstores.WithScoreThreshold(0.8),
	)
	if err != nil {
		log.Fatalf("similarity search: %v", err)
	}

	fmt.Println("Matches:")
	for _, match := range matches {
		fmt.Printf(" japan ~ %6s = %0.2f\n", match.PageContent, match.Score)
	}
}

// main creates a Cybertron-backed embedder and runs both examples with it: the
// in-memory similarity comparison and the Weaviate vector store search.
//
// Failure to create the embedder client or the embedder terminates the program
// via log.Fatalf.
func main() {
	ctx := context.Background()

	// Create an embedder client that uses the "BAAI/bge-small-en-v1.5" model and caches it in
	// the "models" directory. Cybertron will automatically download the model from HuggingFace
	// and convert it when needed.
	//
	// Note that not all models are supported and that Cybertron executes the model locally on
	// the CPU, so larger models will be quite slow!
	emc, err := cybertron.NewCybertron(
		cybertron.WithModelsDir("models"),
		cybertron.WithModel("BAAI/bge-small-en-v1.5"),
	)
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal would print the label and
		// the error glued together without any space.
		log.Fatalf("create embedder client: %v", err)
	}

	// Create an embedder from the previously created client.
	emb, err := embeddings.NewEmbedder(emc,
		embeddings.WithStripNewLines(false),
	)
	if err != nil {
		log.Fatalf("create embedder: %v", err)
	}

	// Example: use the Embedder to do an in-memory comparison between some documents.
	exampleInMemory(ctx, emb)

	// Example: use the Embedder together with a Vector Store.
	exampleWeaviate(ctx, emb)
}
51 changes: 51 additions & 0 deletions examples/cybertron-embedding-example/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Module path of the cybertron embedding example.
// NOTE(review): the "tmtsmc" path segment differs from the "github.com/tmc/langchaingo"
// path used in the require block and in the example's imports; this looks like a
// typo for "tmc" and should be confirmed.
module github.com/tmtsmc/langchaingo/examples/cybertron-embedding-example

// Go language version this module is written against.
go 1.21

require (
github.com/chewxy/math32 v1.10.1
github.com/google/uuid v1.4.0
github.com/tmc/langchaingo v0.1.4-0.20240123022810-6f20ee5cf8df
)

require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
github.com/go-openapi/analysis v0.21.2 // indirect
github.com/go-openapi/errors v0.20.3 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.6 // indirect
github.com/go-openapi/loads v0.21.1 // indirect
github.com/go-openapi/spec v0.20.4 // indirect
github.com/go-openapi/strfmt v0.21.3 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/go-openapi/validate v0.21.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/flatbuffers v23.5.26+incompatible // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/nlpodyssey/cybertron v0.2.1 // indirect
github.com/nlpodyssey/gopickle v0.2.0 // indirect
github.com/nlpodyssey/gotokenizers v0.2.0 // indirect
github.com/nlpodyssey/spago v1.1.0 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
github.com/rs/zerolog v1.31.0 // indirect
github.com/weaviate/weaviate v1.19.13 // indirect
github.com/weaviate/weaviate-go-client/v4 v4.8.1 // indirect
go.mongodb.org/mongo-driver v1.11.3 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/oauth2 v0.15.0 // indirect
golang.org/x/sync v0.5.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading
Loading