Skip to content

Commit

Permalink
Merge branch 'main' into testcontainers-go
Browse files Browse the repository at this point in the history
  • Loading branch information
mdelapenya authored Feb 13, 2024
2 parents 14871d2 + fcd78c9 commit 452e631
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 16 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Socktastic

Here are some links to blog posts and articles on using LangChainGo:

- [Using Gemini models in Go with LangChainGo](https://eli.thegreenplace.net/2024/using-gemini-models-in-go-with-langchaingo/) - Jan 2024
- [Using Ollama with LangChainGo](https://eli.thegreenplace.net/2023/using-ollama-with-langchaingo/) - Nov 2023
- [Creating a simple ChatGPT clone with Go](https://sausheong.com/creating-a-simple-chatgpt-clone-with-go-c40b4bec9267?sk=53a2bcf4ce3b0cfae1a4c26897c0deb0) - Aug 2023
- [Creating a ChatGPT Clone that Runs on Your Laptop with Go](https://sausheong.com/creating-a-chatgpt-clone-that-runs-on-your-laptop-with-go-bf9d41f1cf88?sk=05dc67b60fdac6effb1aca84dd2d654e) - Aug 2023
25 changes: 13 additions & 12 deletions textsplitter/recursive_character.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,11 @@ func (s RecursiveCharacter) SplitText(text string) ([]string, error) {

// Find the appropriate separator
separator := s.Separators[len(s.Separators)-1]
for _, s := range s.Separators {
if s == "" {
separator = s
break
}

if strings.Contains(text, s) {
separator = s
newSeparators := []string{}
for i, c := range s.Separators {
if c == "" || strings.Contains(text, c) {
separator = c
newSeparators = s.Separators[i+1:]
break
}
}
Expand All @@ -65,11 +62,15 @@ func (s RecursiveCharacter) SplitText(text string) ([]string, error) {
goodSplits = make([]string, 0)
}

otherInfo, err := s.SplitText(split)
if err != nil {
return nil, err
if len(newSeparators) == 0 {
finalChunks = append(finalChunks, split)
} else {
otherInfo, err := s.SplitText(split)
if err != nil {
return nil, err
}
finalChunks = append(finalChunks, otherInfo...)
}
finalChunks = append(finalChunks, otherInfo...)
}

if len(goodSplits) > 0 {
Expand Down
15 changes: 11 additions & 4 deletions textsplitter/recursive_character_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,32 @@ import (
"github.com/tmc/langchaingo/schema"
)

//nolint:dupword
//nolint:dupword,funlen
func TestRecursiveCharacterSplitter(t *testing.T) {
t.Parallel()
type testCase struct {
text string
chunkOverlap int
chunkSize int
separators []string
expectedDocs []schema.Document
}
testCases := []testCase{
{
text: "Hi.\nI'm Harrison.\n\nHow?\na\nb",
text: "Hi, Harrison. \nI am glad to meet you",
chunkOverlap: 1,
chunkSize: 20,
separators: []string{"\n", "$"},
expectedDocs: []schema.Document{
{PageContent: "Hi.\nI'm Harrison.", Metadata: map[string]any{}},
{PageContent: "How?\na\nb", Metadata: map[string]any{}},
{PageContent: "Hi, Harrison.", Metadata: map[string]any{}},
{PageContent: "I am glad to meet you", Metadata: map[string]any{}},
},
},
{
text: "Hi.\nI'm Harrison.\n\nHow?\na\nbHi.\nI'm Harrison.\n\nHow?\na\nb",
chunkOverlap: 1,
chunkSize: 40,
separators: []string{"\n\n", "\n", " ", ""},
expectedDocs: []schema.Document{
{PageContent: "Hi.\nI'm Harrison.", Metadata: map[string]any{}},
{PageContent: "How?\na\nbHi.\nI'm Harrison.\n\nHow?\na\nb", Metadata: map[string]any{}},
Expand All @@ -40,6 +43,7 @@ func TestRecursiveCharacterSplitter(t *testing.T) {
text: "name: Harrison\nage: 30",
chunkOverlap: 1,
chunkSize: 40,
separators: []string{"\n\n", "\n", " ", ""},
expectedDocs: []schema.Document{
{PageContent: "name: Harrison\nage: 30", Metadata: map[string]any{}},
},
Expand All @@ -52,6 +56,7 @@ name: Joe
age: 32`,
chunkOverlap: 1,
chunkSize: 40,
separators: []string{"\n\n", "\n", " ", ""},
expectedDocs: []schema.Document{
{PageContent: "name: Harrison\nage: 30", Metadata: map[string]any{}},
{PageContent: "name: Joe\nage: 32", Metadata: map[string]any{}},
Expand All @@ -70,6 +75,7 @@ Bye!
-H.`,
chunkOverlap: 1,
chunkSize: 10,
separators: []string{"\n\n", "\n", " ", ""},
expectedDocs: []schema.Document{
{PageContent: "Hi.", Metadata: map[string]any{}},
{PageContent: "I'm", Metadata: map[string]any{}},
Expand All @@ -95,6 +101,7 @@ Bye!
for _, tc := range testCases {
splitter.ChunkOverlap = tc.chunkOverlap
splitter.ChunkSize = tc.chunkSize
splitter.Separators = tc.separators

docs, err := CreateDocuments(splitter, []string{tc.text}, nil)
require.NoError(t, err)
Expand Down

0 comments on commit 452e631

Please sign in to comment.