Skip to content
This repository has been archived by the owner on Jan 9, 2025. It is now read-only.

Commit

Permalink
fix(text): infinite loop bug (#331)
Browse files Browse the repository at this point in the history
Because

- there is an infinite loop in the Markdown text splitter's chunk processing

This commit

- adds a guard against the infinite loop and fixes the bug that caused it
  • Loading branch information
chuang8511 authored Sep 12, 2024
1 parent 620df7c commit 4387219
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions operator/text/v0/markdown_splitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,13 @@ func (sp MarkdownTextSplitter) processChunks(lists []List) []ContentChunk {
shouldOverlapPreviousList := false

addListCount := 0
countI := map[int]int{}
for i := 0; i < len(lists); i++ {
countI[i] = 0
}

for i := 0; i < len(lists); i++ {
countI[i]++
list := lists[i]

// Add the title
Expand Down Expand Up @@ -271,7 +277,8 @@ func (sp MarkdownTextSplitter) processChunks(lists []List) []ContentChunk {
i--
addListCount = 0
} else if overlapType == "last chunk final list" {
if i > 1 {
// countI[i] < 5 is a protection against an infinite loop: once index i has been visited 5 times, stop stepping back two items.
if i > 1 && countI[i] < 5 {
i -= 2
} else {
i--
Expand Down Expand Up @@ -328,7 +335,9 @@ func (sp MarkdownTextSplitter) overlapType(lists []List, i int) string {
return "no overlap"
}

if sizeEnough {
isInfinityLoop := (i > 0 && sizeOfString(lists[i-1].Text)+sizeOfString((lists[i].Text)) > sp.ChunkSize)

if sizeEnough && !isInfinityLoop {
return "last chunk final list"
} else {
return "no overlap"
Expand Down

0 comments on commit 4387219

Please sign in to comment.