Skip to content

Commit

Permalink
Merge pull request #31 from Firdous2307/implement-document-chunking
Browse files — browse the repository at this point in the history
Implement document chunking
  • Loading branch information
Sammybams authored Oct 16, 2024
2 parents 7d61b3d + 89e9e36 commit 6221d18
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/rag_functions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,6 +7,7 @@
from werkzeug.utils import secure_filename
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Setting up logging
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -54,6 +55,16 @@ def file_check_num(uploaded_file):
return -1



def chunk_document(text, chunk_size=1000, chunk_overlap=200):
    """
    Split ``text`` into chunks using a RecursiveCharacterTextSplitter.

    Args:
        text: The text to split; passed unchanged to ``split_text``.
        chunk_size: Chunk size handed to the splitter (default 1000).
            Lengths are measured with ``len``.
        chunk_overlap: Overlap between chunks handed to the splitter
            (default 200), measured with ``len`` as well.

    Returns:
        Whatever the splitter's ``split_text`` returns for ``text``
        (presumably a list of string chunks -- confirm against the
        LangChain documentation).
    """
    # A fresh splitter is built per call so the size/overlap arguments of
    # this particular call are the ones applied.
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return splitter.split_text(text)

def extract_contents_from_doc(files, temp_dir):
"""
Azure Document Intelligence
Expand Down

0 comments on commit 6221d18

Please sign in to comment.