Skip to content

Commit

Permalink
refactor: use init_splitting_data with cache annotation (#75)
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanorosanelli authored Oct 15, 2024
1 parent 3b99188 commit 3ff8c84
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 6 deletions.
12 changes: 11 additions & 1 deletion brevia/index.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Index document with embeddings in vector database."""
from functools import lru_cache
from os import path
from logging import getLogger
from warnings import warn
from langchain_community.vectorstores.pgembedding import CollectionStore
from langchain_community.vectorstores.pgembedding import EmbeddingStore
from langchain_community.vectorstores.pgvector import PGVector
Expand All @@ -19,9 +21,16 @@

def init_index():
    """Init index data (deprecated).

    Backward-compatible alias: emits a ``DeprecationWarning`` and then
    delegates to ``init_splitting_data()``. Returns nothing.
    """
    # stacklevel=2 attributes the warning to the caller of init_index, so the
    # report points at the code that has to migrate rather than at this shim.
    warn(
        "init_index deprecated, use init_splitting_data instead",
        DeprecationWarning,
        stacklevel=2,
    )
    init_splitting_data()


@lru_cache
def init_splitting_data() -> bool:
"""Init splitting tools data (NLTK for now)"""
try:
import nltk # pylint: disable=import-outside-toplevel
nltk.download('punkt')
return nltk.download('punkt')

except ImportError as exc:
raise ImportError(
Expand Down Expand Up @@ -92,6 +101,7 @@ def split_document(
document: Document, collection_meta: dict = {}
) -> list[Document]:
""" Split document into text chunks and return a list of documents"""
init_splitting_data()
text_splitter = create_splitter(collection_meta)
texts = text_splitter.split_documents([document])
counter = 1
Expand Down
3 changes: 0 additions & 3 deletions brevia/routers/app_routers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Add brevia app routers."""
from fastapi import FastAPI
from brevia import index
from brevia.routers import (
analyze_router,
index_router,
Expand All @@ -25,5 +24,3 @@ def add_routers(app: FastAPI) -> None:
app.include_router(qa_router.router)
app.include_router(status_router.router)
app.include_router(completion_router.router)

index.init_index()
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from alembic import command
from alembic.config import Config
from dotenv import dotenv_values
from brevia.index import init_index
from brevia.index import init_splitting_data
from brevia.settings import get_settings


def pytest_sessionstart(session):
    """Init splitting data once, at the start of the test session."""
    # Call init_splitting_data directly: the deprecated init_index wrapper
    # only emits a DeprecationWarning before delegating to the same function.
    return init_splitting_data()


def update_settings():
Expand Down

0 comments on commit 3ff8c84

Please sign in to comment.