From c85116da1878e2c23fa43a3d882179399c5f0d39 Mon Sep 17 00:00:00 2001
From: Ishaan Sehgal
Date: Mon, 24 Feb 2025 12:20:54 -0800
Subject: [PATCH 1/2] fix: Huggingface Request Format (#895)

**Reason for Change**:
Switch to the prompt format, since we issue single-turn completion queries.
See the TGI API reference:
https://huggingface.co/docs/text-generation-inference/en/reference/api_reference

Signed-off-by: Ishaan Sehgal
---
 presets/ragengine/inference/inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/presets/ragengine/inference/inference.py b/presets/ragengine/inference/inference.py
index aa745db35..e556cafbe 100644
--- a/presets/ragengine/inference/inference.py
+++ b/presets/ragengine/inference/inference.py
@@ -89,8 +89,8 @@ async def _async_openai_complete(self, prompt: str, **kwargs: Any) -> Completion
         return await OpenAI(api_key=LLM_ACCESS_SECRET, **kwargs).acomplete(prompt)
 
     async def _async_huggingface_remote_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
-        return await self._async_post_request({"messages": [{"role": "user", "content": prompt}]}, headers={"Authorization": f"Bearer {LLM_ACCESS_SECRET}"})
-
+        data = {"prompt": prompt, **kwargs}
+        return await self._async_post_request(data, headers={"Authorization": f"Bearer {LLM_ACCESS_SECRET}", "Content-Type": "application/json"})
     async def _async_custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         model_name, model_max_len = self._get_default_model_info()
         if kwargs.get("model"):

From 45e6882749b357172bee7686f5721f9fa325dc23 Mon Sep 17 00:00:00 2001
From: Ishaan Sehgal
Date: Mon, 24 Feb 2025 12:21:16 -0800
Subject: [PATCH 2/2] fix: Excluding Metadata from LLM during Response
 Synthesis & Updating default top_k (#896)

**Reason for Change**:
Per the LlamaIndex documentation
(https://docs.llamaindex.ai/en/stable/module_guides/loading/documents_and_nodes/usage_documents/):
"Typically, a document might have many metadata keys, but you might not want
all of them visible to the LLM during response synthesis."
This change also lowers the default `top_k` for queries from 10 to 5.

Signed-off-by: Ishaan Sehgal
---
 presets/ragengine/models.py            | 4 ++--
 presets/ragengine/vector_store/base.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/presets/ragengine/models.py b/presets/ragengine/models.py
index 454fd8bbb..32f9f769e 100644
--- a/presets/ragengine/models.py
+++ b/presets/ragengine/models.py
@@ -28,7 +28,7 @@ class IndexRequest(BaseModel):
 class QueryRequest(BaseModel):
     index_name: str
     query: str
-    top_k: int = 10
+    top_k: int = 5
     # Accept a dictionary for our LLM parameters
     llm_params: Optional[Dict[str, Any]] = Field(
         default_factory=dict,
@@ -70,4 +70,4 @@ class QueryResponse(BaseModel):
 
 class HealthStatus(BaseModel):
     status: str
-    detail: Optional[str] = None
\ No newline at end of file
+    detail: Optional[str] = None
diff --git a/presets/ragengine/vector_store/base.py b/presets/ragengine/vector_store/base.py
index 90963cd21..efd42226f 100644
--- a/presets/ragengine/vector_store/base.py
+++ b/presets/ragengine/vector_store/base.py
@@ -191,7 +191,7 @@ async def add_document_to_index(self, index_name: str, document: Document, doc_i
         """Common logic for adding a single document."""
         if index_name not in self.index_map:
             raise ValueError(f"No such index: '{index_name}' exists.")
-        llama_doc = LlamaDocument(id_=doc_id, text=document.text, metadata=document.metadata)
+        llama_doc = LlamaDocument(id_=doc_id, text=document.text, metadata=document.metadata, excluded_llm_metadata_keys=[key for key in document.metadata.keys()])
 
         if self.use_rwlock:
             async with self.rwlock.writer_lock:
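
Note on patch 1: the new request body is a top-level `prompt` field plus pass-through generation parameters, replacing the chat-style `messages` list. Below is a minimal sketch of the resulting HTTP call, using `httpx` in place of the repo's `_async_post_request` helper; the endpoint URL and access token are placeholders, not values from the repo.

```python
# Sketch of the request shape after patch 1 (hypothetical endpoint/secret).
import asyncio
import httpx

LLM_INFERENCE_URL = "http://localhost:8080/v1/completions"  # hypothetical
LLM_ACCESS_SECRET = "my-token"  # hypothetical

async def huggingface_remote_complete(prompt: str, **kwargs):
    # Single-turn completion: a plain "prompt" field plus any extra
    # generation parameters, instead of a chat-style "messages" list.
    data = {"prompt": prompt, **kwargs}
    headers = {
        "Authorization": f"Bearer {LLM_ACCESS_SECRET}",
        "Content-Type": "application/json",
    }
    async with httpx.AsyncClient() as client:
        resp = await client.post(LLM_INFERENCE_URL, json=data, headers=headers)
        resp.raise_for_status()
        return resp.json()

# Example: asyncio.run(huggingface_remote_complete("What is RAG?", max_tokens=64))
```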
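Note on patch 2: listing every metadata key in `excluded_llm_metadata_keys` means metadata still flows into embedding and retrieval but is hidden from the LLM prompt during response synthesis. A minimal sketch of that LlamaIndex behavior, assuming a recent `llama-index` release and made-up document text and metadata:

```python
# Sketch of excluded_llm_metadata_keys (not the repo's code): metadata is
# visible to the embedding model but omitted from the LLM-facing content.
from llama_index.core.schema import Document, MetadataMode

doc = Document(
    text="RAG retrieves relevant chunks before generation.",
    metadata={"source": "docs/overview.md", "author": "example"},
    excluded_llm_metadata_keys=["source", "author"],  # exclude all keys, as the patch does
)

# LLM-facing view: the excluded metadata is omitted.
print(doc.get_content(metadata_mode=MetadataMode.LLM))
# -> "RAG retrieves relevant chunks before generation."

# Embedding-facing view: metadata is still prepended.
print(doc.get_content(metadata_mode=MetadataMode.EMBED))
# -> "source: docs/overview.md\nauthor: example\n\nRAG retrieves ..."
```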
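The `top_k` default change only affects queries that omit the field; callers can still request more chunks explicitly. A hedged example request against the query endpoint (the host, port, and path are assumptions, not taken from the repo):

```python
# Sketch of a QueryRequest payload after patch 2 (hypothetical endpoint).
import requests

resp = requests.post(
    "http://localhost:5000/query",  # hypothetical RAG engine endpoint
    json={
        "index_name": "docs",
        "query": "How do I configure the inference preset?",
        # "top_k" omitted -> now defaults to 5 instead of 10
        "llm_params": {"temperature": 0.2},
    },
)
print(resp.json())
```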