Skip to content

Commit

Permalink
Add Vectorstore instrumentation automation (#1279)
Browse files Browse the repository at this point in the history
* Tweak instrumentation & add automation script

* Remove commented out code

* Move script and add description

* Remove breakpoint

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 29, 2025
1 parent 82918c5 commit fb171c0
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 13 deletions.
11 changes: 4 additions & 7 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,13 +2162,10 @@ def _process_module_builtin_defaults():
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_callbacks_manager",
)

# VectorStores with similarity_search method
_process_module_definition(
"langchain_community.vectorstores.docarray.hnsw",
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_vectorstore_similarity_search",
)
_process_module_definition(
"langchain_community.vectorstores.docarray.in_memory",
"langchain_community.vectorstores.docarray",
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_vectorstore_similarity_search",
)
Expand All @@ -2178,7 +2175,7 @@ def _process_module_builtin_defaults():
"instrument_langchain_vectorstore_similarity_search",
)
_process_module_definition(
"langchain_community.vectorstores.redis.base",
"langchain_community.vectorstores.redis",
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_vectorstore_similarity_search",
)
Expand Down
8 changes: 3 additions & 5 deletions newrelic/hooks/mlmodel_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@
"langchain_community.vectorstores.documentdb": "DocumentDBVectorSearch",
"langchain_community.vectorstores.duckdb": "DuckDB",
"langchain_community.vectorstores.ecloud_vector_search": "EcloudESVectorStore",
"langchain_community.vectorstores.elastic_vector_search": "ElasticVectorSearch",
# "langchain_community.vectorstores.elastic_vector_search": "ElasticKnnSearch", # Deprecated
"langchain_community.vectorstores.elastic_vector_search": ["ElasticVectorSearch", "ElasticKnnSearch"],
"langchain_community.vectorstores.elasticsearch": "ElasticsearchStore",
"langchain_community.vectorstores.epsilla": "Epsilla",
"langchain_community.vectorstores.faiss": "FAISS",
Expand Down Expand Up @@ -93,7 +92,7 @@
"langchain_community.vectorstores.pgvector": "PGVector",
"langchain_community.vectorstores.pinecone": "Pinecone",
"langchain_community.vectorstores.qdrant": "Qdrant",
"langchain_community.vectorstores.redis.base": "Redis",
"langchain_community.vectorstores.redis": "Redis",
"langchain_community.vectorstores.relyt": "Relyt",
"langchain_community.vectorstores.rocksetdb": "Rockset",
"langchain_community.vectorstores.scann": "ScaNN",
Expand Down Expand Up @@ -126,8 +125,7 @@
"langchain_community.vectorstores.yellowbrick": "Yellowbrick",
"langchain_community.vectorstores.zep_cloud": "ZepCloudVectorStore",
"langchain_community.vectorstores.zep": "ZepVectorStore",
"langchain_community.vectorstores.docarray.hnsw": "DocArrayHnswSearch",
"langchain_community.vectorstores.docarray.in_memory": "DocArrayInMemorySearch",
"langchain_community.vectorstores.docarray": ["DocArrayHnswSearch", "DocArrayInMemorySearch"],
}


Expand Down
100 changes: 100 additions & 0 deletions tests/mlmodel_langchain/new_vectorstore_adder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
This script is used to automatically add new vectorstore classes to the newrelic-python-agent.
To run this script, start from the root of the newrelic-python-agent repository and run:
`python tests/mlmodel_langchain/new_vectorstore_adder.py`
This will generate the necessary code to instrument the new vectorstore classes in the local
copy of the newrelic-python-agent repository.
"""

import os

from langchain_community import vectorstores

from newrelic.hooks.mlmodel_langchain import VECTORSTORE_CLASSES

# Resolve the repository root relative to this script's own location:
# <repo>/tests/mlmodel_langchain/<this file>  ->  two directory levels up.
_script_path = os.path.realpath(__file__)
dir_path = os.path.dirname(_script_path)
# dir_path is already absolute (realpath), so normalising the ".." segment is all that is needed.
test_dir = os.path.normpath(os.path.join(dir_path, os.pardir))
REPO_PATH = os.path.normpath(os.path.join(test_dir, os.pardir))


def add_to_config(directory, instrumented_class=None, config_path=None):
    """Register ``directory`` for vectorstore instrumentation in ``newrelic/config.py``.

    A ``_process_module_definition(...)`` call for ``directory`` is inserted directly
    after the first ``VectorStores with similarity_search method`` marker comment.

    Args:
        directory: Dotted module path of the vectorstore module to instrument.
        instrumented_class: Class name(s) already instrumented for ``directory``.
            When truthy the module is assumed to be registered already and the
            function returns without touching the file.
        config_path: Path of the config file to edit. Defaults to
            ``<REPO_PATH>/newrelic/config.py``.
    """
    # Only implement this if there is not an instrumented class within the directory already.
    if instrumented_class:
        return

    if config_path is None:
        config_path = f"{REPO_PATH}/newrelic/config.py"

    anchor = "VectorStores with similarity_search method"
    # The marker comment sits inside a function body, so the generated call must use
    # 4-space (statement) / 8-space (argument) indentation to remain valid Python.
    snippet = (
        "\n    _process_module_definition(\n"
        f'        "{directory}",\n'
        '        "newrelic.hooks.mlmodel_langchain",\n'
        '        "instrument_langchain_vectorstore_similarity_search",\n'
        "    )\n"
    )

    with open(config_path, "r+", encoding="utf-8") as file:
        text = file.read()
        updated = text.replace(anchor, anchor + snippet, 1)
        if updated == text:
            # Marker not found: leave the file untouched rather than rewriting it.
            return
        file.seek(0)
        file.write(updated)
        # Guard against stale trailing bytes should the new content ever be shorter.
        file.truncate()


def add_to_hooks(class_name, directory, instrumented_class=None, hooks_path=None):
    """Add ``class_name`` to ``VECTORSTORE_CLASSES`` in ``newrelic/hooks/mlmodel_langchain.py``.

    The current entry for ``directory`` is read from the file itself rather than
    trusted from ``instrumented_class``, because the in-memory mapping may be stale
    once the file has been edited earlier in the same run:

    * no entry for ``directory``: a new ``"directory": "class_name"`` entry is
      inserted at the beginning of the dictionary;
    * entry with a single class: it is converted into a two-element list;
    * entry with a list of classes: ``class_name`` is appended to that list.

    The call is idempotent: if ``class_name`` is already listed nothing is written.

    Args:
        class_name: Name of the vectorstore class to instrument.
        directory: Dotted module path in which ``class_name`` is defined.
        instrumented_class: Class name(s) the caller believes are already
            instrumented. Accepted for backward compatibility; it is neither
            consulted nor mutated.
        hooks_path: Path of the hooks file to edit. Defaults to
            ``<REPO_PATH>/newrelic/hooks/mlmodel_langchain.py``.
    """
    import re  # Local import: only needed by this helper.

    if hooks_path is None:
        hooks_path = f"{REPO_PATH}/newrelic/hooks/mlmodel_langchain.py"

    dict_opener = "VECTORSTORE_CLASSES = {"
    # Matches `"<directory>": "Class"` or `"<directory>": ["A", "B", ...]` (possibly multi-line).
    entry_pattern = re.compile(r'"' + re.escape(directory) + r'":\s*(?P<value>\[[^\]]*\]|"[^"]*")')

    with open(hooks_path, "r+", encoding="utf-8") as file:
        text = file.read()

        # Only look for the key inside/after the dictionary, never in unrelated code above it.
        search_from = max(text.find(dict_opener), 0)
        entry = entry_pattern.search(text, search_from)

        if entry is None:
            # The directory does not exist yet. Add the new directory and class name
            # to the beginning of the dictionary.
            updated = text.replace(dict_opener, dict_opener + "\n    " + f'"{directory}": "{class_name}",', 1)
        else:
            existing = re.findall(r'"([^"]+)"', entry.group("value"))
            if class_name in existing:
                return
            # The directory exists: rewrite its value as a list including the new class.
            names = ", ".join(f'"{name}"' for name in [*existing, class_name])
            updated = text[: entry.start("value")] + f"[{names}]" + text[entry.end("value") :]

        if updated == text:
            return
        file.seek(0)
        file.write(updated)
        # Guard against stale trailing bytes should the new content ever be shorter.
        file.truncate()


def main():
    """Instrument every vectorstore class that the agent does not cover yet.

    Walks ``langchain_community.vectorstores._module_lookup`` (class name -> module
    path) and, for each class exposing ``similarity_search`` that is neither ignored
    nor already listed in ``VECTORSTORE_CLASSES``, calls ``add_to_config`` and
    ``add_to_hooks`` to update the local repository files.
    """
    _test_vectorstore_modules_instrumented_ignored_classes = {
        "VectorStore",  # Base class
        "Zilliz",  # Inherited from Milvus, which we are already instrumenting.
    }

    # Working copy of the instrumented classes per module. It is updated after every
    # edit so that several new classes living in the same module are handled
    # consistently within one run (the imported VECTORSTORE_CLASSES is never mutated).
    instrumented_by_directory = dict(VECTORSTORE_CLASSES)

    for class_name, directory in vectorstores._module_lookup.items():
        if class_name in _test_vectorstore_modules_instrumented_ignored_classes:
            continue

        class_ = getattr(vectorstores, class_name)
        if not hasattr(class_, "similarity_search"):
            continue

        instrumented_class = instrumented_by_directory.get(directory)
        # Normalise to a list: `name in "SomeString"` would be a substring test and
        # could wrongly treat a class as instrumented.
        if isinstance(instrumented_class, str):
            known_classes = [instrumented_class]
        else:
            known_classes = list(instrumented_class or ())

        if class_name in known_classes:
            continue

        # Add in newrelic/config.py if there is not an instrumented directory
        # Otherwise, config already exists, so no need to duplicate it.
        add_to_config(directory, instrumented_class)

        # Add in newrelic/hooks/mlmodel_langchain.py
        # Lists are passed as a fresh copy so the helper cannot alter our bookkeeping.
        if isinstance(instrumented_class, str) or not instrumented_class:
            hooks_argument = instrumented_class
        else:
            hooks_argument = list(known_classes)
        add_to_hooks(class_name, directory, hooks_argument)

        # Mirror what is now in the hooks file: a bare name for the first class of a
        # module, a list as soon as there are two or more.
        instrumented_by_directory[directory] = known_classes + [class_name] if known_classes else class_name


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion tests/mlmodel_langchain/test_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def vectorstore_events_sans_content(event):
_test_vectorstore_modules_instrumented_ignored_classes = set(
[
"VectorStore", # Base class
"ElasticKnnSearch", # Deprecated, so we will not be instrumenting this.
"Zilliz", # Inherited from Milvus, which we are already instrumenting.
]
)

Expand Down

0 comments on commit fb171c0

Please sign in to comment.