Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Vectorstore instrumentation automation #1279

Merged
merged 15 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,13 +2162,10 @@ def _process_module_builtin_defaults():
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_callbacks_manager",
)

# VectorStores with similarity_search method
_process_module_definition(
"langchain_community.vectorstores.docarray.hnsw",
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_vectorstore_similarity_search",
)
_process_module_definition(
"langchain_community.vectorstores.docarray.in_memory",
"langchain_community.vectorstores.docarray",
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_vectorstore_similarity_search",
)
Expand All @@ -2178,7 +2175,7 @@ def _process_module_builtin_defaults():
"instrument_langchain_vectorstore_similarity_search",
)
_process_module_definition(
"langchain_community.vectorstores.redis.base",
"langchain_community.vectorstores.redis",
"newrelic.hooks.mlmodel_langchain",
"instrument_langchain_vectorstore_similarity_search",
TimPansino marked this conversation as resolved.
Show resolved Hide resolved
)
Expand Down
8 changes: 3 additions & 5 deletions newrelic/hooks/mlmodel_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@
"langchain_community.vectorstores.documentdb": "DocumentDBVectorSearch",
"langchain_community.vectorstores.duckdb": "DuckDB",
"langchain_community.vectorstores.ecloud_vector_search": "EcloudESVectorStore",
"langchain_community.vectorstores.elastic_vector_search": "ElasticVectorSearch",
# "langchain_community.vectorstores.elastic_vector_search": "ElasticKnnSearch", # Deprecated
"langchain_community.vectorstores.elastic_vector_search": ["ElasticVectorSearch", "ElasticKnnSearch"],
"langchain_community.vectorstores.elasticsearch": "ElasticsearchStore",
"langchain_community.vectorstores.epsilla": "Epsilla",
"langchain_community.vectorstores.faiss": "FAISS",
Expand Down Expand Up @@ -93,7 +92,7 @@
"langchain_community.vectorstores.pgvector": "PGVector",
"langchain_community.vectorstores.pinecone": "Pinecone",
"langchain_community.vectorstores.qdrant": "Qdrant",
"langchain_community.vectorstores.redis.base": "Redis",
"langchain_community.vectorstores.redis": "Redis",
"langchain_community.vectorstores.relyt": "Relyt",
"langchain_community.vectorstores.rocksetdb": "Rockset",
"langchain_community.vectorstores.scann": "ScaNN",
Expand Down Expand Up @@ -126,8 +125,7 @@
"langchain_community.vectorstores.yellowbrick": "Yellowbrick",
"langchain_community.vectorstores.zep_cloud": "ZepCloudVectorStore",
"langchain_community.vectorstores.zep": "ZepVectorStore",
"langchain_community.vectorstores.docarray.hnsw": "DocArrayHnswSearch",
"langchain_community.vectorstores.docarray.in_memory": "DocArrayInMemorySearch",
"langchain_community.vectorstores.docarray": ["DocArrayHnswSearch", "DocArrayInMemorySearch"],
}


Expand Down
100 changes: 100 additions & 0 deletions tests/mlmodel_langchain/new_vectorstore_adder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
This script automatically adds new vectorstore classes to the newrelic-python-agent instrumentation.
To run it, start from the root of the newrelic-python-agent repository and run:
`python tests/mlmodel_langchain/new_vectorstore_adder.py`
The script edits the local copy of the repository in place, adding the entries needed to instrument
the new vectorstore classes to `newrelic/config.py` and `newrelic/hooks/mlmodel_langchain.py`.
"""

import os
import re
import warnings

from langchain_community import vectorstores

from newrelic.hooks.mlmodel_langchain import VECTORSTORE_CLASSES

# Walk up from this file's directory to locate the repository root:
# <repo>/tests/mlmodel_langchain/new_vectorstore_adder.py -> <repo>
dir_path = os.path.dirname(os.path.realpath(__file__))
test_dir = os.path.dirname(dir_path)
REPO_PATH = os.path.dirname(test_dir)


def add_to_config(directory, instrumented_class=None, *, repo_path=None):
    """Register a vectorstore module in ``newrelic/config.py``.

    A ``_process_module_definition(...)`` call for ``directory`` is inserted
    directly below the "VectorStores with similarity_search method" marker
    comment.

    Args:
        directory: Dotted module path of the vectorstore to register.
        instrumented_class: Class name(s) already instrumented for this
            module. When truthy, the module is treated as already registered
            and the file is left untouched.
        repo_path: Repository root to edit. Defaults to the module-level
            ``REPO_PATH``.
    """
    # Only implement this if there is not an instrumented class within the directory already.
    if instrumented_class:
        return

    root = REPO_PATH if repo_path is None else repo_path
    config_path = os.path.join(root, "newrelic", "config.py")
    marker = "VectorStores with similarity_search method"

    # The snippet lands inside a function body in config.py, so it must carry
    # that body's indentation (4 spaces, 8 for the call arguments).
    snippet = (
        "_process_module_definition(\n"
        f'        "{directory}",\n'
        '        "newrelic.hooks.mlmodel_langchain",\n'
        '        "instrument_langchain_vectorstore_similarity_search",\n'
        "    )\n"
    )

    with open(config_path, "r+", encoding="utf-8") as file:
        text = file.read()
        if marker not in text:
            # Without the marker there is nowhere to insert; say so instead of
            # silently rewriting the file unchanged.
            warnings.warn(
                f"Marker comment {marker!r} not found in {config_path}; {directory} was not registered.",
                stacklevel=2,
            )
            return

        updated = text.replace(marker, f"{marker}\n    {snippet}", 1)
        file.seek(0)
        file.write(updated)
        # Drop any leftover bytes from the previous contents.
        file.truncate()


def _append_class_to_entry(text, directory, class_name):
    """Return ``text`` with ``class_name`` added to the dict entry keyed by ``directory``.

    The entry value may be a single quoted class name or a list of them, laid
    out on one line or several. The value is rewritten as a single-line list.
    ``text`` is returned unchanged when the entry is not found or already
    contains ``class_name``.
    """
    entry_pattern = re.compile(r'("' + re.escape(directory) + r'"\s*:\s*)(\[[^\]]*\]|"[^"]*")')

    def _extend(match):
        names = re.findall(r'"([^"]*)"', match.group(2))
        if class_name in names:
            return match.group(0)
        names.append(class_name)
        return match.group(1) + "[" + ", ".join(f'"{name}"' for name in names) + "]"

    return entry_pattern.sub(_extend, text, count=1)


def add_to_hooks(class_name, directory, instrumented_class=None, *, repo_path=None):
    """Add ``class_name`` to ``VECTORSTORE_CLASSES`` in ``newrelic/hooks/mlmodel_langchain.py``.

    Args:
        class_name: Name of the vectorstore class to instrument.
        directory: Dotted module path that defines the class (the dict key).
        instrumented_class: Current value for ``directory`` in
            ``VECTORSTORE_CLASSES``: falsy when the module has no entry yet, a
            string for a single class, or a list of class names. A list is
            extended in place once the file has been updated.
        repo_path: Repository root to edit. Defaults to the module-level
            ``REPO_PATH``.
    """
    root = REPO_PATH if repo_path is None else repo_path
    hooks_path = os.path.join(root, "newrelic", "hooks", "mlmodel_langchain.py")

    with open(hooks_path, "r+", encoding="utf-8") as file:
        text = file.read()

        if not instrumented_class:
            # The directory does not exist yet. Add the new directory and class
            # name to the beginning of the dictionary.
            anchor = "VECTORSTORE_CLASSES = {"
            updated = text.replace(anchor, f'{anchor}\n    "{directory}": "{class_name}",', 1)
        else:
            # The directory exists. Anchor on its key so only that entry is
            # touched, and accept either a single class or a list in any layout.
            updated = _append_class_to_entry(text, directory, class_name)

        if updated == text:
            warnings.warn(
                f"No change made to {hooks_path} for {directory}: entry not found "
                f"or {class_name} is already listed.",
                stacklevel=2,
            )
            return

        file.seek(0)
        file.write(updated)
        # Collapsing a multi-line list can shrink the file, so cut off whatever
        # remains of the old contents.
        file.truncate()

    # Keep the caller's list in step with the file so a later call for the
    # same module starts from the current state.
    if isinstance(instrumented_class, list) and class_name not in instrumented_class:
        instrumented_class.append(class_name)


def main():
    """Find uninstrumented vectorstore classes and add them to the agent.

    Iterates over every class listed in ``vectorstores._module_lookup``, skips
    the ignored classes and those without a ``similarity_search`` attribute,
    and for each class not yet present in ``VECTORSTORE_CLASSES`` updates
    ``newrelic/config.py`` and ``newrelic/hooks/mlmodel_langchain.py``.
    """
    ignored_classes = {
        "VectorStore",  # Base class
        "Zilliz",  # Inherited from Milvus, which we are already instrumenting.
    }

    # Working copy of the registry. It is updated after every addition so that
    # a second new class from the same module sees the entry written for the
    # first one, instead of producing a duplicate config registration and a
    # duplicate dictionary key.
    registered = dict(VECTORSTORE_CLASSES)

    for class_name, directory in vectorstores._module_lookup.items():
        if class_name in ignored_classes:
            continue
        if not hasattr(getattr(vectorstores, class_name), "similarity_search"):
            continue

        instrumented_class = registered.get(directory)

        # Compare against whole class names; `in` on a plain string would be a
        # substring test.
        if isinstance(instrumented_class, str):
            known_names = [instrumented_class]
        else:
            known_names = instrumented_class or []
        if class_name in known_names:
            continue

        # Add in newrelic/config.py if there is not an instrumented directory
        # Otherwise, config already exists, so no need to duplicate it.
        add_to_config(directory, instrumented_class)

        # Add in newrelic/hooks/mlmodel_langchain.py
        add_to_hooks(class_name, directory, instrumented_class)

        # Record what the hooks file now holds for this module.
        if not instrumented_class:
            registered[directory] = class_name
        elif isinstance(instrumented_class, str):
            registered[directory] = [instrumented_class, class_name]
        elif class_name not in instrumented_class:
            instrumented_class.append(class_name)


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion tests/mlmodel_langchain/test_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def vectorstore_events_sans_content(event):
_test_vectorstore_modules_instrumented_ignored_classes = set(
[
"VectorStore", # Base class
"ElasticKnnSearch", # Deprecated, so we will not be instrumenting this.
"Zilliz", # Inherited from Milvus, which we are already instrumenting.
]
)

Expand Down
Loading