Skip to content

Commit

Permalink
Revert "First pass at optionally truncating long string values"
Browse files Browse the repository at this point in the history
This reverts commit 8bde832.
  • Loading branch information
dagardner-nv committed Apr 22, 2024
1 parent d310123 commit c39b845
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 28 deletions.
8 changes: 2 additions & 6 deletions morpheus/modules/output/write_to_vector_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,7 @@ def on_completed():
try:
if accum_stats.data:
merged_df = cudf.concat(accum_stats.data)

service.insert_dataframe(name=key, df=merged_df, truncate_long_strings=True)
service.insert_dataframe(name=key, df=merged_df)
final_df_references.append(accum_stats.data)
except Exception as e:
logger.error("Unable to upload dataframe entries to vector database: %s", e)
Expand Down Expand Up @@ -214,10 +213,7 @@ def on_data(msg: typing.Union[ControlMessage, MultiResponseMessage, MultiMessage
merged_df = cudf.concat(accum_stats.data)

# pylint: disable=not-a-mapping
service.insert_dataframe(name=key,
df=merged_df,
truncate_long_strings=True,
**resource_kwargs)
service.insert_dataframe(name=key, df=merged_df, **resource_kwargs)
# Reset accumulator stats
accum_stats.data.clear()
accum_stats.last_insert_time = current_time
Expand Down
23 changes: 1 addition & 22 deletions morpheus/service/vdb/milvus_vector_db_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@
IMPORT_EXCEPTION = None
IMPORT_ERROR_MESSAGE = "MilvusVectorDBResourceService requires the milvus and pymilvus packages to be installed."

# https://milvus.io/docs/limitations.md#Length-of-a-string
MAX_STRING_LENGTH = 65535

try:
import pymilvus
from pymilvus.orm.mutation import MutationResult
Expand Down Expand Up @@ -278,19 +275,14 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any])

return self._insert_result_to_dict(result=result)

def insert_dataframe(self,
df: typing.Union[cudf.DataFrame, pd.DataFrame],
truncate_long_strings: bool = False,
**kwargs: dict[str, typing.Any]) -> dict:
def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict:
"""
Insert dataframe entries into the vector database.
Parameters
----------
df : typing.Union[cudf.DataFrame, pd.DataFrame]
Dataframe to be inserted into the collection.
truncate_long_strings : bool, optional
When true, truncate strings values that are longer than the max length supported by Milvus (65535).
**kwargs : dict[str, typing.Any]
Extra keyword arguments specific to the vector database implementation.
Expand All @@ -303,19 +295,6 @@ def insert_dataframe(self,
if isinstance(df, cudf.DataFrame):
df = df.to_pandas()

if truncate_long_strings:
for col in df:
str_series = df[col]
if str_series.dtype == "object":
max_len = str_series.str.len().max()
if max_len > MAX_STRING_LENGTH:
logger.warning(("Column '%s' has a string length of %d, larger than the max of %d"
"supported by Milvus, truncating"),
col,
max_len,
MAX_STRING_LENGTH)
df[col] = str_series.str.slice(0, MAX_STRING_LENGTH)

# Ensure that there are no None values in the DataFrame entries.
for field_name, dtype in self._fillna_fields_dict.items():
if dtype in (pymilvus.DataType.VARCHAR, pymilvus.DataType.STRING):
Expand Down

0 comments on commit c39b845

Please sign in to comment.