-
Notifications
You must be signed in to change notification settings - Fork 161
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Morpheus modules implementation for DFP (Azure, Duo) pipeline training and preprocessing stages - Added modules utility - Moved `column_info.py` to `morpheus.utils` - Added tests - Updated digital fingerprinting production example README.md - Added dask and distributed packages - Updated DFP production examples notebooks Authors: - Bhargav Suryadevara (https://github.com/bsuryadevara) - https://github.com/bsuryadev Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: #510
- Loading branch information
1 parent
3a13a03
commit 83479a5
Showing
41 changed files
with
4,021 additions
and
318 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ dependencies: | |
- boto3 | ||
- dask | ||
- dill | ||
- distributed | ||
- kfp | ||
- librdkafka | ||
- mlflow>1.29.0,<2 | ||
|
13 changes: 13 additions & 0 deletions
13
examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright (c) 2022-2023, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
82 changes: 82 additions & 0 deletions
82
examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# Copyright (c) 2022-2023, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import logging | ||
import pickle | ||
import time | ||
|
||
import mrc | ||
from mrc.core import operators as ops | ||
|
||
from morpheus.utils.column_info import process_dataframe | ||
from morpheus.utils.module_ids import MODULE_NAMESPACE | ||
from morpheus.utils.module_utils import get_module_config | ||
from morpheus.utils.module_utils import register_module | ||
|
||
from ..messages.multi_dfp_message import MultiDFPMessage | ||
from ..utils.module_ids import DFP_DATA_PREP | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@register_module(DFP_DATA_PREP, MODULE_NAMESPACE)
def dfp_data_prep(builder: mrc.Builder):
    """
    Register a node that runs each message's metadata dataframe through a column schema.

    The module configuration is looked up with ``get_module_config`` and the following
    keys are read from it:

    - ``schema``: mapping holding ``schema_str`` (the pickled schema, as text) and
      ``encoding`` (the encoding used to turn ``schema_str`` back into bytes).
    - ``timestamp_column_name``: column whose min/max are reported in the debug log.

    Parameters
    ----------
    builder : mrc.Builder
        Pipeline builder instance.
    """

    module_config = get_module_config(DFP_DATA_PREP, builder)

    schema_section = module_config.get("schema")
    pickled_schema = schema_section.get("schema_str")
    schema_encoding = schema_section.get("encoding")
    ts_column = module_config.get("timestamp_column_name")

    # NOTE(review): the schema is unpickled straight from the module config; this presumes
    # the config originates from a trusted source -- confirm against the caller.
    column_schema = pickle.loads(bytes(pickled_schema, schema_encoding))

    def _apply_schema(message: MultiDFPMessage):
        # Nothing to do for an empty slot; hand the None straight through.
        if message is None:
            return None

        began = time.time()

        # Run the message's dataframe through the schema.
        processed = process_dataframe(message.get_meta_dataframe(), column_schema)

        # Write the processed columns back onto the message.
        message.set_meta_dataframe(list(processed.columns), processed)

        # Only compute the timing/range statistics when they will actually be logged.
        if logger.isEnabledFor(logging.DEBUG):
            elapsed_ms = (time.time() - began) * 1000.0

            row_count = message.mess_count
            earliest = message.get_meta(ts_column).min()
            latest = message.get_meta(ts_column).max()

            logger.debug("Preprocessed %s data for logs in %s to %s in %s ms",
                         row_count,
                         earliest,
                         latest,
                         elapsed_ms)

        return message

    def _build_node(obs: mrc.Observable, sub: mrc.Subscriber):
        obs.pipe(ops.map(_apply_schema)).subscribe(sub)

    prep_node = builder.make_node_full(DFP_DATA_PREP, _build_node)

    # The same node serves as both the entry and exit point of the module.
    builder.register_module_input("input", prep_node)
    builder.register_module_output("output", prep_node)
58 changes: 58 additions & 0 deletions
58
examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_model_train_deploy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Copyright (c) 2022-2023, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import logging | ||
|
||
import dfp.modules.dfp_training # noqa: F401 | ||
import mrc | ||
|
||
import morpheus.modules.mlflow_model_writer # noqa: F401 | ||
from morpheus.utils.module_ids import MLFLOW_MODEL_WRITER | ||
from morpheus.utils.module_ids import MODULE_NAMESPACE | ||
from morpheus.utils.module_utils import get_module_config | ||
from morpheus.utils.module_utils import load_module | ||
from morpheus.utils.module_utils import register_module | ||
|
||
from ..utils.module_ids import DFP_MODEL_TRAIN_DEPLOY | ||
from ..utils.module_ids import DFP_TRAINING | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@register_module(DFP_MODEL_TRAIN_DEPLOY, MODULE_NAMESPACE)
def dfp_model_train_deploy(builder: mrc.Builder):
    """
    Chain the DFP training module and the MLflow model writer module into one module.

    The module configuration is looked up with ``get_module_config``; the sub-configurations
    stored under the ``DFP_TRAINING`` and ``MLFLOW_MODEL_WRITER`` keys are each passed to
    ``load_module``. The training module's ``output`` port is connected to the writer
    module's ``input`` port, and the combined module exposes the training module's ``input``
    and the writer module's ``output``.

    Parameters
    ----------
    builder : mrc.Builder
        Pipeline builder instance.
    """

    module_config = get_module_config(DFP_MODEL_TRAIN_DEPLOY, builder)

    # Read both sub-configurations before loading either module.
    training_conf = module_config.get(DFP_TRAINING)
    writer_conf = module_config.get(MLFLOW_MODEL_WRITER)

    training_module = load_module(training_conf, builder=builder)
    writer_module = load_module(writer_conf, builder=builder)

    # Wire training -> model writer.
    training_out = training_module.output_port("output")
    writer_in = writer_module.input_port("input")
    builder.make_edge(training_out, writer_in)

    # Expose the outer ends of the chain as this module's own ports.
    module_in = training_module.input_port("input")
    builder.register_module_input("input", module_in)

    module_out = writer_module.output_port("output")
    builder.register_module_output("output", module_out)
Oops, something went wrong.