Add NIM saved views and logs pipeline #19113

Merged · 15 commits · Dec 11, 2024
47 changes: 47 additions & 0 deletions nvidia_nim/assets/logs/nvidia_nim.yaml
@@ -0,0 +1,47 @@
id: nvidia_nim
metric_id: nvidia-nim
backend_only: false
facets:
pipeline:
  type: pipeline
  name: 'NVIDIA NIM'
  enabled: true
  filter:
    query: source:nvidia_nim
  processors:
    - type: grok-parser
      name: Parse timestamp, level, logger, and message
      enabled: true
      source: message
      samples:
        - "2024-10-30 21:56:25,295 [INFO] PyTorch version 2.3.1 available."
        - "2024-10-30 21:58:26,914 [WARNING] [TRT-LLM] [W] Logger level already set from environment. Discard new verbosity: error"
        - "INFO 2024-10-30 21:56:28.831 ngc_injector.py:152] Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 (tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]"
        - "WARNING 2024-10-30 21:58:27.670 arg_utils.py:775] Chunked prefill is enabled by default for models with max_model_len > 32K. Currently, chunked prefill might not work with some features or models. If you encounter any issues, please disable chunked prefill by setting --enable-chunked-prefill=False."
        - "[1730325496.647520] [dd317ab0670e:126 :0] parser.c:2305 UCX WARN (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)"
      grok:
        matchRules: |
          nvidia_nim %{date("yyyy-MM-dd HH:mm:ss,SSS"):timestamp} \[%{_level}\] \[%{notSpace:component_name}\] \[%{word}\] %{_msg}
          nvidia_nim_logger %{_level} %{date("yyyy-MM-dd HH:mm:ss.SSS"):timestamp} %{_logger_name}:%{_logger_line}\] %{_msg}
          generic_log %{date("yyyy-MM-dd HH:mm:ss,SSS"):timestamp} \[%{_level}\] %{_msg}
          component_log \[%{number:timestamp}\]\W+\[%{notSpace:container_id}:%{number:pid}\W+:%{number:thread_id}\W+%{_logger_name}:%{_logger_line} %{word:component_name}\W+%{_level}\W+\(%{_msg}\)
        supportRules: |
          _logger_line %{notSpace:logger.line}
          _logger_name %{notSpace:logger.name}
          _level %{word:level}
          _msg %{data:msg}
    - type: message-remapper
      name: Define `msg` as the official message of the log
      enabled: true
      sources:
        - msg
    - type: date-remapper
      name: Define `timestamp` as the official date of the log
      enabled: true
      sources:
        - timestamp
    - type: status-remapper
      name: Define `level` as the official status of the log
      enabled: true
      sources:
        - level
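As a sanity check on the `nvidia_nim_logger` match rule above, here is a minimal Python sketch (not part of the integration) that approximates the grok pattern with a plain regex and pulls the same fields out of the third log sample. The regex is a hand-translation of the grok rule, so treat it as illustrative rather than an exact equivalent.

```python
import re

# Rough regex equivalent of the nvidia_nim_logger grok rule:
#   %{_level} %{date("yyyy-MM-dd HH:mm:ss.SSS")} %{_logger_name}:%{_logger_line}] %{_msg}
NIM_LOGGER = re.compile(
    r"^(?P<level>\w+) "
    r"(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) "
    r"(?P<logger_name>\S+):(?P<logger_line>\S+)\] "
    r"(?P<msg>.*)$"
)

sample = (
    "INFO 2024-10-30 21:56:28.831 ngc_injector.py:152] "
    "Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 "
    "(tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]"
)

m = NIM_LOGGER.match(sample)
assert m is not None
print(m.group("level"))        # INFO
print(m.group("logger_name"))  # ngc_injector.py
print(m.group("logger_line"))  # 152
```

Note that `%{notSpace}` in grok, like `\S+` here, relies on backtracking to stop at the `:` and `]` delimiters.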
58 changes: 58 additions & 0 deletions nvidia_nim/assets/logs/nvidia_nim_tests.yaml
@@ -0,0 +1,58 @@
id: "nvidia_nim"
tests:
# This log sample satisfies the validation.
-
sample: |-
2024-10-30 21:56:25,295 [INFO] PyTorch version 2.3.1 available.
result:
custom:
level: "INFO"
timestamp: 1730325385295
message: "PyTorch version 2.3.1 available."
status: "info"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325385295
-
sample: |-
2024-10-30 21:58:26,914 [WARNING] [TRT-LLM] [W] Logger level already set from environment. Discard new verbosity: error
result:
custom:
level: "WARNING"
timestamp: 1730325506914
component_name: "TRT-LLM"
message: "Logger level already set from environment. Discard new verbosity: error"
status: "warn"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325506914
-
sample: |-
INFO 2024-10-30 21:56:28.831 ngc_injector.py:152] Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 (tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]
result:
custom:
level: "INFO"
timestamp: 1730325388831
logger:
line: "152"
name: "ngc_injector.py"
message: "Valid profile: e45b4b991bbc51d0df3ce53e87060fc3a7f76555406ed534a8479c6faa706987 (tensorrt_llm-a10g-bf16-tp4-latency) on GPUs [0, 1, 2, 3]"
status: "info"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325388831
-
sample: |-
WARNING 2024-10-30 21:58:27.670 arg_utils.py:775] Chunked prefill is enabled by default for models with max_model_len > 32K. Currently, chunked prefill might not work with some features or models. If you encounter any issues, please disable chunked prefill by setting --enable-chunked-prefill=False.
result:
custom:
level: "WARNING"
timestamp: 1730325507670
logger:
line: "775"
name: "arg_utils.py"
message: "Chunked prefill is enabled by default for models with max_model_len > 32K. Currently, chunked prefill might not work with some features or models. If you encounter any issues, please disable chunked prefill by setting --enable-chunked-prefill=False."
status: "warn"
tags:
- "source:LOGS_SOURCE"
timestamp: 1730325507670
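The expected `timestamp` values in these tests are Unix epoch milliseconds for the parsed dates, interpreted as UTC. A quick standalone check (a hypothetical helper, not part of the test harness) confirms the first case:

```python
from datetime import datetime, timezone

def to_epoch_ms(raw: str) -> int:
    """Parse a 'YYYY-MM-DD HH:MM:SS,mmm' log timestamp as UTC epoch milliseconds."""
    dt = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S,%f").replace(tzinfo=timezone.utc)
    # round() rather than int() to avoid float truncation on the millisecond part
    return round(dt.timestamp() * 1000)

# Matches the expected value in the first test case above.
assert to_epoch_ms("2024-10-30 21:56:25,295") == 1730325385295
```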
20 changes: 20 additions & 0 deletions nvidia_nim/assets/saved_views/nim_errors.json
@@ -0,0 +1,20 @@
{
  "name": "NVIDIA NIM Errors",
  "options": {
    "columns": [
      "host",
      "service"
    ],
    "message_display": "inline",
    "show_date_column": true,
    "show_message_column": true,
    "show_timeline": true
  },
  "page": "stream",
  "query": "source:nvidia_nim status:error",
  "timerange": {
    "interval_ms": 900000
  },
  "type": "logs",
  "visible_facets": []
}
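The saved view is essentially a stored Log Explorer query (`source:nvidia_nim status:error`) over a 15-minute window (`interval_ms: 900000`). For reference, here is a hedged sketch of running the equivalent ad-hoc query against the public Datadog v2 Logs Search API; it assumes `DD_API_KEY` and `DD_APP_KEY` are set in the environment and uses the third-party `requests` library:

```python
import os
import requests

# Ad-hoc equivalent of the saved view's query; the saved view itself lives
# in the Log Explorer UI rather than behind a dedicated endpoint.
resp = requests.post(
    "https://api.datadoghq.com/api/v2/logs/events/search",
    headers={
        "DD-API-KEY": os.environ["DD_API_KEY"],
        "DD-APPLICATION-KEY": os.environ["DD_APP_KEY"],
    },
    json={
        "filter": {
            "query": "source:nvidia_nim status:error",
            "from": "now-15m",  # mirrors the view's 900000 ms interval
            "to": "now",
        },
        "page": {"limit": 25},
    },
    timeout=30,
)
resp.raise_for_status()
for event in resp.json().get("data", []):
    print(event["attributes"].get("message"))
```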
3 changes: 3 additions & 0 deletions nvidia_nim/manifest.json
@@ -49,6 +49,9 @@
    },
    "monitors": {
      "Average Request Latency is High": "assets/monitors/latency.json"
    },
    "saved_views": {
      "NVIDIA NIM Errors": "assets/saved_views/nim_errors.json"
    }
  },
  "author": {