-
Notifications
You must be signed in to change notification settings - Fork 161
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ABP nvsmi sample data generation (#1108)
- Add a script to the ABP nvsmi example for generating sample data.
- Data generated using the script does not contain all the columns used to train the current nvsmi model, so the model was retrained using the 18 overlapping columns.
- Update the model, model config, training notebook/script, and feature columns file.
- Update the README with instructions on how to run the script.

Closes #1097

Authors:
- Eli Fajardo (https://github.com/efajardo-nv)

Approvers:
- Michael Demoret (https://github.com/mdemoret-nv)
- https://github.com/gbatmaz

URL: #1108
- Loading branch information
1 parent
0351ae2
commit e73b03a
Showing
12 changed files
with
131 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import argparse | ||
import time | ||
|
||
import pandas as pd | ||
from pynvml.smi import NVSMI_QUERY_GPU | ||
from pynvml.smi import nvidia_smi | ||
|
||
|
||
def main():
    """Query GPU state through pynvml and append one JSON line per GPU, forever.

    Reads two attributes from the module-level ``args`` namespace built by the
    ``__main__`` block:

    * ``args.output_file`` -- path of the file to write; it is opened in ``"w"``
      mode, so any existing content is truncated.
    * ``args.interval_ms`` -- delay between successive queries, in milliseconds.

    The loop has no exit condition; the function only stops when the process
    is interrupted or an exception propagates.
    """
    # argparse yields a ``str`` for an option given on the command line unless a
    # ``type=`` is declared, and ``str / float`` raises TypeError. Coerce once,
    # up front, so a bad value fails before anything is written.
    interval_s = float(args.interval_ms) / 1000.0

    query_opts = NVSMI_QUERY_GPU.copy()

    # Remove the timestamp and supported clocks from the query
    del query_opts["timestamp"]
    del query_opts["supported-clocks"]

    nvsmi = nvidia_smi.getInstance()

    with open(args.output_file, "w", encoding="UTF-8") as f:

        while True:

            device_query = nvsmi.DeviceQuery(list(query_opts.values()))

            # Flatten the GPUs to allow for a new row per GPU: every row is a
            # shallow copy of the query result whose "gpu" list is overwritten
            # with a single gpu entry.
            output_dicts = [{**device_query, "gpu": gpu} for gpu in device_query["gpu"]]

            # NOTE(review): record_prefix looks like it has no effect without a
            # record_path -- confirm before removing.
            df = pd.json_normalize(output_dicts, record_prefix="nvidia_smi_log")

            # Rename the id column to match the XML converted output from NetQ
            df.rename(columns={"gpu.id": "gpu.@id", "count": "attached_gpus"}, inplace=True)

            # Prefix every column so names line up with the model's feature columns
            df.rename(columns=lambda x: "nvidia_smi_log" + "." + x, inplace=True)

            # Add the current timestamp
            df.insert(0, "timestamp", time.time())

            df.to_json(f, orient="records", lines=True)

            # Push each batch to disk so the file can be read while this runs
            f.flush()

            time.sleep(interval_s)
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point. ``args`` is intentionally bound at module scope:
    # main() reads it as a global rather than taking parameters.
    parser = argparse.ArgumentParser(description=__doc__)
    # Without ``type=`` a user-supplied value arrives as a str and the
    # ``interval_ms / 1000.0`` division in main() raises TypeError.
    parser.add_argument('--interval-ms',
                        type=float,
                        default=1000,
                        help='interval in ms between writes to output file')
    parser.add_argument("--output-file", default='nvsmi.jsonlines', help='output file to save dataset')
    args = parser.parse_args()

    main()
This file was deleted.
Oops, something went wrong.
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,18 @@ | ||
nvidia_smi_log.gpu.pci.tx_util | ||
nvidia_smi_log.gpu.pci.rx_util | ||
nvidia_smi_log.gpu.fb_memory_usage.used | ||
nvidia_smi_log.gpu.fb_memory_usage.free | ||
nvidia_smi_log.gpu.bar1_memory_usage.total | ||
nvidia_smi_log.gpu.bar1_memory_usage.used | ||
nvidia_smi_log.gpu.bar1_memory_usage.free | ||
nvidia_smi_log.gpu.utilization.gpu_util | ||
nvidia_smi_log.gpu.utilization.memory_util | ||
nvidia_smi_log.gpu.temperature.gpu_temp | ||
nvidia_smi_log.gpu.temperature.gpu_temp_max_threshold | ||
nvidia_smi_log.gpu.temperature.gpu_temp_slow_threshold | ||
nvidia_smi_log.gpu.temperature.gpu_temp_max_gpu_threshold | ||
nvidia_smi_log.gpu.temperature.memory_temp | ||
nvidia_smi_log.gpu.temperature.gpu_temp_max_mem_threshold | ||
nvidia_smi_log.gpu.power_readings.power_draw | ||
nvidia_smi_log.gpu.clocks.graphics_clock | ||
nvidia_smi_log.gpu.clocks.sm_clock | ||
nvidia_smi_log.gpu.clocks.mem_clock | ||
nvidia_smi_log.gpu.clocks.video_clock | ||
nvidia_smi_log.gpu.applications_clocks.graphics_clock | ||
nvidia_smi_log.gpu.applications_clocks.mem_clock | ||
nvidia_smi_log.gpu.default_applications_clocks.graphics_clock | ||
nvidia_smi_log.gpu.default_applications_clocks.mem_clock | ||
nvidia_smi_log.gpu.max_clocks.graphics_clock | ||
nvidia_smi_log.gpu.max_clocks.sm_clock | ||
nvidia_smi_log.gpu.max_clocks.mem_clock | ||
nvidia_smi_log.gpu.max_clocks.video_clock | ||
nvidia_smi_log.gpu.max_customer_boost_clocks.graphics_clock | ||
nvidia_smi_log.gpu.max_clocks.mem_clock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
../../../abp-models/abp-nvsmi-xgb-20210310.bst | ||
../../../abp-models/abp-nvsmi-xgb-20230831.bst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ input [ | |
{ | ||
name: "input__0" | ||
data_type: TYPE_FP32 | ||
dims: [ 29 ] | ||
dims: [ 18 ] | ||
} | ||
] | ||
output [ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters