Skip to content

Commit

Permalink
Completely remove GPU A2 type. It doesn't exist on the nerc-prod clus…
Browse files Browse the repository at this point in the history
…ter nor

do we know what it would be named, so I am removing it. We can add it back if it is ever added
to the cluster.

Also update the test to include A100SXM4 instead of A2
  • Loading branch information
naved001 committed Mar 26, 2024
1 parent 1451c1e commit 902162e
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 11 deletions.
4 changes: 2 additions & 2 deletions openshift_metrics/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def test_write_metrics_log(self, mock_gna):
},
"pod5": {
"namespace": "namespace2",
"gpu_type": utils.GPU_A2,
"gpu_type": utils.GPU_A100_SXM4,
"metrics": {
0: {
"cpu_request": 24,
Expand All @@ -659,7 +659,7 @@ def test_write_metrics_log(self, mock_gna):
"2023-01,namespace1,namespace1,PI1,,,,76,1128,OpenShift CPU,0.013,14.66\n"
"2023-01,namespace2,namespace2,PI2,,,,,96,OpenShift CPU,0.013,1.25\n"
"2023-01,namespace2,namespace2,PI2,,,,,48,OpenShift GPUA100,1.803,86.54\n"
"2023-01,namespace2,namespace2,PI2,,,,,144,OpenShift GPUA2,0.466,67.1\n")
"2023-01,namespace2,namespace2,PI2,,,,,48,OpenShift GPUA100SXM4,2.078,99.74\n")

with tempfile.NamedTemporaryFile(mode="w+") as tmp:
utils.write_metrics_by_namespace(test_metrics_dict, tmp.name, "2023-01")
Expand Down
9 changes: 0 additions & 9 deletions openshift_metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@
GPU_A100 = "NVIDIA-A100-40GB"
GPU_A100_SXM4 = "NVIDIA-A100-SXM4-40GB"
GPU_V100 = "Tesla-V100-PCIE-32GB"
GPU_A2 = "nvidia.com/gpu_A2"
GPU_GENERIC = "nvidia.com/gpu"
NO_GPU = "No GPU"

# SU Types
SU_CPU = "OpenShift CPU"
SU_A100_GPU = "OpenShift GPUA100"
SU_A100_SXM4_GPU = "OpenShift GPUA100SXM4"
SU_A2_GPU = "OpenShift GPUA2"
SU_V100_GPU = "OpenShift GPUV100"
SU_UNKNOWN_GPU = "OpenShift Unknown GPU"
SU_UNKNOWN = "Openshift Unknown"
Expand All @@ -45,7 +43,6 @@
SU_CPU: 0.013,
SU_A100_GPU: 1.803,
SU_A100_SXM4_GPU: 2.078,
SU_A2_GPU: 0.466,
SU_V100_GPU: 1.214,
SU_UNKNOWN_GPU: 0,
}
Expand Down Expand Up @@ -358,7 +355,6 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
"SU_CPU_HOURS": 0,
"SU_A100_GPU_HOURS": 0,
"SU_A100_SXM4_GPU_HOURS": 0,
"SU_A2_GPU_HOURS": 0,
"SU_V100_GPU_HOURS": 0,
"SU_UNKNOWN_GPU_HOURS": 0,
"total_cost": 0,
Expand All @@ -378,8 +374,6 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
metrics_by_namespace[namespace]["SU_A100_SXM4_GPU_HOURS"] += su_count * duration_in_hours
elif gpu_type == GPU_V100:
metrics_by_namespace[namespace]["SU_V100_GPU_HOURS"] += su_count * duration_in_hours
elif gpu_type == GPU_A2:
metrics_by_namespace[namespace]["SU_A2_GPU_HOURS"] += su_count * duration_in_hours
elif gpu_type == GPU_GENERIC:
metrics_by_namespace[namespace]["SU_UNKNOWN_GPU_HOURS"] += su_count * duration_in_hours
else:
Expand All @@ -404,9 +398,6 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
if metrics["SU_A100_SXM4_GPU_HOURS"] != 0:
add_row(hours=metrics["SU_A100_SXM4_GPU_HOURS"], su_type=SU_A100_SXM4_GPU, **common_args)

if metrics["SU_A2_GPU_HOURS"] != 0:
add_row(hours=metrics["SU_A2_GPU_HOURS"], su_type=SU_A2_GPU, **common_args)

if metrics["SU_V100_GPU_HOURS"] != 0:
add_row(hours=metrics["SU_V100_GPU_HOURS"], su_type=SU_V100_GPU, **common_args)

Expand Down

0 comments on commit 902162e

Please sign in to comment.