Skip to content

Commit

Permalink
BUG: fail noisily if the required gpu count is not found for a gpu flavor
Browse files Browse the repository at this point in the history
change code so that we fail noisily if a gpu flavor's required gpu count is not found, instead of assuming it to be 1
  • Loading branch information
anish-mudaraddi committed Feb 7, 2024
1 parent 0fb32b1 commit 5f868db
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
19 changes: 19 additions & 0 deletions MonitoringTools/tests/test_slottifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,25 @@ def test_calculate_slots_on_hv_non_gpu_disabled():
assert res.max_gpu_slots_capacity_enabled == 0


def test_calculate_slots_on_hv_gpu_no_gpunum():
    """
    tests calculate_slots_on_hv when provided a gpu flavor but gpus_required is set to 0
    should raise error
    """
    # gpus_required of 0 on a gpu flavor is the condition under test
    flavor_reqs = {"gpus_required": 0, "cores_required": 10, "mem_required": 10}

    # no slot count is asserted here: the call is expected to raise
    # instead of returning a result
    hv_info = {
        "compute_service_status": "disabled",
        "cores_available": 100,
        "mem_available": 100,
    }

    # match ties the failure to the missing-gpunum error message, so an
    # unrelated RuntimeError cannot make this test pass by accident
    with pytest.raises(RuntimeError, match="gpunum"):
        # g- specifies gpu flavor
        calculate_slots_on_hv("g-flavor1", flavor_reqs, hv_info)


def test_calculate_slots_on_hv_gpu_disabled():
"""
tests calculate_slots_on_hv calculates slots properly for gpu flavor
Expand Down
14 changes: 1 addition & 13 deletions MonitoringTools/usr/local/bin/slottifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from slottifier_entry import SlottifierEntry
from send_metric_utils import parse_args, run_scrape

UNKNOWN_GPU_NUM_FLAVORS = []


def get_hv_info(hypervisor: Dict, aggregate_info, service_info) -> Dict:
"""
Expand Down Expand Up @@ -133,10 +131,7 @@ def calculate_slots_on_hv(flavor_name, flavor_reqs, hv_info) -> SlottifierEntry:
if "g-" in flavor_name:
# fail loudly when the gpu number is not specified for a gpu flavor
if flavor_reqs["gpus_required"] == 0:
flavor_reqs["gpus_required"] = 1
# For debugging purposes
if flavor_name not in UNKNOWN_GPU_NUM_FLAVORS:
UNKNOWN_GPU_NUM_FLAVORS.append(flavor_name)
raise RuntimeError(f"gpu flavor {flavor_name} does not have 'gpunum' metadata")

theoretical_gpu_slots_available = (
hv_info["gpu_capacity"] // flavor_reqs["gpus_required"]
Expand Down Expand Up @@ -295,13 +290,6 @@ def main(user_args: List):
influxdb_args = parse_args(user_args, description="Get All Service Statuses")
run_scrape(influxdb_args, get_slottifier_details)

# for debugging purposes
for missing_flavor in UNKNOWN_GPU_NUM_FLAVORS:
print(
f"{missing_flavor} missing metadata property 'extra_specs:accounting:gpu_num'"
"do not know how many GPUs the flavor requires, assuming 1 gpu required"
)


if __name__ == "__main__":
main(sys.argv[1:])

0 comments on commit 5f868db

Please sign in to comment.