From abc77799fa827a8b36b9c797eec64d2822c7e75e Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Fri, 25 Oct 2024 09:46:56 -0400 Subject: [PATCH] Workarounds for using gpurun under Hyper-V This patch contains three adjustments that are aimed at improving support under Hyper-V virtualisation. The first of these is the addition of a GPURUN_BYPASS environment variable to turn gpurun into a no-op (mostly for testing purposes). The second changes processing of the BDFID output from rocminfo to use arithmetic instead of string slicing, which is potentially a bit more robust. (It seems that on Hyper-V VMs, the BDFID is reported as zero. That might be a separate bug, perhaps in the Hyper-V implementation itself.) The third change allows for a zero BDFID when matching the graphics card, in which case it will fall back to a UUID instead. Change-Id: I2eef92acaf0c2fc86bd71779b70de9be52788c1f --- utils/bin/gpurun | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/utils/bin/gpurun b/utils/bin/gpurun index 2b90a8e..9c8dec6 100755 --- a/utils/bin/gpurun +++ b/utils/bin/gpurun @@ -28,6 +28,14 @@ # mpirun -np 4 gpurun env | grep ROCR_VISIBLE_DEVICES # +# If set to 1, just invoke the rest of the command line without doing anything +# else. +GPURUN_BYPASS=${GPURUN_BYPASS:-0} + +if [ "$GPURUN_BYPASS" = "1" ]; then + exec "$@" +fi + # PROGVERSION string is updated by cmake when component is installed PROGVERSION=X.Y-Z function version(){ @@ -256,12 +264,10 @@ while read _linepair ; do _this_uuid=$_fieldvalue elif [ "$_fieldtype" == "BDFID" ] ; then if [[ $_last_device_type_was_gpu == 1 ]] ; then - _bdfidstr=`echo "obase=16; $_fieldvalue" | bc | tr '[:upper:]' '[:lower:]'` - if [ ${#_bdfidstr} == 3 ] ; then - _bdfidstr="0${_bdfidstr:0:1}:${_bdfidstr:1:2}" - else - _bdfidstr="${_bdfidstr:0:2}:${_bdfidstr:2:2}" - fi + # _domain="$(echo "$_fieldvalue / (2^32)" | bc)" + _bus="$(echo "($_fieldvalue / (2^8)) % (2^8)" | bc)" + _devfn="$(echo "($_fieldvalue % (2^8))" | bc)" + _bdfidstr="$(printf "%.2x:%.2x" "$_bus" "$_devfn")" fi elif [ "$_fieldtype" == "Name" ] ; then # The device name field is last in rocminfo output, so we can create new _ri_ array entry @@ -354,9 +360,13 @@ for _devid in `ls $_sysdevdir` ; do done # Search _ri_ arrays for matching uuids or matching bdfids. for _ri_i in ${!_ri_bdfids[@]} ; do - if [ $_has_unique_id_file == 1 ] ; then + if [ "$_has_unique_id_file" == "1" ] ; then _ss_value=$_this_uuid _ri_value=${_ri_uuid[$_ri_i]} + elif [ "${_ri_bdfids[$_ri_i]}" == "00:00" ]; then + # Under Hyper-V, we may see a zero BDFID. Fall back to UUID. + _ss_value=$_devid + _ri_value=$_devid else _ss_value=$_devid _ri_value="0000:${_ri_bdfids[$_ri_i]}.0"