Skip to content

Commit

Permalink
[core] stats: minor simplification
Browse files Browse the repository at this point in the history
add timeout on nvidia-smi
add try/except
remove duplicated code
no error if nvidia-smi is not available
  • Loading branch information
fabiencastan committed Oct 13, 2020
1 parent 88099dd commit cdee25e
Showing 1 changed file with 26 additions and 36 deletions.
62 changes: 26 additions & 36 deletions meshroom/core/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(self):
self.gpuMemoryTotal = 0
self.gpuName = ''
self.curves = defaultdict(list)

self.nvidia_smi = None
self._isInit = False

def initOnFirstTime(self):
Expand All @@ -53,40 +53,21 @@ def initOnFirstTime(self):
self._isInit = True

self.cpuFreq = psutil.cpu_freq().max
self.ramTotal = psutil.virtual_memory().total / 1024/1024/1024
self.ramTotal = psutil.virtual_memory().total / (1024*1024*1024)

if platform.system() == "Windows":
from distutils import spawn
# If the platform is Windows and nvidia-smi
# could not be found from the environment path,
# try to find it from system drive with default installation path
self.nvidia_smi = spawn.find_executable('nvidia-smi')
if self.nvidia_smi is None:
self.nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
# could not be found from the environment path,
# try to find it from system drive with default installation path
default_nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
if os.path.isfile(default_nvidia_smi):
self.nvidia_smi = default_nvidia_smi
else:
self.nvidia_smi = "nvidia-smi"

try:
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE)
xmlGpu, stdError = p.communicate()

smiTree = ET.fromstring(xmlGpu)
gpuTree = smiTree.find('gpu')

try:
self.gpuMemoryTotal = gpuTree.find('fb_memory_usage').find('total').text.split(" ")[0]
except Exception as e:
logging.debug('Failed to get gpuMemoryTotal: "{}".'.format(str(e)))
pass
try:
self.gpuName = gpuTree.find('product_name').text
except Exception as e:
logging.debug('Failed to get gpuName: "{}".'.format(str(e)))
pass

except Exception as e:
logging.debug('Failed to get information from nvidia_smi at init: "{}".'.format(str(e)))

def _addKV(self, k, v):
if isinstance(v, tuple):
for ki, vi in v._asdict().items():
Expand All @@ -98,18 +79,23 @@ def _addKV(self, k, v):
self.curves[k].append(v)

def update(self):
self.initOnFirstTime()
self._addKV('cpuUsage', psutil.cpu_percent(percpu=True)) # interval=None => non-blocking (percentage since last call)
self._addKV('ramUsage', psutil.virtual_memory().percent)
self._addKV('swapUsage', psutil.swap_memory().percent)
self._addKV('vramUsage', 0)
self._addKV('ioCounters', psutil.disk_io_counters())
self.updateGpu()
try:
self.initOnFirstTime()
self._addKV('cpuUsage', psutil.cpu_percent(percpu=True)) # interval=None => non-blocking (percentage since last call)
self._addKV('ramUsage', psutil.virtual_memory().percent)
self._addKV('swapUsage', psutil.swap_memory().percent)
self._addKV('vramUsage', 0)
self._addKV('ioCounters', psutil.disk_io_counters())
self.updateGpu()
except Exception as e:
logging.debug('Failed to get statistics: "{}".'.format(str(e)))

def updateGpu(self):
if not self.nvidia_smi:
return
try:
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE)
xmlGpu, stdError = p.communicate()
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
xmlGpu, stdError = p.communicate(timeout=10) # 10 seconds

smiTree = ET.fromstring(xmlGpu)
gpuTree = smiTree.find('gpu')
Expand All @@ -129,7 +115,11 @@ def updateGpu(self):
except Exception as e:
logging.debug('Failed to get gpuTemperature: "{}".'.format(str(e)))
pass

except subprocess.TimeoutExpired as e:
logging.debug('Timeout when retrieving information from nvidia_smi: "{}".'.format(str(e)))
p.kill()
outs, errs = p.communicate()
return
except Exception as e:
logging.debug('Failed to get information from nvidia_smi: "{}".'.format(str(e)))
return
Expand Down

0 comments on commit cdee25e

Please sign in to comment.