From 1c429397cb7356b633d2d903eb4bca938955d8a4 Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Tue, 3 Sep 2019 13:19:10 -0700 Subject: [PATCH 1/4] CPU info --- nbresuse/__init__.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/nbresuse/__init__.py b/nbresuse/__init__.py index c564406..4347885 100644 --- a/nbresuse/__init__.py +++ b/nbresuse/__init__.py @@ -29,6 +29,20 @@ def get(self): else: # mem_limit is an Int mem_limit = config.mem_limit + def get_cpu_percent(p): + try: + return p.cpu_percent(interval=0.1) + # Avoid littering logs with stack traces complaining + # about dead processes having no CPU usage + except: + return 0 + cpu_percent = sum([get_cpu_percent(p) for p in all_processes]) + # A better approach would use cpu_affinity to account for the + # fact that the number of logical CPUs in the system is not + # necessarily the same as the number of CPUs the process + # can actually use. But cpu_affinity isn't available for OS X. + cpu_count = psutil.cpu_count() + limits = {} if config.mem_limit != 0: @@ -37,8 +51,18 @@ def get(self): } if config.mem_warning_threshold != 0: limits['memory']['warn'] = (config.mem_limit - rss) < (config.mem_limit * config.mem_warning_threshold) + + if config.cpu_limit != 0: + limits['cpu'] = { + 'cpu': config.cpu_limit + } + if config.cpu_warning_threshold != 0: + limits['cpu']['warn'] = (config.cpu_limit - cpu_percent) < (config.cpu_limit * config.cpu_warning_threshold) + metrics = { 'rss': rss, + 'cpu_percent': cpu_percent, + 'cpu_count': cpu_count, 'limits': limits, } self.write(json.dumps(metrics)) @@ -95,10 +119,39 @@ class ResourceUseDisplay(Configurable): """ ).tag(config=True) + cpu_warning_threshold = Float( + 0.1, + help=""" + Warn user with flashing lights when CPU usage is within this fraction + CPU usage limit. + + For example, if memory limit is 150%, `cpu_warning_threshold` is 0.1, + we will start warning the user when they use (150 - (150 * 0.1)) %. + + Set to 0 to disable warning. + """ + ).tag(config=True) + + cpu_limit = Float( + 0, + help=""" + CPU usage limit to display to the user. + + Note that this does not actually limit the user's CPU usage! + + Defaults to reading from the `CPU_LIMIT` environment variable. If + set to 0, no CPU usage limit is displayed. + """ + ).tag(config=True) + @default('mem_limit') def _mem_limit_default(self): return int(os.environ.get('MEM_LIMIT', 0)) + @default('cpu_limit') + def _cpu_limit_default(self): + return float(os.environ.get('CPU_LIMIT', 0)) + def load_jupyter_server_extension(nbapp): """ Called during notebook start From 34005aeece8b0d80e1b311a60ce947fc8ff6f845 Mon Sep 17 00:00:00 2001 From: Trevor Slaton Date: Thu, 5 Sep 2019 17:29:24 -0700 Subject: [PATCH 2/4] Thread cpu_percent two different ways --- nbresuse/__init__.py | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/nbresuse/__init__.py b/nbresuse/__init__.py index 4347885..2057b13 100644 --- a/nbresuse/__init__.py +++ b/nbresuse/__init__.py @@ -6,14 +6,53 @@ from notebook.utils import url_path_join from notebook.base.handlers import IPythonHandler from tornado import web + try: # Traitlets >= 4.3.3 from traitlets import Callable except ImportError: from .callable import Callable +from threading import Thread +from concurrent.futures import ThreadPoolExecutor, as_completed class MetricsHandler(IPythonHandler): + def initialize(self): + super().initialize() + self.cpu_percent = 0 + # A better approach would use cpu_affinity to account for the + # fact that the number of logical CPUs in the system is not + # necessarily the same as the number of CPUs the process + # can actually use. But cpu_affinity isn't available for OS X. + self.cpu_count = psutil.cpu_count() + + def update_cpu_percent(): + def get_cpu_percent(p): + try: + return p.cpu_percent(interval=0.1) + # Avoid littering logs with stack traces complaining + # about dead processes having no CPU usage + except: + return 0 + # This loop should execute roughly every "interval" seconds + # Slower if max_workers is much less than the number of processes + while True: + cur_process = psutil.Process() + all_processes = [cur_process] + cur_process.children(recursive=True) + # Could have a worker for every process + with ThreadPoolExecutor(max_workers=10) as executor: + cpu_percents = [executor.submit(get_cpu_percent, p) for p in all_processes] + total_percent = 0 + for future in as_completed(cpu_percents): + try: + total_percent += future.result() + except: + pass + self.cpu_percent = total_percent + + t = Thread(target=update_cpu_percent) + t.start() + @web.authenticated def get(self): """ @@ -61,8 +100,8 @@ def get_cpu_percent(p): metrics = { 'rss': rss, - 'cpu_percent': cpu_percent, - 'cpu_count': cpu_count, + 'cpu_percent': self.cpu_percent, + 'cpu_count': self.cpu_count, 'limits': limits, } self.write(json.dumps(metrics)) From 48f1672db3be6c7af667bc87fed1c7b5247012f5 Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Thu, 5 Sep 2019 19:01:49 -0700 Subject: [PATCH 3/4] New approach --- nbresuse/__init__.py | 65 ++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/nbresuse/__init__.py b/nbresuse/__init__.py index 2057b13..3f527e9 100644 --- a/nbresuse/__init__.py +++ b/nbresuse/__init__.py @@ -1,7 +1,7 @@ import os import json import psutil -from traitlets import Float, Int, Union, default +from traitlets import Bool, Float, Int, Union, default from traitlets.config import Configurable from notebook.utils import url_path_join from notebook.base.handlers import IPythonHandler @@ -15,46 +15,37 @@ from threading import Thread from concurrent.futures import ThreadPoolExecutor, as_completed +from tornado.concurrent import run_on_executor class MetricsHandler(IPythonHandler): def initialize(self): super().initialize() self.cpu_percent = 0 + + # https://www.tornadoweb.org/en/stable/concurrent.html#tornado.concurrent.run_on_executor + self.executor = ThreadPoolExecutor(max_workers=10) + # A better approach would use cpu_affinity to account for the # fact that the number of logical CPUs in the system is not # necessarily the same as the number of CPUs the process # can actually use. But cpu_affinity isn't available for OS X. self.cpu_count = psutil.cpu_count() - def update_cpu_percent(): - def get_cpu_percent(p): - try: - return p.cpu_percent(interval=0.1) - # Avoid littering logs with stack traces complaining - # about dead processes having no CPU usage - except: - return 0 - # This loop should execute roughly every "interval" seconds - # Slower if max_workers is much less than the number of processes - while True: - cur_process = psutil.Process() - all_processes = [cur_process] + cur_process.children(recursive=True) - # Could have a worker for every process - with ThreadPoolExecutor(max_workers=10) as executor: - cpu_percents = [executor.submit(get_cpu_percent, p) for p in all_processes] - total_percent = 0 - for future in as_completed(cpu_percents): - try: - total_percent += future.result() - except: - pass - self.cpu_percent = total_percent - - t = Thread(target=update_cpu_percent) - t.start() + @run_on_executor + def update_cpu_percent(self, all_processes): + + def get_cpu_percent(p): + try: + return p.cpu_percent(interval=0.05) + # Avoid littering logs with stack traces complaining + # about dead processes having no CPU usage + except: + return 0 + + return sum([get_cpu_percent(p) for p in all_processes]) @web.authenticated - def get(self): + async def get(self): """ Calculate and return current resource usage metrics """ @@ -68,20 +59,15 @@ def get(self): else: # mem_limit is an Int mem_limit = config.mem_limit - def get_cpu_percent(p): - try: - return p.cpu_percent(interval=0.1) - # Avoid littering logs with stack traces complaining - # about dead processes having no CPU usage - except: - return 0 - cpu_percent = sum([get_cpu_percent(p) for p in all_processes]) # A better approach would use cpu_affinity to account for the # fact that the number of logical CPUs in the system is not # necessarily the same as the number of CPUs the process # can actually use. But cpu_affinity isn't available for OS X. cpu_count = psutil.cpu_count() + if config.track_cpu_percent: + self.cpu_percent = await self.update_cpu_percent(all_processes) + limits = {} if config.mem_limit != 0: @@ -158,6 +144,13 @@ class ResourceUseDisplay(Configurable): """ ).tag(config=True) + track_cpu_percent = Bool( + False, + help=""" + Set to True in order to enable reporting of CPU usage statistics. + """ + ).tag(config=True) + cpu_warning_threshold = Float( 0.1, help=""" From 17cdf4cb9b6d04ee900aefd5fec8516d058ec6ff Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Sun, 8 Sep 2019 16:03:01 -0700 Subject: [PATCH 4/4] Better implementation --- nbresuse/__init__.py | 60 +++++++++++++++--------------- nbresuse/{callable.py => utils.py} | 0 2 files changed, 31 insertions(+), 29 deletions(-) rename nbresuse/{callable.py => utils.py} (100%) diff --git a/nbresuse/__init__.py b/nbresuse/__init__.py index 3f527e9..68788b2 100644 --- a/nbresuse/__init__.py +++ b/nbresuse/__init__.py @@ -11,10 +11,9 @@ # Traitlets >= 4.3.3 from traitlets import Callable except ImportError: - from .callable import Callable + from .utils import Callable -from threading import Thread -from concurrent.futures import ThreadPoolExecutor, as_completed +from concurrent.futures import ThreadPoolExecutor from tornado.concurrent import run_on_executor class MetricsHandler(IPythonHandler): @@ -25,10 +24,6 @@ def initialize(self): # https://www.tornadoweb.org/en/stable/concurrent.html#tornado.concurrent.run_on_executor self.executor = ThreadPoolExecutor(max_workers=10) - # A better approach would use cpu_affinity to account for the - # fact that the number of logical CPUs in the system is not - # necessarily the same as the number of CPUs the process - # can actually use. But cpu_affinity isn't available for OS X. self.cpu_count = psutil.cpu_count() @run_on_executor @@ -51,7 +46,10 @@ async def get(self): """ config = self.settings['nbresuse_display_config'] cur_process = psutil.Process() - all_processes = [cur_process] + cur_process.children(recursive=True) + all_processes = [cur_process] + cur_process.children(recursive=True) + limits = {} + + # Get memory information rss = sum([p.memory_info().rss for p in all_processes]) if callable(config.mem_limit): @@ -68,28 +66,33 @@ async def get(self): if config.track_cpu_percent: self.cpu_percent = await self.update_cpu_percent(all_processes) - limits = {} - if config.mem_limit != 0: limits['memory'] = { 'rss': mem_limit } if config.mem_warning_threshold != 0: - limits['memory']['warn'] = (config.mem_limit - rss) < (config.mem_limit * config.mem_warning_threshold) + limits['memory']['warn'] = (mem_limit - rss) < (mem_limit * config.mem_warning_threshold) - if config.cpu_limit != 0: - limits['cpu'] = { - 'cpu': config.cpu_limit - } - if config.cpu_warning_threshold != 0: - limits['cpu']['warn'] = (config.cpu_limit - cpu_percent) < (config.cpu_limit * config.cpu_warning_threshold) + # Optionally get CPU information + if config.track_cpu_percent: + self.cpu_percent = await self.update_cpu_percent(all_processes) + + if config.cpu_limit != 0: + limits['cpu'] = { + 'cpu': config.cpu_limit + } + if config.cpu_warning_threshold != 0: + limits['cpu']['warn'] = (config.cpu_limit - self.cpu_percent) < (config.cpu_limit * config.cpu_warning_threshold) metrics = { 'rss': rss, - 'cpu_percent': self.cpu_percent, - 'cpu_count': self.cpu_count, 'limits': limits, } + if config.track_cpu_percent: + metrics.update(cpu_percent=self.cpu_percent, + cpu_count=self.cpu_count) + + self.log.debug("NBResuse metrics: %s", metrics) self.write(json.dumps(metrics)) @@ -118,7 +121,7 @@ class ResourceUseDisplay(Configurable): """ mem_warning_threshold = Float( - 0.1, + default_value=0.1, help=""" Warn user with flashing lights when memory usage is within this fraction memory limit. @@ -132,7 +135,6 @@ class ResourceUseDisplay(Configurable): mem_limit = Union( trait_types=[Int(), Callable()], - 0, help=""" Memory limit to display to the user, in bytes. Can also be a function which calculates the memory limit. @@ -144,20 +146,24 @@ class ResourceUseDisplay(Configurable): """ ).tag(config=True) + @default('mem_limit') + def _mem_limit_default(self): + return int(os.environ.get('MEM_LIMIT', 0)) + track_cpu_percent = Bool( - False, + default_value=False, help=""" Set to True in order to enable reporting of CPU usage statistics. """ ).tag(config=True) cpu_warning_threshold = Float( - 0.1, + default_value=0.1, help=""" Warn user with flashing lights when CPU usage is within this fraction CPU usage limit. - For example, if memory limit is 150%, `cpu_warning_threshold` is 0.1, + For example, if CPU limit is 150%, `cpu_warning_threshold` is 0.1, we will start warning the user when they use (150 - (150 * 0.1)) %. Set to 0 to disable warning. @@ -165,7 +171,7 @@ class ResourceUseDisplay(Configurable): ).tag(config=True) cpu_limit = Float( - 0, + default_value=0, help=""" CPU usage limit to display to the user. @@ -176,10 +182,6 @@ class ResourceUseDisplay(Configurable): """ ).tag(config=True) - @default('mem_limit') - def _mem_limit_default(self): - return int(os.environ.get('MEM_LIMIT', 0)) - @default('cpu_limit') def _cpu_limit_default(self): return float(os.environ.get('CPU_LIMIT', 0)) diff --git a/nbresuse/callable.py b/nbresuse/utils.py similarity index 100% rename from nbresuse/callable.py rename to nbresuse/utils.py