Add new PRs to release (#944)
* Prometheus bug fix (#942)

* local train bug fix

* normalization update

* fix tests

* remove test

* updated normalization

* Naming changes, bug fixes

* subtensor update for max clip

* max weight to a million

* Fixes for ordering and comments

* additional tests

* string fix

* numerical stability and testing updates

* minor update for division by zero

* Naming and spacing fixes

* epsilon update

* small fix

* additional subtensor parameters

* remove print

* help string fixes

* small bug fix

* [Fix] only reregister if flag is set (#937)

* add test for expected reregister behaviour

* add fix

* pass passed args into earlier parse

* fix test by using args

* exit before actual register

* use strtobool

Co-authored-by: Unconst <32490803+unconst@users.noreply.github.com>

* [BIT 584] [feature] btcli register output stats not in place (#923)

* add flags for output_in_place during registration

* stop tracking best

* refactor registration logging output

* fix reregister from type bool

* change in_place and use_cuda to strtobool

* add param and defaults

* fix reference before assignment

* add new logger to cuda rege

* pass param to btcli register call

* oops

* fix init

* try slight timeout

* try fix

* oop

* ?

* fix use_cuda flag

* add test for new use_cuda flag setup

* use create pow to patch

* all no prompt dev id

* fix console.error

* use lower for str comparison

* call self register instead

* add test for wallet register call

* tests are for wallet reregister

* fix typo

* no self on top-level test

* fix tests?

* use reregister

* typo in test

* fix assert

* fix assert

* should be False

* fix time output to use timedelta

* add log verbose as option to reg output

* should be action

* fix typo

* add missing function arg

* fix spacing

* fix flags

* fix flags

* fix test

* should pass in args to config pre-parse

* use None instead of NA

Co-authored-by: isabella618033 <49876827+isabella618033@users.noreply.github.com>
Co-authored-by: Unconst <32490803+unconst@users.noreply.github.com>

* [Fix] multi cuda fix (#940)

* adjust nonce end calculation

* attempt to fix stop issue

* modify stop

* update nonce_start by correct amount

* fix nonce init to only random and update

* fix update amount

* add start values

* add test

* try different hashrate calc

* try EWMA for hash_rate

* oops bad import

* change name to worker

* extract helper and modify comment

* fix time now

* catch Full

* use a finished queue instead of times

* move constants to function params

* fix name of n

* fix verbose log

* allow --output_in_place

* fix n

* change to --no_output_in_place

* fix test

Co-authored-by: Eugene-hu <85906264+Eugene-hu@users.noreply.github.com>
Co-authored-by: Unconst <32490803+unconst@users.noreply.github.com>
Co-authored-by: isabella618033 <49876827+isabella618033@users.noreply.github.com>
4 people authored Oct 12, 2022
1 parent 2a8685b commit 90c383f
Showing 12 changed files with 695 additions and 312 deletions.
58 changes: 32 additions & 26 deletions bittensor/_cli/__init__.py
@@ -832,32 +832,38 @@ def check_overview_config( config: 'bittensor.Config' ):
def _check_for_cuda_reg_config( config: 'bittensor.Config' ) -> None:
"""Checks, when CUDA is available, if the user would like to register with their CUDA device."""
if torch.cuda.is_available():
if config.subtensor.register.cuda.get('use_cuda') is None:
# Ask about cuda registration only if a CUDA device is available.
cuda = Confirm.ask("Detected CUDA device, use CUDA for registration?\n")
config.subtensor.register.cuda.use_cuda = cuda

# Only ask about which CUDA device if the user has more than one CUDA device.
if config.subtensor.register.cuda.use_cuda and config.subtensor.register.cuda.get('dev_id') is None and torch.cuda.device_count() > 0:
devices: List[str] = [str(x) for x in range(torch.cuda.device_count())]
device_names: List[str] = [torch.cuda.get_device_name(x) for x in range(torch.cuda.device_count())]
console.print("Available CUDA devices:")
choices_str: str = ""
for i, device in enumerate(devices):
choices_str += (" {}: {}\n".format(device, device_names[i]))
console.print(choices_str)
dev_id = IntListPrompt.ask("Which GPU(s) would you like to use? Please list one, or comma-separated", choices=devices, default='All')
if dev_id == 'All':
dev_id = list(range(torch.cuda.device_count()))
else:
try:
# replace the commas with spaces then split over whitespace.,
# then strip the whitespace and convert to ints.
dev_id = [int(dev_id.strip()) for dev_id in dev_id.replace(',', ' ').split()]
except ValueError:
console.error(":cross_mark:[red]Invalid GPU device[/red] [bold white]{}[/bold white]\nAvailable CUDA devices:{}".format(dev_id, choices_str))
sys.exit(1)
config.subtensor.register.cuda.dev_id = dev_id
if not config.no_prompt:
if config.subtensor.register.cuda.get('use_cuda') == None: # flag not set
# Ask about cuda registration only if a CUDA device is available.
cuda = Confirm.ask("Detected CUDA device, use CUDA for registration?\n")
config.subtensor.register.cuda.use_cuda = cuda


# Only ask about which CUDA device if the user has more than one CUDA device.
if config.subtensor.register.cuda.use_cuda and config.subtensor.register.cuda.get('dev_id') is None:
devices: List[str] = [str(x) for x in range(torch.cuda.device_count())]
device_names: List[str] = [torch.cuda.get_device_name(x) for x in range(torch.cuda.device_count())]
console.print("Available CUDA devices:")
choices_str: str = ""
for i, device in enumerate(devices):
choices_str += (" {}: {}\n".format(device, device_names[i]))
console.print(choices_str)
dev_id = IntListPrompt.ask("Which GPU(s) would you like to use? Please list one, or comma-separated", choices=devices, default='All')
if dev_id.lower() == 'all':
dev_id = list(range(torch.cuda.device_count()))
else:
try:
# replace the commas with spaces then split over whitespace.,
# then strip the whitespace and convert to ints.
dev_id = [int(dev_id.strip()) for dev_id in dev_id.replace(',', ' ').split()]
except ValueError:
console.log(":cross_mark:[red]Invalid GPU device[/red] [bold white]{}[/bold white]\nAvailable CUDA devices:{}".format(dev_id, choices_str))
sys.exit(1)
config.subtensor.register.cuda.dev_id = dev_id
else:
# flag was not set, use default value.
if config.subtensor.register.cuda.get('use_cuda') is None:
config.subtensor.register.cuda.use_cuda = bittensor.defaults.subtensor.register.cuda.use_cuda

def check_register_config( config: 'bittensor.Config' ):
if config.subtensor.get('network') == bittensor.defaults.subtensor.network and not config.no_prompt:
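The hunk above gates the CUDA questions behind --no_prompt and makes the 'All' answer case-insensitive. A minimal standalone sketch of the device-id parsing it performs is below; the helper name is hypothetical (the real code does this inline).

# Hypothetical helper illustrating the dev_id parsing above; not part of bittensor.
from typing import List, Optional

def parse_dev_id(answer: str, device_count: int) -> Optional[List[int]]:
    """Turn an answer such as 'All', '0', or '0, 2' into a list of device ids."""
    if answer.lower() == 'all':                       # case-insensitive, as in the fix
        return list(range(device_count))
    try:
        # Replace commas with spaces, split on whitespace, convert to ints.
        return [int(tok.strip()) for tok in answer.replace(',', ' ').split()]
    except ValueError:
        return None                                   # caller logs the error and exits

assert parse_dev_id('All', 3) == [0, 1, 2]
assert parse_dev_id('0, 2', 3) == [0, 2]
assert parse_dev_id('gpu0', 3) is None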
9 changes: 6 additions & 3 deletions bittensor/_cli/cli_impl.py
@@ -196,7 +196,8 @@ def run_miner ( self ):
wallet.coldkeypub

# Check registration
self.register()
## Will exit if --wallet.reregister is False
wallet.reregister()

# Run miner.
if self.config.model == 'core_server':
@@ -245,8 +246,10 @@ def register( self ):
TPB = self.config.subtensor.register.cuda.get('TPB', None),
update_interval = self.config.subtensor.register.get('update_interval', None),
num_processes = self.config.subtensor.register.get('num_processes', None),
cuda = self.config.subtensor.register.cuda.get('use_cuda', None),
dev_id = self.config.subtensor.register.cuda.get('dev_id', None)
cuda = self.config.subtensor.register.cuda.get('use_cuda', bittensor.defaults.subtensor.register.cuda.use_cuda),
dev_id = self.config.subtensor.register.cuda.get('dev_id', None),
output_in_place = self.config.subtensor.register.get('output_in_place', bittensor.defaults.subtensor.register.output_in_place),
log_verbose = self.config.subtensor.register.get('verbose', bittensor.defaults.subtensor.register.verbose),
)

def transfer( self ):
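The register() call above now falls back to bittensor.defaults instead of passing None for options the user never set. A small sketch of that pattern, using a plain dict in place of bittensor.Config (which exposes the same .get interface):

# Because the CUDA flags are declared with argparse.SUPPRESS, an unset flag is simply
# absent from the config, so .get() returns the packaged default rather than None.
defaults = {'use_cuda': False, 'output_in_place': True, 'verbose': False}

cfg = {}                                               # user passed no flags
assert cfg.get('use_cuda', defaults['use_cuda']) is False

cfg = {'use_cuda': True}                               # user passed --cuda
assert cfg.get('use_cuda', defaults['use_cuda']) is True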
10 changes: 5 additions & 5 deletions bittensor/_config/__init__.py
@@ -68,16 +68,16 @@ def __new__( cls, parser: ArgumentParser = None, strict: bool = False, args: Opt
# this can fail if the --config has already been added.
pass

# Get args from argv if not passed in.
if args == None:
args = sys.argv[1:]

# 1.1 Optionally load defaults if the --config is set.
try:
config_file_path = str(os.getcwd()) + '/' + vars(parser.parse_known_args()[0])['config']
config_file_path = str(os.getcwd()) + '/' + vars(parser.parse_known_args(args)[0])['config']
except Exception as e:
config_file_path = None

# Get args from argv if not passed in.
if args == None:
args = sys.argv[1:]

# Parse args not strict
params = cls.__parse_args__(args=args, parser=parser, strict=False)

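The reordering above ensures the --config pre-parse sees the same args the caller passed in, rather than always reading sys.argv. A minimal sketch of why that matters (the parser and option names here are illustrative, not the module's own):

import argparse
import sys

parser = argparse.ArgumentParser()
parser.add_argument('--config', default=None)

def config_file_path(args=None):
    # Resolve args first, exactly as the fixed ordering does, then pre-parse with them.
    if args is None:
        args = sys.argv[1:]
    known, _unknown = parser.parse_known_args(args)
    return known.config

# Programmatically passed args are now honoured instead of silently ignored.
assert config_file_path(['--config', 'miner.yml']) == 'miner.yml'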
1 change: 0 additions & 1 deletion bittensor/_neuron/text/core_validator/__init__.py
@@ -398,7 +398,6 @@ def run_epoch( self ):
self.prometheus_gauges.labels("sequence_length").set( sequence_length )
self.prometheus_gauges.labels("validation_len").set( validation_len )
self.prometheus_gauges.labels("min_allowed_weights").set( min_allowed_weights )
self.prometheus_gauges.labels("max_allowed_ratio").set( max_allowed_ratio )
self.prometheus_gauges.labels("blocks_per_epoch").set( blocks_per_epoch )
self.prometheus_gauges.labels("epochs_until_reset").set( epochs_until_reset )

22 changes: 16 additions & 6 deletions bittensor/_subtensor/__init__.py
@@ -23,6 +23,8 @@
from substrateinterface import SubstrateInterface
from torch.cuda import is_available as is_cuda_available

from bittensor.utils import strtobool_with_default

from . import subtensor_impl, subtensor_mock

logger = logger.opt(colors=True)
@@ -187,13 +189,17 @@ def add_args(cls, parser: argparse.ArgumentParser, prefix: str = None ):
help='''The subtensor endpoint flag. If set, overrides the --network flag.
''')
parser.add_argument('--' + prefix_str + 'subtensor._mock', action='store_true', help='To turn on subtensor mocking for testing purposes.', default=bittensor.defaults.subtensor._mock)

parser.add_argument('--' + prefix_str + 'subtensor.register.num_processes', '-n', dest='subtensor.register.num_processes', help="Number of processors to use for registration", type=int, default=bittensor.defaults.subtensor.register.num_processes)
# registration args. Used for register and re-register and anything that calls register.
parser.add_argument('--' + prefix_str + 'subtensor.register.num_processes', '-n', dest=prefix_str + 'subtensor.register.num_processes', help="Number of processors to use for registration", type=int, default=bittensor.defaults.subtensor.register.num_processes)
parser.add_argument('--' + prefix_str + 'subtensor.register.update_interval', '--' + prefix_str + 'subtensor.register.cuda.update_interval', '--' + prefix_str + 'cuda.update_interval', '-u', help="The number of nonces to process before checking for next block during registration", type=int, default=bittensor.defaults.subtensor.register.update_interval)
# registration args. Used for register and re-register and anything that calls register.
parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.use_cuda', '--' + prefix_str + 'cuda', '--' + prefix_str + 'cuda.use_cuda', default=argparse.SUPPRESS, help='''Set true to use CUDA.''', action='store_true', required=False )
parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.dev_id', '--' + prefix_str + 'cuda.dev_id', type=int, nargs='+', default=argparse.SUPPRESS, help='''Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest).''', required=False )
parser.add_argument('--' + prefix_str + 'subtensor.register.no_output_in_place', '--' + prefix_str + 'no_output_in_place', dest="subtensor.register.output_in_place", help="Whether to not ouput the registration statistics in-place. Set flag to disable output in-place.", action='store_false', required=False, default=bittensor.defaults.subtensor.register.output_in_place)
parser.add_argument('--' + prefix_str + 'subtensor.register.verbose', help="Whether to ouput the registration statistics verbosely.", action='store_true', required=False, default=bittensor.defaults.subtensor.register.verbose)

## Registration args for CUDA registration.
parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.use_cuda', '--' + prefix_str + 'cuda', '--' + prefix_str + 'cuda.use_cuda', default=argparse.SUPPRESS, help='''Set flag to use CUDA to register.''', action="store_true", required=False )
parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.no_cuda', '--' + prefix_str + 'no_cuda', '--' + prefix_str + 'cuda.no_cuda', dest=prefix_str + 'subtensor.register.cuda.use_cuda', default=argparse.SUPPRESS, help='''Set flag to not use CUDA for registration''', action="store_false", required=False )

parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.dev_id', '--' + prefix_str + 'cuda.dev_id', type=int, nargs='+', default=argparse.SUPPRESS, help='''Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest).''', required=False )
parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.TPB', '--' + prefix_str + 'cuda.TPB', type=int, default=bittensor.defaults.subtensor.register.cuda.TPB, help='''Set the number of Threads Per Block for CUDA.''', required=False )

except argparse.ArgumentError:
@@ -212,20 +218,24 @@ def add_defaults(cls, defaults ):
defaults.subtensor.register = bittensor.Config()
defaults.subtensor.register.num_processes = os.getenv('BT_SUBTENSOR_REGISTER_NUM_PROCESSES') if os.getenv('BT_SUBTENSOR_REGISTER_NUM_PROCESSES') != None else None # uses processor count by default within the function
defaults.subtensor.register.update_interval = os.getenv('BT_SUBTENSOR_REGISTER_UPDATE_INTERVAL') if os.getenv('BT_SUBTENSOR_REGISTER_UPDATE_INTERVAL') != None else 50_000
defaults.subtensor.register.output_in_place = True
defaults.subtensor.register.verbose = False

defaults.subtensor.register.cuda = bittensor.Config()
defaults.subtensor.register.cuda.dev_id = [0]
defaults.subtensor.register.cuda.use_cuda = False
defaults.subtensor.register.cuda.TPB = 256



@staticmethod
def check_config( config: 'bittensor.Config' ):
assert config.subtensor
#assert config.subtensor.network != None
if config.subtensor.get('register') and config.subtensor.register.get('cuda'):
assert all((isinstance(x, int) or isinstance(x, str) and x.isnumeric() ) for x in config.subtensor.register.cuda.get('dev_id', []))

if config.subtensor.register.cuda.get('use_cuda', False):
if config.subtensor.register.cuda.get('use_cuda', bittensor.defaults.subtensor.register.cuda.use_cuda):
try:
import cubit
except ImportError:
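The new --cuda / --no_cuda pair above shares one destination and defaults to argparse.SUPPRESS, so the key only appears in the namespace when the user actually passes a flag. A standalone sketch of that pattern, with flag names shortened for illustration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cuda', dest='use_cuda', action='store_true', default=argparse.SUPPRESS)
parser.add_argument('--no_cuda', dest='use_cuda', action='store_false', default=argparse.SUPPRESS)

print(vars(parser.parse_args(['--cuda'])))     # {'use_cuda': True}
print(vars(parser.parse_args(['--no_cuda'])))  # {'use_cuda': False}
print(vars(parser.parse_args([])))             # {} -> downstream .get() applies the default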
8 changes: 6 additions & 2 deletions bittensor/_subtensor/subtensor_impl.py
@@ -500,11 +500,13 @@ def register (
wait_for_finalization: bool = True,
prompt: bool = False,
max_allowed_attempts: int = 3,
output_in_place: bool = True,
cuda: bool = False,
dev_id: Union[List[int], int] = 0,
TPB: int = 256,
num_processes: Optional[int] = None,
update_interval: Optional[int] = None,
log_verbose: bool = False,
) -> bool:
r""" Registers the wallet to chain.
Args:
@@ -530,6 +532,8 @@
The number of processes to use to register.
update_interval (int):
The number of nonces to solve between updates.
log_verbose (bool):
If true, the registration process will log more information.
Returns:
success (bool):
flag is true if extrinsic was finalized or uncluded in the block.
@@ -556,9 +560,9 @@
if prompt:
bittensor.__console__.error('CUDA is not available.')
return False
pow_result = bittensor.utils.create_pow( self, wallet, cuda, dev_id, TPB, num_processes=num_processes, update_interval=update_interval )
pow_result = bittensor.utils.create_pow( self, wallet, output_in_place, cuda, dev_id, TPB, num_processes=num_processes, update_interval=update_interval, log_verbose=log_verbose )
else:
pow_result = bittensor.utils.create_pow( self, wallet, num_processes=num_processes, update_interval=update_interval)
pow_result = bittensor.utils.create_pow( self, wallet, output_in_place, num_processes=num_processes, update_interval=update_interval, log_verbose=log_verbose )

# pow failed
if not pow_result:
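For context, a hedged usage sketch of the extended call, using only the parameters visible in the signature above; the wallet names and network value are placeholders, not a recommendation:

import bittensor

subtensor = bittensor.subtensor(network='nakamoto')          # network name is illustrative
wallet = bittensor.wallet(name='default', hotkey='default')  # placeholder wallet names

success = subtensor.register(
    wallet=wallet,
    output_in_place=False,   # stream registration stats line-by-line (new in this release)
    cuda=True,
    dev_id=[0, 1],           # multiple CUDA devices, per the multi-cuda fix (#940)
    TPB=256,
    update_interval=50_000,
    log_verbose=True,        # new flag: print more detail while solving
)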
4 changes: 3 additions & 1 deletion bittensor/_wallet/__init__.py
@@ -19,9 +19,11 @@

import argparse
import copy
from distutils.util import strtobool
import os

import bittensor
from bittensor.utils import strtobool

from . import wallet_impl, wallet_mock

@@ -114,7 +116,7 @@ def add_args(cls, parser: argparse.ArgumentParser, prefix: str = None ):

parser.add_argument('--' + prefix_str + 'wallet.hotkeys', '--' + prefix_str + 'wallet.exclude_hotkeys', required=False, action='store', default=bittensor.defaults.wallet.hotkeys, type=str, nargs='*', help='''Specify the hotkeys by name. (e.g. hk1 hk2 hk3)''')
parser.add_argument('--' + prefix_str + 'wallet.all_hotkeys', required=False, action='store_true', default=bittensor.defaults.wallet.all_hotkeys, help='''To specify all hotkeys. Specifying hotkeys will exclude them from this all.''')
parser.add_argument('--' + prefix_str + 'wallet.reregister', required=False, action='store', default=bittensor.defaults.wallet.reregister, type=bool, help='''Whether to reregister the wallet if it is not already registered.''')
parser.add_argument('--' + prefix_str + 'wallet.reregister', required=False, action='store', default=bittensor.defaults.wallet.reregister, type=strtobool, help='''Whether to reregister the wallet if it is not already registered.''')

except argparse.ArgumentError as e:
pass
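The type=bool to type=strtobool change above fixes a classic argparse pitfall: bool('false') is truthy, so the flag could never be disabled from the command line. A short sketch of the difference, using distutils' strtobool directly (this diff routes it through bittensor.utils instead):

import argparse
from distutils.util import strtobool   # returns 1/0; bittensor wraps an equivalent in its utils

parser = argparse.ArgumentParser()
parser.add_argument('--reregister', type=lambda v: bool(strtobool(v)), default=True)

print(bool('false'))                                            # True  -- the old bug
print(parser.parse_args(['--reregister', 'false']).reregister)  # False -- the fix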
18 changes: 14 additions & 4 deletions bittensor/_wallet/wallet_impl.py
@@ -246,16 +246,18 @@ def reregister(
if not self.config.wallet.get('reregister'):
sys.exit(0)

subtensor.register(
wallet = self,
self.register(
subtensor = subtensor,
prompt = prompt,
TPB = self.config.subtensor.register.cuda.get('TPB', None),
update_interval = self.config.subtensor.register.cuda.get('update_interval', None),
num_processes = self.config.subtensor.register.get('num_processes', None),
cuda = self.config.subtensor.register.cuda.get('use_cuda', None),
cuda = self.config.subtensor.register.cuda.get('use_cuda', bittensor.defaults.subtensor.register.cuda.use_cuda),
dev_id = self.config.subtensor.register.cuda.get('dev_id', None),
wait_for_inclusion = wait_for_inclusion,
wait_for_finalization = wait_for_finalization,
output_in_place = self.config.subtensor.register.get('output_in_place', bittensor.defaults.subtensor.register.output_in_place),
log_verbose = self.config.subtensor.register.get('verbose', bittensor.defaults.subtensor.register.verbose),
)

return self
@@ -272,6 +274,8 @@ def register (
TPB: int = 256,
num_processes: Optional[int] = None,
update_interval: Optional[int] = None,
output_in_place: bool = True,
log_verbose: bool = False,
) -> 'bittensor.Wallet':
""" Registers the wallet to chain.
Args:
@@ -297,6 +301,10 @@
The number of processes to use to register.
update_interval (int):
The number of nonces to solve between updates.
output_in_place (bool):
If true, the registration output is printed in-place.
log_verbose (bool):
If true, the registration output is more verbose.
Returns:
success (bool):
flag is true if extrinsic was finalized or uncluded in the block.
@@ -309,11 +317,13 @@
wait_for_inclusion = wait_for_inclusion,
wait_for_finalization = wait_for_finalization,
prompt=prompt, max_allowed_attempts=max_allowed_attempts,
output_in_place = output_in_place,
cuda=cuda,
dev_id=dev_id,
TPB=TPB,
num_processes=num_processes,
update_interval=update_interval
update_interval=update_interval,
log_verbose=log_verbose,
)

return self
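Taken together, the reregister flow shown above now behaves roughly like the simplified sketch below; the is_registered check is assumed from surrounding wallet code and is not part of this diff.

import sys

def reregister(wallet, subtensor, config, **register_kwargs):
    # Already registered: nothing to do.
    if wallet.is_registered(subtensor=subtensor):          # assumed helper, not shown in this diff
        return wallet
    # Not registered and --wallet.reregister is false: stop before registering.
    if not config.wallet.get('reregister'):
        sys.exit(0)
    # Otherwise fall through to registration with the new output/verbosity options.
    return wallet.register(subtensor=subtensor, **register_kwargs)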