diff --git a/bittensor/__init__.py b/bittensor/__init__.py index 0157ebb47c..dcf97637f0 100644 --- a/bittensor/__init__.py +++ b/bittensor/__init__.py @@ -188,8 +188,6 @@ def turn_console_off(): from bittensor._proto.bittensor_pb2 import ForwardTextPromptingRequest from bittensor._proto.bittensor_pb2 import ForwardTextPromptingResponse -from bittensor._proto.bittensor_pb2 import MultiForwardTextPromptingRequest -from bittensor._proto.bittensor_pb2 import MultiForwardTextPromptingResponse from bittensor._proto.bittensor_pb2 import BackwardTextPromptingRequest from bittensor._proto.bittensor_pb2 import BackwardTextPromptingResponse @@ -204,12 +202,6 @@ def turn_console_off(): from bittensor._dendrite.text_prompting.dendrite import TextPromptingDendrite as text_prompting from bittensor._dendrite.text_prompting.dendrite_pool import TextPromptingDendritePool as text_prompting_pool -# ---- Base Miners ----- -from bittensor._neuron.base_miner_neuron import BaseMinerNeuron -from bittensor._neuron.base_validator import BaseValidator -from bittensor._neuron.base_prompting_miner import BasePromptingMiner -from bittensor._neuron.base_huggingface_miner import HuggingFaceMiner - # ---- Errors and Exceptions ----- from bittensor._keyfile.keyfile_impl import KeyFileError as KeyFileError @@ -318,19 +310,11 @@ def forward( return_all: bool = False, ) -> Union[str, List[str]]: roles, messages = self.format_content( content ) - if not return_all: - return self._dendrite.forward( - roles = roles, - messages = messages, - timeout = timeout - ).completion - else: - return self._dendrite.multi_forward( - roles = roles, - messages = messages, - timeout = timeout - ).multi_completions - + return self._dendrite.forward( + roles = roles, + messages = messages, + timeout = timeout + ).completion async def async_forward( self, @@ -339,18 +323,11 @@ async def async_forward( return_all: bool = False, ) -> Union[str, List[str]]: roles, messages = self.format_content( content ) - if not return_all: - return await self._dendrite.async_forward( - roles = roles, - messages = messages, - timeout = timeout - ).completion - else: - return self._dendrite.async_multi_forward( + return await self._dendrite.async_forward( roles = roles, messages = messages, timeout = timeout - ).multi_completions + ).completion class BittensorLLM(LLM): """Wrapper around Bittensor Prompting Subnetwork. diff --git a/bittensor/_dendrite/text_prompting/dendrite.py b/bittensor/_dendrite/text_prompting/dendrite.py index 56c25ee96e..cd24be0361 100644 --- a/bittensor/_dendrite/text_prompting/dendrite.py +++ b/bittensor/_dendrite/text_prompting/dendrite.py @@ -14,12 +14,10 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -import grpc import json import torch -import asyncio import bittensor -from typing import Callable, List, Dict, Union +from typing import Callable, List, Union class DendriteForwardCall( bittensor.DendriteCall ): @@ -77,46 +75,6 @@ async def async_backward( self, reward: float, timeout: float = None ) -> 'Dendr timeout = self.timeout if timeout is None else bittensor.__blocktime__ ) - -class MultiDendriteForwardCall( bittensor.DendriteCall ): - - name: str = "text_prompting_multi_forward" - is_forward: bool = True - multi_completions: List[str] = [""] # To be filled. 
- - def __init__( - self, - dendrite: 'bittensor.TextPromptingDendrite', - messages: List[str], - roles: List[str], - timeout: float = bittensor.__blocktime__, - ): - super().__init__( dendrite = dendrite, timeout = timeout ) - self.messages = messages - self.roles = roles - self.packed_messages = [json.dumps({"role": role, "content": message}) for role, message in zip(self.roles, self.messages)] - - def __repr__(self) -> str: - return f"MultiDendriteForwardCall( {bittensor.utils.codes.code_to_string(self.return_code)}, to: {self.dest_hotkey[:4]}...{self.dest_hotkey[-4:]}, msg: {self.return_message}, n_completion: {len(self.multi_completions)})" - - def __str__(self) -> str: return self.__repr__() - - def get_callable( self ) -> Callable: - return bittensor.grpc.TextPromptingStub( self.dendrite.channel ).MultiForward - - def get_request_proto( self ) -> bittensor.proto.MultiForwardTextPromptingRequest: - return bittensor.MultiForwardTextPromptingRequest( timeout = self.timeout, messages = self.packed_messages ) - - def apply_response_proto( self, response_proto: bittensor.MultiForwardTextPromptingResponse ): - self.multi_completions = response_proto.multi_completions - - def get_inputs_shape(self) -> torch.Size: - return torch.Size( [len(message) for message in self.packed_messages] ) - - def get_outputs_shape(self) -> torch.Size: - return torch.Size([ len(self.multi_completions) ] ) - - class DendriteBackwardCall( bittensor.DendriteCall ): name: str = "text_prompting_backward" @@ -198,40 +156,6 @@ async def async_forward( if return_call: return forward_call else: return forward_call.completion - def multi_forward( - self, - roles: List[ str ] , - messages: List[ str ], - timeout: float = bittensor.__blocktime__, - return_call:bool = True, - ) -> Union[ str, DendriteForwardCall ]: - forward_call = MultiDendriteForwardCall( - dendrite = self, - messages = messages, - roles = roles, - timeout = timeout, - ) - response_call = self.loop.run_until_complete( self.apply( dendrite_call = forward_call ) ) - if return_call: return response_call - else: return response_call.multi_completions - - async def async_multi_forward( - self, - roles: List[ str ], - messages: List[ str ], - timeout: float = bittensor.__blocktime__, - return_call: bool = True, - ) -> Union[ str, DendriteForwardCall ]: - forward_call = MultiDendriteForwardCall( - dendrite = self, - messages = messages, - roles = roles, - timeout = timeout, - ) - forward_call = await self.apply( dendrite_call = forward_call ) - if return_call: return forward_call - else: return forward_call.multi_completions - def backward( self, roles: List[ str ], diff --git a/bittensor/_neuron/__init__.py b/bittensor/_neuron/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/bittensor/_neuron/base_huggingface_miner.py b/bittensor/_neuron/base_huggingface_miner.py deleted file mode 100644 index 0c78648edd..0000000000 --- a/bittensor/_neuron/base_huggingface_miner.py +++ /dev/null @@ -1,95 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this 
permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import bittensor -import argparse -from typing import List, Dict -from abc import ABC, abstractmethod - -class HuggingFaceMiner( bittensor.BasePromptingMiner, ABC ): - arg_prefix: str - assistant_label: str - user_label: str - system_label: str - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - pass - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument( f'--{cls.arg_prefix}.model_name', type=str, default=None, help='Name or path of model to load' ) - parser.add_argument( f'--{cls.arg_prefix}.api_key', type=str, help='huggingface api key', default=None ) - parser.add_argument( f'--{cls.arg_prefix}.device', type=str, help='Device to load model', default="cuda" ) - parser.add_argument( f'--{cls.arg_prefix}.max_new_tokens', type=int, help='Max tokens for model output.', default=256 ) - parser.add_argument( f'--{cls.arg_prefix}.temperature', type=float, help='Sampling temperature of model', default=0.5 ) - parser.add_argument( f'--{cls.arg_prefix}.do_sample', action='store_true', default=False, help='Whether to use multinomial sampling.' ) - parser.add_argument( f'--{cls.arg_prefix}.repetition_penalty', type=float, help='Repetition penalty for model', default=1.3 ) - parser.add_argument( f'--{cls.arg_prefix}.do_prompt_injection', action='store_true', default=False, help='Whether to use a custom "system" prompt instead of the one sent by bittensor.' ) - parser.add_argument( f'--{cls.arg_prefix}.system_prompt', type=str, help='What prompt to replace the system prompt with', default= "BEGINNING OF CONVERSATION: " ) - parser.add_argument( f'--{cls.arg_prefix}.repetition-penalty', type=float, default=1.1, help='Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0' ) - parser.add_argument( f'--{cls.arg_prefix}.top_p', type=float, default=0.9, help='Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0.' ) - parser.add_argument( f'--{cls.arg_prefix}.top_k', type=int, default=0, help='Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000.' ) - parser.add_argument( f'--{cls.arg_prefix}.load_in_8bit', type=bool, default=False, help='Load model in 8 bit precision') - parser.add_argument( f'--{cls.arg_prefix}.device_map', type=str, default=None, help='Device map for model parallelism.') - parser.add_argument( f'--{cls.arg_prefix}.pad_tokens', type=int, default=[], nargs='+', help='A list of integers separated by spaces for the pad_tokens.') - - def __init__(self): - super( HuggingFaceMiner, self ).__init__() - - # Set model name if unset. 
- if getattr( self.config, self.arg_prefix ).model_name == None: - getattr( self.config, self.arg_prefix ).model_name = self.arg_prefix - - bittensor.logging.info( 'Loading ' + str( getattr( self.config, self.arg_prefix ).model_name ) ) - self.tokenizer = self.load_tokenizer() - self.model = self.load_model() - bittensor.logging.info( 'Model loaded!' ) - - # Device already configured if using pipieline or device_map is set. (i.e. Pipelines have no `.to()` method) - if getattr( self.config, self.arg_prefix ).device != "cpu" \ - and 'pipeline' not in self.model.__class__.__name__.lower() \ - and getattr( self.config, self.arg_prefix ).device_map == None: - self.model = self.model.to( getattr( self.config, self.arg_prefix ).device ) - - @abstractmethod - def load_model(self): - ... - - @abstractmethod - def load_tokenizer(self): - ... - - @abstractmethod - def forward(self, messages: List[Dict[str, str]], **kwargs) -> str: - ... - - def process_history( self, history: List[Dict[str, str]] ) -> str: - processed_history = '' - - if getattr(self.config, self.arg_prefix).do_prompt_injection: - processed_history += getattr(self.config, self.arg_prefix).system_prompt - - for message in history: - if message['role'] == 'system': - if not getattr(self.config, self.arg_prefix).do_prompt_injection or message != history[0]: - processed_history += self.system_label + message['content'].strip() + ' ' - if message['role'] == 'assistant': - processed_history += self.assistant_label + message['content'].strip() + '' - if message['role'] == 'user': - processed_history += self.user_label + message['content'].strip() + ' ' - return processed_history diff --git a/bittensor/_neuron/base_miner_neuron.py b/bittensor/_neuron/base_miner_neuron.py deleted file mode 100644 index a2d25862ed..0000000000 --- a/bittensor/_neuron/base_miner_neuron.py +++ /dev/null @@ -1,214 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import os -import time -import torch -import threading -import argparse -import bittensor - -from rich import print -from typing import Union, Tuple -from datetime import datetime - -class BaseMinerNeuron: - - def priority( self, forward_call: "bittensor.SynapseCall" ) -> float: - return self.prioritizer.priority( forward_call, metagraph = self.metagraph ) - - def blacklist( self, forward_call: "bittensor.SynapseCall" ) -> Union[ Tuple[bool, str], bool ]: - return self.blacklister.blacklist( forward_call, metagraph = self.metagraph ) - - @classmethod - def config( cls ) -> "bittensor.Config": - parser = argparse.ArgumentParser() - cls.add_args( parser ) - return bittensor.config( parser ) - - @classmethod - def help( cls ): - parser = argparse.ArgumentParser() - cls.add_args(parser) - print( cls.__new__.__doc__ ) - parser.print_help() - - @classmethod - def check_config( cls, config: "bittensor.Config" ): - bittensor.axon.check_config( config ) - bittensor.wallet.check_config( config ) - bittensor.logging.check_config( config ) - bittensor.subtensor.check_config( config ) - full_path = os.path.expanduser( - '{}/{}/{}/{}'.format( config.logging.logging_dir, config.wallet.get('name', bittensor.defaults.wallet.name), - config.wallet.get('hotkey', bittensor.defaults.wallet.hotkey), config.neuron.name ) ) - config.neuron.full_path = os.path.expanduser( full_path ) - if not os.path.exists( config.neuron.full_path ): - os.makedirs( config.neuron.full_path ) - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser, prefix: str = None ): - prefix_str = "" if prefix is None else prefix + "." - parser.add_argument( - '--' + prefix_str + 'netuid', - type = int, - help = 'Subnet netuid', - default = 1 - ) - parser.add_argument( - '--' + prefix_str + 'neuron.name', - type = str, - help = 'Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ', - default = 'openai_prompting_miner' - ) - parser.add_argument( - '--' + prefix_str + 'neuron.blocks_per_epoch', - type = str, - help = 'Blocks until the miner sets weights on chain', - default = 100 - ) - parser.add_argument( - '--' + prefix_str + 'neuron.no_set_weights', - action = 'store_true', - help = 'If True, the model does not set weights.', - default = False - ) - bittensor.wallet.add_args( parser, prefix = prefix ) - bittensor.axon.add_args( parser, prefix = prefix ) - bittensor.subtensor.add_args( parser, prefix = prefix ) - bittensor.logging.add_args( parser, prefix = prefix ) - bittensor.blacklist.add_args( parser, prefix = prefix_str + 'neuron' ) - bittensor.priority.add_args( parser, prefix = prefix_str + 'neuron' ) - - def __init__(self, netuid: int = None, config: "bittensor.Config" = None ): - super_config = config if config != None else BaseMinerNeuron.config() # Grab super (BaseMinerNeuron) config - child_config = self.config() # grab child (Miner) class configs. - self.config = child_config - self.config.merge( super_config ) # Merge the two configs. Child configs override super configs. - self.config.netuid = netuid or self.config.netuid - BaseMinerNeuron.check_config( self.config ) - - # Build objects. 
- bittensor.logging( config = self.config, logging_dir = self.config.neuron.full_path ) - self.subtensor = bittensor.subtensor( self.config ) - self.wallet = bittensor.wallet( self.config ) - self.metagraph = self.subtensor.metagraph( self.config.netuid ) - self.axon = bittensor.axon( wallet = self.wallet, config = self.config ) - self.blacklister = bittensor.blacklist( config = self.config.neuron ) - self.prioritizer = bittensor.priority( config = self.config.neuron ) - - # Used for backgounr process. - self.is_running = False - self.should_exit = False - self.background_thread = None - - def attach( self, synapse: "bittensor.Synapse" ): - # pass through attach function. - self.axon.attach( synapse ) - - def __enter__(self): - bittensor.logging.trace( 'BaseMinerNeuron.__enter__()' ) - self.start_in_background() - return self - - def __exit__(self, exc_type, exc_value, traceback): - bittensor.logging.trace( 'BaseMinerNeuron.__exit__()' ) - self.stop() - - def start_in_background(self): - if self.is_running: - bittensor.logging.warning( 'The base miner neuron is already running.') - else: - self.should_exit = False - self.background_thread = threading.Thread( target = self.run, daemon = True ) - self.background_thread.start() - self.is_running = True - bittensor.logging.trace( 'Starting the base miner neuron in the background.') - - def stop(self): - if self.is_running: - self.should_exit = True - else: - bittensor.logging.warning( 'The base miner neuron is not running.') - - def run( self ): - bittensor.logging.debug( 'BaseMinerNeuron.run()' ) - - # --- Start the miner. - self.is_running = True - self.wallet.reregister( netuid = self.config.netuid, subtensor = self.subtensor ) - self.axon.start() - self.subtensor.serve_axon( netuid = self.config.netuid, axon = self.axon, wait_for_finalization = False, wait_for_inclusion = False ) #TODO: fix finalization & inclusion - - # --- Run Forever. - last_update = self.subtensor.get_current_block() - retries = 0 - while not self.should_exit: - - # --- Wait until next epoch. - current_block = self.subtensor.get_current_block() - while (current_block - last_update) < self.config.neuron.blocks_per_epoch: - if self.should_exit: continue - time.sleep( 0.1 ) #bittensor.__blocktime__ - current_block = self.subtensor.get_current_block() - last_update = self.subtensor.get_current_block() - - # --- Update the metagraph with the latest network state. - try: - self.metagraph.sync( lite = True ) - uid = self.metagraph.hotkeys.index( self.wallet.hotkey.ss58_address ) - except: - # --- If we fail to sync the metagraph, wait and try again. - if(retries > 8): - bittensor.logging.error( f'Failed to sync metagraph, exiting.') - self.stop() - break - seconds_to_sleep = 5 * 1.5**(retries) - bittensor.logging.error( f'Failed to sync metagraph, retrying in {seconds_to_sleep} seconds.') - time.sleep( seconds_to_sleep ) - retries += 1 - continue - - if(retries > 0): - retries = 0 - - # --- Log performance. - print( - f"[white not bold]{datetime.now():%Y-%m-%d %H:%M:%S}[/white not bold]{' ' * 4} | " - f"{f'UID [bright_cyan]{uid}[/bright_cyan]'.center(16 + len('[bright_cyan][/bright_cyan]'))} | " - f'[dim white not bold] [green]{str(self.metagraph.S[uid].item()):.4}[/green] Stake [/dim white not bold]' - f'[dim white not bold]| [yellow]{str(self.metagraph.trust[uid].item()) :.3}[/yellow] Trust [/dim white not bold]' - f'[dim white not bold]| [green]{str(self.metagraph.incentive[uid].item()):.3}[/green] Incentive [/dim white not bold]') - - # --- Set weights. 
- if not self.config.neuron.no_set_weights: - try: - # --- query the chain for the most current number of peers on the network - chain_weights = torch.zeros( self.subtensor.subnetwork_n( netuid = self.config.netuid )) - chain_weights[uid] = 1 - did_set = self.subtensor.set_weights( - uids = torch.arange(0, len(chain_weights)), - netuid = self.config.netuid, - weights = chain_weights, - wait_for_inclusion = False, - walle = self.wallet, - version_key = 1 - ) - except: - pass - - self.axon.stop() \ No newline at end of file diff --git a/bittensor/_neuron/base_prompting_miner.py b/bittensor/_neuron/base_prompting_miner.py deleted file mode 100644 index 1bcd155773..0000000000 --- a/bittensor/_neuron/base_prompting_miner.py +++ /dev/null @@ -1,77 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import argparse -import bittensor - -from rich import print -from typing import List, Dict, Union, Tuple -from abc import ABC, abstractmethod - -class BasePromptingMiner( bittensor.BaseMinerNeuron, ABC ): - - @classmethod - @abstractmethod - def add_args( cls, parser: argparse.ArgumentParser ): - ... - - @abstractmethod - def forward( self, messages: List[Dict[str, str]] ) -> str: - ... - - @classmethod - @abstractmethod - def check_config( cls, config: 'bittensor.Config' ): - ... - - @classmethod - def config( cls ) -> "bittensor.Config": - parser = argparse.ArgumentParser() - cls.add_super_args( parser ) - return bittensor.config( parser ) - - @classmethod - def add_super_args( cls, parser: argparse.ArgumentParser ): - """ Add arguments specific to BasePromptingMiner to parser. 
- """ - cls.add_args(parser) - parser.add_argument( - '--neuron.max_batch_size', - type = int, - help = 'The maximum batch size for forward requests.', - default = -1 - ) - parser.add_argument( - '--neuron.max_sequence_len', - type = int, - help = 'The maximum sequence length for forward requests.', - default = -1 - ) - - def __init__( self, config: "bittensor.Config" = None ): - super( BasePromptingMiner, self ).__init__() - - class Synapse( bittensor.TextPromptingSynapse ): - def priority( _, forward_call: "bittensor.TextPromptingForwardCall" ) -> float: - return self.priority( forward_call ) - def blacklist( _, forward_call: "bittensor.TextPromptingForwardCall" ) -> Union[ Tuple[bool, str], bool ]: - return self.blacklist( forward_call ) - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - def forward( _, messages: List[Dict[str, str]] ) -> str: - return self.forward( messages ) - self.synapse = Synapse( axon = self.axon ) diff --git a/bittensor/_neuron/base_validator.py b/bittensor/_neuron/base_validator.py deleted file mode 100644 index 1542bb450f..0000000000 --- a/bittensor/_neuron/base_validator.py +++ /dev/null @@ -1,167 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import os -import time -import torch -import threading -import argparse -import bittensor - -from rich import print -from typing import List, Dict, Union, Tuple, Optional -from datetime import datetime - -class BaseValidator: - - @classmethod - def config( cls ) -> "bittensor.Config": - parser = argparse.ArgumentParser() - cls.add_args( parser ) - return bittensor.config( parser ) - - @classmethod - def help( cls ): - parser = argparse.ArgumentParser() - cls.add_args(parser) - print( cls.__new__.__doc__ ) - parser.print_help() - - @classmethod - def check_config( cls, config: "bittensor.Config" ): - bittensor.wallet.check_config( config ) - bittensor.logging.check_config( config ) - bittensor.subtensor.check_config( config ) - full_path = os.path.expanduser( - '{}/{}/{}/{}'.format( config.logging.logging_dir, config.wallet.get('name', bittensor.defaults.wallet.name), - config.wallet.get('hotkey', bittensor.defaults.wallet.hotkey), config.neuron.name ) ) - config.neuron.full_path = os.path.expanduser( full_path ) - if not os.path.exists( config.neuron.full_path ): - os.makedirs( config.neuron.full_path ) - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser, prefix: str = None ): - prefix_str = "" if prefix is None else prefix + "." - parser.add_argument( - '--' + prefix_str + 'netuid', - type = int, - help = 'Subnet netuid', - default = 1 - ) - parser.add_argument( - '--' + prefix_str + 'neuron.name', - type = str, - help = 'Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ', - default = 'openai_prompting_miner' - ) - parser.add_argument( - '--' + prefix_str + 'neuron.blocks_per_epoch', - type = str, - help = 'Blocks until the miner sets weights on chain', - default = 100 - ) - parser.add_argument( - '--' + prefix_str + 'neuron.no_set_weights', - action = 'store_true', - help = 'If True, the model does not set weights.', - default = False - ) - bittensor.wallet.add_args( parser, prefix = prefix ) - bittensor.subtensor.add_args( parser, prefix = prefix ) - bittensor.logging.add_args( parser, prefix = prefix ) - - def __init__(self, netuid: int = None, config: "bittensor.Config" = None ): - # Build config. - self.config = config if config != None else BaseValidator.config() - self.config.netuid = netuid or self.config.netuid - BaseValidator.check_config( self.config ) - - # Build objects. - bittensor.logging( config = self.config, logging_dir = self.config.neuron.full_path ) - self.subtensor = bittensor.subtensor( self.config ) - self.wallet = bittensor.wallet( self.config ) - self.metagraph = self.subtensor.metagraph( self.config.netuid ) - - # Used for backgounr process. 
- self.is_running = False - self.should_exit = False - self.background_thread = None - - def __enter__(self): - bittensor.logging.trace( 'BaseValidator.__enter__()' ) - self.start_in_background() - return self - - def __exit__(self, exc_type, exc_value, traceback): - bittensor.logging.trace( 'BaseValidator.__exit__()' ) - self.stop() - - def start_in_background(self): - if self.is_running: - bittensor.logging.warning( 'The base miner neuron is already running.') - else: - self.should_exit = False - self.background_thread = threading.Thread( target = self.run, daemon = True ) - self.background_thread.start() - self.is_running = True - bittensor.logging.trace( 'Starting the base miner neuron in the background.') - - def stop(self): - if self.is_running: - self.should_exit = True - else: - bittensor.logging.warning( 'The base miner neuron is not running.') - - def run( self ): - bittensor.logging.debug( 'BaseMinBaseValidatorerNeuron.run()' ) - - # --- Start the miner. - self.is_running = True - self.wallet.reregister( netuid = self.config.netuid, subtensor = self.subtensor ) - - # --- Run Forever. - last_update = self.subtensor.get_current_block() - while not self.should_exit: - - # --- Wait until next epoch. - current_block = self.subtensor.get_current_block() - while (current_block - last_update) < self.config.neuron.blocks_per_epoch: - if self.should_exit: continue - time.sleep( 12 ) - current_block = self.subtensor.get_current_block() - last_update = self.subtensor.get_current_block() - - # --- Update the metagraph with the latest network state. - self.metagraph.sync( lite = True ) - uid = self.metagraph.hotkeys.index( self.wallet.hotkey.ss58_address ) - - # --- Set weights. - if not self.config.neuron.no_set_weights: - try: - # --- query the chain for the most current number of peers on the network - chain_weights = torch.zeros( self.subtensor.subnetwork_n( netuid = self.config.netuid )) - chain_weights[uid] = 1 - did_set = self.subtensor.set_weights( - uids = torch.arange(0, len(chain_weights)), - netuid = self.config.netuid, - weights = chain_weights, - wait_for_inclusion = False, - walle = self.wallet, - version_key = 1 - ) - except: - pass \ No newline at end of file diff --git a/bittensor/_proto/bittensor.proto b/bittensor/_proto/bittensor.proto index 597f5e26d2..18f8301d39 100644 --- a/bittensor/_proto/bittensor.proto +++ b/bittensor/_proto/bittensor.proto @@ -3,7 +3,6 @@ syntax = "proto3"; service TextPrompting { rpc Forward (ForwardTextPromptingRequest) returns (ForwardTextPromptingResponse) {} - rpc MultiForward (MultiForwardTextPromptingRequest) returns (MultiForwardTextPromptingResponse) {} rpc Backward (BackwardTextPromptingRequest) returns (BackwardTextPromptingResponse) {} } @@ -23,19 +22,6 @@ message ForwardTextPromptingResponse { string return_message = 4; ReturnCode return_code = 5; } -message MultiForwardTextPromptingRequest { - int32 version = 1; - string hotkey = 2; - repeated string messages = 3; - float timeout = 4; -} -message MultiForwardTextPromptingResponse { - int32 version = 1; - string hotkey = 2; - repeated string multi_completions = 3; - string return_message = 4; - ReturnCode return_code = 5; -} message BackwardTextPromptingRequest { int32 version = 1; string hotkey = 2; diff --git a/bittensor/_proto/bittensor_pb2.py b/bittensor/_proto/bittensor_pb2.py index 2cc1267e25..10e3a699f7 100644 --- a/bittensor/_proto/bittensor_pb2.py +++ b/bittensor/_proto/bittensor_pb2.py @@ -20,7 +20,7 @@ syntax='proto3', serialized_options=None, 
create_key=_descriptor._internal_create_key, - serialized_pb=b'\n bittensor/_proto/bittensor.proto\"a\n\x1b\x46orwardTextPromptingRequest\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x10\n\x08messages\x18\x03 \x03(\t\x12\x0f\n\x07timeout\x18\x04 \x01(\x02\"\x8b\x01\n\x1c\x46orwardTextPromptingResponse\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x10\n\x08response\x18\x03 \x01(\t\x12\x16\n\x0ereturn_message\x18\x04 \x01(\t\x12 \n\x0breturn_code\x18\x05 \x01(\x0e\x32\x0b.ReturnCode\"f\n MultiForwardTextPromptingRequest\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x10\n\x08messages\x18\x03 \x03(\t\x12\x0f\n\x07timeout\x18\x04 \x01(\x02\"\x99\x01\n!MultiForwardTextPromptingResponse\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x19\n\x11multi_completions\x18\x03 \x03(\t\x12\x16\n\x0ereturn_message\x18\x04 \x01(\t\x12 \n\x0breturn_code\x18\x05 \x01(\x0e\x32\x0b.ReturnCode\"\x85\x01\n\x1c\x42\x61\x63kwardTextPromptingRequest\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x0f\n\x07rewards\x18\x03 \x03(\x02\x12\x10\n\x08messages\x18\x04 \x03(\t\x12\x10\n\x08response\x18\x05 \x01(\t\x12\x0f\n\x07timeout\x18\x06 \x01(\x02\"z\n\x1d\x42\x61\x63kwardTextPromptingResponse\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x16\n\x0ereturn_message\x18\x04 \x01(\t\x12 \n\x0breturn_code\x18\x05 \x01(\x0e\x32\x0b.ReturnCode\"\xac\x01\n\x06Tensor\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06\x62uffer\x18\x02 \x01(\x0c\x12\r\n\x05shape\x18\x03 \x03(\x03\x12\x1f\n\nserializer\x18\x04 \x01(\x0e\x32\x0b.Serializer\x12 \n\x0btensor_type\x18\x05 \x01(\x0e\x32\x0b.TensorType\x12\x18\n\x05\x64type\x18\x06 \x01(\x0e\x32\t.DataType\x12\x15\n\rrequires_grad\x18\x08 \x01(\x08*\xda\x04\n\nReturnCode\x12\x0c\n\x08NoReturn\x10\x00\x12\x0b\n\x07Success\x10\x01\x12\x0b\n\x07Timeout\x10\x02\x12\x0b\n\x07\x42\x61\x63koff\x10\x03\x12\x0f\n\x0bUnavailable\x10\x04\x12\x12\n\x0eNotImplemented\x10\x05\x12\x10\n\x0c\x45mptyRequest\x10\x06\x12\x11\n\rEmptyResponse\x10\x07\x12\x13\n\x0fInvalidResponse\x10\x08\x12\x12\n\x0eInvalidRequest\x10\t\x12\x19\n\x15RequestShapeException\x10\n\x12\x1a\n\x16ResponseShapeException\x10\x0b\x12!\n\x1dRequestSerializationException\x10\x0c\x12\"\n\x1eResponseSerializationException\x10\r\x12#\n\x1fRequestDeserializationException\x10\x0e\x12$\n 
ResponseDeserializationException\x10\x0f\x12\x15\n\x11NotServingNucleus\x10\x10\x12\x12\n\x0eNucleusTimeout\x10\x11\x12\x0f\n\x0bNucleusFull\x10\x12\x12\x1e\n\x1aRequestIncompatibleVersion\x10\x13\x12\x1f\n\x1bResponseIncompatibleVersion\x10\x14\x12\x11\n\rSenderUnknown\x10\x15\x12\x14\n\x10UnknownException\x10\x16\x12\x13\n\x0fUnauthenticated\x10\x17\x12\x0f\n\x0b\x42\x61\x64\x45ndpoint\x10\x18\x12\x0f\n\x0b\x42lacklisted\x10\x19*&\n\nSerializer\x12\x0b\n\x07MSGPACK\x10\x00\x12\x0b\n\x07\x43MPPACK\x10\x01*2\n\nTensorType\x12\t\n\x05TORCH\x10\x00\x12\x0e\n\nTENSORFLOW\x10\x01\x12\t\n\x05NUMPY\x10\x02*h\n\x08\x44\x61taType\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x0b\n\x07\x46LOAT32\x10\x01\x12\x0b\n\x07\x46LOAT64\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\x08\n\x04UTF8\x10\x05\x12\x0b\n\x07\x46LOAT16\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x32\xff\x01\n\rTextPrompting\x12H\n\x07\x46orward\x12\x1c.ForwardTextPromptingRequest\x1a\x1d.ForwardTextPromptingResponse\"\x00\x12W\n\x0cMultiForward\x12!.MultiForwardTextPromptingRequest\x1a\".MultiForwardTextPromptingResponse\"\x00\x12K\n\x08\x42\x61\x63kward\x12\x1d.BackwardTextPromptingRequest\x1a\x1e.BackwardTextPromptingResponse\"\x00\x62\x06proto3' + serialized_pb=b'\n bittensor/_proto/bittensor.proto\"a\n\x1b\x46orwardTextPromptingRequest\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x10\n\x08messages\x18\x03 \x03(\t\x12\x0f\n\x07timeout\x18\x04 \x01(\x02\"\x8b\x01\n\x1c\x46orwardTextPromptingResponse\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x10\n\x08response\x18\x03 \x01(\t\x12\x16\n\x0ereturn_message\x18\x04 \x01(\t\x12 \n\x0breturn_code\x18\x05 \x01(\x0e\x32\x0b.ReturnCode\"\x85\x01\n\x1c\x42\x61\x63kwardTextPromptingRequest\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x0f\n\x07rewards\x18\x03 \x03(\x02\x12\x10\n\x08messages\x18\x04 \x03(\t\x12\x10\n\x08response\x18\x05 \x01(\t\x12\x0f\n\x07timeout\x18\x06 \x01(\x02\"z\n\x1d\x42\x61\x63kwardTextPromptingResponse\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06hotkey\x18\x02 \x01(\t\x12\x16\n\x0ereturn_message\x18\x04 \x01(\t\x12 \n\x0breturn_code\x18\x05 \x01(\x0e\x32\x0b.ReturnCode\"\xac\x01\n\x06Tensor\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0e\n\x06\x62uffer\x18\x02 \x01(\x0c\x12\r\n\x05shape\x18\x03 \x03(\x03\x12\x1f\n\nserializer\x18\x04 \x01(\x0e\x32\x0b.Serializer\x12 \n\x0btensor_type\x18\x05 \x01(\x0e\x32\x0b.TensorType\x12\x18\n\x05\x64type\x18\x06 \x01(\x0e\x32\t.DataType\x12\x15\n\rrequires_grad\x18\x08 \x01(\x08*\xda\x04\n\nReturnCode\x12\x0c\n\x08NoReturn\x10\x00\x12\x0b\n\x07Success\x10\x01\x12\x0b\n\x07Timeout\x10\x02\x12\x0b\n\x07\x42\x61\x63koff\x10\x03\x12\x0f\n\x0bUnavailable\x10\x04\x12\x12\n\x0eNotImplemented\x10\x05\x12\x10\n\x0c\x45mptyRequest\x10\x06\x12\x11\n\rEmptyResponse\x10\x07\x12\x13\n\x0fInvalidResponse\x10\x08\x12\x12\n\x0eInvalidRequest\x10\t\x12\x19\n\x15RequestShapeException\x10\n\x12\x1a\n\x16ResponseShapeException\x10\x0b\x12!\n\x1dRequestSerializationException\x10\x0c\x12\"\n\x1eResponseSerializationException\x10\r\x12#\n\x1fRequestDeserializationException\x10\x0e\x12$\n 
ResponseDeserializationException\x10\x0f\x12\x15\n\x11NotServingNucleus\x10\x10\x12\x12\n\x0eNucleusTimeout\x10\x11\x12\x0f\n\x0bNucleusFull\x10\x12\x12\x1e\n\x1aRequestIncompatibleVersion\x10\x13\x12\x1f\n\x1bResponseIncompatibleVersion\x10\x14\x12\x11\n\rSenderUnknown\x10\x15\x12\x14\n\x10UnknownException\x10\x16\x12\x13\n\x0fUnauthenticated\x10\x17\x12\x0f\n\x0b\x42\x61\x64\x45ndpoint\x10\x18\x12\x0f\n\x0b\x42lacklisted\x10\x19*&\n\nSerializer\x12\x0b\n\x07MSGPACK\x10\x00\x12\x0b\n\x07\x43MPPACK\x10\x01*2\n\nTensorType\x12\t\n\x05TORCH\x10\x00\x12\x0e\n\nTENSORFLOW\x10\x01\x12\t\n\x05NUMPY\x10\x02*h\n\x08\x44\x61taType\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x0b\n\x07\x46LOAT32\x10\x01\x12\x0b\n\x07\x46LOAT64\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\x08\n\x04UTF8\x10\x05\x12\x0b\n\x07\x46LOAT16\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x32\xa6\x01\n\rTextPrompting\x12H\n\x07\x46orward\x12\x1c.ForwardTextPromptingRequest\x1a\x1d.ForwardTextPromptingResponse\"\x00\x12K\n\x08\x42\x61\x63kward\x12\x1d.BackwardTextPromptingRequest\x1a\x1e.BackwardTextPromptingResponse\"\x00\x62\x06proto3' ) _RETURNCODE = _descriptor.EnumDescriptor( @@ -163,8 +163,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=973, - serialized_end=1575, + serialized_start=713, + serialized_end=1315, ) _sym_db.RegisterEnumDescriptor(_RETURNCODE) @@ -189,8 +189,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1577, - serialized_end=1615, + serialized_start=1317, + serialized_end=1355, ) _sym_db.RegisterEnumDescriptor(_SERIALIZER) @@ -220,8 +220,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1617, - serialized_end=1667, + serialized_start=1357, + serialized_end=1407, ) _sym_db.RegisterEnumDescriptor(_TENSORTYPE) @@ -276,8 +276,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1669, - serialized_end=1773, + serialized_start=1409, + serialized_end=1513, ) _sym_db.RegisterEnumDescriptor(_DATATYPE) @@ -437,119 +437,6 @@ ) -_MULTIFORWARDTEXTPROMPTINGREQUEST = _descriptor.Descriptor( - name='MultiForwardTextPromptingRequest', - full_name='MultiForwardTextPromptingRequest', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='version', full_name='MultiForwardTextPromptingRequest.version', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='hotkey', full_name='MultiForwardTextPromptingRequest.hotkey', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=b"".decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='messages', full_name='MultiForwardTextPromptingRequest.messages', index=2, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='timeout', 
full_name='MultiForwardTextPromptingRequest.timeout', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=277, - serialized_end=379, -) - - -_MULTIFORWARDTEXTPROMPTINGRESPONSE = _descriptor.Descriptor( - name='MultiForwardTextPromptingResponse', - full_name='MultiForwardTextPromptingResponse', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='version', full_name='MultiForwardTextPromptingResponse.version', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='hotkey', full_name='MultiForwardTextPromptingResponse.hotkey', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=b"".decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='multi_completions', full_name='MultiForwardTextPromptingResponse.multi_completions', index=2, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='return_message', full_name='MultiForwardTextPromptingResponse.return_message', index=3, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=b"".decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='return_code', full_name='MultiForwardTextPromptingResponse.return_code', index=4, - number=5, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=382, - serialized_end=535, -) - - _BACKWARDTEXTPROMPTINGREQUEST = _descriptor.Descriptor( name='BackwardTextPromptingRequest', full_name='BackwardTextPromptingRequest', @@ -612,8 +499,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=538, - serialized_end=671, + serialized_start=278, + serialized_end=411, ) @@ -665,8 +552,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=673, - serialized_end=795, + serialized_start=413, + serialized_end=535, ) @@ -739,20 +626,17 @@ 
extension_ranges=[], oneofs=[ ], - serialized_start=798, - serialized_end=970, + serialized_start=538, + serialized_end=710, ) _FORWARDTEXTPROMPTINGRESPONSE.fields_by_name['return_code'].enum_type = _RETURNCODE -_MULTIFORWARDTEXTPROMPTINGRESPONSE.fields_by_name['return_code'].enum_type = _RETURNCODE _BACKWARDTEXTPROMPTINGRESPONSE.fields_by_name['return_code'].enum_type = _RETURNCODE _TENSOR.fields_by_name['serializer'].enum_type = _SERIALIZER _TENSOR.fields_by_name['tensor_type'].enum_type = _TENSORTYPE _TENSOR.fields_by_name['dtype'].enum_type = _DATATYPE DESCRIPTOR.message_types_by_name['ForwardTextPromptingRequest'] = _FORWARDTEXTPROMPTINGREQUEST DESCRIPTOR.message_types_by_name['ForwardTextPromptingResponse'] = _FORWARDTEXTPROMPTINGRESPONSE -DESCRIPTOR.message_types_by_name['MultiForwardTextPromptingRequest'] = _MULTIFORWARDTEXTPROMPTINGREQUEST -DESCRIPTOR.message_types_by_name['MultiForwardTextPromptingResponse'] = _MULTIFORWARDTEXTPROMPTINGRESPONSE DESCRIPTOR.message_types_by_name['BackwardTextPromptingRequest'] = _BACKWARDTEXTPROMPTINGREQUEST DESCRIPTOR.message_types_by_name['BackwardTextPromptingResponse'] = _BACKWARDTEXTPROMPTINGRESPONSE DESCRIPTOR.message_types_by_name['Tensor'] = _TENSOR @@ -776,20 +660,6 @@ }) _sym_db.RegisterMessage(ForwardTextPromptingResponse) -MultiForwardTextPromptingRequest = _reflection.GeneratedProtocolMessageType('MultiForwardTextPromptingRequest', (_message.Message,), { - 'DESCRIPTOR' : _MULTIFORWARDTEXTPROMPTINGREQUEST, - '__module__' : 'bittensor._proto.bittensor_pb2' - # @@protoc_insertion_point(class_scope:MultiForwardTextPromptingRequest) - }) -_sym_db.RegisterMessage(MultiForwardTextPromptingRequest) - -MultiForwardTextPromptingResponse = _reflection.GeneratedProtocolMessageType('MultiForwardTextPromptingResponse', (_message.Message,), { - 'DESCRIPTOR' : _MULTIFORWARDTEXTPROMPTINGRESPONSE, - '__module__' : 'bittensor._proto.bittensor_pb2' - # @@protoc_insertion_point(class_scope:MultiForwardTextPromptingResponse) - }) -_sym_db.RegisterMessage(MultiForwardTextPromptingResponse) - BackwardTextPromptingRequest = _reflection.GeneratedProtocolMessageType('BackwardTextPromptingRequest', (_message.Message,), { 'DESCRIPTOR' : _BACKWARDTEXTPROMPTINGREQUEST, '__module__' : 'bittensor._proto.bittensor_pb2' @@ -820,8 +690,8 @@ index=0, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1776, - serialized_end=2031, + serialized_start=1516, + serialized_end=1682, methods=[ _descriptor.MethodDescriptor( name='Forward', @@ -833,20 +703,10 @@ serialized_options=None, create_key=_descriptor._internal_create_key, ), - _descriptor.MethodDescriptor( - name='MultiForward', - full_name='TextPrompting.MultiForward', - index=1, - containing_service=None, - input_type=_MULTIFORWARDTEXTPROMPTINGREQUEST, - output_type=_MULTIFORWARDTEXTPROMPTINGRESPONSE, - serialized_options=None, - create_key=_descriptor._internal_create_key, - ), _descriptor.MethodDescriptor( name='Backward', full_name='TextPrompting.Backward', - index=2, + index=1, containing_service=None, input_type=_BACKWARDTEXTPROMPTINGREQUEST, output_type=_BACKWARDTEXTPROMPTINGRESPONSE, diff --git a/bittensor/_proto/bittensor_pb2_grpc.py b/bittensor/_proto/bittensor_pb2_grpc.py index 4af9882962..e9ea07e3a2 100644 --- a/bittensor/_proto/bittensor_pb2_grpc.py +++ b/bittensor/_proto/bittensor_pb2_grpc.py @@ -19,11 +19,6 @@ def __init__(self, channel): request_serializer=bittensor_dot___proto_dot_bittensor__pb2.ForwardTextPromptingRequest.SerializeToString, 
response_deserializer=bittensor_dot___proto_dot_bittensor__pb2.ForwardTextPromptingResponse.FromString, ) - self.MultiForward = channel.unary_unary( - '/TextPrompting/MultiForward', - request_serializer=bittensor_dot___proto_dot_bittensor__pb2.MultiForwardTextPromptingRequest.SerializeToString, - response_deserializer=bittensor_dot___proto_dot_bittensor__pb2.MultiForwardTextPromptingResponse.FromString, - ) self.Backward = channel.unary_unary( '/TextPrompting/Backward', request_serializer=bittensor_dot___proto_dot_bittensor__pb2.BackwardTextPromptingRequest.SerializeToString, @@ -40,12 +35,6 @@ def Forward(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def MultiForward(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - def Backward(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) @@ -60,11 +49,6 @@ def add_TextPromptingServicer_to_server(servicer, server): request_deserializer=bittensor_dot___proto_dot_bittensor__pb2.ForwardTextPromptingRequest.FromString, response_serializer=bittensor_dot___proto_dot_bittensor__pb2.ForwardTextPromptingResponse.SerializeToString, ), - 'MultiForward': grpc.unary_unary_rpc_method_handler( - servicer.MultiForward, - request_deserializer=bittensor_dot___proto_dot_bittensor__pb2.MultiForwardTextPromptingRequest.FromString, - response_serializer=bittensor_dot___proto_dot_bittensor__pb2.MultiForwardTextPromptingResponse.SerializeToString, - ), 'Backward': grpc.unary_unary_rpc_method_handler( servicer.Backward, request_deserializer=bittensor_dot___proto_dot_bittensor__pb2.BackwardTextPromptingRequest.FromString, @@ -97,23 +81,6 @@ def Forward(request, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - @staticmethod - def MultiForward(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/TextPrompting/MultiForward', - bittensor_dot___proto_dot_bittensor__pb2.MultiForwardTextPromptingRequest.SerializeToString, - bittensor_dot___proto_dot_bittensor__pb2.MultiForwardTextPromptingResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - @staticmethod def Backward(request, target, diff --git a/bittensor/_synapse/text_prompting/synapse.py b/bittensor/_synapse/text_prompting/synapse.py index 77af418f22..779e4d2b0c 100644 --- a/bittensor/_synapse/text_prompting/synapse.py +++ b/bittensor/_synapse/text_prompting/synapse.py @@ -20,44 +20,10 @@ import bittensor from typing import List, Dict, Union, Callable -from abc import ABC, abstractmethod +from abc import abstractmethod import json -class SynapseForwardMulti( bittensor.SynapseCall ): - name: str = "text_prompting_forward_multi" - is_forward: bool = True - multi_completions: List[ str ] = [""] - - def __init__( - self, - synapse: "bittensor.TextPromptingSynapseMulti", - request_proto: bittensor.proto.MultiForwardTextPromptingRequest, - multi_forward_callback: Callable, - context: grpc.ServicerContext - ): - super().__init__( synapse = synapse, 
request_proto = request_proto, context = context ) - self.messages: List[ Dict[str, str] ] = request_proto.messages - self.formatted_messages = [ json.loads(message) for message in self.messages ] - self.multi_forward_callback = multi_forward_callback - - def apply( self ): - bittensor.logging.trace( "SynapseForward.apply()" ) - self.multi_completions = self.multi_forward_callback( messages = self.formatted_messages ) - bittensor.logging.trace( "SynapseForward.apply() = ", self.multi_completions ) - - def get_response_proto( self ) -> bittensor.proto.MultiForwardTextPromptingResponse: - bittensor.logging.trace( "SynapseForward.get_response_proto()") - return bittensor.MultiForwardTextPromptingResponse( multi_completions = self.multi_completions ) - - def get_inputs_shape(self) -> Union[torch.Size, None]: - bittensor.logging.trace( "SynapseForward.get_inputs_shape()" ) - return torch.Size( [ len(message) for message in self.messages ] ) - - def get_outputs_shape(self) -> Union[torch.Size, None]: - bittensor.logging.trace( "SynapseForward.get_outputs_shape()" ) - return torch.Size( [ len(self.multi_completions) ] ) - class SynapseForward( bittensor.SynapseCall ): name: str = "text_prompting_forward" is_forward: bool = True @@ -137,8 +103,6 @@ def __init__(self, axon: "bittensor.axon" ): @abstractmethod def forward( self, messages: List[Dict[str, str]] ) -> str: ... - def multi_forward( self, messages: List[Dict[str, str]] ) -> List[ str ]: ... - @abstractmethod def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: ... @@ -147,11 +111,6 @@ def Forward( self, request: bittensor.proto.ForwardTextPromptingRequest, context bittensor.logging.trace( 'Forward: {} '.format( call ) ) return self.apply( call = call ) - def MultiForward( self, request: bittensor.proto.MultiForwardTextPromptingRequest, context: grpc.ServicerContext ) -> bittensor.proto.MultiForwardTextPromptingResponse: - call = SynapseForwardMulti( self, request, self.multi_forward, context ) - bittensor.logging.trace( 'MultiForward: {} '.format( call ) ) - return self.apply( call = call ) - def Backward( self, request: bittensor.proto.BackwardTextPromptingRequest, context: grpc.ServicerContext ) -> bittensor.proto.BackwardTextPromptingResponse: call = SynapseBackward( self, request, self.backward, context ) bittensor.logging.trace( 'Backward: {}'.format( call ) ) diff --git a/examples/text_prompting.py b/examples/text_prompting.py deleted file mode 100644 index 9329593804..0000000000 --- a/examples/text_prompting.py +++ /dev/null @@ -1,66 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import bittensor -from typing import List, Dict, Union, Tuple - -bittensor.logging( bittensor.logging.config() ) - -class Synapse( bittensor.TextPromptingSynapse ): - def priority(self, forward_call: "bittensor.TextPromptingForwardCall") -> float: - return 0.0 - - def blacklist(self, forward_call: "bittensor.TextPromptingForwardCall") -> Union[ Tuple[bool, str], bool ]: - return False - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: - pass - - def forward(self, messages: List[Dict[str, str]]) -> str: - return "hello im a chat bot." - - def multi_forward(self, messages: List[Dict[str, str]]) -> List[ str ]: - return ["hello im a chat bot.", "my name is bob" ] - -# Create a mock wallet. -wallet = bittensor.wallet().create_if_non_existent() -axon = bittensor.axon( wallet = wallet, port = 9090, external_ip = "127.0.0.1" ) - -dendrite = bittensor.text_prompting( axon = axon, keypair = wallet.hotkey ) -synapse = Synapse( axon = axon ) -axon.start() - - -forward_call = dendrite.forward( - roles = ['system', 'assistant'], - messages = ['you are chat bot', 'what is the whether'], - timeout = 1e6 -) -print ( forward_call ) -print ( 'success', forward_call.is_success, 'failed', forward_call.did_fail, 'timedout', forward_call.did_timeout ) -print ( 'completion', forward_call.completion ) - - -multi_forward_call = dendrite.multi_forward( - roles = ['system', 'assistant'], - messages = ['you are chat bot', 'what is the whether'], - timeout = 1e6 -) -print ( multi_forward_call ) -print ( 'success', multi_forward_call.is_success, 'failed', multi_forward_call.did_fail, 'timedout', multi_forward_call.did_timeout ) -print ( 'completions', multi_forward_call.multi_completions ) diff --git a/neurons/text/prompting/miners/AI21/README.md b/neurons/text/prompting/miners/AI21/README.md deleted file mode 100644 index cc6a58d43d..0000000000 --- a/neurons/text/prompting/miners/AI21/README.md +++ /dev/null @@ -1,114 +0,0 @@ -## AI21 Miner -AI21 Language Model Serving with BitTensor -This code is for running a language model powered by AI21 through the BitTensor framework. 
- -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/AI21/requirements.txt -python3 neurons/text/prompting/miners/AI21/miner.py --ai21.api_key -``` - -# Full Usage -``` -usage: neuron.py [-h] --ai21.api_key AI21.API_KEY [--ai21.model_name AI21.MODEL_NAME] [--ai21.stop AI21.STOP] [--netuid NETUID] - [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --ai21.api_key AI21.API_KEY - AI21 API key. - --ai21.model_name AI21.MODEL_NAME - Name of the model. - --ai21.stop AI21.STOP - Stop tokens. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. 
- --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. 
- ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/AI21/neuron.py b/neurons/text/prompting/miners/AI21/neuron.py deleted file mode 100644 index e9f33ae090..0000000000 --- a/neurons/text/prompting/miners/AI21/neuron.py +++ /dev/null @@ -1,69 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import argparse -import bittensor - -from typing import List, Dict -from langchain.llms import AI21 - -class AI21Miner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - assert config.ai21.api_key != None, 'the miner requires passing --ai21.api_key as an argument of the config.' - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument('--ai21.api_key', type=str, help='AI21 API key.', required=True) - parser.add_argument('--ai21.model_name', type=str, help='Name of the model.', default='j2-jumbo-instruct') - parser.add_argument('--ai21.stop', help='Stop tokens.', default=['user: ', 'bot: ', 'system: ']) - - def __init__( self ): - super( AI21Miner, self ).__init__() - print ( self.config ) - - bittensor.logging.info( 'Loading AI21 Model...' ) - self.model = AI21( - model = self.config.ai21.model_name, - ai21_api_key = self.config.ai21.api_key, - stop = self.config.ai21.stop - ) - bittensor.logging.info( 'Model loaded!' 
) - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - @staticmethod - def _process_history( history: List[Dict[str, str]] ) -> str: - processed_history = '' - for message in history: - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - if message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self._process_history(messages) - resp = self.model(history) - return resp - -if __name__ == "__main__": - bittensor.utils.version_checking() - AI21Miner().run() diff --git a/neurons/text/prompting/miners/AI21/requirements.txt b/neurons/text/prompting/miners/AI21/requirements.txt deleted file mode 100644 index e7f08b2136..0000000000 --- a/neurons/text/prompting/miners/AI21/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -langchain \ No newline at end of file diff --git a/neurons/text/prompting/miners/AlephAlpha/README.md b/neurons/text/prompting/miners/AlephAlpha/README.md deleted file mode 100644 index da66c18d70..0000000000 --- a/neurons/text/prompting/miners/AlephAlpha/README.md +++ /dev/null @@ -1,124 +0,0 @@ -## AlephAlpha Miner -AlephAlpha Language Model Serving with BitTensor -This code is for running a language model powered by AlephAlpha through the BitTensor framework. - -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/AlephAlpha/requirements.txt -python3 neurons/text/prompting/miners/AlephAlpha/miner.py --aleph.api_key -``` - -# Full Usage -``` -usage: neuron.py [-h] --aleph.api_key ALEPH.API_KEY [--aleph.model ALEPH.MODEL] [--aleph.maximum_tokens ALEPH.MAXIMUM_TOKENS] - [--aleph.temperature ALEPH.TEMPERATURE] [--aleph.stop_sequences ALEPH.STOP_SEQUENCES] [--aleph.top_k ALEPH.TOP_K] - [--aleph.top_p ALEPH.TOP_P] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] 
[--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --aleph.api_key ALEPH.API_KEY - AlephAlpha API key. - --aleph.model ALEPH.MODEL - Model name to use. - --aleph.maximum_tokens ALEPH.MAXIMUM_TOKENS - The maximum number of tokens to be generated. - --aleph.temperature ALEPH.TEMPERATURE - A non-negative float that tunes the degree of randomness in generation. - --aleph.stop_sequences ALEPH.STOP_SEQUENCES - Stop tokens. - --aleph.top_k ALEPH.TOP_K - Number of most likely tokens to consider at each step. - --aleph.top_p ALEPH.TOP_P - Total probability mass of tokens to consider at each step. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. 
- --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/AlephAlpha/neuron.py b/neurons/text/prompting/miners/AlephAlpha/neuron.py deleted file mode 100644 index 3ff6519e1f..0000000000 --- a/neurons/text/prompting/miners/AlephAlpha/neuron.py +++ /dev/null @@ -1,82 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
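The AI21 miner above and the AlephAlpha, Cerebras, Cohere and GooseAI miners below all flatten the chat history with the same `_process_history` helper. A small worked example, copying that helper as it appears in these files (the sample history values are illustrative); the role prefixes it emits are presumably also why these miners pass `'user: '`, `'bot: '` and `'system: '` as stop sequences:

```python
from typing import Dict, List

def _process_history( history: List[Dict[str, str]] ) -> str:
    # Same flattening used by the miners in this diff: one "role: content" line per message.
    processed_history = ''
    for message in history:
        if message['role'] == 'system':
            processed_history += 'system: ' + message['content'] + '\n'
        if message['role'] == 'assistant':
            processed_history += 'assistant: ' + message['content'] + '\n'
        if message['role'] == 'user':
            processed_history += 'user: ' + message['content'] + '\n'
    return processed_history

print( _process_history( [
    { 'role': 'system', 'content': 'You are a helpful bot.' },
    { 'role': 'user',   'content': 'What is the capital of France?' },
] ) )
# system: You are a helpful bot.
# user: What is the capital of France?
```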
- -import argparse -import bittensor -from rich import print -from typing import List, Dict - -from langchain.llms import AlephAlpha - -class AlephAlphaMiner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - assert config.aleph.api_key != None, 'the miner requires passing --aleph.api_key as an argument of the config.' - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument('--aleph.api_key', type=str, help='AlephAlpha API key.', required=True) - parser.add_argument('--aleph.model', type=str, help='Model name to use.', default='luminous-base') - parser.add_argument('--aleph.maximum_tokens', type=int, help='The maximum number of tokens to be generated.', default=64) - parser.add_argument('--aleph.temperature', type=float, help='A non-negative float that tunes the degree of randomness in generation.', default=0.0) - parser.add_argument('--aleph.stop_sequences', type=List[str], help='Stop tokens.', default=['user: ', 'bot: ', 'system: ']) - parser.add_argument('--aleph.top_k', type=int, help='Number of most likely tokens to consider at each step.', default=0) - parser.add_argument('--aleph.top_p', type=float, help='Total probability mass of tokens to consider at each step.', default=0.0) - - def __init__( self ): - super( AlephAlphaMiner, self ).__init__() - print ( self.config ) - - self.model = AlephAlpha( - aleph_alpha_api_key = self.config.aleph.api_key, - model = self.config.aleph.model, - maximum_tokens = self.config.aleph.maximum_tokens, - temperature = self.config.aleph.temperature, - top_k = self.config.aleph.top_k, - top_p = self.config.aleph.top_p, - stop_sequences = self.config.aleph.stop_sequences - ) - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - @staticmethod - def _process_history( history: List[Dict[str, str]] ) -> str: - processed_history = '' - for message in history: - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - if message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - def blacklist(self, forward_call: 'bittensor.BittensorCall' ): - return False - - def forward( self, messages: List[Dict[str, str]] ) -> str: - bittensor.logging.info('messages', str(messages)) - history = self._process_history(messages) - bittensor.logging.info('history', str(history)) - resp = self.model(history) - bittensor.logging.info('response', str(resp)) - return resp - -if __name__ == "__main__": - bittensor.utils.version_checking() - AlephAlphaMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/AlephAlpha/requiremenets.txt b/neurons/text/prompting/miners/AlephAlpha/requiremenets.txt deleted file mode 100644 index 110dc27b24..0000000000 --- a/neurons/text/prompting/miners/AlephAlpha/requiremenets.txt +++ /dev/null @@ -1,2 +0,0 @@ -aleph_alpha_client -langchain \ No newline at end of file diff --git a/neurons/text/prompting/miners/cerebras/README.md b/neurons/text/prompting/miners/cerebras/README.md deleted file mode 100644 index 5bb5f52505..0000000000 --- a/neurons/text/prompting/miners/cerebras/README.md +++ /dev/null @@ -1,117 +0,0 @@ -## Cerebras Miner -Cerebras 13B Language Model Serving with BitTensor -This code is for running a language model powered by Cerebrus through the BitTensor 
framework. - -# Example Usage -``` -python3 neurons/text/prompting/miners/cerebras/neuron.py -``` - -# Full Usage -``` -usage: neuron.py [-h] [--cerebras.device CEREBRAS.DEVICE] [--cerebras.max_length CEREBRAS.MAX_LENGTH] [--cerebras.do_sample] - [--cerebras.no_repeat_ngram_size CEREBRAS.NO_REPEAT_NGRAM_SIZE] [--cerebras.model_size {1.3B,2.7B,6.7B,13B}] - [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --cerebras.device CEREBRAS.DEVICE - Device to load model - --cerebras.max_length CEREBRAS.MAX_LENGTH - The maximum length (in tokens) of the generated text. - --cerebras.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --cerebras.no_repeat_ngram_size CEREBRAS.NO_REPEAT_NGRAM_SIZE - The size of the n-grams to avoid repeating in the generated text. - --cerebras.model_size {1.3B,2.7B,6.7B,13B} - Model size to use. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. 
- --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. 
- --strict If flagged, config will check that only exact arguemnts have been set. - ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/cerebras/neuron.py b/neurons/text/prompting/miners/cerebras/neuron.py deleted file mode 100644 index 714c3baac7..0000000000 --- a/neurons/text/prompting/miners/cerebras/neuron.py +++ /dev/null @@ -1,76 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# General. -import argparse -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline - -class CerebrasMiner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - pass - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument('--cerebras.device', type=str, help='Device to load model', default="cuda") - parser.add_argument('--cerebras.max_length', type=int, default=50, help='The maximum length (in tokens) of the generated text.') - parser.add_argument('--cerebras.do_sample', action='store_true', default=False, help='Whether to use sampling or not (if not, uses greedy decoding).') - parser.add_argument('--cerebras.no_repeat_ngram_size', type=int, default=2, help='The size of the n-grams to avoid repeating in the generated text.') - parser.add_argument('--cerebras.model_size', type=str, choices=['1.3B', '2.7B', '6.7B', '13B'], default="1.3B", help='Model size to use.') - - def __init__( self ): - super( CerebrasMiner, self ).__init__() - print ( self.config ) - - bittensor.logging.info( "Loading Cerebras GPT {} model...".format( self.config.cerebras.model_size) ) - model = AutoModelForCausalLM.from_pretrained( "cerebras/Cerebras-GPT-{}".format( self.config.cerebras.model_size) ) - tokenizer = AutoTokenizer.from_pretrained( "cerebras/Cerebras-GPT-{}".format( self.config.cerebras.model_size) ) - - self.pipe = pipeline( - "text-generation", - model = model, - tokenizer = tokenizer, - device = 0, - do_sample = False, - max_new_tokens = self.config.cerebras.max_length, - no_repeat_ngram_size = self.config.cerebras.no_repeat_ngram_size - ) - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - @staticmethod - def _process_history( history: List[Dict[str, str]] ) -> str: - processed_history = '' - for message in history: - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - if 
message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self._process_history(messages) - return self.pipe( history )[0]['generated_text'].split(':')[-1].replace( str( history ), "") - -if __name__ == "__main__": - bittensor.utils.version_checking() - CerebrasMiner().run() diff --git a/neurons/text/prompting/miners/cohere/README.md b/neurons/text/prompting/miners/cohere/README.md deleted file mode 100644 index 03f971646a..0000000000 --- a/neurons/text/prompting/miners/cohere/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# Cohere Miner -This repository contains the implementation of a language model server using the Cohere API. The model is integrated into the Bittensor network, allowing it to serve as a Bittensor neuron. - -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/cohere/requirements.txt -python3 neurons/text/prompting/miners/cohere/neuron.py --cohere.api_key -``` - -# Full Usage -``` -usage: neuron.py [-h] [--cohere.model_name COHERE.MODEL_NAME] [--cohere.max_tokens COHERE.MAX_TOKENS] - [--cohere.temperature COHERE.TEMPERATURE] [--cohere.k COHERE.K] [--cohere.p COHERE.P] - [--cohere.frequency_penalty COHERE.FREQUENCY_PENALTY] [--cohere.presence_penalty COHERE.PRESENCE_PENALTY] - [--cohere.truncate COHERE.TRUNCATE] [--cohere.stop COHERE.STOP] --cohere.api_key COHERE.API_KEY [--netuid NETUID] - [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --cohere.model_name COHERE.MODEL_NAME - Name of the model. - --cohere.max_tokens COHERE.MAX_TOKENS - Number of tokens to generate. - --cohere.temperature COHERE.TEMPERATURE - Temperature of generation. 
- --cohere.k COHERE.K Number of most likely tokens to consider at each step. - --cohere.p COHERE.P Total probability mass of tokens to consider at each step. - --cohere.frequency_penalty COHERE.FREQUENCY_PENALTY - Penalizes repeated tokens according to frequency. - --cohere.presence_penalty COHERE.PRESENCE_PENALTY - Penalizes repeated tokens. - --cohere.truncate COHERE.TRUNCATE - Specify how the client handles inputs longer than the maximum token length: Truncate from START, END or NONE - --cohere.stop COHERE.STOP - List of tokens to stop generation on. - --cohere.api_key COHERE.API_KEY - API key for Cohere. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. 
- --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/cohere/neuron.py b/neurons/text/prompting/miners/cohere/neuron.py deleted file mode 100644 index a4a143313d..0000000000 --- a/neurons/text/prompting/miners/cohere/neuron.py +++ /dev/null @@ -1,81 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# General. 
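Every miner deleted in this change has the same shape: `check_config` and `add_args` classmethods for configuration, a `forward` that turns the message history into a completion, and a `run()` entry point. A structural sketch with a hypothetical `EchoMiner`; it assumes the `bittensor.BasePromptingMiner` base class, which this change also removes, so it is illustrative of the old pattern rather than something that runs after this PR:

```python
# Structural sketch only (hypothetical EchoMiner): the shared surface of the removed miners.
import argparse
import torch
import bittensor
from typing import List, Dict

class EchoMiner( bittensor.BasePromptingMiner ):

    @classmethod
    def check_config( cls, config: 'bittensor.Config' ):
        pass  # validate required flags here, e.g. API keys

    @classmethod
    def add_args( cls, parser: argparse.ArgumentParser ):
        pass  # register --echo.* flags here

    def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str:
        pass

    def forward( self, messages: List[Dict[str, str]] ) -> str:
        # Instead of calling a hosted API, echo the last message content.
        return messages[-1]['content']

if __name__ == "__main__":
    bittensor.utils.version_checking()
    EchoMiner().run()
```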
-import json -import argparse -import bittensor -from typing import List, Dict -from langchain.llms import Cohere - -class CohereMiner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - assert config.cohere.api_key != None, 'the miner requires passing --cohere.api_key as an argument of the config.' - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument('--cohere.model_name', type=str, help='Name of the model.', default='command-xlarge-nightly') - parser.add_argument('--cohere.max_tokens', type=int, help='Number of tokens to generate.', default=256) - parser.add_argument('--cohere.temperature', type=float, help='Temperature of generation.', default=0.75) - parser.add_argument('--cohere.k', type=int, help='Number of most likely tokens to consider at each step.', default=0) - parser.add_argument('--cohere.p', type=int, help='Total probability mass of tokens to consider at each step.', default=1) - parser.add_argument('--cohere.frequency_penalty', type=float, help='Penalizes repeated tokens according to frequency.', default=0.0) - parser.add_argument('--cohere.presence_penalty', type=float, help='Penalizes repeated tokens.', default=0.0) - parser.add_argument('--cohere.truncate', type=str, help='Specify how the client handles inputs longer than the maximum token length: Truncate from START, END or NONE', default=None) - parser.add_argument('--cohere.stop', type=str, help='List of tokens to stop generation on.', default=None) - parser.add_argument('--cohere.api_key', type=str, help='API key for Cohere.', required=True) - - def __init__( self ): - super( CohereMiner, self ).__init__() - print ( self.config ) - - self.model = Cohere( - model=self.config.cohere.model_name, - cohere_api_key=self.config.cohere.api_key, - max_tokens=self.config.cohere.max_tokens, - temperature=self.config.cohere.temperature, - k=self.config.cohere.k, - p=self.config.cohere.p, - frequency_penalty=self.config.cohere.frequency_penalty, - presence_penalty=self.config.cohere.presence_penalty, - truncate=self.config.cohere.truncate, - stop=self.config.cohere.stop, - ) - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - @staticmethod - def _process_history( history: List[Dict[str, str]] ) -> str: - processed_history = '' - for message in history: - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - if message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self._process_history( messages ) - return self.model( history ) - -if __name__ == "__main__": - bittensor.utils.version_checking() - CohereMiner().run() diff --git a/neurons/text/prompting/miners/cohere/requirements.txt b/neurons/text/prompting/miners/cohere/requirements.txt deleted file mode 100644 index 889f2e0a9c..0000000000 --- a/neurons/text/prompting/miners/cohere/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -cohere \ No newline at end of file diff --git a/neurons/text/prompting/miners/gooseai/README.md b/neurons/text/prompting/miners/gooseai/README.md deleted file mode 100644 index 55c4ae8e42..0000000000 --- a/neurons/text/prompting/miners/gooseai/README.md +++ /dev/null @@ -1,149 +0,0 @@ -# GooseAI Bittensor Miner -This 
repository contains a Bittensor Miner that uses GooseAI's endpoint. The miner connects to the Bittensor network, registers its wallet, and serves a GooseAI model to the network. - -## Prerequisites - -- Python 3.8+ -- langchain - -## Installation - -1. Clone the repository -2. Install the required packages with `pip install -r requirements.txt` -3. Set your GooseAI API key in the `api_key` argument when running the script - -For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation. - -## Example Usage - -To run the GooseAI Bittensor Miner with default settings, use the following command: - -``` -python3 -m pip install -r neurons/text/prompting/miners/gooseai/requirements.txt -python3 neurons/text/prompting/miners/gooseai/neuron.py --gooseai.api_key -``` - -# Full Usage -``` -usage: neuron.py [-h] --gooseai.api_key GOOSEAI.API_KEY [--gooseai.model_name GOOSEAI.MODEL_NAME] - [--gooseai.temperature GOOSEAI.TEMPERATURE] [--gooseai.max_tokens GOOSEAI.MAX_TOKENS] [--gooseai.top_p GOOSEAI.TOP_P] - [--gooseai.min_tokens GOOSEAI.MIN_TOKENS] [--gooseai.frequency_penalty GOOSEAI.FREQUENCY_PENALTY] - [--gooseai.presence_penalty GOOSEAI.PRESENCE_PENALTY] [--gooseai.n GOOSEAI.N] [--gooseai.model_kwargs GOOSEAI.MODEL_KWARGS] - [--gooseai.logit_bias GOOSEAI.LOGIT_BIAS] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --gooseai.api_key GOOSEAI.API_KEY - GooseAI api key required. 
- --gooseai.model_name GOOSEAI.MODEL_NAME - Model name to use - --gooseai.temperature GOOSEAI.TEMPERATURE - What sampling temperature to use - --gooseai.max_tokens GOOSEAI.MAX_TOKENS - The maximum number of tokens to generate in the completion - --gooseai.top_p GOOSEAI.TOP_P - Total probability mass of tokens to consider at each step - --gooseai.min_tokens GOOSEAI.MIN_TOKENS - The minimum number of tokens to generate in the completion - --gooseai.frequency_penalty GOOSEAI.FREQUENCY_PENALTY - Penalizes repeated tokens according to frequency - --gooseai.presence_penalty GOOSEAI.PRESENCE_PENALTY - Penalizes repeated tokens - --gooseai.n GOOSEAI.N - How many completions to generate for each prompt - --gooseai.model_kwargs GOOSEAI.MODEL_KWARGS - Holds any model parameters valid for `create` call not explicitly specified - --gooseai.logit_bias GOOSEAI.LOGIT_BIAS - Adjust the probability of specific tokens being generated - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. 
If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/gooseai/neuron.py b/neurons/text/prompting/miners/gooseai/neuron.py deleted file mode 100644 index 6e633d2e99..0000000000 --- a/neurons/text/prompting/miners/gooseai/neuron.py +++ /dev/null @@ -1,80 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
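For reference, the `GooseAIMiner.__init__` below packs the `--gooseai.*` flags into a `model_kwargs` dict for the langchain `GooseAI` wrapper; with the default flag values listed in the usage text above, that mapping works out as follows (values shown for illustration only):

```python
# The kwargs GooseAIMiner.__init__ below builds from the default --gooseai.* flags.
model_kwargs = {
    'model': 'gpt-neo-20b',   # --gooseai.model_name
    'n_ctx': 256,             # --gooseai.max_tokens
    'n_parts': 1,             # --gooseai.n
    'temp': 0.7,              # --gooseai.temperature
    'top_p': 1,               # --gooseai.top_p
    'repeat_penalty': 0,      # --gooseai.frequency_penalty
}
```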
- -import argparse -import bittensor -from typing import List, Dict, Any, Optional -from langchain.llms import GooseAI - -class GooseAIMiner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): pass - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument("--gooseai.api_key", type=str, required=True, help="GooseAI api key required.") - parser.add_argument("--gooseai.model_name", type=str, default="gpt-neo-20b", help="Model name to use") - parser.add_argument("--gooseai.temperature", type=float, default=0.7, help="What sampling temperature to use") - parser.add_argument("--gooseai.max_tokens", type=int, default=256, help="The maximum number of tokens to generate in the completion") - parser.add_argument("--gooseai.top_p", type=float, default=1, help="Total probability mass of tokens to consider at each step") - parser.add_argument("--gooseai.min_tokens", type=int, default=1, help="The minimum number of tokens to generate in the completion") - parser.add_argument("--gooseai.frequency_penalty", type=float, default=0, help="Penalizes repeated tokens according to frequency") - parser.add_argument("--gooseai.presence_penalty", type=float, default=0, help="Penalizes repeated tokens") - parser.add_argument("--gooseai.n", type=int, default=1, help="How many completions to generate for each prompt") - parser.add_argument("--gooseai.model_kwargs", type=Dict[str, Any], default=dict(), help="Holds any model parameters valid for `create` call not explicitly specified") - parser.add_argument("--gooseai.logit_bias", type=Optional[Dict[str, float]], default=dict(), help="Adjust the probability of specific tokens being generated") - - - def __init__( self ): - super( GooseAIMiner, self ).__init__() - print ( self.config ) - model_kwargs = { - 'model': self.config.gooseai.model_name, - 'n_ctx': self.config.gooseai.max_tokens, - 'n_parts': self.config.gooseai.n, - 'temp': self.config.gooseai.temperature, - 'top_p': self.config.gooseai.top_p, - 'repeat_penalty': self.config.gooseai.frequency_penalty, - } - self.model = GooseAI(gooseai_api_key=self.config.gooseai.api_key, model_kwargs=model_kwargs) - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - @staticmethod - def _process_history(history: List[dict]) -> str: - processed_history = '' - for message in history: - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - if message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - def forward( self, messages: List[Dict[str, str]] ) -> str: - bittensor.logging.info( 'messages', str( messages ) ) - history = self._process_history( messages ) - bittensor.logging.info( 'history', str( history ) ) - resp = self.model( history ) - bittensor.logging.info('response', str( resp )) - return resp - -if __name__ == "__main__": - bittensor.utils.version_checking() - GooseAIMiner().run() diff --git a/neurons/text/prompting/miners/gooseai/requirements.txt b/neurons/text/prompting/miners/gooseai/requirements.txt deleted file mode 100644 index e7f08b2136..0000000000 --- a/neurons/text/prompting/miners/gooseai/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -langchain \ No newline at end of file diff --git a/neurons/text/prompting/miners/gpt4all/README.md 
b/neurons/text/prompting/miners/gpt4all/README.md deleted file mode 100644 index c9c876d548..0000000000 --- a/neurons/text/prompting/miners/gpt4all/README.md +++ /dev/null @@ -1,162 +0,0 @@ -## GPT4ALL Miner -GPT4ALL prompting miner for bittensor - -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/gpt4all/requirements.txt -python3 neurons/text/prompting/miners/gpt4all/neuron.py -``` - -# Obtaining (converted) [GPT4All](https://github.com/nomic-ai/gpt4all) weights - -- Obtain the `gpt4all-lora-quantized.bin` model -- It is distributed in the old `ggml` format which is now obsoleted -- You have to convert it to the new format using [./convert-gpt4all-to-ggml.py](https://github.com/ggerganov/llama.cpp/blob/master/convert-gpt4all-to-ggml.py). You may also need to -convert the model from the old format to the new format with [./migrate-ggml-2023-03-30-pr613.py](https://github.com/ggerganov/llama.cpp/blob/master/migrate-ggml-2023-03-30-pr613.py): - - ```bash - python3 convert-gpt4all-to-ggml.py models/gpt4all-7B/gpt4all-lora-quantized.bin ./models/tokenizer.model - python3 migrate-ggml-2023-03-30-pr613.py models/gpt4all-7B/gpt4all-lora-quantized.bin models/gpt4all-7B/gpt4all-lora-quantized-new.bin - ``` - -- You can now use the newly generated `gpt4all-lora-quantized-new.bin` model in exactly the same way as all other models -- The original model is saved in the same folder with a suffix `.orig` -- Tokenizer can be found [here](https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/tokenizer.model) - - -# Full Usage -``` -usage: neuron.py [-h] --gpt4all.model GPT4ALL.MODEL [--gpt4all.n_ctx GPT4ALL.N_CTX] [--gpt4all.n_parts GPT4ALL.N_PARTS] - [--gpt4all.seed GPT4ALL.SEED] [--gpt4all.f16_kv] [--gpt4all.logits_all] [--gpt4all.vocab_only] [--gpt4all.use_mlock] - [--gpt4all.embedding] [--gpt4all.n_threads GPT4ALL.N_THREADS] [--gpt4all.n_predict GPT4ALL.N_PREDICT] - [--gpt4all.temp GPT4ALL.TEMP] [--gpt4all.top_p GPT4ALL.TOP_P] [--gpt4all.top_k GPT4ALL.TOP_K] [--gpt4all.echo] - [--gpt4all.stop GPT4ALL.STOP] [--gpt4all.repeat_last_n GPT4ALL.REPEAT_LAST_N] - [--gpt4all.repeat_penalty GPT4ALL.REPEAT_PENALTY] [--gpt4all.n_batch GPT4ALL.N_BATCH] [--gpt4all.streaming] - [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] 
[--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --gpt4all.model GPT4ALL.MODEL - Path to pretrained gpt4all model in ggml format. - --gpt4all.n_ctx GPT4ALL.N_CTX - Token context window. - --gpt4all.n_parts GPT4ALL.N_PARTS - Number of parts to split the model into. If -1, the number of parts is automatically determined. - --gpt4all.seed GPT4ALL.SEED - Seed. If -1, a random seed is used. - --gpt4all.f16_kv Use half-precision for key/value cache. - --gpt4all.logits_all Return logits for all tokens, not just the last token. - --gpt4all.vocab_only Only load the vocabulary, no weights. - --gpt4all.use_mlock Force system to keep model in RAM. - --gpt4all.embedding Use embedding mode only. - --gpt4all.n_threads GPT4ALL.N_THREADS - Number of threads to use. - --gpt4all.n_predict GPT4ALL.N_PREDICT - The maximum number of tokens to generate. - --gpt4all.temp GPT4ALL.TEMP - The temperature to use for sampling. - --gpt4all.top_p GPT4ALL.TOP_P - The top-p value to use for sampling. - --gpt4all.top_k GPT4ALL.TOP_K - The top-k value to use for sampling. - --gpt4all.echo Whether to echo the prompt. - --gpt4all.stop GPT4ALL.STOP - Stop tokens. - --gpt4all.repeat_last_n GPT4ALL.REPEAT_LAST_N - Last n tokens to penalize. - --gpt4all.repeat_penalty GPT4ALL.REPEAT_PENALTY - The penalty to apply to repeated tokens. - --gpt4all.n_batch GPT4ALL.N_BATCH - Batch size for prompt processing. - --gpt4all.streaming Whether to stream the results or not. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. 
[::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. 
-``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/gpt4all/neuron.py b/neurons/text/prompting/miners/gpt4all/neuron.py deleted file mode 100644 index 3408c7ee54..0000000000 --- a/neurons/text/prompting/miners/gpt4all/neuron.py +++ /dev/null @@ -1,100 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import argparse -import bittensor -from typing import List, Dict -from langchain.llms import GPT4All - -class GPT4ALLMiner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): pass - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument('--gpt4all.model', type=str, help='Path to pretrained gpt4all model in ggml format.', required=True) - parser.add_argument('--gpt4all.n_ctx', type=int, default=512, help='Token context window.') - parser.add_argument('--gpt4all.n_parts', type=int, default=-1, help='Number of parts to split the model into. If -1, the number of parts is automatically determined.') - parser.add_argument('--gpt4all.seed', type=int, default=0, help='Seed. 
If -1, a random seed is used.') - parser.add_argument('--gpt4all.f16_kv', action='store_true', default=False, help='Use half-precision for key/value cache.') - parser.add_argument('--gpt4all.logits_all', action='store_true', default=False, help='Return logits for all tokens, not just the last token.') - parser.add_argument('--gpt4all.vocab_only', action='store_true', default=False, help='Only load the vocabulary, no weights.') - parser.add_argument('--gpt4all.use_mlock', action='store_true', default=False, help='Force system to keep model in RAM.') - parser.add_argument('--gpt4all.embedding', action='store_true', default=False, help='Use embedding mode only.') - parser.add_argument('--gpt4all.n_threads', type=int, default=4, help='Number of threads to use.') - parser.add_argument('--gpt4all.n_predict', type=int, default=256, help='The maximum number of tokens to generate.') - parser.add_argument('--gpt4all.temp', type=float, default=0.8, help='The temperature to use for sampling.') - parser.add_argument('--gpt4all.top_p', type=float, default=0.95, help='The top-p value to use for sampling.') - parser.add_argument('--gpt4all.top_k', type=int, default=40, help='The top-k value to use for sampling.') - parser.add_argument('--gpt4all.echo', action='store_true', default=False, help='Whether to echo the prompt.') - parser.add_argument('--gpt4all.repeat_last_n', type=int, default=64, help='Last n tokens to penalize.') - parser.add_argument('--gpt4all.repeat_penalty', type=float, default=1.3, help='The penalty to apply to repeated tokens.') - parser.add_argument('--gpt4all.n_batch', type=int, default=1, help='Batch size for prompt processing.') - parser.add_argument('--gpt4all.streaming', action='store_true', default=False, help='Whether to stream the results or not.') - - def __init__( self ): - super( GPT4ALLMiner, self ).__init__() - print ( self.config ) - self.model = GPT4All( - model=self.config.gpt4all.model, - n_ctx=self.config.gpt4all.n_ctx, - n_parts=self.config.gpt4all.n_parts, - seed=self.config.gpt4all.seed, - f16_kv=self.config.gpt4all.f16_kv, - logits_all=self.config.gpt4all.logits_all, - vocab_only=self.config.gpt4all.vocab_only, - use_mlock=self.config.gpt4all.use_mlock, - embedding=self.config.gpt4all.embedding, - n_threads=self.config.gpt4all.n_threads, - n_predict=self.config.gpt4all.n_predict, - temp=self.config.gpt4all.temp, - top_p=self.config.gpt4all.top_p, - top_k=self.config.gpt4all.top_k, - echo=self.config.gpt4all.echo, - stop=['user: ', 'bot: ', 'system: '], - repeat_last_n=self.config.gpt4all.repeat_last_n, - repeat_penalty=self.config.gpt4all.repeat_penalty, - n_batch=self.config.gpt4all.n_batch, - streaming=self.config.gpt4all.streaming, - ) - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - @staticmethod - def _process_history(history: List[dict]) -> str: - processed_history = '' - for message in history: - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - if message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - def forward( self, messages: List[Dict[str, str]] ) -> str: - bittensor.logging.info( 'messages', str( messages ) ) - history = self._process_history( messages ) - bittensor.logging.info( 'history', str( history ) ) - resp = self.model( history ) - bittensor.logging.info('response', 
str( resp )) - return resp - -if __name__ == "__main__": - bittensor.utils.version_checking() - GPT4ALLMiner().run() diff --git a/neurons/text/prompting/miners/gpt4all/requirements.txt b/neurons/text/prompting/miners/gpt4all/requirements.txt deleted file mode 100644 index 8e3ed575ce..0000000000 --- a/neurons/text/prompting/miners/gpt4all/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -langchain -pyllamacpp diff --git a/neurons/text/prompting/miners/huggingface/__init__.py b/neurons/text/prompting/miners/huggingface/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/huggingface/chat_glm/README.md b/neurons/text/prompting/miners/huggingface/chat_glm/README.md deleted file mode 100644 index 7d22d11885..0000000000 --- a/neurons/text/prompting/miners/huggingface/chat_glm/README.md +++ /dev/null @@ -1,104 +0,0 @@ -## ChatGLM Miner - THUDM/chatglm-6b Language Model Serving with BitTensor - This code is for running a language model powered by ChatGLM through the BitTensor framework. - - # Example Usage - ``` - python3 -m pip install -r neurons/text/prompting/miners/huggingface/chat_glm/requirements.txt - python3 neurons/text/prompting/miners/huggingface/chat_glm/neuron.py - ``` - - # Full Usage - ``` - usage: neuron.py [-h] [--chat_glm.device CHAT_GLM.DEVICE] [--chat_glm.max_new_tokens CHAT_GLM.MAX_NEW_TOKENS] [--chat_glm.temperature CHAT_GLM.TEMPERATURE] [--chat_glm.do_sample] - [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] - [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] - [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - - optional arguments: - -h, --help show this help message and exit - --chat_glm.device CHAT_GLM.DEVICE - Device to load model - --chat_glm.max_new_tokens CHAT_GLM.MAX_NEW_TOKENS - Max tokens for model output. - --chat_glm.temperature CHAT_GLM.TEMPERATURE - Sampling temperature of model - --chat_glm.do_sample Whether to use sampling or not (if not, uses greedy decoding). 
- --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests - up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for - testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] 
- Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. - ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/chat_glm/neuron.py b/neurons/text/prompting/miners/huggingface/chat_glm/neuron.py deleted file mode 100644 index 23136e752d..0000000000 --- a/neurons/text/prompting/miners/huggingface/chat_glm/neuron.py +++ /dev/null @@ -1,59 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
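Unlike the other Hugging Face miners in this diff, the removed ChatGLM neuron below generates replies through the model's custom `chat` method (exposed when loading with `trust_remote_code=True`) rather than through `generate`. A minimal sketch of that call, using the same `THUDM/chatglm-6b` identifier as the file below, is:

```python
# Sketch only: mirrors how the deleted chat_glm neuron calls ChatGLM's chat() API.
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True, torch_dtype=torch.float16)

# chat() takes the tokenizer, the newest prompt, and the prior (prompt, reply) pairs;
# it returns the new reply together with the updated history.
reply, history = model.chat(tokenizer, "Hello, who are you?", history=[])
print(reply)
```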
- -import torch -import argparse -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModel - -class ChatGLMMiner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = 'chat_glm' - assistant_label: str = '' - user_label: str = '' - system_label: str = '' - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( "THUDM/chatglm-6b", trust_remote_code=True) - - def load_model( self ): - return AutoModel.from_pretrained( "THUDM/chatglm-6b",trust_remote_code=True, torch_dtype = torch.float16 ) - - def forward(self, messages: List[Dict[str, str]]) -> str: - history = self.process_history( messages ) - prompt = history[-1][-1] - if len(history) == 1: - history = [] - generation, history = self.model.chat( - self.tokenizer, - prompt, - history, - max_length=self.config.chat_glm.max_new_tokens, - temperature=self.config.chat_glm.temperature, - do_sample=self.config.chat_glm.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - - bittensor.logging.debug("Message: " + str( messages ).replace( "<","-" ).replace( ">","-" ) ) - bittensor.logging.debug("Generation: " + str( generation ).replace( "<","-" ).replace( ">","-" ) ) - return generation - - -if __name__ == "__main__": - bittensor.utils.version_checking() - ChatGLMMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/chat_glm/requirements.txt b/neurons/text/prompting/miners/huggingface/chat_glm/requirements.txt deleted file mode 100644 index bb07cd001d..0000000000 --- a/neurons/text/prompting/miners/huggingface/chat_glm/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -protobuf==3.20.0 -transformers==4.27.1 -icetk -cpm_kernels \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/dolly/README.md b/neurons/text/prompting/miners/huggingface/dolly/README.md deleted file mode 100644 index 724506535b..0000000000 --- a/neurons/text/prompting/miners/huggingface/dolly/README.md +++ /dev/null @@ -1,114 +0,0 @@ - -## Databricks Dolly 3B/12B Miner -Dolyl 3B and 12B completion miner for bittensor's prompting network. 
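Under the hood this miner is a thin wrapper around a Hugging Face `pipeline`; a minimal sketch of the call it makes (model name and prompt labels taken from the `neuron.py` removed further down in this diff) looks like this:

```python
# Sketch only: mirrors the pipeline call in the deleted dolly neuron.py.
import torch
from transformers import pipeline

generate = pipeline(
    model="databricks/dolly-v2-12b",   # or databricks/dolly-v2-3b
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device=0,
)

# The miner flattens chat history into "### Instruction:" / "### Response:" form
# and appends the assistant label before generating.
prompt = "### Instruction:\nWhat is the capital of France?\n### Response:"
print(generate(prompt))
```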
- -# Example Usage -``` -python3 neurons/text/prompting/miners/huggingface/dolly/neuron.py --dolly.model_name databricks/dolly-v2-12b -``` - -# Full Usage -``` -usage: neuron.py [-h] [--dolly.model_name DOLLY.MODEL_NAME] [--dolly.device DOLLY.DEVICE] [--dolly.max_new_tokens DOLLY.MAX_NEW_TOKENS] [--dolly.temperature DOLLY.TEMPERATURE] - [--dolly.do_sample] [--dolly.do_prompt_injection] [--dolly.system_prompt DOLLY.SYSTEM_PROMPT] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] - [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] - [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] - [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] - [--logging.debug] [--logging.trace] [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --dolly.model_name DOLLY.MODEL_NAME - Name/path of model to load - --dolly.device DOLLY.DEVICE - Device to load model - --dolly.max_new_tokens DOLLY.MAX_NEW_TOKENS - Max tokens for model output. - --dolly.temperature DOLLY.TEMPERATURE - Sampling temperature of model - --dolly.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --dolly.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --dolly.system_prompt DOLLY.SYSTEM_PROMPT - What prompt to replace the system prompt with - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. 
- --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up to - this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for testing)) - If this option is set it overloads subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. 
- --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. - ``` diff --git a/neurons/text/prompting/miners/huggingface/dolly/neuron.py b/neurons/text/prompting/miners/huggingface/dolly/neuron.py deleted file mode 100644 index 52797011d6..0000000000 --- a/neurons/text/prompting/miners/huggingface/dolly/neuron.py +++ /dev/null @@ -1,53 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import bittensor -from typing import List, Dict -from transformers import pipeline - - -class Dolly12BMiner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = "dolly" - assistant_label: str = "### Response:" - user_label: str = "### Instruction:" - system_label: str = "" - - def load_model( self ): - bittensor.logging.info( 'Loading ' + str( self.config.dolly.model_name ) ) - model = pipeline( model=self.config.dolly.model_name, torch_dtype=torch.bfloat16, trust_remote_code=True, device=0 ) - bittensor.logging.info( 'Model loaded!' ) - return model - - def load_tokenizer( self ): - pass - - def forward(self, messages: List[Dict[str, str]]) -> str: - - history = self.process_history( messages ) - prompt = history + self.assistant_label - generation = self.model( prompt ) - - bittensor.logging.debug(" Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - - -if __name__ == "__main__": - bittensor.utils.version_checking() - Dolly12BMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/dolly/requirements.txt b/neurons/text/prompting/miners/huggingface/dolly/requirements.txt deleted file mode 100644 index fe2343560f..0000000000 --- a/neurons/text/prompting/miners/huggingface/dolly/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -accelerate>=0.16.0,<1 -transformers[torch]>=4.28.1,<5 -torch>=1.13.1,<2" \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/dromedary/README.md b/neurons/text/prompting/miners/huggingface/dromedary/README.md deleted file mode 100644 index c526dac858..0000000000 --- a/neurons/text/prompting/miners/huggingface/dromedary/README.md +++ /dev/null @@ -1,116 +0,0 @@ - -## Dromedary Miner -Dromedary 65B completion miner for bittensor's prompting network. 
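Dromedary is a 65B-parameter checkpoint, so the `neuron.py` removed below shards it across the available GPUs with `device_map="auto"` and half precision. A minimal sketch of that loading step (the model name is a placeholder for whatever is passed via `--dromedary.model_name`) is:

```python
# Sketch only: mirrors the multi-GPU loading in the deleted dromedary neuron.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "path/or/hub-id/of/dromedary-65b"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",          # shard layers across the available GPUs
    torch_dtype=torch.float16,  # half precision to fit the 65B weights
    low_cpu_mem_usage=True,
)
```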
- -# Example Usage -``` -python3 neurons/text/prompting/miners/huggingface/dromedary/neuron.py -``` - -# Full Usage -``` -usage: neuron.py [-h] [--dromedary.model_name DROMEDARY.MODEL_NAME] [--dromedary.device DROMEDARY.DEVICE] [--dromedary.max_new_tokens DROMEDARY.MAX_NEW_TOKENS] - [--dromedary.temperature DROMEDARY.TEMPERATURE] [--dromedary.do_sample] [--dromedary.do_prompt_injection] [--dromedary.system_prompt DROMEDARY.SYSTEM_PROMPT] - [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] - [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] - [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] - [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] - [--logging.debug] [--logging.trace] [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --dromedary.model_name DROMEDARY.MODEL_NAME - Name/path of model to load - --dromedary.device DROMEDARY.DEVICE - Device to load model - --dromedary.max_new_tokens DROMEDARY.MAX_NEW_TOKENS - Max tokens for model output. - --dromedary.temperature DROMEDARY.TEMPERATURE - Sampling temperature of model - --dromedary.do_sample - Whether to use sampling or not (if not, uses greedy decoding). - --dromedary.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --dromedary.system_prompt DROMEDARY.SYSTEM_PROMPT - What prompt to replace the system prompt with - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. 
- --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up - to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for - testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. 
- --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/dromedary/neuron.py b/neurons/text/prompting/miners/huggingface/dromedary/neuron.py deleted file mode 100644 index 21ec14f75a..0000000000 --- a/neurons/text/prompting/miners/huggingface/dromedary/neuron.py +++ /dev/null @@ -1,70 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import argparse -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - - -class DromedaryMiner( bittensor.HuggingFaceMiner ): - arg_prefix: str = "dromedary" - assistant_label: str = "Dromedary:" - user_label: str = "User:" - system_label: str = "System:" - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument( '--dromedary.device_map', type=str, help='Device to load model: Default "auto" for multi-GPU', default="auto" ) - - def __init__( self ): - super( DromedaryMiner, self ).__init__() - print ( self.config ) - - bittensor.logging.info( 'Loading ' + str( self.config.dromedary.model_name ) ) - self.tokenizer = AutoTokenizer.from_pretrained( self.config.dromedary.model_name, use_fast=False ) - self.model = AutoModelForCausalLM.from_pretrained( - self.config.dromedary.model_name, - device_map=self.config.dromedary.device_map, - torch_dtype=torch.float16, - low_cpu_mem_usage=True - ) - bittensor.logging.info( 'Model loaded!' 
) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - - history = self._process_history( self, messages ) - prompt = history + self.assistant_label - - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.dromedary.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.dromedary.max_new_tokens, - temperature=self.config.dromedary.temperature, - do_sample=self.config.dromedary.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - DromedaryMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/fastchat_t5/README.md b/neurons/text/prompting/miners/huggingface/fastchat_t5/README.md deleted file mode 100644 index d74449977b..0000000000 --- a/neurons/text/prompting/miners/huggingface/fastchat_t5/README.md +++ /dev/null @@ -1,127 +0,0 @@ -# FastChat T5 Miner -FastChat T5 completion miner for bittensor's prompting network. - -# Download weights -They disabled API inference requests via HuggingFace so you've gotta do it yourself by downloading the weights and passing the path directly. - -```bash -git lfs install -git clone https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -``` - -# Example Usage -``` -python3 neurons/text/prompting/miners/huggingface/fastchat_t5/neuron.py --fastchat_t5.model_path /path/to/fastchat-t5-3b-v1.0 -``` - -# Full Usage -``` -usage: fastchat-t5.py [-h] [--fastchat_t5.MODEL_PATH FASTCHAT_T5.MODEL_PATH] [--fastchat_t5.device FASTCHAT_T5.DEVICE] [--fastchat_t5.max_new_tokens FASTCHAT_T5.MAX_NEW_TOKENS] - [--fastchat_t5.temperature FASTCHAT_T5.TEMPERATURE] [--fastchat_t5.greedy_decoding] [--fastchat_t5.repetition_penalty FASTCHAT_T5.REPETITION_PENALTY] - [--fastchat_t5.do_prompt_injection] [--fastchat_t5.system_prompt FASTCHAT_T5.SYSTEM_PROMPT] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] - [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] - [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] - [--axon.port AXON.PORT] [--axon.ip AXON.IP] [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] - [--axon.max_workers AXON.MAX_WORKERS] [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] - [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id 
SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --fastchat_t5.MODEL_PATH FASTCHAT_T5.MODEL_PATH - Name/path of model to load - --fastchat_t5.device FASTCHAT_T5.DEVICE - Device to load model - --fastchat_t5.max_new_tokens FASTCHAT_T5.MAX_NEW_TOKENS - Max tokens for model output. - --fastchat_t5.temperature FASTCHAT_T5.TEMPERATURE - Sampling temperature of model - --fastchat_t5.greedy_decoding - Whether to use greedy sampling or not (if not, uses multinomial sampling). - --fastchat_t5.repetition_penalty FASTCHAT_T5.REPETITION_PENALTY - Repetition penalty for model - --fastchat_t5.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --fastchat_t5.system_prompt FASTCHAT_T5.SYSTEM_PROMPT - What prompt to replace the system prompt with - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up - to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. 
The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for - testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/fastchat_t5/neuron.py b/neurons/text/prompting/miners/huggingface/fastchat_t5/neuron.py deleted file mode 100644 index 4bc47638bd..0000000000 --- a/neurons/text/prompting/miners/huggingface/fastchat_t5/neuron.py +++ /dev/null @@ -1,41 +0,0 @@ -import torch -import argparse -import bittensor -from typing import List, Dict -from transformers import T5Tokenizer, AutoModelForSeq2SeqLM - -class FastChatT5Miner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = "fastchat_t5" - assistant_label: str = "ASSISTANT:" - user_label: str = "USER:" - system_label: str = "SYSTEM:" - - def load_model( self ): - bittensor.logging.info( 'Loading ' + str( self.config.fastchat_t5.model_name ) ) - model = AutoModelForSeq2SeqLM.from_pretrained( self.config.fastchat_t5.model_name, local_files_only=True, low_cpu_mem_usage=True, torch_dtype=torch.float16 ) - bittensor.logging.info( 'Model loaded!' 
) - return model - - def load_tokenizer( self ): - return T5Tokenizer.from_pretrained( self.config.fastchat_t5.model_name, local_files_only=True ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self.process_history( messages ) - prompt = history + self.assistant_label - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.config.fastchat_t5.device) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.fastchat_t5.max_new_tokens, - temperature=self.config.fastchat_t5.temperature, - pad_token_id=self.tokenizer.eos_token_id, - ) - generation = self.tokenizer.decode( output[0], skip_special_tokens=True ) - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - FastChatT5Miner().run() diff --git a/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/README.md b/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/README.md deleted file mode 100644 index 733d99467a..0000000000 --- a/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/README.md +++ /dev/null @@ -1,139 +0,0 @@ -## Gpt4_x_vicuna Miner -Gpt4_x_vicuna Language Model Serving with BitTensor -This code is for running the Gpt4_x_vicuna by Nous Research model through the BitTensor framework. - -# Overview - -## Contents - -- [Licence](#Licence) -- [Installing Dependencies](#installing-dependencies) -- [Starting Miner](#starting-miner) - - -# Licence -gpl - -# Installing Dependencies - -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/requirements.txt -``` - -# Starting Miner -To start the miner, all you need to do is to specify the path to the model, or the name on Huggingface hub, and it will be downloaded. 
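A Hugging Face hub id and a local checkpoint directory are resolved the same way by the loader, so either value can be supplied as the model name. A minimal sketch of that loading step (the repo id below is only an example, and this snippet is not the miner itself):
```python
# Sketch only: from_pretrained accepts either a hub repo id (downloaded and cached
# on first use) or a path to a local checkpoint directory.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name_or_path = "NousResearch/gpt4-x-vicuna-13b"  # example id; a local path works too
tokenizer = AutoTokenizer.from_pretrained( model_name_or_path, use_fast=False )
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,     # half precision, as the miner loads it
    low_cpu_mem_usage=True,
)
```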
- -You can find different model checkpoints by searching Huggingface, or by looking at Nous Research's Huggingface page https://huggingface.co/NousResearch - -``` -python3 neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/neuron.py --gpt4_x_vicuna.model_name GPT4_X_VICUNA.MODEL_NAME_OR_PATH -``` - -# Full Usage -``` -usage: neuron.py [-h] [--gpt4_x_vicuna.model_name GPT4_X_VICUNA.MODEL_NAME] [--gpt4_x_vicuna.device GPT4_X_VICUNA.DEVICE] [--gpt4_x_vicuna.max_new_tokens GPT4_X_VICUNA.MAX_NEW_TOKENS] - [--gpt4_x_vicuna.temperature GPT4_X_VICUNA.TEMPERATURE] [--gpt4_x_vicuna.do_sample] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --neoxt.model_name NEOXT.MODEL_NAME - Name/path of model to load of model to load - --gpt4_x_vicuna.device GPT4_X_VICUNA.DEVICE - Device to load model - --gpt4_x_vicuna.max_new_tokens GPT4_X_VICUNA.MAX_NEW_TOKENS - Max tokens for model output. - --gpt4_x_vicuna.temperature GPT4_X_VICUNA.TEMPERATURE - Sampling temperature of model - --gpt4_x_vicuna.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. 
- --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. 
- --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/neuron.py b/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/neuron.py deleted file mode 100644 index a3a49ff67c..0000000000 --- a/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/neuron.py +++ /dev/null @@ -1,58 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - -class Gpt4_x_vicunaMiner( bittensor.HuggingFaceMiner ): - arg_prefix = 'gpt4_x_vicuna' - system_label = '### System:' - user_label = '### User:' - assistant_label = '### Response:' - - def load_tokenizer(self): - return AutoTokenizer.from_pretrained( self.config.gpt4_x_vicuna.model_name, use_fast=False ) - - def load_model(self): - return AutoModelForCausalLM.from_pretrained( self.config.gpt4_x_vicuna.model_name, torch_dtype = torch.float16, low_cpu_mem_usage=True ) - - def forward(self, messages: List[Dict[str, str]]) -> str: - - history = self.process_history( messages ) - prompt = history + self.assistant_label - - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.gpt4_x_vicuna.device ) - - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.gpt4_x_vicuna.max_new_tokens, - temperature=self.config.gpt4_x_vicuna.temperature, - do_sample=self.config.gpt4_x_vicuna.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - - bittensor.logging.debug( "Messages: " + str( messages ) ) - bittensor.logging.debug( "Prompt: " + str( prompt ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - Gpt4_x_vicunaMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/requirements.txt b/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/requirements.txt deleted file mode 100644 index 5e83b53a0d..0000000000 --- 
a/neurons/text/prompting/miners/huggingface/gpt4_x_vicuna/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -transformers>=4.28.0 -fschat -tokenizers>=0.13.3 -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/guanaco/README.md b/neurons/text/prompting/miners/huggingface/guanaco/README.md deleted file mode 100644 index 5ecdb24207..0000000000 --- a/neurons/text/prompting/miners/huggingface/guanaco/README.md +++ /dev/null @@ -1,150 +0,0 @@ -# Guanaco Miner -timdettmers Guanaco Language Model Serving with BitTensor -This code is for running a language model powered by togethercomputer through the BitTensor framework. -Reference: [code](https://github.com/artidoro/qlora) - -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/guanaco/requirements.txt -python neurons/text/prompting/miners/huggingface/guanaco/neuron.py --guanaco.model_name timdettmers/guanaco-33b-merged --guanaco.device_map auto -``` - -# Full Usage -``` -usage: guanaco_miner.py [-h] [--guanaco.model_name GUANACO.MODEL_NAME] [--guanaco.api_key GUANACO.API_KEY] - [--guanaco.device GUANACO.DEVICE] [--guanaco.max_new_tokens GUANACO.MAX_NEW_TOKENS] - [--guanaco.temperature GUANACO.TEMPERATURE] [--guanaco.do_sample] - [--guanaco.repetition_penalty GUANACO.REPETITION_PENALTY] [--guanaco.do_prompt_injection] - [--guanaco.system_prompt GUANACO.SYSTEM_PROMPT] - [--guanaco.repetition-penalty GUANACO.REPETITION_PENALTY] [--guanaco.top_p GUANACO.TOP_P] - [--guanaco.top_k GUANACO.TOP_K] [--guanaco.load_in_8bit GUANACO.LOAD_IN_8BIT] - [--guanaco.device_map GUANACO.DEVICE_MAP] - [--guanaco.pad_tokens GUANACO.PAD_TOKENS [GUANACO.PAD_TOKENS ...]] [--netuid NETUID] - [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] - [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] - [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] - [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] - [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] - [--axon.max_workers AXON.MAX_WORKERS] [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] - [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] - [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] - [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] - [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] - [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --guanaco.model_name GUANACO.MODEL_NAME - Name or path of model to load - --guanaco.api_key 
GUANACO.API_KEY - huggingface api key - --guanaco.device GUANACO.DEVICE - Device to load model - --guanaco.max_new_tokens GUANACO.MAX_NEW_TOKENS - Max tokens for model output. - --guanaco.temperature GUANACO.TEMPERATURE - Sampling temperature of model - --guanaco.do_sample Whether to use multinomial sampling. - --guanaco.repetition_penalty GUANACO.REPETITION_PENALTY - Repetition penalty for model - --guanaco.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --guanaco.system_prompt GUANACO.SYSTEM_PROMPT - What prompt to replace the system prompt with - --guanaco.repetition-penalty GUANACO.REPETITION_PENALTY - Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0 - --guanaco.top_p GUANACO.TOP_P - Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0. - --guanaco.top_k GUANACO.TOP_K - Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000. - --guanaco.load_in_8bit GUANACO.LOAD_IN_8BIT - Load model in 8 bit precision - --guanaco.device_map GUANACO.DEVICE_MAP - Device map for model parallelism. - --guanaco.pad_tokens GUANACO.PAD_TOKENS [GUANACO.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this - wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc - server distributes new worker threads to service requests up to this number. 
- --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running - network) -- mock (creates a mock connection (for testing)) If this option is set it overloads - subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. - ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/guanaco/neuron.py b/neurons/text/prompting/miners/huggingface/guanaco/neuron.py deleted file mode 100644 index 6327c3e5be..0000000000 --- a/neurons/text/prompting/miners/huggingface/guanaco/neuron.py +++ /dev/null @@ -1,70 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - -class GuanacoMiner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = 'guanaco' - assistant_label: str = '### Assistant:' - user_label: str = '### Human:' - system_label: str = '' - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.guanaco.model_name ) - - def load_model( self ): - return AutoModelForCausalLM.from_pretrained( - self.config.guanaco.model_name, - torch_dtype = torch.float16, - low_cpu_mem_usage=True, - device_map=self.config.guanaco.device_map - ) - - def forward(self, messages: List[Dict[str, str]]) -> str: - history = self.process_history( messages ) - prompt = history + self.assistant_label - - generate_kwargs = dict( - temperature=self.config.guanaco.temperature, - max_new_tokens=self.config.guanaco.max_new_tokens, - top_p=self.config.guanaco.top_p, - repetition_penalty=self.config.guanaco.repetition_penalty, - do_sample=self.config.guanaco.do_sample, - ) - if '33B' in self.config.guanaco.model_name: # Tim Dettmers 33B model-specific parameters - generate_kwargs['truncate'] = 999 - generate_kwargs['seed'] = 42 - - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.guanaco.device ) - output = self.model.generate( - input_ids, - **generate_kwargs - ) - generated_text = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - generation = generated_text.split( self.assistant_label )[0].strip() - - bittensor.logging.debug("Message: " + str( messages ) ) - bittensor.logging.debug("Generation: " + str( generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - GuanacoMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/guanaco/requirements.txt b/neurons/text/prompting/miners/huggingface/guanaco/requirements.txt deleted file mode 100644 index 33059ec77c..0000000000 --- a/neurons/text/prompting/miners/huggingface/guanaco/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -transformers>=4.29.2 -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/koala/README.md b/neurons/text/prompting/miners/huggingface/koala/README.md deleted file mode 100644 index 6e56fed45d..0000000000 --- a/neurons/text/prompting/miners/huggingface/koala/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Koala Miner -Koala Language Model Serving with BitTensor -This code is for running the Koala model through the BitTensor framework. - -# Overview - -## Contents - -- [Installing Dependencies](#installing-Dependencies) -- [Converting Weights Into Model](#converting-weights-into-model) -- [Starting Miner](#starting-miner) - - -# Installing Dependencies - -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/koala/requirements.txt -``` - -# Converting Weights Into Model -If you already have a converted checkpoint of the model, you can skip this step. 
- -Original documentation for creating the model from weights can be found [here](https://github.com/young-geng/EasyLM/blob/main/docs/koala.md) - -## Obtaining the Wegith Diff of Koala -Due to the licence of the LLaMA model, the fine-tuned -Koala model weights can not be directly released. Instead, the diff of weights, which can be used -to recover the Koala model weights with the original LLaMA model weights, is released. The diff -weights can be downloaded from the following sources: -* [HuggingFace Hub](https://huggingface.co/young-geng/koala/tree/main). -* [Google Drive](https://drive.google.com/drive/folders/10f7wrlAFoPIy-TECHsx9DKIvbQYunCfl?usp=sharing). - -## Recovering the Koala Model Weights -The first step of recovering the Koala model weights is to obtain the original -LLaMA model weights and convert it to EasyLM checkpoint format. To convert the weights, -use the following command: - -``` shell -python -m EasyLM.models.llama.convert_torch_to_easylm \ - --checkpoint_dir='path/to/torch/llama/checkpoint/directory' \ - --output_file='path/to/output/easylm/checkpoint/file' \ - --streaming=True -``` - -This script will convert the official torch checkpoint from Meta to the -streaming checkpoint format used by EasyLM. For more information -about the checkpoint format of EasyLM, see [the checkpointing documentation](checkpointing.md). - - -After converting the original LLaMA model weights, you can recover the Koala -model weights with the following command: - -``` shell -python -m EasyLM.scripts.diff_checkpoint \ - --recover_diff=True \ - --load_base_checkpoint='params::path/to/llama/checkpoint/file' \ - --load_target_checkpoint='params::path/to/koala/diff/checkpoint/file' \ - --output_file='path/to/output/checkpoint/file' \ - --streaming=True -``` - - -# Starting Miner -``` -python3 neurons/text/prompting/miners/huggingface/koala_miner.py --koala.model_name TheBloke/koala-7B-HF -``` - -# Full Usage -``` -usage: neuron.py [-h] [--koala.model_name KOALA.MODEL_NAME] [--koala.device KOALA.DEVICE] [--koala.max_new_tokens KOALA.MAX_NEW_TOKENS] - [--koala.temperature KOALA.TEMPERATURE] [--koala.do_sample] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID 
[SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --neoxt.model_name NEOXT.MODEL_NAME - Name/path of model to load of model to load - --koala.device KOALA.DEVICE - Device to load model - --koala.max_new_tokens KOALA.MAX_NEW_TOKENS - Max tokens for model output. - --koala.temperature KOALA.TEMPERATURE - Sampling temperature of model - --koala.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. 
- --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/koala/neuron.py b/neurons/text/prompting/miners/huggingface/koala/neuron.py deleted file mode 100644 index 5f2bee3de4..0000000000 --- a/neurons/text/prompting/miners/huggingface/koala/neuron.py +++ /dev/null @@ -1,54 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - -class KoalaMiner( bittensor.HuggingFaceMiner ): - arg_prefix: str = 'koala' - assistant_label: str = 'GPT:' - user_label: str = 'USER:' - system_label: str = '' - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.koala.model_name, use_fast=False ) - - def load_model( self ): - return AutoModelForCausalLM.from_pretrained( self.config.koala.model_name, torch_dtype = torch.float16, low_cpu_mem_usage=True ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self._process_history( messages ) - prompt = history + self.system_label - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.koala.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.koala.max_new_tokens, - temperature=self.config.koala.temperature, - do_sample=self.config.koala.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - KoalaMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/koala/requirements.txt b/neurons/text/prompting/miners/huggingface/koala/requirements.txt deleted file mode 100644 index bd7d62c8ca..0000000000 --- a/neurons/text/prompting/miners/huggingface/koala/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers>=4.28.0 -fschat -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/mpt_chat/README.md b/neurons/text/prompting/miners/huggingface/mpt_chat/README.md deleted file mode 100644 index c80893753e..0000000000 --- a/neurons/text/prompting/miners/huggingface/mpt_chat/README.md +++ /dev/null @@ -1,139 +0,0 @@ -## MPT_Chat Miner -MPT_Chat Language Model Serving with BitTensor -This code is for running the Mpt_chat by MosaicML model through the BitTensor framework. - -# Overview - -## Contents - -- [Licence](#Licence) -- [Installing Dependencies](#installing-dependencies) -- [Starting Miner](#starting-miner) - - -# Licence -CC-By-NC-SA-4.0 (non-commercial use only) - - -# Installing Dependencies - -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/mpt_chat/requirements.txt -``` - -# Starting Miner -To start the miner, all you need to do is to specify the path to the model, or the name on Huggingface hub, and it will be downloaded. 
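Beyond the model name, the sampling flags (`--mpt_chat.max_new_tokens`, `--mpt_chat.temperature`, `--mpt_chat.do_sample`) are passed through to the model's `generate` call. A rough sketch of that pass-through, using a deliberately tiny placeholder model rather than the model served by the miner:
```python
# Illustrative only: how the sampling flags map onto model.generate.
# "sshleifer/tiny-gpt2" is a tiny placeholder so the sketch runs anywhere.
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained( "sshleifer/tiny-gpt2" )
model = AutoModelForCausalLM.from_pretrained( "sshleifer/tiny-gpt2" )

input_ids = tokenizer.encode( "USER: Hello!\nASSISTANT:", return_tensors="pt" )
output = model.generate(
    input_ids,
    max_length=input_ids.shape[1] + 64,   # --mpt_chat.max_new_tokens
    temperature=0.8,                      # --mpt_chat.temperature
    do_sample=True,                       # --mpt_chat.do_sample
    pad_token_id=tokenizer.eos_token_id,
)
print( tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) )
```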
- -You can find different model checkpoints by searching Huggingface, or by looking at MosaicML's Huggingface page https://huggingface.co/NousResearch -``` -python3 neurons/text/prompting/miners/huggingface/mpt_chat/neuron.py --mpt_chat.model_name MPT_CHAT.MODEL_NAME_OR_PATH -``` - -# Full Usage -``` -usage: neuron.py [-h] [--mpt_chat.model_name MPT_CHAT.MODEL_NAME] [--mpt_chat.device MPT_CHAT.DEVICE] [--mpt_chat.max_new_tokens MPT_CHAT.MAX_NEW_TOKENS] - [--mpt_chat.temperature MPT_CHAT.TEMPERATURE] [--mpt_chat.do_sample] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --neoxt.model_name NEOXT.MODEL_NAME - Name/path of model to load of model to load - --mpt_chat.device MPT_CHAT.DEVICE - Device to load model - --mpt_chat.max_new_tokens MPT_CHAT.MAX_NEW_TOKENS - Max tokens for model output. - --mpt_chat.temperature MPT_CHAT.TEMPERATURE - Sampling temperature of model - --mpt_chat.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. 
- --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. 
- --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/mpt_chat/neuron.py b/neurons/text/prompting/miners/huggingface/mpt_chat/neuron.py deleted file mode 100644 index a3ac45bc55..0000000000 --- a/neurons/text/prompting/miners/huggingface/mpt_chat/neuron.py +++ /dev/null @@ -1,78 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import argparse -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig - -class Mpt_chatMiner( bittensor.HuggingFaceMiner ): - arg_prefix: str = 'mpt_chat' - system_label: str = '<|im_start|>system\n' - user_label: str = '<|im_start|>user\n' - assistant_label: str = '<|im_start|>assistant\n' - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument( '--mpt_chat.tokenizer_name', type=str, required=False, help='Name/path of model to load' , default="EleutherAI/gpt-neox-20b") - parser.add_argument( '--mpt_chat.use_triton', action='store_true', default=False, help='Whether to use a triton to speed up inference' ) - - def load_tokenizer(self): - return AutoTokenizer.from_pretrained( self.config.mpt_chat.tokenizer_name ) - - def load_model(self): - config = AutoConfig.from_pretrained( 'mosaicml/mpt-7b-chat', trust_remote_code=True ) - - if self.config.mpt_chat.use_triton: - config.attn_config['attn_impl'] = 'triton' - - model = AutoModelForCausalLM.from_pretrained( - self.config.mpt_chat.model_name, - torch_dtype = torch.float16, - low_cpu_mem_usage=True, - trust_remote_code=True, - config=config - ) - return model - - def forward(self, messages: List[Dict[str, str]]) -> str: - - history = self.process_history( messages ) - prompt = history + self.assistant_label - - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.mpt_chat.device ) - - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.mpt_chat.max_new_tokens, - temperature=self.config.mpt_chat.temperature, - do_sample=self.config.mpt_chat.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=False ).strip() - generation = generation.split( "<|endoftext|>" )[0] - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Prompt: " + str( prompt 
) ) - bittensor.logging.debug( "Generation: " + str( generation.replace( "<", "-" ) ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - Mpt_chatMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/mpt_chat/requirements.txt b/neurons/text/prompting/miners/huggingface/mpt_chat/requirements.txt deleted file mode 100644 index 406fd1d021..0000000000 --- a/neurons/text/prompting/miners/huggingface/mpt_chat/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -transformers>=4.28.0 -fschat -tokenizers>=0.13.3 -accelerate -einops \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/neoxt/README.md b/neurons/text/prompting/miners/huggingface/neoxt/README.md deleted file mode 100644 index ffb387c9e0..0000000000 --- a/neurons/text/prompting/miners/huggingface/neoxt/README.md +++ /dev/null @@ -1,136 +0,0 @@ -## Neoxt Miner -togethercomputer/GPT-NeoXT-Chat-Base-20B Language Model Serving with BitTensor -This code is for running a language model powered by togethercomputer through the BitTensor framework. - -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/neoxt/requirements.txt -python3 neurons/text/prompting/miners/huggingface/neoxt/neuron.py --neoxt.model_name togethercomputer/GPT-NeoXT-Chat-Base-20B -``` - -# Full Usage -``` -usage: neoxt_miner.py [-h] --neoxt.model_name NEOXT.MODEL_NAME [--neoxt.device NEOXT.DEVICE] [--neoxt.max_new_tokens NEOXT.MAX_NEW_TOKENS] - [--neoxt.temperature NEOXT.TEMPERATURE] [--neoxt.do_sample] [--neoxt.repetition_penalty NEOXT.REPETITION_PENALTY] - [--neoxt.do_prompt_injection] [--neoxt.system_prompt NEOXT.SYSTEM_PROMPT] - [--neoxt.repetition-penalty NEOXT.REPETITION_PENALTY] [--neoxt.top_p NEOXT.TOP_P] [--neoxt.top_k NEOXT.TOP_K] - [--neoxt.load_in_8bit NEOXT.LOAD_IN_8BIT] [--neoxt.pad_tokens NEOXT.PAD_TOKENS [NEOXT.PAD_TOKENS ...]] [--netuid NETUID] - [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] - [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] - [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] - [--axon.port AXON.PORT] [--axon.ip AXON.IP] [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] - [--axon.max_workers AXON.MAX_WORKERS] [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] - [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - 
[--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --neoxt.model_name NEOXT.MODEL_NAME - Name or path of model to load - --neoxt.device NEOXT.DEVICE - Device to load model - --neoxt.max_new_tokens NEOXT.MAX_NEW_TOKENS - Max tokens for model output. - --neoxt.temperature NEOXT.TEMPERATURE - Sampling temperature of model - --neoxt.do_sample Whether to use multinomial sampling. - --neoxt.repetition_penalty NEOXT.REPETITION_PENALTY - Repetition penalty for model - --neoxt.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --neoxt.system_prompt NEOXT.SYSTEM_PROMPT - What prompt to replace the system prompt with - --neoxt.repetition-penalty NEOXT.REPETITION_PENALTY - Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0 - --neoxt.top_p NEOXT.TOP_P - Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0. - --neoxt.top_k NEOXT.TOP_K - Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000. - --neoxt.load_in_8bit NEOXT.LOAD_IN_8BIT - Load model in 8 bit precision - --neoxt.pad_tokens NEOXT.PAD_TOKENS [NEOXT.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new - worker threads to service requests up to this number. 
- --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock - (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point - node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. - ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/neoxt/neuron.py b/neurons/text/prompting/miners/huggingface/neoxt/neuron.py deleted file mode 100644 index 9fb87cc8d9..0000000000 --- a/neurons/text/prompting/miners/huggingface/neoxt/neuron.py +++ /dev/null @@ -1,56 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - - -class NeoxtMiner( bittensor.HuggingFaceMiner ): - arg_prefix: str = 'neoxt' - assistant_label: str = ':' - user_label: str = ':' - system_label: str = '' - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.neoxt.model_name ) - - def load_model( self ): - return AutoModelForCausalLM.from_pretrained( self.config.neoxt.model_name, torch_dtype = torch.float16, low_cpu_mem_usage=True ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self.process_history( messages ) - prompt = history + self.assistant_label - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.neoxt.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.neoxt.max_new_tokens, - temperature=self.config.neoxt.temperature, - do_sample=self.config.neoxt.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - generated_text = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - generation = generated_text.split( "" )[0].strip() - - bittensor.logging.debug( "Message: " + str( messages ).replace( "<","-" ).replace( ">","-" ) ) - bittensor.logging.debug( "Generation: " + str( generation ).replace( "<","-" ).replace( ">","-" ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - NeoxtMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/neoxt/requirements.txt b/neurons/text/prompting/miners/huggingface/neoxt/requirements.txt deleted file mode 100644 index b33885a650..0000000000 --- a/neurons/text/prompting/miners/huggingface/neoxt/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -transformers>=4.27.4 -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/oasst_pythia/README.md b/neurons/text/prompting/miners/huggingface/oasst_pythia/README.md deleted file mode 100644 index 6a011725d6..0000000000 --- a/neurons/text/prompting/miners/huggingface/oasst_pythia/README.md +++ /dev/null @@ -1,118 +0,0 @@ - -## OpenAssistant Pythia Miner -OpenAssistant's Pythia (12B) completion miner for bittensor's prompting network. 
-
-# Example Usage
-```
-python3 -m pip install -r neurons/text/prompting/miners/huggingface/oasst_pythia/requirements.txt
-python3 neurons/text/prompting/miners/huggingface/oasst_pythia/neuron.py --oasst_pythia.model_name OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
-```
-
-# Full Usage
-```
-usage: neuron.py [-h] [--pythia12B.model_name PYTHIA12B.MODEL_NAME] [--pythia12B.load_in_8bit PYTHIA12B.LOAD_IN_8BIT] [--pythia12B.max_new_tokens PYTHIA12B.MAX_NEW_TOKENS]
-                 [--pythia12B.temperature PYTHIA12B.TEMPERATURE] [--pythia12B.greedy_sampling] [--pythia12B.repetition-penalty PYTHIA12B.REPETITION_PENALTY]
-                 [--pythia12B.top_p PYTHIA12B.TOP_P] [--pythia12B.top_k PYTHIA12B.TOP_K] [--netuid NETUID] [--neuron.name NEURON.NAME]
-                 [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE]
-                 [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]]
-                 [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY]
-                 [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER]
-                 [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP]
-                 [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS]
-                 [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT]
-                 [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL]
-                 [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda]
-                 [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB]
-                 [--logging.debug] [--logging.trace] [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict]
-optional arguments:
-  -h, --help            show this help message and exit
-  --pythia12B.model_name PYTHIA12B.MODEL_NAME
-                        Name/path of model to load
-  --pythia12B.load_in_8bit PYTHIA12B.LOAD_IN_8BIT
-                        Load model in 8 bit precision
-  --pythia12B.max_new_tokens PYTHIA12B.MAX_NEW_TOKENS
-                        Max tokens for model output.
-  --pythia12B.temperature PYTHIA12B.TEMPERATURE
-                        Sampling temperature of model
-  --pythia12B.greedy_sampling
-                        Whether to use greedy sampling or not (if not, uses multinomial sampling).
-  --pythia12B.repetition-penalty PYTHIA12B.REPETITION_PENALTY
-                        Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0
-  --pythia12B.top_p PYTHIA12B.TOP_P
-                        Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0.
-  --pythia12B.top_k PYTHIA12B.TOP_K
-                        Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000.
-  --netuid NETUID       Subnet netuid
-  --neuron.name NEURON.NAME
-                        Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name
-  --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH
-                        Blocks until the miner sets weights on chain
-  --neuron.no_set_weights
-                        If True, the model does not set weights.
- --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up - to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for - testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] 
- Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/oasst_pythia/neuron.py b/neurons/text/prompting/miners/huggingface/oasst_pythia/neuron.py deleted file mode 100644 index 9e718eebbc..0000000000 --- a/neurons/text/prompting/miners/huggingface/oasst_pythia/neuron.py +++ /dev/null @@ -1,89 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import argparse -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, GPTNeoXForCausalLM -from transformers import StoppingCriteria, StoppingCriteriaList - -class StopOnTokens( StoppingCriteria ): - def __init__( self, stop_token_ids: List[int] = None ): - self.stop_token_ids = stop_token_ids - - def __call__( self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs ) -> bool: - for stop_id in self.stop_token_ids: - if input_ids[0][-1] == stop_id: - return True - return False - -class OasstPythiaMiner( bittensor.HuggingFaceMiner ): - arg_prefix = 'oasst_pythia' - system_label = "<|system|>" - assistant_label = "<|assistant|>" - user_label = "<|prompter|>" - - def __init__( self ): - super( OasstPythiaMiner, self ).__init__() - self.stop = StopOnTokens( self.tokenizer.convert_tokens_to_ids( [ "<|endoftext|>" ] ) ) - - def load_tokenizer(self): - return AutoTokenizer.from_pretrained( self.config.oasst_pythia.model_name, torch_dtype=torch.bfloat16 ) - - def load_model( self ): - bittensor.logging.info( 'Loading ' + str( self.config.oasst_pythia.model_name ) ) - model = GPTNeoXForCausalLM.from_pretrained( - self.config.oasst_pythia.model_name, - device_map="auto", - low_cpu_mem_usage=True, - torch_dtype=torch.bfloat16 - ) - bittensor.logging.info( 'Model loaded!' 
) - return model - - def forward( self, messages: List[Dict[str, str]] ): - history = self.process_history(messages) - prompt = history + self.assistant_label - - inputs = self.tokenizer( prompt, return_tensors="pt" ) - inputs = inputs.to( self.model.device ) - - gkw = { - **{ - "input_ids": inputs.input_ids, - "attention_mask": inputs.attention_mask, - "max_new_tokens": self.config.oasst_pythia.max_new_tokens, - "temperature": self.config.oasst_pythia.temperature, - "do_sample": self.config.oasst_pythia.do_sample, - "top_p": self.config.oasst_pythia.top_p, - "top_k": self.config.oasst_pythia.top_k, - "repetition_penalty": self.config.oasst_pythia.repetition_penalty, - "stopping_criteria": StoppingCriteriaList( [ self.stop ] ), - "pad_token_id": self.tokenizer.eos_token_id, - }, - } - output = self.model.generate( **gkw ) - generation = self.tokenizer.decode( output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True ) - - bittensor.logging.debug( "Message: " + str(messages ) ) - bittensor.logging.debug( "Generation: " + str(generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - OasstPythiaMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/open_llama/README.md b/neurons/text/prompting/miners/huggingface/open_llama/README.md deleted file mode 100644 index e81e38d306..0000000000 --- a/neurons/text/prompting/miners/huggingface/open_llama/README.md +++ /dev/null @@ -1,139 +0,0 @@ -## OpenLLaMA Miner -OpenLLaMA Language Model Serving with BitTensor -This code is for running the Open_llama model by OpenLM-Research through the BitTensor framework. - -# Overview - -## Contents - -- [Licence](#licence) -- [Model Download](#model-download) -- [Installing Dependencies](#installing-dependencies) -- [Starting Miner](#starting-miner) - -# Licence -Apache 2.0 (Open source and commercial purposes allowed) - -# Installing Dependencies - -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/open_llama/requirements.txt -``` - -# Starting Miner -To start the miner, all you need to do is to specify the path to the model, or the name on Huggingface hub, and it will be downloaded. 
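Under the hood, the (now removed) neuron resolves whatever you pass to `--open_llama.model_name` with the standard `transformers` LLaMA classes, so either a local path or a Hugging Face hub id works and hub weights are downloaded automatically. A minimal sketch mirroring the removed `load_tokenizer`/`load_model`; the hub id shown is only an example, not a required default:

```python
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Example hub id (assumption for illustration); substitute your own checkpoint or local path.
model_name = "openlm-research/open_llama_7b"

# Mirrors the removed neuron: slow tokenizer, fp16 weights, reduced CPU memory during load.
tokenizer = LlamaTokenizer.from_pretrained( model_name, use_fast=False )
model = LlamaForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True )
```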
- -You can find different model checkpoints by searching Huggingface, or by looking at OpenLM-Research's Huggingface page https://huggingface.co/openlm-research - -``` -python3 neurons/text/prompting/miners/huggingface/open_llama/neuron.py --open_llama.model_name OPEN_LLAMA.MODEL_NAME_OR_PATH -``` - -# Full Usage -``` -usage: neuron.py [-h] [--open_llama.model_name OPEN_LLAMA.MODEL_NAME] [--open_llama.device OPEN_LLAMA.DEVICE] [--open_llama.max_new_tokens OPEN_LLAMA.MAX_NEW_TOKENS] - [--open_llama.temperature OPEN_LLAMA.TEMPERATURE] [--open_llama.do_sample] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --neoxt.model_name NEOXT.MODEL_NAME - Name/path of model to load of model to load - --open_llama.device OPEN_LLAMA.DEVICE - Device to load model - --open_llama.max_new_tokens OPEN_LLAMA.MAX_NEW_TOKENS - Max tokens for model output. - --open_llama.temperature OPEN_LLAMA.TEMPERATURE - Sampling temperature of model - --open_llama.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. 
- --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. 
- --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/open_llama/neuron.py b/neurons/text/prompting/miners/huggingface/open_llama/neuron.py deleted file mode 100644 index e004009ffb..0000000000 --- a/neurons/text/prompting/miners/huggingface/open_llama/neuron.py +++ /dev/null @@ -1,60 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import bittensor -from typing import List, Dict -from transformers import LlamaForCausalLM, LlamaTokenizer - -class OpenLlamaMiner( bittensor.HuggingFaceMiner ): - arg_prefix = 'open_llama' - system_label = '\nSystem:' - assistant_label = '\nAssistant:' - user_label = '\nUser:' - - def load_tokenizer( self ): - return LlamaTokenizer.from_pretrained( self.config.open_llama.model_name, use_fast=False ) - - def load_model( self ): - return LlamaForCausalLM.from_pretrained( self.config.open_llama.model_name, torch_dtype = torch.float16, low_cpu_mem_usage=True ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - - history = self.process_history( messages ) - prompt = history + self.assistant_label - - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.open_llama.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.open_llama.max_new_tokens, - temperature=self.config.open_llama.temperature, - do_sample=self.config.open_llama.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - generation = generation.split( "User:" )[0].strip() - - # Logging input and generation if debugging is active - bittensor.logging.debug( "Prompt: " + str( prompt) ) - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ).replace( "<", "-" ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - OpenLlamaMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/open_llama/requirements.txt b/neurons/text/prompting/miners/huggingface/open_llama/requirements.txt deleted file mode 100644 index 98637aee3d..0000000000 --- 
a/neurons/text/prompting/miners/huggingface/open_llama/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -git+https://github.com/huggingface/transformers -sentencepiece -fschat -tokenizers>=0.13.3 -accelerate diff --git a/neurons/text/prompting/miners/huggingface/pythia/README.md b/neurons/text/prompting/miners/huggingface/pythia/README.md deleted file mode 100644 index b5f5af5daa..0000000000 --- a/neurons/text/prompting/miners/huggingface/pythia/README.md +++ /dev/null @@ -1,136 +0,0 @@ -# Pythia Miner -togethercomputer/Pythia-7B Language Model Serving with BitTensor -This code is for running a language model powered by togethercomputer through the BitTensor framework. - -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/pythia/requirements.txt -python3 neurons/text/prompting/miners/huggingface/pythia/neuron.py --pythia.model_name togethercomputer/Pythia-Chat-Base-7B -``` - -# Full Usage -``` -usage: pythia_miner.py [-h] --pythia.model_name PYTHIA.MODEL_NAME [--pythia.device PYTHIA.DEVICE] [--pythia.max_new_tokens PYTHIA.MAX_NEW_TOKENS] - [--pythia.temperature PYTHIA.TEMPERATURE] [--pythia.do_sample] [--pythia.repetition_penalty PYTHIA.REPETITION_PENALTY] - [--pythia.do_prompt_injection] [--pythia.system_prompt PYTHIA.SYSTEM_PROMPT] - [--pythia.repetition-penalty PYTHIA.REPETITION_PENALTY] [--pythia.top_p PYTHIA.TOP_P] [--pythia.top_k PYTHIA.TOP_K] - [--pythia.load_in_8bit PYTHIA.LOAD_IN_8BIT] [--pythia.pad_tokens PYTHIA.PAD_TOKENS [PYTHIA.PAD_TOKENS ...]] - [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] - [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] - [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] - [--axon.port AXON.PORT] [--axon.ip AXON.IP] [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] - [--axon.max_workers AXON.MAX_WORKERS] [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] - [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --pythia.model_name PYTHIA.MODEL_NAME - Name or path of model to load - --pythia.device PYTHIA.DEVICE - Device to load model - --pythia.max_new_tokens PYTHIA.MAX_NEW_TOKENS - Max tokens for model output. 
- --pythia.temperature PYTHIA.TEMPERATURE - Sampling temperature of model - --pythia.do_sample Whether to use multinomial sampling. - --pythia.repetition_penalty PYTHIA.REPETITION_PENALTY - Repetition penalty for model - --pythia.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --pythia.system_prompt PYTHIA.SYSTEM_PROMPT - What prompt to replace the system prompt with - --pythia.repetition-penalty PYTHIA.REPETITION_PENALTY - Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0 - --pythia.top_p PYTHIA.TOP_P - Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0. - --pythia.top_k PYTHIA.TOP_K - Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000. - --pythia.load_in_8bit PYTHIA.LOAD_IN_8BIT - Load model in 8 bit precision - --pythia.pad_tokens PYTHIA.PAD_TOKENS [PYTHIA.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new - worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock - (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point - node from that network. 
- --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. - ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/pythia/neuron.py b/neurons/text/prompting/miners/huggingface/pythia/neuron.py deleted file mode 100644 index 65d5620ff8..0000000000 --- a/neurons/text/prompting/miners/huggingface/pythia/neuron.py +++ /dev/null @@ -1,56 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - -class PythiaMiner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = 'pythia' - assistant_label: str = ':' - user_label: str = ':' - system_label: str = '' - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.pythia.model_name ) - - def load_model( self ): - return AutoModelForCausalLM.from_pretrained( self.config.pythia.model_name, torch_dtype = torch.float16, low_cpu_mem_usage=True ) - - def forward(self, messages: List[Dict[str, str]]) -> str: - history = self.process_history( messages ) - prompt = history + self.assistant_label - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.pythia.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.pythia.max_new_tokens, - temperature=self.config.pythia.temperature, - do_sample=self.config.pythia.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - generated_text = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - generation = generated_text.split( "" )[0].strip() - - bittensor.logging.debug("Message: " + str( messages ).replace( "<","-" ).replace( ">","-" ) ) - bittensor.logging.debug("Generation: " + str( generation ).replace( "<","-" ).replace( ">","-" ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - PythiaMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/pythia/requirements.txt b/neurons/text/prompting/miners/huggingface/pythia/requirements.txt deleted file mode 100644 index b33885a650..0000000000 --- a/neurons/text/prompting/miners/huggingface/pythia/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -transformers>=4.27.4 -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/raven/README.md b/neurons/text/prompting/miners/huggingface/raven/README.md deleted file mode 100644 index d931a450cb..0000000000 --- a/neurons/text/prompting/miners/huggingface/raven/README.md +++ /dev/null @@ -1,143 +0,0 @@ -# Raven RWKV Miner -BlinkDL/Raven-RWKV-7B Language Model Serving with BitTensor -This code is for running a language model powered by BlinkDL through the BitTensor framework. - -## Setup -Go to the huggingface repo for more information: [rwkv-4-raven](https://huggingface.co/BlinkDL/rwkv-4-raven) - -NOTE: You need to pass the path to the tokenizer.json from the command line. -- Find it [here](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B/resolve/main/20B_tokenizer.json) - -NOTE: You will want to browse and see what Raven model you wish to load [here](https://huggingface.co/BlinkDL/rwkv-4-raven/tree/main) -e.g. `RWKV-4-Raven-7B-v11-Eng99%25-Other1%25-20230427-ctx8192` for Engligh 99% and Other languages 1%= -e.g. `RWKV-4-Raven-7B-v11-Eng49%-Chn49%-Jpn1%-Other1%-20230430-ctx8192` for 49% English, 49% Chinese, 1% Japanese - -These percentages refer to the amount of training data from that particular language. 
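For orientation, the removed neuron turns these choices into a loaded model at startup roughly as follows. This is a sketch based on its `load_model`; the repo id is the default from the miner's arguments, and the checkpoint filename and strategy are the ones used in the usage example below, not the only valid values:

```python
from huggingface_hub import hf_hub_download
from rwkv.model import RWKV
from rwkv.utils import PIPELINE

# --raven.repo_id and --raven.model_name select the .pth checkpoint on the Hugging Face hub.
model_path = hf_hub_download(
    repo_id="BlinkDL/rwkv-4-raven",
    filename="RWKV-4-Raven-7B-v11x-Eng99%-Other1%-20230429-ctx8192.pth",
)

# --raven.strategy controls device placement and precision for the RWKV runtime.
model = RWKV( model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16' )

# --raven.tokenizer_path points at the 20B_tokenizer.json downloaded in the Usage section.
pipeline = PIPELINE( model, "20B_tokenizer.json" )
```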
- -# Usage -``` -wget https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B/resolve/main/20B_tokenizer.json -python3 -m pip install -r neurons/text/prompting/miners/huggingface/raven-rwkv/requirements.txt -python3 neurons/text/prompting/miners/huggingface/raven-rwkv/neuron.py --raven.tokenizer_path /home/jason/bittensor/20B_tokenizer.json \ - --raven.model_name RWKV-4-Raven-7B-v11x-Eng99%-Other1%-20230429-ctx8192 \ - --raven.repetition-penalty 0.2 --raven.top_p 0.0 --raven.temperature 1.0 -``` - -# Full Usage -``` -usage: neuron.py [-h] [--raven.model_name RAVEN.MODEL_NAME] [--raven.repo_id RAVEN.REPO_ID] [--raven.tokenizer_path RAVEN.TOKENIZER_PATH] [--raven.device RAVEN.DEVICE] [--raven.ctx_limit RAVEN.CTX_LIMIT] [--raven.max_new_tokens RAVEN.MAX_NEW_TOKENS] - [--raven.temperature RAVEN.TEMPERATURE] [--raven.top_p RAVEN.TOP_P] [--raven.do_prompt_injection] [--raven.system_prompt RAVEN.SYSTEM_PROMPT] [--raven.jit_on] [--raven.cuda_on] [--raven.strategy RAVEN.STRATEGY] - [--raven.pad_tokens RAVEN.PAD_TOKENS [RAVEN.PAD_TOKENS ...]] [--raven.repetition_penalty RAVEN.REPETITION_PENALTY] [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.blacklist.vpermit_required] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] [--axon.external_port AXON.EXTERNAL_PORT] - [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] - [--subtensor.register.cuda.no_cuda] [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --raven.model_name RAVEN.MODEL_NAME - Name/path of model to load - --raven.repo_id RAVEN.REPO_ID - Repo id of model to load - --raven.tokenizer_path RAVEN.TOKENIZER_PATH - Path to tokenizer json file - --raven.device RAVEN.DEVICE - Device to load model - --raven.ctx_limit RAVEN.CTX_LIMIT - Max context length for model input. - --raven.max_new_tokens RAVEN.MAX_NEW_TOKENS - Max tokens for model output. - --raven.temperature RAVEN.TEMPERATURE - Sampling temperature of model - --raven.top_p RAVEN.TOP_P - Top p sampling of model - --raven.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. 
- --raven.system_prompt RAVEN.SYSTEM_PROMPT - What prompt to replace the system prompt with - --raven.jit_on Whether to use Just-In-Time complication (JIT) - --raven.cuda_on Whether to use CUDA kernel for seq mode (much faster). [Requires CUDA_HOME env_variable to be set] - --raven.strategy RAVEN.STRATEGY - Strategy to use for RWKV model - --raven.pad_tokens RAVEN.PAD_TOKENS [RAVEN.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --raven.repetition_penalty RAVEN.REPETITION_PENALTY - Repetition penalty for RWKV model - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, this miner will allow non-registered hotkeys to query it. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.blacklist.vpermit_required - Require vpermit to query this miner. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point node from that - network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. 
- --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. - ``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/raven/neuron.py b/neurons/text/prompting/miners/huggingface/raven/neuron.py deleted file mode 100644 index a050ab8fa9..0000000000 --- a/neurons/text/prompting/miners/huggingface/raven/neuron.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -import argparse -import bittensor -from typing import List, Dict -from huggingface_hub import hf_hub_download -from rwkv.model import RWKV -from rwkv.utils import PIPELINE - -class RavenMiner( bittensor.HuggingFaceMiner ): - - arg_prefix = 'raven' - system_label = "" - assistant_label = "Alice:" - user_label = "Bob:" - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - super( RavenMiner, cls ).add_args( parser ) - parser.add_argument( '--raven.repo_id', type=str, default="BlinkDL/rwkv-4-raven", help='Repo id of model to load' ) - parser.add_argument( '--raven.tokenizer_path', type=str, required=True, help='Path to tokenizer json file' ) - parser.add_argument( '--raven.ctx_limit', type=int, help='Max context length for model input.', default=1536 ) - parser.add_argument( '--raven.jit_on', action='store_true', default=False, help='Whether to use Just-In-Time complication (JIT)' ) - parser.add_argument( '--raven.cuda_on', action='store_true', default=False, help='Whether to use CUDA kernel for seq mode (much faster). 
[Requires CUDA_HOME env_variable to be set]' ) - parser.add_argument( '--raven.strategy', type=str, default='cuda fp16i8 *8 -> cuda fp16', help='Strategy to use for RWKV model') - - def __init__(self): - super( RavenMiner, self ).__init__() - - def load_model( self ): - model_path = hf_hub_download( repo_id=self.config.raven.repo_id, filename=f"{self.config.raven.model_name}.pth" ) - model = RWKV( model=model_path, strategy=self.config.raven.strategy ) - return PIPELINE( model, self.config.raven.tokenizer_path ) - - def load_tokenizer( self ): - pass - - os.environ["RWKV_JIT_ON"] = '1' if self.config.raven.jit_on else '0' - os.environ["RWKV_CUDA_ON"] = '1' if self.config.raven.cuda_on else '0' - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self.process_history( messages ) - - out_tokens = [] - out_last = 0 - generation = '' - occurrence = {} - state = None - for i in range( self.config.raven.max_new_tokens ): - tokens = self.config.raven.pad_tokens + self.model.encode( history ) if i == 0 else [token] - - out, state = self.model.model.forward(tokens, state) - for n in occurrence: - out[n] -= ( self.config.raven.repetition_penalty + occurrence[n] * self.config.raven.repetition_penalty ) - - token = self.model.sample_logits( out, temperature=self.config.raven.temperature, top_p=self.config.raven.top_p ) - if token == 0: break # exit when 'endoftext' - - out_tokens += [token] - occurrence[token] = 1 + ( occurrence[token] if token in occurrence else 0 ) - - tmp = self.model.decode( out_tokens[out_last:] ) - if ( '\ufffd' not in tmp ) and ( not tmp.endswith('\n') ): - generation += tmp - out_last = i + 1 - - if '\n\n' in tmp: # exit when '\n\n' - generation += tmp - generation = generation.strip() - break - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - - -if __name__ == "__main__": - bittensor.utils.version_checking() - RavenMiner().run() - -def test_miner( model ): - prompt = """ - You are George Carlin. - George Carlin is a comedian known for his witty, cutting, poignant observational comedy. - He is also known for his social commentary, philosophy, and cutting remarks on religion. - Write a joke about the following topic: - """ - - message = "who am I, really?" - - if prompt is not None: - roles = ['system', 'user'] - messages = [ prompt, message ] - else: - roles = ['user'] - messages = [ message ] - - messages = [{'role': role, 'content': message} for role, message in zip(roles, messages)] - - return model.forward( messages ) - -miner = RavenMiner() -print( test_miner(miner) ) \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/raven/requirements.txt b/neurons/text/prompting/miners/huggingface/raven/requirements.txt deleted file mode 100644 index 2573350bcc..0000000000 --- a/neurons/text/prompting/miners/huggingface/raven/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -rwkv -huggingface_hub \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/robertmyers/README.md b/neurons/text/prompting/miners/huggingface/robertmyers/README.md deleted file mode 100644 index 965e435fc4..0000000000 --- a/neurons/text/prompting/miners/huggingface/robertmyers/README.md +++ /dev/null @@ -1,140 +0,0 @@ - -## RoberMyers Miner -Robert myers completion miner for bittensor's prompting network. 
- -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/robertmyers/requirements.txt -python3 neurons/text/prompting/miners/huggingface/robertmyers/neuron.py --robertmyers.model_name robertmyers/bpt-sft -``` - -# Full Usage -``` -usage: robertmyers_miner.py [-h] --robertmyers.model_name ROBERTMYERS.MODEL_NAME [--robertmyers.device ROBERTMYERS.DEVICE] - [--robertmyers.max_new_tokens ROBERTMYERS.MAX_NEW_TOKENS] [--robertmyers.temperature ROBERTMYERS.TEMPERATURE] - [--robertmyers.do_sample] [--robertmyers.repetition_penalty ROBERTMYERS.REPETITION_PENALTY] - [--robertmyers.do_prompt_injection] [--robertmyers.system_prompt ROBERTMYERS.SYSTEM_PROMPT] - [--robertmyers.repetition-penalty ROBERTMYERS.REPETITION_PENALTY] [--robertmyers.top_p ROBERTMYERS.TOP_P] - [--robertmyers.top_k ROBERTMYERS.TOP_K] [--robertmyers.load_in_8bit ROBERTMYERS.LOAD_IN_8BIT] - [--robertmyers.pad_tokens ROBERTMYERS.PAD_TOKENS [ROBERTMYERS.PAD_TOKENS ...]] [--netuid NETUID] - [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] - [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] - [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] - [--axon.port AXON.PORT] [--axon.ip AXON.IP] [--axon.external_port AXON.EXTERNAL_PORT] - [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --robertmyers.model_name ROBERTMYERS.MODEL_NAME - Name or path of model to load - --robertmyers.device ROBERTMYERS.DEVICE - Device to load model - --robertmyers.max_new_tokens ROBERTMYERS.MAX_NEW_TOKENS - Max tokens for model output. - --robertmyers.temperature ROBERTMYERS.TEMPERATURE - Sampling temperature of model - --robertmyers.do_sample - Whether to use multinomial sampling. - --robertmyers.repetition_penalty ROBERTMYERS.REPETITION_PENALTY - Repetition penalty for model - --robertmyers.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --robertmyers.system_prompt ROBERTMYERS.SYSTEM_PROMPT - What prompt to replace the system prompt with - --robertmyers.repetition-penalty ROBERTMYERS.REPETITION_PENALTY - Repetition penalty for greedy decoding. 
Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0 - --robertmyers.top_p ROBERTMYERS.TOP_P - Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0. - --robertmyers.top_k ROBERTMYERS.TOP_K - Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000. - --robertmyers.load_in_8bit ROBERTMYERS.LOAD_IN_8BIT - Load model in 8 bit precision - --robertmyers.pad_tokens ROBERTMYERS.PAD_TOKENS [ROBERTMYERS.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new - worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock - (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point - node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. 
- --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/robertmyers/neuron.py b/neurons/text/prompting/miners/huggingface/robertmyers/neuron.py deleted file mode 100644 index 2cbd84c6d4..0000000000 --- a/neurons/text/prompting/miners/huggingface/robertmyers/neuron.py +++ /dev/null @@ -1,51 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# General. 
-import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline - -class RobertMyersMiner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = 'robertmyers' - system_label: str = 'system:' - assistant_label: str = 'assistant:' - user_label: str = 'user:' - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.robertmyers.model_name ) - - def load_model( self ): - model = AutoModelForCausalLM.from_pretrained( self.config.robertmyers.model_name, torch_dtype=torch.float16 ) - model.to( self.config.robertmyers.device ) - return pipeline( - "text-generation", model, tokenizer=self.tokenizer, - device = 0, max_new_tokens = self.config.robertmyers.max_new_tokens, - temperature = self.config.robertmyers.temperature, - do_sample = self.config.robertmyers.do_sample, pad_token_id = self.tokenizer.eos_token_id - ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self.process_history( messages ) - resp = self.model( history )[0]['generated_text'].split(':')[-1].replace( str( history ), "") - return resp - -if __name__ == "__main__": - bittensor.utils.version_checking() - RobertMyersMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/robertmyers/requirements.txt b/neurons/text/prompting/miners/huggingface/robertmyers/requirements.txt deleted file mode 100644 index bab195ea3c..0000000000 --- a/neurons/text/prompting/miners/huggingface/robertmyers/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -xformers \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/stabilityai/README.md b/neurons/text/prompting/miners/huggingface/stabilityai/README.md deleted file mode 100644 index f77f615690..0000000000 --- a/neurons/text/prompting/miners/huggingface/stabilityai/README.md +++ /dev/null @@ -1,158 +0,0 @@ -## StabilityAI Miner -StabilityAI 7B completion miner for bittensor's prompting network. 
- -# Example Usage -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/stabilityai/requirements.txt -python3 neurons/text/prompting/miners/huggingface/stabilityai/neuron.py --stabilityai.model_size 7 # for 7B model - -# Some suggested settings -python3 neurons/text/prompting/miners/huggingface/stabilityai/neuron.py --stabilityai.temperature 1.0 --stabilityai.top_k 10 --stabilityai.top_p 0.95 --stabilityai.do_sample -``` - -# Full Usage -``` -usage: stabilityai_miner.py [-h] [--stabilityai.model_name STABILITYAI.MODEL_NAME] [--stabilityai.api_key STABILITYAI.API_KEY] - [--stabilityai.device STABILITYAI.DEVICE] [--stabilityai.max_new_tokens STABILITYAI.MAX_NEW_TOKENS] - [--stabilityai.temperature STABILITYAI.TEMPERATURE] [--stabilityai.do_sample] - [--stabilityai.repetition_penalty STABILITYAI.REPETITION_PENALTY] [--stabilityai.do_prompt_injection] - [--stabilityai.system_prompt STABILITYAI.SYSTEM_PROMPT] - [--stabilityai.repetition-penalty STABILITYAI.REPETITION_PENALTY] [--stabilityai.top_p STABILITYAI.TOP_P] - [--stabilityai.top_k STABILITYAI.TOP_K] [--stabilityai.load_in_8bit STABILITYAI.LOAD_IN_8BIT] - [--stabilityai.pad_tokens STABILITYAI.PAD_TOKENS [STABILITYAI.PAD_TOKENS ...]] [--stabilityai.model_size {3,7}] - [--stabilityai.suffix STABILITYAI.SUFFIX] [--stabilityai.num_return_sequences STABILITYAI.NUM_RETURN_SEQUENCES] - [--stabilityai.num_beams STABILITYAI.NUM_BEAMS] [--stabilityai.stopping_criteria STABILITYAI.STOPPING_CRITERIA] - [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] - [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] - [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] - [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] - [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] - [--axon.port AXON.PORT] [--axon.ip AXON.IP] [--axon.external_port AXON.EXTERNAL_PORT] - [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --stabilityai.model_name STABILITYAI.MODEL_NAME - Name or path of model to load - --stabilityai.api_key STABILITYAI.API_KEY - huggingface api key - --stabilityai.device STABILITYAI.DEVICE - Device to load model - --stabilityai.max_new_tokens STABILITYAI.MAX_NEW_TOKENS - Max tokens for model output. 
- --stabilityai.temperature STABILITYAI.TEMPERATURE - Sampling temperature of model - --stabilityai.do_sample - Whether to use multinomial sampling. - --stabilityai.repetition_penalty STABILITYAI.REPETITION_PENALTY - Repetition penalty for model - --stabilityai.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --stabilityai.system_prompt STABILITYAI.SYSTEM_PROMPT - What prompt to replace the system prompt with - --stabilityai.repetition-penalty STABILITYAI.REPETITION_PENALTY - Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0 - --stabilityai.top_p STABILITYAI.TOP_P - Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0. - --stabilityai.top_k STABILITYAI.TOP_K - Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000. - --stabilityai.load_in_8bit STABILITYAI.LOAD_IN_8BIT - Load model in 8 bit precision - --stabilityai.pad_tokens STABILITYAI.PAD_TOKENS [STABILITYAI.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --stabilityai.model_size {3,7} - Run the 3B or 7B model. - --stabilityai.suffix STABILITYAI.SUFFIX - The suffix that comes after a completion of inserted text. - --stabilityai.num_return_sequences STABILITYAI.NUM_RETURN_SEQUENCES - Description of num_return_sequences - --stabilityai.num_beams STABILITYAI.NUM_BEAMS - Description of num_beams - --stabilityai.stopping_criteria STABILITYAI.STOPPING_CRITERIA - Description of stopping_criteria - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. 
[::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new - worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock - (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point - node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/stabilityai/neuron.py b/neurons/text/prompting/miners/huggingface/stabilityai/neuron.py deleted file mode 100644 index 5a7ec17ca1..0000000000 --- a/neurons/text/prompting/miners/huggingface/stabilityai/neuron.py +++ /dev/null @@ -1,83 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. 
- -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - - -# General. -import torch -import argparse -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, StoppingCriteria, StoppingCriteriaList - -class StopOnTokens( StoppingCriteria ): - def __call__( self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs ) -> bool: - stop_ids = [50278, 50279, 50277, 1, 0] - for stop_id in stop_ids: - if input_ids[0][-1] == stop_id: - return True - return False - -class StabilityAIMiner( bittensor.HuggingFaceMiner ): - arg_prefix: str = 'stabilityai' - system_label: str = '<|SYSTEM|>:' - assistant_label: str = '<|ASSISTANT|>:' - user_label: str = '<|USER|>:' - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - super( StabilityAIMiner, cls ).add_args( parser ) - parser.add_argument( '--stabilityai.model_size', type=int, choices=[3, 7], default=7, help='Run the 3B or 7B model.' ) - parser.add_argument( '--stabilityai.suffix', type=str, default=None, help="The suffix that comes after a completion of inserted text." ) - parser.add_argument( '--stabilityai.num_return_sequences', type=int, default=1, help='Description of num_return_sequences' ) - parser.add_argument( '--stabilityai.num_beams', type=int, default=1, help='Description of num_beams' ) - parser.add_argument( '--stabilityai.stopping_criteria', type=str, default='stop', help='Description of stopping_criteria' ) - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( - "stabilityai/stablelm-tuned-alpha-{}b".format( self.config.stabilityai.model_size ), - use_auth_token=self.config.stabilityai.api_key - ) - - def load_model( self ): - model = AutoModelForCausalLM.from_pretrained( - "stabilityai/stablelm-tuned-alpha-{}b".format( self.config.stabilityai.model_size ), - use_auth_token=self.config.stabilityai.api_key, - torch_dtype=torch.float16 - ).cuda() - - return pipeline( - "text-generation", - model, - tokenizer = self.tokenizer, - device = 0, - max_new_tokens = self.config.stabilityai.max_new_tokens, - num_return_sequences = self.config.stabilityai.num_return_sequences, - num_beams = self.config.stabilityai.num_beams, - do_sample = self.config.stabilityai.do_sample, - temperature = self.config.stabilityai.temperature, - top_p = self.config.stabilityai.top_p, - top_k = self.config.stabilityai.top_k, - stopping_criteria=StoppingCriteriaList( [StopOnTokens()] ) - ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self.process_history( messages ) - return self.model( history )[0]['generated_text'].split(':')[-1].replace( str( history ), "") - -if __name__ == "__main__": - bittensor.utils.version_checking() - StabilityAIMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/stabilityai/stabilityai_requirements.txt b/neurons/text/prompting/miners/huggingface/stabilityai/stabilityai_requirements.txt deleted file mode 100644 index 747b7aa97a..0000000000 --- a/neurons/text/prompting/miners/huggingface/stabilityai/stabilityai_requirements.txt +++ /dev/null @@ -1 +0,0 @@ -transformers 
\ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/vicuna/README.md b/neurons/text/prompting/miners/huggingface/vicuna/README.md deleted file mode 100644 index e7d2f55f8a..0000000000 --- a/neurons/text/prompting/miners/huggingface/vicuna/README.md +++ /dev/null @@ -1,183 +0,0 @@ -## Vicuna Miner -Vicuna Language Model Serving with BitTensor -This code is for running the Vicuna model through the BitTensor framework. - -# Overview - -## Contents - -- [Installing Dependencies](#installing-dependencies) -- [Converting Weights Into Model](#converting-weights-into-model) -- [Starting Miner](#starting-miner) - - -# Installing Dependencies - -``` -python3 -m pip install -r neurons/text/prompting/miners/huggingface/vicuna/requirements.txt -``` - -# Converting Weights Into Model -If you already have a converted checkpoint of the model, you can skip this step. - -## Vicuna Weights -The [Vicuna](https://vicuna.lmsys.org/) weights as delta weights to comply with the LLaMA model license. -You can add our delta to the original LLaMA weights to obtain the Vicuna weights. Instructions: - -1. Get the original LLaMA weights in the huggingface format by following the instructions [here](https://huggingface.co/docs/transformers/main/model_doc/llama). -2. Use the following scripts to get Vicuna weights by applying our delta. They will automatically download delta weights from our Hugging Face [account](https://huggingface.co/lmsys). - -**NOTE**: -Weights v1.1 are only compatible with the latest main branch of huggingface/transformers and ``fschat >= 0.2.0``. -Please update your local packages accordingly. If you follow the above commands to do a fresh install, then you should get all the correct versions. - -Depending on which conversion script was used to create the Huggingface checkpoint of Llama, you might get an error that the tokenizer can not be found when loading the tokenizer. You can then replace all AutoTokenizers command with the correct tokenizer (in the example "LlamaTokenizer"), using this command: -``` -find /path/to/fastchat -type f -name '*.py' -exec sed -i 's/AutoTokenizer/LlamaTokenizer/g' {} + -``` - -### Vicuna-7B -This conversion command needs around 30 GB of CPU RAM. -If you do not have enough memory, you can create a large swap file that allows the operating system to automatically utilize the disk as virtual memory. -```bash -python3 -m fastchat.model.apply_delta \ - --base /path/to/llama-7b \ - --target /output/path/to/vicuna-7b \ - --delta lmsys/vicuna-7b-delta-v1.1 -``` - -### Vicuna-13B -This conversion command needs around 60 GB of CPU RAM. -If you do not have enough memory, you can create a large swap file that allows the operating system to automatically utilize the disk as virtual memory. -```bash -python3 -m fastchat.model.apply_delta \ - --base /path/to/llama-13b \ - --target /output/path/to/vicuna-13b \ - --delta lmsys/vicuna-13b-delta-v1.1 -``` - - -# Starting Miner -``` -# If using HuggingFace model directly, only need to supply the repo ID. -python3 neurons/text/prompting/miners/huggingface/vicuna/neuron.py --vicuna.model_name - -# If merging the weights yourself supply the path. 
-python3 neurons/text/prompting/miners/huggingface/vicuna/neuron.py --vicuna.model_name /path/to/merged/vicuna/weights - -``` - -# Full Usage -``` -usage: vicuna_miner.py [-h] --vicuna.model_name VICUNA.MODEL_NAME [--vicuna.device VICUNA.DEVICE] [--vicuna.max_new_tokens VICUNA.MAX_NEW_TOKENS] [--vicuna.temperature VICUNA.TEMPERATURE] [--vicuna.do_sample] - [--vicuna.repetition_penalty VICUNA.REPETITION_PENALTY] [--vicuna.do_prompt_injection] [--vicuna.system_prompt VICUNA.SYSTEM_PROMPT] [--vicuna.repetition-penalty VICUNA.REPETITION_PENALTY] [--vicuna.top_p VICUNA.TOP_P] - [--vicuna.top_k VICUNA.TOP_K] [--vicuna.load_in_8bit VICUNA.LOAD_IN_8BIT] [--vicuna.pad_tokens VICUNA.PAD_TOKENS [VICUNA.PAD_TOKENS ...]] [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] - [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] - [--neuron.blacklist.allow_non_registered] [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] - [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] - [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --vicuna.model_name VICUNA.MODEL_NAME - Name or path of model to load - --vicuna.device VICUNA.DEVICE - Device to load model - --vicuna.max_new_tokens VICUNA.MAX_NEW_TOKENS - Max tokens for model output. - --vicuna.temperature VICUNA.TEMPERATURE - Sampling temperature of model - --vicuna.do_sample Whether to use multinomial sampling. - --vicuna.repetition_penalty VICUNA.REPETITION_PENALTY - Repetition penalty for model - --vicuna.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --vicuna.system_prompt VICUNA.SYSTEM_PROMPT - What prompt to replace the system prompt with - --vicuna.repetition-penalty VICUNA.REPETITION_PENALTY - Repetition penalty for greedy decoding. Between 1.0 and infinity. 1.0 means no penalty. Default: 1.0 - --vicuna.top_p VICUNA.TOP_P - Top-p (nucleus) sampling. Defaults to 1.0 (top-k sampling). Must be between 0.0 and 1.0. - --vicuna.top_k VICUNA.TOP_K - Top-k sampling. Defaults to 0 (no top-k sampling). Must be between 0 and 1000. 
- --vicuna.load_in_8bit VICUNA.LOAD_IN_8BIT - Load model in 8 bit precision - --vicuna.pad_tokens VICUNA.PAD_TOKENS [VICUNA.PAD_TOKENS ...] - A list of integers separated by spaces for the pad_tokens. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an entry - point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. 
Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/vicuna/neuron.py b/neurons/text/prompting/miners/huggingface/vicuna/neuron.py deleted file mode 100644 index 61e466d11c..0000000000 --- a/neurons/text/prompting/miners/huggingface/vicuna/neuron.py +++ /dev/null @@ -1,64 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - -class VicunaMiner( bittensor.HuggingFaceMiner ): - - arg_prefix: str = 'vicuna' - system_label: str = '' - assistant_label: str = 'ASSISTANT:' - user_label: str = 'USER:' - - def __init__( self ): - super( VicunaMiner, self ).__init__() - print ( self.config ) - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.vicuna.model_name, use_fast=False ) - - def load_model( self ): - return AutoModelForCausalLM.from_pretrained( self.config.vicuna.model_name, torch_dtype = torch.float16, low_cpu_mem_usage=True ) - - def forward(self, messages: List[Dict[str, str]]) -> str: - - history = self.process_history( messages ) - prompt = history + self.assistant_label - print(prompt) - - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.vicuna.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.vicuna.max_new_tokens, - temperature=self.config.vicuna.temperature, - do_sample=self.config.vicuna.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - print(generation) - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation ) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - VicunaMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/vicuna/requirements.txt b/neurons/text/prompting/miners/huggingface/vicuna/requirements.txt deleted file mode 100644 index 5e83b53a0d..0000000000 --- a/neurons/text/prompting/miners/huggingface/vicuna/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -transformers>=4.28.0 -fschat -tokenizers>=0.13.3 -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/wizard_vicuna/README.md b/neurons/text/prompting/miners/huggingface/wizard_vicuna/README.md deleted file mode 100644 index 699dcd2b26..0000000000 --- a/neurons/text/prompting/miners/huggingface/wizard_vicuna/README.md +++ /dev/null @@ -1,117 +0,0 @@ - -## WizardLM + Vicuna Miner -WizardLM Vicuna completion miner for bittensor's prompting network. 
- -# Example Usage -``` -python3 neurons/text/prompting/miners/huggingface/wizard_vicuna/neuron.py --wiz_vic.model_name -``` - -# Full Usage -``` -usage: neuron.py [-h] [--wiz_vic.model_name WIZ_VIC.MODEL_NAME] [--wiz_vic.device WIZ_VIC.DEVICE] [--wiz_vic.max_new_tokens WIZ_VIC.MAX_NEW_TOKENS] - [--wiz_vic.temperature WIZ_VIC.TEMPERATURE] [--wiz_vic.greedy_decoding] [--wiz_vic.do_sample] [--wiz_vic.do_prompt_injection] - [--wiz_vic.system_prompt WIZ_VIC.SYSTEM_PROMPT] [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] - [--neuron.no_set_weights] [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] [--wallet.name WALLET.NAME] - [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] - [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] - [--subtensor._mock] [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] - [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] - [--logging.debug] [--logging.trace] [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --wiz_vic.model_name WIZ_VIC.MODEL_NAME - Name/path of model to load - --wiz_vic.device WIZ_VIC.DEVICE - Device to load model - --wiz_vic.max_new_tokens WIZ_VIC.MAX_NEW_TOKENS - Max tokens for model output. - --wiz_vic.temperature WIZ_VIC.TEMPERATURE - Sampling temperature of model - --wiz_vic.greedy_decoding - Whether to use greedy sampling or not (if not, uses multinomial sampling). - --wiz_vic.do_sample Whether to use sampling or not (if not, uses greedy decoding). - --wiz_vic.do_prompt_injection - Whether to use a custom "system" prompt instead of the one sent by bittensor. - --wiz_vic.system_prompt WIZ_VIC.SYSTEM_PROMPT - What prompt to replace the system prompt with - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. - --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. 
- --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS [NEURON.BLACKLIST.HOTKEYS ...]] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up - to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for - testing)) If this option is set it overloads subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). 
- --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/miners/huggingface/wizard_vicuna/neuron.py b/neurons/text/prompting/miners/huggingface/wizard_vicuna/neuron.py deleted file mode 100644 index 2881263172..0000000000 --- a/neurons/text/prompting/miners/huggingface/wizard_vicuna/neuron.py +++ /dev/null @@ -1,54 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import torch -import bittensor -from typing import List, Dict -from transformers import AutoTokenizer, AutoModelForCausalLM - -class WizardVicunaMiner( bittensor.HuggingFaceMiner ): - arg_prefix = "wiz_vic" - system_label = "" - assistant_label = "ASSISTANT:" - user_label = "USER:" - - def load_tokenizer( self ): - return AutoTokenizer.from_pretrained( self.config.wiz_vic.model_name, use_fast=False ) - - def load_model( self ): - return AutoModelForCausalLM.from_pretrained( self.config.wiz_vic.model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True ) - - def forward( self, messages: List[Dict[str, str]] ) -> str: - history = self.process_history( messages ) - prompt = history + self.assistant_label - input_ids = self.tokenizer.encode( prompt, return_tensors="pt" ).to( self.config.wiz_vic.device ) - output = self.model.generate( - input_ids, - max_length=input_ids.shape[1] + self.config.wiz_vic.max_new_tokens, - temperature=self.config.wiz_vic.temperature, - do_sample=self.config.wiz_vic.do_sample, - pad_token_id=self.tokenizer.eos_token_id, - ) - generation = self.tokenizer.decode( output[0][input_ids.shape[1]:], skip_special_tokens=True ) - - bittensor.logging.debug( "Message: " + str( messages ) ) - bittensor.logging.debug( "Generation: " + str( generation) ) - return generation - -if __name__ == "__main__": - bittensor.utils.version_checking() - WizardVicunaMiner().run() diff --git a/neurons/text/prompting/miners/huggingface/wizard_vicuna/requirements.txt b/neurons/text/prompting/miners/huggingface/wizard_vicuna/requirements.txt deleted file mode 100644 index 33059ec77c..0000000000 --- a/neurons/text/prompting/miners/huggingface/wizard_vicuna/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -transformers>=4.29.2 -accelerate \ No newline at end of file diff --git a/neurons/text/prompting/miners/openai/README.md b/neurons/text/prompting/miners/openai/README.md deleted file mode 100644 index c7b6ee8663..0000000000 --- a/neurons/text/prompting/miners/openai/README.md +++ /dev/null @@ -1,143 +0,0 @@ -# OpenAI Bittensor Miner -This repository contains a Bittensor Miner that uses OpenAI's GPT-3.5-turbo model as its synapse. The miner connects to the Bittensor network, registers its wallet, and serves the GPT-3.5-turbo model to the network. - -## Prerequisites - -- Python 3.8+ -- OpenAI Python API (https://github.com/openai/openai) - -## Installation - -1. Clone the repository -2. Install the required packages with `pip install -r requirements.txt` -3. Set your OpenAI API key in the `api_key` argument when running the script - -For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation. 
- -## Example Usage - -To run the OpenAI Bittensor Miner with default settings, use the following command: - -``` -python3 -m pip install -r neurons/text/prompting/miners/openai/requirements.txt -python3 neurons/text/prompting/miners/openai/neuron.py --openai.api_key -``` - -# Full Usage -``` -usage: neuron.py [-h] [--openai.api_key OPENAI.API_KEY] [--openai.suffix OPENAI.SUFFIX] [--openai.max_tokens OPENAI.MAX_TOKENS] - [--openai.temperature OPENAI.TEMPERATURE] [--openai.top_p OPENAI.TOP_P] [--openai.n OPENAI.N] - [--openai.presence_penalty OPENAI.PRESENCE_PENALTY] [--openai.frequency_penalty OPENAI.FREQUENCY_PENALTY] - [--openai.model_name OPENAI.MODEL_NAME] [--netuid NETUID] [--neuron.name NEURON.NAME] - [--neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH] [--neuron.no_set_weights] - [--neuron.max_batch_size NEURON.MAX_BATCH_SIZE] [--neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN] - [--neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...]] [--neuron.blacklist.allow_non_registered] - [--neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE] [--neuron.default_priority NEURON.DEFAULT_PRIORITY] - [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] [--wallet.path WALLET.PATH] [--wallet._mock] - [--wallet.reregister WALLET.REREGISTER] [--axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS] - [--axon.priority.maxsize AXON.PRIORITY.MAXSIZE] [--axon.port AXON.PORT] [--axon.ip AXON.IP] - [--axon.external_port AXON.EXTERNAL_PORT] [--axon.external_ip AXON.EXTERNAL_IP] [--axon.max_workers AXON.MAX_WORKERS] - [--axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS] [--subtensor.network SUBTENSOR.NETWORK] - [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] - [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] - [--subtensor.register.verbose] [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] - [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--logging.debug] [--logging.trace] [--logging.record_log] - [--logging.logging_dir LOGGING.LOGGING_DIR] [--metagraph._mock] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --openai.api_key OPENAI.API_KEY - openai api key - --openai.suffix OPENAI.SUFFIX - The suffix that comes after a completion of inserted text. - --openai.max_tokens OPENAI.MAX_TOKENS - The maximum number of tokens to generate in the completion. - --openai.temperature OPENAI.TEMPERATURE - Sampling temperature to use, between 0 and 2. - --openai.top_p OPENAI.TOP_P - Nucleus sampling parameter, top_p probability mass. - --openai.n OPENAI.N How many completions to generate for each prompt. - --openai.presence_penalty OPENAI.PRESENCE_PENALTY - Penalty for tokens based on their presence in the text so far. - --openai.frequency_penalty OPENAI.FREQUENCY_PENALTY - Penalty for tokens based on their frequency in the text so far. - --openai.model_name OPENAI.MODEL_NAME - OpenAI model to use for completion. - --netuid NETUID Subnet netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.blocks_per_epoch NEURON.BLOCKS_PER_EPOCH - Blocks until the miner sets weights on chain - --neuron.no_set_weights - If True, the model does not set weights. 
- --neuron.max_batch_size NEURON.MAX_BATCH_SIZE - The maximum batch size for forward requests. - --neuron.max_sequence_len NEURON.MAX_SEQUENCE_LEN - The maximum sequence length for forward requests. - --neuron.blacklist.hotkeys [NEURON.BLACKLIST.HOTKEYS ...] - To blacklist certain hotkeys - --neuron.blacklist.allow_non_registered - If True, the miner will allow non-registered hotkeys to mine. - --neuron.blacklist.default_stake NEURON.BLACKLIST.DEFAULT_STAKE - Set default stake for miners. - --neuron.default_priority NEURON.DEFAULT_PRIORITY - Set default priority for miners. - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. - --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --axon.priority.max_workers AXON.PRIORITY.MAX_WORKERS - maximum number of threads in thread pool - --axon.priority.maxsize AXON.PRIORITY.MAXSIZE - maximum size of tasks in priority queue - --axon.port AXON.PORT - The local port this axon endpoint is bound to. i.e. 8091 - --axon.ip AXON.IP The local ip this axon binds to. ie. [::] - --axon.external_port AXON.EXTERNAL_PORT - The public port this axon broadcasts to the network. i.e. 8091 - --axon.external_ip AXON.EXTERNAL_IP - The external ip this axon broadcasts to the network to. ie. [::] - --axon.max_workers AXON.MAX_WORKERS - The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes - new worker threads to service requests up to this number. - --axon.maximum_concurrent_rpcs AXON.MAXIMUM_CONCURRENT_RPCS - Maximum number of allowed active connections - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- - mock (creates a mock connection (for testing)) If this option is set it overloads subtensor.chain_endpoint with an - entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). 
Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` diff --git a/neurons/text/prompting/miners/openai/neuron.py b/neurons/text/prompting/miners/openai/neuron.py deleted file mode 100644 index 49e14c963c..0000000000 --- a/neurons/text/prompting/miners/openai/neuron.py +++ /dev/null @@ -1,64 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import openai -import argparse -import bittensor -from typing import List, Dict - -class OpenAIMiner( bittensor.BasePromptingMiner ): - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - assert config.openai.api_key != None, 'the miner requires passing --openai.api_key as an argument of the config.' 
- - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - parser.add_argument('--openai.api_key', type=str, help='openai api key') - parser.add_argument('--openai.suffix', type=str, default=None, help="The suffix that comes after a completion of inserted text.") - parser.add_argument('--openai.max_tokens', type=int, default=256, help="The maximum number of tokens to generate in the completion.") - parser.add_argument('--openai.temperature', type=float, default=0.7, help="Sampling temperature to use, between 0 and 2.") - parser.add_argument('--openai.top_p', type=float, default=1, help="Nucleus sampling parameter, top_p probability mass.") - parser.add_argument('--openai.n', type=int, default=1, help="How many completions to generate for each prompt.") - parser.add_argument('--openai.presence_penalty', type=float, default=0, help="Penalty for tokens based on their presence in the text so far.") - parser.add_argument('--openai.frequency_penalty', type=float, default=0, help="Penalty for tokens based on their frequency in the text so far.") - parser.add_argument('--openai.model_name', type=str, default='gpt-3.5-turbo', help="OpenAI model to use for completion.") - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - def __init__( self ): - super( OpenAIMiner, self ).__init__() - print ( self.config ) - openai.api_key = self.config.openai.api_key - - def forward( self, messages: List[Dict[str, str]] ) -> str: - resp = openai.ChatCompletion.create( - model = self.config.openai.model_name, - messages = messages, - temperature = self.config.openai.temperature, - max_tokens = self.config.openai.max_tokens, - top_p = self.config.openai.top_p, - frequency_penalty = self.config.openai.frequency_penalty, - presence_penalty = self.config.openai.presence_penalty, - n = self.config.openai.n, - )['choices'][0]['message']['content'] - return resp - -if __name__ == "__main__": - bittensor.utils.version_checking() - OpenAIMiner().run() \ No newline at end of file diff --git a/neurons/text/prompting/miners/openai/requirements.txt b/neurons/text/prompting/miners/openai/requirements.txt deleted file mode 100644 index f0dd0aec55..0000000000 --- a/neurons/text/prompting/miners/openai/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -openai \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/__init__.py b/neurons/text/prompting/miners/self_hosted/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/self_hosted/coati/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/self_hosted/coati/dataset/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/dataset/__init__.py deleted file mode 100644 index 2b21e9c4b1..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/dataset/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .prompt_dataset import PromptDataset -from .reward_dataset import HhRlhfDataset, RmStaticDataset, SHPDataset -from .sft_dataset import DataCollatorForSupervisedDataset, SFTDataset, SupervisedDataset -from .utils import is_rank_0 - -__all__ = [ - 'RmStaticDataset', 'HhRlhfDataset', 'SHPDataset', 'is_rank_0', 'SFTDataset', 'SupervisedDataset', - 'DataCollatorForSupervisedDataset', 'PromptDataset' -] diff --git a/neurons/text/prompting/miners/self_hosted/coati/dataset/prompt_dataset.py 
b/neurons/text/prompting/miners/self_hosted/coati/dataset/prompt_dataset.py deleted file mode 100644 index 4367a2c6f3..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/dataset/prompt_dataset.py +++ /dev/null @@ -1,46 +0,0 @@ -import copy -import random -from dataclasses import dataclass, field -from typing import Callable, Dict, Sequence - -import torch -import torch.distributed as dist -import transformers -from torch.utils.data import Dataset -from tqdm import tqdm - -from colossalai.logging import get_dist_logger - -from .utils import is_rank_0, jload - -logger = get_dist_logger() - - -class PromptDataset(Dataset): - """Dataset for supervised fine-tuning.""" - - def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, max_datasets_size: int = None): - super(PromptDataset, self).__init__() - self.prompt = [] - logger.info("Loading data...") - list_data_dict = jload(data_path) - logger.info(f"Loaded {len(list_data_dict)} examples.") - - if max_datasets_size is not None: - logger.info(f"Limiting dataset to {max_datasets_size} examples.") - list_data_dict = list_data_dict[:max_datasets_size] - - for data_dict in list_data_dict: - token = tokenizer(data_dict["instruction"], - return_tensors='pt', - max_length=96, - padding='max_length', - truncation=True) - for idx in token['input_ids']: - self.prompt.append(idx.to(torch.cuda.current_device())) - - def __len__(self): - return len(self.prompt) - - def __getitem__(self, i) -> Dict[str, torch.Tensor]: - return self.prompt[i] diff --git a/neurons/text/prompting/miners/self_hosted/coati/dataset/reward_dataset.py b/neurons/text/prompting/miners/self_hosted/coati/dataset/reward_dataset.py deleted file mode 100644 index 51aa0ec4ca..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/dataset/reward_dataset.py +++ /dev/null @@ -1,167 +0,0 @@ -from typing import Callable - -from torch.utils.data import Dataset -from tqdm import tqdm - -from .utils import is_rank_0 - - -# Dahaos/rm-static -class RmStaticDataset(Dataset): - """ - Dataset for reward model - - Args: - dataset: dataset for reward model - tokenizer: tokenizer for reward model - max_length: max length of input - special_token: special token at the end of sentence - """ - - def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None: - super().__init__() - self.chosen = [] - self.reject = [] - if special_token is None: - self.end_token = tokenizer.eos_token - else: - self.end_token = special_token - for data in tqdm(dataset, disable=not is_rank_0()): - prompt = data['prompt'] - - chosen = prompt + data['chosen'] + self.end_token - chosen_token = tokenizer(chosen, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - self.chosen.append({ - "input_ids": chosen_token['input_ids'], - "attention_mask": chosen_token['attention_mask'] - }) - - reject = prompt + data['rejected'] + self.end_token - reject_token = tokenizer(reject, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - self.reject.append({ - "input_ids": reject_token['input_ids'], - "attention_mask": reject_token['attention_mask'] - }) - - def __len__(self): - length = len(self.chosen) - return length - - def __getitem__(self, idx): - return self.chosen[idx]["input_ids"], self.chosen[idx]["attention_mask"], self.reject[idx][ - "input_ids"], self.reject[idx]["attention_mask"] - -# Dahoas/filtered-SHP -class SHPDataset(Dataset): - """ - Dataset for reward model - - Args: - 
dataset: dataset for reward model - tokenizer: tokenizer for reward model - max_length: max length of input - special_token: special token at the end of sentence - """ - - def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None: - super().__init__() - self.chosen = [] - self.reject = [] - if special_token is None: - self.end_token = tokenizer.eos_token - else: - self.end_token = special_token - for data in tqdm(dataset, disable=not is_rank_0()): - prompt = data['prompt'] - - chosen = prompt + data['chosen'] + self.end_token - chosen_token = tokenizer(chosen, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - self.chosen.append({ - "input_ids": chosen_token['input_ids'], - "attention_mask": chosen_token['attention_mask'] - }) - - reject = prompt + data['rejected'] + self.end_token - reject_token = tokenizer(reject, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - self.reject.append({ - "input_ids": reject_token['input_ids'], - "attention_mask": reject_token['attention_mask'] - }) - - def __len__(self): - length = len(self.chosen) - return length - - def __getitem__(self, idx): - return self.chosen[idx]["input_ids"], self.chosen[idx]["attention_mask"], self.reject[idx][ - "input_ids"], self.reject[idx]["attention_mask"] - - - - -# Anthropic/hh-rlhf -class HhRlhfDataset(Dataset): - """ - Dataset for reward model - - Args: - dataset: dataset for reward model - tokenizer: tokenizer for reward model - max_length: max length of input - special_token: special token at the end of sentence - """ - - def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None: - super().__init__() - self.chosen = [] - self.reject = [] - if special_token is None: - self.end_token = tokenizer.eos_token - else: - self.end_token = special_token - for data in tqdm(dataset, disable=not is_rank_0()): - chosen = data['chosen'] + self.end_token - chosen_token = tokenizer(chosen, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - self.chosen.append({ - "input_ids": chosen_token['input_ids'], - "attention_mask": chosen_token['attention_mask'] - }) - - reject = data['rejected'] + self.end_token - reject_token = tokenizer(reject, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - self.reject.append({ - "input_ids": reject_token['input_ids'], - "attention_mask": reject_token['attention_mask'] - }) - - def __len__(self): - length = len(self.chosen) - return length - - def __getitem__(self, idx): - return self.chosen[idx]["input_ids"], self.chosen[idx]["attention_mask"], self.reject[idx][ - "input_ids"], self.reject[idx]["attention_mask"] diff --git a/neurons/text/prompting/miners/self_hosted/coati/dataset/sft_dataset.py b/neurons/text/prompting/miners/self_hosted/coati/dataset/sft_dataset.py deleted file mode 100644 index 76ae6b1588..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/dataset/sft_dataset.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import random -from dataclasses import dataclass, field -from typing import Callable, Dict, Sequence - -import torch -import torch.distributed as dist -import transformers -from torch.utils.data import Dataset -from tqdm import tqdm - -from colossalai.logging import get_dist_logger - -from .utils import is_rank_0, jload - -logger = get_dist_logger() - -IGNORE_INDEX = -100 -PROMPT_DICT = { - "prompt_input": - ("Below is an instruction that describes a task, paired with an input that provides further context. " - "Write a response that appropriately completes the request.\n\n" - "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"), - "prompt_no_input": ("Below is an instruction that describes a task. " - "Write a response that appropriately completes the request.\n\n" - "### Instruction:\n{instruction}\n\n### Response:"), -} - - -class SFTDataset(Dataset): - """ - Dataset for sft model - - Args: - dataset: dataset for supervised model - tokenizer: tokenizer for supervised model - max_length: max length of input - """ - - def __init__(self, dataset, tokenizer: Callable, max_length: int = 512) -> None: - super().__init__() - # self.prompts = [] - self.input_ids = [] - - for data in tqdm(dataset, disable=not is_rank_0()): - prompt = data['prompt'] + data['completion'] + "<|endoftext|>" - prompt_token = tokenizer(prompt, - max_length=max_length, - padding="max_length", - truncation=True, - return_tensors="pt") - - # self.prompts.append(prompt_token)s - self.input_ids.append(prompt_token) - self.labels = copy.deepcopy(self.input_ids) - - def __len__(self): - length = len(self.prompts) - return length - - def __getitem__(self, idx): - # dict(input_ids=self.input_ids[i], labels=self.labels[i]) - return dict(input_ids=self.input_ids[idx], labels=self.labels[idx]) - # return dict(self.prompts[idx], self.prompts[idx]) - - -def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict: - """Tokenize a list of strings.""" - tokenized_list = [ - tokenizer( - text, - return_tensors="pt", - padding="longest", - max_length=tokenizer.model_max_length, - truncation=True, - ) for text in strings - ] - input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list] - input_ids_lens = labels_lens = [ - tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item() for tokenized in tokenized_list - ] - return dict( - input_ids=input_ids, - labels=labels, - input_ids_lens=input_ids_lens, - labels_lens=labels_lens, - ) - - -def preprocess( - sources: Sequence[str], - targets: Sequence[str], - tokenizer: transformers.PreTrainedTokenizer, -) -> Dict: - """Preprocess the data by tokenizing.""" - examples = [s + t for s, t in zip(sources, targets)] - examples_tokenized, sources_tokenized = [_tokenize_fn(strings, tokenizer) for strings in (examples, sources)] - input_ids = examples_tokenized["input_ids"] - labels = copy.deepcopy(input_ids) - for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]): - label[:source_len] = IGNORE_INDEX - return dict(input_ids=input_ids, labels=labels) - - -class 
SupervisedDataset(Dataset): - """Dataset for supervised fine-tuning.""" - - def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, max_datasets_size: int = None): - super(SupervisedDataset, self).__init__() - logger.info("Loading data...") - list_data_dict = jload(data_path) - logger.info(f"Loaded {len(list_data_dict)} examples.") - - if max_datasets_size is not None: - logger.info(f"Limiting dataset to {max_datasets_size} examples.") - list_data_dict = list_data_dict[:max_datasets_size] - - logger.info("Formatting inputs...") - prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"] - sources = [ - prompt_input.format_map(example) if example.get("input", "") != "" else prompt_no_input.format_map(example) - for example in list_data_dict - ] - targets = [f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict] - - logger.info("Tokenizing inputs... This may take some time...") - data_dict = preprocess(sources, targets, tokenizer) - - self.input_ids = data_dict["input_ids"] - self.labels = data_dict["labels"] - - def __len__(self): - return len(self.input_ids) - - def __getitem__(self, i) -> Dict[str, torch.Tensor]: - return dict(input_ids=self.input_ids[i], labels=self.labels[i]) - - -@dataclass -class DataCollatorForSupervisedDataset(object): - """Collate examples for supervised fine-tuning.""" - - tokenizer: transformers.PreTrainedTokenizer - - def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: - input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels")) - input_ids = torch.nn.utils.rnn.pad_sequence(input_ids, - batch_first=True, - padding_value=self.tokenizer.pad_token_id) - labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX) - return dict( - input_ids=input_ids, - labels=labels, - attention_mask=input_ids.ne(self.tokenizer.pad_token_id), - ) diff --git a/neurons/text/prompting/miners/self_hosted/coati/dataset/utils.py b/neurons/text/prompting/miners/self_hosted/coati/dataset/utils.py deleted file mode 100644 index f37fce67a7..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/dataset/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -import io -import json - -import torch.distributed as dist - - -def is_rank_0() -> bool: - return not dist.is_initialized() or dist.get_rank() == 0 - - -def _make_r_io_base(f, mode: str): - if not isinstance(f, io.IOBase): - f = open(f, mode=mode) - return f - - -def jload(f, mode="r"): - """Load a .json file into a dictionary.""" - f = _make_r_io_base(f, mode) - jdict = json.load(f) - f.close() - return jdict diff --git a/neurons/text/prompting/miners/self_hosted/coati/experience_maker/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/experience_maker/__init__.py deleted file mode 100644 index 39ca7576b2..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/experience_maker/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base import Experience, ExperienceMaker -from .naive import NaiveExperienceMaker - -__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/experience_maker/base.py b/neurons/text/prompting/miners/self_hosted/coati/experience_maker/base.py deleted file mode 100644 index 61fd4f6744..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/experience_maker/base.py +++ /dev/null @@ -1,77 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses 
import dataclass -from typing import Optional - -import torch -import torch.nn as nn -from coati.models.base import Actor - - -@dataclass -class Experience: - """Experience is a batch of data. - These data should have the the sequence length and number of actions. - Left padding for sequences is applied. - - Shapes of each tensor: - sequences: (B, S) - action_log_probs: (B, A) - values: (B) - reward: (B) - advatanges: (B) - attention_mask: (B, S) - action_mask: (B, A) - - "A" is the number of actions. - """ - sequences: torch.Tensor - action_log_probs: torch.Tensor - values: torch.Tensor - reward: torch.Tensor - advantages: torch.Tensor - attention_mask: Optional[torch.LongTensor] - action_mask: Optional[torch.BoolTensor] - - @torch.no_grad() - def to_device(self, device: torch.device) -> None: - self.sequences = self.sequences.to(device) - self.action_log_probs = self.action_log_probs.to(device) - self.values = self.values.to(device) - self.reward = self.reward.to(device) - self.advantages = self.advantages.to(device) - if self.attention_mask is not None: - self.attention_mask = self.attention_mask.to(device) - if self.action_mask is not None: - self.action_mask = self.action_mask.to(device) - - def pin_memory(self): - self.sequences = self.sequences.pin_memory() - self.action_log_probs = self.action_log_probs.pin_memory() - self.values = self.values.pin_memory() - self.reward = self.reward.pin_memory() - self.advantages = self.advantages.pin_memory() - if self.attention_mask is not None: - self.attention_mask = self.attention_mask.pin_memory() - if self.action_mask is not None: - self.action_mask = self.action_mask.pin_memory() - return self - - -class ExperienceMaker(ABC): - - def __init__(self, - actor: Actor, - critic: nn.Module, - reward_model: nn.Module, - initial_model: Actor, - kl_coef: float = 0.1) -> None: - super().__init__() - self.actor = actor - self.critic = critic - self.reward_model = reward_model - self.initial_model = initial_model - self.kl_coef = kl_coef - - @abstractmethod - def make_experience(self, input_ids: torch.Tensor, **generate_kwargs) -> Experience: - pass diff --git a/neurons/text/prompting/miners/self_hosted/coati/experience_maker/naive.py b/neurons/text/prompting/miners/self_hosted/coati/experience_maker/naive.py deleted file mode 100644 index 94546eeb28..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/experience_maker/naive.py +++ /dev/null @@ -1,35 +0,0 @@ -import torch -from coati.models.utils import compute_reward, normalize - -from .base import Experience, ExperienceMaker - - -class NaiveExperienceMaker(ExperienceMaker): - """ - Naive experience maker. 
- """ - - @torch.no_grad() - def make_experience(self, input_ids: torch.Tensor, **generate_kwargs) -> Experience: - self.actor.eval() - self.critic.eval() - self.initial_model.eval() - self.reward_model.eval() - - sequences, attention_mask, action_mask = self.actor.generate(input_ids, - return_action_mask=True, - **generate_kwargs) - num_actions = action_mask.size(1) - - action_log_probs = self.actor(sequences, num_actions, attention_mask) - base_action_log_probs = self.initial_model(sequences, num_actions, attention_mask) - value = self.critic(sequences, action_mask, attention_mask) - r = self.reward_model(sequences, attention_mask) - reward = compute_reward(r, self.kl_coef, action_log_probs, base_action_log_probs, action_mask=action_mask) - - advantage = reward - value - # TODO(ver217): maybe normalize adv - if advantage.ndim == 1: - advantage = advantage.unsqueeze(-1) - - return Experience(sequences, action_log_probs, value, reward, advantage, attention_mask, action_mask) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/__init__.py deleted file mode 100644 index 7489b2e87c..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base import Actor, Critic, RewardModel -from .loss import LogExpLoss, LogSigLoss, PolicyLoss, PPOPtxActorLoss, ValueLoss - -__all__ = ['Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'PPOPtxActorLoss', 'LogSigLoss', 'LogExpLoss'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/auto/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/auto/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/auto/actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/auto/actor.py deleted file mode 100644 index be9e9abf1f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/auto/actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from transformers import AutoConfig, AutoModelForCausalLM - - -from base import Actor - - -class AutoActor(Actor): - """ - Auto Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (AutoConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[AutoConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = AutoModelForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = AutoModelForCausalLM(config) - else: - model = AutoModelForCausalLM(AutoConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/auto/critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/auto/critic.py deleted file mode 100644 index fa3d698f4a..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/auto/critic.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import AutoConfig, AutoModel -from ..base import Critic - - -class AutoCritic(Critic): - """ - Auto Critic model. 
- - Args: - pretrained (str): Pretrained model name or path. - config (AutoConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[AutoConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none', - **kwargs) -> None: - if pretrained is not None: - model = AutoModel.from_pretrained(pretrained) - elif config is not None: - model = AutoModel(config) - else: - model = AutoModel(AutoConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.word_embed_proj_dim, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/auto/lm.py b/neurons/text/prompting/miners/self_hosted/coati/models/auto/lm.py deleted file mode 100644 index 6375429f82..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/auto/lm.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Optional - -from transformers import AutoConfig, AutoModelForCausalLM -from ..base import LM - - -class AutoLM(LM): - """ - Auto language model. - - Args: - pretrained (str): Pretrained model name or path. - config (AutoConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[AutoConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = AutoModelForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = AutoModelForCausalLM(config) - else: - model = AutoModelForCausalLM(AutoConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) - - def forward(self, input_ids, attention_mask=None, labels=None, **kwargs): - return self.model(input_ids, attention_mask=attention_mask, labels=labels, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/auto/reward_model.py b/neurons/text/prompting/miners/self_hosted/coati/models/auto/reward_model.py deleted file mode 100644 index bbca36a48e..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/auto/reward_model.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import AutoConfig, AutoModel -from ..base import RewardModel - - -class AutoRM(RewardModel): - """ - Auto Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (AutoConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[AutoConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = AutoModel.from_pretrained(pretrained) - elif config is not None: - model = AutoModel(config) - else: - model = AutoModel(AutoConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - - value_head = nn.Linear(model.config.word_embed_proj_dim, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.word_embed_proj_dim + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/base/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/base/__init__.py deleted file mode 100644 index 7cf82309af..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/base/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .actor import Actor -from .critic import Critic -from .lm import LM -from .reward_model import RewardModel - -__all__ = ['Actor', 'Critic', 'RewardModel', 'LM'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/base/actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/base/actor.py deleted file mode 100644 index 71fbf7bbae..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/base/actor.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..generation import generate -from ..lora import LoRAModule -from ..utils import log_probs_from_logits - - -class Actor(LoRAModule): - """ - Actor model base class. - - Args: - model (nn.Module): Actor Model. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: - super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) - self.model = model - self.convert_to_lora() - - @torch.no_grad() - def generate( - self, - input_ids: torch.Tensor, - return_action_mask: bool = True, - **kwargs - ) -> Union[Tuple[torch.LongTensor, torch.LongTensor], Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]]: - sequences = generate(self.model, input_ids, **kwargs) - attention_mask = None - pad_token_id = kwargs.get('pad_token_id', None) - if pad_token_id is not None: - attention_mask = sequences.not_equal(pad_token_id).to(dtype=torch.long, device=sequences.device) - if not return_action_mask: - return sequences, attention_mask, None - input_len = input_ids.size(1) - eos_token_id = kwargs.get('eos_token_id', None) - if eos_token_id is None: - action_mask = torch.ones_like(sequences, dtype=torch.bool) - else: - # left padding may be applied, only mask action - action_mask = (sequences[:, input_len:] == eos_token_id).cumsum(dim=-1) == 0 - action_mask = F.pad(action_mask, (1 + input_len, -1), value=True) # include eos token and input - action_mask[:, :input_len] = False - action_mask = action_mask[:, 1:] - return sequences, attention_mask, action_mask[:, -(sequences.size(1) - input_len):] - - def forward(self, - sequences: torch.LongTensor, - num_actions: int, - attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - """Returns action log probs - """ - output = self.model(sequences, attention_mask=attention_mask) - logits = output['logits'] - log_probs = log_probs_from_logits(logits[:, :-1, :], sequences[:, 1:]) - return log_probs[:, -num_actions:] - - def get_base_model(self): - return self.model diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/base/critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/base/critic.py deleted file mode 100644 index e68a743a77..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/base/critic.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn - -from ..lora import LoRAModule -from ..utils import masked_mean - - -class Critic(LoRAModule): - """ - Critic model base class. - - Args: - model (nn.Module): Critic model. - value_head (nn.Module): Value head to get value. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__( - self, - model: nn.Module, - value_head: nn.Module, - lora_rank: int = 0, - lora_train_bias: str = 'none', - use_action_mask: bool = False, - ) -> None: - - super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) - self.model = model - self.value_head = value_head - self.use_action_mask = use_action_mask - self.convert_to_lora() - - def forward(self, - sequences: torch.LongTensor, - action_mask: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - outputs = self.model(sequences, attention_mask=attention_mask) - last_hidden_states = outputs['last_hidden_state'] - - values = self.value_head(last_hidden_states).squeeze(-1) - - if action_mask is not None and self.use_action_mask: - num_actions = action_mask.size(1) - prompt_mask = attention_mask[:, :-num_actions] - values = values[:, :-num_actions] - value = masked_mean(values, prompt_mask, dim=1) - return value - - values = values[:, :-1] - value = values.mean(dim=1) - return value diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/base/lm.py b/neurons/text/prompting/miners/self_hosted/coati/models/base/lm.py deleted file mode 100644 index e32ba42533..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/base/lm.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..generation import generate -from .actor import Actor - - -class LM(Actor): - """ - Language model base class. - - Args: - model (nn.Module): Language Model. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: - super().__init__(model=model, lora_rank=lora_rank, lora_train_bias=lora_train_bias) - - def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - """Returns output log probs - """ - output = self.model(sequences, attention_mask=attention_mask) - logits = output['logits'] - log_probs = F.log_softmax(logits, dim=-1) - return log_probs diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/base/reward_model.py b/neurons/text/prompting/miners/self_hosted/coati/models/base/reward_model.py deleted file mode 100644 index ce8c0a1d35..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/base/reward_model.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn - -from ..lora import LoRAModule - - -class RewardModel(LoRAModule): - """ - Reward model base class. - - Args: - model (nn.Module): Reward model. - value_head (nn.Module): Value head to get reward score. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - model: nn.Module, - value_head: Optional[nn.Module] = None, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) - self.model = model - self.convert_to_lora() - - if value_head is not None: - if value_head.out_features != 1: - raise ValueError("The value head of reward model's output dim should be 1!") - self.value_head = value_head - else: - self.value_head = nn.Linear(model.config.n_embd, 1) - - def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - outputs = self.model(sequences, attention_mask=attention_mask) - last_hidden_states = outputs['last_hidden_state'] - values = self.value_head(last_hidden_states)[:, :-1] - value = values.mean(dim=1).squeeze(1) # ensure shape is (B) - return value diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/bloom/__init__.py deleted file mode 100644 index 39dfe036a2..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .bloom_actor import BLOOMActor -from .bloom_critic import BLOOMCritic -from .bloom_lm import BLOOMLM -from .bloom_rm import BLOOMRM - -__all__ = ['BLOOMActor', 'BLOOMCritic', 'BLOOMRM', 'BLOOMLM'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_actor.py deleted file mode 100644 index d7577f0964..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -import torch -from transformers import BloomConfig, BloomForCausalLM, BloomModel - -from ..base import Actor - - -class BLOOMActor(Actor): - """ - BLOOM Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (BloomConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: str = None, - config: Optional[BloomConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = BloomForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = BloomForCausalLM(config) - else: - model = BloomForCausalLM(BloomConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_critic.py deleted file mode 100644 index a32fb2e102..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_critic.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn -from transformers import BloomConfig, BloomForCausalLM, BloomModel - -from ..base import Critic - - -class BLOOMCritic(Critic): - """ - BLOOM Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (BloomConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: str = None, - config: Optional[BloomConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none', - **kwargs) -> None: - if pretrained is not None: - model = BloomModel.from_pretrained(pretrained) - elif config is not None: - model = BloomModel(config) - else: - model = BloomModel(BloomConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_lm.py b/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_lm.py deleted file mode 100644 index e4184fcd0d..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_lm.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch -from transformers import BloomConfig, BloomForCausalLM, BloomModel - -from ..base import LM - - -class BLOOMLM(LM): - """ - BLOOM language model. - - Args: - pretrained (str): Pretrained model name or path. - config (BloomConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: str = None, - config: Optional[BloomConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = BloomForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = BloomForCausalLM(config) - else: - model = BloomForCausalLM(BloomConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) - - def forward(self, input_ids, attention_mask=None, labels=None, **kwargs): - return self.model(input_ids, attention_mask=attention_mask, labels=labels, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_rm.py b/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_rm.py deleted file mode 100644 index 22cfab441a..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/bloom/bloom_rm.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import BloomConfig, BloomForCausalLM, BloomModel - -from ..base import RewardModel - - -class BLOOMRM(RewardModel): - """ - BLOOM Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (BloomConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: str = None, - config: Optional[BloomConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = BloomModel.from_pretrained(pretrained) - elif config is not None: - model = BloomModel(config) - else: - model = BloomModel(BloomConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/deberta/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/deberta/__init__.py deleted file mode 100644 index b66888f34f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/deberta/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .deberta_critic import DebertaCritic -from .deberta_rm import DebertaRM - -__all__ = ['DebertaCritic', 'DebertaRM'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/deberta/deberta_critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/deberta/deberta_critic.py deleted file mode 100644 index e84c1dbd83..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/deberta/deberta_critic.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import DebertaV2Config, DebertaV2Model - -from ..base import Critic - - -class DebertaCritic(Critic): - """ - Deberta Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (DebertaV2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LO-RA decomposition. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[DebertaV2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = DebertaV2Model.from_pretrained(pretrained) - elif config is not None: - model = DebertaV2Model(config) - else: - model = DebertaV2Model(DebertaV2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/deberta/deberta_rm.py b/neurons/text/prompting/miners/self_hosted/coati/models/deberta/deberta_rm.py deleted file mode 100644 index 2448c879ec..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/deberta/deberta_rm.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import DebertaV2Config, DebertaV2Model - -from ..base import RewardModel - - -class DebertaRM(RewardModel): - """ - Deberta Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (DebertaV2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LO-RA decomposition. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: str = None, - config: Optional[DebertaV2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = DebertaV2Model.from_pretrained(pretrained) - elif config is not None: - model = DebertaV2Model(config) - else: - model = DebertaV2Model(DebertaV2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/generation.py b/neurons/text/prompting/miners/self_hosted/coati/models/generation.py deleted file mode 100644 index eb30c36d0f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/generation.py +++ /dev/null @@ -1,146 +0,0 @@ -from typing import Any, Callable, Optional - -import torch -import torch.distributed as dist -import torch.nn as nn - -try: - from transformers.generation_logits_process import ( - LogitsProcessorList, - TemperatureLogitsWarper, - TopKLogitsWarper, - TopPLogitsWarper, - ) -except ImportError: - from transformers.generation import LogitsProcessorList, TemperatureLogitsWarper, TopKLogitsWarper, TopPLogitsWarper - - -def prepare_logits_processor(top_k: Optional[int] = None, - top_p: Optional[float] = None, - temperature: Optional[float] = None) -> LogitsProcessorList: - processor_list = LogitsProcessorList() - if temperature is not None and temperature != 1.0: - processor_list.append(TemperatureLogitsWarper(temperature)) - if top_k is not None and top_k != 0: - processor_list.append(TopKLogitsWarper(top_k)) - if top_p is not None and top_p < 1.0: - processor_list.append(TopPLogitsWarper(top_p)) - return processor_list - - -def _is_sequence_finished(unfinished_sequences: torch.Tensor) -> bool: - if dist.is_initialized() and dist.get_world_size() > 1: - # consider DP - unfinished_sequences = unfinished_sequences.clone() - dist.all_reduce(unfinished_sequences) - return unfinished_sequences.max() == 0 - - -def sample(model: nn.Module, - input_ids: torch.Tensor, - max_length: int, - early_stopping: bool = False, - eos_token_id: Optional[int] = None, - pad_token_id: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - temperature: Optional[float] = None, - prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None, - update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None, - **model_kwargs) -> torch.Tensor: - if input_ids.size(1) >= max_length: - return input_ids - - logits_processor = prepare_logits_processor(top_k, top_p, temperature) - unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1) - - for _ in range(input_ids.size(1), max_length): - model_inputs = prepare_inputs_fn(input_ids, **model_kwargs) if prepare_inputs_fn is not None else { - 'input_ids': input_ids - } - outputs = model(**model_inputs) - - next_token_logits = outputs['logits'][:, -1, :] - # pre-process distribution - next_token_logits = logits_processor(input_ids, next_token_logits) - # sample - probs = torch.softmax(next_token_logits, dim=-1, dtype=torch.float) - next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) - - # finished sentences should have their next token be a padding token - if eos_token_id is not None: - if pad_token_id is None: - raise ValueError("If `eos_token_id` is defined, make sure 
that `pad_token_id` is defined.") - next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences) - - # update generated ids, model inputs for next step - input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) - if update_model_kwargs_fn is not None: - model_kwargs = update_model_kwargs_fn(outputs, **model_kwargs) - - # if eos_token was found in one sentence, set sentence to finished - if eos_token_id is not None: - unfinished_sequences = unfinished_sequences.mul((next_tokens != eos_token_id).long()) - - # stop when each sentence is finished if early_stopping=True - if early_stopping and _is_sequence_finished(unfinished_sequences): - break - - return input_ids - - -def generate(model: nn.Module, - input_ids: torch.Tensor, - max_length: int, - num_beams: int = 1, - do_sample: bool = True, - early_stopping: bool = False, - eos_token_id: Optional[int] = None, - pad_token_id: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - temperature: Optional[float] = None, - prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None, - update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None, - **model_kwargs) -> torch.Tensor: - """Generate token sequence. The returned sequence is input_ids + generated_tokens. - - Args: - model (nn.Module): model - input_ids (torch.Tensor): input sequence - max_length (int): max length of the returned sequence - num_beams (int, optional): number of beams. Defaults to 1. - do_sample (bool, optional): whether to do sample. Defaults to True. - early_stopping (bool, optional): if True, the sequence length may be smaller than max_length due to finding eos. Defaults to False. - eos_token_id (Optional[int], optional): end of sequence token id. Defaults to None. - pad_token_id (Optional[int], optional): pad token id. Defaults to None. - top_k (Optional[int], optional): the number of highest probability vocabulary tokens to keep for top-k-filtering. Defaults to None. - top_p (Optional[float], optional): If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. Defaults to None. - temperature (Optional[float], optional): The value used to module the next token probabilities. Defaults to None. - prepare_inputs_fn (Optional[Callable[[torch.Tensor, Any], dict]], optional): Function to preprocess model inputs. Arguments of this function should be input_ids and model_kwargs. Defaults to None. - update_model_kwargs_fn (Optional[Callable[[dict, Any], dict]], optional): Function to update model_kwargs based on outputs. Arguments of this function should be outputs and model_kwargs. Defaults to None. 
- """ - is_greedy_gen_mode = ((num_beams == 1) and do_sample is False) - is_sample_gen_mode = ((num_beams == 1) and do_sample is True) - is_beam_gen_mode = ((num_beams > 1) and do_sample is False) - if is_greedy_gen_mode: - # run greedy search - raise NotImplementedError - elif is_sample_gen_mode: - # run sample - return sample(model, - input_ids, - max_length, - early_stopping=early_stopping, - eos_token_id=eos_token_id, - pad_token_id=pad_token_id, - top_k=top_k, - top_p=top_p, - temperature=temperature, - prepare_inputs_fn=prepare_inputs_fn, - update_model_kwargs_fn=update_model_kwargs_fn, - **model_kwargs) - elif is_beam_gen_mode: - raise NotImplementedError - else: - raise ValueError("Unsupported generation mode") diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/generation_utils.py b/neurons/text/prompting/miners/self_hosted/coati/models/generation_utils.py deleted file mode 100644 index c7bc1b383f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/generation_utils.py +++ /dev/null @@ -1,92 +0,0 @@ -from typing import Optional - -import torch - - -def gpt_prepare_inputs_fn(input_ids: torch.Tensor, past: Optional[torch.Tensor] = None, **kwargs) -> dict: - token_type_ids = kwargs.get("token_type_ids", None) - # only last token for inputs_ids if past is defined in kwargs - if past: - input_ids = input_ids[:, -1].unsqueeze(-1) - if token_type_ids is not None: - token_type_ids = token_type_ids[:, -1].unsqueeze(-1) - - attention_mask = kwargs.get("attention_mask", None) - position_ids = kwargs.get("position_ids", None) - - if attention_mask is not None and position_ids is None: - # create position_ids on the fly for batch generation - position_ids = attention_mask.long().cumsum(-1) - 1 - position_ids.masked_fill_(attention_mask == 0, 1) - if past: - position_ids = position_ids[:, -1].unsqueeze(-1) - else: - position_ids = None - return { - "input_ids": input_ids, - "past_key_values": past, - "use_cache": kwargs.get("use_cache"), - "position_ids": position_ids, - "attention_mask": attention_mask, - "token_type_ids": token_type_ids, - } - - -def update_model_kwargs_fn(outputs: dict, **model_kwargs) -> dict: - if "past_key_values" in outputs: - model_kwargs["past"] = outputs["past_key_values"] - else: - model_kwargs["past"] = None - - # update token_type_ids with last value - if "token_type_ids" in model_kwargs: - token_type_ids = model_kwargs["token_type_ids"] - model_kwargs["token_type_ids"] = torch.cat([token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1) - - # update attention mask - if "attention_mask" in model_kwargs: - attention_mask = model_kwargs["attention_mask"] - model_kwargs["attention_mask"] = torch.cat( - [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1) - - return model_kwargs - - -def opt_prepare_inputs_fn(input_ids: torch.Tensor, - past: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None, - use_cache: Optional[bool] = None, - **kwargs) -> dict: - # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly - if attention_mask is None: - attention_mask = input_ids.new_ones(input_ids.shape) - - if past: - input_ids = input_ids[:, -1:] - # first step, decoder_cached_states are empty - return { - "input_ids": input_ids, # encoder_outputs is defined. 
input_ids not needed - "attention_mask": attention_mask, - "past_key_values": past, - "use_cache": use_cache, - } - - -def bloom_prepare_inputs_fn(input_ids: torch.Tensor, - past: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None, - use_cache: Optional[bool] = None, - **kwargs) -> dict: - # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly - if attention_mask is None: - attention_mask = input_ids.new_ones(input_ids.shape) - - if past: - input_ids = input_ids[:, -1:] - # first step, decoder_cached_states are empty - return { - "input_ids": input_ids, # encoder_outputs is defined. input_ids not needed - "attention_mask": attention_mask, - "past_key_values": past, - "use_cache": use_cache, - } diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/gpt/__init__.py deleted file mode 100644 index 9dc68e3754..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .gpt_actor import GPTActor -from .gpt_critic import GPTCritic -from .gpt_lm import GPTLM -from .gpt_rm import GPTRM - -__all__ = ['GPTActor', 'GPTCritic', 'GPTRM', 'GPTLM'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_actor.py deleted file mode 100644 index 6a53ad40b8..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from transformers.models.gpt2.configuration_gpt2 import GPT2Config -from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel - -from ..base import Actor - - -class GPTActor(Actor): - """ - GPT Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (GPT2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LoRa layer. - lora_train_bias (str): Bias training strategy for the LoRa layer. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[GPT2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = GPT2LMHeadModel.from_pretrained(pretrained) - elif config is not None: - model = GPT2LMHeadModel(config) - else: - model = GPT2LMHeadModel(GPT2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_critic.py deleted file mode 100644 index 25bb1ed94d..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_critic.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers.models.gpt2.configuration_gpt2 import GPT2Config -from transformers.models.gpt2.modeling_gpt2 import GPT2Model - -from ..base import Critic - - -class GPTCritic(Critic): - """ - GPT Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (GPT2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LO-RA decomposition. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[GPT2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = GPT2Model.from_pretrained(pretrained) - elif config is not None: - model = GPT2Model(config) - else: - model = GPT2Model(GPT2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.n_embd, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_lm.py b/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_lm.py deleted file mode 100644 index c558d7e9ea..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_lm.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -from transformers.models.gpt2.configuration_gpt2 import GPT2Config -from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel - -from ..base import LM - - -class GPTLM(LM): - """ - GPT language model. - - Args: - pretrained (str): Pretrained model name or path. - config (GPT2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LoRa layer. - lora_train_bias (str): Bias training strategy for the LoRa layer. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[GPT2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = GPT2LMHeadModel.from_pretrained(pretrained) - elif config is not None: - model = GPT2LMHeadModel(config) - else: - model = GPT2LMHeadModel(GPT2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) - - def forward(self, input_ids, attention_mask=None, labels=None, **kwargs): - return self.model(input_ids, attention_mask=attention_mask, labels=labels, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_rm.py b/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_rm.py deleted file mode 100644 index 054432e1ce..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/gpt/gpt_rm.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers.models.gpt2.configuration_gpt2 import GPT2Config -from transformers.models.gpt2.modeling_gpt2 import GPT2Model - -from ..base import RewardModel - - -class GPTRM(RewardModel): - """ - GPT Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (GPT2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[GPT2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = GPT2Model.from_pretrained(pretrained) - elif config is not None: - model = GPT2Model(config) - else: - model = GPT2Model(GPT2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - - value_head = nn.Linear(model.config.n_embd, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/llama/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/llama/__init__.py deleted file mode 100644 index 0d4dada3c9..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/llama/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .llama_actor import LlamaActor -from .llama_critic import LlamaCritic -from .llama_lm import LlamaLM -from .llama_rm import LlamaRM - -__all__ = ['LlamaActor', 'LlamaCritic', 'LlamaRM', 'LlamaLM'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_actor.py deleted file mode 100644 index 2c7adb390d..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_actor.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch -from transformers import AutoModelForCausalLM, LlamaConfig, LlamaForCausalLM - -from ..base import Actor - - -class LlamaActor(Actor): - """ - Llama Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (LlamaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[LlamaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - - if pretrained is not None: - model = LlamaForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = LlamaForCausalLM(config) - else: - model = LlamaForCausalLM(LlamaConfig()) - - if checkpoint: - model.gradient_checkpointing_enable() - - super().__init__(model, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_critic.py deleted file mode 100644 index cd565031e1..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_critic.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn -from transformers import AutoModelForCausalLM, LlamaConfig, LlamaForCausalLM - -from ..base import Critic - - -class LlamaCritic(Critic): - """ - Llama Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (LlamaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[LlamaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none', - **kwargs) -> None: - - if pretrained is not None: - model = LlamaForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = LlamaForCausalLM(config) - else: - model = LlamaForCausalLM(LlamaConfig()) - - if checkpoint: - model.gradient_checkpointing_enable() - - value_head = nn.Linear(model.config.hidden_size, 1) - - super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_lm.py b/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_lm.py deleted file mode 100644 index 181910fb13..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_lm.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -from transformers import LlamaConfig, LlamaForCausalLM - -from ..base import LM - - -class LlamaLM(LM): - """ - Llama language model. - - Args: - pretrained (str): Pretrained model name or path. - config (LlamaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[LlamaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - - if pretrained is not None: - model = LlamaForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = LlamaForCausalLM(config) - else: - model = LlamaForCausalLM(LlamaConfig()) - - if checkpoint: - model.gradient_checkpointing_enable() - - super().__init__(model, lora_rank, lora_train_bias) - - def forward(self, input_ids, attention_mask=None, labels=None, **kwargs): - return self.model(input_ids, attention_mask=attention_mask, labels=labels, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_rm.py b/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_rm.py deleted file mode 100644 index f936019d62..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/llama/llama_rm.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import LlamaConfig, LlamaForCausalLM, LlamaModel - -from ..base import RewardModel - - -class LlamaRM(RewardModel): - """ - Llama Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (LlamaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[LlamaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - - if pretrained is not None: - model = LlamaModel.from_pretrained(pretrained) - elif config is not None: - model = LlamaModel(config) - else: - model = LlamaModel(LlamaConfig()) - - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1)) - - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/lora.py b/neurons/text/prompting/miners/self_hosted/coati/models/lora.py deleted file mode 100644 index f8f7a1cb5d..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/lora.py +++ /dev/null @@ -1,129 +0,0 @@ -import math -from typing import Optional - -import loralib as lora -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class LoraLinear(lora.LoRALayer, nn.Module): - """Replace in-place ops to out-of-place ops to fit gemini. Convert a torch.nn.Linear to LoraLinear. - """ - - def __init__( - self, - weight: nn.Parameter, - bias: Optional[nn.Parameter], - r: int = 0, - lora_alpha: int = 1, - lora_dropout: float = 0., - fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out) - merge_weights: bool = True, - ): - nn.Module.__init__(self) - lora.LoRALayer.__init__(self, - r=r, - lora_alpha=lora_alpha, - lora_dropout=lora_dropout, - merge_weights=merge_weights) - self.weight = weight - self.bias = bias - - out_features, in_features = weight.shape - self.in_features = in_features - self.out_features = out_features - - self.fan_in_fan_out = fan_in_fan_out - # Actual trainable parameters - if r > 0: - self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features))) - self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r))) - self.scaling = self.lora_alpha / self.r - # Freezing the pre-trained weight matrix - self.weight.requires_grad = False - self.reset_parameters() - if fan_in_fan_out: - self.weight.data = self.weight.data.T - - def reset_parameters(self): - if hasattr(self, 'lora_A'): - # initialize A the same way as the default for nn.Linear and B to zero - nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) - nn.init.zeros_(self.lora_B) - - def train(self, mode: bool = True): - - def T(w): - return w.T if self.fan_in_fan_out else w - - nn.Module.train(self, mode) - if self.merge_weights and self.merged: - # Make sure that the weights are not merged - if self.r > 0: - self.weight.data -= T(self.lora_B @ self.lora_A) * self.scaling - self.merged = False - - def eval(self): - - def T(w): - return w.T if self.fan_in_fan_out else w - - nn.Module.eval(self) - if self.merge_weights and not self.merged: - # Merge the weights and mark it - if self.r > 0: - self.weight.data += T(self.lora_B @ self.lora_A) * self.scaling - delattr(self, 'lora_A') - delattr(self, 'lora_B') - self.merged = True - - def forward(self, x: torch.Tensor): - - def T(w): - return w.T if self.fan_in_fan_out else w - - if self.r > 0 and not self.merged: - result = F.linear(x, T(self.weight), bias=self.bias) - if self.r > 0: - result = result + (self.lora_dropout(x) @ self.lora_A.t() @ self.lora_B.t()) * self.scaling - return result - else: - return F.linear(x, T(self.weight), bias=self.bias) - - -def 
lora_linear_wrapper(linear: nn.Linear, lora_rank: int) -> LoraLinear:
-    assert lora_rank <= linear.in_features, f'LoRA rank ({lora_rank}) must be less than or equal to in features ({linear.in_features})'
-    lora_linear = LoraLinear(linear.weight, linear.bias, r=lora_rank, merge_weights=False)
-    return lora_linear
-
-
-def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None:
-    for name, child in module.named_children():
-        if isinstance(child, nn.Linear):
-            setattr(module, name, lora_linear_wrapper(child, lora_rank))
-        else:
-            convert_to_lora_recursively(child, lora_rank)
-
-
-class LoRAModule(nn.Module):
-    """A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`.
-    This class will convert all torch.nn.Linear layers to LoraLinear layers.
-
-    Args:
-        lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0.
-        lora_train_bias (str, optional): Which biases LoRA trains.
-            'none' means it doesn't train biases. 'all' means it trains all biases. 'lora_only' means it only trains biases of LoRA layers.
-            Defaults to 'none'.
-    """
-
-    def __init__(self, lora_rank: int = 0, lora_train_bias: str = 'none') -> None:
-        super().__init__()
-        self.lora_rank = lora_rank
-        self.lora_train_bias = lora_train_bias
-
-    def convert_to_lora(self) -> None:
-        if self.lora_rank <= 0:
-            return
-        convert_to_lora_recursively(self, self.lora_rank)
-        lora.mark_only_lora_as_trainable(self, self.lora_train_bias)
diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/loss.py b/neurons/text/prompting/miners/self_hosted/coati/models/loss.py
deleted file mode 100644
index 7fc437d90f..0000000000
--- a/neurons/text/prompting/miners/self_hosted/coati/models/loss.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from typing import Optional
-
-import torch
-import torch.nn as nn
-
-from .utils import masked_mean
-
-
-class GPTLMLoss(nn.Module):
-    """
-    GPT Language Model Loss
-    """
-
-    def __init__(self):
-        super().__init__()
-        self.loss = nn.CrossEntropyLoss()
-
-    def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
-        shift_logits = logits[..., :-1, :].contiguous()
-        shift_labels = labels[..., 1:].contiguous()
-        # Flatten the tokens
-        return self.loss(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
-
-
-class PolicyLoss(nn.Module):
-    """
-    Policy Loss for PPO
-    """
-
-    def __init__(self, clip_eps: float = 0.2) -> None:
-        super().__init__()
-        self.clip_eps = clip_eps
-
-    def forward(self,
-                log_probs: torch.Tensor,
-                old_log_probs: torch.Tensor,
-                advantages: torch.Tensor,
-                action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        ratio = (log_probs - old_log_probs).exp()
-        surr1 = ratio * advantages
-        surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages
-        loss = -torch.min(surr1, surr2)
-        if action_mask is not None:
-            loss = masked_mean(loss, action_mask)
-        loss = loss.mean()
-        return loss
-
-
-class ValueLoss(nn.Module):
-    """
-    Value Loss for PPO
-    """
-
-    def __init__(self, clip_eps: float = 0.4) -> None:
-        super().__init__()
-        self.clip_eps = clip_eps
-
-    def forward(self,
-                values: torch.Tensor,
-                old_values: torch.Tensor,
-                reward: torch.Tensor,
-                action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        values_clipped = old_values + (values - old_values).clamp(-self.clip_eps, self.clip_eps)
-        surr1 = (values_clipped - reward)**2
-        surr2 = (values - reward)**2
-        loss = torch.max(surr1, surr2)
-        loss = loss.mean()
-        return loss
-
-
-class
PPOPtxActorLoss(nn.Module): - """ - To Do: - - PPO-ptx Actor Loss - """ - - def __init__(self, policy_clip_eps: float = 0.2, pretrain_coef: float = 0.0, pretrain_loss_fn=GPTLMLoss()) -> None: - super().__init__() - self.pretrain_coef = pretrain_coef - self.policy_loss_fn = PolicyLoss(clip_eps=policy_clip_eps) - self.pretrain_loss_fn = pretrain_loss_fn - - def forward(self, - log_probs: torch.Tensor, - old_log_probs: torch.Tensor, - advantages: torch.Tensor, - lm_logits: torch.Tensor, - lm_input_ids: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - policy_loss = self.policy_loss_fn(log_probs, old_log_probs, advantages, action_mask=action_mask) - lm_loss = self.pretrain_loss_fn(lm_logits, lm_input_ids) - return policy_loss + self.pretrain_coef * lm_loss - - -class LogSigLoss(nn.Module): - """ - Pairwise Loss for Reward Model - Details: https://arxiv.org/abs/2203.02155 - """ - - def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor: - probs = torch.sigmoid(chosen_reward - reject_reward) - log_probs = torch.log(probs) - loss = -log_probs.mean() - return loss - - -class LogExpLoss(nn.Module): - """ - Pairwise Loss for Reward Model - Details: https://arxiv.org/abs/2204.05862 - """ - - def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor: - loss = torch.log(1 + torch.exp(reject_reward - chosen_reward)).mean() - return loss diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/opt/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/opt/__init__.py deleted file mode 100644 index 3d7a8adbf8..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/opt/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .opt_actor import OPTActor -from .opt_critic import OPTCritic -from .opt_lm import OPTLM -from .opt_rm import OPTRM - -__all__ = ['OPTActor', 'OPTCritic', 'OPTRM', 'OPTLM'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_actor.py deleted file mode 100644 index c14e4377ff..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from transformers.models.opt.configuration_opt import OPTConfig -from transformers.models.opt.modeling_opt import OPTForCausalLM - -from ..base import Actor - - -class OPTActor(Actor): - """ - OPT Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (OPTConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[OPTConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = OPTForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = OPTForCausalLM(config) - else: - model = OPTForCausalLM(OPTConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_critic.py deleted file mode 100644 index fcfebd8a8b..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_critic.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers.models.opt.configuration_opt import OPTConfig -from transformers.models.opt.modeling_opt import OPTModel - -from ..base import Critic - - -class OPTCritic(Critic): - """ - OPT Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (OPTConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[OPTConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none', - **kwargs) -> None: - if pretrained is not None: - model = OPTModel.from_pretrained(pretrained) - elif config is not None: - model = OPTModel(config) - else: - model = OPTModel(OPTConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.word_embed_proj_dim, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_lm.py b/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_lm.py deleted file mode 100644 index 47afae847f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_lm.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -from transformers.models.opt.configuration_opt import OPTConfig -from transformers.models.opt.modeling_opt import OPTForCausalLM - -from ..base import LM - - -class OPTLM(LM): - """ - OPT language model. - - Args: - pretrained (str): Pretrained model name or path. - config (OPTConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[OPTConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = OPTForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = OPTForCausalLM(config) - else: - model = OPTForCausalLM(OPTConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) - - def forward(self, input_ids, attention_mask=None, labels=None, **kwargs): - return self.model(input_ids, attention_mask=attention_mask, labels=labels, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_rm.py b/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_rm.py deleted file mode 100644 index 50fc0dee85..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/opt/opt_rm.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import OPTConfig, OPTModel - -from ..base import RewardModel - - -class OPTRM(RewardModel): - """ - OPT Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (OPTConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[OPTConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = OPTModel.from_pretrained(pretrained) - elif config is not None: - model = OPTModel(config) - else: - model = OPTModel(OPTConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - - value_head = nn.Linear(model.config.word_embed_proj_dim, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.word_embed_proj_dim + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/models/roberta/__init__.py deleted file mode 100644 index 0f4a8de067..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .roberta_actor import RoBERTaActor -from .roberta_critic import RoBERTaCritic -from .roberta_rm import RoBERTaRM - -__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM'] \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_actor.py b/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_actor.py deleted file mode 100644 index e35fa6eb19..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from transformers.models.roberta.configuration_roberta import RobertaConfig -from transformers.models.roberta.modeling_roberta import RobertaForCausalLM - -from ..base import Actor - -class RoBERTaActor(Actor): - """ - RoBERTa Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (RoBERTaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[RobertaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = RobertaForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = RobertaForCausalLM(config) - else: - model = RobertaForCausalLM(RobertaConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_critic.py b/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_critic.py deleted file mode 100644 index c8dc0d9e14..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_critic.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers.models.roberta.configuration_roberta import RobertaConfig -from transformers.models.roberta.modeling_roberta import RobertaModel - -from ..base import Critic - - -class RoBERTaCritic(Critic): - """ - RoBERTa Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (RoBERTa Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[RobertaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none', - **kwargs) -> None: - if pretrained is not None: - model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False) - elif config is not None: - model = RobertaModel(config) - else: - model = RobertaModel(RobertaConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_rm.py b/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_rm.py deleted file mode 100644 index 7707505297..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/roberta/roberta_rm.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import RobertaConfig, RobertaModel - - -from ..base import RewardModel - - -class RoBERTaRM(RewardModel): - """ - RoBERTa Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (RoBERTaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[RobertaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False) - elif config is not None: - model = RobertaModel(config) - else: - model = RobertaModel(RobertaConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - - value_head = nn.Linear(model.config.hidden_size, 1) - value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/coati/models/utils.py b/neurons/text/prompting/miners/self_hosted/coati/models/utils.py deleted file mode 100644 index 0ff13181fc..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/models/utils.py +++ /dev/null @@ -1,92 +0,0 @@ -from typing import Optional, Union - -import loralib as lora -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def compute_approx_kl(log_probs: torch.Tensor, - log_probs_base: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - """ - Compute the approximate KL divergence between two distributions. - Schulman blog: http://joschu.net/blog/kl-approx.html - - Args: - log_probs: Log probabilities of the new distribution. - log_probs_base: Log probabilities of the base distribution. - action_mask: Mask for actions. - """ - - log_ratio = log_probs - log_probs_base - approx_kl = (log_ratio.exp() - 1) - log_ratio - if action_mask is not None: - approx_kl = masked_mean(approx_kl, action_mask, dim=1) - return approx_kl - approx_kl = approx_kl.mean(dim=1) - return approx_kl - - -def compute_reward(r: Union[torch.Tensor, float], - kl_coef: float, - log_probs: torch.Tensor, - log_probs_base: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - if kl_coef <= 0.0: - return r - kl = compute_approx_kl(log_probs, log_probs_base, action_mask=action_mask) - reward = r - kl_coef * kl - return reward - - -def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: - log_probs = F.log_softmax(logits, dim=-1) - log_probs_labels = log_probs.gather(dim=-1, index=labels.unsqueeze(-1)) - return log_probs_labels.squeeze(-1) - - -def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.Tensor: - tensor = tensor * mask - tensor = tensor.sum(dim=dim) - mask_sum = mask.sum(dim=dim) - mean = tensor / (mask_sum + 1e-8) - return mean - - -def masked_normalize(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1, eps: float = 1e-8) -> torch.Tensor: - tensor = tensor * mask - mean = masked_mean(tensor, mask, dim=dim) - mean_centered = tensor - mean - var = masked_mean(mean_centered**2, mask, dim=dim) - return mean_centered * var.clamp(min=eps).rsqrt() - - -def normalize(tensor: torch.Tensor, dim: int = 0, eps: float = 1e-8) -> torch.Tensor: - mean = tensor.mean(dim) - mean_centered = tensor - mean - var = (mean_centered**2).mean(dim) - norm = mean_centered * var.clamp(min=eps).rsqrt() - return norm - - -def convert_to_lora(model: nn.Module, - input_size: int, - output_size: int, - lora_rank: int = 16, - lora_alpha: int = 1, - lora_dropout: float = 0., - fan_in_fan_out: bool = False, - merge_weights: bool = True): - if lora_rank > min(input_size, output_size): - raise ValueError(f"LoRA rank {lora_rank} must be less or 
equal than {min(input_size, output_size)}") - - for name, module in model.named_modules(): - if isinstance(module, nn.Linear): - module._modules[name] = lora.Linear(input_size, - output_size, - r=lora_rank, - lora_alpha=lora_alpha, - lora_dropout=lora_dropout, - fan_in_fan_out=fan_in_fan_out, - merge_weights=merge_weights) diff --git a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/__init__.py deleted file mode 100644 index 1ebf603829..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base import ReplayBuffer -from .naive import NaiveReplayBuffer - -__all__ = ['ReplayBuffer', 'NaiveReplayBuffer'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/base.py b/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/base.py deleted file mode 100644 index 4c3812461a..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/base.py +++ /dev/null @@ -1,43 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any - -from coati.experience_maker.base import Experience - - -class ReplayBuffer(ABC): - """Replay buffer base class. It stores experience. - - Args: - sample_batch_size (int): Batch size when sampling. - limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0. - """ - - def __init__(self, sample_batch_size: int, limit: int = 0) -> None: - super().__init__() - self.sample_batch_size = sample_batch_size - # limit <= 0 means unlimited - self.limit = limit - - @abstractmethod - def append(self, experience: Experience) -> None: - pass - - @abstractmethod - def clear(self) -> None: - pass - - @abstractmethod - def sample(self) -> Experience: - pass - - @abstractmethod - def __len__(self) -> int: - pass - - @abstractmethod - def __getitem__(self, idx: int) -> Any: - pass - - @abstractmethod - def collate_fn(self, batch: Any) -> Experience: - pass diff --git a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/naive.py b/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/naive.py deleted file mode 100644 index 938f500643..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/naive.py +++ /dev/null @@ -1,57 +0,0 @@ -import random -from typing import List - -import torch -from coati.experience_maker.base import Experience - -from .base import ReplayBuffer -from .utils import BufferItem, make_experience_batch, split_experience_batch - - -class NaiveReplayBuffer(ReplayBuffer): - """Naive replay buffer class. It stores experience. - - Args: - sample_batch_size (int): Batch size when sampling. - limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0. - cpu_offload (bool, optional): Whether to offload experience to cpu when sampling. Defaults to True. 
- """ - - def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None: - super().__init__(sample_batch_size, limit) - self.cpu_offload = cpu_offload - self.target_device = torch.device(f'cuda:{torch.cuda.current_device()}') - # TODO(ver217): add prefetch - self.items: List[BufferItem] = [] - - @torch.no_grad() - def append(self, experience: Experience) -> None: - if self.cpu_offload: - experience.to_device(torch.device('cpu')) - items = split_experience_batch(experience) - self.items.extend(items) - if self.limit > 0: - samples_to_remove = len(self.items) - self.limit - if samples_to_remove > 0: - self.items = self.items[samples_to_remove:] - - def clear(self) -> None: - self.items.clear() - - @torch.no_grad() - def sample(self) -> Experience: - items = random.sample(self.items, self.sample_batch_size) - experience = make_experience_batch(items) - if self.cpu_offload: - experience.to_device(self.target_device) - return experience - - def __len__(self) -> int: - return len(self.items) - - def __getitem__(self, idx: int) -> BufferItem: - return self.items[idx] - - def collate_fn(self, batch) -> Experience: - experience = make_experience_batch(batch) - return experience diff --git a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/utils.py b/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/utils.py deleted file mode 100644 index 55ddb2ae81..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/replay_buffer/utils.py +++ /dev/null @@ -1,73 +0,0 @@ -from dataclasses import dataclass -from typing import List, Optional - -import torch -import torch.nn.functional as F -from coati.experience_maker.base import Experience - - -@dataclass -class BufferItem: - """BufferItem is an item of experience data. - - Shapes of each tensor: - sequences: (S) - action_log_probs: (A) - values: (1) - reward: (1) - advatanges: (1) - attention_mask: (S) - action_mask: (A) - - "A" is the number of actions. 
- """ - sequences: torch.Tensor - action_log_probs: torch.Tensor - values: torch.Tensor - reward: torch.Tensor - advantages: torch.Tensor - attention_mask: Optional[torch.LongTensor] - action_mask: Optional[torch.BoolTensor] - - -def split_experience_batch(experience: Experience) -> List[BufferItem]: - batch_size = experience.sequences.size(0) - batch_kwargs = [{} for _ in range(batch_size)] - keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask') - for key in keys: - value = getattr(experience, key) - if isinstance(value, torch.Tensor): - vals = torch.unbind(value) - else: - # None - vals = [value for _ in range(batch_size)] - assert batch_size == len(vals) - for i, v in enumerate(vals): - batch_kwargs[i][key] = v - items = [BufferItem(**kwargs) for kwargs in batch_kwargs] - return items - - -def zero_pad_sequences(sequences: List[torch.Tensor], side: str = 'left') -> torch.Tensor: - assert side in ('left', 'right') - max_len = max(seq.size(0) for seq in sequences) - padded_sequences = [] - for seq in sequences: - pad_len = max_len - seq.size(0) - padding = (pad_len, 0) if side == 'left' else (0, pad_len) - padded_sequences.append(F.pad(seq, padding)) - return torch.stack(padded_sequences, dim=0) - - -def make_experience_batch(items: List[BufferItem]) -> Experience: - kwargs = {} - to_pad_keys = set(('action_log_probs', 'action_mask')) - keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask') - for key in keys: - vals = [getattr(item, key) for item in items] - if key in to_pad_keys: - batch_data = zero_pad_sequences(vals) - else: - batch_data = torch.stack(vals, dim=0) - kwargs[key] = batch_data - return Experience(**kwargs) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/__init__.py deleted file mode 100644 index 525b57bf21..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .base import Trainer -from .ppo import PPOTrainer -from .rm import RewardModelTrainer -from .sft import SFTTrainer - -__all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer', 'SFTTrainer'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/base.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/base.py deleted file mode 100644 index 610bb51119..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/base.py +++ /dev/null @@ -1,168 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Optional, Union - -import torch -from coati.experience_maker import Experience, ExperienceMaker -from coati.replay_buffer import ReplayBuffer -from torch import Tensor -from torch.utils.data import DistributedSampler -from tqdm import tqdm - -from .callbacks import Callback -from .strategies import Strategy -from .utils import is_rank_0 - - -class Trainer(ABC): - """ - Base class for rlhf trainers. 
-
-    Args:
-        strategy (Strategy): the strategy to use for training
-        experience_maker (ExperienceMaker): the experience maker used to produce experience to fill the replay buffer
-        replay_buffer (ReplayBuffer): the replay buffer to use for training
-        experience_batch_size (int, defaults to 8): the batch size to use for experience generation
-        max_epochs (int, defaults to 1): the number of epochs of the training process
-        tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
-        sample_replay_buffer (bool, defaults to False): whether to sample from the replay buffer
-        dataloader_pin_memory (bool, defaults to True): whether to pin memory for the data loader
-        callbacks (List[Callback], defaults to []): the callbacks to call during the training process
-        generate_kwargs (dict, optional): the kwargs to use while the model is generating
-    """
-
-    def __init__(self,
-                 strategy: Strategy,
-                 experience_maker: ExperienceMaker,
-                 replay_buffer: ReplayBuffer,
-                 experience_batch_size: int = 8,
-                 max_epochs: int = 1,
-                 tokenizer: Optional[Callable[[Any], dict]] = None,
-                 sample_replay_buffer: bool = False,
-                 dataloader_pin_memory: bool = True,
-                 callbacks: List[Callback] = [],
-                 **generate_kwargs) -> None:
-        super().__init__()
-        self.strategy = strategy
-        self.experience_maker = experience_maker
-        self.replay_buffer = replay_buffer
-        self.experience_batch_size = experience_batch_size
-        self.max_epochs = max_epochs
-        self.tokenizer = tokenizer
-        self.generate_kwargs = generate_kwargs
-        self.sample_replay_buffer = sample_replay_buffer
-        self.dataloader_pin_memory = dataloader_pin_memory
-        self.callbacks = callbacks
-
-    @abstractmethod
-    def training_step(self, experience: Experience) -> Dict[str, Any]:
-        pass
-
-    def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience:
-        if isinstance(inputs, Tensor):
-            return self.experience_maker.make_experience(inputs, **self.generate_kwargs)
-        elif isinstance(inputs, dict):
-            return self.experience_maker.make_experience(**inputs, **self.generate_kwargs)
-        else:
-            raise ValueError(f'Unsupported input type "{type(inputs)}"')
-
-    def _sample_prompts(self, prompts) -> list:
-        indices = list(range(len(prompts)))
-        sampled_indices = self.strategy.experience_sampler.choice(indices, self.experience_batch_size, replace=False)
-        return [prompts[i] for i in sampled_indices]
-
-    def _learn(self):
-        # replay buffer may be empty at first, we should rebuild at each training
-        if not self.sample_replay_buffer:
-            dataloader = self.strategy.setup_dataloader(self.replay_buffer, self.dataloader_pin_memory)
-        device = torch.cuda.current_device()
-        if self.sample_replay_buffer:
-            pbar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0())
-            for _ in pbar:
-                experience = self.replay_buffer.sample()
-                metrics = self.training_step(experience)
-                pbar.set_postfix(metrics)
-        else:
-            for epoch in range(self.max_epochs):
-                self._on_learn_epoch_start(epoch)
-                if isinstance(dataloader.sampler, DistributedSampler):
-                    dataloader.sampler.set_epoch(epoch)
-                pbar = tqdm(dataloader, desc=f'Train epoch [{epoch+1}/{self.max_epochs}]', disable=not is_rank_0())
-                for experience in pbar:
-                    self._on_learn_batch_start()
-                    experience.to_device(device)
-                    metrics = self.training_step(experience)
-                    self._on_learn_batch_end(metrics, experience)
-                    pbar.set_postfix(metrics)
-                self._on_learn_epoch_end(epoch)
-
-    def fit(self,
-            prompt_dataloader,
-            pretrain_dataloader,
-            num_episodes: int = 50000,
-            max_timesteps: int = 500,
-            update_timesteps: int = 5000) -> None:
-
time = 0 - self.pretrain_dataloader = pretrain_dataloader - self.prompt_dataloader = prompt_dataloader - self._on_fit_start() - for episode in range(num_episodes): - self._on_episode_start(episode) - for timestep in tqdm(range(max_timesteps), - desc=f'Episode [{episode+1}/{num_episodes}]', - disable=not is_rank_0()): - time += 1 - prompts = next(iter(self.prompt_dataloader)) - self._on_make_experience_start() - self.experience_maker.initial_model.to(torch.cuda.current_device()) - self.experience_maker.reward_model.to(torch.cuda.current_device()) - experience = self._make_experience(prompts) - self._on_make_experience_end(experience) - self.replay_buffer.append(experience) - if time % update_timesteps == 0: - self.experience_maker.initial_model.to('cpu') - self.experience_maker.reward_model.to('cpu') - self._learn() - self.replay_buffer.clear() - self._on_episode_end(episode) - self._on_fit_end() - - # TODO(ver217): maybe simplify these code using context - def _on_fit_start(self) -> None: - for callback in self.callbacks: - callback.on_fit_start() - - def _on_fit_end(self) -> None: - for callback in self.callbacks: - callback.on_fit_end() - - def _on_episode_start(self, episode: int) -> None: - for callback in self.callbacks: - callback.on_episode_start(episode) - - def _on_episode_end(self, episode: int) -> None: - for callback in self.callbacks: - callback.on_episode_end(episode) - - def _on_make_experience_start(self) -> None: - for callback in self.callbacks: - callback.on_make_experience_start() - - def _on_make_experience_end(self, experience: Experience) -> None: - for callback in self.callbacks: - callback.on_make_experience_end(experience) - - def _on_learn_epoch_start(self, epoch: int) -> None: - for callback in self.callbacks: - callback.on_learn_epoch_start(epoch) - - def _on_learn_epoch_end(self, epoch: int) -> None: - for callback in self.callbacks: - callback.on_learn_epoch_end(epoch) - - def _on_learn_batch_start(self) -> None: - for callback in self.callbacks: - callback.on_learn_batch_start() - - def _on_learn_batch_end(self, metrics: dict, experience: Experience) -> None: - for callback in self.callbacks: - callback.on_learn_batch_end(metrics, experience) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/__init__.py deleted file mode 100644 index 9ed0ee6f76..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .base import Callback -from .performance_evaluator import PerformanceEvaluator -from .save_checkpoint import SaveCheckpoint - -__all__ = ['Callback', 'PerformanceEvaluator', 'SaveCheckpoint'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/base.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/base.py deleted file mode 100644 index f561604885..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/base.py +++ /dev/null @@ -1,39 +0,0 @@ -from abc import ABC - -from coati.experience_maker import Experience - - -class Callback(ABC): - """ - Base callback class. It defines the interface for callbacks. 
- """ - - def on_fit_start(self) -> None: - pass - - def on_fit_end(self) -> None: - pass - - def on_episode_start(self, episode: int) -> None: - pass - - def on_episode_end(self, episode: int) -> None: - pass - - def on_make_experience_start(self) -> None: - pass - - def on_make_experience_end(self, experience: Experience) -> None: - pass - - def on_learn_epoch_start(self, epoch: int) -> None: - pass - - def on_learn_epoch_end(self, epoch: int) -> None: - pass - - def on_learn_batch_start(self) -> None: - pass - - def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None: - pass diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/performance_evaluator.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/performance_evaluator.py deleted file mode 100644 index 0fc3b077a1..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/performance_evaluator.py +++ /dev/null @@ -1,133 +0,0 @@ -from time import time -from typing import Optional - -import torch -import torch.distributed as dist -from coati.experience_maker import Experience - -from .base import Callback - - -def get_world_size() -> int: - if dist.is_initialized(): - return dist.get_world_size() - return 1 - - -def print_rank_0(*args, **kwargs) -> None: - if not dist.is_initialized() or dist.get_rank() == 0: - print(*args, **kwargs) - - -@torch.no_grad() -def all_reduce_mean(x: float, world_size: int) -> float: - if world_size == 1: - return x - tensor = torch.tensor([x], device=torch.cuda.current_device()) - dist.all_reduce(tensor) - tensor = tensor / world_size - return tensor.item() - - -class PerformanceEvaluator(Callback): - """ - Callback for valuate the performance of the model. - Args: - actor_num_params: The number of parameters of the actor model. - critic_num_params: The number of parameters of the critic model. - initial_model_num_params: The number of parameters of the initial model. - reward_model_num_params: The number of parameters of the reward model. - enable_grad_checkpoint: Whether to enable gradient checkpointing. - ignore_episodes: The number of episodes to ignore when calculating the performance. - """ - - def __init__(self, - actor_num_params: int, - critic_num_params: int, - initial_model_num_params: int, - reward_model_num_params: int, - enable_grad_checkpoint: bool = False, - ignore_episodes: int = 0) -> None: - super().__init__() - self.world_size = get_world_size() - self.actor_num_params = actor_num_params - self.critic_num_params = critic_num_params - self.initial_model_num_params = initial_model_num_params - self.reward_model_num_params = reward_model_num_params - self.enable_grad_checkpoint = enable_grad_checkpoint - self.ignore_episodes = ignore_episodes - self.disable: bool = False - - self.make_experience_duration: float = 0. - self.make_experience_start_time: Optional[float] = None - self.make_experience_num_samples: int = 0 - self.make_experience_flop: int = 0 - self.learn_duration: float = 0. 
- self.learn_start_time: Optional[float] = None - self.learn_num_samples: int = 0 - self.learn_flop: int = 0 - - def on_episode_start(self, episode: int) -> None: - self.disable = self.ignore_episodes > 0 and episode < self.ignore_episodes - - def on_make_experience_start(self) -> None: - if self.disable: - return - self.make_experience_start_time = time() - - def on_make_experience_end(self, experience: Experience) -> None: - if self.disable: - return - self.make_experience_duration += time() - self.make_experience_start_time - - batch_size, seq_len = experience.sequences.shape - - self.make_experience_num_samples += batch_size - - # actor generate - num_actions = experience.action_mask.size(1) - input_len = seq_len - num_actions - total_seq_len = (input_len + seq_len - 1) * num_actions / 2 - self.make_experience_flop += self.actor_num_params * batch_size * total_seq_len * 2 - # actor forward - self.make_experience_flop += self.actor_num_params * batch_size * seq_len * 2 - # critic forward - self.make_experience_flop += self.critic_num_params * batch_size * seq_len * 2 - # initial model forward - self.make_experience_flop += self.initial_model_num_params * batch_size * seq_len * 2 - # reward model forward - self.make_experience_flop += self.reward_model_num_params * batch_size * seq_len * 2 - - def on_learn_batch_start(self) -> None: - if self.disable: - return - self.learn_start_time = time() - - def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None: - if self.disable: - return - self.learn_duration += time() - self.learn_start_time - - batch_size, seq_len = experience.sequences.shape - - self.learn_num_samples += batch_size - - # actor forward-backward, 3 means forward(1) + backward(2) - self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint)) - # critic foward-backward - self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint)) - - def on_fit_end(self) -> None: - avg_make_experience_duration = all_reduce_mean(self.make_experience_duration, self.world_size) - avg_learn_duration = all_reduce_mean(self.learn_duration, self.world_size) - - avg_make_experience_throughput = self.make_experience_num_samples / (avg_make_experience_duration + 1e-12) - avg_make_experience_tflops = self.make_experience_flop / 1e12 / (avg_make_experience_duration + 1e-12) - - avg_learn_throughput = self.learn_num_samples / (avg_learn_duration + 1e-12) - avg_learn_tflops = self.learn_flop / 1e12 / (avg_learn_duration + 1e-12) - - print_rank_0( - f'Making experience throughput: {avg_make_experience_throughput:.3f} samples/sec, TFLOPS: {avg_make_experience_tflops:.3f}' - ) - print_rank_0(f'Learning throughput: {avg_learn_throughput:.3f} samples/sec, TFLOPS: {avg_learn_tflops:.3f}') diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/save_checkpoint.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/save_checkpoint.py deleted file mode 100644 index d2dcc0dd4c..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/callbacks/save_checkpoint.py +++ /dev/null @@ -1,75 +0,0 @@ -import os - -import torch.distributed as dist -from coati.trainer.strategies import ColossalAIStrategy, Strategy -from coati.trainer.utils import is_rank_0 -from torch import nn -from torch.optim import Optimizer - -from .base import Callback - - -class SaveCheckpoint(Callback): - """ - The callback for saving checkpoint for coati. 
- - Only support saving actor and critic model. - A typical architecture of the saved checkpoint would be: - - checkpoint - - episode_x - - actor.pt - - actor-optim-rank-0.pt - - actor-optim-rank-1.pt - - critic.pt - - critic-optim-rank-0.pt - - critic-optim-rank-1.pt - - ... - - Args: - path(str): the base path you want to save checkpoint, the checkpoint would be saved at `path/checkpoint` - interval(int): the interval episode of saving checkpoint - strategy(Strategy): the strategy used to train - actor(nn.Module): the actor model - critic(nn.Module): the critic model - actor_optim(Optimizer): the optimizer of actor - critic_optim(Optimizer): the optimizer of critic - - """ - - def __init__(self, - path: str, - interval: int, - strategy: Strategy, - actor: nn.Module = None, - critic: nn.Module = None, - actor_optim: Optimizer = None, - critic_optim: Optimizer = None) -> None: - super().__init__() - self.path = os.path.join(path, 'checkpoint') - self.interval = interval - self.strategy = strategy - self.model_dict = {'actor': [actor, actor_optim], 'critic': [critic, critic_optim]} - - def on_episode_end(self, episode: int) -> None: - if (episode + 1) % self.interval != 0: - return - base_path = os.path.join(self.path, f'episode_{episode}') - if not os.path.exists(base_path): - os.makedirs(base_path) - - for model in self.model_dict.keys(): - - # save model - if self.model_dict[model][0] is None: - # saving only optimizer states is meaningless, so it would be skipped - continue - model_path = os.path.join(base_path, f'{model}.pt') - self.strategy.save_model(model=self.model_dict[model][0], path=model_path, only_rank0=True) - - # save optimizer - if self.model_dict[model][1] is None: - continue - only_rank0 = not isinstance(self.strategy, ColossalAIStrategy) - rank = 0 if is_rank_0() else dist.get_rank() - optim_path = os.path.join(base_path, f'{model}-optim-rank-{rank}.pt') - self.strategy.save_optimizer(optimizer=self.model_dict[model][1], path=optim_path, only_rank0=only_rank0) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/ppo.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/ppo.py deleted file mode 100644 index 84254d50d7..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/ppo.py +++ /dev/null @@ -1,135 +0,0 @@ -from typing import Any, Callable, Dict, List, Optional - -import torch -import torch.nn as nn -from coati.experience_maker import Experience, NaiveExperienceMaker -from coati.models.base import Actor, Critic -from coati.models.generation_utils import update_model_kwargs_fn -from coati.models.loss import PolicyLoss, ValueLoss -from coati.replay_buffer import NaiveReplayBuffer -from torch.optim import Optimizer -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .base import Trainer -from .callbacks import Callback -from .strategies import Strategy - - -class PPOTrainer(Trainer): - """ - Trainer for PPO algorithm. 
- - Args: - strategy (Strategy): the strategy to use for training - actor (Actor): the actor model in ppo algorithm - critic (Critic): the critic model in ppo algorithm - reward_model (nn.Module): the reward model in rlhf algorithm to make reward of sentences - initial_model (Actor): the initial model in rlhf algorithm to generate reference logits to limit the update of actor - actor_optim (Optimizer): the optimizer to use for actor model - critic_optim (Optimizer): the optimizer to use for critic model - kl_coef (float, defaults to 0.1): the coefficient of kl divergence loss - train_batch_size (int, defaults to 8): the batch size to use for training - buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer - buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu - eps_clip (float, defaults to 0.2): the clip coefficient of policy loss - value_clip (float, defaults to 0.4): the clip coefficient of value loss - experience_batch_size (int, defaults to 8): the batch size to use for experience generation - max_epochs (int, defaults to 1): the number of epochs of training process - tokenier (Callable, optional): the tokenizer to use for tokenizing the input - sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer - dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader - callbacks (List[Callback], defaults to []): the callbacks to call during training process - generate_kwargs (dict, optional): the kwargs to use while model generating - """ - - def __init__(self, - strategy: Strategy, - actor: Actor, - critic: Critic, - reward_model: nn.Module, - initial_model: Actor, - actor_optim: Optimizer, - critic_optim: Optimizer, - kl_coef: float = 0.1, - ptx_coef: float = 0.9, - train_batch_size: int = 8, - buffer_limit: int = 0, - buffer_cpu_offload: bool = True, - eps_clip: float = 0.2, - value_clip: float = 0.4, - experience_batch_size: int = 8, - max_epochs: int = 1, - tokenizer: Optional[Callable[[Any], dict]] = None, - sample_replay_buffer: bool = False, - dataloader_pin_memory: bool = True, - callbacks: List[Callback] = [], - **generate_kwargs) -> None: - experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, kl_coef) - replay_buffer = NaiveReplayBuffer(train_batch_size, buffer_limit, buffer_cpu_offload) - generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor) - super().__init__(strategy, experience_maker, replay_buffer, experience_batch_size, max_epochs, tokenizer, - sample_replay_buffer, dataloader_pin_memory, callbacks, **generate_kwargs) - self.actor = actor - self.critic = critic - - self.actor_loss_fn = PolicyLoss(eps_clip) - self.critic_loss_fn = ValueLoss(value_clip) - self.ptx_loss_fn = nn.CrossEntropyLoss(ignore_index=-100) - self.ptx_coef = ptx_coef - self.actor_optim = actor_optim - self.critic_optim = critic_optim - - def training_step(self, experience: Experience) -> Dict[str, float]: - self.actor.train() - self.critic.train() - # policy loss - num_actions = experience.action_mask.size(1) - action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask) - actor_loss = self.actor_loss_fn(action_log_probs, - experience.action_log_probs, - experience.advantages, - action_mask=experience.action_mask) - - # ptx loss - if self.ptx_coef != 0: - ptx = next(iter(self.pretrain_dataloader))['input_ids'].to(torch.cuda.current_device()) - label = 
next(iter(self.pretrain_dataloader))['labels'].to(torch.cuda.current_device())[:, 1:] - attention_mask = next(iter(self.pretrain_dataloader))['attention_mask'].to(torch.cuda.current_device()) - ptx_log_probs = self.actor.get_base_model()(ptx, attention_mask=attention_mask)['logits'][..., :-1, :] - ptx_loss = self.ptx_loss_fn(ptx_log_probs.view(-1, ptx_log_probs.size(-1)), label.view(-1)) - actor_loss = ptx_loss * self.ptx_coef + actor_loss * (1 - self.ptx_coef) - - self.strategy.backward(actor_loss, self.actor, self.actor_optim) - self.strategy.optimizer_step(self.actor_optim) - self.actor_optim.zero_grad() - - # value loss - values = self.critic(experience.sequences, - action_mask=experience.action_mask, - attention_mask=experience.attention_mask) - critic_loss = self.critic_loss_fn(values, - experience.values, - experience.reward, - action_mask=experience.action_mask) - self.strategy.backward(critic_loss, self.critic, self.critic_optim) - self.strategy.optimizer_step(self.critic_optim) - self.critic_optim.zero_grad() - - return {'reward': experience.reward.mean().item()} - - -def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> None: - origin_model = strategy._unwrap_actor(actor) - new_kwargs = {**generate_kwargs} - # use huggingface models method directly - if 'prepare_inputs_fn' not in generate_kwargs and hasattr(origin_model, 'prepare_inputs_for_generation'): - new_kwargs['prepare_inputs_fn'] = origin_model.prepare_inputs_for_generation - - if 'update_model_kwargs_fn' not in generate_kwargs: - new_kwargs['update_model_kwargs_fn'] = update_model_kwargs_fn - - return new_kwargs - - -def save_model(self, path: str, only_rank0: bool = False, tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - self.strategy.save_model(model=self.actor, path=path, only_rank0=only_rank0, tokenizer=tokenizer) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/rm.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/rm.py deleted file mode 100644 index 0cf09b0410..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/rm.py +++ /dev/null @@ -1,135 +0,0 @@ -from abc import ABC -from datetime import datetime -from typing import Optional - -import pandas as pd -import torch -import torch.distributed as dist -from torch.optim import Optimizer, lr_scheduler -from torch.utils.data import DataLoader, Dataset, DistributedSampler -from tqdm import tqdm -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .strategies import Strategy -from .utils import is_rank_0 - - -class RewardModelTrainer(ABC): - """ - Trainer to use while training reward model. 
- - Args: - model (torch.nn.Module): the model to train - strategy (Strategy): the strategy to use for training - optim(Optimizer): the optimizer to use for training - loss_fn (callable): the loss function to use for training - train_dataset (Dataset): the dataset to use for training - valid_dataset (Dataset): the dataset to use for validation - eval_dataset (Dataset): the dataset to use for evaluation - batch_size (int, defaults to 1): the batch size while training - max_epochs (int, defaults to 2): the number of epochs to train - """ - - def __init__( - self, - model, - strategy: Strategy, - optim: Optimizer, - loss_fn, - train_dataset: Dataset, - valid_dataset: Dataset, - eval_dataset: Dataset, - batch_size: int = 1, - max_epochs: int = 1, - ) -> None: - super().__init__() - self.strategy = strategy - self.epochs = max_epochs - train_sampler = None - - if dist.is_initialized() and dist.get_world_size() > 1: - train_sampler = DistributedSampler(train_dataset, shuffle=True, seed=42, drop_last=True) - self.train_dataloader = DataLoader(train_dataset, - shuffle=(train_sampler is None), - sampler=train_sampler, - batch_size=batch_size) - self.valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True) - self.eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=True) - - self.model = strategy.setup_model(model) - self.loss_fn = loss_fn - self.optimizer = strategy.setup_optimizer(optim, self.model) - self.scheduler = lr_scheduler.CosineAnnealingLR(self.optimizer, self.train_dataloader.__len__() // 100) - - def eval_acc(self, dataloader): - dist = 0 - on = 0 - cnt = 0 - self.model.eval() - with torch.no_grad(): - for chosen_ids, c_mask, reject_ids, r_mask in dataloader: - chosen_ids = chosen_ids.squeeze(1).to(torch.cuda.current_device()) - c_mask = c_mask.squeeze(1).to(torch.cuda.current_device()) - reject_ids = reject_ids.squeeze(1).to(torch.cuda.current_device()) - r_mask = r_mask.squeeze(1).to(torch.cuda.current_device()) - chosen_reward = self.model(chosen_ids, attention_mask=c_mask) - reject_reward = self.model(reject_ids, attention_mask=r_mask) - for i in range(len(chosen_reward)): - cnt += 1 - if chosen_reward[i] > reject_reward[i]: - on += 1 - dist += (chosen_reward - reject_reward).mean().item() - dist_mean = dist / len(dataloader) - acc = on / cnt - self.model.train() - return dist_mean, acc - - def fit(self): - time = datetime.now() - epoch_bar = tqdm(range(self.epochs), desc='Train epoch', disable=not is_rank_0()) - for epoch in range(self.epochs): - step_bar = tqdm(range(self.train_dataloader.__len__()), - desc='Train step of epoch %d' % epoch, - disable=not is_rank_0()) - # train - self.model.train() - cnt = 0 - acc = 0 - dist = 0 - for chosen_ids, c_mask, reject_ids, r_mask in self.train_dataloader: - chosen_ids = chosen_ids.squeeze(1).to(torch.cuda.current_device()) - c_mask = c_mask.squeeze(1).to(torch.cuda.current_device()) - reject_ids = reject_ids.squeeze(1).to(torch.cuda.current_device()) - r_mask = r_mask.squeeze(1).to(torch.cuda.current_device()) - chosen_reward = self.model(chosen_ids, attention_mask=c_mask) - reject_reward = self.model(reject_ids, attention_mask=r_mask) - loss = self.loss_fn(chosen_reward, reject_reward) - self.strategy.backward(loss, self.model, self.optimizer) - self.strategy.optimizer_step(self.optimizer) - self.optimizer.zero_grad() - cnt += 1 - if cnt == 100: - self.scheduler.step() - dist, acc = self.eval_acc(self.valid_dataloader) - cnt = 0 - if is_rank_0(): - log = pd.DataFrame([[step_bar.n, 
loss.item(), dist, acc]], - columns=['step', 'loss', 'dist', 'acc']) - log.to_csv('log_%s.csv' % time, mode='a', header=False, index=False) - step_bar.update() - step_bar.set_postfix({'dist': dist, 'acc': acc}) - - # eval - dist, acc = self.eval_acc(self.eval_dataloader) - if is_rank_0(): - log = pd.DataFrame([[step_bar.n, loss.item(), dist, acc]], columns=['step', 'loss', 'dist', 'acc']) - log.to_csv('log.csv', mode='a', header=False, index=False) - epoch_bar.update() - step_bar.set_postfix({'dist': dist, 'acc': acc}) - step_bar.close() - - def save_model(self, - path: str, - only_rank0: bool = False, - tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - self.strategy.save_model(model=self.model, path=path, only_rank0=only_rank0, tokenizer=tokenizer) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/sft.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/sft.py deleted file mode 100644 index 8eeffea48b..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/sft.py +++ /dev/null @@ -1,158 +0,0 @@ -import math -import time -from abc import ABC -from typing import Optional - -import loralib as lora -import torch -import torch.distributed as dist -import wandb -from coati.models.loss import GPTLMLoss -from torch import nn -from torch.optim import Adam, Optimizer -from torch.optim.lr_scheduler import LambdaLR -from torch.utils.data import DataLoader -from torch.utils.data.distributed import DistributedSampler -from tqdm import tqdm -from transformers.tokenization_utils_base import PreTrainedTokenizerBase -from transformers.trainer import get_scheduler - -from colossalai.logging import get_dist_logger - -from .strategies import Strategy -from .utils import is_rank_0 - - -class SFTTrainer(ABC): - """ - Trainer to use while training reward model. 
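The reward-model trainer deleted above only ever calls `self.loss_fn(chosen_reward, reject_reward)`, so the concrete objective is supplied by the caller and does not appear in this diff. A common choice for pairwise preference data is a log-sigmoid ranking loss; a hedged sketch, not necessarily the loss that was actually plugged in here:

```python
import torch
import torch.nn.functional as F

def pairwise_ranking_loss(chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
    # Pushes the scalar reward of the preferred completion above the rejected
    # one: -log sigmoid(r_chosen - r_rejected), averaged over the batch.
    return -F.logsigmoid(chosen_reward - reject_reward).mean()

# Illustrative rewards for a batch of 4 preference pairs.
chosen = torch.tensor([1.2, 0.3, -0.1, 2.0])
rejected = torch.tensor([0.5, 0.4, -0.9, 1.1])
loss = pairwise_ranking_loss(chosen, rejected)
```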
- - Args: - model (torch.nn.Module): the model to train - strategy (Strategy): the strategy to use for training - optim(Optimizer): the optimizer to use for training - train_dataloader: the dataloader to use for training - eval_dataloader: the dataloader to use for evaluation - batch_size (int, defaults to 1): the batch size while training - max_epochs (int, defaults to 2): the number of epochs to train - optim_kwargs (dict, defaults to {'lr':1e-4}): the kwargs to use while initializing optimizer - """ - - def __init__( - self, - model, - strategy: Strategy, - optim: Optimizer, - train_dataloader: DataLoader, - eval_dataloader: DataLoader = None, - batch_size: int = 1, - max_epochs: int = 2, - accimulation_steps: int = 8, - ) -> None: - super().__init__() - self.strategy = strategy - self.epochs = max_epochs - self.train_dataloader = train_dataloader - self.eval_dataloader = eval_dataloader - - self.model = strategy.setup_model(model) - if "DDP" in str(self.strategy): - self.model = self.model.module - self.optimizer = strategy.setup_optimizer(optim, self.model) - - self.accimulation_steps = accimulation_steps - num_update_steps_per_epoch = len(train_dataloader) // self.accimulation_steps - max_steps = math.ceil(self.epochs * num_update_steps_per_epoch) - - self.scheduler = get_scheduler("cosine", - self.optimizer, - num_warmup_steps=math.ceil(max_steps * 0.03), - num_training_steps=max_steps) - - def fit(self, logger, log_interval=10): - wandb.init(project="Coati", name=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) - wandb.watch(self.model) - total_loss = 0 - # epoch_bar = tqdm(range(self.epochs), desc='Epochs', disable=not is_rank_0()) - step_bar = tqdm(range(len(self.train_dataloader) // self.accimulation_steps * self.epochs), - desc=f'steps', - disable=not is_rank_0()) - for epoch in range(self.epochs): - - # process_bar = tqdm(range(len(self.train_dataloader)), desc=f'Train process for{epoch}', disable=not is_rank_0()) - # train - self.model.train() - for batch_id, batch in enumerate(self.train_dataloader): - - prompt_ids = batch["input_ids"].to(torch.cuda.current_device()) - p_mask = batch["attention_mask"].to(torch.cuda.current_device()) - labels = batch["labels"].to(torch.cuda.current_device()) - # prompt_ids = prompt_ids.squeeze(1).cuda() - # p_mask = p_mask.squeeze(1).cuda() - # prompt_logits = self.model(prompt_ids, attention_mask=p_mask, labels=labels) - - outputs = self.model(prompt_ids, attention_mask=p_mask, labels=labels) - - loss = outputs.loss - prompt_logits = outputs.logits - - if loss >= 2.5: - logger.warning(f"batch_id:{batch_id}, abnormal loss: {loss}") - - loss = loss / self.accimulation_steps - - self.strategy.backward(loss, self.model, self.optimizer) - - total_loss += loss.item() - - # gradient accumulation - if (batch_id + 1) % self.accimulation_steps == 0: - self.strategy.optimizer_step(self.optimizer) - self.optimizer.zero_grad() - self.scheduler.step() - wandb.log({ - "loss": total_loss / self.accimulation_steps, - "lr": self.scheduler.get_last_lr()[0], - "epoch": epoch, - "batch_id": batch_id - }) - total_loss = 0 - step_bar.update() - - # if batch_id % log_interval == 0: - # logger.info(f'Train Epoch {epoch}/{self.epochs} Batch {batch_id} Rank {dist.get_rank()} loss {loss.item()}') - # wandb.log({"loss": loss.item()}) - - # process_bar.update() - - # eval - if self.eval_dataloader is not None: - self.model.eval() - with torch.no_grad(): - loss_sum = 0 - num_seen = 0 - for batch in self.eval_dataloader: - prompt_ids = 
batch["input_ids"].to(torch.cuda.current_device()) - p_mask = batch["attention_mask"].to(torch.cuda.current_device()) - labels = batch["labels"].to(torch.cuda.current_device()) - # prompt_ids = prompt_ids.squeeze(1).cuda() - # p_mask = p_mask.squeeze(1).cuda() - - outputs = self.model(prompt_ids, attention_mask=p_mask, labels=labels) - loss = outputs.loss - # prompt_logits = outputs.logits - - loss_sum += loss.item() - num_seen += prompt_ids.size(0) - - loss_mean = loss_sum / num_seen - if dist.get_rank() == 0: - logger.info(f'Eval Epoch {epoch}/{self.epochs} loss {loss_mean}') - - # epoch_bar.update() - - def save_model(self, - path: str, - only_rank0: bool = False, - tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - self.strategy.save_model(model=self.model, path=path, only_rank0=only_rank0, tokenizer=tokenizer) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/__init__.py deleted file mode 100644 index f258c9b8a8..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .base import Strategy -from .colossalai import ColossalAIStrategy -from .ddp import DDPStrategy -from .naive import NaiveStrategy - -__all__ = ['Strategy', 'NaiveStrategy', 'DDPStrategy', 'ColossalAIStrategy'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/base.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/base.py deleted file mode 100644 index 7d25138561..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/base.py +++ /dev/null @@ -1,136 +0,0 @@ -from abc import ABC, abstractmethod -from contextlib import nullcontext -from typing import Any, List, Optional, Tuple, Union - -import numpy as np -import torch -import torch.nn as nn -from coati.models.base import LM, Actor, Critic, RewardModel -from coati.replay_buffer import ReplayBuffer -from torch.optim import Optimizer -from torch.utils.data import DataLoader -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .sampler import DistributedSampler - -ModelOptimPair = Tuple[nn.Module, Optimizer] -ModelOrModelOptimPair = Union[nn.Module, ModelOptimPair] - - -class Strategy(ABC): - """ - Base class for training strategies. - """ - - def __init__(self) -> None: - super().__init__() - self.setup_distributed() - - @abstractmethod - def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: Optimizer, **kwargs) -> None: - pass - - @abstractmethod - def optimizer_step(self, optimizer: Optimizer, **kwargs) -> None: - pass - - @abstractmethod - def setup_distributed(self) -> None: - pass - - @abstractmethod - def setup_model(self, model: nn.Module) -> nn.Module: - pass - - @abstractmethod - def setup_optimizer(self, optimizer: Optimizer, model: nn.Module) -> Optimizer: - pass - - @abstractmethod - def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: - pass - - def model_init_context(self): - return nullcontext() - - def prepare( - self, *models_or_model_optim_pairs: ModelOrModelOptimPair - ) -> Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: - """Prepare models or model-optimizer-pairs based on each strategy. 
- - Example:: - >>> # when fine-tuning actor and critic - >>> (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare((actor, actor_optim), (critic, critic_optim), reward_model, initial_model) - >>> # or when training reward model - >>> (reward_model, reward_model_optim) = strategy.prepare((reward_model, reward_model_optim)) - >>> # or just inference - >>> actor, critic = strategy.prepare(actor, critic) - - Returns: - Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: Models or model-optimizer-pairs in the original order. - """ - - def prepare_model(model: nn.Module): - if isinstance(model, Actor): - return Actor(self.setup_model(self._unwrap_model(model))) - return self.setup_model(self._unwrap_model(model)) - - rets = [] - for arg in models_or_model_optim_pairs: - if isinstance(arg, tuple): - assert len(arg) == 2, f'Expect (model, optimizer) pair, got a tuple with size "{len(arg)}"' - model, optimizer = arg - model = prepare_model(model) - optimizer = self.setup_optimizer(optimizer, self._unwrap_model(model)) - rets.append((model, optimizer)) - elif isinstance(arg, nn.Module): - rets.append(prepare_model(arg)) - else: - raise RuntimeError(f'Expect model or (model, optimizer) pair, got {type(arg)}') - - if len(rets) == 1: - return rets[0] - return rets - - @staticmethod - def _unwrap_model(model: nn.Module) -> nn.Module: - """Useful for saving state dict. As actor is wrapped by Actor class again in `prepare()`, we should unwrap it before saving. - - Args: - model (nn.Module): an actor or a critic - """ - if isinstance(model, Actor) or isinstance(model, LM): - return model.model - return model - - @staticmethod - def _unwrap_actor(actor: Actor) -> nn.Module: - """Get `actor.model` from a wrapped (by `prepare()`) actor. Useful for getting original huggingface model. 
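`Strategy._unwrap_model` above strips the coati `Actor`/`LM` wrapper before saving so that checkpoint keys match the underlying Hugging Face model. The same unwrap-before-save idea applies to DDP-wrapped modules (handled further down by `DDPStrategy._unwrap_actor`); a small illustrative sketch, not part of the deleted code:

```python
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

def unwrap_for_saving(model: nn.Module) -> nn.Module:
    # DDP stores the original network under .module; saving the wrapper
    # directly would prefix every state-dict key with "module.".
    if isinstance(model, DDP):
        return model.module
    return model
```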
- - Args: - actor (Actor): a wrapped actor - """ - return Strategy._unwrap_model(actor) - - @abstractmethod - def save_model(self, - model: nn.Module, - path: str, - only_rank0: bool = False, - tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - pass - - @abstractmethod - def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None: - pass - - @abstractmethod - def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - pass - - @abstractmethod - def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None: - pass - - def setup_sampler(self, dataset) -> DistributedSampler: - return DistributedSampler(dataset, 1, 0) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/colossalai.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/colossalai.py deleted file mode 100644 index ba85ba76d4..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/colossalai.py +++ /dev/null @@ -1,212 +0,0 @@ -import warnings -from typing import Optional, Union - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.optim as optim -from coati.models.base import LM, Actor, RewardModel -from coati.models.lora import LoraLinear -from torch.optim import Optimizer -from transformers.modeling_utils import PreTrainedModel -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -import colossalai -from colossalai.logging import get_dist_logger -from colossalai.nn.optimizer import CPUAdam, HybridAdam -from colossalai.tensor import ProcessGroup, ShardSpec -from colossalai.utils import get_current_device -from colossalai.zero import ColoInitContext, ZeroDDP, zero_model_wrapper, zero_optim_wrapper -from colossalai.zero.gemini.utils import get_static_torch_model - -from .base import Strategy -from .ddp import DDPStrategy - -logger = get_dist_logger(__name__) - - -class ColossalAIStrategy(DDPStrategy): - """ - The strategy for training with ColossalAI. - - Args: - stage(int): The stage to use in ZeRO. Choose in (1, 2, 3) - precision(str): The precision to use. Choose in ('fp32', 'fp16'). Stage 3 only supports fp16. - seed(int): The seed for the random number generator. - shard_init(bool): Whether to shard the model parameters during initialization. Only for ZeRO-3. - This is not compativle with `from_pretrained()`. We temporarily disable this and will support it in the future. - placement_policy(str): The placement policy for gemini. Choose in ('cpu', 'cuda') - If it is “cpu”, parameters, gradients and optimizer states will be offloaded to CPU, - If it is “cuda”, they will not be offloaded, which means max CUDA memory will be used. It is the fastest. - pin_memory(bool): Whether to pin the memory for the data loader. Only for ZeRO-3. - force_outputs_fp32(bool): Whether to force the outputs to be fp32. Only for ZeRO-3. - search_range_mb(int): The search range in MB for the chunk size. Only for ZeRO-3. - hidden_dim(optional, int): The hidden dimension for the gemini. Only for ZeRO-3. - min_chunk_size_mb(float): The minimum chunk size in MB. Only for ZeRO-3. - gpu_margin_mem_ratio(float): The margin memory ratio for the GPU. Only for ZeRO-3. - reduce_bugket_size(int): The reduce bucket size in bytes. Only for ZeRO-1 and ZeRO-2. - overlap_communication(bool): Whether to overlap communication and computation. Only for ZeRO-1 and ZeRO-2. 
- initial_scale(float): The initial scale for the optimizer. - growth_factor(float): The growth factor for the optimizer. - backoff_factor(float): The backoff factor for the optimizer. - growth_interval(int): The growth interval for the optimizer. - hysteresis(int): The hysteresis for the optimizer. - min_scale(float): The minimum scale for the optimizer. - max_scale(float): The maximum scale for the optimizer. - max_norm(float): The maximum norm for the optimizer. - norm_type(float): The norm type for the optimizer. - - """ - - def __init__( - self, - stage: int = 3, - precision: str = 'fp16', - seed: int = 42, - shard_init: bool = False, # only for stage 3 - placement_policy: str = 'cuda', - pin_memory: bool = True, # only for stage 3 - force_outputs_fp32: bool = False, # only for stage 3 - search_range_mb: int = 32, # only for stage 3 - hidden_dim: Optional[int] = None, # only for stage 3 - min_chunk_size_mb: float = 32, # only for stage 3 - gpu_margin_mem_ratio: float = 0.0, # only for stage 3 - reduce_bucket_size: int = 12 * 1024**2, # only for stage 1&2 - overlap_communication: bool = True, # only for stage 1&2 - initial_scale: float = 2**16, - growth_factor: float = 2, - backoff_factor: float = 0.5, - growth_interval: int = 1000, - hysteresis: int = 2, - min_scale: float = 1, - max_scale: float = 2**32, - max_norm: float = 0.0, - norm_type: float = 2.0) -> None: - super().__init__(seed) - assert placement_policy in ('cpu', 'cuda'), f'Unsupported placement policy "{placement_policy}"' - assert precision in ('fp32', 'fp16'), f'Unsupported precision "{precision}"' - self.stage = stage - # TODO(ver217): support shard_init when using from_pretrained() - if shard_init: - warnings.warn( - f'Shard init is not supported model.from_pretrained() yet. Please load weights after strategy.prepare()' - ) - if stage == 3 and precision == 'fp32': - warnings.warn(f'Stage 3 only supports fp16. 
Precision is set to fp16.') - precision = 'fp16' - self.precision = precision - self.shard_init = shard_init - self.gemini_config = dict(device=get_current_device(), - placement_policy=placement_policy, - pin_memory=pin_memory, - force_outputs_fp32=force_outputs_fp32, - strict_ddp_mode=shard_init, - search_range_mb=search_range_mb, - hidden_dim=hidden_dim, - min_chunk_size_mb=min_chunk_size_mb) - if stage == 3: - self.zero_optim_config = dict(gpu_margin_mem_ratio=gpu_margin_mem_ratio) - else: - self.zero_optim_config = dict(reduce_bucket_size=reduce_bucket_size, - overlap_communication=overlap_communication, - cpu_offload=(placement_policy == 'cpu')) - self.optim_kwargs = dict(initial_scale=initial_scale, - growth_factor=growth_factor, - backoff_factor=backoff_factor, - growth_interval=growth_interval, - hysteresis=hysteresis, - min_scale=min_scale, - max_scale=max_scale, - max_norm=max_norm, - norm_type=norm_type) - - def setup_distributed(self) -> None: - colossalai.launch_from_torch({}, seed=self.seed) - - def model_init_context(self): - if self.stage == 3: - world_size = dist.get_world_size() - shard_pg = ProcessGroup(tp_degree=world_size) if self.shard_init else None - default_dist_spec = ShardSpec([-1], [world_size]) if self.shard_init else None - return ColoInitContext(device=get_current_device(), - dtype=torch.half, - default_pg=shard_pg, - default_dist_spec=default_dist_spec) - return super().model_init_context() - - def setup_model(self, model: nn.Module) -> nn.Module: - - model = zero_model_wrapper(model, zero_stage=self.stage, gemini_config=self.gemini_config) - - if self.stage != 3 and self.precision == 'fp16': - model = model.half() - return model - - def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer: - assert isinstance(optimizer, (CPUAdam, HybridAdam)), f'Unsupported optimizer {type(optimizer)}' - return zero_optim_wrapper(model, optimizer, optim_config=self.zero_optim_config, **self.optim_kwargs) - - def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None: - optimizer.backward(loss) - - def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None: - optimizer.step() - - @staticmethod - def _unwrap_actor(actor: Actor) -> nn.Module: - model: Union[nn.Module, ZeroDDP] = Strategy._unwrap_actor(actor) - if isinstance(model, ZeroDDP): - return model.module - return model - - def _unwrap_model(self, model: Union[nn.Module, ZeroDDP]) -> nn.Module: - if isinstance(model, ZeroDDP) and self.stage == 3: - logger.info(f"model type: {type(model)}, get static torch model") - model = get_static_torch_model(model) - logger.info(f"unwrapped_model type: {type(model)}") - - return super()._unwrap_model(model) - - def save_model(self, - model: nn.Module, - path: str, - only_rank0: bool = True, - tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - - if only_rank0 and dist.get_rank() != 0: - return None - unwrapped_model = self._unwrap_model(model) - # TODO : better way to get torch model from gemini model - # to get torch model from gemini model - - for module in unwrapped_model.modules(): - if isinstance(module, LoraLinear): - module.merge_weights = True - module.eval() - if isinstance(unwrapped_model, RewardModel): - state_dict = unwrapped_model.state_dict() - if only_rank0 and dist.get_rank() != 0: - return - torch.save(state_dict, path) - else: - try: - if isinstance(unwrapped_model, LM): - unwrapped_model = unwrapped_model.model - logger.info(f'Saving model to {path}', ranks=[0]) - 
unwrapped_model.save_pretrained(path) - logger.info(f'Model saved to {path} Successfully', ranks=[0]) - if tokenizer is not None: - logger.info(f'Saving tokenizer to {path}', ranks=[0]) - tokenizer.save_pretrained(path) - logger.info(f'Tokenizer saved to {path} Successfully', ranks=[0]) - except AttributeError: - state_dict = unwrapped_model.state_dict() - if only_rank0 and dist.get_rank() != 0: - return - torch.save(state_dict, path) - - def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - if only_rank0: - raise RuntimeError( - f'Optimizer states are sharded when using ColossalAIStrategy. Only rank0 is not supported.') - torch.save(optimizer.state_dict(), path) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/ddp.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/ddp.py deleted file mode 100644 index 7bf8e8ba84..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/ddp.py +++ /dev/null @@ -1,111 +0,0 @@ -from typing import Optional - -import os -import random - -import numpy as np -import torch -import torch.distributed as dist -import torch.nn as nn -from coati.models.base import LM, Actor, RewardModel -from coati.models.lora import LoraLinear -from coati.replay_buffer import ReplayBuffer -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.optim import Optimizer -from torch.utils.data import DataLoader -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .base import Strategy -from .naive import NaiveStrategy -from .sampler import DistributedSampler - - -class DDPStrategy(NaiveStrategy): - """ - Strategy for distributed training using torch.distributed. - """ - - def __init__(self, seed: int = 42) -> None: - self.seed = seed - super().__init__() - - def setup_distributed(self) -> None: - try: - rank = int(os.environ['RANK']) - local_rank = int(os.environ['LOCAL_RANK']) - world_size = int(os.environ['WORLD_SIZE']) - host = os.environ['MASTER_ADDR'] - port = int(os.environ['MASTER_PORT']) - except KeyError as e: - raise RuntimeError( - f"Could not find {e} in the torch environment, visit https://www.colossalai.org/ for more information on launching with torch" - ) - dist.init_process_group('nccl', init_method=f'tcp://[{host}]:{port}', world_size=world_size, rank=rank) - self.set_seed(self.seed) - torch.cuda.set_device(local_rank) - - def set_seed(self, seed: int) -> None: - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - def setup_model(self, model: nn.Module) -> nn.Module: - device = torch.cuda.current_device() - return DDP(model, device_ids=[device]) - - def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: - # DDP only mode, replay buffers on each rank are different. 
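`DDPStrategy.setup_distributed` above reads the standard torch.distributed environment variables rather than taking arguments. Those are normally injected per process by the launcher; the snippet below only illustrates what a single-process debug environment might set, and every value is a placeholder:

```python
import os

# torchrun (e.g. `torchrun --nproc_per_node=8 train.py`) sets these per process;
# for a one-process debug run you can populate them by hand.
os.environ.setdefault("RANK", "0")
os.environ.setdefault("LOCAL_RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
```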
- # sampler = DistributedSampler(replay_buffer, - # num_replicas=dist.get_world_size(), - # rank=dist.get_rank(), - # shuffle=True, - # seed=self.seed, - # drop_last=True) - return DataLoader( - replay_buffer, - batch_size=replay_buffer.sample_batch_size, - # sampler=sampler, - shuffle=True, - drop_last=True, - pin_memory=pin_memory, - collate_fn=replay_buffer.collate_fn) - - @staticmethod - def _unwrap_actor(actor: Actor) -> nn.Module: - model: DDP = Strategy._unwrap_actor(actor) - return model.module - - def save_model(self, model: nn.Module, path: str, only_rank0: bool = False, tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - if only_rank0 and dist.get_rank() != 0: - return None - - for module in model.modules(): - if isinstance(module, LoraLinear): - module.merge_weights = True - module.eval() - - if isinstance(model, RewardModel): - state_dict = model.state_dict() - if only_rank0 and dist.get_rank() != 0: - return - torch.save(state_dict, path) - else: - try: - if isinstance(model, LM): - model = model.model - model.save_pretrained(path) - if tokenizer is not None: - tokenizer.save_pretrained(path) - except AttributeError: - state_dict = model.state_dict() - if only_rank0 and dist.get_rank() != 0: - return - torch.save(state_dict, path) - - def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - if only_rank0 and dist.get_rank() != 0: - return - super().save_optimizer(optimizer, path, only_rank0) - - def setup_sampler(self, dataset) -> DistributedSampler: - return DistributedSampler(dataset, dist.get_world_size(), dist.get_rank()) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/naive.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/naive.py deleted file mode 100644 index 3a86b13c00..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/naive.py +++ /dev/null @@ -1,74 +0,0 @@ -from typing import Any, Optional - -import torch -import torch.nn as nn -import torch.optim as optim -from coati.replay_buffer import ReplayBuffer -from coati.models.base import LM, RewardModel -from coati.models.lora import LoraLinear -from torch.optim import Optimizer -from torch.utils.data import DataLoader -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .base import Strategy - - -class NaiveStrategy(Strategy): - """ - Strategy for single GPU. No parallelism is used. 
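Since all three strategies implement the same interface, training code can switch between single-GPU and distributed runs without other changes. A hedged usage sketch built only from the `prepare`/`backward`/`optimizer_step` contract shown in this diff; the model and optimizer are toy placeholders, and the import refers to the vendored coati package being deleted here:

```python
import torch
import torch.nn as nn
from coati.trainer.strategies import NaiveStrategy  # vendored package removed in this PR

# Toy stand-ins; in the deleted miner these are the coati Actor/Critic
# wrappers around a Hugging Face causal LM.
model = nn.Linear(16, 16)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Swap in DDPStrategy() under torchrun (ColossalAIStrategy additionally expects
# a colossalai CPUAdam/HybridAdam optimizer); the calls below stay the same.
strategy = NaiveStrategy()
model, optimizer = strategy.prepare((model, optimizer))

loss = model(torch.randn(4, 16)).sum()
strategy.backward(loss, model, optimizer)
strategy.optimizer_step(optimizer)
optimizer.zero_grad()
```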
- """ - - def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None: - loss.backward() - - def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None: - optimizer.step() - - def setup_distributed(self) -> None: - pass - - def setup_model(self, model: nn.Module) -> nn.Module: - return model - - def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer: - return optimizer - - def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: - return DataLoader(replay_buffer, - batch_size=replay_buffer.sample_batch_size, - shuffle=True, - drop_last=True, - pin_memory=pin_memory, - collate_fn=replay_buffer.collate_fn) - - def save_model(self, model: nn.Module, path: str, only_rank0: bool = False, tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - for module in model.modules(): - if isinstance(module, LoraLinear): - module.merge_weights = True - module.eval() - - if isinstance(model, RewardModel): - state_dict = model.state_dict() - torch.save(state_dict, path) - else: - try: - if isinstance(model, LM): - model = model.model - model.save_pretrained(path) - if tokenizer is not None: - tokenizer.save_pretrained(path) - except AttributeError: - state_dict = model.state_dict() - torch.save(state_dict, path) - - def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None: - unwrapped_model = self._unwrap_model(model) - state_dict = torch.load(path, map_location=map_location) - unwrapped_model.load_state_dict(state_dict, strict=strict) - - def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - torch.save(optimizer.state_dict(), path) - - def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None: - state_dict = torch.load(path, map_location=map_location) - optimizer.load_state_dict(state_dict) diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/sampler.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/sampler.py deleted file mode 100644 index d726fa640f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/strategies/sampler.py +++ /dev/null @@ -1,32 +0,0 @@ -import math - -import numpy as np - - -class DistributedSampler: - - def __init__(self, dataset, num_replicas: int, rank: int) -> None: - self.dataset = dataset - self.num_replicas = num_replicas - self.rank = rank - - if len(self.dataset) % self.num_replicas != 0: - self.num_samples = math.ceil( - (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type] - ) - else: - self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) - - self.total_size = self.num_samples * self.num_replicas - - indices = list(range(len(self.dataset))) - indices = indices[:self.total_size] - assert len(indices) == self.total_size - # subsample - indices = indices[self.rank:self.total_size:self.num_replicas] - assert len(indices) == self.num_samples - self.indices = indices - - def sample(self, batch_size: int) -> list: - sampled_indices = np.random.choice(self.indices, batch_size, replace=False) - return [self.dataset[idx] for idx in sampled_indices] diff --git a/neurons/text/prompting/miners/self_hosted/coati/trainer/utils.py b/neurons/text/prompting/miners/self_hosted/coati/trainer/utils.py deleted file mode 100644 index 6c9f7f085f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/trainer/utils.py 
+++ /dev/null @@ -1,5 +0,0 @@ -import torch.distributed as dist - - -def is_rank_0() -> bool: - return not dist.is_initialized() or dist.get_rank() == 0 diff --git a/neurons/text/prompting/miners/self_hosted/coati/utils/__init__.py b/neurons/text/prompting/miners/self_hosted/coati/utils/__init__.py deleted file mode 100644 index e75401d382..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .tokenizer_utils import prepare_llama_tokenizer_and_embedding, smart_tokenizer_and_embedding_resize - -__all__ = ['smart_tokenizer_and_embedding_resize', 'prepare_llama_tokenizer_and_embedding'] diff --git a/neurons/text/prompting/miners/self_hosted/coati/utils/tokenizer_utils.py b/neurons/text/prompting/miners/self_hosted/coati/utils/tokenizer_utils.py deleted file mode 100644 index 35ebb96af9..0000000000 --- a/neurons/text/prompting/miners/self_hosted/coati/utils/tokenizer_utils.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Dict - -import transformers - -from ..models.llama.llama_lm import LlamaLM - -DEFAULT_PAD_TOKEN = "[PAD]" -DEFAULT_EOS_TOKEN = "" -DEFAULT_BOS_TOKEN = "" -DEFAULT_UNK_TOKEN = "" - - -def prepare_llama_tokenizer_and_embedding( - tokenizer: transformers.PreTrainedTokenizer, - model: transformers.PreTrainedModel, - special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN), -): - """prepare llama tokenizer and embedding. - - """ - - if tokenizer.pad_token is None: - smart_tokenizer_and_embedding_resize( - special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN), - tokenizer=tokenizer, - model=model, - ) - - tokenizer.add_special_tokens({ - "eos_token": DEFAULT_EOS_TOKEN, - "bos_token": DEFAULT_BOS_TOKEN, - "unk_token": DEFAULT_UNK_TOKEN, - }) - - return tokenizer - - -def smart_tokenizer_and_embedding_resize( - tokenizer: transformers.PreTrainedTokenizer, - model: transformers.PreTrainedModel, - special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN), -): - """Resize tokenizer and embedding. - - Note: This is the unoptimized version that may make your embedding size not be divisible by 64. 
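For reference, the resize-and-mean-initialise pattern that `smart_tokenizer_and_embedding_resize` implements can also be written against plain transformers, independent of the coati wrappers. A hedged sketch; the checkpoint name is purely illustrative:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Any causal LM whose tokenizer lacks a pad token works the same way.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

num_new = tokenizer.add_special_tokens({"pad_token": "[PAD]"})
model.resize_token_embeddings(len(tokenizer))

if num_new > 0:
    with torch.no_grad():
        emb = model.get_input_embeddings().weight
        out = model.get_output_embeddings().weight
        # Initialise the new rows with the mean of the pre-existing embeddings
        # instead of leaving them randomly initialised.
        emb[-num_new:] = emb[:-num_new].mean(dim=0, keepdim=True)
        out[-num_new:] = out[:-num_new].mean(dim=0, keepdim=True)
```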
- """ - - if tokenizer.pad_token is None: - num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict) - - if isinstance(model, LlamaLM): - model = model.get_base_model() - - model.model.resize_token_embeddings(len(tokenizer)) - - if num_new_tokens > 0: - input_embeddings = model.model.get_input_embeddings().weight.data - output_embeddings = model.model.get_output_embeddings().weight.data - - input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) - output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) - - input_embeddings[-num_new_tokens:] = input_embeddings_avg - output_embeddings[-num_new_tokens:] = output_embeddings_avg diff --git a/neurons/text/prompting/miners/self_hosted/neuron.py b/neurons/text/prompting/miners/self_hosted/neuron.py deleted file mode 100644 index 1f8123435f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/neuron.py +++ /dev/null @@ -1,209 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# General. -import os -import json -import time -import torch -import argparse -import bittensor - -from typing import List, Dict -from rich import print -from datetime import datetime - -# Torch tooling. -from torch.nn.utils.rnn import pad_sequence -from torch.optim import Adam -from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline - -# Coati PPO tooling. -from coati.models.auto import AutoActor as Actor, AutoCritic as Critic -from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from coati.models.loss import PolicyLoss, ValueLoss - -# Check run config. -def check_config(config: 'bittensor.Config'): - bittensor.logging.check_config(config) - bittensor.wallet.check_config(config) - bittensor.subtensor.check_config(config) - bittensor.metagraph.check_config(config) - bittensor.axon.check_config(config) - full_path = os.path.expanduser( - '{}/{}/{}/{}'.format(config.logging.logging_dir, config.wallet.get('name', bittensor.defaults.wallet.name), - config.wallet.get('hotkey', bittensor.defaults.wallet.hotkey), config.neuron.name)) - config.neuron.full_path = os.path.expanduser(full_path) - if not os.path.exists(config.neuron.full_path): - os.makedirs(config.neuron.full_path) - - -# Create run config. 
-def get_config(): - parser = argparse.ArgumentParser() - parser.add_argument('--netuid', type=int, help='Subnet netuid', default=21) - parser.add_argument('--config', type=str, help='If set, defaults are overridden by passed file.') - parser.add_argument('--neuron.name', type=str, - help='Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ', - default='robert_myers_prompting_miner') - parser.add_argument('--neuron.blocks_per_epoch', type=str, help='Blocks until the miner sets weights on chain', - default=100) - parser.add_argument('--neuron.no_set_weights', action='store_true', help='If True, the model does not set weights.', - default=False) - parser.add_argument('--neuron.max_batch_size', type=int, help='The maximum batch size for forward requests.', - default=-1) - parser.add_argument('--neuron.max_sequence_len', type=int, help='The maximum sequence length for forward requests.', - default=-1) - parser.add_argument('--neuron.blacklist.hotkeys', type=str, required=False, nargs='*', action='store', - help='To blacklist certain hotkeys', default=[]) - parser.add_argument('--neuron.lora_rank', type=int, help='The rank of the lora layer.', default=0) - - bittensor.wallet.add_args(parser) - bittensor.axon.add_args(parser) - bittensor.subtensor.add_args(parser) - bittensor.logging.add_args(parser) - bittensor.metagraph.add_args(parser) - # bittensor.TextPromptingSynapse.add_args(parser) - return bittensor.config(parser) - - - -# Main entry point for model serving. -def main(): - # --- Build, Check, Set and Print the run config. - config = get_config() - config.to_defaults() - check_config(config) - print(config) - - # --- PPO - strategy = NaiveStrategy() - - # --- Turn on logging. - bittensor.logging(config=config, logging_dir=config.neuron.full_path) - - # --- Create our chain connection. - subtensor = bittensor.subtensor(config) - - # --- Create our wallet and register it to the subnetwork. - wallet = bittensor.wallet(config) - wallet.register(netuid=config.netuid, subtensor=subtensor) - - # --- Create our network state cache - metagraph = bittensor.metagraph(config=config, netuid=config.netuid, ) - metagraph.sync(netuid=config.netuid, subtensor=subtensor).save() - uid = metagraph.hotkeys.index(wallet.hotkey.ss58_address) - - # --- Build /Load our model and set the device. 
- with bittensor.__console__.status("Loading huggingface model robertmyers/bpt-sft ..."): - bittensor.logging.info('Loading', "robertmyers/bpt-sft" ) - tokenizer = AutoTokenizer.from_pretrained( "robertmyers/bpt-sft" ) - actor = Actor( pretrained="robertmyers/bpt-sft", lora_rank=config.neuron.lora_rank ) - critic = Critic( pretrained="robertmyers/bpt-sft", lora_rank=config.neuron.lora_rank, use_action_mask=True ) - actor_optim = Adam(actor.parameters(), lr=1e-7) - critic_optim = Adam(critic.parameters(), lr=1e-7) - # model = AutoModelForCausalLM.from_pretrained( "robertmyers/bpt-sft", torch_dtype=torch.float16 ) - - actor.to( "cuda" ) - pipe = pipeline("text-generation", actor, tokenizer=tokenizer, device=0, max_new_tokens = 256 ) - - # --- Build axon server and start it.tensor.loggi - axon = bittensor.axon( - wallet=wallet, - metagraph=metagraph, - config=config, - ) - - def _process_history(history: List[str]) -> str: - processed_history = '' - for message in history: - message = json.loads(message) - if message['role'] == 'system': - processed_history += 'system: ' + message['content'] + '\n' - - if message['role'] == 'assistant': - processed_history += 'assistant: ' + message['content'] + '\n' - - if message['role'] == 'user': - processed_history += 'user: ' + message['content'] + '\n' - return processed_history - - class Synapse(bittensor.TextPromptingSynapse): - def _priority(self, forward_call: "bittensor.TextPromptingForwardCall") -> float: - return 0.0 - - def _blacklist(self, forward_call: "bittensor.TextPromptingForwardCall") -> bool: - return False - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - def forward(self, messages: List[str]) -> str: - history = _process_history(messages) - return pipe( history )[0]['generated_text'].split(':')[-1].replace( str( history ), "") - - syn = Synapse() - axon.attach(syn) - axon.start() - axon.netuid = config.netuid - axon.protocol = 4 - subtensor.serve_axon( axon ) - print (axon) - - # --- Run Forever. - last_update = subtensor.get_current_block() - while True: - - # --- Wait until next epoch. - current_block = subtensor.get_current_block() - while (current_block - last_update) < config.neuron.blocks_per_epoch: - time.sleep(bittensor.__blocktime__) - current_block = subtensor.get_current_block() - last_update = subtensor.get_current_block() - - # --- Update the metagraph with the latest network state. - metagraph.sync(netuid=config.netuid, subtensor=subtensor) - uid = metagraph.hotkeys.index(wallet.hotkey.ss58_address) - - # --- Log performance. - print( - f"[white not bold]{datetime.now():%Y-%m-%d %H:%M:%S}[/white not bold]{' ' * 4} | " - f"{f'UID [bright_cyan]{uid}[/bright_cyan]'.center(16 + len('[bright_cyan][/bright_cyan]'))} | " - f'[dim white not bold] [green]{str(metagraph.S[uid].item()):.4}[/green] Stake [/dim white not bold]' - f'[dim white not bold]| [yellow]{str(metagraph.trust[uid].item()) :.3}[/yellow] Trust [/dim white not bold]' - f'[dim white not bold]| [green]{str(metagraph.incentive[uid].item()):.3}[/green] Incentive [/dim white not bold]') - - # --- Set weights. 
- if not config.neuron.no_set_weights: - try: - # --- query the chain for the most current number of peers on the network - chain_weights = torch.zeros(subtensor.subnetwork_n(netuid=config.netuid)) - chain_weights[uid] = 1 - did_set = subtensor.set_weights( - uids=torch.arange(0, len(chain_weights)), - netuid=config.netuid, - weights=chain_weights, - wait_for_inclusion=False, - wallet=wallet, - version_key=1 - ) - except: - pass - - -if __name__ == "__main__": - bittensor.utils.version_checking() - main() \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/__init__.py b/neurons/text/prompting/miners/self_hosted/ppo/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/self_hosted/ppo/actor.py b/neurons/text/prompting/miners/self_hosted/ppo/actor.py deleted file mode 100644 index be9e9abf1f..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from transformers import AutoConfig, AutoModelForCausalLM - - -from base import Actor - - -class AutoActor(Actor): - """ - Auto Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (AutoConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[AutoConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = AutoModelForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = AutoModelForCausalLM(config) - else: - model = AutoModelForCausalLM(AutoConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/base/__init__.py b/neurons/text/prompting/miners/self_hosted/ppo/base/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/self_hosted/ppo/base/actor.py b/neurons/text/prompting/miners/self_hosted/ppo/base/actor.py deleted file mode 100644 index 6baf33e4e5..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/base/actor.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..generation import generate -from ..lora import LoRAModule -from ..utils import log_probs_from_logits - - -class Actor(LoRAModule): - """ - Actor model base class. - - Args: - model (nn.Module): Actor Model. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: - super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) - self.model = model - self.convert_to_lora() - - @torch.no_grad() - def generate( - self, - input_ids: torch.Tensor, - return_action_mask: bool = True, - **kwargs - ) -> Union[Tuple[torch.LongTensor, torch.LongTensor], Tuple[torch.LongTensor, torch.LongTensor, torch.BoolTensor]]: - sequences = generate(self.model, input_ids, **kwargs) - attention_mask = None - pad_token_id = kwargs.get('pad_token_id', None) - if pad_token_id is not None: - attention_mask = sequences.not_equal(pad_token_id).to(dtype=torch.long, device=sequences.device) - if not return_action_mask: - return sequences, attention_mask, None - input_len = input_ids.size(1) - eos_token_id = kwargs.get('eos_token_id', None) - if eos_token_id is None: - action_mask = torch.ones_like(sequences, dtype=torch.bool) - else: - # left padding may be applied, only mask action - action_mask = (sequences[:, input_len:] == eos_token_id).cumsum(dim=-1) == 0 - action_mask = F.pad(action_mask, (1 + input_len, -1), value=True) # include eos token and input - action_mask[:, :input_len] = False - action_mask = action_mask[:, 1:] - return sequences, attention_mask, action_mask[:, -(sequences.size(1) - input_len):] - - def forward(self, - sequences: torch.LongTensor, - num_actions: int, - attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - """Returns action log probs - """ - output = self.model(sequences, attention_mask=attention_mask) - logits = output['logits'] - log_probs = log_probs_from_logits(logits[:, :-1, :], sequences[:, 1:]) - return log_probs[:, -num_actions:] - - def get_base_model(self): - return self.model \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/base/critic.py b/neurons/text/prompting/miners/self_hosted/ppo/base/critic.py deleted file mode 100644 index 420cf849d9..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/base/critic.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn - -from ..lora import LoRAModule -from ..utils import masked_mean - - -class Critic(LoRAModule): - """ - Critic model base class. - - Args: - model (nn.Module): Critic model. - value_head (nn.Module): Value head to get value. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__( - self, - model: nn.Module, - value_head: nn.Module, - lora_rank: int = 0, - lora_train_bias: str = 'none', - use_action_mask: bool = False, - ) -> None: - - super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) - self.model = model - self.value_head = value_head - self.use_action_mask = use_action_mask - self.convert_to_lora() - - def forward(self, - sequences: torch.LongTensor, - action_mask: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - outputs = self.model(sequences, attention_mask=attention_mask) - last_hidden_states = outputs['last_hidden_state'] - - values = self.value_head(last_hidden_states).squeeze(-1) - - if action_mask is not None and self.use_action_mask: - num_actions = action_mask.size(1) - prompt_mask = attention_mask[:, :-num_actions] - values = values[:, :-num_actions] - value = masked_mean(values, prompt_mask, dim=1) - return value - - values = values[:, :-1] - value = values.mean(dim=1) - return value \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/base/lm.py b/neurons/text/prompting/miners/self_hosted/ppo/base/lm.py deleted file mode 100644 index 48410a3b3c..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/base/lm.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..generation import generate -from .actor import Actor - - -class LM(Actor): - """ - Language model base class. - - Args: - model (nn.Module): Language Model. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: - super().__init__(model=model, lora_rank=lora_rank, lora_train_bias=lora_train_bias) - - def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - """Returns output log probs - """ - output = self.model(sequences, attention_mask=attention_mask) - logits = output['logits'] - log_probs = F.log_softmax(logits, dim=-1) - return log_probs \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/base/reward_model.py b/neurons/text/prompting/miners/self_hosted/ppo/base/reward_model.py deleted file mode 100644 index 0a4f24ba39..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/base/reward_model.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn - -from ..lora import LoRAModule - - -class RewardModel(LoRAModule): - """ - Reward model base class. - - Args: - model (nn.Module): Reward model. - value_head (nn.Module): Value head to get reward score. - lora_rank (int): LoRA rank. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - model: nn.Module, - value_head: Optional[nn.Module] = None, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias) - self.model = model - self.convert_to_lora() - - if value_head is not None: - if value_head.out_features != 1: - raise ValueError("The value head of reward model's output dim should be 1!") - self.value_head = value_head - else: - self.value_head = nn.Linear(model.config.n_embd, 1) - - def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - outputs = self.model(sequences, attention_mask=attention_mask) - last_hidden_states = outputs['last_hidden_state'] - values = self.value_head(last_hidden_states)[:, :-1] - value = values.mean(dim=1).squeeze(1) # ensure shape is (B) - return value \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/generation.py b/neurons/text/prompting/miners/self_hosted/ppo/generation.py deleted file mode 100644 index 73a47d86a4..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/generation.py +++ /dev/null @@ -1,146 +0,0 @@ -from typing import Any, Callable, Optional - -import torch -import torch.distributed as dist -import torch.nn as nn - -try: - from transformers.generation_logits_process import ( - LogitsProcessorList, - TemperatureLogitsWarper, - TopKLogitsWarper, - TopPLogitsWarper, - ) -except ImportError: - from transformers.generation import LogitsProcessorList, TemperatureLogitsWarper, TopKLogitsWarper, TopPLogitsWarper - - -def prepare_logits_processor(top_k: Optional[int] = None, - top_p: Optional[float] = None, - temperature: Optional[float] = None) -> LogitsProcessorList: - processor_list = LogitsProcessorList() - if temperature is not None and temperature != 1.0: - processor_list.append(TemperatureLogitsWarper(temperature)) - if top_k is not None and top_k != 0: - processor_list.append(TopKLogitsWarper(top_k)) - if top_p is not None and top_p < 1.0: - processor_list.append(TopPLogitsWarper(top_p)) - return processor_list - - -def _is_sequence_finished(unfinished_sequences: torch.Tensor) -> bool: - if dist.is_initialized() and dist.get_world_size() > 1: - # consider DP - unfinished_sequences = unfinished_sequences.clone() - dist.all_reduce(unfinished_sequences) - return unfinished_sequences.max() == 0 - - -def sample(model: nn.Module, - input_ids: torch.Tensor, - max_length: int, - early_stopping: bool = False, - eos_token_id: Optional[int] = None, - pad_token_id: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - temperature: Optional[float] = None, - prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None, - update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None, - **model_kwargs) -> torch.Tensor: - if input_ids.size(1) >= max_length: - return input_ids - - logits_processor = prepare_logits_processor(top_k, top_p, temperature) - unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1) - - for _ in range(input_ids.size(1), max_length): - model_inputs = prepare_inputs_fn(input_ids, **model_kwargs) if prepare_inputs_fn is not None else { - 'input_ids': input_ids - } - outputs = model(**model_inputs) - - next_token_logits = outputs['logits'][:, -1, :] - # pre-process distribution - next_token_logits = logits_processor(input_ids, next_token_logits) - # sample - probs = torch.softmax(next_token_logits, dim=-1, dtype=torch.float) - next_tokens = 
torch.multinomial(probs, num_samples=1).squeeze(1) - - # finished sentences should have their next token be a padding token - if eos_token_id is not None: - if pad_token_id is None: - raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.") - next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences) - - # update generated ids, model inputs for next step - input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) - if update_model_kwargs_fn is not None: - model_kwargs = update_model_kwargs_fn(outputs, **model_kwargs) - - # if eos_token was found in one sentence, set sentence to finished - if eos_token_id is not None: - unfinished_sequences = unfinished_sequences.mul((next_tokens != eos_token_id).long()) - - # stop when each sentence is finished if early_stopping=True - if early_stopping and _is_sequence_finished(unfinished_sequences): - break - - return input_ids - - -def generate(model: nn.Module, - input_ids: torch.Tensor, - max_length: int, - num_beams: int = 1, - do_sample: bool = True, - early_stopping: bool = False, - eos_token_id: Optional[int] = None, - pad_token_id: Optional[int] = None, - top_k: Optional[int] = None, - top_p: Optional[float] = None, - temperature: Optional[float] = None, - prepare_inputs_fn: Optional[Callable[[torch.Tensor, Any], dict]] = None, - update_model_kwargs_fn: Optional[Callable[[dict, Any], dict]] = None, - **model_kwargs) -> torch.Tensor: - """Generate token sequence. The returned sequence is input_ids + generated_tokens. - - Args: - model (nn.Module): model - input_ids (torch.Tensor): input sequence - max_length (int): max length of the returned sequence - num_beams (int, optional): number of beams. Defaults to 1. - do_sample (bool, optional): whether to do sample. Defaults to True. - early_stopping (bool, optional): if True, the sequence length may be smaller than max_length due to finding eos. Defaults to False. - eos_token_id (Optional[int], optional): end of sequence token id. Defaults to None. - pad_token_id (Optional[int], optional): pad token id. Defaults to None. - top_k (Optional[int], optional): the number of highest probability vocabulary tokens to keep for top-k-filtering. Defaults to None. - top_p (Optional[float], optional): If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. Defaults to None. - temperature (Optional[float], optional): The value used to module the next token probabilities. Defaults to None. - prepare_inputs_fn (Optional[Callable[[torch.Tensor, Any], dict]], optional): Function to preprocess model inputs. Arguments of this function should be input_ids and model_kwargs. Defaults to None. - update_model_kwargs_fn (Optional[Callable[[dict, Any], dict]], optional): Function to update model_kwargs based on outputs. Arguments of this function should be outputs and model_kwargs. Defaults to None. 
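`prepare_logits_processor` above chains the stock transformers warpers in the order temperature, then top-k, then top-p, and the sampling loop applies them to the last-position logits before `torch.multinomial`. A short standalone sketch of that filtering step on dummy logits (shapes and parameter values are illustrative):

```python
import torch
from transformers.generation import (
    LogitsProcessorList,
    TemperatureLogitsWarper,
    TopKLogitsWarper,
    TopPLogitsWarper,
)

warpers = LogitsProcessorList([
    TemperatureLogitsWarper(0.7),    # sharpen/flatten the distribution
    TopKLogitsWarper(top_k=3),       # keep only the 3 most likely tokens
    TopPLogitsWarper(top_p=0.9),     # then keep the smallest nucleus covering 90%
])

input_ids = torch.tensor([[0, 1, 2]])             # dummy prefix, vocab of 8
next_token_logits = torch.randn(1, 8)
filtered = warpers(input_ids, next_token_logits)  # -inf outside the kept set
next_token = torch.multinomial(torch.softmax(filtered, dim=-1), num_samples=1)
```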
- """ - is_greedy_gen_mode = ((num_beams == 1) and do_sample is False) - is_sample_gen_mode = ((num_beams == 1) and do_sample is True) - is_beam_gen_mode = ((num_beams > 1) and do_sample is False) - if is_greedy_gen_mode: - # run greedy search - raise NotImplementedError - elif is_sample_gen_mode: - # run sample - return sample(model, - input_ids, - max_length, - early_stopping=early_stopping, - eos_token_id=eos_token_id, - pad_token_id=pad_token_id, - top_k=top_k, - top_p=top_p, - temperature=temperature, - prepare_inputs_fn=prepare_inputs_fn, - update_model_kwargs_fn=update_model_kwargs_fn, - **model_kwargs) - elif is_beam_gen_mode: - raise NotImplementedError - else: - raise ValueError("Unsupported generation mode") \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/generation_utils.py b/neurons/text/prompting/miners/self_hosted/ppo/generation_utils.py deleted file mode 100644 index f9f78a6c44..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/generation_utils.py +++ /dev/null @@ -1,92 +0,0 @@ -from typing import Optional - -import torch - - -def gpt_prepare_inputs_fn(input_ids: torch.Tensor, past: Optional[torch.Tensor] = None, **kwargs) -> dict: - token_type_ids = kwargs.get("token_type_ids", None) - # only last token for inputs_ids if past is defined in kwargs - if past: - input_ids = input_ids[:, -1].unsqueeze(-1) - if token_type_ids is not None: - token_type_ids = token_type_ids[:, -1].unsqueeze(-1) - - attention_mask = kwargs.get("attention_mask", None) - position_ids = kwargs.get("position_ids", None) - - if attention_mask is not None and position_ids is None: - # create position_ids on the fly for batch generation - position_ids = attention_mask.long().cumsum(-1) - 1 - position_ids.masked_fill_(attention_mask == 0, 1) - if past: - position_ids = position_ids[:, -1].unsqueeze(-1) - else: - position_ids = None - return { - "input_ids": input_ids, - "past_key_values": past, - "use_cache": kwargs.get("use_cache"), - "position_ids": position_ids, - "attention_mask": attention_mask, - "token_type_ids": token_type_ids, - } - - -def update_model_kwargs_fn(outputs: dict, **model_kwargs) -> dict: - if "past_key_values" in outputs: - model_kwargs["past"] = outputs["past_key_values"] - else: - model_kwargs["past"] = None - - # update token_type_ids with last value - if "token_type_ids" in model_kwargs: - token_type_ids = model_kwargs["token_type_ids"] - model_kwargs["token_type_ids"] = torch.cat([token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1) - - # update attention mask - if "attention_mask" in model_kwargs: - attention_mask = model_kwargs["attention_mask"] - model_kwargs["attention_mask"] = torch.cat( - [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1) - - return model_kwargs - - -def opt_prepare_inputs_fn(input_ids: torch.Tensor, - past: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None, - use_cache: Optional[bool] = None, - **kwargs) -> dict: - # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly - if attention_mask is None: - attention_mask = input_ids.new_ones(input_ids.shape) - - if past: - input_ids = input_ids[:, -1:] - # first step, decoder_cached_states are empty - return { - "input_ids": input_ids, # encoder_outputs is defined. 
input_ids not needed - "attention_mask": attention_mask, - "past_key_values": past, - "use_cache": use_cache, - } - - -def bloom_prepare_inputs_fn(input_ids: torch.Tensor, - past: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None, - use_cache: Optional[bool] = None, - **kwargs) -> dict: - # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly - if attention_mask is None: - attention_mask = input_ids.new_ones(input_ids.shape) - - if past: - input_ids = input_ids[:, -1:] - # first step, decoder_cached_states are empty - return { - "input_ids": input_ids, # encoder_outputs is defined. input_ids not needed - "attention_mask": attention_mask, - "past_key_values": past, - "use_cache": use_cache, - } \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/lora.py b/neurons/text/prompting/miners/self_hosted/ppo/lora.py deleted file mode 100644 index 2726018187..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/lora.py +++ /dev/null @@ -1,129 +0,0 @@ -import math -from typing import Optional - -import loralib as lora -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class LoraLinear(lora.LoRALayer, nn.Module): - """Replace in-place ops to out-of-place ops to fit gemini. Convert a torch.nn.Linear to LoraLinear. - """ - - def __init__( - self, - weight: nn.Parameter, - bias: Optional[nn.Parameter], - r: int = 0, - lora_alpha: int = 1, - lora_dropout: float = 0., - fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out) - merge_weights: bool = True, - ): - nn.Module.__init__(self) - lora.LoRALayer.__init__(self, - r=r, - lora_alpha=lora_alpha, - lora_dropout=lora_dropout, - merge_weights=merge_weights) - self.weight = weight - self.bias = bias - - out_features, in_features = weight.shape - self.in_features = in_features - self.out_features = out_features - - self.fan_in_fan_out = fan_in_fan_out - # Actual trainable parameters - if r > 0: - self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features))) - self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r))) - self.scaling = self.lora_alpha / self.r - # Freezing the pre-trained weight matrix - self.weight.requires_grad = False - self.reset_parameters() - if fan_in_fan_out: - self.weight.data = self.weight.data.T - - def reset_parameters(self): - if hasattr(self, 'lora_A'): - # initialize A the same way as the default for nn.Linear and B to zero - nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5)) - nn.init.zeros_(self.lora_B) - - def train(self, mode: bool = True): - - def T(w): - return w.T if self.fan_in_fan_out else w - - nn.Module.train(self, mode) - if self.merge_weights and self.merged: - # Make sure that the weights are not merged - if self.r > 0: - self.weight.data -= T(self.lora_B @ self.lora_A) * self.scaling - self.merged = False - - def eval(self): - - def T(w): - return w.T if self.fan_in_fan_out else w - - nn.Module.eval(self) - if self.merge_weights and not self.merged: - # Merge the weights and mark it - if self.r > 0: - self.weight.data += T(self.lora_B @ self.lora_A) * self.scaling - delattr(self, 'lora_A') - delattr(self, 'lora_B') - self.merged = True - - def forward(self, x: torch.Tensor): - - def T(w): - return w.T if self.fan_in_fan_out else w - - if self.r > 0 and not self.merged: - result = F.linear(x, T(self.weight), bias=self.bias) - if self.r > 0: - result = result + (self.lora_dropout(x) @ 
self.lora_A.t() @ self.lora_B.t()) * self.scaling - return result - else: - return F.linear(x, T(self.weight), bias=self.bias) - - -def lora_linear_wrapper(linear: nn.Linear, lora_rank: int) -> LoraLinear: - assert lora_rank <= linear.in_features, f'LoRA rank ({lora_rank}) must be less than or equal to in features ({linear.in_features})' - lora_linear = LoraLinear(linear.weight, linear.bias, r=lora_rank, merge_weights=False) - return lora_linear - - -def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None: - for name, child in module.named_children(): - if isinstance(child, nn.Linear): - setattr(module, name, lora_linear_wrapper(child, lora_rank)) - else: - convert_to_lora_recursively(child, lora_rank) - - -class LoRAModule(nn.Module): - """A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`. - This calss will convert all torch.nn.Linear layer to LoraLinear layer. - - Args: - lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0. - lora_train_bias (str, optional): Whether LoRA train biases. - 'none' means it doesn't train biases. 'all' means it trains all biases. 'lora_only' means it only trains biases of LoRA layers. - Defaults to 'none'. - """ - - def __init__(self, lora_rank: int = 0, lora_train_bias: str = 'none') -> None: - super().__init__() - self.lora_rank = lora_rank - self.lora_train_bias = lora_train_bias - - def convert_to_lora(self) -> None: - if self.lora_rank <= 0: - return - convert_to_lora_recursively(self, self.lora_rank) - lora.mark_only_lora_as_trainable(self, self.lora_train_bias) \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/loss.py b/neurons/text/prompting/miners/self_hosted/ppo/loss.py deleted file mode 100644 index b8b55965bb..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/loss.py +++ /dev/null @@ -1,117 +0,0 @@ -from typing import Optional - -import torch -import torch.nn as nn - -from utils import masked_mean - - -class GPTLMLoss(nn.Module): - """ - GPT Language Model Loss - """ - - def __init__(self): - super().__init__() - self.loss = nn.CrossEntropyLoss() - - def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: - shift_logits = logits[..., :-1, :].contiguous() - shift_labels = labels[..., 1:].contiguous() - # Flatten the tokens - return self.loss(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) - - -class PolicyLoss(nn.Module): - """ - Policy Loss for PPO - """ - - def __init__(self, clip_eps: float = 0.2) -> None: - super().__init__() - self.clip_eps = clip_eps - - def forward(self, - log_probs: torch.Tensor, - old_log_probs: torch.Tensor, - advantages: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - ratio = (log_probs - old_log_probs).exp() - surr1 = ratio * advantages - surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages - loss = -torch.min(surr1, surr2) - if action_mask is not None: - loss = masked_mean(loss, action_mask) - loss = loss.mean() - return loss - - -class ValueLoss(nn.Module): - """ - Value Loss for PPO - """ - - def __init__(self, clip_eps: float = 0.4) -> None: - super().__init__() - self.clip_eps = clip_eps - - def forward(self, - values: torch.Tensor, - old_values: torch.Tensor, - reward: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - values_clipped = old_values + (values - old_values).clamp(-self.clip_eps, self.clip_eps) - surr1 = (values_clipped - 
reward)**2 - surr2 = (values - reward)**2 - loss = torch.max(surr1, surr2) - loss = loss.mean() - return loss - - -class PPOPtxActorLoss(nn.Module): - """ - To Do: - - PPO-ptx Actor Loss - """ - - def __init__(self, policy_clip_eps: float = 0.2, pretrain_coef: float = 0.0, pretrain_loss_fn=GPTLMLoss()) -> None: - super().__init__() - self.pretrain_coef = pretrain_coef - self.policy_loss_fn = PolicyLoss(clip_eps=policy_clip_eps) - self.pretrain_loss_fn = pretrain_loss_fn - - def forward(self, - log_probs: torch.Tensor, - old_log_probs: torch.Tensor, - advantages: torch.Tensor, - lm_logits: torch.Tensor, - lm_input_ids: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - policy_loss = self.policy_loss_fn(log_probs, old_log_probs, advantages, action_mask=action_mask) - lm_loss = self.pretrain_loss_fn(lm_logits, lm_input_ids) - return policy_loss + self.pretrain_coef * lm_loss - - -class LogSigLoss(nn.Module): - """ - Pairwise Loss for Reward Model - Details: https://arxiv.org/abs/2203.02155 - """ - - def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor: - probs = torch.sigmoid(chosen_reward - reject_reward) - log_probs = torch.log(probs) - loss = -log_probs.mean() - return loss - - -class LogExpLoss(nn.Module): - """ - Pairwise Loss for Reward Model - Details: https://arxiv.org/abs/2204.05862 - """ - - def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor: - loss = torch.log(1 + torch.exp(reject_reward - chosen_reward)).mean() - return loss \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/strategies/__init__.py b/neurons/text/prompting/miners/self_hosted/ppo/strategies/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/miners/self_hosted/ppo/strategies/base.py b/neurons/text/prompting/miners/self_hosted/ppo/strategies/base.py deleted file mode 100644 index 0cd1c1fb55..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/strategies/base.py +++ /dev/null @@ -1,136 +0,0 @@ -from abc import ABC, abstractmethod -from contextlib import nullcontext -from typing import Any, List, Optional, Tuple, Union - -import numpy as np -import torch -import torch.nn as nn -from ..base import LM, Actor, Critic, RewardModel -from coati.replay_buffer import ReplayBuffer -from torch.optim import Optimizer -from torch.utils.data import DataLoader -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .sampler import DistributedSampler - -ModelOptimPair = Tuple[nn.Module, Optimizer] -ModelOrModelOptimPair = Union[nn.Module, ModelOptimPair] - - -class Strategy(ABC): - """ - Base class for training strategies. 
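The `PolicyLoss` module being removed just above implements the standard clipped PPO surrogate: the probability ratio between the new and old policy is clamped to `[1 - clip_eps, 1 + clip_eps]` and the pessimistic (minimum) surrogate is maximised, i.e. its negative is minimised. A small numeric sketch of that computation, with illustrative log-probabilities and advantages rather than values from the deleted code:

```python
import torch

# Clipped PPO policy objective, as in the removed PolicyLoss (sketch only).
clip_eps = 0.2
log_probs = torch.tensor([-0.9, -1.1])       # new policy log-probs (illustrative)
old_log_probs = torch.tensor([-1.0, -1.0])   # behaviour policy log-probs (illustrative)
advantages = torch.tensor([1.5, -0.5])

ratio = (log_probs - old_log_probs).exp()
surr1 = ratio * advantages
surr2 = ratio.clamp(1 - clip_eps, 1 + clip_eps) * advantages
loss = -torch.min(surr1, surr2).mean()        # pessimistic surrogate, negated for minimisation
```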
- """ - - def __init__(self) -> None: - super().__init__() - self.setup_distributed() - - @abstractmethod - def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: Optimizer, **kwargs) -> None: - pass - - @abstractmethod - def optimizer_step(self, optimizer: Optimizer, **kwargs) -> None: - pass - - @abstractmethod - def setup_distributed(self) -> None: - pass - - @abstractmethod - def setup_model(self, model: nn.Module) -> nn.Module: - pass - - @abstractmethod - def setup_optimizer(self, optimizer: Optimizer, model: nn.Module) -> Optimizer: - pass - - @abstractmethod - def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: - pass - - def model_init_context(self): - return nullcontext() - - def prepare( - self, *models_or_model_optim_pairs: ModelOrModelOptimPair - ) -> Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: - """Prepare models or model-optimizer-pairs based on each strategy. - - Example:: - >>> # when fine-tuning actor and critic - >>> (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare((actor, actor_optim), (critic, critic_optim), reward_model, initial_model) - >>> # or when training reward model - >>> (reward_model, reward_model_optim) = strategy.prepare((reward_model, reward_model_optim)) - >>> # or just inference - >>> actor, critic = strategy.prepare(actor, critic) - - Returns: - Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: Models or model-optimizer-pairs in the original order. - """ - - def prepare_model(model: nn.Module): - if isinstance(model, Actor): - return Actor(self.setup_model(self._unwrap_model(model))) - return self.setup_model(self._unwrap_model(model)) - - rets = [] - for arg in models_or_model_optim_pairs: - if isinstance(arg, tuple): - assert len(arg) == 2, f'Expect (model, optimizer) pair, got a tuple with size "{len(arg)}"' - model, optimizer = arg - model = prepare_model(model) - optimizer = self.setup_optimizer(optimizer, self._unwrap_model(model)) - rets.append((model, optimizer)) - elif isinstance(arg, nn.Module): - rets.append(prepare_model(arg)) - else: - raise RuntimeError(f'Expect model or (model, optimizer) pair, got {type(arg)}') - - if len(rets) == 1: - return rets[0] - return rets - - @staticmethod - def _unwrap_model(model: nn.Module) -> nn.Module: - """Useful for saving state dict. As actor is wrapped by Actor class again in `prepare()`, we should unwrap it before saving. - - Args: - model (nn.Module): an actor or a critic - """ - if isinstance(model, Actor) or isinstance(model, LM): - return model.model - return model - - @staticmethod - def _unwrap_actor(actor: Actor) -> nn.Module: - """Get `actor.model` from a wrapped (by `prepare()`) actor. Useful for getting original huggingface model. 
- - Args: - actor (Actor): a wrapped actor - """ - return Strategy._unwrap_model(actor) - - @abstractmethod - def save_model(self, - model: nn.Module, - path: str, - only_rank0: bool = False, - tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - pass - - @abstractmethod - def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None: - pass - - @abstractmethod - def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - pass - - @abstractmethod - def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None: - pass - - def setup_sampler(self, dataset) -> DistributedSampler: - return DistributedSampler(dataset, 1, 0) \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/strategies/naive.py b/neurons/text/prompting/miners/self_hosted/ppo/strategies/naive.py deleted file mode 100644 index 55008bb409..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/strategies/naive.py +++ /dev/null @@ -1,74 +0,0 @@ -from typing import Any, Optional - -import torch -import torch.nn as nn -import torch.optim as optim -from coati.replay_buffer import ReplayBuffer -from coati.models.base import LM, RewardModel -from coati.models.lora import LoraLinear -from torch.optim import Optimizer -from torch.utils.data import DataLoader -from transformers.tokenization_utils_base import PreTrainedTokenizerBase - -from .base import Strategy - - -class NaiveStrategy(Strategy): - """ - Strategy for single GPU. No parallelism is used. - """ - - def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None: - loss.backward() - - def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None: - optimizer.step() - - def setup_distributed(self) -> None: - pass - - def setup_model(self, model: nn.Module) -> nn.Module: - return model - - def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer: - return optimizer - - def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader: - return DataLoader(replay_buffer, - batch_size=replay_buffer.sample_batch_size, - shuffle=True, - drop_last=True, - pin_memory=pin_memory, - collate_fn=replay_buffer.collate_fn) - - def save_model(self, model: nn.Module, path: str, only_rank0: bool = False, tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None: - for module in model.modules(): - if isinstance(module, LoraLinear): - module.merge_weights = True - module.eval() - - if isinstance(model, RewardModel): - state_dict = model.state_dict() - torch.save(state_dict, path) - else: - try: - if isinstance(model, LM): - model = model.model - model.save_pretrained(path) - if tokenizer is not None: - tokenizer.save_pretrained(path) - except AttributeError: - state_dict = model.state_dict() - torch.save(state_dict, path) - - def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None: - unwrapped_model = self._unwrap_model(model) - state_dict = torch.load(path, map_location=map_location) - unwrapped_model.load_state_dict(state_dict, strict=strict) - - def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - torch.save(optimizer.state_dict(), path) - - def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None: - state_dict = torch.load(path, map_location=map_location) - optimizer.load_state_dict(state_dict) \ 
No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/strategies/sampler.py b/neurons/text/prompting/miners/self_hosted/ppo/strategies/sampler.py deleted file mode 100644 index e7ab88cef9..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/strategies/sampler.py +++ /dev/null @@ -1,32 +0,0 @@ -import math - -import numpy as np - - -class DistributedSampler: - - def __init__(self, dataset, num_replicas: int, rank: int) -> None: - self.dataset = dataset - self.num_replicas = num_replicas - self.rank = rank - - if len(self.dataset) % self.num_replicas != 0: - self.num_samples = math.ceil( - (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type] - ) - else: - self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) - - self.total_size = self.num_samples * self.num_replicas - - indices = list(range(len(self.dataset))) - indices = indices[:self.total_size] - assert len(indices) == self.total_size - # subsample - indices = indices[self.rank:self.total_size:self.num_replicas] - assert len(indices) == self.num_samples - self.indices = indices - - def sample(self, batch_size: int) -> list: - sampled_indices = np.random.choice(self.indices, batch_size, replace=False) - return [self.dataset[idx] for idx in sampled_indices] \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/ppo/utils.py b/neurons/text/prompting/miners/self_hosted/ppo/utils.py deleted file mode 100644 index 5a0a9f8a43..0000000000 --- a/neurons/text/prompting/miners/self_hosted/ppo/utils.py +++ /dev/null @@ -1,92 +0,0 @@ -from typing import Optional, Union - -import loralib as lora -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def compute_approx_kl(log_probs: torch.Tensor, - log_probs_base: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - """ - Compute the approximate KL divergence between two distributions. - Schulman blog: http://joschu.net/blog/kl-approx.html - - Args: - log_probs: Log probabilities of the new distribution. - log_probs_base: Log probabilities of the base distribution. - action_mask: Mask for actions. 
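The `compute_approx_kl` helper documented here, together with the `compute_reward` function that follows it, shapes the scalar reward with a per-token KL penalty against the frozen base policy, using the low-variance approximation from the Schulman blog post cited in the docstring, `(exp(log_ratio) - 1) - log_ratio`. A small sketch with made-up numbers (not taken from the deleted code):

```python
import torch

# KL-shaped reward as in the removed compute_approx_kl / compute_reward (sketch).
log_probs = torch.tensor([[-1.0, -0.5, -2.0]])        # policy log-probs per token (illustrative)
log_probs_base = torch.tensor([[-1.2, -0.7, -1.5]])   # frozen reference log-probs (illustrative)
r, kl_coef = 1.0, 0.1

log_ratio = log_probs - log_probs_base
approx_kl = (log_ratio.exp() - 1) - log_ratio          # always >= 0
reward = r - kl_coef * approx_kl.mean(dim=1)           # per-sequence shaped reward
```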
- """ - - log_ratio = log_probs - log_probs_base - approx_kl = (log_ratio.exp() - 1) - log_ratio - if action_mask is not None: - approx_kl = masked_mean(approx_kl, action_mask, dim=1) - return approx_kl - approx_kl = approx_kl.mean(dim=1) - return approx_kl - - -def compute_reward(r: Union[torch.Tensor, float], - kl_coef: float, - log_probs: torch.Tensor, - log_probs_base: torch.Tensor, - action_mask: Optional[torch.Tensor] = None) -> torch.Tensor: - if kl_coef <= 0.0: - return r - kl = compute_approx_kl(log_probs, log_probs_base, action_mask=action_mask) - reward = r - kl_coef * kl - return reward - - -def log_probs_from_logits(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: - log_probs = F.log_softmax(logits, dim=-1) - log_probs_labels = log_probs.gather(dim=-1, index=labels.unsqueeze(-1)) - return log_probs_labels.squeeze(-1) - - -def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.Tensor: - tensor = tensor * mask - tensor = tensor.sum(dim=dim) - mask_sum = mask.sum(dim=dim) - mean = tensor / (mask_sum + 1e-8) - return mean - - -def masked_normalize(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1, eps: float = 1e-8) -> torch.Tensor: - tensor = tensor * mask - mean = masked_mean(tensor, mask, dim=dim) - mean_centered = tensor - mean - var = masked_mean(mean_centered**2, mask, dim=dim) - return mean_centered * var.clamp(min=eps).rsqrt() - - -def normalize(tensor: torch.Tensor, dim: int = 0, eps: float = 1e-8) -> torch.Tensor: - mean = tensor.mean(dim) - mean_centered = tensor - mean - var = (mean_centered**2).mean(dim) - norm = mean_centered * var.clamp(min=eps).rsqrt() - return norm - - -def convert_to_lora(model: nn.Module, - input_size: int, - output_size: int, - lora_rank: int = 16, - lora_alpha: int = 1, - lora_dropout: float = 0., - fan_in_fan_out: bool = False, - merge_weights: bool = True): - if lora_rank > min(input_size, output_size): - raise ValueError(f"LoRA rank {lora_rank} must be less or equal than {min(input_size, output_size)}") - - for name, module in model.named_modules(): - if isinstance(module, nn.Linear): - module._modules[name] = lora.Linear(input_size, - output_size, - r=lora_rank, - lora_alpha=lora_alpha, - lora_dropout=lora_dropout, - fan_in_fan_out=fan_in_fan_out, - merge_weights=merge_weights) \ No newline at end of file diff --git a/neurons/text/prompting/miners/self_hosted/requirements.txt b/neurons/text/prompting/miners/self_hosted/requirements.txt deleted file mode 100644 index d3d7428067..0000000000 --- a/neurons/text/prompting/miners/self_hosted/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -loralib \ No newline at end of file diff --git a/neurons/text/prompting/validators/constitution/neuron.py b/neurons/text/prompting/validators/constitution/neuron.py deleted file mode 100644 index 3c36300ac2..0000000000 --- a/neurons/text/prompting/validators/constitution/neuron.py +++ /dev/null @@ -1,97 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# 
the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import os -import time -import json -import math -import copy -import queue -import torch -import random -import bittensor -import argparse -import bittensor as bt - -from loguru import logger -from types import SimpleNamespace -from typing import List, Optional, Tuple, Dict - -class neuron: - @classmethod - def check_config( cls, config: 'bt.Config' ): - r""" Checks/validates the config namespace object. - """ - bt.logging.check_config( config ) - bt.wallet.check_config( config ) - bt.subtensor.check_config( config ) - full_path = os.path.expanduser('{}/{}/{}/netuid{}/{}'.format( config.logging.logging_dir, config.wallet.name, config.wallet.hotkey, config.netuid, config.neuron.name )) - config.neuron.full_path = os.path.expanduser( full_path ) - if not os.path.exists( config.neuron.full_path ): - os.makedirs( config.neuron.full_path, exist_ok = True) - - @classmethod - def config ( cls ): - parser = argparse.ArgumentParser() - parser.add_argument( '--netuid', type = int, help = 'Prompting network netuid', default = 1 ) - parser.add_argument( '--neuron.name', type = str, help = 'Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ', default = 'core_prompting_validator') - parser.add_argument( '--neuron.device', type = str, help = 'Device to run the validator on.', default = "cuda" if torch.cuda.is_available() else "cpu" ) - bt.wallet.add_args( parser ) - bt.subtensor.add_args( parser ) - bt.logging.add_args( parser ) - bt.axon.add_args( parser ) - return bt.config( parser ) - - def __init__( self ): - self.config = neuron.config() - self.check_config( self.config ) - bt.logging( config = self.config, logging_dir = self.config.neuron.full_path ) - print( self.config ) - self.subtensor = bt.subtensor ( config = self.config ) - self.wallet = bt.wallet ( config = self.config ) - self.metagraph = bt.metagraph( netuid = self.config.netuid, network = self.subtensor.network ) - print ('done init') - - def train( self ): - while True: - uids = torch.tensor( random.sample( self.metagraph.uids.tolist(), 2 ), dtype = torch.int64 ) - A = bittensor.text_prompting( keypair = self.wallet.hotkey, axon = self.metagraph.axons[uids[0]] ) - B = bittensor.text_prompting( keypair = self.wallet.hotkey, axon = self.metagraph.axons[uids[1]] ) - resp_A = A.forward( - roles = ['user'], - messages = ['ask me a random question?'], - timeout = 5, - ) - resp_B = B.forward( - roles = ['user'], - messages = ['ask me a random question?'], - timeout = 5, - ) - bittensor.logging.info(str(resp_A)) - bittensor.logging.info(str(resp_B)) - - if resp_A.is_success and resp_B.is_success: - bittensor.logging.info('success') - break - else: - bittensor.logging.info('failure') - continue - - -if __name__ == '__main__': - bittensor.logging.info( 'neuron().train()' ) - neuron().train() diff --git a/neurons/text/prompting/validators/core/README.md b/neurons/text/prompting/validators/core/README.md deleted file mode 100644 index 50a01bbb28..0000000000 --- a/neurons/text/prompting/validators/core/README.md +++ /dev/null @@ 
-1,95 +0,0 @@ -# Bittensor Prompting Validator -This repository the the core validator for the bittensor prompting network. - -## Prerequisites -- Python 3.8+ -- Bittensor - -## Installation -1. Clone the repository -2. Install the required packages with `pip install -r neurons/text/prompting/validators/core/requirements.txt` -For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation. - -## Example Usage -To run the Core Bittensor Prompting Validator with default settings, use the following command: - -``` -python3 -m pip install -r neurons/text/prompting/validators/core/requirements.txt -python3 neurons/text/prompting/validators/core/neuron.py -``` - -# Full Usage -``` -usage: neuron.py [-h] [--netuid NETUID] [--neuron.name NEURON.NAME] [--neuron.reward_model_name NEURON.REWARD_MODEL_NAME] [--neuron.inference_topk NEURON.INFERENCE_TOPK] [--neuron.training_topk NEURON.TRAINING_TOPK] - [--prompting.model_name PROMPTING.MODEL_NAME] [--prompting.min_tokens PROMPTING.MIN_TOKENS] [--prompting.max_tokens PROMPTING.MAX_TOKENS] [--prompting.temperature PROMPTING.TEMPERATURE] - [--prompting.top_p PROMPTING.TOP_P] [--prompting.logprobs PROMPTING.LOGPROBS] [--prompting.repetition_penalty PROMPTING.REPETITION_PENALTY] [--wallet.name WALLET.NAME] [--wallet.hotkey WALLET.HOTKEY] - [--wallet.path WALLET.PATH] [--wallet._mock] [--wallet.reregister WALLET.REREGISTER] [--subtensor.network SUBTENSOR.NETWORK] [--subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT] [--subtensor._mock] - [--subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES] [--subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL] [--subtensor.register.no_output_in_place] [--subtensor.register.verbose] - [--subtensor.register.cuda.use_cuda] [--subtensor.register.cuda.no_cuda] [--subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...]] - [--subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB] [--metagraph._mock] [--logging.debug] [--logging.trace] [--logging.record_log] [--logging.logging_dir LOGGING.LOGGING_DIR] [--config CONFIG] [--strict] - -optional arguments: - -h, --help show this help message and exit - --netuid NETUID Prompting network netuid - --neuron.name NEURON.NAME - Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name - --neuron.reward_model_name NEURON.REWARD_MODEL_NAME - GPTRewardModel name - --neuron.inference_topk NEURON.INFERENCE_TOPK - At inference time, how many miners to we query and return the top rewarded. - --neuron.training_topk NEURON.TRAINING_TOPK - During training time, how many miners to we query for each batch based on scores from gating network. - --prompting.model_name PROMPTING.MODEL_NAME - Name of the model to use - --prompting.min_tokens PROMPTING.MIN_TOKENS - Minimum number of tokens to generate - --prompting.max_tokens PROMPTING.MAX_TOKENS - Maximum number of tokens to generate - --prompting.temperature PROMPTING.TEMPERATURE - Temperature for sampling - --prompting.top_p PROMPTING.TOP_P - Top p for sampling - --prompting.logprobs PROMPTING.LOGPROBS - Number of logprobs to return - --prompting.repetition_penalty PROMPTING.REPETITION_PENALTY - Repetition penalty for sampling - --wallet.name WALLET.NAME - The name of the wallet to unlock for running bittensor (name mock is reserved for mocking this wallet) - --wallet.hotkey WALLET.HOTKEY - The name of wallet's hotkey. 
- --wallet.path WALLET.PATH - The path to your bittensor wallets - --wallet._mock To turn on wallet mocking for testing purposes. - --wallet.reregister WALLET.REREGISTER - Whether to reregister the wallet if it is not already registered. - --subtensor.network SUBTENSOR.NETWORK - The subtensor network flag. The likely choices are: -- finney (main network) -- local (local running network) -- mock (creates a mock connection (for testing)) If this option is set it overloads - subtensor.chain_endpoint with an entry point node from that network. - --subtensor.chain_endpoint SUBTENSOR.CHAIN_ENDPOINT - The subtensor endpoint flag. If set, overrides the --network flag. - --subtensor._mock To turn on subtensor mocking for testing purposes. - --subtensor.register.num_processes SUBTENSOR.REGISTER.NUM_PROCESSES, -n SUBTENSOR.REGISTER.NUM_PROCESSES - Number of processors to use for registration - --subtensor.register.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --subtensor.register.cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, --cuda.update_interval SUBTENSOR.REGISTER.UPDATE_INTERVAL, -u SUBTENSOR.REGISTER.UPDATE_INTERVAL - The number of nonces to process before checking for next block during registration - --subtensor.register.no_output_in_place, --no_output_in_place - Whether to not ouput the registration statistics in-place. Set flag to disable output in-place. - --subtensor.register.verbose - Whether to ouput the registration statistics verbosely. - --subtensor.register.cuda.use_cuda, --cuda, --cuda.use_cuda - Set flag to use CUDA to register. - --subtensor.register.cuda.no_cuda, --no_cuda, --cuda.no_cuda - Set flag to not use CUDA for registration - --subtensor.register.cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...], --cuda.dev_id SUBTENSOR.REGISTER.CUDA.DEV_ID [SUBTENSOR.REGISTER.CUDA.DEV_ID ...] - Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest). - --subtensor.register.cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB, --cuda.TPB SUBTENSOR.REGISTER.CUDA.TPB - Set the number of Threads Per Block for CUDA. - --metagraph._mock To turn on metagraph mocking for testing purposes. - --logging.debug Turn on bittensor debugging information - --logging.trace Turn on bittensor trace level information - --logging.record_log Turns on logging to file. - --logging.logging_dir LOGGING.LOGGING_DIR - Logging default root directory. - --config CONFIG If set, defaults are overridden by passed file. - --strict If flagged, config will check that only exact arguemnts have been set. -``` \ No newline at end of file diff --git a/neurons/text/prompting/validators/core/gating.py b/neurons/text/prompting/validators/core/gating.py deleted file mode 100644 index 5966258192..0000000000 --- a/neurons/text/prompting/validators/core/gating.py +++ /dev/null @@ -1,120 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. 
- -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import torch -import argparse -import bittensor -from transformers import AutoModel, AutoTokenizer, AutoConfig - -class GatingModel( torch.nn.Module ): - """ - This class is a PyTorch module that encapsulates the gating model functionality. - - - The backward method runs a backward pass through the model using the mean squared error between the normalized scores and the normalized rewards as the loss function. - - The forward method runs a forward pass through the model, encoding the input message and generating scores for each uid in the network. The scores are returned as a tensor. - """ - - @classmethod - def add_args( cls, parser: argparse.ArgumentParser ): - """ - Adds command line arguments to the parser that are used to configure the gating model. - The arguments added are: - - `--gating.model_name`: Name of the pre-trained transformer-based language model to use as the encoding layer for the gating model. (default: 'EleutherAI/gpt-neo-125m') - - `--gating.num_uids`: Number of uids to gate on. (default: 4096) - - `--gating.learning_rate`: Learning rate for the gating model optimizer. (default: 0.01) - - `--gating.momentum`: Momentum for the gating model optimizer. (default: 0.9) - """ - parser.add_argument('--gating.model_name', type=str, default='EleutherAI/gpt-neo-125m', help='Name of the model to use as the encoding layer for the gating model') - parser.add_argument('--gating.num_uids', type=int, default=4096, help='Number of uids to gate on') - parser.add_argument('--gating.learning_rate', type=float, default=0.01, help='Learning rate for the gating model') - parser.add_argument('--gating.momentum', type=float, default=0.9, help='Momentum for the gating model') - - @classmethod - def config ( cls ): - """ - Returns a configuration object that contains the command line arguments for the gating model. - """ - parser = argparse.ArgumentParser() - cls.add_args( parser ) - return bittensor.config( parser ) - - @classmethod - def check_config( cls, config: 'bittensor.Config' ): - """ - Validates the configuration object for the gating model. - """ - pass - - def __init__( - self, - metagraph: 'bittensor.metagraph.Metagraph', - config: 'bittensor.config' = None, - model_name: str = None, - num_uids: int = None - ): - """ - Initializes the gating model. - - `metagraph`: A reference to the Bittensor metagraph object. - - `config`: Configuration object for the gating model. If `None`, the default configuration is used. - - `model_name`: Name of the pre-trained transformer-based language model to use as the encoding layer for the gating model. If `None`, the default model name specified in the configuration is used. - - `num_uids`: Number of uids to gate on. If `None`, the default number specified in the configuration is used. 
- """ - super(GatingModel, self).__init__() - if config is None: config = GatingModel.config() - if model_name is not None: config.gating.model_name = model_name - config.gating.num_uids = num_uids if num_uids is not None else metagraph.n - self.config = config - self.num_uids = config.gating.num_uids - self.device = torch.device( self.config.neuron.device ) - self.tokenizer = AutoTokenizer.from_pretrained( self.config.gating.model_name ) - self.model = AutoModel.from_pretrained( self.config.gating.model_name) - self.linear = torch.nn.Linear( self.model.config.hidden_size, config.gating.num_uids ) - self.optimizer = torch.optim.SGD( - [ {"params": self.parameters()} ], - lr = self.config.gating.learning_rate, - momentum = self.config.gating.momentum, - ) - - def backward( self, scores: torch.FloatTensor, rewards: torch.FloatTensor ): - """ Runs a backward pass through the model. - Args: - scores (:obj:`torch.FloatTensor` of shape :obj:`(metagraph.n)`): - Scores for each uids as output by the gating model. - rewards (:obj:`torch.FloatTensor` of shape :obj:`(metagraph.n)`): - Rewards for each uids as output by the reward model. - """ - normalized_scores = torch.nn.functional.softmax( scores, dim=0 ).to( self.device ) - nomralized_rewards = torch.nn.functional.softmax( rewards, dim=0 ).to( self.device ) - loss = torch.nn.functional.mse_loss( normalized_scores, nomralized_rewards.detach() ) - loss.backward() - self.optimizer.step() - - def forward( self, message: str ) -> 'torch.FloatTensor': - """ Runs a forward pass through the model. - Args: - message (:obj:`str`): - text message to be encoded. - Returns: - scores (:obj:`torch.FloatTensor` of shape :obj:`(network_size)`): - Scores for each uids as output by the gating model. - """ - inputs = self.tokenizer( message, return_tensors="pt" ,truncation=True, max_length=2048).to( self.device ) - with torch.no_grad(): - hidden_states = self.model( **inputs ).last_hidden_state[0, -1, :] - return self.linear( hidden_states ) - - diff --git a/neurons/text/prompting/validators/core/neuron.py b/neurons/text/prompting/validators/core/neuron.py deleted file mode 100644 index 3b88248c24..0000000000 --- a/neurons/text/prompting/validators/core/neuron.py +++ /dev/null @@ -1,803 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import os -import time -import math -import copy -import queue -import torch -import random -import bittensor -import argparse -import bittensor as bt -import traceback - -from loguru import logger -from types import SimpleNamespace -from typing import List, Optional, Tuple, Dict -from reward import RewardModel -from gating import GatingModel -from transformers import AutoTokenizer, AutoModelForSequenceClassification -from datasets import load_dataset -from datetime import datetime - -__default_question_prompt__ = ''' -Ask me a random question about anything. Make the question very domain specific. Do not include the answer in the question. -''' - -__default_base_prompt__ = ''' -You are designed to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. -''' - -__default_follow_up_prompt__ = ''' -Ask a follow up question. -''' -class neuron: - @classmethod - def check_config( cls, config: 'bt.Config' ): - r""" Checks/validates the config namespace object. - """ - bt.logging.check_config( config ) - bt.wallet.check_config( config ) - bt.subtensor.check_config( config ) - full_path = os.path.expanduser('{}/{}/{}/netuid{}/{}'.format( config.logging.logging_dir, config.wallet.name, config.wallet.hotkey, config.netuid, config.neuron.name )) - config.neuron.full_path = os.path.expanduser( full_path ) - config.neuron.reward_path = os.path.expanduser( config.neuron.reward_path ) - if not os.path.exists( config.neuron.full_path ): - os.makedirs( config.neuron.full_path, exist_ok = True) - if not os.path.exists( config.neuron.reward_path + '/hf_ckpt.pt' ): - os.makedirs( config.neuron.reward_path, exist_ok = True ) - os.system( - f"wget -O { config.neuron.reward_path + '/hf_ckpt.pt'} \ - https://huggingface.co/Dahoas/gptj-rm-static/resolve/main/hf_ckpt.pt" - ) - if not config.neuron.dont_save_events: - # Add custom event logger for the events. 
- logger.level("EVENTS", no=38, icon="📝") - logger.add( - config.neuron.full_path + "/" + "completions.log", - rotation=config.neuron.events_retention_size, serialize=True, enqueue=True, backtrace=False, diagnose=False, level="EVENTS", - format = "{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message} | {extra[prompt]} {extra[completion]} {extra[uids]} {extra[all_uids]} {extra[rewards]}{extra[all_completions]} {extra[block]}" - ) - - def record_event( self, event: SimpleNamespace ): - self.history.put( event ) - if not self.config.neuron.dont_save_events: - logger.log( - "EVENTS", - "events", - prompt = event.message, - completion = event.completion, - uids = event.uids.tolist(), - all_uids = event.all_uids.tolist(), - rewards = event.rewards.tolist(), - all_completions = event.all_completions, - block = event.block.item(), - ) - - @classmethod - def add_args( cls, parser ): - # Netuid Arg - parser.add_argument( '--netuid', type = int, help = 'Prompting network netuid', default = 1 ) - parser.add_argument( '--neuron.name', type = str, help = 'Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ', default = 'core_prompting_validator') - parser.add_argument( '--neuron.base_prompt', type=str, help = 'Prompt injected before a question is completed by miners on the network', default = __default_base_prompt__ ) - parser.add_argument( '--neuron.follow_up_prompt', type=str, help = 'Follow up prompt that is completed by miners on the network.', default = __default_follow_up_prompt__ ) - parser.add_argument( '--neuron.reset_bootstrap_prompt_frequency', type=int, help = 'How frequent to use the base follow up question.', default = 3 ) - parser.add_argument( '--neuron.question_prompt', type=str, help = 'Prompt used to generate questions from the network whicha are used to evaluate other miners.', default = __default_question_prompt__ ) - parser.add_argument( '--neuron.reward_model_name', type = str, help = 'GPTRewardModel name', default = 'Dahoas/gpt2-rm-static') - parser.add_argument( '--neuron.length_timeout_multiplier', type = int, help = 'Base timeout for all requests.', default = 0.01 ) - parser.add_argument( '--neuron.inference_topk', type = int, help = 'At inference time, how many miners to we query and return the top rewarded.', default = 10 ) - parser.add_argument( '--neuron.training_topk', type = int, help = 'During training time, how many miners to we query for each batch based on scores from gating network.', default = 50 ) - parser.add_argument( '--neuron.training_timeout', type = int, help = 'Query timeout during training', default = 4 ) - parser.add_argument( '--neuron.inference_timeout', type = int, help = 'Query timeout during inference', default = 10 ) - parser.add_argument( '--neuron.inference_only', action = 'store_true', help = 'If set, training off and only inference will be served via axon.', default = False ) - parser.add_argument( '--neuron.axon_off', action = 'store_true', help = 'If set, the axon will be turned off.', default = False ) - parser.add_argument( '--neuron.reward_path', type = str, help = 'Path to reward model.', default = '~/.bittensor/reward_models' ) - parser.add_argument( '--neuron.max_history', type = int, help = 'Maximum number history values to store at any time.', default = 100000 ) - parser.add_argument( '--neuron.device', type = str, help = 'Device to run the validator on.', default = "cuda" if torch.cuda.is_available() else "cpu" ) - parser.add_argument( '--neuron.epoch_length_override', type = int, help = 'Override the 
default timeout', default = -1 ) - parser.add_argument( '--neuron.dont_save_events', action = 'store_true', help = 'If set, we dont save events to a log file.', default = False ) - parser.add_argument( '--neuron.events_retention_size', type = str, help = 'Events retention size.', default = "2 GB" ) - parser.add_argument( '--neuron.no_reward_model', action = 'store_true', help = 'If set, we dont load the reward model instead use just the scores.', default = False ) - parser.add_argument( '--neuron.question_random_sample_uids', action = 'store_true', help = 'If set, random sample uids to get question.', default = False ) - parser.add_argument( '--neuron.reward_shift', type = int, help = 'The value to shift rewards for calculation.', default = 3 ) - parser.add_argument( '--neuron.no_nsfw_filter', action = 'store_true', help = 'If set, allow handling of not-safe-for-work messages.', default = False ) - parser.add_argument( '--neuron.vpermit_tao_limit', type = int, help = 'The maximum number of TAO allowed to query a validator with a vpermit.', default = 1024 ) - - @classmethod - def config ( cls ): - parser = argparse.ArgumentParser() - bt.wallet.add_args( parser ) - bt.subtensor.add_args( parser ) - bt.logging.add_args( parser ) - bt.axon.add_args( parser ) - GatingModel.add_args( parser ) - cls.add_args( parser ) - return bt.config( parser ) - - def __init__( self ): - self.config = neuron.config() - self.check_config( self.config ) - bt.logging( config = self.config, logging_dir = self.config.neuron.full_path ) - print( self.config ) - - self.subtensor = bt.subtensor ( config = self.config ) - self.device = torch.device( self.config.neuron.device ) - self.wallet = bt.wallet ( config = self.config ) - self.metagraph = bt.metagraph( netuid = self.config.netuid, network = self.subtensor.network ) - self.wallet.create_if_non_existent() - self.wallet.reregister( subtensor = self.subtensor, netuid = self.config.netuid ) - self.uid = self.wallet.get_uid( subtensor = self.subtensor, netuid = self.config.netuid ) - self.tokenizer = AutoTokenizer.from_pretrained( 'EleutherAI/gpt-j-6b' ) - - # check if invoking iter() is indeed necessary - self.dataset = iter(load_dataset('squad_v2', split='train', streaming=True).shuffle(buffer_size=10000)) - - self.moving_averaged_scores = torch.zeros((self.metagraph.n)).to( self.device ) - self.alpha = 0.99 - self.hotkeys = self.metagraph.hotkeys - # Reward model - if not self.config.neuron.no_reward_model: - bittensor.logging.info('Loading reward model') - self.reward_model = RewardModel( model_path = 'EleutherAI/gpt-j-6b', device = self.config.neuron.device ) - for fpath in os.listdir( self.config.neuron.reward_path ): - if fpath.endswith(".pt") or fpath.endswith(".bin"): - checkpoint = os.path.join( self.config.neuron.reward_path, fpath ) - break - ckpt_state = torch.load( checkpoint ) - self.reward_model.load_state_dict( ckpt_state ) - self.reward_model.eval() - self.reward_model.half() - self.reward_model.requires_grad_( False ) - self.reward_model.to( self.device ) - bittensor.logging.info('done loading reward model') - - # Init the gating model which learns which miners to select for each query. - self.gating_model = GatingModel( metagraph = self.metagraph, config = self.config ).to( self.device ) - # Denddrite pool for querying the network. 
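As a sketch of the checkpoint-discovery logic in the constructor above: the validator scans `neuron.reward_path` for the first `.pt` or `.bin` file, loads its state dict into the reward model, and freezes the model in half precision for inference. The helper name and the `reward_model` argument below are illustrative; the deleted code performs the same steps inline on `self.reward_model`:

```python
import os
import torch

# Reward-checkpoint discovery and freezing, as done in the removed __init__ (sketch).
def load_reward_checkpoint(reward_model: torch.nn.Module, reward_path: str) -> torch.nn.Module:
    reward_path = os.path.expanduser(reward_path)
    for fname in os.listdir(reward_path):
        if fname.endswith(".pt") or fname.endswith(".bin"):
            checkpoint = os.path.join(reward_path, fname)
            break
    else:
        raise FileNotFoundError(f"no reward checkpoint found in {reward_path}")
    reward_model.load_state_dict(torch.load(checkpoint))
    return reward_model.eval().half().requires_grad_(False)
```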
- self.dendrite_pool = bt.text_prompting_pool( keypair = self.wallet.hotkey, metagraph = self.metagraph ) - self.inference_pool = bt.text_prompting_pool( keypair = self.wallet.hotkey, metagraph = self.metagraph ) - # History of forward events. - self.history = queue.Queue( maxsize = self.config.neuron.max_history ) - # Get a list of peers delegating to me - delegated = self.subtensor.get_delegated( self.wallet.coldkeypub.ss58_address ) - self.my_nominators = { nomin[0]: nomin[1] for nomin in delegated[0][0].nominators } if len(delegated) else {} - - self.load() - self.check_weights() - - # set up filter model - filter_model_path = 'facebook/roberta-hate-speech-dynabench-r4-target' - self.filter_model = AutoModelForSequenceClassification.from_pretrained(filter_model_path).to(self.device) - self.filter_tokenizer = AutoTokenizer.from_pretrained(filter_model_path) - self.filter_tokenizer.pad_token = self.filter_tokenizer.eos_token - self.filter_message_count = 0 - - # Axon set and served for inference requests, unless --neuron.axon_off flag is set. - if not self.config.neuron.axon_off: - # Build synapse entrypoint. - class Synapse( bittensor.TextPromptingSynapse ): - def priority( _, forward_call: "bittensor.TextPromptingForwardCall" ) -> float: - if forward_call.src_hotkey == self.wallet.hotkey.ss58_address: return math.inf # myself. - elif forward_call.src_hotkey in self.my_nominators: return self.my_nominators[ forward_call.src_hotkey ].tao # Delegates. - else: return 0.0 # Everyone else. - - def blacklist( _, forward_call: "bittensor.TextPromptingForwardCall" ) -> bool: - if forward_call.src_hotkey == self.wallet.hotkey.ss58_address: - return True - - elif forward_call.src_hotkey in self.metagraph.hotkeys: - uid = self.metagraph.hotkeys.index(forward_call.src_hotkey) - if self.metagraph.validator_permit[uid]: - return True - return False # Non Validator miners - - elif forward_call.src_hotkey in self.my_nominators: - return False # Delegates, dont blacklist. - else: - return False # Everyone else, dont blacklist. - - def backward( self, messages: List[Dict[str, str]], response: str, rewards: torch.FloatTensor ) -> str: pass - - def forward( _, messages: List[Dict[str, str]] ) -> str: - return self.inference( - messages = messages, - timeout = self.config.neuron.inference_timeout - ) - - def multi_forward( _, messages: List[Dict[str, str]] ) -> str: - return self.inference( - messages = messages, - timeout = self.config.neuron.inference_timeout, - return_all = True - ) - - # Serve axon. - self.axon = bittensor.axon( - wallet = self.wallet, - metagraph = self.metagraph, - config = self.config, - ) - self.synapse = Synapse( axon = self.axon ) - self.axon.start() - self.subtensor.serve_axon( self.config.netuid, self.axon ) - - def filter_message( - self, - message - ) -> bool: - """ Check if the message is related to any sexual content. - - Args: - message (str): - The message that we check if we should filter out. - Returns: - result (bool): - True indicates we should filter out the result, false indicates the result is safe. - """ - # If no filter needed, then just return false withough checking. 
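The `priority` rule of the Synapse defined above reduces to a three-way ranking: the validator's own hotkey is served first, its nominators are ranked by their staked TAO, and all other callers get zero priority. A standalone sketch with hypothetical hotkeys and stake values (the deleted code reads the stake from Balance objects on `self.my_nominators`):

```python
import math

# Request-priority rule of the removed validator Synapse (sketch).
my_hotkey = "5F...self"                                   # hypothetical
my_nominators = {"5G...alice": 12.5, "5H...bob": 3.0}     # hypothetical hotkey -> staked tao

def priority(src_hotkey: str) -> float:
    if src_hotkey == my_hotkey:
        return math.inf                # always serve our own requests
    if src_hotkey in my_nominators:
        return my_nominators[src_hotkey]
    return 0.0                         # everyone else
```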
- if self.config.neuron.no_nsfw_filter: - return False - - now = datetime.now() - dt_string = now.strftime("%d/%m/%Y %H:%M:%S") - tokenized = self.filter_tokenizer(message) - input_ids = tokenized['input_ids'] - bound_score1 = 0.5 - bound_score2 = 0.5 - - while len(input_ids) > 0: - _input_ids = input_ids[:512] - - with torch.no_grad(): - output = self.filter_model(torch.tensor([_input_ids]).to(self.device)) - - filter_out = output.logits[0, 0] < bound_score1 or output.logits[0, 1] > bound_score2 - - if filter_out: - bittensor.logging.debug( 'filtered message', message ) - break - else: - bittensor.logging.debug( 'safe message', message ) - - input_ids = input_ids[512:] - - self.filter_message_count += 1 - return filter_out - - def forward( - self, - roles: List[ str ], - messages: List[ str ], - topk: Optional[int] = None, - random_sample_uids: Optional[ bool ] = False, - train_gating_model: Optional[ bool ] = False, - train_network: Optional[ bool ] = False, - timeout: float = None, - question: bool = False, - ) -> SimpleNamespace: - """ - Queries the network for a response to the passed message using a gating model to select the best uids. - Trains the gating model based on the rewards calculated for the successful completions and passes rewards - backward for potential PPO. - - Args: - roles ( List[ str ] ): - roles associated with messages. - message ( List[ str ] ): - messages content for each role. - topk (Optional[int]): - The number of uids to consider for the query. If None or -1, all uids will be considered. - If provided, selects the top k uids based on the gating model scores. - random_sample_uids( bool, default = False ): - If True, randomly samples the uids to query rather than using topk. - train_gating_model ( bool, default = False ): - If True, trains the gating model based on the rewards calculated for the successful completions. - train_network ( bool, default = False ): - If True, sends backward messages to the network. - Returns: - result (SimpleNamespace): - A namespace containing the completion with the highest reward, message, uids, - rewards, scores, and all completions. - """ - bittensor.logging.info( 'forward()' ) - bittensor.logging.debug( 'roles', roles ) - bittensor.logging.debug( 'message', messages ) - - # Format the messages for the query. - unravelled_message = '' - for role, message in list(zip( roles, messages )): - if role == 'system': unravelled_message += 'system: ' + message + '\n' - if role== 'assistant': unravelled_message += 'assistant: ' + message + '\n' - if role == 'user': unravelled_message += 'user: ' + message + '\n' - - # Set `topk` to the number of items in `self.metagraph.n` if `topk` is not provided or is -1. - # Find the available `uids` that are currently serving. - # If `topk` is larger than the number of available `uids`, set `topk` to the number of available `uids`. - # Check if we have vpermit and if we do, ensure query only UIDs with less than vpermit_tao_limit. - def available( uid ) -> bool: - # Filter non serving axons. - if not self.metagraph.axons[uid].is_serving: - return False - # Filter validator permit > 1024 stake. - if self.metagraph.validator_permit[uid]: - if self.metagraph.S[uid] > self.config.neuron.vpermit_tao_limit: - return False - # Available otherwise. 
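The `filter_message` routine above classifies the text in 512-token windows with the `facebook/roberta-hate-speech-dynabench-r4-target` model and flags the message when the first logit falls below 0.5 or the second rises above 0.5 (the thresholds used in the deleted code). A condensed, self-contained sketch of that loop; loading the model at import time here is only for illustration:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Windowed NSFW/hate-speech filter, as in the removed filter_message (sketch).
model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def filter_message(message: str) -> bool:
    input_ids = tokenizer(message)["input_ids"]
    while len(input_ids) > 0:
        window = input_ids[:512]
        with torch.no_grad():
            logits = model(torch.tensor([window])).logits
        if logits[0, 0] < 0.5 or logits[0, 1] > 0.5:   # thresholds from the deleted code
            return True                                # filter the message out
        input_ids = input_ids[512:]
    return False                                       # message considered safe
```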
- return True - candidate_uids = [uid for uid, ax in enumerate(self.metagraph.axons) if available( uid )] - available_uids = torch.tensor( candidate_uids, dtype = torch.int64 ).to( self.device ) - if topk is None or topk == -1: topk = self.metagraph.n.item() - if topk > len( available_uids ): topk = len( available_uids ) - if len( available_uids ) == 0: bittensor.logging.error( 'no available uids' ); return None - bittensor.logging.trace( 'available_uids', available_uids ) - bittensor.logging.trace( 'topk', topk ) - - # We run the gating network here to get the best uids - # Use the gating model to generate scores for each `uid`. - scores = self.gating_model( unravelled_message ).to( self.device ) - bittensor.logging.trace( 'scores', scores ) - - # Select the top `topk` `uids` based on the highest `scores`. - # Use the selected `uids` to query the dendrite pool. - # Print the `completions`. - if random_sample_uids: - topk_uids = torch.tensor( random.sample( available_uids.tolist(), topk ), dtype = torch.int64 ).to( self.device ) - else: - topk_uids = available_uids[ scores[ available_uids ].sort()[ 1 ][ -topk: ]] - forward_calls = self.dendrite_pool( - roles = roles, - messages = messages, - uids = topk_uids, - timeout = timeout, - ) - bittensor.logging.trace( 'topk_uids', topk_uids ) - - # Filter out any `None` `completions`. - successful_uids = torch.tensor([uid for uid, call in list(zip(topk_uids, forward_calls)) if call is not None and call.completion is not None and len(call.completion)>10], dtype=torch.int64).to(self.device) - successful_completions = [call.completion for call in forward_calls if call is not None and call.completion is not None and len(call.completion)>10] - unsuccessful_uids = torch.tensor([uid for uid in topk_uids if uid not in successful_uids]) - bittensor.logging.debug( 'successful_uids', successful_uids ) - if len( successful_completions ) == 0: bittensor.logging.error('no successful completions'); return None - - # Calculate the rewards for the successful `completions` using the reward model. - # Print the rewards for all `uids`.` - flattened_message_for_reward = '' - if not self.config.neuron.no_reward_model: - for role_i, message_i in list(zip(roles, messages)): - if role_i != 'system': flattened_message_for_reward += message_i.strip() + '\n' - full_completions_for_reward = [ 'Question: ' + flattened_message_for_reward + 'Answer: ' + comp.strip() for comp in successful_completions ] - completions_for_reward = [comp.strip() for comp in successful_completions] - rewards = self.reward_model.reward( full_completions_for_reward, completions_for_reward, difference = True, shift = self.config.neuron.reward_shift).detach().to( self.device ) - bittensor.logging.trace( 'rewards', rewards ) - else: - rewards = scores[ successful_uids ] - - # Train the gating model using the scores and rewards of the successful `completions`. - if train_gating_model: - self.gating_model.backward( scores = scores[ successful_uids ], rewards = rewards ) - bittensor.logging.trace( 'Apply backward to gating model' ) - - # Pass rewards backward for potential PPO. - if train_network: - self.dendrite_pool.backward( - forward_calls = forward_calls, - rewards = rewards, - timeout = timeout, - ) - bittensor.logging.trace( 'Applied backward to network.' 
) - - best_idx = rewards.detach().argmax() - bittensor.logging.trace( 'rewards', rewards ) - bittensor.logging.trace('successful_completions', len(successful_completions)) - bittensor.logging.trace('best_idx', best_idx) - best_completion = successful_completions[best_idx] - - - # Save the query history in a `result` object. - # Return the `completion` with the highest reward. - event = SimpleNamespace( - completion = successful_completions[ rewards.argmax( dim = 0 ) ], - message = message, - uids = successful_uids, - rewards = rewards, - all_uids = topk_uids, - all_completions = successful_completions, - block = self.metagraph.block, - is_question = message == self.config.neuron.question_prompt, - best_completion = best_completion - ) - self.record_event( event ) - - # First we normalize the rewards with a softmax. - normalized_rewards = torch.nn.functional.softmax( event.rewards.to( self.device ), dim=0 ) - - # We scatter the normalized onto the moving scores (updating them but not changing the source) - scattered_rewards = self.moving_averaged_scores.scatter(0, event.uids.to( self.device ), normalized_rewards.to( self.device ) ) - scattered_rewards = scattered_rewards.scatter(0, unsuccessful_uids.to( self.device ) , torch.zeros_like(unsuccessful_uids, dtype=torch.float).to( self.device ) ) - - # We now perform a moving average of the scattered rewards. - self.moving_averaged_scores = self.alpha * self.moving_averaged_scores + ( 1 - self.alpha ) * scattered_rewards - bittensor.logging.trace( 'normalized_rewards', normalized_rewards ) - bittensor.logging.trace( 'scattered_rewards', scattered_rewards ) - bittensor.logging.trace( 'moving_averaged_scores', self.moving_averaged_scores ) - print("===== Best Completion =====") - print(f"\n===== {successful_uids[best_idx], rewards[best_idx]} =====\n") - - print('flattened_message_for_reward:\n', flattened_message_for_reward) - print('completion:\n', best_completion.strip()) - - return event - - def inference( - self, - messages: List[Dict[str, str]], - timeout: float, - dont_use_reward_model: bool = True, - return_all = False - ) -> str: - bittensor.logging.info( 'inference()') - - # Pre-process messages. - roles = []; contents = []; unravelled_message = ''; user_message = None - for message_dict in messages: - roles.append( message_dict['role'] ) - contents.append( message_dict['content'] ) - if message_dict['role'] == 'system': unravelled_message += 'system: ' + message_dict['content'] + '\n' - if message_dict['role'] == 'assistant': unravelled_message += 'assistant: ' + message_dict['content'] + '\n' - if message_dict['role'] == 'user': - unravelled_message += 'user: ' + message_dict['content'] + '\n' - user_message = message_dict['content'] - - bittensor.logging.info( 'inference message', str(unravelled_message) ) - - if user_message and self.filter_message(user_message): - if return_all: - return ['Received possible explicit content.'] - else: - return 'Received possible explicit content.' - - # Get scores for query. - scores = self.gating_model( unravelled_message ).to( self.device ) - bittensor.logging.info( 'inference scores', str(scores) ) - - # Get uids for query. 
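The reward-aggregation step removed above is the core of the validator's scoring: rewards for the queried uids are softmax-normalized, scattered onto a per-uid score vector (with uids that failed to respond zeroed out), and folded into self.moving_averaged_scores through an exponential moving average. A minimal, self-contained sketch of that update, with a hypothetical alpha and toy tensors standing in for the real metagraph state:

import torch

def update_moving_scores(
    moving_scores: torch.FloatTensor,    # shape (n,), one slot per uid
    uids: torch.LongTensor,              # uids that returned a completion
    rewards: torch.FloatTensor,          # reward-model outputs for those uids
    failed_uids: torch.LongTensor,       # uids that were queried but did not answer
    alpha: float = 0.99,                 # hypothetical smoothing factor
) -> torch.FloatTensor:
    # Normalize rewards across the successful uids.
    normalized = torch.nn.functional.softmax(rewards, dim=0)
    # Scatter onto a copy of the running scores, then zero the failed uids.
    scattered = moving_scores.scatter(0, uids, normalized)
    scattered = scattered.scatter(0, failed_uids, torch.zeros_like(failed_uids, dtype=torch.float))
    # Exponential moving average: keep history while tracking fresh rewards.
    return alpha * moving_scores + (1 - alpha) * scattered

# Toy usage: five uids, two answered, one was queried but failed.
scores = update_moving_scores(
    moving_scores=torch.zeros(5),
    uids=torch.tensor([1, 3]),
    rewards=torch.tensor([0.2, 1.5]),
    failed_uids=torch.tensor([4]),
)
print(scores)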
- uids = scores.sort()[ 1 ][ -self.config.neuron.inference_topk: ] - bittensor.logging.info( 'inference uids', str(uids) ) - - # Query using dendrite pool - forward_start = time.time() - bittensor.logging.trace( 'applying dendrite forward' ) - forward_calls = self.inference_pool( - roles = roles, - messages = contents, - uids = uids, - timeout = timeout, - ) - bittensor.logging.trace( 'finished dendrite forward ', time.time() - forward_start ) - - # Return longest completion. - if dont_use_reward_model or self.config.neuron.no_reward_model: - bittensor.logging.info('not applying the reward model taking the best completed response') - # Return first best from scores. - forward_calls.reverse() - - if return_all: - completions = [] - for call in forward_calls: - if len( call.completion ) > 0 and not self.filter_message(call.completion): - completions.append(call.completion) - if len(completions) > 0: - return completions - - else: - for call in forward_calls: - if len( call.completion ) > 0 and not self.filter_message(call.completion): - bittensor.logging.info( 'best completion', call.completion ) - return call.completion - - if return_all: - return ['no valid completions'] - - else: - return 'no valid completions' - - - else: - # Format messages for reward model. - flattened_message_for_reward = '' - for role_i, message_i in list(zip(roles, messages)): - if role_i != 'system': flattened_message_for_reward += message_i.strip() + '\n\n' - completions = [ call.completion for call in forward_calls if len(call.completion) > 0 and not self.filter_message(call.completion) ] - flattened_completions_for_reward = [ flattened_message_for_reward + comp.strip() for comp in completions ] - - # Return best via reward model. - reward_model_start = time.time() - completions_for_reward = [comp.strip() for comp in completions] - rewards = self.reward_model.reward( flattened_completions_for_reward, completions_for_reward, difference =False ).to( self.device ) - best_completion = completions[ rewards.argmax( dim = 0 ) ] - bittensor.logging.info('finished applying the reward model ', time.time() - reward_model_start ) - - if return_all: - return completions - else: - return best_completion - - def get_question(self, uids, bootstrap_prompt, reset_bootstrap_prompt = False, random_sample_uids = False): - - def _get_question(uids, bootstrap_prompt, reset_bootstrap_prompt = False): - # retrieve the answer - # sample = next(self.dataset) - # google_ai_dataset_place_holder = sample['answers']['text'][0] - - if reset_bootstrap_prompt: - bootstrap_prompt = next(self.dataset)['context'] # google_ai_dataset_place_holder - self.base_prompt = bootstrap_prompt - with open('prompt_history.txt', 'a') as file: - file.write("============== reset ==================" + '\n') - file.write(f"bootstrap prompt: {bootstrap_prompt}" + '\n') - - else: - bootstrap_prompt = bootstrap_prompt.replace('As an AI language model, ', '') - - question_prompt = f"{bootstrap_prompt}\n\n{self.config.neuron.follow_up_prompt}" - - questions = self.dendrite_pool( - roles = ['user'], - messages = [ question_prompt ], - uids = uids, - timeout = 12, - ) - - successful_questions = [question.completion for question in questions if question is not None and question.completion is not None and len(question.completion) > 10 and not self.filter_message(question.completion) ] - full_completions_for_reward = [ 'Question: ' + bootstrap_prompt + 'Answer: ' + comp.strip() for comp in successful_questions ] - completions_for_reward = [comp.strip() for comp in 
successful_questions] - reward_diffs = torch.zeros(len(successful_questions)) - if not self.config.neuron.no_reward_model: - reward_diffs = self.reward_model.reward( full_completions_for_reward, completions_for_reward, difference = True, shift = self.config.neuron.reward_shift ).to( self.device ) - for question, reward_diff in zip(successful_questions, reward_diffs.tolist()): - print(f"\n=== Question score: {reward_diff}===\n") - print(question) - if reward_diff > 0 : - return question, reward_diff - - return None, None - - def _get_random_uids(): - available_uids = torch.tensor( [ uid for uid, ax in enumerate( self.metagraph.axons ) if ax.is_serving ], dtype = torch.int64 ) - uids = torch.tensor( random.sample( available_uids.tolist(), self.config.neuron.training_topk ), dtype = torch.int64 ) - return uids - - question = None - - if random_sample_uids: - uids = _get_random_uids() - - while question is None: - question, reward_diff = _get_question(uids, bootstrap_prompt, reset_bootstrap_prompt) - reset_bootstrap_prompt = True - uids = _get_random_uids() - - return question, reward_diff - - def train( self ): - """ Training - The function uses an infinite loop to repeatedly generate a random question, - ask the network to complete the question, and train the gating network using - the question and the resulting completions. - """ - # Store the current epoch block number for comparison later. - last_epoch_block = self.subtensor.block - steps = 0 - - # grab the question from the current sample - prompt = next(self.dataset)['context'] - self.base_prompt = self.config.neuron.base_prompt - reward_diff = 0 - self.last_sync = self.subtensor.block - - # Start an infinite loop for training. - try: - while True: - # Ask the network to complete the random question, training the gating network. - with open('prompt_history.txt', 'a') as file: - file.write(f"{steps} | Q score({round(reward_diff , 4)}): {prompt}" + '\n') - - forward_result = self.forward( - roles = ['system', 'user' ], - messages = [ self.base_prompt, prompt ], - topk = self.config.neuron.training_topk, - random_sample_uids = True, - train_gating_model = True, - timeout = self.config.neuron.inference_timeout, - question = False - ) - - if forward_result is not None: - with open('prompt_history.txt', 'a') as file: - file.write(f"{steps} | A score({round(forward_result.rewards.sort(descending = True)[0][0].item(), 4)}): {forward_result.best_completion}" + '\n') - - idx_reward_sorted = forward_result.rewards.sort(descending = True)[1] - prompt, reward_diff = self.get_question( - uids = forward_result.uids[idx_reward_sorted], - bootstrap_prompt = forward_result.best_completion, - reset_bootstrap_prompt = (steps % self.config.neuron.reset_bootstrap_prompt_frequency == 0), - random_sample_uids = self.config.neuron.question_random_sample_uids - ) - - # Resync metagraph before returning. (sync every 15 min or ~75 blocks) - if self.subtensor.block - self.last_sync > 100: - self.metagraph.sync() - self.last_sync = self.subtensor.block - self.save() - delegates = self.subtensor.get_delegated( self.wallet.coldkeypub.ss58_address ) - - # Recreate pools here to ensure sizing is correct. 
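The question-generation helper removed above drives the self-play loop: the previous best completion is wrapped in a follow-up prompt, a set of uids is asked to produce a new question, and the first candidate whose reward margin is positive is kept; otherwise the prompt is reset from the dataset and a fresh random sample of uids is tried. A rough sketch of that accept/retry shape, where ask_network, score_margin and resample_uids are stand-in callables for the dendrite pool, the reward-model difference and random uid sampling, and the retry loop is bounded here (the original loops until a question is found):

import random
from typing import Callable, List, Optional, Tuple

def bootstrap_question(
    prompt: str,
    uids: List[int],
    ask_network: Callable[[str, List[int]], List[str]],
    score_margin: Callable[[str, str], float],
    resample_uids: Callable[[], List[int]],
    max_rounds: int = 10,
) -> Optional[Tuple[str, float]]:
    """Return the first generated question with a positive reward margin, or None."""
    for _ in range(max_rounds):
        for candidate in ask_network(prompt, uids):
            margin = score_margin(prompt, candidate)
            if margin > 0:
                return candidate, margin
        # No acceptable question this round: retry with fresh random uids.
        uids = resample_uids()
    return None

# Toy usage with dummy callables.
result = bootstrap_question(
    prompt="Summarise the context above.",
    uids=[1, 2, 3],
    ask_network=lambda p, u: [f"Follow-up from uid {i}: {p}" for i in u],
    score_margin=lambda p, q: random.uniform(-1.0, 1.0),
    resample_uids=lambda: random.sample(range(10), 3),
)
print(result)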
- self.dendrite_pool = bt.text_prompting_pool( keypair = self.wallet.hotkey, metagraph = self.metagraph ) - self.inference_pool = bt.text_prompting_pool( keypair = self.wallet.hotkey, metagraph = self.metagraph ) - - self.my_nominators = { nomin[0]: nomin[1] for nomin in delegates[0][0].nominators } if len(delegates) else {} - self.check_weights() - - if self.metagraph.n > self.gating_model.num_uids: - self.gating_model = GatingModel( metagraph = self.metagraph, config = self.config ).to( self.device ) - - # Check if enough epoch blocks have elapsed since the last epoch. - epoch_length = self.subtensor.validator_epoch_length(self.config.netuid) if self.config.neuron.epoch_length_override == -1 else self.config.neuron.epoch_length_override - blocks_until_epoch = epoch_length - ( self.subtensor.block - last_epoch_block ) - bittensor.logging.debug( 'blocks_until_epoch', blocks_until_epoch ) - if blocks_until_epoch <= 0: - bittensor.logging.trace( 'epoch()' ) - bittensor.logging.info( 'block', self.subtensor.block ) - - # Update the last epoch block to the current epoch block. - last_epoch_block = self.subtensor.block - - # Computes the average reward for each uid across non-zero values - # using the rewards history stored in the self.history list. - uids, weights = self.compute_weights() - bittensor.logging.info( 'weights', weights ) - - # Set the weights on chain via our subtensor connection. - self.subtensor.set_weights( - wallet = self.wallet, - netuid = self.config.netuid, - uids = uids, - weights = weights, - wait_for_finalization = False, - ) - steps += 1 - - except Exception as e: - bittensor.logging.info( 'Error in training loop', str( e ) ) - print(traceback.format_exc()) - - def compute_weights( self ) -> Tuple[ torch.LongTensor, torch.FloatTensor ]: - """ - Computes the average reward for each uid across non-zero values - using the rewards history stored in the self.history list. - - Returns: - uids ( torch.LongTensor, shape = (n) ): - Uid to set weights on. - weights ( torch.FloatTensor, shape = (n) ): - The weights for each uid. - """ - bittensor.logging.info( 'compute_weights()' ) - - # Return zeros weights if there is no history. - if self.history.qsize() == 0: - bittensor.logging.warning( 'No history to compute weights returning all ones.' ) - return torch.ones((self.metagraph.n)) / self.metagraph.n - - # Calculate the average reward for each uid across non-zero values. - # Replace any NaN values with 0. - raw_weights = torch.nn.functional.normalize( self.moving_averaged_scores, p=1, dim=0 ) - bittensor.logging.trace( 'raw_weights', raw_weights ) - bittensor.logging.trace( 'top10 values', raw_weights.sort()[0] ) - bittensor.logging.trace( 'top10 uids', raw_weights.sort()[1] ) - - # Process the raw weights to final_weights via subtensor limitations. - processed_weight_uids, processed_weights = bittensor.utils.weight_utils.process_weights_for_netuid( - uids = self.metagraph.uids.to( "cpu" ), - weights = raw_weights.to( "cpu" ), - netuid = self.config.netuid, - subtensor = self.subtensor, - metagraph = self.metagraph - ) - bittensor.logging.trace( 'processed_weights', processed_weights ) - bittensor.logging.trace( 'processed_weight_uids', processed_weight_uids ) - return processed_weight_uids, processed_weights - - def run(self): - if self.config.neuron.inference_only: - # Start an infinite loop, allows axon to service inference requests. 
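For reference, compute_weights above turns the running scores into the weight vector submitted on chain: the moving-averaged scores are L1-normalized so they sum to one, then passed through bittensor.utils.weight_utils.process_weights_for_netuid to respect chain-side constraints. A small sketch of the normalization step alone, with a uniform fallback standing in for the empty-history case (the netuid-specific processing is omitted):

import torch

def raw_weights_from_scores(moving_averaged_scores: torch.FloatTensor) -> torch.FloatTensor:
    """L1-normalize running scores into raw weights that sum to one."""
    if torch.count_nonzero(moving_averaged_scores) == 0:
        # No reward history yet: fall back to uniform weights over all uids.
        n = moving_averaged_scores.numel()
        return torch.ones(n) / n
    return torch.nn.functional.normalize(moving_averaged_scores, p=1, dim=0)

scores = torch.tensor([0.0, 0.3, 0.1, 0.6])
weights = raw_weights_from_scores(scores)
print(weights, weights.sum())   # weights already sum to one here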
- last_sync = self.subtensor.block - while True: - time.sleep(12) - if self.subtensor.block -last_sync > 100: - self.metagraph.sync() - self.last_sync = self.subtensor.block - self.load(inference_only = True) - - else: - # Normal validator train operation for validation. - self.train() - - def save(self, path=None): - r""" Save hotkeys and moving average scores to filesystem. """ - try: - if path is None: - path = self.config.neuron.full_path - state_dict = { - 'neuron_weights': self.moving_averaged_scores, - 'neuron_hotkeys': self.hotkeys - } - - torch.save(state_dict, f'{path}/model.torch') - bittensor.logging.success(prefix='Saved model', sufix=f'{path}/model.torch') - - gating_state_dict = { - 'model_state_dict':self.gating_model.state_dict(), - 'num_hotkeys': self.gating_model.num_uids - } - torch.save(gating_state_dict, f'{path}/gating.torch') - bittensor.logging.success(prefix='Saved gating model', sufix=f'{path}/gating.torch') - except Exception as e: - logger.warning(f'Failed to save model with error: {e}') - - def load(self, path=None, inference_only=False): - r""" Load hotkeys and moving average scores from filesystem. """ - try: - if path is None: - path = self.config.neuron.full_path - state_dict = torch.load(f'{path}/model.torch') - self.moving_averaged_scores = state_dict['neuron_weights'].clone().detach() - self.hotkeys = state_dict['neuron_hotkeys'] - bittensor.logging.success(prefix='Reloaded model', sufix=f'{path}/model.torch') - - gating_state_dict = torch.load(f'{path}/gating.torch') - if self.gating_model.num_uids == gating_state_dict['num_hotkeys']: - self.gating_model.load_state_dict(gating_state_dict['model_state_dict'], strict=False) - bittensor.logging.success(prefix='Reloaded Gating model', sufix=f'{path}/gating.torch') - - elif inference_only: - self.gating_model = GatingModel( metagraph = self.metagraph, config = self.config, num_uids=gating_state_dict['num_hotkeys']).to( self.device ) - self.gating_model.load_state_dict(gating_state_dict['model_state_dict'], strict=False) - bittensor.logging.success(prefix='Reloaded Gating model', sufix=f'{path}/gating.torch') - - except Exception as e: - logger.warning(f'Failed to load model with error: {e}') - - def check_weights(self): - """ Checks current hotkeys with the current version of the metagraph """ - for uid, hotkey in enumerate( self.hotkeys ): - if hotkey != self.metagraph.hotkeys[ uid ]: - self.moving_averaged_scores[ uid ] = 0 #hotkey has been replaced - if self.metagraph.validator_permit[ uid ] and self.metagraph.S[ uid ] > self.config.neuron.vpermit_tao_limit: - self.moving_averaged_scores[ uid ] = 0 # hotkey has validation rights and is below the tao limit - if len(self.hotkeys) < len(self.metagraph.hotkeys): - new_moving_average = torch.zeros((self.metagraph.n)).to( self.device ) - new_moving_average[:len(self.hotkeys)] = self.moving_averaged_scores - self.moving_averaged_scores = new_moving_average - self.hotkeys = copy.deepcopy(self.metagraph.hotkeys) - - -if __name__ == '__main__': - bittensor.logging.info( 'neuron().train()' ) - neuron().run() diff --git a/neurons/text/prompting/validators/core/requirements.txt b/neurons/text/prompting/validators/core/requirements.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/neurons/text/prompting/validators/core/reward.py b/neurons/text/prompting/validators/core/reward.py deleted file mode 100644 index 95c5a7f77c..0000000000 --- a/neurons/text/prompting/validators/core/reward.py +++ /dev/null @@ -1,163 +0,0 @@ -# The MIT License (MIT) -# 
Copyright © 2021 Yuma Rao - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -#### NOTE(carro): This code is modified from trlX - -import torch -import argparse -import bittensor - -from torch import nn -from typing import List -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig - -class RewardModel(nn.Module): - - def __init__( self, model_path: str, device: str, config: 'bittensor.config' = None): - super().__init__() - config = AutoConfig.from_pretrained( model_path ) - self.model = AutoModelForCausalLM.from_config( config ) - self.config = self.model.config - # `gpt-neo(x)` models use `hidden_size` attribute names instead of `n_embd`` - if config is None: config = RewardModel.config() - - self.config.n_embd = self.config.hidden_size if hasattr(self.config, "hidden_size") else self.config.n_embd - self.device = torch.device( device ) - self.transformer = self.model.transformer - self.v_head = nn.Linear(self.config.n_embd, 1, bias=False) - self.tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-j-6b') - self.tokenizer.pad_token = self.tokenizer.eos_token - self.PAD_ID = self.tokenizer(self.tokenizer.pad_token)["input_ids"][0] - - def reward( self, full_completions: List[str], comp: List[str], difference=False, shift =3) -> torch.FloatTensor: - def reward_fn( samples ): - if samples is None: return 0 - scores_list = [] - batch_size = 1 - for i in range(0, len(samples), batch_size): - sub_samples = samples[i : i + batch_size] - sub_samples = [ - "<|startoftext|>" + chosen + "<|endoftext|>" for chosen in sub_samples - ] - encodings_dict = self.tokenizer( - sub_samples, - truncation=False, - max_length=550, - padding="max_length", - return_tensors="pt", - ) - input_ids = encodings_dict["input_ids"].to( self.device ) - attn_masks = encodings_dict["attention_mask"].to( self.device ) - input_ids = input_ids.repeat(2, 1) - attn_masks = attn_masks.repeat(2, 1) - with torch.no_grad(): - sub_scores = self.forward(input_ids=input_ids.to( self.device ), attention_mask=attn_masks.to( self.device )) - scores_list.append(sub_scores["chosen_end_scores"]) - scores = torch.cat(scores_list, dim=0).mean().item() - return scores - - with torch.no_grad(): - full_rewards = [reward_fn([completion]) for completion in full_completions] - if difference: - comp_rewards = [reward_fn([completion]) for completion in comp] - return torch.nn.functional.relu(torch.tensor(full_rewards, dtype=torch.float32)+shift) - torch.nn.functional.relu(torch.tensor(comp_rewards, dtype=torch.float32)+shift) - else: - 
for completion, f_reward in zip(full_completions, full_rewards): - print(completion) - print(f_reward) - return torch.tensor(full_rewards, dtype=torch.float32) - def forward( - self, - input_ids=None, - past_key_values=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - mc_token_ids=None, - labels=None, - return_dict=False, - output_attentions=False, - output_hidden_states=False, - ): - loss = None - transformer_outputs = self.transformer( - input_ids, - attention_mask=attention_mask, - ) - - hidden_states = transformer_outputs[0] - - rewards = self.v_head(hidden_states).squeeze(-1) - chosen_end_scores = [] - rejected_end_scores = [] - - # Split the inputs and rewards into two parts, chosen and rejected - assert len(input_ids.shape) == 2 - bs = input_ids.shape[0] // 2 - chosen = input_ids[:bs] - rejected = input_ids[bs:] - chosen_rewards = rewards[:bs] - rejected_rewards = rewards[bs:] - - loss = 0 - inference = False - for i in range(bs): - if torch.all(torch.eq(chosen[i], rejected[i])).item(): - c_inds = (chosen[i] == self.PAD_ID).nonzero() - c_ind = c_inds[0].item() if len(c_inds) > 0 else chosen.shape[1] - chosen_end_scores.append(chosen_rewards[i, c_ind - 1]) - inference = True - continue - - # Check if there is any padding otherwise take length of sequence - c_inds = (chosen[i] == self.PAD_ID).nonzero() - c_ind = c_inds[0].item() if len(c_inds) > 0 else chosen.shape[1] - r_inds = (rejected[i] == self.PAD_ID).nonzero() - r_ind = r_inds[0].item() if len(r_inds) > 0 else rejected.shape[1] - end_ind = max(c_ind, r_ind) - - # Retrieve first index where trajectories diverge - divergence_ind = (chosen[i] != rejected[i]).nonzero()[0] - assert divergence_ind > 0 - - # Index into the correct rewards - c_truncated_reward = chosen_rewards[i][divergence_ind:end_ind] - r_truncated_reward = rejected_rewards[i][divergence_ind:end_ind] - - # Append the last rewards to the list of end scores - chosen_end_scores.append(c_truncated_reward[-1]) - rejected_end_scores.append(r_truncated_reward[-1]) - - # Compute loss based on truncated rewards (ignore padding) - loss += -torch.log(torch.sigmoid(c_truncated_reward - r_truncated_reward)).mean() - loss = loss / bs - - if not inference: - chosen_end_scores = torch.stack(chosen_end_scores) - rejected_end_scores = torch.stack(rejected_end_scores) - - if inference: - chosen_end_scores = torch.stack(chosen_end_scores) - return {"chosen_end_scores": chosen_end_scores} - - return { - "loss": loss, - "chosen_end_scores": chosen_end_scores, - "rejected_end_scores": rejected_end_scores, - }
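In the deleted RewardModel above, the difference=True path of reward() scores each full "Question: ... Answer: ..." string and each bare completion separately, shifts both by a configurable offset, clamps them at zero, and returns the gap. Roughly, with score standing in for the per-sample reward_fn:

import torch
from typing import Callable, List

def reward_difference(
    full_completions: List[str],
    completions: List[str],
    score: Callable[[str], float],   # stand-in for the per-sample reward_fn
    shift: float = 3.0,
) -> torch.FloatTensor:
    """relu(score(question + answer) + shift) - relu(score(answer) + shift)."""
    full = torch.tensor([score(s) for s in full_completions], dtype=torch.float32)
    bare = torch.tensor([score(s) for s in completions], dtype=torch.float32)
    return torch.relu(full + shift) - torch.relu(bare + shift)

# Toy usage with a dummy scorer.
diffs = reward_difference(
    ["Question: 2+2? Answer: 4", "Question: 2+2? Answer: maybe 5"],
    ["4", "maybe 5"],
    score=lambda s: len(s) / 10.0,
)
print(diffs)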
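The training half of the deleted forward() implements the standard pairwise reward-model objective: for each chosen/rejected pair it locates the first token where the sequences diverge, truncates both per-token reward trajectories at the last non-padding position, and minimises -log(sigmoid(r_chosen - r_rejected)) over that span. A stripped-down sketch of the loss for a single pair of reward vectors:

import torch

def pairwise_reward_loss(
    chosen_rewards: torch.FloatTensor,    # per-token rewards for the preferred sequence
    rejected_rewards: torch.FloatTensor,  # per-token rewards for the rejected sequence
    divergence_ind: int,                  # first token index where the sequences differ
    end_ind: int,                         # index just past the last non-padding token
) -> torch.Tensor:
    c = chosen_rewards[divergence_ind:end_ind]
    r = rejected_rewards[divergence_ind:end_ind]
    # The preferred sequence should earn higher reward over the compared span.
    return -torch.log(torch.sigmoid(c - r)).mean()

# Toy usage: the chosen trajectory is rewarded more highly after token 2.
chosen = torch.tensor([0.1, 0.1, 0.8, 0.9, 0.7])
rejected = torch.tensor([0.1, 0.1, 0.2, 0.1, 0.3])
print(pairwise_reward_loss(chosen, rejected, divergence_ind=2, end_ind=5))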