diff --git a/docs/en_US/Compressor/AutoCompression.md b/docs/en_US/Compressor/AutoCompression.md index 463c23d401..e251db4aa0 100644 --- a/docs/en_US/Compressor/AutoCompression.md +++ b/docs/en_US/Compressor/AutoCompression.md @@ -84,7 +84,7 @@ config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity, {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity, 'start_epoch': 0, 'end_epoch': 3, 'frequency': 1,'op_name': 'conv1' },] -PRUNERS = {'level':LevelPruner(model, config_list_level), 'agp':AGP_Pruner(model, config_list_agp)} +PRUNERS = {'level':LevelPruner(model, config_list_level), 'agp':AGPPruner(model, config_list_agp)} pruner = PRUNERS(params['prune_method']['_name']) pruner.compress() ... # fine tuning diff --git a/docs/en_US/Compressor/Pruner.md b/docs/en_US/Compressor/Pruner.md index e4d167b59b..824d8625b3 100644 --- a/docs/en_US/Compressor/Pruner.md +++ b/docs/en_US/Compressor/Pruner.md @@ -11,9 +11,9 @@ We provide several pruning algorithms that support fine-grained weight pruning a * [FPGM Pruner](#fpgm-pruner) * [L1Filter Pruner](#l1filter-pruner) * [L2Filter Pruner](#l2filter-pruner) -* [APoZ Rank Pruner](#activationapozrankfilterpruner) -* [Activation Mean Rank Pruner](#activationmeanrankfilterpruner) -* [Taylor FO On Weight Pruner](#taylorfoweightfilterpruner) +* [Activation APoZ Rank Filter Pruner](#activationAPoZRankFilter-pruner) +* [Activation Mean Rank Filter Pruner](#activationmeanrankfilter-pruner) +* [Taylor FO On Weight Pruner](#taylorfoweightfilter-pruner) **Pruning Schedule** * [AGP Pruner](#agp-pruner) @@ -27,7 +27,7 @@ We provide several pruning algorithms that support fine-grained weight pruning a ## Level Pruner -This is one basic one-shot pruner: you can set a target sparsity level (expressed as a fraction, 0.6 means we will prune 60%). +This is one basic one-shot pruner: you can set a target sparsity level (expressed as a fraction, 0.6 means we will prune 60% of the weight parameters). We first sort the weights in the specified layer by their absolute values. And then mask to zero the smallest magnitude weights until the desired sparsity level is reached. @@ -50,9 +50,19 @@ pruner.compress() ``` #### User configuration for Level Pruner -* **sparsity:** This is to specify the sparsity operations to be compressed to -*** +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.LevelPruner +``` + +##### Tensorflow + +```eval_rst +.. autoclass:: nni.compression.tensorflow.LevelPruner +``` + ## Slim Pruner @@ -75,8 +85,11 @@ pruner.compress() #### User configuration for Slim Pruner -- **sparsity:** This is to specify the sparsity operations to be compressed to -- **op_types:** Only BatchNorm2d is supported in Slim Pruner +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.SlimPruner +``` ### Reproduced Experiment @@ -95,7 +108,7 @@ The experiments code can be found at [examples/model_compress]( https://github.c This is an one-shot pruner, FPGM Pruner is an implementation of paper [Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration](https://arxiv.org/pdf/1811.00250.pdf) -FPGMPruner prune filters with the smallest geometric median +FPGMPruner prune filters with the smallest geometric median. 
![](../../img/fpgm_fig1.png) @@ -113,6 +126,7 @@ config_list = [{ pruner = FPGMPruner(model, config_list) pruner.compress() ``` + PyTorch code ```python from nni.compression.torch import FPGMPruner @@ -125,10 +139,16 @@ pruner.compress() ``` #### User configuration for FPGM Pruner -- **sparsity:** How much percentage of convolutional filters are to be pruned. -- **op_types:** Only Conv2d is supported in L1Filter Pruner -*** +##### PyTorch +```eval_rst +.. autoclass:: nni.compression.torch.FPGMPruner +``` + +##### Tensorflow +```eval_rst +.. autoclass:: nni.compression.tensorflow.FPGMPruner +``` ## L1Filter Pruner @@ -161,8 +181,10 @@ pruner.compress() #### User configuration for L1Filter Pruner -- **sparsity:** This is to specify the sparsity operations to be compressed to -- **op_types:** Only Conv2d is supported in L1Filter Pruner +##### PyTorch +```eval_rst +.. autoclass:: nni.compression.torch.L1FilterPruner +``` ### Reproduced Experiment @@ -194,14 +216,15 @@ pruner.compress() ### User configuration for L2Filter Pruner -- **sparsity:** This is to specify the sparsity operations to be compressed to -- **op_types:** Only Conv2d is supported in L2Filter Pruner - +##### PyTorch +```eval_rst +.. autoclass:: nni.compression.torch.L2FilterPruner +``` *** -## ActivationAPoZRankFilterPruner +## ActivationAPoZRankFilter Pruner -ActivationAPoZRankFilterPruner is a pruner which prunes the filters with the smallest importance criterion `APoZ` calculated from the output activations of convolution layers to achieve a preset level of network sparsity. The pruning criterion `APoZ` is explained in the paper [Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250). +ActivationAPoZRankFilter Pruner is a pruner which prunes the filters with the smallest importance criterion `APoZ` calculated from the output activations of convolution layers to achieve a preset level of network sparsity. The pruning criterion `APoZ` is explained in the paper [Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250). The APoZ is defined as: @@ -223,16 +246,18 @@ pruner.compress() Note: ActivationAPoZRankFilterPruner is used to prune convolutional layers within deep neural networks, therefore the `op_types` field supports only convolutional layers. -You can view example for more information - -### User configuration for ActivationAPoZRankFilterPruner +You can view [example](https://github.com/microsoft/nni/blob/master/examples/model_compress/model_prune_torch.py) for more information. -- **sparsity:** How much percentage of convolutional filters are to be pruned. -- **op_types:** Only Conv2d is supported in ActivationAPoZRankFilterPruner +### User configuration for ActivationAPoZRankFilter Pruner +##### PyTorch +```eval_rst +.. autoclass:: nni.compression.torch.ActivationAPoZRankFilterPruner +``` *** -## ActivationMeanRankFilterPruner + +## ActivationMeanRankFilter Pruner ActivationMeanRankFilterPruner is a pruner which prunes the filters with the smallest importance criterion `mean activation` calculated from the output activations of convolution layers to achieve a preset level of network sparsity. The pruning criterion `mean activation` is explained in section 2.2 of the paper[Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440). Other pruning criteria mentioned in this paper will be supported in future release. 
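To make the criterion concrete, the sketch below is an illustration only (not the NNI implementation); the helper name and tensor shapes are assumptions. It shows how per-filter scores could be computed from activations collected at a convolution layer's output, after which the filters with the smallest scores become the pruning candidates:

```python
import torch

def mean_activation_scores(activations):
    # activations: (batch, channels, height, width), collected after ReLU.
    # Average over the batch and spatial dimensions to get one score per filter.
    return activations.mean(dim=(0, 2, 3))

# Toy example: 8 filters, sparsity 0.5 -> prune the 4 lowest-scoring filters.
acts = torch.relu(torch.randn(16, 8, 14, 14))
scores = mean_activation_scores(acts)
prune_idx = torch.argsort(scores)[:4]
print("filters selected for pruning:", prune_idx.tolist())
```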
@@ -252,18 +277,19 @@ pruner.compress() Note: ActivationMeanRankFilterPruner is used to prune convolutional layers within deep neural networks, therefore the `op_types` field supports only convolutional layers. -You can view example for more information +You can view [example](https://github.com/microsoft/nni/blob/master/examples/model_compress/model_prune_torch.py) for more information. ### User configuration for ActivationMeanRankFilterPruner -- **sparsity:** How much percentage of convolutional filters are to be pruned. -- **op_types:** Only Conv2d is supported in ActivationMeanRankFilterPruner. - +##### PyTorch +```eval_rst +.. autoclass:: nni.compression.torch.ActivationMeanRankFilterPruner +``` *** -## TaylorFOWeightFilterPruner +## TaylorFOWeightFilter Pruner -TaylorFOWeightFilterPruner is a pruner which prunes convolutional layers based on estimated importance calculated from the first order taylor expansion on weights to achieve a preset level of network sparsity. The estimated importance of filters is defined as the paper [Importance Estimation for Neural Network Pruning](http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf). Other pruning criteria mentioned in this paper will be supported in future release. +TaylorFOWeightFilter Pruner is a pruner which prunes convolutional layers based on estimated importance calculated from the first order taylor expansion on weights to achieve a preset level of network sparsity. The estimated importance of filters is defined as the paper [Importance Estimation for Neural Network Pruning](http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf). Other pruning criteria mentioned in this paper will be supported in future release. > @@ -283,28 +309,32 @@ pruner = TaylorFOWeightFilterPruner(model, config_list, statistics_batch_num=1) pruner.compress() ``` -You can view example for more information - -### User configuration for TaylorFOWeightFilterPruner -- **sparsity:** How much percentage of convolutional filters are to be pruned. -- **op_types:** Currently only Conv2d is supported in TaylorFOWeightFilterPruner. +#### User configuration for TaylorFOWeightFilter Pruner +##### PyTorch +```eval_rst +.. autoclass:: nni.compression.torch.TaylorFOWeightFilterPruner +``` *** + ## AGP Pruner + This is an iterative pruner, In [To prune, or not to prune: exploring the efficacy of pruning for model compression](https://arxiv.org/abs/1710.01878), authors Michael Zhu and Suyog Gupta provide an algorithm to prune the weight gradually. >We introduce a new automated gradual pruning algorithm in which the sparsity is increased from an initial sparsity value si (usually 0) to a final sparsity value sf over a span of n pruning steps, starting at training step t0 and with pruning frequency ∆t: ![](../../img/agp_pruner.png) ->The binary weight masks are updated every ∆t steps as the network is trained to gradually increase the sparsity of the network while allowing the network training steps to recover from any pruning-induced loss in accuracy. In our experience, varying the pruning frequency ∆t between 100 and 1000 training steps had a negligible impact on the final model quality. Once the model achieves the target sparsity sf , the weight masks are no longer updated. The intuition behind this sparsity function in equation + +>The binary weight masks are updated every ∆t steps as the network is trained to gradually increase the sparsity of the network while allowing the network training steps to recover from any pruning-induced loss in accuracy. 
In our experience, varying the pruning frequency ∆t between 100 and 1000 training steps had a negligible impact on the final model quality. Once the model achieves the target sparsity sf , the weight masks are no longer updated. The intuition behind this sparsity function in equation (1). ### Usage + You can prune all weight from 0% to 80% sparsity in 10 epoch with the code below. PyTorch code ```python -from nni.compression.torch import AGP_Pruner +from nni.compression.torch import AGPPruner config_list = [{ 'initial_sparsity': 0, 'final_sparsity': 0.8, @@ -322,7 +352,7 @@ config_list = [{ # optimizer.step(), so an optimizer is required to prune the model. optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4) -pruner = AGP_Pruner(model, config_list, optimizer, pruning_algorithm='level') +pruner = AGPPruner(model, config_list, optimizer, pruning_algorithm='level') pruner.compress() ``` @@ -342,14 +372,21 @@ PyTorch code ```python pruner.update_epoch(epoch) ``` -You can view example for more information +You can view [example](https://github.com/microsoft/nni/blob/master/examples/model_compress/model_prune_torch.py) for more information. #### User configuration for AGP Pruner -* **initial_sparsity:** This is to specify the sparsity when compressor starts to compress -* **final_sparsity:** This is to specify the sparsity when compressor finishes to compress -* **start_epoch:** This is to specify the epoch number when compressor starts to compress, default start from epoch 0 -* **end_epoch:** This is to specify the epoch number when compressor finishes to compress -* **frequency:** This is to specify every *frequency* number epochs compressor compress once, default frequency=1 + +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.AGPPruner +``` + +##### Tensorflow + +```eval_rst +.. autoclass:: nni.compression.tensorflow.AGPPruner +``` *** @@ -379,52 +416,11 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod #### User configuration for NetAdapt Pruner -- **sparsity:** The target overall sparsity. -- **op_types:** The operation type to prune. If `base_algo` is `l1` or `l2`, then only `Conv2d` is supported as `op_types`. -- **short_term_fine_tuner:** Function to short-term fine tune the masked model. -This function should include `model` as the only parameter, and fine tune the model for a short term after each pruning iteration. - - Example: - ```python - >>> def short_term_fine_tuner(model, epoch=3): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> train_loader = ... - >>> criterion = torch.nn.CrossEntropyLoss() - >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - >>> model.train() - >>> for _ in range(epoch): - >>> for batch_idx, (data, target) in enumerate(train_loader): - >>> data, target = data.to(device), target.to(device) - >>> optimizer.zero_grad() - >>> output = model(data) - >>> loss = criterion(output, target) - >>> loss.backward() - >>> optimizer.step() - ``` -- **evaluator:** Function to evaluate the masked model. This function should include `model` as the only parameter, and returns a scalar value. - - Example:: - ```python - >>> def evaluator(model): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> val_loader = ... 
- >>> model.eval() - >>> correct = 0 - >>> with torch.no_grad(): - >>> for data, target in val_loader: - >>> data, target = data.to(device), target.to(device) - >>> output = model(data) - >>> # get the index of the max log-probability - >>> pred = output.argmax(dim=1, keepdim=True) - >>> correct += pred.eq(target.view_as(pred)).sum().item() - >>> accuracy = correct / len(val_loader.dataset) - >>> return accuracy - ``` -- **optimize_mode:** Optimize mode, `maximize` or `minimize`, by default `maximize`. -- **base_algo:** Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. -Given the sparsity distribution among the ops, the assigned `base_algo` is used to decide which filters/channels/weights to prune. -- **sparsity_per_iteration:** The sparsity to prune in each iteration. NetAdapt Pruner prune the model by the same level in each iteration to meet the resource budget progressively. -- **experiment_data_dir:** PATH to save experiment data, including the config_list generated for the base pruning algorithm and the performance of the pruned model. +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.NetAdaptPruner +``` ## SimulatedAnnealing Pruner @@ -459,39 +455,16 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod #### User configuration for SimulatedAnnealing Pruner -- **sparsity:** The target overall sparsity. -- **op_types:** The operation type to prune. If `base_algo` is `l1` or `l2`, then only `Conv2d` is supported as `op_types`. -- **evaluator:** Function to evaluate the masked model. This function should include `model` as the only parameter, and returns a scalar value. - Example:: - ```python - >>> def evaluator(model): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> val_loader = ... - >>> model.eval() - >>> correct = 0 - >>> with torch.no_grad(): - >>> for data, target in val_loader: - >>> data, target = data.to(device), target.to(device) - >>> output = model(data) - >>> # get the index of the max log-probability - >>> pred = output.argmax(dim=1, keepdim=True) - >>> correct += pred.eq(target.view_as(pred)).sum().item() - >>> accuracy = correct / len(val_loader.dataset) - >>> return accuracy - ``` -- **optimize_mode:** Optimize mode, `maximize` or `minimize`, by default `maximize`. -- **base_algo:** Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. -Given the sparsity distribution among the ops, the assigned `base_algo` is used to decide which filters/channels/weights to prune. -- **start_temperature:** Simualated Annealing related parameter. -- **stop_temperature:** Simualated Annealing related parameter. -- **cool_down_rate:** Simualated Annealing related parameter. -- **perturbation_magnitude:** Initial perturbation magnitude to the sparsities. The magnitude decreases with current temperature. -- **experiment_data_dir:** PATH to save experiment data, including the config_list generated for the base pruning algorithm, the performance of the pruned model and the pruning history. - +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.SimulatedAnnealingPruner +``` + ## AutoCompress Pruner For each round, AutoCompressPruner prune the model for the same sparsity to achive the overall sparsity: - 1. Generate sparsities distribution using SimualtedAnnealingPruner + 1. Generate sparsities distribution using SimulatedAnnealingPruner 2. Perform ADMM-based structured pruning to generate pruning result for the next round. Here we use `speedup` to perform real pruning. 
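For intuition only, the outline below sketches that per-round loop. The callables `sa_search`, `admm_prune` and `speedup` are placeholders for the corresponding internal steps, and the per-round sparsity formula assumes each round prunes the same fraction of the remaining weights:

```python
def auto_compress(model, overall_sparsity, num_rounds, sa_search, admm_prune, speedup):
    # Assumption for this sketch: removing the same fraction of the remaining
    # weights in every round reaches the overall target after num_rounds rounds.
    sparsity_per_round = 1 - (1 - overall_sparsity) ** (1 / num_rounds)
    for _ in range(num_rounds):
        config_list = sa_search(model, sparsity_per_round)  # 1. simulated annealing search
        model = admm_prune(model, config_list)              # 2. ADMM-based structured pruning
        model = speedup(model)                              #    real pruning before the next round
    return model
```

In practice these steps are handled inside `AutoCompressPruner`; users only construct it with `trainer`, `evaluator`, `dummy_input` and the config list.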
@@ -518,59 +491,11 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod #### User configuration for AutoCompress Pruner -- **sparsity:** The target overall sparsity. -- **op_types:** The operation type to prune. If `base_algo` is `l1` or `l2`, then only `Conv2d` is supported as `op_types`. -- **trainer:** Function used for the first subproblem. -Users should write this function as a normal function to train the Pytorch model and include `model, optimizer, criterion, epoch, callback` as function arguments. -Here `callback` acts as an L2 regulizer as presented in the formula (7) of the original paper. -The logic of `callback` is implemented inside the Pruner, users are just required to insert `callback()` between `loss.backward()` and `optimizer.step()`. - Example: - ```python - >>> def trainer(model, criterion, optimizer, epoch, callback): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> train_loader = ... - >>> model.train() - >>> for batch_idx, (data, target) in enumerate(train_loader): - >>> data, target = data.to(device), target.to(device) - >>> optimizer.zero_grad() - >>> output = model(data) - >>> loss = criterion(output, target) - >>> loss.backward() - >>> # callback should be inserted between loss.backward() and optimizer.step() - >>> if callback: - >>> callback() - >>> optimizer.step() - ``` -- **evaluator:** Function to evaluate the masked model. This function should include `model` as the only parameter, and returns a scalar value. - Example:: - ```python - >>> def evaluator(model): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> val_loader = ... - >>> model.eval() - >>> correct = 0 - >>> with torch.no_grad(): - >>> for data, target in val_loader: - >>> data, target = data.to(device), target.to(device) - >>> output = model(data) - >>> # get the index of the max log-probability - >>> pred = output.argmax(dim=1, keepdim=True) - >>> correct += pred.eq(target.view_as(pred)).sum().item() - >>> accuracy = correct / len(val_loader.dataset) - >>> return accuracy - ``` -- **dummy_input:** The dummy input for model speed up, users should put it on right device before pass in. -- **iterations:** The number of overall iterations. -- **optimize_mode:** Optimize mode, `maximize` or `minimize`, by default `maximize`. -- **base_algo:** Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. -Given the sparsity distribution among the ops, the assigned `base_algo` is used to decide which filters/channels/weights to prune. -- **start_temperature:** Simualated Annealing related parameter. -- **stop_temperature:** Simualated Annealing related parameter. -- **cool_down_rate:** Simualated Annealing related parameter. -- **perturbation_magnitude:** Initial perturbation magnitude to the sparsities. The magnitude decreases with current temperature. -- **admm_num_iterations:** Number of iterations of ADMM Pruner. -- **admm_training_epochs:** Training epochs of the first optimization subproblem of ADMMPruner. -- **experiment_data_dir:** PATH to store temporary experiment data. +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.AutoCompressPruner +``` ## ADMM Pruner @@ -606,35 +531,11 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod #### User configuration for ADMM Pruner -- **sparsity:** This is to specify the sparsity operations to be compressed to. -- **op_types:** The operation type to prune. 
If `base_algo` is `l1` or `l2`, then only `Conv2d` is supported as `op_types`. -- **trainer:** Function used for the first subproblem in ADMM optimization, attention, this is not used for fine-tuning. -Users should write this function as a normal function to train the Pytorch model and include `model, optimizer, criterion, epoch, callback` as function arguments. -Here `callback` acts as an L2 regulizer as presented in the formula (7) of the original paper. -The logic of `callback` is implemented inside the Pruner, users are just required to insert `callback()` between `loss.backward()` and `optimizer.step()`. - - Example: - ```python - >>> def trainer(model, criterion, optimizer, epoch, callback): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> train_loader = ... - >>> model.train() - >>> for batch_idx, (data, target) in enumerate(train_loader): - >>> data, target = data.to(device), target.to(device) - >>> optimizer.zero_grad() - >>> output = model(data) - >>> loss = criterion(output, target) - >>> loss.backward() - >>> # callback should be inserted between loss.backward() and optimizer.step() - >>> if callback: - >>> callback() - >>> optimizer.step() - ``` -- **num_iterations:** Total number of iterations. -- **training_epochs:** Training epochs of the first subproblem. -- **row:** Penalty parameters for ADMM training. -- **base_algo:** Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. -Given the sparsity distribution among the ops, the assigned `base_algo` is used to decide which filters/channels/weights to prune. +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.ADMMPruner +``` ## Lottery Ticket Hypothesis @@ -672,10 +573,13 @@ The above configuration means that there are 5 times of iterative pruning. As th *Tensorflow version will be supported later.* -#### User configuration for LotteryTicketPruner +#### User configuration for LotteryTicket Pruner -* **prune_iterations:** The number of rounds for the iterative pruning, i.e., the number of iterative pruning. -* **sparsity:** The final sparsity when the compression is done. +##### PyTorch + +```eval_rst +.. autoclass:: nni.compression.torch.LotteryTicketPruner +``` ### Reproduced Experiment diff --git a/docs/img/agp_pruner.png b/docs/img/agp_pruner.png index 889f42e764..98e991a0f9 100644 Binary files a/docs/img/agp_pruner.png and b/docs/img/agp_pruner.png differ diff --git a/examples/model_compress/README.md b/examples/model_compress/README.md index 34094352a5..78fe8672f3 100644 --- a/examples/model_compress/README.md +++ b/examples/model_compress/README.md @@ -22,7 +22,7 @@ configure_list = [{ 'frequency': 1, 'op_types': ['default'] }] -pruner = AGP_Pruner(configure_list) +pruner = AGPPruner(configure_list) ``` When ```pruner(model)``` is called, your model is injected with masks as embedded operations. For example, a layer takes a weight as input, we will insert an operation between the weight and the layer, this operation takes the weight as input and outputs a new weight applied by the mask. Thus, the masks are applied at any time the computation goes through the operations. You can fine-tune your model **without** any modifications. 
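As a rough picture (a simplified sketch, not NNI's actual wrapper implementation), the injected operation behaves like a small module that multiplies the stored weight by its mask on every forward pass:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class MaskedConv(nn.Module):
    """Simplified illustration of a mask wrapper."""
    def __init__(self, conv):
        super().__init__()
        self.conv = conv
        # The pruner would fill this buffer with zeros and ones.
        self.register_buffer("weight_mask", torch.ones_like(conv.weight))

    def forward(self, x):
        # The mask is re-applied on every forward pass, so fine-tuning keeps
        # the pruned weights at zero without any change to the training loop.
        masked_weight = self.conv.weight * self.weight_mask
        return F.conv2d(x, masked_weight, self.conv.bias,
                        self.conv.stride, self.conv.padding)

layer = MaskedConv(nn.Conv2d(3, 16, 3))
print(layer(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 16, 30, 30])
```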
diff --git a/examples/model_compress/model_prune_torch.py b/examples/model_compress/model_prune_torch.py index 94eace4cbf..9129509ae7 100644 --- a/examples/model_compress/model_prune_torch.py +++ b/examples/model_compress/model_prune_torch.py @@ -10,7 +10,7 @@ from models.cifar10.vgg import VGG import nni from nni.compression.torch import LevelPruner, SlimPruner, FPGMPruner, L1FilterPruner, \ - L2FilterPruner, AGP_Pruner, ActivationMeanRankFilterPruner, ActivationAPoZRankFilterPruner + L2FilterPruner, AGPPruner, ActivationMeanRankFilterPruner, ActivationAPoZRankFilterPruner prune_config = { 'level': { @@ -25,7 +25,7 @@ 'agp': { 'dataset_name': 'mnist', 'model_name': 'naive', - 'pruner_class': AGP_Pruner, + 'pruner_class': AGPPruner, 'config_list': [{ 'initial_sparsity': 0., 'final_sparsity': 0.8, diff --git a/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py b/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py index 9ff3d71b92..89ea1a722d 100644 --- a/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py +++ b/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py @@ -6,17 +6,23 @@ import tensorflow as tf from .compressor import Pruner -__all__ = ['LevelPruner', 'AGP_Pruner', 'FPGMPruner'] +__all__ = ['LevelPruner', 'AGPPruner', 'FPGMPruner'] _logger = logging.getLogger(__name__) class LevelPruner(Pruner): + """ + Parameters + ---------- + model : tensorflow model + Model to be pruned + config_list : list + Supported keys: + - sparsity : This is to specify the sparsity operations to be compressed to. + - op_types : Operation types to prune. + """ def __init__(self, model, config_list): - """ - config_list: supported keys: - - sparsity - """ super().__init__(model, config_list) self.mask_list = {} self.if_init_list = {} @@ -34,24 +40,22 @@ def calc_mask(self, layer, config): return mask -class AGP_Pruner(Pruner): - """An automated gradual pruning algorithm that prunes the smallest magnitude - weights to achieve a preset level of network sparsity. - Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the - efficacy of pruning for model compression", 2017 NIPS Workshop on Machine - Learning of Phones and other Consumer Devices, - https://arxiv.org/pdf/1710.01878.pdf +class AGPPruner(Pruner): + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned. + config_list : listlist + Supported keys: + - initial_sparsity: This is to specify the sparsity when compressor starts to compress. + - final_sparsity: This is to specify the sparsity when compressor finishes to compress. + - start_epoch: This is to specify the epoch number when compressor starts to compress, default start from epoch 0. + - end_epoch: This is to specify the epoch number when compressor finishes to compress. + - frequency: This is to specify every *frequency* number epochs compressor compress once, default frequency=1. """ def __init__(self, model, config_list): - """ - config_list: supported keys: - - initial_sparsity - - final_sparsity: you should make sure initial_sparsity <= final_sparsity - - start_epoch: start epoch numer begin update mask - - end_epoch: end epoch number stop update mask - - frequency: if you want update every 2 epoch, you can set it 2 - """ super().__init__(model, config_list) self.mask_list = {} self.if_init_list = {} @@ -102,23 +106,19 @@ def update_epoch(self, epoch, sess): for k in self.if_init_list: self.if_init_list[k] = True + class FPGMPruner(Pruner): """ - A filter pruner via geometric median. 
- "Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration", - https://arxiv.org/pdf/1811.00250.pdf + Parameters + ---------- + model : tensorflow model + Model to be pruned + config_list : list + Supported keys: + - sparsity : percentage of convolutional filters to be pruned. + - op_types : Only Conv2d is supported in FPGM Pruner. """ - def __init__(self, model, config_list): - """ - Parameters - ---------- - model : pytorch model - the model user wants to compress - config_list: list - support key for each list item: - - sparsity: percentage of convolutional filters to be pruned. - """ super().__init__(model, config_list) self.mask_dict = {} self.assign_handler = [] diff --git a/src/sdk/pynni/nni/compression/torch/pruning/admm_pruner.py b/src/sdk/pynni/nni/compression/torch/pruning/admm_pruner.py index 381ad4783b..ae1a9ec9dc 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/admm_pruner.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/admm_pruner.py @@ -15,58 +15,50 @@ class ADMMPruner(OneshotPruner): """ - This is a Pytorch implementation of ADMM Pruner algorithm. + A Pytorch implementation of ADMM Pruner algorithm. + + Parameters + ---------- + model : torch.nn.Module + Model to be pruned. + config_list : list + List on pruning configs. + trainer : function + Function used for the first subproblem. + Users should write this function as a normal function to train the Pytorch model + and include `model, optimizer, criterion, epoch, callback` as function arguments. + Here `callback` acts as an L2 regulizer as presented in the formula (7) of the original paper. + The logic of `callback` is implemented inside the Pruner, + users are just required to insert `callback()` between `loss.backward()` and `optimizer.step()`. + Example:: + + def trainer(model, criterion, optimizer, epoch, callback): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + train_loader = ... + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = criterion(output, target) + loss.backward() + # callback should be inserted between loss.backward() and optimizer.step() + if callback: + callback() + optimizer.step() + num_iterations : int + Total number of iterations. + training_epochs : int + Training epochs of the first subproblem. + row : float + Penalty parameters for ADMM training. + base_algo : str + Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops, + the assigned `base_algo` is used to decide which filters/channels/weights to prune. - Alternating Direction Method of Multipliers (ADMM) is a mathematical optimization technique, - by decomposing the original nonconvex problem into two subproblems that can be solved iteratively. - In weight pruning problem, these two subproblems are solved via 1) gradient descent algorithm and 2) Euclidean projection respectively. - This solution framework applies both to non-structured and different variations of structured pruning schemes. - - For more details, please refer to the paper: https://arxiv.org/abs/1804.03294. """ def __init__(self, model, config_list, trainer, num_iterations=30, training_epochs=5, row=1e-4, base_algo='l1'): - """ - Parameters - ---------- - model : torch.nn.module - Model to be pruned - config_list : list - List on pruning configs - trainer : function - Function used for the first subproblem. 
- Users should write this function as a normal function to train the Pytorch model - and include `model, optimizer, criterion, epoch, callback` as function arguments. - Here `callback` acts as an L2 regulizer as presented in the formula (7) of the original paper. - The logic of `callback` is implemented inside the Pruner, - users are just required to insert `callback()` between `loss.backward()` and `optimizer.step()`. - Example:: - ``` - >>> def trainer(model, criterion, optimizer, epoch, callback): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> train_loader = ... - >>> model.train() - >>> for batch_idx, (data, target) in enumerate(train_loader): - >>> data, target = data.to(device), target.to(device) - >>> optimizer.zero_grad() - >>> output = model(data) - >>> loss = criterion(output, target) - >>> loss.backward() - >>> # callback should be inserted between loss.backward() and optimizer.step() - >>> if callback: - >>> callback() - >>> optimizer.step() - ``` - num_iterations : int - Total number of iterations. - training_epochs : int - Training epochs of the first subproblem. - row : float - Penalty parameters for ADMM training. - base_algo : str - Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops, - the assigned `base_algo` is used to decide which filters/channels/weights to prune. - """ self._base_algo = base_algo super().__init__(model, config_list) @@ -83,7 +75,7 @@ def validate_config(self, model, config_list): """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list List on pruning configs diff --git a/src/sdk/pynni/nni/compression/torch/pruning/agp.py b/src/sdk/pynni/nni/compression/torch/pruning/agp.py index 2108689eee..3e34ba5aef 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/agp.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/agp.py @@ -1,6 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +""" +An automated gradual pruning algorithm that prunes the smallest magnitude +weights to achieve a preset level of network sparsity. +Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the +efficacy of pruning for model compression", 2017 NIPS Workshop on Machine +Learning of Phones and other Consumer Devices. +""" + import logging import torch from schema import And, Optional @@ -8,34 +16,31 @@ from ..utils.config_validation import CompressorSchema from ..compressor import Pruner -__all__ = ['AGP_Pruner'] +__all__ = ['AGPPruner'] logger = logging.getLogger('torch pruner') -class AGP_Pruner(Pruner): +class AGPPruner(Pruner): """ - An automated gradual pruning algorithm that prunes the smallest magnitude - weights to achieve a preset level of network sparsity. - Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the - efficacy of pruning for model compression", 2017 NIPS Workshop on Machine - Learning of Phones and other Consumer Devices, - https://arxiv.org/pdf/1710.01878.pdf + Parameters + ---------- + model : torch.nn.Module + Model to be pruned. + config_list : listlist + Supported keys: + - initial_sparsity: This is to specify the sparsity when compressor starts to compress. + - final_sparsity: This is to specify the sparsity when compressor finishes to compress. + - start_epoch: This is to specify the epoch number when compressor starts to compress, default start from epoch 0. 
+ - end_epoch: This is to specify the epoch number when compressor finishes to compress. + - frequency: This is to specify every *frequency* number epochs compressor compress once, default frequency=1. + optimizer: torch.optim.Optimizer + Optimizer used to train model. + pruning_algorithm: str + Algorithms being used to prune model, + choose from `['level', 'slim', 'l1', 'l2', 'fpgm', 'taylorfo', 'apoz', 'mean_activation']`, by default `level` """ def __init__(self, model, config_list, optimizer, pruning_algorithm='level'): - """ - Parameters - ---------- - model : torch.nn.module - Model to be pruned - config_list : list - List on pruning configs - optimizer: torch.optim.Optimizer - Optimizer used to train model - pruning_algorithm: str - algorithms being used to prune model - """ - super().__init__(model, config_list, optimizer) assert isinstance(optimizer, torch.optim.Optimizer), "AGP pruner is an iterative pruner, please pass optimizer of the model to it" self.masker = MASKER_DICT[pruning_algorithm](model, self) @@ -47,7 +52,7 @@ def validate_config(self, model, config_list): """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list List on pruning configs diff --git a/src/sdk/pynni/nni/compression/torch/pruning/apply_compression.py b/src/sdk/pynni/nni/compression/torch/pruning/apply_compression.py index 315a8579b7..8e6b023f5b 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/apply_compression.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/apply_compression.py @@ -14,7 +14,7 @@ def apply_compression_results(model, masks_file, map_location=None): Parameters ---------- - model : torch.nn.module + model : torch.nn.Module The model to be compressed masks_file : str The path of the mask file diff --git a/src/sdk/pynni/nni/compression/torch/pruning/auto_compress_pruner.py b/src/sdk/pynni/nni/compression/torch/pruning/auto_compress_pruner.py index 37e9cd1f9b..24db9f2f88 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/auto_compress_pruner.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/auto_compress_pruner.py @@ -21,14 +21,83 @@ class AutoCompressPruner(Pruner): """ - This is a Pytorch implementation of AutoCompress pruning algorithm. - - For each round, AutoCompressPruner prune the model for the same sparsity to achive the ovrall sparsity: - 1. Generate sparsities distribution using SimualtedAnnealingPruner - 2. Perform ADMM-based structured pruning to generate pruning result for the next round. - Here we use 'speedup' to perform real pruning. - - For more details, please refer to the paper: https://arxiv.org/abs/1907.03141. + A Pytorch implementation of AutoCompress pruning algorithm. + + Parameters + ---------- + model : pytorch model + The model to be pruned. + config_list : list + Supported keys: + - sparsity : The target overall sparsity. + - op_types : The operation type to prune. + trainer : function + Function used for the first subproblem of ADMM Pruner. + Users should write this function as a normal function to train the Pytorch model + and include `model, optimizer, criterion, epoch, callback` as function arguments. + Here `callback` acts as an L2 regulizer as presented in the formula (7) of the original paper. + The logic of `callback` is implemented inside the Pruner, + users are just required to insert `callback()` between `loss.backward()` and `optimizer.step()`. 
+ Example:: + + def trainer(model, criterion, optimizer, epoch, callback): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + train_loader = ... + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = criterion(output, target) + loss.backward() + # callback should be inserted between loss.backward() and optimizer.step() + if callback: + callback() + optimizer.step() + evaluator : function + function to evaluate the pruned model. + This function should include `model` as the only parameter, and returns a scalar value. + Example:: + + def evaluator(model): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + val_loader = ... + model.eval() + correct = 0 + with torch.no_grad(): + for data, target in val_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + accuracy = correct / len(val_loader.dataset) + return accuracy + dummy_input : pytorch tensor + The dummy input for ```jit.trace```, users should put it on right device before pass in. + num_iterations : int + Number of overall iterations. + optimize_mode : str + optimize mode, `maximize` or `minimize`, by default `maximize`. + base_algo : str + Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops, + the assigned `base_algo` is used to decide which filters/channels/weights to prune. + start_temperature : float + Start temperature of the simulated annealing process. + stop_temperature : float + Stop temperature of the simulated annealing process. + cool_down_rate : float + Cool down rate of the temperature. + perturbation_magnitude : float + Initial perturbation magnitude to the sparsities. The magnitude decreases with current temperature. + admm_num_iterations : int + Number of iterations of ADMM Pruner. + admm_training_epochs : int + Training epochs of the first optimization subproblem of ADMMPruner. + row : float + Penalty parameters for ADMM training. + experiment_data_dir : string + PATH to store temporary experiment data. """ def __init__(self, model, config_list, trainer, evaluator, dummy_input, @@ -38,83 +107,6 @@ def __init__(self, model, config_list, trainer, evaluator, dummy_input, # ADMM related admm_num_iterations=30, admm_training_epochs=5, row=1e-4, experiment_data_dir='./'): - """ - Parameters - ---------- - model : pytorch model - The model to be pruned - config_list : list - Supported keys: - - sparsity : The target overall sparsity. - - op_types : The operation type to prune. - trainer : function - Function used for the first subproblem of ADMM Pruner. - Users should write this function as a normal function to train the Pytorch model - and include `model, optimizer, criterion, epoch, callback` as function arguments. - Here `callback` acts as an L2 regulizer as presented in the formula (7) of the original paper. - The logic of `callback` is implemented inside the Pruner, - users are just required to insert `callback()` between `loss.backward()` and `optimizer.step()`. - Example:: - ``` - >>> def trainer(model, criterion, optimizer, epoch, callback): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> train_loader = ... 
- >>> model.train() - >>> for batch_idx, (data, target) in enumerate(train_loader): - >>> data, target = data.to(device), target.to(device) - >>> optimizer.zero_grad() - >>> output = model(data) - >>> loss = criterion(output, target) - >>> loss.backward() - >>> # callback should be inserted between loss.backward() and optimizer.step() - >>> if callback: - >>> callback() - >>> optimizer.step() - ``` - evaluator : function - function to evaluate the pruned model. - This function should include `model` as the only parameter, and returns a scalar value. - Example:: - >>> def evaluator(model): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> val_loader = ... - >>> model.eval() - >>> correct = 0 - >>> with torch.no_grad(): - >>> for data, target in val_loader: - >>> data, target = data.to(device), target.to(device) - >>> output = model(data) - >>> # get the index of the max log-probability - >>> pred = output.argmax(dim=1, keepdim=True) - >>> correct += pred.eq(target.view_as(pred)).sum().item() - >>> accuracy = correct / len(val_loader.dataset) - >>> return accuracy - dummy_input : pytorch tensor - The dummy input for ```jit.trace```, users should put it on right device before pass in - num_iterations : int - Number of overall iterations - optimize_mode : str - optimize mode, `maximize` or `minimize`, by default `maximize` - base_algo : str - Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops, - the assigned `base_algo` is used to decide which filters/channels/weights to prune. - start_temperature : float - Simualated Annealing related parameter - stop_temperature : float - Simualated Annealing related parameter - cool_down_rate : float - Simualated Annealing related parameter - perturbation_magnitude : float - Initial perturbation magnitude to the sparsities. The magnitude decreases with current temperature - admm_num_iterations : int - Number of iterations of ADMM Pruner - admm_training_epochs : int - Training epochs of the first optimization subproblem of ADMMPruner - row : float - Penalty parameters for ADMM training - experiment_data_dir : string - PATH to store temporary experiment data - """ # original model self._model_to_prune = model self._base_algo = base_algo @@ -147,7 +139,7 @@ def validate_config(self, model, config_list): """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list List on pruning configs diff --git a/src/sdk/pynni/nni/compression/torch/pruning/lottery_ticket.py b/src/sdk/pynni/nni/compression/torch/pruning/lottery_ticket.py index ace2b36154..3a05217753 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/lottery_ticket.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/lottery_ticket.py @@ -13,33 +13,22 @@ class LotteryTicketPruner(Pruner): """ - This is a Pytorch implementation of the paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks", - following NNI model compression interface. - - 1. Randomly initialize a neural network f(x;theta_0) (where theta_0 follows D_{theta}). - 2. Train the network for j iterations, arriving at parameters theta_j. - 3. Prune p% of the parameters in theta_j, creating a mask m. - 4. Reset the remaining parameters to their values in theta_0, creating the winning ticket f(x;m*theta_0). - 5. Repeat step 2, 3, and 4. 
+ Parameters + ---------- + model : pytorch model + The model to be pruned + config_list : list + Supported keys: + - prune_iterations : The number of rounds for the iterative pruning. + - sparsity : The final sparsity when the compression is done. + optimizer : pytorch optimizer + The optimizer for the model + lr_scheduler : pytorch lr scheduler + The lr scheduler for the model if used + reset_weights : bool + Whether reset weights and optimizer at the beginning of each round. """ - def __init__(self, model, config_list, optimizer=None, lr_scheduler=None, reset_weights=True): - """ - Parameters - ---------- - model : pytorch model - The model to be pruned - config_list : list - Supported keys: - - prune_iterations : The number of rounds for the iterative pruning. - - sparsity : The final sparsity when the compression is done. - optimizer : pytorch optimizer - The optimizer for the model - lr_scheduler : pytorch lr scheduler - The lr scheduler for the model if used - reset_weights : bool - Whether reset weights and optimizer at the beginning of each round. - """ # save init weights and optimizer self.reset_weights = reset_weights if self.reset_weights: @@ -60,7 +49,7 @@ def validate_config(self, model, config_list): """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list Supported keys: diff --git a/src/sdk/pynni/nni/compression/torch/pruning/net_adapt_pruner.py b/src/sdk/pynni/nni/compression/torch/pruning/net_adapt_pruner.py index 47c39e9f41..81d0c47d14 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/net_adapt_pruner.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/net_adapt_pruner.py @@ -21,80 +21,69 @@ class NetAdaptPruner(Pruner): """ - This is a Pytorch implementation of NetAdapt compression algorithm. - - The pruning procedure can be described as follows: - While Res_i > Bud: - 1. Con = Res_i - delta_Res - 2. for every layer: - Choose Num Filters to prune - Choose which filter to prune - Short-term fine tune the pruned model - 3. Pick the best layer to prune - Long-term fine tune - - For the details of this algorithm, please refer to the paper: https://arxiv.org/abs/1804.03230 + A Pytorch implementation of NetAdapt compression algorithm. + + Parameters + ---------- + model : pytorch model + The model to be pruned. + config_list : list + Supported keys: + - sparsity : The target overall sparsity. + - op_types : The operation type to prune. + short_term_fine_tuner : function + function to short-term fine tune the masked model. + This function should include `model` as the only parameter, + and fine tune the model for a short term after each pruning iteration. + Example:: + + def short_term_fine_tuner(model, epoch=3): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + train_loader = ... + criterion = torch.nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + model.train() + for _ in range(epoch): + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = criterion(output, target) + loss.backward() + optimizer.step() + evaluator : function + function to evaluate the masked model. + This function should include `model` as the only parameter, and returns a scalar value. + Example:: + + def evaluator(model): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + val_loader = ... 
+ model.eval() + correct = 0 + with torch.no_grad(): + for data, target in val_loader: + data, target = data.to(device), target.to(device) + output = model(data) + # get the index of the max log-probability + pred = output.argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + accuracy = correct / len(val_loader.dataset) + return accuracy + optimize_mode : str + optimize mode, `maximize` or `minimize`, by default `maximize`. + base_algo : str + Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops, + the assigned `base_algo` is used to decide which filters/channels/weights to prune. + sparsity_per_iteration : float + sparsity to prune in each iteration. + experiment_data_dir : str + PATH to save experiment data, + including the config_list generated for the base pruning algorithm and the performance of the pruned model. """ def __init__(self, model, config_list, short_term_fine_tuner, evaluator, optimize_mode='maximize', base_algo='l1', sparsity_per_iteration=0.05, experiment_data_dir='./'): - """ - Parameters - ---------- - model : pytorch model - The model to be pruned - config_list : list - Supported keys: - - sparsity : The target overall sparsity. - - op_types : The operation type to prune. - short_term_fine_tuner : function - function to short-term fine tune the masked model. - This function should include `model` as the only parameter, - and fine tune the model for a short term after each pruning iteration. - Example: - >>> def short_term_fine_tuner(model, epoch=3): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> train_loader = ... - >>> criterion = torch.nn.CrossEntropyLoss() - >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - >>> model.train() - >>> for _ in range(epoch): - >>> for _, (data, target) in enumerate(train_loader): - >>> data, target = data.to(device), target.to(device) - >>> optimizer.zero_grad() - >>> output = model(data) - >>> loss = criterion(output, target) - >>> loss.backward() - >>> optimizer.step() - evaluator : function - function to evaluate the masked model. - This function should include `model` as the only parameter, and returns a scalar value. - Example:: - >>> def evaluator(model): - >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - >>> val_loader = ... - >>> model.eval() - >>> correct = 0 - >>> with torch.no_grad(): - >>> for data, target in val_loader: - >>> data, target = data.to(device), target.to(device) - >>> output = model(data) - >>> # get the index of the max log-probability - >>> pred = output.argmax(dim=1, keepdim=True) - >>> correct += pred.eq(target.view_as(pred)).sum().item() - >>> accuracy = correct / len(val_loader.dataset) - >>> return accuracy - optimize_mode : str - optimize mode, `maximize` or `minimize`, by default `maximize`. - base_algo : str - Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops, - the assigned `base_algo` is used to decide which filters/channels/weights to prune. - sparsity_per_iteration : float - sparsity to prune in each iteration - experiment_data_dir : str - PATH to save experiment data, - including the config_list generated for the base pruning algorithm and the performance of the pruned model. 
- """ # models used for iterative pruning and evaluation self._model_to_prune = copy.deepcopy(model) self._base_algo = base_algo @@ -124,7 +113,7 @@ def validate_config(self, model, config_list): """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list List on pruning configs diff --git a/src/sdk/pynni/nni/compression/torch/pruning/one_shot.py b/src/sdk/pynni/nni/compression/torch/pruning/one_shot.py index 47fc0b5e83..f74eba2a52 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/one_shot.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/one_shot.py @@ -21,7 +21,7 @@ def __init__(self, model, config_list, pruning_algorithm='level', optimizer=None """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list List on pruning configs @@ -41,7 +41,7 @@ def validate_config(self, model, config_list): """ Parameters ---------- - model : torch.nn.module + model : torch.nn.Module Model to be pruned config_list : list List on pruning configs @@ -85,12 +85,32 @@ def calc_mask(self, wrapper, wrapper_idx=None): return None class LevelPruner(OneshotPruner): - def __init__(self, model, config_list, optimizer=None): - super().__init__(model, config_list, pruning_algorithm='level', optimizer=optimizer) + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : This is to specify the sparsity operations to be compressed to. + - op_types : Operation types to prune. + """ + def __init__(self, model, config_list): + super().__init__(model, config_list, pruning_algorithm='level') class SlimPruner(OneshotPruner): - def __init__(self, model, config_list, optimizer=None): - super().__init__(model, config_list, pruning_algorithm='slim', optimizer=optimizer) + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : This is to specify the sparsity operations to be compressed to. + - op_types : Only BatchNorm2d is supported in Slim Pruner. + """ + def __init__(self, model, config_list): + super().__init__(model, config_list, pruning_algorithm='slim') def validate_config(self, model, config_list): schema = CompressorSchema([{ @@ -118,27 +138,87 @@ def validate_config(self, model, config_list): schema.validate(config_list) class L1FilterPruner(_StructuredFilterPruner): - def __init__(self, model, config_list, optimizer=None): - super().__init__(model, config_list, pruning_algorithm='l1', optimizer=optimizer) + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : This is to specify the sparsity operations to be compressed to. + - op_types : Only Conv2d is supported in L1FilterPruner. + """ + def __init__(self, model, config_list): + super().__init__(model, config_list, pruning_algorithm='l1') class L2FilterPruner(_StructuredFilterPruner): - def __init__(self, model, config_list, optimizer=None): - super().__init__(model, config_list, pruning_algorithm='l2', optimizer=optimizer) + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : This is to specify the sparsity operations to be compressed to. + - op_types : Only Conv2d is supported in L2FilterPruner. 
+ """ + def __init__(self, model, config_list): + super().__init__(model, config_list, pruning_algorithm='l2') class FPGMPruner(_StructuredFilterPruner): - def __init__(self, model, config_list, optimizer=None): - super().__init__(model, config_list, pruning_algorithm='fpgm', optimizer=optimizer) + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : This is to specify the sparsity operations to be compressed to. + - op_types : Only Conv2d is supported in FPGM Pruner. + """ + def __init__(self, model, config_list): + super().__init__(model, config_list, pruning_algorithm='fpgm') class TaylorFOWeightFilterPruner(_StructuredFilterPruner): + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : How much percentage of convolutional filters are to be pruned. + - op_types : Currently only Conv2d is supported in TaylorFOWeightFilterPruner. + """ def __init__(self, model, config_list, optimizer=None, statistics_batch_num=1): super().__init__(model, config_list, pruning_algorithm='taylorfo', optimizer=optimizer, statistics_batch_num=statistics_batch_num) class ActivationAPoZRankFilterPruner(_StructuredFilterPruner): + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : How much percentage of convolutional filters are to be pruned. + - op_types : Only Conv2d is supported in ActivationAPoZRankFilterPruner. + """ def __init__(self, model, config_list, optimizer=None, activation='relu', statistics_batch_num=1): super().__init__(model, config_list, pruning_algorithm='apoz', optimizer=optimizer, \ activation=activation, statistics_batch_num=statistics_batch_num) class ActivationMeanRankFilterPruner(_StructuredFilterPruner): + """ + Parameters + ---------- + model : torch.nn.Module + Model to be pruned + config_list : list + Supported keys: + - sparsity : How much percentage of convolutional filters are to be pruned. + - op_types : Only Conv2d is supported in ActivationMeanRankFilterPruner. + """ def __init__(self, model, config_list, optimizer=None, activation='relu', statistics_batch_num=1): super().__init__(model, config_list, pruning_algorithm='mean_activation', optimizer=optimizer, \ activation=activation, statistics_batch_num=statistics_batch_num) diff --git a/src/sdk/pynni/nni/compression/torch/pruning/simulated_annealing_pruner.py b/src/sdk/pynni/nni/compression/torch/pruning/simulated_annealing_pruner.py index 4ef966298d..d7a50d5723 100644 --- a/src/sdk/pynni/nni/compression/torch/pruning/simulated_annealing_pruner.py +++ b/src/sdk/pynni/nni/compression/torch/pruning/simulated_annealing_pruner.py @@ -22,62 +22,56 @@ class SimulatedAnnealingPruner(Pruner): """ - This is a Pytorch implementation of Simulated Annealing compression algorithm. - - - Randomly initialize a pruning rate distribution (sparsities). - - While current_temperature < stop_temperature: - 1. generate a perturbation to current distribution - 2. Perform fast evaluation on the perturbated distribution - 3. accept the perturbation according to the performance and probability, if not accepted, return to step 1 - 4. cool down, current_temperature <- current_temperature * cool_down_rate + A Pytorch implementation of Simulated Annealing compression algorithm. + + Parameters + ---------- + model : pytorch model + The model to be pruned. 
+    config_list : list
+        Supported keys:
+            - sparsity : The target overall sparsity.
+            - op_types : The operation type to prune.
+    evaluator : function
+        Function to evaluate the pruned model.
+        This function should include `model` as the only parameter, and returns a scalar value.
+        Example::
+
+            def evaluator(model):
+                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                val_loader = ...
+                model.eval()
+                correct = 0
+                with torch.no_grad():
+                    for data, target in val_loader:
+                        data, target = data.to(device), target.to(device)
+                        output = model(data)
+                        # get the index of the max log-probability
+                        pred = output.argmax(dim=1, keepdim=True)
+                        correct += pred.eq(target.view_as(pred)).sum().item()
+                accuracy = correct / len(val_loader.dataset)
+                return accuracy
+    optimize_mode : str
+        Optimize mode, `maximize` or `minimize`, by default `maximize`.
+    base_algo : str
+        Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops,
+        the assigned `base_algo` is used to decide which filters/channels/weights to prune.
+    start_temperature : float
+        Start temperature of the simulated annealing process.
+    stop_temperature : float
+        Stop temperature of the simulated annealing process.
+    cool_down_rate : float
+        Cool down rate of the temperature.
+    perturbation_magnitude : float
+        Initial perturbation magnitude to the sparsities. The magnitude decreases with current temperature.
+    experiment_data_dir : string
+        PATH to save experiment data,
+        including the config_list generated for the base pruning algorithm, the performance of the pruned model and the pruning history.
+    """

     def __init__(self, model, config_list, evaluator, optimize_mode='maximize', base_algo='l1', start_temperature=100, stop_temperature=20, cool_down_rate=0.9, perturbation_magnitude=0.35, experiment_data_dir='./'):
-        """
-        Parameters
-        ----------
-        model : pytorch model
-            The model to be pruned
-        config_list : list
-            Supported keys:
-                - sparsity : The target overall sparsity.
-                - op_types : The operation type to prune.
-        evaluator : function
-            function to evaluate the pruned model.
-            This function should include `model` as the only parameter, and returns a scalar value.
-            Example::
-            >>> def evaluator(model):
-            >>>     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            >>>     val_loader = ...
-            >>>     model.eval()
-            >>>     correct = 0
-            >>>     with torch.no_grad():
-            >>>         for data, target in val_loader:
-            >>>             data, target = data.to(device), target.to(device)
-            >>>             output = model(data)
-            >>>             # get the index of the max log-probability
-            >>>             pred = output.argmax(dim=1, keepdim=True)
-            >>>             correct += pred.eq(target.view_as(pred)).sum().item()
-            >>>     accuracy = correct / len(val_loader.dataset)
-            >>>     return accuracy
-        optimize_mode : str
-            optimize mode, `maximize` or `minimize`, by default `maximize`.
-        base_algo : str
-            Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops,
-            the assigned `base_algo` is used to decide which filters/channels/weights to prune.
-        start_temperature : float
-            Simualated Annealing related parameter
-        stop_temperature : float
-            Simualated Annealing related parameter
-        cool_down_rate : float
-            Simualated Annealing related parameter
-        perturbation_magnitude : float
-            initial perturbation magnitude to the sparsities.
-            The magnitude decreases with current temperature
-        experiment_data_dir : string
-            PATH to save experiment data,
-            including the config_list generated for the base pruning algorithm, the performance of the pruned model and the pruning history.
-        """
         # original model
         self._model_to_prune = copy.deepcopy(model)
         self._base_algo = base_algo
@@ -114,7 +108,7 @@ def validate_config(self, model, config_list):
         """
         Parameters
         ----------
-        model : torch.nn.module
+        model : torch.nn.Module
             Model to be pruned
         config_list : list
             List on pruning configs
diff --git a/src/sdk/pynni/nni/compression/torch/quantization/quantizers.py b/src/sdk/pynni/nni/compression/torch/quantization/quantizers.py
index bd5ebb076b..c9fc1929a2 100644
--- a/src/sdk/pynni/nni/compression/torch/quantization/quantizers.py
+++ b/src/sdk/pynni/nni/compression/torch/quantization/quantizers.py
@@ -153,7 +153,7 @@ def validate_config(self, model, config_list):
         """
         Parameters
         ----------
-        model : torch.nn.module
+        model : torch.nn.Module
             Model to be pruned
         config_list : list of dict
             List of configurations
@@ -179,7 +179,7 @@ def _quantize(self, bits, op, real_val):
         ----------
         bits : int
             quantization bits length
-        op : torch.nn.module
+        op : torch.nn.Module
             target module
         real_val : float
             real value to be quantized
@@ -271,7 +271,7 @@ def validate_config(self, model, config_list):
         """
         Parameters
         ----------
-        model : torch.nn.module
+        model : torch.nn.Module
             Model to be pruned
         config_list : list of dict
             List of configurations
@@ -322,7 +322,7 @@ def validate_config(self, model, config_list):
         """
         Parameters
         ----------
-        model : torch.nn.module
+        model : torch.nn.Module
             Model to be pruned
         config_list : list of dict
             List of configurations
diff --git a/src/sdk/pynni/tests/test_compressor.py b/src/sdk/pynni/tests/test_compressor.py
index 7641ae7d25..6a8727c9e4 100644
--- a/src/sdk/pynni/tests/test_compressor.py
+++ b/src/sdk/pynni/tests/test_compressor.py
@@ -88,9 +88,8 @@ def test_torch_quantizer_modules_detection(self):

     def test_torch_level_pruner(self):
         model = TorchModel()
-        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
         configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
-        torch_compressor.LevelPruner(model, configure_list, optimizer).compress()
+        torch_compressor.LevelPruner(model, configure_list).compress()

     @tf2
     def test_tf_level_pruner(self):
@@ -129,7 +128,7 @@ def test_torch_fpgm_pruner(self):
         model = TorchModel()
         config_list = [{'sparsity': 0.6, 'op_types': ['Conv2d']}, {'sparsity': 0.2, 'op_types': ['Conv2d']}]
-        pruner = torch_compressor.FPGMPruner(model, config_list, torch.optim.SGD(model.parameters(), lr=0.01))
+        pruner = torch_compressor.FPGMPruner(model, config_list)

         model.conv2.module.weight.data = torch.tensor(w).float()
         masks = pruner.calc_mask(model.conv2)
@@ -315,7 +314,7 @@ def test_torch_QAT_quantizer(self):
     def test_torch_pruner_validation(self):
         # test bad configuraiton
         pruner_classes = [torch_compressor.__dict__[x] for x in \
-            ['LevelPruner', 'SlimPruner', 'FPGMPruner', 'L1FilterPruner', 'L2FilterPruner', 'AGP_Pruner', \
+            ['LevelPruner', 'SlimPruner', 'FPGMPruner', 'L1FilterPruner', 'L2FilterPruner', \
             'ActivationMeanRankFilterPruner', 'ActivationAPoZRankFilterPruner']]

         bad_configs = [
@@ -337,11 +336,10 @@ def test_torch_pruner_validation(self):
             ]
         ]
         model = TorchModel()
-        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
         for pruner_class in pruner_classes:
             for config_list in bad_configs:
                 try:
-                    pruner_class(model, config_list, optimizer)
+                    pruner_class(model, config_list)
                     print(config_list)
                     assert False, 'Validation error should be raised for bad configuration'
                 except schema.SchemaError:
diff --git a/src/sdk/pynni/tests/test_pruners.py b/src/sdk/pynni/tests/test_pruners.py
index e43f657534..1fab9b2b2a 100644
--- a/src/sdk/pynni/tests/test_pruners.py
+++ b/src/sdk/pynni/tests/test_pruners.py
@@ -8,7 +8,7 @@ import math
 from unittest import TestCase, main
 from nni.compression.torch import LevelPruner, SlimPruner, FPGMPruner, L1FilterPruner, \
-    L2FilterPruner, AGP_Pruner, ActivationMeanRankFilterPruner, ActivationAPoZRankFilterPruner, \
+    L2FilterPruner, AGPPruner, ActivationMeanRankFilterPruner, ActivationAPoZRankFilterPruner, \
     TaylorFOWeightFilterPruner, NetAdaptPruner, SimulatedAnnealingPruner, ADMMPruner, AutoCompressPruner

 def validate_sparsity(wrapper, sparsity, bias=False):
@@ -33,7 +33,7 @@ def validate_sparsity(wrapper, sparsity, bias=False):
         ]
     },
     'agp': {
-        'pruner_class': AGP_Pruner,
+        'pruner_class': AGPPruner,
         'config_list': [{
             'initial_sparsity': 0.,
             'final_sparsity': 0.8,
@@ -192,7 +192,9 @@ def pruners_test(pruner_names=['level', 'agp', 'slim', 'fpgm', 'l1', 'l2', 'tayl
             pruner = prune_config[pruner_name]['pruner_class'](model, config_list, trainer=prune_config[pruner_name]['trainer'])
         elif pruner_name == 'autocompress':
             pruner = prune_config[pruner_name]['pruner_class'](model, config_list, trainer=prune_config[pruner_name]['trainer'], evaluator=prune_config[pruner_name]['evaluator'], dummy_input=x)
-        else:
+        elif pruner_name in ['level', 'slim', 'fpgm', 'l1', 'l2']:
+            pruner = prune_config[pruner_name]['pruner_class'](model, config_list)
+        elif pruner_name in ['agp', 'taylorfo', 'mean_activation', 'apoz']:
             pruner = prune_config[pruner_name]['pruner_class'](model, config_list, optimizer)
         pruner.compress()

@@ -225,7 +227,7 @@ def test_agp(pruning_algorithm):
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
     config_list = prune_config['agp']['config_list']

-    pruner = AGP_Pruner(model, config_list, optimizer, pruning_algorithm=pruning_algorithm)
+    pruner = AGPPruner(model, config_list, optimizer, pruning_algorithm=pruning_algorithm)
     pruner.compress()

     x = torch.randn(2, 1, 28, 28)
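For reference, a minimal usage sketch of the constructor signatures exercised by the hunks above: one-shot pruners are built from `(model, config_list)` only, while `AGPPruner` (renamed from `AGP_Pruner`) still receives the fine-tuning optimizer. The model, sparsity values, and epoch range below are illustrative placeholders, not part of this change.

```python
import torch
from nni.compression.torch import LevelPruner, AGPPruner

def build_model():
    # placeholder network; any torch.nn.Module works here
    return torch.nn.Sequential(torch.nn.Conv2d(1, 8, 3), torch.nn.ReLU(), torch.nn.Conv2d(8, 4, 3))

# one-shot pruner: the optimizer argument is gone
model = build_model()
LevelPruner(model, [{'sparsity': 0.8, 'op_types': ['default']}]).compress()

# scheduled pruner: AGPPruner still takes the optimizer used during fine-tuning
model = build_model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
agp_config = [{'initial_sparsity': 0., 'final_sparsity': 0.8,
               'start_epoch': 0, 'end_epoch': 10, 'frequency': 1,
               'op_types': ['default']}]
AGPPruner(model, agp_config, optimizer, pruning_algorithm='level').compress()
```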