fix checkpoint load error and stop updating parameters in evaluation stage #3124
Changes from 3 commits
```diff
@@ -73,9 +73,9 @@ def update_quantization_param(bits, rmin, rmax):
     ----------
     bits : int
         quantization bits length
-    rmin : float
+    rmin : Tensor
         min value of real value
-    rmax : float
+    rmax : Tensor
         max value of real value

     Returns
```
```diff
@@ -85,12 +85,17 @@ def update_quantization_param(bits, rmin, rmax):
     # extend the [min, max] interval to ensure that it contains 0.
     # Otherwise, we would not meet the requirement that 0 be an exactly
     # representable value.
-    rmin = min(rmin, 0)
-    rmax = max(rmax, 0)
+    if rmin.is_cuda:
+        rmin = torch.min(rmin, torch.Tensor([0]).cuda())
+        rmax = torch.max(rmax, torch.Tensor([0]).cuda())
+        qmin = torch.Tensor([0]).cuda()
+        qmax = torch.Tensor([(1 << bits) - 1]).cuda()
+    else:
+        rmin = torch.min(rmin, torch.Tensor([0]))
+        rmax = torch.max(rmax, torch.Tensor([0]))
+        qmin = torch.Tensor([0])
+        qmax = torch.Tensor([(1 << bits) - 1])

-    # the min and max quantized values, as floating-point values
-    qmin = 0
-    qmax = (1 << bits) - 1
     # First determine the scale.
     scale = (rmax - rmin) / (qmax - qmin)
```
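As a reference for the math this hunk implements, here is a minimal standalone sketch of computing asymmetric-quantization parameters: clamp the real range to include 0, then derive the scale. The zero-point step shown here follows the standard asymmetric scheme and is an assumption (the hunk above only shows the scale); `qparams_sketch` is an illustrative name, not the NNI API. Note that constructing the constants with `device=rmin.device` would fold the `is_cuda` branches into a single path.

```python
import torch

def qparams_sketch(bits, rmin, rmax):
    # Keep everything as 1-dimension tensors on the inputs' device,
    # which removes the need for separate CPU/CUDA branches.
    zero = torch.zeros(1, device=rmin.device)
    rmin = torch.min(rmin, zero)  # extend the range so it contains 0
    rmax = torch.max(rmax, zero)

    # min and max quantized values, as floating-point 1-dimension tensors
    qmin = torch.zeros(1, device=rmin.device)
    qmax = torch.full((1,), float((1 << bits) - 1), device=rmin.device)

    # the scale maps the real range onto the integer range
    scale = (rmax - rmin) / (qmax - qmin)
    # zero-point: the integer that represents real 0 (standard scheme,
    # assumed here; may differ in detail from NNI's implementation)
    zero_point = torch.round(torch.clamp(qmin - rmin / scale, 0.0, float((1 << bits) - 1)))
    return scale, zero_point

# e.g. 8-bit parameters for activations observed in [-1.0, 3.0]
scale, zp = qparams_sketch(8, torch.Tensor([-1.0]), torch.Tensor([3.0]))
print(scale.item(), zp.item())  # ~0.0157, 64.0
```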
```diff
@@ -143,11 +148,11 @@ def __init__(self, model, config_list, optimizer=None):
             types of nn.module you want to apply quantization, eg. 'Conv2d'
         """
         super().__init__(model, config_list, optimizer)
-        self.steps = 1
         modules_to_compress = self.get_modules_to_compress()
+        self.bound_model.register_buffer("steps", torch.Tensor([1]))
         for layer, config in modules_to_compress:
-            layer.module.register_buffer("zero_point", None)
-            layer.module.register_buffer("scale", None)
+            layer.module.register_buffer("zero_point", torch.Tensor([0.0]))
+            layer.module.register_buffer("scale", torch.Tensor([1.0]))
             if "output" in config.get("quant_types", []):
                 layer.module.register_buffer('ema_decay', torch.Tensor([0.99]))
                 layer.module.register_buffer('tracked_min_biased', torch.zeros(1))
```

**Review comment:** If we just used a 0-dimension definition here, would it be simpler? Then we wouldn't need to add the conversion.

**Reply:** Defining them as 0-dimension tensors does save some work; defining them as 1-dimension tensors just keeps them consistent with the other buffered variables. Since the other buffers also each hold a single real value, there is no reason why `scale` and `zero_point`, which are likewise just two real numbers, should be the only ones defined as 0-dimension tensors. It might confuse users.
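Separate from the dimension question, the concrete initial values matter for checkpoints: a buffer registered as `None` is tracked by the module but excluded from `state_dict`, so `scale`/`zero_point` saved before the first update would simply be missing on load. A minimal sketch (the module here is illustrative, not NNI's wrapper):

```python
import torch
import torch.nn as nn

class Wrapped(nn.Module):
    def __init__(self):
        super().__init__()
        # None buffers are tracked but excluded from state_dict ...
        self.register_buffer("zero_point_none", None)
        # ... while concrete 1-dimension tensors are saved and loaded,
        # matching the other buffers (ema_decay, tracked_min_biased, ...)
        self.register_buffer("zero_point", torch.Tensor([0.0]))
        self.register_buffer("scale", torch.Tensor([1.0]))

m = Wrapped()
print(sorted(m.state_dict().keys()))  # ['scale', 'zero_point'] -- no 'zero_point_none'
```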
```diff
@@ -229,7 +234,8 @@ def quantize_weight(self, wrapper, **kwargs):
         quant_start_step = config.get('quant_start_step', 0)
         assert weight_bits >= 1, "quant bits length should be at least 1"

-        if quant_start_step > self.steps:
+        # we don't update weight in evaluation stage
+        if quant_start_step > self.bound_model.steps or not wrapper.training:
             return weight

         # if bias exists, quantize bias to uint32
```
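The `wrapper.training` guard works because NNI wraps each compressed layer in an `nn.Module`, so `model.train()` and `model.eval()` propagate the `training` flag down into the wrapper. A sketch of the same gating pattern with a hypothetical wrapper class:

```python
import torch.nn as nn

class WrapperSketch(nn.Module):
    """Hypothetical stand-in for NNI's layer wrapper."""
    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, x):
        if self.training:
            # training stage: statistics and quantization params may be updated
            pass
        # evaluation stage: quantize with the frozen parameters only
        return self.module(x)

wrapped = WrapperSketch(nn.Linear(4, 2))
wrapped.eval()  # flips .training to False on the wrapper and its children
assert wrapped.training is False
```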
```diff
@@ -258,15 +264,17 @@ def quantize_output(self, output, wrapper, **kwargs):
         quant_start_step = config.get('quant_start_step', 0)
         assert output_bits >= 1, "quant bits length should be at least 1"

-        if quant_start_step > self.steps:
+        if quant_start_step > self.bound_model.steps:
             return output

-        current_min, current_max = torch.min(output), torch.max(output)
-        module.tracked_min_biased, module.tracked_min = update_ema(module.tracked_min_biased, current_min,
-                                                                   module.ema_decay, self.steps)
-        module.tracked_max_biased, module.tracked_max = update_ema(module.tracked_max_biased, current_max,
-                                                                   module.ema_decay, self.steps)
-        module.scale, module.zero_point = update_quantization_param(output_bits, module.tracked_min, module.tracked_max)
+        # we don't update output quantization parameters in evaluation stage
+        if wrapper.training:
+            current_min, current_max = torch.min(output), torch.max(output)
+            module.tracked_min_biased, module.tracked_min = update_ema(module.tracked_min_biased, current_min,
+                                                                       module.ema_decay, self.bound_model.steps)
+            module.tracked_max_biased, module.tracked_max = update_ema(module.tracked_max_biased, current_max,
+                                                                       module.ema_decay, self.bound_model.steps)
+            module.scale, module.zero_point = update_quantization_param(output_bits, module.tracked_min, module.tracked_max)

         out = self._quantize(output_bits, module, output)
         out = self._dequantize(module, out)
         return out
```

**Review comment:** At present, we have to re-calculate the `scale` and `zero_point` of both output and weight, because activations and weights share the module's single `scale`/`zero_point` pair. If we quantized both activations and weights during testing without updating, the stored parameters would belong to only one of them, and the result could be wrong. This design of sharing one `scale`/`zero_point` pair doesn't make sense, though; we plan to refactor it in a following release.

**Reply:** Indeed, activations and weights use the same field, but once a user chooses to quantize the output of a layer, `module.zero_point` and `module.scale` will always hold the activation's zero_point and scale, so it will not cause anything wrong.
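For context on the tracked min/max logic: `update_ema` maintains an exponential moving average with bias correction, Adam-style. The formulation below is a sketch of the typical version and an assumption about NNI's exact helper. Note that passing a 1-dimension tensor as `step` (as `self.bound_model.steps` is) makes the returned values 1-dimension, which is the size-consistency point raised in the later review thread.

```python
import torch

def update_ema_sketch(biased_ema, value, decay, step):
    # Standard EMA with bias correction: early steps are dominated by the
    # zero initialization, so divide by (1 - decay**step) to de-bias.
    biased_ema = biased_ema * decay + (1 - decay) * value
    unbiased_ema = biased_ema / (1 - decay ** step)
    return biased_ema, unbiased_ema

biased = torch.zeros(1)
for step in range(1, 4):
    biased, unbiased = update_ema_sketch(biased, torch.Tensor([10.0]), 0.99, step)
    print(step, unbiased.item())  # exactly 10.0 at every step for a constant signal
```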
```diff
@@ -279,7 +287,7 @@ def step_with_optimizer(self):
         """
         override `compressor` `step` method, quantization only happens after certain number of steps
         """
-        self.steps += 1
+        self.bound_model.steps += 1


 class DoReFaQuantizer(Quantizer):
```
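Moving `steps` from a plain Python attribute (`self.steps`) onto the bound model as a registered buffer is what makes the counter survive save/load, since buffers are part of `state_dict`. A small round-trip sketch; the `nn.Linear` stand-in is illustrative:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
model.register_buffer("steps", torch.Tensor([1]))

model.steps += 1  # the in-place update still lands in the registered buffer
torch.save(model.state_dict(), "ckpt.pt")

restored = nn.Linear(4, 2)
restored.register_buffer("steps", torch.Tensor([1]))
restored.load_state_dict(torch.load("ckpt.pt"))
print(restored.steps)  # tensor([2.]) -- the counter survives the round trip
```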
**Review comment:** Why does this place use 1-dimension? It seems `torch.min`, `torch.max`, and `update_ema` should return 0-dimension tensors, although the call in line 277 will return 1-dimension because it uses `self.bound_model.steps`. Maybe it would be better to unify these places.

**Reply:** If we declare `scale` and `zero_point` as 1-dimension tensors in QAT but this place changes them to 0-dimension, then loading a checkpoint after saving fails with a shape mismatch: the buffers are declared as 1-dimension tensors, but in the checkpoint they were saved as 0-dimension tensors.

**Reply:** So what I do here is keep the size of `scale` and `zero_point` consistent throughout all calculations.
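To make the mismatch concrete: loading a buffer that was saved as 0-dimension into a model that declares it as 1-dimension raises a size-mismatch error, which is the checkpoint load failure this PR guards against. A minimal sketch with illustrative module names:

```python
import torch
import torch.nn as nn

saver = nn.Module()
saver.register_buffer("scale", torch.tensor(1.0))     # 0-dimension, shape torch.Size([])
state = saver.state_dict()

loader = nn.Module()
loader.register_buffer("scale", torch.Tensor([1.0]))  # 1-dimension, shape torch.Size([1])
try:
    loader.load_state_dict(state)
except RuntimeError as err:
    # e.g. "size mismatch for scale: copying a param with shape torch.Size([])
    # from checkpoint, the shape in current model is torch.Size([1])"
    print(err)
```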