diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py index d40b3f706..7b1cccb23 100644 --- a/bitsandbytes/autograd/_functions.py +++ b/bitsandbytes/autograd/_functions.py @@ -353,9 +353,13 @@ def forward( # Extract the corresponding weights if state.has_fp16_weights: - state.subB = B[:, state.idx].t() # .contiguous() + state.subB = B[:, state.idx].t() else: - outliers = state.CB[:, state.idx] # .clone() + outliers = state.CB[:, state.idx] + + # To dequantize the weights associated with the input outliers, + # we need to divide by 127. Multiplying by the reciprocal is more + # performant, hence the constant 7.874016e-3 (approximately 1 / 127). state.subB = (7.874016e-3 * outliers * state.SCB.view(-1, 1)).t().to(A.dtype) else: subA = None