diff --git a/auto_round/autoround.py b/auto_round/autoround.py
index d9867685..79ba17aa 100644
--- a/auto_round/autoround.py
+++ b/auto_round/autoround.py
@@ -22,7 +22,7 @@
 from torch import autocast
 
 from .calib_dataset import get_dataloader
-from .quantizer import WrapperMultiblock, wrapper_block, unwrapper_block
+from .quantizer import WrapperMultiblock, wrapper_block, unwrapper_block, WrapperLinear, unwrapper_layer
 from .special_model_handler import check_hidden_state_dim, check_share_attention_mask
 from .utils import (
     CpuInfo,
diff --git a/auto_round/quantizer.py b/auto_round/quantizer.py
index be82be75..01fe5d0f 100644
--- a/auto_round/quantizer.py
+++ b/auto_round/quantizer.py
@@ -135,8 +135,7 @@ def quant_weight_actor(weight, num_bits, sym, v, min_scale, max_scale, scale_dty
 
 
 def quant_weight(
-    weight, num_bits=4, group_size=-1, sym=False, v=0, min_scale=1.0, max_scale=1.0, scale_dtype=torch.float16,
-    **kwargs
+    weight, num_bits=4, group_size=-1, sym=False, v=0, min_scale=1.0, max_scale=1.0, scale_dtype=torch.float16
 ):
     """Quantizes and dequantizes weight, handing the group size issue .
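
Below is a minimal sketch (not part of the PR) of how the updated quant_weight signature might be called now that **kwargs has been dropped. The import path and parameter names follow the diff; the tensor shape and the group_size value are illustrative assumptions.

import torch

from auto_round.quantizer import quant_weight

# Illustrative 2-D weight tensor; 256 columns divide evenly into groups of 128.
weight = torch.randn(32, 256)

# Only the parameters named in the signature are accepted after this change;
# with **kwargs removed, a stray keyword argument raises a TypeError instead
# of being silently swallowed.
result = quant_weight(
    weight,
    num_bits=4,
    group_size=128,
    sym=False,
    v=0,
    min_scale=1.0,
    max_scale=1.0,
    scale_dtype=torch.float16,
)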