fix export issue with torch 2.0 (#129)
wenhuach21 authored May 28, 2024
1 parent 416ec7e commit 5bff86e
Showing 2 changed files with 19 additions and 17 deletions.
auto_round/export/export_to_autogptq.py (17 additions, 16 deletions)
@@ -107,7 +107,8 @@ def save_quantized_as_autogptq(output_dir, use_triton=True, inplace=True, **kwargs):
         info = weight_config[key]
         if not check_to_quantized(info):
             continue
-        quantizers[key] = (None, info["scale"], info["zp"], info["g_idx"])
+        ##force to float32 to be compatible with torch 2.0
+        quantizers[key] = (None, info["scale"], info["zp"].to(torch.float32), info["g_idx"])
     pack_model(
         compressed_model,
         quantizers,
@@ -126,7 +127,7 @@ def save_quantized_as_autogptq(output_dir, use_triton=True, inplace=True, **kwargs):
         info = weight_config[key]
         if not check_to_quantized(info):
             continue
-        quantizers[key] = (None, info["scale"], info["zp"], info["g_idx"])
+        quantizers[key] = (None, info["scale"].to(torch.float32), info["zp"].to(torch.float32), info["g_idx"])
     pack_model(
         compressed_model,
         quantizers,
@@ -158,20 +159,20 @@ def save_quantized_as_autogptq(output_dir, use_triton=True, inplace=True, **kwargs):


 def _save_quantized_to_autogptq(
-        model,
-        save_dir: str,
-        bits=4,
-        group_size=128,
-        sym=False,
-        iters=200,
-        lr=5e-3,
-        minmax_lr=5e-3,
-        enable_minmax_tuning=True,
-        enable_quanted_input=True,
-        use_safetensors: bool = True,
-        scale_dtype=torch.float32,
-        safetensors_metadata: Optional[Dict[str, str]] = None,
-        modules_in_block_to_quantize=None,
+    model,
+    save_dir: str,
+    bits=4,
+    group_size=128,
+    sym=False,
+    iters=200,
+    lr=5e-3,
+    minmax_lr=5e-3,
+    enable_minmax_tuning=True,
+    enable_quanted_input=True,
+    use_safetensors: bool = True,
+    scale_dtype=torch.float32,
+    safetensors_metadata: Optional[Dict[str, str]] = None,
+    modules_in_block_to_quantize=None,
 ):
     """Save quantized model and configs to local disk for cuda."""
     os.makedirs(save_dir, exist_ok=True)
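For context, a minimal runnable sketch of the pattern this file now applies: upcast half-precision scale and zero-point tensors to float32 before they reach the packing step. The helper name normalize_quantizer_dtypes and the dummy config below are hypothetical, not part of the repository; only the info["scale"]/info["zp"]/info["g_idx"] layout comes from the diff.

```python
import torch

def normalize_quantizer_dtypes(weight_config):
    # Hypothetical helper mirroring the diff: the packing path used with
    # torch 2.0 expects float32 scales/zero points, so half-precision
    # tensors are upcast first; .to(torch.float32) is a no-op for
    # tensors that are already float32.
    quantizers = {}
    for key, info in weight_config.items():
        quantizers[key] = (
            None,
            info["scale"].to(torch.float32),
            info["zp"].to(torch.float32),
            info["g_idx"],
        )
    return quantizers

# Usage with dummy data (names, shapes, and values are illustrative only):
cfg = {
    "model.layers.0.self_attn.q_proj": {
        "scale": torch.rand(32, dtype=torch.float16),
        "zp": torch.full((32,), 8.0, dtype=torch.float16),
        "g_idx": torch.arange(4096) // 128,
    }
}
quantizers = normalize_quantizer_dtypes(cfg)
print(quantizers["model.layers.0.self_attn.q_proj"][1].dtype)  # torch.float32
```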
auto_round/export/export_to_autoround/export_to_autoround.py (2 additions, 1 deletion)
@@ -151,7 +151,8 @@ def save_quantized_as_autoround(output_dir, inplace=True, backend="gptq:exllamav2", **kwargs):
         zero = weight_config[name]["zp"]
         # so far can only pack layer on CPU
         qlayer.to("cpu")
-        layer, scale, zero = layer.to("cpu"), scale.to("cpu"), zero.to("cpu")
+        ##force to float32 to be compatible with torch 2.0
+        layer, scale, zero = layer.to("cpu"), scale.to("cpu"), zero.to("cpu").to(torch.float32)
         qlayer.pack(layer, scale, zero, None)
         qlayer.to(device)
     quantization_config = kwargs["serialization_dict"]
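Likewise, a hedged sketch of the CPU-plus-float32 preparation this path performs before qlayer.pack(...); prepare_for_pack is an illustrative name, not a function in the repository, and the stand-in tensors below are dummies.

```python
import torch

def prepare_for_pack(layer, scale, zero):
    # Packing currently runs on CPU only (per the comment in the diff),
    # and the zero point is upcast to float32 for torch 2.0 compatibility.
    layer = layer.to("cpu")
    scale = scale.to("cpu")
    zero = zero.to("cpu").to(torch.float32)
    return layer, scale, zero

# Example with stand-in objects:
layer = torch.nn.Linear(16, 16).half()
scale = torch.rand(4, dtype=torch.float16)
zero = torch.full((4,), 8.0, dtype=torch.bfloat16)
layer, scale, zero = prepare_for_pack(layer, scale, zero)
print(zero.dtype)  # torch.float32
```

After packing, the layer is moved back to its original device, matching the qlayer.to(device) call in the diff.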
