Fix backend name check ("exllama2" -> "exllamav2") when selecting the ExLlamaV2 QuantLinear

intel · Jun 3, 2024 · dd40f17 · dd40f17
1 parent 063009b
commit dd40f17
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/auto_round/auto_quantizer.py b/auto_round/auto_quantizer.py
@@ -311,7 +311,7 @@ def convert_model(self, model: nn.Module):
         return model
 
     def _dynamic_import_inference_linear(self, bits, backend):
-        if bits == 4 and self.exllama2_available and "exllama2" in backend:
+        if bits == 4 and self.exllama2_available and "exllamav2" in backend:
             from auto_round_extension.cuda.qliner_exllamav2 import QuantLinear
         else:
             from auto_round_extension.cuda.qliner_triton import QuantLinear