
Commit

update

Signed-off-by: n1ck-guo <heng.guo@intel.com>
n1ck-guo committed Dec 27, 2024
1 parent 08d2888 commit 1562f39
Showing 4 changed files with 26 additions and 17 deletions.
14 changes: 6 additions & 8 deletions auto_round/mllm/autoround_mllm.py
@@ -28,7 +28,8 @@
     clear_memory
 )
 from ..autoround import AutoRound
-from .template import get_template, Template, SUPPORT_ONLY_TEXT_MODELS
+from .template import get_template, Template
+from auto_round.special_model_handler import SUPPORT_ONLY_TEXT_MODELS
 from .mllm_dataset import get_mllm_dataloader
 from ..low_cpu_mem.utils import get_layers_before_block

@@ -41,8 +42,10 @@ def _only_text_test(model, tokenizer, device, model_type):
 
     device = detect_device(device)
     text = ["only text", "test"]
+    ori_padding_side = tokenizer.padding_side
+    tokenizer.padding_side = 'left'
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+    tokenizer.padding_side = ori_padding_side
 
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
@@ -53,19 +56,14 @@ def _only_text_test(model, tokenizer, device, model_type):
         return True
     except RuntimeError as e:
         if "CUDA out of memory" in str(e):
-            logger.warning(f"we strongly recommend using additional CUDA/HPU devices, e.g. "
-                           f"set `--device '0,1'` in our cmd line usage or "
-                           f"load the model with `device_mapping=auto`, "
-                           f"for optimal performance during calibration; "
-                           f"otherwise, the process may be significantly slower.")
             model = model.to("cpu")
             inputs = inputs.to("cpu")
             try:
-                model(**input)
+                model(**inputs)
             except:
                 return False
         return False
-    except:
+    except Exception as e:
         return False
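The save-and-restore of tokenizer.padding_side above keeps the helper from leaking a global tokenizer setting. As a minimal sketch of the same pattern, assuming a Hugging Face-style tokenizer with a padding_side attribute (the context manager below is illustrative, not part of this commit), a try/finally restores the original side even if tokenization raises:

from contextlib import contextmanager

@contextmanager
def padding_side(tokenizer, side="left"):
    # Temporarily switch tokenizer.padding_side; always restore the original.
    original = tokenizer.padding_side
    tokenizer.padding_side = side
    try:
        yield tokenizer
    finally:
        tokenizer.padding_side = original

# Usage: batched prompts for decoder-only generation are typically left-padded.
# with padding_side(tokenizer, "left"):
#     inputs = tokenizer(["only text", "test"], return_tensors="pt", padding=True, truncation=True)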


9 changes: 0 additions & 9 deletions auto_round/mllm/template.py
@@ -22,15 +22,6 @@
 from .processor import BasicProcessor, PROCESSORS
 
 TEMPLATES: Dict[str, "Template"] = {}
-SUPPORT_ONLY_TEXT_MODELS = [
-    "phi3_v",
-    "cogvlm2",
-    "llava",
-    "qwen2_vl",
-    "deepseek_vl_v2",
-    "chatglm",
-    "idefics3"
-]
 
 
 def fill_content(target, **kwargs):
10 changes: 10 additions & 0 deletions auto_round/special_model_handler.py
@@ -18,6 +18,16 @@
 mllms_with_limited_bs = ("llava", "qwen2_vl", "phi3_v", "mllama")  # Limitations on batch_size
 skippable_cache_keys = ("past_key_value",)
 
+SUPPORT_ONLY_TEXT_MODELS = [
+    "phi3_v",
+    "cogvlm2",
+    "llava",
+    "qwen2_vl",
+    "deepseek_vl_v2",
+    "chatglm",
+    "idefics3"
+]
+
 def to_device(input, device=torch.device("cpu")):
     """Moves input data to the specified device.
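With the list now defined in auto_round.special_model_handler, both the template module and the MLLM tuning path can share one source of truth. A minimal usage sketch, assuming the caller already holds the model type key (the helper name below is illustrative, not part of this commit):

from auto_round.special_model_handler import SUPPORT_ONLY_TEXT_MODELS

def supports_text_only_calibration(model_type: str) -> bool:
    # Membership test against the shared list, e.g. "qwen2_vl" -> True, "mllama" -> False.
    return model_type in SUPPORT_ONLY_TEXT_MODELS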
10 changes: 10 additions & 0 deletions test_cuda/test_support_vlms.py
@@ -261,6 +261,16 @@ def test_cogvlm(self):
         response = response.split("<|end_of_text|>")[0]
         print(response)
         shutil.rmtree(quantized_model_path, ignore_errors=True)
 
+    def test_72b(self):
+        model_path = "/data5/models/Qwen2-VL-72B-Instruct/"
+        res = os.system(
+            f"cd .. && {self.python_path} -m auto_round --mllm "
+            f"--model {model_path} --iter 1 --nsamples 1 --output_dir {self.save_dir} --device {self.device}"
+        )
+        self.assertFalse(res > 0 or res == -1, msg="qwen2-72b tuning fail")
+        quantized_model_path = os.path.join(self.save_dir, "Qwen2-VL-72B-Instruct-w4g128-auto_round")
+        shutil.rmtree(quantized_model_path, ignore_errors=True)
+
 if __name__ == "__main__":
     unittest.main()
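One note on the exit-status check in test_72b: os.system returns a platform wait status, so res > 0 or res == -1 treats any nonzero status as a tuning failure. A hedged alternative sketch using subprocess, which exposes the exit code directly (command abbreviated; flags copied from the test above, not re-verified):

import subprocess

# shell=True keeps the "cd .. && ..." compound command from the test intact.
proc = subprocess.run(
    "cd .. && python -m auto_round --mllm "
    "--model /data5/models/Qwen2-VL-72B-Instruct/ --iter 1 --nsamples 1",
    shell=True,
)
assert proc.returncode == 0, "qwen2-72b tuning fail"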
