diff --git a/neural_speed/__init__.py b/neural_speed/__init__.py index 2b98a4fbb..7e7dc3750 100644 --- a/neural_speed/__init__.py +++ b/neural_speed/__init__.py @@ -223,6 +223,7 @@ def init(self, def init_from_bin(self, model_type, model_path, **generate_kwargs): if self.module is None: + model_type = model_maps.get(model_type, model_type) self.module = _import_package(model_type) self.model = self.module.Model() if model_type=="whisper": diff --git a/tests/model-test/calculate_percentiles.py b/tests/model-test/calculate_percentiles.py index f2f32e2f6..752a4715c 100644 --- a/tests/model-test/calculate_percentiles.py +++ b/tests/model-test/calculate_percentiles.py @@ -37,6 +37,10 @@ def parse_output_file_acc(file_path): with open(file_path, 'r', encoding='UTF-8', errors='ignore') as file: for line in file: accuracy_match = re.search(r"\|\s+\|\s+\|none\s+\|\s+0\|acc\s+\|\d\.\d+\|\±\s+\|\d\.\d+\|", line) + if accuracy_match: + accuracy[0]=float(re.search(r"\d+\.\d+", accuracy_match.group()).group())*100 + continue + accuracy_match = re.search(r"\|\s+\|\s+\|none\s+\|\s+0\|acc\s+\|\s+\d\.\d+\|\±\s+\|\d\.\d+\|", line) if accuracy_match: accuracy[0]=float(re.search(r"\d+\.\d+", accuracy_match.group()).group())*100 continue diff --git a/tests/model-test/cpp_graph_inference.sh b/tests/model-test/cpp_graph_inference.sh index afbd46188..63b7e3a8b 100644 --- a/tests/model-test/cpp_graph_inference.sh +++ b/tests/model-test/cpp_graph_inference.sh @@ -468,8 +468,10 @@ function main() { chmod 777 ${WORKSPACE}/${logs_file} if [[ ${input} == "1024" && ${cores_per_instance} == "32" ]]; then echo "-------- Accuracy start--------" - if [[ "${model}" == "llama"* || "${model}" == "gptj-6b" ]]; then + if [[ "${model}" == "llama"* || "${model}" == "gptj-6b" || "${model}" == "mistral-7b" ]]; then OMP_NUM_THREADS=56 numactl -l -C 0-55 python ./scripts/cal_acc.py --model_name ${model_path} --init_from_bin ${model}-${precision}.bin --batch_size 8 --tasks lambada_openai 2>&1 | tee -a ${WORKSPACE}/${logs_file} + elif [[ "${model}" == *"gptq" ]]; then + OMP_NUM_THREADS=56 numactl -l -C 0-55 python ./scripts/cal_acc.py --model_name ${model_path} --use_gptq --tasks lambada_openai 2>&1 | tee -a ${WORKSPACE}/${logs_file} else OMP_NUM_THREADS=56 numactl -l -C 0-55 python ./scripts/cal_acc.py --model_name ${model_path} --init_from_bin ${model}-${precision}.bin --tasks lambada_openai --batch_size 1 2>&1 | tee -a ${WORKSPACE}/${logs_file} fi