Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

[Bug] Fix GLM4 convert error #295

Merged
merged 1 commit on Jun 14, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion neural_speed/convert/convert_chatglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,8 @@ def chatglm4_convert(model, tokenizer, dir_model, fname_out, ftype, hparams):
fout.write(struct.pack("i", 0)) # n_expert_used
fout.write(struct.pack("i", 0)) # n_embd_head_k for gemma
fout.write(struct.pack("f", hparams.get("layernorm_epsilon", 1e-5))) # rms_norm_eps or layer_norm_eps
fout.write(struct.pack("f", 10000.0)) # freq_base
rope_ratio = hparams.get("rope_ratio", 1)
fout.write(struct.pack("f", rope_ratio*10000.0)) # freq_base
fout.write(struct.pack("f", 1)) # rope_factor

fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
Expand Down
Loading