Skip to content

Commit

Permalink
Merge pull request #11 from VectorInstitute/develop
Browse files: browse the repository at this point in the history
v0.3.2
  • Loading branch information
XkunW authored Sep 3, 2024
2 parents f43d7bf + 9a07db8 commit 39b98a2
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "vec-inf"
version = "0.3.1"
version = "0.3.2"
description = "Efficient LLM inference on Slurm clusters using vLLM."
authors = ["Marshall Wang <marshall.wang@vectorinstitute.ai>"]
license = "MIT license"
Expand Down
30 changes: 20 additions & 10 deletions vec_inf/cli/_cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -63,6 +63,11 @@ def cli():
type=str,
help='Time limit for job, this should comply with QoS, default to max walltime of the chosen QoS'
)
@click.option(
"--vocab-size",
type=int,
help='Vocabulary size, this option is intended for custom models'
)
@click.option(
"--data-type",
type=str,
Expand Down Expand Up @@ -93,6 +98,7 @@ def launch(
num_gpus: int=None,
qos: str=None,
time: str=None,
vocab_size: int=None,
data_type: str=None,
venv: str=None,
log_dir: str=None,
Expand All @@ -109,16 +115,20 @@ def launch(

models_df = load_models_df()

if model_name not in models_df['model_name'].values:
raise ValueError(f"Model name {model_name} not found in available models")

default_args = load_default_args(models_df, model_name)

for arg in default_args:
if arg in locals() and locals()[arg] is not None:
default_args[arg] = locals()[arg]
renamed_arg = arg.replace("_", "-")
launch_cmd += f" --{renamed_arg} {default_args[arg]}"
if model_name in models_df['model_name'].values:
default_args = load_default_args(models_df, model_name)
for arg in default_args:
if arg in locals() and locals()[arg] is not None:
default_args[arg] = locals()[arg]
renamed_arg = arg.replace("_", "-")
launch_cmd += f" --{renamed_arg} {default_args[arg]}"
else:
model_args = models_df.columns.tolist()
excluded_keys = ['model_name', 'pipeline_parallelism']
for arg in model_args:
if arg not in excluded_keys and locals()[arg] is not None:
renamed_arg = arg.replace("_", "-")
launch_cmd += f" --{renamed_arg} {locals()[arg]}"

output = run_bash_command(launch_cmd)

Expand Down
3 changes: 2 additions & 1 deletion vec_inf/models/models.csv
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,4 +42,5 @@ Mixtral-8x7B-Instruct-v0.1,Mixtral,8x7B-Instruct-v0.1,a40,m2,08:00:00,4,1,32000,
Mixtral-8x22B-v0.1,Mixtral,8x22B-v0.1,a40,m2,08:00:00,4,2,32768,65536,auto,singularity,default,false
Mixtral-8x22B-Instruct-v0.1,Mixtral,8x22B-Instruct-v0.1,a40,m2,08:00:00,4,2,32768,65536,auto,singularity,default,false
Phi-3-medium-128k-instruct,Phi-3,medium-128k-instruct,a40,m2,08:00:00,2,1,32064,131072,auto,singularity,default,false
Phi-3-vision-128k-instruct,Phi-3,vision-128k-instruct,a40,m2,08:00:00,2,1,32064,65536,auto,singularity,default,false
Phi-3-vision-128k-instruct,Phi-3,vision-128k-instruct,a40,m2,08:00:00,2,1,32064,65536,auto,singularity,default,false
Llama3-OpenBioLLM-70B,Llama3-OpenBioLLM,70B,a40,m2,08:00:00,4,1,128256,8192,auto,singularity,default,false
2 changes: 0 additions & 2 deletions vec_inf/multinode_vllm.slurm
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,7 +93,6 @@ if [ "$VENV_BASE" = "singularity" ]; then
--pipeline-parallel-size ${PIPELINE_PARALLEL_SIZE} \
--tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
--dtype ${VLLM_DATA_TYPE} \
--load-format safetensors \
--trust-remote-code \
--max-logprobs ${VLLM_MAX_LOGPROBS} \
--max-model-len ${VLLM_MAX_MODEL_LEN}
Expand All @@ -107,7 +106,6 @@ else
--pipeline-parallel-size ${PIPELINE_PARALLEL_SIZE} \
--tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
--dtype ${VLLM_DATA_TYPE} \
--load-format safetensors \
--trust-remote-code \
--max-logprobs ${VLLM_MAX_LOGPROBS} \
--max-model-len ${VLLM_MAX_MODEL_LEN}
Expand Down

0 comments on commit 39b98a2

Please sign in to comment.