From 4e10efbec37592405b95998bbfd9f60f5aec3f24 Mon Sep 17 00:00:00 2001 From: Marshall Wang Date: Tue, 3 Sep 2024 12:48:57 -0400 Subject: [PATCH 1/2] Add support for custom models, remove load format to default to auto for multi node jobs, add Llama3-OpenBio-70B --- vec_inf/cli/_cli.py | 30 ++++++++++++++++++++---------- vec_inf/models/models.csv | 3 ++- vec_inf/multinode_vllm.slurm | 2 -- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/vec_inf/cli/_cli.py b/vec_inf/cli/_cli.py index 8dd77c5..9c7bee7 100644 --- a/vec_inf/cli/_cli.py +++ b/vec_inf/cli/_cli.py @@ -63,6 +63,11 @@ def cli(): type=str, help='Time limit for job, this should comply with QoS, default to max walltime of the chosen QoS' ) +@click.option( + "--vocab-size", + type=int, + help='Vocabulary size, this option is intended for custom models' +) @click.option( "--data-type", type=str, @@ -93,6 +98,7 @@ def launch( num_gpus: int=None, qos: str=None, time: str=None, + vocab_size: int=None, data_type: str=None, venv: str=None, log_dir: str=None, @@ -109,16 +115,20 @@ def launch( models_df = load_models_df() - if model_name not in models_df['model_name'].values: - raise ValueError(f"Model name {model_name} not found in available models") - - default_args = load_default_args(models_df, model_name) - - for arg in default_args: - if arg in locals() and locals()[arg] is not None: - default_args[arg] = locals()[arg] - renamed_arg = arg.replace("_", "-") - launch_cmd += f" --{renamed_arg} {default_args[arg]}" + if model_name in models_df['model_name'].values: + default_args = load_default_args(models_df, model_name) + for arg in default_args: + if arg in locals() and locals()[arg] is not None: + default_args[arg] = locals()[arg] + renamed_arg = arg.replace("_", "-") + launch_cmd += f" --{renamed_arg} {default_args[arg]}" + else: + model_args = models_df.columns.tolist() + excluded_keys = ['model_name', 'pipeline_parallelism'] + for arg in model_args: + if arg not in excluded_keys and locals()[arg] is not None: + renamed_arg = arg.replace("_", "-") + launch_cmd += f" --{renamed_arg} {locals()[arg]}" output = run_bash_command(launch_cmd) diff --git a/vec_inf/models/models.csv b/vec_inf/models/models.csv index 160c228..bb0b9b6 100644 --- a/vec_inf/models/models.csv +++ b/vec_inf/models/models.csv @@ -42,4 +42,5 @@ Mixtral-8x7B-Instruct-v0.1,Mixtral,8x7B-Instruct-v0.1,a40,m2,08:00:00,4,1,32000, Mixtral-8x22B-v0.1,Mixtral,8x22B-v0.1,a40,m2,08:00:00,4,2,32768,65536,auto,singularity,default,false Mixtral-8x22B-Instruct-v0.1,Mixtral,8x22B-Instruct-v0.1,a40,m2,08:00:00,4,2,32768,65536,auto,singularity,default,false Phi-3-medium-128k-instruct,Phi-3,medium-128k-instruct,a40,m2,08:00:00,2,1,32064,131072,auto,singularity,default,false -Phi-3-vision-128k-instruct,Phi-3,vision-128k-instruct,a40,m2,08:00:00,2,1,32064,65536,auto,singularity,default,false \ No newline at end of file +Phi-3-vision-128k-instruct,Phi-3,vision-128k-instruct,a40,m2,08:00:00,2,1,32064,65536,auto,singularity,default,false +Llama3-OpenBioLLM-70B,Llama3-OpenBioLLM,70B,a40,m2,08:00:00,4,1,128256,8192,auto,singularity,default,false \ No newline at end of file diff --git a/vec_inf/multinode_vllm.slurm b/vec_inf/multinode_vllm.slurm index db8f710..ee36cdb 100644 --- a/vec_inf/multinode_vllm.slurm +++ b/vec_inf/multinode_vllm.slurm @@ -93,7 +93,6 @@ if [ "$VENV_BASE" = "singularity" ]; then --pipeline-parallel-size ${PIPELINE_PARALLEL_SIZE} \ --tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \ --dtype ${VLLM_DATA_TYPE} \ - --load-format safetensors \ --trust-remote-code \ --max-logprobs ${VLLM_MAX_LOGPROBS} \ --max-model-len ${VLLM_MAX_MODEL_LEN} @@ -107,7 +106,6 @@ else --pipeline-parallel-size ${PIPELINE_PARALLEL_SIZE} \ --tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \ --dtype ${VLLM_DATA_TYPE} \ - --load-format safetensors \ --trust-remote-code \ --max-logprobs ${VLLM_MAX_LOGPROBS} \ --max-model-len ${VLLM_MAX_MODEL_LEN} From 9a07db820dcd2ff33f2a7ca7bf75a567faabf626 Mon Sep 17 00:00:00 2001 From: Marshall Wang Date: Tue, 3 Sep 2024 12:49:45 -0400 Subject: [PATCH 2/2] Bump version to 0.3.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 22838b6..6f47cbc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "vec-inf" -version = "0.3.1" +version = "0.3.2" description = "Efficient LLM inference on Slurm clusters using vLLM." authors = ["Marshall Wang "] license = "MIT license"