From b668b59512269d333e3ef60df830fd909d1bb9ec Mon Sep 17 00:00:00 2001 From: 1b5d <8110504+1b5d@users.noreply.github.com> Date: Fri, 5 May 2023 23:39:58 +0200 Subject: [PATCH] add a separate parameter for safetensors models --- README.md | 3 +++ app/llms/gptq_llama/gptq_llama.py | 7 +++++-- config.yaml | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 69b3e2f..056080c 100644 --- a/README.md +++ b/README.md @@ -196,8 +196,11 @@ model_params: wbits: 4 cuda_visible_devices: "0" device: "cuda:0" + st_device: 0 ``` +**Note**: `st_device` is only needed in the case of a safetensors model, otherwise you can either remove it or set it to `-1` + Example request: ``` diff --git a/app/llms/gptq_llama/gptq_llama.py b/app/llms/gptq_llama/gptq_llama.py index 51c2097..025e81f 100644 --- a/app/llms/gptq_llama/gptq_llama.py +++ b/app/llms/gptq_llama/gptq_llama.py @@ -91,6 +91,7 @@ def __init__(self, params: Dict[str, str]) -> None: wbits = params.get("wbits", 4) cuda_visible_devices = params.get("cuda_visible_devices", "0") dev = params.get("device", "cuda:0") + st_device = params.get("st_device", -1) os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices self.device = torch.device(dev) @@ -99,11 +100,13 @@ def __init__(self, params: Dict[str, str]) -> None: model_path, wbits, group_size, - cuda_visible_devices, + st_device, ) self.model.to(self.device) - self.tokenizer = AutoTokenizer.from_pretrained(self.model, use_fast=False) + self.tokenizer = AutoTokenizer.from_pretrained( + settings.setup_params["repo_id"], use_fast=False + ) def _load_quant( self, model, checkpoint, wbits, groupsize, device diff --git a/config.yaml b/config.yaml index 69ed68c..b246ead 100644 --- a/config.yaml +++ b/config.yaml @@ -8,3 +8,4 @@ model_params: wbits: 4 cuda_visible_devices: "0" device: "cuda:0" + st_device: 0