From b668b59512269d333e3ef60df830fd909d1bb9ec Mon Sep 17 00:00:00 2001 From: 1b5d <8110504+1b5d@users.noreply.github.com> Date: Fri, 5 May 2023 23:39:58 +0200 Subject: [PATCH] add a separate parameter for safetensors models --- README.md | 3 +++ app/llms/gptq_llama/gptq_llama.py | 7 +++++-- config.yaml | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 69b3e2f..056080c 100644 --- a/README.md +++ b/README.md @@ -196,8 +196,11 @@ model_params: wbits: 4 cuda_visible_devices: "0" device: "cuda:0" + st_device: 0 ``` +**Note**: `st_device` is only needed in the case of a safetensors model, otherwise you can either remove it or set it to `-1` + Example request: ``` diff --git a/app/llms/gptq_llama/gptq_llama.py b/app/llms/gptq_llama/gptq_llama.py index 51c2097..025e81f 100644 --- a/app/llms/gptq_llama/gptq_llama.py +++ b/app/llms/gptq_llama/gptq_llama.py @@ -91,6 +91,7 @@ def __init__(self, params: Dict[str, str]) -> None: wbits = params.get("wbits", 4) cuda_visible_devices = params.get("cuda_visible_devices", "0") dev = params.get("device", "cuda:0") + st_device = params.get("st_device", -1) os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices self.device = torch.device(dev) @@ -99,11 +100,13 @@ def __init__(self, params: Dict[str, str]) -> None: model_path, wbits, group_size, - cuda_visible_devices, + st_device, ) self.model.to(self.device) - self.tokenizer = AutoTokenizer.from_pretrained(self.model, use_fast=False) + self.tokenizer = AutoTokenizer.from_pretrained( + settings.setup_params["repo_id"], use_fast=False + ) def _load_quant( self, model, checkpoint, wbits, groupsize, device diff --git a/config.yaml b/config.yaml index 69ed68c..b246ead 100644 --- a/config.yaml +++ b/config.yaml @@ -8,3 +8,4 @@ model_params: wbits: 4 cuda_visible_devices: "0" device: "cuda:0" + st_device: 0