Add DeepSeek v3 model (#3253)
yifanmai authored Jan 7, 2025
1 parent c798d0f commit 91e6326
Showing 4 changed files with 50 additions and 4 deletions.
30 changes: 26 additions & 4 deletions src/helm/clients/together_client.py
@@ -318,10 +318,17 @@ class TogetherRawChatRequest(TypedDict):
 class TogetherChatClient(CachingClient):
     """Client that uses the Python Together library for chat models."""
 
-    def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        api_key: Optional[str],
+        together_model: Optional[str] = None,
+        disable_logprobs: Optional[bool] = None,
+    ):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
         self._together_model = together_model
+        self._disable_logprobs = bool(disable_logprobs)
 
     def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
         request.validate()
@@ -353,6 +360,10 @@ def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
             model = self._together_model
         else:
             model = request.model
+        if self._disable_logprobs:
+            logprobs = 0
+        else:
+            logprobs = min(request.top_k_per_token, 1)
         return {
             "messages": messages,
             "model": model,
@@ -361,7 +372,7 @@
             "temperature": request.temperature,
             "top_p": request.top_p,
             "top_k": request.top_k_per_token,
-            "logprobs": min(request.top_k_per_token, 1),
+            "logprobs": logprobs,
             "echo": request.echo_prompt,
             "n": request.num_completions,
         }
@@ -426,16 +437,27 @@ class TogetherRawCompletionRequest(TypedDict):
 class TogetherCompletionClient(CachingClient):
     """Client that uses the Python Together library for text completion models."""
 
-    def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        api_key: Optional[str],
+        together_model: Optional[str] = None,
+        disable_logprobs: Optional[bool] = None,
+    ):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
         self._together_model = together_model
+        self._disable_logprobs = bool(disable_logprobs)
 
     def convert_to_raw_completion_request(self, request: Request) -> TogetherRawCompletionRequest:
         if self._together_model is not None:
             model = self._together_model
         else:
             model = request.model
+        if self._disable_logprobs:
+            logprobs = 0
+        else:
+            logprobs = min(request.top_k_per_token, 1)
         return {
             "prompt": request.prompt,
             "model": model,
@@ -444,7 +466,7 @@
             "temperature": request.temperature,
             "top_p": request.top_p,
             "top_k": request.top_k_per_token,
-            "logprobs": min(request.top_k_per_token, 1),
+            "logprobs": logprobs,
             "echo": request.echo_prompt,
             "n": request.num_completions,
         }
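Taken together, the change simply gates the logprobs field of the raw request on the new flag: a deployment with disable_logprobs set always sends logprobs=0, while everything else keeps the existing cap of 1. A minimal runnable sketch of just that selection logic, using a stand-in for HELM's Request class (FakeRequest is illustrative, not the real class):

from dataclasses import dataclass

@dataclass
class FakeRequest:
    # Stand-in for helm.common.request.Request; only the one field used here.
    top_k_per_token: int = 1

def select_logprobs(request: FakeRequest, disable_logprobs: bool) -> int:
    # Mirrors the diff: a disabled deployment always sends logprobs=0;
    # otherwise the value is capped at 1, the most this client requests.
    if disable_logprobs:
        return 0
    return min(request.top_k_per_token, 1)

assert select_logprobs(FakeRequest(top_k_per_token=5), disable_logprobs=True) == 0
assert select_logprobs(FakeRequest(top_k_per_token=5), disable_logprobs=False) == 1
assert select_logprobs(FakeRequest(top_k_per_token=0), disable_logprobs=False) == 0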
9 changes: 9 additions & 0 deletions src/helm/config/model_deployments.yaml
@@ -412,6 +412,15 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.together_client.TogetherChatClient"
 
+  - name: together/deepseek-v3
+    model_name: deepseek-ai/deepseek-v3
+    tokenizer_name: deepseek-ai/deepseek-v3
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        disable_logprobs: True
+
 # Gooseai
 
 # TODO: Migrate these models to use OpenAIClient
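For readers unfamiliar with HELM's deployment format: the framework instantiates the class named by client_spec.class_name and forwards each key under args as a keyword argument to its constructor, which is how disable_logprobs reaches the new __init__ parameter above. Roughly equivalent to the following sketch (not the framework's actual wiring code; BlackHoleCacheConfig and the literal API key are placeholders for values HELM supplies at runtime):

from helm.clients.together_client import TogetherChatClient
from helm.common.cache import BlackHoleCacheConfig

# Approximate equivalent of what HELM's object-spec machinery does with the
# deployment entry above; in practice the framework supplies the real cache
# config and resolves the API key from credentials.
client = TogetherChatClient(
    cache_config=BlackHoleCacheConfig(),  # placeholder no-op cache config
    api_key="YOUR_TOGETHER_API_KEY",      # placeholder credential
    disable_logprobs=True,                # taken from the deployment's args
)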
9 changes: 9 additions & 0 deletions src/helm/config/model_metadata.yaml
@@ -788,6 +788,15 @@ models:
     release_date: 2024-01-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: deepseek-ai/deepseek-v3
+    display_name: DeepSeek v3
+    description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. It adopts the Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
+    creator_organization_name: DeepSeek
+    access: open
+    # NOTE: The total size of the DeepSeek-V3 models on Hugging Face is 685B, which includes 671B of Main Model weights and 14B of Multi-Token Prediction (MTP) Module weights.
+    num_parameters: 685000000000
+    release_date: 2024-12-24
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 # EleutherAI
 - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
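The NOTE in the entry accounts for the headline parameter count; as a quick arithmetic check (numbers taken directly from the comment, nothing assumed):

main_model_weights = 671_000_000_000  # Main Model weights
mtp_module_weights = 14_000_000_000   # Multi-Token Prediction (MTP) Module weights
assert main_model_weights + mtp_module_weights == 685_000_000_000  # num_parameters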
6 changes: 6 additions & 0 deletions src/helm/config/tokenizer_configs.yaml
@@ -169,6 +169,12 @@ tokenizer_configs:
     end_of_text_token: "<|end▁of▁sentence|>"
     prefix_token: "<|begin▁of▁sentence|>"
 
+  - name: deepseek-ai/deepseek-v3
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end▁of▁sentence|>"
+    prefix_token: "<|begin▁of▁sentence|>"
+
 # EleutherAI
 - name: EleutherAI/gpt-j-6B
   tokenizer_spec:
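Note that both special tokens contain the ▁ character (U+2581), not an ASCII underscore. A quick way to confirm they match the published tokenizer (assumes the transformers package and Hub access; the repo id and the trust_remote_code flag are my guesses, not part of this commit):

from transformers import AutoTokenizer

# Hypothetical sanity check against the Hugging Face tokenizer; needs network access.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)
print(tokenizer.bos_token)  # expected: <|begin▁of▁sentence|>
print(tokenizer.eos_token)  # expected: <|end▁of▁sentence|>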
