Add DeepSeek v3 model (#3253)
yifanmai authored Jan 7, 2025
1 parent c798d0f commit 91e6326
Showing 4 changed files with 50 additions and 4 deletions.
30 changes: 26 additions & 4 deletions src/helm/clients/together_client.py
@@ -318,10 +318,17 @@ class TogetherRawChatRequest(TypedDict):
 class TogetherChatClient(CachingClient):
     """Client that uses the Python Together library for chat models."""
 
-    def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        api_key: Optional[str],
+        together_model: Optional[str] = None,
+        disable_logprobs: Optional[bool] = None,
+    ):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
         self._together_model = together_model
+        self._disable_logprobs = bool(disable_logprobs)
 
     def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
         request.validate()
@@ -353,6 +360,10 @@ def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
             model = self._together_model
         else:
             model = request.model
+        if self._disable_logprobs:
+            logprobs = 0
+        else:
+            logprobs = min(request.top_k_per_token, 1)
         return {
             "messages": messages,
             "model": model,
@@ -361,7 +372,7 @@
             "temperature": request.temperature,
             "top_p": request.top_p,
             "top_k": request.top_k_per_token,
-            "logprobs": min(request.top_k_per_token, 1),
+            "logprobs": logprobs,
             "echo": request.echo_prompt,
             "n": request.num_completions,
         }
@@ -426,16 +437,27 @@ class TogetherRawCompletionRequest(TypedDict):
 class TogetherCompletionClient(CachingClient):
     """Client that uses the Python Together library for text completion models."""
 
-    def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        api_key: Optional[str],
+        together_model: Optional[str] = None,
+        disable_logprobs: Optional[bool] = None,
+    ):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
         self._together_model = together_model
+        self._disable_logprobs = bool(disable_logprobs)
 
     def convert_to_raw_completion_request(self, request: Request) -> TogetherRawCompletionRequest:
         if self._together_model is not None:
             model = self._together_model
         else:
             model = request.model
+        if self._disable_logprobs:
+            logprobs = 0
+        else:
+            logprobs = min(request.top_k_per_token, 1)
         return {
             "prompt": request.prompt,
             "model": model,
@@ -444,7 +466,7 @@
             "temperature": request.temperature,
             "top_p": request.top_p,
             "top_k": request.top_k_per_token,
-            "logprobs": min(request.top_k_per_token, 1),
+            "logprobs": logprobs,
             "echo": request.echo_prompt,
             "n": request.num_completions,
         }
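Taken together, the change simply gates the logprobs field of the raw request on the new flag: a deployment with disable_logprobs set always sends logprobs=0, while everything else keeps the existing cap of 1. A minimal runnable sketch of just that selection logic, using a stand-in for HELM's Request class (FakeRequest is illustrative, not the real class):

from dataclasses import dataclass

@dataclass
class FakeRequest:
    # Stand-in for helm.common.request.Request; only the one field used here.
    top_k_per_token: int = 1

def select_logprobs(request: FakeRequest, disable_logprobs: bool) -> int:
    # Mirrors the diff: a disabled deployment always sends logprobs=0;
    # otherwise the value is capped at 1, the most this client requests.
    if disable_logprobs:
        return 0
    return min(request.top_k_per_token, 1)

assert select_logprobs(FakeRequest(top_k_per_token=5), disable_logprobs=True) == 0
assert select_logprobs(FakeRequest(top_k_per_token=5), disable_logprobs=False) == 1
assert select_logprobs(FakeRequest(top_k_per_token=0), disable_logprobs=False) == 0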
9 changes: 9 additions & 0 deletions src/helm/config/model_deployments.yaml
@@ -412,6 +412,15 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.together_client.TogetherChatClient"
 
+  - name: together/deepseek-v3
+    model_name: deepseek-ai/deepseek-v3
+    tokenizer_name: deepseek-ai/deepseek-v3
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        disable_logprobs: True
+
 # Gooseai
 
 # TODO: Migrate these models to use OpenAIClient
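For readers unfamiliar with HELM's deployment format: the framework instantiates the class named by client_spec.class_name and forwards each key under args as a keyword argument to its constructor, which is how disable_logprobs reaches the new __init__ parameter above. Roughly equivalent to the following sketch (not the framework's actual wiring code; BlackHoleCacheConfig and the literal API key are placeholders for values HELM supplies at runtime):

from helm.clients.together_client import TogetherChatClient
from helm.common.cache import BlackHoleCacheConfig

# Approximate equivalent of what HELM's object-spec machinery does with the
# deployment entry above; in practice the framework supplies the real cache
# config and resolves the API key from credentials.
client = TogetherChatClient(
    cache_config=BlackHoleCacheConfig(),  # placeholder no-op cache config
    api_key="YOUR_TOGETHER_API_KEY",      # placeholder credential
    disable_logprobs=True,                # taken from the deployment's args
)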
9 changes: 9 additions & 0 deletions src/helm/config/model_metadata.yaml
@@ -788,6 +788,15 @@ models:
     release_date: 2024-01-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: deepseek-ai/deepseek-v3
+    display_name: DeepSeek v3
+    description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. It adopts the Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
+    creator_organization_name: DeepSeek
+    access: open
+    # NOTE: The total size of the DeepSeek-V3 models on Hugging Face is 685B, which includes 671B of Main Model weights and 14B of Multi-Token Prediction (MTP) Module weights.
+    num_parameters: 685000000000
+    release_date: 2024-12-24
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 # EleutherAI
 - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
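The NOTE in the entry accounts for the headline parameter count; as a quick arithmetic check (numbers taken directly from the comment, nothing assumed):

main_model_weights = 671_000_000_000  # Main Model weights
mtp_module_weights = 14_000_000_000   # Multi-Token Prediction (MTP) Module weights
assert main_model_weights + mtp_module_weights == 685_000_000_000  # num_parameters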
6 changes: 6 additions & 0 deletions src/helm/config/tokenizer_configs.yaml
@@ -169,6 +169,12 @@ tokenizer_configs:
     end_of_text_token: "<|end▁of▁sentence|>"
     prefix_token: "<|begin▁of▁sentence|>"
 
+  - name: deepseek-ai/deepseek-v3
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end▁of▁sentence|>"
+    prefix_token: "<|begin▁of▁sentence|>"
+
 # EleutherAI
 - name: EleutherAI/gpt-j-6B
   tokenizer_spec:
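Note that both special tokens contain the ▁ character (U+2581), not an ASCII underscore. A quick way to confirm they match the published tokenizer (assumes the transformers package and Hub access; the repo id and the trust_remote_code flag are my guesses, not part of this commit):

from transformers import AutoTokenizer

# Hypothetical sanity check against the Hugging Face tokenizer; needs network access.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)
print(tokenizer.bos_token)  # expected: <|begin▁of▁sentence|>
print(tokenizer.eos_token)  # expected: <|end▁of▁sentence|>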
