unpin huggingface hub deps (#17433)
logan-markewich authored Jan 6, 2025
1 parent 2ea7c51 commit 1209ccc
Showing 10 changed files with 92 additions and 102 deletions.
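Besides relaxing the huggingface-hub version pins, the commit moves both HuggingFaceInferenceAPI wrappers off the deprecated InferenceClient.conversational task and onto InferenceClient.chat_completion, which newer huggingface-hub releases support. As orientation before the diff, here is a minimal sketch of the call shape the code switches to; the model id and token are placeholder assumptions, not values from the commit:

# Sketch only: mirrors the chat_completion call shape used in the diff below.
# The model id and token are placeholder assumptions.
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_...")  # or rely on the HF_TOKEN environment variable
output = client.chat_completion(
    messages=[{"role": "user", "content": "Which movie is the best?"}],
    model="HuggingFaceH4/zephyr-7b-beta",
)
# Responses follow the OpenAI-style schema the new code indexes into:
print(output["choices"][0]["message"]["content"])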
File: pyproject.toml (llama-index-extractors-entity)
@@ -27,12 +27,12 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-extractors-entity"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
 span-marker = ">=1.5.0"
-huggingface-hub = "<0.24.0"
+huggingface-hub = ">=0.23.0"
 llama-index-core = "^0.12.0"

 [tool.poetry.group.dev.dependencies]
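The edit above is the template for the remaining pyproject.toml changes: exclusive upper bounds such as "<0.24.0" and caret pins such as "^0.23.0" (which Poetry expands to ">=0.23.0,<0.24.0") become an open-ended ">=0.23.0", so newer huggingface-hub releases are no longer blocked. A small illustrative check of what each constraint admits, using the packaging library with made-up candidate versions:

# Illustration only; the candidate versions are arbitrary examples.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

old_pin = SpecifierSet("<0.24.0")   # previous exclusive upper bound
new_pin = SpecifierSet(">=0.23.0")  # relaxed constraint from this commit

for candidate in ["0.23.5", "0.24.0", "0.27.0"]:
    v = Version(candidate)
    print(f"{candidate}: old pin {v in old_pin}, new pin {v in new_pin}")
# 0.23.5 satisfies both; 0.24.0 and 0.27.0 are admitted only by the relaxed pin.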
File: pyproject.toml (llama-index-extractors-relik)
@@ -27,12 +27,12 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-extractors-relik"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"

 [tool.poetry.dependencies]
 python = ">=3.10,<3.12"
 relik = "^1.0.3"
-huggingface-hub = "<0.24.0" # TODO: relik breaks on newer versions
+huggingface-hub = ">=0.23.0"
 spacy = "*"
 llama-index-core = "^0.12.0"

File: pyproject.toml (llama-index-llms-gaudi)
@@ -30,11 +30,11 @@ license = "MIT"
 name = "llama-index-llms-gaudi"
 packages = [{include = "llama_index/"}]
 readme = "README.md"
-version = "0.2.0"
+version = "0.2.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-huggingface-hub = "^0.23.0"
+huggingface-hub = ">=0.23.0"
 torch = "^2.1.2"
 text-generation = "^0.7.0"
 llama-index-core = "^0.12.0"
File: HuggingFaceInferenceAPI implementation (llama-index-llms-huggingface-api)
@@ -3,7 +3,6 @@

 from huggingface_hub import AsyncInferenceClient, InferenceClient, model_info
 from huggingface_hub.hf_api import ModelInfo
-from huggingface_hub.inference._types import ConversationalOutput
 from llama_index.core.base.llms.types import (
     ChatMessage,
     ChatResponse,
@@ -26,34 +25,6 @@
 logger = logging.getLogger(__name__)


-def chat_messages_to_conversational_kwargs(
-    messages: Sequence[ChatMessage],
-) -> Dict[str, Any]:
-    """Convert ChatMessages to keyword arguments for Inference API conversational."""
-    if len(messages) % 2 != 1:
-        raise NotImplementedError("Messages passed in must be of odd length.")
-    last_message = messages[-1]
-    kwargs: Dict[str, Any] = {
-        "text": last_message.content,
-        **last_message.additional_kwargs,
-    }
-    if len(messages) != 1:
-        kwargs["past_user_inputs"] = []
-        kwargs["generated_responses"] = []
-        for user_msg, assistant_msg in zip(messages[::2], messages[1::2]):
-            if (
-                user_msg.role != MessageRole.USER
-                or assistant_msg.role != MessageRole.ASSISTANT
-            ):
-                raise NotImplementedError(
-                    "Didn't handle when messages aren't ordered in alternating"
-                    f" pairs of {(MessageRole.USER, MessageRole.ASSISTANT)}."
-                )
-            kwargs["past_user_inputs"].append(user_msg.content)
-            kwargs["generated_responses"].append(assistant_msg.content)
-    return kwargs
-
-
 class HuggingFaceInferenceAPI(CustomLLM):
     """
     Wrapper on the Hugging Face's Inference API.
@@ -225,12 +196,17 @@ def metadata(self) -> LLMMetadata:
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # default to conversational task as that was the previous functionality
         if self.task == "conversational" or self.task is None:
-            output: "ConversationalOutput" = self._sync_client.conversational(
-                **{**chat_messages_to_conversational_kwargs(messages), **kwargs}
+            output = self._sync_client.chat_completion(
+                messages=[
+                    {"role": m.role.value, "content": m.content} for m in messages
+                ],
+                model=self.model_name,
+                **kwargs,
             )
             return ChatResponse(
                 message=ChatMessage(
-                    role=MessageRole.ASSISTANT, content=output["generated_text"]
+                    role=MessageRole.ASSISTANT,
+                    content=output["choices"][0]["message"]["content"] or "",
                 )
             )
         else:
@@ -276,7 +252,28 @@ async def acomplete(
     async def astream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
-        raise NotImplementedError
+        # default to conversational task as that was the previous functionality
+        if self.task == "conversational" or self.task is None:
+            output = await self._async_client.chat_completion(
+                messages=[
+                    {"role": m.role.value, "content": m.content} for m in messages
+                ],
+                model=self.model_name,
+                **kwargs,
+            )
+            return ChatResponse(
+                message=ChatMessage(
+                    role=MessageRole.ASSISTANT,
+                    content=output["choices"][0]["message"]["content"] or "",
+                )
+            )
+        else:
+            # try and use text generation
+            prompt = self.messages_to_prompt(messages)
+            completion = await self.acomplete(prompt)
+            return ChatResponse(
+                message=ChatMessage(role=MessageRole.ASSISTANT, content=completion.text)
+            )

     async def astream_complete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
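With the hunks above applied, chat (and the async path that now shares its logic) builds an OpenAI-style messages payload and forwards it to chat_completion. A short usage sketch of the wrapper; the constructor is assumed to accept a token argument as in current releases, and the model id and token values are placeholders, not part of the commit:

# Hedged usage sketch; model id and token are placeholder assumptions.
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

llm = HuggingFaceInferenceAPI(
    model_name="HuggingFaceH4/zephyr-7b-beta",  # any hosted chat-capable model
    token="hf_...",                             # or omit and configure HF_TOKEN
)
response = llm.chat(
    messages=[ChatMessage(role=MessageRole.USER, content="Which movie is the best?")]
)
print(response.message.content)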
File: pyproject.toml (llama-index-llms-huggingface-api)
@@ -27,11 +27,11 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-huggingface-api"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-huggingface-hub = "^0.23.0"
+huggingface-hub = ">=0.23.0"
 llama-index-core = "^0.12.0"

 [tool.poetry.group.dev.dependencies]
File: HuggingFaceInferenceAPI tests (llama-index-llms-huggingface-api)
@@ -46,29 +46,27 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
             " It's based on the book of the same name by James Fenimore Cooper."
         )
         conversational_return = {
-            "generated_text": generated_response,
-            "conversation": {
-                "generated_responses": ["It's Die Hard for sure.", generated_response],
-                "past_user_inputs": [
-                    "Which movie is the best?",
-                    "Can you explain why?",
-                ],
-            },
+            "choices": [
+                {
+                    "message": {
+                        "content": generated_response,
+                    }
+                }
+            ],
         }

         with patch.object(
             hf_inference_api._sync_client,
-            "conversational",
+            "chat_completion",
             return_value=conversational_return,
         ) as mock_conversational:
             response = hf_inference_api.chat(messages=messages)

         assert response.message.role == MessageRole.ASSISTANT
         assert response.message.content == generated_response
         mock_conversational.assert_called_once_with(
-            text="Can you explain why?",
-            past_user_inputs=["Which movie is the best?"],
-            generated_responses=["It's Die Hard for sure."],
+            messages=[{"role": m.role.value, "content": m.content} for m in messages],
+            model=STUB_MODEL_NAME,
         )

     def test_chat_text_generation(
File: deprecated HuggingFaceInferenceAPI copy (llama-index-llms-huggingface)
@@ -5,7 +5,6 @@
 import torch
 from huggingface_hub import AsyncInferenceClient, InferenceClient, model_info
 from huggingface_hub.hf_api import ModelInfo
-from huggingface_hub.inference._types import ConversationalOutput
 from llama_index.core.base.llms.types import (
     ChatMessage,
     ChatResponse,
@@ -434,34 +433,6 @@ def stream_chat(
         return stream_completion_response_to_chat_response(completion_response)


-def chat_messages_to_conversational_kwargs(
-    messages: Sequence[ChatMessage],
-) -> Dict[str, Any]:
-    """Convert ChatMessages to keyword arguments for Inference API conversational."""
-    if len(messages) % 2 != 1:
-        raise NotImplementedError("Messages passed in must be of odd length.")
-    last_message = messages[-1]
-    kwargs: Dict[str, Any] = {
-        "text": last_message.content,
-        **last_message.additional_kwargs,
-    }
-    if len(messages) != 1:
-        kwargs["past_user_inputs"] = []
-        kwargs["generated_responses"] = []
-        for user_msg, assistant_msg in zip(messages[::2], messages[1::2]):
-            if (
-                user_msg.role != MessageRole.USER
-                or assistant_msg.role != MessageRole.ASSISTANT
-            ):
-                raise NotImplementedError(
-                    "Didn't handle when messages aren't ordered in alternating"
-                    f" pairs of {(MessageRole.USER, MessageRole.ASSISTANT)}."
-                )
-            kwargs["past_user_inputs"].append(user_msg.content)
-            kwargs["generated_responses"].append(assistant_msg.content)
-    return kwargs
-
-
 @deprecated(
     "Deprecated in favor of `HuggingFaceInferenceAPI` from `llama-index-llms-huggingface-api` which should be used instead.",
     action="always",
@@ -637,12 +608,17 @@ def metadata(self) -> LLMMetadata:
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # default to conversational task as that was the previous functionality
         if self.task == "conversational" or self.task is None:
-            output: "ConversationalOutput" = self._sync_client.conversational(
-                **{**chat_messages_to_conversational_kwargs(messages), **kwargs}
+            output = self._sync_client.chat_completion(
+                messages=[
+                    {"role": m.role.value, "content": m.content} for m in messages
+                ],
+                model=self.model_name,
+                **kwargs,
            )
             return ChatResponse(
                 message=ChatMessage(
-                    role=MessageRole.ASSISTANT, content=output["generated_text"]
+                    role=MessageRole.ASSISTANT,
+                    content=output["choices"][0]["message"]["content"] or "",
                 )
             )
         else:
@@ -675,7 +651,28 @@ def stream_complete(
     async def achat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
-        raise NotImplementedError
+        # default to conversational task as that was the previous functionality
+        if self.task == "conversational" or self.task is None:
+            output = await self._async_client.chat_completion(
+                messages=[
+                    {"role": m.role.value, "content": m.content} for m in messages
+                ],
+                model=self.model_name,
+                **kwargs,
+            )
+            return ChatResponse(
+                message=ChatMessage(
+                    role=MessageRole.ASSISTANT,
+                    content=output["choices"][0]["message"]["content"] or "",
+                )
+            )
+        else:
+            # try and use text generation
+            prompt = self.messages_to_prompt(messages)
+            completion = await self.acomplete(prompt)
+            return ChatResponse(
+                message=ChatMessage(role=MessageRole.ASSISTANT, content=completion.text)
+            )

     async def acomplete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
File: pyproject.toml (llama-index-llms-huggingface)
@@ -28,11 +28,11 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-huggingface"
 readme = "README.md"
-version = "0.4.1"
+version = "0.4.2"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-huggingface-hub = "^0.23.0"
+huggingface-hub = ">=0.23.0"
 torch = "^2.1.2"
 text-generation = "^0.7.0"
 llama-index-core = "^0.12.0"
File: HuggingFaceInferenceAPI tests (llama-index-llms-huggingface)
@@ -46,29 +46,27 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
             " It's based on the book of the same name by James Fenimore Cooper."
         )
         conversational_return = {
-            "generated_text": generated_response,
-            "conversation": {
-                "generated_responses": ["It's Die Hard for sure.", generated_response],
-                "past_user_inputs": [
-                    "Which movie is the best?",
-                    "Can you explain why?",
-                ],
-            },
+            "choices": [
+                {
+                    "message": {
+                        "content": generated_response,
+                    }
+                }
+            ]
         }

         with patch.object(
             hf_inference_api._sync_client,
-            "conversational",
+            "chat_completion",
             return_value=conversational_return,
         ) as mock_conversational:
             response = hf_inference_api.chat(messages=messages)

         assert response.message.role == MessageRole.ASSISTANT
         assert response.message.content == generated_response
         mock_conversational.assert_called_once_with(
-            text="Can you explain why?",
-            past_user_inputs=["Which movie is the best?"],
-            generated_responses=["It's Die Hard for sure."],
+            messages=[{"role": m.role.value, "content": m.content} for m in messages],
+            model=STUB_MODEL_NAME,
        )

     def test_chat_text_generation(
File: pyproject.toml (llama-index-readers-huggingface-fs)
@@ -29,11 +29,11 @@ license = "MIT"
 maintainers = ["jerryjliu"]
 name = "llama-index-readers-huggingface-fs"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-huggingface-hub = "^0.20.3"
+huggingface-hub = ">=0.20.3"
 pandas = "*"
 llama-index-core = "^0.12.0"

