From 36cb7df9092ac2ece5b29512fc8a46146a6a188a Mon Sep 17 00:00:00 2001 From: jrj5423 <1565248524@qq.com> Date: Fri, 3 Jan 2025 08:05:23 +0800 Subject: [PATCH 1/5] vllm and xinference reranker --- backend/app/rag/chat_config.py | 16 +++ .../rag/node_postprocessor/vllm_reranker.py | 98 +++++++++++++++++++ .../node_postprocessor/xinference_reranker.py | 98 +++++++++++++++++++ backend/app/rag/reranker_model_option.py | 32 ++++++ backend/app/types.py | 2 + 5 files changed, 246 insertions(+) create mode 100644 backend/app/rag/node_postprocessor/vllm_reranker.py create mode 100644 backend/app/rag/node_postprocessor/xinference_reranker.py diff --git a/backend/app/rag/chat_config.py b/backend/app/rag/chat_config.py index 3646a54e3..62dbf9d5a 100644 --- a/backend/app/rag/chat_config.py +++ b/backend/app/rag/chat_config.py @@ -30,6 +30,8 @@ from app.rag.node_postprocessor.metadata_post_filter import MetadataFilters from app.rag.node_postprocessor.baisheng_reranker import BaishengRerank from app.rag.node_postprocessor.local_reranker import LocalRerank +from app.rag.node_postprocessor.vllm_reranker import VLLMRerank +from app.rag.node_postprocessor.xinference_reranker import XinferenceRerank from app.rag.embeddings.local_embedding import LocalEmbedding from app.repositories import chat_engine_repo, knowledge_base_repo from app.repositories.embedding_model import embed_model_repo @@ -427,6 +429,20 @@ def get_reranker_model( top_n=top_n, **config, ) + case RerankerProvider.VLLM: + return VLLMRerank( + model=model, + top_n=top_n, + api_key=credentials, + **config, + ) + case RerankerProvider.XINFERENCE: + return XinferenceRerank( + model=model, + top_n=top_n, + api_key=credentials, + **config, + ) case _: raise ValueError(f"Got unknown reranker provider: {provider}") diff --git a/backend/app/rag/node_postprocessor/vllm_reranker.py b/backend/app/rag/node_postprocessor/vllm_reranker.py new file mode 100644 index 000000000..80245957a --- /dev/null +++ b/backend/app/rag/node_postprocessor/vllm_reranker.py @@ -0,0 +1,98 @@ +from typing import Any, List, Optional +import requests + +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CBEventType, EventPayload +from llama_index.core.instrumentation import get_dispatcher +from llama_index.core.instrumentation.events.rerank import ( + ReRankEndEvent, + ReRankStartEvent, +) +from llama_index.core.postprocessor.types import BaseNodePostprocessor +from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle + +dispatcher = get_dispatcher(__name__) + + +class VLLMRerank(BaseNodePostprocessor): + api_key: str = Field(default="", description="API key.") + api_url: str = Field(default="", description="API url.") + model: str = Field(default="", description="The model to use when calling API.") + + top_n: int = Field(description="Top N nodes to return.") + + _session: Any = PrivateAttr() + + def __init__( + self, + top_n: int = 2, + model: str = "BAAI/bge-reranker-v2-m3", + api_key: str = "", + api_url: str = "http://localhost:8000/v1/score", + ): + super().__init__(top_n=top_n, model=model) + self.api_key = api_key + self.api_url = api_url + self.model = model + self._session = requests.Session() + + @classmethod + def class_name(cls) -> str: + return "OpenAILikeRerank" + + def _postprocess_nodes( + self, + nodes: List[NodeWithScore], + query_bundle: Optional[QueryBundle] = None, + ) -> List[NodeWithScore]: + dispatcher.event( + ReRankStartEvent( + query=query_bundle, + nodes=nodes, + top_n=self.top_n, + 
model_name=self.model, + ) + ) + + if query_bundle is None: + raise ValueError("Missing query bundle in extra info.") + if len(nodes) == 0: + return [] + + with self.callback_manager.event( + CBEventType.RERANKING, + payload={ + EventPayload.NODES: nodes, + EventPayload.MODEL_NAME: self.model, + EventPayload.QUERY_STR: query_bundle.query_str, + EventPayload.TOP_K: self.top_n, + }, + ) as event: + texts = [ + node.node.get_content(metadata_mode=MetadataMode.EMBED) + for node in nodes + ] + resp = self._session.post( # type: ignore + self.api_url, + json={ + "text_1": query_bundle.query_str, + "model": self.model, + "text_2": texts, + }, + ).json() + if "data" not in resp: + raise RuntimeError(f"Got error from reranker: {resp}") + + results = zip(range(len(nodes)), resp["data"]) + results = sorted(results, key=lambda x: x[1]["score"], reverse=True)[: self.top_n] + + new_nodes = [] + for result in results: + new_node_with_score = NodeWithScore( + node=nodes[result[0]].node, score=result[1]["score"] + ) + new_nodes.append(new_node_with_score) + event.on_end(payload={EventPayload.NODES: new_nodes}) + + dispatcher.event(ReRankEndEvent(nodes=new_nodes)) + return new_nodes diff --git a/backend/app/rag/node_postprocessor/xinference_reranker.py b/backend/app/rag/node_postprocessor/xinference_reranker.py new file mode 100644 index 000000000..907b7a66a --- /dev/null +++ b/backend/app/rag/node_postprocessor/xinference_reranker.py @@ -0,0 +1,98 @@ +from typing import Any, List, Optional +import requests + +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CBEventType, EventPayload +from llama_index.core.instrumentation import get_dispatcher +from llama_index.core.instrumentation.events.rerank import ( + ReRankEndEvent, + ReRankStartEvent, +) +from llama_index.core.postprocessor.types import BaseNodePostprocessor +from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle + +dispatcher = get_dispatcher(__name__) + + +class XinferenceRerank(BaseNodePostprocessor): + api_key: str = Field(default="", description="API key.") + api_url: str = Field(default="", description="API url.") + model: str = Field(default="", description="The model to use when calling API.") + + top_n: int = Field(description="Top N nodes to return.") + + _session: Any = PrivateAttr() + + def __init__( + self, + top_n: int = 2, + model: str = "bge-reranker-v2-m3", + api_key: str = "", + api_url: str = "http://localhost:9997/v1/rerank", + ): + super().__init__(top_n=top_n, model=model) + self.api_key = api_key + self.api_url = api_url + self.model = model + self._session = requests.Session() + + @classmethod + def class_name(cls) -> str: + return "OpenAILikeRerank" + + def _postprocess_nodes( + self, + nodes: List[NodeWithScore], + query_bundle: Optional[QueryBundle] = None, + ) -> List[NodeWithScore]: + dispatcher.event( + ReRankStartEvent( + query=query_bundle, + nodes=nodes, + top_n=self.top_n, + model_name=self.model, + ) + ) + + if query_bundle is None: + raise ValueError("Missing query bundle in extra info.") + if len(nodes) == 0: + return [] + + with self.callback_manager.event( + CBEventType.RERANKING, + payload={ + EventPayload.NODES: nodes, + EventPayload.MODEL_NAME: self.model, + EventPayload.QUERY_STR: query_bundle.query_str, + EventPayload.TOP_K: self.top_n, + }, + ) as event: + texts = [ + node.node.get_content(metadata_mode=MetadataMode.EMBED) + for node in nodes + ] + resp = self._session.post( # type: ignore + self.api_url, + json={ + "query": 
query_bundle.query_str, + "model": self.model, + "documents": texts, + }, + ).json() + if "data" not in resp: + raise RuntimeError(f"Got error from reranker: {resp}") + + results = zip(range(len(nodes)), resp["results"]) + results = sorted(results, key=lambda x: x[1]["relevance_score"], reverse=True)[: self.top_n] + + new_nodes = [] + for result in results: + new_node_with_score = NodeWithScore( + node=nodes[result[0]].node, score=result[1]["relevance_score"] + ) + new_nodes.append(new_node_with_score) + event.on_end(payload={EventPayload.NODES: new_nodes}) + + dispatcher.event(ReRankEndEvent(nodes=new_nodes)) + return new_nodes diff --git a/backend/app/rag/reranker_model_option.py b/backend/app/rag/reranker_model_option.py index d6444a4ee..c648b8f54 100644 --- a/backend/app/rag/reranker_model_option.py +++ b/backend/app/rag/reranker_model_option.py @@ -77,4 +77,36 @@ class RerankerModelOption(BaseModel): credentials_type="str", default_credentials="dummy", ), + RerankerModelOption( + provider=RerankerProvider.VLLM, + provider_display_name="vLLM", + provider_description="vLLM is a fast and easy-to-use library for LLM inference and serving.", + default_reranker_model="BAAI/bge-reranker-v2-m3", + reranker_model_description="", + default_top_n=10, + default_config={ + "api_url": "http://localhost:8000/v1/score", + }, + config_description="api_url is the url of the vLLM server, ensure it can be accessed from this server", + credentials_display_name="vLLM API Key", + credentials_description="If you don't need an API key to access your rerank model, set a dummy string here is ok.", + credentials_type="str", + default_credentials="dummy", + ), + RerankerModelOption( + provider=RerankerProvider.XINFERENCE, + provider_display_name="Xinference Reranker", + provider_description="Xorbits Inference (Xinference) is an open-source platform to streamline the operation and integration of a wide array of AI models.", + default_reranker_model="bge-reranker-v2-m3", + reranker_model_description="", + default_top_n=10, + default_config={ + "api_url": "http://localhost:9997/v1/rerank", + }, + config_description="api_url is the url of the Xinference server, ensure it can be accessed from this server", + credentials_display_name="Xinference API Key", + credentials_description="If you don't need an API key to access your rerank model, set a dummy string here is ok.", + credentials_type="str", + default_credentials="dummy", + ), ] diff --git a/backend/app/types.py b/backend/app/types.py index 27ffe0225..fb9780793 100644 --- a/backend/app/types.py +++ b/backend/app/types.py @@ -27,6 +27,8 @@ class RerankerProvider(str, enum.Enum): COHERE = "cohere" BAISHENG = "baisheng" LOCAL = "local" + VLLM = "vllm" + XINFERENCE = "xinference" class MimeTypes(str, enum.Enum): From 56e9b5cbe3817c2b9ddef8aafe2e58736ec67941 Mon Sep 17 00:00:00 2001 From: jrj5423 <1565248524@qq.com> Date: Tue, 7 Jan 2025 03:44:44 +0800 Subject: [PATCH 2/5] xinference and bedrock reranker using llama_index --- backend/app/rag/chat_config.py | 15 ++- .../rag/node_postprocessor/vllm_reranker.py | 3 - .../node_postprocessor/xinference_reranker.py | 98 ------------------- backend/app/rag/reranker_model_option.py | 21 +++- backend/app/types.py | 1 + backend/pyproject.toml | 12 ++- backend/requirements-dev.lock | 96 ++++++++---------- backend/requirements.lock | 96 ++++++++---------- 8 files changed, 122 insertions(+), 220 deletions(-) delete mode 100644 backend/app/rag/node_postprocessor/xinference_reranker.py diff --git a/backend/app/rag/chat_config.py 
b/backend/app/rag/chat_config.py index 62dbf9d5a..796f2d0f7 100644 --- a/backend/app/rag/chat_config.py +++ b/backend/app/rag/chat_config.py @@ -21,6 +21,8 @@ from llama_index.embeddings.ollama import OllamaEmbedding from llama_index.postprocessor.jinaai_rerank import JinaRerank from llama_index.postprocessor.cohere_rerank import CohereRerank +from llama_index.postprocessor.xinference_rerank import XinferenceRerank +from llama_index.postprocessor.bedrock_rerank import AWSBedrockRerank from sqlmodel import Session from google.oauth2 import service_account from google.auth.transport.requests import Request @@ -31,7 +33,6 @@ from app.rag.node_postprocessor.baisheng_reranker import BaishengRerank from app.rag.node_postprocessor.local_reranker import LocalRerank from app.rag.node_postprocessor.vllm_reranker import VLLMRerank -from app.rag.node_postprocessor.xinference_reranker import XinferenceRerank from app.rag.embeddings.local_embedding import LocalEmbedding from app.repositories import chat_engine_repo, knowledge_base_repo from app.repositories.embedding_model import embed_model_repo @@ -433,15 +434,21 @@ def get_reranker_model( return VLLMRerank( model=model, top_n=top_n, - api_key=credentials, **config, ) case RerankerProvider.XINFERENCE: return XinferenceRerank( model=model, top_n=top_n, - api_key=credentials, - **config, + base_url=config.get("api_url") + ) + case RerankerProvider.BEDROCK: + return AWSBedrockRerank( + rerank_model_name=model, + top_n=top_n, + aws_access_key_id=credentials["aws_access_key_id"], + aws_secret_access_key=credentials["aws_secret_access_key"], + region_name=credentials["aws_region_name"], ) case _: raise ValueError(f"Got unknown reranker provider: {provider}") diff --git a/backend/app/rag/node_postprocessor/vllm_reranker.py b/backend/app/rag/node_postprocessor/vllm_reranker.py index 80245957a..0e4c1045b 100644 --- a/backend/app/rag/node_postprocessor/vllm_reranker.py +++ b/backend/app/rag/node_postprocessor/vllm_reranker.py @@ -15,7 +15,6 @@ class VLLMRerank(BaseNodePostprocessor): - api_key: str = Field(default="", description="API key.") api_url: str = Field(default="", description="API url.") model: str = Field(default="", description="The model to use when calling API.") @@ -27,11 +26,9 @@ def __init__( self, top_n: int = 2, model: str = "BAAI/bge-reranker-v2-m3", - api_key: str = "", api_url: str = "http://localhost:8000/v1/score", ): super().__init__(top_n=top_n, model=model) - self.api_key = api_key self.api_url = api_url self.model = model self._session = requests.Session() diff --git a/backend/app/rag/node_postprocessor/xinference_reranker.py b/backend/app/rag/node_postprocessor/xinference_reranker.py deleted file mode 100644 index 907b7a66a..000000000 --- a/backend/app/rag/node_postprocessor/xinference_reranker.py +++ /dev/null @@ -1,98 +0,0 @@ -from typing import Any, List, Optional -import requests - -from llama_index.core.bridge.pydantic import Field, PrivateAttr -from llama_index.core.callbacks import CBEventType, EventPayload -from llama_index.core.instrumentation import get_dispatcher -from llama_index.core.instrumentation.events.rerank import ( - ReRankEndEvent, - ReRankStartEvent, -) -from llama_index.core.postprocessor.types import BaseNodePostprocessor -from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle - -dispatcher = get_dispatcher(__name__) - - -class XinferenceRerank(BaseNodePostprocessor): - api_key: str = Field(default="", description="API key.") - api_url: str = Field(default="", description="API url.") - 
model: str = Field(default="", description="The model to use when calling API.") - - top_n: int = Field(description="Top N nodes to return.") - - _session: Any = PrivateAttr() - - def __init__( - self, - top_n: int = 2, - model: str = "bge-reranker-v2-m3", - api_key: str = "", - api_url: str = "http://localhost:9997/v1/rerank", - ): - super().__init__(top_n=top_n, model=model) - self.api_key = api_key - self.api_url = api_url - self.model = model - self._session = requests.Session() - - @classmethod - def class_name(cls) -> str: - return "OpenAILikeRerank" - - def _postprocess_nodes( - self, - nodes: List[NodeWithScore], - query_bundle: Optional[QueryBundle] = None, - ) -> List[NodeWithScore]: - dispatcher.event( - ReRankStartEvent( - query=query_bundle, - nodes=nodes, - top_n=self.top_n, - model_name=self.model, - ) - ) - - if query_bundle is None: - raise ValueError("Missing query bundle in extra info.") - if len(nodes) == 0: - return [] - - with self.callback_manager.event( - CBEventType.RERANKING, - payload={ - EventPayload.NODES: nodes, - EventPayload.MODEL_NAME: self.model, - EventPayload.QUERY_STR: query_bundle.query_str, - EventPayload.TOP_K: self.top_n, - }, - ) as event: - texts = [ - node.node.get_content(metadata_mode=MetadataMode.EMBED) - for node in nodes - ] - resp = self._session.post( # type: ignore - self.api_url, - json={ - "query": query_bundle.query_str, - "model": self.model, - "documents": texts, - }, - ).json() - if "data" not in resp: - raise RuntimeError(f"Got error from reranker: {resp}") - - results = zip(range(len(nodes)), resp["results"]) - results = sorted(results, key=lambda x: x[1]["relevance_score"], reverse=True)[: self.top_n] - - new_nodes = [] - for result in results: - new_node_with_score = NodeWithScore( - node=nodes[result[0]].node, score=result[1]["relevance_score"] - ) - new_nodes.append(new_node_with_score) - event.on_end(payload={EventPayload.NODES: new_nodes}) - - dispatcher.event(ReRankEndEvent(nodes=new_nodes)) - return new_nodes diff --git a/backend/app/rag/reranker_model_option.py b/backend/app/rag/reranker_model_option.py index c648b8f54..f3c68bc96 100644 --- a/backend/app/rag/reranker_model_option.py +++ b/backend/app/rag/reranker_model_option.py @@ -89,7 +89,7 @@ class RerankerModelOption(BaseModel): }, config_description="api_url is the url of the vLLM server, ensure it can be accessed from this server", credentials_display_name="vLLM API Key", - credentials_description="If you don't need an API key to access your rerank model, set a dummy string here is ok.", + credentials_description="vLLM doesn't require an API key, set a dummy string here is ok", credentials_type="str", default_credentials="dummy", ), @@ -105,8 +105,25 @@ class RerankerModelOption(BaseModel): }, config_description="api_url is the url of the Xinference server, ensure it can be accessed from this server", credentials_display_name="Xinference API Key", - credentials_description="If you don't need an API key to access your rerank model, set a dummy string here is ok.", + credentials_description="Xinference doesn't require an API key, set a dummy string here is ok", credentials_type="str", default_credentials="dummy", ), + RerankerModelOption( + provider=RerankerProvider.BEDROCK, + provider_display_name="Bedrock Reranker", + provider_description="Amazon Bedrock is a fully managed foundation models service.", + provider_url="https://docs.aws.amazon.com/bedrock/", + default_reranker_model="amazon.rerank-v1:0", + reranker_model_description="Find more models in 
https://docs.aws.amazon.com/bedrock/latest/userguide/foundation-models-reference.html.", + default_top_n=10, + credentials_display_name="AWS Bedrock Credentials JSON", + credentials_description="The JSON Object of AWS Credentials, refer to https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html#cli-configure-files-global", + credentials_type="dict", + default_credentials={ + "aws_access_key_id": "****", + "aws_secret_access_key": "****", + "aws_region_name": "us-west-2", + }, + ) ] diff --git a/backend/app/types.py b/backend/app/types.py index fb9780793..73ef5e06e 100644 --- a/backend/app/types.py +++ b/backend/app/types.py @@ -29,6 +29,7 @@ class RerankerProvider(str, enum.Enum): LOCAL = "local" VLLM = "vllm" XINFERENCE = "xinference" + BEDROCK = "bedrock" class MimeTypes(str, enum.Enum): diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 3cb92fcca..2ba6af7b3 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -46,10 +46,10 @@ dependencies = [ "llama-index-postprocessor-cohere-rerank>=0.1.7", "llama-index-llms-bedrock>=0.1.12", "pypdf>=4.3.1", - "llama-index-llms-ollama<=0.3.0", - "llama-index-embeddings-ollama<=0.3.0", - "llama-index-embeddings-jinaai<=0.3.0", - "llama-index-embeddings-cohere<=0.3.0", + "llama-index-llms-ollama>=0.5.0", + "llama-index-embeddings-ollama>=0.5.0", + "llama-index-embeddings-jinaai>=0.4.0", + "llama-index-embeddings-cohere>=0.4.0", "python-docx>=1.1.2", "python-pptx>=1.0.2", "colorama>=0.4.6", @@ -58,7 +58,9 @@ dependencies = [ "retry>=0.9.2", "langchain-openai>=0.2.9", "ragas>=0.2.6", - "llama-index-embeddings-bedrock<=0.3.0", + "llama-index-embeddings-bedrock>=0.4.0", + "llama-index-postprocessor-xinference-rerank>=0.2.0", + "llama-index-postprocessor-bedrock-rerank>=0.3.0", ] readme = "README.md" requires-python = ">= 3.8" diff --git a/backend/requirements-dev.lock b/backend/requirements-dev.lock index 68b6f3f4e..f2a9957a4 100644 --- a/backend/requirements-dev.lock +++ b/backend/requirements-dev.lock @@ -15,7 +15,6 @@ aiohttp==3.9.5 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy aiosignal==1.3.1 # via aiohttp alembic==1.13.1 @@ -24,7 +23,7 @@ amqp==5.2.0 # via kombu annotated-types==0.7.0 # via pydantic -anthropic==0.28.1 +anthropic==0.42.0 # via llama-index-llms-anthropic anyio==4.4.0 # via anthropic @@ -52,11 +51,14 @@ beautifulsoup4==4.12.3 # via markdownify billiard==4.2.0 # via celery -boto3==1.34.156 +boto3==1.35.92 + # via anthropic # via cohere # via llama-index-embeddings-bedrock # via llama-index-llms-bedrock -botocore==1.34.156 + # via llama-index-postprocessor-bedrock-rerank +botocore==1.35.92 + # via anthropic # via boto3 # via s3transfer cachetools==5.3.3 @@ -103,7 +105,6 @@ cryptography==42.0.8 dataclasses-json==0.6.7 # via langchain-community # via llama-index-core - # via llama-index-legacy datasets==2.14.7 # via dspy-ai # via ragas @@ -113,7 +114,6 @@ deepdiff==7.0.1 deepeval==0.21.73 deprecated==1.2.14 # via llama-index-core - # via llama-index-legacy # via opentelemetry-api # via opentelemetry-exporter-otlp-proto-grpc dill==0.3.7 @@ -121,7 +121,6 @@ dill==0.3.7 # via multiprocess dirtyjson==1.0.8 # via llama-index-core - # via llama-index-legacy distro==1.9.0 # via anthropic # via openai @@ -150,6 +149,8 @@ fastavro==1.9.5 filelock==3.15.1 # via huggingface-hub # via transformers +filetype==1.2.0 + # via llama-index-core flower==2.0.1 frozenlist==1.4.1 # via aiohttp @@ -158,7 +159,6 @@ fsspec==2023.10.0 # via datasets # via huggingface-hub # 
via llama-index-core - # via llama-index-legacy google-ai-generativelanguage==0.6.4 # via google-generativeai google-api-core==2.19.1 @@ -243,7 +243,6 @@ httpx==0.27.0 # via langsmith # via llama-cloud # via llama-index-core - # via llama-index-legacy # via ollama # via openai httpx-oauth==0.14.1 @@ -307,16 +306,15 @@ langsmith==0.1.143 # via langchain # via langchain-community # via langchain-core -llama-cloud==0.0.15 +llama-cloud==0.1.6 # via llama-index-indices-managed-llama-cloud -llama-index==0.11.10 -llama-index-agent-openai==0.3.1 +llama-index==0.12.9 +llama-index-agent-openai==0.4.1 # via llama-index - # via llama-index-llms-openai # via llama-index-program-openai -llama-index-cli==0.3.1 +llama-index-cli==0.4.0 # via llama-index -llama-index-core==0.11.10 +llama-index-core==0.12.9 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -333,30 +331,30 @@ llama-index-core==0.11.10 # via llama-index-llms-openai # via llama-index-llms-openai-like # via llama-index-multi-modal-llms-openai + # via llama-index-postprocessor-bedrock-rerank # via llama-index-postprocessor-cohere-rerank # via llama-index-postprocessor-jinaai-rerank + # via llama-index-postprocessor-xinference-rerank # via llama-index-program-openai # via llama-index-question-gen-openai # via llama-index-readers-file # via llama-index-readers-llama-parse # via llama-parse -llama-index-embeddings-bedrock==0.3.0 -llama-index-embeddings-cohere==0.2.0 -llama-index-embeddings-jinaai==0.3.0 -llama-index-embeddings-ollama==0.3.0 -llama-index-embeddings-openai==0.2.4 +llama-index-embeddings-bedrock==0.4.0 +llama-index-embeddings-cohere==0.4.0 +llama-index-embeddings-jinaai==0.4.0 +llama-index-embeddings-ollama==0.5.0 +llama-index-embeddings-openai==0.3.1 # via llama-index # via llama-index-cli -llama-index-indices-managed-llama-cloud==0.3.0 +llama-index-indices-managed-llama-cloud==0.6.3 # via llama-index -llama-index-legacy==0.9.48 - # via llama-index -llama-index-llms-anthropic==0.2.1 +llama-index-llms-anthropic==0.6.3 # via llama-index-llms-bedrock -llama-index-llms-bedrock==0.2.1 -llama-index-llms-gemini==0.3.4 -llama-index-llms-ollama==0.3.0 -llama-index-llms-openai==0.2.5 +llama-index-llms-bedrock==0.3.3 +llama-index-llms-gemini==0.4.2 +llama-index-llms-ollama==0.5.0 +llama-index-llms-openai==0.3.12 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -364,19 +362,21 @@ llama-index-llms-openai==0.2.5 # via llama-index-multi-modal-llms-openai # via llama-index-program-openai # via llama-index-question-gen-openai -llama-index-llms-openai-like==0.2.0 -llama-index-multi-modal-llms-openai==0.2.0 +llama-index-llms-openai-like==0.3.3 +llama-index-multi-modal-llms-openai==0.4.2 # via llama-index -llama-index-postprocessor-cohere-rerank==0.2.0 -llama-index-postprocessor-jinaai-rerank==0.2.0 -llama-index-program-openai==0.2.0 +llama-index-postprocessor-bedrock-rerank==0.3.0 +llama-index-postprocessor-cohere-rerank==0.3.0 +llama-index-postprocessor-jinaai-rerank==0.3.0 +llama-index-postprocessor-xinference-rerank==0.2.0 +llama-index-program-openai==0.3.1 # via llama-index # via llama-index-question-gen-openai -llama-index-question-gen-openai==0.2.0 +llama-index-question-gen-openai==0.3.0 # via llama-index -llama-index-readers-file==0.2.1 +llama-index-readers-file==0.4.2 # via llama-index -llama-index-readers-llama-parse==0.3.0 +llama-index-readers-llama-parse==0.4.0 # via llama-index llama-parse==0.5.5 # via llama-index-readers-llama-parse @@ -406,21 +406,17 @@ mypy-extensions==1.0.0 # via 
typing-inspect nest-asyncio==1.6.0 # via llama-index-core - # via llama-index-legacy # via ragas networkx==3.3 # via llama-index-core - # via llama-index-legacy nltk==3.9.1 # via llama-index # via llama-index-core - # via llama-index-legacy numpy==1.26.4 # via datasets # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via pandas # via pyarrow @@ -428,15 +424,14 @@ numpy==1.26.4 # via shapely # via tidb-vector # via transformers -ollama==0.3.1 +ollama==0.4.5 # via llama-index-embeddings-ollama # via llama-index-llms-ollama -openai==1.54.5 +openai==1.59.3 # via dspy-ai # via langchain-openai # via llama-index-agent-openai # via llama-index-embeddings-openai - # via llama-index-legacy # via llama-index-llms-openai # via ragas openpyxl==3.1.5 @@ -477,7 +472,6 @@ packaging==23.2 pandas==2.2.2 # via datasets # via dspy-ai - # via llama-index-legacy # via llama-index-readers-file parameterized==0.9.0 # via cohere @@ -526,7 +520,7 @@ pyasn1-modules==0.4.0 # via google-auth pycparser==2.22 # via cffi -pydantic==2.8.2 +pydantic==2.10.4 # via anthropic # via cohere # via deepeval @@ -541,11 +535,12 @@ pydantic==2.8.2 # via langsmith # via llama-cloud # via llama-index-core + # via ollama # via openai # via pydantic-settings # via ragas # via sqlmodel -pydantic-core==2.20.1 +pydantic-core==2.27.2 # via pydantic pydantic-settings==2.6.1 # via langchain-community @@ -558,7 +553,7 @@ pyjwt==2.8.0 pymysql==1.1.1 pyparsing==3.1.2 # via httplib2 -pypdf==4.3.1 +pypdf==5.1.0 # via llama-index-readers-file pysbd==0.3.4 # via ragas @@ -619,7 +614,6 @@ requests==2.32.3 # via langchain-community # via langsmith # via llama-index-core - # via llama-index-legacy # via requests-toolbelt # via tiktoken # via transformers @@ -656,7 +650,6 @@ sqlalchemy==2.0.30 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via sqlmodel sqlmodel==0.0.19 @@ -675,15 +668,12 @@ tenacity==8.4.2 # via langchain-community # via langchain-core # via llama-index-core - # via llama-index-legacy tidb-vector==0.0.14 tiktoken==0.7.0 # via langchain-openai # via llama-index-core - # via llama-index-legacy # via ragas tokenizers==0.19.1 - # via anthropic # via cohere # via transformers tornado==6.4.1 @@ -716,7 +706,6 @@ typing-extensions==4.12.2 # via huggingface-hub # via langchain-core # via llama-index-core - # via llama-index-legacy # via openai # via opentelemetry-sdk # via pydantic @@ -730,7 +719,6 @@ typing-extensions==4.12.2 typing-inspect==0.9.0 # via dataclasses-json # via llama-index-core - # via llama-index-legacy tzdata==2024.1 # via celery # via pandas diff --git a/backend/requirements.lock b/backend/requirements.lock index 68b6f3f4e..f2a9957a4 100644 --- a/backend/requirements.lock +++ b/backend/requirements.lock @@ -15,7 +15,6 @@ aiohttp==3.9.5 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy aiosignal==1.3.1 # via aiohttp alembic==1.13.1 @@ -24,7 +23,7 @@ amqp==5.2.0 # via kombu annotated-types==0.7.0 # via pydantic -anthropic==0.28.1 +anthropic==0.42.0 # via llama-index-llms-anthropic anyio==4.4.0 # via anthropic @@ -52,11 +51,14 @@ beautifulsoup4==4.12.3 # via markdownify billiard==4.2.0 # via celery -boto3==1.34.156 +boto3==1.35.92 + # via anthropic # via cohere # via llama-index-embeddings-bedrock # via llama-index-llms-bedrock -botocore==1.34.156 + # via llama-index-postprocessor-bedrock-rerank +botocore==1.35.92 + # via anthropic # via boto3 # via s3transfer 
cachetools==5.3.3 @@ -103,7 +105,6 @@ cryptography==42.0.8 dataclasses-json==0.6.7 # via langchain-community # via llama-index-core - # via llama-index-legacy datasets==2.14.7 # via dspy-ai # via ragas @@ -113,7 +114,6 @@ deepdiff==7.0.1 deepeval==0.21.73 deprecated==1.2.14 # via llama-index-core - # via llama-index-legacy # via opentelemetry-api # via opentelemetry-exporter-otlp-proto-grpc dill==0.3.7 @@ -121,7 +121,6 @@ dill==0.3.7 # via multiprocess dirtyjson==1.0.8 # via llama-index-core - # via llama-index-legacy distro==1.9.0 # via anthropic # via openai @@ -150,6 +149,8 @@ fastavro==1.9.5 filelock==3.15.1 # via huggingface-hub # via transformers +filetype==1.2.0 + # via llama-index-core flower==2.0.1 frozenlist==1.4.1 # via aiohttp @@ -158,7 +159,6 @@ fsspec==2023.10.0 # via datasets # via huggingface-hub # via llama-index-core - # via llama-index-legacy google-ai-generativelanguage==0.6.4 # via google-generativeai google-api-core==2.19.1 @@ -243,7 +243,6 @@ httpx==0.27.0 # via langsmith # via llama-cloud # via llama-index-core - # via llama-index-legacy # via ollama # via openai httpx-oauth==0.14.1 @@ -307,16 +306,15 @@ langsmith==0.1.143 # via langchain # via langchain-community # via langchain-core -llama-cloud==0.0.15 +llama-cloud==0.1.6 # via llama-index-indices-managed-llama-cloud -llama-index==0.11.10 -llama-index-agent-openai==0.3.1 +llama-index==0.12.9 +llama-index-agent-openai==0.4.1 # via llama-index - # via llama-index-llms-openai # via llama-index-program-openai -llama-index-cli==0.3.1 +llama-index-cli==0.4.0 # via llama-index -llama-index-core==0.11.10 +llama-index-core==0.12.9 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -333,30 +331,30 @@ llama-index-core==0.11.10 # via llama-index-llms-openai # via llama-index-llms-openai-like # via llama-index-multi-modal-llms-openai + # via llama-index-postprocessor-bedrock-rerank # via llama-index-postprocessor-cohere-rerank # via llama-index-postprocessor-jinaai-rerank + # via llama-index-postprocessor-xinference-rerank # via llama-index-program-openai # via llama-index-question-gen-openai # via llama-index-readers-file # via llama-index-readers-llama-parse # via llama-parse -llama-index-embeddings-bedrock==0.3.0 -llama-index-embeddings-cohere==0.2.0 -llama-index-embeddings-jinaai==0.3.0 -llama-index-embeddings-ollama==0.3.0 -llama-index-embeddings-openai==0.2.4 +llama-index-embeddings-bedrock==0.4.0 +llama-index-embeddings-cohere==0.4.0 +llama-index-embeddings-jinaai==0.4.0 +llama-index-embeddings-ollama==0.5.0 +llama-index-embeddings-openai==0.3.1 # via llama-index # via llama-index-cli -llama-index-indices-managed-llama-cloud==0.3.0 +llama-index-indices-managed-llama-cloud==0.6.3 # via llama-index -llama-index-legacy==0.9.48 - # via llama-index -llama-index-llms-anthropic==0.2.1 +llama-index-llms-anthropic==0.6.3 # via llama-index-llms-bedrock -llama-index-llms-bedrock==0.2.1 -llama-index-llms-gemini==0.3.4 -llama-index-llms-ollama==0.3.0 -llama-index-llms-openai==0.2.5 +llama-index-llms-bedrock==0.3.3 +llama-index-llms-gemini==0.4.2 +llama-index-llms-ollama==0.5.0 +llama-index-llms-openai==0.3.12 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -364,19 +362,21 @@ llama-index-llms-openai==0.2.5 # via llama-index-multi-modal-llms-openai # via llama-index-program-openai # via llama-index-question-gen-openai -llama-index-llms-openai-like==0.2.0 -llama-index-multi-modal-llms-openai==0.2.0 +llama-index-llms-openai-like==0.3.3 +llama-index-multi-modal-llms-openai==0.4.2 # via 
llama-index -llama-index-postprocessor-cohere-rerank==0.2.0 -llama-index-postprocessor-jinaai-rerank==0.2.0 -llama-index-program-openai==0.2.0 +llama-index-postprocessor-bedrock-rerank==0.3.0 +llama-index-postprocessor-cohere-rerank==0.3.0 +llama-index-postprocessor-jinaai-rerank==0.3.0 +llama-index-postprocessor-xinference-rerank==0.2.0 +llama-index-program-openai==0.3.1 # via llama-index # via llama-index-question-gen-openai -llama-index-question-gen-openai==0.2.0 +llama-index-question-gen-openai==0.3.0 # via llama-index -llama-index-readers-file==0.2.1 +llama-index-readers-file==0.4.2 # via llama-index -llama-index-readers-llama-parse==0.3.0 +llama-index-readers-llama-parse==0.4.0 # via llama-index llama-parse==0.5.5 # via llama-index-readers-llama-parse @@ -406,21 +406,17 @@ mypy-extensions==1.0.0 # via typing-inspect nest-asyncio==1.6.0 # via llama-index-core - # via llama-index-legacy # via ragas networkx==3.3 # via llama-index-core - # via llama-index-legacy nltk==3.9.1 # via llama-index # via llama-index-core - # via llama-index-legacy numpy==1.26.4 # via datasets # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via pandas # via pyarrow @@ -428,15 +424,14 @@ numpy==1.26.4 # via shapely # via tidb-vector # via transformers -ollama==0.3.1 +ollama==0.4.5 # via llama-index-embeddings-ollama # via llama-index-llms-ollama -openai==1.54.5 +openai==1.59.3 # via dspy-ai # via langchain-openai # via llama-index-agent-openai # via llama-index-embeddings-openai - # via llama-index-legacy # via llama-index-llms-openai # via ragas openpyxl==3.1.5 @@ -477,7 +472,6 @@ packaging==23.2 pandas==2.2.2 # via datasets # via dspy-ai - # via llama-index-legacy # via llama-index-readers-file parameterized==0.9.0 # via cohere @@ -526,7 +520,7 @@ pyasn1-modules==0.4.0 # via google-auth pycparser==2.22 # via cffi -pydantic==2.8.2 +pydantic==2.10.4 # via anthropic # via cohere # via deepeval @@ -541,11 +535,12 @@ pydantic==2.8.2 # via langsmith # via llama-cloud # via llama-index-core + # via ollama # via openai # via pydantic-settings # via ragas # via sqlmodel -pydantic-core==2.20.1 +pydantic-core==2.27.2 # via pydantic pydantic-settings==2.6.1 # via langchain-community @@ -558,7 +553,7 @@ pyjwt==2.8.0 pymysql==1.1.1 pyparsing==3.1.2 # via httplib2 -pypdf==4.3.1 +pypdf==5.1.0 # via llama-index-readers-file pysbd==0.3.4 # via ragas @@ -619,7 +614,6 @@ requests==2.32.3 # via langchain-community # via langsmith # via llama-index-core - # via llama-index-legacy # via requests-toolbelt # via tiktoken # via transformers @@ -656,7 +650,6 @@ sqlalchemy==2.0.30 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via sqlmodel sqlmodel==0.0.19 @@ -675,15 +668,12 @@ tenacity==8.4.2 # via langchain-community # via langchain-core # via llama-index-core - # via llama-index-legacy tidb-vector==0.0.14 tiktoken==0.7.0 # via langchain-openai # via llama-index-core - # via llama-index-legacy # via ragas tokenizers==0.19.1 - # via anthropic # via cohere # via transformers tornado==6.4.1 @@ -716,7 +706,6 @@ typing-extensions==4.12.2 # via huggingface-hub # via langchain-core # via llama-index-core - # via llama-index-legacy # via openai # via opentelemetry-sdk # via pydantic @@ -730,7 +719,6 @@ typing-extensions==4.12.2 typing-inspect==0.9.0 # via dataclasses-json # via llama-index-core - # via llama-index-legacy tzdata==2024.1 # via celery # via pandas From 052a836dc035ce8d7f82933ac2e6729696ba7976 Mon Sep 17 00:00:00 
2001 From: jrj5423 <1565248524@qq.com> Date: Tue, 7 Jan 2025 04:52:04 +0800 Subject: [PATCH 3/5] docs --- .../app/src/pages/docs/reranker-model.mdx | 47 ++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/frontend/app/src/pages/docs/reranker-model.mdx b/frontend/app/src/pages/docs/reranker-model.mdx index c06c54530..dc93f8b73 100644 --- a/frontend/app/src/pages/docs/reranker-model.mdx +++ b/frontend/app/src/pages/docs/reranker-model.mdx @@ -17,5 +17,48 @@ After logging in with an admin account, you can configure the Reranker Model in Currently AutoFlow supports the following reranker providers: -* [Jina AI Reranker](https://jina.ai/reranker) -* [Cohere](https://cohere.com/rerank) +### JinaAI + +To learn more about JinaAI reranking, please visit [Jina AI Reranker](https://jina.ai/reranker/). + +### Cohere + +To learn more about Cohere reranking, please visit [Cohere Rerank](https://cohere.com/rerank/). + +### vLLM + +To use vLLM rerankers, you need to provide the **api_url** of the reranker API as the following JSON format in **Advanced Settings**: + +```json +{ + "api_url": "{api_base_url}" +} +``` + +To learn more about vLLM reranking, please visit [vLLM Sentence Pair Scoring Models](https://docs.vllm.ai/en/latest/models/supported_models.html#sentence-pair-scoring-task-score). + +### Xorbits Inference (Xinference) + +To use Xinference rerankers, you need to provide the **api_url** of the reranker API as the following JSON format in **Advanced Settings**: + +```json +{ + "api_url": "{api_base_url}" +} +``` + +To learn more about Xinference reranking, please visit [Xinference Rerank](https://inference.readthedocs.io/en/latest/models/model_abilities/rerank.html). + +### Amazon Bedrock + +To use Amazon Bedrock rerankers, you'll need to provide a JSON Object of your AWS Credentials, as described in the [AWS CLI config global settings](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html#cli-configure-files-global): + +```json +{ + "aws_access_key_id": "****", + "aws_secret_access_key": "****", + "aws_region_name": "us-west-2" +} +``` + +To find more reranking models supported by Amazon Bedrock, please visit [Amazon Bedrock Models Reference](https://docs.aws.amazon.com/bedrock/latest/userguide/foundation-models-reference.html). 
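As a companion to the credentials format documented above, here is a minimal sketch (not part of the patch series) of how those fields map onto the `AWSBedrockRerank` wiring that PATCH 2/5 adds to `get_reranker_model()` in `chat_config.py`; the access keys and model name are placeholders:

```python
# Minimal sketch: how the Bedrock credentials JSON above is consumed by the
# AWSBedrockRerank branch added to get_reranker_model() in chat_config.py.
# The access keys are placeholders, not working values.
from llama_index.postprocessor.bedrock_rerank import AWSBedrockRerank

credentials = {
    "aws_access_key_id": "****",
    "aws_secret_access_key": "****",
    "aws_region_name": "us-west-2",
}

reranker = AWSBedrockRerank(
    rerank_model_name="amazon.rerank-v1:0",  # default model in reranker_model_option.py
    top_n=10,
    aws_access_key_id=credentials["aws_access_key_id"],
    aws_secret_access_key=credentials["aws_secret_access_key"],
    region_name=credentials["aws_region_name"],
)
```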
From 494d1bb1cec34eee06c3751d9ab42c893ff8f862 Mon Sep 17 00:00:00 2001 From: jrj5423 <1565248524@qq.com> Date: Thu, 9 Jan 2025 20:49:45 +0800 Subject: [PATCH 4/5] fix default urls and others --- backend/app/rag/chat_config.py | 3 ++- .../rag/node_postprocessor/vllm_reranker.py | 16 +++++++++------- backend/app/rag/reranker_model_option.py | 10 +++++----- backend/pyproject.toml | 10 +++++----- backend/requirements-dev.lock | 18 ++++++++---------- backend/requirements.lock | 17 ++++++++--------- frontend/app/src/pages/docs/reranker-model.mdx | 18 +++++++++++++++++- 7 files changed, 54 insertions(+), 38 deletions(-) diff --git a/backend/app/rag/chat_config.py b/backend/app/rag/chat_config.py index 37b585df7..0a28ea563 100644 --- a/backend/app/rag/chat_config.py +++ b/backend/app/rag/chat_config.py @@ -436,7 +436,7 @@ def get_reranker_model( return XinferenceRerank( model=model, top_n=top_n, - base_url=config.get("api_url") + **config, ) case RerankerProvider.BEDROCK: return AWSBedrockRerank( @@ -445,6 +445,7 @@ def get_reranker_model( aws_access_key_id=credentials["aws_access_key_id"], aws_secret_access_key=credentials["aws_secret_access_key"], region_name=credentials["aws_region_name"], + **config, ) case _: raise ValueError(f"Got unknown reranker provider: {provider}") diff --git a/backend/app/rag/node_postprocessor/vllm_reranker.py b/backend/app/rag/node_postprocessor/vllm_reranker.py index 0e4c1045b..46fecf532 100644 --- a/backend/app/rag/node_postprocessor/vllm_reranker.py +++ b/backend/app/rag/node_postprocessor/vllm_reranker.py @@ -26,7 +26,7 @@ def __init__( self, top_n: int = 2, model: str = "BAAI/bge-reranker-v2-m3", - api_url: str = "http://localhost:8000/v1/score", + api_url: str = "http://localhost:8000", ): super().__init__(top_n=top_n, model=model) self.api_url = api_url @@ -35,7 +35,7 @@ def __init__( @classmethod def class_name(cls) -> str: - return "OpenAILikeRerank" + return "VLLMRerank" def _postprocess_nodes( self, @@ -70,17 +70,19 @@ def _postprocess_nodes( for node in nodes ] resp = self._session.post( # type: ignore - self.api_url, + url=f"{self.api_url}/v1/score", json={ "text_1": query_bundle.query_str, "model": self.model, "text_2": texts, }, - ).json() - if "data" not in resp: - raise RuntimeError(f"Got error from reranker: {resp}") + ) + resp.raise_for_status() + resp_json = resp.json() + if "data" not in resp_json: + raise RuntimeError(f"Got error from reranker: {resp_json}") - results = zip(range(len(nodes)), resp["data"]) + results = zip(range(len(nodes)), resp_json["data"]) results = sorted(results, key=lambda x: x[1]["score"], reverse=True)[: self.top_n] new_nodes = [] diff --git a/backend/app/rag/reranker_model_option.py b/backend/app/rag/reranker_model_option.py index f3c68bc96..fb6cda91f 100644 --- a/backend/app/rag/reranker_model_option.py +++ b/backend/app/rag/reranker_model_option.py @@ -82,10 +82,10 @@ class RerankerModelOption(BaseModel): provider_display_name="vLLM", provider_description="vLLM is a fast and easy-to-use library for LLM inference and serving.", default_reranker_model="BAAI/bge-reranker-v2-m3", - reranker_model_description="", + reranker_model_description="Reference: https://docs.vllm.ai/en/latest/models/supported_models.html#sentence-pair-scoring-task-score", default_top_n=10, default_config={ - "api_url": "http://localhost:8000/v1/score", + "api_url": "http://localhost:8000", }, config_description="api_url is the url of the vLLM server, ensure it can be accessed from this server", credentials_display_name="vLLM API Key", @@ -98,12 
+98,12 @@ class RerankerModelOption(BaseModel): provider_display_name="Xinference Reranker", provider_description="Xorbits Inference (Xinference) is an open-source platform to streamline the operation and integration of a wide array of AI models.", default_reranker_model="bge-reranker-v2-m3", - reranker_model_description="", + reranker_model_description="Reference: https://inference.readthedocs.io/en/latest/models/model_abilities/rerank.html", default_top_n=10, default_config={ - "api_url": "http://localhost:9997/v1/rerank", + "base_url": "http://localhost:9997", }, - config_description="api_url is the url of the Xinference server, ensure it can be accessed from this server", + config_description="base_url is the url of the Xinference server, ensure it can be accessed from this server", credentials_display_name="Xinference API Key", credentials_description="Xinference doesn't require an API key, set a dummy string here is ok", credentials_type="str", diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1b539890c..b6e02dd1a 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -46,10 +46,10 @@ dependencies = [ "llama-index-postprocessor-cohere-rerank>=0.1.7", "llama-index-llms-bedrock>=0.1.12", "pypdf>=4.3.1", - "llama-index-llms-ollama>=0.5.0", - "llama-index-embeddings-ollama>=0.5.0", - "llama-index-embeddings-jinaai>=0.4.0", - "llama-index-embeddings-cohere>=0.4.0", + "llama-index-llms-ollama>=0.3.0", + "llama-index-embeddings-ollama>=0.3.0", + "llama-index-embeddings-jinaai>=0.3.0", + "llama-index-embeddings-cohere>=0.2.0", "python-docx>=1.1.2", "python-pptx>=1.0.2", "colorama>=0.4.6", @@ -58,7 +58,7 @@ dependencies = [ "retry>=0.9.2", "langchain-openai>=0.2.9", "ragas>=0.2.6", - "llama-index-embeddings-bedrock>=0.4.0", + "llama-index-embeddings-bedrock>=0.2.0", "llama-index-postprocessor-xinference-rerank>=0.2.0", "llama-index-postprocessor-bedrock-rerank>=0.3.0", ] diff --git a/backend/requirements-dev.lock b/backend/requirements-dev.lock index 9349218e1..0813dc3e4 100644 --- a/backend/requirements-dev.lock +++ b/backend/requirements-dev.lock @@ -51,13 +51,13 @@ beautifulsoup4==4.12.3 # via markdownify billiard==4.2.0 # via celery -boto3==1.35.92 +boto3==1.35.93 # via anthropic # via cohere # via llama-index-embeddings-bedrock # via llama-index-llms-bedrock # via llama-index-postprocessor-bedrock-rerank -botocore==1.35.92 +botocore==1.35.93 # via anthropic # via boto3 # via s3transfer @@ -315,13 +315,13 @@ langsmith==0.1.143 # via langchain-core llama-cloud==0.1.6 # via llama-index-indices-managed-llama-cloud -llama-index==0.12.9 +llama-index==0.12.10 llama-index-agent-openai==0.4.1 # via llama-index # via llama-index-program-openai llama-index-cli==0.4.0 # via llama-index -llama-index-core==0.12.9 +llama-index-core==0.12.10.post1 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -360,7 +360,7 @@ llama-index-llms-anthropic==0.6.3 # via llama-index-llms-bedrock llama-index-llms-bedrock==0.3.3 llama-index-llms-gemini==0.4.2 -llama-index-llms-ollama==0.5.0 +llama-index-llms-ollama==0.4.2 llama-index-llms-openai==0.3.12 # via llama-index # via llama-index-agent-openai @@ -419,7 +419,6 @@ networkx==3.3 nltk==3.9.1 # via llama-index # via llama-index-core - # via llama-index-legacy nodeenv==1.9.1 # via pre-commit numpy==1.26.4 @@ -434,7 +433,7 @@ numpy==1.26.4 # via shapely # via tidb-vector # via transformers -ollama==0.4.5 +ollama==0.3.1 # via llama-index-embeddings-ollama # via llama-index-llms-ollama openai==1.59.3 @@ -533,7 +532,7 @@ 
pyasn1-modules==0.4.0 # via google-auth pycparser==2.22 # via cffi -pydantic==2.10.4 +pydantic==2.8.2 # via anthropic # via cohere # via deepeval @@ -548,12 +547,11 @@ pydantic==2.10.4 # via langsmith # via llama-cloud # via llama-index-core - # via ollama # via openai # via pydantic-settings # via ragas # via sqlmodel -pydantic-core==2.27.2 +pydantic-core==2.20.1 # via pydantic pydantic-settings==2.6.1 # via langchain-community diff --git a/backend/requirements.lock b/backend/requirements.lock index f2a9957a4..d731e9f3d 100644 --- a/backend/requirements.lock +++ b/backend/requirements.lock @@ -51,13 +51,13 @@ beautifulsoup4==4.12.3 # via markdownify billiard==4.2.0 # via celery -boto3==1.35.92 +boto3==1.35.93 # via anthropic # via cohere # via llama-index-embeddings-bedrock # via llama-index-llms-bedrock # via llama-index-postprocessor-bedrock-rerank -botocore==1.35.92 +botocore==1.35.93 # via anthropic # via boto3 # via s3transfer @@ -308,13 +308,13 @@ langsmith==0.1.143 # via langchain-core llama-cloud==0.1.6 # via llama-index-indices-managed-llama-cloud -llama-index==0.12.9 +llama-index==0.12.10 llama-index-agent-openai==0.4.1 # via llama-index # via llama-index-program-openai llama-index-cli==0.4.0 # via llama-index -llama-index-core==0.12.9 +llama-index-core==0.12.10.post1 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -353,7 +353,7 @@ llama-index-llms-anthropic==0.6.3 # via llama-index-llms-bedrock llama-index-llms-bedrock==0.3.3 llama-index-llms-gemini==0.4.2 -llama-index-llms-ollama==0.5.0 +llama-index-llms-ollama==0.4.2 llama-index-llms-openai==0.3.12 # via llama-index # via llama-index-agent-openai @@ -424,7 +424,7 @@ numpy==1.26.4 # via shapely # via tidb-vector # via transformers -ollama==0.4.5 +ollama==0.3.1 # via llama-index-embeddings-ollama # via llama-index-llms-ollama openai==1.59.3 @@ -520,7 +520,7 @@ pyasn1-modules==0.4.0 # via google-auth pycparser==2.22 # via cffi -pydantic==2.10.4 +pydantic==2.8.2 # via anthropic # via cohere # via deepeval @@ -535,12 +535,11 @@ pydantic==2.10.4 # via langsmith # via llama-cloud # via llama-index-core - # via ollama # via openai # via pydantic-settings # via ragas # via sqlmodel -pydantic-core==2.27.2 +pydantic-core==2.20.1 # via pydantic pydantic-settings==2.6.1 # via langchain-community diff --git a/frontend/app/src/pages/docs/reranker-model.mdx b/frontend/app/src/pages/docs/reranker-model.mdx index dc93f8b73..f444ffb8a 100644 --- a/frontend/app/src/pages/docs/reranker-model.mdx +++ b/frontend/app/src/pages/docs/reranker-model.mdx @@ -35,6 +35,14 @@ To use vLLM rerankers, you need to provide the **api_url** of the reranker API a } ``` +Default config: + +```json +{ + "api_url": "http://localhost:8000" +} +``` + To learn more about vLLM reranking, please visit [vLLM Sentence Pair Scoring Models](https://docs.vllm.ai/en/latest/models/supported_models.html#sentence-pair-scoring-task-score). 
### Xorbits Inference (Xinference) @@ -43,7 +51,15 @@ To use Xinference rerankers, you need to provide the **api_url** of the reranker ```json { - "api_url": "{api_base_url}" + "base_url": "{api_base_url}" +} +``` + +Default config: + +```json +{ + "base_url": "http://localhost:9997" } ``` From e6f6fc4b7ab83ae256a92826784e6dcfaeb1082a Mon Sep 17 00:00:00 2001 From: Mini256 Date: Fri, 10 Jan 2025 10:10:55 +0800 Subject: [PATCH 5/5] api_url -> base_url --- backend/app/rag/node_postprocessor/vllm_reranker.py | 8 ++++---- backend/app/rag/reranker_model_option.py | 4 ++-- frontend/app/src/pages/docs/reranker-model.mdx | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/app/rag/node_postprocessor/vllm_reranker.py b/backend/app/rag/node_postprocessor/vllm_reranker.py index 46fecf532..e66ddab4d 100644 --- a/backend/app/rag/node_postprocessor/vllm_reranker.py +++ b/backend/app/rag/node_postprocessor/vllm_reranker.py @@ -15,7 +15,7 @@ class VLLMRerank(BaseNodePostprocessor): - api_url: str = Field(default="", description="API url.") + base_url: str = Field(default="", description="The base URL of vLLM API.") model: str = Field(default="", description="The model to use when calling API.") top_n: int = Field(description="Top N nodes to return.") @@ -26,10 +26,10 @@ def __init__( self, top_n: int = 2, model: str = "BAAI/bge-reranker-v2-m3", - api_url: str = "http://localhost:8000", + base_url: str = "http://localhost:8000", ): super().__init__(top_n=top_n, model=model) - self.api_url = api_url + self.base_url = base_url self.model = model self._session = requests.Session() @@ -70,7 +70,7 @@ def _postprocess_nodes( for node in nodes ] resp = self._session.post( # type: ignore - url=f"{self.api_url}/v1/score", + url=f"{self.base_url}/v1/score", json={ "text_1": query_bundle.query_str, "model": self.model, diff --git a/backend/app/rag/reranker_model_option.py b/backend/app/rag/reranker_model_option.py index fb6cda91f..f595093f0 100644 --- a/backend/app/rag/reranker_model_option.py +++ b/backend/app/rag/reranker_model_option.py @@ -85,9 +85,9 @@ class RerankerModelOption(BaseModel): reranker_model_description="Reference: https://docs.vllm.ai/en/latest/models/supported_models.html#sentence-pair-scoring-task-score", default_top_n=10, default_config={ - "api_url": "http://localhost:8000", + "base_url": "http://localhost:8000", }, - config_description="api_url is the url of the vLLM server, ensure it can be accessed from this server", + config_description="base_url is the base url of the vLLM server, ensure it can be accessed from this server", credentials_display_name="vLLM API Key", credentials_description="vLLM doesn't require an API key, set a dummy string here is ok", credentials_type="str", diff --git a/frontend/app/src/pages/docs/reranker-model.mdx b/frontend/app/src/pages/docs/reranker-model.mdx index f444ffb8a..7af92c9fb 100644 --- a/frontend/app/src/pages/docs/reranker-model.mdx +++ b/frontend/app/src/pages/docs/reranker-model.mdx @@ -27,11 +27,11 @@ To learn more about Cohere reranking, please visit [Cohere Rerank](https://coher ### vLLM -To use vLLM rerankers, you need to provide the **api_url** of the reranker API as the following JSON format in **Advanced Settings**: +To use vLLM rerankers, you need to provide the **base_url** of the reranker API as the following JSON format in **Advanced Settings**: ```json { - "api_url": "{api_base_url}" + "base_url": "{api_base_url}" } ``` @@ -39,7 +39,7 @@ Default config: ```json { - "api_url": "http://localhost:8000" + 
"base_url": "http://localhost:8000" } ``` @@ -47,7 +47,7 @@ To learn more about vLLM reranking, please visit [vLLM Sentence Pair Scoring Mod ### Xorbits Inference (Xinference) -To use Xinference rerankers, you need to provide the **api_url** of the reranker API as the following JSON format in **Advanced Settings**: +To use Xinference rerankers, you need to provide the **base_url** of the reranker API as the following JSON format in **Advanced Settings**: ```json {