diff --git a/backend/app/rag/chat_config.py b/backend/app/rag/chat_config.py
index cb6bf7354..0a28ea563 100644
--- a/backend/app/rag/chat_config.py
+++ b/backend/app/rag/chat_config.py
@@ -21,6 +21,8 @@
 from llama_index.embeddings.ollama import OllamaEmbedding
 from llama_index.postprocessor.jinaai_rerank import JinaRerank
 from llama_index.postprocessor.cohere_rerank import CohereRerank
+from llama_index.postprocessor.xinference_rerank import XinferenceRerank
+from llama_index.postprocessor.bedrock_rerank import AWSBedrockRerank
 from sqlmodel import Session
 from google.oauth2 import service_account
 from google.auth.transport.requests import Request
@@ -30,6 +32,7 @@
 from app.rag.node_postprocessor.metadata_post_filter import MetadataFilters
 from app.rag.node_postprocessor.baisheng_reranker import BaishengRerank
 from app.rag.node_postprocessor.local_reranker import LocalRerank
+from app.rag.node_postprocessor.vllm_reranker import VLLMRerank
 from app.rag.embeddings.local_embedding import LocalEmbedding
 from app.repositories import chat_engine_repo, knowledge_base_repo
 from app.repositories.embedding_model import embed_model_repo
@@ -423,6 +426,27 @@ def get_reranker_model(
                 top_n=top_n,
                 **config,
             )
+        case RerankerProvider.VLLM:
+            return VLLMRerank(
+                model=model,
+                top_n=top_n,
+                **config,
+            )
+        case RerankerProvider.XINFERENCE:
+            return XinferenceRerank(
+                model=model,
+                top_n=top_n,
+                **config,
+            )
+        case RerankerProvider.BEDROCK:
+            return AWSBedrockRerank(
+                rerank_model_name=model,
+                top_n=top_n,
+                aws_access_key_id=credentials["aws_access_key_id"],
+                aws_secret_access_key=credentials["aws_secret_access_key"],
+                region_name=credentials["aws_region_name"],
+                **config,
+            )
         case _:
             raise ValueError(f"Got unknown reranker provider: {provider}")
 
diff --git a/backend/app/rag/node_postprocessor/vllm_reranker.py b/backend/app/rag/node_postprocessor/vllm_reranker.py
new file mode 100644
index 000000000..e66ddab4d
--- /dev/null
+++ b/backend/app/rag/node_postprocessor/vllm_reranker.py
@@ -0,0 +1,101 @@
+from typing import Any, List, Optional
+import requests
+
+from llama_index.core.bridge.pydantic import Field, PrivateAttr
+from llama_index.core.callbacks import CBEventType, EventPayload
+from llama_index.core.instrumentation import get_dispatcher
+from llama_index.core.instrumentation.events.rerank import (
+    ReRankEndEvent,
+    ReRankStartEvent,
+)
+from llama_index.core.postprocessor.types import BaseNodePostprocessor
+from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle
+
+dispatcher = get_dispatcher(__name__)
+
+
+class VLLMRerank(BaseNodePostprocessor):
+    """Rerank nodes with a scoring model served by vLLM's ``/v1/score`` endpoint."""
+
+    base_url: str = Field(default="", description="The base URL of the vLLM API.")
+    model: str = Field(default="", description="The model to use when calling the API.")
+
+    top_n: int = Field(description="Top N nodes to return.")
+
+    _session: Any = PrivateAttr()
+
+    def __init__(
+        self,
+        top_n: int = 2,
+        model: str = "BAAI/bge-reranker-v2-m3",
+        base_url: str = "http://localhost:8000",
+    ):
+        super().__init__(top_n=top_n, model=model)
+        self.base_url = base_url
+        self.model = model
+        self._session = requests.Session()
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "VLLMRerank"
+
+    def _postprocess_nodes(
+        self,
+        nodes: List[NodeWithScore],
+        query_bundle: Optional[QueryBundle] = None,
+    ) -> List[NodeWithScore]:
+        dispatcher.event(
+            ReRankStartEvent(
+                query=query_bundle,
+                nodes=nodes,
+                top_n=self.top_n,
+                model_name=self.model,
+            )
+        )
+
+        if query_bundle is None:
+            raise ValueError("Missing query bundle in extra info.")
info.") + if len(nodes) == 0: + return [] + + with self.callback_manager.event( + CBEventType.RERANKING, + payload={ + EventPayload.NODES: nodes, + EventPayload.MODEL_NAME: self.model, + EventPayload.QUERY_STR: query_bundle.query_str, + EventPayload.TOP_K: self.top_n, + }, + ) as event: + texts = [ + node.node.get_content(metadata_mode=MetadataMode.EMBED) + for node in nodes + ] + resp = self._session.post( # type: ignore + url=f"{self.base_url}/v1/score", + json={ + "text_1": query_bundle.query_str, + "model": self.model, + "text_2": texts, + }, + ) + resp.raise_for_status() + resp_json = resp.json() + if "data" not in resp_json: + raise RuntimeError(f"Got error from reranker: {resp_json}") + + results = zip(range(len(nodes)), resp_json["data"]) + results = sorted(results, key=lambda x: x[1]["score"], reverse=True)[: self.top_n] + + new_nodes = [] + for result in results: + new_node_with_score = NodeWithScore( + node=nodes[result[0]].node, score=result[1]["score"] + ) + new_nodes.append(new_node_with_score) + event.on_end(payload={EventPayload.NODES: new_nodes}) + + dispatcher.event(ReRankEndEvent(nodes=new_nodes)) + return new_nodes diff --git a/backend/app/rag/reranker_model_option.py b/backend/app/rag/reranker_model_option.py index d6444a4ee..f595093f0 100644 --- a/backend/app/rag/reranker_model_option.py +++ b/backend/app/rag/reranker_model_option.py @@ -77,4 +77,53 @@ class RerankerModelOption(BaseModel): credentials_type="str", default_credentials="dummy", ), + RerankerModelOption( + provider=RerankerProvider.VLLM, + provider_display_name="vLLM", + provider_description="vLLM is a fast and easy-to-use library for LLM inference and serving.", + default_reranker_model="BAAI/bge-reranker-v2-m3", + reranker_model_description="Reference: https://docs.vllm.ai/en/latest/models/supported_models.html#sentence-pair-scoring-task-score", + default_top_n=10, + default_config={ + "base_url": "http://localhost:8000", + }, + config_description="base_url is the base url of the vLLM server, ensure it can be accessed from this server", + credentials_display_name="vLLM API Key", + credentials_description="vLLM doesn't require an API key, set a dummy string here is ok", + credentials_type="str", + default_credentials="dummy", + ), + RerankerModelOption( + provider=RerankerProvider.XINFERENCE, + provider_display_name="Xinference Reranker", + provider_description="Xorbits Inference (Xinference) is an open-source platform to streamline the operation and integration of a wide array of AI models.", + default_reranker_model="bge-reranker-v2-m3", + reranker_model_description="Reference: https://inference.readthedocs.io/en/latest/models/model_abilities/rerank.html", + default_top_n=10, + default_config={ + "base_url": "http://localhost:9997", + }, + config_description="base_url is the url of the Xinference server, ensure it can be accessed from this server", + credentials_display_name="Xinference API Key", + credentials_description="Xinference doesn't require an API key, set a dummy string here is ok", + credentials_type="str", + default_credentials="dummy", + ), + RerankerModelOption( + provider=RerankerProvider.BEDROCK, + provider_display_name="Bedrock Reranker", + provider_description="Amazon Bedrock is a fully managed foundation models service.", + provider_url="https://docs.aws.amazon.com/bedrock/", + default_reranker_model="amazon.rerank-v1:0", + reranker_model_description="Find more models in https://docs.aws.amazon.com/bedrock/latest/userguide/foundation-models-reference.html.", + default_top_n=10, + 
credentials_display_name="AWS Bedrock Credentials JSON", + credentials_description="The JSON Object of AWS Credentials, refer to https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html#cli-configure-files-global", + credentials_type="dict", + default_credentials={ + "aws_access_key_id": "****", + "aws_secret_access_key": "****", + "aws_region_name": "us-west-2", + }, + ) ] diff --git a/backend/app/types.py b/backend/app/types.py index 27ffe0225..73ef5e06e 100644 --- a/backend/app/types.py +++ b/backend/app/types.py @@ -27,6 +27,9 @@ class RerankerProvider(str, enum.Enum): COHERE = "cohere" BAISHENG = "baisheng" LOCAL = "local" + VLLM = "vllm" + XINFERENCE = "xinference" + BEDROCK = "bedrock" class MimeTypes(str, enum.Enum): diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 078bf1b32..b6e02dd1a 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -46,10 +46,10 @@ dependencies = [ "llama-index-postprocessor-cohere-rerank>=0.1.7", "llama-index-llms-bedrock>=0.1.12", "pypdf>=4.3.1", - "llama-index-llms-ollama<=0.3.0", - "llama-index-embeddings-ollama<=0.3.0", - "llama-index-embeddings-jinaai<=0.3.0", - "llama-index-embeddings-cohere<=0.3.0", + "llama-index-llms-ollama>=0.3.0", + "llama-index-embeddings-ollama>=0.3.0", + "llama-index-embeddings-jinaai>=0.3.0", + "llama-index-embeddings-cohere>=0.2.0", "python-docx>=1.1.2", "python-pptx>=1.0.2", "colorama>=0.4.6", @@ -58,7 +58,9 @@ dependencies = [ "retry>=0.9.2", "langchain-openai>=0.2.9", "ragas>=0.2.6", - "llama-index-embeddings-bedrock<=0.3.0", + "llama-index-embeddings-bedrock>=0.2.0", + "llama-index-postprocessor-xinference-rerank>=0.2.0", + "llama-index-postprocessor-bedrock-rerank>=0.3.0", ] readme = "README.md" requires-python = ">= 3.8" diff --git a/backend/requirements-dev.lock b/backend/requirements-dev.lock index 3242838ba..0813dc3e4 100644 --- a/backend/requirements-dev.lock +++ b/backend/requirements-dev.lock @@ -15,7 +15,6 @@ aiohttp==3.9.5 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy aiosignal==1.3.1 # via aiohttp alembic==1.13.1 @@ -24,7 +23,7 @@ amqp==5.2.0 # via kombu annotated-types==0.7.0 # via pydantic -anthropic==0.28.1 +anthropic==0.42.0 # via llama-index-llms-anthropic anyio==4.4.0 # via anthropic @@ -52,11 +51,14 @@ beautifulsoup4==4.12.3 # via markdownify billiard==4.2.0 # via celery -boto3==1.34.156 +boto3==1.35.93 + # via anthropic # via cohere # via llama-index-embeddings-bedrock # via llama-index-llms-bedrock -botocore==1.34.156 + # via llama-index-postprocessor-bedrock-rerank +botocore==1.35.93 + # via anthropic # via boto3 # via s3transfer cachetools==5.3.3 @@ -105,7 +107,6 @@ cryptography==42.0.8 dataclasses-json==0.6.7 # via langchain-community # via llama-index-core - # via llama-index-legacy datasets==2.14.7 # via dspy-ai # via ragas @@ -115,7 +116,6 @@ deepdiff==7.0.1 deepeval==0.21.73 deprecated==1.2.14 # via llama-index-core - # via llama-index-legacy # via opentelemetry-api # via opentelemetry-exporter-otlp-proto-grpc dill==0.3.7 @@ -123,7 +123,6 @@ dill==0.3.7 # via multiprocess dirtyjson==1.0.8 # via llama-index-core - # via llama-index-legacy distlib==0.3.9 # via virtualenv distro==1.9.0 @@ -155,6 +154,8 @@ filelock==3.15.1 # via huggingface-hub # via transformers # via virtualenv +filetype==1.2.0 + # via llama-index-core flower==2.0.1 frozenlist==1.4.1 # via aiohttp @@ -163,7 +164,6 @@ fsspec==2023.10.0 # via datasets # via huggingface-hub # via llama-index-core - # via llama-index-legacy 
google-ai-generativelanguage==0.6.4 # via google-generativeai google-api-core==2.19.1 @@ -248,7 +248,6 @@ httpx==0.27.0 # via langsmith # via llama-cloud # via llama-index-core - # via llama-index-legacy # via ollama # via openai httpx-oauth==0.14.1 @@ -314,16 +313,15 @@ langsmith==0.1.143 # via langchain # via langchain-community # via langchain-core -llama-cloud==0.0.15 +llama-cloud==0.1.6 # via llama-index-indices-managed-llama-cloud -llama-index==0.11.10 -llama-index-agent-openai==0.3.1 +llama-index==0.12.10 +llama-index-agent-openai==0.4.1 # via llama-index - # via llama-index-llms-openai # via llama-index-program-openai -llama-index-cli==0.3.1 +llama-index-cli==0.4.0 # via llama-index -llama-index-core==0.11.10 +llama-index-core==0.12.10.post1 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -340,30 +338,30 @@ llama-index-core==0.11.10 # via llama-index-llms-openai # via llama-index-llms-openai-like # via llama-index-multi-modal-llms-openai + # via llama-index-postprocessor-bedrock-rerank # via llama-index-postprocessor-cohere-rerank # via llama-index-postprocessor-jinaai-rerank + # via llama-index-postprocessor-xinference-rerank # via llama-index-program-openai # via llama-index-question-gen-openai # via llama-index-readers-file # via llama-index-readers-llama-parse # via llama-parse -llama-index-embeddings-bedrock==0.3.0 -llama-index-embeddings-cohere==0.2.0 -llama-index-embeddings-jinaai==0.3.0 -llama-index-embeddings-ollama==0.3.0 -llama-index-embeddings-openai==0.2.4 +llama-index-embeddings-bedrock==0.4.0 +llama-index-embeddings-cohere==0.4.0 +llama-index-embeddings-jinaai==0.4.0 +llama-index-embeddings-ollama==0.5.0 +llama-index-embeddings-openai==0.3.1 # via llama-index # via llama-index-cli -llama-index-indices-managed-llama-cloud==0.3.0 - # via llama-index -llama-index-legacy==0.9.48 +llama-index-indices-managed-llama-cloud==0.6.3 # via llama-index -llama-index-llms-anthropic==0.2.1 +llama-index-llms-anthropic==0.6.3 # via llama-index-llms-bedrock -llama-index-llms-bedrock==0.2.1 -llama-index-llms-gemini==0.3.4 -llama-index-llms-ollama==0.3.0 -llama-index-llms-openai==0.2.5 +llama-index-llms-bedrock==0.3.3 +llama-index-llms-gemini==0.4.2 +llama-index-llms-ollama==0.4.2 +llama-index-llms-openai==0.3.12 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -371,19 +369,21 @@ llama-index-llms-openai==0.2.5 # via llama-index-multi-modal-llms-openai # via llama-index-program-openai # via llama-index-question-gen-openai -llama-index-llms-openai-like==0.2.0 -llama-index-multi-modal-llms-openai==0.2.0 +llama-index-llms-openai-like==0.3.3 +llama-index-multi-modal-llms-openai==0.4.2 # via llama-index -llama-index-postprocessor-cohere-rerank==0.2.0 -llama-index-postprocessor-jinaai-rerank==0.2.0 -llama-index-program-openai==0.2.0 +llama-index-postprocessor-bedrock-rerank==0.3.0 +llama-index-postprocessor-cohere-rerank==0.3.0 +llama-index-postprocessor-jinaai-rerank==0.3.0 +llama-index-postprocessor-xinference-rerank==0.2.0 +llama-index-program-openai==0.3.1 # via llama-index # via llama-index-question-gen-openai -llama-index-question-gen-openai==0.2.0 +llama-index-question-gen-openai==0.3.0 # via llama-index -llama-index-readers-file==0.2.1 +llama-index-readers-file==0.4.2 # via llama-index -llama-index-readers-llama-parse==0.3.0 +llama-index-readers-llama-parse==0.4.0 # via llama-index llama-parse==0.5.5 # via llama-index-readers-llama-parse @@ -413,15 +413,12 @@ mypy-extensions==1.0.0 # via typing-inspect nest-asyncio==1.6.0 # via 
llama-index-core - # via llama-index-legacy # via ragas networkx==3.3 # via llama-index-core - # via llama-index-legacy nltk==3.9.1 # via llama-index # via llama-index-core - # via llama-index-legacy nodeenv==1.9.1 # via pre-commit numpy==1.26.4 @@ -429,7 +426,6 @@ numpy==1.26.4 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via pandas # via pyarrow @@ -440,12 +436,11 @@ numpy==1.26.4 ollama==0.3.1 # via llama-index-embeddings-ollama # via llama-index-llms-ollama -openai==1.54.5 +openai==1.59.3 # via dspy-ai # via langchain-openai # via llama-index-agent-openai # via llama-index-embeddings-openai - # via llama-index-legacy # via llama-index-llms-openai # via ragas openpyxl==3.1.5 @@ -486,7 +481,6 @@ packaging==23.2 pandas==2.2.2 # via datasets # via dspy-ai - # via llama-index-legacy # via llama-index-readers-file parameterized==0.9.0 # via cohere @@ -570,7 +564,7 @@ pyjwt==2.8.0 pymysql==1.1.1 pyparsing==3.1.2 # via httplib2 -pypdf==4.3.1 +pypdf==5.1.0 # via llama-index-readers-file pysbd==0.3.4 # via ragas @@ -632,7 +626,6 @@ requests==2.32.3 # via langchain-community # via langsmith # via llama-index-core - # via llama-index-legacy # via requests-toolbelt # via tiktoken # via transformers @@ -670,7 +663,6 @@ sqlalchemy==2.0.30 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via sqlmodel sqlmodel==0.0.19 @@ -689,15 +681,12 @@ tenacity==8.4.2 # via langchain-community # via langchain-core # via llama-index-core - # via llama-index-legacy tidb-vector==0.0.14 tiktoken==0.7.0 # via langchain-openai # via llama-index-core - # via llama-index-legacy # via ragas tokenizers==0.19.1 - # via anthropic # via cohere # via transformers tornado==6.4.1 @@ -730,7 +719,6 @@ typing-extensions==4.12.2 # via huggingface-hub # via langchain-core # via llama-index-core - # via llama-index-legacy # via openai # via opentelemetry-sdk # via pydantic @@ -744,7 +732,6 @@ typing-extensions==4.12.2 typing-inspect==0.9.0 # via dataclasses-json # via llama-index-core - # via llama-index-legacy tzdata==2024.1 # via celery # via pandas diff --git a/backend/requirements.lock b/backend/requirements.lock index 68b6f3f4e..d731e9f3d 100644 --- a/backend/requirements.lock +++ b/backend/requirements.lock @@ -15,7 +15,6 @@ aiohttp==3.9.5 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy aiosignal==1.3.1 # via aiohttp alembic==1.13.1 @@ -24,7 +23,7 @@ amqp==5.2.0 # via kombu annotated-types==0.7.0 # via pydantic -anthropic==0.28.1 +anthropic==0.42.0 # via llama-index-llms-anthropic anyio==4.4.0 # via anthropic @@ -52,11 +51,14 @@ beautifulsoup4==4.12.3 # via markdownify billiard==4.2.0 # via celery -boto3==1.34.156 +boto3==1.35.93 + # via anthropic # via cohere # via llama-index-embeddings-bedrock # via llama-index-llms-bedrock -botocore==1.34.156 + # via llama-index-postprocessor-bedrock-rerank +botocore==1.35.93 + # via anthropic # via boto3 # via s3transfer cachetools==5.3.3 @@ -103,7 +105,6 @@ cryptography==42.0.8 dataclasses-json==0.6.7 # via langchain-community # via llama-index-core - # via llama-index-legacy datasets==2.14.7 # via dspy-ai # via ragas @@ -113,7 +114,6 @@ deepdiff==7.0.1 deepeval==0.21.73 deprecated==1.2.14 # via llama-index-core - # via llama-index-legacy # via opentelemetry-api # via opentelemetry-exporter-otlp-proto-grpc dill==0.3.7 @@ -121,7 +121,6 @@ dill==0.3.7 # via multiprocess dirtyjson==1.0.8 # via llama-index-core - # via 
llama-index-legacy distro==1.9.0 # via anthropic # via openai @@ -150,6 +149,8 @@ fastavro==1.9.5 filelock==3.15.1 # via huggingface-hub # via transformers +filetype==1.2.0 + # via llama-index-core flower==2.0.1 frozenlist==1.4.1 # via aiohttp @@ -158,7 +159,6 @@ fsspec==2023.10.0 # via datasets # via huggingface-hub # via llama-index-core - # via llama-index-legacy google-ai-generativelanguage==0.6.4 # via google-generativeai google-api-core==2.19.1 @@ -243,7 +243,6 @@ httpx==0.27.0 # via langsmith # via llama-cloud # via llama-index-core - # via llama-index-legacy # via ollama # via openai httpx-oauth==0.14.1 @@ -307,16 +306,15 @@ langsmith==0.1.143 # via langchain # via langchain-community # via langchain-core -llama-cloud==0.0.15 +llama-cloud==0.1.6 # via llama-index-indices-managed-llama-cloud -llama-index==0.11.10 -llama-index-agent-openai==0.3.1 +llama-index==0.12.10 +llama-index-agent-openai==0.4.1 # via llama-index - # via llama-index-llms-openai # via llama-index-program-openai -llama-index-cli==0.3.1 +llama-index-cli==0.4.0 # via llama-index -llama-index-core==0.11.10 +llama-index-core==0.12.10.post1 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -333,30 +331,30 @@ llama-index-core==0.11.10 # via llama-index-llms-openai # via llama-index-llms-openai-like # via llama-index-multi-modal-llms-openai + # via llama-index-postprocessor-bedrock-rerank # via llama-index-postprocessor-cohere-rerank # via llama-index-postprocessor-jinaai-rerank + # via llama-index-postprocessor-xinference-rerank # via llama-index-program-openai # via llama-index-question-gen-openai # via llama-index-readers-file # via llama-index-readers-llama-parse # via llama-parse -llama-index-embeddings-bedrock==0.3.0 -llama-index-embeddings-cohere==0.2.0 -llama-index-embeddings-jinaai==0.3.0 -llama-index-embeddings-ollama==0.3.0 -llama-index-embeddings-openai==0.2.4 +llama-index-embeddings-bedrock==0.4.0 +llama-index-embeddings-cohere==0.4.0 +llama-index-embeddings-jinaai==0.4.0 +llama-index-embeddings-ollama==0.5.0 +llama-index-embeddings-openai==0.3.1 # via llama-index # via llama-index-cli -llama-index-indices-managed-llama-cloud==0.3.0 - # via llama-index -llama-index-legacy==0.9.48 +llama-index-indices-managed-llama-cloud==0.6.3 # via llama-index -llama-index-llms-anthropic==0.2.1 +llama-index-llms-anthropic==0.6.3 # via llama-index-llms-bedrock -llama-index-llms-bedrock==0.2.1 -llama-index-llms-gemini==0.3.4 -llama-index-llms-ollama==0.3.0 -llama-index-llms-openai==0.2.5 +llama-index-llms-bedrock==0.3.3 +llama-index-llms-gemini==0.4.2 +llama-index-llms-ollama==0.4.2 +llama-index-llms-openai==0.3.12 # via llama-index # via llama-index-agent-openai # via llama-index-cli @@ -364,19 +362,21 @@ llama-index-llms-openai==0.2.5 # via llama-index-multi-modal-llms-openai # via llama-index-program-openai # via llama-index-question-gen-openai -llama-index-llms-openai-like==0.2.0 -llama-index-multi-modal-llms-openai==0.2.0 +llama-index-llms-openai-like==0.3.3 +llama-index-multi-modal-llms-openai==0.4.2 # via llama-index -llama-index-postprocessor-cohere-rerank==0.2.0 -llama-index-postprocessor-jinaai-rerank==0.2.0 -llama-index-program-openai==0.2.0 +llama-index-postprocessor-bedrock-rerank==0.3.0 +llama-index-postprocessor-cohere-rerank==0.3.0 +llama-index-postprocessor-jinaai-rerank==0.3.0 +llama-index-postprocessor-xinference-rerank==0.2.0 +llama-index-program-openai==0.3.1 # via llama-index # via llama-index-question-gen-openai -llama-index-question-gen-openai==0.2.0 
+llama-index-question-gen-openai==0.3.0 # via llama-index -llama-index-readers-file==0.2.1 +llama-index-readers-file==0.4.2 # via llama-index -llama-index-readers-llama-parse==0.3.0 +llama-index-readers-llama-parse==0.4.0 # via llama-index llama-parse==0.5.5 # via llama-index-readers-llama-parse @@ -406,21 +406,17 @@ mypy-extensions==1.0.0 # via typing-inspect nest-asyncio==1.6.0 # via llama-index-core - # via llama-index-legacy # via ragas networkx==3.3 # via llama-index-core - # via llama-index-legacy nltk==3.9.1 # via llama-index # via llama-index-core - # via llama-index-legacy numpy==1.26.4 # via datasets # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via pandas # via pyarrow @@ -431,12 +427,11 @@ numpy==1.26.4 ollama==0.3.1 # via llama-index-embeddings-ollama # via llama-index-llms-ollama -openai==1.54.5 +openai==1.59.3 # via dspy-ai # via langchain-openai # via llama-index-agent-openai # via llama-index-embeddings-openai - # via llama-index-legacy # via llama-index-llms-openai # via ragas openpyxl==3.1.5 @@ -477,7 +472,6 @@ packaging==23.2 pandas==2.2.2 # via datasets # via dspy-ai - # via llama-index-legacy # via llama-index-readers-file parameterized==0.9.0 # via cohere @@ -558,7 +552,7 @@ pyjwt==2.8.0 pymysql==1.1.1 pyparsing==3.1.2 # via httplib2 -pypdf==4.3.1 +pypdf==5.1.0 # via llama-index-readers-file pysbd==0.3.4 # via ragas @@ -619,7 +613,6 @@ requests==2.32.3 # via langchain-community # via langsmith # via llama-index-core - # via llama-index-legacy # via requests-toolbelt # via tiktoken # via transformers @@ -656,7 +649,6 @@ sqlalchemy==2.0.30 # via langchain # via langchain-community # via llama-index-core - # via llama-index-legacy # via optuna # via sqlmodel sqlmodel==0.0.19 @@ -675,15 +667,12 @@ tenacity==8.4.2 # via langchain-community # via langchain-core # via llama-index-core - # via llama-index-legacy tidb-vector==0.0.14 tiktoken==0.7.0 # via langchain-openai # via llama-index-core - # via llama-index-legacy # via ragas tokenizers==0.19.1 - # via anthropic # via cohere # via transformers tornado==6.4.1 @@ -716,7 +705,6 @@ typing-extensions==4.12.2 # via huggingface-hub # via langchain-core # via llama-index-core - # via llama-index-legacy # via openai # via opentelemetry-sdk # via pydantic @@ -730,7 +718,6 @@ typing-extensions==4.12.2 typing-inspect==0.9.0 # via dataclasses-json # via llama-index-core - # via llama-index-legacy tzdata==2024.1 # via celery # via pandas diff --git a/frontend/app/src/pages/docs/reranker-model.mdx b/frontend/app/src/pages/docs/reranker-model.mdx index c06c54530..7af92c9fb 100644 --- a/frontend/app/src/pages/docs/reranker-model.mdx +++ b/frontend/app/src/pages/docs/reranker-model.mdx @@ -17,5 +17,64 @@ After logging in with an admin account, you can configure the Reranker Model in Currently AutoFlow supports the following reranker providers: -* [Jina AI Reranker](https://jina.ai/reranker) -* [Cohere](https://cohere.com/rerank) +### JinaAI + +To learn more about JinaAI reranking, please visit [Jina AI Reranker](https://jina.ai/reranker/). + +### Cohere + +To learn more about Cohere reranking, please visit [Cohere Rerank](https://cohere.com/rerank/). 
+
+### vLLM
+
+To use vLLM rerankers, you need to provide the **base_url** of the reranker API in **Advanced Settings**, using the following JSON format:
+
+```json
+{
+  "base_url": "{api_base_url}"
+}
+```
+
+Default config:
+
+```json
+{
+  "base_url": "http://localhost:8000"
+}
+```
+
+To learn more about vLLM reranking, please visit [vLLM Sentence Pair Scoring Models](https://docs.vllm.ai/en/latest/models/supported_models.html#sentence-pair-scoring-task-score).
+
+### Xorbits Inference (Xinference)
+
+To use Xinference rerankers, you need to provide the **base_url** of the reranker API in **Advanced Settings**, using the following JSON format:
+
+```json
+{
+  "base_url": "{api_base_url}"
+}
+```
+
+Default config:
+
+```json
+{
+  "base_url": "http://localhost:9997"
+}
+```
+
+To learn more about Xinference reranking, please visit [Xinference Rerank](https://inference.readthedocs.io/en/latest/models/model_abilities/rerank.html).
+
+### Amazon Bedrock
+
+To use Amazon Bedrock rerankers, you need to provide your AWS credentials as a JSON object, using the keys described in the [AWS CLI configuration file settings](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html#cli-configure-files-global):
+
+```json
+{
+  "aws_access_key_id": "****",
+  "aws_secret_access_key": "****",
+  "aws_region_name": "us-west-2"
+}
+```
+
+To find more reranking models supported by Amazon Bedrock, please visit [Amazon Bedrock Models Reference](https://docs.aws.amazon.com/bedrock/latest/userguide/foundation-models-reference.html).
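+
+For illustration, the credentials JSON above maps directly onto the `AWSBedrockRerank` postprocessor that AutoFlow constructs in `get_reranker_model` (a sketch with placeholder values, not code you need to run yourself):
+
+```python
+from llama_index.postprocessor.bedrock_rerank import AWSBedrockRerank
+
+# AutoFlow builds this object for you from the credentials JSON above;
+# the key values here are placeholders.
+reranker = AWSBedrockRerank(
+    rerank_model_name="amazon.rerank-v1:0",
+    top_n=10,
+    aws_access_key_id="****",
+    aws_secret_access_key="****",
+    region_name="us-west-2",
+)
+```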
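+
+Finally, if you are self-hosting a vLLM reranker, a quick way to sanity-check the endpoint before configuring it here is to send the same `/v1/score` request that AutoFlow's `VLLMRerank` postprocessor issues (a minimal sketch; the query and passages are placeholders):
+
+```python
+import requests
+
+# Adjust base_url and model to match your deployment.
+resp = requests.post(
+    "http://localhost:8000/v1/score",
+    json={
+        "model": "BAAI/bge-reranker-v2-m3",
+        "text_1": "What is a reranker?",
+        "text_2": [
+            "Rerankers reorder retrieved passages by relevance to a query.",
+            "Paris is the capital of France.",
+        ],
+    },
+)
+resp.raise_for_status()
+# Each entry in "data" holds a relevance score for the corresponding passage.
+print([item["score"] for item in resp.json()["data"]])
+```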