From 001e69aec10da8f19672ae011df29ffe9cbd155f Mon Sep 17 00:00:00 2001 From: Daljeet Nandha Date: Tue, 7 Jan 2025 22:24:05 +0100 Subject: [PATCH 1/4] add multimodal bedrock integration --- .../.gitignore | 89 +++++ .../BUILD | 3 + .../Makefile | 28 ++ .../README.md | 136 ++++++++ .../multi_modal_llms/bedrock/__init__.py | 3 + .../multi_modal_llms/bedrock/base.py | 328 ++++++++++++++++++ .../multi_modal_llms/bedrock/utils.py | 223 ++++++++++++ .../pyproject.toml | 68 ++++ .../tests/BUILD | 9 + .../tests/__init__.py | 0 .../tests/test_multi_modal_llms_bedrock.py | 78 +++++ 11 files changed, 965 insertions(+) create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/__init__.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore new file mode 100644 index 0000000000000..ce810799a86c0 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore @@ -0,0 +1,89 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +.env +.venv +env/ +venv/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Testing +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Distribution +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Poetry +poetry.lock \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD new file mode 100644 index 0000000000000..7a3d9b720c151 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD @@ -0,0 +1,3 @@ 
+package(default_visibility = ["//visibility:public"]) + +exports_files(["pyproject.toml"]) \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile new file mode 100644 index 0000000000000..531966a802f71 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile @@ -0,0 +1,28 @@ +.PHONY: format lint test + +format: + poetry run black . + poetry run isort . + +lint: + poetry run mypy . + poetry run black . --check + poetry run isort . --check + poetry run flake8 . + +test: + poetry run pytest tests/ --disable-socket + +clean: + rm -rf dist/* + rm -rf build/* + rm -rf *.egg-info + +build: + poetry build + +install: + poetry install --with dev + +install_editable: + pip install -e ".[dev]" \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md new file mode 100644 index 0000000000000..9ebdb64901e7a --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md @@ -0,0 +1,136 @@ +# LlamaIndex Multi-Modal LLM Integration: AWS Bedrock + +AWS Bedrock is a fully managed service that offers a choice of high-performing foundation models from leading AI companies through a single API, along with a broad set of capabilities you need to build generative AI applications with security, privacy, and responsible AI. + +## Installation + +```bash +pip install llama-index-multi-modal-llms-bedrock +``` + +## Usage + +Here's how to use the AWS Bedrock multi-modal integration: + +### Basic Usage + +```python +from llama_index.multi_modal_llms.bedrock import BedrockMultiModal +from llama_index.core import SimpleDirectoryReader +from llama_index.core.schema import ImageDocument + +# Initialize the model (credentials can be provided through environment variables) +llm = BedrockMultiModal( + model="anthropic.claude-3-haiku-20240307-v1:0", # or other Bedrock multi-modal models + temperature=0.0, + max_tokens=300, + region_name="eu-central-1" # make sure to use the region where the model access is granted +) + +# Method 1: Load images using SimpleDirectoryReader +image_documents = SimpleDirectoryReader( + input_files=["path/to/image.jpg"] +).load_data() + +# Method 2: Create image documents directly +image_doc = ImageDocument( + image_path="/path/to/image.jpg", # Local file path + # OR + image="base64_encoded_image_string" # Base64 encoded image +) + +# Get a completion with both text and image +response = llm.complete( + prompt="Describe this image in detail:", + image_documents=image_documents # or [image_doc] +) + +print(response.text) +``` + +### AWS Authentication + +You can authenticate with AWS Bedrock in several ways: + +1. Environment variables: +```bash +export AWS_ACCESS_KEY_ID=your_access_key +export AWS_SECRET_ACCESS_KEY=your_secret_key +export AWS_REGION=us-east-1 # optional +``` + +2. Explicit credentials: +```python +llm = BedrockMultiModal( + model="anthropic.claude-3-haiku-20240307-v1:0", + aws_access_key_id="your_access_key", + aws_secret_access_key="your_secret_key", + region_name="eu-central-1" +) +``` + +3. AWS CLI configuration: +```bash +aws configure +``` + +4. IAM role-based authentication (when running on AWS services like EC2, Lambda, etc.) 
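+
+When running under an IAM role (option 4), no keys need to be passed at all:
+`boto3` resolves credentials through its default provider chain. A minimal
+sketch, assuming the attached role grants `bedrock:InvokeModel` on the chosen
+model:
+
+```python
+from llama_index.multi_modal_llms.bedrock import BedrockMultiModal
+
+# No explicit keys: boto3 falls back to the instance/task/function role.
+llm = BedrockMultiModal(
+    model="anthropic.claude-3-haiku-20240307-v1:0",
+    region_name="us-east-1",  # region where model access is granted
+)
+```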
+ +### Supported Models + +Currently supported multi-modal models in AWS Bedrock: + +- `anthropic.claude-3-sonnet-20240229-v1:0` +- `anthropic.claude-3-haiku-20240307-v1:0` +- `anthropic.claude-3-opus-20240229-v1:0` +- `anthropic.claude-3-5-sonnet-20240620-v1:0` +- `anthropic.claude-3-5-sonnet-20241022-v2:0` +- `anthropic.claude-3-5-haiku-20241022-v1:0` + +### Advanced Usage + +```python +# Using multiple images +image_docs = SimpleDirectoryReader( + input_files=["image1.jpg", "image2.jpg"] +).load_data() + +response = llm.complete( + prompt="Compare these two images:", + image_documents=image_docs +) + +# Custom parameters +llm = BedrockMultiModal( + model="anthropic.claude-3-haiku-20240307-v1:0", + temperature=0.0, + max_tokens=300, + timeout=60.0, # API timeout in seconds + max_retries=10, # Maximum number of API retries + additional_kwargs={ + # Add other model-specific parameters + } +) + +# Response includes token counts +print(f"Input tokens: {response.additional_kwargs['input_tokens']}") +print(f"Output tokens: {response.additional_kwargs['output_tokens']}") +``` + +## Development + +To install development dependencies: + +```bash +pip install -e ".[dev]" +``` + +To run tests: + +```bash +pytest tests/ +``` + +## License + +This project is licensed under the MIT License. \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py new file mode 100644 index 0000000000000..9aa24ea500704 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py @@ -0,0 +1,3 @@ +from llama_index.multi_modal_llms.bedrock.base import BedrockMultiModal + +__all__ = ["BedrockMultiModal"] \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py new file mode 100644 index 0000000000000..3e60f963e78bf --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py @@ -0,0 +1,328 @@ +from typing import Any, Callable, Dict, List, Optional, Sequence + +import boto3 +import aioboto3 +from botocore.config import Config +from llama_index.core.base.llms.types import ( + CompletionResponse, + CompletionResponseGen, + CompletionResponseAsyncGen, + MessageRole, +) +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CallbackManager +from llama_index.core.constants import ( + DEFAULT_CONTEXT_WINDOW, + DEFAULT_NUM_OUTPUTS, + DEFAULT_TEMPERATURE, +) +from llama_index.core.base.llms.generic_utils import ( + messages_to_prompt as generic_messages_to_prompt, +) +from llama_index.core.multi_modal_llms import ( + MultiModalLLM, + MultiModalLLMMetadata, +) +from llama_index.core.schema import ImageNode +from llama_index.multi_modal_llms.bedrock.utils import ( + BEDROCK_MULTI_MODAL_MODELS, + generate_bedrock_multi_modal_message, + resolve_bedrock_credentials, + invoke_model_with_retry, + invoke_model_with_retry_async, +) + + +class BedrockMultiModal(MultiModalLLM): + """Bedrock Multi-Modal LLM implementation.""" + + model: str = 
Field(description="The Multi-Modal model to use from Bedrock.") + temperature: float = Field(description="The temperature to use for sampling.") + max_tokens: Optional[int] = Field( + description="The maximum numbers of tokens to generate.", + gt=0, + ) + context_window: Optional[int] = Field( + description="The maximum number of context tokens for the model.", + gt=0, + ) + region_name: str = Field( + default=None, + description="AWS region name.", + ) + aws_access_key_id: str = Field( + default=None, + description="AWS access key ID.", + exclude=True, + ) + aws_secret_access_key: str = Field( + default=None, + description="AWS secret access key.", + exclude=True, + ) + max_retries: int = Field( + default=10, + description="The maximum number of API retries.", + gt=0, + ) + timeout: float = Field( + default=60.0, + description="The timeout for API requests in seconds.", + gt=0, + ) + additional_kwargs: Dict[str, Any] = Field( + default_factory=dict, + description="Additional kwargs for the Bedrock API.", + ) + + _messages_to_prompt: Callable = PrivateAttr() + _completion_to_prompt: Callable = PrivateAttr() + _client: Any = PrivateAttr() # boto3 client + _config: Any = PrivateAttr() # botocore config + _asession: Any = PrivateAttr() # aioboto3 session + + def __init__( + self, + model: str = "anthropic.claude-3-sonnet-20240229-v1:0", + temperature: float = DEFAULT_TEMPERATURE, + max_tokens: Optional[int] = 300, + additional_kwargs: Optional[Dict[str, Any]] = None, + context_window: Optional[int] = DEFAULT_CONTEXT_WINDOW, + region_name: Optional[str] = None, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + max_retries: int = 10, + timeout: float = 60.0, + messages_to_prompt: Optional[Callable] = None, + completion_to_prompt: Optional[Callable] = None, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + """Initialize params.""" + # Validate model name first + if model not in BEDROCK_MULTI_MODAL_MODELS: + raise ValueError( + f"Invalid model {model}. 
" + f"Available models are: {list(BEDROCK_MULTI_MODAL_MODELS.keys())}" + ) + + aws_access_key_id, aws_secret_access_key, region = resolve_bedrock_credentials( + region_name=region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + super().__init__( + model=model, + temperature=temperature, + max_tokens=max_tokens, + additional_kwargs=additional_kwargs or {}, + context_window=context_window, + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + max_retries=max_retries, + timeout=timeout, + callback_manager=callback_manager, + **kwargs, + ) + self._messages_to_prompt = messages_to_prompt or generic_messages_to_prompt + self._completion_to_prompt = completion_to_prompt or (lambda x: x) + self._config = Config( + retries={"max_attempts": max_retries, "mode": "standard"}, + connect_timeout=timeout, + read_timeout=timeout, + ) + self._client = self._get_client() + self._asession = aioboto3.Session( + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + region_name=self.region_name, + ) + + def _get_client(self) -> Any: + """Get Bedrock client.""" + session = boto3.Session( + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + region_name=self.region_name, + ) + return session.client('bedrock-runtime', config=self._config) + + @classmethod + def class_name(cls) -> str: + """Get class name.""" + return "bedrock_multi_modal_llm" + + @property + def metadata(self) -> MultiModalLLMMetadata: + """Multi Modal LLM metadata.""" + return MultiModalLLMMetadata( + num_output=self.max_tokens or DEFAULT_NUM_OUTPUTS, + model_name=self.model, + ) + + def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]: + """Get model kwargs.""" + # For Claude models, parameters need to be part of the body + model_kwargs = { + "contentType": "application/json", + "accept": "application/json", + } + + if self.model.startswith("anthropic.claude"): + model_kwargs["body"] = { + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": self.max_tokens if self.max_tokens is not None else 300, + "temperature": self.temperature, + } + + # Add any additional kwargs + if "body" in model_kwargs: + model_kwargs["body"].update(self.additional_kwargs) + model_kwargs["body"].update(kwargs) + + return model_kwargs + + def _complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> CompletionResponse: + """Complete the prompt with image support.""" + message = generate_bedrock_multi_modal_message( + prompt=prompt, + image_documents=image_documents, + ) + + # Get model kwargs and prepare the request body + model_kwargs = self._get_model_kwargs(**kwargs) + if "body" in model_kwargs: + model_kwargs["body"]["messages"] = [message] + else: + model_kwargs["body"] = {"messages": [message]} + + # Convert body to JSON string + if isinstance(model_kwargs.get("body"), dict): + import json + body_str = json.dumps(model_kwargs["body"]) + del model_kwargs["body"] + else: + body_str = model_kwargs["body"] + del model_kwargs["body"] + + response = invoke_model_with_retry( + client=self._client, + model=self.model, + messages=body_str, + max_retries=self.max_retries, + **model_kwargs, + ) + + # Parse the streaming response body + response_body = json.loads(response["body"].read()) + + # Parse response based on model + if self.model.startswith("anthropic.claude"): + completion = response_body["content"][0]["text"] + else: + # Add 
support for other models as needed + completion = response_body.get("completion", "") + + return CompletionResponse( + text=completion, + raw=response_body, + additional_kwargs={ + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + }, + ) + + def complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> CompletionResponse: + """Complete the prompt with image support.""" + return self._complete(prompt, image_documents, **kwargs) + + async def acomplete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> CompletionResponse: + """Complete the prompt with image support asynchronously.""" + message = generate_bedrock_multi_modal_message( + prompt=prompt, + image_documents=image_documents, + ) + + # Get model kwargs and prepare the request body + model_kwargs = self._get_model_kwargs(**kwargs) + if "body" in model_kwargs: + model_kwargs["body"]["messages"] = [message] + else: + model_kwargs["body"] = {"messages": [message]} + + # Convert body to JSON string + if isinstance(model_kwargs.get("body"), dict): + import json + body_str = json.dumps(model_kwargs["body"]) + del model_kwargs["body"] + else: + body_str = model_kwargs["body"] + del model_kwargs["body"] + + response = await invoke_model_with_retry_async( + session=self._asession, + config=self._config, + model=self.model, + messages=body_str, + max_retries=self.max_retries, + **model_kwargs, + ) + + # Parse the streaming response body + response_body = json.loads(await response["body"].read()) + + # Parse response based on model + if self.model.startswith("anthropic.claude"): + completion = response_body["content"][0]["text"] + else: + # Add support for other models as needed + completion = response_body.get("completion", "") + + return CompletionResponse( + text=completion, + raw=response_body, + additional_kwargs={ + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + }, + ) + + def chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: + """Chat with the model.""" + raise NotImplementedError("Chat is not supported for this model.") + + def stream_chat( + self, messages: Sequence[Any], **kwargs: Any + ) -> Any: + """Stream chat with the model.""" + raise NotImplementedError("Stream chat is not supported for this model.") + + async def achat(self, messages: Sequence[Any], **kwargs: Any) -> Any: + """Chat with the model asynchronously.""" + raise NotImplementedError("Async chat is not supported for this model.") + + async def astream_chat( + self, messages: Sequence[Any], **kwargs: Any + ) -> Any: + """Stream chat with the model asynchronously.""" + raise NotImplementedError("Async stream chat is not supported for this model.") + + def stream_complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> Any: + """Complete the prompt with image support in a streaming fashion.""" + raise NotImplementedError("Streaming completion is not supported for this model.") + + async def astream_complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> Any: + """Complete the prompt with image support in a streaming fashion asynchronously.""" + raise NotImplementedError("Async streaming completion is not supported for this 
model.") \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py new file mode 100644 index 0000000000000..16e4970d6a590 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py @@ -0,0 +1,223 @@ +import base64 +import logging +from typing import Any, Dict, List, Optional, Sequence +import filetype +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from llama_index.core.base.llms.generic_utils import get_from_param_or_env +from llama_index.core.multi_modal_llms.generic_utils import encode_image +from llama_index.core.schema import ImageDocument + +DEFAULT_BEDROCK_REGION = "us-east-1" + +# Only include multi-modal capable models +BEDROCK_MULTI_MODAL_MODELS = { + "anthropic.claude-3-sonnet-20240229-v1:0": 200000, + "anthropic.claude-3-haiku-20240307-v1:0": 200000, + "anthropic.claude-3-opus-20240229-v1:0": 200000, + "anthropic.claude-3-5-sonnet-20240620-v1:0": 200000, + "anthropic.claude-3-5-sonnet-20241022-v2:0": 200000, + "anthropic.claude-3-5-haiku-20241022-v1:0": 200000, +} + +MISSING_CREDENTIALS_ERROR_MESSAGE = """No AWS credentials found. +Please set up your AWS credentials using one of the following methods: +1. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables +2. Configure AWS CLI credentials +3. Use IAM role-based authentication +""" + +logger = logging.getLogger(__name__) + +def infer_image_mimetype_from_base64(base64_string) -> str: + decoded_data = base64.b64decode(base64_string) + kind = filetype.guess(decoded_data) + return kind.mime if kind is not None else None + +def infer_image_mimetype_from_file_path(image_file_path: str) -> str: + file_extension = image_file_path.split(".")[-1].lower() + + if file_extension in ["jpg", "jpeg"]: + return "image/jpeg" + elif file_extension == "png": + return "image/png" + elif file_extension == "gif": + return "image/gif" + elif file_extension == "webp": + return "image/webp" + + return "image/jpeg" + +def generate_bedrock_multi_modal_message( + prompt: str, + image_documents: Optional[Sequence[ImageDocument]] = None, +) -> Dict[str, Any]: + """Generate message for Bedrock multi-modal API.""" + if image_documents is None: + return { + "role": "user", + "content": [{ + "type": "text", + "text": prompt + }] + } + + message_content = [] + # Add text content first + message_content.append({ + "type": "text", + "text": prompt + }) + + # Add image content + for image_document in image_documents: + image_content = {} + if image_document.image_path: + base64_image = encode_image(image_document.image_path) + image_content = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", # Default to JPEG + "data": base64_image + } + } + elif "file_path" in image_document.metadata: + base64_image = encode_image(image_document.metadata["file_path"]) + image_content = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", # Default to JPEG + "data": base64_image + } + } + elif image_document.image: + image_content = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", # Default to JPEG + "data": image_document.image + } + } + + if image_content: + 
message_content.append(image_content) + + return { + "role": "user", + "content": message_content + } + +def resolve_bedrock_credentials( + region_name: Optional[str] = None, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, +) -> tuple[Optional[str], Optional[str], str]: + """Resolve AWS Bedrock credentials. + + The order of precedence is: + 1. Explicitly passed credentials + 2. Environment variables + 3. Default region + """ + region = get_from_param_or_env( + "region_name", region_name, "AWS_REGION", DEFAULT_BEDROCK_REGION + ) + access_key = get_from_param_or_env( + "aws_access_key_id", aws_access_key_id, "AWS_ACCESS_KEY_ID", "" + ) + secret_key = get_from_param_or_env( + "aws_secret_access_key", aws_secret_access_key, "AWS_SECRET_ACCESS_KEY", "" + ) + + return access_key, secret_key, region + +def _create_retry_decorator(client: Any, max_retries: int) -> Any: + """Create a retry decorator for Bedrock API calls.""" + min_seconds = 4 + max_seconds = 10 + try: + import boto3 # noqa + except ImportError as e: + raise ImportError( + "You must install the `boto3` package to use Bedrock." + "Please `pip install boto3`" + ) from e + + return retry( + reraise=True, + stop=stop_after_attempt(max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=(retry_if_exception_type(client.exceptions.ThrottlingException)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + +def _create_retry_decorator_async(max_retries: int) -> Any: + """Create a retry decorator for async Bedrock API calls.""" + min_seconds = 4 + max_seconds = 10 + try: + import aioboto3 # noqa + except ImportError as e: + raise ImportError( + "You must install the `aioboto3` package to use async Bedrock." + "Please `pip install aioboto3`" + ) from e + + return retry( + reraise=True, + stop=stop_after_attempt(max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=(retry_if_exception_type()), # TODO: Add throttling exception for async + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + +def invoke_model_with_retry( + client: Any, + model: str, + messages: Dict[str, Any], + max_retries: int = 3, + **kwargs: Any, +) -> Any: + """Use tenacity to retry the model invocation.""" + retry_decorator = _create_retry_decorator(client=client, max_retries=max_retries) + + @retry_decorator + def _invoke_with_retry(**kwargs: Any) -> Any: + return client.invoke_model(**kwargs) + + return _invoke_with_retry( + modelId=model, + body=messages, + **kwargs, + ) + +async def invoke_model_with_retry_async( + session: Any, + config: Any, + model: str, + messages: Dict[str, Any], + max_retries: int = 3, + **kwargs: Any, +) -> Any: + """Use tenacity to retry the model invocation asynchronously.""" + retry_decorator = _create_retry_decorator_async(max_retries=max_retries) + + @retry_decorator + async def _invoke_with_retry(**kwargs: Any) -> Any: + async with session.client("bedrock-runtime", config=config) as client: + return await client.invoke_model(**kwargs) + + return await _invoke_with_retry( + modelId=model, + body=messages, + **kwargs, + ) \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml new file mode 100644 index 0000000000000..c495fcbb79f45 --- /dev/null +++ 
b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml @@ -0,0 +1,68 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.multi_modal_llms.bedrock" + +[tool.llamahub.class_authors] +BedrockMultiModal = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["LlamaIndex"] +description = "llama-index multi-modal llms bedrock integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-multi-modal-llms-bedrock" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.9,<4.0" +boto3 = ">=1.34.0" +aioboto3 = ">=12.3.0" +llama-index-core = "^0.12.5" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-asyncio = "^0.23.5" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" + +[tool.pytest.ini_options] +asyncio_mode = "auto" \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD new file mode 100644 index 0000000000000..f1c904f04a601 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD @@ -0,0 +1,9 @@ +load("//bazel/rules/python:pytest.bzl", "llama_pytest_test") + +llama_pytest_test( + name = "multi_modal_llms_bedrock_test", + srcs = ["test_multi_modal_llms_bedrock.py"], + deps = [ + "//llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock", + ], +) \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/__init__.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py new file mode 100644 index 0000000000000..6869e4ca47ea2 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -0,0 +1,78 @@ +"""Test Bedrock multi-modal LLM.""" +import pytest +from unittest.mock import patch + +from llama_index.core.multi_modal_llms.base import MultiModalLLM +from llama_index.multi_modal_llms.bedrock import BedrockMultiModal +from llama_index.core.schema import ImageDocument + + +def test_class_name(): + """Test class name.""" + llm = BedrockMultiModal() + assert 
llm.class_name() == "bedrock_multi_modal_llm" + + +def test_init(): + """Test initialization.""" + llm = BedrockMultiModal(max_tokens=400) + assert llm.max_tokens == 400 + assert llm.model == "anthropic.claude-3-sonnet-20240229-v1:0" + + +def test_inheritance(): + """Test inheritance.""" + assert issubclass(BedrockMultiModal, MultiModalLLM) + + +def test_model_validation(): + """Test model validation.""" + with pytest.raises(ValueError, match="Invalid model"): + BedrockMultiModal(model="invalid-model") + + +@patch("boto3.Session") +def test_completion(mock_session): + """Test completion.""" + # Mock the invoke_model response + mock_client = mock_session.return_value.client.return_value + mock_client.invoke_model.return_value = { + "content": [{"text": "test response"}] + } + + llm = BedrockMultiModal() + image_doc = ImageDocument(image="base64_encoded_string") + + response = llm.complete( + prompt="test prompt", + image_documents=[image_doc] + ) + + assert response.text == "test response" + # Verify the call was made with correct parameters + mock_client.invoke_model.assert_called_once() + call_args = mock_client.invoke_model.call_args[1] + assert "modelId" in call_args + assert call_args["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" + + +@pytest.mark.asyncio +@patch("aioboto3.Session") +async def test_async_completion(mock_session): + """Test async completion.""" + # Mock the async client + mock_client = mock_session.return_value.client.return_value + mock_client.__aenter__.return_value.invoke_model.return_value = { + "content": [{"text": "async test response"}] + } + + llm = BedrockMultiModal() + image_doc = ImageDocument(image="base64_encoded_string") + + response = await llm.acomplete( + prompt="test prompt", + image_documents=[image_doc] + ) + + assert response.text == "async test response" + # No need to verify call args for async as the mock is structured differently \ No newline at end of file From aa050773c4ee999d11e54760af864548a064d32c Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Thu, 9 Jan 2025 20:20:09 -0600 Subject: [PATCH 2/4] linting + BUILD files --- .../.gitignore | 2 +- .../BUILD | 6 +-- .../Makefile | 2 +- .../README.md | 18 ++++--- .../multi_modal_llms/bedrock/BUILD | 1 + .../multi_modal_llms/bedrock/__init__.py | 2 +- .../multi_modal_llms/bedrock/base.py | 51 +++++++++++-------- .../multi_modal_llms/bedrock/utils.py | 50 +++++++++--------- .../pyproject.toml | 2 +- .../tests/BUILD | 10 +--- .../tests/test_multi_modal_llms_bedrock.py | 22 +++----- 11 files changed, 78 insertions(+), 88 deletions(-) create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore index ce810799a86c0..3c425e8a4843b 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore @@ -86,4 +86,4 @@ dmypy.json .pyre/ # Poetry -poetry.lock \ No newline at end of file +poetry.lock diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD index 7a3d9b720c151..0896ca890d8bf 100644 --- 
a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD @@ -1,3 +1,3 @@ -package(default_visibility = ["//visibility:public"]) - -exports_files(["pyproject.toml"]) \ No newline at end of file +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile index 531966a802f71..ce1c24875570c 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile @@ -25,4 +25,4 @@ install: poetry install --with dev install_editable: - pip install -e ".[dev]" \ No newline at end of file + pip install -e ".[dev]" diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md index 9ebdb64901e7a..aefccac4c5fba 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md @@ -24,7 +24,7 @@ llm = BedrockMultiModal( model="anthropic.claude-3-haiku-20240307-v1:0", # or other Bedrock multi-modal models temperature=0.0, max_tokens=300, - region_name="eu-central-1" # make sure to use the region where the model access is granted + region_name="eu-central-1", # make sure to use the region where the model access is granted ) # Method 1: Load images using SimpleDirectoryReader @@ -36,13 +36,13 @@ image_documents = SimpleDirectoryReader( image_doc = ImageDocument( image_path="/path/to/image.jpg", # Local file path # OR - image="base64_encoded_image_string" # Base64 encoded image + image="base64_encoded_image_string", # Base64 encoded image ) # Get a completion with both text and image response = llm.complete( prompt="Describe this image in detail:", - image_documents=image_documents # or [image_doc] + image_documents=image_documents, # or [image_doc] ) print(response.text) @@ -53,6 +53,7 @@ print(response.text) You can authenticate with AWS Bedrock in several ways: 1. Environment variables: + ```bash export AWS_ACCESS_KEY_ID=your_access_key export AWS_SECRET_ACCESS_KEY=your_secret_key @@ -60,16 +61,18 @@ export AWS_REGION=us-east-1 # optional ``` 2. Explicit credentials: + ```python llm = BedrockMultiModal( model="anthropic.claude-3-haiku-20240307-v1:0", aws_access_key_id="your_access_key", aws_secret_access_key="your_secret_key", - region_name="eu-central-1" + region_name="eu-central-1", ) ``` 3. AWS CLI configuration: + ```bash aws configure ``` @@ -96,8 +99,7 @@ image_docs = SimpleDirectoryReader( ).load_data() response = llm.complete( - prompt="Compare these two images:", - image_documents=image_docs + prompt="Compare these two images:", image_documents=image_docs ) # Custom parameters @@ -109,7 +111,7 @@ llm = BedrockMultiModal( max_retries=10, # Maximum number of API retries additional_kwargs={ # Add other model-specific parameters - } + }, ) # Response includes token counts @@ -133,4 +135,4 @@ pytest tests/ ## License -This project is licensed under the MIT License. \ No newline at end of file +This project is licensed under the MIT License. 
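Note on the token counts shown in the README's advanced-usage example: they are copied verbatim from Bedrock's `x-amzn-bedrock-input-token-count` / `x-amzn-bedrock-output-token-count` response headers, so they arrive as strings and may be absent. A small sketch of defensive handling, assuming `response` came from `llm.complete(...)`:

```python
def token_usage(response) -> tuple[int, int]:
    """Return (input_tokens, output_tokens), treating missing headers as 0."""
    extra = response.additional_kwargs
    return (
        int(extra.get("input_tokens") or 0),
        int(extra.get("output_tokens") or 0),
    )
```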
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py index 9aa24ea500704..12bb7290ac33f 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py @@ -1,3 +1,3 @@ from llama_index.multi_modal_llms.bedrock.base import BedrockMultiModal -__all__ = ["BedrockMultiModal"] \ No newline at end of file +__all__ = ["BedrockMultiModal"] diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py index 3e60f963e78bf..fb00fd277dcbb 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py @@ -1,13 +1,10 @@ -from typing import Any, Callable, Dict, List, Optional, Sequence +from typing import Any, Callable, Dict, Optional, Sequence import boto3 import aioboto3 from botocore.config import Config from llama_index.core.base.llms.types import ( CompletionResponse, - CompletionResponseGen, - CompletionResponseAsyncGen, - MessageRole, ) from llama_index.core.bridge.pydantic import Field, PrivateAttr from llama_index.core.callbacks import CallbackManager @@ -147,7 +144,7 @@ def _get_client(self) -> Any: aws_secret_access_key=self.aws_secret_access_key, region_name=self.region_name, ) - return session.client('bedrock-runtime', config=self._config) + return session.client("bedrock-runtime", config=self._config) @classmethod def class_name(cls) -> str: @@ -169,19 +166,19 @@ def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]: "contentType": "application/json", "accept": "application/json", } - + if self.model.startswith("anthropic.claude"): model_kwargs["body"] = { "anthropic_version": "bedrock-2023-05-31", "max_tokens": self.max_tokens if self.max_tokens is not None else 300, "temperature": self.temperature, } - + # Add any additional kwargs if "body" in model_kwargs: model_kwargs["body"].update(self.additional_kwargs) model_kwargs["body"].update(kwargs) - + return model_kwargs def _complete( @@ -203,6 +200,7 @@ def _complete( # Convert body to JSON string if isinstance(model_kwargs.get("body"), dict): import json + body_str = json.dumps(model_kwargs["body"]) del model_kwargs["body"] else: @@ -219,7 +217,7 @@ def _complete( # Parse the streaming response body response_body = json.loads(response["body"].read()) - + # Parse response based on model if self.model.startswith("anthropic.claude"): 
completion = response_body["content"][0]["text"] @@ -231,8 +229,12 @@ def _complete( text=completion, raw=response_body, additional_kwargs={ - "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), - "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-input-token-count" + ), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-output-token-count" + ), }, ) @@ -261,6 +263,7 @@ async def acomplete( # Convert body to JSON string if isinstance(model_kwargs.get("body"), dict): import json + body_str = json.dumps(model_kwargs["body"]) del model_kwargs["body"] else: @@ -278,7 +281,7 @@ async def acomplete( # Parse the streaming response body response_body = json.loads(await response["body"].read()) - + # Parse response based on model if self.model.startswith("anthropic.claude"): completion = response_body["content"][0]["text"] @@ -290,8 +293,12 @@ async def acomplete( text=completion, raw=response_body, additional_kwargs={ - "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), - "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-input-token-count" + ), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-output-token-count" + ), }, ) @@ -299,9 +306,7 @@ def chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Chat with the model.""" raise NotImplementedError("Chat is not supported for this model.") - def stream_chat( - self, messages: Sequence[Any], **kwargs: Any - ) -> Any: + def stream_chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Stream chat with the model.""" raise NotImplementedError("Stream chat is not supported for this model.") @@ -309,9 +314,7 @@ async def achat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Chat with the model asynchronously.""" raise NotImplementedError("Async chat is not supported for this model.") - async def astream_chat( - self, messages: Sequence[Any], **kwargs: Any - ) -> Any: + async def astream_chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Stream chat with the model asynchronously.""" raise NotImplementedError("Async stream chat is not supported for this model.") @@ -319,10 +322,14 @@ def stream_complete( self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any ) -> Any: """Complete the prompt with image support in a streaming fashion.""" - raise NotImplementedError("Streaming completion is not supported for this model.") + raise NotImplementedError( + "Streaming completion is not supported for this model." + ) async def astream_complete( self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any ) -> Any: """Complete the prompt with image support in a streaming fashion asynchronously.""" - raise NotImplementedError("Async streaming completion is not supported for this model.") \ No newline at end of file + raise NotImplementedError( + "Async streaming completion is not supported for this model." 
+ ) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py index 16e4970d6a590..8dc5f39bdf903 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py @@ -1,6 +1,6 @@ import base64 import logging -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, Optional, Sequence import filetype from tenacity import ( before_sleep_log, @@ -35,14 +35,16 @@ logger = logging.getLogger(__name__) + def infer_image_mimetype_from_base64(base64_string) -> str: decoded_data = base64.b64decode(base64_string) kind = filetype.guess(decoded_data) return kind.mime if kind is not None else None + def infer_image_mimetype_from_file_path(image_file_path: str) -> str: file_extension = image_file_path.split(".")[-1].lower() - + if file_extension in ["jpg", "jpeg"]: return "image/jpeg" elif file_extension == "png": @@ -51,29 +53,21 @@ def infer_image_mimetype_from_file_path(image_file_path: str) -> str: return "image/gif" elif file_extension == "webp": return "image/webp" - + return "image/jpeg" + def generate_bedrock_multi_modal_message( prompt: str, image_documents: Optional[Sequence[ImageDocument]] = None, ) -> Dict[str, Any]: """Generate message for Bedrock multi-modal API.""" if image_documents is None: - return { - "role": "user", - "content": [{ - "type": "text", - "text": prompt - }] - } + return {"role": "user", "content": [{"type": "text", "text": prompt}]} message_content = [] # Add text content first - message_content.append({ - "type": "text", - "text": prompt - }) + message_content.append({"type": "text", "text": prompt}) # Add image content for image_document in image_documents: @@ -85,8 +79,8 @@ def generate_bedrock_multi_modal_message( "source": { "type": "base64", "media_type": "image/jpeg", # Default to JPEG - "data": base64_image - } + "data": base64_image, + }, } elif "file_path" in image_document.metadata: base64_image = encode_image(image_document.metadata["file_path"]) @@ -95,8 +89,8 @@ def generate_bedrock_multi_modal_message( "source": { "type": "base64", "media_type": "image/jpeg", # Default to JPEG - "data": base64_image - } + "data": base64_image, + }, } elif image_document.image: image_content = { @@ -104,17 +98,15 @@ def generate_bedrock_multi_modal_message( "source": { "type": "base64", "media_type": "image/jpeg", # Default to JPEG - "data": image_document.image - } + "data": image_document.image, + }, } - + if image_content: message_content.append(image_content) - return { - "role": "user", - "content": message_content - } + return {"role": "user", "content": message_content} + def resolve_bedrock_credentials( region_name: Optional[str] = None, @@ -140,6 +132,7 @@ def resolve_bedrock_credentials( return access_key, secret_key, region + def _create_retry_decorator(client: Any, max_retries: int) -> Any: """Create a retry decorator for Bedrock API calls.""" min_seconds = 4 @@ -160,6 +153,7 @@ def _create_retry_decorator(client: Any, max_retries: int) -> Any: before_sleep=before_sleep_log(logger, logging.WARNING), ) + def _create_retry_decorator_async(max_retries: int) -> Any: """Create a retry decorator for async Bedrock API calls.""" 
min_seconds = 4 @@ -180,6 +174,7 @@ def _create_retry_decorator_async(max_retries: int) -> Any: before_sleep=before_sleep_log(logger, logging.WARNING), ) + def invoke_model_with_retry( client: Any, model: str, @@ -193,13 +188,14 @@ def invoke_model_with_retry( @retry_decorator def _invoke_with_retry(**kwargs: Any) -> Any: return client.invoke_model(**kwargs) - + return _invoke_with_retry( modelId=model, body=messages, **kwargs, ) + async def invoke_model_with_retry_async( session: Any, config: Any, @@ -220,4 +216,4 @@ async def _invoke_with_retry(**kwargs: Any) -> Any: modelId=model, body=messages, **kwargs, - ) \ No newline at end of file + ) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml index c495fcbb79f45..eb9929c6e4b9b 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml @@ -65,4 +65,4 @@ version = ">=v2.2.6" include = "llama_index/" [tool.pytest.ini_options] -asyncio_mode = "auto" \ No newline at end of file +asyncio_mode = "auto" diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD index f1c904f04a601..dabf212d7e716 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD @@ -1,9 +1 @@ -load("//bazel/rules/python:pytest.bzl", "llama_pytest_test") - -llama_pytest_test( - name = "multi_modal_llms_bedrock_test", - srcs = ["test_multi_modal_llms_bedrock.py"], - deps = [ - "//llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock", - ], -) \ No newline at end of file +python_tests() diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py index 6869e4ca47ea2..34d926c92fc64 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -36,17 +36,12 @@ def test_completion(mock_session): """Test completion.""" # Mock the invoke_model response mock_client = mock_session.return_value.client.return_value - mock_client.invoke_model.return_value = { - "content": [{"text": "test response"}] - } + mock_client.invoke_model.return_value = {"content": [{"text": "test response"}]} llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") - - response = llm.complete( - prompt="test prompt", - image_documents=[image_doc] - ) + + response = llm.complete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "test response" # Verify the call was made with correct parameters @@ -56,7 +51,7 @@ def test_completion(mock_session): assert call_args["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" -@pytest.mark.asyncio +@pytest.mark.asyncio() @patch("aioboto3.Session") async def test_async_completion(mock_session): """Test async completion.""" @@ -68,11 +63,8 @@ async 
def test_async_completion(mock_session): llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") - - response = await llm.acomplete( - prompt="test prompt", - image_documents=[image_doc] - ) + + response = await llm.acomplete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "async test response" - # No need to verify call args for async as the mock is structured differently \ No newline at end of file + # No need to verify call args for async as the mock is structured differently From 64958653a81f037a11f8a5db8e0c0b638a826dbb Mon Sep 17 00:00:00 2001 From: Daljeet Nandha Date: Fri, 10 Jan 2025 19:50:59 +0100 Subject: [PATCH 3/4] mock bedrock response --- .../tests/test_multi_modal_llms_bedrock.py | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py index 34d926c92fc64..35304b64fd125 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -1,6 +1,8 @@ """Test Bedrock multi-modal LLM.""" +import json +from io import BytesIO import pytest -from unittest.mock import patch +from unittest.mock import patch, MagicMock, AsyncMock from llama_index.core.multi_modal_llms.base import MultiModalLLM from llama_index.multi_modal_llms.bedrock import BedrockMultiModal @@ -36,7 +38,16 @@ def test_completion(mock_session): """Test completion.""" # Mock the invoke_model response mock_client = mock_session.return_value.client.return_value - mock_client.invoke_model.return_value = {"content": [{"text": "test response"}]} + mock_response = { + "body": BytesIO(json.dumps({"content": [{"text": "test response"}]}).encode()), + "ResponseMetadata": { + "HTTPHeaders": { + "x-amzn-bedrock-input-token-count": "100", + "x-amzn-bedrock-output-token-count": "50", + } + } + } + mock_client.invoke_model.return_value = mock_response llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") @@ -44,22 +55,30 @@ def test_completion(mock_session): response = llm.complete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "test response" + assert response.additional_kwargs["input_tokens"] == "100" + assert response.additional_kwargs["output_tokens"] == "50" # Verify the call was made with correct parameters mock_client.invoke_model.assert_called_once() - call_args = mock_client.invoke_model.call_args[1] - assert "modelId" in call_args - assert call_args["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" -@pytest.mark.asyncio() +@pytest.mark.asyncio @patch("aioboto3.Session") async def test_async_completion(mock_session): """Test async completion.""" # Mock the async client - mock_client = mock_session.return_value.client.return_value - mock_client.__aenter__.return_value.invoke_model.return_value = { - "content": [{"text": "async test response"}] + mock_client = mock_session.return_value.client.return_value.__aenter__.return_value + mock_body = AsyncMock() + mock_body.read.return_value = json.dumps({"content": [{"text": "async test response"}]}).encode() + mock_response = { + "body": mock_body, + "ResponseMetadata": { + "HTTPHeaders": { + 
"x-amzn-bedrock-input-token-count": "100", + "x-amzn-bedrock-output-token-count": "50", + } + } } + mock_client.invoke_model.return_value = mock_response llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") @@ -67,4 +86,6 @@ async def test_async_completion(mock_session): response = await llm.acomplete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "async test response" - # No need to verify call args for async as the mock is structured differently + assert response.additional_kwargs["input_tokens"] == "100" + assert response.additional_kwargs["output_tokens"] == "50" + mock_body.read.assert_awaited_once() From 159278215518832e55eb2440e31bd29430a9e703 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Mon, 13 Jan 2025 12:05:09 -0600 Subject: [PATCH 4/4] lint --- .../tests/test_multi_modal_llms_bedrock.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py index 35304b64fd125..cebd9c236b89a 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -2,7 +2,7 @@ import json from io import BytesIO import pytest -from unittest.mock import patch, MagicMock, AsyncMock +from unittest.mock import patch, AsyncMock from llama_index.core.multi_modal_llms.base import MultiModalLLM from llama_index.multi_modal_llms.bedrock import BedrockMultiModal @@ -45,7 +45,7 @@ def test_completion(mock_session): "x-amzn-bedrock-input-token-count": "100", "x-amzn-bedrock-output-token-count": "50", } - } + }, } mock_client.invoke_model.return_value = mock_response @@ -61,14 +61,16 @@ def test_completion(mock_session): mock_client.invoke_model.assert_called_once() -@pytest.mark.asyncio +@pytest.mark.asyncio() @patch("aioboto3.Session") async def test_async_completion(mock_session): """Test async completion.""" # Mock the async client mock_client = mock_session.return_value.client.return_value.__aenter__.return_value mock_body = AsyncMock() - mock_body.read.return_value = json.dumps({"content": [{"text": "async test response"}]}).encode() + mock_body.read.return_value = json.dumps( + {"content": [{"text": "async test response"}]} + ).encode() mock_response = { "body": mock_body, "ResponseMetadata": { @@ -76,7 +78,7 @@ async def test_async_completion(mock_session): "x-amzn-bedrock-input-token-count": "100", "x-amzn-bedrock-output-token-count": "50", } - } + }, } mock_client.invoke_model.return_value = mock_response