From 001e69aec10da8f19672ae011df29ffe9cbd155f Mon Sep 17 00:00:00 2001 From: Daljeet Nandha Date: Tue, 7 Jan 2025 22:24:05 +0100 Subject: [PATCH 1/4] add multimodal bedrock integration --- .../.gitignore | 89 +++++ .../BUILD | 3 + .../Makefile | 28 ++ .../README.md | 136 ++++++++ .../multi_modal_llms/bedrock/__init__.py | 3 + .../multi_modal_llms/bedrock/base.py | 328 ++++++++++++++++++ .../multi_modal_llms/bedrock/utils.py | 223 ++++++++++++ .../pyproject.toml | 68 ++++ .../tests/BUILD | 9 + .../tests/__init__.py | 0 .../tests/test_multi_modal_llms_bedrock.py | 78 +++++ 11 files changed, 965 insertions(+) create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/__init__.py create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore new file mode 100644 index 0000000000000..ce810799a86c0 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore @@ -0,0 +1,89 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +.env +.venv +env/ +venv/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Testing +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Distribution +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Poetry +poetry.lock \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD new file mode 100644 index 0000000000000..7a3d9b720c151 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD @@ -0,0 +1,3 @@ 
+package(default_visibility = ["//visibility:public"]) + +exports_files(["pyproject.toml"]) \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile new file mode 100644 index 0000000000000..531966a802f71 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile @@ -0,0 +1,28 @@ +.PHONY: format lint test + +format: + poetry run black . + poetry run isort . + +lint: + poetry run mypy . + poetry run black . --check + poetry run isort . --check + poetry run flake8 . + +test: + poetry run pytest tests/ --disable-socket + +clean: + rm -rf dist/* + rm -rf build/* + rm -rf *.egg-info + +build: + poetry build + +install: + poetry install --with dev + +install_editable: + pip install -e ".[dev]" \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md new file mode 100644 index 0000000000000..9ebdb64901e7a --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md @@ -0,0 +1,136 @@ +# LlamaIndex Multi-Modal LLM Integration: AWS Bedrock + +AWS Bedrock is a fully managed service that offers a choice of high-performing foundation models from leading AI companies through a single API, along with a broad set of capabilities you need to build generative AI applications with security, privacy, and responsible AI. + +## Installation + +```bash +pip install llama-index-multi-modal-llms-bedrock +``` + +## Usage + +Here's how to use the AWS Bedrock multi-modal integration: + +### Basic Usage + +```python +from llama_index.multi_modal_llms.bedrock import BedrockMultiModal +from llama_index.core import SimpleDirectoryReader +from llama_index.core.schema import ImageDocument + +# Initialize the model (credentials can be provided through environment variables) +llm = BedrockMultiModal( + model="anthropic.claude-3-haiku-20240307-v1:0", # or other Bedrock multi-modal models + temperature=0.0, + max_tokens=300, + region_name="eu-central-1" # make sure to use the region where the model access is granted +) + +# Method 1: Load images using SimpleDirectoryReader +image_documents = SimpleDirectoryReader( + input_files=["path/to/image.jpg"] +).load_data() + +# Method 2: Create image documents directly +image_doc = ImageDocument( + image_path="/path/to/image.jpg", # Local file path + # OR + image="base64_encoded_image_string" # Base64 encoded image +) + +# Get a completion with both text and image +response = llm.complete( + prompt="Describe this image in detail:", + image_documents=image_documents # or [image_doc] +) + +print(response.text) +``` + +### AWS Authentication + +You can authenticate with AWS Bedrock in several ways: + +1. Environment variables: +```bash +export AWS_ACCESS_KEY_ID=your_access_key +export AWS_SECRET_ACCESS_KEY=your_secret_key +export AWS_REGION=us-east-1 # optional +``` + +2. Explicit credentials: +```python +llm = BedrockMultiModal( + model="anthropic.claude-3-haiku-20240307-v1:0", + aws_access_key_id="your_access_key", + aws_secret_access_key="your_secret_key", + region_name="eu-central-1" +) +``` + +3. AWS CLI configuration: +```bash +aws configure +``` + +4. IAM role-based authentication (when running on AWS services like EC2, Lambda, etc.) 
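+
+When running under an IAM role (option 4), no keys need to be passed at all:
+`boto3` resolves credentials through its default provider chain. A minimal
+sketch, assuming the attached role grants `bedrock:InvokeModel` on the chosen
+model:
+
+```python
+from llama_index.multi_modal_llms.bedrock import BedrockMultiModal
+
+# No explicit keys: boto3 falls back to the instance/task/function role.
+llm = BedrockMultiModal(
+    model="anthropic.claude-3-haiku-20240307-v1:0",
+    region_name="us-east-1",  # region where model access is granted
+)
+```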
+ +### Supported Models + +Currently supported multi-modal models in AWS Bedrock: + +- `anthropic.claude-3-sonnet-20240229-v1:0` +- `anthropic.claude-3-haiku-20240307-v1:0` +- `anthropic.claude-3-opus-20240229-v1:0` +- `anthropic.claude-3-5-sonnet-20240620-v1:0` +- `anthropic.claude-3-5-sonnet-20241022-v2:0` +- `anthropic.claude-3-5-haiku-20241022-v1:0` + +### Advanced Usage + +```python +# Using multiple images +image_docs = SimpleDirectoryReader( + input_files=["image1.jpg", "image2.jpg"] +).load_data() + +response = llm.complete( + prompt="Compare these two images:", + image_documents=image_docs +) + +# Custom parameters +llm = BedrockMultiModal( + model="anthropic.claude-3-haiku-20240307-v1:0", + temperature=0.0, + max_tokens=300, + timeout=60.0, # API timeout in seconds + max_retries=10, # Maximum number of API retries + additional_kwargs={ + # Add other model-specific parameters + } +) + +# Response includes token counts +print(f"Input tokens: {response.additional_kwargs['input_tokens']}") +print(f"Output tokens: {response.additional_kwargs['output_tokens']}") +``` + +## Development + +To install development dependencies: + +```bash +pip install -e ".[dev]" +``` + +To run tests: + +```bash +pytest tests/ +``` + +## License + +This project is licensed under the MIT License. \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py new file mode 100644 index 0000000000000..9aa24ea500704 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py @@ -0,0 +1,3 @@ +from llama_index.multi_modal_llms.bedrock.base import BedrockMultiModal + +__all__ = ["BedrockMultiModal"] \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py new file mode 100644 index 0000000000000..3e60f963e78bf --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py @@ -0,0 +1,328 @@ +from typing import Any, Callable, Dict, List, Optional, Sequence + +import boto3 +import aioboto3 +from botocore.config import Config +from llama_index.core.base.llms.types import ( + CompletionResponse, + CompletionResponseGen, + CompletionResponseAsyncGen, + MessageRole, +) +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CallbackManager +from llama_index.core.constants import ( + DEFAULT_CONTEXT_WINDOW, + DEFAULT_NUM_OUTPUTS, + DEFAULT_TEMPERATURE, +) +from llama_index.core.base.llms.generic_utils import ( + messages_to_prompt as generic_messages_to_prompt, +) +from llama_index.core.multi_modal_llms import ( + MultiModalLLM, + MultiModalLLMMetadata, +) +from llama_index.core.schema import ImageNode +from llama_index.multi_modal_llms.bedrock.utils import ( + BEDROCK_MULTI_MODAL_MODELS, + generate_bedrock_multi_modal_message, + resolve_bedrock_credentials, + invoke_model_with_retry, + invoke_model_with_retry_async, +) + + +class BedrockMultiModal(MultiModalLLM): + """Bedrock Multi-Modal LLM implementation.""" + + model: str = 
Field(description="The Multi-Modal model to use from Bedrock.") + temperature: float = Field(description="The temperature to use for sampling.") + max_tokens: Optional[int] = Field( + description="The maximum numbers of tokens to generate.", + gt=0, + ) + context_window: Optional[int] = Field( + description="The maximum number of context tokens for the model.", + gt=0, + ) + region_name: str = Field( + default=None, + description="AWS region name.", + ) + aws_access_key_id: str = Field( + default=None, + description="AWS access key ID.", + exclude=True, + ) + aws_secret_access_key: str = Field( + default=None, + description="AWS secret access key.", + exclude=True, + ) + max_retries: int = Field( + default=10, + description="The maximum number of API retries.", + gt=0, + ) + timeout: float = Field( + default=60.0, + description="The timeout for API requests in seconds.", + gt=0, + ) + additional_kwargs: Dict[str, Any] = Field( + default_factory=dict, + description="Additional kwargs for the Bedrock API.", + ) + + _messages_to_prompt: Callable = PrivateAttr() + _completion_to_prompt: Callable = PrivateAttr() + _client: Any = PrivateAttr() # boto3 client + _config: Any = PrivateAttr() # botocore config + _asession: Any = PrivateAttr() # aioboto3 session + + def __init__( + self, + model: str = "anthropic.claude-3-sonnet-20240229-v1:0", + temperature: float = DEFAULT_TEMPERATURE, + max_tokens: Optional[int] = 300, + additional_kwargs: Optional[Dict[str, Any]] = None, + context_window: Optional[int] = DEFAULT_CONTEXT_WINDOW, + region_name: Optional[str] = None, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + max_retries: int = 10, + timeout: float = 60.0, + messages_to_prompt: Optional[Callable] = None, + completion_to_prompt: Optional[Callable] = None, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + """Initialize params.""" + # Validate model name first + if model not in BEDROCK_MULTI_MODAL_MODELS: + raise ValueError( + f"Invalid model {model}. 
" + f"Available models are: {list(BEDROCK_MULTI_MODAL_MODELS.keys())}" + ) + + aws_access_key_id, aws_secret_access_key, region = resolve_bedrock_credentials( + region_name=region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + + super().__init__( + model=model, + temperature=temperature, + max_tokens=max_tokens, + additional_kwargs=additional_kwargs or {}, + context_window=context_window, + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + max_retries=max_retries, + timeout=timeout, + callback_manager=callback_manager, + **kwargs, + ) + self._messages_to_prompt = messages_to_prompt or generic_messages_to_prompt + self._completion_to_prompt = completion_to_prompt or (lambda x: x) + self._config = Config( + retries={"max_attempts": max_retries, "mode": "standard"}, + connect_timeout=timeout, + read_timeout=timeout, + ) + self._client = self._get_client() + self._asession = aioboto3.Session( + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + region_name=self.region_name, + ) + + def _get_client(self) -> Any: + """Get Bedrock client.""" + session = boto3.Session( + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + region_name=self.region_name, + ) + return session.client('bedrock-runtime', config=self._config) + + @classmethod + def class_name(cls) -> str: + """Get class name.""" + return "bedrock_multi_modal_llm" + + @property + def metadata(self) -> MultiModalLLMMetadata: + """Multi Modal LLM metadata.""" + return MultiModalLLMMetadata( + num_output=self.max_tokens or DEFAULT_NUM_OUTPUTS, + model_name=self.model, + ) + + def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]: + """Get model kwargs.""" + # For Claude models, parameters need to be part of the body + model_kwargs = { + "contentType": "application/json", + "accept": "application/json", + } + + if self.model.startswith("anthropic.claude"): + model_kwargs["body"] = { + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": self.max_tokens if self.max_tokens is not None else 300, + "temperature": self.temperature, + } + + # Add any additional kwargs + if "body" in model_kwargs: + model_kwargs["body"].update(self.additional_kwargs) + model_kwargs["body"].update(kwargs) + + return model_kwargs + + def _complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> CompletionResponse: + """Complete the prompt with image support.""" + message = generate_bedrock_multi_modal_message( + prompt=prompt, + image_documents=image_documents, + ) + + # Get model kwargs and prepare the request body + model_kwargs = self._get_model_kwargs(**kwargs) + if "body" in model_kwargs: + model_kwargs["body"]["messages"] = [message] + else: + model_kwargs["body"] = {"messages": [message]} + + # Convert body to JSON string + if isinstance(model_kwargs.get("body"), dict): + import json + body_str = json.dumps(model_kwargs["body"]) + del model_kwargs["body"] + else: + body_str = model_kwargs["body"] + del model_kwargs["body"] + + response = invoke_model_with_retry( + client=self._client, + model=self.model, + messages=body_str, + max_retries=self.max_retries, + **model_kwargs, + ) + + # Parse the streaming response body + response_body = json.loads(response["body"].read()) + + # Parse response based on model + if self.model.startswith("anthropic.claude"): + completion = response_body["content"][0]["text"] + else: + # Add 
support for other models as needed + completion = response_body.get("completion", "") + + return CompletionResponse( + text=completion, + raw=response_body, + additional_kwargs={ + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + }, + ) + + def complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> CompletionResponse: + """Complete the prompt with image support.""" + return self._complete(prompt, image_documents, **kwargs) + + async def acomplete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> CompletionResponse: + """Complete the prompt with image support asynchronously.""" + message = generate_bedrock_multi_modal_message( + prompt=prompt, + image_documents=image_documents, + ) + + # Get model kwargs and prepare the request body + model_kwargs = self._get_model_kwargs(**kwargs) + if "body" in model_kwargs: + model_kwargs["body"]["messages"] = [message] + else: + model_kwargs["body"] = {"messages": [message]} + + # Convert body to JSON string + if isinstance(model_kwargs.get("body"), dict): + import json + body_str = json.dumps(model_kwargs["body"]) + del model_kwargs["body"] + else: + body_str = model_kwargs["body"] + del model_kwargs["body"] + + response = await invoke_model_with_retry_async( + session=self._asession, + config=self._config, + model=self.model, + messages=body_str, + max_retries=self.max_retries, + **model_kwargs, + ) + + # Parse the streaming response body + response_body = json.loads(await response["body"].read()) + + # Parse response based on model + if self.model.startswith("anthropic.claude"): + completion = response_body["content"][0]["text"] + else: + # Add support for other models as needed + completion = response_body.get("completion", "") + + return CompletionResponse( + text=completion, + raw=response_body, + additional_kwargs={ + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + }, + ) + + def chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: + """Chat with the model.""" + raise NotImplementedError("Chat is not supported for this model.") + + def stream_chat( + self, messages: Sequence[Any], **kwargs: Any + ) -> Any: + """Stream chat with the model.""" + raise NotImplementedError("Stream chat is not supported for this model.") + + async def achat(self, messages: Sequence[Any], **kwargs: Any) -> Any: + """Chat with the model asynchronously.""" + raise NotImplementedError("Async chat is not supported for this model.") + + async def astream_chat( + self, messages: Sequence[Any], **kwargs: Any + ) -> Any: + """Stream chat with the model asynchronously.""" + raise NotImplementedError("Async stream chat is not supported for this model.") + + def stream_complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> Any: + """Complete the prompt with image support in a streaming fashion.""" + raise NotImplementedError("Streaming completion is not supported for this model.") + + async def astream_complete( + self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any + ) -> Any: + """Complete the prompt with image support in a streaming fashion asynchronously.""" + raise NotImplementedError("Async streaming completion is not supported for this 
model.") \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py new file mode 100644 index 0000000000000..16e4970d6a590 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py @@ -0,0 +1,223 @@ +import base64 +import logging +from typing import Any, Dict, List, Optional, Sequence +import filetype +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from llama_index.core.base.llms.generic_utils import get_from_param_or_env +from llama_index.core.multi_modal_llms.generic_utils import encode_image +from llama_index.core.schema import ImageDocument + +DEFAULT_BEDROCK_REGION = "us-east-1" + +# Only include multi-modal capable models +BEDROCK_MULTI_MODAL_MODELS = { + "anthropic.claude-3-sonnet-20240229-v1:0": 200000, + "anthropic.claude-3-haiku-20240307-v1:0": 200000, + "anthropic.claude-3-opus-20240229-v1:0": 200000, + "anthropic.claude-3-5-sonnet-20240620-v1:0": 200000, + "anthropic.claude-3-5-sonnet-20241022-v2:0": 200000, + "anthropic.claude-3-5-haiku-20241022-v1:0": 200000, +} + +MISSING_CREDENTIALS_ERROR_MESSAGE = """No AWS credentials found. +Please set up your AWS credentials using one of the following methods: +1. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables +2. Configure AWS CLI credentials +3. Use IAM role-based authentication +""" + +logger = logging.getLogger(__name__) + +def infer_image_mimetype_from_base64(base64_string) -> str: + decoded_data = base64.b64decode(base64_string) + kind = filetype.guess(decoded_data) + return kind.mime if kind is not None else None + +def infer_image_mimetype_from_file_path(image_file_path: str) -> str: + file_extension = image_file_path.split(".")[-1].lower() + + if file_extension in ["jpg", "jpeg"]: + return "image/jpeg" + elif file_extension == "png": + return "image/png" + elif file_extension == "gif": + return "image/gif" + elif file_extension == "webp": + return "image/webp" + + return "image/jpeg" + +def generate_bedrock_multi_modal_message( + prompt: str, + image_documents: Optional[Sequence[ImageDocument]] = None, +) -> Dict[str, Any]: + """Generate message for Bedrock multi-modal API.""" + if image_documents is None: + return { + "role": "user", + "content": [{ + "type": "text", + "text": prompt + }] + } + + message_content = [] + # Add text content first + message_content.append({ + "type": "text", + "text": prompt + }) + + # Add image content + for image_document in image_documents: + image_content = {} + if image_document.image_path: + base64_image = encode_image(image_document.image_path) + image_content = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", # Default to JPEG + "data": base64_image + } + } + elif "file_path" in image_document.metadata: + base64_image = encode_image(image_document.metadata["file_path"]) + image_content = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", # Default to JPEG + "data": base64_image + } + } + elif image_document.image: + image_content = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", # Default to JPEG + "data": image_document.image + } + } + + if image_content: + 
message_content.append(image_content) + + return { + "role": "user", + "content": message_content + } + +def resolve_bedrock_credentials( + region_name: Optional[str] = None, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, +) -> tuple[Optional[str], Optional[str], str]: + """Resolve AWS Bedrock credentials. + + The order of precedence is: + 1. Explicitly passed credentials + 2. Environment variables + 3. Default region + """ + region = get_from_param_or_env( + "region_name", region_name, "AWS_REGION", DEFAULT_BEDROCK_REGION + ) + access_key = get_from_param_or_env( + "aws_access_key_id", aws_access_key_id, "AWS_ACCESS_KEY_ID", "" + ) + secret_key = get_from_param_or_env( + "aws_secret_access_key", aws_secret_access_key, "AWS_SECRET_ACCESS_KEY", "" + ) + + return access_key, secret_key, region + +def _create_retry_decorator(client: Any, max_retries: int) -> Any: + """Create a retry decorator for Bedrock API calls.""" + min_seconds = 4 + max_seconds = 10 + try: + import boto3 # noqa + except ImportError as e: + raise ImportError( + "You must install the `boto3` package to use Bedrock." + "Please `pip install boto3`" + ) from e + + return retry( + reraise=True, + stop=stop_after_attempt(max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=(retry_if_exception_type(client.exceptions.ThrottlingException)), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + +def _create_retry_decorator_async(max_retries: int) -> Any: + """Create a retry decorator for async Bedrock API calls.""" + min_seconds = 4 + max_seconds = 10 + try: + import aioboto3 # noqa + except ImportError as e: + raise ImportError( + "You must install the `aioboto3` package to use async Bedrock." + "Please `pip install aioboto3`" + ) from e + + return retry( + reraise=True, + stop=stop_after_attempt(max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=(retry_if_exception_type()), # TODO: Add throttling exception for async + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + +def invoke_model_with_retry( + client: Any, + model: str, + messages: Dict[str, Any], + max_retries: int = 3, + **kwargs: Any, +) -> Any: + """Use tenacity to retry the model invocation.""" + retry_decorator = _create_retry_decorator(client=client, max_retries=max_retries) + + @retry_decorator + def _invoke_with_retry(**kwargs: Any) -> Any: + return client.invoke_model(**kwargs) + + return _invoke_with_retry( + modelId=model, + body=messages, + **kwargs, + ) + +async def invoke_model_with_retry_async( + session: Any, + config: Any, + model: str, + messages: Dict[str, Any], + max_retries: int = 3, + **kwargs: Any, +) -> Any: + """Use tenacity to retry the model invocation asynchronously.""" + retry_decorator = _create_retry_decorator_async(max_retries=max_retries) + + @retry_decorator + async def _invoke_with_retry(**kwargs: Any) -> Any: + async with session.client("bedrock-runtime", config=config) as client: + return await client.invoke_model(**kwargs) + + return await _invoke_with_retry( + modelId=model, + body=messages, + **kwargs, + ) \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml new file mode 100644 index 0000000000000..c495fcbb79f45 --- /dev/null +++ 
b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml @@ -0,0 +1,68 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +contains_example = false +import_path = "llama_index.multi_modal_llms.bedrock" + +[tool.llamahub.class_authors] +BedrockMultiModal = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["LlamaIndex"] +description = "llama-index multi-modal llms bedrock integration" +exclude = ["**/BUILD"] +license = "MIT" +name = "llama-index-multi-modal-llms-bedrock" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.9,<4.0" +boto3 = ">=1.34.0" +aioboto3 = ">=12.3.0" +llama-index-core = "^0.12.5" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-asyncio = "^0.23.5" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" + +[tool.pytest.ini_options] +asyncio_mode = "auto" \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD new file mode 100644 index 0000000000000..f1c904f04a601 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD @@ -0,0 +1,9 @@ +load("//bazel/rules/python:pytest.bzl", "llama_pytest_test") + +llama_pytest_test( + name = "multi_modal_llms_bedrock_test", + srcs = ["test_multi_modal_llms_bedrock.py"], + deps = [ + "//llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock", + ], +) \ No newline at end of file diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/__init__.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py new file mode 100644 index 0000000000000..6869e4ca47ea2 --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -0,0 +1,78 @@ +"""Test Bedrock multi-modal LLM.""" +import pytest +from unittest.mock import patch + +from llama_index.core.multi_modal_llms.base import MultiModalLLM +from llama_index.multi_modal_llms.bedrock import BedrockMultiModal +from llama_index.core.schema import ImageDocument + + +def test_class_name(): + """Test class name.""" + llm = BedrockMultiModal() + assert 
llm.class_name() == "bedrock_multi_modal_llm" + + +def test_init(): + """Test initialization.""" + llm = BedrockMultiModal(max_tokens=400) + assert llm.max_tokens == 400 + assert llm.model == "anthropic.claude-3-sonnet-20240229-v1:0" + + +def test_inheritance(): + """Test inheritance.""" + assert issubclass(BedrockMultiModal, MultiModalLLM) + + +def test_model_validation(): + """Test model validation.""" + with pytest.raises(ValueError, match="Invalid model"): + BedrockMultiModal(model="invalid-model") + + +@patch("boto3.Session") +def test_completion(mock_session): + """Test completion.""" + # Mock the invoke_model response + mock_client = mock_session.return_value.client.return_value + mock_client.invoke_model.return_value = { + "content": [{"text": "test response"}] + } + + llm = BedrockMultiModal() + image_doc = ImageDocument(image="base64_encoded_string") + + response = llm.complete( + prompt="test prompt", + image_documents=[image_doc] + ) + + assert response.text == "test response" + # Verify the call was made with correct parameters + mock_client.invoke_model.assert_called_once() + call_args = mock_client.invoke_model.call_args[1] + assert "modelId" in call_args + assert call_args["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" + + +@pytest.mark.asyncio +@patch("aioboto3.Session") +async def test_async_completion(mock_session): + """Test async completion.""" + # Mock the async client + mock_client = mock_session.return_value.client.return_value + mock_client.__aenter__.return_value.invoke_model.return_value = { + "content": [{"text": "async test response"}] + } + + llm = BedrockMultiModal() + image_doc = ImageDocument(image="base64_encoded_string") + + response = await llm.acomplete( + prompt="test prompt", + image_documents=[image_doc] + ) + + assert response.text == "async test response" + # No need to verify call args for async as the mock is structured differently \ No newline at end of file From aa050773c4ee999d11e54760af864548a064d32c Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Thu, 9 Jan 2025 20:20:09 -0600 Subject: [PATCH 2/4] linting + BUILD files --- .../.gitignore | 2 +- .../BUILD | 6 +-- .../Makefile | 2 +- .../README.md | 18 ++++--- .../multi_modal_llms/bedrock/BUILD | 1 + .../multi_modal_llms/bedrock/__init__.py | 2 +- .../multi_modal_llms/bedrock/base.py | 51 +++++++++++-------- .../multi_modal_llms/bedrock/utils.py | 50 +++++++++--------- .../pyproject.toml | 2 +- .../tests/BUILD | 10 +--- .../tests/test_multi_modal_llms_bedrock.py | 22 +++----- 11 files changed, 78 insertions(+), 88 deletions(-) create mode 100644 llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore index ce810799a86c0..3c425e8a4843b 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/.gitignore @@ -86,4 +86,4 @@ dmypy.json .pyre/ # Poetry -poetry.lock \ No newline at end of file +poetry.lock diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD index 7a3d9b720c151..0896ca890d8bf 100644 --- 
a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/BUILD @@ -1,3 +1,3 @@ -package(default_visibility = ["//visibility:public"]) - -exports_files(["pyproject.toml"]) \ No newline at end of file +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile index 531966a802f71..ce1c24875570c 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/Makefile @@ -25,4 +25,4 @@ install: poetry install --with dev install_editable: - pip install -e ".[dev]" \ No newline at end of file + pip install -e ".[dev]" diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md index 9ebdb64901e7a..aefccac4c5fba 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/README.md @@ -24,7 +24,7 @@ llm = BedrockMultiModal( model="anthropic.claude-3-haiku-20240307-v1:0", # or other Bedrock multi-modal models temperature=0.0, max_tokens=300, - region_name="eu-central-1" # make sure to use the region where the model access is granted + region_name="eu-central-1", # make sure to use the region where the model access is granted ) # Method 1: Load images using SimpleDirectoryReader @@ -36,13 +36,13 @@ image_documents = SimpleDirectoryReader( image_doc = ImageDocument( image_path="/path/to/image.jpg", # Local file path # OR - image="base64_encoded_image_string" # Base64 encoded image + image="base64_encoded_image_string", # Base64 encoded image ) # Get a completion with both text and image response = llm.complete( prompt="Describe this image in detail:", - image_documents=image_documents # or [image_doc] + image_documents=image_documents, # or [image_doc] ) print(response.text) @@ -53,6 +53,7 @@ print(response.text) You can authenticate with AWS Bedrock in several ways: 1. Environment variables: + ```bash export AWS_ACCESS_KEY_ID=your_access_key export AWS_SECRET_ACCESS_KEY=your_secret_key @@ -60,16 +61,18 @@ export AWS_REGION=us-east-1 # optional ``` 2. Explicit credentials: + ```python llm = BedrockMultiModal( model="anthropic.claude-3-haiku-20240307-v1:0", aws_access_key_id="your_access_key", aws_secret_access_key="your_secret_key", - region_name="eu-central-1" + region_name="eu-central-1", ) ``` 3. AWS CLI configuration: + ```bash aws configure ``` @@ -96,8 +99,7 @@ image_docs = SimpleDirectoryReader( ).load_data() response = llm.complete( - prompt="Compare these two images:", - image_documents=image_docs + prompt="Compare these two images:", image_documents=image_docs ) # Custom parameters @@ -109,7 +111,7 @@ llm = BedrockMultiModal( max_retries=10, # Maximum number of API retries additional_kwargs={ # Add other model-specific parameters - } + }, ) # Response includes token counts @@ -133,4 +135,4 @@ pytest tests/ ## License -This project is licensed under the MIT License. \ No newline at end of file +This project is licensed under the MIT License. 
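Note on the token counts shown in the README's advanced-usage example: they are copied verbatim from Bedrock's `x-amzn-bedrock-input-token-count` / `x-amzn-bedrock-output-token-count` response headers, so they arrive as strings and may be absent. A small sketch of defensive handling, assuming `response` came from `llm.complete(...)`:

```python
def token_usage(response) -> tuple[int, int]:
    """Return (input_tokens, output_tokens), treating missing headers as 0."""
    extra = response.additional_kwargs
    return (
        int(extra.get("input_tokens") or 0),
        int(extra.get("output_tokens") or 0),
    )
```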
diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py index 9aa24ea500704..12bb7290ac33f 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/__init__.py @@ -1,3 +1,3 @@ from llama_index.multi_modal_llms.bedrock.base import BedrockMultiModal -__all__ = ["BedrockMultiModal"] \ No newline at end of file +__all__ = ["BedrockMultiModal"] diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py index 3e60f963e78bf..fb00fd277dcbb 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/base.py @@ -1,13 +1,10 @@ -from typing import Any, Callable, Dict, List, Optional, Sequence +from typing import Any, Callable, Dict, Optional, Sequence import boto3 import aioboto3 from botocore.config import Config from llama_index.core.base.llms.types import ( CompletionResponse, - CompletionResponseGen, - CompletionResponseAsyncGen, - MessageRole, ) from llama_index.core.bridge.pydantic import Field, PrivateAttr from llama_index.core.callbacks import CallbackManager @@ -147,7 +144,7 @@ def _get_client(self) -> Any: aws_secret_access_key=self.aws_secret_access_key, region_name=self.region_name, ) - return session.client('bedrock-runtime', config=self._config) + return session.client("bedrock-runtime", config=self._config) @classmethod def class_name(cls) -> str: @@ -169,19 +166,19 @@ def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]: "contentType": "application/json", "accept": "application/json", } - + if self.model.startswith("anthropic.claude"): model_kwargs["body"] = { "anthropic_version": "bedrock-2023-05-31", "max_tokens": self.max_tokens if self.max_tokens is not None else 300, "temperature": self.temperature, } - + # Add any additional kwargs if "body" in model_kwargs: model_kwargs["body"].update(self.additional_kwargs) model_kwargs["body"].update(kwargs) - + return model_kwargs def _complete( @@ -203,6 +200,7 @@ def _complete( # Convert body to JSON string if isinstance(model_kwargs.get("body"), dict): import json + body_str = json.dumps(model_kwargs["body"]) del model_kwargs["body"] else: @@ -219,7 +217,7 @@ def _complete( # Parse the streaming response body response_body = json.loads(response["body"].read()) - + # Parse response based on model if self.model.startswith("anthropic.claude"): 
completion = response_body["content"][0]["text"] @@ -231,8 +229,12 @@ def _complete( text=completion, raw=response_body, additional_kwargs={ - "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), - "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-input-token-count" + ), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-output-token-count" + ), }, ) @@ -261,6 +263,7 @@ async def acomplete( # Convert body to JSON string if isinstance(model_kwargs.get("body"), dict): import json + body_str = json.dumps(model_kwargs["body"]) del model_kwargs["body"] else: @@ -278,7 +281,7 @@ async def acomplete( # Parse the streaming response body response_body = json.loads(await response["body"].read()) - + # Parse response based on model if self.model.startswith("anthropic.claude"): completion = response_body["content"][0]["text"] @@ -290,8 +293,12 @@ async def acomplete( text=completion, raw=response_body, additional_kwargs={ - "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-input-token-count"), - "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get("x-amzn-bedrock-output-token-count"), + "input_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-input-token-count" + ), + "output_tokens": response["ResponseMetadata"]["HTTPHeaders"].get( + "x-amzn-bedrock-output-token-count" + ), }, ) @@ -299,9 +306,7 @@ def chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Chat with the model.""" raise NotImplementedError("Chat is not supported for this model.") - def stream_chat( - self, messages: Sequence[Any], **kwargs: Any - ) -> Any: + def stream_chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Stream chat with the model.""" raise NotImplementedError("Stream chat is not supported for this model.") @@ -309,9 +314,7 @@ async def achat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Chat with the model asynchronously.""" raise NotImplementedError("Async chat is not supported for this model.") - async def astream_chat( - self, messages: Sequence[Any], **kwargs: Any - ) -> Any: + async def astream_chat(self, messages: Sequence[Any], **kwargs: Any) -> Any: """Stream chat with the model asynchronously.""" raise NotImplementedError("Async stream chat is not supported for this model.") @@ -319,10 +322,14 @@ def stream_complete( self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any ) -> Any: """Complete the prompt with image support in a streaming fashion.""" - raise NotImplementedError("Streaming completion is not supported for this model.") + raise NotImplementedError( + "Streaming completion is not supported for this model." + ) async def astream_complete( self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any ) -> Any: """Complete the prompt with image support in a streaming fashion asynchronously.""" - raise NotImplementedError("Async streaming completion is not supported for this model.") \ No newline at end of file + raise NotImplementedError( + "Async streaming completion is not supported for this model." 
+ ) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py index 16e4970d6a590..8dc5f39bdf903 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/llama_index/multi_modal_llms/bedrock/utils.py @@ -1,6 +1,6 @@ import base64 import logging -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, Optional, Sequence import filetype from tenacity import ( before_sleep_log, @@ -35,14 +35,16 @@ logger = logging.getLogger(__name__) + def infer_image_mimetype_from_base64(base64_string) -> str: decoded_data = base64.b64decode(base64_string) kind = filetype.guess(decoded_data) return kind.mime if kind is not None else None + def infer_image_mimetype_from_file_path(image_file_path: str) -> str: file_extension = image_file_path.split(".")[-1].lower() - + if file_extension in ["jpg", "jpeg"]: return "image/jpeg" elif file_extension == "png": @@ -51,29 +53,21 @@ def infer_image_mimetype_from_file_path(image_file_path: str) -> str: return "image/gif" elif file_extension == "webp": return "image/webp" - + return "image/jpeg" + def generate_bedrock_multi_modal_message( prompt: str, image_documents: Optional[Sequence[ImageDocument]] = None, ) -> Dict[str, Any]: """Generate message for Bedrock multi-modal API.""" if image_documents is None: - return { - "role": "user", - "content": [{ - "type": "text", - "text": prompt - }] - } + return {"role": "user", "content": [{"type": "text", "text": prompt}]} message_content = [] # Add text content first - message_content.append({ - "type": "text", - "text": prompt - }) + message_content.append({"type": "text", "text": prompt}) # Add image content for image_document in image_documents: @@ -85,8 +79,8 @@ def generate_bedrock_multi_modal_message( "source": { "type": "base64", "media_type": "image/jpeg", # Default to JPEG - "data": base64_image - } + "data": base64_image, + }, } elif "file_path" in image_document.metadata: base64_image = encode_image(image_document.metadata["file_path"]) @@ -95,8 +89,8 @@ def generate_bedrock_multi_modal_message( "source": { "type": "base64", "media_type": "image/jpeg", # Default to JPEG - "data": base64_image - } + "data": base64_image, + }, } elif image_document.image: image_content = { @@ -104,17 +98,15 @@ def generate_bedrock_multi_modal_message( "source": { "type": "base64", "media_type": "image/jpeg", # Default to JPEG - "data": image_document.image - } + "data": image_document.image, + }, } - + if image_content: message_content.append(image_content) - return { - "role": "user", - "content": message_content - } + return {"role": "user", "content": message_content} + def resolve_bedrock_credentials( region_name: Optional[str] = None, @@ -140,6 +132,7 @@ def resolve_bedrock_credentials( return access_key, secret_key, region + def _create_retry_decorator(client: Any, max_retries: int) -> Any: """Create a retry decorator for Bedrock API calls.""" min_seconds = 4 @@ -160,6 +153,7 @@ def _create_retry_decorator(client: Any, max_retries: int) -> Any: before_sleep=before_sleep_log(logger, logging.WARNING), ) + def _create_retry_decorator_async(max_retries: int) -> Any: """Create a retry decorator for async Bedrock API calls.""" 
min_seconds = 4 @@ -180,6 +174,7 @@ def _create_retry_decorator_async(max_retries: int) -> Any: before_sleep=before_sleep_log(logger, logging.WARNING), ) + def invoke_model_with_retry( client: Any, model: str, @@ -193,13 +188,14 @@ def invoke_model_with_retry( @retry_decorator def _invoke_with_retry(**kwargs: Any) -> Any: return client.invoke_model(**kwargs) - + return _invoke_with_retry( modelId=model, body=messages, **kwargs, ) + async def invoke_model_with_retry_async( session: Any, config: Any, @@ -220,4 +216,4 @@ async def _invoke_with_retry(**kwargs: Any) -> Any: modelId=model, body=messages, **kwargs, - ) \ No newline at end of file + ) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml index c495fcbb79f45..eb9929c6e4b9b 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/pyproject.toml @@ -65,4 +65,4 @@ version = ">=v2.2.6" include = "llama_index/" [tool.pytest.ini_options] -asyncio_mode = "auto" \ No newline at end of file +asyncio_mode = "auto" diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD index f1c904f04a601..dabf212d7e716 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/BUILD @@ -1,9 +1 @@ -load("//bazel/rules/python:pytest.bzl", "llama_pytest_test") - -llama_pytest_test( - name = "multi_modal_llms_bedrock_test", - srcs = ["test_multi_modal_llms_bedrock.py"], - deps = [ - "//llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock", - ], -) \ No newline at end of file +python_tests() diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py index 6869e4ca47ea2..34d926c92fc64 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -36,17 +36,12 @@ def test_completion(mock_session): """Test completion.""" # Mock the invoke_model response mock_client = mock_session.return_value.client.return_value - mock_client.invoke_model.return_value = { - "content": [{"text": "test response"}] - } + mock_client.invoke_model.return_value = {"content": [{"text": "test response"}]} llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") - - response = llm.complete( - prompt="test prompt", - image_documents=[image_doc] - ) + + response = llm.complete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "test response" # Verify the call was made with correct parameters @@ -56,7 +51,7 @@ def test_completion(mock_session): assert call_args["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" -@pytest.mark.asyncio +@pytest.mark.asyncio() @patch("aioboto3.Session") async def test_async_completion(mock_session): """Test async completion.""" @@ -68,11 +63,8 @@ async 
def test_async_completion(mock_session): llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") - - response = await llm.acomplete( - prompt="test prompt", - image_documents=[image_doc] - ) + + response = await llm.acomplete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "async test response" - # No need to verify call args for async as the mock is structured differently \ No newline at end of file + # No need to verify call args for async as the mock is structured differently From 64958653a81f037a11f8a5db8e0c0b638a826dbb Mon Sep 17 00:00:00 2001 From: Daljeet Nandha Date: Fri, 10 Jan 2025 19:50:59 +0100 Subject: [PATCH 3/4] mock bedrock response --- .../tests/test_multi_modal_llms_bedrock.py | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py index 34d926c92fc64..35304b64fd125 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -1,6 +1,8 @@ """Test Bedrock multi-modal LLM.""" +import json +from io import BytesIO import pytest -from unittest.mock import patch +from unittest.mock import patch, MagicMock, AsyncMock from llama_index.core.multi_modal_llms.base import MultiModalLLM from llama_index.multi_modal_llms.bedrock import BedrockMultiModal @@ -36,7 +38,16 @@ def test_completion(mock_session): """Test completion.""" # Mock the invoke_model response mock_client = mock_session.return_value.client.return_value - mock_client.invoke_model.return_value = {"content": [{"text": "test response"}]} + mock_response = { + "body": BytesIO(json.dumps({"content": [{"text": "test response"}]}).encode()), + "ResponseMetadata": { + "HTTPHeaders": { + "x-amzn-bedrock-input-token-count": "100", + "x-amzn-bedrock-output-token-count": "50", + } + } + } + mock_client.invoke_model.return_value = mock_response llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") @@ -44,22 +55,30 @@ def test_completion(mock_session): response = llm.complete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "test response" + assert response.additional_kwargs["input_tokens"] == "100" + assert response.additional_kwargs["output_tokens"] == "50" # Verify the call was made with correct parameters mock_client.invoke_model.assert_called_once() - call_args = mock_client.invoke_model.call_args[1] - assert "modelId" in call_args - assert call_args["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" -@pytest.mark.asyncio() +@pytest.mark.asyncio @patch("aioboto3.Session") async def test_async_completion(mock_session): """Test async completion.""" # Mock the async client - mock_client = mock_session.return_value.client.return_value - mock_client.__aenter__.return_value.invoke_model.return_value = { - "content": [{"text": "async test response"}] + mock_client = mock_session.return_value.client.return_value.__aenter__.return_value + mock_body = AsyncMock() + mock_body.read.return_value = json.dumps({"content": [{"text": "async test response"}]}).encode() + mock_response = { + "body": mock_body, + "ResponseMetadata": { + "HTTPHeaders": { + 
"x-amzn-bedrock-input-token-count": "100", + "x-amzn-bedrock-output-token-count": "50", + } + } } + mock_client.invoke_model.return_value = mock_response llm = BedrockMultiModal() image_doc = ImageDocument(image="base64_encoded_string") @@ -67,4 +86,6 @@ async def test_async_completion(mock_session): response = await llm.acomplete(prompt="test prompt", image_documents=[image_doc]) assert response.text == "async test response" - # No need to verify call args for async as the mock is structured differently + assert response.additional_kwargs["input_tokens"] == "100" + assert response.additional_kwargs["output_tokens"] == "50" + mock_body.read.assert_awaited_once() From 159278215518832e55eb2440e31bd29430a9e703 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Mon, 13 Jan 2025 12:05:09 -0600 Subject: [PATCH 4/4] lint --- .../tests/test_multi_modal_llms_bedrock.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py index 35304b64fd125..cebd9c236b89a 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-bedrock/tests/test_multi_modal_llms_bedrock.py @@ -2,7 +2,7 @@ import json from io import BytesIO import pytest -from unittest.mock import patch, MagicMock, AsyncMock +from unittest.mock import patch, AsyncMock from llama_index.core.multi_modal_llms.base import MultiModalLLM from llama_index.multi_modal_llms.bedrock import BedrockMultiModal @@ -45,7 +45,7 @@ def test_completion(mock_session): "x-amzn-bedrock-input-token-count": "100", "x-amzn-bedrock-output-token-count": "50", } - } + }, } mock_client.invoke_model.return_value = mock_response @@ -61,14 +61,16 @@ def test_completion(mock_session): mock_client.invoke_model.assert_called_once() -@pytest.mark.asyncio +@pytest.mark.asyncio() @patch("aioboto3.Session") async def test_async_completion(mock_session): """Test async completion.""" # Mock the async client mock_client = mock_session.return_value.client.return_value.__aenter__.return_value mock_body = AsyncMock() - mock_body.read.return_value = json.dumps({"content": [{"text": "async test response"}]}).encode() + mock_body.read.return_value = json.dumps( + {"content": [{"text": "async test response"}]} + ).encode() mock_response = { "body": mock_body, "ResponseMetadata": { @@ -76,7 +78,7 @@ async def test_async_completion(mock_session): "x-amzn-bedrock-input-token-count": "100", "x-amzn-bedrock-output-token-count": "50", } - } + }, } mock_client.invoke_model.return_value = mock_response