Add DALL-E 3 support (#105)

* Add DALL-E 3 support
* Update gcp-deployment.yml
RAHB-REALTORS-Association · Nov 8, 2023 · b70d279 · b70d279
1 parent f2fa6c4
commit b70d279
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 7 deletions.
diff --git a/.github/workflows/gcp-deploy.yml b/.github/workflows/gcp-deploy.yml
@@ -41,4 +41,4 @@ jobs:
         env:
           GCS_BUCKET_NAME: ${{ secrets.GCS_BUCKET_NAME }}
 
-      - run: gcloud functions deploy ${{ secrets.GCP_FUNCTION }} --runtime python311 --memory 512MB --timeout 120s --trigger-http --allow-unauthenticated --entry-point process_event --region ${{ secrets.GCP_REGION }} --set-env-vars GCP_SA_KEY=${{ secrets.GCP_SA_KEY }},GCS_BUCKET_NAME=${{ secrets.GCS_BUCKET_NAME }},OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }},MODEL_NAME=${{ secrets.MODEL_NAME }},SYSTEM_PROMPT="${{ secrets.SYSTEM_PROMPT }}",MAX_TURNS="${{ secrets.MAX_TURNS }}",TTL="${{ secrets.TTL }}",MAX_TOKENS_INPUT="${{ secrets.MAX_TOKENS_INPUT }}",MAX_TOKENS_OUTPUT="${{ secrets.MAX_TOKENS_OUTPUT }}",TEMPERATURE="${{ secrets.TEMPERATURE }}",IMAGE_SIZE="${{ secrets.IMAGE_SIZE }}",ELEVENLABS_API_KEY="${{ secrets.ELEVENLABS_API_KEY }}",ELEVENLABS_MODEL_NAME="${{ secrets.ELEVENLABS_MODEL_NAME }}"
+      - run: gcloud functions deploy ${{ secrets.GCP_FUNCTION }} --runtime python311 --memory 512MB --timeout 120s --trigger-http --allow-unauthenticated --entry-point process_event --region ${{ secrets.GCP_REGION }} --set-env-vars GCP_SA_KEY=${{ secrets.GCP_SA_KEY }},GCS_BUCKET_NAME=${{ secrets.GCS_BUCKET_NAME }},OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }},MODEL_NAME=${{ secrets.MODEL_NAME }},SYSTEM_PROMPT="${{ secrets.SYSTEM_PROMPT }}",MAX_TURNS="${{ secrets.MAX_TURNS }}",TTL="${{ secrets.TTL }}",MAX_TOKENS_INPUT="${{ secrets.MAX_TOKENS_INPUT }}",MAX_TOKENS_OUTPUT="${{ secrets.MAX_TOKENS_OUTPUT }}",TEMPERATURE="${{ secrets.TEMPERATURE }}",IMAGE_SIZE="${{ secrets.IMAGE_SIZE }}",IMAGE_STYLE="${{ secrets.IMAGE_STYLE}}",IMAGE_QUALITY="${{ secrets.IMAGE_QUALITY }}",DALLE_MODEL="${{ secrets.DALLE_MODEL }}",ELEVENLABS_API_KEY="${{ secrets.ELEVENLABS_API_KEY }}",ELEVENLABS_MODEL_NAME="${{ secrets.ELEVENLABS_MODEL_NAME }}"
diff --git a/README.md b/README.md
@@ -82,7 +82,10 @@ In your GitHub repository:
   - `MAX_TOKENS_INPUT`: This sets the maximum number of tokens that can be sent. Default: 1000 tokens.
   - `MAX_TOKENS_OUTPUT`: This sets the maximum number of tokens that can be received. Default: 1000 tokens.
   - `TEMPERATURE`: This sets the temperature for the OpenAI API. Default: 0.8.
-  - `IMAGE_SIZE`: This sets the image size for the DALL-E API. Default: "512x512".
+  - `IMAGE_SIZE`: This sets the image size for the DALL-E API. Default: "1024x1024".
+  - `IMAGE_STYLE`: This sets the image style for the DALL-E API. Must be either "natural" or "vivid". Default: "natural".
+  - `IMAGE_QUALITY`: This sets the image quality for the DALL-E API, can be "standard" or "hd". Default: "standard".
+  - `DALLE_MODEL`: This sets the model used for the DALL-E API. Must be either "dall-e-2" or "dall-e-3". Default: "dall-e-2".
   - `API_URL`: This sets the API endpoint for the chat completions API. Default: "https://api.openai.com/v1/chat/completions".
   - `ELEVENLABS_API_KEY`: Your ElevenLabs API key. Can be disabled by omitting this secret.
   - `ELEVENLABS_MODEL_NAME`: ElevenLabs model you're using. Default: "eleven_multilingual_v2".

diff --git a/docs/setup.md b/docs/setup.md
@@ -58,7 +58,9 @@ This bot is intended to be deployed on Google Cloud Functions, with audio data t
     - `MAX_TOKENS_INPUT`: This sets the maximum number of tokens that can be sent. Default: 1000 tokens.
     - `MAX_TOKENS_OUTPUT`: This sets the maximum number of tokens that can be received. Default: 1000 tokens.
     - `TEMPERATURE`: This sets the temperature for the OpenAI API. Default: 0.8.
-    - `IMAGE_SIZE`: This sets the image size for the DALL-E API. Default: "512x512".
+    - `IMAGE_SIZE`: This sets the image size for the DALL-E API. Default: "1024x1024".
+    - `IMAGE_STYLE`: This sets the image style for the DALL-E API. Must be either "natural" or "vivid". Default: "natural".
+    - `IMAGE_QUALITY`: This sets the image quality for the DALL-E API, can be "standard" or "hd". Default: "standard".
     - `API_URL`: This sets the API endpoint for the chat completions API. Default: "https://api.openai.com/v1/chat/completions".
     - `ELEVENLABS_API_KEY`: Your ElevenLabs API key. Can be disabled by omitting this secret.
     - `ELEVENLABS_MODEL_NAME`: ElevenLabs model you're using. Default: "eleven_multilingual_v2".

diff --git a/main.py b/main.py
@@ -18,6 +18,9 @@
 MAX_TOKENS_OUTPUT = get_env("MAX_TOKENS_OUTPUT")
 TEMPERATURE = get_env("TEMPERATURE")
 IMAGE_SIZE = get_env("IMAGE_SIZE")
+IMAGE_STYLE = get_env("IMAGE_STYLE")
+IMAGE_QUALITY = get_env("IMAGE_QUALITY")
+DALLE_MODEL = get_env("DALLE_MODEL")
 API_URL = get_env("API_URL")
 ELEVENLABS_API_KEY = get_env("ELEVENLABS_API_KEY")
 
@@ -88,8 +91,20 @@ def handle_message(user_id, user_message):
             if not prompt:
                 return jsonify({'text': 'Please provide a prompt for the image generation. Example: `/image sunset over a beach`.'})
 
+            model = DALLE_MODEL
+            style = IMAGE_STYLE
+            quality = IMAGE_QUALITY
+
             try:
-                image_resp = generate_image(prompt, n=1, size=IMAGE_SIZE)
+                image_resp = generate_image(
+                    prompt=prompt,
+                    n=1,
+                    size=IMAGE_SIZE,
+                    model=model,
+                    style=style,
+                    quality=quality,
+                    user=user_id
+                )
                 image_url = image_resp["data"][0]["url"]
                 return jsonify({
                     'text': 'Processing your image request...',

diff --git a/settings.py b/settings.py
@@ -60,11 +60,29 @@
         "description": "Temperature parameter for randomness in generation."
     },
     "IMAGE_SIZE": {
-        "default": "512x512",
+        "default": "1024x1024",
         "required": False,
         "type": str,
         "description": "Size of the images."
     },
+    "IMAGE_STYLE": {
+        "default": "natural",
+        "required": False,
+        "type": str,
+        "description": "Style of the images."
+    },
+    "IMAGE_QUALITY": {
+        "default": "standard",
+        "required": False,
+        "type": str,
+        "description": "Quality of the images."
+    },
+    "DALLE_MODEL": {
+        "default": "dall-e-2",
+        "required": False,
+        "type": str,
+        "description": "DALL-E model to use."
+    },
     "API_URL": {
         "default": None,
         "required": False,

diff --git a/utils/openai_helper.py b/utils/openai_helper.py
@@ -20,5 +20,28 @@ def num_tokens_from_string(string: str) -> int:
     return num_tokens
 
 # Define the function for image generation
-def generate_image(prompt, n=1, size="512x512"):
-    return openai.Image.create(prompt=prompt, n=n, size=size)
+def generate_image(prompt, n=1, size="1024x1024", model="dall-e-2", style="natural", quality="standard", user=""):
+    # If the model is 'dall-e-2' or not specified, use the existing behavior
+    if model == "dall-e-2":
+        return openai.Image.create(prompt=prompt, n=n, size=size)
+    # If the model is 'dall-e-3', use the new enhancements
+    elif model == "dall-e-3":
+        # Validate that DALL-E 3 is only generating one image at a time
+        if n != 1:
+            raise ValueError("DALL-E 3 currently supports generation of only 1 image at a time (n=1).")
+        # Validate the image size for DALL-E 3
+        valid_sizes = ["1024x1024", "1792x1024", "1024x1792"]
+        if size not in valid_sizes:
+            raise ValueError(f"Invalid size for DALL-E 3. Valid sizes: {valid_sizes}")
+        # Make the API call with the DALL-E 3 specific parameters
+        return openai.Image.create(
+            prompt=prompt,
+            n=n,
+            size=size,
+            model=model,
+            style=style,
+            quality=quality,
+            user=user
+        )
+    else:
+        raise ValueError("Invalid model specified. Valid models: 'dall-e-2', 'dall-e-3'.")