Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate alt text #2816

Open
jcoyne opened this issue Jan 22, 2025 · 0 comments
Open

Generate alt text #2816

jcoyne opened this issue Jan 22, 2025 · 0 comments

Comments

@jcoyne
Copy link
Contributor

jcoyne commented Jan 22, 2025

Here's a script I used to extract the data from the site:

require 'csv'

# Writes images.csv with one row per exhibit image that has neither alt text
# nor a "decorative" flag, for the exhibits selected by slug below.
# Intended to be run where the Spotlight models are loaded (e.g. a Rails console).
Spotlight::Engine.routes.default_url_options[:host] = 'https://exhibits.stanford.edu'
exhibits = Spotlight::Exhibit.where(slug: ['rarebooks', 'maps-of-africa', 'baltic-way', 'lianhuanhua', 'exemplars'])
CSV.open("images.csv", "wb") do |csv|
  csv << ['Exhibit', 'Exhibit description', 'Page title', 'exhibit slug', 'page slug', 'page url', 'image url']
  exhibits.each do |exhibit|
    # Only pages that contain at least one block supporting alt text are of interest.
    pages_with_alt = exhibit.pages.order(Arel.sql('id = 1 DESC, created_at DESC')).select { |elem| elem.content.any?(&:alt_text?) }
    pages_with_alt.each do |page|
      # The page URL depends only on the exhibit and page, so build it once per
      # page instead of once per block. Home pages are routed by exhibit alone.
      route_parts = [exhibit]
      route_parts << page unless page.is_a?(Spotlight::HomePage)
      page_url = Spotlight::Engine.routes.url_helpers.url_for(route_parts)

      page.content.each do |block|
        next unless block.alt_text?

        images = block.item || {}
        images_without_alt = images.values.select { |img| img['alt_text'].blank? && img['decorative'].blank? }
        # Prefer an explicit URL, then the full image URL, then a thumbnail derived
        # from the IIIF tile source. `&.` plus filter_map skips images that carry
        # none of the three instead of raising NoMethodError on nil.
        # NOTE(review): the substitution may leave a double slash before "full" if
        # the tile source ends in "/info.json" - confirm the image server accepts it.
        urls = images_without_alt.filter_map do |img|
          img['url'] || img['full_image_url'].presence ||
            img['iiif_tilesource']&.sub('info.json', '/full/!400,400/0/default.jpg')
        end
        urls.each do |url|
          next if url == 'undefined' # Likely a media item. Not an image.

          csv << [exhibit.title, exhibit.description, page.title, exhibit.slug, page.slug, page_url, url]
        end
      end
    end
  end
end

Then download the files like this:

require 'csv'

# Downloads every image listed in the "image url" column of the CSV into
# ua-maps-file/<n>.jpg, where <n> is the 1-based data-row number.
output_dir = 'ua-maps-file'
Dir.mkdir(output_dir) unless Dir.exist?(output_dir)

CSV.foreach("ua-maps-drawings.csv", headers: true).with_index(1) do |row, n|
  url = row['image url']
  # Skip rows without a URL; the file number stays tied to the row number,
  # so later rows keep their numbering.
  if url.nil? || url.empty?
    warn "Row #{n} has no image url; skipping"
    next
  end

  # Relative paths are resolved against the exhibits site; absolute http(s) URLs are used as-is.
  url = "https://exhibits.stanford.edu#{url}" unless url.match?(%r{\Ahttps?://})
  puts "#{n} #{url}"

  # Pass the arguments as a list so the URL never goes through a shell:
  # characters such as "&", "?", ";" or spaces in the URL cannot split or alter the command.
  downloaded = system('curl', '-s', url, '-o', File.join(output_dir, "#{n}.jpg"))
  warn "Download failed for row #{n}: #{url}" unless downloaded
end

We then upload these files to GCP and run the following Python script in Colab Enterprise:

import vertexai
import csv
from google.cloud import storage

from io import StringIO

from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Part,
    HarmCategory,
    HarmBlockThreshold,
)
# Define project information
PROJECT_ID = "sul-ai-sandbox"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
BUCKET_NAME = "cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4"
DIRECTORY = "exhibits-alt-text/pilot-1"
INPUTFILE = f'{DIRECTORY}/pilot.csv'
OUTPUTFILE = f'{DIRECTORY}/output/generated-text.csv'

# Initialize Vertex AI *before* constructing the model so the model is created
# against the project and location configured here rather than whatever the
# environment defaults happen to be.
vertexai.init(project=PROJECT_ID, location=LOCATION)

model_id = 'gemini-1.5-pro'
# Only block content rated as high-probability harmful in each category.
safety_settings = {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}
model = GenerativeModel(
    model_id,
    safety_settings=safety_settings,
)


# Send Google Cloud Storage Document to Vertex AI
def process_document(
    prompt: str,
    file_uri: str,
    generation_config: GenerationConfig | None = None,
) -> str | None:
    """Ask the module-level ``model`` to respond to ``prompt`` about one image.

    Args:
        prompt: Text instruction sent alongside the image.
        file_uri: URI of the image; it is attached with MIME type ``image/jpeg``.
        generation_config: Optional generation settings forwarded to the model.
            ``None`` (the default) leaves the model's own defaults in place.

    Returns:
        The response text with trailing whitespace stripped, or ``None`` when
        the request failed (the error is printed rather than raised so that a
        single bad image does not abort a batch run).
    """
    # Load file directly from Google Cloud Storage
    file_part = Part.from_uri(
        uri=file_uri,
        mime_type="image/jpeg",
    )

    # Load contents
    contents = [file_part, prompt]

    try:
        # Send to Gemini
        response = model.generate_content(contents, generation_config=generation_config)
        return response.text.rstrip()
    except ValueError as e:
        # NOTE(review): presumably raised when the response carries no text
        # (e.g. it was blocked) - confirm against the SDK.
        print(f"A ValueError occurred: {e}")
    except Exception as e:
        # Handle any other unforeseen errors
        print(f"An unexpected error occurred: {e}")
    return None

def get_blob(blob_name):
  """Return the blob object named ``blob_name`` from the ``BUCKET_NAME`` bucket."""
  # A fresh storage client is created on every call.
  return storage.Client().bucket(BUCKET_NAME).blob(blob_name)

def description(exhibit_name, exhibit_description, file_uri):
  """Build the alt-text prompt for one image and return the model's answer.

  The file URI is printed first, then the exhibit name and description are
  embedded in the prompt, which is passed with the URI to ``process_document``;
  that call's result is returned unchanged.
  """
  print(file_uri)
  instructions = f"""
  This is an image from the Stanford University exhibit entitled "{exhibit_name}".
  {exhibit_description}
  Please briefly describe what is pictured in the image. Limit your response to 150 characters or fewer.
  Please avoid starting the description with "This is a photo of..." or "This is an image of...", just say what it is in the image."""
  return process_document(instructions, file_uri)

# Accumulate the output CSV in memory; it is uploaded in one piece at the end.
csv_buffer = StringIO()

# Create a CSV writer
writer = csv.writer(csv_buffer)

# Write header row
writer.writerow(["File", "Description"])

with get_blob(INPUTFILE).open() as csvfile:
  reader = csv.reader(csvfile)

  next(reader, None) # skip headers (tolerates an empty input file)

  # The image for each data row is looked up by its 1-based row number: <count>.jpg
  for count, row in enumerate(reader, start=1):
    exhibit_name = row[0]
    exhibit_description = row[1]
    file_uri = f'gs://{BUCKET_NAME}/{DIRECTORY}/{count}.jpg'

    # Record the file URI under the "File" header so each description can be
    # traced back to its image (the exhibit name alone repeats across rows).
    writer.writerow([file_uri, description(exhibit_name, exhibit_description, file_uri)])


# Get the CSV content as a string
csv_content = csv_buffer.getvalue()
get_blob(OUTPUTFILE).upload_from_string(csv_content, content_type='text/csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant