Generate alt text #2816

jcoyne · 2025-01-22T17:33:55Z

Here's a script I used to extract the data from the site:

require 'csv'
# Export every exhibit image that still lacks alt text to images.csv, one row per image.
# Relies on Spotlight models/route helpers and ActiveSupport (blank?, presence), so it
# is presumably meant to be run inside the Spotlight application -- confirm.
Spotlight::Engine.routes.default_url_options[:host] = 'https://exhibits.stanford.edu'
exhibits = Spotlight::Exhibit.where(slug: ['rarebooks', 'maps-of-africa', 'baltic-way', 'lianhuanhua', 'exemplars'])
CSV.open("images.csv", "wb") do |csv|
  csv << ['Exhibit', 'Exhibit description', 'Page title', 'exhibit slug', 'page slug', 'page url', 'image url']
  exhibits.each do |exhibit|
    # Only pages that contain at least one block capable of carrying alt text
    pages_with_alt = exhibit.pages.order(Arel.sql('id = 1 DESC, created_at DESC')).select { |elem| elem.content.any?(&:alt_text?) }
    pages_with_alt.each do |page|
      # The page URL does not depend on the block, so build it once per page.
      # The home page is addressed by the exhibit alone.
      route_parts = [exhibit]
      route_parts << page unless page.is_a?(Spotlight::HomePage)
      page_url = Spotlight::Engine.routes.url_helpers.url_for(route_parts)

      page.content.each do |block|
        next unless block.alt_text?

        images = block.item || {}
        # Keep only images with neither alt text nor a "decorative" flag
        images_without_alt = images.values.select { |img| img['alt_text'].blank? && img['decorative'].blank? }

        # Prefer an explicit URL, then the full image URL, then a thumbnail derived from
        # the IIIF tilesource. The tilesource's "/info.json" suffix is replaced together
        # with its leading slash so the result has no "//", and a missing tilesource
        # yields nil (dropped by compact) instead of raising NoMethodError.
        urls = images_without_alt.map do |img|
          img['url'].presence ||
            img['full_image_url'].presence ||
            img['iiif_tilesource']&.sub(%r{/?info\.json}, '/full/!400,400/0/default.jpg')
        end.compact

        urls.each do |url|
          next if url == 'undefined' # Likely a media item. Not an image.

          csv << [exhibit.title, exhibit.description, page.title, exhibit.slug, page.slug, page_url, url]
        end
      end
    end
  end
end

Then download the files like this:

require 'csv'
# Download every image listed in the CSV to ua-maps-file/<n>.jpg, where n is the
# 1-based position of the row. Rows are never renumbered, so a skipped or failed
# row leaves a gap rather than shifting later file names.
CSV.foreach("ua-maps-drawings.csv", headers: true).with_index(1) do |row, n|
  url = row['image url'].to_s.strip
  if url.empty?
    warn "#{n} skipped: no image url"
    next
  end

  # Site-relative paths are resolved against the exhibits host; absolute URLs pass through
  url = "https://exhibits.stanford.edu#{url}" unless url.start_with?('https://', 'http://')
  puts "#{n} #{url}"

  # The argument-list form of system bypasses the shell, so characters such as
  # '&', ';' or spaces in a URL cannot break or inject into the command.
  # -f makes curl fail on HTTP errors instead of saving the error page as a .jpg.
  downloaded = system('curl', '-s', '-f', url, '-o', "ua-maps-file/#{n}.jpg")
  warn "#{n} download failed: #{url}" unless downloaded
end

We then upload these files to GCP and run the following Python script in Colab Enterprise:

import vertexai
import csv
from google.cloud import storage

from io import StringIO

from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Part,
    HarmCategory,
    HarmBlockThreshold,
)
# Define project information
PROJECT_ID = "sul-ai-sandbox"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
BUCKET_NAME = "cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4"
DIRECTORY = "exhibits-alt-text/pilot-1"
INPUTFILE = f'{DIRECTORY}/pilot.csv'
OUTPUTFILE = f'{DIRECTORY}/output/generated-text.csv'

# Initialize Vertex AI before constructing the model, so the project and location
# above are already configured when GenerativeModel is created.
# NOTE(review): the SDK appears to resolve project/location at construction time;
# building the model first would then fall back to environment defaults -- confirm.
vertexai.init(project=PROJECT_ID, location=LOCATION)

model_id = 'gemini-1.5-pro'

# Use the BLOCK_ONLY_HIGH threshold for every harm category listed here
safety_settings = {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}

# Shared model instance used by process_document
model = GenerativeModel(
    model_id,
    safety_settings=safety_settings,
)


# Send Google Cloud Storage Document to Vertex AI
def process_document(
    prompt: str,
    file_uri: str,
    generation_config: GenerationConfig | None = None,
) -> str | None:
    """Ask the module-level model to answer `prompt` about the image at `file_uri`.

    Args:
        prompt: Text instruction sent alongside the image.
        file_uri: URI of the image, passed to Part.from_uri; the image is always
            declared as "image/jpeg".
        generation_config: Optional generation settings forwarded to the model.
            None (the default) leaves the model's own defaults in effect.

    Returns:
        The response text with trailing whitespace removed, or None when the
        request failed; the error is printed rather than raised.
    """
    # Load file directly from Google Cloud Storage
    file_part = Part.from_uri(
        uri=file_uri,
        mime_type="image/jpeg",
    )

    # Load contents
    contents = [file_part, prompt]

    try:
        # Send to Gemini, honouring the caller's generation_config (previously ignored)
        response = model.generate_content(contents, generation_config=generation_config)
        return response.text.rstrip()
    except ValueError as e:
        # NOTE(review): presumably raised by response.text when the response has no
        # usable text (e.g. blocked by safety settings) -- confirm against the SDK.
        print(f"A ValueError occurred: {e}")
    except Exception as e:
        # Best effort: report any other error and continue, so one bad image
        # does not abort the whole batch.
        print(f"An unexpected error occurred: {e}")

    # Explicit failure result; callers must be prepared for None
    return None

def get_blob(blob_name):
  """Return the blob object named `blob_name` in the BUCKET_NAME bucket.

  A new storage client is created on every call.
  """
  return storage.Client().bucket(BUCKET_NAME).blob(blob_name)

def description(exhibit_name, exhibit_description, file_uri):
  """Build the alt-text prompt for one image and return process_document's result.

  The exhibit title and description are embedded in the prompt as context.
  The file URI is printed before the request is sent.
  """
  print(file_uri)
  prompt_lines = [
    f'This is an image from the Stanford University exhibit entitled "{exhibit_name}".',
    f'{exhibit_description}',
    'Please briefly describe what is pictured in the image. Limit your response to 150 characters or fewer.',
    'Please avoid starting the description with "This is a photo of..." or "This is an image of...", just say what it is in the image.',
  ]
  # Every line is preceded by a newline and two spaces, with no trailing newline
  request = "".join(f"\n  {line}" for line in prompt_lines)
  return process_document(request, file_uri)

# Build the output CSV in memory; it is uploaded in a single call at the end
csv_buffer = StringIO()

# Create a CSV writer
writer = csv.writer(csv_buffer)

# Write header row
writer.writerow(["File", "Description"])

with get_blob(INPUTFILE).open() as csvfile:
  reader = csv.reader(csvfile)

  next(reader, None) # skip headers (tolerates a completely empty input file)

  # Images are addressed as <count>.jpg, where count is the 1-based position of
  # the data row in the input CSV.
  count = 0
  for count, row in enumerate(reader, start=1):
    exhibit_name = row[0]
    exhibit_description = row[1]
    file_uri = f'gs://{BUCKET_NAME}/{DIRECTORY}/{count}.jpg'

    # The "File" column holds the image URI so each description can be matched to
    # its image; previously it held the exhibit name, which repeats across rows.
    writer.writerow([file_uri, description(exhibit_name, exhibit_description, file_uri)])


# Get the CSV content as a string
csv_content = csv_buffer.getvalue()
get_blob(OUTPUTFILE).upload_from_string(csv_content, content_type='text/csv')

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Generate alt text #2816

Generate alt text #2816

jcoyne commented Jan 22, 2025 •

edited

Loading

Generate alt text #2816

Generate alt text #2816

Comments

jcoyne commented Jan 22, 2025 • edited Loading

jcoyne commented Jan 22, 2025 •

edited

Loading