We then upload these files to GCP (one way to do the upload is sketched after the script) and run the following Python script in Colab Enterprise:
import vertexai
import csv
from google.cloud import storage
from io import StringIO
from vertexai.generative_models import (
GenerationConfig,
GenerativeModel,
Part,
HarmCategory,
HarmBlockThreshold,
)
model_id = 'gemini-1.5-pro'

safety_settings = {
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}
model = GenerativeModel(
model_id,
safety_settings=safety_settings,
)
# Define project information
PROJECT_ID = "sul-ai-sandbox"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
BUCKET_NAME = "cloud-ai-platform-e215f7f7-a526-4a66-902d-eb69384ef0c4"
DIRECTORY = "exhibits-alt-text/pilot-1"
INPUTFILE = f'{DIRECTORY}/pilot.csv'
OUTPUTFILE = f'{DIRECTORY}/output/generated-text.csv'

# Initialize Vertex AI
vertexai.init(project=PROJECT_ID, location=LOCATION)
# Send a Google Cloud Storage document to Vertex AI
def process_document(
    prompt: str,
    file_uri: str,
    generation_config: GenerationConfig | None = None,
) -> str:
    # Load the file directly from Google Cloud Storage
    file_part = Part.from_uri(
uri=file_uri,
mime_type="image/jpeg",
)
    # Load contents
    contents = [file_part, prompt]
    try:
        # Send to Gemini
        response = model.generate_content(contents)  # , generation_config=generation_config)
        return response.text.rstrip()
    except ValueError as e:
        # Handle the ValueError exception
        print(f"A ValueError occurred: {e}")
    except Exception as e:
        # Handle any other unforeseen errors
        print(f"An unexpected error occurred: {e}")
def get_blob(blob_name):
    client = storage.Client()
    bucket = client.bucket(BUCKET_NAME)
    return bucket.blob(blob_name)
def description(exhibit_name, exhibit_description, file_uri):
    print(file_uri)
    prompt = f"""This is an image from the Stanford University exhibit entitled "{exhibit_name}". {exhibit_description} Please briefly describe what is pictured in the image. Limit your response to 150 characters or fewer. Please avoid starting the description with "This is a photo of..." or "This is an image of...", just say what it is in the image."""
    return process_document(prompt, file_uri)
csv_buffer = StringIO()
# Create a CSV writer
writer = csv.writer(csv_buffer)
# Write header row
writer.writerow(["File", "Description"])

with get_blob(INPUTFILE).open() as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip headers
    count = 0
    for row in reader:
        count += 1
        exhibit_name = row[0]
        exhibit_description = row[1]
        file_uri = f'gs://{BUCKET_NAME}/{DIRECTORY}/{count}.jpg'
        writer.writerow([row[0], description(exhibit_name, exhibit_description, file_uri)])

# Get the CSV content as a string
csv_content = csv_buffer.getvalue()
get_blob(OUTPUTFILE).upload_from_string(csv_content, content_type='text/csv')
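For reference, here is one way the upload step mentioned above could be done from Python with the same google-cloud-storage client the script already uses. This is a minimal sketch, not necessarily how the files were actually uploaded: the local file names (pilot.csv, 1.jpg, 2.jpg, ...) are assumptions, chosen only to match how the script builds file_uri from the row count.

import csv
from google.cloud import storage

upload_client = storage.Client(project=PROJECT_ID)
upload_bucket = upload_client.bucket(BUCKET_NAME)

# The input CSV is expected to have a header row, then one row per image:
# column 0 = exhibit name, column 1 = exhibit description.
upload_bucket.blob(f"{DIRECTORY}/pilot.csv").upload_from_filename("pilot.csv")

# Images are assumed to be named 1.jpg, 2.jpg, ... so they line up with the CSV rows,
# matching the gs://{BUCKET_NAME}/{DIRECTORY}/{count}.jpg URIs built in the loop above.
with open("pilot.csv", newline="") as f:
    n_rows = sum(1 for _ in csv.reader(f)) - 1  # subtract the header row
for i in range(1, n_rows + 1):
    upload_bucket.blob(f"{DIRECTORY}/{i}.jpg").upload_from_filename(f"{i}.jpg")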
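Before processing the whole CSV, it can be useful to sanity-check the model call on a single image. A minimal sketch, reusing description() and the constants from the script above; the exhibit name and description strings are placeholders, and 1.jpg is assumed to already exist in the bucket:

# Hypothetical one-off check against a single image that is already in the bucket.
test_uri = f'gs://{BUCKET_NAME}/{DIRECTORY}/1.jpg'
print(description("Example Exhibit", "A placeholder exhibit description.", test_uri))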
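One detail worth noting in process_document: it accepts a generation_config parameter, but the argument is commented out in the generate_content call, so the model runs with default settings. If you want to constrain the output, the config can be passed through; a minimal sketch of how that call could look, reusing the names from the script (the temperature and max_output_tokens values are placeholders, not settings we tested):

# Illustrative replacement for the generate_content call inside process_document.
generation_config = GenerationConfig(temperature=0.2, max_output_tokens=64)
response = model.generate_content(contents, generation_config=generation_config)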