Has anyone been able to successfully create an action to generate images using Gemini?
I tried using this script with my Gemini API key. The instructions say to use the 2.0 Flash Preview model, but I haven't been able to get it to work.
Any guidance on this would be helpful. Troubleshooting by asking it to print the error exactly as it appears doesn’t always reveal the root issue.
Here’s my Python script:
import os
import requests
def _gemini_error_detail(error):
    """Return a " Specific Detail: ..." suffix for recognised Gemini failures, else ""."""
    # Matched case-sensitively against each string argument of the exception.
    exact_markers = (
        'PERMISSION_DENIED',
        'API_KEY_INVALID',
        'API target not found',
        'not found for API version',
        'not supported for generateContent',
        'response modalities',
    )
    # Matched against the lower-cased argument.
    lowercase_markers = ('billing', 'quota')

    for arg_item in error.args:
        if not isinstance(arg_item, str):
            continue
        lowered = arg_item.lower()
        if (any(marker in arg_item for marker in exact_markers)
                or any(marker in lowered for marker in lowercase_markers)):
            return f" Specific Detail: {arg_item}"
    return ""


def generate_image_with_gemini(image_prompt: str) -> str:
    """
    Generates images using Google's Gemini model based on a text prompt.

    Args:
        image_prompt (string): Prompt for the image to be generated

    Envs:
        GEMINI_KEY (string): Your Gemini API key, required for generating images with Gemini
    """
    # NOTE(review): the Args/Envs docstring layout looks like it may be read by
    # the hosting platform, so it is kept as-is -- confirm before restructuring.
    #
    # Returns a human-readable status string in every case:
    #   * "Image generated by Gemini and saved as generated_image.png. ..." on success
    #   * "INFO_FROM_GEMINI: ..." when only text came back
    #   * "ERROR_PROCESSING_RESPONSE: ..." when the response had no usable image
    #   * "SCRIPT_ERROR: ..." for configuration, import, or API failures
    # Side effect on success: writes generated_image.png to the working directory.

    gemini_api_key = os.environ.get("GEMINI_KEY")
    if not gemini_api_key:
        # Fail fast with a clear message instead of an opaque SDK error later.
        return "SCRIPT_ERROR: GEMINI_KEY environment variable is not set or is empty."

    model_name = "gemini-2.0-flash-preview-image-generation"
    output_filename = "generated_image.png"

    try:
        # Imported inside the try so a missing package is reported by the
        # ImportError handler below instead of escaping the function.
        import google.generativeai as genai
        from PIL import Image
        from io import BytesIO

        genai.configure(api_key=gemini_api_key)

        # Earlier attempts failed with "module 'google.generativeai.types' has
        # no attribute 'GenerateContentConfig'", so the response modalities are
        # requested through GenerationConfig instead.
        # NOTE(review): Google's image-generation examples use the newer
        # `google-genai` package (`types.GenerateContentConfig`); this legacy
        # package may not accept `response_modalities` at all -- confirm
        # against the SDK version installed in the runtime.
        try:
            config_with_modalities = genai.types.GenerationConfig(
                candidate_count=1,
                response_modalities=['TEXT', 'IMAGE'],
            )
            model = genai.GenerativeModel(
                model_name=model_name,
                generation_config=config_with_modalities,
            )
        except TypeError as config_error:
            # Only configuration is guarded here, so a TypeError raised by the
            # API call itself is not misreported as a configuration problem.
            return (f"SCRIPT_ERROR: Configuration issue. "
                    f"Could not set response_modalities in genai.types.GenerationConfig. "
                    f"Error: {str(config_error)}. This suggests the SDK version might be older "
                    f"or `types.GenerateContentConfig` is needed but missing as per previous error.")

        response = model.generate_content(contents=image_prompt)

        if not response.candidates:
            return "ERROR_PROCESSING_RESPONSE: No image candidates found in Gemini API response."

        candidate = response.candidates[0]
        if not (candidate.content and candidate.content.parts):
            return "ERROR_PROCESSING_RESPONSE: Gemini response candidate is empty or has no parts."

        # Walk every part before returning so text that follows the image is
        # still reported; only the first image is kept.
        text_chunks = []
        image_bytes = None
        for part in candidate.content.parts:
            if part.text:
                text_chunks.append(part.text)
            elif image_bytes is None and part.inline_data and part.inline_data.data:
                image_bytes = part.inline_data.data
        accompanying_text = " ".join(text_chunks).strip()

        if image_bytes is not None:
            Image.open(BytesIO(image_bytes)).save(output_filename, "PNG")
            success_message = f"Image generated by Gemini and saved as {output_filename}."
            if accompanying_text:
                success_message += f" Accompanying text: {accompanying_text}"
            return success_message

        if accompanying_text:
            return f"INFO_FROM_GEMINI: Received text but no image. Text: {accompanying_text}"
        return "ERROR_PROCESSING_RESPONSE: Image data was not found in Gemini response parts, though parts existed."
    except ImportError as import_error:
        return f"SCRIPT_ERROR: Runtime Import Error - {str(import_error)}."
    except Exception as api_error:
        # Deliberate catch-all boundary: the caller only ever sees a string, so
        # every failure is turned into a descriptive message.
        error_type = type(api_error).__name__
        error_message = str(api_error)
        detail = _gemini_error_detail(api_error)
        return f"SCRIPT_ERROR: Gemini API call failed. Type: {error_type}. Message: {error_message}.{detail}"