import os
import logging

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.google_genai import GoogleGenAI

# Setup logging
logger = logging.getLogger(__name__)


# Helper function to load the system prompt from a file
def load_prompt_from_file(filename: str = "../prompts/image_analyzer_prompt.txt") -> str:
    """Loads the system prompt from a text file."""
    # Resolve the prompt path relative to this script's directory
    script_dir = os.path.dirname(__file__)
    prompt_path = os.path.join(script_dir, filename)
    try:
        with open(prompt_path, "r") as f:
            prompt = f.read()
        logger.info(f"Successfully loaded system prompt from {prompt_path}")
        return prompt
    except FileNotFoundError:
        logger.error(f"Prompt file {filename} not found at {prompt_path}. Using fallback prompt.")
        # Fallback minimal prompt
        return "You are an image analyzer. Describe the image factually."
    except Exception as e:
        logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
        return "You are an image analyzer. Describe the image factually."


def initialize_image_analyzer_agent() -> FunctionAgent:
    """
    Create an agent that orchestrates image analysis.

    Uses the configured Gemini model's multimodal capabilities directly, without explicit tools.
    Configuration is read from environment variables and the system prompt from a file.
    """
    logger.info("Initializing ImageAnalyzerAgent...")

    # Configuration from environment variables
    llm_model_name = os.getenv("IMAGE_ANALYZER_LLM_MODEL", "gemini-2.5-pro-preview-03-25")
    gemini_api_key = os.getenv("GEMINI_API_KEY")

    if not gemini_api_key:
        logger.error("GEMINI_API_KEY not found in environment variables.")
        raise ValueError("GEMINI_API_KEY must be set")

    try:
        llm = GoogleGenAI(
            api_key=gemini_api_key,
            model=llm_model_name,
        )
        logger.info(f"Using LLM model: {llm_model_name}")

        # Load system prompt from file
        system_prompt = load_prompt_from_file()

        # Note: This agent is a FunctionAgent but doesn't explicitly define tools.
        # It relies on the configured multimodal Gemini LLM to follow the system prompt
        # and perform the analysis when an image is passed in the ChatMessage blocks.
        agent = FunctionAgent(
            name="image_analyzer_agent",
            description=(
                "ImageAnalyzerAgent inspects image files using its multimodal capabilities, "
                "interpreting the visual content according to a detailed factual analysis prompt."
            ),
            llm=llm,
            system_prompt=system_prompt,
            # No explicit tools needed when relying on a direct multimodal LLM call
            # tools=[],
            can_handoff_to=["planner_agent", "research_agent", "reasoning_agent", "figure_interpretation_agent"],
        )
        logger.info("ImageAnalyzerAgent initialized successfully.")
        return agent
    except Exception as e:
        logger.error(f"Error during ImageAnalyzerAgent initialization: {e}", exc_info=True)
        raise


# Example usage (for testing if run directly)
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.info("Running image_analyzer_agent.py directly for testing...")

    # Ensure API key is set for testing
    if not os.getenv("GEMINI_API_KEY"):
        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
    else:
        try:
            test_agent = initialize_image_analyzer_agent()
            print("Image Analyzer Agent initialized successfully for testing.")
            # To test further, construct a ChatMessage containing an ImageBlock
            # and run it through the agent (e.g., via agent.run()).
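            # Minimal sketch of such a test. It assumes a recent llama-index that exposes
            # ChatMessage/TextBlock/ImageBlock content blocks and accepts a ChatMessage as
            # user_msg for FunctionAgent.run(); TEST_IMAGE_PATH is a hypothetical environment
            # variable used only for this demo.
            test_image_path = os.getenv("TEST_IMAGE_PATH")
            if test_image_path:
                import asyncio

                from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock

                async def _run_image_test() -> None:
                    # Build a multimodal user message: text instruction plus the image file
                    message = ChatMessage(
                        role="user",
                        blocks=[
                            TextBlock(text="Analyze this image and describe it factually."),
                            ImageBlock(path=test_image_path),
                        ],
                    )
                    response = await test_agent.run(user_msg=message)
                    print(f"Agent response:\n{response}")

                asyncio.run(_run_image_test())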
        except Exception as e:
            print(f"Error during testing: {e}")