Last active
March 1, 2026 04:34
-
-
Save inardini/b9267831de1d557596186d49e2a78451 to your computer and use it in GitHub Desktop.
Sends a multimodal query to an ADK agent hosted on Vertex AI Agent Engine.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import vertexai | |
| from vertexai import agent_engines | |
| from typing import Optional, Dict, Any | |
def query_multimodal_agent(
    project_id: str,
    location: str,
    agent_engine_id: str,
    file_gcs_uri: str,
    mime_type: str,
    prompt: str,
    user_id: str = "user_example_123",
) -> Dict[str, Any]:
    """
    Sends a multimodal (text + file) query to an ADK agent hosted on
    Vertex AI Agent Engine. It assumes Gemini as LLM.

    Args:
        project_id: Google Cloud project ID hosting the agent engine.
        location: Region of the deployed agent engine (e.g. "us-central1").
        agent_engine_id: ID of the reasoning engine resource.
        file_gcs_uri: gs:// URI of the file to analyze.
        mime_type: MIME type of the file (e.g. "image/jpeg", "video/mp4").
        prompt: Text instruction sent alongside the file.
        user_id: Identifier used to scope the agent session.

    Returns:
        The last event dict emitted by the agent's response stream, or an
        empty dict if the stream produced no events.
    """
    # Initialize the Vertex AI SDK
    vertexai.init(project=project_id, location=location)
    # Construct the full resource name for the agent engine
    agent_engine_resource_name = f"projects/{project_id}/locations/{location}/reasoningEngines/{agent_engine_id}"
    # Get the remote application representing the agent engine
    remote_app = agent_engines.get(agent_engine_resource_name)
    # Create a new session for the user
    remote_session = remote_app.create_session(user_id=user_id)
    # Prepare the multimodal message payload.
    # Check out the doc: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/use/langchain#multimodal-content
    multimodal_message = {
        "role": "user",
        "parts": [
            {
                "text": prompt,
            },
            {
                "file_data": {
                    "file_uri": file_gcs_uri,
                    "mime_type": mime_type,
                },
            },
        ],
    }
    print(f"Querying agent with prompt: '{prompt}' and file: {file_gcs_uri} ({mime_type})...")
    # Stream the query to the agent engine
    response_stream = remote_app.stream_query(
        user_id=user_id,
        session_id=remote_session["id"],
        message=multimodal_message,
    )
    # BUG FIX: the original referenced `event` without ever iterating the
    # stream, which raised NameError. Consume the stream, keeping the last
    # event and the last non-empty text part seen.
    # NOTE(review): assumes each event is a dict shaped like
    # {"content": {"parts": [{"text": ...}, ...]}} — confirm against the
    # stream_query docs for the deployed agent.
    final_event: Dict[str, Any] = {}
    final_response_text = ""
    for event in response_stream:
        final_event = event
        parts = event.get("content", {}).get("parts", [])
        if parts and parts[0].get("text"):
            final_response_text = parts[0]["text"]
    print(final_response_text)
    # Return the last event so the annotated Dict[str, Any] contract holds
    # (the original fell through and implicitly returned None).
    return final_event
if __name__ == "__main__":
    PROJECT_ID = "your-google-cloud-project-id"
    LOCATION = "us-central1"
    AGENT_ENGINE_ID = "your-reasoning-engine-id"

    # Demo queries as (label, GCS URI, MIME type, prompt) tuples so both
    # examples share one invocation path.
    examples = [
        (
            "Image",
            "gs://cloud-samples-data/generative-ai/image/scones.jpg",
            "image/jpeg",
            "Describe this image in five words.",
        ),
        (
            "Video",
            "gs://cloud-samples-data/generative-ai/video/ad_copy_from_video.mp4",
            "video/mp4",
            "What is the main subject of this video? Provide a short summary.",
        ),
    ]

    for label, uri, mime, question in examples:
        print(f"Running {label} Example")
        response = query_multimodal_agent(
            project_id=PROJECT_ID,
            location=LOCATION,
            agent_engine_id=AGENT_ENGINE_ID,
            file_gcs_uri=uri,
            mime_type=mime,
            prompt=question,
        )
        print(f"Agent Response ({label})")
        print(response)
        print("-" * 30)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment