Skip to content

Instantly share code, notes, and snippets.

@inardini
Last active March 1, 2026 04:34
Show Gist options
  • Select an option

  • Save inardini/b9267831de1d557596186d49e2a78451 to your computer and use it in GitHub Desktop.

Select an option

Save inardini/b9267831de1d557596186d49e2a78451 to your computer and use it in GitHub Desktop.
Sends a multimodal query to an ADK agent hosted on Vertex AI Agent Engine.
import os
import vertexai
from vertexai import agent_engines
from typing import Optional, Dict, Any
def query_multimodal_agent(
    project_id: str,
    location: str,
    agent_engine_id: str,
    file_gcs_uri: str,
    mime_type: str,
    prompt: str,
    user_id: str = "user_example_123",
) -> Dict[str, Any]:
    """
    Sends a multimodal query to an ADK agent hosted on Vertex AI Agent Engine.

    It assumes Gemini as LLM.

    Args:
        project_id: Google Cloud project ID.
        location: Vertex AI region (e.g. "us-central1").
        agent_engine_id: ID of the deployed reasoning/agent engine.
        file_gcs_uri: GCS URI of the media file to analyze (gs://...).
        mime_type: MIME type of the file (e.g. "image/jpeg", "video/mp4").
        prompt: Text prompt sent alongside the file.
        user_id: Identifier used to create the agent session.

    Returns:
        A dict with the session id and the agent's final text response.
    """
    # Initialize the Vertex AI SDK
    vertexai.init(project=project_id, location=location)

    # Construct the full resource name for the agent engine
    agent_engine_resource_name = (
        f"projects/{project_id}/locations/{location}"
        f"/reasoningEngines/{agent_engine_id}"
    )

    # Get the remote application representing the agent engine
    remote_app = agent_engines.get(agent_engine_resource_name)

    # Create a new session for the user
    remote_session = remote_app.create_session(user_id=user_id)

    # Prepare the multimodal message payload.
    # Check out the doc: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/use/langchain#multimodal-content
    multimodal_message = {
        "role": "user",
        "parts": [
            {
                "text": prompt,
            },
            {
                "file_data": {
                    "file_uri": file_gcs_uri,
                    "mime_type": mime_type,
                },
            },
        ],
    }

    print(f"Querying agent with prompt: '{prompt}' and file: {file_gcs_uri} ({mime_type})...")

    # Stream the query to the agent engine
    response_stream = remote_app.stream_query(
        user_id=user_id,
        session_id=remote_session["id"],
        message=multimodal_message,
    )

    # BUG FIX: the original referenced `event` without ever iterating the
    # stream, raising NameError. Consume the stream and keep the last
    # non-empty text part as the final response.
    final_response_text = ""
    for event in response_stream:
        # Events are dict-shaped; guard missing keys so partial/tool events
        # don't crash the extraction. (Assumes the documented event schema —
        # content.parts[].text — TODO confirm against the stream_query docs.)
        parts = event.get("content", {}).get("parts", [])
        if parts:
            text = parts[0].get("text", "")
            if text:
                final_response_text = text

    print(final_response_text)

    # Return a dict to honor the declared Dict[str, Any] return type
    # (the original fell through and returned None).
    return {
        "session_id": remote_session["id"],
        "response": final_response_text,
    }
if __name__ == "__main__":
    # Fill in with your own project/deployment identifiers before running.
    PROJECT_ID = "your-google-cloud-project-id"
    LOCATION = "us-central1"
    AGENT_ENGINE_ID = "your-reasoning-engine-id"

    # Example 1: Image Analysis
    print("Running Image Example")
    image_response = query_multimodal_agent(
        project_id=PROJECT_ID,
        location=LOCATION,
        agent_engine_id=AGENT_ENGINE_ID,
        file_gcs_uri="gs://cloud-samples-data/generative-ai/image/scones.jpg",
        mime_type="image/jpeg",
        prompt="Describe this image in five words.",
    )
    print("Agent Response (Image)")
    print(image_response)
    print("-" * 30)

    # Example 2: Video Analysis
    print("Running Video Example")
    video_response = query_multimodal_agent(
        project_id=PROJECT_ID,
        location=LOCATION,
        agent_engine_id=AGENT_ENGINE_ID,
        file_gcs_uri="gs://cloud-samples-data/generative-ai/video/ad_copy_from_video.mp4",
        mime_type="video/mp4",
        prompt="What is the main subject of this video? Provide a short summary.",
    )
    print("Agent Response (Video)")
    print(video_response)
    print("-" * 30)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment