Last active
March 30, 2026 08:09
-
-
Save AIWintermuteAI/e3409667a3d01df500c2248bb3b40a0f to your computer and use it in GitHub Desktop.
Pi AI Hat+ 2 Benchmarking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Benchmark time-to-first-byte (TTFB) of an Ollama-compatible /api/chat
# endpoint on an image (vision) prompt, after a text-only warm-up request.
#
# Environment overrides: HOST, PORT (or $1), URL, MODEL, WARMUP_PROMPT,
# PROMPT, IMAGE_PATH.
set -euo pipefail

HOST="${HOST:-localhost}"
PORT="${1:-${PORT:-11434}}"
URL="${URL:-http://${HOST}:${PORT}/api/chat}"
MODEL="${MODEL:-aleSuglia/qwen2-vl-2b-instruct-q4_k_m}"
WARMUP_PROMPT="${WARMUP_PROMPT:-Reply with OK only.}"
PROMPT="${PROMPT:-What is in this image?}"
IMAGE_PATH="${IMAGE_PATH:-test.jpg}"

# Required external tools.
for tool in curl base64; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Error: $tool is required but was not found." >&2
    exit 1
  fi
done

# Optional: download a sample image if missing.
if [ ! -f "$IMAGE_PATH" ]; then
  echo "Image not found at $IMAGE_PATH, downloading sample..."
  curl --silent --show-error --fail -L \
    -o "$IMAGE_PATH" \
    "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg"
fi

# Encode image as a single base64 line (GNU `-w 0`; BSD/macOS fallback
# strips the wrapped newlines with tr).
if base64 --help >/dev/null 2>&1; then
  IMG_B64="$(base64 -w 0 < "$IMAGE_PATH" 2>/dev/null || base64 < "$IMAGE_PATH" | tr -d '\n')"
else
  IMG_B64="$(base64 < "$IMAGE_PATH" | tr -d '\n')"
fi

# Warm-up payload: text only (no image), non-streaming.
# NOTE(review): prompts are interpolated into JSON unescaped — keep them free
# of double quotes and backslashes.
WARMUP_PAYLOAD="$(printf '{"model":"%s","messages":[{"role":"user","content":"%s"}],"stream":false}' \
  "$MODEL" "$WARMUP_PROMPT")"

# Measured payload: includes image, streamed so the first byte arrives early.
PAYLOAD="$(printf '{"model":"%s","messages":[{"role":"user","content":"%s","images":["%s"]}],"stream":true}' \
  "$MODEL" "$PROMPT" "$IMG_B64")"

echo "Running warm-up prompt (text-only), response discarded..."
curl --silent --show-error --fail "$URL" \
  -H 'Content-Type: application/json' \
  -d "$WARMUP_PAYLOAD" \
  >/dev/null

echo "Measuring time to first response character on image prompt..."
response_file="$(mktemp)"
cleanup() {
  rm -f "$response_file"
}
trap cleanup EXIT

# Start the clock only AFTER local setup (mktemp, trap) so the measurement
# covers the request itself, not script bookkeeping.
start_ns="$(date +%s%N)"

set +e
ttfb_ms="$({
  curl --silent --show-error --fail --no-buffer "$URL" \
    -H 'Content-Type: application/json' \
    -d "$PAYLOAD"
} | tee "$response_file" | {
  # Block until exactly one character of the response body has arrived.
  IFS= read -r -n1 _first_char
  read_status=$?
  if [ "$read_status" -ne 0 ]; then
    exit 1
  fi
  now_ns="$(date +%s%N)"
  elapsed_ns=$((now_ns - start_ns))
  elapsed_ms_int=$((elapsed_ns / 1000000))
  elapsed_ms_frac=$(((elapsed_ns / 10000) % 100))
  printf '%s.%02d\n' "$elapsed_ms_int" "$elapsed_ms_frac"
  # Drain the rest of the stream so tee finishes writing the full body.
  cat >/dev/null
})"
status=$?
set -e

if [ "$status" -ne 0 ]; then
  echo "Error: could not read first response character." >&2
  exit 1
fi

echo "Time to first character (image prompt): ${ttfb_ms} ms"
echo "Response body (image prompt):"
cat "$response_file"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Benchmark time-to-first-byte (TTFB) of an Ollama-compatible /api/chat
# endpoint on a long text prompt (prefill-heavy), after one warm-up request
# that forces the model load.
#
# Environment overrides: HOST, PORT (or $1), URL.
set -euo pipefail

HOST="${HOST:-localhost}"
PORT="${1:-${PORT:-8000}}"
URL="${URL:-http://${HOST}:${PORT}/api/chat}"

# Long repeated prompt to exercise prefill; newlines are flattened to spaces
# below so it can sit inside a single JSON string.
PROMPT="$(cat <<'EOF'
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
Please read this long benchmark prompt and continue thinking carefully about context details constraints assumptions and tradeoffs.
EOF
)"
PROMPT="${PROMPT//$'\n'/ }"
PAYLOAD="{\"model\":\"qwen2:1.5b\",\"messages\":[{\"role\":\"user\",\"content\":\"${PROMPT}\"}],\"stream\":true}"

if ! command -v curl >/dev/null 2>&1; then
  echo "Error: curl is required but was not found." >&2
  exit 1
fi

echo "Running warm-up prompt (model load), response discarded..."
curl --silent --show-error --fail "$URL" \
  -H 'Content-Type: application/json' \
  -d "$PAYLOAD" \
  >/dev/null

echo "Measuring time to first response character on second prompt..."
response_file="$(mktemp)"
cleanup() {
  rm -f "$response_file"
}
trap cleanup EXIT

# Start the clock only AFTER local setup (mktemp, trap) so the measurement
# covers the request itself, not script bookkeeping.
start_ns="$(date +%s%N)"

set +e
ttfb_ms="$({
  curl --silent --show-error --fail --no-buffer "$URL" \
    -H 'Content-Type: application/json' \
    -d "$PAYLOAD"
} | tee "$response_file" | {
  # Block until exactly one character of the response body has arrived.
  IFS= read -r -n1 _first_char
  read_status=$?
  if [ "$read_status" -ne 0 ]; then
    exit 1
  fi
  now_ns="$(date +%s%N)"
  elapsed_ns=$((now_ns - start_ns))
  elapsed_ms_int=$((elapsed_ns / 1000000))
  elapsed_ms_frac=$(((elapsed_ns / 10000) % 100))
  printf '%s.%02d\n' "$elapsed_ms_int" "$elapsed_ms_frac"
  # Drain the rest of the stream so tee finishes writing the full body.
  cat >/dev/null
})"
status=$?
set -e

if [ "$status" -ne 0 ]; then
  echo "Error: could not read first response character." >&2
  exit 1
fi

echo "Time to first character (second prompt): ${ttfb_ms} ms"
echo "Response body (second prompt):"
cat "$response_file"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Benchmark time-to-first-byte (TTFB) of an Ollama-compatible /api/chat
# endpoint on a short text prompt, after one warm-up request that forces
# the model load.
#
# Environment overrides: HOST, PORT (or $1), URL.
set -euo pipefail

HOST="${HOST:-localhost}"
PORT="${1:-${PORT:-8000}}"
URL="${URL:-http://${HOST}:${PORT}/api/chat}"
PAYLOAD='{"model":"qwen2:1.5b","messages":[{"role":"user","content":"Tell me a joke about cats."}],"stream":true}'

if ! command -v curl >/dev/null 2>&1; then
  echo "Error: curl is required but was not found." >&2
  exit 1
fi

echo "Running warm-up prompt (model load), response discarded..."
curl --silent --show-error --fail "$URL" \
  -H 'Content-Type: application/json' \
  -d "$PAYLOAD" \
  >/dev/null

echo "Measuring time to first response character on second prompt..."
response_file="$(mktemp)"
cleanup() {
  rm -f "$response_file"
}
trap cleanup EXIT

# Start the clock only AFTER local setup (mktemp, trap) so the measurement
# covers the request itself, not script bookkeeping.
start_ns="$(date +%s%N)"

set +e
ttfb_ms="$({
  curl --silent --show-error --fail --no-buffer "$URL" \
    -H 'Content-Type: application/json' \
    -d "$PAYLOAD"
} | tee "$response_file" | {
  # Block until exactly one character of the response body has arrived.
  IFS= read -r -n1 _first_char
  read_status=$?
  if [ "$read_status" -ne 0 ]; then
    exit 1
  fi
  now_ns="$(date +%s%N)"
  elapsed_ns=$((now_ns - start_ns))
  elapsed_ms_int=$((elapsed_ns / 1000000))
  elapsed_ms_frac=$(((elapsed_ns / 10000) % 100))
  printf '%s.%02d\n' "$elapsed_ms_int" "$elapsed_ms_frac"
  # Drain the rest of the stream so tee finishes writing the full body.
  cat >/dev/null
})"
status=$?
set -e

if [ "$status" -ne 0 ]; then
  echo "Error: could not read first response character." >&2
  exit 1
fi

echo "Time to first character (second prompt): ${ttfb_ms} ms"
echo "Response body (second prompt):"
cat "$response_file"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import sys | |
| import cv2 | |
| import numpy as np | |
| import time | |
| from hailo_platform import VDevice | |
| from hailo_platform.genai import VLM | |
| from hailo_apps.python.core.common.core import handle_list_models_flag, resolve_hef_path | |
| from hailo_apps.python.core.common.defines import VLM_CHAT_APP, SHARED_VDEVICE_GROUP_ID, HAILO10H_ARCH, REPO_ROOT | |
| from hailo_apps.python.core.common.hailo_logger import get_logger | |
# Module-level logger named after this module so log output is routed per
# the project's hailo_logger configuration.
logger = get_logger(__name__)
def main():
    """Run the VLM chat example on a Hailo device.

    Loads the VLM HEF, sends an image + text prompt, streams the reply to
    stdout, and reports time-to-first-token (TTFT) and tokens-per-second
    (TPS) for the streamed generation. Exits with status 1 on any failure.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(description="VLM Chat Example")
    parser.add_argument("--hef-path", type=str, default=None, help="Path to HEF model file")
    parser.add_argument("--list-models", action="store_true", help="List available models")
    # Handle --list-models flag before full initialization
    handle_list_models_flag(parser, VLM_CHAT_APP)
    args = parser.parse_args()

    # Resolve HEF path with auto-download (VLM is Hailo-10H only)
    hef_path = resolve_hef_path(args.hef_path, app_name=VLM_CHAT_APP, arch=HAILO10H_ARCH)
    if hef_path is None:
        logger.error("Failed to resolve HEF path for VLM model.")
        sys.exit(1)
    logger.info(f"Using HEF: {hef_path}")
    print(f"✓ Model file found: {hef_path}")

    vdevice = None
    vlm = None
    try:
        print("\n[1/5] Initializing Hailo device...")
        params = VDevice.create_params()
        # Shared group id lets other apps use the same virtual device.
        params.group_id = SHARED_VDEVICE_GROUP_ID
        vdevice = VDevice(params)
        print("✓ Hailo device initialized")

        print("[2/5] Loading VLM model...")
        vlm = VLM(vdevice, str(hef_path))
        print("✓ Model loaded successfully")

        # Chat-style prompt: system message plus a user turn whose content
        # interleaves an image placeholder with the text question.
        prompt = [
            {
                "role": "system",
                "content": [{"type": "text", "text": 'You are a helpful assistant that analyzes images and answers questions about them.'}]
            },
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": 'How many people in the image?.'}
                ]
            }
        ]

        # Load the sample image shipped with the repo (standard REPO_ROOT).
        image_path = REPO_ROOT / 'doc' / 'images' / 'barcode-example.png'
        print(f"[3/5] Loading image from: {image_path}")
        image = cv2.imread(str(image_path))
        if image is None:
            raise FileNotFoundError(f"Could not load image file: {image_path}")
        print(f"✓ Image loaded (size: {image.shape[1]}x{image.shape[0]})")

        print("[4/5] Preprocessing image...")
        # cv2.imread returns BGR; the model expects RGB at 336x336 uint8.
        if len(image.shape) == 3 and image.shape[2] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (336, 336), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
        print("✓ Image preprocessed (resized to 336x336, converted to RGB)")

        print("[5/5] Sending prompt with image to VLM...")
        print(f"    User question: '{prompt[1]['content'][1]['text']}'")

        # Warm-up: run one full (non-streaming) generation and discard the
        # result, so the timed streaming run below measures a warmed model.
        vlm.generate_all(prompt=prompt, frames=[image], temperature=0.1, seed=42, max_generated_tokens=200)

        start_time = time.time()
        time_to_first_token = None  # set when the first streamed token arrives
        num_tokens = 0
        with vlm:
            with vlm.generate(prompt, frames=[image], temperature=0.1, seed=42, max_generated_tokens=200) as gen:
                for token in gen:
                    if time_to_first_token is None:
                        time_to_first_token = time.time() - start_time
                    print(token, end='', flush=True)
                    num_tokens += 1

        print("\n✓ Example completed successfully")
        if time_to_first_token is None:
            # Guard: with zero generated tokens, TTFT is undefined and TPS
            # would divide zero tokens by elapsed time — report nothing.
            logger.warning("No tokens were generated; TTFT/TPS unavailable.")
        else:
            print(f"TTFT {time_to_first_token}")
            # NOTE(review): elapsed time includes TTFT (prefill), so this is
            # end-to-end throughput, not pure decode TPS.
            tokens_per_s = num_tokens / (time.time() - start_time)
            print(f"TPS {tokens_per_s}")
    except Exception as e:
        logger.error(f"Error occurred: {e}", exc_info=True)
        sys.exit(1)
    finally:
        # Clean up resources.
        # NOTE(review): vlm is also used as a context manager above; confirm
        # the Hailo API tolerates release() after the context has exited.
        if vlm:
            try:
                vlm.clear_context()
                vlm.release()
            except Exception as e:
                logger.warning(f"Error releasing VLM: {e}")
        if vdevice:
            try:
                vdevice.release()
            except Exception as e:
                logger.warning(f"Error releasing VDevice: {e}")
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment