Skip to content

Instantly share code, notes, and snippets.

@lucataco
Last active April 30, 2026 04:28
Show Gist options
  • Select an option

  • Save lucataco/2b91f9aa439ddcb6f7974ba23c34821a to your computer and use it in GitHub Desktop.

Select an option

Save lucataco/2b91f9aa439ddcb6f7974ba23c34821a to your computer and use it in GitHub Desktop.
H200 MiniMax M2.7 server
#!/usr/bin/env bash
#
# Launch llama-server for the MiniMax-M2.7 MXFP4_MOE GGUF model.
#
# Every setting below can be overridden by exporting an environment variable
# of the same name before running the script; a variable that is unset or
# empty falls back to the default given here.
set -euo pipefail

# Absolute path of the directory holding this script; the default model
# location is resolved relative to it.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# --- llama.cpp checkout and the server binary built inside it ---------------
: "${LLAMA_CPP_DIR:=/home/shadeform/Code/llama.cpp}"
: "${LLAMA_SERVER_BIN:=$LLAMA_CPP_DIR/build/bin/llama-server}"

# --- Model files -------------------------------------------------------------
# Each path defaults to a location derived from the one before it, so
# overriding MODEL_DIR alone relocates the shard and the chat template too.
: "${MODEL_DIR:=$ROOT_DIR/MiniMax-M2.7-GGUF-MXFP4_MOE}"
: "${MODEL_SUBDIR:=$MODEL_DIR/MXFP4_MOE}"
: "${MODEL_SHARD:=$MODEL_SUBDIR/MiniMax-M2.7-MXFP4_MOE-00001-of-00004.gguf}"
: "${CHAT_TEMPLATE_FILE:=$MODEL_DIR/chat_template.jinja}"

# --- Network endpoint and API identity --------------------------------------
: "${HOST:=0.0.0.0}"
: "${PORT:=8080}"
: "${ALIAS:=minimax-m2.7-mxfp4-moe}"
: "${API_KEY:=dummy}"

# --- Context, batching and offload settings (forwarded to llama-server) ------
: "${CTX_SIZE:=196608}"
: "${N_PREDICT:=-1}"
: "${PARALLEL:=1}"
: "${BATCH_SIZE:=1024}"
: "${UBATCH_SIZE:=256}"
: "${N_GPU_LAYERS:=auto}"
: "${FLASH_ATTN:=on}"

# --- Cache types (forwarded as --cache-type-k / --cache-type-v) -------------
: "${CACHE_TYPE_K:=q4_0}"
: "${CACHE_TYPE_V:=q4_0}"
# Pre-flight checks: stop before launching if a required file is absent.

# Print the given message on stderr and terminate the script with status 1.
abort() {
  echo "$1" >&2
  exit 1
}

# The server binary must exist and be executable.
[[ -x "$LLAMA_SERVER_BIN" ]] || abort "Missing llama-server binary at $LLAMA_SERVER_BIN"

# The shard passed to --model must be a regular file.
[[ -f "$MODEL_SHARD" ]] || abort "Missing model shard at $MODEL_SHARD"
# Assemble the llama-server invocation as an array so that each value is
# delivered to the server as exactly one argument, whatever characters it
# contains.
cmd=(
  "$LLAMA_SERVER_BIN"
  --model "$MODEL_SHARD"
  --alias "$ALIAS"
  --host "$HOST"
  --port "$PORT"
  --ctx-size "$CTX_SIZE"
  --predict "$N_PREDICT"
  --parallel "$PARALLEL"
  --batch-size "$BATCH_SIZE"
  --ubatch-size "$UBATCH_SIZE"
  --gpu-layers "$N_GPU_LAYERS"
  --flash-attn "$FLASH_ATTN"
  --jinja
  --reasoning auto
  --cache-type-k "$CACHE_TYPE_K"
  --cache-type-v "$CACHE_TYPE_V"
)

# Hand the API key to the server through LLAMA_API_KEY (the environment
# counterpart of --api-key) instead of placing it on the command line:
# process arguments can be read by any local user via `ps`, the environment
# of another user's process cannot.
export LLAMA_API_KEY="$API_KEY"

# The placeholder key offers no protection once the server listens on a
# non-loopback address, so make that combination visible to the operator.
if [[ "$API_KEY" == "dummy" ]]; then
  case "$HOST" in
    127.0.0.1 | localhost | ::1) ;;
    *)
      printf 'Warning: serving on %s with the placeholder API key; set API_KEY to a secret value.\n' \
        "$HOST" >&2
      ;;
  esac
fi

# The chat template is optional: pass it only when the file is present,
# otherwise no --chat-template-file argument is given.
if [[ -f "$CHAT_TEMPLATE_FILE" ]]; then
  cmd+=(--chat-template-file "$CHAT_TEMPLATE_FILE")
fi

# Replace this shell with the server process so that signals and the exit
# status belong to llama-server directly.
exec "${cmd[@]}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment