Created
April 24, 2026 16:29
-
-
Save goodrahstar/5e080e6181ccb46c4f94ddcb8f13e4ea to your computer and use it in GitHub Desktop.
Scripts to Run Qwen 3.6 27B on Your Mac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ------- | |
| File: download-qwen36-q3km.sh | |
#!/usr/bin/env bash
# Download the Qwen3.6 27B Q3_K_M GGUF.
# Smaller 27B quant for 16GB RAM systems (~12GB class file; verify after download).
#
# Environment:
#   OUT_DIR   destination directory (default: <parent of script dir>/models)
#   HF_TOKEN  optional; when non-empty it is sent as an "Authorization: Bearer" header
#
# Resume: run again. Data is written to "<file>.part" and renamed to the final
# name only after curl exits successfully, so an interrupted transfer is picked
# up by --continue-at - instead of being reported as "Already present".
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
OUT_DIR="${OUT_DIR:-$REPO_ROOT/models}"
FILE="Qwen3.6-27B-Q3_K_M.gguf"
URL="https://huggingface.co/unsloth/Qwen3.6-27B-GGUF/resolve/main/${FILE}"

mkdir -p "$OUT_DIR"
DEST="${OUT_DIR}/${FILE}"
PARTIAL="${DEST}.part"

# Only a completed download ever exists under the final name.
if [[ -f "$DEST" ]]; then
  echo "Already present: $DEST"
  exit 0
fi

# --fail makes curl exit non-zero on HTTP errors (e.g. 401/404) rather than
# saving the server's error page as if it were the model file.
curl_args=(-L --fail --retry 3 --continue-at -)
if [[ -n "${HF_TOKEN:-}" ]]; then
  curl_args+=(-H "Authorization: Bearer ${HF_TOKEN}")
fi

echo "Downloading $FILE (smaller than Q4_K_M)..."
curl "${curl_args[@]}" -o "$PARTIAL" "$URL"

# Reached only when curl succeeded (set -e aborts otherwise).
mv -- "$PARTIAL" "$DEST"
echo "Done: $DEST"
| ------- | |
| File: download-qwen36-q4.sh | |
#!/usr/bin/env bash
# Download the Qwen3.6 27B Q4_K_M GGUF.
# Larger Q4 27B file — best on 32GB+ RAM.
#
# Environment:
#   OUT_DIR   destination directory (default: <parent of script dir>/models)
#   HF_TOKEN  optional; when non-empty it is sent as an "Authorization: Bearer" header
#
# Resume: run again (curl -C -). Data is written to "<file>.part" and renamed
# to the final name only after curl exits successfully; writing straight to the
# final name would make the "Already present" check below skip a partial file
# and the resume would never happen.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
OUT_DIR="${OUT_DIR:-$REPO_ROOT/models}"
FILE="Qwen3.6-27B-Q4_K_M.gguf"
URL="https://huggingface.co/unsloth/Qwen3.6-27B-GGUF/resolve/main/${FILE}"

mkdir -p "$OUT_DIR"
DEST="${OUT_DIR}/${FILE}"
PARTIAL="${DEST}.part"

# Only a completed download ever exists under the final name.
if [[ -f "$DEST" ]]; then
  echo "Already present: $DEST"
  exit 0
fi

# --fail makes curl exit non-zero on HTTP errors (e.g. 401/404) rather than
# saving the server's error page as if it were the model file.
curl_args=(-L --fail --retry 3 --continue-at -)
if [[ -n "${HF_TOKEN:-}" ]]; then
  curl_args+=(-H "Authorization: Bearer ${HF_TOKEN}")
fi

echo "Downloading $FILE (heavier; needs plenty of RAM)..."
curl "${curl_args[@]}" -o "$PARTIAL" "$URL"

# Reached only when curl succeeded (set -e aborts otherwise).
mv -- "$PARTIAL" "$DEST"
echo "Done: $DEST"
| ------- | |
| File: start-llama-qwen36-macos.sh | |
#!/usr/bin/env bash
# Run llama-server with a local Qwen3.6 27B GGUF (OpenAI-compatible API on :8080).
# Apple Silicon: Homebrew on arm64 — use Metal offload via --n-gpu-layers all.
# Intel Mac: this build may only expose CPU/BLAS; 27B Q4 is heavy — prefer 32GB+ RAM.
#
# Environment overrides:
#   MODEL                 path to a GGUF file (default: Q3_K_M if present, else Q4_K_M)
#   PORT                  listen port (default: 8080)
#   CTX                   context size passed to -c (default: 4096)
#   MODEL_ALIAS           API model id passed to -a
#   N_GPU_LAYERS          value for --n-gpu-layers (default: all on arm64, 0 otherwise)
#   USE_MLOCK=1           add --mlock (can hurt on low-RAM)
#   LLAMA_FIT_MODE=off    add --fit off
#   CHAT_TEMPLATE_KWARGS  JSON passed to --chat-template-kwargs
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Prefer Q3 on low-RAM; override with MODEL=...
_DEF_Q3="$REPO_ROOT/models/Qwen3.6-27B-Q3_K_M.gguf"
_DEF_Q4="$REPO_ROOT/models/Qwen3.6-27B-Q4_K_M.gguf"
MODEL="${MODEL:-}"
if [[ -z "$MODEL" ]]; then
  if [[ -f "$_DEF_Q3" ]]; then
    MODEL="$_DEF_Q3"
  else
    MODEL="$_DEF_Q4"
  fi
fi

PORT="${PORT:-8080}"
CTX="${CTX:-4096}"
# API model id (must match agent_data/agent/models.json and requests)
ALIAS="${MODEL_ALIAS:-unsloth/Qwen3.6-27B-GGUF}"

if [[ ! -f "$MODEL" ]]; then
  echo "Missing GGUF: $MODEL" >&2
  echo "Download one of:" >&2
  echo " $REPO_ROOT/scripts/download-qwen36-q3km.sh (smaller file — 16GB Macs)" >&2
  echo " $REPO_ROOT/scripts/download-qwen36-q4.sh (higher quality — 32GB+ RAM)" >&2
  exit 1
fi

# Fail with a readable message instead of a bare "command not found" from exec.
# 127 is the status the shell itself uses for a missing command.
if ! command -v llama-server >/dev/null 2>&1; then
  echo "llama-server not found in PATH (Homebrew: brew install llama.cpp)" >&2
  exit 127
fi

# N_GPU_LAYERS is honoured on every architecture; only the default differs.
if [[ "$(uname -m)" == "arm64" ]]; then
  NGL="${N_GPU_LAYERS:-all}"
else
  # Intel Homebrew build often has no GPU backend; use CPU (still accepts --n-gpu-layers, may be ignored)
  NGL="${N_GPU_LAYERS:-0}"
fi

# The JSON default lives in its own variable on purpose: written inline as
# "${VAR:-{...}}" the expansion ends at the FIRST '}', so the final '}' becomes
# a literal that is appended even when VAR is set, corrupting the user's JSON.
DEFAULT_CHAT_TEMPLATE_KWARGS='{"enable_thinking": true}'

# One argument array, never empty, so expanding it is safe under `set -u`
# on every bash version (including the 3.2 shipped with macOS).
SERVER_ARGS=(
  -m "$MODEL"
  --port "$PORT"
  -c "$CTX"
  --n-gpu-layers "$NGL"
  -fa on
)
# mlock: USE_MLOCK=1 to add --mlock (can hurt on low-RAM)
if [[ "${USE_MLOCK:-0}" == "1" ]]; then
  SERVER_ARGS+=(--mlock)
fi
if [[ "${LLAMA_FIT_MODE:-on}" == "off" ]]; then
  SERVER_ARGS+=(--fit off)
fi
SERVER_ARGS+=(
  -a "$ALIAS"
  --chat-template-kwargs "${CHAT_TEMPLATE_KWARGS:-$DEFAULT_CHAT_TEMPLATE_KWARGS}"
)

# exec replaces this shell so signals go straight to llama-server.
exec llama-server "${SERVER_ARGS[@]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment