Created
April 10, 2026 20:55
-
-
Save loktar00/8848696723ee6ee5fd480dc22fa831a6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # llama.cpp | |
| # Model entry "GLM-5.1-Q2": launched through the ${latest-llama} macro (defined outside this section). | |
| # Loads the first shard of a 7-part GGUF (UD-Q2_K_XL) from ${models_dir}/glm-5.1 and registers | |
| # two RPC endpoints on 192.168.1.179 (ports 50052 and 50053). Six CUDA devices (0-5) are exposed via env. | |
| # Lines inside cmd that begin with "#" are options the author left disabled | |
| # (presumably stripped by the launcher before execution - TODO confirm). | |
| # NOTE(review): proxy targets port 8999 but no port flag is visible in cmd - presumably the macro sets it; confirm. | |
| "GLM-5.1-Q2": | |
| cmd: | | |
| ${latest-llama} | |
| --model ${models_dir}/glm-5.1/GLM-5.1-UD-Q2_K_XL-00001-of-00007.gguf | |
| --rpc 192.168.1.179:50052,192.168.1.179:50053 | |
| --fit on | |
| # --n-gpu-layers auto | |
| # -np 1 \ | |
| # --no-mmap \ | |
| --batch-size 4096 | |
| --ubatch-size 1024 | |
| --ctx-size 120000 | |
| --temp 1.0 | |
| --top-p 0.95 | |
| --flash-attn on | |
| --no-warmup | |
| env: | |
| - "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5" | |
| # Address requests are forwarded to, and the path probed on that address. | |
| proxy: http://127.0.0.1:8999 | |
| checkEndpoint: "/v1/models" | |
| # NOTE(review): ttl looks like seconds (3600 = 1 hour) - confirm against the launcher's documentation. | |
| ttl: 3600 | |
| # NOTE(review): this label says Q2_K_M while the model file above is UD-Q2_K_XL - confirm which is intended. | |
| useModelName: "GLM 5.1 Q2_K_M" | |
| # ik_llama | |
| # Model entry "GLM-5.1-Q2_ik": same GGUF shards as the llama.cpp entry, launched through the | |
| # ${latest-ik_llama} macro (defined outside this section) with RPC endpoints on ports 60052/60053. | |
| # The -ot rules pin the ffn tensors of layers 3-54 to specific CUDA/RPC devices; the final | |
| # "exps=CPU" rule targets tensors matching "exps" (presumably only those not already matched | |
| # by an earlier rule - TODO confirm override precedence). | |
| # Every continuation line in cmd ends with a backslash so the command also works when pasted into a shell. | |
| "GLM-5.1-Q2_ik": | |
| cmd: | | |
| ${latest-ik_llama} \ | |
| --model ${models_dir}/glm-5.1/GLM-5.1-UD-Q2_K_XL-00001-of-00007.gguf \ | |
| --rpc 192.168.1.179:60052,192.168.1.179:60053 \ | |
| --device CUDA0,RPC0[192.168.1.179:60052],RPC0[192.168.1.179:60053],CUDA1,CUDA2,CUDA3,CUDA4,CUDA5 \ | |
| --split-mode layer \ | |
| --n-gpu-layers 80 \ | |
| --ctx-size 120000 \ | |
| --flash-attn on \ | |
| -mla 3 \ | |
| -amb 512 \ | |
| -ctk q8_0 \ | |
| -ctv q8_0 \ | |
| -mea 0 \ | |
| --threads 120 \ | |
| -ot "blk\.(3|4|5|6|43|44)\.ffn_.*=CUDA0" \ | |
| -ot "blk\.(7|8|9|10|11|12|13|14)\.ffn_.*=RPC0[192.168.1.179:60052]" \ | |
| -ot "blk\.(15|16|17|18|19|20|21|22)\.ffn_.*=RPC0[192.168.1.179:60053]" \ | |
| -ot "blk\.(23|24|25|26|45|46)\.ffn_.*=CUDA1" \ | |
| -ot "blk\.(27|28|29|30|47|48)\.ffn_.*=CUDA2" \ | |
| -ot "blk\.(31|32|33|34|49|50)\.ffn_.*=CUDA3" \ | |
| -ot "blk\.(35|36|37|38|51|52)\.ffn_.*=CUDA4" \ | |
| -ot "blk\.(39|40|41|42|53|54)\.ffn_.*=CUDA5" \ | |
| -ot exps=CPU \ | |
| --temp 1.0 \ | |
| --top-p 0.95 \ | |
| --no-warmup | |
| env: | |
| - "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment