{
"Nemotron Cascade Q8 160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Nemotron-Cascade-2-30B-A3B.Q8_0.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": false,
"reasoning": false
},
"Qwen3.5-q6k-180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3.5-35B-A3B-Q6_K.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"QwenCoderNext-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q2_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"Nemotron Cascade 180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Nemotron-Cascade-2-30B-A3B.Q5_K_M.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"Qwen3.5 q6xl 160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3.5-35B-A3B-UD-Q6_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"gpt-oss-20b-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "bf16",
"cacheTypeV": "bf16",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/gpt-oss-20b-F16.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"gpt-oss-120b-F16-128k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 131070,
"enableThinking": false,
"modelPath": "/home/kamma/models/gpt-oss-120b-F16.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"GLM-Flash-UD-Q6KXL-180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/GLM-4.7-Flash-UD-Q6_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"Qwen3-Coder-Next-UD-Q2KXL-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q2_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"Qwen3-Coder-Next-UD-Q3KXL-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q3_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"Qwen3-Coder-Next-UD-Q4KXL-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q4_K_XL.gguf",
"chatTemplateKwargs": "",
"ngl": -1,
"fit": true,
"reasoning": true
}
}