{
  "Nemotron Cascade Q8 160k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "turbo3",
    "cacheTypeV": "turbo3",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 160000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Nemotron-Cascade-2-30B-A3B.Q8_0.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": false}",
    "ngl": -1,
    "fit": false,
    "reasoning": false
  },
  "QwenCoderNext-160k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "turbo3",
    "cacheTypeV": "turbo3",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 160000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q2_K_XL.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": false}",
    "ngl": 999,
    "fit": false,
    "reasoning": false
  },
  "Nemotron Cascade 180k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "turbo3",
    "cacheTypeV": "turbo3",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 180000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Nemotron-Cascade-2-30B-A3B.Q5_K_M.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": false}",
    "ngl": 999,
    "fit": false,
    "reasoning": false
  },
  "gpt-oss-20b-160k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "bf16",
    "cacheTypeV": "bf16",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 160000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/gpt-oss-20b-F16.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": false}",
    "ngl": 999,
    "fit": false,
    "reasoning": false
  },
  "gpt-oss-120b-F16-16k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 1.0,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 131072,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/gpt-oss-120b-F16.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": true}",
    "ngl": -1,
    "fit": true,
    "reasoning": false
  },
  "GLM-Flash-UD-Q6KXL-180k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "turbo3",
    "cacheTypeV": "turbo3",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 180000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/GLM-4.7-Flash-UD-Q6_K_XL.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": false}",
    "ngl": -1,
    "fit": true,
    "reasoning": false
  },
  "Qwen3-Coder-Next-UD-Q3KXL-160k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 4096,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 1.0,
    "topP": 0.95,
    "topK": 40,
    "minP": 0.01,
    "ctxSize": 160000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q3_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": -1,
    "fit": true,
    "reasoning": false
  },
  "Qwen3-Coder-Next-UD-Q4KXL-160k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 1.0,
    "topP": 0.95,
    "topK": 40,
    "minP": 0.01,
    "ctxSize": 160000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q4_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": -1,
    "fit": true,
    "reasoning": false
  },
  "gemma-4-31B-it-UD-Q6KXL-54k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.9,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 54000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/gemma-4-31B-it-UD-Q6_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "gemma-4-26B-A4B-it-UD-Q8KXL-180k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.9,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 180000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/gemma-4-26B-A4B-it-UD-Q8_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "gemma-4-31B-it-Q6K-112k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.8,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 112000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/gemma-4-31B-it-Q6_K.gguf",
    "chatTemplateKwargs": "{\"enable_thinking\": true}",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "Exaone4": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 2048,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.9,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 131072,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/EXAONE-4.0-32B-GGUF-Q6_K.gguf",
    "chatTemplateKwargs": "",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "Qwen3.6-35B-A3B-UD-Q6_K_XL-FIT": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 4096,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.9,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 180000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3.6-35B-A3B-UD-Q6_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "Qwen3.6-35B-A3B-UD-Q5_K_XL-FULL": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 4096,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.7,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 260000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3.6-35B-A3B-UD-Q5_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "Qwen3.6-27B-UD-Q6_K_XL-FULL": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 4096,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 180000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3.6-27B-UD-Q6_K_XL.gguf",
    "chatTemplateKwargs": "{\"preserve_thinking\":true,\"enable_thinking\":true}",
    "ngl": 99,
    "fit": false,
    "reasoning": true
  },
  "Qwen3.6-27B-UD-Q6_K_XL-FULL-no-thinking-180k": {
    "host": "0.0.0.0",
    "port": 3080,
    "parallel": 1,
    "batchSize": 8192,
    "threads": 99,
    "flashAttention": true,
    "kvUnified": true,
    "cacheTypeK": "q8_0",
    "cacheTypeV": "q8_0",
    "temperature": 0.6,
    "topP": 0.95,
    "topK": 20,
    "minP": 0.0,
    "ctxSize": 180000,
    "enableThinking": false,
    "modelPath": "/home/kamma/models/Qwen3.6-27B-UD-Q6_K_XL.gguf",
    "chatTemplateKwargs": "",
    "ngl": 99,
    "fit": false,
    "reasoning": false
  }
}