{
"Nemotron Cascade Q8 160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Nemotron-Cascade-2-30B-A3B.Q8_0.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": false,
"reasoning": false
},
"Qwen3.5-q6k-180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3.5-35B-A3B-Q6_K.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": true,
"reasoning": false
},
"QwenCoderNext-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q2_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"Nemotron Cascade 180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Nemotron-Cascade-2-30B-A3B.Q5_K_M.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"Qwen3.5 q6xl 160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3.5-35B-A3B-UD-Q6_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"gpt-oss-20b-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "bf16",
"cacheTypeV": "bf16",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/gpt-oss-20b-F16.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": 999,
"fit": false,
"reasoning": false
},
"gpt-oss-120b-F16-128k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 131070,
"enableThinking": false,
"modelPath": "/home/kamma/models/gpt-oss-120b-F16.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": true}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"GLM-Flash-UD-Q6KXL-180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/GLM-4.7-Flash-UD-Q6_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": false}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"Qwen3-Coder-Next-UD-Q2KXL-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q2_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": true}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"Qwen3-Coder-Next-UD-Q3KXL-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q3_K_XL.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": true}",
"ngl": -1,
"fit": true,
"reasoning": false
},
"Qwen3-Coder-Next-UD-Q4KXL-160k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "turbo3",
"cacheTypeV": "turbo3",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 160000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3-Coder-Next-UD-Q4_K_XL.gguf",
"chatTemplateKwargs": "",
"ngl": -1,
"fit": true,
"reasoning": true
},
"Qwen35-35B-A3B-Q6K-turbo4-256k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 256000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3.5-35B-A3B-Q6_K.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": true}",
"ngl": 999,
"fit": true,
"reasoning": true
},
"Qwen35-35B-A3B-Q6K-Q8-256k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.6,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 256000,
"enableThinking": false,
"modelPath": "/home/kamma/models/Qwen3.5-35B-A3B-Q6_K.gguf",
"chatTemplateKwargs": "",
"ngl": 999,
"fit": true,
"reasoning": true
},
"gemma-4-31B-it-UD-Q6KXL-54k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.9,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 54000,
"enableThinking": false,
"modelPath": "/home/kamma/models/gemma-4-31B-it-UD-Q6_K_XL.gguf",
"chatTemplateKwargs": "",
"ngl": 99,
"fit": false,
"reasoning": true
},
"gemma-4-26B-A4B-it-UD-Q8KXL-180k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.9,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 180000,
"enableThinking": false,
"modelPath": "/home/kamma/models/gemma-4-26B-A4B-it-UD-Q8_K_XL.gguf",
"chatTemplateKwargs": "",
"ngl": 99,
"fit": false,
"reasoning": true
},
"gemma-4-31B-it-Q6K-112k": {
"host": "0.0.0.0",
"port": 3080,
"parallel": 1,
"batchSize": 2048,
"threads": 99,
"flashAttention": true,
"kvUnified": true,
"cacheTypeK": "q8_0",
"cacheTypeV": "q8_0",
"temperature": 0.8,
"topP": 0.95,
"topK": 20,
"minP": 0.0,
"ctxSize": 112000,
"enableThinking": false,
"modelPath": "/home/kamma/models/gemma-4-31B-it-Q6_K.gguf",
"chatTemplateKwargs": "{\"enable_thinking\": true}",
"ngl": 99,
"fit": false,
"reasoning": true
}
}