Added support for q5_0 cache quantization
This commit is contained in:
parent
0acebfd315
commit
d0b693d1a9
@@ -814,8 +814,8 @@ public class Main extends JFrame {
|
||||
|
||||
gbc.gridx = 1;
|
||||
gbc.weightx = 1.0;
|
||||
- cacheTypeKComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "turbo3", "turbo4" });
|
||||
- cacheTypeKComboBox.setSelectedIndex(0);
|
||||
+ cacheTypeKComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "q5_0", "turbo3", "turbo4" });
|
||||
+ cacheTypeKComboBox.setSelectedIndex(4);
|
||||
increaseComboBoxHeight(cacheTypeKComboBox);
|
||||
cacheTypeKComboBox.addActionListener(e -> updateCommandPreview());
|
||||
panel.add(cacheTypeKComboBox, gbc);
|
||||
@@ -827,8 +827,8 @@ public class Main extends JFrame {
|
||||
|
||||
gbc.gridx = 1;
|
||||
gbc.weightx = 1.0;
|
||||
- cacheTypeVComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "turbo3", "turbo4" });
|
||||
- cacheTypeVComboBox.setSelectedIndex(0);
|
||||
+ cacheTypeVComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "q5_0", "turbo3", "turbo4" });
|
||||
+ cacheTypeVComboBox.setSelectedIndex(4);
|
||||
increaseComboBoxHeight(cacheTypeVComboBox);
|
||||
cacheTypeVComboBox.addActionListener(e -> updateCommandPreview());
|
||||
panel.add(cacheTypeVComboBox, gbc);
|
||||
|
||||
@@ -39,8 +39,8 @@ public class ModelConfig implements Serializable {
|
||||
this.threads = 99;
|
||||
this.flashAttention = true;
|
||||
this.kvUnified = true;
|
||||
- this.cacheTypeK = "bf16";
|
||||
- this.cacheTypeV = "bf16";
|
||||
+ this.cacheTypeK = "q8_0";
|
||||
+ this.cacheTypeV = "q8_0";
|
||||
this.temperature = 0.6;
|
||||
this.topP = 0.95;
|
||||
this.topK = 20;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user