Added support for the q5_0 cache quantization type; changed the default K/V cache type from bf16 to q8_0
This commit is contained in:
parent
0acebfd315
commit
d0b693d1a9
@ -814,8 +814,8 @@ public class Main extends JFrame {
|
|||||||
|
|
||||||
gbc.gridx = 1;
|
gbc.gridx = 1;
|
||||||
gbc.weightx = 1.0;
|
gbc.weightx = 1.0;
|
||||||
cacheTypeKComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "turbo3", "turbo4" });
|
cacheTypeKComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "q5_0", "turbo3", "turbo4" });
|
||||||
cacheTypeKComboBox.setSelectedIndex(0);
|
cacheTypeKComboBox.setSelectedIndex(4);
|
||||||
increaseComboBoxHeight(cacheTypeKComboBox);
|
increaseComboBoxHeight(cacheTypeKComboBox);
|
||||||
cacheTypeKComboBox.addActionListener(e -> updateCommandPreview());
|
cacheTypeKComboBox.addActionListener(e -> updateCommandPreview());
|
||||||
panel.add(cacheTypeKComboBox, gbc);
|
panel.add(cacheTypeKComboBox, gbc);
|
||||||
@ -827,8 +827,8 @@ public class Main extends JFrame {
|
|||||||
|
|
||||||
gbc.gridx = 1;
|
gbc.gridx = 1;
|
||||||
gbc.weightx = 1.0;
|
gbc.weightx = 1.0;
|
||||||
cacheTypeVComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "turbo3", "turbo4" });
|
cacheTypeVComboBox = new JComboBox<>(new String[] { "bf16", "f16", "f32", "f8", "q8_0", "q5_0", "turbo3", "turbo4" });
|
||||||
cacheTypeVComboBox.setSelectedIndex(0);
|
cacheTypeVComboBox.setSelectedIndex(4);
|
||||||
increaseComboBoxHeight(cacheTypeVComboBox);
|
increaseComboBoxHeight(cacheTypeVComboBox);
|
||||||
cacheTypeVComboBox.addActionListener(e -> updateCommandPreview());
|
cacheTypeVComboBox.addActionListener(e -> updateCommandPreview());
|
||||||
panel.add(cacheTypeVComboBox, gbc);
|
panel.add(cacheTypeVComboBox, gbc);
|
||||||
|
|||||||
@ -39,8 +39,8 @@ public class ModelConfig implements Serializable {
|
|||||||
this.threads = 99;
|
this.threads = 99;
|
||||||
this.flashAttention = true;
|
this.flashAttention = true;
|
||||||
this.kvUnified = true;
|
this.kvUnified = true;
|
||||||
this.cacheTypeK = "bf16";
|
this.cacheTypeK = "q8_0";
|
||||||
this.cacheTypeV = "bf16";
|
this.cacheTypeV = "q8_0";
|
||||||
this.temperature = 0.6;
|
this.temperature = 0.6;
|
||||||
this.topP = 0.95;
|
this.topP = 0.95;
|
||||||
this.topK = 20;
|
this.topK = 20;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user