Spaces:
Running
Running
ggml : fix q2_k bpw in comments (ggml/680)
Browse files
- ggml-quants.h +1 -1
ggml-quants.h
CHANGED
|
@@ -70,7 +70,7 @@ static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block s
|
|
| 70 |
// 2-bit quantization
|
| 71 |
// weight is represented as x = a * q + b
|
| 72 |
// 16 blocks of 16 elements each
|
| 73 |
-
// Effectively 2.5625 bits per weight
|
| 74 |
typedef struct {
|
| 75 |
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
| 76 |
uint8_t qs[QK_K/4]; // quants
|
|
|
|
| 70 |
// 2-bit quantization
|
| 71 |
// weight is represented as x = a * q + b
|
| 72 |
// 16 blocks of 16 elements each
|
| 73 |
+
// Effectively 2.625 bits per weight
|
| 74 |
typedef struct {
|
| 75 |
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
| 76 |
uint8_t qs[QK_K/4]; // quants
|