Spaces:
Running
Running
Commit
·
53dd8ad
1
Parent(s):
ecb4322
vulkan: optimize iq1 coopmat2 dequant functions (llama/12427)
Browse files
ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
CHANGED
|
@@ -311,8 +311,8 @@ float16_t dequantFuncIQ1_S(const in decodeBufIQ1_S bl, const in uint blockCoords
|
|
| 311 |
const float16_t d = bl.block.d;
|
| 312 |
const uint idx = coordInBlock[1];
|
| 313 |
|
| 314 |
-
const uint ib32 = idx / 32;
|
| 315 |
-
const uint ib8 = idx / 8;
|
| 316 |
|
| 317 |
const uint qh = bl.block.qh[ib32];
|
| 318 |
const uint qs = bl.block.qs[ib8];
|
|
@@ -330,14 +330,20 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ1
|
|
| 330 |
block_iq1_m block;
|
| 331 |
};
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
float16_t dequantFuncIQ1_M(const in decodeBufIQ1_M bl, const in uint blockCoords[2], const in uint coordInBlock[2])
|
| 334 |
{
|
| 335 |
-
|
| 336 |
-
const float16_t d = uint16BitsToHalf(scales.x | (scales.y << 4) | (scales.z << 8) | (scales.w << 12));
|
| 337 |
const uint idx = coordInBlock[1];
|
| 338 |
|
| 339 |
-
const uint ib8 = idx / 8;
|
| 340 |
-
const uint ib16 = idx / 16;
|
|
|
|
|
|
|
|
|
|
| 341 |
const int i8 = int(idx % 8);
|
| 342 |
const uint sc = bl.block.scales[ib8 / 8];
|
| 343 |
const uint qs = bl.block.qs[ib8];
|
|
|
|
| 311 |
const float16_t d = bl.block.d;
|
| 312 |
const uint idx = coordInBlock[1];
|
| 313 |
|
| 314 |
+
const uint ib32 = (idx & 0xE0) >> 5;
|
| 315 |
+
const uint ib8 = (idx & 0xF8) >> 3;
|
| 316 |
|
| 317 |
const uint qh = bl.block.qh[ib32];
|
| 318 |
const uint qs = bl.block.qs[ib8];
|
|
|
|
| 330 |
block_iq1_m block;
|
| 331 |
};
|
| 332 |
|
| 333 |
+
layout(buffer_reference, std430, buffer_reference_align = 8) buffer decodeBufIQ1_M_packed64 {
|
| 334 |
+
block_iq1_m_packed64 block;
|
| 335 |
+
};
|
| 336 |
+
|
| 337 |
float16_t dequantFuncIQ1_M(const in decodeBufIQ1_M bl, const in uint blockCoords[2], const in uint coordInBlock[2])
|
| 338 |
{
|
| 339 |
+
decodeBufIQ1_M_packed64 bl64 = decodeBufIQ1_M_packed64(bl);
|
|
|
|
| 340 |
const uint idx = coordInBlock[1];
|
| 341 |
|
| 342 |
+
uvec2 scales = unpack32(bl64.block.scales);
|
| 343 |
+
const float16_t d = uint16BitsToHalf(uint16_t(((scales.x & 0xF000) >> 12) | ((scales.x & 0xF0000000) >> 24) | ((scales.y & 0xF000) >> 4) | ((scales.y & 0xF0000000) >> 16)));
|
| 344 |
+
|
| 345 |
+
const uint ib8 = (idx & 0xF8) >> 3;
|
| 346 |
+
const uint ib16 = (idx & 0xF0) >> 4;
|
| 347 |
const int i8 = int(idx % 8);
|
| 348 |
const uint sc = bl.block.scales[ib8 / 8];
|
| 349 |
const uint qs = bl.block.qs[ib8];
|
ggml/src/ggml-vulkan/vulkan-shaders/types.comp
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
#if !defined(GGML_TYPES_COMP)
|
| 3 |
#define GGML_TYPES_COMP
|
| 4 |
|
|
|
|
| 5 |
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
| 6 |
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
| 7 |
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
|
@@ -312,6 +313,12 @@ struct block_iq1_m {
|
|
| 312 |
uint16_t scales[QUANT_K_IQ1_M/64];
|
| 313 |
};
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
#if defined(DATA_A_IQ1_S)
|
| 316 |
#define QUANT_K QUANT_K_IQ1_S
|
| 317 |
#define QUANT_R QUANT_R_IQ1_S
|
|
|
|
| 2 |
#if !defined(GGML_TYPES_COMP)
|
| 3 |
#define GGML_TYPES_COMP
|
| 4 |
|
| 5 |
+
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
| 6 |
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
| 7 |
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
| 8 |
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
|
|
|
| 313 |
uint16_t scales[QUANT_K_IQ1_M/64];
|
| 314 |
};
|
| 315 |
|
| 316 |
+
struct block_iq1_m_packed64 {
|
| 317 |
+
uint64_t qs[QUANT_K_IQ1_M/8/8];
|
| 318 |
+
uint64_t qh[QUANT_K_IQ1_M/16/8];
|
| 319 |
+
uint64_t scales;
|
| 320 |
+
};
|
| 321 |
+
|
| 322 |
#if defined(DATA_A_IQ1_S)
|
| 323 |
#define QUANT_K QUANT_K_IQ1_S
|
| 324 |
#define QUANT_R QUANT_R_IQ1_S
|