whisper.cpp / ggml /src /ggml-cuda /template-instances /fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
JohannesGaessler's picture
CUDA: optimize FA for GQA + large batches (llama/12014)
6662d54
raw
history blame
330 Bytes
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-mma-f16.cuh"
// Invokes DECL_FATTN_MMA_F16_CASE once for each first-argument value
// 64, 80, 96, 112, 128 and 256, always passing 1 and 8 as the second and
// third arguments.
//
// NOTE(review): the macro definition is not visible in this file; it is
// presumably provided by the "../fattn-mma-f16.cuh" include. Judging by this
// file's name (template-instances/...-ncols1_1-ncols2_8.cu), each line
// presumably emits one explicit template instantiation with ncols1 = 1 and
// ncols2 = 8, and the first argument is presumably the attention head size.
// Confirm all of this against the header.
//
// To change this list, re-run generate_cu_files.py instead of editing by hand.
DECL_FATTN_MMA_F16_CASE(64, 1, 8);
DECL_FATTN_MMA_F16_CASE(80, 1, 8);
DECL_FATTN_MMA_F16_CASE(96, 1, 8);
DECL_FATTN_MMA_F16_CASE(112, 1, 8);
DECL_FATTN_MMA_F16_CASE(128, 1, 8);
DECL_FATTN_MMA_F16_CASE(256, 1, 8);