Spaces:
Running
Running
Erik Scholz
committed on
Commit
·
4ec988a
1
Parent(s):
d6b6852
CUDA: compress mode option and default to size (llama/12029)
Browse files
CUDA 12.8 added the option to specify stronger compression for binaries, so we now default to "size".
- ggml/CMakeLists.txt +3 -0
- ggml/src/ggml-cuda/CMakeLists.txt +9 -0
ggml/CMakeLists.txt
CHANGED
|
@@ -155,6 +155,9 @@ option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM"
|
|
| 155 |
option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
|
| 156 |
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
| 157 |
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
option(GGML_HIP "ggml: use HIP" OFF)
|
| 160 |
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
|
|
|
| 155 |
option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
|
| 156 |
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
| 157 |
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
| 158 |
+
set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
|
| 159 |
+
"ggml: cuda link binary compression mode; requires cuda 12.8+")
|
| 160 |
+
set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")
|
| 161 |
|
| 162 |
option(GGML_HIP "ggml: use HIP" OFF)
|
| 163 |
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
ggml/src/ggml-cuda/CMakeLists.txt
CHANGED
|
@@ -102,6 +102,15 @@ if (CUDAToolkit_FOUND)
|
|
| 102 |
|
| 103 |
set(CUDA_FLAGS -use_fast_math)
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
if (GGML_FATAL_WARNINGS)
|
| 106 |
list(APPEND CUDA_FLAGS -Werror all-warnings)
|
| 107 |
endif()
|
|
|
|
| 102 |
|
| 103 |
set(CUDA_FLAGS -use_fast_math)
|
| 104 |
|
| 105 |
+
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
|
| 106 |
+
# Options are:
|
| 107 |
+
# - none (not recommended)
|
| 108 |
+
# - speed (nvcc's default)
|
| 109 |
+
# - balance
|
| 110 |
+
# - size
|
| 111 |
+
list(APPEND CUDA_FLAGS -compress-mode=${GGML_CUDA_COMPRESSION_MODE})
|
| 112 |
+
endif()
|
| 113 |
+
|
| 114 |
if (GGML_FATAL_WARNINGS)
|
| 115 |
list(APPEND CUDA_FLAGS -Werror all-warnings)
|
| 116 |
endif()
|