cmake : do not hide GGML options + rename option (llama/9465)
* cmake : do not hide GGML options
ggml-ci
* build : rename flag GGML_CUDA_USE_GRAPHS -> GGML_CUDA_GRAPHS
for consistency
ggml-ci
- ggml/CMakeLists.txt +11 -2
- ggml/src/CMakeLists.txt +1 -1
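
The effect of the first change is that a build that embeds ggml (e.g. llama.cpp) can steer these defaults without hiding the options: pre-setting GGML_LLAMAFILE or GGML_CUDA_GRAPHS as plain variables keeps option() from creating a visible, user-overridable cache entry, whereas setting the new *_DEFAULT variables before pulling in ggml only changes the default. A minimal consumer-side sketch of that pattern, assuming ggml is vendored via add_subdirectory (the parent project's CMakeLists.txt is not part of this diff):

    # illustrative parent CMakeLists.txt snippet, not from this commit
    if (NOT DEFINED GGML_LLAMAFILE)
        set(GGML_LLAMAFILE_DEFAULT ON)   # flip the default, keep the option visible
    endif()
    if (NOT DEFINED GGML_CUDA_GRAPHS)
        set(GGML_CUDA_GRAPHS_DEFAULT ON)
    endif()
    add_subdirectory(ggml)

Either option can still be forced explicitly at configure time, e.g. -DGGML_CUDA_GRAPHS=OFF.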
ggml/CMakeLists.txt
CHANGED
@@ -56,6 +56,15 @@ else()
     set(GGML_NATIVE_DEFAULT ON)
 endif()
 
+# defaults
+if (NOT GGML_LLAMAFILE_DEFAULT)
+    set(GGML_LLAMAFILE_DEFAULT OFF)
+endif()
+
+if (NOT GGML_CUDA_GRAPHS_DEFAULT)
+    set(GGML_CUDA_GRAPHS_DEFAULT OFF)
+endif()
+
 # general
 option(GGML_STATIC "ggml: static link libraries" OFF)
 option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
@@ -110,7 +119,7 @@ option(GGML_ACCELERATE "ggml: enable Accelerate framework"
 option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
 set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
                      "ggml: BLAS library vendor")
-option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF)
+option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
 
 option(GGML_CUDA "ggml: use CUDA" OFF)
 option(GGML_MUSA "ggml: use MUSA" OFF)
@@ -127,7 +136,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
 option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
-option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
+option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
 
 option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
 option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
ggml/src/CMakeLists.txt
CHANGED
@@ -329,7 +329,7 @@ if (GGML_CUDA)
     add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
     add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
 
-    if (GGML_CUDA_USE_GRAPHS)
+    if (GGML_CUDA_GRAPHS)
         add_compile_definitions(GGML_CUDA_USE_GRAPHS)
     endif()
 
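
Note that only the CMake option is renamed; the preprocessor definition passed to the CUDA sources keeps its old name, so the C/C++ side needs no changes. A rough summary of how the two names relate after this commit (assumed illustration, not additional code from the diff):

    # user-facing cache option:       GGML_CUDA_GRAPHS
    # code-facing compile definition: GGML_CUDA_USE_GRAPHS
    if (GGML_CUDA AND GGML_CUDA_GRAPHS)
        message(STATUS "CUDA graphs enabled -> CUDA sources built with -DGGML_CUDA_USE_GRAPHS")
    endif()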