Commit 8c32d36 · ggerganov committed · 1 Parent(s): f2986f6

cmake : do not hide GGML options + rename option (llama/9465)


* cmake : do not hide GGML options

ggml-ci

* build : rename flag GGML_CUDA_USE_GRAPHS -> GGML_CUDA_GRAPHS

for consistency

ggml-ci

Files changed (2):
  1. ggml/CMakeLists.txt (+11 -2)
  2. ggml/src/CMakeLists.txt (+1 -1)
ggml/CMakeLists.txt CHANGED

@@ -56,6 +56,15 @@ else()
     set(GGML_NATIVE_DEFAULT ON)
 endif()
 
+# defaults
+if (NOT GGML_LLAMAFILE_DEFAULT)
+    set(GGML_LLAMAFILE_DEFAULT OFF)
+endif()
+
+if (NOT GGML_CUDA_GRAPHS_DEFAULT)
+    set(GGML_CUDA_GRAPHS_DEFAULT OFF)
+endif()
+
 # general
 option(GGML_STATIC "ggml: static link libraries" OFF)
 option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
@@ -110,7 +119,7 @@ option(GGML_ACCELERATE "ggml: enable Accelerate framework"
 option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
 set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
                      "ggml: BLAS library vendor")
-option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF)
+option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
 
 option(GGML_CUDA "ggml: use CUDA" OFF)
 option(GGML_MUSA "ggml: use MUSA" OFF)
@@ -127,7 +136,7 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
 option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
-option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
+option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
 
 option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
 option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
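The new *_DEFAULT indirection is what lets an embedding project choose its own defaults without ggml overriding the options: the parent build seeds GGML_..._DEFAULT before adding ggml, the "if (NOT GGML_..._DEFAULT)" guards leave a pre-set value alone, and option() then surfaces it as a visible, user-overridable cache entry. A minimal sketch of a consumer's top-level CMakeLists.txt, assuming ggml is vendored as a subdirectory (the project name, sources, and default choices here are illustrative, not part of this commit):

    # Seed the defaults *before* add_subdirectory(ggml); ggml's guards
    # skip them if already set, and option() uses them as cache defaults.
    cmake_minimum_required(VERSION 3.14)
    project(myapp C CXX)

    set(GGML_CUDA_GRAPHS_DEFAULT ON)   # illustrative: this consumer wants CUDA graphs by default
    set(GGML_LLAMAFILE_DEFAULT   ON)   # illustrative: and LLAMAFILE by default

    add_subdirectory(ggml)

    add_executable(myapp main.c)
    target_link_libraries(myapp PRIVATE ggml)

Because these are ordinary option() defaults rather than hidden or forced cache entries, a user can still flip either switch at configure time, e.g. with -DGGML_CUDA_GRAPHS=OFF.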
ggml/src/CMakeLists.txt CHANGED

@@ -329,7 +329,7 @@ if (GGML_CUDA)
     add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
     add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
 
-    if (GGML_CUDA_USE_GRAPHS)
+    if (GGML_CUDA_GRAPHS)
         add_compile_definitions(GGML_CUDA_USE_GRAPHS)
     endif()
 
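Note that only the CMake option is renamed, for consistency with the other GGML_CUDA_* switches; the compile definition passed to the CUDA sources is still GGML_CUDA_USE_GRAPHS, as the unchanged add_compile_definitions line above shows. After this commit a typical configure invocation would look like the following (the build directory name is arbitrary):

    cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_GRAPHS=ON
    cmake --build build

Passing the old -DGGML_CUDA_USE_GRAPHS=ON no longer has any effect, since no option by that name exists anymore.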