matteogeniaccio matteo serva JohannesGaessler committed on
Commit
686bb18
·
1 Parent(s): 0019ddb

ggml-cuda: Adding support for unified memory (llama/8035)

Browse files

* Adding support for unified memory

* re-adding the documentation about unified memory

* refactoring: Moved the unified memory code in the correct location.

* Fixed compilation error when using hipblas

* cleaning up the documentation

* Updating the documentation

Co-authored-by: Johannes Gäßler <[email protected]>

* adding one more case where the PR should not be enabled

---------

Co-authored-by: matteo serva <[email protected]>
Co-authored-by: Johannes Gäßler <[email protected]>

Files changed (1) hide show
  1. ggml/src/ggml-cuda.cu +15 -0
ggml/src/ggml-cuda.cu CHANGED
@@ -130,7 +130,22 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
130
  }
131
  return res;
132
  #else
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  return cudaMalloc(ptr, size);
 
 
134
  #endif
135
  }
136
 
 
130
  }
131
  return res;
132
  #else
133
+
134
+ #if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
135
+ cudaError_t err;
136
+ if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr)
137
+ {
138
+ err = cudaMallocManaged(ptr, size);
139
+ }
140
+ else
141
+ {
142
+ err = cudaMalloc(ptr, size);
143
+ }
144
+ return err;
145
+ #else
146
  return cudaMalloc(ptr, size);
147
+ #endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
148
+
149
  #endif
150
  }
151