Paul Tsochantaris committed on
Commit
ea7167a
·
unverified ·
1 Parent(s): 80cfca4

metal : free metal objects (llama/5161)

Browse files

* Releasing MTLFunction references after Metal pipeline construction

* Keeping the `ggml_metal_kernel` structure

* Spacing fix

* Whitespace fix

Files changed (1) hide show
  1. ggml-metal.m +13 -20
ggml-metal.m CHANGED
@@ -24,10 +24,7 @@
24
 
25
  #define UNUSED(x) (void)(x)
26
 
27
- #define GGML_METAL_MAX_KERNELS 256
28
-
29
  struct ggml_metal_kernel {
30
- id<MTLFunction> function;
31
  id<MTLComputePipelineState> pipeline;
32
  };
33
 
@@ -159,11 +156,10 @@ struct ggml_metal_context {
159
 
160
  id<MTLDevice> device;
161
  id<MTLCommandQueue> queue;
162
- id<MTLLibrary> library;
163
 
164
  dispatch_queue_t d_queue;
165
 
166
- struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
167
 
168
  bool support_simdgroup_reduction;
169
  bool support_simdgroup_mm;
@@ -248,6 +244,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
248
  ctx->queue = [ctx->device newCommandQueue];
249
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
250
 
 
 
251
  // load library
252
  {
253
  NSBundle * bundle = nil;
@@ -262,7 +260,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
262
  // pre-compiled library found
263
  NSURL * libURL = [NSURL fileURLWithPath:libPath];
264
  GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
265
- ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
266
  if (error) {
267
  GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
268
  return NULL;
@@ -304,7 +302,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
304
 
305
  //[options setFastMathEnabled:false];
306
 
307
- ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
308
  if (error) {
309
  GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
310
  return NULL;
@@ -371,8 +369,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
371
  {
372
  NSError * error = nil;
373
 
374
- for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
375
- ctx->kernels[i].function = nil;
376
  ctx->kernels[i].pipeline = nil;
377
  }
378
 
@@ -384,10 +381,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
384
  #define GGML_METAL_ADD_KERNEL(e, name, supported) \
385
  if (supported) { \
386
  struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
387
- kernel->function = [ctx->library newFunctionWithName:@"kernel_"#name]; \
388
- kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \
 
389
  if (error) { \
390
  GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
 
391
  return NULL; \
392
  } \
393
  } else { \
@@ -516,23 +515,17 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
516
  GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
517
  }
518
 
 
519
  return ctx;
520
  }
521
 
522
  static void ggml_metal_free(struct ggml_metal_context * ctx) {
523
  GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
524
 
525
- for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
526
- if (ctx->kernels[i].pipeline) {
527
- [ctx->kernels[i].pipeline release];
528
- }
529
-
530
- if (ctx->kernels[i].function) {
531
- [ctx->kernels[i].function release];
532
- }
533
  }
534
 
535
- [ctx->library release];
536
  [ctx->queue release];
537
  [ctx->device release];
538
 
 
24
 
25
  #define UNUSED(x) (void)(x)
26
 
 
 
27
  struct ggml_metal_kernel {
 
28
  id<MTLComputePipelineState> pipeline;
29
  };
30
 
 
156
 
157
  id<MTLDevice> device;
158
  id<MTLCommandQueue> queue;
 
159
 
160
  dispatch_queue_t d_queue;
161
 
162
+ struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
163
 
164
  bool support_simdgroup_reduction;
165
  bool support_simdgroup_mm;
 
244
  ctx->queue = [ctx->device newCommandQueue];
245
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
246
 
247
+ id<MTLLibrary> metal_library;
248
+
249
  // load library
250
  {
251
  NSBundle * bundle = nil;
 
260
  // pre-compiled library found
261
  NSURL * libURL = [NSURL fileURLWithPath:libPath];
262
  GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
263
+ metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
264
  if (error) {
265
  GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
266
  return NULL;
 
302
 
303
  //[options setFastMathEnabled:false];
304
 
305
+ metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
306
  if (error) {
307
  GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
308
  return NULL;
 
369
  {
370
  NSError * error = nil;
371
 
372
+ for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
 
373
  ctx->kernels[i].pipeline = nil;
374
  }
375
 
 
381
  #define GGML_METAL_ADD_KERNEL(e, name, supported) \
382
  if (supported) { \
383
  struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
384
+ id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
385
+ kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
386
+ [metal_function release]; \
387
  if (error) { \
388
  GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
389
+ [metal_library release]; \
390
  return NULL; \
391
  } \
392
  } else { \
 
515
  GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
516
  }
517
 
518
+ [metal_library release];
519
  return ctx;
520
  }
521
 
522
  static void ggml_metal_free(struct ggml_metal_context * ctx) {
523
  GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
524
 
525
+ for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
526
+ [ctx->kernels[i].pipeline release];
 
 
 
 
 
 
527
  }
528
 
 
529
  [ctx->queue release];
530
  [ctx->device release];
531