Paul Tsochantaris committed on
Commit
a3e87d3
·
unverified ·
1 Parent(s): ae08f31

metal : remove unused `n_buffers` and `buffers` (llama/5129)

Browse files
Files changed (1) hide show
  1. ggml-metal.m +16 -57
ggml-metal.m CHANGED
@@ -26,15 +26,6 @@
26
 
27
  #define GGML_METAL_MAX_KERNELS 256
28
 
29
- struct ggml_metal_buffer {
30
- const char * name;
31
-
32
- void * data;
33
- size_t size;
34
-
35
- id<MTLBuffer> metal;
36
- };
37
-
38
  struct ggml_metal_kernel {
39
  id<MTLFunction> function;
40
  id<MTLComputePipelineState> pipeline;
@@ -172,9 +163,6 @@ struct ggml_metal_context {
172
 
173
  dispatch_queue_t d_queue;
174
 
175
- int n_buffers;
176
- struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
177
-
178
  struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
179
 
180
  bool support_simdgroup_reduction;
@@ -242,24 +230,20 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
242
  // Show all the Metal device instances in the system
243
  NSArray * devices = MTLCopyAllDevices();
244
  for (id<MTLDevice> device in devices) {
245
- NSString * s = [device name];
246
- GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
247
  }
248
  [devices release]; // since it was created by a *Copy* C method
249
  #endif
250
 
251
  // Pick and show default Metal device
252
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
253
- NSString * s = [device name];
254
- GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
255
 
256
  // Configure context
257
  struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
258
  ctx->device = device;
259
  ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
260
  ctx->queue = [ctx->device newCommandQueue];
261
- ctx->n_buffers = 0;
262
-
263
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
264
 
265
  // load library
@@ -534,10 +518,6 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
534
  static void ggml_metal_free(struct ggml_metal_context * ctx) {
535
  GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
536
 
537
- for (int i = 0; i < ctx->n_buffers; ++i) {
538
- [ctx->buffers[i].metal release];
539
- }
540
-
541
  for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
542
  if (ctx->kernels[i].pipeline) {
543
  [ctx->kernels[i].pipeline release];
@@ -580,51 +560,30 @@ struct ggml_backend_metal_buffer_context {
580
  // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
581
  // Metal buffer based on the host memory pointer
582
  //
583
- static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
584
  //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
585
 
586
  const int64_t tsize = ggml_nbytes(t);
587
 
588
  ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
589
 
590
- // compatibility with ggml-backend
591
- if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
592
- struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
593
-
594
- // find the view that contains the tensor fully
595
- for (int i = 0; i < buf_ctx->n_buffers; ++i) {
596
- const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
597
-
598
- //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
599
- if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
600
- *offs = (size_t) ioffs;
601
-
602
- //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
603
-
604
- return buf_ctx->buffers[i].metal;
605
- }
606
- }
607
-
608
- GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
609
-
610
- return nil;
611
- }
612
 
613
  // find the view that contains the tensor fully
614
- for (int i = 0; i < ctx->n_buffers; ++i) {
615
- const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
616
 
617
- //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
618
- if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
619
  *offs = (size_t) ioffs;
620
 
621
- //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
622
 
623
- return ctx->buffers[i].metal;
624
  }
625
  }
626
 
627
- GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
628
 
629
  return nil;
630
  }
@@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
817
  const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
818
  const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
819
 
820
- id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
821
- id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
822
- id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil;
823
 
824
  //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
825
  //if (src0) {
@@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
1601
  struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
1602
 
1603
  size_t offs_src_cur = 0;
1604
- id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
1605
 
1606
  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
1607
  }
@@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
1746
  struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
1747
 
1748
  size_t offs_src_cur = 0;
1749
- id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
1750
 
1751
  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
1752
  }
 
26
 
27
  #define GGML_METAL_MAX_KERNELS 256
28
 
 
 
 
 
 
 
 
 
 
29
  struct ggml_metal_kernel {
30
  id<MTLFunction> function;
31
  id<MTLComputePipelineState> pipeline;
 
163
 
164
  dispatch_queue_t d_queue;
165
 
 
 
 
166
  struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
167
 
168
  bool support_simdgroup_reduction;
 
230
  // Show all the Metal device instances in the system
231
  NSArray * devices = MTLCopyAllDevices();
232
  for (id<MTLDevice> device in devices) {
233
+ GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
 
234
  }
235
  [devices release]; // since it was created by a *Copy* C method
236
  #endif
237
 
238
  // Pick and show default Metal device
239
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
240
+ GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
 
241
 
242
  // Configure context
243
  struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
244
  ctx->device = device;
245
  ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
246
  ctx->queue = [ctx->device newCommandQueue];
 
 
247
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
248
 
249
  // load library
 
518
  static void ggml_metal_free(struct ggml_metal_context * ctx) {
519
  GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
520
 
 
 
 
 
521
  for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
522
  if (ctx->kernels[i].pipeline) {
523
  [ctx->kernels[i].pipeline release];
 
560
  // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
561
  // Metal buffer based on the host memory pointer
562
  //
563
+ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
564
  //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
565
 
566
  const int64_t tsize = ggml_nbytes(t);
567
 
568
  ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
569
 
570
+ struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
 
572
  // find the view that contains the tensor fully
573
+ for (int i = 0; i < buf_ctx->n_buffers; ++i) {
574
+ const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
575
 
576
+ //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
577
+ if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
578
  *offs = (size_t) ioffs;
579
 
580
+ //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
581
 
582
+ return buf_ctx->buffers[i].metal;
583
  }
584
  }
585
 
586
+ GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
587
 
588
  return nil;
589
  }
 
776
  const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
777
  const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
778
 
779
+ id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
780
+ id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
781
+ id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
782
 
783
  //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
784
  //if (src0) {
 
1560
  struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
1561
 
1562
  size_t offs_src_cur = 0;
1563
+ id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
1564
 
1565
  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
1566
  }
 
1705
  struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
1706
 
1707
  size_t offs_src_cur = 0;
1708
+ id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
1709
 
1710
  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
1711
  }