Paul Tsochantaris committed
metal : remove unused `n_buffers` and `buffers` (llama/5129)
ggml-metal.m CHANGED (+16 -57)
@@ -26,15 +26,6 @@
 
 #define GGML_METAL_MAX_KERNELS 256
 
-struct ggml_metal_buffer {
-    const char * name;
-
-    void   * data;
-    size_t   size;
-
-    id<MTLBuffer> metal;
-};
-
 struct ggml_metal_kernel {
     id<MTLFunction>             function;
     id<MTLComputePipelineState> pipeline;
@@ -172,9 +163,6 @@ struct ggml_metal_context {
 
     dispatch_queue_t d_queue;
 
-    int n_buffers;
-    struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
-
     struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
 
     bool support_simdgroup_reduction;
@@ -242,24 +230,20 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
     // Show all the Metal device instances in the system
     NSArray * devices = MTLCopyAllDevices();
     for (id<MTLDevice> device in devices) {
-        NSString * s = [device name];
-        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
+        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
     }
     [devices release]; // since it was created by a *Copy* C method
 #endif
 
     // Pick and show default Metal device
     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
-    NSString * s = [device name];
-    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
+    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
 
     // Configure context
     struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
     ctx->device = device;
     ctx->n_cb   = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
     ctx->queue  = [ctx->device newCommandQueue];
-    ctx->n_buffers = 0;
-
     ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
 
     // load library
@@ -534,10 +518,6 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
 static void ggml_metal_free(struct ggml_metal_context * ctx) {
     GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
 
-    for (int i = 0; i < ctx->n_buffers; ++i) {
-        [ctx->buffers[i].metal release];
-    }
-
     for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
         if (ctx->kernels[i].pipeline) {
             [ctx->kernels[i].pipeline release];
@@ -580,51 +560,30 @@ struct ggml_backend_metal_buffer_context {
 // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
 // Metal buffer based on the host memory pointer
 //
-static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
+static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
     //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
 
     const int64_t tsize = ggml_nbytes(t);
 
     ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
 
-
-    if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
-        struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
-
-        // find the view that contains the tensor fully
-        for (int i = 0; i < buf_ctx->n_buffers; ++i) {
-            const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
-
-            //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
-            if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
-                *offs = (size_t) ioffs;
-
-                //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
-
-                return buf_ctx->buffers[i].metal;
-            }
-        }
-
-        GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
-
-        return nil;
-    }
+    struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
 
     // find the view that contains the tensor fully
-    for (int i = 0; i < ctx->n_buffers; ++i) {
-        const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
+    for (int i = 0; i < buf_ctx->n_buffers; ++i) {
+        const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
 
-        //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size);
-        if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
+        //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
+        if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
             *offs = (size_t) ioffs;
 
-            //GGML_METAL_LOG_INFO("%s:
+            //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
 
-            return ctx->buffers[i].metal;
+            return buf_ctx->buffers[i].metal;
         }
     }
 
-    GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
+    GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
 
     return nil;
 }
@@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
         const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
         const enum ggml_type dstt  = dst  ? dst->type  : GGML_TYPE_COUNT;
 
-        id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
-        id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
-        id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(ctx, dst,  &offs_dst)  : nil;
+        id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
+        id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
+        id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(dst,  &offs_dst)  : nil;
 
         //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
         //if (src0) {
@@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
                         struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
 
                         size_t offs_src_cur = 0;
-                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
 
                         [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
                     }
@@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
                         struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
 
                         size_t offs_src_cur = 0;
-                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
 
                         [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
                     }
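For context, the lookup that survives this cleanup is a linear scan over the views held by the Metal backend buffer context: compute the tensor's byte offset from each view's host base pointer and return the first view that fully contains the tensor's bytes, together with that offset. Below is a minimal, standalone sketch of that containment check in plain C; the host_view struct and find_view helper are hypothetical stand-ins for ggml_backend_metal_buffer_context and ggml_metal_get_buffer, not part of ggml.

#include <stdint.h>
#include <stdio.h>

// Hypothetical stand-in for one host/device view pair
// (mirrors the data/size fields used by ggml-metal.m; the real
// struct also carries the id<MTLBuffer> handle for the view).
struct host_view {
    void * data;   // host base pointer of the view
    size_t size;   // view size in bytes
};

// Return the index of the view that fully contains [ptr, ptr + nbytes)
// and write the byte offset into *offs; return -1 if no view matches.
static int find_view(const struct host_view * views, int n_views,
                     const void * ptr, size_t nbytes, size_t * offs) {
    for (int i = 0; i < n_views; ++i) {
        const int64_t ioffs = (int64_t) ptr - (int64_t) views[i].data;
        if (ioffs >= 0 && ioffs + (int64_t) nbytes <= (int64_t) views[i].size) {
            *offs = (size_t) ioffs;
            return i;
        }
    }
    return -1;
}

int main(void) {
    static char pool[4096];
    struct host_view views[1] = { { pool, sizeof(pool) } };

    size_t offs = 0;
    int idx = find_view(views, 1, pool + 128, 256, &offs);
    printf("view %d, offset %zu\n", idx, offs); // prints: view 0, offset 128
    return 0;
}

Because everything the lookup needs is reachable from the tensor's own backend buffer, ggml_metal_get_buffer no longer requires the ggml_metal_context, which is why the ctx argument disappears from every call site in ggml_metal_graph_compute in the diff above.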