Spaces:
Running
Running
Paul Tsochantaris
committed on
metal : free metal objects (llama/5161)
Browse files
* Releasing MTLFunction references after Metal pipeline construction
* Keeping the `ggml_metal_kernel` structure
* Spacing fix
* Whitespace fix
- ggml-metal.m +13 -20
ggml-metal.m
CHANGED
|
@@ -24,10 +24,7 @@
|
|
| 24 |
|
| 25 |
#define UNUSED(x) (void)(x)
|
| 26 |
|
| 27 |
-
#define GGML_METAL_MAX_KERNELS 256
|
| 28 |
-
|
| 29 |
struct ggml_metal_kernel {
|
| 30 |
-
id<MTLFunction> function;
|
| 31 |
id<MTLComputePipelineState> pipeline;
|
| 32 |
};
|
| 33 |
|
|
@@ -159,11 +156,10 @@ struct ggml_metal_context {
|
|
| 159 |
|
| 160 |
id<MTLDevice> device;
|
| 161 |
id<MTLCommandQueue> queue;
|
| 162 |
-
id<MTLLibrary> library;
|
| 163 |
|
| 164 |
dispatch_queue_t d_queue;
|
| 165 |
|
| 166 |
-
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
|
| 167 |
|
| 168 |
bool support_simdgroup_reduction;
|
| 169 |
bool support_simdgroup_mm;
|
|
@@ -248,6 +244,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 248 |
ctx->queue = [ctx->device newCommandQueue];
|
| 249 |
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
| 250 |
|
|
|
|
|
|
|
| 251 |
// load library
|
| 252 |
{
|
| 253 |
NSBundle * bundle = nil;
|
|
@@ -262,7 +260,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 262 |
// pre-compiled library found
|
| 263 |
NSURL * libURL = [NSURL fileURLWithPath:libPath];
|
| 264 |
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
|
| 265 |
-
|
| 266 |
if (error) {
|
| 267 |
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
| 268 |
return NULL;
|
|
@@ -304,7 +302,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 304 |
|
| 305 |
//[options setFastMathEnabled:false];
|
| 306 |
|
| 307 |
-
|
| 308 |
if (error) {
|
| 309 |
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
| 310 |
return NULL;
|
|
@@ -371,8 +369,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 371 |
{
|
| 372 |
NSError * error = nil;
|
| 373 |
|
| 374 |
-
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
|
| 375 |
-
ctx->kernels[i].function = nil;
|
| 376 |
ctx->kernels[i].pipeline = nil;
|
| 377 |
}
|
| 378 |
|
|
@@ -384,10 +381,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 384 |
#define GGML_METAL_ADD_KERNEL(e, name, supported) \
|
| 385 |
if (supported) { \
|
| 386 |
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
|
| 387 |
-
|
| 388 |
-
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \
|
|
|
|
| 389 |
if (error) { \
|
| 390 |
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
|
|
|
|
| 391 |
return NULL; \
|
| 392 |
} \
|
| 393 |
} else { \
|
|
@@ -516,23 +515,17 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 516 |
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
|
| 517 |
}
|
| 518 |
|
|
|
|
| 519 |
return ctx;
|
| 520 |
}
|
| 521 |
|
| 522 |
static void ggml_metal_free(struct ggml_metal_context * ctx) {
|
| 523 |
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
|
| 524 |
|
| 525 |
-
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
|
| 526 |
-
|
| 527 |
-
[ctx->kernels[i].pipeline release];
|
| 528 |
-
}
|
| 529 |
-
|
| 530 |
-
if (ctx->kernels[i].function) {
|
| 531 |
-
[ctx->kernels[i].function release];
|
| 532 |
-
}
|
| 533 |
}
|
| 534 |
|
| 535 |
-
[ctx->library release];
|
| 536 |
[ctx->queue release];
|
| 537 |
[ctx->device release];
|
| 538 |
|
|
|
|
| 24 |
|
| 25 |
#define UNUSED(x) (void)(x)
|
| 26 |
|
|
|
|
|
|
|
| 27 |
struct ggml_metal_kernel {
|
|
|
|
| 28 |
id<MTLComputePipelineState> pipeline;
|
| 29 |
};
|
| 30 |
|
|
|
|
| 156 |
|
| 157 |
id<MTLDevice> device;
|
| 158 |
id<MTLCommandQueue> queue;
|
|
|
|
| 159 |
|
| 160 |
dispatch_queue_t d_queue;
|
| 161 |
|
| 162 |
+
struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
|
| 163 |
|
| 164 |
bool support_simdgroup_reduction;
|
| 165 |
bool support_simdgroup_mm;
|
|
|
|
| 244 |
ctx->queue = [ctx->device newCommandQueue];
|
| 245 |
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
| 246 |
|
| 247 |
+
id<MTLLibrary> metal_library;
|
| 248 |
+
|
| 249 |
// load library
|
| 250 |
{
|
| 251 |
NSBundle * bundle = nil;
|
|
|
|
| 260 |
// pre-compiled library found
|
| 261 |
NSURL * libURL = [NSURL fileURLWithPath:libPath];
|
| 262 |
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
|
| 263 |
+
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
|
| 264 |
if (error) {
|
| 265 |
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
| 266 |
return NULL;
|
|
|
|
| 302 |
|
| 303 |
//[options setFastMathEnabled:false];
|
| 304 |
|
| 305 |
+
metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
|
| 306 |
if (error) {
|
| 307 |
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
| 308 |
return NULL;
|
|
|
|
| 369 |
{
|
| 370 |
NSError * error = nil;
|
| 371 |
|
| 372 |
+
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
|
|
|
|
| 373 |
ctx->kernels[i].pipeline = nil;
|
| 374 |
}
|
| 375 |
|
|
|
|
| 381 |
#define GGML_METAL_ADD_KERNEL(e, name, supported) \
|
| 382 |
if (supported) { \
|
| 383 |
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
|
| 384 |
+
id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
|
| 385 |
+
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
|
| 386 |
+
[metal_function release]; \
|
| 387 |
if (error) { \
|
| 388 |
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
|
| 389 |
+
[metal_library release]; \
|
| 390 |
return NULL; \
|
| 391 |
} \
|
| 392 |
} else { \
|
|
|
|
| 515 |
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
|
| 516 |
}
|
| 517 |
|
| 518 |
+
[metal_library release];
|
| 519 |
return ctx;
|
| 520 |
}
|
| 521 |
|
| 522 |
static void ggml_metal_free(struct ggml_metal_context * ctx) {
|
| 523 |
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
|
| 524 |
|
| 525 |
+
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
|
| 526 |
+
[ctx->kernels[i].pipeline release];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
}
|
| 528 |
|
|
|
|
| 529 |
[ctx->queue release];
|
| 530 |
[ctx->device release];
|
| 531 |
|