Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
df2cb6e
1
Parent(s):
ae9a15f
vulkan : add backend registry / device interfaces (llama/9721)
Browse files
* vulkan : add backend registry / device interfaces
* llama : print devices used on model load
- ggml/include/ggml-vulkan.h +2 -0
- ggml/src/ggml-backend.cpp +8 -1
- ggml/src/ggml-vulkan.cpp +204 -74
ggml/include/ggml-vulkan.h
CHANGED
|
@@ -24,6 +24,8 @@ GGML_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
|
| 24 |
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
| 25 |
GGML_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
| 26 |
|
|
|
|
|
|
|
| 27 |
#ifdef __cplusplus
|
| 28 |
}
|
| 29 |
#endif
|
|
|
|
| 24 |
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
| 25 |
GGML_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
| 26 |
|
| 27 |
+
GGML_API ggml_backend_reg_t ggml_backend_vk_reg(void);
|
| 28 |
+
|
| 29 |
#ifdef __cplusplus
|
| 30 |
}
|
| 31 |
#endif
|
ggml/src/ggml-backend.cpp
CHANGED
|
@@ -538,6 +538,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
|
|
| 538 |
#include "ggml-metal.h"
|
| 539 |
#endif
|
| 540 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
#ifdef GGML_USE_BLAS
|
| 542 |
#include "ggml-blas.h"
|
| 543 |
#endif
|
|
@@ -557,6 +561,9 @@ struct ggml_backend_registry {
|
|
| 557 |
#ifdef GGML_USE_METAL
|
| 558 |
register_backend(ggml_backend_metal_reg());
|
| 559 |
#endif
|
|
|
|
|
|
|
|
|
|
| 560 |
#ifdef GGML_USE_BLAS
|
| 561 |
register_backend(ggml_backend_blas_reg());
|
| 562 |
#endif
|
|
@@ -564,7 +571,7 @@ struct ggml_backend_registry {
|
|
| 564 |
register_backend(ggml_backend_rpc_reg());
|
| 565 |
#endif
|
| 566 |
|
| 567 |
-
// TODO: sycl,
|
| 568 |
|
| 569 |
register_backend(ggml_backend_cpu_reg());
|
| 570 |
}
|
|
|
|
| 538 |
#include "ggml-metal.h"
|
| 539 |
#endif
|
| 540 |
|
| 541 |
+
#ifdef GGML_USE_VULKAN
|
| 542 |
+
#include "ggml-vulkan.h"
|
| 543 |
+
#endif
|
| 544 |
+
|
| 545 |
#ifdef GGML_USE_BLAS
|
| 546 |
#include "ggml-blas.h"
|
| 547 |
#endif
|
|
|
|
| 561 |
#ifdef GGML_USE_METAL
|
| 562 |
register_backend(ggml_backend_metal_reg());
|
| 563 |
#endif
|
| 564 |
+
#ifdef GGML_USE_VULKAN
|
| 565 |
+
register_backend(ggml_backend_vk_reg());
|
| 566 |
+
#endif
|
| 567 |
#ifdef GGML_USE_BLAS
|
| 568 |
register_backend(ggml_backend_blas_reg());
|
| 569 |
#endif
|
|
|
|
| 571 |
register_backend(ggml_backend_rpc_reg());
|
| 572 |
#endif
|
| 573 |
|
| 574 |
+
// TODO: sycl, kompute, cann
|
| 575 |
|
| 576 |
register_backend(ggml_backend_cpu_reg());
|
| 577 |
}
|
ggml/src/ggml-vulkan.cpp
CHANGED
|
@@ -1941,7 +1941,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|
| 1941 |
if (device->fp16) {
|
| 1942 |
device_extensions.push_back("VK_KHR_shader_float16_int8");
|
| 1943 |
}
|
| 1944 |
-
device->name =
|
| 1945 |
|
| 1946 |
device_create_info = {
|
| 1947 |
vk::DeviceCreateFlags(),
|
|
@@ -1968,7 +1968,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|
| 1968 |
|
| 1969 |
device->buffer_type = {
|
| 1970 |
/* .iface = */ ggml_backend_vk_buffer_type_interface,
|
| 1971 |
-
/* .device = */
|
| 1972 |
/* .context = */ new ggml_backend_vk_buffer_type_context{ device->name, device },
|
| 1973 |
};
|
| 1974 |
|
|
@@ -6378,7 +6378,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
|
| 6378 |
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 6379 |
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 6380 |
},
|
| 6381 |
-
/* .device = */
|
| 6382 |
/* .context = */ nullptr,
|
| 6383 |
};
|
| 6384 |
|
|
@@ -6581,9 +6581,135 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
|
|
| 6581 |
UNUSED(backend);
|
| 6582 |
}
|
| 6583 |
|
| 6584 |
-
|
| 6585 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6586 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6587 |
switch (op->op) {
|
| 6588 |
case GGML_OP_UNARY:
|
| 6589 |
switch (ggml_get_unary_op(op)) {
|
|
@@ -6701,97 +6827,101 @@ static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const ggml_tenso
|
|
| 6701 |
return false;
|
| 6702 |
}
|
| 6703 |
|
| 6704 |
-
UNUSED(
|
| 6705 |
-
}
|
| 6706 |
-
|
| 6707 |
-
static bool ggml_backend_vk_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
|
| 6708 |
-
const int min_batch_size = 32;
|
| 6709 |
-
|
| 6710 |
-
return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
|
| 6711 |
-
(op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
|
| 6712 |
-
|
| 6713 |
-
UNUSED(backend);
|
| 6714 |
}
|
| 6715 |
|
| 6716 |
-
static bool
|
| 6717 |
if (buft->iface.get_name != ggml_backend_vk_buffer_type_name) {
|
| 6718 |
return false;
|
| 6719 |
}
|
| 6720 |
|
|
|
|
| 6721 |
ggml_backend_vk_buffer_type_context * buft_ctx = (ggml_backend_vk_buffer_type_context *)buft->context;
|
| 6722 |
-
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
| 6723 |
-
|
| 6724 |
-
return buft_ctx->device == ctx->device;
|
| 6725 |
-
}
|
| 6726 |
-
|
| 6727 |
-
// TODO: enable async and synchronize
|
| 6728 |
-
static ggml_backend_i ggml_backend_vk_interface = {
|
| 6729 |
-
/* .get_name = */ ggml_backend_vk_name,
|
| 6730 |
-
/* .free = */ ggml_backend_vk_free,
|
| 6731 |
-
/* .get_default_buffer_type = */ ggml_backend_vk_get_default_buffer_type,
|
| 6732 |
-
/* .set_tensor_async = */ NULL, // ggml_backend_vk_set_tensor_async,
|
| 6733 |
-
/* .get_tensor_async = */ NULL, // ggml_backend_vk_get_tensor_async,
|
| 6734 |
-
/* .cpy_tensor_async = */ NULL, // ggml_backend_vk_cpy_tensor_async,
|
| 6735 |
-
/* .synchronize = */ NULL, // ggml_backend_vk_synchronize,
|
| 6736 |
-
/* .graph_plan_create = */ NULL,
|
| 6737 |
-
/* .graph_plan_free = */ NULL,
|
| 6738 |
-
/* .graph_plan_update = */ NULL,
|
| 6739 |
-
/* .graph_plan_compute = */ NULL,
|
| 6740 |
-
/* .graph_compute = */ ggml_backend_vk_graph_compute,
|
| 6741 |
-
/* .supports_op = */ ggml_backend_vk_supports_op,
|
| 6742 |
-
/* .supports_buft = */ ggml_backend_vk_supports_buft,
|
| 6743 |
-
/* .offload_op = */ ggml_backend_vk_offload_op,
|
| 6744 |
-
/* .event_record = */ NULL,
|
| 6745 |
-
/* .event_wait = */ NULL,
|
| 6746 |
-
};
|
| 6747 |
|
| 6748 |
-
|
| 6749 |
-
static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x40, 0x3c, 0xe1, 0x02, 0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b };
|
| 6750 |
-
return &guid;
|
| 6751 |
}
|
| 6752 |
|
| 6753 |
-
|
| 6754 |
-
|
| 6755 |
|
| 6756 |
-
|
| 6757 |
-
|
| 6758 |
|
| 6759 |
-
|
| 6760 |
-
|
| 6761 |
-
|
| 6762 |
-
|
| 6763 |
-
|
| 6764 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6765 |
|
| 6766 |
-
|
|
|
|
|
|
|
| 6767 |
}
|
| 6768 |
|
| 6769 |
-
|
| 6770 |
-
|
|
|
|
| 6771 |
}
|
| 6772 |
|
| 6773 |
-
|
| 6774 |
-
|
| 6775 |
-
}
|
| 6776 |
|
| 6777 |
-
|
| 6778 |
-
ggml_vk_get_device_description(device, description, description_size);
|
| 6779 |
-
}
|
| 6780 |
|
| 6781 |
-
|
| 6782 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6783 |
|
| 6784 |
-
|
|
|
|
|
|
|
| 6785 |
|
| 6786 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6787 |
|
| 6788 |
-
|
| 6789 |
-
|
| 6790 |
-
|
| 6791 |
-
|
| 6792 |
-
|
| 6793 |
-
|
| 6794 |
-
|
| 6795 |
}
|
| 6796 |
|
| 6797 |
// Extension availability
|
|
|
|
| 1941 |
if (device->fp16) {
|
| 1942 |
device_extensions.push_back("VK_KHR_shader_float16_int8");
|
| 1943 |
}
|
| 1944 |
+
device->name = GGML_VK_NAME + std::to_string(idx);
|
| 1945 |
|
| 1946 |
device_create_info = {
|
| 1947 |
vk::DeviceCreateFlags(),
|
|
|
|
| 1968 |
|
| 1969 |
device->buffer_type = {
|
| 1970 |
/* .iface = */ ggml_backend_vk_buffer_type_interface,
|
| 1971 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), idx),
|
| 1972 |
/* .context = */ new ggml_backend_vk_buffer_type_context{ device->name, device },
|
| 1973 |
};
|
| 1974 |
|
|
|
|
| 6378 |
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 6379 |
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 6380 |
},
|
| 6381 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0),
|
| 6382 |
/* .context = */ nullptr,
|
| 6383 |
};
|
| 6384 |
|
|
|
|
| 6581 |
UNUSED(backend);
|
| 6582 |
}
|
| 6583 |
|
| 6584 |
+
// TODO: enable async and synchronize
|
| 6585 |
+
static ggml_backend_i ggml_backend_vk_interface = {
|
| 6586 |
+
/* .get_name = */ ggml_backend_vk_name,
|
| 6587 |
+
/* .free = */ ggml_backend_vk_free,
|
| 6588 |
+
/* .get_default_buffer_type = */ ggml_backend_vk_get_default_buffer_type,
|
| 6589 |
+
/* .set_tensor_async = */ NULL, // ggml_backend_vk_set_tensor_async,
|
| 6590 |
+
/* .get_tensor_async = */ NULL, // ggml_backend_vk_get_tensor_async,
|
| 6591 |
+
/* .cpy_tensor_async = */ NULL, // ggml_backend_vk_cpy_tensor_async,
|
| 6592 |
+
/* .synchronize = */ NULL, // ggml_backend_vk_synchronize,
|
| 6593 |
+
/* .graph_plan_create = */ NULL,
|
| 6594 |
+
/* .graph_plan_free = */ NULL,
|
| 6595 |
+
/* .graph_plan_update = */ NULL,
|
| 6596 |
+
/* .graph_plan_compute = */ NULL,
|
| 6597 |
+
/* .graph_compute = */ ggml_backend_vk_graph_compute,
|
| 6598 |
+
/* .supports_op = */ NULL,
|
| 6599 |
+
/* .supports_buft = */ NULL,
|
| 6600 |
+
/* .offload_op = */ NULL,
|
| 6601 |
+
/* .event_record = */ NULL,
|
| 6602 |
+
/* .event_wait = */ NULL,
|
| 6603 |
+
};
|
| 6604 |
+
|
| 6605 |
+
static ggml_guid_t ggml_backend_vk_guid() {
|
| 6606 |
+
static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x40, 0x3c, 0xe1, 0x02, 0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b };
|
| 6607 |
+
return &guid;
|
| 6608 |
+
}
|
| 6609 |
+
|
| 6610 |
+
ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
|
| 6611 |
+
VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
|
| 6612 |
+
|
| 6613 |
+
ggml_backend_vk_context * ctx = new ggml_backend_vk_context;
|
| 6614 |
+
ggml_vk_init(ctx, dev_num);
|
| 6615 |
+
|
| 6616 |
+
ggml_backend_t vk_backend = new ggml_backend {
|
| 6617 |
+
/* .guid = */ ggml_backend_vk_guid(),
|
| 6618 |
+
/* .interface = */ ggml_backend_vk_interface,
|
| 6619 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), dev_num),
|
| 6620 |
+
/* .context = */ ctx,
|
| 6621 |
+
};
|
| 6622 |
+
|
| 6623 |
+
return vk_backend;
|
| 6624 |
+
}
|
| 6625 |
+
|
| 6626 |
+
bool ggml_backend_is_vk(ggml_backend_t backend) {
|
| 6627 |
+
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_vk_guid());
|
| 6628 |
+
}
|
| 6629 |
+
|
| 6630 |
+
int ggml_backend_vk_get_device_count() {
|
| 6631 |
+
return ggml_vk_get_device_count();
|
| 6632 |
+
}
|
| 6633 |
+
|
| 6634 |
+
void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size) {
|
| 6635 |
+
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
| 6636 |
+
int dev_idx = vk_instance.device_indices[device];
|
| 6637 |
+
ggml_vk_get_device_description(dev_idx, description, description_size);
|
| 6638 |
+
}
|
| 6639 |
+
|
| 6640 |
+
void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
| 6641 |
+
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
| 6642 |
+
|
| 6643 |
+
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
| 6644 |
+
|
| 6645 |
+
vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
|
| 6646 |
+
|
| 6647 |
+
for (const vk::MemoryHeap& heap : memprops.memoryHeaps) {
|
| 6648 |
+
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
| 6649 |
+
*total = heap.size;
|
| 6650 |
+
*free = heap.size;
|
| 6651 |
+
break;
|
| 6652 |
+
}
|
| 6653 |
+
}
|
| 6654 |
+
}
|
| 6655 |
+
|
| 6656 |
+
//////////////////////////
|
| 6657 |
+
|
| 6658 |
+
struct ggml_backend_vk_device_context {
|
| 6659 |
+
int device;
|
| 6660 |
+
std::string name;
|
| 6661 |
+
std::string description;
|
| 6662 |
+
};
|
| 6663 |
+
|
| 6664 |
+
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
|
| 6665 |
+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
| 6666 |
+
return ctx->name.c_str();
|
| 6667 |
+
}
|
| 6668 |
+
|
| 6669 |
+
static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t dev) {
|
| 6670 |
+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
| 6671 |
+
return ctx->description.c_str();
|
| 6672 |
+
}
|
| 6673 |
+
|
| 6674 |
+
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
|
| 6675 |
+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
|
| 6676 |
+
ggml_backend_vk_get_device_memory(ctx->device, free, total);
|
| 6677 |
+
}
|
| 6678 |
+
|
| 6679 |
+
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
|
| 6680 |
+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
| 6681 |
+
return ggml_backend_vk_buffer_type(ctx->device);
|
| 6682 |
+
}
|
| 6683 |
+
|
| 6684 |
+
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_host_buffer_type(ggml_backend_dev_t dev) {
|
| 6685 |
+
UNUSED(dev);
|
| 6686 |
+
return ggml_backend_vk_host_buffer_type();
|
| 6687 |
+
}
|
| 6688 |
|
| 6689 |
+
static enum ggml_backend_dev_type ggml_backend_vk_device_get_type(ggml_backend_dev_t dev) {
|
| 6690 |
+
UNUSED(dev);
|
| 6691 |
+
return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
|
| 6692 |
+
}
|
| 6693 |
+
|
| 6694 |
+
static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
| 6695 |
+
props->name = ggml_backend_vk_device_get_name(dev);
|
| 6696 |
+
props->description = ggml_backend_vk_device_get_description(dev);
|
| 6697 |
+
props->type = ggml_backend_vk_device_get_type(dev);
|
| 6698 |
+
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
| 6699 |
+
props->caps = {
|
| 6700 |
+
/* async */ false,
|
| 6701 |
+
/* host_buffer */ true,
|
| 6702 |
+
/* events */ false,
|
| 6703 |
+
};
|
| 6704 |
+
}
|
| 6705 |
+
|
| 6706 |
+
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
| 6707 |
+
UNUSED(params);
|
| 6708 |
+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
| 6709 |
+
return ggml_backend_vk_init(ctx->device);
|
| 6710 |
+
}
|
| 6711 |
+
|
| 6712 |
+
static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
| 6713 |
switch (op->op) {
|
| 6714 |
case GGML_OP_UNARY:
|
| 6715 |
switch (ggml_get_unary_op(op)) {
|
|
|
|
| 6827 |
return false;
|
| 6828 |
}
|
| 6829 |
|
| 6830 |
+
UNUSED(dev);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6831 |
}
|
| 6832 |
|
| 6833 |
+
static bool ggml_backend_vk_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
| 6834 |
if (buft->iface.get_name != ggml_backend_vk_buffer_type_name) {
|
| 6835 |
return false;
|
| 6836 |
}
|
| 6837 |
|
| 6838 |
+
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
| 6839 |
ggml_backend_vk_buffer_type_context * buft_ctx = (ggml_backend_vk_buffer_type_context *)buft->context;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6840 |
|
| 6841 |
+
return buft_ctx->device->idx == ctx->device;
|
|
|
|
|
|
|
| 6842 |
}
|
| 6843 |
|
| 6844 |
+
static bool ggml_backend_vk_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
| 6845 |
+
const int min_batch_size = 32;
|
| 6846 |
|
| 6847 |
+
return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
|
| 6848 |
+
(op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
|
| 6849 |
|
| 6850 |
+
UNUSED(dev);
|
| 6851 |
+
}
|
| 6852 |
+
|
| 6853 |
+
static const struct ggml_backend_device_i ggml_backend_vk_device_i = {
|
| 6854 |
+
/* .get_name = */ ggml_backend_vk_device_get_name,
|
| 6855 |
+
/* .get_description = */ ggml_backend_vk_device_get_description,
|
| 6856 |
+
/* .get_memory = */ ggml_backend_vk_device_get_memory,
|
| 6857 |
+
/* .get_type = */ ggml_backend_vk_device_get_type,
|
| 6858 |
+
/* .get_props = */ ggml_backend_vk_device_get_props,
|
| 6859 |
+
/* .init_backend = */ ggml_backend_vk_device_init,
|
| 6860 |
+
/* .get_buffer_type = */ ggml_backend_vk_device_get_buffer_type,
|
| 6861 |
+
/* .get_host_buffer_type = */ ggml_backend_vk_device_get_host_buffer_type,
|
| 6862 |
+
/* .buffer_from_host_ptr = */ NULL,
|
| 6863 |
+
/* .supports_op = */ ggml_backend_vk_device_supports_op,
|
| 6864 |
+
/* .supports_buft = */ ggml_backend_vk_device_supports_buft,
|
| 6865 |
+
/* .offload_op = */ ggml_backend_vk_device_offload_op,
|
| 6866 |
+
/* .event_new = */ NULL,
|
| 6867 |
+
/* .event_free = */ NULL,
|
| 6868 |
+
/* .event_synchronize = */ NULL,
|
| 6869 |
+
};
|
| 6870 |
|
| 6871 |
+
static const char * ggml_backend_vk_reg_get_name(ggml_backend_reg_t reg) {
|
| 6872 |
+
UNUSED(reg);
|
| 6873 |
+
return GGML_VK_NAME;
|
| 6874 |
}
|
| 6875 |
|
| 6876 |
+
static size_t ggml_backend_vk_reg_get_device_count(ggml_backend_reg_t reg) {
|
| 6877 |
+
UNUSED(reg);
|
| 6878 |
+
return ggml_backend_vk_get_device_count();
|
| 6879 |
}
|
| 6880 |
|
| 6881 |
+
static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, size_t device) {
|
| 6882 |
+
static std::vector<ggml_backend_dev_t> devices;
|
|
|
|
| 6883 |
|
| 6884 |
+
static bool initialized = false;
|
|
|
|
|
|
|
| 6885 |
|
| 6886 |
+
{
|
| 6887 |
+
static std::mutex mutex;
|
| 6888 |
+
std::lock_guard<std::mutex> lock(mutex);
|
| 6889 |
+
if (!initialized) {
|
| 6890 |
+
for (size_t i = 0; i < ggml_backend_vk_get_device_count(); i++) {
|
| 6891 |
+
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
|
| 6892 |
+
char desc[256];
|
| 6893 |
+
ggml_backend_vk_get_device_description(i, desc, sizeof(desc));
|
| 6894 |
+
ctx->device = i;
|
| 6895 |
+
ctx->name = GGML_VK_NAME + std::to_string(i);
|
| 6896 |
+
ctx->description = desc;
|
| 6897 |
+
devices.push_back(new ggml_backend_device {
|
| 6898 |
+
/* .iface = */ ggml_backend_vk_device_i,
|
| 6899 |
+
/* .reg = */ reg,
|
| 6900 |
+
/* .context = */ ctx,
|
| 6901 |
+
});
|
| 6902 |
+
}
|
| 6903 |
+
initialized = true;
|
| 6904 |
+
}
|
| 6905 |
+
}
|
| 6906 |
|
| 6907 |
+
GGML_ASSERT(device < devices.size());
|
| 6908 |
+
return devices[device];
|
| 6909 |
+
}
|
| 6910 |
|
| 6911 |
+
static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {
|
| 6912 |
+
/* .get_name = */ ggml_backend_vk_reg_get_name,
|
| 6913 |
+
/* .get_device_count = */ ggml_backend_vk_reg_get_device_count,
|
| 6914 |
+
/* .get_device = */ ggml_backend_vk_reg_get_device,
|
| 6915 |
+
/* .get_proc_address = */ NULL,
|
| 6916 |
+
};
|
| 6917 |
|
| 6918 |
+
ggml_backend_reg_t ggml_backend_vk_reg() {
|
| 6919 |
+
static ggml_backend_reg reg = {
|
| 6920 |
+
/* .iface = */ ggml_backend_vk_reg_i,
|
| 6921 |
+
/* .context = */ nullptr,
|
| 6922 |
+
};
|
| 6923 |
+
|
| 6924 |
+
return &reg;
|
| 6925 |
}
|
| 6926 |
|
| 6927 |
// Extension availability
|