Spaces:
Running
Running
leo-pony
committed on
Commit
·
f8d4728
1
Parent(s):
9542e42
Adapt to dynamically loadable backends mechanism (llama/9970)
Browse files
* [CANN] Adapt to dynamically loadable backends mechanism
* Fix the bug: the inference output is garbled when running in debug mode for LM models whose type is in the Q4_0 class
* Handle the review comments of this pull request
- ggml/include/ggml-cann.h +2 -0
- ggml/src/ggml-backend.cpp +8 -1
- ggml/src/ggml-cann.cpp +250 -104
ggml/include/ggml-cann.h
CHANGED
|
@@ -34,6 +34,8 @@ extern "C" {
|
|
| 34 |
*/
|
| 35 |
#define GGML_CANN_MAX_DEVICES 16
|
| 36 |
|
|
|
|
|
|
|
| 37 |
/**
|
| 38 |
* @brief Initializes the CANN backend for a specified device.
|
| 39 |
*
|
|
|
|
| 34 |
*/
|
| 35 |
#define GGML_CANN_MAX_DEVICES 16
|
| 36 |
|
| 37 |
+
GGML_API ggml_backend_reg_t ggml_backend_cann_reg(void);
|
| 38 |
+
|
| 39 |
/**
|
| 40 |
* @brief Initializes the CANN backend for a specified device.
|
| 41 |
*
|
ggml/src/ggml-backend.cpp
CHANGED
|
@@ -561,6 +561,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
|
|
| 561 |
# include "ggml-amx.h"
|
| 562 |
#endif
|
| 563 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
struct ggml_backend_registry {
|
| 565 |
std::vector<ggml_backend_reg_t> backends;
|
| 566 |
std::vector<ggml_backend_dev_t> devices;
|
|
@@ -587,8 +591,11 @@ struct ggml_backend_registry {
|
|
| 587 |
#ifdef GGML_USE_AMX
|
| 588 |
register_backend(ggml_backend_amx_reg());
|
| 589 |
#endif
|
|
|
|
|
|
|
|
|
|
| 590 |
|
| 591 |
-
// TODO: kompute
|
| 592 |
|
| 593 |
register_backend(ggml_backend_cpu_reg());
|
| 594 |
}
|
|
|
|
| 561 |
# include "ggml-amx.h"
|
| 562 |
#endif
|
| 563 |
|
| 564 |
+
#ifdef GGML_USE_CANN
|
| 565 |
+
#include "ggml-cann.h"
|
| 566 |
+
#endif
|
| 567 |
+
|
| 568 |
struct ggml_backend_registry {
|
| 569 |
std::vector<ggml_backend_reg_t> backends;
|
| 570 |
std::vector<ggml_backend_dev_t> devices;
|
|
|
|
| 591 |
#ifdef GGML_USE_AMX
|
| 592 |
register_backend(ggml_backend_amx_reg());
|
| 593 |
#endif
|
| 594 |
+
#ifdef GGML_USE_CANN
|
| 595 |
+
register_backend(ggml_backend_cann_reg());
|
| 596 |
+
#endif
|
| 597 |
|
| 598 |
+
// TODO: kompute
|
| 599 |
|
| 600 |
register_backend(ggml_backend_cpu_reg());
|
| 601 |
}
|
ggml/src/ggml-cann.cpp
CHANGED
|
@@ -39,6 +39,8 @@
|
|
| 39 |
|
| 40 |
#include "ggml-common.h"
|
| 41 |
|
|
|
|
|
|
|
| 42 |
/**
|
| 43 |
* @brief Handles CANN errors by printing an error message and aborting.
|
| 44 |
*
|
|
@@ -851,13 +853,6 @@ static void ggml_backend_cann_buffer_set_tensor(
|
|
| 851 |
void *transform_buffer = malloc(size);
|
| 852 |
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
| 853 |
|
| 854 |
-
#ifndef NDEBUG
|
| 855 |
-
void *check_buffer = malloc(size);
|
| 856 |
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
| 857 |
-
check_buffer);
|
| 858 |
-
GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
|
| 859 |
-
free(check_buffer);
|
| 860 |
-
#endif
|
| 861 |
ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
|
| 862 |
transform_buffer, size,
|
| 863 |
ACL_MEMCPY_HOST_TO_DEVICE));
|
|
@@ -969,7 +964,7 @@ static void ggml_backend_cann_buffer_clear(
|
|
| 969 |
* This structure defines function pointers to operations that can be performed
|
| 970 |
* on a CANN buffer within the backend.
|
| 971 |
*/
|
| 972 |
-
static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
| 973 |
/* .get_name = */ ggml_backend_cann_buffer_get_name,
|
| 974 |
/* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
|
| 975 |
/* .get_base = */ ggml_backend_cann_buffer_get_base,
|
|
@@ -1105,19 +1100,25 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(
|
|
| 1105 |
GGML_UNUSED(buft);
|
| 1106 |
}
|
| 1107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1108 |
/**
|
| 1109 |
* @brief Interface for managing CANN buffer types in the GGML backend.
|
| 1110 |
*
|
| 1111 |
* Provides function pointers for allocating, querying properties, and managing
|
| 1112 |
* memory for CANN buffer types in the GGML backend.
|
| 1113 |
*/
|
| 1114 |
-
static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
| 1115 |
/* .get_name = */ ggml_backend_cann_buffer_type_name,
|
| 1116 |
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
|
| 1117 |
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
|
| 1118 |
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
| 1119 |
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
|
| 1120 |
-
/* .is_host = */
|
| 1121 |
};
|
| 1122 |
|
| 1123 |
/**
|
|
@@ -1148,7 +1149,7 @@ ggml_backend_cann_buffer_type(int32_t device) {
|
|
| 1148 |
for (int32_t i = 0; i < GGML_CANN_MAX_DEVICES; i++) {
|
| 1149 |
ggml_backend_cann_buffer_types[i] = {
|
| 1150 |
/* .iface = */ ggml_backend_cann_buffer_type_interface,
|
| 1151 |
-
/* .device = */
|
| 1152 |
/* .context = */
|
| 1153 |
new ggml_backend_cann_buffer_type_context{
|
| 1154 |
i, "CANN" + std::to_string(i)},
|
|
@@ -1264,7 +1265,7 @@ ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
|
|
| 1264 |
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 1265 |
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 1266 |
},
|
| 1267 |
-
/* .device = */
|
| 1268 |
/* .context = */ nullptr,
|
| 1269 |
};
|
| 1270 |
|
|
@@ -1511,13 +1512,6 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
|
|
| 1511 |
void *transform_buffer = malloc(size);
|
| 1512 |
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
| 1513 |
|
| 1514 |
-
#ifndef NDEBUG
|
| 1515 |
-
void *check_buffer = malloc(size);
|
| 1516 |
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
| 1517 |
-
check_buffer);
|
| 1518 |
-
GGML_ASSERT(memcmp(data, check_buffer, size));
|
| 1519 |
-
free(check_buffer);
|
| 1520 |
-
#endif
|
| 1521 |
ACL_CHECK(aclrtMemcpyAsync(
|
| 1522 |
(char *)tensor->data + offset, size, transform_buffer, size,
|
| 1523 |
ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
|
|
@@ -1692,7 +1686,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(
|
|
| 1692 |
* @return bool Returns true if the operation is supported by the backend,
|
| 1693 |
* otherwise false.
|
| 1694 |
*/
|
| 1695 |
-
static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
| 1696 |
const ggml_tensor* op) {
|
| 1697 |
switch (op->op) {
|
| 1698 |
case GGML_OP_UNARY:
|
|
@@ -1783,7 +1777,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
| 1783 |
return false;
|
| 1784 |
}
|
| 1785 |
|
| 1786 |
-
GGML_UNUSED(backend);
|
| 1787 |
}
|
| 1788 |
|
| 1789 |
/**
|
|
@@ -1801,31 +1795,6 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
|
|
| 1801 |
return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
|
| 1802 |
}
|
| 1803 |
|
| 1804 |
-
/**
|
| 1805 |
-
* @brief Checks if the CANN backend supports a specific backend buffer type.
|
| 1806 |
-
*
|
| 1807 |
-
* This function determines whether the CANN backend supports the given backend
|
| 1808 |
-
* buffer type by comparing the device context of the backend and buffer type.
|
| 1809 |
-
* It returns true if the devices are same between the backend context and
|
| 1810 |
-
* buffer type context.
|
| 1811 |
-
*
|
| 1812 |
-
* @param backend Pointer to the CANN backend.
|
| 1813 |
-
* @param buft Pointer to the backend buffer type to check.
|
| 1814 |
-
* @return bool Returns true if the CANN backend supports the buffer type,
|
| 1815 |
-
* otherwise false.
|
| 1816 |
-
*/
|
| 1817 |
-
static bool ggml_backend_cann_supports_buft(
|
| 1818 |
-
ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
|
| 1819 |
-
if (ggml_backend_buft_is_cann(buft)) {
|
| 1820 |
-
ggml_backend_cann_context * cann_ctx =
|
| 1821 |
-
(ggml_backend_cann_context *)backend->context;
|
| 1822 |
-
ggml_backend_cann_buffer_type_context * buft_ctx =
|
| 1823 |
-
(ggml_backend_cann_buffer_type_context *)buft->context;
|
| 1824 |
-
return buft_ctx->device == cann_ctx->device;
|
| 1825 |
-
}
|
| 1826 |
-
return false;
|
| 1827 |
-
}
|
| 1828 |
-
|
| 1829 |
/**
|
| 1830 |
* @brief Determines if a tensor operation should be offloaded to the CANN
|
| 1831 |
* backend.
|
|
@@ -1840,54 +1809,14 @@ static bool ggml_backend_cann_supports_buft(
|
|
| 1840 |
* @return bool Returns true if the operation should be offloaded, otherwise
|
| 1841 |
* false.
|
| 1842 |
*/
|
| 1843 |
-
static bool ggml_backend_cann_offload_op(
|
| 1844 |
const ggml_tensor* op) {
|
| 1845 |
const int min_batch_size = 32;
|
| 1846 |
-
GGML_UNUSED(
|
| 1847 |
|
| 1848 |
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
|
| 1849 |
}
|
| 1850 |
|
| 1851 |
-
/**
|
| 1852 |
-
* @brief Creates a new event for the CANN backend.
|
| 1853 |
-
*
|
| 1854 |
-
* This function initializes a new event for the CANN backend by setting the
|
| 1855 |
-
* device and creating an ACL runtime event. The created event is then wrapped
|
| 1856 |
-
* in a ggml_backend_event structure and returned.
|
| 1857 |
-
*
|
| 1858 |
-
* @param backend Pointer to the CANN backend.
|
| 1859 |
-
* @return ggml_backend_event_t Returns a pointer to the new event structure.
|
| 1860 |
-
*/
|
| 1861 |
-
static ggml_backend_event_t ggml_backend_cann_event_new(
|
| 1862 |
-
ggml_backend_t backend) {
|
| 1863 |
-
ggml_backend_cann_context* cann_ctx =
|
| 1864 |
-
(ggml_backend_cann_context*)backend->context;
|
| 1865 |
-
|
| 1866 |
-
ggml_cann_set_device(cann_ctx->device);
|
| 1867 |
-
|
| 1868 |
-
aclrtEvent event;
|
| 1869 |
-
ACL_CHECK(aclrtCreateEvent(&event));
|
| 1870 |
-
|
| 1871 |
-
return new ggml_backend_event{
|
| 1872 |
-
/* .device = */ nullptr,
|
| 1873 |
-
/* .context = */ event,
|
| 1874 |
-
};
|
| 1875 |
-
}
|
| 1876 |
-
|
| 1877 |
-
/**
|
| 1878 |
-
* @brief Frees a CANN backend event.
|
| 1879 |
-
*
|
| 1880 |
-
* This function destroys the ACL runtime event associated with the given CANN
|
| 1881 |
-
* backend event and then deletes the event structure itself.
|
| 1882 |
-
*
|
| 1883 |
-
* @param event Pointer to the event structure to be freed.
|
| 1884 |
-
*/
|
| 1885 |
-
static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
|
| 1886 |
-
ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
|
| 1887 |
-
|
| 1888 |
-
delete event;
|
| 1889 |
-
}
|
| 1890 |
-
|
| 1891 |
/**
|
| 1892 |
* @brief Records an event on the CANN backend stream.
|
| 1893 |
*
|
|
@@ -1924,17 +1853,6 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
|
|
| 1924 |
}
|
| 1925 |
}
|
| 1926 |
|
| 1927 |
-
/**
|
| 1928 |
-
* @brief Synchronizes the given event on the CANN backend.
|
| 1929 |
-
*
|
| 1930 |
-
* This function waits for the specified event to complete on the ACL runtime.
|
| 1931 |
-
*
|
| 1932 |
-
* @param event Pointer to the event structure to be synchronized.
|
| 1933 |
-
*/
|
| 1934 |
-
static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
|
| 1935 |
-
ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
|
| 1936 |
-
}
|
| 1937 |
-
|
| 1938 |
/**
|
| 1939 |
* @brief Structure defining the interface for the CANN backend.
|
| 1940 |
*
|
|
@@ -1942,7 +1860,7 @@ static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
|
|
| 1942 |
* supported by the CANN backend, including name retrieval, memory
|
| 1943 |
* management, tensor operations, synchronization, and event handling.
|
| 1944 |
*/
|
| 1945 |
-
static ggml_backend_i ggml_backend_cann_interface = {
|
| 1946 |
/* .get_name = */ ggml_backend_cann_name,
|
| 1947 |
/* .free = */ ggml_backend_cann_free,
|
| 1948 |
/* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
|
|
@@ -1955,9 +1873,9 @@ static ggml_backend_i ggml_backend_cann_interface = {
|
|
| 1955 |
/* .graph_plan_update = */ NULL,
|
| 1956 |
/* .graph_plan_compute = */ NULL,
|
| 1957 |
/* .graph_compute = */ ggml_backend_cann_graph_compute,
|
| 1958 |
-
/* .supports_op = */
|
| 1959 |
-
/* .supports_buft = */
|
| 1960 |
-
/* .offload_op = */
|
| 1961 |
/* .event_record = */ ggml_backend_cann_event_record,
|
| 1962 |
/* .event_wait = */ ggml_backend_cann_event_wait,
|
| 1963 |
};
|
|
@@ -1976,6 +1894,234 @@ static ggml_guid_t ggml_backend_cann_guid() {
|
|
| 1976 |
return &guid;
|
| 1977 |
}
|
| 1978 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1979 |
ggml_backend_t ggml_backend_cann_init(int32_t device) {
|
| 1980 |
aclInit(nullptr);
|
| 1981 |
if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
|
|
@@ -1992,7 +2138,7 @@ ggml_backend_t ggml_backend_cann_init(int32_t device) {
|
|
| 1992 |
ggml_backend_t cann_backend =
|
| 1993 |
new ggml_backend{/* .guid = */ ggml_backend_cann_guid(),
|
| 1994 |
/* .interface = */ ggml_backend_cann_interface,
|
| 1995 |
-
/* .device = */
|
| 1996 |
/* .context = */ ctx};
|
| 1997 |
|
| 1998 |
return cann_backend;
|
|
|
|
| 39 |
|
| 40 |
#include "ggml-common.h"
|
| 41 |
|
| 42 |
+
#define GGML_CANN_NAME "CANN"
|
| 43 |
+
|
| 44 |
/**
|
| 45 |
* @brief Handles CANN errors by printing an error message and aborting.
|
| 46 |
*
|
|
|
|
| 853 |
void *transform_buffer = malloc(size);
|
| 854 |
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
| 855 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 856 |
ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
|
| 857 |
transform_buffer, size,
|
| 858 |
ACL_MEMCPY_HOST_TO_DEVICE));
|
|
|
|
| 964 |
* This structure defines function pointers to operations that can be performed
|
| 965 |
* on a CANN buffer within the backend.
|
| 966 |
*/
|
| 967 |
+
static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
| 968 |
/* .get_name = */ ggml_backend_cann_buffer_get_name,
|
| 969 |
/* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
|
| 970 |
/* .get_base = */ ggml_backend_cann_buffer_get_base,
|
|
|
|
| 1100 |
GGML_UNUSED(buft);
|
| 1101 |
}
|
| 1102 |
|
| 1103 |
+
static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
|
| 1104 |
+
return false;
|
| 1105 |
+
|
| 1106 |
+
GGML_UNUSED(buft);
|
| 1107 |
+
}
|
| 1108 |
+
|
| 1109 |
/**
|
| 1110 |
* @brief Interface for managing CANN buffer types in the GGML backend.
|
| 1111 |
*
|
| 1112 |
* Provides function pointers for allocating, querying properties, and managing
|
| 1113 |
* memory for CANN buffer types in the GGML backend.
|
| 1114 |
*/
|
| 1115 |
+
static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
| 1116 |
/* .get_name = */ ggml_backend_cann_buffer_type_name,
|
| 1117 |
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
|
| 1118 |
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
|
| 1119 |
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
| 1120 |
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
|
| 1121 |
+
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
|
| 1122 |
};
|
| 1123 |
|
| 1124 |
/**
|
|
|
|
| 1149 |
for (int32_t i = 0; i < GGML_CANN_MAX_DEVICES; i++) {
|
| 1150 |
ggml_backend_cann_buffer_types[i] = {
|
| 1151 |
/* .iface = */ ggml_backend_cann_buffer_type_interface,
|
| 1152 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
|
| 1153 |
/* .context = */
|
| 1154 |
new ggml_backend_cann_buffer_type_context{
|
| 1155 |
i, "CANN" + std::to_string(i)},
|
|
|
|
| 1265 |
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 1266 |
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 1267 |
},
|
| 1268 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
|
| 1269 |
/* .context = */ nullptr,
|
| 1270 |
};
|
| 1271 |
|
|
|
|
| 1512 |
void *transform_buffer = malloc(size);
|
| 1513 |
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
| 1514 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1515 |
ACL_CHECK(aclrtMemcpyAsync(
|
| 1516 |
(char *)tensor->data + offset, size, transform_buffer, size,
|
| 1517 |
ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
|
|
|
|
| 1686 |
* @return bool Returns true if the operation is supported by the backend,
|
| 1687 |
* otherwise false.
|
| 1688 |
*/
|
| 1689 |
+
static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
| 1690 |
const ggml_tensor* op) {
|
| 1691 |
switch (op->op) {
|
| 1692 |
case GGML_OP_UNARY:
|
|
|
|
| 1777 |
return false;
|
| 1778 |
}
|
| 1779 |
|
| 1780 |
+
GGML_UNUSED(dev);
|
| 1781 |
}
|
| 1782 |
|
| 1783 |
/**
|
|
|
|
| 1795 |
return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
|
| 1796 |
}
|
| 1797 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1798 |
/**
|
| 1799 |
* @brief Determines if a tensor operation should be offloaded to the CANN
|
| 1800 |
* backend.
|
|
|
|
| 1809 |
* @return bool Returns true if the operation should be offloaded, otherwise
|
| 1810 |
* false.
|
| 1811 |
*/
|
| 1812 |
+
static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev,
|
| 1813 |
const ggml_tensor* op) {
|
| 1814 |
const int min_batch_size = 32;
|
| 1815 |
+
GGML_UNUSED(dev);
|
| 1816 |
|
| 1817 |
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
|
| 1818 |
}
|
| 1819 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1820 |
/**
|
| 1821 |
* @brief Records an event on the CANN backend stream.
|
| 1822 |
*
|
|
|
|
| 1853 |
}
|
| 1854 |
}
|
| 1855 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1856 |
/**
|
| 1857 |
* @brief Structure defining the interface for the CANN backend.
|
| 1858 |
*
|
|
|
|
| 1860 |
* supported by the CANN backend, including name retrieval, memory
|
| 1861 |
* management, tensor operations, synchronization, and event handling.
|
| 1862 |
*/
|
| 1863 |
+
static const ggml_backend_i ggml_backend_cann_interface = {
|
| 1864 |
/* .get_name = */ ggml_backend_cann_name,
|
| 1865 |
/* .free = */ ggml_backend_cann_free,
|
| 1866 |
/* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
|
|
|
|
| 1873 |
/* .graph_plan_update = */ NULL,
|
| 1874 |
/* .graph_plan_compute = */ NULL,
|
| 1875 |
/* .graph_compute = */ ggml_backend_cann_graph_compute,
|
| 1876 |
+
/* .supports_op = */ NULL, // moved to device
|
| 1877 |
+
/* .supports_buft = */ NULL, // moved to device
|
| 1878 |
+
/* .offload_op = */ NULL, // moved to device
|
| 1879 |
/* .event_record = */ ggml_backend_cann_event_record,
|
| 1880 |
/* .event_wait = */ ggml_backend_cann_event_wait,
|
| 1881 |
};
|
|
|
|
| 1894 |
return &guid;
|
| 1895 |
}
|
| 1896 |
|
| 1897 |
+
// backend device
|
| 1898 |
+
struct ggml_backend_cann_device_context {
|
| 1899 |
+
int device;
|
| 1900 |
+
std::string name;
|
| 1901 |
+
std::string description;
|
| 1902 |
+
};
|
| 1903 |
+
|
| 1904 |
+
static const char * ggml_backend_cann_device_get_name(ggml_backend_dev_t dev) {
|
| 1905 |
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1906 |
+
return ctx->name.c_str();
|
| 1907 |
+
}
|
| 1908 |
+
|
| 1909 |
+
static const char* ggml_backend_cann_device_get_description(ggml_backend_dev_t dev) {
|
| 1910 |
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1911 |
+
return ctx->description.c_str();
|
| 1912 |
+
}
|
| 1913 |
+
|
| 1914 |
+
static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
| 1915 |
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1916 |
+
ggml_backend_cann_get_device_memory(ctx->device, free, total);
|
| 1917 |
+
}
|
| 1918 |
+
|
| 1919 |
+
static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
|
| 1920 |
+
GGML_UNUSED(dev);
|
| 1921 |
+
return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
|
| 1922 |
+
}
|
| 1923 |
+
|
| 1924 |
+
static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
| 1925 |
+
props->name = ggml_backend_cann_device_get_name(dev);
|
| 1926 |
+
props->description = ggml_backend_cann_device_get_description(dev);
|
| 1927 |
+
props->type = ggml_backend_cann_device_get_type(dev);
|
| 1928 |
+
ggml_backend_cann_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
| 1929 |
+
|
| 1930 |
+
bool host_buffer = getenv("GGML_CANN_NO_PINNED") == nullptr;
|
| 1931 |
+
|
| 1932 |
+
props->caps = {
|
| 1933 |
+
/* .async = */ false,
|
| 1934 |
+
/* .host_buffer = */ host_buffer,
|
| 1935 |
+
/* .buffer_from_host_ptr = */ false,
|
| 1936 |
+
/* .events = */ true,
|
| 1937 |
+
};
|
| 1938 |
+
}
|
| 1939 |
+
|
| 1940 |
+
static ggml_backend_t ggml_backend_cann_device_init(ggml_backend_dev_t dev, const char * params) {
|
| 1941 |
+
GGML_UNUSED(params);
|
| 1942 |
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1943 |
+
return ggml_backend_cann_init(ctx->device);
|
| 1944 |
+
}
|
| 1945 |
+
|
| 1946 |
+
/**
|
| 1947 |
+
* @brief Checks if the CANN backend supports a specific backend buffer type.
|
| 1948 |
+
*
|
| 1949 |
+
* This function determines whether the CANN backend supports the given backend
|
| 1950 |
+
* buffer type by comparing the device context of the backend and buffer type.
|
| 1951 |
+
* It returns true if the devices are same between the backend context and
|
| 1952 |
+
* buffer type context.
|
| 1953 |
+
*
|
| 1954 |
+
* @param dev Pointer to the CANN backend device.
|
| 1955 |
+
* @param buft Pointer to the backend buffer type to check.
|
| 1956 |
+
* @return bool Returns true if the CANN backend supports the buffer type,
|
| 1957 |
+
* otherwise false.
|
| 1958 |
+
*/
|
| 1959 |
+
static bool ggml_backend_cann_supports_buft(
|
| 1960 |
+
ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
| 1961 |
+
if (ggml_backend_buft_is_cann(buft)) {
|
| 1962 |
+
ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1963 |
+
ggml_backend_cann_buffer_type_context * buft_ctx =
|
| 1964 |
+
(ggml_backend_cann_buffer_type_context *)buft->context;
|
| 1965 |
+
return buft_ctx->device == dev_ctx->device;
|
| 1966 |
+
}
|
| 1967 |
+
return false;
|
| 1968 |
+
}
|
| 1969 |
+
|
| 1970 |
+
static ggml_backend_buffer_type_t ggml_backend_cann_device_get_buffer_type(ggml_backend_dev_t dev) {
|
| 1971 |
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1972 |
+
return ggml_backend_cann_buffer_type(ctx->device);
|
| 1973 |
+
}
|
| 1974 |
+
|
| 1975 |
+
static ggml_backend_buffer_type_t ggml_backend_cann_device_get_host_buffer_type(ggml_backend_dev_t dev) {
|
| 1976 |
+
GGML_UNUSED(dev);
|
| 1977 |
+
return ggml_backend_cann_host_buffer_type();
|
| 1978 |
+
}
|
| 1979 |
+
|
| 1980 |
+
/**
|
| 1981 |
+
* @brief Creates a new event for the CANN backend device.
|
| 1982 |
+
*
|
| 1983 |
+
* This function initializes a new event for the CANN backend by setting the
|
| 1984 |
+
* device and creating an ACL runtime event. The created event is then wrapped
|
| 1985 |
+
* in a ggml_backend_event structure and returned.
|
| 1986 |
+
*
|
| 1987 |
+
* @param dev Pointer to the CANN backend device.
|
| 1988 |
+
* @return ggml_backend_event_t Returns a pointer to the new event structure.
|
| 1989 |
+
*/
|
| 1990 |
+
static ggml_backend_event_t ggml_backend_cann_device_event_new(
|
| 1991 |
+
ggml_backend_dev_t dev) {
|
| 1992 |
+
ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
|
| 1993 |
+
|
| 1994 |
+
ggml_cann_set_device(dev_ctx->device);
|
| 1995 |
+
|
| 1996 |
+
aclrtEvent event;
|
| 1997 |
+
ACL_CHECK(aclrtCreateEvent(&event));
|
| 1998 |
+
|
| 1999 |
+
return new ggml_backend_event{
|
| 2000 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), dev_ctx->device),
|
| 2001 |
+
/* .context = */ event,
|
| 2002 |
+
};
|
| 2003 |
+
}
|
| 2004 |
+
|
| 2005 |
+
/**
|
| 2006 |
+
* @brief Frees a CANN backend event.
|
| 2007 |
+
*
|
| 2008 |
+
* This function destroys the ACL runtime event associated with the given CANN
|
| 2009 |
+
* backend event and then deletes the event structure itself.
|
| 2010 |
+
*
|
| 2011 |
+
* @param event Pointer to the event structure to be freed.
|
| 2012 |
+
*/
|
| 2013 |
+
static void ggml_backend_cann_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
| 2014 |
+
ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
|
| 2015 |
+
|
| 2016 |
+
delete event;
|
| 2017 |
+
GGML_UNUSED(dev);
|
| 2018 |
+
}
|
| 2019 |
+
|
| 2020 |
+
/**
|
| 2021 |
+
* @brief Synchronizes the given event on the CANN backend.
|
| 2022 |
+
*
|
| 2023 |
+
* This function waits for the specified event to complete on the ACL runtime.
|
| 2024 |
+
*
|
| 2025 |
+
* @param event Pointer to the event structure to be synchronized.
|
| 2026 |
+
*/
|
| 2027 |
+
static void ggml_backend_cann_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
| 2028 |
+
ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
|
| 2029 |
+
|
| 2030 |
+
GGML_UNUSED(dev);
|
| 2031 |
+
}
|
| 2032 |
+
|
| 2033 |
+
static const ggml_backend_device_i ggml_backend_cann_device_interface = {
|
| 2034 |
+
/* .get_name = */ ggml_backend_cann_device_get_name,
|
| 2035 |
+
/* .get_description = */ ggml_backend_cann_device_get_description,
|
| 2036 |
+
/* .get_memory = */ ggml_backend_cann_device_get_memory,
|
| 2037 |
+
/* .get_type = */ ggml_backend_cann_device_get_type,
|
| 2038 |
+
/* .get_props = */ ggml_backend_cann_device_get_props,
|
| 2039 |
+
/* .init_backend = */ ggml_backend_cann_device_init, // called for every card
|
| 2040 |
+
/* .get_buffer_type = */ ggml_backend_cann_device_get_buffer_type,
|
| 2041 |
+
/* .get_host_buffer_type = */ ggml_backend_cann_device_get_host_buffer_type,
|
| 2042 |
+
/* .buffer_from_host_ptr = */ NULL, // not supported for CANN
|
| 2043 |
+
/* .supports_op = */ ggml_backend_cann_supports_op,
|
| 2044 |
+
/* .supports_buft = */ ggml_backend_cann_supports_buft,
|
| 2045 |
+
/* .offload_op = */ ggml_backend_cann_offload_op,
|
| 2046 |
+
/* .event_new = */ ggml_backend_cann_device_event_new,
|
| 2047 |
+
/* .event_free = */ ggml_backend_cann_device_event_free,
|
| 2048 |
+
/* .event_synchronize = */ ggml_backend_cann_device_event_synchronize,
|
| 2049 |
+
};
|
| 2050 |
+
|
| 2051 |
+
|
| 2052 |
+
// backend reg
|
| 2053 |
+
struct ggml_backend_cann_reg_context {
|
| 2054 |
+
std::vector<ggml_backend_dev_t> devices;
|
| 2055 |
+
};
|
| 2056 |
+
|
| 2057 |
+
static const char * ggml_backend_cann_reg_get_name(ggml_backend_reg_t reg) {
|
| 2058 |
+
GGML_UNUSED(reg);
|
| 2059 |
+
return GGML_CANN_NAME;
|
| 2060 |
+
}
|
| 2061 |
+
|
| 2062 |
+
static size_t ggml_backend_cann_reg_get_device_count(ggml_backend_reg_t reg) {
|
| 2063 |
+
ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
|
| 2064 |
+
return ctx->devices.size();
|
| 2065 |
+
}
|
| 2066 |
+
|
| 2067 |
+
static ggml_backend_dev_t ggml_backend_cann_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
| 2068 |
+
ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
|
| 2069 |
+
GGML_ASSERT(index < ctx->devices.size());
|
| 2070 |
+
return ctx->devices[index];
|
| 2071 |
+
}
|
| 2072 |
+
|
| 2073 |
+
static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
| 2074 |
+
GGML_UNUSED(reg);
|
| 2075 |
+
GGML_UNUSED(name);
|
| 2076 |
+
// reserved for future use
|
| 2077 |
+
return nullptr;
|
| 2078 |
+
}
|
| 2079 |
+
|
| 2080 |
+
static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
|
| 2081 |
+
/* .get_name = */ ggml_backend_cann_reg_get_name,
|
| 2082 |
+
/* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
|
| 2083 |
+
/* .get_device_get = */ ggml_backend_cann_reg_get_device,
|
| 2084 |
+
/* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
|
| 2085 |
+
};
|
| 2086 |
+
|
| 2087 |
+
// backend registry, called only once for cann backend
|
| 2088 |
+
ggml_backend_reg_t ggml_backend_cann_reg() {
|
| 2089 |
+
static ggml_backend_reg reg;
|
| 2090 |
+
static bool initialized = false;
|
| 2091 |
+
|
| 2092 |
+
{
|
| 2093 |
+
static std::mutex mutex;
|
| 2094 |
+
std::lock_guard<std::mutex> lock(mutex);
|
| 2095 |
+
if (!initialized) {
|
| 2096 |
+
aclInit(nullptr);
|
| 2097 |
+
ggml_backend_cann_reg_context * ctx = new ggml_backend_cann_reg_context;
|
| 2098 |
+
|
| 2099 |
+
for (int i = 0; i < ggml_cann_info().device_count; i++) {
|
| 2100 |
+
ggml_backend_cann_device_context* dev_ctx = new ggml_backend_cann_device_context();
|
| 2101 |
+
dev_ctx->description = aclrtGetSocName();
|
| 2102 |
+
dev_ctx->device = i;
|
| 2103 |
+
dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
|
| 2104 |
+
ggml_cann_set_device(i);
|
| 2105 |
+
ggml_backend_dev_t dev = new ggml_backend_device {
|
| 2106 |
+
/* .interface = */ ggml_backend_cann_device_interface,
|
| 2107 |
+
/* .reg = */ &reg,
|
| 2108 |
+
/* .context = */ dev_ctx
|
| 2109 |
+
};
|
| 2110 |
+
ctx->devices.push_back(dev);
|
| 2111 |
+
}
|
| 2112 |
+
|
| 2113 |
+
reg = ggml_backend_reg {
|
| 2114 |
+
/* .interface = */ ggml_backend_cann_reg_interface,
|
| 2115 |
+
/* .context = */ ctx
|
| 2116 |
+
};
|
| 2117 |
+
}
|
| 2118 |
+
|
| 2119 |
+
initialized = true;
|
| 2120 |
+
}
|
| 2121 |
+
|
| 2122 |
+
return &reg;
|
| 2123 |
+
}
|
| 2124 |
+
|
| 2125 |
ggml_backend_t ggml_backend_cann_init(int32_t device) {
|
| 2126 |
aclInit(nullptr);
|
| 2127 |
if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
|
|
|
|
| 2138 |
ggml_backend_t cann_backend =
|
| 2139 |
new ggml_backend{/* .guid = */ ggml_backend_cann_guid(),
|
| 2140 |
/* .interface = */ ggml_backend_cann_interface,
|
| 2141 |
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
|
| 2142 |
/* .context = */ ctx};
|
| 2143 |
|
| 2144 |
return cann_backend;
|