leo-pony committed on
Commit
f8d4728
·
1 Parent(s): 9542e42

Adapt to dynamically loadable backends mechanism (llama/9970)

Browse files

* [CANN] Adapt to dynamically loadable backends mechanism

* Fix the bug: inference output is garbled when running in debug mode for LM models whose type is in the Q4_0 class

* Address the review comments on this pull request

ggml/include/ggml-cann.h CHANGED
@@ -34,6 +34,8 @@ extern "C" {
34
  */
35
  #define GGML_CANN_MAX_DEVICES 16
36
 
 
 
37
  /**
38
  * @brief Initializes the CANN backend for a specified device.
39
  *
 
34
  */
35
  #define GGML_CANN_MAX_DEVICES 16
36
 
37
+ GGML_API ggml_backend_reg_t ggml_backend_cann_reg(void);
38
+
39
  /**
40
  * @brief Initializes the CANN backend for a specified device.
41
  *
ggml/src/ggml-backend.cpp CHANGED
@@ -561,6 +561,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
561
  # include "ggml-amx.h"
562
  #endif
563
 
 
 
 
 
564
  struct ggml_backend_registry {
565
  std::vector<ggml_backend_reg_t> backends;
566
  std::vector<ggml_backend_dev_t> devices;
@@ -587,8 +591,11 @@ struct ggml_backend_registry {
587
  #ifdef GGML_USE_AMX
588
  register_backend(ggml_backend_amx_reg());
589
  #endif
 
 
 
590
 
591
- // TODO: kompute, cann
592
 
593
  register_backend(ggml_backend_cpu_reg());
594
  }
 
561
  # include "ggml-amx.h"
562
  #endif
563
 
564
+ #ifdef GGML_USE_CANN
565
+ #include "ggml-cann.h"
566
+ #endif
567
+
568
  struct ggml_backend_registry {
569
  std::vector<ggml_backend_reg_t> backends;
570
  std::vector<ggml_backend_dev_t> devices;
 
591
  #ifdef GGML_USE_AMX
592
  register_backend(ggml_backend_amx_reg());
593
  #endif
594
+ #ifdef GGML_USE_CANN
595
+ register_backend(ggml_backend_cann_reg());
596
+ #endif
597
 
598
+ // TODO: kompute
599
 
600
  register_backend(ggml_backend_cpu_reg());
601
  }
ggml/src/ggml-cann.cpp CHANGED
@@ -39,6 +39,8 @@
39
 
40
  #include "ggml-common.h"
41
 
 
 
42
  /**
43
  * @brief Handles CANN errors by printing an error message and aborting.
44
  *
@@ -851,13 +853,6 @@ static void ggml_backend_cann_buffer_set_tensor(
851
  void *transform_buffer = malloc(size);
852
  ggml_backend_cann_transform(tensor, data, transform_buffer);
853
 
854
- #ifndef NDEBUG
855
- void *check_buffer = malloc(size);
856
- ggml_backend_cann_transform_back(tensor, transform_buffer,
857
- check_buffer);
858
- GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
859
- free(check_buffer);
860
- #endif
861
  ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
862
  transform_buffer, size,
863
  ACL_MEMCPY_HOST_TO_DEVICE));
@@ -969,7 +964,7 @@ static void ggml_backend_cann_buffer_clear(
969
  * This structure defines function pointers to operations that can be performed
970
  * on a CANN buffer within the backend.
971
  */
972
- static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
973
  /* .get_name = */ ggml_backend_cann_buffer_get_name,
974
  /* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
975
  /* .get_base = */ ggml_backend_cann_buffer_get_base,
@@ -1105,19 +1100,25 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(
1105
  GGML_UNUSED(buft);
1106
  }
1107
 
 
 
 
 
 
 
1108
  /**
1109
  * @brief Interface for managing CANN buffer types in the GGML backend.
1110
  *
1111
  * Provides function pointers for allocating, querying properties, and managing
1112
  * memory for CANN buffer types in the GGML backend.
1113
  */
1114
- static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
1115
  /* .get_name = */ ggml_backend_cann_buffer_type_name,
1116
  /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
1117
  /* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
1118
  /* .get_max_size = */ NULL, // defaults to SIZE_MAX
1119
  /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
1120
- /* .is_host = */ NULL,
1121
  };
1122
 
1123
  /**
@@ -1148,7 +1149,7 @@ ggml_backend_cann_buffer_type(int32_t device) {
1148
  for (int32_t i = 0; i < GGML_CANN_MAX_DEVICES; i++) {
1149
  ggml_backend_cann_buffer_types[i] = {
1150
  /* .iface = */ ggml_backend_cann_buffer_type_interface,
1151
- /* .device = */ nullptr,
1152
  /* .context = */
1153
  new ggml_backend_cann_buffer_type_context{
1154
  i, "CANN" + std::to_string(i)},
@@ -1264,7 +1265,7 @@ ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
1264
  /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
1265
  /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
1266
  },
1267
- /* .device = */ nullptr,
1268
  /* .context = */ nullptr,
1269
  };
1270
 
@@ -1511,13 +1512,6 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
1511
  void *transform_buffer = malloc(size);
1512
  ggml_backend_cann_transform(tensor, data, transform_buffer);
1513
 
1514
- #ifndef NDEBUG
1515
- void *check_buffer = malloc(size);
1516
- ggml_backend_cann_transform_back(tensor, transform_buffer,
1517
- check_buffer);
1518
- GGML_ASSERT(memcmp(data, check_buffer, size));
1519
- free(check_buffer);
1520
- #endif
1521
  ACL_CHECK(aclrtMemcpyAsync(
1522
  (char *)tensor->data + offset, size, transform_buffer, size,
1523
  ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
@@ -1692,7 +1686,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(
1692
  * @return bool Returns true if the operation is supported by the backend,
1693
  * otherwise false.
1694
  */
1695
- static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
1696
  const ggml_tensor* op) {
1697
  switch (op->op) {
1698
  case GGML_OP_UNARY:
@@ -1783,7 +1777,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
1783
  return false;
1784
  }
1785
 
1786
- GGML_UNUSED(backend);
1787
  }
1788
 
1789
  /**
@@ -1801,31 +1795,6 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
1801
  return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
1802
  }
1803
 
1804
- /**
1805
- * @brief Checks if the CANN backend supports a specific backend buffer type.
1806
- *
1807
- * This function determines whether the CANN backend supports the given backend
1808
- * buffer type by comparing the device context of the backend and buffer type.
1809
- * It returns true if the devices are same between the backend context and
1810
- * buffer type context.
1811
- *
1812
- * @param backend Pointer to the CANN backend.
1813
- * @param buft Pointer to the backend buffer type to check.
1814
- * @return bool Returns true if the CANN backend supports the buffer type,
1815
- * otherwise false.
1816
- */
1817
- static bool ggml_backend_cann_supports_buft(
1818
- ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
1819
- if (ggml_backend_buft_is_cann(buft)) {
1820
- ggml_backend_cann_context * cann_ctx =
1821
- (ggml_backend_cann_context *)backend->context;
1822
- ggml_backend_cann_buffer_type_context * buft_ctx =
1823
- (ggml_backend_cann_buffer_type_context *)buft->context;
1824
- return buft_ctx->device == cann_ctx->device;
1825
- }
1826
- return false;
1827
- }
1828
-
1829
  /**
1830
  * @brief Determines if a tensor operation should be offloaded to the CANN
1831
  * backend.
@@ -1840,54 +1809,14 @@ static bool ggml_backend_cann_supports_buft(
1840
  * @return bool Returns true if the operation should be offloaded, otherwise
1841
  * false.
1842
  */
1843
- static bool ggml_backend_cann_offload_op(ggml_backend_t backend,
1844
  const ggml_tensor* op) {
1845
  const int min_batch_size = 32;
1846
- GGML_UNUSED(backend);
1847
 
1848
  return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
1849
  }
1850
 
1851
- /**
1852
- * @brief Creates a new event for the CANN backend.
1853
- *
1854
- * This function initializes a new event for the CANN backend by setting the
1855
- * device and creating an ACL runtime event. The created event is then wrapped
1856
- * in a ggml_backend_event structure and returned.
1857
- *
1858
- * @param backend Pointer to the CANN backend.
1859
- * @return ggml_backend_event_t Returns a pointer to the new event structure.
1860
- */
1861
- static ggml_backend_event_t ggml_backend_cann_event_new(
1862
- ggml_backend_t backend) {
1863
- ggml_backend_cann_context* cann_ctx =
1864
- (ggml_backend_cann_context*)backend->context;
1865
-
1866
- ggml_cann_set_device(cann_ctx->device);
1867
-
1868
- aclrtEvent event;
1869
- ACL_CHECK(aclrtCreateEvent(&event));
1870
-
1871
- return new ggml_backend_event{
1872
- /* .device = */ nullptr,
1873
- /* .context = */ event,
1874
- };
1875
- }
1876
-
1877
- /**
1878
- * @brief Frees a CANN backend event.
1879
- *
1880
- * This function destroys the ACL runtime event associated with the given CANN
1881
- * backend event and then deletes the event structure itself.
1882
- *
1883
- * @param event Pointer to the event structure to be freed.
1884
- */
1885
- static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
1886
- ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
1887
-
1888
- delete event;
1889
- }
1890
-
1891
  /**
1892
  * @brief Records an event on the CANN backend stream.
1893
  *
@@ -1924,17 +1853,6 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
1924
  }
1925
  }
1926
 
1927
- /**
1928
- * @brief Synchronizes the given event on the CANN backend.
1929
- *
1930
- * This function waits for the specified event to complete on the ACL runtime.
1931
- *
1932
- * @param event Pointer to the event structure to be synchronized.
1933
- */
1934
- static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
1935
- ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
1936
- }
1937
-
1938
  /**
1939
  * @brief Structure defining the interface for the CANN backend.
1940
  *
@@ -1942,7 +1860,7 @@ static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
1942
  * supported by the CANN backend, including name retrieval, memory
1943
  * management, tensor operations, synchronization, and event handling.
1944
  */
1945
- static ggml_backend_i ggml_backend_cann_interface = {
1946
  /* .get_name = */ ggml_backend_cann_name,
1947
  /* .free = */ ggml_backend_cann_free,
1948
  /* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
@@ -1955,9 +1873,9 @@ static ggml_backend_i ggml_backend_cann_interface = {
1955
  /* .graph_plan_update = */ NULL,
1956
  /* .graph_plan_compute = */ NULL,
1957
  /* .graph_compute = */ ggml_backend_cann_graph_compute,
1958
- /* .supports_op = */ ggml_backend_cann_supports_op,
1959
- /* .supports_buft = */ ggml_backend_cann_supports_buft,
1960
- /* .offload_op = */ ggml_backend_cann_offload_op,
1961
  /* .event_record = */ ggml_backend_cann_event_record,
1962
  /* .event_wait = */ ggml_backend_cann_event_wait,
1963
  };
@@ -1976,6 +1894,234 @@ static ggml_guid_t ggml_backend_cann_guid() {
1976
  return &guid;
1977
  }
1978
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1979
  ggml_backend_t ggml_backend_cann_init(int32_t device) {
1980
  aclInit(nullptr);
1981
  if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
@@ -1992,7 +2138,7 @@ ggml_backend_t ggml_backend_cann_init(int32_t device) {
1992
  ggml_backend_t cann_backend =
1993
  new ggml_backend{/* .guid = */ ggml_backend_cann_guid(),
1994
  /* .interface = */ ggml_backend_cann_interface,
1995
- /* .device = */ nullptr,
1996
  /* .context = */ ctx};
1997
 
1998
  return cann_backend;
 
39
 
40
  #include "ggml-common.h"
41
 
42
+ #define GGML_CANN_NAME "CANN"
43
+
44
  /**
45
  * @brief Handles CANN errors by printing an error message and aborting.
46
  *
 
853
  void *transform_buffer = malloc(size);
854
  ggml_backend_cann_transform(tensor, data, transform_buffer);
855
 
 
 
 
 
 
 
 
856
  ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
857
  transform_buffer, size,
858
  ACL_MEMCPY_HOST_TO_DEVICE));
 
964
  * This structure defines function pointers to operations that can be performed
965
  * on a CANN buffer within the backend.
966
  */
967
+ static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
968
  /* .get_name = */ ggml_backend_cann_buffer_get_name,
969
  /* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
970
  /* .get_base = */ ggml_backend_cann_buffer_get_base,
 
1100
  GGML_UNUSED(buft);
1101
  }
1102
 
1103
+ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
1104
+ return false;
1105
+
1106
+ GGML_UNUSED(buft);
1107
+ }
1108
+
1109
  /**
1110
  * @brief Interface for managing CANN buffer types in the GGML backend.
1111
  *
1112
  * Provides function pointers for allocating, querying properties, and managing
1113
  * memory for CANN buffer types in the GGML backend.
1114
  */
1115
+ static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
1116
  /* .get_name = */ ggml_backend_cann_buffer_type_name,
1117
  /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
1118
  /* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
1119
  /* .get_max_size = */ NULL, // defaults to SIZE_MAX
1120
  /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
1121
+ /* .is_host = */ ggml_backend_cann_buffer_type_is_host,
1122
  };
1123
 
1124
  /**
 
1149
  for (int32_t i = 0; i < GGML_CANN_MAX_DEVICES; i++) {
1150
  ggml_backend_cann_buffer_types[i] = {
1151
  /* .iface = */ ggml_backend_cann_buffer_type_interface,
1152
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
1153
  /* .context = */
1154
  new ggml_backend_cann_buffer_type_context{
1155
  i, "CANN" + std::to_string(i)},
 
1265
  /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
1266
  /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
1267
  },
1268
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
1269
  /* .context = */ nullptr,
1270
  };
1271
 
 
1512
  void *transform_buffer = malloc(size);
1513
  ggml_backend_cann_transform(tensor, data, transform_buffer);
1514
 
 
 
 
 
 
 
 
1515
  ACL_CHECK(aclrtMemcpyAsync(
1516
  (char *)tensor->data + offset, size, transform_buffer, size,
1517
  ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
 
1686
  * @return bool Returns true if the operation is supported by the backend,
1687
  * otherwise false.
1688
  */
1689
+ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1690
  const ggml_tensor* op) {
1691
  switch (op->op) {
1692
  case GGML_OP_UNARY:
 
1777
  return false;
1778
  }
1779
 
1780
+ GGML_UNUSED(dev);
1781
  }
1782
 
1783
  /**
 
1795
  return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
1796
  }
1797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1798
  /**
1799
  * @brief Determines if a tensor operation should be offloaded to the CANN
1800
  * backend.
 
1809
  * @return bool Returns true if the operation should be offloaded, otherwise
1810
  * false.
1811
  */
1812
+ static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev,
1813
  const ggml_tensor* op) {
1814
  const int min_batch_size = 32;
1815
+ GGML_UNUSED(dev);
1816
 
1817
  return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
1818
  }
1819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1820
  /**
1821
  * @brief Records an event on the CANN backend stream.
1822
  *
 
1853
  }
1854
  }
1855
 
 
 
 
 
 
 
 
 
 
 
 
1856
  /**
1857
  * @brief Structure defining the interface for the CANN backend.
1858
  *
 
1860
  * supported by the CANN backend, including name retrieval, memory
1861
  * management, tensor operations, synchronization, and event handling.
1862
  */
1863
+ static const ggml_backend_i ggml_backend_cann_interface = {
1864
  /* .get_name = */ ggml_backend_cann_name,
1865
  /* .free = */ ggml_backend_cann_free,
1866
  /* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
 
1873
  /* .graph_plan_update = */ NULL,
1874
  /* .graph_plan_compute = */ NULL,
1875
  /* .graph_compute = */ ggml_backend_cann_graph_compute,
1876
+ /* .supports_op = */ NULL, // moved to device
1877
+ /* .supports_buft = */ NULL, // moved to device
1878
+ /* .offload_op = */ NULL, // moved to device
1879
  /* .event_record = */ ggml_backend_cann_event_record,
1880
  /* .event_wait = */ ggml_backend_cann_event_wait,
1881
  };
 
1894
  return &guid;
1895
  }
1896
 
1897
+ // backend device
1898
+ struct ggml_backend_cann_device_context {
1899
+ int device;
1900
+ std::string name;
1901
+ std::string description;
1902
+ };
1903
+
1904
+ static const char * ggml_backend_cann_device_get_name(ggml_backend_dev_t dev) {
1905
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1906
+ return ctx->name.c_str();
1907
+ }
1908
+
1909
+ static const char* ggml_backend_cann_device_get_description(ggml_backend_dev_t dev) {
1910
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1911
+ return ctx->description.c_str();
1912
+ }
1913
+
1914
+ static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
1915
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1916
+ ggml_backend_cann_get_device_memory(ctx->device, free, total);
1917
+ }
1918
+
1919
+ static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
1920
+ GGML_UNUSED(dev);
1921
+ return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
1922
+ }
1923
+
1924
+ static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
1925
+ props->name = ggml_backend_cann_device_get_name(dev);
1926
+ props->description = ggml_backend_cann_device_get_description(dev);
1927
+ props->type = ggml_backend_cann_device_get_type(dev);
1928
+ ggml_backend_cann_device_get_memory(dev, &props->memory_free, &props->memory_total);
1929
+
1930
+ bool host_buffer = getenv("GGML_CANN_NO_PINNED") == nullptr;
1931
+
1932
+ props->caps = {
1933
+ /* .async = */ false,
1934
+ /* .host_buffer = */ host_buffer,
1935
+ /* .buffer_from_host_ptr = */ false,
1936
+ /* .events = */ true,
1937
+ };
1938
+ }
1939
+
1940
+ static ggml_backend_t ggml_backend_cann_device_init(ggml_backend_dev_t dev, const char * params) {
1941
+ GGML_UNUSED(params);
1942
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1943
+ return ggml_backend_cann_init(ctx->device);
1944
+ }
1945
+
1946
+ /**
1947
+ * @brief Checks if the CANN backend supports a specific backend buffer type.
1948
+ *
1949
+ * This function determines whether the CANN backend device supports the given
1950
+ * backend buffer type by comparing the device index stored in the device
1951
+ * context with the one stored in the buffer type context. It returns true
1952
+ * only if the buffer type is a CANN buffer type on the same device.
1953
+ *
1954
+ * @param dev Pointer to the CANN backend device.
1955
+ * @param buft Pointer to the backend buffer type to check.
1956
+ * @return bool Returns true if the CANN backend supports the buffer type,
1957
+ * otherwise false.
1958
+ */
1959
+ static bool ggml_backend_cann_supports_buft(
1960
+ ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
1961
+ if (ggml_backend_buft_is_cann(buft)) {
1962
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
1963
+ ggml_backend_cann_buffer_type_context * buft_ctx =
1964
+ (ggml_backend_cann_buffer_type_context *)buft->context;
1965
+ return buft_ctx->device == dev_ctx->device;
1966
+ }
1967
+ return false;
1968
+ }
1969
+
1970
+ static ggml_backend_buffer_type_t ggml_backend_cann_device_get_buffer_type(ggml_backend_dev_t dev) {
1971
+ ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
1972
+ return ggml_backend_cann_buffer_type(ctx->device);
1973
+ }
1974
+
1975
+ static ggml_backend_buffer_type_t ggml_backend_cann_device_get_host_buffer_type(ggml_backend_dev_t dev) {
1976
+ GGML_UNUSED(dev);
1977
+ return ggml_backend_cann_host_buffer_type();
1978
+ }
1979
+
1980
+ /**
1981
+ * @brief Creates a new event for the CANN backend device.
1982
+ *
1983
+ * This function initializes a new event for the CANN backend by setting the
1984
+ * device and creating an ACL runtime event. The created event is then wrapped
1985
+ * in a ggml_backend_event structure and returned.
1986
+ *
1987
+ * @param dev Pointer to the CANN backend device.
1988
+ * @return ggml_backend_event_t Returns a pointer to the new event structure.
1989
+ */
1990
+ static ggml_backend_event_t ggml_backend_cann_device_event_new(
1991
+ ggml_backend_dev_t dev) {
1992
+ ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
1993
+
1994
+ ggml_cann_set_device(dev_ctx->device);
1995
+
1996
+ aclrtEvent event;
1997
+ ACL_CHECK(aclrtCreateEvent(&event));
1998
+
1999
+ return new ggml_backend_event{
2000
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), dev_ctx->device),
2001
+ /* .context = */ event,
2002
+ };
2003
+ }
2004
+
2005
+ /**
2006
+ * @brief Frees a CANN backend event.
2007
+ *
2008
+ * This function destroys the ACL runtime event associated with the given CANN
2009
+ * backend event and then deletes the event structure itself.
2010
+ *
2011
+ * @param event Pointer to the event structure to be freed.
2012
+ */
2013
+ static void ggml_backend_cann_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
2014
+ ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
2015
+
2016
+ delete event;
2017
+ GGML_UNUSED(dev);
2018
+ }
2019
+
2020
+ /**
2021
+ * @brief Synchronizes the given event on the CANN backend.
2022
+ *
2023
+ * This function waits for the specified event to complete on the ACL runtime.
2024
+ *
2025
+ * @param event Pointer to the event structure to be synchronized.
2026
+ */
2027
+ static void ggml_backend_cann_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
2028
+ ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
2029
+
2030
+ GGML_UNUSED(dev);
2031
+ }
2032
+
2033
+ static const ggml_backend_device_i ggml_backend_cann_device_interface = {
2034
+ /* .get_name = */ ggml_backend_cann_device_get_name,
2035
+ /* .get_description = */ ggml_backend_cann_device_get_description,
2036
+ /* .get_memory = */ ggml_backend_cann_device_get_memory,
2037
+ /* .get_type = */ ggml_backend_cann_device_get_type,
2038
+ /* .get_props = */ ggml_backend_cann_device_get_props,
2039
+ /* .init_backend = */ ggml_backend_cann_device_init, // called for every card
2040
+ /* .get_buffer_type = */ ggml_backend_cann_device_get_buffer_type,
2041
+ /* .get_host_buffer_type = */ ggml_backend_cann_device_get_host_buffer_type,
2042
+ /* .buffer_from_host_ptr = */ NULL, // not supported for CANN
2043
+ /* .supports_op = */ ggml_backend_cann_supports_op,
2044
+ /* .supports_buft = */ ggml_backend_cann_supports_buft,
2045
+ /* .offload_op = */ ggml_backend_cann_offload_op,
2046
+ /* .event_new = */ ggml_backend_cann_device_event_new,
2047
+ /* .event_free = */ ggml_backend_cann_device_event_free,
2048
+ /* .event_synchronize = */ ggml_backend_cann_device_event_synchronize,
2049
+ };
2050
+
2051
+
2052
+ // backend reg
2053
+ struct ggml_backend_cann_reg_context {
2054
+ std::vector<ggml_backend_dev_t> devices;
2055
+ };
2056
+
2057
+ static const char * ggml_backend_cann_reg_get_name(ggml_backend_reg_t reg) {
2058
+ GGML_UNUSED(reg);
2059
+ return GGML_CANN_NAME;
2060
+ }
2061
+
2062
+ static size_t ggml_backend_cann_reg_get_device_count(ggml_backend_reg_t reg) {
2063
+ ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
2064
+ return ctx->devices.size();
2065
+ }
2066
+
2067
+ static ggml_backend_dev_t ggml_backend_cann_reg_get_device(ggml_backend_reg_t reg, size_t index) {
2068
+ ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
2069
+ GGML_ASSERT(index < ctx->devices.size());
2070
+ return ctx->devices[index];
2071
+ }
2072
+
2073
+ static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
2074
+ GGML_UNUSED(reg);
2075
+ GGML_UNUSED(name);
2076
+ // reserved for future use
2077
+ return nullptr;
2078
+ }
2079
+
2080
+ static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
2081
+ /* .get_name = */ ggml_backend_cann_reg_get_name,
2082
+ /* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
2083
+ /* .get_device_get = */ ggml_backend_cann_reg_get_device,
2084
+ /* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
2085
+ };
2086
+
2087
+ // backend registry; may be called repeatedly, but the CANN registry is initialized only once
2088
+ ggml_backend_reg_t ggml_backend_cann_reg() {
2089
+ static ggml_backend_reg reg;
2090
+ static bool initialized = false;
2091
+
2092
+ {
2093
+ static std::mutex mutex;
2094
+ std::lock_guard<std::mutex> lock(mutex);
2095
+ if (!initialized) {
2096
+ aclInit(nullptr);
2097
+ ggml_backend_cann_reg_context * ctx = new ggml_backend_cann_reg_context;
2098
+
2099
+ for (int i = 0; i < ggml_cann_info().device_count; i++) {
2100
+ ggml_backend_cann_device_context* dev_ctx = new ggml_backend_cann_device_context();
2101
+ dev_ctx->description = aclrtGetSocName();
2102
+ dev_ctx->device = i;
2103
+ dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
2104
+ ggml_cann_set_device(i);
2105
+ ggml_backend_dev_t dev = new ggml_backend_device {
2106
+ /* .interface = */ ggml_backend_cann_device_interface,
2107
+ /* .reg = */ &reg,
2108
+ /* .context = */ dev_ctx
2109
+ };
2110
+ ctx->devices.push_back(dev);
2111
+ }
2112
+
2113
+ reg = ggml_backend_reg {
2114
+ /* .interface = */ ggml_backend_cann_reg_interface,
2115
+ /* .context = */ ctx
2116
+ };
2117
+ }
2118
+
2119
+ initialized = true;
2120
+ }
2121
+
2122
+ return &reg;
2123
+ }
2124
+
2125
  ggml_backend_t ggml_backend_cann_init(int32_t device) {
2126
  aclInit(nullptr);
2127
  if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
 
2138
  ggml_backend_t cann_backend =
2139
  new ggml_backend{/* .guid = */ ggml_backend_cann_guid(),
2140
  /* .interface = */ ggml_backend_cann_interface,
2141
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
2142
  /* .context = */ ctx};
2143
 
2144
  return cann_backend;