qnixsynapse commited on
Commit
f083887
·
1 Parent(s): 1c781a8

SYCL: Fix and switch to GGML_LOG system instead of fprintf (llama/10579)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-sycl/ggml-sycl.cpp +51 -39
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -47,7 +47,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
47
 
48
  info.device_count = dpct::dev_mgr::instance().device_count();
49
  if (info.device_count == 0) {
50
- fprintf(stderr, "%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
51
  return info;
52
  }
53
 
@@ -55,16 +55,16 @@ static ggml_sycl_device_info ggml_sycl_init() {
55
 
56
  int64_t total_vram = 0;
57
  #if defined(GGML_SYCL_FORCE_MMQ)
58
- fprintf(stderr, "%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
59
  #else
60
- fprintf(stderr, "%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
61
  #endif
62
  #if defined(SYCL_USE_XMX)
63
- fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
64
  #else
65
- fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
66
  #endif
67
- fprintf(stderr, "%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count);
68
 
69
  for (int i = 0; i < info.device_count; ++i) {
70
  info.devices[i].vmm = 0;
@@ -110,7 +110,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
110
 
111
  auto global_mem_size = prop.get_global_mem_size()/1000000;
112
 
113
- fprintf(stderr, "|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
114
  name.c_str(), version.c_str(), prop.get_max_compute_units(),
115
  prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
116
  global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
@@ -120,19 +120,30 @@ void ggml_backend_sycl_print_sycl_devices() {
120
  GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
121
  int device_count = dpct::dev_mgr::instance().device_count();
122
  std::map<std::string, size_t> DeviceNums;
123
- fprintf(stderr, "found %d SYCL devices:\n", device_count);
124
- fprintf(stderr, "| | | | |Max | |Max |Global | |\n");
125
- fprintf(stderr, "| | | | |compute|Max work|sub |mem | |\n");
126
- fprintf(stderr, "|ID| Device Type| Name|Version|units |group |group|size | Driver version|\n");
127
- fprintf(stderr, "|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|\n");
 
 
 
 
 
 
 
 
 
 
128
  for (int id = 0; id < device_count; ++id) {
129
- sycl::device device = dpct::dev_mgr::instance().get_device(id);
130
- sycl::backend backend = device.get_backend();
131
- std::string backend_type = get_device_backend_and_type(device);
132
- int type_id=DeviceNums[backend_type]++;
133
- std::stringstream device_type;
134
- device_type << "[" << backend_type << ":" << std::to_string(type_id) << "]";
135
- print_device_detail(id, device, device_type.str());
 
136
  }
137
  }
138
 
@@ -154,15 +165,14 @@ static void ggml_check_sycl() try {
154
  static bool initialized = false;
155
 
156
  if (!initialized) {
157
- fprintf(stderr, "[SYCL] call ggml_check_sycl\n");
158
  g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
159
-
160
- fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
161
 
162
  #if defined(GGML_SYCL_F16)
163
- fprintf(stderr, "%s: GGML_SYCL_F16: yes\n", __func__);
164
  #else
165
- fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
166
  #endif
167
 
168
  /* NOT REMOVE, keep it for next optimize for XMX.
@@ -180,9 +190,10 @@ static void ggml_check_sycl() try {
180
  return;
181
  }
182
  GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
183
- ggml_backend_sycl_print_sycl_devices();
184
  initialized = true;
185
  g_sycl_loaded = true;
 
186
  }
187
  }
188
  catch (sycl::exception const &exc) {
@@ -205,7 +216,7 @@ inline void check_allow_gpu_index(const int device_index) {
205
  __func__,
206
  device_index,
207
  ggml_sycl_info().device_count - 1);
208
- fprintf(stderr, "%s\n", error_buf);
209
  assert(false);
210
  }
211
  }
@@ -475,8 +486,8 @@ ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
475
  SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device(
476
  size, *stream)));
477
  if (!dev_ptr) {
478
- fprintf(stderr, "%s: can't malloc %lu Bytes memory on device", __func__, size);
479
- return nullptr;
480
  }
481
  ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream);
482
  return ggml_backend_buffer_init(buft, ggml_backend_sycl_buffer_interface, ctx, size);
@@ -752,7 +763,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
752
  size, *stream)));
753
  if (!buf) {
754
  char err_buf[1024];
755
- snprintf(err_buf, 1023, "%s: can't malloc %lu Bytes memory on device", __func__, size);
756
  throw std::runtime_error(err_buf);
757
  }
758
  // set padding to 0 to avoid possible NaN values
@@ -1142,17 +1153,18 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
1142
  CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device(
1143
  look_ahead_size, *qptr)));
1144
  if (!ptr) {
1145
- fprintf(stderr, "%s: can't malloc %lu Bytes memory on device", __func__, look_ahead_size);
1146
  return nullptr;
1147
  }
1148
 
1149
  *actual_size = look_ahead_size;
1150
  pool_size += look_ahead_size;
1151
 
1152
- #ifdef DEBUG_SYCL_MALLOC
1153
- fprintf(stderr, "%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, id, nnz,
1154
  (uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024));
1155
- #endif
 
1156
  // GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
1157
  return ptr;
1158
  }
@@ -1166,7 +1178,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
1166
  return;
1167
  }
1168
  }
1169
- fprintf(stderr, "WARNING: sycl buffer pool full, increase MAX_sycl_BUFFERS\n");
1170
  SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr)));
1171
  pool_size -= size;
1172
  }
@@ -2437,7 +2449,7 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te
2437
  break;
2438
  default:
2439
  // TODO: k-quants
2440
- fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
2441
  GGML_ABORT("fatal error");
2442
  break;
2443
  }
@@ -3750,7 +3762,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
3750
  } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
3751
  ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
3752
  } else {
3753
- fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
3754
  ggml_type_name(src0->type), ggml_type_name(src1->type));
3755
  GGML_ABORT("fatal error");
3756
  }
@@ -3825,7 +3837,7 @@ void ggml_sycl_set_main_device(const int main_device) try {
3825
  dpct::device_info prop;
3826
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
3827
  prop, dpct::dev_mgr::instance().get_device(main_device))));
3828
- fprintf(stderr, "Using device %d (%s) as main device\n",
3829
  main_device, prop.get_name());
3830
  }
3831
  }
@@ -4172,7 +4184,7 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
4172
  #endif
4173
  bool ok = ggml_sycl_compute_forward(*sycl_ctx, node);
4174
  if (!ok) {
4175
- fprintf(stderr, "%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
4176
  }
4177
  GGML_ASSERT(ok);
4178
  }
@@ -4672,7 +4684,7 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
4672
 
4673
  ggml_backend_sycl_context * ctx = new ggml_backend_sycl_context(device);
4674
  if (ctx == nullptr) {
4675
- fprintf(stderr, "%s: error: failed to allocate context\n", __func__);
4676
  return nullptr;
4677
  };
4678
 
 
47
 
48
  info.device_count = dpct::dev_mgr::instance().device_count();
49
  if (info.device_count == 0) {
50
+ GGML_LOG_ERROR("%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
51
  return info;
52
  }
53
 
 
55
 
56
  int64_t total_vram = 0;
57
  #if defined(GGML_SYCL_FORCE_MMQ)
58
+ GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
59
  #else
60
+ GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
61
  #endif
62
  #if defined(SYCL_USE_XMX)
63
+ GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
64
  #else
65
+ GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
66
  #endif
67
+ GGML_LOG_INFO("%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count);
68
 
69
  for (int i = 0; i < info.device_count; ++i) {
70
  info.devices[i].vmm = 0;
 
110
 
111
  auto global_mem_size = prop.get_global_mem_size()/1000000;
112
 
113
+ GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
114
  name.c_str(), version.c_str(), prop.get_max_compute_units(),
115
  prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
116
  global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
 
120
  GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
121
  int device_count = dpct::dev_mgr::instance().device_count();
122
  std::map<std::string, size_t> DeviceNums;
123
+ GGML_LOG_INFO("Found %d SYCL devices:\n", device_count);
124
+
125
+ GGML_LOG_INFO(
126
+ "| | | | "
127
+ " |Max | |Max |Global | |\n");
128
+ GGML_LOG_INFO(
129
+ "| | | | "
130
+ " |compute|Max work|sub |mem | |\n");
131
+ GGML_LOG_INFO(
132
+ "|ID| Device Type| "
133
+ "Name|Version|units |group |group|size | Driver version|\n");
134
+ GGML_LOG_INFO(
135
+ "|--|-------------------|---------------------------------------|------"
136
+ "-|-------|--------|-----|-------|---------------------|\n");
137
+
138
  for (int id = 0; id < device_count; ++id) {
139
+ sycl::device device = dpct::dev_mgr::instance().get_device(id);
140
+ sycl::backend backend = device.get_backend();
141
+ std::string backend_type = get_device_backend_and_type(device);
142
+ int type_id = DeviceNums[backend_type]++;
143
+ std::stringstream device_type;
144
+ device_type << "[" << backend_type << ":" << std::to_string(type_id)
145
+ << "]";
146
+ print_device_detail(id, device, device_type.str());
147
  }
148
  }
149
 
 
165
  static bool initialized = false;
166
 
167
  if (!initialized) {
168
+ GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
169
  g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
170
+ GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
 
171
 
172
  #if defined(GGML_SYCL_F16)
173
+ GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
174
  #else
175
+ GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
176
  #endif
177
 
178
  /* NOT REMOVE, keep it for next optimize for XMX.
 
190
  return;
191
  }
192
  GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
193
+
194
  initialized = true;
195
  g_sycl_loaded = true;
196
+ ggml_backend_sycl_print_sycl_devices();
197
  }
198
  }
199
  catch (sycl::exception const &exc) {
 
216
  __func__,
217
  device_index,
218
  ggml_sycl_info().device_count - 1);
219
+ GGML_LOG_ERROR("%s\n", error_buf);
220
  assert(false);
221
  }
222
  }
 
486
  SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device(
487
  size, *stream)));
488
  if (!dev_ptr) {
489
+ GGML_LOG_ERROR("%s: can't allocate %lu Bytes of memory on device\n", __func__, size);
490
+ return nullptr;
491
  }
492
  ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream);
493
  return ggml_backend_buffer_init(buft, ggml_backend_sycl_buffer_interface, ctx, size);
 
763
  size, *stream)));
764
  if (!buf) {
765
  char err_buf[1024];
766
+ snprintf(err_buf, 1023, "%s: can't allocate %lu Bytes of memory on device\n", __func__, size);
767
  throw std::runtime_error(err_buf);
768
  }
769
  // set padding to 0 to avoid possible NaN values
 
1153
  CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device(
1154
  look_ahead_size, *qptr)));
1155
  if (!ptr) {
1156
+ GGML_LOG_ERROR("%s: can't allocate %lu Bytes of memory on device/GPU\n", __func__, look_ahead_size);
1157
  return nullptr;
1158
  }
1159
 
1160
  *actual_size = look_ahead_size;
1161
  pool_size += look_ahead_size;
1162
 
1163
+ #ifdef DEBUG_SYCL_MALLOC
1164
+ GGML_LOG_DEBUG("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, id, nnz,
1165
  (uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024));
1166
+ #endif
1167
+
1168
  // GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
1169
  return ptr;
1170
  }
 
1178
  return;
1179
  }
1180
  }
1181
+ GGML_LOG_WARN("WARNING: sycl buffer pool full, increase MAX_sycl_BUFFERS\n");
1182
  SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr)));
1183
  pool_size -= size;
1184
  }
 
2449
  break;
2450
  default:
2451
  // TODO: k-quants
2452
+ GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
2453
  GGML_ABORT("fatal error");
2454
  break;
2455
  }
 
3762
  } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
3763
  ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
3764
  } else {
3765
+ GGML_LOG_ERROR("%s: unsupported type combination (%s to %s)\n", __func__,
3766
  ggml_type_name(src0->type), ggml_type_name(src1->type));
3767
  GGML_ABORT("fatal error");
3768
  }
 
3837
  dpct::device_info prop;
3838
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
3839
  prop, dpct::dev_mgr::instance().get_device(main_device))));
3840
+ GGML_LOG_INFO("Using device %d (%s) as main device\n",
3841
  main_device, prop.get_name());
3842
  }
3843
  }
 
4184
  #endif
4185
  bool ok = ggml_sycl_compute_forward(*sycl_ctx, node);
4186
  if (!ok) {
4187
+ GGML_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
4188
  }
4189
  GGML_ASSERT(ok);
4190
  }
 
4684
 
4685
  ggml_backend_sycl_context * ctx = new ggml_backend_sycl_context(device);
4686
  if (ctx == nullptr) {
4687
+ GGML_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
4688
  return nullptr;
4689
  };
4690