Spaces:
Running
Running
Commit
·
f083887
1
Parent(s):
1c781a8
SYCL: Fix and switch to GGML_LOG system instead of fprintf (llama/10579)
Browse files- ggml/src/ggml-sycl/ggml-sycl.cpp +51 -39
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
|
@@ -47,7 +47,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|
| 47 |
|
| 48 |
info.device_count = dpct::dev_mgr::instance().device_count();
|
| 49 |
if (info.device_count == 0) {
|
| 50 |
-
|
| 51 |
return info;
|
| 52 |
}
|
| 53 |
|
|
@@ -55,16 +55,16 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|
| 55 |
|
| 56 |
int64_t total_vram = 0;
|
| 57 |
#if defined(GGML_SYCL_FORCE_MMQ)
|
| 58 |
-
|
| 59 |
#else
|
| 60 |
-
|
| 61 |
#endif
|
| 62 |
#if defined(SYCL_USE_XMX)
|
| 63 |
-
|
| 64 |
#else
|
| 65 |
-
|
| 66 |
#endif
|
| 67 |
-
|
| 68 |
|
| 69 |
for (int i = 0; i < info.device_count; ++i) {
|
| 70 |
info.devices[i].vmm = 0;
|
|
@@ -110,7 +110,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
|
|
| 110 |
|
| 111 |
auto global_mem_size = prop.get_global_mem_size()/1000000;
|
| 112 |
|
| 113 |
-
|
| 114 |
name.c_str(), version.c_str(), prop.get_max_compute_units(),
|
| 115 |
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
|
| 116 |
global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
|
|
@@ -120,19 +120,30 @@ void ggml_backend_sycl_print_sycl_devices() {
|
|
| 120 |
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
|
| 121 |
int device_count = dpct::dev_mgr::instance().device_count();
|
| 122 |
std::map<std::string, size_t> DeviceNums;
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
for (int id = 0; id < device_count; ++id) {
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
| 136 |
}
|
| 137 |
}
|
| 138 |
|
|
@@ -154,15 +165,14 @@ static void ggml_check_sycl() try {
|
|
| 154 |
static bool initialized = false;
|
| 155 |
|
| 156 |
if (!initialized) {
|
| 157 |
-
|
| 158 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 159 |
-
|
| 160 |
-
fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
|
| 161 |
|
| 162 |
#if defined(GGML_SYCL_F16)
|
| 163 |
-
|
| 164 |
#else
|
| 165 |
-
|
| 166 |
#endif
|
| 167 |
|
| 168 |
/* NOT REMOVE, keep it for next optimize for XMX.
|
|
@@ -180,9 +190,10 @@ static void ggml_check_sycl() try {
|
|
| 180 |
return;
|
| 181 |
}
|
| 182 |
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
|
| 183 |
-
|
| 184 |
initialized = true;
|
| 185 |
g_sycl_loaded = true;
|
|
|
|
| 186 |
}
|
| 187 |
}
|
| 188 |
catch (sycl::exception const &exc) {
|
|
@@ -205,7 +216,7 @@ inline void check_allow_gpu_index(const int device_index) {
|
|
| 205 |
__func__,
|
| 206 |
device_index,
|
| 207 |
ggml_sycl_info().device_count - 1);
|
| 208 |
-
|
| 209 |
assert(false);
|
| 210 |
}
|
| 211 |
}
|
|
@@ -475,8 +486,8 @@ ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
| 475 |
SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device(
|
| 476 |
size, *stream)));
|
| 477 |
if (!dev_ptr) {
|
| 478 |
-
|
| 479 |
-
|
| 480 |
}
|
| 481 |
ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream);
|
| 482 |
return ggml_backend_buffer_init(buft, ggml_backend_sycl_buffer_interface, ctx, size);
|
|
@@ -752,7 +763,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
|
| 752 |
size, *stream)));
|
| 753 |
if (!buf) {
|
| 754 |
char err_buf[1024];
|
| 755 |
-
snprintf(err_buf, 1023, "%s: can't
|
| 756 |
throw std::runtime_error(err_buf);
|
| 757 |
}
|
| 758 |
// set padding to 0 to avoid possible NaN values
|
|
@@ -1142,17 +1153,18 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
|
|
| 1142 |
CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device(
|
| 1143 |
look_ahead_size, *qptr)));
|
| 1144 |
if (!ptr) {
|
| 1145 |
-
|
| 1146 |
return nullptr;
|
| 1147 |
}
|
| 1148 |
|
| 1149 |
*actual_size = look_ahead_size;
|
| 1150 |
pool_size += look_ahead_size;
|
| 1151 |
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
(uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024));
|
| 1155 |
-
|
|
|
|
| 1156 |
// GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
|
| 1157 |
return ptr;
|
| 1158 |
}
|
|
@@ -1166,7 +1178,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
|
|
| 1166 |
return;
|
| 1167 |
}
|
| 1168 |
}
|
| 1169 |
-
|
| 1170 |
SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr)));
|
| 1171 |
pool_size -= size;
|
| 1172 |
}
|
|
@@ -2437,7 +2449,7 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te
|
|
| 2437 |
break;
|
| 2438 |
default:
|
| 2439 |
// TODO: k-quants
|
| 2440 |
-
|
| 2441 |
GGML_ABORT("fatal error");
|
| 2442 |
break;
|
| 2443 |
}
|
|
@@ -3750,7 +3762,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
|
|
| 3750 |
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
|
| 3751 |
ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
|
| 3752 |
} else {
|
| 3753 |
-
|
| 3754 |
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
| 3755 |
GGML_ABORT("fatal error");
|
| 3756 |
}
|
|
@@ -3825,7 +3837,7 @@ void ggml_sycl_set_main_device(const int main_device) try {
|
|
| 3825 |
dpct::device_info prop;
|
| 3826 |
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
| 3827 |
prop, dpct::dev_mgr::instance().get_device(main_device))));
|
| 3828 |
-
|
| 3829 |
main_device, prop.get_name());
|
| 3830 |
}
|
| 3831 |
}
|
|
@@ -4172,7 +4184,7 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
|
|
| 4172 |
#endif
|
| 4173 |
bool ok = ggml_sycl_compute_forward(*sycl_ctx, node);
|
| 4174 |
if (!ok) {
|
| 4175 |
-
|
| 4176 |
}
|
| 4177 |
GGML_ASSERT(ok);
|
| 4178 |
}
|
|
@@ -4672,7 +4684,7 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
|
|
| 4672 |
|
| 4673 |
ggml_backend_sycl_context * ctx = new ggml_backend_sycl_context(device);
|
| 4674 |
if (ctx == nullptr) {
|
| 4675 |
-
|
| 4676 |
return nullptr;
|
| 4677 |
};
|
| 4678 |
|
|
|
|
| 47 |
|
| 48 |
info.device_count = dpct::dev_mgr::instance().device_count();
|
| 49 |
if (info.device_count == 0) {
|
| 50 |
+
GGML_LOG_ERROR("%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
|
| 51 |
return info;
|
| 52 |
}
|
| 53 |
|
|
|
|
| 55 |
|
| 56 |
int64_t total_vram = 0;
|
| 57 |
#if defined(GGML_SYCL_FORCE_MMQ)
|
| 58 |
+
GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
|
| 59 |
#else
|
| 60 |
+
GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
|
| 61 |
#endif
|
| 62 |
#if defined(SYCL_USE_XMX)
|
| 63 |
+
GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
|
| 64 |
#else
|
| 65 |
+
GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
|
| 66 |
#endif
|
| 67 |
+
GGML_LOG_INFO("%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count);
|
| 68 |
|
| 69 |
for (int i = 0; i < info.device_count; ++i) {
|
| 70 |
info.devices[i].vmm = 0;
|
|
|
|
| 110 |
|
| 111 |
auto global_mem_size = prop.get_global_mem_size()/1000000;
|
| 112 |
|
| 113 |
+
GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
|
| 114 |
name.c_str(), version.c_str(), prop.get_max_compute_units(),
|
| 115 |
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
|
| 116 |
global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
|
|
|
|
| 120 |
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
|
| 121 |
int device_count = dpct::dev_mgr::instance().device_count();
|
| 122 |
std::map<std::string, size_t> DeviceNums;
|
| 123 |
+
GGML_LOG_INFO("Found %d SYCL devices:\n", device_count);
|
| 124 |
+
|
| 125 |
+
GGML_LOG_INFO(
|
| 126 |
+
"| | | | "
|
| 127 |
+
" |Max | |Max |Global | |\n");
|
| 128 |
+
GGML_LOG_INFO(
|
| 129 |
+
"| | | | "
|
| 130 |
+
" |compute|Max work|sub |mem | |\n");
|
| 131 |
+
GGML_LOG_INFO(
|
| 132 |
+
"|ID| Device Type| "
|
| 133 |
+
"Name|Version|units |group |group|size | Driver version|\n");
|
| 134 |
+
GGML_LOG_INFO(
|
| 135 |
+
"|--|-------------------|---------------------------------------|------"
|
| 136 |
+
"-|-------|--------|-----|-------|---------------------|\n");
|
| 137 |
+
|
| 138 |
for (int id = 0; id < device_count; ++id) {
|
| 139 |
+
sycl::device device = dpct::dev_mgr::instance().get_device(id);
|
| 140 |
+
sycl::backend backend = device.get_backend();
|
| 141 |
+
std::string backend_type = get_device_backend_and_type(device);
|
| 142 |
+
int type_id = DeviceNums[backend_type]++;
|
| 143 |
+
std::stringstream device_type;
|
| 144 |
+
device_type << "[" << backend_type << ":" << std::to_string(type_id)
|
| 145 |
+
<< "]";
|
| 146 |
+
print_device_detail(id, device, device_type.str());
|
| 147 |
}
|
| 148 |
}
|
| 149 |
|
|
|
|
| 165 |
static bool initialized = false;
|
| 166 |
|
| 167 |
if (!initialized) {
|
| 168 |
+
GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
|
| 169 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 170 |
+
GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
|
|
|
|
| 171 |
|
| 172 |
#if defined(GGML_SYCL_F16)
|
| 173 |
+
GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
|
| 174 |
#else
|
| 175 |
+
GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
|
| 176 |
#endif
|
| 177 |
|
| 178 |
/* NOT REMOVE, keep it for next optimize for XMX.
|
|
|
|
| 190 |
return;
|
| 191 |
}
|
| 192 |
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
|
| 193 |
+
|
| 194 |
initialized = true;
|
| 195 |
g_sycl_loaded = true;
|
| 196 |
+
ggml_backend_sycl_print_sycl_devices();
|
| 197 |
}
|
| 198 |
}
|
| 199 |
catch (sycl::exception const &exc) {
|
|
|
|
| 216 |
__func__,
|
| 217 |
device_index,
|
| 218 |
ggml_sycl_info().device_count - 1);
|
| 219 |
+
GGML_LOG_ERROR("%s\n", error_buf);
|
| 220 |
assert(false);
|
| 221 |
}
|
| 222 |
}
|
|
|
|
| 486 |
SYCL_CHECK(CHECK_TRY_ERROR(dev_ptr = (void *)sycl::malloc_device(
|
| 487 |
size, *stream)));
|
| 488 |
if (!dev_ptr) {
|
| 489 |
+
GGML_LOG_ERROR("%s: can't allocate %lu Bytes of memory on device\n", __func__, size);
|
| 490 |
+
return nullptr;
|
| 491 |
}
|
| 492 |
ggml_backend_sycl_buffer_context * ctx = new ggml_backend_sycl_buffer_context(buft_ctx->device, dev_ptr, buft_ctx->stream);
|
| 493 |
return ggml_backend_buffer_init(buft, ggml_backend_sycl_buffer_interface, ctx, size);
|
|
|
|
| 763 |
size, *stream)));
|
| 764 |
if (!buf) {
|
| 765 |
char err_buf[1024];
|
| 766 |
+
snprintf(err_buf, 1023, "%s: can't allocate %lu Bytes of memory on device\n", __func__, size);
|
| 767 |
throw std::runtime_error(err_buf);
|
| 768 |
}
|
| 769 |
// set padding to 0 to avoid possible NaN values
|
|
|
|
| 1153 |
CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_device(
|
| 1154 |
look_ahead_size, *qptr)));
|
| 1155 |
if (!ptr) {
|
| 1156 |
+
GGML_LOG_ERROR("%s: can't allocate %lu Bytes of memory on device/GPU\n", __func__, look_ahead_size);
|
| 1157 |
return nullptr;
|
| 1158 |
}
|
| 1159 |
|
| 1160 |
*actual_size = look_ahead_size;
|
| 1161 |
pool_size += look_ahead_size;
|
| 1162 |
|
| 1163 |
+
#ifdef DEBUG_SYCL_MALLOC
|
| 1164 |
+
GGML_LOG_DEBUG("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, id, nnz,
|
| 1165 |
(uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024));
|
| 1166 |
+
#endif
|
| 1167 |
+
|
| 1168 |
// GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
|
| 1169 |
return ptr;
|
| 1170 |
}
|
|
|
|
| 1178 |
return;
|
| 1179 |
}
|
| 1180 |
}
|
| 1181 |
+
GGML_LOG_WARN("WARNING: sycl buffer pool full, increase MAX_sycl_BUFFERS\n");
|
| 1182 |
SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr)));
|
| 1183 |
pool_size -= size;
|
| 1184 |
}
|
|
|
|
| 2449 |
break;
|
| 2450 |
default:
|
| 2451 |
// TODO: k-quants
|
| 2452 |
+
GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
|
| 2453 |
GGML_ABORT("fatal error");
|
| 2454 |
break;
|
| 2455 |
}
|
|
|
|
| 3762 |
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
|
| 3763 |
ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
|
| 3764 |
} else {
|
| 3765 |
+
GGML_LOG_ERROR("%s: unsupported type combination (%s to %s)\n", __func__,
|
| 3766 |
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
| 3767 |
GGML_ABORT("fatal error");
|
| 3768 |
}
|
|
|
|
| 3837 |
dpct::device_info prop;
|
| 3838 |
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
| 3839 |
prop, dpct::dev_mgr::instance().get_device(main_device))));
|
| 3840 |
+
GGML_LOG_INFO("Using device %d (%s) as main device\n",
|
| 3841 |
main_device, prop.get_name());
|
| 3842 |
}
|
| 3843 |
}
|
|
|
|
| 4184 |
#endif
|
| 4185 |
bool ok = ggml_sycl_compute_forward(*sycl_ctx, node);
|
| 4186 |
if (!ok) {
|
| 4187 |
+
GGML_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
|
| 4188 |
}
|
| 4189 |
GGML_ASSERT(ok);
|
| 4190 |
}
|
|
|
|
| 4684 |
|
| 4685 |
ggml_backend_sycl_context * ctx = new ggml_backend_sycl_context(device);
|
| 4686 |
if (ctx == nullptr) {
|
| 4687 |
+
GGML_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
|
| 4688 |
return nullptr;
|
| 4689 |
};
|
| 4690 |
|