Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
4c5e449
1
Parent(s):
ba7a5f8
ggml : add bilinear upscale support (ggml/1185)
Browse files- ggml/include/ggml.h +12 -7
- ggml/src/ggml-cann/ggml-cann.cpp +3 -0
- ggml/src/ggml-cpu/ops.cpp +63 -15
- ggml/src/ggml-cuda/ggml-cuda.cu +1 -0
- ggml/src/ggml-metal/ggml-metal.m +2 -1
- ggml/src/ggml-sycl/ggml-sycl.cpp +2 -1
- ggml/src/ggml-vulkan/ggml-vulkan.cpp +4 -3
- ggml/src/ggml.c +10 -5
ggml/include/ggml.h
CHANGED
|
@@ -1717,24 +1717,29 @@ extern "C" {
|
|
| 1717 |
float p0,
|
| 1718 |
float p1);
|
| 1719 |
|
| 1720 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1721 |
// multiplies ne0 and ne1 by scale factor
|
| 1722 |
-
// used in stable-diffusion
|
| 1723 |
GGML_API struct ggml_tensor * ggml_upscale(
|
| 1724 |
struct ggml_context * ctx,
|
| 1725 |
struct ggml_tensor * a,
|
| 1726 |
-
int scale_factor
|
|
|
|
| 1727 |
|
| 1728 |
-
//
|
| 1729 |
-
//
|
| 1730 |
-
// used in tortoise.cpp
|
| 1731 |
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
| 1732 |
struct ggml_context * ctx,
|
| 1733 |
struct ggml_tensor * a,
|
| 1734 |
int ne0,
|
| 1735 |
int ne1,
|
| 1736 |
int ne2,
|
| 1737 |
-
int ne3
|
|
|
|
| 1738 |
|
| 1739 |
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
| 1740 |
GGML_API struct ggml_tensor * ggml_pad(
|
|
|
|
| 1717 |
float p0,
|
| 1718 |
float p1);
|
| 1719 |
|
| 1720 |
+
enum ggml_scale_mode {
|
| 1721 |
+
GGML_SCALE_MODE_NEAREST = 0,
|
| 1722 |
+
GGML_SCALE_MODE_BILINEAR = 1,
|
| 1723 |
+
};
|
| 1724 |
+
|
| 1725 |
+
// interpolate
|
| 1726 |
// multiplies ne0 and ne1 by scale factor
|
|
|
|
| 1727 |
GGML_API struct ggml_tensor * ggml_upscale(
|
| 1728 |
struct ggml_context * ctx,
|
| 1729 |
struct ggml_tensor * a,
|
| 1730 |
+
int scale_factor,
|
| 1731 |
+
enum ggml_scale_mode mode);
|
| 1732 |
|
| 1733 |
+
// interpolate
|
| 1734 |
+
// interpolates (scales) the tensor to the specified dimensions ne0 x ne1 x ne2 x ne3
|
|
|
|
| 1735 |
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
| 1736 |
struct ggml_context * ctx,
|
| 1737 |
struct ggml_tensor * a,
|
| 1738 |
int ne0,
|
| 1739 |
int ne1,
|
| 1740 |
int ne2,
|
| 1741 |
+
int ne3,
|
| 1742 |
+
enum ggml_scale_mode mode);
|
| 1743 |
|
| 1744 |
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
| 1745 |
GGML_API struct ggml_tensor * ggml_pad(
|
ggml/src/ggml-cann/ggml-cann.cpp
CHANGED
|
@@ -1796,6 +1796,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
| 1796 |
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
|
| 1797 |
return false;
|
| 1798 |
}
|
|
|
|
|
|
|
|
|
|
| 1799 |
return true;
|
| 1800 |
}
|
| 1801 |
case GGML_OP_POOL_2D: {
|
|
|
|
| 1796 |
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
|
| 1797 |
return false;
|
| 1798 |
}
|
| 1799 |
+
if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
|
| 1800 |
+
return false;
|
| 1801 |
+
}
|
| 1802 |
return true;
|
| 1803 |
}
|
| 1804 |
case GGML_OP_POOL_2D: {
|
ggml/src/ggml-cpu/ops.cpp
CHANGED
|
@@ -6351,24 +6351,72 @@ static void ggml_compute_forward_upscale_f32(
|
|
| 6351 |
const float sf2 = (float)ne2/src0->ne[2];
|
| 6352 |
const float sf3 = (float)ne3/src0->ne[3];
|
| 6353 |
|
| 6354 |
-
|
| 6355 |
-
|
| 6356 |
-
|
| 6357 |
-
|
| 6358 |
-
|
| 6359 |
-
|
| 6360 |
-
|
| 6361 |
-
|
| 6362 |
-
|
| 6363 |
-
|
| 6364 |
-
|
| 6365 |
-
|
| 6366 |
-
|
| 6367 |
-
|
| 6368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6369 |
}
|
| 6370 |
}
|
| 6371 |
}
|
|
|
|
|
|
|
| 6372 |
}
|
| 6373 |
}
|
| 6374 |
|
|
|
|
| 6351 |
const float sf2 = (float)ne2/src0->ne[2];
|
| 6352 |
const float sf3 = (float)ne3/src0->ne[3];
|
| 6353 |
|
| 6354 |
+
const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
|
| 6355 |
+
|
| 6356 |
+
if (mode == GGML_SCALE_MODE_NEAREST) {
|
| 6357 |
+
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
| 6358 |
+
const int64_t i03 = i3 / sf3;
|
| 6359 |
+
for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
|
| 6360 |
+
const int64_t i02 = i2 / sf2;
|
| 6361 |
+
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
| 6362 |
+
const int64_t i01 = i1 / sf1;
|
| 6363 |
+
for (int64_t i0 = 0; i0 < ne0; i0++) {
|
| 6364 |
+
const int64_t i00 = i0 / sf0;
|
| 6365 |
+
|
| 6366 |
+
const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
| 6367 |
+
float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
|
| 6368 |
+
|
| 6369 |
+
*y = *x;
|
| 6370 |
+
}
|
| 6371 |
+
}
|
| 6372 |
+
}
|
| 6373 |
+
}
|
| 6374 |
+
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
|
| 6375 |
+
// setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
|
| 6376 |
+
const float pixel_offset = 0.5f;
|
| 6377 |
+
|
| 6378 |
+
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
| 6379 |
+
const int64_t i03 = i3 / sf3;
|
| 6380 |
+
for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
|
| 6381 |
+
const int64_t i02 = i2 / sf2;
|
| 6382 |
+
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
| 6383 |
+
const float y = ((float)i1 + pixel_offset) / sf1 - pixel_offset;
|
| 6384 |
+
int64_t y0 = (int64_t)floorf(y);
|
| 6385 |
+
int64_t y1 = y0 + 1;
|
| 6386 |
+
|
| 6387 |
+
y0 = std::max(int64_t(0), std::min(y0, ne01 - 1));
|
| 6388 |
+
y1 = std::max(int64_t(0), std::min(y1, ne01 - 1));
|
| 6389 |
+
|
| 6390 |
+
float dy = y - (float)y0;
|
| 6391 |
+
dy = std::max(0.0f, std::min(dy, 1.0f));
|
| 6392 |
+
|
| 6393 |
+
for (int64_t i0 = 0; i0 < ne0; i0++) {
|
| 6394 |
+
const float x = ((float)i0 + pixel_offset) / sf0 - pixel_offset;
|
| 6395 |
+
int64_t x0 = (int64_t)floorf(x);
|
| 6396 |
+
int64_t x1 = x0 + 1;
|
| 6397 |
+
|
| 6398 |
+
x0 = std::max(int64_t(0), std::min(x0, ne00 - 1));
|
| 6399 |
+
x1 = std::max(int64_t(0), std::min(x1, ne00 - 1));
|
| 6400 |
+
|
| 6401 |
+
float dx = x - (float)x0;
|
| 6402 |
+
dx = std::max(0.0f, std::min(dx, 1.0f));
|
| 6403 |
+
|
| 6404 |
+
// fetch the four surrounding pixel values and interpolate
|
| 6405 |
+
const float a = *(const float *)((const char *)src0->data + x0*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
|
| 6406 |
+
const float b = *(const float *)((const char *)src0->data + x1*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
|
| 6407 |
+
const float c = *(const float *)((const char *)src0->data + x0*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
|
| 6408 |
+
const float d = *(const float *)((const char *)src0->data + x1*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
|
| 6409 |
+
|
| 6410 |
+
const float val = a*(1 - dx)*(1 - dy) + b*dx*(1 - dy) + c*(1 - dx)*dy + d*dx*dy;
|
| 6411 |
+
|
| 6412 |
+
float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
|
| 6413 |
+
*y_dst = val;
|
| 6414 |
+
}
|
| 6415 |
}
|
| 6416 |
}
|
| 6417 |
}
|
| 6418 |
+
} else {
|
| 6419 |
+
GGML_ABORT("unsupported upscale mode");
|
| 6420 |
}
|
| 6421 |
}
|
| 6422 |
|
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
|
@@ -3213,6 +3213,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|
| 3213 |
case GGML_OP_GROUP_NORM:
|
| 3214 |
return ggml_is_contiguous(op->src[0]);
|
| 3215 |
case GGML_OP_UPSCALE:
|
|
|
|
| 3216 |
case GGML_OP_PAD:
|
| 3217 |
case GGML_OP_ARANGE:
|
| 3218 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
|
|
| 3213 |
case GGML_OP_GROUP_NORM:
|
| 3214 |
return ggml_is_contiguous(op->src[0]);
|
| 3215 |
case GGML_OP_UPSCALE:
|
| 3216 |
+
return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
|
| 3217 |
case GGML_OP_PAD:
|
| 3218 |
case GGML_OP_ARANGE:
|
| 3219 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
ggml/src/ggml-metal/ggml-metal.m
CHANGED
|
@@ -1334,8 +1334,9 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
|
|
| 1334 |
return op->src[0]->type == GGML_TYPE_F16;
|
| 1335 |
case GGML_OP_POOL_1D:
|
| 1336 |
return false;
|
| 1337 |
-
case GGML_OP_POOL_2D:
|
| 1338 |
case GGML_OP_UPSCALE:
|
|
|
|
|
|
|
| 1339 |
case GGML_OP_PAD:
|
| 1340 |
case GGML_OP_PAD_REFLECT_1D:
|
| 1341 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
|
|
| 1334 |
return op->src[0]->type == GGML_TYPE_F16;
|
| 1335 |
case GGML_OP_POOL_1D:
|
| 1336 |
return false;
|
|
|
|
| 1337 |
case GGML_OP_UPSCALE:
|
| 1338 |
+
return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
|
| 1339 |
+
case GGML_OP_POOL_2D:
|
| 1340 |
case GGML_OP_PAD:
|
| 1341 |
case GGML_OP_PAD_REFLECT_1D:
|
| 1342 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
|
@@ -4055,12 +4055,13 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|
| 4055 |
case GGML_OP_IM2COL:
|
| 4056 |
// TODO: add support for the new F32 operations
|
| 4057 |
return op->src[0]->type == GGML_TYPE_F16;
|
|
|
|
|
|
|
| 4058 |
case GGML_OP_POOL_2D:
|
| 4059 |
case GGML_OP_SUM:
|
| 4060 |
case GGML_OP_SUM_ROWS:
|
| 4061 |
case GGML_OP_ARGSORT:
|
| 4062 |
case GGML_OP_ACC:
|
| 4063 |
-
case GGML_OP_UPSCALE:
|
| 4064 |
case GGML_OP_PAD:
|
| 4065 |
case GGML_OP_LEAKY_RELU:
|
| 4066 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
|
|
| 4055 |
case GGML_OP_IM2COL:
|
| 4056 |
// TODO: add support for the new F32 operations
|
| 4057 |
return op->src[0]->type == GGML_TYPE_F16;
|
| 4058 |
+
case GGML_OP_UPSCALE:
|
| 4059 |
+
return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
|
| 4060 |
case GGML_OP_POOL_2D:
|
| 4061 |
case GGML_OP_SUM:
|
| 4062 |
case GGML_OP_SUM_ROWS:
|
| 4063 |
case GGML_OP_ARGSORT:
|
| 4064 |
case GGML_OP_ACC:
|
|
|
|
| 4065 |
case GGML_OP_PAD:
|
| 4066 |
case GGML_OP_LEAKY_RELU:
|
| 4067 |
case GGML_OP_TIMESTEP_EMBEDDING:
|
ggml/src/ggml-vulkan/ggml-vulkan.cpp
CHANGED
|
@@ -5743,7 +5743,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
|
| 5743 |
}
|
| 5744 |
return nullptr;
|
| 5745 |
case GGML_OP_UPSCALE:
|
| 5746 |
-
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
|
| 5747 |
return ctx->device->pipeline_upscale_f32;
|
| 5748 |
}
|
| 5749 |
return nullptr;
|
|
@@ -9398,9 +9398,10 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
|
| 9398 |
case GGML_OP_COS:
|
| 9399 |
case GGML_OP_CLAMP:
|
| 9400 |
return op->src[0]->type == GGML_TYPE_F32;
|
|
|
|
|
|
|
| 9401 |
case GGML_OP_ACC:
|
| 9402 |
case GGML_OP_CONCAT:
|
| 9403 |
-
case GGML_OP_UPSCALE:
|
| 9404 |
case GGML_OP_SCALE:
|
| 9405 |
case GGML_OP_PAD:
|
| 9406 |
case GGML_OP_DIAG_MASK_INF:
|
|
@@ -9768,7 +9769,7 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
|
|
| 9768 |
} else if (tensor->op == GGML_OP_CONCAT) {
|
| 9769 |
tensor_clone = ggml_concat(ggml_ctx, src_clone[0], src_clone[1], *(int *)tensor->op_params);
|
| 9770 |
} else if (tensor->op == GGML_OP_UPSCALE) {
|
| 9771 |
-
tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
|
| 9772 |
} else if (tensor->op == GGML_OP_SCALE) {
|
| 9773 |
const float * params = (const float *)tensor->op_params;
|
| 9774 |
tensor_clone = ggml_scale(ggml_ctx, src_clone[0], params[0]);
|
|
|
|
| 5743 |
}
|
| 5744 |
return nullptr;
|
| 5745 |
case GGML_OP_UPSCALE:
|
| 5746 |
+
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 && dst->op_params[0] == GGML_SCALE_MODE_NEAREST) {
|
| 5747 |
return ctx->device->pipeline_upscale_f32;
|
| 5748 |
}
|
| 5749 |
return nullptr;
|
|
|
|
| 9398 |
case GGML_OP_COS:
|
| 9399 |
case GGML_OP_CLAMP:
|
| 9400 |
return op->src[0]->type == GGML_TYPE_F32;
|
| 9401 |
+
case GGML_OP_UPSCALE:
|
| 9402 |
+
return op->op_params[0] == GGML_SCALE_MODE_NEAREST;
|
| 9403 |
case GGML_OP_ACC:
|
| 9404 |
case GGML_OP_CONCAT:
|
|
|
|
| 9405 |
case GGML_OP_SCALE:
|
| 9406 |
case GGML_OP_PAD:
|
| 9407 |
case GGML_OP_DIAG_MASK_INF:
|
|
|
|
| 9769 |
} else if (tensor->op == GGML_OP_CONCAT) {
|
| 9770 |
tensor_clone = ggml_concat(ggml_ctx, src_clone[0], src_clone[1], *(int *)tensor->op_params);
|
| 9771 |
} else if (tensor->op == GGML_OP_UPSCALE) {
|
| 9772 |
+
tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], tensor->op_params[0], tensor->op_params[1], (ggml_scale_mode) tensor->op_params[0]);
|
| 9773 |
} else if (tensor->op == GGML_OP_SCALE) {
|
| 9774 |
const float * params = (const float *)tensor->op_params;
|
| 9775 |
tensor_clone = ggml_scale(ggml_ctx, src_clone[0], params[0]);
|
ggml/src/ggml.c
CHANGED
|
@@ -4174,7 +4174,8 @@ static struct ggml_tensor * ggml_upscale_impl(
|
|
| 4174 |
int ne0,
|
| 4175 |
int ne1,
|
| 4176 |
int ne2,
|
| 4177 |
-
int ne3
|
|
|
|
| 4178 |
GGML_ASSERT(a->ne[0] <= ne0);
|
| 4179 |
GGML_ASSERT(a->ne[1] <= ne1);
|
| 4180 |
GGML_ASSERT(a->ne[2] <= ne2);
|
|
@@ -4182,6 +4183,8 @@ static struct ggml_tensor * ggml_upscale_impl(
|
|
| 4182 |
|
| 4183 |
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
| 4184 |
|
|
|
|
|
|
|
| 4185 |
result->op = GGML_OP_UPSCALE;
|
| 4186 |
result->src[0] = a;
|
| 4187 |
|
|
@@ -4191,8 +4194,9 @@ static struct ggml_tensor * ggml_upscale_impl(
|
|
| 4191 |
struct ggml_tensor * ggml_upscale(
|
| 4192 |
struct ggml_context * ctx,
|
| 4193 |
struct ggml_tensor * a,
|
| 4194 |
-
int scale_factor
|
| 4195 |
-
|
|
|
|
| 4196 |
}
|
| 4197 |
|
| 4198 |
struct ggml_tensor * ggml_upscale_ext(
|
|
@@ -4201,8 +4205,9 @@ struct ggml_tensor * ggml_upscale_ext(
|
|
| 4201 |
int ne0,
|
| 4202 |
int ne1,
|
| 4203 |
int ne2,
|
| 4204 |
-
int ne3
|
| 4205 |
-
|
|
|
|
| 4206 |
}
|
| 4207 |
|
| 4208 |
// ggml_pad
|
|
|
|
| 4174 |
int ne0,
|
| 4175 |
int ne1,
|
| 4176 |
int ne2,
|
| 4177 |
+
int ne3,
|
| 4178 |
+
enum ggml_scale_mode mode) {
|
| 4179 |
GGML_ASSERT(a->ne[0] <= ne0);
|
| 4180 |
GGML_ASSERT(a->ne[1] <= ne1);
|
| 4181 |
GGML_ASSERT(a->ne[2] <= ne2);
|
|
|
|
| 4183 |
|
| 4184 |
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
| 4185 |
|
| 4186 |
+
ggml_set_op_params_i32(result, 0, mode);
|
| 4187 |
+
|
| 4188 |
result->op = GGML_OP_UPSCALE;
|
| 4189 |
result->src[0] = a;
|
| 4190 |
|
|
|
|
| 4194 |
struct ggml_tensor * ggml_upscale(
|
| 4195 |
struct ggml_context * ctx,
|
| 4196 |
struct ggml_tensor * a,
|
| 4197 |
+
int scale_factor,
|
| 4198 |
+
enum ggml_scale_mode mode) {
|
| 4199 |
+
return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
|
| 4200 |
}
|
| 4201 |
|
| 4202 |
struct ggml_tensor * ggml_upscale_ext(
|
|
|
|
| 4205 |
int ne0,
|
| 4206 |
int ne1,
|
| 4207 |
int ne2,
|
| 4208 |
+
int ne3,
|
| 4209 |
+
enum ggml_scale_mode mode) {
|
| 4210 |
+
return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
| 4211 |
}
|
| 4212 |
|
| 4213 |
// ggml_pad
|