Diego Devesa committed
Commit 4c5e449 · 1 Parent(s): ba7a5f8

ggml : add bilinear upscale support (ggml/1185)

ggml/include/ggml.h CHANGED
@@ -1717,24 +1717,29 @@ extern "C" {
             float                 p0,
             float                 p1);
 
-    // nearest interpolate
+    enum ggml_scale_mode {
+        GGML_SCALE_MODE_NEAREST  = 0,
+        GGML_SCALE_MODE_BILINEAR = 1,
+    };
+
+    // interpolate
     // multiplies ne0 and ne1 by scale factor
-    // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_upscale(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   scale_factor);
+            int                   scale_factor,
+            enum ggml_scale_mode  mode);
 
-    // nearest interpolate
-    // nearest interpolate to specified dimensions
-    // used in tortoise.cpp
+    // interpolate
+    // interpolate scale to specified dimensions
     GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   ne0,
             int                   ne1,
             int                   ne2,
-            int                   ne3);
+            int                   ne3,
+            enum ggml_scale_mode  mode);
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
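With this change, both upscale entry points take an explicit enum ggml_scale_mode, so existing callers must be updated to pass GGML_SCALE_MODE_NEAREST to keep the old behavior. A minimal usage sketch (the context setup and tensor shapes here are illustrative, not part of the commit):

    // assumes a valid ggml_context `ctx`; shapes chosen for illustration
    struct ggml_tensor * img = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 3, 1);

    // 2x nearest-neighbor upscale (the pre-commit behavior): 128 x 128 x 3 x 1
    struct ggml_tensor * up0 = ggml_upscale(ctx, img, 2, GGML_SCALE_MODE_NEAREST);

    // bilinear upscale to explicit dimensions: 200 x 150 x 3 x 1
    struct ggml_tensor * up1 = ggml_upscale_ext(ctx, img, 200, 150, 3, 1, GGML_SCALE_MODE_BILINEAR);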
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
@@ -1796,6 +1796,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
                 if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
                     return false;
                 }
+                if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
+                    return false;
+                }
                 return true;
             }
         case GGML_OP_POOL_2D: {
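The CANN backend keeps its existing shape checks and adds a mode guard; the same pattern repeats in the CUDA, Metal, SYCL, and Vulkan hunks below. Because the scale mode is stored as the first int32 of op_params, a backend whose upscale kernel still only implements nearest-neighbor can reject bilinear ops and let them fall back to the CPU implementation. Condensed, the guard each backend adds amounts to this sketch (not a function that exists in the tree):

    // reject any upscale whose requested mode the backend kernel does not implement
    static bool upscale_mode_supported(const struct ggml_tensor * op) {
        return op->op_params[0] == GGML_SCALE_MODE_NEAREST;
    }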
ggml/src/ggml-cpu/ops.cpp CHANGED
@@ -6351,24 +6351,72 @@ static void ggml_compute_forward_upscale_f32(
     const float sf2 = (float)ne2/src0->ne[2];
     const float sf3 = (float)ne3/src0->ne[3];
 
-    // TODO: optimize
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        const int64_t i03 = i3 / sf3;
-        for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
-            const int64_t i02 = i2 / sf2;
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                const int64_t i01 = i1 / sf1;
-                for (int64_t i0 = 0; i0 < ne0; i0++) {
-                    const int64_t i00 = i0 / sf0;
-
-                    const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-                    float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
-
-                    *y = *x;
+    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+
+    if (mode == GGML_SCALE_MODE_NEAREST) {
+        for (int64_t i3 = 0; i3 < ne3; i3++) {
+            const int64_t i03 = i3 / sf3;
+            for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+                const int64_t i02 = i2 / sf2;
+                for (int64_t i1 = 0; i1 < ne1; i1++) {
+                    const int64_t i01 = i1 / sf1;
+                    for (int64_t i0 = 0; i0 < ne0; i0++) {
+                        const int64_t i00 = i0 / sf0;
+
+                        const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                        float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                        *y = *x;
+                    }
+                }
+            }
+        }
+    } else if (mode == GGML_SCALE_MODE_BILINEAR) {
+        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
+        const float pixel_offset = 0.5f;
+
+        for (int64_t i3 = 0; i3 < ne3; i3++) {
+            const int64_t i03 = i3 / sf3;
+            for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+                const int64_t i02 = i2 / sf2;
+                for (int64_t i1 = 0; i1 < ne1; i1++) {
+                    const float y = ((float)i1 + pixel_offset) / sf1 - pixel_offset;
+                    int64_t y0 = (int64_t)floorf(y);
+                    int64_t y1 = y0 + 1;
+
+                    y0 = std::max(int64_t(0), std::min(y0, ne01 - 1));
+                    y1 = std::max(int64_t(0), std::min(y1, ne01 - 1));
+
+                    float dy = y - (float)y0;
+                    dy = std::max(0.0f, std::min(dy, 1.0f));
+
+                    for (int64_t i0 = 0; i0 < ne0; i0++) {
+                        const float x = ((float)i0 + pixel_offset) / sf0 - pixel_offset;
+                        int64_t x0 = (int64_t)floorf(x);
+                        int64_t x1 = x0 + 1;
+
+                        x0 = std::max(int64_t(0), std::min(x0, ne00 - 1));
+                        x1 = std::max(int64_t(0), std::min(x1, ne00 - 1));
+
+                        float dx = x - (float)x0;
+                        dx = std::max(0.0f, std::min(dx, 1.0f));
+
+                        // fetch the four surrounding pixel values and interpolate
+                        const float a = *(const float *)((const char *)src0->data + x0*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+                        const float b = *(const float *)((const char *)src0->data + x1*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+                        const float c = *(const float *)((const char *)src0->data + x0*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+                        const float d = *(const float *)((const char *)src0->data + x1*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+
+                        const float val = a*(1 - dx)*(1 - dy) + b*dx*(1 - dy) + c*(1 - dx)*dy + d*dx*dy;
+
+                        float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+                        *y_dst = val;
+                    }
                 }
             }
         }
+    } else {
+        GGML_ABORT("unsupported upscale mode");
     }
 }
 
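The CPU path is the only one gaining a real bilinear kernel in this commit. pixel_offset = 0.5f selects the half-pixel convention (PyTorch's align_corners=False): output index i maps to source coordinate (i + 0.5)/sf - 0.5, the two flanking sample indices are clamped to the valid range, and the fractional part weights the blend. A self-contained 1-D sketch of that mapping, with an illustrative 2-sample source upscaled 2x (not part of the commit):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        const float src[2] = {0.0f, 1.0f};
        const float sf = 2.0f;           // dst has 4 samples
        const float pixel_offset = 0.5f; // 0.0f would match align_corners=True

        for (int i = 0; i < 4; i++) {
            const float x = ((float)i + pixel_offset) / sf - pixel_offset;
            int x0 = (int)floorf(x);
            int x1 = x0 + 1;
            // clamp indices into [0, 1], as the kernel clamps against ne00 - 1
            x0 = x0 < 0 ? 0 : (x0 > 1 ? 1 : x0);
            x1 = x1 < 0 ? 0 : (x1 > 1 ? 1 : x1);
            float dx = x - (float)x0;
            dx = dx < 0.0f ? 0.0f : (dx > 1.0f ? 1.0f : dx);
            printf("dst[%d] = %.3f\n", i, src[x0]*(1.0f - dx) + src[x1]*dx);
        }
        return 0; // prints 0.000, 0.250, 0.750, 1.000
    }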
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -3213,6 +3213,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_GROUP_NORM:
             return ggml_is_contiguous(op->src[0]);
         case GGML_OP_UPSCALE:
+            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
         case GGML_OP_PAD:
         case GGML_OP_ARANGE:
         case GGML_OP_TIMESTEP_EMBEDDING:
ggml/src/ggml-metal/ggml-metal.m CHANGED
@@ -1334,8 +1334,9 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
             return op->src[0]->type == GGML_TYPE_F16;
         case GGML_OP_POOL_1D:
             return false;
-        case GGML_OP_POOL_2D:
         case GGML_OP_UPSCALE:
+            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
+        case GGML_OP_POOL_2D:
         case GGML_OP_PAD:
         case GGML_OP_PAD_REFLECT_1D:
         case GGML_OP_TIMESTEP_EMBEDDING:
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -4055,12 +4055,13 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_IM2COL:
             // TODO: add support for the new F32 operations
             return op->src[0]->type == GGML_TYPE_F16;
+        case GGML_OP_UPSCALE:
+            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
         case GGML_OP_POOL_2D:
         case GGML_OP_SUM:
         case GGML_OP_SUM_ROWS:
         case GGML_OP_ARGSORT:
         case GGML_OP_ACC:
-        case GGML_OP_UPSCALE:
         case GGML_OP_PAD:
         case GGML_OP_LEAKY_RELU:
         case GGML_OP_TIMESTEP_EMBEDDING:
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -5743,7 +5743,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
         }
         return nullptr;
     case GGML_OP_UPSCALE:
-        if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+        if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 && dst->op_params[0] == GGML_SCALE_MODE_NEAREST) {
             return ctx->device->pipeline_upscale_f32;
         }
         return nullptr;
@@ -9398,9 +9398,10 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
         case GGML_OP_COS:
         case GGML_OP_CLAMP:
             return op->src[0]->type == GGML_TYPE_F32;
+        case GGML_OP_UPSCALE:
+            return op->op_params[0] == GGML_SCALE_MODE_NEAREST;
         case GGML_OP_ACC:
         case GGML_OP_CONCAT:
-        case GGML_OP_UPSCALE:
         case GGML_OP_SCALE:
         case GGML_OP_PAD:
         case GGML_OP_DIAG_MASK_INF:
@@ -9768,7 +9769,7 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
     } else if (tensor->op == GGML_OP_CONCAT) {
         tensor_clone = ggml_concat(ggml_ctx, src_clone[0], src_clone[1], *(int *)tensor->op_params);
     } else if (tensor->op == GGML_OP_UPSCALE) {
-        tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+        tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], (ggml_scale_mode) tensor->op_params[0]);
     } else if (tensor->op == GGML_OP_SCALE) {
         const float * params = (const float *)tensor->op_params;
         tensor_clone = ggml_scale(ggml_ctx, src_clone[0], params[0]);
ggml/src/ggml.c CHANGED
@@ -4174,7 +4174,8 @@ static struct ggml_tensor * ggml_upscale_impl(
         int                   ne0,
         int                   ne1,
         int                   ne2,
-        int                   ne3) {
+        int                   ne3,
+        enum ggml_scale_mode  mode) {
     GGML_ASSERT(a->ne[0] <= ne0);
     GGML_ASSERT(a->ne[1] <= ne1);
     GGML_ASSERT(a->ne[2] <= ne2);
@@ -4182,6 +4183,8 @@ static struct ggml_tensor * ggml_upscale_impl(
 
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
+    ggml_set_op_params_i32(result, 0, mode);
+
     result->op     = GGML_OP_UPSCALE;
     result->src[0] = a;
 
@@ -4191,8 +4194,9 @@ static struct ggml_tensor * ggml_upscale_impl(
 struct ggml_tensor * ggml_upscale(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   scale_factor) {
-    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
+        int                   scale_factor,
+        enum ggml_scale_mode  mode) {
+    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
 }
 
 struct ggml_tensor * ggml_upscale_ext(
@@ -4201,8 +4205,9 @@ struct ggml_tensor * ggml_upscale_ext(
         int                   ne0,
         int                   ne1,
         int                   ne2,
-        int                   ne3) {
-    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
+        int                   ne3,
+        enum ggml_scale_mode  mode) {
+    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
 }
 
 // ggml_pad
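Since op_params travels with the tensor, the mode set once in ggml_upscale_impl is visible everywhere the op is later examined, which is what the backend checks above rely on. A toy model of that round-trip (a plain struct standing in for ggml_tensor, purely for illustration):

    #include <stdint.h>
    #include <stdio.h>

    // stand-in for the per-op parameter block on ggml_tensor;
    // the commit stores the scale mode in the first int32 slot
    struct toy_tensor { int32_t op_params[16]; };

    enum ggml_scale_mode { GGML_SCALE_MODE_NEAREST = 0, GGML_SCALE_MODE_BILINEAR = 1 };

    int main(void) {
        struct toy_tensor t = {{0}};
        t.op_params[0] = GGML_SCALE_MODE_BILINEAR; // ggml_set_op_params_i32(result, 0, mode)
        // a nearest-only backend rejects the op, so it falls back to the CPU kernel
        printf("nearest-only backend supports op: %d\n",
               t.op_params[0] == GGML_SCALE_MODE_NEAREST); // prints 0
        return 0;
    }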