slaren committed · Commit 9808fbf
Parent(s): dc51517
ggml : reduce hash table reset cost (llama/8698)
* ggml : reduce hash table reset cost
* fix unreachable code warnings after GGML_ASSERT(false)
* GGML_ASSERT(false) -> GGML_ABORT("fatal error")
* GGML_ABORT use format string
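The first bullet is the heart of the change: ggml_hash_set used to mark a free slot with a NULL key, so resetting it meant zeroing the entire keys array (size * sizeof(struct ggml_tensor *) bytes) every time a graph was rebuilt. The new layout adds a per-slot occupancy bitset, so a reset only clears one bit per slot and stale keys can be left in place. A minimal sketch of the pattern, with illustrative helper names and bodies (the real helpers live in ggml/src/ggml-impl.h, which is not shown on this page):

#include <stdint.h>
#include <string.h>

typedef uint32_t ggml_bitset_t;

// number of 32-bit words needed to hold one bit per slot
static size_t bitset_words(size_t n) { return (n + 31) / 32; }

struct hash_set {
    size_t          size;
    ggml_bitset_t * used; // occupancy: 1 bit per slot
    void         ** keys; // keys[i] is only meaningful when bit i is set
};

// reset cost drops from size * sizeof(void *) bytes (zeroing keys)
// to about size / 8 bytes (zeroing only the bitset)
static void hash_set_reset(struct hash_set * s) {
    memset(s->used, 0, bitset_words(s->size) * sizeof(ggml_bitset_t));
}

static int slot_in_use(const struct hash_set * s, size_t i) {
    return (s->used[i / 32] >> (i % 32)) & 1;
}

static void mark_slot_used(struct hash_set * s, size_t i) {
    s->used[i / 32] |= (ggml_bitset_t)1 << (i % 32);
}

For a table sized for a few thousand tensors this turns each reset from tens of kilobytes of memset into a few hundred bytes, and the scheduler and allocator below reset their tables on every graph evaluation.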
- ggml/include/ggml.h +21 -16
- ggml/src/ggml-alloc.c +18 -24
- ggml/src/ggml-backend.c +110 -104
- ggml/src/ggml-blas.cpp +1 -2
- ggml/src/ggml-cuda.cu +3 -3
- ggml/src/ggml-cuda/argsort.cu +1 -1
- ggml/src/ggml-cuda/binbcast.cu +1 -1
- ggml/src/ggml-cuda/common.cuh +1 -1
- ggml/src/ggml-cuda/cpy.cu +2 -2
- ggml/src/ggml-cuda/dmmv.cu +1 -1
- ggml/src/ggml-cuda/fattn-common.cuh +3 -3
- ggml/src/ggml-cuda/fattn-tile-f16.cu +1 -1
- ggml/src/ggml-cuda/fattn-tile-f32.cu +1 -1
- ggml/src/ggml-cuda/fattn.cu +5 -5
- ggml/src/ggml-cuda/getrows.cu +1 -2
- ggml/src/ggml-cuda/mmq.cu +1 -1
- ggml/src/ggml-cuda/mmq.cuh +2 -2
- ggml/src/ggml-cuda/mmvq.cu +3 -3
- ggml/src/ggml-cuda/quantize.cu +1 -1
- ggml/src/ggml-cuda/rope.cu +2 -2
- ggml/src/ggml-impl.h +108 -8
- ggml/src/ggml-kompute.cpp +4 -4
- ggml/src/ggml-metal.m +21 -21
- ggml/src/ggml-quants.c +6 -6
- ggml/src/ggml-sycl.cpp +8 -8
- ggml/src/ggml-sycl/common.hpp +1 -1
- ggml/src/ggml-sycl/dmmv.cpp +1 -1
- ggml/src/ggml-sycl/dpct/helper.hpp +1 -1
- ggml/src/ggml-sycl/mmq.cpp +11 -11
- ggml/src/ggml-sycl/mmvq.cpp +1 -1
- ggml/src/ggml-sycl/rope.cpp +2 -2
- ggml/src/ggml-vulkan.cpp +29 -29
- ggml/src/ggml.c +412 -414
ggml/include/ggml.h
CHANGED

@@ -254,18 +254,8 @@
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
-#define GGML_ASSERT(x) \
-    do { \
-        if (!(x)) { \
-            fflush(stdout); \
-            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
-            ggml_print_backtrace(); \
-            abort(); \
-        } \
-    } while (0)
-
 #ifndef NDEBUG
-#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
+#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
 #elif defined(__GNUC__)
 #define GGML_UNREACHABLE() __builtin_unreachable()
 #elif defined(_MSC_VER)
@@ -274,6 +264,17 @@
 #define GGML_UNREACHABLE() ((void) 0)
 #endif
 
+#ifdef __cplusplus
+#define GGML_NORETURN [[noreturn]]
+#elif defined(_MSC_VER)
+#define GGML_NORETURN __declspec(noreturn)
+#else
+#define GGML_NORETURN _Noreturn
+#endif
+
+#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
+#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)
+
 // used to copy the number of elements and stride in bytes of tensors into local variables.
 // main purpose is to reduce code duplication and improve readability.
 //
@@ -322,6 +323,9 @@
 extern "C" {
 #endif
 
+    GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
+    GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
+
     enum ggml_status {
         GGML_STATUS_ALLOC_FAILED = -2,
         GGML_STATUS_FAILED       = -1,
@@ -636,8 +640,11 @@ extern "C" {
         GGML_CGRAPH_EVAL_ORDER_COUNT
     };
 
+    typedef uint32_t ggml_bitset_t;
+
     struct ggml_hash_set {
         size_t size;
+        ggml_bitset_t * used;
         struct ggml_tensor ** keys;
     };
 
@@ -651,7 +658,7 @@ extern "C" {
         struct ggml_tensor ** grads;
         struct ggml_tensor ** leafs;
 
-        struct ggml_hash_set visited_hash_table;
+        struct ggml_hash_set visited_hash_set;
 
         enum ggml_cgraph_eval_order order;
     };
@@ -698,8 +705,6 @@ extern "C" {
     GGML_API int64_t ggml_cycles(void);
     GGML_API int64_t ggml_cycles_per_ms(void);
 
-    GGML_API void ggml_print_backtrace(void);
-
     // accepts a UTF-8 path, even on Windows
     GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
 
@@ -2005,8 +2010,8 @@ extern "C" {
 
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    GGML_API struct ggml_cplan ggml_graph_plan   […]
-    GGML_API enum ggml_status  ggml_graph_compute[…]
+    GGML_API struct ggml_cplan ggml_graph_plan   (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
+    GGML_API enum ggml_status  ggml_graph_compute(      struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
     // same as ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
     GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
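The other half of the header change replaces the multi-line GGML_ASSERT macro with a one-liner that routes every failure through a single noreturn, printf-style function. A hedged sketch of a compatible ggml_abort() (the real definition lives in ggml/src/ggml.c, which is not shown on this page, and also prints a backtrace before aborting):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

void ggml_abort(const char * file, int line, const char * fmt, ...) {
    fflush(stdout);

    fprintf(stderr, "%s:%d: ", file, line);

    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args); // e.g. "GGML_ASSERT(x != NULL) failed"
    va_end(args);

    fprintf(stderr, "\n");
    abort();
}

Because GGML_NORETURN tells the compiler that ggml_abort() never returns, any code after a failed GGML_ASSERT is provably dead, which is what fixes the unreachable-code warnings mentioned in the commit message.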
ggml/src/ggml-alloc.c
CHANGED

@@ -91,8 +91,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
     if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
         fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
                 __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
-        GGML_ASSERT(!"not enough space in the buffer");
-        return;
+        GGML_ABORT("not enough space in the buffer");
     }
 
     void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset;
@@ -133,7 +132,7 @@ static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset,
             return;
         }
     }
-    GGML_ASSERT(!"out of allocated_tensors");
+    GGML_ABORT("out of allocated_tensors");
 }
 static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) {
     for (int i = 0; i < 1024; i++) {
@@ -142,8 +141,7 @@ static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offs
             return;
         }
     }
-
-    GGML_ASSERT(!"tensor not found");
+    GGML_ABORT("tried to free tensor %s not found\n", tensor->name);
 }
 #endif
 
@@ -176,8 +174,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
         // this should never happen
         fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
                 __func__, size, max_avail);
-
-        GGML_UNREACHABLE();
+        GGML_ABORT("not enough space in the buffer");
     }
 }
 
@@ -443,7 +440,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
         }
     }
 
-    free(galloc->hash_set.keys);
+    ggml_hash_set_free(&galloc->hash_set);
     free(galloc->hash_values);
     free(galloc->bufts);
     free(galloc->buffers);
@@ -456,7 +453,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
 typedef struct ggml_gallocr * ggml_gallocr_t;
 
 static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    size_t i = ggml_hash_find_or_insert(galloc->hash_set, t);
+    size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t);
     return &galloc->hash_values[i];
 }
 
@@ -565,8 +562,8 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
 
 static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     // clear hash tables
-    memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
-    memset(galloc->hash_values,   0, sizeof(struct hash_node)     * galloc->hash_set.size);
+    ggml_hash_set_reset(&galloc->hash_set);
+    memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
 
     // allocate leafs
     // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
@@ -671,21 +668,19 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
 }
 
 bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
-    size_t hash_size = graph->visited_hash_table.size;
+    size_t min_hash_size = graph->n_nodes + graph->n_leafs;
+    // add 25% margin to avoid hash collisions
+    min_hash_size += min_hash_size / 4;
 
     // initialize hash table
-    if (galloc->hash_set.size < hash_size) {
-        free(galloc->hash_set.keys);
-        free(galloc->hash_values);
-        galloc->hash_set.size = hash_size;
-        galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
-        galloc->hash_values   = calloc(hash_size, sizeof(struct hash_node));
+    if (galloc->hash_set.size < min_hash_size) {
+        ggml_hash_set_free(&galloc->hash_set);
+        galloc->hash_set = ggml_hash_set_new(min_hash_size);
         GGML_ASSERT(galloc->hash_set.keys != NULL);
+
+        free(galloc->hash_values);
+        galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
         GGML_ASSERT(galloc->hash_values != NULL);
-    } else {
-        // reset hash table
-        memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
-        memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
     }
 
     // reset allocators
@@ -817,8 +812,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
 }
 
 static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
-    ggml_backend_buffer_type_t buft = galloc->bufts[talloc->buffer_id];
-    size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
+    size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
     return talloc->size_max >= node_size;
 }
 
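Note the sizing rule in ggml_gallocr_reserve_n above: the table is now sized from the graph itself rather than from the graph's visited hash set, and the extra 25% keeps the open-addressing load factor at or below 0.8, which bounds the expected probe length. Worked through with hypothetical numbers:

#include <stdio.h>

int main(void) {
    // hypothetical graph, to make the sizing rule concrete
    size_t n_nodes = 2048, n_leafs = 512;

    size_t min_hash_size = n_nodes + n_leafs; // 2560 entries strictly needed
    min_hash_size += min_hash_size / 4;       // 25% margin -> 3200 slots

    // 2560 / 3200 = 0.80
    printf("load factor: %.2f\n", (double)(n_nodes + n_leafs) / min_hash_size);
    return 0;
}

ggml_hash_set_new() may round the requested size up further; the margin only guarantees the table is never allocated at 100% occupancy.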
ggml/src/ggml-backend.c
CHANGED

@@ -1055,11 +1055,10 @@ struct ggml_backend_sched {
     ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
     ggml_gallocr_t galloc;
 
-    // hash keys of the nodes in the graph
-    struct ggml_hash_set    hash_set;
-    // hash values
-    int                   * tensor_backend_id;
-    struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
+    // hash map of the nodes in the graph
+    struct ggml_hash_set  hash_set;
+    int                 * hv_tensor_backend_ids; // [hash_set.size]
+    struct ggml_tensor ** hv_tensor_copies;      // [hash_set.size][n_backends][n_copies]
 
     int * node_backend_ids; // [graph_size]
     int * leaf_backend_ids; // [graph_size]
@@ -1068,7 +1067,7 @@ struct ggml_backend_sched {
     int * prev_leaf_backend_ids; // [graph_size]
 
     // copy of the graph with modified inputs
-    struct ggml_cgraph * graph;
+    struct ggml_cgraph graph;
 
     // graph splits
     struct ggml_backend_sched_split * splits;
@@ -1087,19 +1086,16 @@ struct ggml_backend_sched {
     ggml_backend_sched_eval_callback callback_eval;
     void * callback_eval_user_data;
 
-    bool debug;
+    char * context_buffer;
+    size_t context_buffer_size;
 
-    […]
-#ifdef _MSC_VER
-    __declspec(align(GGML_MEM_ALIGN))
-#else
-    __attribute__((aligned(GGML_MEM_ALIGN)))
-#endif
-    char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
+    bool debug;
 };
 
-#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
-#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)]
+#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
+#define tensor_backend_id(tensor) sched->hv_tensor_backend_ids[hash_id(tensor)]
+#define tensor_id_copy(id, backend_id, copy_id) sched->hv_tensor_copies[(id) * sched->n_backends * sched->n_copies + (backend_id) * sched->n_copies + (copy_id)]
+#define tensor_copy(tensor, backend_id, copy_id) tensor_id_copy(hash_id(tensor), backend_id, copy_id)
 
 // returns the priority of the backend, lower id is higher priority
 static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
@@ -1169,7 +1165,6 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
     return cur_backend_id;
 }
 
-    // assign nodes that use weights to the backend of the weights
     // operations with weights are preferably run on the same backend as the weights
     for (int i = 0; i < GGML_MAX_SRC; i++) {
         const struct ggml_tensor * src = tensor->src[i];
@@ -1275,7 +1270,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
     sched->is_reset = false;
 
     struct ggml_init_params params = {
-        /* .mem_size =   */ sizeof(sched->context_buffer),
+        /* .mem_size =   */ sched->context_buffer_size,
        /* .mem_buffer = */ sched->context_buffer,
        /* .no_alloc =   */ true
    };
@@ -1284,39 +1279,43 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 
     sched->ctx = ggml_init(params);
     if (sched->ctx == NULL) {
-        fprintf(stderr, "%s: failed to initialize context\n", __func__);
-        GGML_ASSERT(false);
+        GGML_ABORT("%s: failed to initialize context\n", __func__);
     }
 
     // pass 1: assign backends to ops with pre-allocated inputs
     for (int i = 0; i < graph->n_leafs; i++) {
         struct ggml_tensor * leaf = graph->leafs[i];
         int * leaf_backend_id = &tensor_backend_id(leaf);
-        if (*leaf_backend_id != -1) {
-            // do not overwrite user assignments
-            continue;
+        // do not overwrite user assignments
+        if (*leaf_backend_id == -1) {
+            *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
         }
-        *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
     }
 
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
         int * node_backend_id = &tensor_backend_id(node);
-        if (*node_backend_id != -1) {
-            // do not overwrite user assignments
-            continue;
-        }
-        *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
-        // src
-        for (int j = 0; j < GGML_MAX_SRC; j++) {
-            struct ggml_tensor * src = node->src[j];
-            if (src == NULL) {
-                continue;
-            }
-            int * src_backend_id = &tensor_backend_id(src);
-            if (*src_backend_id == -1) {
-                *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
-            }
-        }
+        // do not overwrite user assignments
+        if (*node_backend_id == -1) {
+            *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
+
+#if 0
+            // src
+            if (node->op == GGML_OP_NONE) {
+                continue;
+            }
+
+            for (int j = 0; j < GGML_MAX_SRC; j++) {
+                struct ggml_tensor * src = node->src[j];
+                if (src == NULL) {
+                    continue;
+                }
+                int * src_backend_id = &tensor_backend_id(src);
+                if (*src_backend_id == -1) {
+                    *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
+                }
+            }
+#endif
+        }
     }
@@ -1488,12 +1487,13 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         }
     }
 
-    // pass […]
+    // pass 5: split graph, find tensors that need to be copied
     {
         int i_split = 0;
         struct ggml_backend_sched_split * split = &sched->splits[0];
         // find the backend of the first split, skipping view ops
-        for (int i = 0; i < graph->n_nodes; i++) {
+        int i = 0;
+        for (; i < graph->n_nodes; i++) {
             struct ggml_tensor * node = graph->nodes[i];
             if (!ggml_is_view_op(node->op)) {
                 split->backend_id = tensor_backend_id(node);
@@ -1502,9 +1502,8 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         }
         split->i_start = 0;
         split->n_inputs = 0;
-        memset(split->inputs, 0, sizeof(split->inputs)); //HACK
         int cur_backend_id = split->backend_id;
-        for (int i = 0; i < graph->n_nodes; i++) {
+        for (; i < graph->n_nodes; i++) {
             struct ggml_tensor * node = graph->nodes[i];
 
             if (ggml_is_view_op(node->op)) {
@@ -1513,7 +1512,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 
             const int node_backend_id = tensor_backend_id(node);
 
-            […]
+            assert(node_backend_id != -1); // all nodes should be assigned by now
 
             // check if we should start a new split based on the sources of the current node
             bool need_new_split = false;
@@ -1527,7 +1526,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 // by starting a new split, the memory of the previously offloaded weights can be reused
                 if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
                     int src_backend_id = tensor_backend_id(src);
-                    if (src_backend_id != […]) {
+                    if (src_backend_id != cur_backend_id) {
                         need_new_split = true;
                         break;
                     }
@@ -1536,9 +1535,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 // FIXME: count the number of inputs instead of only checking when full
                 if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
                     const size_t id = hash_id(src);
-                    int src_backend_id = sched->tensor_backend_id[id];
+                    int src_backend_id = sched->hv_tensor_backend_ids[id];
                     bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
-                    if (src_backend_id != cur_backend_id && sched->tensor_copies[id][cur_backend_id][0] == NULL && !supported) {
+                    if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == NULL && !supported) {
                         //printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name);
                         need_new_split = true;
                         break;
@@ -1570,12 +1569,12 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 continue;
             }
 
-            const int src_backend_id = tensor_backend_id(src);
+            size_t src_id = hash_id(src);
+            const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
             assert(src_backend_id != -1); // all inputs should be assigned by now
 
             if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
-                size_t id = hash_id(src);
-                if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
+                if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
                     ggml_backend_t backend = sched->backends[src_backend_id];
                     for (int c = 0; c < sched->n_copies; c++) {
                         struct ggml_tensor * tensor_copy;
@@ -1589,7 +1588,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                         ggml_set_input(tensor_copy);
                         ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
                     }
-                    sched->tensor_copies[id][src_backend_id][c] = tensor_copy;
+                    tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
                     SET_CAUSE(tensor_copy, "4.cpy");
                 }
                 int n_graph_inputs = sched->n_graph_inputs++;
@@ -1598,11 +1597,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 }
             }
 
-            bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
-            if (src_backend_id != cur_backend_id && !supported) {
+            if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
                 // create a copy of the input in the split's backend
-                const size_t id = hash_id(src);
-                if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
+                if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) {
                     ggml_backend_t backend = sched->backends[cur_backend_id];
                     for (int c = 0; c < sched->n_copies; c++) {
                         struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
@@ -1611,14 +1608,14 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                         ggml_set_input(tensor_copy);
                         ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
                     }
-                    sched->tensor_copies[id][cur_backend_id][c] = tensor_copy;
+                    tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
                     SET_CAUSE(tensor_copy, "4.cpy");
                 }
                 int n_inputs = split->n_inputs++;
                 GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
                 split->inputs[n_inputs] = src;
             }
-            node->src[j] = […]
+            node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy);
         }
     }
 }
@@ -1630,7 +1627,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         ggml_backend_sched_print_assignments(sched, graph);
     }
 
-    // swap node_backend_ids and […]
+    // swap node_backend_ids and leaf _backend_ids with prevs
     {
         int * tmp = sched->node_backend_ids;
         sched->node_backend_ids = sched->prev_node_backend_ids;
@@ -1641,9 +1638,19 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         sched->prev_leaf_backend_ids = tmp;
     }
 
-    […]
-    […]
-    […]
+    int graph_size = graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
+    if (sched->graph.size < graph_size) {
+        sched->graph.size = graph_size;
+        sched->graph.nodes = realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
+        sched->graph.leafs = realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *));
+        GGML_ASSERT(sched->graph.nodes != NULL);
+        GGML_ASSERT(sched->graph.leafs != NULL);
+    }
+    sched->graph.n_nodes = 0;
+    sched->graph.n_leafs = 0;
+
+    struct ggml_cgraph * graph_copy = &sched->graph;
+
     for (int i = 0; i < sched->n_splits; i++) {
         struct ggml_backend_sched_split * split = &sched->splits[i];
         split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
@@ -1654,12 +1661,12 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 
             struct ggml_tensor * input = split->inputs[j];
             const size_t input_id = hash_id(input);
-            struct ggml_tensor * input_cpy = sched->tensor_copies[input_id][split->backend_id][sched->cur_copy];
+            struct ggml_tensor * input_cpy = tensor_id_copy(input_id, split->backend_id, sched->cur_copy);
 
             // add a dependency to the input source so that it is not freed before the copy is done
             struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
             input_dep->src[0] = input;
-            sched->node_backend_ids[graph_copy->n_nodes] = sched->tensor_backend_id[input_id];
+            sched->node_backend_ids[graph_copy->n_nodes] = sched->hv_tensor_backend_ids[input_id];
             graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
 
             // add a dependency to the input copy so that it is allocated at the start of the split
@@ -1681,7 +1688,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
             size_t id = hash_id(input);
             int backend_id = tensor_backend_id(input);
             for (int c = 0; c < sched->n_copies; c++) {
-                struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
+                struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
                 sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
                 graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
             }
@@ -1694,7 +1701,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
             struct ggml_tensor * input = split->inputs[j];
             size_t id = hash_id(input);
             for (int c = 0; c < sched->n_copies; c++) {
-                struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
+                struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
                 sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
                 graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
             }
@@ -1708,13 +1715,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
         graph_copy->leafs[graph_copy->n_leafs++] = leaf;
     }
-
-    sched->graph = graph_copy;
 }
 
 static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
     bool backend_ids_changed = false;
-    for (int i = 0; i < sched->graph->n_nodes; i++) {
+    for (int i = 0; i < sched->graph.n_nodes; i++) {
         if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
             sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
             backend_ids_changed = true;
@@ -1722,7 +1727,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
         }
     }
     if (!backend_ids_changed) {
-        for (int i = 0; i < sched->graph->n_leafs; i++) {
+        for (int i = 0; i < sched->graph.n_leafs; i++) {
            if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
                sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
                backend_ids_changed = true;
@@ -1732,14 +1737,14 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
    }
 
     // allocate graph
-    if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
+    if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
         // the re-allocation may cause the split inputs to be moved to a different address
         ggml_backend_sched_synchronize(sched);
 #ifndef NDEBUG
-        fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__);
+        fprintf(stderr, "%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
 #endif
-        ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
-        if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
+        ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
+        if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
             fprintf(stderr, "%s: failed to allocate graph\n", __func__);
             return false;
         }
@@ -1760,7 +1765,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
         for (int j = 0; j < split->n_inputs; j++) {
             ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
             struct ggml_tensor * input = split->inputs[j];
-            struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy];
+            struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
 
             if (input->flags & GGML_TENSOR_FLAG_INPUT) {
                 // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
@@ -1846,21 +1851,23 @@ ggml_backend_sched_t ggml_backend_sched_new(
     struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched));
 
     sched->debug = getenv("GGML_SCHED_DEBUG") != NULL;
+    sched->n_backends = n_backends;
+    sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
 
     // initialize hash table
-    sched->hash_set = ggml_hash_set_new(graph_size);
-    sched->tensor_backend_id = calloc(sched->hash_set.size, sizeof(sched->tensor_backend_id[0]));
-    sched->tensor_copies     = calloc(sched->hash_set.size, sizeof(sched->tensor_copies[0]));
+    // FIXME: needs to be size*2 to account for leafs (do it in graph_split instead)
+    sched->hash_set    = ggml_hash_set_new(graph_size);
+    sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
+    sched->hv_tensor_copies      = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
 
     const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
-    sched->node_backend_ids […]
-    sched->leaf_backend_ids […]
+    sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
+    sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
     sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
     sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
 
-    sched->n_backends = n_backends;
-
-    sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
+    sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
+    sched->context_buffer = malloc(sched->context_buffer_size);
 
     const int initial_splits_capacity = 16;
     sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0]));
@@ -1895,37 +1902,37 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
     }
     ggml_gallocr_free(sched->galloc);
     ggml_free(sched->ctx);
+    ggml_hash_set_free(&sched->hash_set);
     free(sched->splits);
-    free(sched->hash_set.keys);
-    free(sched->tensor_backend_id);
-    free(sched->tensor_copies);
+    free(sched->hv_tensor_backend_ids);
+    free(sched->hv_tensor_copies);
     free(sched->node_backend_ids);
     free(sched->leaf_backend_ids);
     free(sched->prev_node_backend_ids);
     free(sched->prev_leaf_backend_ids);
+    free(sched->context_buffer);
+    free(sched->graph.nodes);
+    free(sched->graph.leafs);
     free(sched);
 }
 
 void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
     // reset state for the next run
     if (!sched->is_reset) {
-        size_t hash_size = sched->hash_set.size;
-        memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
-        memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
-        memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
-
+        ggml_hash_set_reset(&sched->hash_set);
+        memset(sched->hv_tensor_backend_ids, -1, sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
+        memset(sched->hv_tensor_copies,       0, sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
         sched->is_reset = true;
     }
     sched->is_alloc = false;
 }
 
 bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
-    GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes);
+    GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
 
     ggml_backend_sched_split_graph(sched, measure_graph);
 
-    […]
-    if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
+    if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
         return false;
     }
@@ -1936,10 +1943,11 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
 }
 
 bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
-    GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes);
+    GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
 
     ggml_backend_sched_split_graph(sched, graph);
 
+
     if (!ggml_backend_sched_alloc_splits(sched)) {
         return false;
     }
@@ -2009,6 +2017,7 @@ void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct gg
     GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
     tensor_backend_id(node) = backend_index;
     SET_CAUSE(node, "usr");
+    sched->is_reset = false;
 }
 
 ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
@@ -2051,9 +2060,9 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set,
     GGML_ASSERT(src != NULL);
     GGML_ASSERT(src->data && "graph must be allocated");
 
-    size_t id = ggml_hash_insert(hash_set, src);
-    if (id == GGML_HASHTABLE_ALREADY_EXISTS) {
-        return node_copies[ggml_hash_find(hash_set, src)];
+    size_t id = ggml_hash_insert(&hash_set, src);
+    if (id == GGML_HASHSET_ALREADY_EXISTS) {
+        return node_copies[ggml_hash_find(&hash_set, src)];
     }
 
     struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
@@ -2078,7 +2087,7 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set,
     return dst;
 }
 
-static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
+static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
     size_t id = ggml_hash_find(hash_set, src);
     if (node_init[id]) {
         return;
@@ -2105,10 +2114,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
 }
 
 struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
-    struct ggml_hash_set hash_set = {
-        /* .size = */ graph->visited_hash_table.size,
-        /* .keys = */ calloc(graph->visited_hash_table.size, sizeof(hash_set.keys[0])) // NOLINT
-    };
+    struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
     struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
     bool * node_init = calloc(hash_set.size, sizeof(node_init[0]));
 
@@ -2123,7 +2129,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
 
     if (ctx_allocated == NULL || ctx_unallocated == NULL) {
         fprintf(stderr, "failed to allocate context for graph copy\n");
-        free(hash_set.keys);
+        ggml_hash_set_free(&hash_set);
         free(node_copies);
         free(node_init);
         ggml_free(ctx_allocated);
@@ -2146,7 +2152,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
     ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
     if (buffer == NULL) {
         fprintf(stderr, "failed to allocate buffer for graph copy\n");
-        free(hash_set.keys);
+        ggml_hash_set_free(&hash_set);
         free(node_copies);
         free(node_init);
         ggml_free(ctx_allocated);
@@ -2164,19 +2170,19 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
     // copy data and init views
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
-        graph_copy_init_tensor(hash_set, node_copies, node_init, node);
+        graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
    }
 
     // build graph copy
     struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
-        struct ggml_tensor * node_copy = node_copies[ggml_hash_find(hash_set, node)];
+        struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
         graph_copy->nodes[i] = node_copy;
     }
     graph_copy->n_nodes = graph->n_nodes;
 
-    free(hash_set.keys);
+    ggml_hash_set_free(&hash_set);
     free(node_copies);
     free(node_init);
 
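One detail of the scheduler rewrite worth calling out: the per-tensor copies were previously held in a fixed-shape array, tensor_copies[id][backend][copy], which reserved GGML_SCHED_MAX_BACKENDS * GGML_SCHED_MAX_COPIES pointers per hash slot regardless of configuration. The new hv_tensor_copies is a flat buffer sized by the actual n_backends * n_copies and addressed by the tensor_id_copy() macro above. The same flattening written as a function, against an illustrative struct (the real fields live in ggml_backend_sched):

#include <stddef.h>

struct sched_view {
    int     n_backends;
    int     n_copies;
    void ** hv_tensor_copies; // [hash_size][n_backends][n_copies], flattened
};

// equivalent to the macro in the diff:
//   (id)*n_backends*n_copies + (backend_id)*n_copies + (copy_id)
static void ** tensor_id_copy_ptr(struct sched_view * s, size_t id, int backend_id, int copy_id) {
    return &s->hv_tensor_copies[(id * s->n_backends + backend_id) * s->n_copies + copy_id];
}

Because the macro expands to an lvalue, the scheduler can both read and assign through it (tensor_id_copy(id, b, c) = tensor_copy), exactly as the old 3D array indexing did.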
ggml/src/ggml-blas.cpp
CHANGED

@@ -275,8 +275,7 @@ GGML_CALL static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t
                 break;
 
             default:
-                fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
-                GGML_ASSERT(false);
+                GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
         }
     }
 
ggml/src/ggml-cuda.cu
CHANGED

@@ -98,7 +98,7 @@ void ggml_cuda_error(const char * stmt, const char * func, const char * file, in
     GGML_CUDA_LOG_ERROR("  current device: %d, in function %s at %s:%d\n", id, func, file, line);
     GGML_CUDA_LOG_ERROR("  %s\n", stmt);
     // abort with GGML_ASSERT to get a stack trace
-    GGML_ASSERT(!"CUDA error");
+    GGML_ABORT("CUDA error");
 }

 // this is faster on Windows
@@ -1596,7 +1596,7 @@ static void ggml_cuda_op_mul_mat(
                 CUDA_CHECK(ggml_cuda_cpy_tensor_2d(
                                 src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }

             if (quantize_src1 && !src1_is_contiguous) {
@@ -2945,7 +2945,7 @@ static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_ev

         CUDA_CHECK(cudaLaunchHostFunc(cuda_ctx->stream(), wait_fn, event));
 #endif
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

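The comment kept at ggml-cuda.cu:100 records the design choice: aborting through the assert/abort macro is what produces a backtrace. For orientation, an approximation of how a CUDA_CHECK-style wrapper feeds ggml_cuda_error (the macro body below is a sketch; the real CUDA_CHECK definition is not part of this diff, and the exact ggml_cuda_error signature is assumed from the truncated hunk header):

    // Sketch of an error-check wrapper: forward the failing statement, call
    // site, and driver message to ggml_cuda_error, which logs them and then
    // aborts via GGML_ABORT("CUDA error") to obtain a stack trace.
    #define EXAMPLE_CUDA_CHECK(call)                                        \
        do {                                                                \
            cudaError_t err_ = (call);                                      \
            if (err_ != cudaSuccess) {                                      \
                ggml_cuda_error(#call, __func__, __FILE__, __LINE__,        \
                                cudaGetErrorString(err_));                  \
            }                                                               \
        } while (0)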
ggml/src/ggml-cuda/argsort.cu
CHANGED

@@ -81,7 +81,7 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
     } else if (order == GGML_SORT_ORDER_DESC) {
         k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/binbcast.cu
CHANGED

@@ -259,7 +259,7 @@ static void ggml_cuda_op_bin_bcast(
     } else {
         fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
             ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/common.cuh
CHANGED

@@ -348,7 +348,7 @@ static __device__ void no_device_code(
 #ifdef __CUDA_ARCH__
 #define NO_DEVICE_CODE no_device_code(__FILE__, __LINE__, __FUNCTION__, __CUDA_ARCH__, STRINGIZE(__CUDA_ARCH_LIST__))
 #else
-#define NO_DEVICE_CODE //GGML_ASSERT(false && "NO_DEVICE_CODE not valid in host code.")
+#define NO_DEVICE_CODE //GGML_ABORT("NO_DEVICE_CODE not valid in host code.")
 #endif // __CUDA_ARCH__

 static __device__ __forceinline__ float warp_reduce_sum(float x) {
ggml/src/ggml-cuda/cpy.cu
CHANGED

@@ -451,7 +451,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
     } else {
         fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -484,6 +484,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
     } else {
         fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }
ggml/src/ggml-cuda/dmmv.cu
CHANGED

@@ -662,7 +662,7 @@ void ggml_cuda_op_dequantize_mul_mat_vec(
             convert_mul_mat_vec_f16_cuda(src0_dd_i, src1_dfloat, dst_dd_i, ne00, row_diff, stream);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }

ggml/src/ggml-cuda/fattn-common.cuh
CHANGED

@@ -564,7 +564,7 @@ static void on_no_fattn_vec_case(const int D) {
         fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
         fprintf(stderr, "By default only f16 KV cache is supported.\n");
         fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     } else if (D == 128) {
         fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
         fprintf(stderr, "Supported combinations:\n");
@@ -572,11 +572,11 @@ static void on_no_fattn_vec_case(const int D) {
         fprintf(stderr, "  - K == q8_0, V == q8_0,  8.50 BPV\n");
         fprintf(stderr, "  - K == f16,  V == f16,  16.00 BPV\n");
         fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     } else {
         fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
         fprintf(stderr, "Only f16 is supported.\n");
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

ggml/src/ggml-cuda/fattn-tile-f16.cu
CHANGED

@@ -287,7 +287,7 @@ void launch_fattn_tile_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
             launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
         } break;
         default: {
-            GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
+            GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
         } break;
     }
 }
ggml/src/ggml-cuda/fattn-tile-f32.cu
CHANGED

@@ -284,7 +284,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
             launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
         } break;
         default: {
-            GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
+            GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
         } break;
     }
 }
ggml/src/ggml-cuda/fattn.cu
CHANGED

@@ -38,7 +38,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
                 ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
                 break;
             default:
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 break;
         }
     } else {
@@ -63,7 +63,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
             // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
             // break;
             default:
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 break;
         }
     }
@@ -86,7 +86,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
                 ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
                 break;
             default:
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 break;
         }
         return;
@@ -114,7 +114,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
                 ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
                 break;
             default:
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 break;
         }
         return;
@@ -141,7 +141,7 @@ static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, g
                 ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
                 break;
             default:
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 break;
         }
     }
ggml/src/ggml-cuda/getrows.cu
CHANGED

@@ -171,8 +171,7 @@ void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
             break;
         default:
             // TODO: k-quants
-            fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
-            GGML_ASSERT(false);
+            GGML_ABORT("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
             break;
     }
 }
ggml/src/ggml-cuda/mmq.cu
CHANGED

@@ -84,7 +84,7 @@ void ggml_cuda_op_mul_mat_q(
             mul_mat_q_case<GGML_TYPE_IQ4_NL>(ctx, args, stream);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }

ggml/src/ggml-cuda/mmq.cuh
CHANGED

@@ -75,7 +75,7 @@ static mmq_q8_1_ds_layout mmq_get_q8_1_ds_layout(const ggml_type type_x) {
         case GGML_TYPE_IQ4_NL:
             return MMQ_Q8_1_DS_LAYOUT_D4;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
@@ -2898,7 +2898,7 @@ void mul_mat_q_case(ggml_backend_cuda_context & ctx, const mmq_args & args, cuda
             break;
         default:
             fprintf(stderr, "mmq_x_best=%d\n", mmq_x_best);
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
ggml/src/ggml-cuda/mmvq.cu
CHANGED

@@ -162,7 +162,7 @@ static void mul_mat_vec_q_cuda(
             rows_per_cuda_block = 2;
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
@@ -196,7 +196,7 @@ static void mul_mat_vec_q_cuda(
             mul_mat_vec_q<type, 8><<<block_nums, block_dims, 0, stream>>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
@@ -413,7 +413,7 @@ void ggml_cuda_op_mul_mat_vec_q(
             mul_mat_vec_iq3_s_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }

ggml/src/ggml-cuda/quantize.cu
CHANGED

@@ -163,7 +163,7 @@ void quantize_mmq_q8_1_cuda(
                 <<<num_blocks, block_size, 0, stream>>>(x, vy, kx0, kx1, kx0_padded);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }
ggml/src/ggml-cuda/rope.cu
CHANGED

@@ -251,7 +251,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
                 attn_factor, corr_dims, freq_factors, stream
             );
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     } else {
         if (src0->type == GGML_TYPE_F32) {
@@ -265,7 +265,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
                 attn_factor, corr_dims, freq_factors, stream
             );
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }
 }
ggml/src/ggml-impl.h
CHANGED

@@ -634,21 +634,121 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
 #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
 #endif

-#define GGML_HASHTABLE_FULL ((size_t)-1)
-#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
+// bitset
+
+static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
+#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
+#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
+
+static size_t ggml_bitset_size(size_t n) {
+    return (n + BITSET_MASK) >> BITSET_SHR;
+}
+
+static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
+    return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
+}
+
+static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
+    bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
+}
+
+static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
+    bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
+}
+
+// hash set
+
+#define GGML_HASHSET_FULL ((size_t)-1)
+#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)

 struct ggml_hash_set ggml_hash_set_new(size_t size);
+void                 ggml_hash_set_free(struct ggml_hash_set * hash_set);
+
+// returns the minimum size for a hash set that can hold min_sz elements
+size_t ggml_hash_size(size_t min_sz);

-bool   ggml_hash_contains      (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
+// remove all elements from the hash set
+void ggml_hash_set_reset(struct ggml_hash_set * hash_set);

-// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
-size_t ggml_hash_find          (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
+// returns true if key is in the hash set
+static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);

-// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
-size_t ggml_hash_insert        (      struct ggml_hash_set hash_set, struct ggml_tensor * key);
+// returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
+static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+// returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
+static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);

 // return index, asserts if table is full
-size_t ggml_hash_find_or_insert(      struct ggml_hash_set hash_set, struct ggml_tensor * key);
+static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+// hash function for ggml_tensor
+static inline size_t ggml_hash(const struct ggml_tensor * p) {
+    // the last 4 bits are always zero due to alignment
+    return (size_t)(uintptr_t)p >> 4;
+}
+
+static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+    size_t h = ggml_hash(key) % hash_set->size;
+
+    // linear probing
+    size_t i = h;
+    while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
+        i = (i + 1) % hash_set->size;
+        if (i == h) {
+            // visited all hash table entries -> not found
+            return GGML_HASHSET_FULL;
+        }
+    }
+    return i;
+}
+
+static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+    size_t i = ggml_hash_find(hash_set, key);
+    return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
+}
+
+static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+    size_t h = ggml_hash(key) % hash_set->size;
+
+    // linear probing
+    size_t i = h;
+    do {
+        if (!ggml_bitset_get(hash_set->used, i)) {
+            ggml_bitset_set(hash_set->used, i);
+            hash_set->keys[i] = key;
+            return i;
+        }
+        if (hash_set->keys[i] == key) {
+            return GGML_HASHSET_ALREADY_EXISTS;
+        }
+        i = (i + 1) % hash_set->size;
+    } while (i != h);
+
+    // visited all hash table entries -> not found
+    GGML_ABORT("fatal error");
+}
+
+static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+    size_t h = ggml_hash(key) % hash_set->size;
+
+    // linear probing
+    size_t i = h;
+    do {
+        if (!ggml_bitset_get(hash_set->used, i)) {
+            ggml_bitset_set(hash_set->used, i);
+            hash_set->keys[i] = key;
+            return i;
+        }
+        if (hash_set->keys[i] == key) {
+            return i;
+        }
+        i = (i + 1) % hash_set->size;
+    } while (i != h);
+
+    // visited all hash table entries -> not found
+    GGML_ABORT("fatal error");
+}

 #ifdef __cplusplus
 }
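Taken together, the new ggml-impl.h helpers explain the commit title: the `used` bitset carries one bit per slot, so resetting the table only has to clear size/32 words instead of rewriting the whole keys array. A usage sketch under that assumption (n_tensors and tensors are hypothetical; only the declarations above are relied on):

    // Sketch: deduplicate tensor pointers, reset, and reuse the same table.
    // ggml_hash_set_reset() removes all elements; with the bitset design this
    // avoids touching the full keys array, which is what makes the per-graph
    // reset cheap.
    struct ggml_hash_set set = ggml_hash_set_new(n_tensors);

    for (int i = 0; i < n_tensors; i++) {
        if (ggml_hash_insert(&set, tensors[i]) == GGML_HASHSET_ALREADY_EXISTS) {
            // duplicate tensor in the list
        }
    }

    ggml_hash_set_reset(&set); // ready to be reused for the next graph
    ggml_hash_set_free(&set);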
ggml/src/ggml-kompute.cpp
CHANGED

@@ -566,7 +566,7 @@ uint32_t safe_divide(uint32_t a, uint32_t b) {
     }
     if ((a % b) != 0) {
         fprintf(stderr, "((%u %% %u) == %u) != 0\n", a, b, a % b);
-        GGML_ASSERT(!"safe_divide result would've had remainder");
+        GGML_ABORT("safe_divide result would've had remainder");
     }
     return a / b;
 }
@@ -1460,7 +1460,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml

         if (!ggml_vk_supports_op(dst)) {
             fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
-            GGML_ASSERT(!"unsupported op");
+            GGML_ABORT("unsupported op");
         }

         const int32_t ne00 = src0 ? src0->ne[0] : 0;
@@ -1562,7 +1562,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
             default:
                 {
                     fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
-                    GGML_ASSERT(false);
+                    GGML_ABORT("fatal error");
                 }
         }
     } break;
@@ -1745,7 +1745,7 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
         continue;
         not_implemented: {}
         fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
-        //GGML_ASSERT(false);
+        //GGML_ABORT("fatal error");
     }

     // Evaluate sequence
ggml/src/ggml-metal.m
CHANGED

@@ -869,7 +869,7 @@ static enum ggml_status ggml_metal_graph_compute(
         NSError * error = nil;
         if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) {
             GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
-            GGML_ASSERT(!"capture failed");
+            GGML_ABORT("capture failed");
         }
     }

@@ -931,7 +931,7 @@ static enum ggml_status ggml_metal_graph_compute(

         if (!ggml_metal_supports_op(ctx, dst)) {
             GGML_METAL_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
-            GGML_ASSERT(!"unsupported op");
+            GGML_ABORT("unsupported op");
         }

         if (should_capture) {
@@ -1068,7 +1068,7 @@ static enum ggml_status ggml_metal_graph_compute(
                         case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD_ROW].pipeline; break;
                         case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_ROW].pipeline; break;
                         case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV_ROW].pipeline; break;
-                        default: GGML_ASSERT(false);
+                        default: GGML_ABORT("fatal error");
                     }

                     bcast_row = true;
@@ -1077,7 +1077,7 @@ static enum ggml_status ggml_metal_graph_compute(
                        case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD].pipeline; break;
                        case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL].pipeline; break;
                        case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV].pipeline; break;
-                       default: GGML_ASSERT(false);
+                       default: GGML_ABORT("fatal error");
                     }
                 }

@@ -1131,7 +1131,7 @@ static enum ggml_status ggml_metal_graph_compute(
                     case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_F16].pipeline; break;
                     case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I32].pipeline; break;
                     case GGML_TYPE_I16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I16].pipeline; break;
-                    default: GGML_ASSERT(false);
+                    default: GGML_ABORT("fatal error");
                 }

                 [encoder setComputePipelineState:pipeline];
@@ -1387,7 +1387,7 @@ static enum ggml_status ggml_metal_graph_compute(
                     default:
                         {
                             GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
-                            GGML_ASSERT(false);
+                            GGML_ABORT("fatal error");
                         }
                 } break;
             case GGML_OP_SQR:
@@ -1609,7 +1609,7 @@ static enum ggml_status ggml_metal_graph_compute(
                         case GGML_TYPE_IQ1_M:   pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_M_F32  ].pipeline; break;
                         case GGML_TYPE_IQ4_NL:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32 ].pipeline; break;
                         case GGML_TYPE_IQ4_XS:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_XS_F32 ].pipeline; break;
-                        default: GGML_ASSERT(false && "MUL MAT-MAT not implemented");
+                        default: GGML_ABORT("MUL MAT-MAT not implemented");
                     }

                     [encoder setComputePipelineState:pipeline];
@@ -1782,7 +1782,7 @@ static enum ggml_status ggml_metal_graph_compute(
                         default:
                             {
                                 GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
-                                GGML_ASSERT(false && "not implemented");
+                                GGML_ABORT("not implemented");
                             }
                     };

@@ -1911,7 +1911,7 @@ static enum ggml_status ggml_metal_graph_compute(
                         case GGML_TYPE_IQ1_M:   pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_M_F32  ].pipeline; break;
                         case GGML_TYPE_IQ4_NL:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32 ].pipeline; break;
                         case GGML_TYPE_IQ4_XS:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_XS_F32 ].pipeline; break;
-                        default: GGML_ASSERT(false && "MUL_MAT_ID not implemented");
+                        default: GGML_ABORT("MUL_MAT_ID not implemented");
                     }

                     [encoder setComputePipelineState:pipeline];
@@ -2078,7 +2078,7 @@ static enum ggml_status ggml_metal_graph_compute(
                         default:
                             {
                                 GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src2t);
-                                GGML_ASSERT(false && "not implemented");
+                                GGML_ABORT("not implemented");
                             }
                     };

@@ -2178,7 +2178,7 @@ static enum ggml_status ggml_metal_graph_compute(
                     case GGML_TYPE_IQ4_NL:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL ].pipeline; break;
                     case GGML_TYPE_IQ4_XS:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS ].pipeline; break;
                     case GGML_TYPE_I32:     pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_I32    ].pipeline; break;
-                    default: GGML_ASSERT(false && "not implemented");
+                    default: GGML_ABORT("not implemented");
                 }

                 [encoder setComputePipelineState:pipeline];
@@ -2316,13 +2316,13 @@ static enum ggml_status ggml_metal_graph_compute(
                     switch (src0->type) {
                         case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F32].pipeline; break;
                         case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F16].pipeline; break;
-                        default: GGML_ASSERT(false);
+                        default: GGML_ABORT("fatal error");
                     };
                 } else {
                     switch (src0->type) {
                         case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F32].pipeline; break;
                         case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F16].pipeline; break;
-                        default: GGML_ASSERT(false);
+                        default: GGML_ABORT("fatal error");
                     };
                 }

@@ -2399,7 +2399,7 @@ static enum ggml_status ggml_metal_graph_compute(
                 switch (dst->type) {
                     case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F32].pipeline; break;
                     case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F16].pipeline; break;
-                    default: GGML_ASSERT(false);
+                    default: GGML_ABORT("fatal error");
                 };

                 [encoder setComputePipelineState:pipeline];
@@ -2556,7 +2556,7 @@ static enum ggml_status ggml_metal_graph_compute(
                 switch (order) {
                     case GGML_SORT_ORDER_ASC:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline;  break;
                     case GGML_SORT_ORDER_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break;
-                    default: GGML_ASSERT(false);
+                    default: GGML_ABORT("fatal error");
                 };

                 [encoder setComputePipelineState:pipeline];
@@ -2645,7 +2645,7 @@ static enum ggml_status ggml_metal_graph_compute(
                             {
                                 GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
                                 GGML_METAL_LOG_ERROR("add template specialization for this size\n");
-                                GGML_ASSERT(false && "add template specialization for this size");
+                                GGML_ABORT("add template specialization for this size");
                             }
                     }
                 } else {
@@ -2658,7 +2658,7 @@ static enum ggml_status ggml_metal_graph_compute(
                             {
                                 GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
                                 GGML_METAL_LOG_ERROR("add template specialization for this size\n");
-                                GGML_ASSERT(false && "add template specialization for this size");
+                                GGML_ABORT("add template specialization for this size");
                             }
                     }
                 }
@@ -2779,7 +2779,7 @@ static enum ggml_status ggml_metal_graph_compute(
                             case GGML_TYPE_Q5_0:   pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0].pipeline;   break;
                            case GGML_TYPE_Q5_1:   pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1].pipeline;   break;
                            case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL].pipeline; break;
-                           default: GGML_ASSERT(false && "not implemented");
+                           default: GGML_ABORT("not implemented");
                         };
                     } break;
                 case GGML_TYPE_F16:
@@ -2787,10 +2787,10 @@ static enum ggml_status ggml_metal_graph_compute(
                         switch (dstt) {
                             case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F32].pipeline; break;
                             case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F16].pipeline; break;
-                            default: GGML_ASSERT(false && "not implemented");
+                            default: GGML_ABORT("not implemented");
                         };
                     } break;
-                default: GGML_ASSERT(false && "not implemented");
+                default: GGML_ABORT("not implemented");
             }

             [encoder setComputePipelineState:pipeline];
@@ -2818,7 +2818,7 @@ static enum ggml_status ggml_metal_graph_compute(
         default:
             {
                 GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
     }

ggml/src/ggml-quants.c
CHANGED

@@ -12718,7 +12718,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
                 printf("Oops: found point %u not on grid:", u);
                 for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                 printf("\n");
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
             q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
             q2[2*ib+1] |= (block_signs[k] << 7*k);
@@ -12897,7 +12897,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
                 printf("Oops: found point %u not on grid:", u);
                 for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                 printf("\n");
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
             q2[2*ib+k] = grid_index | (block_signs[k] << 9);
         }
@@ -13340,7 +13340,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
                 printf("Oops: found point %u not on grid:", u);
                 for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
                 printf("\n");
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
             if (grid_size == 256) {
                 q3[8*ib+k] = grid_index;
@@ -13553,7 +13553,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
                 printf("Oops: found point %u not on grid:", u);
                 for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
                 printf("\n");
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
             qs[k] = grid_index & 255;
             qh[(ib*bs4+k)/8] |= ((grid_index >> 8) << ((ib*bs4+k)%8));
@@ -14529,7 +14529,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
                 printf("Oops: found point %u not on grid:", u);
                 for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                 printf("\n");
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
             const int i8 = 2*ib + k;
             y[ibl].qs[i8] = grid_index & 255;
@@ -14649,7 +14649,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
     }

     if (nbytes % ggml_type_size(type) != 0) {
-        fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type);
+        fprintf(stderr, "%s: invalid size %zu for type %s (type size = %zu)\n", __func__, nbytes, ggml_type_name(type), ggml_type_size(type));
         return false;
     }

ggml/src/ggml-sycl.cpp
CHANGED

@@ -1723,7 +1723,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
             });
         });
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }
 }

@@ -2075,8 +2075,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
         // GGML_SYCL_DEBUG("current device index %d\n", id);
         src_ptr = (char *) extra->data_device[id];
     } else {
-        // GGML_SYCL_DEBUG("GGML_ASSERT(false)\n");
-        GGML_ASSERT(false);
+        // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n");
+        GGML_ABORT("fatal error");
     }
     char * dst_ptr = (char *) dst;

@@ -2163,7 +2163,7 @@ static void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_te
         default:
             // TODO: k-quants
             fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }
 }

@@ -2192,7 +2192,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
     } else {
         fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
             ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -2476,7 +2476,7 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
         case GGML_TYPE_Q6_K:
             return 64;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
     }

 }

@@ -3101,7 +3101,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
                 SYCL_CHECK(ggml_sycl_cpy_tensor_2d(
                                 src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }

             if (convert_src1_to_q8_1 && !src1_is_contiguous) {
@@ -3896,7 +3896,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
     } else {
         fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }

     (void) dst;
ggml/src/ggml-sycl/common.hpp
CHANGED

@@ -100,7 +100,7 @@ static void crash() {
                           const char* msg) {
   fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
   fprintf(stderr, "  in function %s at %s:%d\n", func, file, line);
-  GGML_ASSERT(!"SYCL error");
+  GGML_ABORT("SYCL error");
 }

 #define SYCL_CHECK(err) \
ggml/src/ggml-sycl/dmmv.cpp
CHANGED

@@ -1011,7 +1011,7 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
             break;
         default:
             printf("ggml_sycl_op_dequantize_mul_mat_vec unsupported GGML_TYPE %d\n", src0->type);
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }

ggml/src/ggml-sycl/dpct/helper.hpp
CHANGED

@@ -975,7 +975,7 @@ namespace dpct
             if (backend == "opencl:cpu") return 4;
             if (backend == "opencl:acc") return 5;
             printf("convert_backend_index: can't handle backend=%s\n", backend.c_str());
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
         static bool compare_backend(std::string &backend1, std::string &backend2) {
             return convert_backend_index(backend1) < convert_backend_index(backend2);
ggml/src/ggml-sycl/mmq.cpp
CHANGED

@@ -1799,7 +1799,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q4_0_PASCAL;
         nwarps = NWARPS_Q4_0_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -1914,7 +1914,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q4_1_PASCAL;
         nwarps = NWARPS_Q4_1_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2029,7 +2029,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q5_0_PASCAL;
         nwarps = NWARPS_Q5_0_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2144,7 +2144,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q5_1_PASCAL;
         nwarps = NWARPS_Q5_1_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2259,7 +2259,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q8_0_PASCAL;
         nwarps = NWARPS_Q8_0_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2374,7 +2374,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q2_K_PASCAL;
         nwarps = NWARPS_Q2_K_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2497,7 +2497,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q3_K_PASCAL;
         nwarps = NWARPS_Q3_K_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2625,7 +2625,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q4_K_PASCAL;
         nwarps = NWARPS_Q4_K_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2746,7 +2746,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q5_K_PASCAL;
         nwarps = NWARPS_Q5_K_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -2867,7 +2867,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
         mmq_y  = MMQ_Y_Q6_K_PASCAL;
         nwarps = NWARPS_Q6_K_PASCAL;
     } else {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
    }

    const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
@@ -3016,7 +3016,7 @@ void ggml_sycl_op_mul_mat_q(
             ggml_mul_mat_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_ncols, src1_padded_row_size, nrows_dst, stream);
             break;
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
             break;
     }

ggml/src/ggml-sycl/mmvq.cpp
CHANGED

@@ -1017,7 +1017,7 @@ void ggml_sycl_op_mul_mat_vec_q(
                 mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
                 break;
             default:
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 break;
         }
     }
ggml/src/ggml-sycl/rope.cpp
CHANGED

@@ -251,7 +251,7 @@ void ggml_sycl_op_rope(
                 attn_factor, corr_dims, freq_factors, main_stream
             );
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     } else {
         if (src0->type == GGML_TYPE_F32) {
@@ -265,7 +265,7 @@ void ggml_sycl_op_rope(
                 attn_factor, corr_dims, freq_factors, main_stream
            );
        } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
        }
    }

ggml/src/ggml-vulkan.cpp
CHANGED

@@ -1961,7 +1961,7 @@ void ggml_vk_instance_init() {
     // Make sure at least one device exists
     if (devices.empty()) {
         std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }

     // Default to using all dedicated GPUs
@@ -2459,7 +2459,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont
     // Buffer is already mapped
     if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
         std::cerr << "ggml_vulkan: buffer_write_nc_async dst buffer is host_visible. Use synchronous write." << std::endl;
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     // Check if src is pinned memory
     vk_buffer buf;
@@ -2527,7 +2527,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont
             staging = ctx->device->sync_staging;
             staging_offset = 0;
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }

@@ -2563,7 +2563,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * subctx, vk_buffer& dst, s
     // Buffer is already mapped
     if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
         std::cerr << "ggml_vulkan: buffer_write_async dst buffer is host_visible. Use synchronous write." << std::endl;
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     // Check if src is pinned memory
     vk_buffer buf = nullptr;
@@ -2602,7 +2602,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * subctx, vk_buffer& dst, s
             staging_buffer = dst->device->sync_staging;
             staging_offset = 0;
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }

@@ -2704,7 +2704,7 @@ static void ggml_vk_buffer_read_2d_async(vk_context * subctx, vk_buffer& src, si

             staging_buffer = src->device->sync_staging;
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }

@@ -2913,7 +2913,7 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, ggml_
     }

     std::cerr << "Missing CPY op for types: " << ggml_type_name(from) << " " << ggml_type_name(to) << std::endl;
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 }

 static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out) {
@@ -3499,7 +3499,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context *
     const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig;

     if (mmp == nullptr) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }

     // Not implemented
@@ -4078,7 +4078,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
             std::cerr << " and " << ggml_type_name(src1->type);
         }
         std::cerr << " to " << ggml_type_name(dst->type) << std::endl;
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }

     op_func(ctx, subctx, src0, src1, dst);
@@ -4521,7 +4521,7 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0
                 } else if (type == GGML_TYPE_F16) {
                     val = ggml_fp16_to_fp32(*((const ggml_fp16_t *) data + i2*ne1*ne0 + idx1*ne0 + idx0));
                 } else {
-                    GGML_ASSERT(false);
+                    GGML_ABORT("fatal error");
                 }
                 fprintf(stderr, "% 7.2f ", val);
             } else {
@@ -4555,7 +4555,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
             p = ctx->device->pipeline_matmul_f16->a_s;
             shname = "F16_ALIGNED_S";
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     } else if (shader_size == 1) {
         if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
@@ -4571,7 +4571,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
             p = ctx->device->pipeline_matmul_f16->a_m;
             shname = "F16_ALIGNED_M";
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     } else if (shader_size == 2) {
         if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
@@ -4587,7 +4587,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
             p = ctx->device->pipeline_matmul_f16->a_l;
             shname = "F16_ALIGNED_L";
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     } else {
         GGML_ASSERT(0);
@@ -4668,7 +4668,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
         } else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
             x[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
     }
     for (size_t i = 0; i < y_ne; i++) {
@@ -4679,7 +4679,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
             // y[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
             y[i] = ggml_fp32_to_fp16((i % k == i / k) ? 1.0f : 0.0f);
         } else {
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
         }
|
| 4684 |
}
|
| 4685 |
|
|
@@ -4727,14 +4727,14 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|
| 4727 |
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
|
| 4728 |
src0_type = GGML_TYPE_F16;
|
| 4729 |
} else {
|
| 4730 |
-
|
| 4731 |
}
|
| 4732 |
if (std::is_same<float, Y_TYPE>()) {
|
| 4733 |
src1_type = GGML_TYPE_F32;
|
| 4734 |
} else if (std::is_same<ggml_fp16_t, Y_TYPE>()) {
|
| 4735 |
src1_type = GGML_TYPE_F16;
|
| 4736 |
} else {
|
| 4737 |
-
|
| 4738 |
}
|
| 4739 |
|
| 4740 |
ggml_tensor * src0_ggml = ggml_new_tensor_3d(ggml_ctx, src0_type, k, m, batch);
|
|
@@ -4841,7 +4841,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, int i0, int i1
|
|
| 4841 |
} else if (tensor->type == GGML_TYPE_F16) {
|
| 4842 |
val = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]));
|
| 4843 |
} else {
|
| 4844 |
-
|
| 4845 |
}
|
| 4846 |
fprintf(stderr, "% 7.2f ", val);
|
| 4847 |
} else {
|
|
@@ -5391,7 +5391,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
|
| 5391 |
std::cerr << std::endl;
|
| 5392 |
}
|
| 5393 |
|
| 5394 |
-
|
| 5395 |
#endif
|
| 5396 |
|
| 5397 |
if (ctx->prealloc_x == nullptr || (ctx->prealloc_size_x > 0 && ctx->prealloc_x->size < ctx->prealloc_size_x)) {
|
|
@@ -5486,7 +5486,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
|
|
| 5486 |
break;
|
| 5487 |
default:
|
| 5488 |
std::cerr << "ggml_vulkan: Error: Missing op: " << ggml_op_name(node->op) << std::endl;
|
| 5489 |
-
|
| 5490 |
return;
|
| 5491 |
}
|
| 5492 |
|
|
@@ -6498,7 +6498,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, const void * d
|
|
| 6498 |
} else if (tensor->type == GGML_TYPE_I32) {
|
| 6499 |
val = *(const int32_t *) ((const char *) data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]);
|
| 6500 |
} else {
|
| 6501 |
-
|
| 6502 |
}
|
| 6503 |
fprintf(stderr, "% 7.2f ", val);
|
| 6504 |
} else {
|
|
@@ -6620,7 +6620,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6620 |
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 6621 |
}
|
| 6622 |
} else {
|
| 6623 |
-
|
| 6624 |
}
|
| 6625 |
|
| 6626 |
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
|
@@ -6662,7 +6662,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6662 |
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 6663 |
}
|
| 6664 |
} else {
|
| 6665 |
-
|
| 6666 |
}
|
| 6667 |
|
| 6668 |
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
|
@@ -6720,7 +6720,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6720 |
memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 6721 |
}
|
| 6722 |
} else {
|
| 6723 |
-
|
| 6724 |
}
|
| 6725 |
|
| 6726 |
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
|
@@ -6797,7 +6797,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6797 |
break;
|
| 6798 |
default:
|
| 6799 |
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
|
| 6800 |
-
|
| 6801 |
}
|
| 6802 |
} else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) {
|
| 6803 |
if (src1 == nullptr) {
|
|
@@ -6825,7 +6825,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6825 |
tensor_clone = ggml_sum_rows(ggml_ctx, src0_clone);
|
| 6826 |
} else {
|
| 6827 |
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
|
| 6828 |
-
|
| 6829 |
}
|
| 6830 |
|
| 6831 |
ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx);
|
|
@@ -6912,7 +6912,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6912 |
}
|
| 6913 |
} else {
|
| 6914 |
std::cerr << "Missing debug code for type " << ggml_type_name(tensor->type) << std::endl;
|
| 6915 |
-
|
| 6916 |
}
|
| 6917 |
|
| 6918 |
if ((std::isnan(correct) != std::isnan(result)) || (std::isinf(correct) != std::isinf(result)) || !buffer_size_fit) {
|
|
@@ -6935,7 +6935,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 6935 |
std::cerr << std::endl;
|
| 6936 |
std::vector<const ggml_tensor *> done;
|
| 6937 |
ggml_vk_print_graph_origin(tensor, done);
|
| 6938 |
-
|
| 6939 |
}
|
| 6940 |
if (first_error[0] == -1 && std::fabs(correct - result) > 0.1f) {
|
| 6941 |
first_error[0] = i0;
|
|
@@ -7006,7 +7006,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
|
|
| 7006 |
std::cerr << std::endl;
|
| 7007 |
std::vector<const ggml_tensor *> done;
|
| 7008 |
ggml_vk_print_graph_origin(tensor, done);
|
| 7009 |
-
|
| 7010 |
} else {
|
| 7011 |
std::cerr << check_counter << " " << tensor->name << " op=" << ggml_op_name(tensor->op) << " avg_err=" << avg_err << std::endl;
|
| 7012 |
}
|
|
|
|
| 1961 |
// Make sure at least one device exists
|
| 1962 |
if (devices.empty()) {
|
| 1963 |
std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
|
| 1964 |
+
GGML_ABORT("fatal error");
|
| 1965 |
}
|
| 1966 |
|
| 1967 |
// Default to using all dedicated GPUs
|
|
|
|
| 2459 |
// Buffer is already mapped
|
| 2460 |
if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
| 2461 |
std::cerr << "ggml_vulkan: buffer_write_nc_async dst buffer is host_visible. Use synchronous write." << std::endl;
|
| 2462 |
+
GGML_ABORT("fatal error");
|
| 2463 |
}
|
| 2464 |
// Check if src is pinned memory
|
| 2465 |
vk_buffer buf;
|
|
|
|
| 2527 |
staging = ctx->device->sync_staging;
|
| 2528 |
staging_offset = 0;
|
| 2529 |
} else {
|
| 2530 |
+
GGML_ABORT("fatal error");
|
| 2531 |
}
|
| 2532 |
}
|
| 2533 |
|
|
|
|
| 2563 |
// Buffer is already mapped
|
| 2564 |
if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
| 2565 |
std::cerr << "ggml_vulkan: buffer_write_async dst buffer is host_visible. Use synchronous write." << std::endl;
|
| 2566 |
+
GGML_ABORT("fatal error");
|
| 2567 |
}
|
| 2568 |
// Check if src is pinned memory
|
| 2569 |
vk_buffer buf = nullptr;
|
|
|
|
| 2602 |
staging_buffer = dst->device->sync_staging;
|
| 2603 |
staging_offset = 0;
|
| 2604 |
} else {
|
| 2605 |
+
GGML_ABORT("fatal error");
|
| 2606 |
}
|
| 2607 |
}
|
| 2608 |
|
|
|
|
| 2704 |
|
| 2705 |
staging_buffer = src->device->sync_staging;
|
| 2706 |
} else {
|
| 2707 |
+
GGML_ABORT("fatal error");
|
| 2708 |
}
|
| 2709 |
}
|
| 2710 |
|
|
|
|
| 2913 |
}
|
| 2914 |
|
| 2915 |
std::cerr << "Missing CPY op for types: " << ggml_type_name(from) << " " << ggml_type_name(to) << std::endl;
|
| 2916 |
+
GGML_ABORT("fatal error");
|
| 2917 |
}
|
| 2918 |
|
| 2919 |
static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out) {
|
|
|
|
| 3499 |
const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig;
|
| 3500 |
|
| 3501 |
if (mmp == nullptr) {
|
| 3502 |
+
GGML_ABORT("fatal error");
|
| 3503 |
}
|
| 3504 |
|
| 3505 |
// Not implemented
|
|
|
|
| 4078 |
std::cerr << " and " << ggml_type_name(src1->type);
|
| 4079 |
}
|
| 4080 |
std::cerr << " to " << ggml_type_name(dst->type) << std::endl;
|
| 4081 |
+
GGML_ABORT("fatal error");
|
| 4082 |
}
|
| 4083 |
|
| 4084 |
op_func(ctx, subctx, src0, src1, dst);
|
|
|
|
| 4521 |
} else if (type == GGML_TYPE_F16) {
|
| 4522 |
val = ggml_fp16_to_fp32(*((const ggml_fp16_t *) data + i2*ne1*ne0 + idx1*ne0 + idx0));
|
| 4523 |
} else {
|
| 4524 |
+
GGML_ABORT("fatal error");
|
| 4525 |
}
|
| 4526 |
fprintf(stderr, "% 7.2f ", val);
|
| 4527 |
} else {
|
|
|
|
| 4555 |
p = ctx->device->pipeline_matmul_f16->a_s;
|
| 4556 |
shname = "F16_ALIGNED_S";
|
| 4557 |
} else {
|
| 4558 |
+
GGML_ABORT("fatal error");
|
| 4559 |
}
|
| 4560 |
} else if (shader_size == 1) {
|
| 4561 |
if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
|
|
|
|
| 4571 |
p = ctx->device->pipeline_matmul_f16->a_m;
|
| 4572 |
shname = "F16_ALIGNED_M";
|
| 4573 |
} else {
|
| 4574 |
+
GGML_ABORT("fatal error");
|
| 4575 |
}
|
| 4576 |
} else if (shader_size == 2) {
|
| 4577 |
if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
|
|
|
|
| 4587 |
p = ctx->device->pipeline_matmul_f16->a_l;
|
| 4588 |
shname = "F16_ALIGNED_L";
|
| 4589 |
} else {
|
| 4590 |
+
GGML_ABORT("fatal error");
|
| 4591 |
}
|
| 4592 |
} else {
|
| 4593 |
GGML_ASSERT(0);
|
|
|
|
| 4668 |
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
|
| 4669 |
x[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
|
| 4670 |
} else {
|
| 4671 |
+
GGML_ABORT("fatal error");
|
| 4672 |
}
|
| 4673 |
}
|
| 4674 |
for (size_t i = 0; i < y_ne; i++) {
|
|
|
|
| 4679 |
// y[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
|
| 4680 |
y[i] = ggml_fp32_to_fp16((i % k == i / k) ? 1.0f : 0.0f);
|
| 4681 |
} else {
|
| 4682 |
+
GGML_ABORT("fatal error");
|
| 4683 |
}
|
| 4684 |
}
|
| 4685 |
|
|
|
|
| 4727 |
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
|
| 4728 |
src0_type = GGML_TYPE_F16;
|
| 4729 |
} else {
|
| 4730 |
+
GGML_ABORT("fatal error");
|
| 4731 |
}
|
| 4732 |
if (std::is_same<float, Y_TYPE>()) {
|
| 4733 |
src1_type = GGML_TYPE_F32;
|
| 4734 |
} else if (std::is_same<ggml_fp16_t, Y_TYPE>()) {
|
| 4735 |
src1_type = GGML_TYPE_F16;
|
| 4736 |
} else {
|
| 4737 |
+
GGML_ABORT("fatal error");
|
| 4738 |
}
|
| 4739 |
|
| 4740 |
ggml_tensor * src0_ggml = ggml_new_tensor_3d(ggml_ctx, src0_type, k, m, batch);
|
|
|
|
| 4841 |
} else if (tensor->type == GGML_TYPE_F16) {
|
| 4842 |
val = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]));
|
| 4843 |
} else {
|
| 4844 |
+
GGML_ABORT("fatal error");
|
| 4845 |
}
|
| 4846 |
fprintf(stderr, "% 7.2f ", val);
|
| 4847 |
} else {
|
|
|
|
| 5391 |
std::cerr << std::endl;
|
| 5392 |
}
|
| 5393 |
|
| 5394 |
+
GGML_ABORT("fatal error");
|
| 5395 |
#endif
|
| 5396 |
|
| 5397 |
if (ctx->prealloc_x == nullptr || (ctx->prealloc_size_x > 0 && ctx->prealloc_x->size < ctx->prealloc_size_x)) {
|
|
|
|
| 5486 |
break;
|
| 5487 |
default:
|
| 5488 |
std::cerr << "ggml_vulkan: Error: Missing op: " << ggml_op_name(node->op) << std::endl;
|
| 5489 |
+
GGML_ABORT("fatal error");
|
| 5490 |
return;
|
| 5491 |
}
|
| 5492 |
|
|
|
|
| 6498 |
} else if (tensor->type == GGML_TYPE_I32) {
|
| 6499 |
val = *(const int32_t *) ((const char *) data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]);
|
| 6500 |
} else {
|
| 6501 |
+
GGML_ABORT("fatal error");
|
| 6502 |
}
|
| 6503 |
fprintf(stderr, "% 7.2f ", val);
|
| 6504 |
} else {
|
|
|
|
| 6620 |
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 6621 |
}
|
| 6622 |
} else {
|
| 6623 |
+
GGML_ABORT("fatal error");
|
| 6624 |
}
|
| 6625 |
|
| 6626 |
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
|
|
|
| 6662 |
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 6663 |
}
|
| 6664 |
} else {
|
| 6665 |
+
GGML_ABORT("fatal error");
|
| 6666 |
}
|
| 6667 |
|
| 6668 |
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
|
|
|
| 6720 |
memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 6721 |
}
|
| 6722 |
} else {
|
| 6723 |
+
GGML_ABORT("fatal error");
|
| 6724 |
}
|
| 6725 |
|
| 6726 |
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
|
|
|
|
| 6797 |
break;
|
| 6798 |
default:
|
| 6799 |
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
|
| 6800 |
+
GGML_ABORT("fatal error");
|
| 6801 |
}
|
| 6802 |
} else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) {
|
| 6803 |
if (src1 == nullptr) {
|
|
|
|
| 6825 |
tensor_clone = ggml_sum_rows(ggml_ctx, src0_clone);
|
| 6826 |
} else {
|
| 6827 |
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
|
| 6828 |
+
GGML_ABORT("fatal error");
|
| 6829 |
}
|
| 6830 |
|
| 6831 |
ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx);
|
|
|
|
| 6912 |
}
|
| 6913 |
} else {
|
| 6914 |
std::cerr << "Missing debug code for type " << ggml_type_name(tensor->type) << std::endl;
|
| 6915 |
+
GGML_ABORT("fatal error");
|
| 6916 |
}
|
| 6917 |
|
| 6918 |
if ((std::isnan(correct) != std::isnan(result)) || (std::isinf(correct) != std::isinf(result)) || !buffer_size_fit) {
|
|
|
|
| 6935 |
std::cerr << std::endl;
|
| 6936 |
std::vector<const ggml_tensor *> done;
|
| 6937 |
ggml_vk_print_graph_origin(tensor, done);
|
| 6938 |
+
GGML_ABORT("fatal error");
|
| 6939 |
}
|
| 6940 |
if (first_error[0] == -1 && std::fabs(correct - result) > 0.1f) {
|
| 6941 |
first_error[0] = i0;
|
|
|
|
| 7006 |
std::cerr << std::endl;
|
| 7007 |
std::vector<const ggml_tensor *> done;
|
| 7008 |
ggml_vk_print_graph_origin(tensor, done);
|
| 7009 |
+
GGML_ABORT("fatal error");
|
| 7010 |
} else {
|
| 7011 |
std::cerr << check_counter << " " << tensor->name << " op=" << ggml_op_name(tensor->op) << " avg_err=" << avg_err << std::endl;
|
| 7012 |
}
|
ggml/src/ggml.c
CHANGED

@@ -141,23 +141,25 @@ typedef pthread_t ggml_thread_t;

 #include <sys/wait.h>

-void ggml_print_backtrace(void) {
-    /*
-    #include <execinfo.h>
-    #include <dlfcn.h>
-
-    void * trace[100];
-
-    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
-
-    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
-    */
-
-    // backtrack_symbols does not show line numbers, use gdb instead
+#if defined(__linux__)
+#include <execinfo.h>
+static void ggml_print_backtrace_symbols(void) {
+    void * trace[100];
+    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
+}
+#else
+static void ggml_print_backtrace_symbols(void) {
+    // platform not supported
+}
+#endif
+
+static void ggml_print_backtrace(void) {
     char attach[32];
     snprintf(attach, sizeof(attach), "attach %d", getpid());
     int pid = fork();
     if (pid == 0) {
         execlp("gdb", "gdb", "--batch",
             "-ex", "set style enabled on",
             "-ex", attach,
@@ -165,16 +167,46 @@ void ggml_print_backtrace(void) {
             "-ex", "detach",
             "-ex", "quit",
             (char *) NULL);
+        exit(EXIT_FAILURE);
     } else {
-        waitpid(pid, NULL, 0);
+        int wstatus;
+        waitpid(pid, &wstatus, 0);
+        if (WIFEXITED(wstatus)) {
+            if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
+                // gdb failed, fallback to backtrace_symbols
+                ggml_print_backtrace_symbols();
+            }
+        }
     }
 }
 #else
-void ggml_print_backtrace(void) {
+static void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif

+void ggml_abort(const char * file, int line, const char * fmt, ...) {
+    fflush(stdout);
+
+    fprintf(stderr, "%s:%d: ", file, line);
+
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fprintf(stderr, "\n");
+
+    ggml_print_backtrace();
+    abort();
+}
+
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
@@ -246,7 +278,7 @@ inline static void * ggml_aligned_malloc(size_t size) {
                 break;
         }
         GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         return NULL;
     }
     return aligned_memory;
@@ -267,7 +299,7 @@ inline static void * ggml_malloc(size_t size) {
     void * result = malloc(size);
     if (result == NULL) {
         GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     return result;
 }
@@ -281,7 +313,7 @@ inline static void * ggml_calloc(size_t num, size_t size) {
     void * result = calloc(num, size);
     if (result == NULL) {
         GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
     return result;
 }
@@ -3372,7 +3404,7 @@ static inline int ggml_up(int n, int m) {
 }

 // assert that pointer is aligned to GGML_MEM_ALIGN
-#define ggml_assert_aligned(ptr) \
+#define GGML_ASSERT_ALIGNED(ptr) \
     GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)

 ////////////////////////////////////////////////////////////////////////////////
@@ -3473,7 +3505,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

     GGML_ASSERT(ctx->mem_buffer != NULL);

-    ggml_assert_aligned(ctx->mem_buffer);
+    GGML_ASSERT_ALIGNED(ctx->mem_buffer);

     GGML_PRINT_DEBUG("%s: context initialized\n", __func__);

@@ -3605,7 +3637,7 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
         .type = type,
     };

-    ggml_assert_aligned(mem_buffer + obj_new->offs);
+    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);

     if (obj_cur != NULL) {
         obj_cur->next = obj_new;
@@ -3706,7 +3738,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 #endif

     // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
-    //ggml_assert_aligned(result->data);
+    //GGML_ASSERT_ALIGNED(result->data);

     for (int i = 0; i < n_dims; i++) {
         result->ne[i] = ne[i];
@@ -3879,8 +3911,8 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }

     return tensor;
@@ -3938,8 +3970,8 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }

     return tensor;
@@ -4008,11 +4040,9 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
             }
         default:
             {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
     }
-
-    return 0.0f;
 }

 void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
@@ -4055,8 +4085,8 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -4076,10 +4106,8 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
         case GGML_TYPE_F32:
             return ((float *) data)[0];
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
     }
-
-    return 0.0f;
 }

 void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value) {
@@ -4111,8 +4139,8 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -4149,11 +4177,9 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
             }
         default:
             {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
     }
-
-    return 0.0f;
 }

 void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
@@ -4190,8 +4216,8 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -4211,10 +4237,8 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
         case GGML_TYPE_F32:
             return ((float *) data)[0];
         default:
-            GGML_ASSERT(false);
+            GGML_ABORT("fatal error");
     }
-
-    return 0.0f;
 }

 void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value) {
@@ -4246,8 +4270,8 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -4270,8 +4294,11 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
 }

 struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
-    strncpy(tensor->name, name, sizeof(tensor->name) - 1);
-    tensor->name[sizeof(tensor->name) - 1] = '\0';
+    size_t i;
+    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
+        tensor->name[i] = name[i];
+    }
+    tensor->name[i] = '\0';
     return tensor;
 }

@@ -4842,7 +4869,7 @@ struct ggml_tensor * ggml_mean(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -4865,7 +4892,7 @@ struct ggml_tensor * ggml_argmax(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -5188,7 +5215,7 @@ static struct ggml_tensor * ggml_norm_impl(
     bool is_node = false;

     if (!inplace && (a->grad)) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -5291,7 +5318,7 @@ static struct ggml_tensor * ggml_group_norm_impl(

     bool is_node = false;
     if (!inplace && (a->grad)) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -5705,7 +5732,7 @@ struct ggml_tensor * ggml_reshape(

     if (b->grad) {
         // gradient propagation is not supported
-        //GGML_ASSERT(false);
+        //GGML_ABORT("fatal error");
     }

     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
@@ -6488,7 +6515,7 @@ struct ggml_tensor * ggml_clamp(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6564,7 +6591,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
     bool is_node = false;

     if (a->grad || b->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6636,7 +6663,7 @@ struct ggml_tensor * ggml_im2col(
     bool is_node = false;

     if (a->grad || b->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6722,7 +6749,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
     bool is_node = false;

     if (a->grad || b->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6763,7 +6790,7 @@ struct ggml_tensor * ggml_pool_1d(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6801,7 +6828,7 @@ struct ggml_tensor * ggml_pool_2d(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6834,7 +6861,7 @@ static struct ggml_tensor * ggml_upscale_impl(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6884,7 +6911,7 @@ struct ggml_tensor * ggml_pad(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -6933,7 +6960,7 @@ struct ggml_tensor * ggml_timestep_embedding(
     bool is_node = false;

     if (timesteps->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -7059,7 +7086,7 @@ struct ggml_tensor * ggml_flash_attn_back(
         struct ggml_tensor * v,
         struct ggml_tensor * d,
         bool masked) {
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");

     GGML_ASSERT(ggml_can_mul_mat(k, q));
     // TODO: check if vT can be multiplied by (k*qT)
@@ -7158,7 +7185,7 @@ struct ggml_tensor * ggml_ssm_conv(
     bool is_node = false;

     if (s->grad || x->grad || c->grad || sq->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -7212,7 +7239,7 @@ struct ggml_tensor * ggml_ssm_scan(
     bool is_node = false;

     if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad || sq->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -7244,7 +7271,7 @@ struct ggml_tensor * ggml_win_part(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -7282,7 +7309,7 @@ struct ggml_tensor * ggml_win_unpart(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -7312,7 +7339,7 @@ struct ggml_tensor * ggml_get_rel_pos(
     bool is_node = false;

     if (a->grad) {
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
         is_node = true;
     }

@@ -8002,7 +8029,7 @@ static void ggml_compute_forward_dup_f16(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } else {
             //printf("%s: this is not optimal - fix me\n", __func__);
@@ -8044,7 +8071,7 @@ static void ggml_compute_forward_dup_f16(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }
         return;
@@ -8161,7 +8188,7 @@ static void ggml_compute_forward_dup_f16(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }

@@ -8288,7 +8315,7 @@ static void ggml_compute_forward_dup_bf16(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } else {
             //printf("%s: this is not optimal - fix me\n", __func__);
@@ -8348,7 +8375,7 @@ static void ggml_compute_forward_dup_bf16(
                    }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }
         return;
@@ -8517,7 +8544,7 @@ static void ggml_compute_forward_dup_bf16(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }

@@ -8603,7 +8630,7 @@ static void ggml_compute_forward_dup_f32(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } else {
             //printf("%s: this is not optimal - fix me\n", __func__);
@@ -8663,7 +8690,7 @@ static void ggml_compute_forward_dup_f32(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }

@@ -8834,7 +8861,7 @@ static void ggml_compute_forward_dup_f32(
                     }
                 }
             } else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         }

@@ -9012,8 +9039,8 @@ static void ggml_compute_forward_dup(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -9165,7 +9192,7 @@ static void ggml_compute_forward_add_f16_f32(
     }
     else {
         // src1 is not contiguous
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -9240,7 +9267,7 @@ static void ggml_compute_forward_add_bf16_f32(
     }
     else {
         // src1 is not contiguous
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -9292,7 +9319,7 @@ static void ggml_compute_forward_add_f16_f16(
     }
     else {
         // src1 is not contiguous
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -9344,7 +9371,7 @@ static void ggml_compute_forward_add_bf16_bf16(
     }
     else {
         // src1 is not contiguous
-        GGML_ASSERT(false);
+        GGML_ABORT("fatal error");
     }
 }

@@ -9438,7 +9465,7 @@ static void ggml_compute_forward_add(
                 ggml_compute_forward_add_f32(params, dst);
             }
             else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } break;
         case GGML_TYPE_F16:
@@ -9450,7 +9477,7 @@ static void ggml_compute_forward_add(
                 ggml_compute_forward_add_f16_f32(params, dst);
             }
             else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } break;
         case GGML_TYPE_BF16:
@@ -9462,7 +9489,7 @@ static void ggml_compute_forward_add(
                 ggml_compute_forward_add_bf16_f32(params, dst);
             }
             else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } break;
         case GGML_TYPE_Q4_0:
@@ -9492,8 +9519,8 @@ static void ggml_compute_forward_add(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -9827,7 +9854,7 @@ static void ggml_compute_forward_add1(
                 ggml_compute_forward_add1_f16_f32(params, dst);
             }
             else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } break;
         case GGML_TYPE_BF16:
@@ -9839,7 +9866,7 @@ static void ggml_compute_forward_add1(
                 ggml_compute_forward_add1_bf16_f32(params, dst);
             }
             else {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
             }
         } break;
         case GGML_TYPE_Q4_0:
@@ -9870,8 +9897,8 @@ static void ggml_compute_forward_add1(
             } break;
        default:
            {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -9995,8 +10022,8 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_Q4_0_8_8:
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10076,8 +10103,8 @@ static void ggml_compute_forward_sub(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10170,8 +10197,8 @@ static void ggml_compute_forward_mul(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10261,8 +10288,8 @@ static void ggml_compute_forward_div(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10306,8 +10333,8 @@ static void ggml_compute_forward_sqr(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10351,8 +10378,8 @@ static void ggml_compute_forward_sqrt(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10396,8 +10423,8 @@ static void ggml_compute_forward_log(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10525,8 +10552,8 @@ static void ggml_compute_forward_sum(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10578,8 +10605,8 @@ static void ggml_compute_forward_sum_rows(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10635,8 +10662,8 @@ static void ggml_compute_forward_mean(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10683,8 +10710,8 @@ static void ggml_compute_forward_argmax(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10801,8 +10828,8 @@ static void ggml_compute_forward_repeat(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10879,8 +10906,8 @@ static void ggml_compute_forward_repeat_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10948,8 +10975,8 @@ static void ggml_compute_forward_concat(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -10992,8 +11019,8 @@ static void ggml_compute_forward_abs(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11036,8 +11063,8 @@ static void ggml_compute_forward_sgn(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11080,8 +11107,8 @@ static void ggml_compute_forward_neg(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11124,8 +11151,8 @@ static void ggml_compute_forward_step(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11168,8 +11195,8 @@ static void ggml_compute_forward_tanh(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11212,8 +11239,8 @@ static void ggml_compute_forward_elu(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11256,8 +11283,8 @@ static void ggml_compute_forward_relu(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11300,8 +11327,8 @@ static void ggml_compute_forward_sigmoid(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11359,8 +11386,8 @@ static void ggml_compute_forward_gelu(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11418,8 +11445,8 @@ static void ggml_compute_forward_gelu_quick(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11477,8 +11504,8 @@ static void ggml_compute_forward_silu(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }
 // ggml_compute_forward_leaky_relu

@@ -11526,8 +11553,8 @@ static void ggml_compute_forward_leaky_relu(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11589,8 +11616,8 @@ static void ggml_compute_forward_silu_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11631,8 +11658,8 @@ static void ggml_compute_forward_hardswish(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11673,8 +11700,8 @@ static void ggml_compute_forward_hardsigmoid(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11745,8 +11772,8 @@ static void ggml_compute_forward_norm(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11813,8 +11840,8 @@ static void ggml_compute_forward_rms_norm(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -11986,8 +12013,8 @@ static void ggml_compute_forward_rms_norm_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -12080,8 +12107,8 @@ static void ggml_compute_forward_group_norm(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -12839,17 +12866,17 @@ static void ggml_compute_forward_out_prod(
             } break;
         case GGML_TYPE_F16:
             {
-                GGML_ASSERT(false);
+                GGML_ABORT("fatal error");
                 // ggml_compute_forward_out_prod_f16_f32(params, dst);
-            } break;
+            }
         case GGML_TYPE_F32:
             {
                 ggml_compute_forward_out_prod_f32(params, dst);
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -12908,8 +12935,8 @@ static void ggml_compute_forward_scale(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13024,8 +13051,8 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_Q4_0_8_8:
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13302,8 +13329,8 @@ static void ggml_compute_forward_get_rows(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }

     //static bool first = true;
@@ -13410,8 +13437,8 @@ static void ggml_compute_forward_get_rows_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }

     //static bool first = true;
@@ -13488,8 +13515,8 @@ static void ggml_compute_forward_diag(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13558,8 +13585,8 @@ static void ggml_compute_forward_diag_mask_inf(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13576,8 +13603,8 @@ static void ggml_compute_forward_diag_mask_zero(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13694,8 +13721,8 @@ static void ggml_compute_forward_soft_max(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13790,8 +13817,8 @@ static void ggml_compute_forward_soft_max_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -13881,8 +13908,8 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -14211,8 +14238,8 @@ static void ggml_compute_forward_rope(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -14235,8 +14262,8 @@ static void ggml_compute_forward_rope_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -14435,8 +14462,8 @@ static void ggml_compute_forward_conv_transpose_1d(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -14607,8 +14634,8 @@ static void ggml_compute_forward_im2col(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -14908,8 +14935,8 @@ static void ggml_compute_forward_upscale(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -14966,8 +14993,8 @@ static void ggml_compute_forward_pad(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15007,8 +15034,8 @@ static void ggml_compute_forward_arange(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15058,8 +15085,8 @@ static void ggml_compute_forward_timestep_embedding(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15117,8 +15144,8 @@ static void ggml_compute_forward_argsort(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15340,8 +15367,8 @@ static void ggml_compute_forward_flash_attn_ext(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15676,8 +15703,8 @@ static void ggml_compute_forward_flash_attn_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15798,8 +15825,8 @@ static void ggml_compute_forward_ssm_conv(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15919,8 +15946,8 @@ static void ggml_compute_forward_ssm_scan(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -15982,8 +16009,8 @@ static void ggml_compute_forward_win_part(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16043,8 +16070,8 @@ static void ggml_compute_forward_win_unpart(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16111,8 +16138,8 @@ static void ggml_compute_forward_unary(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16158,8 +16185,8 @@ static void ggml_compute_forward_get_rel_pos(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16239,8 +16266,8 @@ static void ggml_compute_forward_add_rel_pos(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16285,8 +16312,8 @@ static void ggml_compute_forward_map_unary(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16334,8 +16361,8 @@ static void ggml_compute_forward_map_binary(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16533,8 +16560,8 @@ static void ggml_compute_forward_cross_entropy_loss(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16620,8 +16647,8 @@ static void ggml_compute_forward_cross_entropy_loss_back(
             } break;
         default:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

@@ -16956,14 +16983,32 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             } break;
         case GGML_OP_COUNT:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }
 }

 ////////////////////////////////////////////////////////////////////////////////

-static size_t ggml_hash_size(size_t min_sz) {
+struct ggml_hash_set ggml_hash_set_new(size_t size) {
+    size = ggml_hash_size(size);
+    struct ggml_hash_set result;
+    result.size = size;
+    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
+    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
+    return result;
+}
+
+void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
+    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
+}
+
+void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
+    GGML_FREE(hash_set->used);
+    GGML_FREE(hash_set->keys);
+}
+
+size_t ggml_hash_size(size_t min_sz) {
 // next primes after powers of two
 static const size_t primes[] = {
     2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
@@ -16974,7 +17019,7 @@ static size_t ggml_hash_size(size_t min_sz) {
     };
     static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);

-    // find the smallest prime that is larger or equal
+    // find the smallest prime that is larger or equal to min_sz
     size_t l = 0;
     size_t r = n_primes;
     while (l < r) {
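The reconstruction above is the heart of the change: slot occupancy moves out of the keys array into a separate used bitset, so resetting the table between graph evaluations clears one bit per slot instead of one pointer per slot. For a table of n slots on a 64-bit build that is n/8 bytes of memset instead of 8n bytes, a 64x reduction in memory traffic. The following is a sketch of the bitset primitives such code relies on; the real inline definitions live in ggml-impl.h, and the names and word width here are assumptions for illustration:

// sketch of the bitset primitives assumed by the hash-set code above;
// the upstream versions are in ggml-impl.h and may differ in detail
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t ggml_bitset_t;

#define BITSET_SHIFT 5u              // 2^5 = 32 bits per word
#define BITSET_MASK  31u

static size_t ggml_bitset_size(size_t n) {
    // number of 32-bit words needed to hold n bits, rounded up
    return (n + BITSET_MASK) >> BITSET_SHIFT;
}

static bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
    return !!(bitset[i >> BITSET_SHIFT] & (1u << (i & BITSET_MASK)));
}

static void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
    bitset[i >> BITSET_SHIFT] |= (1u << (i & BITSET_MASK));
}

Because ggml_hash_set_reset only has to memset the words returned by ggml_bitset_size, a table that is rebuilt for every graph evaluation no longer pays for zeroing its whole key array each time.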
@@ -16989,67 +17034,6 @@ static size_t ggml_hash_size(size_t min_sz) {
     return sz;
 }

-static size_t ggml_hash(const void * p) {
-    return (size_t)p;
-}
-
-size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
-    size_t h = ggml_hash(key) % hash_set.size;
-
-    // linear probing
-    size_t i = h;
-    while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
-        i = (i + 1) % hash_set.size;
-        if (i == h) {
-            // visited all hash table entries -> not found
-            return GGML_HASHTABLE_FULL;
-        }
-    }
-    return i;
-}
-
-bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
-    size_t i = ggml_hash_find(hash_set, key);
-    return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
-}
-
-size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
-    size_t i = ggml_hash_find(hash_set, key);
-
-    GGML_ASSERT(i != GGML_HASHTABLE_FULL);
-
-    if (hash_set.keys[i] == key) {
-        return GGML_HASHTABLE_ALREADY_EXISTS;
-    }
-
-    // insert
-    GGML_ASSERT(hash_set.keys[i] == NULL);
-    hash_set.keys[i] = key;
-    return i;
-}
-
-size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
-    size_t i = ggml_hash_find(hash_set, key);
-
-    GGML_ASSERT(i != GGML_HASHTABLE_FULL);
-
-    hash_set.keys[i] = key;
-    return i;
-}
-
-struct ggml_hash_set ggml_hash_set_new(size_t size) {
-    size = ggml_hash_size(size);
-    struct ggml_hash_set result;
-    result.size = size;
-    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
-    memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
-    return result;
-}
-
-static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
-    GGML_FREE(hash_set.keys);
-}
-
 struct hash_map {
     struct ggml_hash_set set;
     struct ggml_tensor ** vals;
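For contrast with the deleted functions above, which probed for keys[i] != NULL to detect free slots and therefore forced every reset to zero the whole key array, here is a sketch of the same linear probe driven by a used bitset. All names here (hash_set_find, HASHSET_FULL, bit_get) are illustrative stand-ins, not the new inline API that this commit adds to ggml-impl.h:

// sketch: linear probing keyed off a 'used' bitset instead of NULL keys;
// stale pointers left in 'keys' from a previous graph are simply ignored
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t bitset_t;
#define HASHSET_FULL ((size_t) -1)

struct tensor;                   // stand-in for struct ggml_tensor

struct hash_set {
    size_t           size;
    bitset_t       * used;       // 1 bit per slot: slot occupied?
    struct tensor ** keys;       // only meaningful where the bit is set
};

static bool bit_get(const bitset_t * b, size_t i) { return !!(b[i >> 5] & (1u << (i & 31u))); }
static void bit_set(bitset_t * b, size_t i)       { b[i >> 5] |= (1u << (i & 31u)); }

static size_t hash_set_find(const struct hash_set * hs, struct tensor * key) {
    size_t h = ((size_t) key >> 4) % hs->size; // pointer hash, alignment bits dropped
    size_t i = h;
    // probe only over *used* slots; an unset bit means "free"
    while (bit_get(hs->used, i) && hs->keys[i] != key) {
        i = (i + 1) % hs->size;
        if (i == h) {
            return HASHSET_FULL; // wrapped around: table is full
        }
    }
    return i; // slot containing key, or first free slot
}

static size_t hash_set_insert(struct hash_set * hs, struct tensor * key) {
    size_t i = hash_set_find(hs, key);
    if (i != HASHSET_FULL && !bit_get(hs->used, i)) {
        bit_set(hs->used, i);
        hs->keys[i] = key;
    }
    return i;
}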
@@ -17058,13 +17042,12 @@ struct hash_map {
 static struct hash_map * ggml_new_hash_map(size_t size) {
     struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
     result->set = ggml_hash_set_new(size);
-    result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size);
-    memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
+    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
     return result;
 }

 static void ggml_hash_map_free(struct hash_map * map) {
-    ggml_hash_set_free(map->set);
+    ggml_hash_set_free(&map->set);
     GGML_FREE(map->vals);
     GGML_FREE(map);
 }
@@ -17085,7 +17068,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return node;
     }

-    if (!ggml_hash_contains(graph->visited_hash_table, node)) {
+    if (!ggml_hash_contains(&graph->visited_hash_set, node)) {
         return node;
     }

@@ -17100,8 +17083,8 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return node;
     }

-    size_t i = ggml_hash_find(replacements->set, node);
-    GGML_ASSERT(i != GGML_HASHTABLE_FULL);
+    size_t i = ggml_hash_find(&replacements->set, node);
+    GGML_ASSERT(i != GGML_HASHSET_FULL);
     if (replacements->set.keys[i] == node) {
         return replacements->vals[i];
     }

@@ -17159,8 +17142,8 @@ void ggml_build_backward_gradient_checkpointing(

     // insert checkpoints in replacements
     for (int i = 0; i < n_checkpoints; ++i) {
-        size_t k = ggml_hash_find(replacements->set, checkpoints[i]);
-        GGML_ASSERT(k != GGML_HASHTABLE_FULL);
+        size_t k = ggml_hash_find(&replacements->set, checkpoints[i]);
+        GGML_ASSERT(k != GGML_HASHSET_FULL);
         GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
         replacements->set.keys[k] = checkpoints[i];
         replacements->vals[k] = checkpoints[i];

@@ -17188,7 +17171,7 @@ void ggml_build_backward_gradient_checkpointing(

 // functions to change gradients considering the case that input a might be initial gradient with zero value

-static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         return b;
     } else {
@@ -17196,7 +17179,7 @@ static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct gg
     }
 }

-static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f);
         return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
@@ -17205,7 +17188,7 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg
     }
 }

-static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         return ggml_repeat(ctx, b, a);
     } else {
@@ -17213,7 +17196,7 @@ static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct g
     }
 }

-static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         return ggml_neg(ctx, b);
     } else {
@@ -17221,7 +17204,7 @@ static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct gg
     }
 }

-static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) {
+static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
     struct ggml_tensor * src0 = tensor->src[0];
     struct ggml_tensor * src1 = tensor->src[1];
     struct ggml_tensor * src2 = tensor->src[2];

@@ -17390,8 +17373,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_MEAN:
         case GGML_OP_ARGMAX:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_REPEAT:
             {
                 // necessary for llama
@@ -17414,16 +17397,16 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_CONCAT:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_SILU_BACK:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_NORM:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_RMS_NORM:
             {
                 // necessary for llama
@@ -17439,12 +17422,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_RMS_NORM_BACK:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_GROUP_NORM:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_MUL_MAT:
             {
                 // https://cs231n.github.io/optimization-2/#staged
@@ -17505,12 +17488,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_MUL_MAT_ID:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_OUT_PROD:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_SCALE:
             {
                 // necessary for llama
@@ -17686,12 +17669,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_GET_ROWS_BACK:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_DIAG:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_DIAG_MASK_INF:
             {
                 // necessary for llama
@@ -17729,8 +17712,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_SOFT_MAX_BACK:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_ROPE:
             {
                 // necessary for llama
@@ -17805,52 +17788,52 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_CLAMP:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_IM2COL:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_POOL_1D:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_POOL_2D:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_UPSCALE:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_PAD:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_ARANGE:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_TIMESTEP_EMBEDDING:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_ARGSORT:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_LEAKY_RELU:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_FLASH_ATTN_EXT:
             {
                 struct ggml_tensor * flash_grad = NULL;
@@ -17906,13 +17889,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_FLASH_ATTN_BACK:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_SSM_CONV:
         case GGML_OP_SSM_SCAN:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_WIN_PART:
         case GGML_OP_WIN_UNPART:
         case GGML_OP_UNARY:
@@ -17950,12 +17933,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     } break;
                 case GGML_UNARY_OP_TANH:
                     {
-                        GGML_ASSERT(false);
-                    } break;
+                        GGML_ABORT("fatal error");
+                    }
                 case GGML_UNARY_OP_ELU:
                     {
-                        GGML_ASSERT(false);
-                    } break;
+                        GGML_ABORT("fatal error");
+                    }
                 case GGML_UNARY_OP_RELU:
                     {
                         if (src0->grad) {
@@ -17969,16 +17952,16 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     } break;
                 case GGML_UNARY_OP_SIGMOID:
                     {
-                        GGML_ASSERT(false);
-                    } break;
+                        GGML_ABORT("fatal error");
+                    }
                 case GGML_UNARY_OP_GELU:
                     {
-                        GGML_ASSERT(false);
-                    } break;
+                        GGML_ABORT("fatal error");
+                    }
                 case GGML_UNARY_OP_GELU_QUICK:
                     {
-                        GGML_ASSERT(false);
-                    } break;
+                        GGML_ABORT("fatal error");
+                    }
                 case GGML_UNARY_OP_SILU:
                     {
                         // necessary for llama
@@ -17990,7 +17973,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     }
                 } break;
                 default:
-                    GGML_ASSERT(false);
+                    GGML_ABORT("fatal error");
             }
         } break;
         case GGML_OP_GET_REL_POS:
@@ -18004,8 +17987,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_MAP_CUSTOM2:
         case GGML_OP_MAP_CUSTOM3:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_CROSS_ENTROPY_LOSS:
             {
                 if (src0->grad) {
@@ -18020,16 +18003,16 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
         case GGML_OP_NONE:
             {
                 // nop
             } break;
         case GGML_OP_COUNT:
             {
-                GGML_ASSERT(false);
-            } break;
+                GGML_ABORT("fatal error");
+            }
     }

     for (int i = 0; i < GGML_MAX_SRC; ++i) {
@@ -18049,7 +18032,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
| 18049 |
}
|
| 18050 |
|
| 18051 |
// check if already visited
|
| 18052 |
-
if (ggml_hash_insert(cgraph->
|
| 18053 |
return;
|
| 18054 |
}
|
| 18055 |
|
|
@@ -18130,7 +18113,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
| 18130 |
struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
|
| 18131 |
for (int i = 0; i < gf->n_nodes; i++) {
|
| 18132 |
if (gf->grads[i]) {
|
| 18133 |
-
ggml_hash_insert(zero_table, gf->grads[i]);
|
| 18134 |
}
|
| 18135 |
}
|
| 18136 |
|
|
@@ -18140,7 +18123,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
| 18140 |
// inplace operations to add gradients are not created by ggml_compute_backward
|
| 18141 |
// use allocator to automatically make inplace operations
|
| 18142 |
if (node->grad) {
|
| 18143 |
-
ggml_compute_backward(ctx, node, zero_table);
|
| 18144 |
}
|
| 18145 |
}
|
| 18146 |
|
|
@@ -18153,16 +18136,29 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
| 18153 |
}
|
| 18154 |
}
|
| 18155 |
|
| 18156 |
-
ggml_hash_set_free(zero_table);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18157 |
}
|
| 18158 |
|
| 18159 |
static size_t ggml_graph_nbytes(size_t size, bool grads) {
|
| 18160 |
-
size_t
|
| 18161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18162 |
if (grads) {
|
| 18163 |
-
|
| 18164 |
}
|
| 18165 |
-
|
|
|
|
|
|
|
| 18166 |
return nbytes;
|
| 18167 |
}
|
| 18168 |
|
|
@@ -18179,19 +18175,19 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
|
|
| 18179 |
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
|
| 18180 |
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
| 18181 |
|
| 18182 |
-
|
| 18183 |
-
|
| 18184 |
size_t hash_size = ggml_hash_size(size * 2);
|
| 18185 |
-
struct ggml_tensor ** nodes_ptr = data_start;
|
| 18186 |
-
struct ggml_tensor ** leafs_ptr = nodes_ptr + size;
|
| 18187 |
-
struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
|
| 18188 |
-
struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;
|
| 18189 |
|
| 18190 |
-
|
| 18191 |
-
|
| 18192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18193 |
|
| 18194 |
-
|
|
|
|
| 18195 |
|
| 18196 |
*cgraph = (struct ggml_cgraph) {
|
| 18197 |
/*.size =*/ size,
|
|
@@ -18200,10 +18196,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
|
|
| 18200 |
/*.nodes =*/ nodes_ptr,
|
| 18201 |
/*.grads =*/ grads_ptr,
|
| 18202 |
/*.leafs =*/ leafs_ptr,
|
| 18203 |
-
/*.hash_table =*/ { hash_size, hash_keys_ptr },
|
| 18204 |
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
|
| 18205 |
};
|
| 18206 |
|
|
|
|
|
|
|
| 18207 |
return cgraph;
|
| 18208 |
}
|
| 18209 |
|
|
@@ -18219,7 +18217,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
|
|
| 18219 |
/*.nodes =*/ cgraph0->nodes + i0,
|
| 18220 |
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
|
| 18221 |
/*.leafs =*/ NULL,
|
| 18222 |
-
/*.hash_table =*/ { 0, NULL },
|
| 18223 |
/*.order =*/ cgraph0->order,
|
| 18224 |
};
|
| 18225 |
|
|
@@ -18229,7 +18227,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
|
|
| 18229 |
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
| 18230 |
GGML_ASSERT(dst->size >= src->n_leafs);
|
| 18231 |
GGML_ASSERT(dst->size >= src->n_nodes);
|
| 18232 |
-
GGML_ASSERT(dst->
|
| 18233 |
|
| 18234 |
dst->n_leafs = src->n_leafs;
|
| 18235 |
dst->n_nodes = src->n_nodes;
|
|
@@ -18250,9 +18248,9 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
|
| 18250 |
}
|
| 18251 |
}
|
| 18252 |
|
| 18253 |
-
for (size_t i = 0; i < src->
|
| 18254 |
-
if (src->
|
| 18255 |
-
ggml_hash_insert(dst->
|
| 18256 |
}
|
| 18257 |
}
|
| 18258 |
}
|
|
@@ -18278,7 +18276,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
| 18278 |
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
|
| 18279 |
cgraph->n_leafs = 0;
|
| 18280 |
cgraph->n_nodes = 0;
|
| 18281 |
-
|
| 18282 |
}
|
| 18283 |
|
| 18284 |
//
|
|
@@ -18470,7 +18468,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
| 18470 |
n_tasks = n_threads;
|
| 18471 |
} break;
|
| 18472 |
default:
|
| 18473 |
-
|
| 18474 |
}
|
| 18475 |
break;
|
| 18476 |
case GGML_OP_SILU_BACK:
|
|
@@ -18597,8 +18595,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
| 18597 |
} break;
|
| 18598 |
case GGML_OP_COUNT:
|
| 18599 |
{
|
| 18600 |
-
|
| 18601 |
-
}
|
| 18602 |
default:
|
| 18603 |
{
|
| 18604 |
fprintf(stderr, "%s: op not implemented: ", __func__);
|
|
@@ -18607,8 +18605,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
| 18607 |
} else {
|
| 18608 |
fprintf(stderr, "%d\n", node->op);
|
| 18609 |
}
|
| 18610 |
-
|
| 18611 |
-
}
|
| 18612 |
}
|
| 18613 |
|
| 18614 |
assert(n_tasks > 0);
|
|
@@ -18718,7 +18716,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|
| 18718 |
cur += sizeof(float)*ne00*ne01*ne02;
|
| 18719 |
cur += sizeof(float)*ne10*ne11;
|
| 18720 |
} else {
|
| 18721 |
-
|
| 18722 |
}
|
| 18723 |
} break;
|
| 18724 |
case GGML_OP_CONV_TRANSPOSE_2D:
|
|
@@ -18764,8 +18762,8 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|
| 18764 |
} break;
|
| 18765 |
case GGML_OP_COUNT:
|
| 18766 |
{
|
| 18767 |
-
|
| 18768 |
-
}
|
| 18769 |
default:
|
| 18770 |
break;
|
| 18771 |
}
|
|
@@ -19999,9 +19997,9 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
| 19999 |
(*step) *= width;
|
| 20000 |
}
|
| 20001 |
|
| 20002 |
-
|
| 20003 |
|
| 20004 |
-
return GGML_LINESEARCH_FAIL;
|
| 20005 |
}
|
| 20006 |
|
| 20007 |
static enum ggml_opt_result ggml_opt_lbfgs(
|
|
@@ -20269,9 +20267,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
| 20269 |
step[0] = 1.0;
|
| 20270 |
}
|
| 20271 |
|
| 20272 |
-
|
| 20273 |
|
| 20274 |
-
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
| 20275 |
}
|
| 20276 |
|
| 20277 |
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
@@ -20966,10 +20964,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
| 20966 |
}
|
| 20967 |
} break;
|
| 20968 |
case GGUF_TYPE_ARRAY:
|
| 20969 |
-
default:
|
| 20970 |
}
|
| 20971 |
} break;
|
| 20972 |
-
default:
|
| 20973 |
}
|
| 20974 |
|
| 20975 |
if (!ok) {
|
|
@@ -21550,12 +21548,12 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|
| 21550 |
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
|
| 21551 |
GGML_FREE((void *)data);
|
| 21552 |
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
|
| 21553 |
-
|
| 21554 |
} else {
|
| 21555 |
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
|
| 21556 |
}
|
| 21557 |
} break;
|
| 21558 |
-
default:
|
| 21559 |
}
|
| 21560 |
}
|
| 21561 |
}
|
|
@@ -21564,7 +21562,7 @@ void gguf_add_tensor(
|
|
| 21564 |
struct gguf_context * ctx,
|
| 21565 |
const struct ggml_tensor * tensor) {
|
| 21566 |
if (gguf_find_tensor(ctx, tensor->name) != -1) {
|
| 21567 |
-
|
| 21568 |
}
|
| 21569 |
|
| 21570 |
const int idx = ctx->header.n_tensors;
|
|
@@ -21597,7 +21595,7 @@ void gguf_add_tensor(
|
|
| 21597 |
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
|
| 21598 |
const int idx = gguf_find_tensor(ctx, name);
|
| 21599 |
if (idx < 0) {
|
| 21600 |
-
|
| 21601 |
}
|
| 21602 |
|
| 21603 |
ctx->infos[idx].type = type;
|
|
@@ -21606,7 +21604,7 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggm
|
|
| 21606 |
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
|
| 21607 |
const int idx = gguf_find_tensor(ctx, name);
|
| 21608 |
if (idx < 0) {
|
| 21609 |
-
|
| 21610 |
}
|
| 21611 |
|
| 21612 |
ctx->infos[idx].data = data;
|
|
@@ -21735,10 +21733,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
|
|
| 21735 |
}
|
| 21736 |
} break;
|
| 21737 |
case GGUF_TYPE_ARRAY:
|
| 21738 |
-
default:
|
| 21739 |
}
|
| 21740 |
} break;
|
| 21741 |
-
default:
|
| 21742 |
}
|
| 21743 |
}
|
| 21744 |
|
|
@@ -21799,7 +21797,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
|
|
| 21799 |
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
|
| 21800 |
FILE * file = ggml_fopen(fname, "wb");
|
| 21801 |
if (!file) {
|
| 21802 |
-
|
| 21803 |
}
|
| 21804 |
|
| 21805 |
struct gguf_buf buf = gguf_buf_init(16*1024);
|
|
|
|
 
 #include <sys/wait.h>
 
+#if defined(__linux__)
+#include <execinfo.h>
+static void ggml_print_backtrace_symbols(void) {
+    void * trace[100];
+    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
+}
+#else
+static void ggml_print_backtrace_symbols(void) {
+    // platform not supported
+}
+#endif
 
+static void ggml_print_backtrace(void) {
     char attach[32];
     snprintf(attach, sizeof(attach), "attach %d", getpid());
     int pid = fork();
     if (pid == 0) {
+        // try gdb
         execlp("gdb", "gdb", "--batch",
             "-ex", "set style enabled on",
             "-ex", attach,
             "-ex", "bt -frame-info source-and-location",
             "-ex", "detach",
             "-ex", "quit",
             (char *) NULL);
+        // try lldb
+        execlp("lldb", "lldb", "--batch",
+            "-o", "bt",
+            "-o", "quit",
+            "-p", attach,
+            (char *) NULL);
+        exit(EXIT_FAILURE);
     } else {
+        int wstatus;
+        waitpid(pid, &wstatus, 0);
+        if (WIFEXITED(wstatus)) {
+            if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
+                // gdb failed, fallback to backtrace_symbols
+                ggml_print_backtrace_symbols();
+            }
+        }
     }
 }
 #else
+static void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
 
+void ggml_abort(const char * file, int line, const char * fmt, ...) {
+    fflush(stdout);
+
+    fprintf(stderr, "%s:%d: ", file, line);
+
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fprintf(stderr, "\n");
+
+    ggml_print_backtrace();
+
+    abort();
+}
+
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
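For context, here is a minimal standalone sketch of the abort path this hunk introduces: a variadic reporter prefixed with file:line behind a macro, so call sites can pass a printf-style message. The names demo_abort/DEMO_ABORT are illustrative, not ggml API; the sketch assumes the real GGML_ABORT macro forwards __FILE__/__LINE__ in the same way.

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>

    // illustrative stand-in for ggml_abort(): prints "file:line: message", then aborts
    static void demo_abort(const char * file, int line, const char * fmt, ...) {
        fflush(stdout);
        fprintf(stderr, "%s:%d: ", file, line);

        va_list args;
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);

        fprintf(stderr, "\n");
        abort();
    }

    // the macro captures the call site, so every abort reports where it fired
    #define DEMO_ABORT(...) demo_abort(__FILE__, __LINE__, __VA_ARGS__)

    int main(void) {
        int op = 42;
        DEMO_ABORT("op not implemented: %d", op); // formatted message, then abort()
    }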
         break;
     }
     GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
+    GGML_ABORT("fatal error");
     return NULL;
 }
 return aligned_memory;

 void * result = malloc(size);
 if (result == NULL) {
     GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+    GGML_ABORT("fatal error");
 }
 return result;
 }

 void * result = calloc(num, size);
 if (result == NULL) {
     GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+    GGML_ABORT("fatal error");
 }
 return result;
 }
 }

 // assert that pointer is aligned to GGML_MEM_ALIGN
+#define GGML_ASSERT_ALIGNED(ptr) \
     GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)

 ////////////////////////////////////////////////////////////////////////////////

 GGML_ASSERT(ctx->mem_buffer != NULL);

+GGML_ASSERT_ALIGNED(ctx->mem_buffer);

 GGML_PRINT_DEBUG("%s: context initialized\n", __func__);

     .type = type,
 };

+GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);

 if (obj_cur != NULL) {
     obj_cur->next = obj_new;

 #endif

 // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
+//GGML_ASSERT_ALIGNED(result->data);

 for (int i = 0; i < n_dims; i++) {
     result->ne[i] = ne[i];
 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }

 return tensor;

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }

 return tensor;

 }
 default:
     {
+        GGML_ABORT("fatal error");
     }
 }
 }

 void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 case GGML_TYPE_F32:
     return ((float *) data)[0];
 default:
+    GGML_ABORT("fatal error");
 }
 }

 void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value) {

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 }
 default:
     {
+        GGML_ABORT("fatal error");
     }
 }
 }

 void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 case GGML_TYPE_F32:
     return ((float *) data)[0];
 default:
+    GGML_ABORT("fatal error");
 }
 }

 void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value) {

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }
 }

 struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
+    size_t i;
+    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
+        tensor->name[i] = name[i];
+    }
+    tensor->name[i] = '\0';
     return tensor;
 }
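The ggml_set_name hunk above replaces a library call with an explicit bounded copy. A small sketch of the same pattern, with a hypothetical copy_name helper, shows the invariant: at most cap-1 bytes are copied and the destination always ends NUL-terminated.

    #include <stdio.h>

    // hypothetical helper mirroring the loop in ggml_set_name above
    static void copy_name(char * dst, size_t cap, const char * src) {
        size_t i;
        for (i = 0; i < cap - 1 && src[i] != '\0'; i++) {
            dst[i] = src[i];
        }
        dst[i] = '\0'; // always terminated, even when src is longer than the buffer
    }

    int main(void) {
        char name[8];
        copy_name(name, sizeof(name), "a very long tensor name");
        printf("%s\n", name); // prints "a very " (7 chars + NUL)
        return 0;
    }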
 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error");
     is_node = true;
 }

 bool is_node = false;

 if (!inplace && (a->grad)) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;
 if (!inplace && (a->grad)) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 if (b->grad) {
     // gradient propagation is not supported
+    //GGML_ABORT("fatal error");
 }

 struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad || b->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad || b->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad || b->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (timesteps->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 struct ggml_tensor * v,
 struct ggml_tensor * d,
 bool masked) {
+GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");

 GGML_ASSERT(ggml_can_mul_mat(k, q));
 // TODO: check if vT can be multiplied by (k*qT)

 bool is_node = false;

 if (s->grad || x->grad || c->grad || sq->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement
     is_node = true;
 }

 bool is_node = false;

 if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad || sq->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }

 bool is_node = false;

 if (a->grad) {
+    GGML_ABORT("fatal error"); // TODO: implement backward
     is_node = true;
 }
}
|
| 8030 |
}
|
| 8031 |
} else {
|
| 8032 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8033 |
}
|
| 8034 |
} else {
|
| 8035 |
//printf("%s: this is not optimal - fix me\n", __func__);
|
|
|
|
| 8071 |
}
|
| 8072 |
}
|
| 8073 |
} else {
|
| 8074 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8075 |
}
|
| 8076 |
}
|
| 8077 |
return;
|
|
|
|
| 8188 |
}
|
| 8189 |
}
|
| 8190 |
} else {
|
| 8191 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8192 |
}
|
| 8193 |
}
|
| 8194 |
|
|
|
|
| 8315 |
}
|
| 8316 |
}
|
| 8317 |
} else {
|
| 8318 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8319 |
}
|
| 8320 |
} else {
|
| 8321 |
//printf("%s: this is not optimal - fix me\n", __func__);
|
|
|
|
| 8375 |
}
|
| 8376 |
}
|
| 8377 |
} else {
|
| 8378 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8379 |
}
|
| 8380 |
}
|
| 8381 |
return;
|
|
|
|
| 8544 |
}
|
| 8545 |
}
|
| 8546 |
} else {
|
| 8547 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8548 |
}
|
| 8549 |
}
|
| 8550 |
|
|
|
|
| 8630 |
}
|
| 8631 |
}
|
| 8632 |
} else {
|
| 8633 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8634 |
}
|
| 8635 |
} else {
|
| 8636 |
//printf("%s: this is not optimal - fix me\n", __func__);
|
|
|
|
| 8690 |
}
|
| 8691 |
}
|
| 8692 |
} else {
|
| 8693 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8694 |
}
|
| 8695 |
}
|
| 8696 |
|
|
|
|
| 8861 |
}
|
| 8862 |
}
|
| 8863 |
} else {
|
| 8864 |
+
GGML_ABORT("fatal error"); // TODO: implement
|
| 8865 |
}
|
| 8866 |
}
|
| 8867 |
|
|
|
|
 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 }
 else {
     // src1 is not contiguous
+    GGML_ABORT("fatal error");
 }
 }

 }
 else {
     // src1 is not contiguous
+    GGML_ABORT("fatal error");
 }
 }

 }
 else {
     // src1 is not contiguous
+    GGML_ABORT("fatal error");
 }
 }

 }
 else {
     // src1 is not contiguous
+    GGML_ABORT("fatal error");
 }
 }

     ggml_compute_forward_add_f32(params, dst);
 }
 else {
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_TYPE_F16:

     ggml_compute_forward_add_f16_f32(params, dst);
 }
 else {
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_TYPE_BF16:

     ggml_compute_forward_add_bf16_f32(params, dst);
 }
 else {
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_TYPE_Q4_0:

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }
     ggml_compute_forward_add1_f16_f32(params, dst);
 }
 else {
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_TYPE_BF16:

     ggml_compute_forward_add1_bf16_f32(params, dst);
 }
 else {
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_TYPE_Q4_0:

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 case GGML_TYPE_Q4_0_8_8:
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }
 // ggml_compute_forward_leaky_relu

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }
 } break;
 case GGML_TYPE_F16:
     {
+        GGML_ABORT("fatal error"); // todo
         // ggml_compute_forward_out_prod_f16_f32(params, dst);
+    }
 case GGML_TYPE_F32:
     {
         ggml_compute_forward_out_prod_f32(params, dst);
     } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 case GGML_TYPE_Q4_0_8_8:
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }

 //static bool first = true;

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }

 //static bool first = true;

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 case GGML_TYPE_F64:
 case GGML_TYPE_COUNT:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 default:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 } break;
 case GGML_OP_COUNT:
     {
+        GGML_ABORT("fatal error");
+    }
 }
 }

 ////////////////////////////////////////////////////////////////////////////////
+struct ggml_hash_set ggml_hash_set_new(size_t size) {
+    size = ggml_hash_size(size);
+    struct ggml_hash_set result;
+    result.size = size;
+    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
+    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
+    return result;
+}
+
+void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
+    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
+}
+
+void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
+    GGML_FREE(hash_set->used);
+    GGML_FREE(hash_set->keys);
+}
+
+size_t ggml_hash_size(size_t min_sz) {
     // next primes after powers of two
     static const size_t primes[] = {
         2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,

     };
     static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);

+    // find the smallest prime that is larger or equal than min_sz
     size_t l = 0;
     size_t r = n_primes;
     while (l < r) {

     return sz;
 }
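The functions above are the heart of the commit: occupancy moves out of the keys array into a separate bitset, so resetting the set no longer has to clear every key slot. A standalone sketch of that layout (re-implemented here for illustration; the real ggml_bitset_* helpers are assumed to behave as below):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    typedef uint32_t bitset_t;

    static size_t bitset_size(size_t n)                     { return (n + 31) / 32; } // words needed for n bits
    static int    bitset_get (const bitset_t * b, size_t i) { return !!(b[i / 32] & (1u << (i % 32))); }
    static void   bitset_set (bitset_t * b, size_t i)       { b[i / 32] |= 1u << (i % 32); }

    struct demo_hash_set {
        size_t     size;
        bitset_t * used; // one bit per slot: the only state a reset must clear
        void **    keys; // may hold stale pointers; ignored while the bit is 0
    };

    // reset clears size/32 words instead of size pointer slots
    static void demo_hash_set_reset(struct demo_hash_set * s) {
        memset(s->used, 0, bitset_size(s->size) * sizeof(bitset_t));
    }

This is the "reduce hash table reset cost" of the commit title: a reset touches size/8 bytes of bitset instead of size pointer-sized key slots.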
 struct hash_map {
     struct ggml_hash_set set;
     struct ggml_tensor ** vals;

 static struct hash_map * ggml_new_hash_map(size_t size) {
     struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
     result->set = ggml_hash_set_new(size);
+    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
     return result;
 }

 static void ggml_hash_map_free(struct hash_map * map) {
+    ggml_hash_set_free(&map->set);
     GGML_FREE(map->vals);
     GGML_FREE(map);
 }
     return node;
 }

+if (!ggml_hash_contains(&graph->visited_hash_set, node)) {
     return node;
 }

     return node;
 }

+size_t i = ggml_hash_find(&replacements->set, node);
+GGML_ASSERT(i != GGML_HASHSET_FULL); // assert that not full
 if (replacements->set.keys[i] == node) {
     return replacements->vals[i];
 }

 // insert checkpoints in replacements
 for (int i = 0; i < n_checkpoints; ++i) {
+    size_t k = ggml_hash_find(&replacements->set, checkpoints[i]);
+    GGML_ASSERT(k != GGML_HASHSET_FULL); // assert that not full
     GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
     replacements->set.keys[k] = checkpoints[i];
     replacements->vals[k] = checkpoints[i];

 // functions to change gradients considering the case that input a might be initial gradient with zero value

+static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         return b;
     } else {

     }
 }

+static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f);
         return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);

     }
 }

+static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         return ggml_repeat(ctx, b, a);
     } else {

     }
 }

+static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
     if (ggml_hash_contains(zero_table, a)) {
         return ggml_neg(ctx, b);
     } else {

     }
 }

+static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
     struct ggml_tensor * src0 = tensor->src[0];
     struct ggml_tensor * src1 = tensor->src[1];
     struct ggml_tensor * src2 = tensor->src[2];
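The ggml_hash_find/ggml_hash_insert calls above follow the usual linear-probing contract: find returns the slot holding the key, or the first free slot, or a full-table sentinel; insert reports when the key is already present. A sketch under those assumptions, continuing the demo_hash_set/bitset definitions from the earlier sketch (sentinel names here are illustrative, not ggml's):

    #include <stddef.h>
    #include <stdint.h>

    #define DEMO_FULL           ((size_t) -1)
    #define DEMO_ALREADY_EXISTS ((size_t) -2)

    static size_t demo_hash(const void * p) { return (uintptr_t) p >> 4; }

    static size_t demo_hash_find(const struct demo_hash_set * s, const void * key) {
        size_t h = demo_hash(key) % s->size;
        size_t i = h;
        while (bitset_get(s->used, i) && s->keys[i] != key) { // probe past occupied slots
            i = (i + 1) % s->size;
            if (i == h) {
                return DEMO_FULL; // wrapped around: table is full
            }
        }
        return i; // either the key's slot or the first free slot
    }

    static size_t demo_hash_insert(struct demo_hash_set * s, void * key) {
        size_t i = demo_hash_find(s, key);
        if (i == DEMO_FULL) {
            return DEMO_FULL;
        }
        if (bitset_get(s->used, i)) {
            return DEMO_ALREADY_EXISTS; // slot is occupied by this very key
        }
        bitset_set(s->used, i); // occupancy lives in the bitset, not in keys[i] == NULL
        s->keys[i] = key;
        return i;
    }

Note that because occupancy is now a bit rather than "keys[i] == NULL", a freshly reset table can be probed correctly even though keys still holds stale pointers.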
 case GGML_OP_MEAN:
 case GGML_OP_ARGMAX:
     {
-        GGML_ASSERT(false); // TODO: implement
-    } break;
+        GGML_ABORT("fatal error"); // TODO: implement
+    }
 case GGML_OP_REPEAT:
     {
         // necessary for llama

 } break;
 case GGML_OP_CONCAT:
     {
-        GGML_ASSERT(false); // TODO: implement
-    } break;
+        GGML_ABORT("fatal error"); // TODO: implement
+    }
 case GGML_OP_SILU_BACK:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_NORM:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_RMS_NORM:
     {
         // necessary for llama

 } break;
 case GGML_OP_RMS_NORM_BACK:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_GROUP_NORM:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_MUL_MAT:
     {
         // https://cs231n.github.io/optimization-2/#staged

 } break;
 case GGML_OP_MUL_MAT_ID:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_OUT_PROD:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_SCALE:
     {
         // necessary for llama

 } break;
 case GGML_OP_GET_ROWS_BACK:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_DIAG:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_DIAG_MASK_INF:
     {
         // necessary for llama

 } break;
 case GGML_OP_SOFT_MAX_BACK:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_ROPE:
     {
         // necessary for llama

 } break;
 case GGML_OP_CLAMP:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_CONV_TRANSPOSE_1D:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_IM2COL:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_CONV_TRANSPOSE_2D:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_POOL_1D:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_POOL_2D:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_UPSCALE:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_PAD:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_ARANGE:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_TIMESTEP_EMBEDDING:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_ARGSORT:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_LEAKY_RELU:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_FLASH_ATTN_EXT:
     {
         struct ggml_tensor * flash_grad = NULL;

 } break;
 case GGML_OP_FLASH_ATTN_BACK:
     {
-        GGML_ASSERT(false); // not supported
-    } break;
+        GGML_ABORT("fatal error"); // not supported
+    }
 case GGML_OP_SSM_CONV:
 case GGML_OP_SSM_SCAN:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_OP_WIN_PART:
 case GGML_OP_WIN_UNPART:
 case GGML_OP_UNARY:

 } break;
 case GGML_UNARY_OP_TANH:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_UNARY_OP_ELU:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_UNARY_OP_RELU:
     {
         if (src0->grad) {

 } break;
 case GGML_UNARY_OP_SIGMOID:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_UNARY_OP_GELU:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_UNARY_OP_GELU_QUICK:
     {
-        GGML_ASSERT(false); // TODO: not implemented
-    } break;
+        GGML_ABORT("fatal error"); // TODO: not implemented
+    }
 case GGML_UNARY_OP_SILU:
     {
         // necessary for llama

 }
 } break;
 default:
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_OP_GET_REL_POS:

 case GGML_OP_MAP_CUSTOM2:
 case GGML_OP_MAP_CUSTOM3:
     {
-        GGML_ASSERT(false); // not supported
-    } break;
+        GGML_ABORT("fatal error"); // not supported
+    }
 case GGML_OP_CROSS_ENTROPY_LOSS:
     {
         if (src0->grad) {

 } break;
 case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
     {
-        GGML_ASSERT(false); // not supported
-    } break;
+        GGML_ABORT("fatal error"); // not supported
+    }
 case GGML_OP_NONE:
     {
         // nop
     } break;
 case GGML_OP_COUNT:
     {
-        GGML_ASSERT(false);
-    } break;
+        GGML_ABORT("fatal error");
+    }
 }

 for (int i = 0; i < GGML_MAX_SRC; ++i) {
 }

 // check if already visited
-if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) {
+if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
     return;
 }

 struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
 for (int i = 0; i < gf->n_nodes; i++) {
     if (gf->grads[i]) {
-        ggml_hash_insert(zero_table, gf->grads[i]);
+        ggml_hash_insert(&zero_table, gf->grads[i]);
     }
 }

 // inplace operations to add gradients are not created by ggml_compute_backward
 // use allocator to automatically make inplace operations
 if (node->grad) {
-    ggml_compute_backward(ctx, node, zero_table);
+    ggml_compute_backward(ctx, node, &zero_table);
 }
 }

     }
 }

-ggml_hash_set_free(zero_table);
+ggml_hash_set_free(&zero_table);
 }
+static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
+    void * ptr = *p;
+    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
+    *p = (void *) ((char *) ptr + size);
+    return ptr;
+}

 static size_t ggml_graph_nbytes(size_t size, bool grads) {
+    size_t hash_size = ggml_hash_size(size * 2);
+    void * p = 0;
+    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
+    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
+    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
+    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
     if (grads) {
+        incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
     }
+    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
+
+    size_t nbytes = (size_t) p;
     return nbytes;
 }
 struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
 struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);

+// the size of the hash table is doubled since it needs to hold both nodes and leafs
 size_t hash_size = ggml_hash_size(size * 2);

+void * p = cgraph + 1;
+
+struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
+ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));

+// check that we allocated the correct amount of memory
+assert(obj_size == (size_t)((char *)p - (char *)cgraph));

 *cgraph = (struct ggml_cgraph) {
     /*.size       =*/ size,

     /*.nodes      =*/ nodes_ptr,
     /*.grads      =*/ grads_ptr,
     /*.leafs      =*/ leafs_ptr,
-    /*.hash_table =*/ { hash_size, hash_keys_ptr },
+    /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
     /*.order      =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
 };

+ggml_hash_set_reset(&cgraph->visited_hash_set);
+
 return cgraph;
 }
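incr_ptr_aligned doubles as a size calculator and a carving cursor: run it over a NULL base to measure, then over the real buffer to hand out sub-arrays with the same alignment. A compact sketch of the pattern with hypothetical names (the measure-from-address-0 trick mirrors ggml_graph_nbytes above and is only valid under the same flat-address-space assumption):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) // same rounding as GGML_PAD

    static void * bump_aligned(void ** p, size_t size, size_t align) {
        void * ptr = (void *) PAD((uintptr_t) *p, align); // round cursor up to alignment
        *p = (char *) ptr + size;                         // advance past this sub-array
        return ptr;
    }

    int main(void) {
        const size_t n = 100;

        // pass 1: measure, starting the cursor at address 0
        void * p = 0;
        bump_aligned(&p, n * sizeof(double), sizeof(double));
        bump_aligned(&p, n * sizeof(int),    sizeof(int));
        size_t nbytes = (size_t) p;

        // pass 2: carve the real buffer with the same sequence of calls
        void * buf = malloc(nbytes);
        p = buf;
        double * xs = bump_aligned(&p, n * sizeof(double), sizeof(double));
        int    * ys = bump_aligned(&p, n * sizeof(int),    sizeof(int));
        assert((size_t) ((char *) p - (char *) buf) <= nbytes);

        xs[0] = 1.0; ys[0] = 1; // both arrays live in the single allocation
        free(buf);
        return 0;
    }

Keeping nodes, leafs, grads, hash keys, and the new used bitset in one allocation is what lets the cgraph stay a single ggml_object inside the context buffer.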
     /*.nodes      =*/ cgraph0->nodes + i0,
     /*.grads      =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
     /*.leafs      =*/ NULL,
-    /*.hash_table =*/ { 0, NULL },
+    /*.hash_table =*/ { 0, NULL, NULL },
     /*.order      =*/ cgraph0->order,
 };
 void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
     GGML_ASSERT(dst->size >= src->n_leafs);
     GGML_ASSERT(dst->size >= src->n_nodes);
-    GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size);
+    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);

     dst->n_leafs = src->n_leafs;
     dst->n_nodes = src->n_nodes;

     }
 }

-for (size_t i = 0; i < src->visited_hash_table.size; ++i) {
-    if (src->visited_hash_table.keys[i]) {
-        ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]);
+for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
+    if (src->visited_hash_set.keys[i]) {
+        ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
     }
 }
 }
 void ggml_graph_clear(struct ggml_cgraph * cgraph) {
     cgraph->n_leafs = 0;
     cgraph->n_nodes = 0;
+    ggml_hash_set_reset(&cgraph->visited_hash_set);
 }

 //
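ggml_graph_clear is where call sites collect the win: a graph that is rebuilt every iteration now pays only the bitset memset per reset. A hedged usage sketch (these are real ggml entry points to the best of my knowledge, but treat the exact signatures as assumptions; build_step is hypothetical):

    #include "ggml.h"

    // hypothetical: builds one iteration's output tensor in ctx
    extern struct ggml_tensor * build_step(struct ggml_context * ctx, int iter);

    void run(struct ggml_context * ctx, int n_iters) {
        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        for (int iter = 0; iter < n_iters; ++iter) {
            ggml_graph_clear(gf); // now just resets counters and memsets the used bitset
            ggml_build_forward_expand(gf, build_step(ctx, iter)); // visited_hash_set dedups nodes
            ggml_graph_compute_with_ctx(ctx, gf, 4 /*n_threads*/);
        }
    }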
         n_tasks = n_threads;
     } break;
 default:
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 }
 break;
 case GGML_OP_SILU_BACK:

 } break;
 case GGML_OP_COUNT:
     {
-        GGML_ASSERT(false);
-    } break;
+        GGML_ABORT("fatal error");
+    }
 default:
     {
         fprintf(stderr, "%s: op not implemented: ", __func__);

 } else {
     fprintf(stderr, "%d\n", node->op);
 }
-GGML_ASSERT(false);
-} break;
+GGML_ABORT("fatal error");
+}
 }

 assert(n_tasks > 0);
     cur += sizeof(float)*ne00*ne01*ne02;
     cur += sizeof(float)*ne10*ne11;
 } else {
-    GGML_ASSERT(false);
+    GGML_ABORT("fatal error");
 }
 } break;
 case GGML_OP_CONV_TRANSPOSE_2D:

 } break;
 case GGML_OP_COUNT:
     {
-        GGML_ASSERT(false);
-    } break;
+        GGML_ABORT("fatal error");
+    }
 default:
     break;
 }
     (*step) *= width;
 }

-return GGML_LINESEARCH_FAIL;
+GGML_ABORT("line search failed");
+
+//return GGML_LINESEARCH_FAIL;
 }

 static enum ggml_opt_result ggml_opt_lbfgs(

     step[0] = 1.0;
 }

-return GGML_OPT_RESULT_DID_NOT_CONVERGE;
+GGML_ABORT("lbfgs failed");
+
+//return GGML_OPT_RESULT_DID_NOT_CONVERGE;
 }

 struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
     }
 } break;
 case GGUF_TYPE_ARRAY:
-default: GGML_ASSERT(false && "invalid type");
+default: GGML_ABORT("invalid type");
 }
 } break;
-default: GGML_ASSERT(false && "invalid type");
+default: GGML_ABORT("invalid type");
 }

 if (!ok) {

     gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
     GGML_FREE((void *)data);
 } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
-    GGML_ASSERT(false && "nested arrays not supported");
+    GGML_ABORT("nested arrays not supported");
 } else {
     gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
 }
 } break;
-default: GGML_ASSERT(false && "invalid type");
+default: GGML_ABORT("invalid type");
 }
 }
 }

 struct gguf_context * ctx,
 const struct ggml_tensor * tensor) {
 if (gguf_find_tensor(ctx, tensor->name) != -1) {
-    GGML_ASSERT(false && "duplicated tensor name");
+    GGML_ABORT("duplicated tensor name");
 }

 const int idx = ctx->header.n_tensors;

 void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
     const int idx = gguf_find_tensor(ctx, name);
     if (idx < 0) {
-        GGML_ASSERT(false && "tensor not found");
+        GGML_ABORT("tensor not found");
     }

     ctx->infos[idx].type = type;

 void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
     const int idx = gguf_find_tensor(ctx, name);
     if (idx < 0) {
-        GGML_ASSERT(false && "tensor not found");
+        GGML_ABORT("tensor not found");
     }

     ctx->infos[idx].data = data;

 }
 } break;
 case GGUF_TYPE_ARRAY:
-default: GGML_ASSERT(false && "invalid type");
+default: GGML_ABORT("invalid type");
 }
 } break;
-default: GGML_ASSERT(false && "invalid type");
+default: GGML_ABORT("invalid type");
 }
 }

 void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
     FILE * file = ggml_fopen(fname, "wb");
     if (!file) {
-        GGML_ASSERT(false && "failed to open file for writing");
+        GGML_ABORT("failed to open file for writing");
     }

     struct gguf_buf buf = gguf_buf_init(16*1024);