Spaces:
Running
Running
jiez
z5269887
committed on
Commit
·
1706870
1
Parent(s):
cd0c122
llama : add gguf_remove_key + remove split meta during quantize (llama/6591)
Browse files
* Remove split metadata when quantizing model shards
* Find metadata key by enum
* Correct loop range for gguf_remove_key and code format
* Free kv memory
---------
Co-authored-by: z5269887 <[email protected]>
ggml.c
CHANGED
|
@@ -20621,6 +20621,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
|
| 20621 |
return ok;
|
| 20622 |
}
|
| 20623 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20624 |
struct gguf_context * gguf_init_empty(void) {
|
| 20625 |
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
| 20626 |
|
|
@@ -20970,31 +20996,7 @@ void gguf_free(struct gguf_context * ctx) {
|
|
| 20970 |
if (ctx->kv) {
|
| 20971 |
// free string memory - not great..
|
| 20972 |
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
| 20973 |
-
|
| 20974 |
-
|
| 20975 |
-
if (kv->key.data) {
|
| 20976 |
-
GGML_FREE(kv->key.data);
|
| 20977 |
-
}
|
| 20978 |
-
|
| 20979 |
-
if (kv->type == GGUF_TYPE_STRING) {
|
| 20980 |
-
if (kv->value.str.data) {
|
| 20981 |
-
GGML_FREE(kv->value.str.data);
|
| 20982 |
-
}
|
| 20983 |
-
}
|
| 20984 |
-
|
| 20985 |
-
if (kv->type == GGUF_TYPE_ARRAY) {
|
| 20986 |
-
if (kv->value.arr.data) {
|
| 20987 |
-
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
| 20988 |
-
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
| 20989 |
-
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
| 20990 |
-
if (str->data) {
|
| 20991 |
-
GGML_FREE(str->data);
|
| 20992 |
-
}
|
| 20993 |
-
}
|
| 20994 |
-
}
|
| 20995 |
-
GGML_FREE(kv->value.arr.data);
|
| 20996 |
-
}
|
| 20997 |
-
}
|
| 20998 |
}
|
| 20999 |
|
| 21000 |
GGML_FREE(ctx->kv);
|
|
@@ -21219,6 +21221,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
|
|
| 21219 |
return n_kv;
|
| 21220 |
}
|
| 21221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21222 |
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
|
| 21223 |
const int idx = gguf_get_or_add_key(ctx, key);
|
| 21224 |
|
|
|
|
| 20621 |
return ok;
|
| 20622 |
}
|
| 20623 |
|
| 20624 |
+
static void gguf_free_kv(struct gguf_kv * kv) {
|
| 20625 |
+
if (kv->key.data) {
|
| 20626 |
+
GGML_FREE(kv->key.data);
|
| 20627 |
+
}
|
| 20628 |
+
|
| 20629 |
+
if (kv->type == GGUF_TYPE_STRING) {
|
| 20630 |
+
if (kv->value.str.data) {
|
| 20631 |
+
GGML_FREE(kv->value.str.data);
|
| 20632 |
+
}
|
| 20633 |
+
}
|
| 20634 |
+
|
| 20635 |
+
if (kv->type == GGUF_TYPE_ARRAY) {
|
| 20636 |
+
if (kv->value.arr.data) {
|
| 20637 |
+
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
| 20638 |
+
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
| 20639 |
+
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
| 20640 |
+
if (str->data) {
|
| 20641 |
+
GGML_FREE(str->data);
|
| 20642 |
+
}
|
| 20643 |
+
}
|
| 20644 |
+
}
|
| 20645 |
+
GGML_FREE(kv->value.arr.data);
|
| 20646 |
+
}
|
| 20647 |
+
}
|
| 20648 |
+
}
|
| 20649 |
+
|
| 20650 |
struct gguf_context * gguf_init_empty(void) {
|
| 20651 |
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
| 20652 |
|
|
|
|
| 20996 |
if (ctx->kv) {
|
| 20997 |
// free string memory - not great..
|
| 20998 |
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
| 20999 |
+
gguf_free_kv(&ctx->kv[i]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21000 |
}
|
| 21001 |
|
| 21002 |
GGML_FREE(ctx->kv);
|
|
|
|
| 21221 |
return n_kv;
|
| 21222 |
}
|
| 21223 |
|
| 21224 |
+
void gguf_remove_key(struct gguf_context * ctx, const char * key) {
|
| 21225 |
+
const int idx = gguf_find_key(ctx, key);
|
| 21226 |
+
if (idx >= 0) {
|
| 21227 |
+
const int n_kv = gguf_get_n_kv(ctx);
|
| 21228 |
+
gguf_free_kv(&ctx->kv[idx]);
|
| 21229 |
+
for (int i = idx; i < n_kv-1; ++i) {
|
| 21230 |
+
ctx->kv[i] = ctx->kv[i+1];
|
| 21231 |
+
}
|
| 21232 |
+
ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
|
| 21233 |
+
ctx->header.n_kv--;
|
| 21234 |
+
}
|
| 21235 |
+
}
|
| 21236 |
+
|
| 21237 |
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
|
| 21238 |
const int idx = gguf_get_or_add_key(ctx, key);
|
| 21239 |
|
ggml.h
CHANGED
|
@@ -2298,6 +2298,9 @@ extern "C" {
|
|
| 2298 |
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
| 2299 |
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
| 2300 |
|
|
|
|
|
|
|
|
|
|
| 2301 |
// overrides existing values or adds a new one
|
| 2302 |
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
| 2303 |
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
|
|
|
| 2298 |
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
| 2299 |
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
| 2300 |
|
| 2301 |
+
// removes key if it exists
|
| 2302 |
+
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
|
| 2303 |
+
|
| 2304 |
// overrides existing values or adds a new one
|
| 2305 |
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
| 2306 |
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|