slaren committed on
Commit
007ebd7
·
unverified ·
1 Parent(s): 8cdfa17

update examples and tests

Browse files
Files changed (1) hide show
  1. examples/common-ggml.cpp +2 -27
examples/common-ggml.cpp CHANGED
@@ -90,8 +90,6 @@ bool ggml_common_quantize_0(
90
  std::vector<ggml_fp16_t> data_f16;
91
  std::vector<float> data_f32;
92
 
93
- std::vector<int64_t> hist_all(1 << 4, 0);
94
-
95
  while (true) {
96
  int32_t n_dims;
97
  int32_t length;
@@ -176,8 +174,6 @@ bool ggml_common_quantize_0(
176
  work.resize(nelements); // for quantization
177
 
178
  size_t cur_size = 0;
179
- std::vector<int64_t> hist_cur(1 << 4, 0);
180
-
181
  switch ((ggml_type) ttype) {
182
  case GGML_TYPE_Q4_0:
183
  case GGML_TYPE_Q4_1:
@@ -190,7 +186,7 @@ bool ggml_common_quantize_0(
190
  case GGML_TYPE_Q5_K:
191
  case GGML_TYPE_Q6_K:
192
  {
193
- cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], hist_cur.data(), nullptr);
194
  } break;
195
  case GGML_TYPE_F32:
196
  case GGML_TYPE_F16:
@@ -217,15 +213,7 @@ bool ggml_common_quantize_0(
217
  fout.write(reinterpret_cast<char *>(work.data()), cur_size);
218
  total_size_new += cur_size;
219
 
220
- printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
221
- for (int i = 0; i < (int) hist_cur.size(); ++i) {
222
- hist_all[i] += hist_cur[i];
223
- }
224
-
225
- for (int i = 0; i < (int) hist_cur.size(); ++i) {
226
- printf("%5.3f ", hist_cur[i] / (float)nelements);
227
- }
228
- printf("\n");
229
  } else {
230
  printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
231
  fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
@@ -238,18 +226,5 @@ bool ggml_common_quantize_0(
238
  printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
239
  printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
240
 
241
- {
242
- int64_t sum_all = 0;
243
- for (int i = 0; i < (int) hist_all.size(); ++i) {
244
- sum_all += hist_all[i];
245
- }
246
-
247
- printf("%s: hist: ", __func__);
248
- for (int i = 0; i < (int) hist_all.size(); ++i) {
249
- printf("%5.3f ", hist_all[i] / (float)sum_all);
250
- }
251
- printf("\n");
252
- }
253
-
254
  return true;
255
  }
 
90
  std::vector<ggml_fp16_t> data_f16;
91
  std::vector<float> data_f32;
92
 
 
 
93
  while (true) {
94
  int32_t n_dims;
95
  int32_t length;
 
174
  work.resize(nelements); // for quantization
175
 
176
  size_t cur_size = 0;
 
 
177
  switch ((ggml_type) ttype) {
178
  case GGML_TYPE_Q4_0:
179
  case GGML_TYPE_Q4_1:
 
186
  case GGML_TYPE_Q5_K:
187
  case GGML_TYPE_Q6_K:
188
  {
189
+ cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr);
190
  } break;
191
  case GGML_TYPE_F32:
192
  case GGML_TYPE_F16:
 
213
  fout.write(reinterpret_cast<char *>(work.data()), cur_size);
214
  total_size_new += cur_size;
215
 
216
+ printf("size = %8.2f MB -> %8.2f MB\n", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
 
 
 
 
 
 
 
 
217
  } else {
218
  printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
219
  fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
 
226
  printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
227
  printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  return true;
230
  }