slaren committed on
Commit
007ebd7
·
unverified ·
1 Parent(s): 8cdfa17

update examples and tests

Browse files
Files changed (1) hide show
  1. examples/common-ggml.cpp +2 -27
examples/common-ggml.cpp CHANGED
@@ -90,8 +90,6 @@ bool ggml_common_quantize_0(
90
  std::vector<ggml_fp16_t> data_f16;
91
  std::vector<float> data_f32;
92
 
93
- std::vector<int64_t> hist_all(1 << 4, 0);
94
-
95
  while (true) {
96
  int32_t n_dims;
97
  int32_t length;
@@ -176,8 +174,6 @@ bool ggml_common_quantize_0(
176
  work.resize(nelements); // for quantization
177
 
178
  size_t cur_size = 0;
179
- std::vector<int64_t> hist_cur(1 << 4, 0);
180
-
181
  switch ((ggml_type) ttype) {
182
  case GGML_TYPE_Q4_0:
183
  case GGML_TYPE_Q4_1:
@@ -190,7 +186,7 @@ bool ggml_common_quantize_0(
190
  case GGML_TYPE_Q5_K:
191
  case GGML_TYPE_Q6_K:
192
  {
193
- cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], hist_cur.data(), nullptr);
194
  } break;
195
  case GGML_TYPE_F32:
196
  case GGML_TYPE_F16:
@@ -217,15 +213,7 @@ bool ggml_common_quantize_0(
217
  fout.write(reinterpret_cast<char *>(work.data()), cur_size);
218
  total_size_new += cur_size;
219
 
220
- printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
221
- for (int i = 0; i < (int) hist_cur.size(); ++i) {
222
- hist_all[i] += hist_cur[i];
223
- }
224
-
225
- for (int i = 0; i < (int) hist_cur.size(); ++i) {
226
- printf("%5.3f ", hist_cur[i] / (float)nelements);
227
- }
228
- printf("\n");
229
  } else {
230
  printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
231
  fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
@@ -238,18 +226,5 @@ bool ggml_common_quantize_0(
238
  printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
239
  printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
240
 
241
- {
242
- int64_t sum_all = 0;
243
- for (int i = 0; i < (int) hist_all.size(); ++i) {
244
- sum_all += hist_all[i];
245
- }
246
-
247
- printf("%s: hist: ", __func__);
248
- for (int i = 0; i < (int) hist_all.size(); ++i) {
249
- printf("%5.3f ", hist_all[i] / (float)sum_all);
250
- }
251
- printf("\n");
252
- }
253
-
254
  return true;
255
  }
 
90
  std::vector<ggml_fp16_t> data_f16;
91
  std::vector<float> data_f32;
92
 
 
 
93
  while (true) {
94
  int32_t n_dims;
95
  int32_t length;
 
174
  work.resize(nelements); // for quantization
175
 
176
  size_t cur_size = 0;
 
 
177
  switch ((ggml_type) ttype) {
178
  case GGML_TYPE_Q4_0:
179
  case GGML_TYPE_Q4_1:
 
186
  case GGML_TYPE_Q5_K:
187
  case GGML_TYPE_Q6_K:
188
  {
189
+ cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr);
190
  } break;
191
  case GGML_TYPE_F32:
192
  case GGML_TYPE_F16:
 
213
  fout.write(reinterpret_cast<char *>(work.data()), cur_size);
214
  total_size_new += cur_size;
215
 
216
+ printf("size = %8.2f MB -> %8.2f MB\n", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
 
 
 
 
 
 
 
 
217
  } else {
218
  printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
219
  fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
 
226
  printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
227
  printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  return true;
230
  }