Spaces:
Sleeping
Sleeping
slaren
committed on
update examples and tests
Browse files- examples/common-ggml.cpp +2 -27
examples/common-ggml.cpp
CHANGED
|
@@ -90,8 +90,6 @@ bool ggml_common_quantize_0(
|
|
| 90 |
std::vector<ggml_fp16_t> data_f16;
|
| 91 |
std::vector<float> data_f32;
|
| 92 |
|
| 93 |
-
std::vector<int64_t> hist_all(1 << 4, 0);
|
| 94 |
-
|
| 95 |
while (true) {
|
| 96 |
int32_t n_dims;
|
| 97 |
int32_t length;
|
|
@@ -176,8 +174,6 @@ bool ggml_common_quantize_0(
|
|
| 176 |
work.resize(nelements); // for quantization
|
| 177 |
|
| 178 |
size_t cur_size = 0;
|
| 179 |
-
std::vector<int64_t> hist_cur(1 << 4, 0);
|
| 180 |
-
|
| 181 |
switch ((ggml_type) ttype) {
|
| 182 |
case GGML_TYPE_Q4_0:
|
| 183 |
case GGML_TYPE_Q4_1:
|
|
@@ -190,7 +186,7 @@ bool ggml_common_quantize_0(
|
|
| 190 |
case GGML_TYPE_Q5_K:
|
| 191 |
case GGML_TYPE_Q6_K:
|
| 192 |
{
|
| 193 |
-
cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0],
|
| 194 |
} break;
|
| 195 |
case GGML_TYPE_F32:
|
| 196 |
case GGML_TYPE_F16:
|
|
@@ -217,15 +213,7 @@ bool ggml_common_quantize_0(
|
|
| 217 |
fout.write(reinterpret_cast<char *>(work.data()), cur_size);
|
| 218 |
total_size_new += cur_size;
|
| 219 |
|
| 220 |
-
printf("size = %8.2f MB -> %8.2f MB
|
| 221 |
-
for (int i = 0; i < (int) hist_cur.size(); ++i) {
|
| 222 |
-
hist_all[i] += hist_cur[i];
|
| 223 |
-
}
|
| 224 |
-
|
| 225 |
-
for (int i = 0; i < (int) hist_cur.size(); ++i) {
|
| 226 |
-
printf("%5.3f ", hist_cur[i] / (float)nelements);
|
| 227 |
-
}
|
| 228 |
-
printf("\n");
|
| 229 |
} else {
|
| 230 |
printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
|
| 231 |
fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
|
|
@@ -238,18 +226,5 @@ bool ggml_common_quantize_0(
|
|
| 238 |
printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
|
| 239 |
printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
|
| 240 |
|
| 241 |
-
{
|
| 242 |
-
int64_t sum_all = 0;
|
| 243 |
-
for (int i = 0; i < (int) hist_all.size(); ++i) {
|
| 244 |
-
sum_all += hist_all[i];
|
| 245 |
-
}
|
| 246 |
-
|
| 247 |
-
printf("%s: hist: ", __func__);
|
| 248 |
-
for (int i = 0; i < (int) hist_all.size(); ++i) {
|
| 249 |
-
printf("%5.3f ", hist_all[i] / (float)sum_all);
|
| 250 |
-
}
|
| 251 |
-
printf("\n");
|
| 252 |
-
}
|
| 253 |
-
|
| 254 |
return true;
|
| 255 |
}
|
|
|
|
| 90 |
std::vector<ggml_fp16_t> data_f16;
|
| 91 |
std::vector<float> data_f32;
|
| 92 |
|
|
|
|
|
|
|
| 93 |
while (true) {
|
| 94 |
int32_t n_dims;
|
| 95 |
int32_t length;
|
|
|
|
| 174 |
work.resize(nelements); // for quantization
|
| 175 |
|
| 176 |
size_t cur_size = 0;
|
|
|
|
|
|
|
| 177 |
switch ((ggml_type) ttype) {
|
| 178 |
case GGML_TYPE_Q4_0:
|
| 179 |
case GGML_TYPE_Q4_1:
|
|
|
|
| 186 |
case GGML_TYPE_Q5_K:
|
| 187 |
case GGML_TYPE_Q6_K:
|
| 188 |
{
|
| 189 |
+
cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr);
|
| 190 |
} break;
|
| 191 |
case GGML_TYPE_F32:
|
| 192 |
case GGML_TYPE_F16:
|
|
|
|
| 213 |
fout.write(reinterpret_cast<char *>(work.data()), cur_size);
|
| 214 |
total_size_new += cur_size;
|
| 215 |
|
| 216 |
+
printf("size = %8.2f MB -> %8.2f MB\n", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
} else {
|
| 218 |
printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
|
| 219 |
fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
|
|
|
|
| 226 |
printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
|
| 227 |
printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
|
| 228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
return true;
|
| 230 |
}
|