ncnn Part 6: ncnn Post-Training Quantization (a three-part series) - ncnnoptimize

1 NetOptimize

The class NetOptimize, derived from ncnn::Net, holds all of the graph-level optimization passes:

class NetOptimize : public ncnn::Net {
public:
    // 0=fp32 1=fp16
    int storage_type;

public:
    int fuse_batchnorm_scale();
    int fuse_convolution_batchnorm();
    int fuse_convolutiondepthwise_batchnorm();
    int fuse_deconvolution_batchnorm();
    int fuse_deconvolutiondepthwise_batchnorm();
    int fuse_innerproduct_batchnorm();
    int fuse_innerproduct_dropout();
    int fuse_convolution_activation();
    int fuse_convolutiondepthwise_activation();
    int fuse_deconvolution_activation();
    int fuse_deconvolutiondepthwise_activation();
    int fuse_innerproduct_activation();

    int eliminate_dropout();
    int eliminate_pooling1x1();
    int eliminate_noop();
    int eliminate_orphaned_memorydata();
    int eliminate_flatten_after_global_pooling();
    int eliminate_reshape_after_global_pooling();
    int eliminate_flatten_after_innerproduct();
    int eliminate_reshape_before_binaryop();
    int replace_convolution_with_innerproduct_after_global_pooling();
    int replace_convolution_with_innerproduct_after_innerproduct();

public:
    int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp);
    int fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp);
    int fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp);
    int fwrite_weight_data(const ncnn::Mat& data, FILE* bp);
    int save(const char* parampath, const char* binpath);

#if ... // platform-specific guard
    void gauss_random(ncnn::Mat& m);
    void find_fastest_fp32_conv(const char* name, int w, int h, int c);
    int support_fp32_conv_type(const ncnn::Convolution* op, const ncnn::Mat& mat, const int type);
#endif
};
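For orientation, here is a minimal sketch of how such a pass pipeline is typically driven, modeled loosely on the main() flow of the ncnnoptimize tool; argument checking is simplified and the exact pass order may differ between ncnn versions.

#include <stdio.h>
#include <stdlib.h>
#include "net.h" // ncnn::Net; NetOptimize as declared above

int main(int argc, char** argv) {
    if (argc != 6) {
        fprintf(stderr, "usage: %s inparam inbin outparam outbin flag\n", argv[0]);
        return -1;
    }

    NetOptimize optimizer;
    optimizer.storage_type = atoi(argv[5]); // 0 = fp32, 1 = fp16 weight storage

    optimizer.load_param(argv[1]);
    optimizer.load_model(argv[2]);

    // fuse adjacent layers into their producer
    optimizer.fuse_batchnorm_scale();
    optimizer.fuse_convolution_batchnorm();
    optimizer.fuse_convolutiondepthwise_batchnorm();
    optimizer.fuse_deconvolution_batchnorm();
    optimizer.fuse_deconvolutiondepthwise_batchnorm();
    optimizer.fuse_innerproduct_batchnorm();
    optimizer.fuse_innerproduct_dropout();
    optimizer.fuse_convolution_activation();
    optimizer.fuse_convolutiondepthwise_activation();
    optimizer.fuse_deconvolution_activation();
    optimizer.fuse_deconvolutiondepthwise_activation();
    optimizer.fuse_innerproduct_activation();

    // drop layers that have become no-ops
    optimizer.eliminate_dropout();
    optimizer.eliminate_pooling1x1();
    optimizer.eliminate_noop();
    optimizer.eliminate_flatten_after_global_pooling();
    optimizer.eliminate_reshape_after_global_pooling();
    optimizer.eliminate_flatten_after_innerproduct();
    optimizer.eliminate_reshape_before_binaryop();
    optimizer.eliminate_orphaned_memorydata();

    // rewrite convolutions that now see 1x1 inputs
    optimizer.replace_convolution_with_innerproduct_after_global_pooling();
    optimizer.replace_convolution_with_innerproduct_after_innerproduct();

    // write the optimized param/bin pair
    optimizer.save(argv[3], argv[4]);
    return 0;
}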

1-1 fuse

1-1-1 fuse_batchnorm_scale (fuse BatchNorm and Scale)

v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
  = (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)

int NetOptimize::fuse_batchnorm_scale() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "BatchNorm") continue;

        // BatchNorm - Scale
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "Scale") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse BatchNorm - Scale to BatchNorm
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[i];
        ncnn::Scale* scale = (ncnn::Scale*)layers[j];

        fprintf(stderr, "fuse_batchnorm_scale %s %s\n", batchnorm->name.c_str(), scale->name.c_str());

        {
            // v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
            //   = (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)
            int channels = batchnorm->channels;
            float* slope = batchnorm->slope_data;
            float* bias = batchnorm->bias_data;

            for (int q = 0; q < channels; q++) {
                slope[q] = slope[q] * scale->scale_data[q];
                if (scale->bias_term)
                    bias[q] = bias[q] * scale->scale_data[q] + scale->bias_data[q];
                else
                    bias[q] = bias[q] * scale->scale_data[q];
            }
        }

        int top_blob_index_final = scale->tops[0];
        batchnorm->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        scale->type = "ncnnfused";
    }
    return 0;
}

1-1-2 fuse_convolution_batchnorm (fuse BatchNorm into Convolution)

// a = bias - slope * mean / sqrt(var + eps)
// b = slope / sqrt(var + eps)
// value = value * b + a
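Folding these per-output-channel constants into the convolution itself, which is what the loop in the code below does, amounts to:

weight'[oc][k] = weight[oc][k] * b[oc]
bias'[oc]     = bias[oc] * b[oc] + a[oc]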

fuse_convolution_batchnorm

int NetOptimize::fuse_convolution_batchnorm() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Convolution") continue;

        // Convolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "BatchNorm") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse Convolution - BatchNorm to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a
            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int q = 0; q < channels; q++) {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[q] + eps));
                a[q] = batchnorm->bias_data[q] - batchnorm->slope_data[q] * batchnorm->mean_data[q] / sqrt_var;
                b[q] = batchnorm->slope_data[q] / sqrt_var;
            }

            if (convolution->bias_term == 0) {
                // init bias as zero
                convolution->bias_term = 1;
                convolution->bias_data = ncnn::Mat(channels);
                convolution->bias_data.fill(0.f);
            }

            const int weight_per_outch = convolution->weight_data_size / channels;

            float* weight = convolution->weight_data;
            float* bias = convolution->bias_data;
            for (int q = 0; q < channels; q++) {
                // scale the weights of output channel q by b[q], fold a[q] into the bias
                float* conv_weight_outch = weight + weight_per_outch * q;
                for (int k = 0; k < weight_per_outch; k++)
                    conv_weight_outch[k] *= b[q];

                bias[q] = bias[q] * b[q] + a[q];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }
    return 0;
}

fuse_convolutiondepthwise_batchnorm

int NetOptimize::fuse_convolutiondepthwise_batchnorm() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "ConvolutionDepthWise") continue;

        // ConvolutionDepthWise - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "BatchNorm") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse ConvolutionDepthWise - BatchNorm to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolutiondepthwise_batchnorm %s %s\n", convolutiondepthwise->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a
            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int q = 0; q < channels; q++) {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[q] + eps));
                a[q] = batchnorm->bias_data[q] - batchnorm->slope_data[q] * batchnorm->mean_data[q] / sqrt_var;
                b[q] = batchnorm->slope_data[q] / sqrt_var;
            }

            if (convolutiondepthwise->bias_term == 0) {
                // init bias as zero
                convolutiondepthwise->bias_term = 1;
                convolutiondepthwise->bias_data = ncnn::Mat(channels);
                convolutiondepthwise->bias_data.fill(0.f);
            }

            const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;

            float* weight = convolutiondepthwise->weight_data;
            float* bias = convolutiondepthwise->bias_data;
            for (int q = 0; q < channels; q++) {
                float* conv_weight_outch = weight + weight_per_outch * q;
                for (int k = 0; k < weight_per_outch; k++)
                    conv_weight_outch[k] *= b[q];

                bias[q] = bias[q] * b[q] + a[q];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }
    return 0;
}

fuse_deconvolution_batchnorm

int NetOptimize::fuse_deconvolution_batchnorm() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Deconvolution") continue;

        // Deconvolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "BatchNorm") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse Deconvolution - BatchNorm to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_deconvolution_batchnorm %s %s\n", deconvolution->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a
            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int q = 0; q < channels; q++) {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[q] + eps));
                a[q] = batchnorm->bias_data[q] - batchnorm->slope_data[q] * batchnorm->mean_data[q] / sqrt_var;
                b[q] = batchnorm->slope_data[q] / sqrt_var;
            }

            if (deconvolution->bias_term == 0) {
                // init bias as zero
                deconvolution->bias_term = 1;
                deconvolution->bias_data = ncnn::Mat(channels);
                deconvolution->bias_data.fill(0.f);
            }

            const int weight_per_outch = deconvolution->weight_data_size / channels;

            float* weight = deconvolution->weight_data;
            float* bias = deconvolution->bias_data;
            for (int q = 0; q < channels; q++) {
                float* conv_weight_outch = weight + weight_per_outch * q;
                for (int k = 0; k < weight_per_outch; k++)
                    conv_weight_outch[k] *= b[q];

                bias[q] = bias[q] * b[q] + a[q];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }
    return 0;
}

fuse_deconvolutiondepthwise_batchnorm

int NetOptimize::fuse_deconvolutiondepthwise_batchnorm() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "DeconvolutionDepthWise") continue;

        // DeconvolutionDepthWise - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "BatchNorm") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse DeconvolutionDepthWise - BatchNorm to DeconvolutionDepthWise
        ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_deconvolutiondepthwise_batchnorm %s %s\n", deconvolutiondepthwise->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a
            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int q = 0; q < channels; q++) {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[q] + eps));
                a[q] = batchnorm->bias_data[q] - batchnorm->slope_data[q] * batchnorm->mean_data[q] / sqrt_var;
                b[q] = batchnorm->slope_data[q] / sqrt_var;
            }

            if (deconvolutiondepthwise->bias_term == 0) {
                // init bias as zero
                deconvolutiondepthwise->bias_term = 1;
                deconvolutiondepthwise->bias_data = ncnn::Mat(channels);
                deconvolutiondepthwise->bias_data.fill(0.f);
            }

            const int weight_per_outch = deconvolutiondepthwise->weight_data_size / channels;

            float* weight = deconvolutiondepthwise->weight_data;
            float* bias = deconvolutiondepthwise->bias_data;
            for (int q = 0; q < channels; q++) {
                float* conv_weight_outch = weight + weight_per_outch * q;
                for (int k = 0; k < weight_per_outch; k++)
                    conv_weight_outch[k] *= b[q];

                bias[q] = bias[q] * b[q] + a[q];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        deconvolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }
    return 0;
}

fuse_innerproduct_batchnorm

int NetOptimize::fuse_innerproduct_batchnorm() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "InnerProduct") continue;

        // InnerProduct - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "BatchNorm") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse InnerProduct - BatchNorm to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_innerproduct_batchnorm %s %s\n", innerproduct->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a
            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int q = 0; q < channels; q++) {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[q] + eps));
                a[q] = batchnorm->bias_data[q] - batchnorm->slope_data[q] * batchnorm->mean_data[q] / sqrt_var;
                b[q] = batchnorm->slope_data[q] / sqrt_var;
            }

            if (innerproduct->bias_term == 0) {
                // init bias as zero
                innerproduct->bias_term = 1;
                innerproduct->bias_data = ncnn::Mat(channels);
                innerproduct->bias_data.fill(0.f);
            }

            const int weight_per_outch = innerproduct->weight_data_size / channels;

            float* weight = innerproduct->weight_data;
            float* bias = innerproduct->bias_data;
            for (int q = 0; q < channels; q++) {
                float* conv_weight_outch = weight + weight_per_outch * q;
                for (int k = 0; k < weight_per_outch; k++)
                    conv_weight_outch[k] *= b[q];

                bias[q] = bias[q] * b[q] + a[q];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }
    return 0;
}

1-1-3 fuse_innerproduct_dropout

int NetOptimize::fuse_innerproduct_dropout() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "InnerProduct") continue;

        // InnerProduct - Dropout
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "Dropout") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse InnerProduct - Dropout to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Dropout* dropout = (ncnn::Dropout*)layers[j];

        fprintf(stderr, "fuse_innerproduct_dropout %s %s\n", innerproduct->name.c_str(), dropout->name.c_str());

        float scale = dropout->scale;
        if (scale != 1.f) {
            // fold the dropout scale into the weights and bias
            const int num_output = innerproduct->num_output;
            const int weight_per_outch = innerproduct->weight_data_size / num_output;

            float* weight = innerproduct->weight_data;
            for (int q = 0; q < num_output; q++) {
                float* conv_weight_outch = weight + weight_per_outch * q;
                for (int k = 0; k < weight_per_outch; k++)
                    conv_weight_outch[k] *= scale;
            }

            if (innerproduct->bias_term) {
                float* bias = innerproduct->bias_data;
                for (int q = 0; q < num_output; q++)
                    bias[q] *= scale;
            }
        }

        int top_blob_index_final = dropout->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        dropout->type = "ncnnfused";
    }
    return 0;
}

1-1-4 fuse_convolution_activation
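All of the activation fusions below work the same way: a trailing ReLU, Clip, or Sigmoid layer is absorbed into the preceding layer by setting its activation_type parameter (1 = ReLU, 2 = Leaky ReLU with the negative slope in activation_params[0], 3 = Clip with min and max in activation_params, 4 = Sigmoid), and the activation layer is then marked ncnnfused.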

fuse_convolution_activation

int NetOptimize::fuse_convolution_activation() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Convolution") continue;

        // Convolution - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse Convolution - Activation to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolution_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU") {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;
            if (relu->slope == 0.f) {
                convolution->activation_type = 1; // ReLU
            } else {
                convolution->activation_type = 2; // Leaky ReLU
                convolution->activation_params = ncnn::Mat(1);
                convolution->activation_params[0] = relu->slope;
            }
        } else if (activation->type == "Clip") {
            ncnn::Clip* clip = (ncnn::Clip*)activation;
            convolution->activation_type = 3; // Clip
            convolution->activation_params = ncnn::Mat(2);
            convolution->activation_params[0] = clip->min;
            convolution->activation_params[1] = clip->max;
        } else if (activation->type == "Sigmoid") {
            convolution->activation_type = 4; // Sigmoid
        }

        int top_blob_index_final = activation->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }
    return 0;
}

fuse_convolutiondepthwise_activation

int NetOptimize::fuse_convolutiondepthwise_activation() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "ConvolutionDepthWise") continue;

        // ConvolutionDepthWise - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse ConvolutionDepthWise - Activation to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolutiondepthwise_activation %s %s\n", convolutiondepthwise->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU") {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;
            if (relu->slope == 0.f) {
                convolutiondepthwise->activation_type = 1;
            } else {
                convolutiondepthwise->activation_type = 2;
                convolutiondepthwise->activation_params = ncnn::Mat(1);
                convolutiondepthwise->activation_params[0] = relu->slope;
            }
        } else if (activation->type == "Clip") {
            ncnn::Clip* clip = (ncnn::Clip*)activation;
            convolutiondepthwise->activation_type = 3;
            convolutiondepthwise->activation_params = ncnn::Mat(2);
            convolutiondepthwise->activation_params[0] = clip->min;
            convolutiondepthwise->activation_params[1] = clip->max;
        } else if (activation->type == "Sigmoid") {
            convolutiondepthwise->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }
    return 0;
}

fuse_deconvolution_activation

int NetOptimize::fuse_deconvolution_activation() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Deconvolution") continue;

        // Deconvolution - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse Deconvolution - Activation to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_deconvolution_activation %s %s\n", deconvolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU") {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;
            if (relu->slope == 0.f) {
                deconvolution->activation_type = 1;
            } else {
                deconvolution->activation_type = 2;
                deconvolution->activation_params = ncnn::Mat(1);
                deconvolution->activation_params[0] = relu->slope;
            }
        } else if (activation->type == "Clip") {
            ncnn::Clip* clip = (ncnn::Clip*)activation;
            deconvolution->activation_type = 3;
            deconvolution->activation_params = ncnn::Mat(2);
            deconvolution->activation_params[0] = clip->min;
            deconvolution->activation_params[1] = clip->max;
        } else if (activation->type == "Sigmoid") {
            deconvolution->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }
    return 0;
}

fuse_deconvolutiondepthwise_activation

int NetOptimize::fuse_deconvolutiondepthwise_activation() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "DeconvolutionDepthWise") continue;

        // DeconvolutionDepthWise - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse DeconvolutionDepthWise - Activation to DeconvolutionDepthWise
        ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_deconvolutiondepthwise_activation %s %s\n", deconvolutiondepthwise->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU") {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;
            if (relu->slope == 0.f) {
                deconvolutiondepthwise->activation_type = 1;
            } else {
                deconvolutiondepthwise->activation_type = 2;
                deconvolutiondepthwise->activation_params = ncnn::Mat(1);
                deconvolutiondepthwise->activation_params[0] = relu->slope;
            }
        } else if (activation->type == "Clip") {
            ncnn::Clip* clip = (ncnn::Clip*)activation;
            deconvolutiondepthwise->activation_type = 3;
            deconvolutiondepthwise->activation_params = ncnn::Mat(2);
            deconvolutiondepthwise->activation_params[0] = clip->min;
            deconvolutiondepthwise->activation_params[1] = clip->max;
        } else if (activation->type == "Sigmoid") {
            deconvolutiondepthwise->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        deconvolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }
    return 0;
}

fuse_innerproduct_activation

int NetOptimize::fuse_innerproduct_activation() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "InnerProduct") continue;

        // InnerProduct - Activation
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        // fuse InnerProduct - Activation to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_innerproduct_activation %s %s\n", innerproduct->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU") {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;
            if (relu->slope == 0.f) {
                innerproduct->activation_type = 1;
            } else {
                innerproduct->activation_type = 2;
                innerproduct->activation_params = ncnn::Mat(1);
                innerproduct->activation_params[0] = relu->slope;
            }
        } else if (activation->type == "Clip") {
            ncnn::Clip* clip = (ncnn::Clip*)activation;
            innerproduct->activation_type = 3;
            innerproduct->activation_params = ncnn::Mat(2);
            innerproduct->activation_params[0] = clip->min;
            innerproduct->activation_params[1] = clip->max;
        } else if (activation->type == "Sigmoid") {
            innerproduct->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }
    return 0;
}

1-2 eliminate

1-2-1 eliminate_dropout

int NetOptimize::eliminate_dropout() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Dropout") continue;

        ncnn::Dropout* dropout = (ncnn::Dropout*)layers[i];
        if (dropout->scale != 1.f) continue;

        // Any - Dropout
        int bottom_blob_index = layers[i]->bottoms[0];

        int j = i - 1;
        for (; j >= 0; j--) {
            if (layers[j]->type == "ncnnfused") continue;
            if (layers[j]->tops.size() != 1) continue;
            if (layers[j]->tops[0] == bottom_blob_index) break;
        }
        if (j == -1) continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_dropout %s %s\n", any->name.c_str(), dropout->name.c_str());

        int top_blob_index_final = dropout->tops[0];
        any->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        dropout->type = "ncnnfused";
    }
    return 0;
}

1-2-2 eliminate_pooling1x1

int NetOptimize::eliminate_pooling1x1() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Pooling") continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->pad_left != 0 || pooling->pad_right != 0 || pooling->pad_top != 0 || pooling->pad_bottom != 0) continue;
        if (pooling->kernel_w != 1 || pooling->kernel_h != 1 || pooling->stride_w != 1 || pooling->stride_h != 1) continue;
        if (pooling->global_pooling != 0) continue;

        // Any - Pooling
        int bottom_blob_index = layers[i]->bottoms[0];

        int top_i = -1;
        int j = i - 1;
        for (; j >= 0; j--) {
            if (layers[j]->type == "ncnnfused") continue;

            for (int k = 0; k < (int)layers[j]->tops.size(); k++) {
                if (layers[j]->tops[k] == bottom_blob_index) {
                    top_i = k;
                    break;
                }
            }
            if (top_i != -1) break;
        }
        if (j == -1) continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_pooling1x1 %s %s\n", any->name.c_str(), pooling->name.c_str());

        int top_blob_index_final = pooling->tops[0];
        any->tops[top_i] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        pooling->type = "ncnnfused";
    }
    return 0;
}

1-2-3 eliminate_noop

int NetOptimize::eliminate_noop() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Noop") continue;

        ncnn::Layer* noop = layers[i];
        if (noop->bottoms.empty()) {
            // Noop with no input: drop it and orphan its output blobs
            fprintf(stderr, "eliminate_noop %s\n", noop->name.c_str());

            size_t top_blob_count = noop->tops.size();
            for (int k = 0; k < top_blob_count; k++) {
                int top_blob_index_final = noop->tops[k];
                blobs[top_blob_index_final].producer = -1;
            }
            noop->type = "ncnnfused";
            continue;
        }

        // Any - Noop
        int bottom_blob_index = layers[i]->bottoms[0];

        int j = i - 1;
        for (; j >= 0; j--) {
            if (layers[j]->type == "ncnnfused") continue;
            if (layers[j]->tops.size() != 1) continue;
            if (layers[j]->tops[0] == bottom_blob_index) break;
        }
        if (j == -1) continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_noop %s %s\n", any->name.c_str(), noop->name.c_str());

        size_t top_blob_count = std::min(noop->tops.size(), any->tops.size());
        for (int k = 0; k < top_blob_count; k++) {
            int top_blob_index_final = noop->tops[k];
            any->tops[k] = top_blob_index_final;
            blobs[top_blob_index_final].producer = j;
        }
        noop->type = "ncnnfused";
    }
    return 0;
}

1-2-4 eliminate_orphaned_memorydata

int NetOptimize::eliminate_orphaned_memorydata() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "MemoryData") continue;

        // MemoryData - X
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type == "ncnnfused") continue;

            bool orphaned = true;
            for (int k = 0; k < (int)layers[j]->bottoms.size(); k++) {
                if (layers[j]->bottoms[k] == top_blob_index) {
                    orphaned = false;
                    break;
                }
            }
            if (!orphaned) break;
        }
        if (j < layer_count) continue;

        // assert orphaned == true
        fprintf(stderr, "eliminate_orphaned_memorydata %s\n", layers[i]->name.c_str());

        layers[i]->type = "ncnnfused";
    }
    return 0;
}

1-2-5 eliminate_reshape_after_global_pooling

int NetOptimize::eliminate_reshape_after_global_pooling() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Pooling") continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0) continue;

        // Pooling - Reshape
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "Reshape") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        ncnn::Reshape* reshape = (ncnn::Reshape*)layers[j];
        if (reshape->h != -233 || reshape->c != -233 || reshape->permute != 0) continue;

        fprintf(stderr, "eliminate_reshape_after_global_pooling %s %s\n", pooling->name.c_str(), reshape->name.c_str());

        int top_blob_index_final = reshape->tops[0];
        pooling->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        reshape->type = "ncnnfused";
    }
    return 0;
}

1-2-6 eliminate_flatten_after_global_pooling

int NetOptimize::eliminate_flatten_after_global_pooling() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Pooling") continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0) continue;

        // Pooling - Flatten
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "Flatten") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];

        fprintf(stderr, "eliminate_flatten_after_global_pooling %s %s\n", pooling->name.c_str(), flatten->name.c_str());

        int top_blob_index_final = flatten->tops[0];
        pooling->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        flatten->type = "ncnnfused";
    }
    return 0;
}

1-2-7 eliminate_flatten_after_innerproduct

int NetOptimize::eliminate_flatten_after_innerproduct() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "InnerProduct") continue;

        // InnerProduct - Flatten
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "Flatten") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];

        fprintf(stderr, "eliminate_flatten_after_innerproduct %s %s\n", innerproduct->name.c_str(), flatten->name.c_str());

        int top_blob_index_final = flatten->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        flatten->type = "ncnnfused";
    }
    return 0;
}

1-2-8 eliminate_reshape_before_binaryop

int NetOptimize::eliminate_reshape_before_binaryop() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Reshape") continue;

        ncnn::Reshape* reshape = (ncnn::Reshape*)layers[i];
        if (reshape->w != 1 || reshape->h != 1 || reshape->permute != 0) continue;

        // Reshape - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "BinaryOp") continue;
            if (layers[j]->bottoms.size() != 2) continue;
            if (layers[j]->bottoms[0] == top_blob_index || layers[j]->bottoms[1] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        fprintf(stderr, "eliminate_reshape_before_binaryop %s %s\n", reshape->name.c_str(), binaryop->name.c_str());

        // rewire the BinaryOp input to the Reshape's input blob
        int bottom_blob_index_final = reshape->bottoms[0];
        if (layers[j]->bottoms[0] == top_blob_index) binaryop->bottoms[0] = bottom_blob_index_final;
        if (layers[j]->bottoms[1] == top_blob_index) binaryop->bottoms[1] = bottom_blob_index_final;

        blobs[bottom_blob_index_final].consumers.erase(std::find(blobs[bottom_blob_index_final].consumers.begin(), blobs[bottom_blob_index_final].consumers.end(), i));
        blobs[bottom_blob_index_final].consumers.push_back(j);
        reshape->type = "ncnnfused";
    }
    return 0;
}

1-3 replace

1-3-1 replace_convolution_with_innerproduct_after_global_pooling
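After global pooling the blob is 1x1 spatially, so a convolution applied to it reads exactly one value per input channel and is mathematically a fully connected layer; replacing it with InnerProduct lets ncnn run the cheaper fully connected path while reusing the same weights unchanged.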

int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling() {
    const size_t layer_count = layers.size();
    for (int i = 0; i < layer_count; i++) {
        if (layers[i]->type != "Pooling") continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0) continue;

        // Pooling - Convolution
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++) {
            if (layers[j]->type != "Convolution") continue;
            if (layers[j]->bottoms.size() != 1) continue;
            if (layers[j]->bottoms[0] == top_blob_index) break;
        }
        if (j == layer_count) continue;

        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];

        fprintf(stderr, "replace_convolution_with_innerproduct_after_global_pooling %s %s\n", pooling->name.c_str(), convolution->name.c_str());

        // build an InnerProduct layer that takes over the Convolution's blobs and weights
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");

        innerproduct->type = "InnerProduct";
        innerproduct->name = convolution->name;
        innerproduct->bottoms = convolution->bottoms;
        innerproduct->tops = convolution->tops;

        ncnn::ParamDict pd;
        innerproduct->load_param(pd);

        innerproduct->num_output = convolution->num_output;
        innerproduct->bias_term = convolution->bias_term;
        innerproduct->weight_data_size = convolution->weight_data_size;

        innerproduct->weight_data = convolution->weight_data;
        innerproduct->bias_data = convolution->bias_data;

        innerproduct->activation_type = convolution->activation_type;
        innerproduct->activation_params = convolution->activation_params;

        layers[j] = innerproduct;
        delete convolution;
    }
    return 0;
}

1-3-2 replace_convolution_with_innerproduct_after_innerproduct

int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct() {
    const size_t layer_count = layers.size();
    // repeat until no more InnerProduct - Convolution pairs are found,
    // since each replacement can expose a new pair
    for (;;) {
        bool replaced = false;

        for (int i = 0; i < layer_count; i++) {
            if (layers[i]->type != "InnerProduct") continue;

            // InnerProduct - Convolution
            int top_blob_index = layers[i]->tops[0];

            int j = i + 1;
            for (; j < layer_count; j++) {
                if (layers[j]->type != "Convolution") continue;
                if (layers[j]->bottoms.size() != 1) continue;
                if (layers[j]->bottoms[0] == top_blob_index) break;
            }
            if (j == layer_count) continue;

            ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
            ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];

            fprintf(stderr, "replace_convolution_with_innerproduct_after_innerproduct %s %s\n", innerproduct->name.c_str(), convolution->name.c_str());

            ncnn::InnerProduct* innerproduct2 = (ncnn::InnerProduct*)ncnn::create_layer("InnerProduct");

            innerproduct2->type = "InnerProduct";
            innerproduct2->name = convolution->name;
            innerproduct2->bottoms = convolution->bottoms;
            innerproduct2->tops = convolution->tops;

            ncnn::ParamDict pd;
            innerproduct2->load_param(pd);

            innerproduct2->num_output = convolution->num_output;
            innerproduct2->bias_term = convolution->bias_term;
            innerproduct2->weight_data_size = convolution->weight_data_size;

            innerproduct2->weight_data = convolution->weight_data;
            innerproduct2->bias_data = convolution->bias_data;

            innerproduct2->activation_type = convolution->activation_type;
            innerproduct2->activation_params = convolution->activation_params;

            layers[j] = innerproduct2;
            delete convolution;

            replaced = true;
        }

        if (!replaced) break;
    }
    return 0;
}

2 Usage

./ncnnoptimize ncnn.param ncnn.bin new.param new.bin flag

The last argument, flag, selects the storage type of the weights written to the output bin: 0 means fp32, 1 means fp16.
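For example, to optimize a converted model and store the fused weights as fp16 (the model file names here are placeholders, not files shipped with ncnn):

./ncnnoptimize mobilenet.param mobilenet.bin mobilenet-opt.param mobilenet-opt.bin 1

With flag = 1 each weight is written in two bytes instead of four, so the .bin file roughly halves in size.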
