Caffe Source Code - The Net Class (Part 2)

Partial source code of net.cpp

// Continuing from the previous post, this part covers the Net class's forward and backward computation functions, as well as the functions for converting the network to and from HDF5 files or proto files.

template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {   //run the forward pass from layer start through layer end
  CHECK_GE(start, 0);     //check that start >= 0 and end < the total number of layers
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;         //accumulates the loss returned by each layer's forward pass
  for (int i = start; i <= end; ++i) {
    for (int c = 0; c < before_forward_.size(); ++c) {
      before_forward_[c]->run(i);   //callbacks invoked before each layer's forward computation
    }
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]); //call the layer's Forward() and get the loss it produces
    loss += layer_loss;   //accumulate
    if (debug_info_) { ForwardDebugInfo(i); }   //if debug output is enabled, print the mean values of the top blobs and parameter blobs
    for (int c = 0; c < after_forward_.size(); ++c) {
      after_forward_[c]->run(i);    //likewise, callbacks invoked after each layer's forward computation
    }
  }
  return loss;
}

template <typename Dtype>
Dtype Net<Dtype>::ForwardFrom(int start) {    //run the forward pass from layer start through the last layer
  return ForwardFromTo(start, layers_.size() - 1);
}

template <typename Dtype>
Dtype Net<Dtype>::ForwardTo(int end) {        //run the forward pass from the first layer through layer end
  return ForwardFromTo(0, end);
}

template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {    //run the forward pass over all layers of the network
  if (loss != NULL) {
    *loss = ForwardFromTo(0, layers_.size() - 1);   //store the returned loss
  } else {
    ForwardFromTo(0, layers_.size() - 1);
  }
  return net_output_blobs_;   //return the network's output blobs
}

template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
    const vector<Blob<Dtype>*> & bottom, Dtype* loss) {   //deprecated overload: treat bottom as the network input, then run the full forward pass
  LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: Forward(bottom, loss) "
      << "will be removed in a future version. Use Forward(loss).";
  // Copy bottom to net bottoms
  for (int i = 0; i < bottom.size(); ++i) {
    net_input_blobs_[i]->CopyFrom(*bottom[i]);    //copy the data in bottom into the network's input blobs
  }
  return Forward(loss);   //forward pass
}
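As a usage illustration (not part of net.cpp), here is a minimal sketch of driving the forward pass from user code. The file names and the output blob name "prob" are assumptions: they presume a deploy-style definition whose outputs include a blob named "prob".

#include <caffe/caffe.hpp>

int main() {
  caffe::Caffe::set_mode(caffe::Caffe::CPU);               // run on the CPU
  caffe::Net<float> net("deploy.prototxt", caffe::TEST);   // hypothetical deploy definition
  net.CopyTrainedLayersFrom("weights.caffemodel");         // hypothetical pretrained weights
  float loss = 0;
  const std::vector<caffe::Blob<float>*>& outputs = net.Forward(&loss);  // full forward pass
  // look up a named top blob; "prob" is an assumed output name of the deploy file
  const boost::shared_ptr<caffe::Blob<float> > prob = net.blob_by_name("prob");
  LOG(INFO) << "loss = " << loss << ", output blobs: " << outputs.size();
  return 0;
}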

template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end) {   //run the backward pass from layer start down to layer end
  CHECK_GE(end, 0);
  CHECK_LT(start, layers_.size());    //check the arguments: working from back to front, start must not exceed the last layer and end must not fall below the first layer
  for (int i = start; i >= end; --i) {
    for (int c = 0; c < before_backward_.size(); ++c) {
      before_backward_[c]->run(i);    //callbacks invoked before the backward computation
    }
    if (layer_need_backward_[i]) {    //does this layer need a backward pass?
      layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);  //backward computation
      if (debug_info_) { BackwardDebugInfo(i); }    //print debug info
    }
    for (int c = 0; c < after_backward_.size(); ++c) {
      after_backward_[c]->run(i);     //callbacks invoked after the backward computation
    }
  }
}

template <typename Dtype>
void Net<Dtype>::ForwardDebugInfo(const int layer_id) {       //print debug info about the forward results of layer layer_id
  for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
    const Blob<Dtype>& blob = *top_vecs_[layer_id][top_id];   //the top_id-th top blob of layer layer_id
    const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];    //the name of that blob
    const Dtype data_abs_val_mean = blob.asum_data() / blob.count();  //mean of the absolute values of the blob's data_
    LOG_IF(INFO, Caffe::root_solver())
        << "    [Forward] "
        << "Layer " << layer_names_[layer_id]
        << ", top blob " << blob_name
        << " data: " << data_abs_val_mean;    //print it
  }
  for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) {
    const Blob<Dtype>& blob = *layers_[layer_id]->blobs()[param_id];  //the param_id-th parameter blob of layer layer_id
    const int net_param_id = param_id_vecs_[layer_id][param_id];      //index of this parameter blob in params_
    const string& blob_name = param_display_names_[net_param_id];     //name of the parameter blob, looked up by that index
    const Dtype data_abs_val_mean = blob.asum_data() / blob.count();  //mean of the absolute values of the parameter blob's data_
    LOG_IF(INFO, Caffe::root_solver())
        << "    [Forward] "
        << "Layer " << layer_names_[layer_id]
        << ", param blob " << blob_name
        << " data: " << data_abs_val_mean;    //print the mean value
  }
}

template <typename Dtype>
void Net<Dtype>::BackwardDebugInfo(const int layer_id) {      //print debug info about the backward results of layer layer_id
  const vector<Blob<Dtype>*>& bottom_vec = bottom_vecs_[layer_id];
  for (int bottom_id = 0; bottom_id < bottom_vec.size(); ++bottom_id) { //the bottom_id-th bottom blob of layer layer_id
    if (!bottom_need_backward_[layer_id][bottom_id]) { continue; }      //skip bottoms that do not need gradients
    const Blob<Dtype>& blob = *bottom_vec[bottom_id];         //the bottom blob
    const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]];  //name of the bottom blob
    const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count();  //mean of the absolute values of the bottom blob's diff_
    LOG_IF(INFO, Caffe::root_solver())
        << "    [Backward] "
        << "Layer " << layer_names_[layer_id]
        << ", bottom blob " << blob_name
        << " diff: " << diff_abs_val_mean;    //print
  }
  for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) {
    if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; }   //skip parameters that do not need gradients
    const Blob<Dtype>& blob = *layers_[layer_id]->blobs()[param_id];    //the param_id-th parameter blob of layer layer_id
    const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count();    //mean of the absolute values of the parameter blob's diff_
    LOG_IF(INFO, Caffe::root_solver())
        << "    [Backward] "
        << "Layer " << layer_names_[layer_id]
        << ", param blob " << param_id
        << " diff: " << diff_abs_val_mean;    //print
  }
}

template <typename Dtype>
void Net<Dtype>::UpdateDebugInfo(const int param_id) {    //print some info about the param_id-th parameter of the net
  const Blob<Dtype>& blob = *params_[param_id];           //the param_id-th parameter blob of the net
  const int param_owner = param_owners_[param_id];        //index of the owner (source) parameter of the param_id-th parameter
  const string& layer_name = layer_names_[param_layer_indices_[param_id].first];  //name of the layer this parameter belongs to
  const string& param_display_name = param_display_names_[param_id];    //name of the parameter
  const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count();      //mean of the absolute values of the parameter's diff_
  if (param_owner < 0) {    //param_owner == -1 means the parameter owns itself (it is a source parameter)
    const Dtype data_abs_val_mean = blob.asum_data() / blob.count();  //mean of the absolute values of its data_
    LOG_IF(INFO, Caffe::root_solver())
        << "    [Update] Layer " << layer_name
        << ", param " << param_display_name
        << " data: " << data_abs_val_mean
        << "; diff: " << diff_abs_val_mean;   //print
  } else {  //the parameter is shared
    const string& owner_layer_name =
        layer_names_[param_layer_indices_[param_owner].first];  //name of the layer that owns the source parameter
    LOG_IF(INFO, Caffe::root_solver())
        << "    [Update] Layer " << layer_name
        << ", param blob " << param_display_name
        << " (owned by layer " << owner_layer_name << ", " << "param "
        << param_display_names_[param_owners_[param_id]] << ")"
        << " diff: " << diff_abs_val_mean;    //print
  }
}

//Share the parameter blobs of other's layers with the layers of the same name in this network
//(only the data pointers are redirected; no data is copied)
template <typename Dtype>
void Net<Dtype>::ShareTrainedLayersWith(const Net* other) {
  int num_source_layers = other->layers().size();   //number of layers in the source network
  for (int i = 0; i < num_source_layers; ++i) {
    Layer<Dtype>* source_layer = other->layers()[i].get();      //pointer to the i-th layer of the source network
    const string& source_layer_name = other->layer_names()[i];  //name of the i-th layer of the source network
    int target_layer_id = 0;
    while (target_layer_id != layer_names_.size() &&
        layer_names_[target_layer_id] != source_layer_name) {   //look in this network for a layer with the same name as the source network's i-th layer
      ++target_layer_id;
    }
    if (target_layer_id == layer_names_.size()) {   //no layer with that name was found, so this layer's parameters are not shared
      LOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }
    //layer target_layer_id of this network has the same name as layer i of other; start sharing parameters
    DLOG(INFO) << "Copying source layer " << source_layer_name;
    vector<shared_ptr<Blob<Dtype> > >& target_blobs = layers_[target_layer_id]->blobs();  //parameter blobs of this layer
    CHECK_EQ(target_blobs.size(), source_layer->blobs().size())
        << "Incompatible number of blobs for layer " << source_layer_name;    //check that the two layers have the same number of parameter blobs
    for (int j = 0; j < target_blobs.size(); ++j) {
      Blob<Dtype>* source_blob = source_layer->blobs()[j].get();    //the j-th parameter blob of other's i-th layer
      CHECK(target_blobs[j]->shape() == source_blob->shape())
          << "Cannot share param " << j << " weights from layer '"
          << source_layer_name << "'; shape mismatch.  Source param shape is "
          << source_blob->shape_string() << "; target param shape is "
          << target_blobs[j]->shape_string();     //check that the two parameter blobs have the same shape
      target_blobs[j]->ShareData(*source_blob);   //share the data: point target_blobs[j]'s data_ at source_blob's data_
    }
  }
}
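For illustration, a hedged sketch of the typical use of ShareTrainedLayersWith(): a TEST-phase net reuses the weights of a TRAIN-phase net without copying them. The prototxt path is hypothetical and is assumed to define layers for both phases with matching names and shapes.

caffe::Net<float> train_net("train_val.prototxt", caffe::TRAIN);  // hypothetical definition file
caffe::Net<float> test_net("train_val.prototxt", caffe::TEST);
test_net.ShareTrainedLayersWith(&train_net);  // test_net's parameter blobs now point at train_net's weights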

template <typename Dtype>
void Net<Dtype>::BackwardFrom(int start) {    //run the backward pass from layer start down to the first layer
  BackwardFromTo(start, 0);
}

template <typename Dtype>
void Net<Dtype>::BackwardTo(int end) {        //run the backward pass from the last layer down to layer end
  BackwardFromTo(layers_.size() - 1, end);
}

template <typename Dtype>
void Net<Dtype>::Backward() {     //run the backward pass over all layers of the network
  BackwardFromTo(layers_.size() - 1, 0);    //the whole network
  if (debug_info_) {              //print debug info
    Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0;
    for (int i = 0; i < learnable_params_.size(); ++i) {  //all learnable parameters of the net
      asum_data += learnable_params_[i]->asum_data();     //sum of absolute values of the parameter blob's data_
      asum_diff += learnable_params_[i]->asum_diff();     //sum of absolute values of the parameter blob's diff_
      sumsq_data += learnable_params_[i]->sumsq_data();   //sum of squares of the parameter blob's data_
      sumsq_diff += learnable_params_[i]->sumsq_diff();   //sum of squares of the parameter blob's diff_
    }
    const Dtype l2norm_data = std::sqrt(sumsq_data);      //square root
    const Dtype l2norm_diff = std::sqrt(sumsq_diff);
    LOG(ERROR) << "    [Backward] All net params (data, diff): "
               << "L1 norm = (" << asum_data << ", " << asum_diff << "); "
               << "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")"; //print
  }
}

template <typename Dtype>
void Net<Dtype>::Reshape() {    //reshape the input/output blobs (and any dependent buffers) of every layer in the network
  for (int i = 0; i < layers_.size(); ++i) {
    layers_[i]->Reshape(bottom_vecs_[i], top_vecs_[i]);   //call each layer's own Reshape()
  }
}

//Copy the parameter blob data of same-named layers from param into this network
//(unlike ShareTrainedLayersWith, this function copies the data instead of redirecting data pointers)
template <typename Dtype>
void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
  int num_source_layers = param.layer_size();     //number of layers in the source network parameter
  for (int i = 0; i < num_source_layers; ++i) {
    const LayerParameter& source_layer = param.layer(i);    //the LayerParameter of the i-th source layer
    const string& source_layer_name = source_layer.name();  //name of the source layer
    int target_layer_id = 0;
    while (target_layer_id != layer_names_.size() &&
        layer_names_[target_layer_id] != source_layer_name) { //as before, look in this net for a layer with the same name
      ++target_layer_id;
    }
    if (target_layer_id == layer_names_.size()) {   //not found; skip this layer
      LOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }
    //layer target_layer_id of this net corresponds to layer i of param
    DLOG(INFO) << "Copying source layer " << source_layer_name;
    vector<shared_ptr<Blob<Dtype> > >& target_blobs =
        layers_[target_layer_id]->blobs();  //parameter blobs of layer target_layer_id in this net
    CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
        << "Incompatible number of blobs for layer " << source_layer_name;    //check that the numbers of parameters match
    for (int j = 0; j < target_blobs.size(); ++j) {
      if (!target_blobs[j]->ShapeEquals(source_layer.blobs(j))) {   //check whether the parameter blob's shape matches the BlobProto message in param
        Blob<Dtype> source_blob;
        const bool kReshape = true;
        //shapes differ: copy source_layer's BlobProto into source_blob so its shape can be reported, then abort with an error
        source_blob.FromProto(source_layer.blobs(j), kReshape);
        LOG(FATAL) << "Cannot copy param " << j << " weights from layer '"
            << source_layer_name << "'; shape mismatch.  Source param shape is "
            << source_blob.shape_string() << "; target param shape is "
            << target_blobs[j]->shape_string() << ". "
            << "To learn this layer's parameters from scratch rather than "
            << "copying from a saved net, rename the layer.";   //abort
      }
      const bool kReshape = false;
      //copy the j-th BlobProto message of source_layer into target_blobs[j]
      target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
    }
  }
}

template <typename Dtype>
void Net<Dtype>::CopyTrainedLayersFrom(const string& trained_filename) {  //copy network parameters from the file trained_filename
  if (H5Fis_hdf5(trained_filename.c_str())) {       //check whether the file is in HDF5 format
    CopyTrainedLayersFromHDF5(trained_filename);    //copy the network parameters from an HDF5 file
  } else {
    CopyTrainedLayersFromBinaryProto(trained_filename);   //read the network parameters from a binary proto file
  }
}

template <typename Dtype>
void Net<Dtype>::CopyTrainedLayersFromBinaryProto(const string& trained_filename) { //read the network parameters from a binary proto file
  NetParameter param;
  ReadNetParamsFromBinaryFileOrDie(trained_filename, &param); //read the file trained_filename and store it in param
  CopyTrainedLayersFrom(param);   //copy the parameters into this network
}
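A hedged fine-tuning sketch built on the name-matching behavior above: layers whose names appear in the saved model get their weights copied, while a layer that has been renamed in the new definition keeps its fresh initialization. Both file names are hypothetical.

caffe::Net<float> net("finetune_train_val.prototxt", caffe::TRAIN);  // hypothetical definition with the last layer renamed
net.CopyTrainedLayersFrom("pretrained.caffemodel");                  // accepts a binary proto or an HDF5 snapshot
// Matching layers are copied; the old name of the renamed layer is logged as
// "Ignoring source layer ..." and the renamed layer keeps its random initialization.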

template <typename Dtype>
void Net<Dtype>::CopyTrainedLayersFromHDF5(const string& trained_filename) {    //copy network parameters from an HDF5 file
#ifdef USE_HDF5
  hid_t file_hid = H5Fopen(trained_filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);  //open the HDF5 file
  CHECK_GE(file_hid, 0) << "Couldn't open " << trained_filename;    //check that it opened
  hid_t data_hid = H5Gopen2(file_hid, "data", H5P_DEFAULT);         //open the "data" group inside file_hid
  CHECK_GE(data_hid, 0) << "Error reading weights from " << trained_filename;   //check that it opened
  int num_layers = hdf5_get_num_links(data_hid);    //number of links (child objects) in data_hid, i.e. the number of layers
  for (int i = 0; i < num_layers; ++i) {
    string source_layer_name = hdf5_get_name_by_idx(data_hid, i);   //name of the i-th link in data_hid
    if (!layer_names_index_.count(source_layer_name)) {             //look for a layer with the same name in this net
      LOG(INFO) << "Ignoring source layer " << source_layer_name;   //not found, skip it
      continue;
    }
    int target_layer_id = layer_names_index_[source_layer_name];    //index of that layer in this network
    DLOG(INFO) << "Copying source layer " << source_layer_name;
    vector<shared_ptr<Blob<Dtype> > >& target_blobs = layers_[target_layer_id]->blobs();  //parameter blobs of that layer in this network
    hid_t layer_hid = H5Gopen2(data_hid,
        source_layer_name.c_str(), H5P_DEFAULT); //open the group named source_layer_name inside data_hid
    CHECK_GE(layer_hid, 0) << "Error reading weights from " << trained_filename;  //check that it opened
    // Check that source layer doesn't have more params than target layer
    int num_source_params = hdf5_get_num_links(layer_hid);    //number of links in layer_hid, i.e. the number of parameters
    CHECK_LE(num_source_params, target_blobs.size())
        << "Incompatible number of blobs for layer " << source_layer_name;  //check that the source has no more parameters than the target layer has blobs
    for (int j = 0; j < target_blobs.size(); ++j) {   //copy each parameter blob
      ostringstream oss;
      oss << j;
      string dataset_name = oss.str();
      int target_net_param_id = param_id_vecs_[layer_id][j]; //index in params_ of the j-th parameter of layer target_layer_id
      if (!H5Lexists(layer_hid, dataset_name.c_str(), H5P_DEFAULT)) { //does a dataset named dataset_name exist in layer_hid?
        // Target param doesn't exist in source weights...
        if (param_owners_[target_net_param_id] != -1) {   //the HDF5 file has no such parameter, but it is a shared parameter here, so that is fine
          // ...but it's weight-shared in target, so that's fine.
          continue;
        } else {
          LOG(FATAL) << "Incompatible number of blobs for layer "
              << source_layer_name;   //it owns itself but is missing from the HDF5 file, so abort
        }
      }
      hdf5_load_nd_dataset(layer_hid, dataset_name.c_str(), 0, kMaxBlobAxes,
          target_blobs[j].get()); //read the dataset dataset_name, check its shape and data type, and store it in target_blobs[j]'s cpu data
    }
    H5Gclose(layer_hid);    //close
  }
  H5Gclose(data_hid);
  H5Fclose(file_hid);
#else
  LOG(FATAL) << "CopyTrainedLayersFromHDF5 requires hdf5;"
             << " compile with USE_HDF5.";
#endif  // USE_HDF5
}

//Copy all of this network's LayerParameters into a NetParameter variable
template <typename Dtype>
void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) const {
  param->Clear();           //clear any existing data first
  param->set_name(name_);   //store this net's name in param
  // Add bottom and top
  DLOG(INFO) << "Serializing " << layers_.size() << " layers";
  for (int i = 0; i < layers_.size(); ++i) {    //handle each layer
    LayerParameter* layer_param = param->add_layer();   //add a new LayerParameter message to param and get its pointer
    layers_[i]->ToProto(layer_param, write_diff);       //copy this layer's data into the layer_param message
  }
}
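This is essentially how binary-proto snapshots are produced; a hedged sketch using the WriteProtoToBinaryFile helper declared in caffe/util/io.hpp (the output path is hypothetical, and net is assumed to be an already constructed Net<float>):

caffe::NetParameter net_param;
net.ToProto(&net_param, false);                                   // serialize weights only (write_diff == false)
caffe::WriteProtoToBinaryFile(net_param, "snapshot.caffemodel");  // hypothetical output path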

//Write all of this network's parameter blobs into the HDF5 file filename
template <typename Dtype>
void Net<Dtype>::ToHDF5(const string& filename, bool write_diff) const {
// This code is taken from https://github.com/sh1r0/caffe-android-lib
#ifdef USE_HDF5
  hid_t file_hid = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);  //create the HDF5 file
  CHECK_GE(file_hid, 0) << "Couldn't open " << filename << " to save weights.";
  hid_t data_hid = H5Gcreate2(file_hid, "data", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);   //create the "data" group
  CHECK_GE(data_hid, 0) << "Error saving weights to " << filename << ".";
  hid_t diff_hid = -1;
  if (write_diff) {
    diff_hid = H5Gcreate2(file_hid, "diff", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); //if gradients are to be saved too, also create a "diff" group
    CHECK_GE(diff_hid, 0) << "Error saving weights to " << filename << ".";
  }
  for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {   //handle each layer
    const LayerParameter& layer_param = layers_[layer_id]->layer_param(); //layer parameters of layer layer_id
    string layer_name = layer_param.name();         //name of the layer
    hid_t layer_data_hid = H5Gcreate2(data_hid, layer_name.c_str(),
        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);     //create a group with that name under "data"
    CHECK_GE(layer_data_hid, 0) << "Error saving weights to " << filename << ".";
    hid_t layer_diff_hid = -1;
    if (write_diff) {
      layer_diff_hid = H5Gcreate2(diff_hid, layer_name.c_str(),
          H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);   //gradients requested, so also create a group under "diff"
      CHECK_GE(layer_diff_hid, 0) << "Error saving weights to " << filename << ".";
    }
    int num_params = layers_[layer_id]->blobs().size();   //number of parameter blobs in this layer
    for (int param_id = 0; param_id < num_params; ++param_id) {
      ostringstream dataset_name;
      dataset_name << param_id;
      const int net_param_id = param_id_vecs_[layer_id][param_id];  //index in params_ of the param_id-th parameter of layer layer_id
      if (param_owners_[net_param_id] == -1) {
        // Only save params that own themselves
        //the parameter owns itself: create the dataset dataset_name under layer_data_hid and write the parameter blob's data_ into it
        hdf5_save_nd_dataset<Dtype>(layer_data_hid, dataset_name.str(), *params_[net_param_id]);
      }
      if (write_diff) {
        // Write diffs regardless of weight-sharing
        //create the dataset dataset_name under layer_diff_hid and write the parameter blob's diff_ into it
        hdf5_save_nd_dataset<Dtype>(layer_diff_hid, dataset_name.str(),
            *params_[net_param_id], true);
      }
    }
    H5Gclose(layer_data_hid);   //close
    if (write_diff) {
      H5Gclose(layer_diff_hid);
    }
  }
  H5Gclose(data_hid);   //close everything else
  if (write_diff) {
    H5Gclose(diff_hid);
  }
  H5Fclose(file_hid);
// This code is taken from https://github.com/sh1r0/caffe-android-lib
#else
  LOG(FATAL) << "ToHDF5 requires hdf5; compile with USE_HDF5.";
#endif  // USE_HDF5
}

template <typename Dtype>
void Net<Dtype>::Update() {             //apply one update to every learnable parameter blob of the network
  for (int i = 0; i < learnable_params_.size(); ++i) {
    learnable_params_[i]->Update();   //calls Blob::Update(), which computes data_ = Dtype(-1) * diff_ + data_
  }
}

template <typename Dtype>
void Net<Dtype>::ClearParamDiffs() {    //zero the diff_ data of every learnable parameter blob of the network
  for (int i = 0; i < learnable_params_.size(); ++i) {    //handle each learnable parameter
    Blob<Dtype>* blob = learnable_params_[i];
    switch (Caffe::mode()) {    //current caffe run mode
    case Caffe::CPU:
      caffe_set(blob->count(), static_cast<Dtype>(0), blob->mutable_cpu_diff());  //set the blob's diff_ data on the CPU to all zeros
      break;
    case Caffe::GPU:
#ifndef CPU_ONLY
      caffe_gpu_set(blob->count(), static_cast<Dtype>(0), blob->mutable_gpu_diff());  //set the diff_ data on the GPU to all zeros
#else
      NO_GPU;
#endif
      break;
    }
  }
}
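Putting these pieces together, a single hand-rolled training step looks roughly like the sketch below. Normally the Solver drives this loop and also scales each diff_ by the learning rate (and applies momentum, weight decay, etc.) before calling Update(); this hedged sketch omits all of that, and the prototxt path is hypothetical.

caffe::Net<float> net("train_val.prototxt", caffe::TRAIN);  // hypothetical definition with data and loss layers
net.ClearParamDiffs();            // zero any accumulated gradients
float loss = 0;
net.Forward(&loss);               // forward pass, accumulating the loss
net.Backward();                   // backward pass, filling each parameter's diff_
net.Update();                     // data_ <- data_ - diff_ (no learning-rate scaling here)
LOG(INFO) << "step loss = " << loss;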

template <typename Dtype>
void Net<Dtype>::ShareWeights() {   //parameter sharing between layers: point every shared parameter's data_ and diff_ at the corresponding data of its owner (source) parameter
  for (int i = 0; i < params_.size(); ++i) {    //all parameters in the network
    if (param_owners_[i] < 0) { continue; }     //an owner index of -1 means the parameter owns itself; skip it
    params_[i]->ShareData(*params_[param_owners_[i]]);  //a shared parameter: point its data_ and diff_ at the owner parameter's data
    params_[i]->ShareDiff(*params_[param_owners_[i]]);
  }
}

template <typename Dtype>
bool Net<Dtype>::has_blob(const string& blob_name) const {      //check whether the network has a blob named blob_name
  return blob_names_index_.find(blob_name) != blob_names_index_.end();
}

template <typename Dtype>
const shared_ptr<Blob<Dtype> > Net<Dtype>::blob_by_name(    //return a pointer to the blob named blob_name
    const string& blob_name) const {
  shared_ptr<Blob<Dtype> > blob_ptr;
  if (has_blob(blob_name)) {    //does the network have a blob with this name?
    blob_ptr = blobs_[blob_names_index_.find(blob_name)->second]; //look up its index in blobs_ by name and return that entry
  } else {
    blob_ptr.reset((Blob<Dtype>*)(NULL));
    LOG(WARNING) << "Unknown blob name " << blob_name;    //not found, warn
  }
  return blob_ptr;
}

template <typename Dtype>
bool Net<Dtype>::has_layer(const string& layer_name) const {    //check whether the network has a layer named layer_name
  return layer_names_index_.find(layer_name) != layer_names_index_.end();
}

template <typename Dtype>
const shared_ptr<Layer<Dtype> > Net<Dtype>::layer_by_name(    //return a pointer to the layer named layer_name
    const string& layer_name) const {
  shared_ptr<Layer<Dtype> > layer_ptr;
  if (has_layer(layer_name)) {    //does the network have a layer with this name?
    layer_ptr = layers_[layer_names_index_.find(layer_name)->second]; //look up its position in layers_ and return the pointer
  } else {
    layer_ptr.reset((Layer<Dtype>*)(NULL));
    LOG(WARNING) << "Unknown layer name " << layer_name;  //not found, warn
  }
  return layer_ptr;
}

Summary

  1. Protocol Buffers defines two serialization formats. One is the textual format: every ".prototxt" file in caffe uses it, and it is what users edit to define network parameters (e.g. train_val.prototxt holds a NetParameter message) and solver parameters (e.g. solver.prototxt holds a SolverParameter message). The other is the binary format: caffe's model snapshot files (".caffemodel") and solver snapshot files (".solverstate") use it, and it is generally used for data that is large and does not need to be edited by hand, such as all of the network's learnable parameter blobs.
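As a concrete illustration (a hedged sketch; both file names are hypothetical), caffe's helpers in caffe/util/upgrade_proto.hpp read the two formats into the same NetParameter message type:

  caffe::NetParameter net_def;
  // textual format: human-editable network definition
  caffe::ReadNetParamsFromTextFileOrDie("train_val.prototxt", &net_def);
  // binary format: trained weights saved by the solver
  caffe::NetParameter weights;
  caffe::ReadNetParamsFromBinaryFileOrDie("snapshot.caffemodel", &weights);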

References

https://confluence.hdfgroup.org/display/HDF5
https://developers.google.com/protocol-buffers
(If the links do not open, you may need a proxy to reach them.)
This is my first read-through of the Caffe source code, and these notes were written as I went, so my understanding and analysis may contain mistakes or omissions. Corrections from readers are very welcome. Thanks for your support!

Original article: https://www.cnblogs.com/Relu110/p/12044371.html
