1. 数据处理,包括数据增强、人脸区域裁剪以及归一化
2. caffe 多标签配置
- 更改D:\caffe-master\src\caffe\proto 里 caffe.proto 文件
// A single data sample.  Multi-label support: `label` is declared
// `repeated` so one Datum can carry any number of integer labels.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  // Multi-label: one entry per label of this sample.
  repeated int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
}
// Parameters for ImageDataLayer.  `label_size` (field 13) is the
// multi-label extension: number of labels expected per list-file line.
message ImageDataParameter {
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4 [default = 1];
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  optional bool shuffle = 8 [default = false];
  // It will also resize images if new_height or new_width are not zero.
  optional uint32 new_height = 9 [default = 0];
  optional uint32 new_width = 10 [default = 0];
  // Specify if the images are color or gray
  optional bool is_color = 11 [default = true];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  optional string root_folder = 12 [default = ""];
  // Multi-label: number of labels per image in the list file.
  optional uint32 label_size = 13 [default = 1];
}
// Parameters for MemoryDataLayer.  `label_size` (field 5) is the
// multi-label extension: number of labels per sample.
message MemoryDataParameter {
  optional uint32 batch_size = 1;
  optional uint32 channels = 2;
  optional uint32 height = 3;
  optional uint32 width = 4;
  // Multi-label: number of labels per sample.
  optional uint32 label_size = 5 [default = 1];
}
然后运行D:\caffe-master\scripts 里 GeneratePB.bat 更新 caffe.pb.cc 和 caffe.pb.h
- 更改caffe工程里的convert_imageset项目里convert_imageset.cpp
// This program converts a set of images to a lmdb/leveldb by storing them // as Datum proto buffers. // Usage: // convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME // // where ROOTFOLDER is the root folder that holds all the images, and LISTFILE // should be a list of files as well as their labels, in the format as // subfolder1/file1.JPEG 7 // .... #include <algorithm> #include <fstream> // NOLINT(readability/streams) #include <string> #include <utility> #include <vector> #include "boost/scoped_ptr.hpp" #include "gflags/gflags.h" #include "glog/logging.h" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/format.hpp" #include "caffe/util/io.hpp" #include "caffe/util/rng.hpp" using namespace caffe; // NOLINT(build/namespaces) using std::pair; using boost::scoped_ptr; DEFINE_bool(gray, false, "When this option is on, treat images as grayscale ones"); DEFINE_bool(shuffle, false, "Randomly shuffle the order of images and their labels"); DEFINE_string(backend, "lmdb", "The backend {lmdb, leveldb} for storing the result"); DEFINE_int32(resize_width, 0, "Width images are resized to"); DEFINE_int32(resize_height, 0, "Height images are resized to"); DEFINE_bool(check_size, false, "When this option is on, check that all the datum have the same size"); DEFINE_bool(encoded, false, "When this option is on, the encoded image will be save in datum"); DEFINE_string(encode_type, "", "Optional: What type should we encode the image as (‘png‘,‘jpg‘,...)."); int main(int argc, char** argv) { #ifdef USE_OPENCV ::google::InitGoogleLogging(argv[0]); // Print output to stderr (while still logging) FLAGS_alsologtostderr = 1; #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n" "format used as input for Caffe.\n" "Usage:\n" " convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n" "The ImageNet dataset for the training demo is at\n" " 
http://www.image-net.org/download-images\n"); gflags::ParseCommandLineFlags(&argc, &argv, true); if (argc < 4) { gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset"); return 1; } const bool is_color = !FLAGS_gray; const bool check_size = FLAGS_check_size; const bool encoded = FLAGS_encoded; const string encode_type = FLAGS_encode_type; std::ifstream infile(argv[2]); /*std::vector<std::pair<std::string, int> > lines; std::string line; size_t pos; int label; while (std::getline(infile, line)) { pos = line.find_last_of(‘ ‘); label = atoi(line.substr(pos + 1).c_str()); lines.push_back(std::make_pair(line.substr(0, pos), label)); }*/ std::vector<std::pair<std::string, std::vector<int>> > lines; std::string line; std::string filename; size_t pos; while (std::getline(infile, line)) { std::vector<int> labels; int label; std::istringstream iss(line); iss >> filename; while (iss >> label){ labels.push_back(label); } lines.push_back(std::make_pair(filename, labels)); } if (FLAGS_shuffle) { // randomly shuffle data LOG(INFO) << "Shuffling data"; shuffle(lines.begin(), lines.end()); } LOG(INFO) << "A total of " << lines.size() << " images."; if (encode_type.size() && !encoded) LOG(INFO) << "encode_type specified, assuming encoded=true."; int resize_height = std::max<int>(0, FLAGS_resize_height); int resize_width = std::max<int>(0, FLAGS_resize_width); // Create new DB scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend)); db->Open(argv[3], db::NEW); scoped_ptr<db::Transaction> txn(db->NewTransaction()); // Storing to db std::string root_folder(argv[1]); Datum datum; int count = 0; int data_size = 0; bool data_size_initialized = false; for (int line_id = 0; line_id < lines.size(); ++line_id) { bool status; std::string enc = encode_type; if (encoded && !enc.size()) { // Guess the encoding type from the file name string fn = lines[line_id].first; size_t p = fn.rfind(‘.‘); if ( p == fn.npos ) LOG(WARNING) << "Failed to guess the encoding of ‘" << fn << "‘"; enc = 
fn.substr(p); std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower); } status = ReadImageToDatum(root_folder + lines[line_id].first, lines[line_id].second, resize_height, resize_width, is_color, enc, &datum); if (status == false) continue; if (check_size) { if (!data_size_initialized) { data_size = datum.channels() * datum.height() * datum.width(); data_size_initialized = true; } else { const std::string& data = datum.data(); CHECK_EQ(data.size(), data_size) << "Incorrect data field size " << data.size(); } } // sequential string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first; // Put in db string out; CHECK(datum.SerializeToString(&out)); txn->Put(key_str, out); if (++count % 1000 == 0) { // Commit db txn->Commit(); txn.reset(db->NewTransaction()); LOG(INFO) << "Processed " << count << " files."; } } // write the last batch if (count % 1000 != 0) { txn->Commit(); LOG(INFO) << "Processed " << count << " files."; } #else LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; #endif // USE_OPENCV return 0; }
- 更改caffe源文件io.hpp 和io.cpp
#ifndef CAFFE_UTIL_IO_H_ #define CAFFE_UTIL_IO_H_ #include <boost/filesystem.hpp> #include <iomanip> #include <iostream> // NOLINT(readability/streams) #include <string> #include "google/protobuf/message.h" #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/format.hpp" #ifndef CAFFE_TMP_DIR_RETRIES #define CAFFE_TMP_DIR_RETRIES 100 #endif namespace caffe { using ::google::protobuf::Message; using ::boost::filesystem::path; inline void MakeTempDir(string* temp_dirname) { temp_dirname->clear(); // Place all temp directories under temp_root, to be able to delete all of // them at once, without knowing their name. const path& temp_root = boost::filesystem::temp_directory_path() / "caffe_test"; boost::filesystem::create_directory(temp_root); const path& model = temp_root / "%%%%-%%%%"; for ( int i = 0; i < CAFFE_TMP_DIR_RETRIES; i++ ) { const path& dir = boost::filesystem::unique_path(model).string(); bool done = boost::filesystem::create_directory(dir); if ( done ) { *temp_dirname = dir.string(); return; } } LOG(FATAL) << "Failed to create a temporary directory."; } inline void MakeTempFilename(string* temp_filename) { path temp_files_subpath; static uint64_t next_temp_file = 0; temp_filename->clear(); if ( temp_files_subpath.empty() ) { string path_string=""; MakeTempDir(&path_string); temp_files_subpath = path_string; } *temp_filename = (temp_files_subpath/caffe::format_int(next_temp_file++, 9)).string(); } #ifdef _MSC_VER inline void RemoveCaffeTempDir() { boost::system::error_code err; boost::filesystem::remove_all( boost::filesystem::temp_directory_path() / "caffe_test", err); } #else inline void RemoveCaffeTempDir() { } #endif bool ReadProtoFromTextFile(const char* filename, Message* proto); inline bool ReadProtoFromTextFile(const string& filename, Message* proto) { return ReadProtoFromTextFile(filename.c_str(), proto); } inline void ReadProtoFromTextFileOrDie(const char* filename, Message* proto) { 
CHECK(ReadProtoFromTextFile(filename, proto)); } inline void ReadProtoFromTextFileOrDie(const string& filename, Message* proto) { ReadProtoFromTextFileOrDie(filename.c_str(), proto); } void WriteProtoToTextFile(const Message& proto, const char* filename); inline void WriteProtoToTextFile(const Message& proto, const string& filename) { WriteProtoToTextFile(proto, filename.c_str()); } bool ReadProtoFromBinaryFile(const char* filename, Message* proto); inline bool ReadProtoFromBinaryFile(const string& filename, Message* proto) { return ReadProtoFromBinaryFile(filename.c_str(), proto); } inline void ReadProtoFromBinaryFileOrDie(const char* filename, Message* proto) { CHECK(ReadProtoFromBinaryFile(filename, proto)); } inline void ReadProtoFromBinaryFileOrDie(const string& filename, Message* proto) { ReadProtoFromBinaryFileOrDie(filename.c_str(), proto); } void WriteProtoToBinaryFile(const Message& proto, const char* filename); inline void WriteProtoToBinaryFile( const Message& proto, const string& filename) { WriteProtoToBinaryFile(proto, filename.c_str()); } //bool ReadFileToDatum(const string& filename, const int label, Datum* datum); // //inline bool ReadFileToDatum(const string& filename, Datum* datum) { // return ReadFileToDatum(filename, -1, datum); //} // //bool ReadImageToDatum(const string& filename, const int label, // const int height, const int width, const bool is_color, // const std::string & encoding, Datum* datum); // //inline bool ReadImageToDatum(const string& filename, const int label, // const int height, const int width, const bool is_color, Datum* datum) { // return ReadImageToDatum(filename, label, height, width, is_color, // "", datum); //} // //inline bool ReadImageToDatum(const string& filename, const int label, // const int height, const int width, Datum* datum) { // return ReadImageToDatum(filename, label, height, width, true, datum); //} // //inline bool ReadImageToDatum(const string& filename, const int label, // const bool is_color, Datum* 
datum) { // return ReadImageToDatum(filename, label, 0, 0, is_color, datum); //} // //inline bool ReadImageToDatum(const string& filename, const int label, // Datum* datum) { // return ReadImageToDatum(filename, label, 0, 0, true, datum); //} // //inline bool ReadImageToDatum(const string& filename, const int label, // const std::string & encoding, Datum* datum) { // return ReadImageToDatum(filename, label, 0, 0, true, encoding, datum); //} bool ReadFileToDatum(const string& filename, const vector<int> label, Datum* datum); inline bool ReadFileToDatum(const string& filename, Datum* datum) { return ReadFileToDatum(filename, vector<int>(1, -1), datum); } bool ReadImageToDatum(const string& filename, const vector<int> label, const int height, const int width, const bool is_color, const std::string & encoding, Datum* datum); inline bool ReadImageToDatum(const string& filename, const vector<int> label, const int height, const int width, const bool is_color, Datum* datum) { return ReadImageToDatum(filename, label, height, width, is_color, "", datum); } inline bool ReadImageToDatum(const string& filename, const vector<int> label, const int height, const int width, Datum* datum) { return ReadImageToDatum(filename, label, height, width, true, datum); } inline bool ReadImageToDatum(const string& filename, const vector<int> label, const bool is_color, Datum* datum) { return ReadImageToDatum(filename, label, 0, 0, is_color, datum); } inline bool ReadImageToDatum(const string& filename, const vector<int> label, Datum* datum) { return ReadImageToDatum(filename, label, 0, 0, true, datum); } inline bool ReadImageToDatum(const string& filename, const vector<int> label, const std::string & encoding, Datum* datum) { return ReadImageToDatum(filename, label, 0, 0, true, encoding, datum); } bool DecodeDatumNative(Datum* datum); bool DecodeDatum(Datum* datum, bool is_color); #ifdef USE_OPENCV cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width, const bool 
is_color); cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width); cv::Mat ReadImageToCVMat(const string& filename, const bool is_color); cv::Mat ReadImageToCVMat(const string& filename); cv::Mat DecodeDatumToCVMatNative(const Datum& datum); cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color); void CVMatToDatum(const cv::Mat& cv_img, Datum* datum); #endif // USE_OPENCV } // namespace caffe #endif // CAFFE_UTIL_IO_H_
#include <fcntl.h> #if defined(_MSC_VER) #include <io.h> #endif #include <google/protobuf/io/coded_stream.h> #include <google/protobuf/io/zero_copy_stream_impl.h> #include <google/protobuf/text_format.h> #ifdef USE_OPENCV #include <opencv2/core/core.hpp> #include <opencv2/highgui/highgui.hpp> #include <opencv2/highgui/highgui_c.h> #include <opencv2/imgproc/imgproc.hpp> #endif // USE_OPENCV #include <stdint.h> #include <algorithm> #include <fstream> // NOLINT(readability/streams) #include <string> #include <vector> #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/io.hpp" const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. namespace caffe { using google::protobuf::io::FileInputStream; using google::protobuf::io::FileOutputStream; using google::protobuf::io::ZeroCopyInputStream; using google::protobuf::io::CodedInputStream; using google::protobuf::io::ZeroCopyOutputStream; using google::protobuf::io::CodedOutputStream; using google::protobuf::Message; bool ReadProtoFromTextFile(const char* filename, Message* proto) { int fd = open(filename, O_RDONLY); CHECK_NE(fd, -1) << "File not found: " << filename; FileInputStream* input = new FileInputStream(fd); bool success = google::protobuf::TextFormat::Parse(input, proto); delete input; close(fd); return success; } void WriteProtoToTextFile(const Message& proto, const char* filename) { int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); FileOutputStream* output = new FileOutputStream(fd); CHECK(google::protobuf::TextFormat::Print(proto, output)); delete output; close(fd); } bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { #if defined (_MSC_VER) // for MSC compiler binary flag needs to be specified int fd = open(filename, O_RDONLY | O_BINARY); #else int fd = open(filename, O_RDONLY); #endif CHECK_NE(fd, -1) << "File not found: " << filename; ZeroCopyInputStream* raw_input = new FileInputStream(fd); CodedInputStream* coded_input = new 
CodedInputStream(raw_input); coded_input->SetTotalBytesLimit(kProtoReadBytesLimit, 536870912); bool success = proto->ParseFromCodedStream(coded_input); delete coded_input; delete raw_input; close(fd); return success; } void WriteProtoToBinaryFile(const Message& proto, const char* filename) { fstream output(filename, ios::out | ios::trunc | ios::binary); CHECK(proto.SerializeToOstream(&output)); } #ifdef USE_OPENCV cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width, const bool is_color) { cv::Mat cv_img; int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag); if (!cv_img_origin.data) { LOG(ERROR) << "Could not open or find file " << filename; return cv_img_origin; } if (height > 0 && width > 0) { cv::resize(cv_img_origin, cv_img, cv::Size(width, height)); } else { cv_img = cv_img_origin; } return cv_img; } cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width) { return ReadImageToCVMat(filename, height, width, true); } cv::Mat ReadImageToCVMat(const string& filename, const bool is_color) { return ReadImageToCVMat(filename, 0, 0, is_color); } cv::Mat ReadImageToCVMat(const string& filename) { return ReadImageToCVMat(filename, 0, 0, true); } // Do the file extension and encoding match? static bool matchExt(const std::string & fn, std::string en) { size_t p = fn.rfind(‘.‘); std::string ext = p != fn.npos ? 
fn.substr(p) : fn; std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); std::transform(en.begin(), en.end(), en.begin(), ::tolower); if ( ext == en ) return true; if ( en == "jpg" && ext == "jpeg" ) return true; return false; } //bool ReadImageToDatum(const string& filename, const int label, // const int height, const int width, const bool is_color, // const std::string & encoding, Datum* datum) { // cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); // if (cv_img.data) { // if (encoding.size()) { // if ( (cv_img.channels() == 3) == is_color && !height && !width && // matchExt(filename, encoding) ) // return ReadFileToDatum(filename, label, datum); // std::vector<uchar> buf; // cv::imencode("."+encoding, cv_img, buf); // datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]), // buf.size())); // datum->set_label(label); // datum->set_encoded(true); // return true; // } // CVMatToDatum(cv_img, datum); // datum->set_label(label); // return true; // } else { // return false; // } //} bool ReadImageToDatum(const string& filename, const vector<int> label, const int height, const int width, const bool is_color, const std::string & encoding, Datum* datum) { cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); if (cv_img.data) { if (encoding.size()) { if ((cv_img.channels() == 3) == is_color && !height && !width && matchExt(filename, encoding)) return ReadFileToDatum(filename, label, datum); std::vector<uchar> buf; cv::imencode("." 
+ encoding, cv_img, buf); datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]), buf.size())); datum->mutable_label()->Clear(); for (int label_i = 0; label_i < label.size(); label_i++){ datum->add_label(label[label_i]); } datum->set_encoded(true); return true; } CVMatToDatum(cv_img, datum); datum->mutable_label()->Clear(); for (int label_i = 0; label_i < label.size(); label_i++){ datum->add_label(label[label_i]); } return true; } else { return false; } } #endif // USE_OPENCV //bool ReadFileToDatum(const string& filename, const int label, // Datum* datum) { // std::streampos size; // // fstream file(filename.c_str(), ios::in|ios::binary|ios::ate); // if (file.is_open()) { // size = file.tellg(); // std::string buffer(size, ‘ ‘); // file.seekg(0, ios::beg); // file.read(&buffer[0], size); // file.close(); // datum->set_data(buffer); // datum->set_label(label); // datum->set_encoded(true); // return true; // } else { // return false; // } //} bool ReadFileToDatum(const string& filename, const vector<int> label, Datum* datum) { std::streampos size; fstream file(filename.c_str(), ios::in | ios::binary | ios::ate); if (file.is_open()) { size = file.tellg(); std::string buffer(size, ‘ ‘); file.seekg(0, ios::beg); file.read(&buffer[0], size); file.close(); datum->set_data(buffer); datum->mutable_label()->Clear(); for (int label_i = 0; label_i < label.size(); label_i++){ datum->add_label(label[label_i]); } datum->set_encoded(true); return true; } else { return false; } } #ifdef USE_OPENCV cv::Mat DecodeDatumToCVMatNative(const Datum& datum) { cv::Mat cv_img; CHECK(datum.encoded()) << "Datum not encoded"; const string& data = datum.data(); std::vector<char> vec_data(data.c_str(), data.c_str() + data.size()); cv_img = cv::imdecode(vec_data, -1); if (!cv_img.data) { LOG(ERROR) << "Could not decode datum "; } return cv_img; } cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color) { cv::Mat cv_img; CHECK(datum.encoded()) << "Datum not encoded"; const string& data = 
datum.data(); std::vector<char> vec_data(data.c_str(), data.c_str() + data.size()); int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); cv_img = cv::imdecode(vec_data, cv_read_flag); if (!cv_img.data) { LOG(ERROR) << "Could not decode datum "; } return cv_img; } // If Datum is encoded will decoded using DecodeDatumToCVMat and CVMatToDatum // If Datum is not encoded will do nothing bool DecodeDatumNative(Datum* datum) { if (datum->encoded()) { cv::Mat cv_img = DecodeDatumToCVMatNative((*datum)); CVMatToDatum(cv_img, datum); return true; } else { return false; } } bool DecodeDatum(Datum* datum, bool is_color) { if (datum->encoded()) { cv::Mat cv_img = DecodeDatumToCVMat((*datum), is_color); CVMatToDatum(cv_img, datum); return true; } else { return false; } } void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) { CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; datum->set_channels(cv_img.channels()); datum->set_height(cv_img.rows); datum->set_width(cv_img.cols); datum->clear_data(); datum->clear_float_data(); datum->set_encoded(false); int datum_channels = datum->channels(); int datum_height = datum->height(); int datum_width = datum->width(); int datum_size = datum_channels * datum_height * datum_width; std::string buffer(datum_size, ‘ ‘); for (int h = 0; h < datum_height; ++h) { const uchar* ptr = cv_img.ptr<uchar>(h); int img_index = 0; for (int w = 0; w < datum_width; ++w) { for (int c = 0; c < datum_channels; ++c) { int datum_index = (c * datum_height + h) * datum_width + w; buffer[datum_index] = static_cast<char>(ptr[img_index++]); } } } datum->set_data(buffer); } #endif // USE_OPENCV } // namespace caffe
- 更改data_layer.hpp和data_layer.cpp
#ifndef CAFFE_DATA_LAYER_HPP_ #define CAFFE_DATA_LAYER_HPP_ #include <vector> #include "caffe/blob.hpp" #include "caffe/data_reader.hpp" #include "caffe/data_transformer.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/layers/base_data_layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" namespace caffe { template <typename Dtype> class DataLayer : public BasePrefetchingDataLayer<Dtype> { public: explicit DataLayer(const LayerParameter& param); virtual ~DataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // DataLayer uses DataReader instead for sharing for parallelism virtual inline bool ShareInParallel() const { return false; } virtual inline const char* type() const { return "Data"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } virtual inline int MaxTopBlobs() const { return 2; } std::vector<std::pair<std::string, std::vector<int>> > lines_; int lines_id; protected: virtual void load_batch(Batch<Dtype>* batch); DataReader reader_; }; } // namespace caffe #endif // CAFFE_DATA_LAYER_HPP_
#ifdef USE_OPENCV #include <opencv2/core/core.hpp> #endif // USE_OPENCV #include <stdint.h> #include <vector> #include "caffe/data_transformer.hpp" #include "caffe/layers/data_layer.hpp" #include "caffe/util/benchmark.hpp" namespace caffe { template <typename Dtype> DataLayer<Dtype>::DataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param), reader_(param) { } template <typename Dtype> DataLayer<Dtype>::~DataLayer() { this->StopInternalThread(); } template <typename Dtype> void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const int batch_size = this->layer_param_.data_param().batch_size(); // Read a data point, and use it to initialize the top blob. Datum& datum = *(reader_.full().peek()); // Use data_transformer to infer the expected blob shape from datum. vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape top[0] and prefetch_data according to the batch_size. 
top_shape[0] = batch_size; top[0]->Reshape(top_shape); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].data_.Reshape(top_shape); } LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label /*if (this->output_labels_) { vector<int> label_shape(1, batch_size); top[1]->Reshape(label_shape); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].label_.Reshape(label_shape); } }*/ if (this->output_labels_) { vector<int> label_shape(batch_size, datum.label_size()); top[1]->Reshape(label_shape); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].label_.Reshape(label_shape); } } } // This function is called on prefetch thread template<typename Dtype> void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) { CPUTimer batch_timer; batch_timer.Start(); double read_time = 0; double trans_time = 0; CPUTimer timer; CHECK(batch->data_.count()); CHECK(this->transformed_data_.count()); // Reshape according to the first datum of each batch // on single input batches allows for inputs of varying dimension. const int batch_size = this->layer_param_.data_param().batch_size(); Datum& datum = *(reader_.full().peek()); // Use data_transformer to infer the expected blob shape from datum. vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape batch according to the batch_size. top_shape[0] = batch_size; batch->data_.Reshape(top_shape); Dtype* top_data = batch->data_.mutable_cpu_data(); Dtype* top_label = NULL; // suppress warnings about uninitialized variables if (this->output_labels_) { top_label = batch->label_.mutable_cpu_data(); } for (int item_id = 0; item_id < batch_size; ++item_id) { timer.Start(); // get a datum Datum& datum = *(reader_.full().pop("Waiting for data")); read_time += timer.MicroSeconds(); timer.Start(); // Apply data transformations (mirror, scale, crop...) 
int offset = batch->data_.offset(item_id); this->transformed_data_.set_cpu_data(top_data + offset); this->data_transformer_->Transform(datum, &(this->transformed_data_)); // Copy label. /* if (this->output_labels_) { top_label[item_id] = datum.label(); }*/ if (this->output_labels_) { for (int label_i = 0; label_i < datum.label_size(); label_i++) { top_label[item_id*datum.label_size() + label_i] = datum.label(label_i); } } trans_time += timer.MicroSeconds(); reader_.free().push(const_cast<Datum*>(&datum)); } timer.Stop(); batch_timer.Stop(); DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; } INSTANTIATE_CLASS(DataLayer); REGISTER_LAYER_CLASS(Data); } // namespace caffe
- 更改image_data_layer.hpp和image_data_layer.cpp
#ifndef CAFFE_IMAGE_DATA_LAYER_HPP_ #define CAFFE_IMAGE_DATA_LAYER_HPP_ #include <string> #include <utility> #include <vector> #include "caffe/blob.hpp" #include "caffe/data_transformer.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/layers/base_data_layer.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { /** * @brief Provides data to the Net from image files. * * TODO(dox): thorough documentation for Forward and proto params. */ template <typename Dtype> class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> { public: explicit ImageDataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param) {} virtual ~ImageDataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "ImageData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 2; } protected: shared_ptr<Caffe::RNG> prefetch_rng_; virtual void ShuffleImages(); virtual void load_batch(Batch<Dtype>* batch); //vector<std::pair<std::string, int> > lines_; //int lines_id_; vector<std::pair<std::string, std::vector<int>> > lines_; int lines_id_; }; } // namespace caffe #endif // CAFFE_IMAGE_DATA_LAYER_HPP_
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <sstream>  // std::istringstream, used by the multi-label parser
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {
  this->StopInternalThread();
}

template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  const int label_size = this->layer_param_.image_data_param().label_size();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels.  Multi-label format:
  //   <filename> <label_0> <label_1> ...
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string line;
  std::string filename;
  while (std::getline(infile, line)) {
    // Fix: `labels` must be declared INSIDE the loop.  It used to be a
    // single vector outside the loop that was never cleared, so every
    // entry accumulated the labels of all preceding lines.
    std::vector<int> labels;
    int label;
    std::istringstream iss(line);
    iss >> filename;
    while (iss >> label) {
      labels.push_back(label);
    }
    lines_.push_back(std::make_pair(filename, labels));
  }

  CHECK(!lines_.empty()) << "File is empty";

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  // Fix: the label blob must be 2-D with shape (batch_size, label_size),
  // matching the `item_id * label_size + label_id` indexing in load_batch.
  // The previous `vector<int> label_shape(batch_size, label_size)` built a
  // blob with batch_size AXES of extent label_size each, which is wrong.
  vector<int> label_shape(2);
  label_shape[0] = batch_size;
  label_shape[1] = label_size;
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}

template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on prefetch thread
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  const int label_size = image_data_param.label_size();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    // Copy this sample's whole label vector; the label blob is laid out
    // row-major as (batch_size, label_size).
    CHECK_EQ(label_size, lines_[lines_id_].second.size())
        << "The input label size is not match the prototxt setting";
    for (int label_id = 0; label_id < label_size; ++label_id) {
      prefetch_label[item_id * label_size + label_id] =
          lines_[lines_id_].second[label_id];
    }
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV
- 更改memory_data_layer.hpp和memory_data_layer.cpp
#ifndef CAFFE_MEMORY_DATA_LAYER_HPP_
#define CAFFE_MEMORY_DATA_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/base_data_layer.hpp"

namespace caffe {

/**
 * @brief Provides data to the Net from memory.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class MemoryDataLayer : public BaseDataLayer<Dtype> {
 public:
  explicit MemoryDataLayer(const LayerParameter& param)
      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}
  // Reads batch_size/channels/height/width/label_size from
  // memory_data_param and shapes the data and label top blobs.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MemoryData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

  // Feed a vector of Datum; each Datum carries a repeated label field
  // (multi-label support).
  virtual void AddDatumVector(const vector<Datum>& datum_vector);
#ifdef USE_OPENCV
  // Feed a vector of cv::Mat plus a flat label vector
  // (label_size labels per image).
  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
      const vector<int>& labels);
#endif  // USE_OPENCV

  // Reset should accept const pointers, but can't, because the memory
  // will be given to Blob, which is mutable
  void Reset(Dtype* data, Dtype* label, int n);
  void set_batch_size(int new_size);

  int batch_size() { return batch_size_; }
  int channels() { return channels_; }
  int height() { return height_; }
  int width() { return width_; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // label_size_ was added to the original field list to support multiple
  // labels per sample (multi-label modification).
  /*int batch_size_, channels_, height_, width_, size_;*/
  int batch_size_, channels_, height_, width_, size_, label_size_;
  Dtype* data_;    // non-owning pointer to user-supplied image data
  Dtype* labels_;  // non-owning pointer to user-supplied label data
  int n_;          // number of items currently held (multiple of batch_size_)
  size_t pos_;     // index of the first item of the next batch
  Blob<Dtype> added_data_;
  Blob<Dtype> added_label_;
  bool has_new_data_;
};

}  // namespace caffe

#endif  // CAFFE_MEMORY_DATA_LAYER_HPP_
#ifdef USE_OPENCV #include <opencv2/core/core.hpp> #endif // USE_OPENCV #include <vector> #include "caffe/layers/memory_data_layer.hpp" namespace caffe { template <typename Dtype> void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { batch_size_ = this->layer_param_.memory_data_param().batch_size(); channels_ = this->layer_param_.memory_data_param().channels(); height_ = this->layer_param_.memory_data_param().height(); width_ = this->layer_param_.memory_data_param().width(); label_size_ = this->layer_param_.memory_data_param().label_size(); size_ = channels_ * height_ * width_; CHECK_GT(batch_size_ * size_, 0) << "batch_size, channels, height, and width must be specified and" " positive in memory_data_param"; /*vector<int> label_shape(1, batch_size_);*/ vector<int> label_shape(batch_size_, label_size_); top[0]->Reshape(batch_size_, channels_, height_, width_); top[1]->Reshape(label_shape); added_data_.Reshape(batch_size_, channels_, height_, width_); added_label_.Reshape(label_shape); data_ = NULL; labels_ = NULL; added_data_.cpu_data(); added_label_.cpu_data(); } template <typename Dtype> void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) { CHECK(!has_new_data_) << "Can‘t add data until current data has been consumed."; size_t num = datum_vector.size(); CHECK_GT(num, 0) << "There is no datum to add."; CHECK_EQ(num % batch_size_, 0) << "The added data must be a multiple of the batch size."; CHECK_EQ(label_size_, datum_vector[0].label_size()) << "The label size is for input not match the prototxt setting"; added_data_.Reshape(num, channels_, height_, width_); //added_label_.Reshape(num, 1, 1, 1); added_label_.Reshape(num, label_size_, 1, 1); // Apply data transformations (mirror, scale, crop...) 
this->data_transformer_->Transform(datum_vector, &added_data_); // Copy Labels Dtype* top_label = added_label_.mutable_cpu_data(); //for (int item_id = 0; item_id < num; ++item_id) { // top_label[item_id] = datum_vector[item_id].label(); //} for (int item_id = 0; item_id < num; ++item_id) { for (int label_id = 0; label_id < label_size_; label_id++){ top_label[item_id * label_size_ + label_id] = datum_vector[item_id].label(label_id); } } // num_images == batch_size_ Dtype* top_data = added_data_.mutable_cpu_data(); Reset(top_data, top_label, num); has_new_data_ = true; } #ifdef USE_OPENCV template <typename Dtype> void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector, const vector<int>& labels) { size_t num = mat_vector.size(); CHECK(!has_new_data_) << "Can‘t add mat until current data has been consumed."; CHECK_GT(num, 0) << "There is no mat to add"; CHECK_EQ(num % batch_size_, 0) << "The added data must be a multiple of the batch size."; CHECK_EQ(label_size_, labels.size() / num) << "The label size is for input not match the prototxt setting"; added_data_.Reshape(num, channels_, height_, width_); //added_label_.Reshape(num, 1, 1, 1); added_label_.Reshape(num, label_size_, 1, 1); // Apply data transformations (mirror, scale, crop...) 
this->data_transformer_->Transform(mat_vector, &added_data_); // Copy Labels Dtype* top_label = added_label_.mutable_cpu_data(); /*for (int item_id = 0; item_id < num; ++item_id) { top_label[item_id] = labels[item_id]; }*/ for (int item_id = 0; item_id < num; ++item_id) { for (int label_id = 0; label_id < label_size_; label_id++){ top_label[item_id * label_size_ + label_id] = labels[item_id * label_size_ + label_id]; } } // num_images == batch_size_ Dtype* top_data = added_data_.mutable_cpu_data(); Reset(top_data, top_label, num); has_new_data_ = true; } #endif // USE_OPENCV template <typename Dtype> void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) { CHECK(data); CHECK(labels); CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size"; // Warn with transformation parameters since a memory array is meant to // be generic and no transformations are done with Reset(). if (this->layer_param_.has_transform_param()) { LOG(WARNING) << this->type() << " does not transform array data on Reset()"; } data_ = data; labels_ = labels; n_ = n; pos_ = 0; } template <typename Dtype> void MemoryDataLayer<Dtype>::set_batch_size(int new_size) { CHECK(!has_new_data_) << "Can‘t change batch_size until current data has been consumed."; batch_size_ = new_size; added_data_.Reshape(batch_size_, channels_, height_, width_); //added_label_.Reshape(batch_size_, 1, 1, 1); added_label_.Reshape(batch_size_, label_size_, 1, 1); } template <typename Dtype> void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { CHECK(data_) << "MemoryDataLayer needs to be initialized by calling Reset"; top[0]->Reshape(batch_size_, channels_, height_, width_); //top[1]->Reshape(batch_size_, 1, 1, 1); top[1]->Reshape(batch_size_, label_size_, 1, 1); top[0]->set_cpu_data(data_ + pos_ * size_); top[1]->set_cpu_data(labels_ + pos_); pos_ = (pos_ + batch_size_) % n_; if (pos_ == 0) has_new_data_ = false; } 
INSTANTIATE_CLASS(MemoryDataLayer); REGISTER_LAYER_CLASS(MemoryData); } // namespace caffe
时间: 2024-10-21 16:04:41