题目简单介绍
官方提供100类狗的图片,参赛选手需要对图片中的狗按类别进行分类。
分析
官方提供的图片如下:
从图片可以看出背景比较复杂:图片中不仅有狗,还有人,也可能有其他物体,因此第一步需要将狗从图片中提取出来。考虑到从头训练一个狗的检测器将耗费一定的时间,因此直接采用目标检测方法SSD进行狗的检测,将狗检测出来之后再进行分类。
因此流程就是 检测 + 分类。
狗的检测核心代码
该代码使用Qt编写。项目配置如下:
# qmake project configuration for the SSD-based dog detector.
# One directive per line: when everything is on a single line, the stray "+="
# tokens are taken as path entries and the "# cuda" comment hides every
# directive that follows it.

# Header search paths: Caffe (SSD tree), system headers, Caffe sources, CUDA.
INCLUDEPATH += /home/young/deeplearning/SSD/caffe/include
INCLUDEPATH += /usr/include
INCLUDEPATH += /home/young/deeplearning/SSD/caffe/src
INCLUDEPATH += /home/young/software/cuda/include

# Caffe, OpenCV and the support libraries linked alongside them.
LIBS += -L/home/young/deeplearning/SSD/caffe/build/lib -lcaffe
LIBS += -L/usr/lib/x86_64-linux-gnu -lopencv_core -lopencv_imgproc -lopencv_highgui
LIBS += -lglog -lgflags -lprotobuf -lboost_system -lboost_thread -latlas

SOURCES += main.cpp

# cuda
INCLUDEPATH += /usr/local/cuda/include
LIBS += -L/usr/local/cuda/lib64 -lcudart -lcublas -lcurand

# cudnn
LIBS += -L/home/young/software/cuda/lib64 -lcudnn
SSD检测代码
#include "caffe/caffe.hpp"
// NOTE(review): USE_OPENCV and CPU_ONLY are defined after caffe.hpp has been
// included, so they only affect the code below, not that header — confirm
// this is intended.
#define USE_OPENCV
#define CPU_ONLY
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif  // USE_OPENCV
#include <algorithm>
#include <iomanip>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include<fstream>

#ifdef USE_OPENCV
using namespace caffe;  // NOLINT(build/namespaces)

/// @brief Splits @p src on every occurrence of @p separate_character.
///
/// The separator may be longer than one character (e.g. ",,"). Empty fields
/// between two adjacent separators are kept; an empty field after the last
/// separator is dropped.
///
/// @param src                 Text to split.
/// @param separate_character  Separator string. If it is empty, no splitting
///                            is done and @p src is returned as the only
///                            element (or nothing when @p src is empty).
/// @return The fields in the order they appear in @p src.
std::vector<std::string> splitEx(const std::string& src, std::string separate_character) {
  std::vector<std::string> strs;

  // An empty separator matches at every position without advancing, which
  // would loop forever; treat it as "nothing to split on".
  if (separate_character.empty()) {
    if (!src.empty()) strs.push_back(src);
    return strs;
  }

  // Separator length, so multi-character separators are skipped as a whole.
  const std::string::size_type separator_len = separate_character.size();
  std::string::size_type last_position = 0;

  // Use size_type/npos rather than int/-1 so the result of find() is never
  // narrowed.
  for (std::string::size_type index = src.find(separate_character, last_position);
       index != std::string::npos;
       index = src.find(separate_character, last_position)) {
    strs.push_back(src.substr(last_position, index - last_position));
    last_position = index + separator_len;
  }

  // Whatever follows the last separator; only kept when non-empty.
  const std::string last_string = src.substr(last_position);
  if (!last_string.empty()) strs.push_back(last_string);
  return strs;
}

/// Wraps a Caffe network (held in net_) behind a single Detect() call.
class Detector {
 public:
  /// @param model_file    Network definition passed to the Net constructor.
  /// @param weights_file  Trained weights copied into the network.
  /// @param mean_file     Mean specification (presumably forwarded to
  ///                      SetMean — the definition is not visible here).
  /// @param mean_value    Mean specification (presumably forwarded to
  ///                      SetMean — the definition is not visible here).
  Detector(const string& model_file,
           const string& weights_file,
           const string& mean_file,
           const string& mean_value);

  /// Runs the network on @p img. The layout of each returned row is defined
  /// by the implementation, which is not visible here — TODO confirm.
  std::vector<vector<float> > Detect(const cv::Mat& img);

 private:
  void SetMean(const string& mean_file, const string& mean_value);

  void WrapInputLayer(std::vector<cv::Mat>* input_channels);

  void Preprocess(const cv::Mat& img,
                  std::vector<cv::Mat>* input_channels);

 private:
  shared_ptr<Net<float> > net_;
  cv::Size input_geometry_;
  int num_channels_;  // channel count of the network's input blob
  cv::Mat mean_;
};

Detector::Detector(const string& model_file,
                   const string& weights_file,
                   const string& mean_file,
                   const string& mean_value) {
  // Compute mode is fixed at compile time by the CPU_ONLY macro above.
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif

  /* Load the network.
*/ net_.reset(new Net<float>(model_file, TEST)); net_->CopyTrainedLayersFrom(weights_file); CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input."; CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output."; Blob<float>* input_layer = net_->input_blobs()[0]; num_channels_ = input_layer->channels(); CHECK(num_channels_ == 3 || num_channels_ == 1) <<
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283