猫狗识别是学习CNN中最有趣的一个应用,本次我在Kaggle上的“Dogs vs. Cats Redux”进行了一次尝试。考虑到训练速度和实验的简易性,我使用了比较简单的AlexNet,并且还用了对应ImageNet的预训练模型(https://github.com/Cadene/pretrained-models.pytorch)。
因此,下文中将按照数据处理、模型预处理、训练和测试结果几个部分展开,实验代码如下:
https://github.com/Yannnnnnnnnnnn/learnPyTorch/tree/master/catVSdog
从Kaggle上下载完比赛数据后,首先要做两个处理:
1、将train数据中不同类别的数据放到不同的文件夹下,例如猫的数据放在“0”文件夹下,狗的数据放在“1”文件夹下,如下图所示:
2、完成train数据处理后,则需要进一步将其分成train数据和validation数据;本次实验时,我通过随机采样的方式,从train中选取了20%的数据用于验证,代码如下:
# -*- coding:utf-8 -*-
# Split the training set: move a random 20% of the images in `root_dir`
# into `output_dir` to serve as the validation set.
# Reference: https://blog.csdn.net/mdjxy63/article/details/78946455
__author__ = 'xuy'

import os
import random
import shutil

root_dir = r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/train/0'
output_dir = r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/validation/0'
percentage = 0.2

# List the directory ONCE. The original walked the tree and called
# random.choice repeatedly, which samples WITH replacement: the same file
# could be chosen twice (the second pick fails the isfile check after the
# move), so fewer than 20% of the images actually ended up in validation.
all_files = [name for name in os.listdir(root_dir)
             if os.path.isfile(os.path.join(root_dir, name))]
n_to_move = int(round(percentage * len(all_files)))

# random.sample draws distinct files, so exactly 20% are moved.
for chosen_one in random.sample(all_files, n_to_move):
    shutil.move(os.path.join(root_dir, chosen_one), output_dir)
模型预处理的主要内容是读取ImageNet预训练的模型,然后对AlexNet网络最后一层进行修改,使得分类类别数量为2,代码如下:
import torch
import torch.nn as nn
from torch import optim
from torchvision.datasets import ImageFolder
from torchvision import transforms


class MineAlexNet(nn.Module):
    """AlexNet with a LogSoftmax head.

    The layer layout (and therefore the state-dict keys) matches
    torchvision's AlexNet, so the ImageNet-pretrained weights load directly.
    """

    def __init__(self, num_classes=2):
        super(MineAlexNet, self).__init__()
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        fc_stack = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_stack)
        # LogSoftmax carries no parameters, so it does not affect the
        # state-dict and pretrained weights still load cleanly.
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        feats = self.features(x)
        pooled = self.avgpool(feats)
        flat = pooled.view(pooled.size(0), 256 * 6 * 6)
        logits = self.classifier(flat)
        return self.logsoftmax(logits)


# Load the locally downloaded ImageNet-pretrained weights (1000 classes) ...
state_dict = torch.load('alexnetImageNet.pth')
alexNet = MineAlexNet(1000)
alexNet.load_state_dict(state_dict)

# ... swap the final fully-connected layer for a 2-class (cat/dog) head ...
alexNet.classifier[6] = nn.Linear(4096, 2)

# ... and save the result as the starting point for fine-tuning.
torch.save(alexNet.state_dict(), 'begin.pth')
训练过程方面,目前我还没有明白炼丹的具体技巧,所以就随便训练了30个epoch,代码如下:
import torch
import torch.nn as nn
from torch import optim
from torchvision.datasets import ImageFolder
from torchvision import transforms

# ---------------------------------------------------------------------------
# Data pipeline.
# Normalising with the ImageNet statistics is essential when starting from
# ImageNet-pretrained weights -- without it the results are much worse!
# ---------------------------------------------------------------------------
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Random rotation/flip augmentation is only appropriate for TRAINING data.
train_transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),   # PIL image -> tensor scaled to [0, 1]
    normalize,
])

# Validation uses a deterministic pipeline. (The original reused the
# augmenting transform here, which adds noise to the validation metrics.)
validation_transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    normalize,
])

# Training data, read from per-class sub-folders ("0" = cat, "1" = dog).
train_dataset = ImageFolder(r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/train', transform=train_transform)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=512, shuffle=True)

# Validation data; shuffling is unnecessary for evaluation.
validation_dataset = ImageFolder(r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/validation', transform=validation_transform)
validationloader = torch.utils.data.DataLoader(validation_dataset, batch_size=512, shuffle=False)


class MineAlexNet(nn.Module):
    """AlexNet with a LogSoftmax head; layout matches torchvision's AlexNet
    so the converted pretrained weights ('begin.pth') load directly."""

    def __init__(self, num_classes=2):
        super(MineAlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        x = self.logsoftmax(x)
        return x


# Load the 2-class AlexNet prepared from the ImageNet weights.
state_dict = torch.load('begin.pth')
alexNet = MineAlexNet(2)
alexNet.load_state_dict(state_dict)
alexNet.cuda()

# The network already ends in LogSoftmax, so the matching loss is NLLLoss.
# (The original used CrossEntropyLoss, which applies log-softmax internally
# and therefore normalised the outputs TWICE, distorting the gradients.)
criterion = nn.NLLLoss()
optimizer = optim.SGD(alexNet.parameters(), lr=0.00005)

epochs = 30
CHECKPOINT_OFFSET = 37  # checkpoint numbering continues a previous run
train_losses, validation_losses = [], []

for e in range(epochs):
    # ---- training pass ----
    alexNet.train()  # enable dropout
    running_loss = 0
    for images, labels in trainloader:
        images = images.cuda()
        labels = labels.cuda()
        optimizer.zero_grad()
        output = alexNet(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- validation pass ----
    # eval() disables dropout; the original validated in train mode, which
    # makes the reported loss/accuracy noisy and pessimistic.
    alexNet.eval()
    validation_loss = 0
    accuracy = 0
    # Turn off gradients for validation: saves memory and computation.
    with torch.no_grad():
        for images, labels in validationloader:
            images = images.cuda()
            labels = labels.cuda()
            log_ps = alexNet(images)
            # .item() keeps a plain float instead of accumulating tensors.
            validation_loss += criterion(log_ps, labels).item()
            ps = torch.exp(log_ps)  # log-probabilities -> probabilities
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

    train_losses.append(running_loss / len(trainloader))
    validation_losses.append(validation_loss / len(validationloader))
    torch.save(alexNet.state_dict(), str(e + 1 + CHECKPOINT_OFFSET) + '.pth')

    print("Epoch: {}/{}.. ".format(e + 1, epochs),
          "Training Loss: {:.3f}.. ".format(running_loss / len(trainloader)),
          "Test Loss: {:.3f}.. ".format(validation_loss / len(validationloader)),
          "Test Accuracy: {:.3f}".format(accuracy / len(validationloader)))

# Plot the training / validation loss curves.
import matplotlib.pyplot as plt
plt.plot(train_losses, label='Training loss')
plt.plot(validation_losses, label='Validation loss')
plt.legend(frameon=False)
最后我用test数据测试了一下训练的模型,在Kaggle的得分如下,虽然结果不是特别好,但好歹也还行吧,排名大概在700左右。
预测的代码如下:
import torch
import torch.nn as nn
from torchvision import transforms

# Same deterministic preprocessing as training/validation: resize to the
# AlexNet input size and normalise with the ImageNet statistics.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    normalize,
])


class MineAlexNet(nn.Module):
    """AlexNet with a LogSoftmax head; layout matches the trained checkpoint."""

    def __init__(self, num_classes=2):
        super(MineAlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        x = self.logsoftmax(x)
        return x


# Load the trained cat/dog weights (checkpoint of the last epoch).
state_dict = torch.load('30.pth')
alexNet = MineAlexNet(2)
alexNet.load_state_dict(state_dict)
# Switch to evaluation mode: the original skipped this, leaving dropout
# active at inference time, which makes the predictions non-deterministic.
alexNet.eval()

from PIL import Image
import matplotlib.pyplot as plt
import os

path = r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/test/'

# `with` guarantees the CSV is closed (and flushed) even on error.
with open('/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/submission.csv', 'w') as f:
    # Kaggle header; `label` is P(dog), i.e. the probability of class 1.
    # (The original wrote "id,label n" -- the newline escape was lost.)
    f.write("id,label\n")
    with torch.no_grad():  # inference only, no gradients needed
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                # Open the image and force 3-channel RGB so the tensor shape
                # always matches the network input. (The original comment
                # claimed a grayscale conversion that was never performed.)
                image = Image.open(os.path.join(root, name)).convert('RGB')
                tensor = transform(image).unsqueeze(0)  # add batch dimension
                log_ps = alexNet(tensor)
                ps = torch.exp(log_ps)  # log-probabilities -> probabilities
                # Probability that the image is a dog (class index 1); this
                # replaces the original's branch on the top-1 class, which
                # computed exactly the same value in two steps.
                prob = ps[0][1].item()
                print(name, prob)
                filename, extension = os.path.splitext(name)
                f.write(filename + "," + str(prob) + "\n")
因为已经有现成的代码了,所以整体写得比较简单,主要目的还是鼓励自己。
相关知识
Pytorch采用AlexNet实现猫狗数据集分类(训练与预测)
【深度学习】AlexNet网络实现猫狗分类
PyTorch深度学习:猫狗情感识别
PyTorch猫狗:深度学习在宠物识别中的应用
使用PyTorch进行城市声音分类:PyTorch音频识别
猫狗分类PyTorch:深度学习与迁移学习的探索
用Tensorflow实现AlexNet识别猫狗数据集(猫狗大战)【附代码】
详解pytorch实现猫狗识别98%附代码
CNN简单实战:PyTorch搭建CNN对猫狗图片进行分类
面部表情识别2:Pytorch实现表情识别(含表情识别数据集和训练代码)
网址: PyTorch:基于AlexNet的猫狗识别 https://m.mcbbbk.com/newsview557444.html
上一篇: 宠物猫狗会传染新冠肺炎吗?世卫: |
下一篇: 中老年科学养宠成主流 |