(1) Classification
Given an image, a model predicts what object the image contains.
(2) Classification + localization
Besides identifying the object in the image, we also draw a bounding box around it to mark where it is.
(3) Semantic segmentation
Every pixel in the image is assigned a class, rather than just enclosing objects in rectangular boxes.
(4) Object detection
Object detection answers two questions: what is in the image, and where is each object (each one enclosed in a rectangular box)?
(5) Instance segmentation
Instance segmentation combines object detection and semantic segmentation. Compared with the bounding boxes of object detection, instance segmentation traces the exact outline of each object; compared with semantic segmentation, it distinguishes the different individuals of the same class.
A pure classification problem is easy to understand: given an image, we output a class label; we are already familiar with this.
Localization is a bit more involved. The network must output four numbers (x, y, w, h): the coordinates (x, y) of a reference point of the box (for example its top-left corner), plus the box's width and height. With these four numbers we can easily draw the object's bounding box.
A simple localization network is therefore essentially a regression model, optimized with an L2 loss.
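As a minimal sketch of this regression view (the numbers below are made up for illustration, not taken from any dataset), the L2 loss is simply the mean squared error between the predicted and true box values:
import torch
import torch.nn as nn

# Hypothetical predicted and ground-truth boxes, (x, y, w, h) for a batch of 2.
pred_box = torch.tensor([[0.42, 0.30, 0.50, 0.55],
                         [0.10, 0.20, 0.30, 0.40]])
true_box = torch.tensor([[0.40, 0.28, 0.52, 0.60],
                         [0.12, 0.22, 0.28, 0.38]])

loss_fn = nn.MSELoss()              # L2 / mean-squared-error loss
print(loss_fn(pred_box, true_box))  # a scalar loss we could backpropagate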
The Oxford-IIIT Pet Dataset is a pet-image dataset covering 37 breeds, with roughly 200 images per breed. It ships with breed (classification) labels, head bounding-box annotations, and semantic-segmentation masks.
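The code below assumes the dataset has been unpacked into a dataset/ directory laid out as follows (the directory names are our own convention, matching the paths used in the code, not something mandated by the dataset):
dataset/
    images/               # Abyssinian_1.jpg, ... (all pet images)
    annotations/
        xmls/             # Abyssinian_1.xml, ... (boxes; only a subset of images have one)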
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
import os
from lxml import etree
from matplotlib.patches import Rectangle
import glob
from PIL import Image
Single-image demo
BATCH_SIZE = 8
print('Part 1: single-image parsing demo')
pil_img = Image.open(r'dataset/images/Abyssinian_1.jpg')
np_img = np.array(pil_img)
print(np_img.shape)
plt.imshow(np_img)
plt.show()
xml = open(r'dataset/annotations/xmls/Abyssinian_1.xml').read()
sel = etree.HTML(xml)
width = sel.xpath('//size/width/text()')[0]
height = sel.xpath('//size/height/text()')[0]
print(width,' ',height)
xmin = sel.xpath('//bndbox/xmin/text()')[0]
ymin = sel.xpath('//bndbox/ymin/text()')[0]
xmax = sel.xpath('//bndbox/xmax/text()')[0]
ymax = sel.xpath('//bndbox/ymax/text()')[0]
width = int(width)
height = int(height)
xmin = int(xmin)
ymin = int(ymin)
xmax = int(xmax)
ymax = int(ymax)
plt.imshow(np_img)
rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='blue')
ax = plt.gca()
ax.axes.add_patch(rect)
plt.show()
In the original dataset the images come in different sizes, but the model expects a fixed input size. Once an image is resized, however, its original xmin and ymin are no longer correct, because they are expressed relative to the original image dimensions. The fix is simple: convert each coordinate into a ratio of the image's width or height; a ratio stays valid at any resolution.
img = pil_img.resize((224,224))
xmin = (xmin/width)*224
ymin = (ymin/height)*224
xmax = (xmax/width)*224
ymax = (ymax/height)*224
plt.imshow(img)
rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
ax = plt.gca()
ax.axes.add_patch(rect)
plt.show()
The outputs are ratios, and we use these ratios as the target values.
Building the inputs
images = glob.glob('dataset/images/*.jpg')
xmls = glob.glob('dataset/annotations/xmls/*.xml')
len(images)
len(xmls)
"""
我们不知道对哪些图片做了标注,为了取出标注的图片;
我们要将这些被标注数据的文件名 ;
也即'dataset/annotations/xmlsAbyssinian_1.xml'中的Abyssinian_1取出来;
然后使用文件名对原有的图片进行一个筛选。
"""
xmls_names = [os.path.basename(x).split('.xml')[0] for x in xmls]
len(xmls_names)
imgs = [img for img in images if
        os.path.basename(img).split('.jpg')[0] in xmls_names]
len(imgs)
print('len(imgs)==len(xmls_names)?:',len(imgs)==len(xmls_names))
print('imgs[:5]:\n', imgs[:5])
print('xmls[:5]:\n', xmls[:5])
Converting the xml files into labels: before converting, note one thing. The target value is not the raw xmin itself, because every image has a different size; instead we take each coordinate as a ratio. The values the model predicts are the head bounding-box coordinates as fractions of the image's width and height.
def to_labels(path):
    # Parse one annotation file and return the box as four ratios:
    # [xmin/width, ymin/height, xmax/width, ymax/height].
    xml = open(r'{}'.format(path)).read()
    sel = etree.HTML(xml)
    width = int(sel.xpath('//size/width/text()')[0])
    height = int(sel.xpath('//size/height/text()')[0])
    xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
    ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
    xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
    ymax = int(sel.xpath('//bndbox/ymax/text()')[0])
    return [xmin/width, ymin/height, xmax/width, ymax/height]
labels = [to_labels(path) for path in xmls]
labels[0],type(labels)
out1_label, out2_label, out3_label, out4_label = list(zip(*labels))
len(out1_label), len(out2_label), len(out3_label), len(out4_label)
Splitting the dataset
index = np.random.permutation(len(imgs))
images = np.array(imgs)[index]
labels = np.array(labels)[index]
out1_label = np.array(out1_label).astype(np.float32).reshape(-1, 1)[index]
out2_label = np.array(out2_label).astype(np.float32).reshape(-1, 1)[index]
out3_label = np.array(out3_label).astype(np.float32).reshape(-1, 1)[index]
out4_label = np.array(out4_label).astype(np.float32).reshape(-1, 1)[index]
labels = labels.astype(np.float32)
labels.shape
"""
out1_label = out1_label.astype(np.float32)
out2_label = out2_label.astype(np.float32)
out3_label = out3_label.astype(np.float32)
out4_label = out4_label.astype(np.float32)
"""
i = int(len(imgs)*0.8)
train_images = images[:i]
train_labels = labels[:i]
out1_train_label = out1_label[:i]
out2_train_label = out2_label[:i]
out3_train_label = out3_label[:i]
out4_train_label = out4_label[:i]
test_images = images[i:]
test_labels = labels[i:]
out1_test_label = out1_label[i:]
out2_test_label = out2_label[i:]
out3_test_label = out3_label[i:]
out4_test_label = out4_label[i:]
Building the input pipeline
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
])
class Oxford_dataset(data.Dataset):
    def __init__(self, img_paths, out1_label, out2_label,
                 out3_label, out4_label, transform):
        self.imgs = img_paths
        self.out1_label = out1_label
        self.out2_label = out2_label
        self.out3_label = out3_label
        self.out4_label = out4_label
        self.transforms = transform

    def __getitem__(self, index):
        img = self.imgs[index]
        out1_label = self.out1_label[index]
        out2_label = self.out2_label[index]
        out3_label = self.out3_label[index]
        out4_label = self.out4_label[index]
        pil_img = Image.open(img)
        imgs_data = np.asarray(pil_img, dtype=np.uint8)
        if len(imgs_data.shape) == 2:
            # Grayscale image: repeat the single channel three times so
            # every sample has shape (3, H, W) after ToTensor.
            imgs_data = np.repeat(imgs_data[:, :, np.newaxis], 3, axis=2)
            img_tensor = self.transforms(Image.fromarray(imgs_data))
        else:
            img_tensor = self.transforms(pil_img)
        return (img_tensor,
                out1_label,
                out2_label,
                out3_label,
                out4_label)

    def __len__(self):
        return len(self.imgs)
train_dataset = Oxford_dataset(train_images, out1_train_label,
out2_train_label, out3_train_label,
out4_train_label, transform)
test_dataset = Oxford_dataset(test_images, out1_test_label,
out2_test_label, out3_test_label,
out4_test_label, transform)
train_dl = data.DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
)
test_dl = data.DataLoader(
test_dataset,
batch_size=BATCH_SIZE,
)
(imgs_batch,
out1_batch,
out2_batch,
out3_batch,
out4_batch) = next(iter(train_dl))
imgs_batch.shape, out1_batch.shape
plt.figure(figsize=(12, 8))
for i,(img, label1, label2,
label3,label4,) in enumerate(zip(imgs_batch[:2],
out1_batch[:2],
out2_batch[:2],
out3_batch[:2],
out4_batch[:2])):
    # ToTensor already scales pixels to [0, 1]; no extra rescaling is needed.
    img = img.permute(1, 2, 0).numpy()
plt.subplot(2, 3, i+1)
plt.imshow(img)
    xmin, ymin, xmax, ymax = label1*224, label2*224, label3*224, label4*224
rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
ax = plt.gca()
ax.axes.add_patch(rect)
Building the localization model
resnet = torchvision.models.resnet101(pretrained=True)
"""
resnet101里面包含很多层,conv、batch.........
最后是avgpool和fc全连接层。
avgpool之前的层都是我们需要的
"""
in_f = resnet.fc.in_features
print(in_f)
resnet.children()
list(resnet.children())
print(len(list(resnet.children())))
print(list(resnet.children())[-1])
list(resnet.children())[:-1]
conv_base = nn.Sequential(*list(resnet.children())[:-1])  # all layers except the final fc
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Pretrained ResNet-101 backbone without its final fc layer.
        self.conv_base = nn.Sequential(*list(resnet.children())[:-1])
        # Four independent regression heads, one per box-coordinate ratio.
        self.fc1 = nn.Linear(in_f, 1)
        self.fc2 = nn.Linear(in_f, 1)
        self.fc3 = nn.Linear(in_f, 1)
        self.fc4 = nn.Linear(in_f, 1)

    def forward(self, x):
        x = self.conv_base(x)
        x = x.view(x.size(0), -1)  # flatten (N, 2048, 1, 1) -> (N, 2048)
        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x3 = self.fc3(x)
        x4 = self.fc4(x)
        return x1, x2, x3, x4
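A quick shape check (a minimal sketch; net_test is a throwaway instance and the random tensor is dummy input, not real data) confirms that each head emits one value per image:
net_test = Net()
dummy = torch.randn(2, 3, 224, 224)            # fake batch of 2 RGB 224x224 images
o1, o2, o3, o4 = net_test(dummy)
print(o1.shape, o2.shape, o3.shape, o4.shape)  # each torch.Size([2, 1])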
model = Net()
if torch.cuda.is_available():
model.to('cuda')
loss_fn = nn.MSELoss()
from torch.optim import lr_scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
def fit(epoch, model, trainloader, testloader):
    running_loss = 0
model.train()
for x, y1, y2, y3, y4 in trainloader:
if torch.cuda.is_available():
x, y1, y2, y3, y4 = (x.to('cuda'),
y1.to('cuda'), y2.to('cuda'),
y3.to('cuda'), y4.to('cuda'))
y_pred1, y_pred2, y_pred3, y_pred4 = model(x)
loss1 = loss_fn(y_pred1, y1)
loss2 = loss_fn(y_pred2, y2)
loss3 = loss_fn(y_pred3, y3)
loss4 = loss_fn(y_pred4, y4)
loss = loss1 + loss2 + loss3 + loss4
optimizer.zero_grad()
loss.backward()
optimizer.step()
with torch.no_grad():
running_loss += loss.item()
exp_lr_scheduler.step()
epoch_loss = running_loss / len(trainloader.dataset)
    test_running_loss = 0
model.eval()
with torch.no_grad():
for x, y1, y2, y3, y4 in testloader:
if torch.cuda.is_available():
x, y1, y2, y3, y4 = (x.to('cuda'),
y1.to('cuda'), y2.to('cuda'),
y3.to('cuda'), y4.to('cuda'))
y_pred1, y_pred2, y_pred3, y_pred4 = model(x)
loss1 = loss_fn(y_pred1, y1)
loss2 = loss_fn(y_pred2, y2)
loss3 = loss_fn(y_pred3, y3)
loss4 = loss_fn(y_pred4, y4)
loss = loss1 + loss2 + loss3 + loss4
test_running_loss += loss.item()
epoch_test_loss = test_running_loss / len(testloader.dataset)
print('epoch: ', epoch,
'loss: ', round(epoch_loss, 3),
'test_loss: ', round(epoch_test_loss, 3),
)
return epoch_loss, epoch_test_loss
Training
epochs = 10
train_loss = []
test_loss = []
for epoch in range(epochs):
epoch_loss, epoch_test_loss = fit(epoch, model, train_dl, test_dl)
train_loss.append(epoch_loss)
test_loss.append(epoch_test_loss)
plt.figure()
plt.plot(range(1, len(train_loss)+1), train_loss, 'r', label='Training loss')
plt.plot(range(1, len(train_loss)+1), test_loss, 'bo', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.legend()
plt.show()
Saving the model
PATH = 'location_model.pth'
torch.save(model.state_dict(), PATH)
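To restore the weights later (a minimal sketch; it assumes the Net class definition above is available in the new session):
model_restored = Net()
model_restored.load_state_dict(torch.load(PATH))
model_restored.eval()  # switch to inference mode before predicting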
plt.figure(figsize=(8, 24))
imgs, _, _, _, _ = next(iter(test_dl))
if torch.cuda.is_available():
    imgs = imgs.to('cuda')
model.eval()
with torch.no_grad():
    out1, out2, out3, out4 = model(imgs)
for i in range(6):
plt.subplot(6, 1, i+1)
plt.imshow(imgs[i].permute(1,2,0).cpu().numpy())
xmin, ymin, xmax, ymax = (out1[i].item()*224,
out2[i].item()*224,
out3[i].item()*224,
out4[i].item()*224)
rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
ax = plt.gca()
ax.axes.add_patch(rect)
plt.show()