img.convert():是图像实例化对象的一个方法,接受一个mode 参数,用以指定一个色彩模式。
os.path.join():用于拼接文件路径
os.listdir():返回指定路径下的所有文件和文件夹的名字。
endswith():判断字符串或字符是否以指定的字符串或者字符结尾。
Batchnorm层: 批量归一化。
作用:加速神经网络训练,加速收敛速度及稳定性的算法。
图像分割输出是图片,损失值是对比输出图片与标签图片的像素差。
参考链接:宠物图像分割
关于模型组网:就是把网络分成小模块,然后组织起来。
注意力机制是在计算资源一定的情况下,把有限的计算资源更多地调整分配给相对重要的任务,使得计算机能合理规划并且处理大量信息的一种模型。U-Net网络提取的低层特征中存在较多的冗余信息,注意力机制的融入可以抑制网络模型学习与任务无关的特征,达到抑制冗余信息被激活的目的,同时提高模型学习重要特征的能力。
import os
import io
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image as PilImage
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

# Select the GPU as the Paddle device for everything that follows.
paddle.set_device('gpu')
# Bare expression: presumably echoes the installed Paddle version when run as
# a notebook cell (it is a no-op in a plain script) -- TODO confirm.
paddle.__version__
作用:将图片数据划分,并保存为txt文件。
# Image size shared with the rest of the pipeline.
IMAGE_SIZE = (160, 160)
train_images_path = "images/"
label_images_path = "annotations/trimaps/"

# Number of '.jpg' files found in the training image directory.
image_count = sum(
    1
    for image_name in os.listdir(train_images_path)
    if image_name.endswith('.jpg')
)
print("用于训练的图片样本数量:", image_count)


# Prepare the dataset: split it into training and evaluation lists.
def _sort_images(image_dir, image_type):
    """Return the sorted paths of the ``image_type`` files in ``image_dir``.

    Args:
        image_dir: Directory to scan (not recursive).
        image_type: File extension without the leading dot, e.g. ``'jpg'``.

    Returns:
        A sorted list of ``os.path.join(image_dir, name)`` paths. Hidden
        files (names starting with ``'.'``) are skipped.
    """
    suffix = '.{}'.format(image_type)
    return sorted(
        os.path.join(image_dir, image_name)
        for image_name in os.listdir(image_dir)
        if image_name.endswith(suffix) and not image_name.startswith('.')
    )


def write_file(mode, images, labels):
    """Write ``./<mode>.txt`` with one ``image<TAB>label`` pair per line.

    Args:
        mode: Base name of the list file (``'train'``, ``'test'``, ...).
        images: Sequence of image paths.
        labels: Sequence of label paths, indexed in parallel with ``images``.

    Raises:
        IndexError: If ``labels`` is shorter than ``images``.
    """
    with open('./{}.txt'.format(mode), 'w') as f:
        for i, image in enumerate(images):
            # Tab-separated columns, newline-terminated records. The escapes
            # had been lost, which produced one unsplittable line.
            f.write('{}\t{}\n'.format(image, labels[i]))


def _split_train_eval(items, eval_count):
    """Split ``items`` into ``(head, tail)`` where ``tail`` has ``eval_count`` items.

    Uses an explicit index rather than ``items[:-eval_count]`` because a
    negative-zero slice (``eval_count == 0``) would return an empty head.
    """
    split_at = max(len(items) - eval_count, 0)
    return items[:split_at], items[split_at:]


images = _sort_images(train_images_path, 'jpg')
labels = _sort_images(label_images_path, 'png')
# Hold out the last 15% of the samples for evaluation/prediction.
eval_num = int(image_count * 0.15)

train_images, eval_images = _split_train_eval(images, eval_num)
train_labels, eval_labels = _split_train_eval(labels, eval_num)

write_file('train', train_images, train_labels)
write_file('test', eval_images, eval_labels)
write_file('predict', eval_images, eval_labels)
# Preview the first three image/label pairs listed in ./train.txt.
with open('./train.txt', 'r') as f:
    for i, line in enumerate(f):
        # Stop before decoding a fourth pair that would never be shown.
        if i > 2:
            break

        # Each record is "<image path>\t<label path>".
        image_path, label_path = line.strip().split('\t')
        image = np.array(PilImage.open(image_path))
        label = np.array(PilImage.open(label_path))

        # Show the image and its label side by side.
        plt.figure()

        plt.subplot(1, 2, 1)
        plt.title('Train Image')
        plt.imshow(image.astype('uint8'))
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.title('Label')
        plt.imshow(label.astype('uint8'), cmap='gray')
        plt.axis('off')

        plt.show()
import random
from paddle.io import Dataset
from paddle.vision.transforms import transforms as T


class PetDataset(Dataset):
    """Dataset of image/label path pairs read from ``./<mode>.txt``.

    Each line of the list file is ``<image path>\\t<label path>``.
    """

    def __init__(self, mode='train'):
        """Load the path list for ``mode``.

        Args:
            mode: One of ``'train'``, ``'test'`` or ``'predict'``
                (case-insensitive); selects ``./<mode>.txt``.

        Raises:
            AssertionError: If ``mode`` is not one of the accepted values.
        """
        super().__init__()
        self.image_size = IMAGE_SIZE
        self.mode = mode.lower()

        assert self.mode in ['train', 'test', 'predict'], \
            "mode should be 'train' or 'test' or 'predict', but got {}".format(self.mode)

        self.train_images = []
        self.label_images = []

        with open('./{}.txt'.format(self.mode), 'r') as f:
            for line in f:
                # Tab-separated record; the escape had been lost in the
                # original text, leaving a split on the letter 't'.
                image, label = line.strip().split('\t')
                self.train_images.append(image)
                self.label_images.append(label)

    def _load_img(self, path, color_mode='rgb', transforms=None):
        """Open an image, normalise its colour mode, resize and transform it.

        Args:
            path: Path of the image file.
            color_mode: ``'grayscale'``, ``'rgb'`` or ``'rgba'``.
            transforms: Optional list of extra transforms applied after the
                resize to ``self.image_size``. ``None`` means no extras.

        Returns:
            The output of the composed transforms.

        Raises:
            ValueError: If ``color_mode`` is not a supported value.
        """
        # Avoid a shared mutable default argument.
        extra_transforms = [] if transforms is None else list(transforms)

        with open(path, 'rb') as f:
            # Read fully into memory so the image does not depend on the
            # file handle staying open.
            img = PilImage.open(io.BytesIO(f.read()))

        if color_mode == 'grayscale':
            # If the image is not already an 8-bit, 16-bit or 32-bit
            # grayscale image, convert it to 8-bit grayscale.
            if img.mode not in ('L', 'I;16', 'I'):
                img = img.convert('L')
        elif color_mode == 'rgba':
            if img.mode != 'RGBA':
                img = img.convert('RGBA')
        elif color_mode == 'rgb':
            if img.mode != 'RGB':
                img = img.convert('RGB')
        else:
            raise ValueError('color_mode must be "grayscale", "rgb", or "rgba"')

        # NOTE(review): the same Resize (default interpolation) is applied to
        # label images; if that interpolation is not nearest-neighbour it may
        # blend class ids -- confirm.
        pipeline = T.Compose([T.Resize(self.image_size)] + extra_transforms)
        return pipeline(img)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is returned as a ``float32`` array and the label as an
        ``int64`` array.
        """
        # Input image: transpose, then normalise with mean/std 127.5.
        train_image = self._load_img(
            self.train_images[idx],
            transforms=[
                T.Transpose(),
                T.Normalize(mean=127.5, std=127.5),
            ],
        )
        # Label image, loaded as grayscale.
        label_image = self._load_img(
            self.label_images[idx],
            color_mode='grayscale',
            transforms=[T.Grayscale()],
        )

        train_image = np.array(train_image, dtype='float32')
        label_image = np.array(label_image, dtype='int64')
        return train_image, label_image

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.train_images)
class conv_block(nn.Layer):
    """Two consecutive 3x3 Conv2D -> BatchNorm -> ReLU stages.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels of both convolutions.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(ch_in, ch_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm(ch_out),
            nn.ReLU(),
            nn.Conv2D(ch_out, ch_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm(ch_out),
            nn.ReLU(),
        )

    def forward(self, x):
        """Apply the double convolution stack to ``x``."""
        return self.conv(x)


class up_conv(nn.Layer):
    """Upsample by a factor of 2, then 3x3 Conv2D -> BatchNorm -> ReLU.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        self.up = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2D(ch_in, ch_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm(ch_out),
            nn.ReLU(),
        )

    def forward(self, x):
        """Upsample ``x`` and apply the convolution stage."""
        return self.up(x)


class single_conv(nn.Layer):
    """A single 3x3 Conv2D -> BatchNorm -> ReLU stage.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(ch_in, ch_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm(ch_out),
            nn.ReLU(),
        )

    def forward(self, x):
        """Apply the convolution stage to ``x``."""
        return self.conv(x)
class Attention_block(nn.Layer):
    """Attention gate that rescales ``x`` using a gating signal ``g``.

    Both inputs are projected to ``F_int`` channels with 1x1 convolutions,
    summed, passed through ReLU, and reduced to a single-channel sigmoid map
    that multiplies ``x``.

    Args:
        F_g: Number of channels of the gating signal ``g``.
        F_l: Number of channels of the feature map ``x``.
        F_int: Number of intermediate channels.
    """

    def __init__(self, F_g, F_l, F_int):
        super().__init__()
        # 1x1 projection of the gating signal.
        self.W_g = nn.Sequential(
            nn.Conv2D(F_g, F_int, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm(F_int),
        )
        # 1x1 projection of the feature map being gated.
        self.W_x = nn.Sequential(
            nn.Conv2D(F_l, F_int, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm(F_int),
        )
        # Collapse to one channel and squash with a sigmoid.
        self.psi = nn.Sequential(
            nn.Conv2D(F_int, 1, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU()

    def forward(self, g, x):
        """Return ``x`` multiplied by the attention map computed from ``g`` and ``x``."""
        gate_proj = self.W_g(g)
        feat_proj = self.W_x(x)
        attention = self.psi(self.relu(gate_proj + feat_proj))
        # The single-channel map is multiplied element-wise with x.
        return x * attention
class AttU_Net(nn.Layer):
    """U-Net style encoder/decoder with attention gates on the skip paths.

    Args:
        img_ch: Number of channels of the input image.
        output_ch: Number of channels produced by the final 1x1 convolution.
    """

    def __init__(self, img_ch=3, output_ch=1):
        super().__init__()

        # One 2x2/stride-2 max-pool per encoder downsampling step.
        self.Maxpool = nn.MaxPool2D(kernel_size=2, stride=2)
        self.Maxpool1 = nn.MaxPool2D(kernel_size=2, stride=2)
        self.Maxpool2 = nn.MaxPool2D(kernel_size=2, stride=2)
        self.Maxpool3 = nn.MaxPool2D(kernel_size=2, stride=2)

        # Encoder: channel width doubles at every level.
        self.Conv1 = conv_block(ch_in=img_ch, ch_out=64)
        self.Conv2 = conv_block(ch_in=64, ch_out=128)
        self.Conv3 = conv_block(ch_in=128, ch_out=256)
        self.Conv4 = conv_block(ch_in=256, ch_out=512)
        self.Conv5 = conv_block(ch_in=512, ch_out=1024)

        # Decoder: upsample, gate the skip connection, concat, convolve.
        # Each Up_conv takes twice the level's width because of the concat.
        self.Up5 = up_conv(ch_in=1024, ch_out=512)
        self.Att5 = Attention_block(F_g=512, F_l=512, F_int=256)
        self.Up_conv5 = conv_block(ch_in=1024, ch_out=512)

        self.Up4 = up_conv(ch_in=512, ch_out=256)
        self.Att4 = Attention_block(F_g=256, F_l=256, F_int=128)
        self.Up_conv4 = conv_block(ch_in=512, ch_out=256)

        self.Up3 = up_conv(ch_in=256, ch_out=128)
        self.Att3 = Attention_block(F_g=128, F_l=128, F_int=64)
        self.Up_conv3 = conv_block(ch_in=256, ch_out=128)

        self.Up2 = up_conv(ch_in=128, ch_out=64)
        self.Att2 = Attention_block(F_g=64, F_l=64, F_int=32)
        self.Up_conv2 = conv_block(ch_in=128, ch_out=64)

        # Final 1x1 projection to the requested number of output channels.
        self.Conv_1x1 = nn.Conv2D(64, output_ch, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Run the encoder, then the attention-gated decoder, and return the output map."""
        # Encoding path.
        x1 = self.Conv1(x)

        x2 = self.Maxpool(x1)
        x2 = self.Conv2(x2)

        x3 = self.Maxpool1(x2)
        x3 = self.Conv3(x3)

        x4 = self.Maxpool2(x3)
        x4 = self.Conv4(x4)

        x5 = self.Maxpool3(x4)
        x5 = self.Conv5(x5)

        # Decoding + concat path: each skip feature is gated by the
        # upsampled decoder feature before being concatenated on axis 1.
        d5 = self.Up5(x5)
        x4 = self.Att5(g=d5, x=x4)
        d5 = paddle.concat(x=[x4, d5], axis=1)
        d5 = self.Up_conv5(d5)

        d4 = self.Up4(d5)
        x3 = self.Att4(g=d4, x=x3)
        d4 = paddle.concat(x=[x3, d4], axis=1)
        d4 = self.Up_conv4(d4)

        d3 = self.Up3(d4)
        x2 = self.Att3(g=d3, x=x2)
        d3 = paddle.concat(x=[x2, d3], axis=1)
        d3 = self.Up_conv3(d3)

        d2 = self.Up2(d3)
        x1 = self.Att2(g=d2, x=x1)
        d2 = paddle.concat(x=[x1, d2], axis=1)
        d2 = self.Up_conv2(d2)

        d1 = self.Conv_1x1(d2)
        return d1
# Build the network with one output channel per class and wrap it in the
# high-level paddle.Model API.
num_classes = 4
network = AttU_Net(img_ch=3, output_ch=num_classes)
model = paddle.Model(network)
# Print a layer summary for a 3-channel input of IMAGE_SIZE (batch dim -1).
model.summary((-1, 3,) + IMAGE_SIZE)
train_dataset = PetDataset(mode='train')  # training dataset
val_dataset = PetDataset(mode='test')  # validation dataset

optim = paddle.optimizer.RMSProp(
    learning_rate=0.001,
    rho=0.9,
    momentum=0.0,
    epsilon=1e-07,
    centered=False,
    parameters=model.parameters(),
)

# Cross-entropy is taken over axis 1 of the network output.
model.prepare(optim, paddle.nn.CrossEntropyLoss(axis=1))
model.fit(
    train_dataset,
    val_dataset,
    epochs=15,
    batch_size=32,
    verbose=1,
)
predict_dataset = PetDataset(mode='predict')
predict_results = model.predict(predict_dataset)

plt.figure(figsize=(10, 10))

# The resize transform is loop-invariant, so build it once.
resize_t = T.Compose([
    T.Resize(IMAGE_SIZE)
])

i = 0  # index of the next free cell in the 3x3 subplot grid
mask_idx = 0  # index of the current sample in the prediction results

with open('./predict.txt', 'r') as f:
    for line in f:
        # The grid holds three rows of (input, label, prediction); stop
        # before decoding a sample that would not be drawn.
        if i > 8:
            break

        # Each record is "<image path>\t<label path>".
        image_path, label_path = line.strip().split('\t')

        image = resize_t(PilImage.open(image_path))
        label = resize_t(PilImage.open(label_path))
        image = np.array(image).astype('uint8')
        label = np.array(label).astype('uint8')

        plt.subplot(3, 3, i + 1)
        plt.imshow(image)
        plt.title('Input Image')
        plt.axis("off")

        plt.subplot(3, 3, i + 2)
        plt.imshow(label, cmap='gray')
        plt.title('Label')
        plt.axis("off")

        # The model has a single output, so predict_results[0] holds the
        # predictions. Index it by the sample's position in the list file,
        # then take the arg-max over the last axis of the transposed data to
        # get the mask. NOTE(review): the extra [0] presumably selects the
        # only item of a size-1 batch -- confirm.
        data = predict_results[0][mask_idx][0].transpose((1, 2, 0))
        mask = np.argmax(data, axis=-1)

        plt.subplot(3, 3, i + 3)
        plt.imshow(mask.astype('uint8'), cmap='gray')
        plt.title('Predict')
        plt.axis("off")

        i += 3
        mask_idx += 1

plt.show()
相关知识
基于Attention U
防止皮肤病:夏天是对宠物皮肤影响很大的季节,宠物要特别注意保持清洁。 的翻译是:To prevent skin diseases: summer is the season that affects a pet's skin the most, so special attention must be paid to keeping pets clean. 中文翻译英文意思,翻译英语
基于Java+Vue+uniapp微信小程序宠物寄养平台设计和实现
基于Java+Vue+uniapp微信小程序宠物店商城系统设计和实现
基于大数据的宠物消费行为分析与预测.docx
基于PHP的猫咪宠物网
金丝熊眼睛红肿流泪怎么回事
基于SSH技术的宠物救助系统的设计与实现
【“摩登”宠物新风尚】库奇宠:一枚定位器带来的“颈上添花”
Nourse卫仕U系列 复合维生素片 狗狗猫咪维生素片400片力
网址: 基于Attention U https://m.mcbbbk.com/newsview81447.html
上一篇: 头脑风暴助发展丨数字赋能宠物经济 |
下一篇: 成都宠物 麻辣社区 |