到目前为止,我们在CS231N中探索的神经网络应用都是判别模型:模型接受输入,并经过训练产生带标签的输出。其范围从简单的图像类别分类到句子生成(句子生成仍然被表述为分类问题:标签位于词汇空间中,我们学习了用循环(recurrence)结构来捕获由多个词组成的标签)。在本笔记本中,我们将扩展这一范围,使用神经网络构建生成模型。具体来说,我们将学习如何构建模型,以生成与一组训练图像相似的新图像。


2014年,Goodfellow等人提出了一种训练生成模型的方法,称为生成对抗网络(简称GAN)。在GAN中,我们构建两个不同的神经网络。第一个网络是传统的分类网络,称为鉴别器(discriminator)。我们将训练鉴别器接收图像,并将其分类为真实的(属于训练集)或伪造的(不在训练集中)。另一个网络称为生成器(generator),它以随机噪声作为输入,并用神经网络对其进行变换以生成图像。生成器的目标是让鉴别器误以为它生成的图像是真实的。

import torch import torch.nn as nn from torch.nn import init import torchvision import torchvision.transforms as T import torch.optim as optim from torch.utils.data import DataLoader from torch.utils.data import sampler import torchvision.datasets as dset import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec %matplotlib inline plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray' def show_images(images): images = np.reshape(images, [images.shape[0], -1]) # images reshape to (batch_size, D) sqrtn = int(np.ceil(np.sqrt(images.shape[0]))) sqrtimg = int(np.ceil(np.sqrt(images.shape[1]))) fig = plt.figure(figsize=(sqrtn, sqrtn)) gs = gridspec.GridSpec(sqrtn, sqrtn) gs.update(wspace=0.05, hspace=0.05) for i, img in enumerate(images): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(img.reshape([sqrtimg,sqrtimg])) return def preprocess_img(x): return 2 * x - 1.0 def deprocess_img(x): return (x + 1.0) / 2.0 def rel_error(x,y): return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) def count_params(model): """Count the number of parameters in the current TensorFlow graph """ param_count = np.sum([np.prod(p.size()) for p in model.parameters()]) return param_count answers = dict(np.load('gan-checks-tf.npz'))


众所周知,GAN对超参数十分挑剔,并且需要训练很多个epoch。为了使本作业在没有GPU的情况下也能完成,我们将使用MNIST数据集,它包含60,000张训练图像和10,000张测试图像。每张图片都是黑色背景上居中的白色数字(0到9)。这是最早用于训练卷积神经网络的数据集之一,而且相当简单——标准的CNN模型可以轻松超过99%的准确率。

class ChunkSampler(sampler.Sampler):
    """Samples elements sequentially from some offset.

    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """

    def __init__(self, num_samples, start=0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        # Yields the indices start, start + 1, ..., start + num_samples - 1.
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples


NUM_TRAIN = 50000
NUM_VAL = 5000

NOISE_DIM = 96
batch_size = 128

# Both loaders read the MNIST training split; the samplers carve it into the
# first NUM_TRAIN images (training) and the next NUM_VAL images (validation).
mnist_train = dset.MNIST('./cs231n/datasets/MNIST_data', train=True, download=True,
                         transform=T.ToTensor())
loader_train = DataLoader(mnist_train, batch_size=batch_size,
                          sampler=ChunkSampler(NUM_TRAIN, 0))

mnist_val = dset.MNIST('./cs231n/datasets/MNIST_data', train=True, download=True,
                       transform=T.ToTensor())
loader_val = DataLoader(mnist_val, batch_size=batch_size,
                        sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))

# Preview the first training batch. The built-in next() is used because the
# Python-2 style iterator method `.next()` is not available on current
# DataLoader iterators.
imgs = next(iter(loader_train))[0].view(batch_size, 784).numpy().squeeze()
show_images(imgs)


def sample_noise(batch_size, dim):
    """
    Generate a PyTorch Tensor of uniform random noise.

    Input:
    - batch_size: Integer giving the batch size of noise to generate.
    - dim: Integer giving the dimension of noise to generate.

    Output:
    - A PyTorch Tensor of shape (batch_size, dim) containing uniform
      random noise in the range [-1, 1).
    """
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # torch.rand samples from [0, 1); scale and shift it to [-1, 1).
    return torch.rand(batch_size, dim) * 2 - 1.

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****


def test_sample_noise():
    """Check shape, type, range and sign coverage of sample_noise output."""
    batch_size = 3
    dim = 4
    torch.manual_seed(231)
    z = sample_noise(batch_size, dim)
    np_z = z.cpu().numpy()
    assert np_z.shape == (batch_size, dim)
    assert torch.is_tensor(z)
    assert np.all(np_z >= -1.0) and np.all(np_z <= 1.0)
    assert np.any(np_z < 0.0) and np.any(np_z > 0.0)
    print('All tests passed!')


test_sample_noise()


class Flatten(nn.Module):
    """Flatten every sample of a batch into a single vector."""

    def forward(self, x):
        # Keep the leading batch axis and collapse all remaining axes, e.g.
        # (N, C, H, W) -> (N, C*H*W). Input that is already (N, D) passes
        # through unchanged.
        return x.view(x.size(0), -1)


class Unflatten(nn.Module):
    """
    An Unflatten module receives an input of shape (N, C*H*W) and reshapes it
    to produce an output of shape (N, C, H, W).
    """

    def __init__(self, N=-1, C=128, H=7, W=7):
        super(Unflatten, self).__init__()
        self.N = N  # -1 lets view() infer the batch size
        self.C = C
        self.H = H
        self.W = W

    def forward(self, x):
        return x.view(self.N, self.C, self.H, self.W)


def initialize_weights(m):
    """
    Apply Xavier-uniform initialization to the weights of m in place.

    Only nn.Linear and nn.ConvTranspose2d modules are touched; any other
    module is left unchanged.
    """
    if isinstance(m, (nn.Linear, nn.ConvTranspose2d)):
        init.xavier_uniform_(m.weight.data)


# Tensor type used for every model and tensor below: the CUDA float type when
# a GPU is usable, otherwise the CPU float type. Selecting it automatically
# keeps the notebook runnable on machines without a GPU.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

我们的第一步是构建鉴别器。请在下面的函数中,把该架构填写为nn.Sequential构造函数的参数。所有全连接层都应包含偏置项。该架构为:

1. 全连接层,输入大小为784,输出大小为256
2. alpha为0.01的LeakyReLU
3. 全连接层,输入大小为256,输出大小为256
4. alpha为0.01的LeakyReLU
5. 全连接层,输入大小为256,输出大小为1
回想一下,Leaky ReLU非线性对某个固定常数 $\alpha$ 计算 $f(x) = \max(\alpha x, x)$;对于以上架构中的LeakyReLU非线性,我们设置 $\alpha = 0.01$。

def discriminator():
    """
    Build and return a PyTorch model implementing the architecture above.

    The model flattens each image to 784 values and applies fully connected
    layers 784 -> 256 -> 256 -> 1, with LeakyReLU(0.01) after the first two.
    The last layer has no activation, so the output is a raw score per image.
    """
    model = nn.Sequential(
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        Flatten(),
        nn.Linear(784, 256),
        nn.LeakyReLU(0.01),
        nn.Linear(256, 256),
        nn.LeakyReLU(0.01),
        nn.Linear(256, 1),

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    )
    return model


def test_discriminator(true_count=267009):
    """Report whether the discriminator has the expected parameter count."""
    model = discriminator()
    cur_count = count_params(model)
    if cur_count != true_count:
        print('Incorrect number of parameters in discriminator. Check your achitecture.')
    else:
        print('Correct number of parameters in discriminator.')


test_discriminator()



def generator(noise_dim=NOISE_DIM):
    """
    Build and return a PyTorch model implementing the architecture above.

    Input:
    - noise_dim: Size of the noise vector fed to the first layer.

    The model applies fully connected layers noise_dim -> 1024 -> 1024 -> 784
    with ReLU after the first two and Tanh after the last, so every output
    value lies in [-1, 1].
    """
    model = nn.Sequential(
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        nn.Linear(noise_dim, 1024),
        nn.ReLU(),
        nn.Linear(1024, 1024),
        nn.ReLU(),
        nn.Linear(1024, 784),
        nn.Tanh(),

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    )
    return model


def test_generator(true_count=1858320):
    """Report whether generator(4) has the expected parameter count."""
    model = generator(4)
    cur_count = count_params(model)
    if cur_count != true_count:
        print('Incorrect number of parameters in generator. Check your achitecture.')
    else:
        print('Correct number of parameters in generator.')


test_generator()


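计算生成器和鉴别器损耗。生成器损耗为:
$$\ell_G = -\mathbb{E}_{z \sim p(z)}\left[\log D(G(z))\right]$$
鉴别器损耗为:
$$\ell_D = -\mathbb{E}_{x \sim p_\text{data}}\left[\log D(x)\right] - \mathbb{E}_{z \sim p(z)}\left[\log \left(1 - D(G(z))\right)\right]$$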


def bce_loss(input, target):
    """
    Numerically stable version of the binary cross-entropy loss function.

    As per https://github.com/pytorch/pytorch/issues/751
    See the TensorFlow docs for a derivation of this formula:
    https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits

    Inputs:
    - input: PyTorch Tensor of shape (N, ) giving scores.
    - target: PyTorch Tensor of shape (N,) containing 0 and 1 giving targets.

    Returns:
    - A PyTorch Tensor containing the mean BCE loss over the minibatch of input data.
    """
    # max(x, 0) - x * t + log(1 + exp(-|x|)): exp() only ever sees a
    # non-positive argument, so it cannot overflow.
    neg_abs = - input.abs()
    loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
    return loss.mean()


def discriminator_loss(logits_real, logits_fake):
    """
    Computes the discriminator loss described above.

    Inputs:
    - logits_real: PyTorch Tensor of shape (N,) giving scores for the real data.
    - logits_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.

    Returns:
    - loss: PyTorch Tensor containing (scalar) the loss for the discriminator.
    """
    # Real samples are labelled 1 and fake samples 0. The *_like constructors
    # create the targets with the same dtype and device as the logits.
    real_term = bce_loss(logits_real, torch.ones_like(logits_real))
    fake_term = bce_loss(logits_fake, torch.zeros_like(logits_fake))
    loss = real_term + fake_term
    return loss


def generator_loss(logits_fake):
    """
    Computes the generator loss described above.

    Inputs:
    - logits_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.

    Returns:
    - loss: PyTorch Tensor containing the (scalar) loss for the generator.
    """
    # The generator wants its samples to be scored as real, hence target 1.
    loss = bce_loss(logits_fake, torch.ones_like(logits_fake))
    return loss


def test_discriminator_loss(logits_real, logits_fake, d_loss_true):
    """Print the relative error of discriminator_loss against a reference."""
    d_loss = discriminator_loss(torch.Tensor(logits_real).type(dtype),
                                torch.Tensor(logits_fake).type(dtype)).cpu().numpy()
    print("Maximum error in d_loss: %g"%rel_error(d_loss_true, d_loss))


test_discriminator_loss(answers['logits_real'], answers['logits_fake'],
                        answers['d_loss_true'])


def test_generator_loss(logits_fake, g_loss_true):
    """Print the relative error of generator_loss against a reference."""
    g_loss = generator_loss(torch.Tensor(logits_fake).type(dtype)).cpu().numpy()
    print("Maximum error in g_loss: %g"%rel_error(g_loss_true, g_loss))


test_generator_loss(answers['logits_fake'], answers['g_loss_true'])



def get_optimizer(model):
    """
    Construct and return an Adam optimizer for the model with learning rate 1e-3,
    beta1=0.5, and beta2=0.999.

    Input:
    - model: A PyTorch model that we want to optimize.

    Returns:
    - An Adam optimizer for the model with the desired hyperparameters.
    """
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.5, 0.999))
    return optimizer


def run_a_gan(D, G, D_solver, G_solver, discriminator_loss, generator_loss, show_every=250,
              batch_size=128, noise_size=96, num_epochs=10):
    """
    Train a GAN!

    Inputs:
    - D, G: PyTorch models for the discriminator and generator
    - D_solver, G_solver: torch.optim Optimizers to use for training the
      discriminator and generator.
    - discriminator_loss, generator_loss: Functions to use for computing the
      discriminator and generator loss, respectively.
    - show_every: Show samples after every show_every iterations.
    - batch_size: Batch size to use for training.
    - noise_size: Dimension of the noise to use as input to the generator.
    - num_epochs: Number of epochs over the training dataset to use for training.

    Batches are read from the module-level loader_train, and tensors are cast
    to the module-level dtype.
    """
    iter_count = 0
    for _ in range(num_epochs):
        for x, _ in loader_train:
            # Skip a batch of the wrong size: the reshapes below assume
            # exactly batch_size samples.
            if len(x) != batch_size:
                continue

            # ---- Discriminator step ----
            D_solver.zero_grad()
            real_data = x.type(dtype)
            # 2 * (x - 0.5) rescales pixels from [0, 1] to [-1, 1], the same
            # range as the generator's Tanh output.
            logits_real = D(2 * (real_data - 0.5)).type(dtype)

            g_fake_seed = sample_noise(batch_size, noise_size).type(dtype)
            # detach() keeps the discriminator loss from back-propagating
            # into the generator.
            fake_images = G(g_fake_seed).detach()
            logits_fake = D(fake_images.view(batch_size, 1, 28, 28))

            d_total_error = discriminator_loss(logits_real, logits_fake)
            d_total_error.backward()
            D_solver.step()

            # ---- Generator step (on a fresh noise batch) ----
            G_solver.zero_grad()
            g_fake_seed = sample_noise(batch_size, noise_size).type(dtype)
            fake_images = G(g_fake_seed)

            gen_logits_fake = D(fake_images.view(batch_size, 1, 28, 28))
            g_error = generator_loss(gen_logits_fake)
            g_error.backward()
            G_solver.step()

            if (iter_count % show_every == 0):
                print('Iter: {}, D: {:.4}, G:{:.4}'.format(iter_count,d_total_error.item(),g_error.item()))
                imgs_numpy = fake_images.detach().cpu().numpy()
                show_images(imgs_numpy[0:16])
                plt.show()
                print()
            iter_count += 1


# Make the discriminator
D = discriminator().type(dtype)

# Make the generator
G = generator().type(dtype)

# Use the function you wrote earlier to get optimizers for the Discriminator and the Generator
D_solver = get_optimizer(D)
G_solver = get_optimizer(G)

# Run it!
run_a_gan(D, G, D_solver, G_solver, discriminator_loss, generator_loss)

现在我们来看最小二乘GAN(Least Squares GAN),它是原始GAN损失函数的一种较新且更稳定的替代方案。对于这一部分,我们只需更改损失函数并重新训练模型。我们将实现该论文中的公式,其中生成器损耗为:
$$\ell_G = \frac{1}{2}\mathbb{E}_{z \sim p(z)}\left[\left(D(G(z)) - 1\right)^2\right]$$

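鉴别器损耗为:
$$\ell_D = \frac{1}{2}\mathbb{E}_{x \sim p_\text{data}}\left[\left(D(x) - 1\right)^2\right] + \frac{1}{2}\mathbb{E}_{z \sim p(z)}\left[\left(D(G(z))\right)^2\right]$$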

def ls_discriminator_loss(scores_real, scores_fake):
    """
    Compute the Least-Squares GAN loss for the discriminator.

    Inputs:
    - scores_real: PyTorch Tensor of shape (N,) giving scores for the real data.
    - scores_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.

    Outputs:
    - loss: A PyTorch Tensor containing the loss.
    """
    loss = None
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Push real scores towards 1 and fake scores towards 0; halve the sum.
    loss = (scores_real - 1).pow(2).mean() + (scores_fake).pow(2).mean()
    loss /= 2

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return loss


def ls_generator_loss(scores_fake):
    """
    Computes the Least-Squares GAN loss for the generator.

    Inputs:
    - scores_fake: PyTorch Tensor of shape (N,) giving scores for the fake data.

    Outputs:
    - loss: A PyTorch Tensor containing the loss.
    """
    # The generator wants its samples to be scored 1.
    loss = (scores_fake - 1).pow(2).mean() / 2
    return loss


def test_lsgan_loss(score_real, score_fake, d_loss_true, g_loss_true):
    """Print the relative error of both LSGAN losses against references."""
    score_real = torch.Tensor(score_real).type(dtype)
    score_fake = torch.Tensor(score_fake).type(dtype)
    d_loss = ls_discriminator_loss(score_real, score_fake).cpu().numpy()
    g_loss = ls_generator_loss(score_fake).cpu().numpy()
    print("Maximum error in d_loss: %g"%rel_error(d_loss_true, d_loss))
    print("Maximum error in g_loss: %g"%rel_error(g_loss_true, g_loss))


test_lsgan_loss(answers['logits_real'], answers['logits_fake'],
                answers['d_loss_lsgan_true'], answers['g_loss_lsgan_true'])

# Train a fresh discriminator/generator pair with the least-squares losses.
D_LS = discriminator().type(dtype)
G_LS = generator().type(dtype)

D_LS_solver = get_optimizer(D_LS)
G_LS_solver = get_optimizer(G_LS)

run_a_gan(D_LS, G_LS, D_LS_solver, G_LS_solver, ls_discriminator_loss, ls_generator_loss)



