Deep Dream


What is Deep Dream?

DeepDream is a method Google released for visualizing convolutional neural networks (CNNs), but its uses are not limited to visualization: it lets a machine "dream". The computer amplifies certain features it finds in a natural image and renders the objects it "imagines"; the same trick can even be used to generate objects that have never existed.

How It Works

Convolutional neural networks have long been criticized for being theoretically hard to interpret. In 2013, the paper "Visualizing and Understanding Convolutional Networks" proposed visualizing the features of each layer by gradient ascent: feed a noise image into the network and, on the backward pass, update not the network weights but the pixel values of the input image, visualizing the network by "training the image". Deep Dream is built on exactly this idea.
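
To make "training the image, not the weights" concrete before the full graph-mode script below, here is a minimal sketch in TensorFlow 2 eager mode. It is an illustration under assumptions, not the article's code: the layer block3_conv1, the 20 iterations, and the 0.01 step size are arbitrary choices, and VGG's usual input preprocessing is skipped for brevity.

import tensorflow as tf
from tensorflow.keras.applications import vgg16

base = vgg16.VGG16(weights="imagenet", include_top=False)
# model that exposes an intermediate layer's activations
feature_extractor = tf.keras.Model(base.input, base.get_layer("block3_conv1").output)

img = tf.Variable(tf.random.uniform((1, 224, 224, 3)))  # start from noise
for _ in range(20):
    with tf.GradientTape() as tape:
        loss = tf.reduce_mean(feature_extractor(img))    # activation to amplify
    grads = tape.gradient(loss, img)                     # d(loss)/d(pixels); weights untouched
    grads /= tf.math.reduce_std(grads) + 1e-8            # normalize the step size
    img.assign_add(0.01 * grads)                         # gradient *ascent* on the image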

As mentioned above, Deep Dream works by amplifying image features. For example, suppose a network has learned to classify cats and dogs, and we feed it a picture of a cloud that happens to look a bit like a dog; the features the network extracts will then also be dog-like. Say the corresponding feature is $[0.6, 0.4]$, where 0.6 is the probability that the image is a dog and 0.4 the probability that it is a cat. The squared $L_2$ norm, $L_2 = x_1^2 + x_2^2$, makes a good amplification objective: because the two probabilities sum to 1, $L_2$ grows as the larger component grows and the smaller one shrinks, so maximizing $L_2$ guarantees that when $x_1 > x_2$, each additional iteration makes $x_1$ larger and $x_2$ smaller, and the image looks more and more like a dog. Each iteration therefore evaluates the $L_2$ objective and adjusts the image by gradient ascent. The starting point does not have to be a real photograph, either; one can start from a noise image, although the resulting dream images then look rather strange.
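
As a toy sanity check of this argument (not part of the Deep Dream code itself), the two-class example can be run as gradient ascent in NumPy. The step size and iteration count are arbitrary, and the gradient is projected so the two "probabilities" keep summing to 1:

import numpy as np

# start from the dog/cat feature [0.6, 0.4] from the text
x = np.array([0.6, 0.4])
for i in range(8):
    grad = 2 * x             # gradient of L2 = x1^2 + x2^2
    grad -= grad.mean()      # project so that x1 + x2 stays equal to 1
    x += 0.1 * grad          # gradient ascent step
    print(i, x)
# the dog component climbs toward 1 while the cat component shrinks toward 0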

A Keras Implementation of Deep Dream

# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.applications import vgg16
from tensorflow.keras.layers import Input
import matplotlib.pyplot as plt
import numpy as np
import os

# K.gradients and K.function need graph mode, so disable eager execution
# when running under TensorFlow 2.x
tf.compat.v1.disable_eager_execution()

DATA_DIR = "../data"
img = "cat.jpg"

def preprocess(img):
    """Convert an HxWx3 uint8 image to a 4D batch in VGG-16 input space."""
    img4d = img.copy()
    img4d = img4d.astype("float64")
    img4d = np.expand_dims(img4d, axis=0)   # add batch dimension
    img4d = vgg16.preprocess_input(img4d)   # RGB -> BGR, subtract ImageNet means
    return img4d

def deprocess(img4d):
    """Invert vgg16.preprocess_input and return a displayable uint8 image."""
    img = img4d.copy()
    img = img.reshape((img4d.shape[1], img4d.shape[2], img4d.shape[3]))
    img[:, :, 0] += 103.939                 # add back the ImageNet channel means
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68
    img = img[:, :, ::-1]                   # BGR -> RGB
    img = np.clip(img, 0, 255).astype("uint8")
    return img

########################### main ###########################

IMAGE_FILE = os.path.join(DATA_DIR, IMAGE_NAME)

img = plt.imread(IMAGE_FILE)
plt.imshow(img)
img_copy = img.copy()
print("Original image shape:", img.shape)
p_img = preprocess(img_copy)
print("After preprocess:", p_img.shape)
d_img = deprocess(p_img)
print("After deprocess:", d_img.shape)
plt.imshow(d_img)
plt.show()

# load pretrained VGG-16
batch_shape = p_img.shape
dream = Input(batch_shape=batch_shape)
model = vgg16.VGG16(input_tensor=dream, weights="imagenet", include_top=False)

# create layer name to layer dictionary
layer_dict = {layer.name : layer for layer in model.layers}

# visualize gradients at the five pooling layers
num_pool_layers = 5
lr = 0.01
fig, axes = plt.subplots(1, num_pool_layers, figsize=(20, 10))
for i in range(num_pool_layers):
    layer_name = "block{:d}_pool".format(i + 1)
    layer_output = layer_dict[layer_name].output
    loss = K.mean(layer_output)
    grads = K.gradients(loss, dream)[0]
    # normalize the gradient, then scale it up (by 1/lr) so the pattern
    # is still visible after deprocess clips values to [0, 255]
    grads /= K.sqrt(K.mean(K.square(grads))) + 1e-5
    grads /= lr
    f = K.function([dream], [loss, grads])
    img_value = p_img.copy()
    loss_value, grads_value = f([img_value])
    axes[i].set_title(layer_name)
    axes[i].imshow(deprocess(grads_value))

plt.tight_layout()
plt.show()

# deep dreaming: repeatedly move the input image along the (normalized)
# gradient of each pooling layer's mean activation
first_layer = model.layers[0]
print("input layer:", first_layer.name, first_layer.output_shape)

num_pool_layers = 5
num_iters_per_layer = 3
step = 100

for i in range(num_pool_layers):
    layer_name = "block{:d}_pool".format(i + 1)
    print("Pooling Layer: {:s}".format(layer_name))
    layer_output = layer_dict[layer_name].output
    # loss: the mean activation of this pooling layer
    loss = K.mean(layer_output)
    # gradient of the loss with respect to the input image, normalized
    grads = K.gradients(loss, dream)[0]
    grads /= K.sqrt(K.mean(K.square(grads))) + 1e-5
    # function mapping the input image to (loss, gradient)
    f = K.function([dream], [loss, grads])
    img_value = p_img.copy()
    fig, axes = plt.subplots(1, num_iters_per_layer, figsize=(20, 10))
    for it in range(num_iters_per_layer):
        loss_value, grads_value = f([img_value])
        img_value += grads_value * step   # gradient ascent on the pixels
        axes[it].imshow(deprocess(img_value))
    plt.show()

# try to dream structure out of random noise; the noise must have the same
# spatial size as the cat image, since the model input shape is fixed above
img_noise = np.random.randint(
    100, 150, size=(p_img.shape[1], p_img.shape[2], 3), dtype=np.uint8)
print(img_noise.shape)
plt.imshow(img_noise)
plt.show()

num_pool_layers = 5
num_iters_per_layer = 3
step = 100

for i in range(num_pool_layers):
    layer_name = "block{:d}_pool".format(i + 1)
    print("Pooling Layer: {:s}".format(layer_name))
    layer_output = layer_dict[layer_name].output
    # loss: the mean activation of this pooling layer
    loss = K.mean(layer_output)
    grads = K.gradients(loss, dream)[0]
    grads /= K.sqrt(K.mean(K.square(grads))) + 1e-5
    f = K.function([dream], [loss, grads])
    # start from the noise image rather than the cat image
    img_value = preprocess(img_noise)
    fig, axes = plt.subplots(1, num_iters_per_layer, figsize=(20, 10))
    for it in range(num_iters_per_layer):
        loss_value, grads_value = f([img_value])
        img_value += grads_value * step   # gradient ascent on the pixels
        axes[it].imshow(deprocess(img_value))
    plt.show()
    
# dream from random noise with a more specific objective: do gradient ascent
# on a single feature channel (here channel 24) and watch its pattern emerge
num_pool_layers = 5
num_iters_per_layer = 3
step = 100

for i in range(num_pool_layers):
    layer_name = "block{:d}_pool".format(i + 1)
    print("Pooling Layer: {:s}".format(layer_name))
    layer_output = layer_dict[layer_name].output
    # loss: the mean activation of feature channel 24 only
    loss = K.mean(layer_output[:, :, :, 24])
    grads = K.gradients(loss, dream)[0]
    grads /= K.sqrt(K.mean(K.square(grads))) + 1e-5
    f = K.function([dream], [loss, grads])
    img_value = preprocess(img_noise)
    fig, axes = plt.subplots(1, num_iters_per_layer, figsize=(20, 10))
    for it in range(num_iters_per_layer):
        loss_value, grads_value = f([img_value])
        img_value += grads_value * step   # gradient ascent on the pixels
        axes[it].imshow(deprocess(img_value))
    plt.show()

Some of the results are shown below. The original image:

The generated images: