|
| 1 | +from __future__ import print_function |
| 2 | + |
| 3 | +import warnings as w |
| 4 | +w.simplefilter(action = 'ignore') |
| 5 | +import argparse |
| 6 | +import numpy as np |
| 7 | +import matplotlib.pyplot as plt |
| 8 | +from scipy.stats import norm |
| 9 | + |
| 10 | +from keras.layers import Input, Dense, Lambda |
| 11 | +from keras.models import Model |
| 12 | +from keras import backend as K |
| 13 | +from keras import metrics |
| 14 | +import os |
| 15 | +from PIL import Image |
| 16 | +import cv2 |
| 17 | + |
| 18 | + |
# Default hyperparameters. The command-line flags below may override the
# epoch count, the batch size and the width of the intermediate layer.
batch_size = 100
original_dim = 12288  # flattened input length (64 * 64 * 3)
latent_dim = 2
intermediate_dim = 256
epochs = 500
epsilon_std = 1.0


# The argparse defaults reuse the constants above so that each default value
# is defined in exactly one place.
parser = argparse.ArgumentParser(description='Variational Autoencoder')
parser.add_argument('--input', type=str, required=True,
                    help='Directory containing images eg: data/')
parser.add_argument('--epoch', type=int, default=epochs,
                    help='No of training iterations')
parser.add_argument('--batch', type=int, default=batch_size,
                    help='Batch Size')
parser.add_argument('--inter_dim', type=int, default=intermediate_dim,
                    help='Dimension of Intermediate Layer')
args = parser.parse_args()

epochs = args.epoch
# Apply --batch: it used to be parsed but never assigned, so training always
# ran with the hard-coded batch size regardless of the flag.
batch_size = args.batch
intermediate_dim = args.inter_dim
| 36 | + |
# Encoder: flattened input -> hidden representation -> mean and log-variance
# of the latent distribution consumed by `sampling`.
x = Input(shape=(original_dim,))
_encoder_hidden = Dense(intermediate_dim, activation='relu')
h = _encoder_hidden(x)
_mean_layer = Dense(latent_dim)
z_mean = _mean_layer(h)
_log_var_layer = Dense(latent_dim)
z_log_var = _log_var_layer(h)
| 41 | + |
| 42 | + |
def sampling(args):
    """Sample a latent vector from the encoder outputs.

    `args` is the pair ``(z_mean, z_log_var)``. Gaussian noise with mean 0
    and standard deviation `epsilon_std` is drawn with one row per row of
    `z_mean` and `latent_dim` columns, scaled by ``exp(z_log_var / 2)`` and
    added to `z_mean`.
    """
    mean, log_var = args
    rows = K.shape(mean)[0]
    noise = K.random_normal(shape=(rows, latent_dim),
                            mean=0.,
                            stddev=epsilon_std)
    scale = K.exp(log_var / 2)
    return mean + scale * noise
| 48 | + |
# Wrap `sampling` in a Lambda layer to obtain the latent sample z.
# ("output_shape" is not needed when running on the TensorFlow backend.)
_sampling_layer = Lambda(sampling, output_shape=(latent_dim,))
z = _sampling_layer([z_mean, z_log_var])

# The decoder layers are kept as standalone objects because they are applied
# a second time later to build the generator model.
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')

# Decode the sampled z back into a flattened image.
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
| 57 | + |
| 58 | + |
| 59 | + |
| 60 | + |
# Load the training images from the directory given by --input.
# NOTE: the same images are used for training and for validation; there is
# no held-out split.
y_train = []
y_test = []

# Sorted so the sample order does not depend on the filesystem's listing order.
for each in sorted(os.listdir(args.input)):
    img = cv2.imread(os.path.join(args.input, each))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (non-image
        # files, sub-directories); skip those instead of corrupting the array.
        continue
    if img.size != original_dim:
        # The network input is fixed at original_dim values per sample, so a
        # differently sized image would only fail later with an obscure error.
        raise ValueError(
            'Image %r has shape %r (%d values); expected %d values per image'
            % (each, img.shape, img.size, original_dim))
    y_train.append(img)
    y_test.append(img)

if not y_train:
    raise ValueError('No readable images found in %r' % (args.input,))

x_train = np.array(y_train)
x_test = np.array(y_test)

print(x_train.shape)

# Scale pixel values to [0, 1] and flatten each image into a single vector.
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), -1))
x_test = x_test.reshape((len(x_test), -1))
| 82 | + |
# End-to-end autoencoder: flattened input -> reconstruction.
vae = Model(x, x_decoded_mean)

# Loss = reconstruction term (binary cross-entropy scaled by the input
# length) + KL term computed from z_mean and z_log_var, averaged over samples.
xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
_kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = -0.5 * K.sum(_kl_terms, axis=-1)
vae_loss = K.mean(xent_loss + kl_loss)

# The loss is attached to the model itself, so compile() gets no loss
# argument and fit() gets no targets.
vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')

vae.summary()

_fit_options = dict(
    shuffle=True,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, None),
)
vae.fit(x_train, **_fit_options)
| 101 | + |
# Encoder model: maps an input to its latent mean.
encoder = Model(x, z_mean)

# Generator model: applies the already-trained decoder layers to a fresh
# latent-space input, so images can be decoded from arbitrary z values.
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

# Latent means of the validation images.
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
| 113 | + |
| 114 | + |
| 115 | + |
# Decode an n x n grid of latent points and display the results.
n = 20
digit_size = 64
# Canvas holding all decoded tiles. It has 3 colour channels; the previous
# `3 * n` channel count allocated a huge array that was never filled or used.
figure = np.zeros((digit_size * n, digit_size * n, 3))
# Linearly spaced coordinates on the unit square are passed through the
# inverse CDF (ppf) of the Gaussian to produce values of the latent variables
# z, since the prior of the latent space is Gaussian.
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))


for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        z_sample = np.array([[xi, yi]])
        x_decoded = generator.predict(z_sample)
        newimage = x_decoded[0].reshape(digit_size, digit_size, 3)
        # Place the tile at row i, column j of the overview canvas.
        figure[i * digit_size:(i + 1) * digit_size,
               j * digit_size:(j + 1) * digit_size] = newimage
        # Per-sample preview, enlarged; blocks until a key is pressed.
        img_cv = cv2.resize(newimage, (256, 256))
        cv2.imshow('Color image', img_cv)
        cv2.waitKey(0)

# Show the assembled grid once all tiles have been decoded.
cv2.imshow('Latent space grid', figure)
cv2.waitKey(0)
cv2.destroyAllWindows()
0 commit comments