Setup¶
In [381]:
import tensorflow as tf
In [382]:
from tensorflow.examples.tutorials.mnist import input_data
In [383]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
import pandas as pd
from sklearn import preprocessing
%matplotlib inline
np.random.seed(1) # set a seed so that the results are consistent
Data¶
We use a binary-valued occupancy dataset for this exercise. You can obtain this dataset from https://archive.ics.uci.edu/ml/datasets/Occupancy+Detection+. We will use pandas to load it into a dataframe.
In [384]:
# Load the training split. A forward slash is used as the separator because it
# resolves on Windows, Linux, and macOS, whereas a backslash only works on Windows.
df_train = pd.read_csv('occupancy_data/datatraining.txt')
In [385]:
# Remove the 'date' column, then show the first rows as the cell output.
df_train = df_train.drop('date', axis='columns')
df_train.head()
Out[385]:
In [386]:
# Number of rows (training examples) in the frame.
nTrain = len(df_train.index)
In [387]:
# Labels as a single row: shape (1, nTrain).
Ytrain = df_train['Occupancy'].values.reshape(1, nTrain)
In [388]:
# The label column has been copied into Ytrain; keep only the feature columns.
df_train = df_train.drop('Occupancy', axis='columns')
df_train.head()
Out[388]:
In [389]:
# Features as rows and examples as columns.
Xtrain = df_train.values.T
In [390]:
# With axis=1 every row is rescaled to unit L2 norm; after the transpose a row
# is one feature measured across all training examples.
Xtrain = preprocessing.normalize(Xtrain, norm='l2', axis=1)
In [391]:
# Each column of Xtrain is one data instance; Ytrain holds the matching labels.
for matrix in (Xtrain, Ytrain):
    print(matrix.shape)
In [392]:
# Load the test split. A forward slash is used as the separator because it
# resolves on Windows, Linux, and macOS, whereas a backslash only works on Windows.
df_test = pd.read_csv('occupancy_data/datatest.txt')
In [393]:
# Remove the 'date' column from the test frame as well.
df_test = df_test.drop('date', axis='columns')
In [394]:
# Number of rows (test examples) in the frame.
nTest = len(df_test.index)
In [395]:
# Labels as a single row: shape (1, nTest).
Ytest = df_test['Occupancy'].values.reshape(1, nTest)
In [396]:
# The label column has been copied into Ytest; keep only the feature columns.
df_test = df_test.drop('Occupancy', axis='columns')
df_test.head()
Out[396]:
In [397]:
# Features as rows and examples as columns, matching the layout of Xtrain.
Xtest = df_test.values.T
In [398]:
# Scale the test features with the per-feature L2 norms of the TRAINING data.
# Normalising the test matrix by its own row norms would put it on a different
# scale from Xtrain, because a row norm grows with the number of examples and
# the two splits differ in size. At this point df_train holds only the feature
# columns ('date' and 'Occupancy' were dropped above), so its column norms are
# exactly the factors that were used to scale Xtrain.
feature_norms = np.linalg.norm(df_train.values, axis=0)
feature_norms[feature_norms == 0] = 1.0  # an all-zero feature is left unchanged rather than divided by zero
Xtest = Xtest / feature_norms[:, np.newaxis]
In [399]:
# Each column of Xtest is one data instance; Ytest holds the matching labels.
for matrix in (Xtest, Ytest):
    print(matrix.shape)
DNN code¶
In [400]:
def layer_sizes(X, Y, numberHiddenLayers):
    """
    Work out the width of each of the three layers of the network.

    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)
    numberHiddenLayers -- returned unchanged as the hidden-layer size n_h
                          (i.e. the number of hidden units, despite the name)

    Returns:
    n_x -- the size of the input layer (rows of X)
    n_h -- the size of the hidden layer
    n_y -- the size of the output layer (rows of Y)
    """
    input_width, output_width = X.shape[0], Y.shape[0]
    return (input_width, numberHiddenLayers, output_width)
In [401]:
def initialize_parameters(n_x, n_h, n_y):
    """
    Create the initial weights and biases of the two-layer network.

    Both weight matrices are drawn from a zero-mean normal distribution and
    scaled by 0.01; the biases start at zero.

    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    # Re-seeds NumPy's global generator, so every call returns the same values.
    np.random.seed(2)

    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    # randn (not rand): the uniform [0, 1) sampler would make every
    # output weight positive, unlike W1.
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    return {"W1": W1,
            "b1": b1,
            "W2": W2,
            "b2": b2}
In [402]:
def sigmoid(x):
    """
    Element-wise logistic function 1 / (1 + exp(-x)).

    Evaluated as exp(-log(1 + exp(-x))) through np.logaddexp, so that large
    negative inputs do not overflow inside exp().

    Arguments:
    x -- a scalar or numpy array

    Returns:
    the sigmoid of x, with the same shape as x
    """
    return np.exp(-np.logaddexp(0, -x))
In [403]:
def forward_propagation(X, parameters):
    """
    Run one forward pass: affine -> tanh -> affine -> sigmoid.

    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary holding "W1", "b1", "W2" and "b2"

    Returns:
    A2 -- the sigmoid output of the second layer, shape (1, m)
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    W1, b1, W2, b2 = (parameters[key] for key in ('W1', 'b1', 'W2', 'b2'))

    # Hidden layer
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)

    # Output layer (probabilities)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    # One output probability per example is expected.
    assert A2.shape == (1, X.shape[1])

    return A2, {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
In [404]:
def compute_cost(A2, Y, parameters):
    """
    Mean binary cross-entropy between predictions and labels:

        cost = -(1/m) * sum( Y * log(A2) + (1 - Y) * log(1 - A2) )

    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    parameters -- python dictionary containing the parameters; not used by the
                  computation, kept so existing callers keep working

    Returns:
    cost -- the cross-entropy cost as a Python float
    """
    m = Y.shape[1]  # number of examples

    # Keep probabilities strictly inside (0, 1). A saturated output of exactly
    # 0 or 1 would otherwise give 0 * log(0) = nan, even for a correct prediction.
    eps = 1e-15
    probs = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)

    logprobs = Y * np.log(probs) + (1 - Y) * np.log(1 - probs)

    # float() turns the NumPy scalar into a plain number (e.g. [[17]] -> 17.0).
    return float(-np.sum(logprobs) / m)
In [405]:
def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the cross-entropy cost for the
    tanh-hidden / sigmoid-output network.

    Arguments:
    parameters -- python dictionary containing our parameters (only "W2" is read)
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2" (only "A1" and "A2" are read)
    X -- input data of shape (n_x, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary with the gradients "dW1", "db1", "dW2", "db2"
    """
    m = X.shape[1]
    # 1.0 keeps this a true division even where "/" on two ints truncates.
    scale = 1.0 / m

    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']

    # Output layer: the sigmoid + cross-entropy derivative simplifies to A2 - Y.
    dZ2 = A2 - Y
    dW2 = scale * np.dot(dZ2, A1.T)
    db2 = scale * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: tanh'(z) = 1 - tanh(z)^2 = 1 - A1^2.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = scale * np.dot(dZ1, X.T)
    db1 = scale * np.sum(dZ1, axis=1, keepdims=True)

    return {"dW1": dW1,
            "db1": db1,
            "dW2": dW2,
            "db2": db2}
In [406]:
def update_parameters(parameters, grads, learning_rate=1.2):
    """
    Take one gradient-descent step: theta = theta - learning_rate * d_theta.

    Arguments:
    parameters -- python dictionary containing "W1", "b1", "W2", "b2"
    grads -- python dictionary containing "dW1", "db1", "dW2", "db2"
    learning_rate -- step size applied to every gradient

    Returns:
    parameters -- a new python dictionary containing the updated parameters
    """
    names = ('W1', 'b1', 'W2', 'b2')

    # Look everything up first, then apply the same rule to each pair.
    current = [parameters[name] for name in names]
    slopes = [grads['d' + name] for name in names]
    stepped = [value - learning_rate * slope
               for value, slope in zip(current, slopes)]

    return dict(zip(names, stepped))
In [407]:
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False, learning_rate=1.2):
    """
    Train the two-layer network with full-batch gradient descent.

    Arguments:
    X -- dataset of shape (n_x, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    learning_rate -- step size passed to update_parameters; the default of 1.2
                     is update_parameters' own default

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)

    n_x, _, n_y = layer_sizes(X, Y, n_h)
    parameters = initialize_parameters(n_x, n_h, n_y)

    # Gradient descent on the whole dataset at every step.
    for i in range(num_iterations):
        # Forward pass: probabilities plus the intermediate activations.
        A2, cache = forward_propagation(X, parameters)

        # Current value of the cross-entropy cost.
        cost = compute_cost(A2, Y, parameters)

        # Backward pass and parameter update.
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
In [408]:
# Train with 5 hidden units for 10000 full-batch iterations, logging the cost as it goes.
parameters = nn_model(Xtrain, Ytrain, n_h=5, num_iterations=10000, print_cost=True)
In [648]:
def predict(parameters, X):
    """
    Using the learned parameters, predict a class for each example in X.

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- the output probabilities of the forward pass rounded to
                   0 or 1 with np.round (i.e. thresholded at 0.5)
    """
    probabilities, _ = forward_propagation(X, parameters)
    return np.round(probabilities)
In [649]:
# Class predictions for the held-out test examples.
predictions = predict(parameters, X=Xtest)
In [650]:
# Show the predicted labels as the cell output.
predictions
Out[650]:
In [654]:
# Percentage of test examples whose prediction equals the label. np.mean
# returns a scalar, which avoids calling float() on a (1, 1) array (deprecated
# in recent NumPy releases). The printed value is unchanged: %d truncates the
# percentage to a whole number.
accuracy_pct = float(np.mean(predictions == Ytest)) * 100
print('Accuracy: %d' % accuracy_pct + '%')
In [ ]:
Below is a TensorFlow implementation of the same network as above. Observe that most of the heavy lifting is done by TensorFlow and the amount of code is reduced significantly. Also note that we only define the layers and the forward-propagation equations; TensorFlow takes care of backpropagation on its own.
Setup¶
In [134]:
import tensorflow as tf
In [135]:
from tensorflow.examples.tutorials.mnist import input_data
In [136]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
import pandas as pd
from sklearn import preprocessing
%matplotlib inline
np.random.seed(1) # set a seed so that the results are consistent
Network using Tensorflow¶
Define the variables and placeholders¶
In [153]:
# Reset TensorFlow's default graph before (re)building the network below;
# presumably so that re-running these cells does not pile up duplicate nodes.
tf.reset_default_graph()
In [176]:
# Layer widths: 5 input features, 5 hidden units, 1 output unit.
n_features, n_hidden, n_outputs = 5, 5, 1

# Inputs arrive as (features, examples); the example count is left open.
X = tf.placeholder(tf.float32, shape=[n_features, None])

# Hidden layer parameters
W1 = tf.Variable(tf.random_normal([n_hidden, n_features]))
b1 = tf.Variable(tf.zeros([n_hidden, 1]))

# Output layer parameters
W2 = tf.Variable(tf.random_normal([n_outputs, n_hidden]))
b2 = tf.Variable(tf.zeros([n_outputs, 1]))

# Ground-truth labels, one row with one entry per example.
y_true = tf.placeholder(tf.float32, [n_outputs, None])
Create the Graph¶
In [177]:
# Hidden layer: affine map followed by tanh.
hidden_pre = tf.matmul(W1, X) + b1
h = tf.nn.tanh(hidden_pre)

# Output layer: affine map followed by sigmoid, giving a probability per example.
output_pre = tf.matmul(W2, h) + b2
y = tf.sigmoid(output_pre)
Define the loss and optimizer¶
In [192]:
# Binary cross-entropy, averaged over the examples.
positive_term = -y_true * tf.log(y)
negative_term = (1 - y_true) * tf.log(1 - y)
loss = tf.reduce_mean(positive_term - negative_term)

# Plain gradient descent; TensorFlow derives the backward pass from the graph.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train = optimizer.minimize(loss)
Train the model¶
In [196]:
# Train the model and evaluate it on the test set
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # 10000 gradient-descent steps, each fed the entire training set.
    for step in range(10000):
        _, cost = sess.run([train, loss], feed_dict={X: Xtrain, y_true: Ytrain})
        if step % 1000 == 0:
            print("Cost after iteration %i: %f" % (step, cost))

    # Output probabilities for the test examples, computed while the session is open.
    ypred = sess.run(y, feed_dict={X: Xtest})

# Round the probabilities to 0/1 and compare with the labels.
pred = np.round(ypred)
correct_predictions = np.equal(pred, Ytest)
print("\nAccuracy:", np.sum(correct_predictions)/Xtest.shape[1])
In [ ]:
References
https://www.coursera.org/learn/neural-networks-deep-learning?specialization=deep-learning
https://github.com/cs231n/cs231n.github.io
No comments:
Post a Comment