Setup¶

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
import pandas as pd
from sklearn import preprocessing

%matplotlib inline

np.random.seed(1) # set a seed so that the results are consistent

Data¶

We use a binary-valued occupancy dataset for this exercise. You can obtain the dataset from https://archive.ics.uci.edu/ml/datasets/Occupancy+Detection+ . We will use pandas to load it into a dataframe.

# Load the training split. A forward-slash path is accepted on every
# platform, whereas a backslash-separated path only resolves on Windows.
df_train = pd.read_csv('occupancy_data/datatraining.txt')

# The timestamp column is not used as a feature.
df_train = df_train.drop(['date'], axis = 1)
df_train.head()

nTrain = df_train.shape[0]

# Labels as a (1, nTrain) row vector. The values are already 1-D, so a
# transpose before the reshape would be a no-op.
Ytrain = df_train['Occupancy'].values.reshape(1, nTrain)

# From here on df_train holds the feature columns only.
df_train = df_train.drop(['Occupancy'], axis = 1)
df_train.head()

# Put features in rows so that each column is one data instance.
Xtrain = np.transpose(df_train.values)

# Scale every feature row to unit L2 norm across the training examples.
Xtrain = preprocessing.normalize(Xtrain, axis = 1)

print(Xtrain.shape) ## each column now represents a data instance
print(Ytrain.shape)

(5, 8143)
(1, 8143)

# Load the test split. A forward-slash path is accepted on every platform,
# whereas a backslash-separated path only resolves on Windows.
df_test = pd.read_csv('occupancy_data/datatest.txt')

# The timestamp column is not used as a feature.
df_test = df_test.drop(['date'], axis = 1)

nTest = df_test.shape[0]

# Labels as a (1, nTest) row vector. The values are already 1-D, so a
# transpose before the reshape would be a no-op.
Ytest = df_test['Occupancy'].values.reshape(1, nTest)

df_test = df_test.drop(['Occupancy'], axis = 1)
df_test.head()

# Put features in rows so that each column is one data instance.
Xtest = np.transpose(df_test.values)

# Scale the test features with the *training* per-feature L2 norms.
# Normalising the test matrix on its own divides by norms computed over a
# different number of rows, so the two splits would end up on different
# scales and the trained weights would not transfer.
# NOTE(review): relies on df_train holding only the training feature columns
# at this point, in the same column order as df_test -- confirm.
train_feature_norms = np.linalg.norm(df_train.values, axis=0).reshape(-1, 1)
train_feature_norms[train_feature_norms == 0] = 1.0  # leave all-zero features untouched
Xtest = Xtest / train_feature_norms

print(Xtest.shape) ## each column now represents a data instance
print(Ytest.shape)

(5, 2665)
(1, 2665)

DNN code¶

def layer_sizes(X, Y, numberHiddenLayers):
    """
    Derive the three layer widths of the network.

    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)
    numberHiddenLayers -- despite its name, this value is returned unchanged
                          as n_h, the size of the hidden layer

    Returns:
    (n_x, n_h, n_y) -- sizes of the input, hidden and output layers
    """
    # The row counts of X and Y give the input and output widths.
    input_rows, output_rows = X.shape[0], Y.shape[0]
    return (input_rows, numberHiddenLayers, output_rows)

def initialize_parameters(n_x, n_h, n_y):
    """
    Create small random weights and zero biases for the two-layer network.

    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    params -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """

    # Re-seeding NumPy's global generator makes every call return the same
    # weights (and resets the global random state as a side effect).
    np.random.seed(2)

    # Both weight matrices are drawn from a zero-mean normal distribution and
    # scaled down by 0.01. W2 previously used np.random.rand, which yields
    # only non-negative values and was inconsistent with W1.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters

def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x)) of x."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def forward_propagation(X, parameters):
    """
    Run one forward pass: tanh hidden layer followed by a sigmoid output.

    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary holding "W1", "b1", "W2" and "b2"

    Returns:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Pull all four parameters out of the dictionary up front.
    W1, b1, W2, b2 = (parameters[key] for key in ('W1', 'b1', 'W2', 'b2'))

    # Hidden layer: affine map, then tanh.
    Z1 = W1.dot(X) + b1
    A1 = np.tanh(Z1)

    # Output layer: affine map, then sigmoid.
    Z2 = W2.dot(A1) + b2
    A2 = sigmoid(Z2)

    # The output must be one row with one entry per example.
    assert A2.shape == (1, X.shape[1])

    cache = dict(Z1=Z1, A1=A1, Z2=Z2, A2=A2)
    return A2, cache

def compute_cost(A2, Y, parameters):
    """
    Computes the mean binary cross-entropy cost between A2 and Y.

    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    parameters -- not used by the computation; accepted so existing callers
                  that pass the parameter dictionary keep working

    Returns:
    cost -- the cross-entropy cost as a Python float
    """

    m = Y.shape[1]  # number of examples

    # Keep the probabilities strictly inside (0, 1). If the sigmoid saturates
    # to exactly 0 or 1, np.log would return -inf and 0 * -inf would turn the
    # whole cost into nan. Values already inside the range are unchanged.
    eps = 1e-15
    A2 = np.clip(A2, eps, 1 - eps)

    # Per-example log-likelihood of the true label.
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(1 - Y, np.log(1 - A2))
    cost = -np.sum(logprobs) / m

    # Collapse any leftover singleton dimensions and return a plain float.
    return float(np.squeeze(cost))

# Backward pass: gradients of the cost with respect to W1, b1, W2 and b2

def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the cost for the two-layer network.

    Arguments:
    parameters -- python dictionary containing our parameters; only "W2" is read here
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"; only "A1" and "A2" are read here
    X -- input data of shape (n_x, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary with the gradients "dW1", "db1", "dW2" and "db2"
    """
    m = X.shape[1]
    inv_m = 1 / m  # every gradient is averaged over the m examples

    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']

    # Output layer: error of the predicted probabilities against the labels.
    dZ2 = A2 - Y
    dW2 = inv_m * np.dot(dZ2, A1.T)
    db2 = inv_m * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: push the error back through W2 and scale by
    # (1 - A1^2), the tanh derivative expressed through its output.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = inv_m * np.dot(dZ1, X.T)
    db1 = inv_m * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads

def update_parameters(parameters, grads, learning_rate=1.2):
    """
    Apply one gradient-descent step to every parameter.

    Arguments:
    parameters -- python dictionary holding "W1", "b1", "W2" and "b2"
    grads -- python dictionary holding "dW1", "db1", "dW2" and "db2"
    learning_rate -- step size multiplied with each gradient

    Returns:
    parameters -- a new python dictionary containing the updated parameters
    """
    # Each parameter "P" is paired with its gradient "dP" and moved against it.
    return {
        name: parameters[name] - learning_rate * grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }

def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False, learning_rate=1.2):
    """
    Train the two-layer network with full-batch gradient descent.

    Arguments:
    X -- dataset of shape (n_x, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    learning_rate -- step size passed to update_parameters; the default of
                     1.2 matches update_parameters' own default

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    # NOTE(review): initialize_parameters, as written in this file, re-seeds
    # the generator itself, so this seed looks redundant -- kept so the global
    # random state is touched exactly as before.
    np.random.seed(3)

    # One call is enough to obtain both the input and the output width.
    n_x, _, n_y = layer_sizes(X, Y, n_h)

    parameters = initialize_parameters(n_x, n_h, n_y)

    # Loop (gradient descent)
    for i in range(num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters

# Train on the full training set with a hidden layer of size 5; with
# print_cost=True the cost is printed every 1000 iterations.
parameters = nn_model(Xtrain, Ytrain, 5, num_iterations=10000, print_cost=True)

Cost after iteration 0: 0.693147
Cost after iteration 1000: 0.512973
Cost after iteration 2000: 0.420606
Cost after iteration 3000: 0.326401
Cost after iteration 4000: 0.264507
Cost after iteration 5000: 0.226376
Cost after iteration 6000: 0.065537
Cost after iteration 7000: 0.064785
Cost after iteration 8000: 0.064379
Cost after iteration 9000: 0.063994

def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- array of 0./1. values, one per example: the output of
                   forward propagation rounded to the nearest integer
    """
    # Only the output activations are needed; the cache is discarded.
    probabilities, _ = forward_propagation(X, parameters)
    return np.round(probabilities)

predictions = predict(parameters, Xtest)

predictions

# Percentage of test examples whose predicted label equals the true label.
# An element-wise comparison avoids calling float() on a (1, 1) array, which
# recent NumPy releases deprecate. %d truncates the percentage to an integer.
accuracy = np.mean(predictions == Ytest) * 100
print('Accuracy: %d%%' % accuracy)

Accuracy: 97%

Setup¶

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
import pandas as pd
from sklearn import preprocessing

%matplotlib inline

np.random.seed(1) # set a seed so that the results are consistent

Network using Tensorflow¶

Define the variables and placeholders¶

tf.reset_default_graph()

# Layer widths: 5 input features, 5 hidden units, 1 output unit.
n_features, n_hidden, n_outputs = 5, 5, 1

# Examples are fed as columns, so the open (None) dimension comes last.
X = tf.placeholder(tf.float32, shape=[n_features, None])

# Hidden layer: random-normal weights, zero biases.
W1 = tf.Variable(tf.random_normal([n_hidden, n_features]))

b1 = tf.Variable(tf.zeros([n_hidden, 1]))

# Output layer: random-normal weights, zero biases.
W2 = tf.Variable(tf.random_normal([n_outputs, n_hidden]))

b2 = tf.Variable(tf.zeros([n_outputs, 1]))

# True labels, one column per example.
y_true = tf.placeholder(tf.float32, shape=[n_outputs, None])

Create the Graph¶

# Hidden layer: affine map followed by tanh.
z1 = tf.matmul(W1, X) + b1
h = tf.nn.tanh(z1)

# Output layer: affine map followed by a sigmoid.
logits = tf.matmul(W2, h) + b2
y = tf.sigmoid(logits)

Define the loss and optimizer¶

# Keep the predicted probabilities strictly inside (0, 1): if the sigmoid
# saturates to exactly 0 or 1, tf.log returns -inf and the loss becomes nan.
y_clipped = tf.clip_by_value(y, 1e-7, 1 - 1e-7)

# Mean binary cross-entropy between the true labels and the predictions.
loss = tf.reduce_mean(-y_true * tf.log(y_clipped) - (1 - y_true) * tf.log(1 - y_clipped))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)

train = optimizer.minimize(loss)

Train the model¶

# Initialise the variables, train, then score on the test set

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Full-batch gradient descent for 10000 steps: every step feeds the
    # whole training set, so the feed dictionary is built once.
    train_feed = {X: Xtrain, y_true: Ytrain}

    for step in range(10000):
        _, cost = sess.run([train, loss], feed_dict=train_feed)
        if step % 1000 == 0:
            print("Cost after iteration %i: %f" % (step, cost))

    # Score the test set while the session (and its trained variables) is open.
    ypred = sess.run(y, feed_dict={X: Xtest})
    pred = np.round(ypred)
    correct_predictions = np.equal(pred, Ytest)
    print("\nAccuracy:", np.sum(correct_predictions)/Xtest.shape[1])

Cost after iteration 0: 0.690228
Cost after iteration 1000: 0.430140
Cost after iteration 2000: 0.450875
Cost after iteration 3000: 0.398449
Cost after iteration 4000: 0.301684
Cost after iteration 5000: 0.075488
Cost after iteration 6000: 0.068829
Cost after iteration 7000: 0.066808
Cost after iteration 8000: 0.065850
Cost after iteration 9000: 0.065270

Accuracy: 0.971857410882

	Temperature	Humidity	Light	CO2	HumidityRatio	Occupancy
1	23.18	27.2720	426.0	721.25	0.004793	1
2	23.15	27.2675	429.5	714.00	0.004783	1
3	23.15	27.2450	426.0	713.50	0.004779	1
4	23.15	27.2000	426.0	708.25	0.004772	1
5	23.10	27.2000	426.0	704.50	0.004757	1

	Temperature	Humidity	Light	CO2	HumidityRatio
1	23.18	27.2720	426.0	721.25	0.004793
2	23.15	27.2675	429.5	714.00	0.004783
3	23.15	27.2450	426.0	713.50	0.004779
4	23.15	27.2000	426.0	708.25	0.004772
5	23.10	27.2000	426.0	704.50	0.004757

	Temperature	Humidity	Light	CO2	HumidityRatio
140	23.7000	26.272	585.200000	749.200000	0.004764
141	23.7180	26.290	578.400000	760.400000	0.004773
142	23.7300	26.230	572.666667	769.666667	0.004765
143	23.7225	26.125	493.750000	774.750000	0.004744
144	23.7540	26.200	488.600000	779.000000	0.004767

SystemAntics: DNNs, ML, Systems and Code...

Sunday, December 16, 2018

Overview of Forward and Backward Propagation in Convolutional Neural Networks

Thursday, December 13, 2018

DNN code from scratch

Setup¶

Data¶

DNN code¶

Setup¶

Network using Tensorflow¶

Define the variables and placeholders¶

Create the Graph¶

Define the loss and optimizer¶

Train the model¶