Last active
December 1, 2019 20:49
-
-
Save Asynchronousx/45f061024b958af76b1247172118c577 to your computer and use it in GitHub Desktop.
Tensorflow Digit Classifier
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# Optional: hide TensorFlow's native INFO and WARNING log lines.
# The variable is set *before* TensorFlow is imported so that messages
# emitted while the library loads are filtered as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402
import tensorflow as tf  # noqa: E402
# MNIST ships with Keras: 28x28 images of handwritten digits.
mnist = tf.keras.datasets.mnist

# load_data() returns two (features, labels) pairs: first the training
# split, then the test split.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Look at the first raw training image, drawn with the black-and-white
# "binary" colormap.
first_raw_digit = x_train[0]
plt.imshow(first_raw_digit, cmap=plt.cm.binary)
plt.show()

# Networks learn more easily from small, comparable input values.
# tf.keras.utils.normalize() with its default arguments L2-normalizes along
# the last axis, i.e. each pixel row of each image is scaled to unit length,
# which leaves every (non-negative) pixel value between 0 and 1.
x_train = tf.keras.utils.normalize(x_train)
x_test = tf.keras.utils.normalize(x_test)

# Draw the same image again to see the effect of the normalization.
first_normalized_digit = x_train[0]
plt.imshow(first_normalized_digit, cmap=plt.cm.binary)
plt.show()
# Build the network as a Sequential model: a plain linear stack of layers,
# which is all a simple classifier like this one needs.
#
# Layers, in order:
#   1. Flatten - turns each 28x28 input matrix into a one-dimensional
#      vector so it can be fed to a dense layer.
#   2. Two hidden Dense layers of 128 neurons each with ReLU activation.
#      "Dense" means fully connected: every neuron receives the output of
#      every neuron in the previous layer.
#   3. A Dense output layer with one neuron per class (10 digits). Softmax
#      turns its outputs into a probability distribution over the digits.
hidden_layers = [
    tf.keras.layers.Dense(128, activation=tf.nn.relu)
    for _ in range(2)
]
model = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28))]
    + hidden_layers
    + [tf.keras.layers.Dense(10, activation=tf.nn.softmax)]
)

# Training settings. Training works by minimizing the loss; accuracy is only
# reported as a metric, it is not what the optimizer drives directly.
#   - optimizer: Adam (plain stochastic gradient descent would be another
#     common entry-level choice).
#   - loss: sparse categorical cross-entropy, a multi-class loss that takes
#     the labels as integer class indices rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train the model on the training features and labels.
# One epoch is one full pass over all the training examples.
EPOCHS = 3
model.fit(x=x_train, y=y_train, epochs=EPOCHS)

# Optional: measure loss and accuracy on the held-out test split.
# Uncomment the two lines below to run the evaluation.
# results = model.evaluate(x_test, y_test, batch_size=128)
# print('test loss, test acc:', results)

# Persist the trained model so it can be reused without retraining.
# NOTE(review): recent Keras releases may require a '.keras' or '.h5' suffix
# on this path - confirm against the installed TensorFlow/Keras version.
SAVED_MODEL_PATH = 'digit_recognizer'
model.save(SAVED_MODEL_PATH)

# To reuse the trained model from another Python script, load it back first:
# num_model = tf.keras.models.load_model('digit_recognizer')
# load_model() rebuilds the model from what model.save() wrote at that path.
# Try the trained model on unseen data.
# First display the test image stored at index 0 of x_test.
plt.imshow(x_test[0], cmap=plt.cm.binary)
plt.show()

# Run the model over the test features. The array is passed directly:
# wrapping it in a list would present it as a list of inputs, which a
# single-input model does not need and newer Keras versions may reject.
predictions = model.predict(x_test)

# Each row of `predictions` holds the raw class probabilities for one image;
# argmax picks the most likely digit. According to the original author this
# should print 7 for the first test image.
print("Predicted Number: {}".format(np.argmax(predictions[0])))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment