{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CIFAR-10 image classification with a small CNN\n",
    "\n",
    "Trains a two-convolution-layer network on CIFAR-10 with PyTorch, then reports overall and per-class accuracy on the test set.\n",
    "\n",
    "A previous two-epoch run of this notebook (made before a random seed was fixed) reached about 55% test accuracy, with per-class accuracy ranging from 28% (cat) to 80% (ship). Outputs are stored cleared; restart the kernel and run all cells to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "import torchvision\n",
    "import torchvision.transforms as xforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here\n",
    "\n",
    "SEED = 42               # Seed for PyTorch's global random number generator\n",
    "DATA_DIR = './data'     # Where CIFAR-10 is downloaded to / read from\n",
    "BATCH_SIZE = 4          # Images per batch for both loaders\n",
    "NUM_EPOCHS = 2          # Passes over the training set\n",
    "LOG_INTERVAL = 2000     # Report the running loss every LOG_INTERVAL batches\n",
    "LEARNING_RATE = 0.001   # SGD step size\n",
    "MOMENTUM = 0.9          # SGD momentum\n",
    "\n",
    "# Seed before the network and loaders are created so that runs are repeatable\n",
    "\n",
    "torch.manual_seed( SEED )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the training and test datasets and build loaders that batch BATCH_SIZE images at a time\n",
    "#\n",
    "# Normalize computes pix = ( pix - mean ) / std per channel; with mean = std = 0.5 this moves\n",
    "# pixel values from [0..1] to [-1..1]. To move back to the [0..1] range, reverse it as\n",
    "# pix = pix * std + mean, i.e. pix / 2 + 0.5. The first argument is the mean for the R,G,B\n",
    "# channels, the second argument is the stdev for the three channels\n",
    "\n",
    "xform = xforms.Compose( [ xforms.ToTensor(), xforms.Normalize( ( 0.5,0.5,0.5 ), ( 0.5,0.5,0.5 ) ) ] )\n",
    "\n",
    "train_set = torchvision.datasets.CIFAR10( root=DATA_DIR, train=True, download=True, transform=xform )\n",
    "train_loader = torch.utils.data.DataLoader( train_set, batch_size=BATCH_SIZE, shuffle=True )\n",
    "\n",
    "test_set = torchvision.datasets.CIFAR10( root=DATA_DIR, train=False, download=True, transform=xform )\n",
    "test_loader = torch.utils.data.DataLoader( test_set, batch_size=BATCH_SIZE, shuffle=False )\n",
    "\n",
    "# Mapping from integer class label to semantic class name\n",
    "\n",
    "classes = ( 'plane','car','bird','cat','deer','dog','frog','horse','ship','truck' )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def img_show( img ):\n",
    "    \"\"\"Display a normalized PyTorch image tensor with matplotlib.\n",
    "\n",
    "    img: channels-first image tensor whose values were normalized to [-1..1]\n",
    "    \"\"\"\n",
    "    img = img / 2 + 0.5     # Un-normalize back to [0..1]\n",
    "    np_img = img.numpy()    # Convert to numpy image format\n",
    "\n",
    "    # matplotlib expects channels last, so move axis 0 to the end\n",
    "\n",
    "    plt.imshow( np.transpose( np_img, ( 1, 2, 0 ) ) )\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "def label_row( indices ):\n",
    "    \"\"\"Return the class names for a sequence of integer labels as one space-separated row.\"\"\"\n",
    "    return ' '.join( '%5s' % classes[ j ] for j in indices )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check a random batch of training images and their correct labels\n",
    "\n",
    "data_iter = iter( train_loader )\n",
    "images, labels = next( data_iter )\n",
    "\n",
    "# Display images, then one label per image (however many the batch holds)\n",
    "\n",
    "img_show( torchvision.utils.make_grid( images ) )\n",
    "print( label_row( labels.tolist() ) )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the CNN: convolution, maxpool, convolution, maxpool, FCN 1/ReLU, FCN 2/ReLU, FCN 3\n",
    "\n",
    "class Net( nn.Module ):\n",
    "    \"\"\"Small CNN mapping 3-channel 32x32 images to 10 raw class scores.\"\"\"\n",
    "\n",
    "    def __init__( self ):\n",
    "        super().__init__()\n",
    "\n",
    "        self.conv_1 = nn.Conv2d( 3, 6, 5 )          # 2D convolution, in_channel=3 (RGB), out_channel=6, kernel-size=5x5\n",
    "        self.pool = nn.MaxPool2d( 2, 2 )            # Pool by selecting max value, kernel-size=2, stride=2\n",
    "        self.conv_2 = nn.Conv2d( 6, 16, 5 )         # 2D convolution, in_channel=6 (prev conv), out_channel=16, kernel-size=5x5\n",
    "        self.fc_1 = nn.Linear( 16 * 5 * 5, 120 )    # 1D vectorized convolution results, feed into 120-node hidden layer\n",
    "        self.fc_2 = nn.Linear( 120, 84 )            # Feed from first to second 84-node hidden layer\n",
    "        self.fc_3 = nn.Linear( 84, 10 )             # Feed from second to 10-class output layer\n",
    "\n",
    "    def forward( self, x ):\n",
    "        \"\"\"Return one row of 10 raw class scores (logits) per image in the batch x.\"\"\"\n",
    "        x = self.pool( F.relu( self.conv_1( x ) ) )     # 1st convolve, ReLU, maxpool\n",
    "        x = self.pool( F.relu( self.conv_2( x ) ) )     # 2nd convolve, ReLU, maxpool\n",
    "\n",
    "        # Flatten everything except the batch dimension; unlike view( -1, ... ) this can\n",
    "        # never silently change the number of rows if the input size is unexpected\n",
    "\n",
    "        x = torch.flatten( x, 1 )\n",
    "        x = F.relu( self.fc_1( x ) )                    # Pass through FCN 1st hidden layer, ReLU results\n",
    "        x = F.relu( self.fc_2( x ) )                    # Pass through FCN 2nd hidden layer, ReLU results\n",
    "        x = self.fc_3( x )                              # Raw class scores (logits), not probabilities\n",
    "\n",
    "        return x\n",
    "\n",
    "net = Net()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set loss criterion and optimizer for backpropagation; CrossEntropyLoss expects the raw\n",
    "# class scores that Net.forward returns\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.SGD( net.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM )    # Stochastic gradient descent"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the CNN on the training dataset for NUM_EPOCHS epochs\n",
    "\n",
    "for epoch in range( NUM_EPOCHS ):\n",
    "    step_loss = 0.0     # Loss summed over the current reporting window\n",
    "    step_n = 0          # Batches accumulated in the current reporting window\n",
    "\n",
    "    for i,data in enumerate( train_loader, 0 ):\n",
    "        inputs,labels = data\n",
    "\n",
    "        optimizer.zero_grad()                   # Zero any previous gradient descent results\n",
    "\n",
    "        outputs = net( inputs )                 # Forward pass\n",
    "        loss = criterion( outputs, labels )     # Compute error\n",
    "        loss.backward()                         # Backpropagation\n",
    "        optimizer.step()                        # Update weights\n",
    "\n",
    "        step_loss += loss.item()                # Update current step error\n",
    "        step_n += 1\n",
    "        if step_n == LOG_INTERVAL:              # Print mean step error every LOG_INTERVAL batches\n",
    "            print( '[%d,%5d] loss: %.3f' % ( epoch + 1, i + 1, step_loss / step_n ) )\n",
    "            step_loss = 0.0\n",
    "            step_n = 0\n",
    "\n",
    "    # Report the leftover partial window, averaged over the batches it really contains\n",
    "    # (dividing by LOG_INTERVAL here would understate the loss); skip it when empty\n",
    "\n",
    "    if step_n > 0:\n",
    "        print( '[%d,%5d] loss: %.3f' % ( epoch + 1, i + 1, step_loss / step_n ) )\n",
    "\n",
    "print( 'Training complete...' )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check predictions for the first batch of test images (test_loader does not shuffle)\n",
    "\n",
    "data_iter = iter( test_loader )\n",
    "images,labels = next( data_iter )\n",
    "\n",
    "img_show( torchvision.utils.make_grid( images ) )\n",
    "print( 'Ground Truth: ', label_row( labels.tolist() ) )\n",
    "\n",
    "with torch.no_grad():                       # Inference only, no gradients needed\n",
    "    outputs = net( images )\n",
    "_,predicted = torch.max( outputs, 1 )       # Index of the highest score is the predicted class\n",
    "\n",
    "print( 'Predicted: ', label_row( predicted.tolist() ) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check accuracy on test set\n",
    "\n",
    "c = 0   # Correctly classified images\n",
    "n = 0   # Images evaluated\n",
    "\n",
    "with torch.no_grad():\n",
    "    for images,labels in test_loader:\n",
    "        outputs = net( images )\n",
    "        _,predicted = torch.max( outputs, 1 )\n",
    "        n += labels.size( 0 )\n",
    "        c += ( predicted == labels ).sum().item()\n",
    "\n",
    "# Report the number of images actually evaluated rather than a hard-coded count\n",
    "\n",
    "print( 'Network accuracy on %s test images: %d%%' % ( format( n, ',' ), 100.0 * c / n ) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check accuracy for the individual classes\n",
    "\n",
    "class_c = [ 0 ] * len( classes )    # Correct predictions per class\n",
    "class_n = [ 0 ] * len( classes )    # Test images seen per class\n",
    "\n",
    "with torch.no_grad():\n",
    "    for images,labels in test_loader:\n",
    "        outputs = net( images )\n",
    "        _,predicted = torch.max( outputs, 1 )\n",
    "\n",
    "        # Walk the batch at its real length so a short final batch (or a batch size\n",
    "        # other than four) is counted correctly\n",
    "\n",
    "        hits = ( predicted == labels )\n",
    "        for label,hit in zip( labels.tolist(), hits.tolist() ):\n",
    "            class_c[ label ] += int( hit )\n",
    "            class_n[ label ] += 1\n",
    "\n",
    "for i,name in enumerate( classes ):\n",
    "    if class_n[ i ] == 0:           # Class absent from the test data, nothing to divide by\n",
    "        print( 'Accuracy for %5s: n/a' % name )\n",
    "    else:\n",
    "        print( 'Accuracy for %5s: %2d%%' % ( name, 100.0 * class_c[ i ] / class_n[ i ] ) )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base-ex",
   "language": "python",
   "name": "base-ex"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}