In [2]:
from fastbook import *

## Draw two circles

Lesson 3 uses the `MNIST_SAMPLE` dataset. This sample is filled with images of threes and sevens. 

The workflow for preparing the training and testing datasets is as follows:

1. Download the `MNIST_SAMPLE` dataset from its URL and get the local path to it. 
2. Get all the paths to the threes and sevens. 
3. Put all the images inside a data structure called a tensor.
4. Stack all these images together into a single tensor and convert it to floats, with every entry representing a pixel's intensity scaled to lie between 0 and 1.
5. Construct the x's which represent the numerical representation of the images and y's which represent whether an image is a 3 or 7. 

In [6]:
# Fetch the 3-vs-7 sample of MNIST and keep the path to its root folder.
path = untar_data(URLs.MNIST_SAMPLE)

# Training images: one sorted file list per digit.
train_dir = path/'train'
threes = (train_dir/'3').ls().sorted()
sevens = (train_dir/'7').ls().sorted()

# One tensor per image file.
three_tensors = [tensor(Image.open(img_file)) for img_file in threes]
seven_tensors = [tensor(Image.open(img_file)) for img_file in sevens]

# Stack into one tensor per digit, converted to floats and divided by 255
# (presumably the pixels are 8-bit values, so this rescales them to [0, 1] -- confirm).
stacked_threes = torch.stack(three_tensors).float()/255
stacked_sevens = torch.stack(seven_tensors).float()/255

# Flatten every image to a row of 28*28 values; threes come first, then sevens.
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28*28)
# Labels as a column: 1 for a three, 0 for a seven, in the same order as train_x.
n_threes, n_sevens = len(threes), len(sevens)
train_y = tensor([1]*n_threes + [0]*n_sevens).unsqueeze(1)
dset = list(zip(train_x, train_y))

# Validation images go through the same steps (their file lists are not sorted).
valid_dir = path/'valid'
valid_3_tens = torch.stack([tensor(Image.open(img_file)) for img_file in (valid_dir/'3').ls()]).float()/255
valid_7_tens = torch.stack([tensor(Image.open(img_file)) for img_file in (valid_dir/'7').ls()]).float()/255
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1, 28*28)
n_valid_3, n_valid_7 = len(valid_3_tens), len(valid_7_tens)
valid_y = tensor([1]*n_valid_3 + [0]*n_valid_7).unsqueeze(1)
valid_dset = list(zip(valid_x, valid_y))

## Draw the ~~rest of the~~ damn owl aka the whole game

The workflow for the whole game -- the one layer version

1. Start from the prepared dataset. 
2. Use the `DataLoader()` class to set up an iterator over batches. 
3. Start with a simple model via `nn.Linear()`. 
4. Fix the number of epochs and the learning rate.
5. Experiment away. 

In [8]:
def mnist_loss(predictions, targets):
    """Mean distance between the sigmoid of `predictions` and `targets`.

    `predictions` are raw model outputs; they are passed through a sigmoid
    first. Where `targets` equals 1 the per-item loss is `1 - p`, everywhere
    else it is `p`. The mean over all items is returned.
    """
    probs = predictions.sigmoid()
    is_positive = targets==1
    per_item = torch.where(is_positive, 1-probs, probs)
    return per_item.mean()
def batch_accuracy(xb, yb):
    """Fraction of items in the batch whose thresholded prediction equals `yb`.

    `xb` holds raw model outputs; after a sigmoid, values above 0.5 count as
    a prediction of 1 and everything else as 0.
    """
    predicted_positive = xb.sigmoid() > 0.5
    hits = predicted_positive == yb
    return hits.float().mean()

In [9]:
# Iterators over mini-batches of 256 items for the training and validation sets.
dl = DataLoader(dset, batch_size=256)
valid_dl = DataLoader(valid_dset, batch_size=256)
dls = DataLoaders(dl, valid_dl)

# One linear layer: 28*28 inputs -> 1 output.
linear_model = nn.Linear(28*28, 1)
learn = Learner(dls, linear_model, opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

# Train for 10 epochs with a learning rate of 1.
lr = 1
n_epochs = 10
learn.fit(n_epochs, lr=lr)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.637194,0.502786,0.495584,00:00
1,0.376234,0.279311,0.733072,00:00
2,0.143703,0.154746,0.860648,00:00
3,0.066251,0.097512,0.917076,00:00
4,0.037467,0.073096,0.935721,00:00
5,0.02606,0.059391,0.951914,00:00
6,0.021268,0.050743,0.957311,00:00
7,0.019058,0.044939,0.964181,00:00
8,0.017881,0.040821,0.966634,00:00
9,0.01714,0.037756,0.968597,00:00


## Draw the ~~rest of the~~ damn owl aka the whole game

The workflow for the whole game -- the three layer version

1. Change the model through `nn.Sequential()`.
2. Everything else is the same. 

In [10]:
# Linear (28*28 -> 30), ReLU, Linear (30 -> 1).
hidden_layer = nn.Linear(28*28, 30)
output_layer = nn.Linear(30, 1)
simple_net = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

# Same data, optimiser, loss and metric as the single-layer learner; only the model differs.
learn = Learner(dls, simple_net, opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

# NOTE(review): 40 epochs are requested here, but the saved output of this cell
# lists only 10 -- confirm whether the output was truncated or the cell was edited.
learn.fit(40, 0.1)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.302603,0.410403,0.504907,00:00
1,0.142131,0.222038,0.814033,00:00
2,0.079345,0.112429,0.917566,00:00
3,0.052579,0.076461,0.941119,00:00
4,0.040109,0.059901,0.957311,00:00
5,0.033704,0.050566,0.964181,00:00
6,0.029999,0.044691,0.966634,00:00
7,0.027576,0.040678,0.966634,00:00
8,0.025823,0.037757,0.968597,00:00
9,0.024458,0.035528,0.970069,00:00


## Fleshing out the parts

The task was to differentiate between two digits in the best way possible. Turn images into matrices. Form the training and testing datasets. 

1. Changing one's perspective of how to start
    
    - Aim for pixel similarity and form a way of differentiating digits.
    - Ditch pixel similarity and put weights on each individual pixel instead. Higher weights go to the pixels that are most likely to be dark for a particular digit. 

2. Python side

    - Data structures: tensors v. arrays
    - List comprehension
    - Calculations via broadcasting
    
3. The actual construction of a learner

    - Initialize weights at random starting points.
    - Compute the predictions. 
    - Calculate loss. 
    - Calculate gradient. 
    - Step / Adjust the weights using the gradient and a hyperparameter called a learning rate. 
    - Repeat from second step onwards. 

## Implementing a learner for MNIST

We will be using the full MNIST dataset. Basically, we will be recycling the code. The comments in the code point to what needs to be altered. I did not rename the objects, so names such as `threes` and `sevens` now hold other digits. 

Two tasks:

1. Is it a 6 or an 8?
2. Is it a 5 or a 6?

In [None]:
# Full MNIST: the same pipeline as for MNIST_SAMPLE, for a chosen pair of digits.
# The object names are kept from the 3-vs-7 version: `threes`/`*_3_*` hold
# POS_DIGIT (label 1) and `sevens`/`*_7_*` hold NEG_DIGIT (label 0).
POS_DIGIT, NEG_DIGIT = '6', '8'  ## alter to desired digits, e.g. '5', '6'

path = untar_data(URLs.MNIST) ## alter to new URL
# Unlike MNIST_SAMPLE ('train'/'valid'), the full MNIST archive is split into
# 'training' and 'testing' folders; using the sample's folder names fails here.
train_dir, valid_dir = path/'training', path/'testing'

threes = (train_dir/POS_DIGIT).ls().sorted()
sevens = (train_dir/NEG_DIGIT).ls().sorted()
seven_tensors = [tensor(Image.open(o)) for o in sevens]
three_tensors = [tensor(Image.open(o)) for o in threes]
# Stack per digit, convert to float and divide by 255.
stacked_sevens = torch.stack(seven_tensors).float()/255
stacked_threes = torch.stack(three_tensors).float()/255
# Flatten every image to a row of 28*28 values; POS_DIGIT rows first, then NEG_DIGIT rows.
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28*28)
# Labels as a column, in the same order as train_x.
train_y = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)
dset = list(zip(train_x,train_y))

# Validation data: same steps, read from the 'testing' folder.
valid_3_tens = torch.stack([tensor(Image.open(o)) for o in (valid_dir/POS_DIGIT).ls()])
valid_3_tens = valid_3_tens.float()/255
valid_7_tens = torch.stack([tensor(Image.open(o)) for o in (valid_dir/NEG_DIGIT).ls()])
valid_7_tens = valid_7_tens.float()/255
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1, 28*28)
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x,valid_y))