# Download, if needed
tiny_imagenet();
Tiny ImageNet (Part I)
Adapted from
tiny_imagenet
tiny_imagenet (dir_=Path('../tiny-imagenet-200'))
TinyImageNetDatasetGenerator
TinyImageNetDatasetGenerator (base_dir=None)
Generate Tiny Imagenet rows lazily from zip file
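To make the lazy generation concrete, here is a minimal sketch of what such a generator could look like. It is not the slowai implementation: the class name is hypothetical, the archive layout (train/<wnid>/images/*.JPEG, val/val_annotations.txt) is that of the standard tiny-imagenet-200 zip, and the row schema ({"image": ..., "label": ...}) is an assumption.

import io
import zipfile
from pathlib import Path

from PIL import Image

class LazyTinyImageNetRows:
    "Hypothetical sketch; the real TinyImageNetDatasetGenerator may differ"

    def __init__(self, zip_path="../tiny-imagenet-200.zip"):
        self.zip_path = Path(zip_path)

    def gen_train(self):
        with zipfile.ZipFile(self.zip_path) as zf:
            for name in zf.namelist():
                # e.g. tiny-imagenet-200/train/n01443537/images/n01443537_0.JPEG
                if "/train/" in name and name.endswith(".JPEG"):
                    wnid = name.split("/train/")[1].split("/")[0]
                    img = Image.open(io.BytesIO(zf.read(name)))
                    yield {"image": img, "label": wnid}

    def gen_test(self):
        with zipfile.ZipFile(self.zip_path) as zf:
            # val_annotations.txt maps each validation image file to its wnid
            ann = zf.read("tiny-imagenet-200/val/val_annotations.txt").decode()
            rows = (line.split("\t")[:2] for line in ann.splitlines() if line)
            for fname, wnid in rows:
                data = zf.read(f"tiny-imagenet-200/val/images/{fname}")
                yield {"image": Image.open(io.BytesIO(data)), "label": wnid}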
upload = False
if upload:
    dg = TinyImageNetDatasetGenerator()
    splits = {}
    for split, split_gen in [("train", dg.gen_train), ("test", dg.gen_test)]:
        # Ensure at least one record
        next(iter(split_gen()))
        ds = Dataset.from_generator(split_gen)
        ds = ds.cast_column("image", DatasetsImage())
        splits[split] = ds
    dsd = DatasetDict(splits)
    dsd.push_to_hub("jeremyf/tiny-imagent-200")
tiny_imagenet_dataset_dict
tiny_imagenet_dataset_dict ()
Huggingface dataset from the HF hub
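This is presumably a thin wrapper around datasets.load_dataset pointed at the repo pushed above, something like the following sketch (an assumption, not the slowai source):

from datasets import load_dataset

dsd = load_dataset("jeremyf/tiny-imagent-200")  # DatasetDict with "train" and "test" splits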
to_rgb
to_rgb (img)
norm
norm (x)
denorm
denorm (x)
preprocess_factory
preprocess_factory (pipe_)
get_imagenet_dls
get_imagenet_dls (bs=512, with_word=False, training_preprocessor=[<function to_rgb at 0x7f2095257640>, PILToTensor(), ConvertImageDtype(), <function norm at 0x7f2095257760>])
dls = get_imagenet_dls(with_word=True)
Downloading readme: 100%|██████████| 453/453 [00:00<00:00, 1.30MB/s]
Downloading data: 100%|██████████| 158M/158M [00:59<00:00, 2.65MB/s]
Downloading data: 100%|██████████| 15.8M/15.8M [00:05<00:00, 2.97MB/s]
Downloading data files: 100%|██████████| 2/2 [01:04<00:00, 32.39s/it]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 614.82it/s]
Generating train split: 100%|██████████| 100000/100000 [00:00<00:00, 233408.11 examples/s]
Generating test split: 100%|██████████| 10000/10000 [00:00<00:00, 665783.68 examples/s]
Map: 100%|██████████| 10000/10000 [00:10<00:00, 913.89 examples/s]
CPU times: user 12.4 s, sys: 2.92 s, total: 15.3 s
Wall time: 1min 20s
viz
viz (dls)
viz(dls)
xb, _, words = dls.peek()
pixels = xb.reshape(-1)
pixels = pixels[torch.randperm(len(pixels))]
plt.hist(pixels[:1000], bins=25);
xb.min(), xb.max()
(tensor(-1.7143), tensor(2.5556))
Let’s implement the model with color and a slightly fancier final pooling layer.
nn.AdaptiveAvgPool2d?
Init signature: nn.AdaptiveAvgPool2d( output_size: Union[int, NoneType, Tuple[Optional[int], ...]], ) -> None Docstring: Applies a 2D adaptive average pooling over an input signal composed of several input planes. The output is of size H x W, for any input size. The number of output features is equal to the number of input planes. Args: output_size: the target output size of the image of the form H x W. Can be a tuple (H, W) or a single H for a square image H x H. H and W can be either a ``int``, or ``None`` which means the size will be the same as that of the input. Shape: - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`. - Output: :math:`(N, C, S_{0}, S_{1})` or :math:`(C, S_{0}, S_{1})`, where :math:`S=\text{output\_size}`. Examples: >>> # target output size of 5x7 >>> m = nn.AdaptiveAvgPool2d((5, 7)) >>> input = torch.randn(1, 64, 8, 9) >>> output = m(input) >>> # target output size of 7x7 (square) >>> m = nn.AdaptiveAvgPool2d(7) >>> input = torch.randn(1, 64, 10, 9) >>> output = m(input) >>> # target output size of 10x7 >>> m = nn.AdaptiveAvgPool2d((None, 7)) >>> input = torch.randn(1, 64, 10, 9) >>> output = m(input) Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule. File: ~/micromamba/envs/slowai/lib/python3.11/site-packages/torch/nn/modules/pooling.py Type: type Subclasses:
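As a quick sanity check of that "fancier" pooling head: AdaptiveAvgPool2d(1) followed by Flatten collapses any (N, C, H, W) feature map to (N, C), whatever the spatial size.

import torch
from torch import nn

pool = nn.Sequential(nn.AdaptiveAvgPool2d(output_size=1), nn.Flatten())
feats = torch.randn(512, 1024, 2, 2)  # shape of the final ResBlock's output below
pool(feats).shape                     # torch.Size([512, 1024])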
class TinyImageResNet1(nn.Module):
    def __init__(
        self,
        nfs,
        n_outputs=10,
        p_drop=0.1,
    ):
        super().__init__()
        self.nfs = nfs
        self.n_outputs = n_outputs
        self.layers = nn.Sequential(*self.get_layers(nfs, n_outputs))
        # Global average pooling head: (N, C, H, W) -> (N, C)
        self.pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=1),
            nn.Flatten(),
        )
        self.drop = nn.Dropout(p_drop)
        self.lin = nn.Linear(nfs[-1], n_outputs, bias=False)
        self.norm = nn.BatchNorm1d(n_outputs)

    def get_layers(self, nfs, n_outputs=10):
        # Stem conv, then one downsampling residual block per channel step
        layers = [Conv(3, nfs[0], ks=5, stride=1)]
        for c_in, c_out in zip(nfs, nfs[1:]):
            block = ResidualConvBlock(c_in, c_out)
            layers.append(block)
        return layers

    def forward(self, x):
        x = self.layers(x)
        x = self.pool(x)
        x = self.drop(x)
        x = self.lin(x)
        x = self.norm(x)
        return x

    @classmethod
    def kaiming(cls, *args, **kwargs):
        # Construct the model and apply Kaiming-style initialization
        model = cls(*args, **kwargs)
        model.apply(init_leaky_weights)
        return model
dls = get_imagenet_dls(bs=512)  # We don't want the human-readable labels
model = TinyImageResNet1.kaiming(
    n_outputs=200,
    nfs=[32, 64, 128, 256, 512, 1024],
)
summarize(model, [*model.layers, model.lin, model.norm], get_imagenet_dls())
Type | Input | Output | N. params | MFlops |
---|---|---|---|---|
Conv | (512, 3, 64, 64) | (512, 32, 64, 64) | 2,464 | 9.8 |
ResidualConvBlock | (512, 32, 64, 64) | (512, 64, 32, 32) | 57,664 | 58.7 |
ResidualConvBlock | (512, 64, 32, 32) | (512, 128, 16, 16) | 230,016 | 58.7 |
ResidualConvBlock | (512, 128, 16, 16) | (512, 256, 8, 8) | 918,784 | 58.7 |
ResidualConvBlock | (512, 256, 8, 8) | (512, 512, 4, 4) | 3,672,576 | 58.7 |
ResidualConvBlock | (512, 512, 4, 4) | (512, 1024, 2, 2) | 14,685,184 | 58.7 |
Linear | (512, 1024) | (512, 200) | 204,800 | 0.2 |
BatchNorm1d | (512, 200) | (512, 200) | 400 | 0.0 |
Total | 19,771,888 | 303.646864 |
lr_find
lr_find (model, dls, start_lr, gamma=1.3, n_epochs=2, extra_cbs=(), loss_fn=<function cross_entropy>)
lr_find(model, dls, start_lr=1e-5)
train
train (model, dls, lr=0.01, n_epochs=2, extra_cbs=())
model = TinyImageResNet1.kaiming(
    n_outputs=200,
    nfs=[32, 64, 128, 256, 512, 1024],
)
train(model, dls, lr=0.1, n_epochs=25)
MulticlassAccuracy | loss | epoch | train |
---|---|---|---|
0.081 | 4.524 | 0 | train |
0.119 | 4.143 | 0 | eval |
0.180 | 3.747 | 1 | train |
0.182 | 3.735 | 1 | eval |
0.242 | 3.359 | 2 | train |
0.193 | 3.809 | 2 | eval |
0.284 | 3.119 | 3 | train |
0.189 | 3.818 | 3 | eval |
0.312 | 2.964 | 4 | train |
0.178 | 4.275 | 4 | eval |
0.334 | 2.843 | 5 | train |
0.159 | 4.461 | 5 | eval |
0.357 | 2.724 | 6 | train |
0.137 | 4.707 | 6 | eval |
0.374 | 2.626 | 7 | train |
0.179 | 4.110 | 7 | eval |
0.397 | 2.514 | 8 | train |
0.124 | 4.837 | 8 | eval |
0.417 | 2.420 | 9 | train |
0.199 | 4.202 | 9 | eval |
0.437 | 2.314 | 10 | train |
0.191 | 3.961 | 10 | eval |
0.458 | 2.207 | 11 | train |
0.215 | 3.956 | 11 | eval |
0.478 | 2.110 | 12 | train |
0.237 | 3.711 | 12 | eval |
0.505 | 1.986 | 13 | train |
0.251 | 3.702 | 13 | eval |
0.537 | 1.833 | 14 | train |
0.241 | 4.379 | 14 | eval |
0.569 | 1.680 | 15 | train |
0.172 | 6.307 | 15 | eval |
0.607 | 1.497 | 16 | train |
0.208 | 4.592 | 16 | eval |
0.664 | 1.261 | 17 | train |
0.310 | 3.490 | 17 | eval |
0.733 | 0.972 | 18 | train |
0.312 | 3.723 | 18 | eval |
0.819 | 0.654 | 19 | train |
0.338 | 3.578 | 19 | eval |
0.914 | 0.325 | 20 | train |
0.398 | 3.102 | 20 | eval |
0.976 | 0.113 | 21 | train |
0.437 | 2.950 | 21 | eval |
0.997 | 0.033 | 22 | train |
0.441 | 2.972 | 22 | eval |
0.999 | 0.015 | 23 | train |
0.440 | 2.966 | 23 | eval |
1.000 | 0.012 | 24 | train |
0.441 | 2.969 | 24 | eval |
That’s clearly overfitting. Let’s add some minor augmentation.
T.RandomErasing?
Init signature: T.RandomErasing( p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False, ) Docstring: Randomly selects a rectangle region in a torch.Tensor image and erases its pixels. This transform does not support PIL Image. 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896 Args: p: probability that the random erasing operation will be performed. scale: range of proportion of erased area against input image. ratio: range of aspect ratio of erased area. value: erasing value. Default is 0. If a single int, it is used to erase all pixels. If a tuple of length 3, it is used to erase R, G, B channels respectively. If a str of 'random', erasing each pixel with random values. inplace: boolean to make this transform inplace. Default set to False. Returns: Erased Image. Example: >>> transform = transforms.Compose([ >>> transforms.RandomHorizontalFlip(), >>> transforms.PILToTensor(), >>> transforms.ConvertImageDtype(torch.float), >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), >>> transforms.RandomErasing(), >>> ]) Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule. File: ~/micromamba/envs/slowai/lib/python3.11/site-packages/torchvision/transforms/transforms.py Type: type Subclasses:
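The preprocess_and_augment pipeline used below isn't shown in this notebook. A plausible reconstruction, assuming training_preprocessor is a list of callables like the default in get_imagenet_dls and that "minor augmentation" means a random flip plus RandomErasing applied after normalization, would be:

import torch
from torchvision import transforms as T

# Hypothetical; the actual preprocess_and_augment in slowai may use different transforms
preprocess_and_augment = [
    to_rgb,                            # defined above
    T.RandomHorizontalFlip(),
    T.PILToTensor(),
    T.ConvertImageDtype(torch.float),
    norm,                              # defined above
    T.RandomErasing(p=0.25),
]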
dls = get_imagenet_dls(bs=512, training_preprocessor=preprocess_and_augment)
viz(dls)
model = TinyImageResNet1.kaiming(
    n_outputs=200,
    nfs=[32, 64, 128, 256, 512, 1024],
)
train(model, dls, lr=0.1, n_epochs=25)
MulticlassAccuracy | loss | epoch | train |
---|---|---|---|
0.078 | 4.541 | 0 | train |
0.129 | 4.056 | 0 | eval |
0.166 | 3.847 | 1 | train |
0.154 | 4.086 | 1 | eval |
0.210 | 3.532 | 2 | train |
0.179 | 3.950 | 2 | eval |
0.247 | 3.317 | 3 | train |
0.174 | 3.973 | 3 | eval |
0.271 | 3.178 | 4 | train |
0.186 | 3.907 | 4 | eval |
0.297 | 3.037 | 5 | train |
0.227 | 3.507 | 5 | eval |
0.313 | 2.945 | 6 | train |
0.221 | 3.579 | 6 | eval |
0.330 | 2.852 | 7 | train |
0.190 | 3.795 | 7 | eval |
0.351 | 2.744 | 8 | train |
0.214 | 3.588 | 8 | eval |
0.367 | 2.667 | 9 | train |
0.185 | 3.882 | 9 | eval |
0.383 | 2.590 | 10 | train |
0.232 | 3.654 | 10 | eval |
0.399 | 2.511 | 11 | train |
0.218 | 3.653 | 11 | eval |
0.416 | 2.423 | 12 | train |
0.286 | 3.208 | 12 | eval |
0.434 | 2.345 | 13 | train |
0.295 | 3.322 | 13 | eval |
0.453 | 2.251 | 14 | train |
0.368 | 2.786 | 14 | eval |
0.471 | 2.153 | 15 | train |
0.301 | 3.272 | 15 | eval |
0.498 | 2.039 | 16 | train |
0.360 | 2.885 | 16 | eval |
0.522 | 1.912 | 17 | train |
0.431 | 2.474 | 17 | eval |
0.556 | 1.765 | 18 | train |
0.403 | 2.818 | 18 | eval |
0.588 | 1.615 | 19 | train |
0.456 | 2.405 | 19 | eval |
0.628 | 1.436 | 20 | train |
0.503 | 2.159 | 20 | eval |
0.672 | 1.253 | 21 | train |
0.537 | 2.000 | 21 | eval |
0.713 | 1.091 | 22 | train |
0.575 | 1.820 | 22 | eval |
0.745 | 0.961 | 23 | train |
0.592 | 1.762 | 23 | eval |
0.768 | 0.881 | 24 | train |
0.593 | 1.752 | 24 | eval |
It does seem that augmentation is critical for performance. Compare 58.6% accuracy (mine) vs 59.3% accuracy (Howard).
How much better can we do?
Going deeper
To answer this, we can check Papers With Code. The best approach documented there is the ResNeXt paper, at 72% accuracy. The benchmark has shifted since the original course was published, but this is the paper Jeremy refers to.
So, how much work is involved in going from 60% to 70% accuracy? "Real" ResNets have multiple ResBlocks at the same feature-map resolution.
n_blocks = (3, 2, 2, 1, 1)
This specification has five downsampling stages but nine ResBlocks in total.
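A quick check of that count:

len(n_blocks), sum(n_blocks)  # (5, 9): five downsampling stages, nine ResBlocks in total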
StackableResidualConvBlock
StackableResidualConvBlock (n_blocks, c_in, c_out, conv_cls=<class 'slowai.resnets.ResidualConvBlock'>)
*A sequential container.
Modules will be added to it in the order they are passed in the constructor. Alternatively, an OrderedDict
of modules can be passed in. The forward()
method of Sequential
accepts any input and forwards it to the first module it contains. It then “chains” outputs to inputs sequentially for each subsequent module, finally returning the output of the last module.
The value a Sequential
provides over manually calling a sequence of modules is that it allows treating the whole container as a single module, such that performing a transformation on the Sequential
applies to each of the modules it stores (which are each a registered submodule of the Sequential
).
What’s the difference between a Sequential
and a :class:torch.nn.ModuleList
? A ModuleList
is exactly what it sounds like–a list for storing Module
s! On the other hand, the layers in a Sequential
are connected in a cascading way.
Example::
# Using Sequential to create a small model. When `model` is run,
# input will first be passed to `Conv2d(1,20,5)`. The output of
# `Conv2d(1,20,5)` will be used as the input to the first
# `ReLU`; the output of the first `ReLU` will become the input
# for `Conv2d(20,64,5)`. Finally, the output of
# `Conv2d(20,64,5)` will be used as input to the second `ReLU`
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
# Using Sequential with OrderedDict. This is functionally the
# same as the above code
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))*
This was a tricky module to implement. Make sure that the channel, height, and width dimensions are appropriate for all values of n_blocks.
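For orientation, here is a minimal sketch of the idea (not the actual slowai implementation): the first block changes the channel count and downsamples, and the remaining n_blocks - 1 blocks keep channels and resolution fixed. This assumes ResidualConvBlock accepts a stride argument for the non-downsampling case; the real class may handle that differently.

from torch import nn

class StackableResidualConvBlockSketch(nn.Sequential):
    "Sketch only: stack n_blocks residual blocks, downsampling once"

    def __init__(self, n_blocks, c_in, c_out, conv_cls=ResidualConvBlock):
        blocks = [conv_cls(c_in, c_out)]  # changes channels and halves H and W
        # Remaining blocks preserve the (c_out, H/2, W/2) shape
        blocks += [conv_cls(c_out, c_out, stride=1) for _ in range(n_blocks - 1)]
        super().__init__(*blocks)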
class TinyImageResNet2(TinyImageResNet1):
    def __init__(
        self,
        nfs,
        n_blocks,
        n_outputs=10,
        p_drop=0.1,
    ):
        # n_blocks must be set before super().__init__, which calls get_layers
        self.n_blocks = n_blocks
        super().__init__(nfs, n_outputs, p_drop)

    def get_layers(self, nfs, n_outputs=10):
        # Same stem, but each downsampling stage now stacks n_blocks residual blocks
        layers = [Conv(3, nfs[0], stride=1, ks=5)]
        for c_in, c_out, n_blocks in zip(nfs, nfs[1:], self.n_blocks):
            layers.append(StackableResidualConvBlock(n_blocks, c_in, c_out))
        return layers
model = TinyImageResNet2.kaiming(
    n_outputs=200,
    nfs=[32, 64, 128, 256, 512, 1024],
    n_blocks=n_blocks,
)
summarize(model, [*model.layers, model.lin, model.norm], dls)
Type | Input | Output | N. params | MFlops |
---|---|---|---|---|
Conv | (512, 3, 64, 64) | (512, 32, 64, 64) | 2,464 | 9.8 |
StackableResidualConvBlock | (512, 32, 64, 64) | (512, 64, 32, 32) | 213,952 | 218.1 |
StackableResidualConvBlock | (512, 64, 32, 32) | (512, 128, 16, 16) | 541,952 | 138.4 |
StackableResidualConvBlock | (512, 128, 16, 16) | (512, 256, 8, 8) | 2,165,248 | 138.4 |
StackableResidualConvBlock | (512, 256, 8, 8) | (512, 512, 4, 4) | 3,672,576 | 58.7 |
StackableResidualConvBlock | (512, 512, 4, 4) | (512, 1024, 2, 2) | 14,685,184 | 58.7 |
Linear | (512, 1024) | (512, 200) | 204,800 | 0.2 |
BatchNorm1d | (512, 200) | (512, 200) | 400 | 0.0 |
Total | 21,486,576 | 622.416528 |
train(model, dls, lr=0.1, n_epochs=25)
MulticlassAccuracy | loss | epoch | train |
---|---|---|---|
0.068 | 4.621 | 0 | train |
0.101 | 4.285 | 0 | eval |
0.160 | 3.876 | 1 | train |
0.187 | 3.676 | 1 | eval |
0.226 | 3.429 | 2 | train |
0.221 | 3.561 | 2 | eval |
0.277 | 3.138 | 3 | train |
0.222 | 3.567 | 3 | eval |
0.312 | 2.952 | 4 | train |
0.283 | 3.124 | 4 | eval |
0.339 | 2.806 | 5 | train |
0.269 | 3.274 | 5 | eval |
0.360 | 2.696 | 6 | train |
0.225 | 3.778 | 6 | eval |
0.383 | 2.586 | 7 | train |
0.290 | 3.208 | 7 | eval |
0.400 | 2.494 | 8 | train |
0.293 | 3.169 | 8 | eval |
0.419 | 2.405 | 9 | train |
0.324 | 3.169 | 9 | eval |
0.437 | 2.322 | 10 | train |
0.356 | 2.849 | 10 | eval |
0.454 | 2.236 | 11 | train |
0.291 | 3.322 | 11 | eval |
0.471 | 2.162 | 12 | train |
0.352 | 2.957 | 12 | eval |
0.490 | 2.068 | 13 | train |
0.326 | 3.154 | 13 | eval |
0.510 | 1.976 | 14 | train |
0.352 | 2.911 | 14 | eval |
0.534 | 1.864 | 15 | train |
0.390 | 2.655 | 15 | eval |
0.560 | 1.743 | 16 | train |
0.442 | 2.527 | 16 | eval |
0.589 | 1.611 | 17 | train |
0.455 | 2.454 | 17 | eval |
0.620 | 1.470 | 18 | train |
0.498 | 2.181 | 18 | eval |
0.658 | 1.306 | 19 | train |
0.514 | 2.130 | 19 | eval |
0.704 | 1.113 | 20 | train |
0.529 | 2.054 | 20 | eval |
0.751 | 0.924 | 21 | train |
0.583 | 1.807 | 21 | eval |
0.803 | 0.734 | 22 | train |
0.599 | 1.746 | 22 | eval |
0.838 | 0.607 | 23 | train |
0.613 | 1.712 | 23 | eval |
0.860 | 0.533 | 24 | train |
0.614 | 1.706 | 24 | eval |
Yet more augmentation
T.TrivialAugmentWide?
Init signature: T.TrivialAugmentWide( num_magnitude_bins: int = 31, interpolation: torchvision.transforms.functional.InterpolationMode = <InterpolationMode.NEAREST: 'nearest'>, fill: Optional[List[float]] = None, ) -> None Docstring: Dataset-independent data-augmentation with TrivialAugment Wide, as described in `"TrivialAugment: Tuning-free Yet State-of-the-Art Data Augmentation" <https://arxiv.org/abs/2103.10158>`_. If the image is torch Tensor, it should be of type torch.uint8, and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. If img is PIL Image, it is expected to be in mode "L" or "RGB". Args: num_magnitude_bins (int): The number of different magnitude values. interpolation (InterpolationMode): Desired interpolation enum defined by :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. fill (sequence or number, optional): Pixel fill value for the area outside the transformed image. If given a number, the value is used for all bands respectively. Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule. File: ~/micromamba/envs/slowai/lib/python3.11/site-packages/torchvision/transforms/autoaugment.py Type: type Subclasses:
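As before, the preprocess_and_trivial_augment pipeline used below isn't shown. A plausible version (an assumption, not the slowai definition) slots TrivialAugmentWide between PILToTensor and ConvertImageDtype, since it expects uint8 tensor (or PIL) input:

import torch
from torchvision import transforms as T

# Hypothetical; the actual preprocess_and_trivial_augment in slowai may differ
preprocess_and_trivial_augment = [
    to_rgb,
    T.PILToTensor(),
    T.TrivialAugmentWide(),            # operates on uint8 tensors
    T.ConvertImageDtype(torch.float),
    norm,
    T.RandomErasing(p=0.25),
]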
dls = get_imagenet_dls(bs=512, training_preprocessor=preprocess_and_trivial_augment)
viz(dls)
model = TinyImageResNet2.kaiming(
    n_outputs=200,
    nfs=[32, 64, 128, 256, 512, 1024],
    n_blocks=n_blocks,
)
summarize(model, [*model.layers, model.lin, model.norm], dls)
Type | Input | Output | N. params | MFlops |
---|---|---|---|---|
Conv | (512, 3, 64, 64) | (512, 32, 64, 64) | 2,464 | 9.8 |
StackableResidualConvBlock | (512, 32, 64, 64) | (512, 64, 32, 32) | 213,952 | 218.1 |
StackableResidualConvBlock | (512, 64, 32, 32) | (512, 128, 16, 16) | 541,952 | 138.4 |
StackableResidualConvBlock | (512, 128, 16, 16) | (512, 256, 8, 8) | 2,165,248 | 138.4 |
StackableResidualConvBlock | (512, 256, 8, 8) | (512, 512, 4, 4) | 3,672,576 | 58.7 |
StackableResidualConvBlock | (512, 512, 4, 4) | (512, 1024, 2, 2) | 14,685,184 | 58.7 |
Linear | (512, 1024) | (512, 200) | 204,800 | 0.2 |
BatchNorm1d | (512, 200) | (512, 200) | 400 | 0.0 |
Total | 21,486,576 | 622.416528 |
train(model, dls, lr=0.1, n_epochs=25)
MulticlassAccuracy | loss | epoch | train |
---|---|---|---|
0.040 | 4.902 | 0 | train |
0.075 | 4.431 | 0 | eval |
0.097 | 4.338 | 1 | train |
0.136 | 4.074 | 1 | eval |
0.147 | 3.954 | 2 | train |
0.180 | 3.691 | 2 | eval |
0.189 | 3.684 | 3 | train |
0.161 | 4.008 | 3 | eval |
0.223 | 3.488 | 4 | train |
0.193 | 3.875 | 4 | eval |
0.244 | 3.360 | 5 | train |
0.270 | 3.188 | 5 | eval |
0.266 | 3.237 | 6 | train |
0.256 | 3.541 | 6 | eval |
0.285 | 3.128 | 7 | train |
0.242 | 3.555 | 7 | eval |
0.305 | 3.023 | 8 | train |
0.302 | 3.071 | 8 | eval |
0.323 | 2.937 | 9 | train |
0.322 | 2.903 | 9 | eval |
0.338 | 2.860 | 10 | train |
0.300 | 3.106 | 10 | eval |
0.355 | 2.770 | 11 | train |
0.340 | 2.840 | 11 | eval |
0.369 | 2.690 | 12 | train |
0.320 | 3.068 | 12 | eval |
0.388 | 2.598 | 13 | train |
0.358 | 2.851 | 13 | eval |
0.404 | 2.512 | 14 | train |
0.377 | 2.662 | 14 | eval |
0.428 | 2.402 | 15 | train |
0.442 | 2.354 | 15 | eval |
0.450 | 2.293 | 16 | train |
0.451 | 2.319 | 16 | eval |
0.473 | 2.178 | 17 | train |
0.433 | 2.439 | 17 | eval |
0.501 | 2.047 | 18 | train |
0.521 | 1.995 | 18 | eval |
0.532 | 1.894 | 19 | train |
0.555 | 1.840 | 19 | eval |
0.566 | 1.747 | 20 | train |
0.570 | 1.761 | 20 | eval |
0.600 | 1.602 | 21 | train |
0.598 | 1.657 | 21 | eval |
0.630 | 1.464 | 22 | train |
0.617 | 1.567 | 22 | eval |
0.656 | 1.358 | 23 | train |
0.627 | 1.526 | 23 | eval |
0.669 | 1.307 | 24 | train |
0.629 | 1.521 | 24 | eval |
"../models/tiny_imagenet_200_classifier.pt") torch.save(model,
Continued in part 2…