# image-representation-analysis/src/vgg_network.py at main · mn-cs/image-representation-analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import numpy as np
import torch
import torch.nn as nn

from src.dataset import get_cifar10_mu_std_img, normalize
from src.path import model_path

# Names exported by a wildcard import of this module.
__all__ = ["VGG", "vgg11_bn"]


class VGG(nn.Module):
    """VGG model adapted for CIFAR-10 classification.

    The architecture is based on the original VGG paper, but modified to work
    with 32x32 input images and 10 output classes: the convolutional feature
    extractor is followed by a global ``(1, 1)`` adaptive average pool and a
    three-layer fully connected classifier.

    Args:
        features: Module applied to the input first. Its output must have 512
            channels, because the classifier's first layer is
            ``nn.Linear(512, 4096)``.
        num_classes: Number of outputs of the final linear layer.
        init_weights: When true, initialise all conv / batch-norm / linear
            parameters via ``_initialize_weights``.
    """

    # Layer names accepted by ``extract_features``.
    _FEATURE_LAYERS = ("last_conv", "last_fc")

    def __init__(self, features, num_classes=10, init_weights=True):
        super().__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

        if init_weights:
            self._initialize_weights()

    def _pooled_features(self, x):
        """Run the conv stack, pool to 1x1 and flatten to ``(batch, channels)``."""
        x = self.features(x)
        x = self.avgpool(x)
        return torch.flatten(x, 1)

    def extract_features(self, x, layer):
        """Extract features from a specific layer of the VGG model.

        Args:
            x: Input batch, passed to ``self.features``.
            layer: ``"last_conv"`` returns the pooled, flattened output of the
                convolutional stack; ``"last_fc"`` returns the output of the
                classifier with its final linear layer removed.

        Returns:
            The requested feature tensor.

        Raises:
            ValueError: If ``layer`` is not one of the supported names.
        """
        # Reject bad layer names up front so no forward pass is wasted on them.
        if layer not in self._FEATURE_LAYERS:
            raise ValueError("layer must be one of: ['last_conv', 'last_fc']")

        x = self._pooled_features(x)
        if layer == "last_conv":
            return x

        # classifier[:-1] still ends with a Dropout layer, so the result is
        # only deterministic when the module is in eval() mode.
        return self.classifier[:-1](x)

    def forward(self, x):
        """Forward pass of the VGG model; returns the classifier outputs."""
        return self.classifier(self._pooled_features(x))

    def _initialize_weights(self):
        """Initialize the weights of the VGG model.

        Conv layers get Kaiming-normal weights (fan-out, ReLU) and zero bias,
        batch-norm layers get weight 1 and bias 0, and linear layers get
        N(0, 0.01) weights and zero bias.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)


def make_layers(cfg, batch_norm=False, in_channels=3):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Iterable of layer specs. The string ``"M"`` appends a 2x2,
            stride-2 max-pool; any other value is used as the output channel
            count of a 3x3, padding-1 convolution followed by ReLU.
        batch_norm: When true, insert ``nn.BatchNorm2d`` between each
            convolution and its ReLU.
        in_channels: Channel count of the network input (3 by default, i.e.
            the value that used to be hard-coded).

    Returns:
        An ``nn.Sequential`` containing the layers in order.
    """
    layers = []

    for value in cfg:
        if value == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        layers.append(nn.Conv2d(in_channels, value, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(value))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        in_channels = value

    return nn.Sequential(*layers)


# Layer configurations consumed by make_layers(): integers are convolution
# output channel counts and "M" marks a max-pooling layer. Configuration "A"
# is the one vgg11_bn() selects.
cfgs = {
    "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
}


def ensure_vgg11_bn_weights():
    """Return the path of the ``vgg11_bn.pt`` weights file under ``model_path``.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    checkpoint = model_path / "vgg11_bn.pt"

    if not checkpoint.exists():
        raise FileNotFoundError(
            f"Missing pretrained weights: {checkpoint}\n"
            "Place the CIFAR-10 pretrained vgg11_bn.pt file in the models/ folder."
        )

    return checkpoint


def _vgg(arch, cfg, batch_norm, pretrained=False, device="cpu", **kwargs):
    """Create a VGG model with the specified configuration.

    Args:
        arch: Architecture name. Currently unused; kept so the signature stays
            stable for callers.
        cfg: Key into the module-level ``cfgs`` table.
        batch_norm: Whether ``make_layers`` inserts batch-norm layers.
        pretrained: When true, load the state dict from the file returned by
            ``ensure_vgg11_bn_weights()`` and skip random initialisation.
        device: Device the model is moved to (also used as ``map_location``
            when loading weights).
        **kwargs: Forwarded to the ``VGG`` constructor. ``init_weights`` is
            overridden to ``False`` when ``pretrained`` is true.

    Returns:
        The constructed model, moved to ``device``.

    Raises:
        FileNotFoundError: If ``pretrained`` is true and the weights file is
            missing.
        KeyError: If ``cfg`` is not a key of ``cfgs``.
    """
    weights_path = None
    if pretrained:
        # Locate the checkpoint first so a missing file fails fast, before the
        # full model has been allocated.
        weights_path = ensure_vgg11_bn_weights()
        # Loaded weights overwrite everything, so random init would be wasted.
        kwargs["init_weights"] = False

    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)

    if weights_path is not None:
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source (consider weights_only=True where the
        # installed torch version supports it).
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict)

    return model.to(device)


def vgg11_bn(pretrained=False, device="cpu", **kwargs):
    """Build the batch-normalised VGG-11 network (layer configuration "A").

    ``pretrained``, ``device`` and any extra keyword arguments are handed
    straight to ``_vgg``.
    """
    return _vgg(
        arch="vgg11_bn",
        cfg="A",
        batch_norm=True,
        pretrained=pretrained,
        device=device,
        **kwargs,
    )


def test_pretrained_vgg(x_test, y_test, batch_size=100, device=None):
    """Test the pretrained VGG model on the CIFAR-10 test set.

    Args:
        x_test: Array-like of test images; the first axis indexes samples.
            The images are copied, cast to float32 and passed through
            ``normalize`` with the CIFAR-10 mean/std before inference.
            (Layout is whatever ``normalize`` and the model expect —
            TODO confirm against ``src.dataset``.)
        y_test: Array-like of integer class labels, one per image. Any shape
            with one entry per sample (e.g. ``(N,)`` or ``(N, 1)``) is
            accepted; it is flattened before comparison.
        batch_size: Number of images per forward pass.
        device: Device to run on. ``None`` selects CUDA when available,
            otherwise CPU.

    Returns:
        Fraction of correctly classified samples, as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``batch_size`` is not positive, if there are no test
            samples, or if the number of labels does not match the number of
            images.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    # np.array(..., dtype=...) copies and casts in one step, so the caller's
    # data is never modified even if normalize() works in place.
    mu_img, std_img = get_cifar10_mu_std_img()
    x_test_normalized = normalize(np.array(x_test, dtype=np.float32), mu_img, std_img)
    num_test_samples = x_test_normalized.shape[0]

    # Flatten labels: a (N, 1) label array would otherwise broadcast against
    # the (B,) predictions into a (B, B) comparison and inflate the count.
    labels = np.asarray(y_test).reshape(-1)

    # Validate the data before paying for model construction / weight loading.
    if num_test_samples == 0:
        raise ValueError("x_test must contain at least one sample")
    if labels.shape[0] != num_test_samples:
        raise ValueError(
            f"x_test has {num_test_samples} samples but y_test has "
            f"{labels.shape[0]} labels"
        )

    vgg_network = vgg11_bn(pretrained=True, device=device)
    vgg_network.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for start_idx in range(0, num_test_samples, batch_size):
            end_idx = min(start_idx + batch_size, num_test_samples)

            x_batch = torch.from_numpy(x_test_normalized[start_idx:end_idx]).to(
                device=device, dtype=torch.float32
            )
            y_batch = torch.from_numpy(labels[start_idx:end_idx]).to(device=device)

            outputs = vgg_network(x_batch)
            predicted_labels = outputs.argmax(dim=1)

            total += y_batch.size(0)
            correct += (predicted_labels == y_batch).sum().item()

    return correct / total