-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtrain_example.py
More file actions
176 lines (139 loc) · 4.79 KB
/
train_example.py
File metadata and controls
176 lines (139 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
"""
Training Example: Simple CNN for Image Classification
This example demonstrates how to build, train, and evaluate
a Convolutional Neural Network using only NumPy.
Network Architecture:
Conv2D(1, 16, 3x3) -> ReLU -> MaxPool(2x2) ->
Conv2D(16, 32, 3x3) -> ReLU -> MaxPool(2x2) ->
Flatten -> Dense(1568, 128) -> ReLU -> Dropout(0.5) ->
Dense(128, 10) -> Softmax
This is a classic CNN architecture similar to LeNet-5.
"""
import numpy as np
import sys
# Import our CNN components
from layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from activations import ReLU, Softmax
from losses import CrossEntropyLoss
from optimizers import Adam
from network import NeuralNetwork
from data_utils import load_mnist_like, normalize, train_val_split
# Set random seed for reproducibility
np.random.seed(42)
def build_cnn(input_channels: int = 1, num_classes: int = 10) -> NeuralNetwork:
"""
Build a simple CNN architecture.
Args:
input_channels: Number of input channels (1 for grayscale, 3 for RGB)
num_classes: Number of output classes
Returns:
Compiled neural network
"""
print("Building CNN architecture...")
model = NeuralNetwork()
# First convolutional block
model.add(Conv2D(input_channels, 16, kernel_size=3, stride=1, padding=1))
model.add(ReLU())
model.add(MaxPool2D(pool_size=2, stride=2))
# Second convolutional block
model.add(Conv2D(16, 32, kernel_size=3, stride=1, padding=1))
model.add(ReLU())
model.add(MaxPool2D(pool_size=2, stride=2))
# Fully connected layers
# After two 2x2 pooling layers, 28x28 becomes 7x7
# So we have 32 channels * 7 * 7 = 1568 features
model.add(Flatten())
model.add(Dense(1568, 128))
model.add(ReLU())
model.add(Dropout(p=0.5))
# Output layer
model.add(Dense(128, num_classes))
model.add(Softmax())
# Compile model
model.compile(
loss=CrossEntropyLoss(),
optimizer=Adam(learning_rate=0.001)
)
print("\nModel architecture built successfully!")
model.summary()
return model
def main():
"""Main training function."""
print("=" * 80)
print("CNN Training Example - NumPy Implementation")
print("=" * 80)
print()
# 1. Load and preprocess data
print("Loading data...")
X_train, y_train, X_test, y_test = load_mnist_like(
num_samples=1000,
image_size=28,
num_classes=10
)
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
print(f"Image shape: {X_train.shape[1:]}")
print()
# 2. Normalize data
print("Normalizing data...")
X_train, mean, std = normalize(X_train)
X_test, _, _ = normalize(X_test, mean=mean, std=std)
print(f"Data normalized with mean={mean:.4f}, std={std:.4f}")
print()
# 3. Create validation split
print("Creating validation split...")
X_train, X_val, y_train, y_val = train_val_split(
X_train, y_train, val_ratio=0.2, shuffle=True
)
print(f"Training samples: {X_train.shape[0]}")
print(f"Validation samples: {X_val.shape[0]}")
print()
# 4. Build model
model = build_cnn(input_channels=1, num_classes=10)
print()
# 5. Train model
print("=" * 80)
print("Training Model")
print("=" * 80)
model.fit(
X_train, y_train,
epochs=5,
batch_size=32,
validation_data=(X_val, y_val),
verbose=1,
shuffle=True
)
print()
print("=" * 80)
print("Training completed!")
print("=" * 80)
print()
# 6. Evaluate on test set
print("Evaluating on test set...")
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=32)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print()
# 7. Make predictions on a few samples
print("Making predictions on sample data...")
sample_indices = np.random.choice(X_test.shape[0], size=5, replace=False)
X_sample = X_test[sample_indices]
y_sample = y_test[sample_indices]
predictions = model.predict(X_sample)
predicted_classes = np.argmax(predictions, axis=1)
print("\nSample Predictions:")
print("-" * 40)
for i in range(len(sample_indices)):
print(f"Sample {i+1}:")
print(f" True label: {y_sample[i]}")
print(f" Predicted: {predicted_classes[i]}")
print(f" Confidence: {predictions[i, predicted_classes[i]]:.4f}")
print()
# 8. Save model weights
print("Saving model weights...")
model.save_weights("cnn_weights.npy")
print("\n" + "=" * 80)
print("Training example completed successfully!")
print("=" * 80)
if __name__ == "__main__":
main()