layer.cpp
#pragma once
#include <vector>
#include <stdexcept>
#include <cmath>
#include <cstddef>   // size_t
#include <memory>    // std::unique_ptr
#include "Vector.cpp"
#include "Matrix.cpp"
#include "Activation.cpp"
#include "WeightMatrix.cpp"
class Layer {
private:
// The weight matrix handles the linear transformation (Wx + b)
WeightMatrix weights_;
// Smart pointer to activation function - allows polymorphic behavior
// We use unique_ptr because each layer owns its activation function
std::unique_ptr<ActivationFunction> activation_;
// These buffers store intermediate results during the forward pass.
// They're marked mutable because they're cached values that don't
// affect the logical state of the layer. They are sized once in the
// constructor so no reallocation is needed on each forward/backward call.
mutable Vector forward_input_buffer_; // Stores input during forward pass
mutable Vector pre_activation_buffer_; // Stores Wx + b
mutable Vector activation_buffer_; // Stores activation(Wx + b)
mutable Vector activation_gradient_buffer_; // Stores gradient through activation
public:
// Constructor initializes a layer with specific dimensions and activation
Layer(size_t input_size, size_t output_size,
std::unique_ptr<ActivationFunction> activation)
: weights_(input_size, output_size),
activation_(std::move(activation)),
forward_input_buffer_(input_size),
pre_activation_buffer_(output_size),
activation_buffer_(output_size),
activation_gradient_buffer_(output_size) {
if (!activation_) {
throw std::invalid_argument("Activation function cannot be null");
}
}
// Forward pass: transforms input through weights and activation
// Returns reference to avoid copying, const because it doesn't modify layer
const Vector& forward(const Vector& input) const {
// 1. Apply weight matrix to input (store in pre_activation_buffer_)
forward_input_buffer_ = input;
pre_activation_buffer_ = weights_.multiply(forward_input_buffer_);
// 2. Apply activation function (store in activation_buffer_)
activation_buffer_ = activation_->forward(pre_activation_buffer_);
// 3. Return activation_buffer_
return activation_buffer_;
}
// Backward pass: computes gradients for backpropagation
// Takes gradient from next layer, returns gradient for previous layer
Vector backward(const Vector& gradient) {
// 1. Compute activation function gradient using stored pre_activation
// This tells us how changes in our pre-activation values affect the loss
activation_gradient_buffer_ = activation_->backward(pre_activation_buffer_, gradient);
// 2. Compute weight gradients using our helper function
// This tells us how to adjust each weight to reduce the error
Matrix weight_gradients = compute_weight_gradients();
// 3. Update weights and biases using the computed gradients
// The bias gradient is the gradient after it has passed back through
// the activation (dL/dz), since z = Wx + b and dz/db = 1
weights_.update(weight_gradients, activation_gradient_buffer_);
// 4. Compute and return gradient for previous layer
// This tells the previous layer how it should change its outputs
return compute_input_gradients();
}
private:
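// Gradient of the loss with respect to each weight:
//   dL/dW[i][j] = activation_gradient[i] * input[j]
// i.e. the outer product of the gradient that came back through the
// activation with the input stored during the forward pass.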
Matrix compute_weight_gradients() const {
Matrix gradients(weights_.output_size(), weights_.input_size());
for (size_t i = 0; i < weights_.output_size(); ++i) {
for (size_t j = 0; j < weights_.input_size(); ++j) {
gradients.at(i, j) = activation_gradient_buffer_[i] * forward_input_buffer_[j];
}
}
return gradients;
}
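// Gradient of the loss with respect to the layer's input:
//   dL/dx[i] = sum_j W[j][i] * activation_gradient[j]
// i.e. the transpose of the weight matrix applied to the activation
// gradient; this is what gets handed back to the previous layer.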
Vector compute_input_gradients() const {
Vector input_grad(weights_.input_size());
for (size_t i = 0; i < weights_.input_size(); ++i) {
float sum = 0.0f;
for (size_t j = 0; j < weights_.output_size(); ++j) {
sum += weights_.at(j, i) * activation_gradient_buffer_[j];
}
input_grad[i] = sum;
}
return input_grad;
}
};
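// --- Usage sketch (illustrative only) ---
// A minimal example of driving one layer through a single forward/backward
// step. It assumes Activation.cpp provides a concrete ActivationFunction
// (a hypothetical `ReLU` is used here) and that Vector is constructible from
// a size and indexable with operator[]; adjust the names if the real headers
// differ.
//
//   std::unique_ptr<ActivationFunction> act = std::make_unique<ReLU>();
//   Layer layer(3, 2, std::move(act));              // 3 inputs -> 2 outputs
//
//   Vector input(3);
//   input[0] = 0.5f; input[1] = -1.0f; input[2] = 2.0f;
//
//   const Vector& output = layer.forward(input);    // activation(Wx + b)
//
//   Vector loss_grad(2);                            // dL/d(output) coming
//   loss_grad[0] = 0.1f; loss_grad[1] = -0.2f;      // from the next layer
//
//   Vector input_grad = layer.backward(loss_grad);  // dL/d(input), passed
//                                                   // to the previous layer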