Activation.cpp
#pragma once
#include <vector>
#include <stdexcept>
#include <cmath>      // std::exp
#include <algorithm>  // std::max
#include "Vector.cpp"
#include "Matrix.cpp"
// Abstract base class for activation functions
class ActivationFunction {
public:
    virtual ~ActivationFunction() = default;

    // Pure virtual functions that derived classes must implement
    virtual Vector forward(const Vector& input) const = 0;
    virtual Vector backward(const Vector& input, const Vector& gradient) const = 0;
};
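
// Contract (as implemented by the classes below): forward(x) applies the
// activation elementwise; backward(x, g) takes the pre-activation input x and
// the upstream gradient g = dL/dy, and returns dL/dx = f'(x) * g elementwise.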
// Concrete activation functions inherit from the base class
class ReLU : public ActivationFunction {
public:
    Vector forward(const Vector& input) const override {
        // Create a vector to store the results
        Vector result(input.size());
        for (size_t i = 0; i < input.size(); ++i) {
            result[i] = std::max(0.0f, input[i]);
        }
        return result;
    }

    Vector backward(const Vector& input, const Vector& gradient) const override {
        Vector result(input.size());
        // ReLU is not differentiable at 0; by convention we take the
        // derivative there to be 0
        for (size_t i = 0; i < input.size(); ++i) {
            result[i] = input[i] > 0.0f ? gradient[i] : 0.0f;
        }
        return result;
    }
};
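
// Worked example (illustrative): for x = {-1.0f, 0.0f, 2.0f} and upstream
// gradient g = {1.0f, 1.0f, 1.0f},
//   forward(x)     -> { 0.0f, 0.0f, 2.0f }
//   backward(x, g) -> { 0.0f, 0.0f, 1.0f }   (derivative at 0 taken as 0)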
class Sigmoid : public ActivationFunction {
public:
    Vector forward(const Vector& input) const override {
        // Create a vector to store the results
        Vector result(input.size());
        // Branch on the sign of x for numerical stability: for x < 0,
        // computing exp(x) / (1 + exp(x)) avoids overflow in exp(-x)
        for (size_t i = 0; i < input.size(); ++i) {
            if (input[i] < 0.0f) {
                result[i] = std::exp(input[i]) / (1.0f + std::exp(input[i]));
            } else {
                result[i] = 1.0f / (1.0f + std::exp(-input[i]));
            }
        }
        return result;
    }

    Vector backward(const Vector& input, const Vector& gradient) const override {
        Vector result(input.size());
        Vector sigmoid_x = forward(input);
        // s'(x) = s(x) * (1 - s(x))
        for (size_t i = 0; i < input.size(); ++i) {
            result[i] = sigmoid_x[i] * (1.0f - sigmoid_x[i]) * gradient[i];
        }
        return result;
    }
};
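
// Usage sketch (illustrative, not part of the original interface): a layer
// would typically hold an activation through the base class and dispatch
// virtually. applyActivation is a hypothetical helper; it assumes Vector is
// constructible from a size and exposes size()/operator[] as used above.
inline Vector applyActivation(const ActivationFunction& act, const Vector& x) {
    return act.forward(x);
}
// Example:
//   Sigmoid sigmoid;
//   Vector x(2);
//   x[0] = 0.0f; x[1] = -100.0f;             // large negative input
//   Vector y = applyActivation(sigmoid, x);  // y[0] ~= 0.5f, y[1] ~= 0.0f,
//                                            // with no intermediate overflow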