From 5b08294dbfb49b8edaf005d04e364bd97de297a2 Mon Sep 17 00:00:00 2001
From: Zachary Laborde <zachlaborde93@gmail.com>
Date: Tue, 29 Oct 2024 18:44:18 -0400
Subject: [PATCH] Added Option for Kaiming He Layer Initialization

---
 rllte/xplore/reward/disagreement.py  |  2 +-
 rllte/xplore/reward/e3b.py           |  2 +-
 rllte/xplore/reward/icm.py           |  2 +-
 rllte/xplore/reward/model.py         | 10 ++++++++++
 rllte/xplore/reward/ngu.py           |  2 ++
 rllte/xplore/reward/pseudo_counts.py |  2 +-
 rllte/xplore/reward/re3.py           |  2 +-
 rllte/xplore/reward/ride.py          |  2 +-
 rllte/xplore/reward/rnd.py           |  2 +-
 9 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/rllte/xplore/reward/disagreement.py b/rllte/xplore/reward/disagreement.py
index 0f6ac6b2..44a9b7d5 100644
--- a/rllte/xplore/reward/disagreement.py
+++ b/rllte/xplore/reward/disagreement.py
@@ -54,7 +54,7 @@ class Disagreement(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of Disagreement.
diff --git a/rllte/xplore/reward/e3b.py b/rllte/xplore/reward/e3b.py
index 7bbeae37..544ad19f 100644
--- a/rllte/xplore/reward/e3b.py
+++ b/rllte/xplore/reward/e3b.py
@@ -55,7 +55,7 @@ class E3B(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of E3B.
diff --git a/rllte/xplore/reward/icm.py b/rllte/xplore/reward/icm.py
index 6a315a75..5d10dc45 100644
--- a/rllte/xplore/reward/icm.py
+++ b/rllte/xplore/reward/icm.py
@@ -54,7 +54,7 @@ class ICM(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of ICM.
diff --git a/rllte/xplore/reward/model.py b/rllte/xplore/reward/model.py
index bb3accb6..2a584a62 100644
--- a/rllte/xplore/reward/model.py
+++ b/rllte/xplore/reward/model.py
@@ -36,6 +36,12 @@ def orthogonal_layer_init(layer, std=np.sqrt(2), bias_const=0.0):
     th.nn.init.constant_(layer.bias, bias_const)
     return layer
 
+def kaiming_he_init(layer):  # Kaiming (He) initialisation: re-initialises `layer` in place and hands it back
+    th.nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')  # normal-distributed weights, scaled with the gain for ReLU
+    if layer.bias is not None:  # skip the bias step when the layer has no bias
+        th.nn.init.zeros_(layer.bias)  # biases start at zero
+    return layer  # the same layer object that was passed in
+
 def default_layer_init(layer):
     stdv = 1. / math.sqrt(layer.weight.size(1))
     layer.weight.data.uniform_(-stdv, stdv)
@@ -49,6 +55,8 @@ class ObservationEncoder(nn.Module):
     Args:
         obs_shape (Tuple): The data shape of observations.
         latent_dim (int): The dimension of encoding vectors.
+        encoder_model (str): The network architecture of the encoder from ['mnih', 'espeholt']. Defaults to 'mnih'.
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Defaults to 'default'.
 
     Returns:
         Encoder instance.
@@ -59,6 +67,8 @@ def __init__(self, obs_shape: Tuple, latent_dim: int, encoder_model:str = "mnih"
 
         if weight_init == "orthogonal":
             init_ = orthogonal_layer_init
+        elif weight_init == "kaiming he":
+            init_ = kaiming_he_init
         elif weight_init == "default":
             init_ = default_layer_init
         else:
diff --git a/rllte/xplore/reward/ngu.py b/rllte/xplore/reward/ngu.py
index f753b338..5717c034 100644
--- a/rllte/xplore/reward/ngu.py
+++ b/rllte/xplore/reward/ngu.py
@@ -56,6 +56,8 @@ class NGU(Fabric):
         sm (float): The kernel maximum similarity.
         mrs (float): The maximum reward scaling.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
+        encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of NGU.
diff --git a/rllte/xplore/reward/pseudo_counts.py b/rllte/xplore/reward/pseudo_counts.py
index a2b7a8f1..f522541d 100644
--- a/rllte/xplore/reward/pseudo_counts.py
+++ b/rllte/xplore/reward/pseudo_counts.py
@@ -60,7 +60,7 @@ class PseudoCounts(BaseReward):
         sm (float): The kernel maximum similarity.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of PseudoCounts.
diff --git a/rllte/xplore/reward/re3.py b/rllte/xplore/reward/re3.py
index 920b6a84..d11708ea 100644
--- a/rllte/xplore/reward/re3.py
+++ b/rllte/xplore/reward/re3.py
@@ -50,7 +50,7 @@ class RE3(BaseReward):
         k (int): Use the k-th neighbors.
         average_entropy (bool): Use the average of entropy estimation.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of RE3.
diff --git a/rllte/xplore/reward/ride.py b/rllte/xplore/reward/ride.py
index e5216218..ec2bcf7c 100644
--- a/rllte/xplore/reward/ride.py
+++ b/rllte/xplore/reward/ride.py
@@ -60,7 +60,7 @@ class RIDE(BaseReward):
         sm (float): The kernel maximum similarity.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of RIDE.
diff --git a/rllte/xplore/reward/rnd.py b/rllte/xplore/reward/rnd.py
index ba36e0fa..e424de21 100644
--- a/rllte/xplore/reward/rnd.py
+++ b/rllte/xplore/reward/rnd.py
@@ -53,7 +53,7 @@ class RND(BaseReward):
         batch_size (int): The batch size for training.
         update_proportion (float): The proportion of the training data used for updating the forward dynamics models.
         encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak'].
-        weight_init (str): The weight initialization method from ['default', 'orthogonal'].
+        weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he'].
 
     Returns:
         Instance of RND.