From 5b08294dbfb49b8edaf005d04e364bd97de297a2 Mon Sep 17 00:00:00 2001 From: Zachary Laborde Date: Tue, 29 Oct 2024 18:44:18 -0400 Subject: [PATCH] Added Option for Kaiming He Layer Initialization --- rllte/xplore/reward/disagreement.py | 2 +- rllte/xplore/reward/e3b.py | 2 +- rllte/xplore/reward/icm.py | 2 +- rllte/xplore/reward/model.py | 10 ++++++++++ rllte/xplore/reward/ngu.py | 2 ++ rllte/xplore/reward/pseudo_counts.py | 2 +- rllte/xplore/reward/re3.py | 2 +- rllte/xplore/reward/ride.py | 2 +- rllte/xplore/reward/rnd.py | 2 +- 9 files changed, 19 insertions(+), 7 deletions(-) diff --git a/rllte/xplore/reward/disagreement.py b/rllte/xplore/reward/disagreement.py index 0f6ac6b2..44a9b7d5 100644 --- a/rllte/xplore/reward/disagreement.py +++ b/rllte/xplore/reward/disagreement.py @@ -54,7 +54,7 @@ class Disagreement(BaseReward): batch_size (int): The batch size for training. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of Disagreement. diff --git a/rllte/xplore/reward/e3b.py b/rllte/xplore/reward/e3b.py index 7bbeae37..544ad19f 100644 --- a/rllte/xplore/reward/e3b.py +++ b/rllte/xplore/reward/e3b.py @@ -55,7 +55,7 @@ class E3B(BaseReward): batch_size (int): The batch size for training. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of E3B. diff --git a/rllte/xplore/reward/icm.py b/rllte/xplore/reward/icm.py index 6a315a75..5d10dc45 100644 --- a/rllte/xplore/reward/icm.py +++ b/rllte/xplore/reward/icm.py @@ -54,7 +54,7 @@ class ICM(BaseReward): batch_size (int): The batch size for training. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of ICM. diff --git a/rllte/xplore/reward/model.py b/rllte/xplore/reward/model.py index bb3accb6..2a584a62 100644 --- a/rllte/xplore/reward/model.py +++ b/rllte/xplore/reward/model.py @@ -36,6 +36,12 @@ def orthogonal_layer_init(layer, std=np.sqrt(2), bias_const=0.0): th.nn.init.constant_(layer.bias, bias_const) return layer +def kaiming_he_init(layer): + th.nn.init.kaiming_normal_(layer.weight, nonlinearity='relu') + if layer.bias is not None: + th.nn.init.zeros_(layer.bias) + return layer + def default_layer_init(layer): stdv = 1. / math.sqrt(layer.weight.size(1)) layer.weight.data.uniform_(-stdv, stdv) @@ -49,6 +55,8 @@ class ObservationEncoder(nn.Module): Args: obs_shape (Tuple): The data shape of observations. latent_dim (int): The dimension of encoding vectors. + encoder_model (str): The network architecture of the encoder from ['mnih', 'espeholt']. Defaults to 'mnih' + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Defaults to 'default' Returns: Encoder instance. @@ -59,6 +67,8 @@ def __init__(self, obs_shape: Tuple, latent_dim: int, encoder_model:str = "mnih" if weight_init == "orthogonal": init_ = orthogonal_layer_init + elif weight_init == "kaiming he": + init_ = kaiming_he_init elif weight_init == "default": init_ = default_layer_init else: diff --git a/rllte/xplore/reward/ngu.py b/rllte/xplore/reward/ngu.py index f753b338..5717c034 100644 --- a/rllte/xplore/reward/ngu.py +++ b/rllte/xplore/reward/ngu.py @@ -56,6 +56,8 @@ class NGU(Fabric): sm (float): The kernel maximum similarity. mrs (float): The maximum reward scaling. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. + encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of NGU. diff --git a/rllte/xplore/reward/pseudo_counts.py b/rllte/xplore/reward/pseudo_counts.py index a2b7a8f1..f522541d 100644 --- a/rllte/xplore/reward/pseudo_counts.py +++ b/rllte/xplore/reward/pseudo_counts.py @@ -60,7 +60,7 @@ class PseudoCounts(BaseReward): sm (float): The kernel maximum similarity. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of PseudoCounts. diff --git a/rllte/xplore/reward/re3.py b/rllte/xplore/reward/re3.py index 920b6a84..d11708ea 100644 --- a/rllte/xplore/reward/re3.py +++ b/rllte/xplore/reward/re3.py @@ -50,7 +50,7 @@ class RE3(BaseReward): k (int): Use the k-th neighbors. average_entropy (bool): Use the average of entropy estimation. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of RE3. diff --git a/rllte/xplore/reward/ride.py b/rllte/xplore/reward/ride.py index e5216218..ec2bcf7c 100644 --- a/rllte/xplore/reward/ride.py +++ b/rllte/xplore/reward/ride.py @@ -60,7 +60,7 @@ class RIDE(BaseReward): sm (float): The kernel maximum similarity. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of RIDE. diff --git a/rllte/xplore/reward/rnd.py b/rllte/xplore/reward/rnd.py index ba36e0fa..e424de21 100644 --- a/rllte/xplore/reward/rnd.py +++ b/rllte/xplore/reward/rnd.py @@ -53,7 +53,7 @@ class RND(BaseReward): batch_size (int): The batch size for training. update_proportion (float): The proportion of the training data used for updating the forward dynamics models. encoder_model (str): The network architecture of the encoder from ['mnih', 'pathak']. - weight_init (str): The weight initialization method from ['default', 'orthogonal']. + weight_init (str): The weight initialization method from ['default', 'orthogonal', 'kaiming he']. Returns: Instance of RND.