From ff00aac951283db11ca83ac69974ea2b3d9e32d8 Mon Sep 17 00:00:00 2001
From: davidwalter2
Date: Wed, 14 Jan 2026 10:02:17 -0500
Subject: [PATCH 1/3] Initial work on beta variation templates

---
 rabbit/fitter.py       | 100 +++++++++++++++++++++++++++++++----------
 rabbit/inputdata.py    |   4 ++
 rabbit/tensorwriter.py |  59 ++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 24 deletions(-)

diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index faba62a..29eb88c 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -912,15 +912,17 @@ def _compute_global_impacts_beta0(self, cov_dexpdx, profile=True):
                 profile=profile, compute_norm=False, full=False
             )
             lbeta = self._compute_lbeta(beta)
-            pdlbetadbeta = t1.gradient(lbeta, self.ubeta)
+            # pdlbetadbeta = t1.gradient(lbeta, self.ubeta)
+            pdlbetadbeta = t1.jacobian(lbeta, self.ubeta)
 
         dbetadx = t1.jacobian(beta, self.x)
 
         # pd2lbetadbeta2 is diagonal so we can use gradient instead of jacobian
-        pd2lbetadbeta2_diag = t2.gradient(pdlbetadbeta, self.ubeta)
+        # pd2lbetadbeta2_diag = t2.gradient(pdlbetadbeta, self.ubeta)
+        sbeta = t2.jacobian(pdlbetadbeta, self.ubeta)
         # this the cholesky decomposition of pd2lbetadbeta2
-        sbeta = tf.linalg.LinearOperatorDiag(
-            tf.sqrt(tf.reshape(pd2lbetadbeta2_diag, [-1])), is_self_adjoint=True
-        )
+        # sbeta = tf.linalg.LinearOperatorDiag(
+        #     tf.sqrt(tf.reshape(pd2lbetadbeta2_diag, [-1])), is_self_adjoint=True
+        # )
 
         impacts_beta_shape = (*self.beta_shape, cov_dexpdx.shape[-1])
         impacts_beta0 = tf.zeros(shape=impacts_beta_shape, dtype=cov_dexpdx.dtype)
@@ -1126,31 +1128,56 @@ def compute_derivatives(dvars):
         expcov = None
 
         if pdexpdbeta is not None:
-            pd2ldbeta2 = self._pd2ldbeta2(profile)
-
-            if self.covarianceFit and profile:
-                pd2ldbeta2_pdexpdbeta = pd2ldbeta2.solve(pdexpdbeta, adjoint_arg=True)
-            else:
-                if self.binByBinStatType == "normal-additive":
-                    pd2ldbeta2_pdexpdbeta = pdexpdbeta / pd2ldbeta2[None, :]
-                else:
-                    pd2ldbeta2_pdexpdbeta = tf.where(
-                        self.betamask[None, :],
-                        tf.zeros_like(pdexpdbeta),
-                        pdexpdbeta / pd2ldbeta2[None, :],
-                    )
+            if self.indata.betavar is not None and full:
+                _0, _1, pd2ldbeta2 = self.loss_val_grad_hess_beta(profile=profile)
 
-            # flatten all but first axes
                 batch = tf.shape(pdexpdbeta)[0]
                 pdexpdbeta = tf.reshape(pdexpdbeta, [batch, -1])
-            pd2ldbeta2_pdexpdbeta = tf.transpose(
-                tf.reshape(pd2ldbeta2_pdexpdbeta, [batch, -1])
+
+                betamask = ~tf.reshape(self.betamask, [-1])
+                pdexpdbeta = tf.boolean_mask(pdexpdbeta, betamask, axis=1)
+
+                chol = tf.linalg.cholesky(pd2ldbeta2)
+                pd2ldbeta2_pdexpdbeta = tf.linalg.cholesky_solve(
+                    chol, tf.transpose(pdexpdbeta)
                 )
 
-            if compute_cov:
-                expcov += pdexpdbeta @ pd2ldbeta2_pdexpdbeta
+                if compute_cov:
+                    expcov += pdexpdbeta @ pd2ldbeta2_pdexpdbeta
+                else:
+                    expvar_flat += tf.einsum(
+                        "ik,ki->i", pdexpdbeta, pd2ldbeta2_pdexpdbeta
+                    )
             else:
-                expvar_flat += tf.einsum("ik,ki->i", pdexpdbeta, pd2ldbeta2_pdexpdbeta)
+                pd2ldbeta2 = self._pd2ldbeta2(profile)
+
+                if self.covarianceFit and profile:
+                    pd2ldbeta2_pdexpdbeta = pd2ldbeta2.solve(
+                        pdexpdbeta, adjoint_arg=True
+                    )
+                else:
+                    if self.binByBinStatType == "normal-additive":
+                        pd2ldbeta2_pdexpdbeta = pdexpdbeta / pd2ldbeta2[None, :]
+                    else:
+                        pd2ldbeta2_pdexpdbeta = tf.where(
+                            self.betamask[None, :],
+                            tf.zeros_like(pdexpdbeta),
+                            pdexpdbeta / pd2ldbeta2[None, :],
+                        )
+
+                # flatten all but first axes
+                batch = tf.shape(pdexpdbeta)[0]
+                pdexpdbeta = tf.reshape(pdexpdbeta, [batch, -1])
+                pd2ldbeta2_pdexpdbeta = tf.transpose(
+                    tf.reshape(pd2ldbeta2_pdexpdbeta, [batch, -1])
+                )
+
+                if compute_cov:
+                    expcov += pdexpdbeta @ pd2ldbeta2_pdexpdbeta
+                else:
+                    expvar_flat += tf.einsum(
+                        "ik,ki->i", pdexpdbeta, pd2ldbeta2_pdexpdbeta
+                    )
 
         if compute_cov:
             expvar_flat = tf.linalg.diag_part(expcov)
@@ -1633,6 +1660,23 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
             betamask = self.betamask[: nexp.shape[0]]
             if self.binByBinStatMode == "full":
                 norm = tf.where(betamask, norm, betasel * norm)
+
+                if self.indata.betavar is not None and full:
+                    # apply beta variations as normal scaling
+                    n0 = self.indata.norm
+                    sbeta = tf.math.sqrt(self.kstat[: self.indata.nbins])
+                    dbeta = sbeta * (betasel[: self.indata.nbins] - 1)
+                    dbeta = tf.where(
+                        betamask[: self.indata.nbins], tf.zeros_like(dbeta), dbeta
+                    )
+                    var = tf.einsum("ijk,jk->ik", self.indata.betavar, dbeta)
+
+                    safe_n0 = tf.where(
+                        n0 > 0, n0, 1.0
+                    )  # Use 1.0 as a dummy to avoid div by zero
+                    ratio = var / safe_n0
+                    norm = tf.where(n0 > 0, norm * (1 + ratio), norm)
+
                 nexp = tf.reduce_sum(norm, -1)
             else:
                 nexp = tf.where(betamask, nexp, nexp * betasel)
@@ -2063,6 +2107,14 @@ def loss_val_grad_hess_beta(self, profile=True):
         grad = t1.gradient(val, self.ubeta)
         hess = t2.jacobian(grad, self.ubeta)
 
+        grad = tf.reshape(grad, [-1])
+        hess = tf.reshape(hess, [grad.shape[0], grad.shape[0]])
+
+        betamask = ~tf.reshape(self.betamask, [-1])
+        grad = grad[betamask]
+        hess = tf.boolean_mask(hess, betamask, axis=0)
+        hess = tf.boolean_mask(hess, betamask, axis=1)
+
         return val, grad, hess
 
     def minimize(self):
diff --git a/rabbit/inputdata.py b/rabbit/inputdata.py
index 806bd44..88e41dd 100644
--- a/rabbit/inputdata.py
+++ b/rabbit/inputdata.py
@@ -90,6 +90,10 @@ def __init__(self, filename, pseudodata=None):
         else:
             self.norm = maketensor(f["hnorm"])
             self.logk = maketensor(f["hlogk"])
+        if "hbetavariations" in f.keys():
+            self.betavar = maketensor(f["hbetavariations"])
+        else:
+            self.betavar = None
 
         # infer some metadata from loaded information
         self.dtype = self.data_obs.dtype
diff --git a/rabbit/tensorwriter.py b/rabbit/tensorwriter.py
index 9121284..dec3cef 100644
--- a/rabbit/tensorwriter.py
+++ b/rabbit/tensorwriter.py
@@ -55,6 +55,7 @@ def __init__(
         self.dict_logkhalfdiff = {}  # [channel][proc][syst]
         self.dict_logkavg_indices = {}
         self.dict_logkhalfdiff_indices = {}
+        self.dict_beta_variations = {}  # [channel][syst][process]
 
         self.clipSystVariations = False
         if self.clipSystVariations > 0.0:
@@ -160,6 +161,7 @@ def add_channel(self, axes, name=None, masked=False, flow=False):
         self.nbinschan[name] = ibins
         self.dict_norm[name] = {}
         self.dict_sumw2[name] = {}
+        self.dict_beta_variations[name] = {}
 
         # add masked channels last and not masked channels first
         this_channel = {"axes": [a for a in axes], "masked": masked, "flow": flow}
@@ -392,6 +394,53 @@ def add_systematic(
             var_name_out, add_to_data_covariance=add_to_data_covariance, **kargs
         )
 
+    def add_beta_variations(
+        self,
+        h,
+        process,
+        source_channel,
+        dest_channel,
+    ):
+        """
+        Adds a template variation in the destination channel that is correlated with the beta variation in the source channel for a given process
+        h: must be a histogram with the axes of the source channel and destination channel.
+        """
+        if self.sparse:
+            raise NotImplementedError("Sparse implementation not yet implemented")
+
+        if source_channel not in self.channels.keys():
+            raise RuntimeError(f"Channel {source_channel} not known!")
+        if dest_channel not in self.channels.keys():
+            raise RuntimeError(f"Channel {dest_channel} not known!")
+        if not self.channels[dest_channel]["masked"]:
+            raise RuntimeError(
+                f"Beta variations can only be applied to masked channels"
+            )
+
+        norm = self.dict_norm[dest_channel][process]
+
+        source_axes = self.channels[source_channel]["axes"]
+        dest_axes = self.channels[dest_channel]["axes"]
+
+        source_axes_names = [a.name for a in source_axes]
+        dest_axes_names = [a.name for a in dest_axes]
+
+        for a in source_axes:
+            if a.name not in h.axes.name:
+                raise RuntimeError(
+                    f"Axis {a.name} not found in histogram h with {h.axes.name}"
+                )
+        for a in dest_axes:
+            if a.name not in h.axes.name:
+                raise RuntimeError(
+                    f"Axis {a.name} not found in histogram h with {h.axes.name}"
+                )
+
+        variation = h.project(*source_axes_names, *dest_axes_names).values()
+        variation = variation.reshape((*norm.shape, -1))
+
+        self.dict_beta_variations[dest_channel][process] = variation
+
     def get_logk(self, syst, norm, kfac=1.0, systematic_type=None):
         if not np.all(np.isfinite(syst)):
             raise RuntimeError(
@@ -714,6 +763,7 @@ def write(self, outfolder="./", outfilename="rabbit_input.hdf5", args={}):
         else:
             logk = np.zeros([nbinsfull, nproc, 2, nsyst], self.dtype)
 
+        beta_variations = np.zeros([nbinsfull, nbins, nproc], self.dtype)
         for chan in self.channels.keys():
             nbinschan = self.nbinschan[chan]
             dict_norm_chan = self.dict_norm[chan]
@@ -745,6 +795,10 @@ def write(self, outfolder="./", outfilename="rabbit_input.hdf5", args={}):
                         dict_logkhalfdiff_proc[syst]
                     )
 
+                if proc in self.dict_beta_variations[chan]:
+                    beta_vars = self.dict_beta_variations[chan][proc]
+                    beta_variations[ibin : ibin + nbinschan, :, iproc] = beta_vars
+
             ibin += nbinschan
 
         if self.data_covariance is None and (
@@ -916,6 +970,11 @@ def create_dataset(
             )
             logk = None
 
+        nbytes += h5pyutils.writeFlatInChunks(
+            beta_variations, f, "hbetavariations", maxChunkBytes=self.chunkSize
+        )
+        beta_variations = None
+
         logger.info(f"Total raw bytes in arrays = {nbytes}")
 
     def get_systsstandard(self):
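Illustration (not part of the patch series): the contraction tf.einsum("ijk,jk->ik", self.indata.betavar, dbeta) introduced in _compute_yields_with_beta folds the per-source-bin variation templates with the scaled bin-by-bin parameters. The sketch below is a minimal NumPy stand-in with invented shapes; the index convention i = destination bin, j = source bin, k = process is inferred from how the hbetavariations tensor is filled in tensorwriter.py and is an assumption, not something stated in the patch.

import numpy as np

# Invented shapes: 4 destination (masked-channel) bins, 3 source bins, 2 processes.
ndest, nsource, nproc = 4, 3, 2
rng = np.random.default_rng(0)
betavar = rng.normal(size=(ndest, nsource, nproc))  # variation template per source bin
dbeta = rng.normal(size=(nsource, nproc))           # sqrt(kstat) * (beta - 1) per source bin

# "ijk,jk->ik": for each destination bin i and process k,
# sum over source bins j of betavar[i, j, k] * dbeta[j, k]
var = np.einsum("ijk,jk->ik", betavar, dbeta)

# the same contraction written as explicit loops
var_loop = np.zeros((ndest, nproc))
for i in range(ndest):
    for k in range(nproc):
        var_loop[i, k] = sum(betavar[i, j, k] * dbeta[j, k] for j in range(nsource))

assert np.allclose(var, var_loop)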
From a5ba7deccc9bf235235a6ade2fca61882bcb3058 Mon Sep 17 00:00:00 2001
From: davidwalter2
Date: Thu, 15 Jan 2026 20:35:27 -0500
Subject: [PATCH 2/3] Some fixes

---
 rabbit/fitter.py       | 78 ++++++++++++++----------------------------
 rabbit/tensorwriter.py |  2 +-
 2 files changed, 26 insertions(+), 54 deletions(-)

diff --git a/rabbit/fitter.py b/rabbit/fitter.py
index 29eb88c..3bc3554 100644
--- a/rabbit/fitter.py
+++ b/rabbit/fitter.py
@@ -912,17 +912,15 @@ def _compute_global_impacts_beta0(self, cov_dexpdx, profile=True):
                 profile=profile, compute_norm=False, full=False
             )
             lbeta = self._compute_lbeta(beta)
-            # pdlbetadbeta = t1.gradient(lbeta, self.ubeta)
-            pdlbetadbeta = t1.jacobian(lbeta, self.ubeta)
+            pdlbetadbeta = t1.gradient(lbeta, self.ubeta)
 
         dbetadx = t1.jacobian(beta, self.x)
 
         # pd2lbetadbeta2 is diagonal so we can use gradient instead of jacobian
-        # pd2lbetadbeta2_diag = t2.gradient(pdlbetadbeta, self.ubeta)
-        sbeta = t2.jacobian(pdlbetadbeta, self.ubeta)
+        pd2lbetadbeta2_diag = t2.gradient(pdlbetadbeta, self.ubeta)
         # this the cholesky decomposition of pd2lbetadbeta2
-        # sbeta = tf.linalg.LinearOperatorDiag(
-        #     tf.sqrt(tf.reshape(pd2lbetadbeta2_diag, [-1])), is_self_adjoint=True
-        # )
+        sbeta = tf.linalg.LinearOperatorDiag(
+            tf.sqrt(tf.reshape(pd2lbetadbeta2_diag, [-1])), is_self_adjoint=True
+        )
 
         impacts_beta_shape = (*self.beta_shape, cov_dexpdx.shape[-1])
         impacts_beta0 = tf.zeros(shape=impacts_beta_shape, dtype=cov_dexpdx.dtype)
@@ -1128,56 +1126,31 @@ def compute_derivatives(dvars):
         expcov = None
 
         if pdexpdbeta is not None:
-            if self.indata.betavar is not None and full:
-                _0, _1, pd2ldbeta2 = self.loss_val_grad_hess_beta(profile=profile)
+            pd2ldbeta2 = self._pd2ldbeta2(profile)
+
+            if self.covarianceFit and profile:
+                pd2ldbeta2_pdexpdbeta = pd2ldbeta2.solve(pdexpdbeta, adjoint_arg=True)
+            else:
+                if self.binByBinStatType == "normal-additive":
+                    pd2ldbeta2_pdexpdbeta = pdexpdbeta / pd2ldbeta2[None, :]
+                else:
+                    pd2ldbeta2_pdexpdbeta = tf.where(
+                        self.betamask[None, :],
+                        tf.zeros_like(pdexpdbeta),
+                        pdexpdbeta / pd2ldbeta2[None, :],
+                    )
 
+            # flatten all but first axes
             batch = tf.shape(pdexpdbeta)[0]
             pdexpdbeta = tf.reshape(pdexpdbeta, [batch, -1])
-
-            betamask = ~tf.reshape(self.betamask, [-1])
-            pdexpdbeta = tf.boolean_mask(pdexpdbeta, betamask, axis=1)
-
-            chol = tf.linalg.cholesky(pd2ldbeta2)
-            pd2ldbeta2_pdexpdbeta = tf.linalg.cholesky_solve(
-                chol, tf.transpose(pdexpdbeta)
+            pd2ldbeta2_pdexpdbeta = tf.transpose(
+                tf.reshape(pd2ldbeta2_pdexpdbeta, [batch, -1])
             )
 
-            if compute_cov:
-                expcov += pdexpdbeta @ pd2ldbeta2_pdexpdbeta
-            else:
-                expvar_flat += tf.einsum(
-                    "ik,ki->i", pdexpdbeta, pd2ldbeta2_pdexpdbeta
-                )
+            if compute_cov:
+                expcov += pdexpdbeta @ pd2ldbeta2_pdexpdbeta
             else:
-                pd2ldbeta2 = self._pd2ldbeta2(profile)
-
-                if self.covarianceFit and profile:
-                    pd2ldbeta2_pdexpdbeta = pd2ldbeta2.solve(
-                        pdexpdbeta, adjoint_arg=True
-                    )
-                else:
-                    if self.binByBinStatType == "normal-additive":
-                        pd2ldbeta2_pdexpdbeta = pdexpdbeta / pd2ldbeta2[None, :]
-                    else:
-                        pd2ldbeta2_pdexpdbeta = tf.where(
-                            self.betamask[None, :],
-                            tf.zeros_like(pdexpdbeta),
-                            pdexpdbeta / pd2ldbeta2[None, :],
-                        )
-
-                # flatten all but first axes
-                batch = tf.shape(pdexpdbeta)[0]
-                pdexpdbeta = tf.reshape(pdexpdbeta, [batch, -1])
-                pd2ldbeta2_pdexpdbeta = tf.transpose(
-                    tf.reshape(pd2ldbeta2_pdexpdbeta, [batch, -1])
-                )
-
-                if compute_cov:
-                    expcov += pdexpdbeta @ pd2ldbeta2_pdexpdbeta
+            else:
-                expvar_flat += tf.einsum(
-                    "ik,ki->i", pdexpdbeta, pd2ldbeta2_pdexpdbeta
-                )
+                expvar_flat += tf.einsum("ik,ki->i", pdexpdbeta, pd2ldbeta2_pdexpdbeta)
 
         if compute_cov:
             expvar_flat = tf.linalg.diag_part(expcov)
@@ -1659,7 +1632,6 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
         if self.binByBinStatType in ["gamma", "normal-multiplicative"]:
             betamask = self.betamask[: nexp.shape[0]]
             if self.binByBinStatMode == "full":
-                norm = tf.where(betamask, norm, betasel * norm)
 
                 if self.indata.betavar is not None and full:
                     # apply beta variations as normal scaling
@@ -1670,13 +1642,13 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True)
                         betamask[: self.indata.nbins], tf.zeros_like(dbeta), dbeta
                     )
                     var = tf.einsum("ijk,jk->ik", self.indata.betavar, dbeta)
-
                     safe_n0 = tf.where(
                         n0 > 0, n0, 1.0
                     )  # Use 1.0 as a dummy to avoid div by zero
                     ratio = var / safe_n0
                     norm = tf.where(n0 > 0, norm * (1 + ratio), norm)
 
+                norm = tf.where(betamask, norm, betasel * norm)
                 nexp = tf.reduce_sum(norm, -1)
             else:
                 nexp = tf.where(betamask, nexp, nexp * betasel)
diff --git a/rabbit/tensorwriter.py b/rabbit/tensorwriter.py
index dec3cef..8af8109 100644
--- a/rabbit/tensorwriter.py
+++ b/rabbit/tensorwriter.py
@@ -436,7 +436,7 @@ def add_beta_variations(
                     f"Axis {a.name} not found in histogram h with {h.axes.name}"
                 )
 
-        variation = h.project(*source_axes_names, *dest_axes_names).values()
+        variation = h.project(*dest_axes_names, *source_axes_names).values()
        variation = variation.reshape((*norm.shape, -1))
 
         self.dict_beta_variations[dest_channel][process] = variation
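Illustration (not part of the patch series): the one-line tensorwriter.py change in this patch puts the destination-channel axes first in the projection, because the following reshape((*norm.shape, -1)) assumes the destination bins occupy the leading axes and flattens everything else (the source bins) into the trailing axis. A small NumPy sketch with hypothetical bin counts, not taken from rabbit, shows why the axis order is not interchangeable.

import numpy as np

# Hypothetical example: destination channel with 2x3 bins, source channel with 4 bins.
dest_shape = (2, 3)
nsource = 4
vals = np.arange(np.prod(dest_shape) * nsource)

# Destination axes leading, source axis trailing: the reshape keeps destination bins intact
dest_first = vals.reshape(*dest_shape, nsource)
flat = dest_first.reshape(*dest_shape, -1)
assert flat.shape == (2, 3, 4)

# With the source axis leading, the same reshape still "works" but pairs the wrong bins
source_first = np.moveaxis(dest_first, -1, 0)      # shape (4, 2, 3), same values
scrambled = source_first.reshape(*dest_shape, -1)  # also (2, 3, 4), but scrambled mapping
assert not np.array_equal(flat, scrambled)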
From 98b3b1e1702a36e1d2e2f7a38c43569edeb94db6 Mon Sep 17 00:00:00 2001
From: davidwalter2
Date: Sun, 25 Jan 2026 12:53:20 -0500
Subject: [PATCH 3/3] Fix betavar in presence of multiple channels

---
 rabbit/tensorwriter.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/rabbit/tensorwriter.py b/rabbit/tensorwriter.py
index 8af8109..108e5f0 100644
--- a/rabbit/tensorwriter.py
+++ b/rabbit/tensorwriter.py
@@ -439,7 +439,10 @@ def add_beta_variations(
         variation = h.project(*dest_axes_names, *source_axes_names).values()
         variation = variation.reshape((*norm.shape, -1))
 
-        self.dict_beta_variations[dest_channel][process] = variation
+        if source_channel not in self.dict_beta_variations[dest_channel].keys():
+            self.dict_beta_variations[dest_channel][source_channel] = {}
+
+        self.dict_beta_variations[dest_channel][source_channel][process] = variation
 
     def get_logk(self, syst, norm, kfac=1.0, systematic_type=None):
         if not np.all(np.isfinite(syst)):
@@ -795,9 +798,31 @@ def write(self, outfolder="./", outfilename="rabbit_input.hdf5", args={}):
                         dict_logkhalfdiff_proc[syst]
                     )
 
-                if proc in self.dict_beta_variations[chan]:
-                    beta_vars = self.dict_beta_variations[chan][proc]
-                    beta_variations[ibin : ibin + nbinschan, :, iproc] = beta_vars
+                for (
+                    source_channel,
+                    source_channel_dict,
+                ) in self.dict_beta_variations[chan].items():
+                    if proc in source_channel_dict:
+
+                        # find the bins of the source channel
+                        ibin_start = 0
+                        for c, nb in self.nbinschan.items():
+                            if self.channels[c]["masked"]:
+                                continue  # masked channels can not be source channels
+                            if c == source_channel:
+                                ibin_end = ibin_start + nb
+                                break
+                            else:
+                                ibin_start += nb
+                        else:
+                            raise RuntimeError(
+                                f"Did not find source channel {source_channel} in list of channels {[k for k in self.nbinschan.keys()]}"
+                            )
+
+                        beta_vars = source_channel_dict[proc]
+                        beta_variations[
+                            ibin : ibin + nbinschan, ibin_start:ibin_end, iproc
+                        ] = beta_vars
 
             ibin += nbinschan
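Illustration (not part of the patch series): the write() change above locates the source channel's slice of global bins by accumulating the bin counts of the unmasked channels in insertion order, skipping masked channels. The standalone sketch below reproduces only that bookkeeping with invented channel names and bin counts; it does not use the TensorWriter API.

# Invented channel layout for illustration: name -> (nbins, masked)
channels = {
    "ch_reco_a": (10, False),
    "ch_reco_b": (6, False),
    "ch_gen": (4, True),  # masked channels carry no observed bins
}

def source_bin_range(source_channel):
    """Return the [start, end) range of global bins belonging to source_channel."""
    start = 0
    for name, (nbins, masked) in channels.items():
        if masked:
            continue  # masked channels are skipped, mirroring the patch above
        if name == source_channel:
            return start, start + nbins
        start += nbins
    raise RuntimeError(f"Did not find source channel {source_channel}")

assert source_bin_range("ch_reco_a") == (0, 10)
assert source_bin_range("ch_reco_b") == (10, 16)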