From 7695f7a4cade5801f330c3e5cf9e2ab260400b0f Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Sun, 11 Jan 2026 19:08:59 -0500 Subject: [PATCH 1/3] Implement BB full with gamma via numerically solving for nbeta --- rabbit/fitter.py | 166 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 148 insertions(+), 18 deletions(-) diff --git a/rabbit/fitter.py b/rabbit/fitter.py index faba62a..2f90680 100644 --- a/rabbit/fitter.py +++ b/rabbit/fitter.py @@ -81,15 +81,6 @@ def __init__( else: self.binByBinStatType = options.binByBinStatType - if ( - self.binByBinStat - and self.binByBinStatMode == "full" - and not self.binByBinStatType.startswith("normal") - ): - raise Exception( - 'bin-by-bin stat only for option "--binByBinStatMode full" with "--binByBinStatType normal"' - ) - if ( options.covarianceFit and self.binByBinStat @@ -231,9 +222,14 @@ def __init__( self.sumw = self.indata.sumw if self.binByBinStatType in ["gamma", "normal-multiplicative"]: - self.kstat = self.sumw**2 / self.varbeta - self.betamask = (self.varbeta == 0.0) | (self.kstat == 0.0) - self.kstat = tf.where(self.betamask, 1.0, self.kstat) + self.betamask = (self.varbeta == 0.0) | (self.sumw == 0.0) + self.kstat = tf.where(self.betamask, 1.0, self.sumw**2 / self.varbeta) + + if self.binByBinStatType == "gamma" and self.binByBinStatMode == "full": + self.nbeta = tf.Variable( + tf.ones_like(self.nobs), trainable=True, name="nbeta" + ) + elif self.binByBinStatType == "normal-additive": # precompute decomposition of composite matrix to speed up # calculation of profiled beta values @@ -1405,13 +1401,74 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True) if self.chisqFit: if self.binByBinStatType == "gamma": kstat = self.kstat[: self.indata.nbins] + betamask = self.betamask[: self.indata.nbins] - abeta = nexp_profile**2 - bbeta = kstat * self.varnobs - nexp_profile * self.nobs - cbeta = -kstat * self.varnobs * beta0 - beta = solve_quad_eq(abeta, bbeta, cbeta) + if self.binByBinStatMode == "lite": + abeta = nexp_profile**2 + bbeta = kstat * self.varnobs - nexp_profile * self.nobs + cbeta = -kstat * self.varnobs * beta0 + beta = solve_quad_eq(abeta, bbeta, cbeta) + elif self.binByBinStatMode == "full": + norm_profile = norm[: self.indata.nbins] + + # solving nbeta numerically using newtons method (does not work with forward differentiation i.e. 
use --globalImpacts with --globalImpactsDisableJVP) + def fnll_nbeta(x): + beta = ( + kstat + * beta0 + / ( + kstat + + ((x - self.nobs) / self.varnobs)[..., None] + * norm_profile + ) + ) + beta = tf.where(betamask, beta0, beta) + new_nexp = tf.reduce_sum(beta * norm_profile, axis=-1) + ln = 0.5 * (new_nexp - self.nobs) ** 2 / self.varnobs + lbeta = tf.reduce_sum( + kstat * (beta - beta0) + - kstat + * beta0 + * (tf.math.log(beta) - tf.math.log(beta0)), + axis=-1, + ) + return ln + lbeta + + def body(i, edm): + with tf.GradientTape() as t2: + with tf.GradientTape() as t1: + nll = fnll_nbeta(nexp_profile * self.nbeta) + grad = t1.gradient(nll, self.nbeta) + hess = t2.gradient(grad, self.nbeta) + + eps = 1e-8 + safe_hess = tf.where(hess > 0, hess, tf.ones_like(hess)) + step = grad / (safe_hess + eps) + + self.nbeta.assign_sub(step) + + return i + 1, tf.reduce_max(0.5 * grad * step) + + def cond(i, edm): + return tf.logical_and(i < 50, edm > 1e-10) + + i0 = tf.constant(0) + edm0 = tf.constant(tf.float64.max) + tf.while_loop(cond, body, loop_vars=(i0, edm0)) + + beta = ( + kstat + * beta0 + / ( + kstat + + ( + (nexp_profile * self.nbeta - self.nobs) + / self.varnobs + )[..., None] + * norm_profile + ) + ) - betamask = self.betamask[: self.indata.nbins] beta = tf.where(betamask, beta0, beta) elif self.binByBinStatType == "normal-multiplicative": kstat = self.kstat[: self.indata.nbins] @@ -1560,7 +1617,72 @@ def _compute_yields_with_beta(self, profile=True, compute_norm=False, full=True) kstat = self.kstat[: self.indata.nbins] betamask = self.betamask[: self.indata.nbins] - beta = (self.nobs + kstat * beta0) / (nexp_profile + kstat) + if self.binByBinStatMode == "lite": + beta = (self.nobs + kstat * beta0) / (nexp_profile + kstat) + elif self.binByBinStatMode == "full": + norm_profile = norm[: self.indata.nbins] + + # solving nbeta numerically using newtons method (does not work with forward differentiation i.e. 
use --globalImpacts with --globalImpactsDisableJVP) + def fnll_nbeta(x): + beta = ( + kstat + * beta0 + / ( + norm_profile + + kstat + - (self.nobs / x)[..., None] * norm_profile + ) + ) + beta = tf.where(betamask, beta0, beta) + new_nexp = tf.reduce_sum(beta * norm_profile, axis=-1) + ln = ( + new_nexp + - self.nobs + - self.nobs + * (tf.math.log(new_nexp) - tf.math.log(self.nobs)) + ) + lbeta = tf.reduce_sum( + kstat * (beta - beta0) + - kstat + * beta0 + * (tf.math.log(beta) - tf.math.log(beta0)), + axis=-1, + ) + return ln + lbeta + + def body(i, edm): + with tf.GradientTape() as t2: + with tf.GradientTape() as t1: + nll = fnll_nbeta(nexp_profile * self.nbeta) + grad = t1.gradient(nll, self.nbeta) + hess = t2.gradient(grad, self.nbeta) + + eps = 1e-8 + safe_hess = tf.where(hess > 0, hess, tf.ones_like(hess)) + step = grad / (safe_hess + eps) + self.nbeta.assign_sub(step) + return i + 1, tf.reduce_max(0.5 * grad * step) + + def cond(i, edm): + return tf.logical_and(i < 50, edm > 1e-10) + + i0 = tf.constant(0) + edm0 = tf.constant(tf.float64.max) + tf.while_loop(cond, body, loop_vars=(i0, edm0)) + + beta = ( + kstat + * beta0 + / ( + norm_profile + - (self.nobs / (nexp_profile * self.nbeta))[ + ..., None + ] + * norm_profile + + kstat + ) + ) + beta = tf.where(betamask, beta0, beta) elif self.binByBinStatType == "normal-multiplicative": kstat = self.kstat[: self.indata.nbins] @@ -2063,6 +2185,14 @@ def loss_val_grad_hess_beta(self, profile=True): grad = t1.gradient(val, self.ubeta) hess = t2.jacobian(grad, self.ubeta) + grad = tf.reshape(grad, [-1]) + hess = tf.reshape(hess, [grad.shape[0], grad.shape[0]]) + + betamask = ~tf.reshape(self.betamask, [-1]) + grad = grad[betamask] + hess = tf.boolean_mask(hess, betamask, axis=0) + hess = tf.boolean_mask(hess, betamask, axis=1) + return val, grad, hess def minimize(self): From 9be8657e6f96f71473fd981b614429f298af6c25 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Wed, 28 Jan 2026 15:13:47 -0500 Subject: [PATCH 2/3] Update readme --- README.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 32d527d..e963402 100644 --- a/README.md +++ b/README.md @@ -84,10 +84,10 @@ This is often the case in the standard profile likelihood unfolding. By default, systematic variations are asymmetric. However, defining only symmetric variations can be beneficial as a fully symmetric tensor has reduced memory consumption, simplifications in the likelihood function in the fit, and is usually numerically more stable. Different symmetrization options are supported: - * "average": TBD - * "conservative": TBD - * "linear": TBD - * "quadratic": TBD + * "average": In each bin average the up and down variation and mirror. This is the recommended option for vairations that are expected to be symmetric. + * "conservative": In each bin use the larger variation between up and down and mirror. This option is less recommended but can be used as cross check against "average". + * "linear": The up and down variations are decomposed into the components for the average and difference with a scale factor for the difference shape of 1, corresponding to linear interpolation. This option is recommended for variations that are expected to be asymmetric. + * "quadratic": This option is recommended for variations that are expected to be asymmetric with a scale factor for the difference shape of $\sqrt{3}$, corresponding to quadratic interpolation and more conservative than "linear". 
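
A minimal single-bin sketch of the average/difference decomposition behind these symmetrization options, assuming up and down templates `u`, `d` around a nominal `n`; the exact convention used by rabbit may differ, so treat this as an illustration of the bullets above rather than the implementation:

```python
import math

def decompose(n, u, d):
    """Split the up/down shifts around the nominal n into a symmetric (average)
    and an asymmetric (difference) component, so that u - n = avg + diff and
    d - n = -avg + diff."""
    du, dd = u - n, d - n
    avg = 0.5 * (du - dd)   # mirrored average of the two shifts
    diff = 0.5 * (du + dd)  # residual asymmetry left after mirroring
    return avg, diff

n, u, d = 10.0, 11.0, 9.5
avg, diff = decompose(n, u, d)

# "average": keep only the mirrored average          -> n + avg, n - avg
# "conservative": mirror the larger of |u-n|, |d-n| instead of the average
# "linear"/"quadratic": additionally keep the difference component as its own
#   mirrored shape, scaled by 1 ("linear") or sqrt(3) ("quadratic")
print(avg, diff, math.sqrt(3.0) * diff)  # 0.75 0.25 0.433...
```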
If a systematic variation is added by providing a single histogram, the variation is mirrored. ### Masked channels @@ -120,7 +120,13 @@ rabbit_fit test_tensor.hdf5 -o results/fitresult.hdf5 -t 0 --doImpacts --globalI ``` ### Bin-by-bin statistical uncertainties -Bin-by-bin statistical uncertainties on the templates are added by default and can be disabled at runtime using the `--noBinByBinStat` option. The Barlow-Beeston lite method is used to add implicit nuisance parameters for each template bin. By default this is implemented using a gamma distribution for the probability density, but Gaussian uncertainties can also be used with `--binByBinStatType normal`. +Bin-by-bin statistical uncertainties on the templates are added by default and can be disabled at runtime using the `--noBinByBinStat` option. +The Barlow-Beeston method is used to add implicit nuisance parameters for each template bin. +By default, the lite variant is used where one parameter is introduced per template bin, for the sum of all processes. +The Barlow-Beeston-full method can be used by specifying `--binByBinStatMode full` which introduces implicit nuisance parameters for each process and each template bin. +By default these nuisance parameters are multiplied to the expected events and follow a gamma distribution for the probability density. +Gaussian uncertainties can also be used with `--binByBinStatType normal-additive` for an additive scaling or `--binByBinStatType normal-multiplicative` for a multiplicative scaling. +In the case of `--binByBinStatMode full` and `--binByBinStatType gamma` no analytic solution is available and in each bin a 1D minimization is performed using Newton's method which can significantly increase the time required in the fit. ### Mappings Perform mappings on the parameters and observables (the histogram bins in the (masked) channels). From d6cd11d0e8d2e307510aff6fc012b22dc3654f47 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Thu, 5 Feb 2026 13:56:44 -0500 Subject: [PATCH 3/3] Fix edm in case of non convex region --- rabbit/fitter.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/rabbit/fitter.py b/rabbit/fitter.py index 4e14673..d5638bc 100644 --- a/rabbit/fitter.py +++ b/rabbit/fitter.py @@ -1464,12 +1464,17 @@ def body(i, edm): hess = t2.gradient(grad, self.nbeta) eps = 1e-8 - safe_hess = tf.where(hess > 0, hess, tf.ones_like(hess)) - step = grad / (safe_hess + eps) + hess_sign = tf.where( + hess != 0, tf.sign(hess), tf.ones_like(hess) + ) + safe_hess = hess_sign * tf.maximum(tf.abs(hess), eps) + step = grad / safe_hess self.nbeta.assign_sub(step) - return i + 1, tf.reduce_max(0.5 * grad * step) + return i + 1, tf.reduce_max( + tf.reduce_max(0.5 * grad * step) + ) def cond(i, edm): return tf.logical_and(i < 50, edm > 1e-10) @@ -1680,10 +1685,17 @@ def body(i, edm): hess = t2.gradient(grad, self.nbeta) eps = 1e-8 - safe_hess = tf.where(hess > 0, hess, tf.ones_like(hess)) - step = grad / (safe_hess + eps) + hess_sign = tf.where( + hess != 0, tf.sign(hess), tf.ones_like(hess) + ) + safe_hess = hess_sign * tf.maximum(tf.abs(hess), eps) + step = grad / safe_hess + self.nbeta.assign_sub(step) - return i + 1, tf.reduce_max(0.5 * grad * step) + + return i + 1, tf.reduce_max( + tf.reduce_max(0.5 * grad * step) + ) def cond(i, edm): return tf.logical_and(i < 50, edm > 1e-10)
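
For reference, a self-contained sketch of the safeguarded per-bin Newton iteration that these patches implement in TensorFlow: the curvature is clamped to sign(hess) * max(|hess|, eps) so the division stays finite, and the loop stops once the estimated distance to minimum edm = 0.5 * grad * step falls below 1e-10 or after 50 iterations. Plain Python on a scalar objective; the function and argument names are illustrative, not rabbit APIs.

```python
def newton_1d(grad_fn, hess_fn, x0, max_iter=50, tol=1e-10, eps=1e-8):
    """Safeguarded 1D Newton minimization mirroring the update in the patch."""
    x = x0
    for _ in range(max_iter):
        g = grad_fn(x)
        h = hess_fn(x)
        # keep the curvature away from zero while preserving its sign
        h_sign = 1.0 if h >= 0 else -1.0
        safe_h = h_sign * max(abs(h), eps)
        step = g / safe_h
        x -= step
        # estimated distance to minimum; negative in non-convex regions,
        # which terminates the loop (cf. "Fix edm in case of non convex region")
        edm = 0.5 * g * step
        if edm < tol:
            break
    return x

# toy usage: minimize f(x) = (x - 3)^2, with grad 2*(x - 3) and hess 2
print(newton_1d(lambda x: 2.0 * (x - 3.0), lambda x: 2.0, x0=0.0))  # 3.0
```

In the fit this code path is reached with `--binByBinStatMode full --binByBinStatType gamma`; as noted in the comment added by the first patch, the numerical solve does not support forward-mode differentiation, so `--globalImpacts` has to be combined with `--globalImpactsDisableJVP`.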