From 7227c5c4abae99e864c2c48a85f5c8c334461989 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 6 Feb 2026 13:34:35 +0100 Subject: [PATCH 1/3] Update fake structure to work with new format, add a fake option to unit hists --- Analysis/HistProducerFromNTuple.py | 51 ++++++++++++++---------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 1ff88b7d..627fd325 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -27,11 +27,14 @@ def find_keys(inFiles_list): def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): - model, unit_hist, rdf = hist_list[0] - if verbose > 0: - print( - f"Saving hist for key: {key_tuple}, unc: {unc}, scale: {scale}. Number of RDF runs: {rdf.GetNRuns()}" - ) + if len(hist_list[0]) == 3: + model, unit_hist, rdf = hist_list[0] + if verbose > 0: + print( + f"Saving hist for key: {key_tuple}, unc: {unc}, scale: {scale}. Number of RDF runs: {rdf.GetNRuns()}" + ) + else: + model, unit_hist = hist_list[0] dir_name = "/".join(key_tuple) dir_ptr = Utilities.mkdir(outFile, dir_name) @@ -67,7 +70,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): dir_ptr.WriteTObject(merged_hist, final_hist_name, "Overwrite") -def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): +def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale, fake=False): var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var) dims = ( 1 @@ -83,12 +86,17 @@ def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): if dims > 1 else [f"{var}_bin"] ) - rdf_filtered = rdf.Filter(filter_to_apply) - if dims >= 1 and dims <= 3: - mkhist_fn = getattr(rdf_filtered, f"Histo{dims}D") - unit_hist = mkhist_fn(unit_bin_model, *var_bin_list, weight_name) + + # If fake structure, we want to build a correct dimensional histogram + if fake: + unit_hist = unit_bin_model.GetHistogram() else: - raise RuntimeError("Only 1D, 2D and 3D histograms are supported") + rdf_filtered = rdf.Filter(filter_to_apply) + if dims >= 1 and dims <= 3: + mkhist_fn = getattr(rdf_filtered, f"Histo{dims}D") + unit_hist = mkhist_fn(unit_bin_model, *var_bin_list, weight_name) + else: + raise RuntimeError("Only 1D, 2D and 3D histograms are supported") return model, unit_hist @@ -192,31 +200,18 @@ def SaveTmpFileUnc( def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): - hist_cfg_dict = setup.hists - channels = setup.global_params["channels_to_consider"] - for filter_key in key_filter_dict.keys(): print(filter_key) for further_cut_name in [None] + list(further_cuts.keys()): - var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var) - dims = ( - 1 - if not hist_cfg_dict[var_entry].get("var_list", False) - else len(hist_cfg_dict[var_entry]["var_list"]) - ) - model, unit_bin_model = HistHelper.GetModel( - hist_cfg_dict, var, dims, return_unit_bin_model=True + rdf_dummy = ROOT.RDataFrame(1) + model, unit_hist = GetUnitBinHist( + rdf_dummy, var, "", "weight_Central", "Central", "Central", fake=True ) - nbins = unit_bin_model.fNbinsX - xmin = -0.5 - xmax = unit_bin_model.fNbinsX - 0.5 - empty_hist = ROOT.TH1F(var, var, nbins, xmin, xmax) - empty_hist.Sumw2() key_tuple = filter_key if further_cut_name: key_tuple += (further_cut_name,) SaveHist( - key_tuple, outFile, [(model, empty_hist)], var, "Central", "Central" + key_tuple, outFile, [(model, unit_hist)], var, "Central", "Central" ) From d8fdfbe39700fdc21e3552f8beb5e402f69633a7 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 6 Feb 2026 14:21:37 +0100 Subject: [PATCH 2/3] Completely remove fake structure and use base files for everything --- Analysis/HistProducerFromNTuple.py | 82 +++++------------------------- 1 file changed, 14 insertions(+), 68 deletions(-) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 627fd325..63ab1de9 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -27,14 +27,11 @@ def find_keys(inFiles_list): def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): - if len(hist_list[0]) == 3: - model, unit_hist, rdf = hist_list[0] - if verbose > 0: - print( - f"Saving hist for key: {key_tuple}, unc: {unc}, scale: {scale}. Number of RDF runs: {rdf.GetNRuns()}" - ) - else: - model, unit_hist = hist_list[0] + model, unit_hist, rdf = hist_list[0] + if verbose > 0: + print( + f"Saving hist for key: {key_tuple}, unc: {unc}, scale: {scale}. Number of RDF runs: {rdf.GetNRuns()}" + ) dir_name = "/".join(key_tuple) dir_ptr = Utilities.mkdir(outFile, dir_name) @@ -70,7 +67,7 @@ def SaveHist(key_tuple, outFile, hist_list, hist_name, unc, scale, verbose=0): dir_ptr.WriteTObject(merged_hist, final_hist_name, "Overwrite") -def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale, fake=False): +def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale): var_entry = HistHelper.findBinEntry(hist_cfg_dict, args.var) dims = ( 1 @@ -87,16 +84,12 @@ def GetUnitBinHist(rdf, var, filter_to_apply, weight_name, unc, scale, fake=Fals else [f"{var}_bin"] ) - # If fake structure, we want to build a correct dimensional histogram - if fake: - unit_hist = unit_bin_model.GetHistogram() + rdf_filtered = rdf.Filter(filter_to_apply) + if dims >= 1 and dims <= 3: + mkhist_fn = getattr(rdf_filtered, f"Histo{dims}D") + unit_hist = mkhist_fn(unit_bin_model, *var_bin_list, weight_name) else: - rdf_filtered = rdf.Filter(filter_to_apply) - if dims >= 1 and dims <= 3: - mkhist_fn = getattr(rdf_filtered, f"Histo{dims}D") - unit_hist = mkhist_fn(unit_bin_model, *var_bin_list, weight_name) - else: - raise RuntimeError("Only 1D, 2D and 3D histograms are supported") + raise RuntimeError("Only 1D, 2D and 3D histograms are supported") return model, unit_hist @@ -198,23 +191,6 @@ def SaveTmpFileUnc( tmp_file_root.Close() tmp_files.append(tmp_file) - -def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): - for filter_key in key_filter_dict.keys(): - print(filter_key) - for further_cut_name in [None] + list(further_cuts.keys()): - rdf_dummy = ROOT.RDataFrame(1) - model, unit_hist = GetUnitBinHist( - rdf_dummy, var, "", "weight_Central", "Central", "Central", fake=True - ) - key_tuple = filter_key - if further_cut_name: - key_tuple += (further_cut_name,) - SaveHist( - key_tuple, outFile, [(model, unit_hist)], var, "Central", "Central" - ) - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("inputFiles", nargs="+", type=str) @@ -256,39 +232,9 @@ def CreateFakeStructure(outFile, setup, var, key_filter_dict, further_cuts): for key in unique_keys: if not key.startswith(treeName): continue - valid_files = [] - has_entries = False - for f in all_infiles: - rf = ROOT.TFile.Open(f) - if rf and rf.Get(key): - tree = rf.Get(key) - if tree and tree.GetEntries() > 0: - has_entries = True - valid_files.append(f) - rf.Close() - - if valid_files and has_entries: - base_rdfs[key] = ROOT.RDataFrame(key, Utilities.ListToVector(valid_files)) - ROOT.RDF.Experimental.AddProgressBar(base_rdfs[key]) - else: - print(f"{key} tree not found or with 0 entries: fake structure creation") - outFile_root = ROOT.TFile(args.outFile, "UPDATE") - key_filter_dict = analysis.createKeyFilterDict( - setup.global_params, setup.global_params["era"] - ) - further_cuts = {} - if args.furtherCut: - further_cuts = {f: (f, f) for f in args.furtherCut.split(",")} - if ( - "further_cuts" in setup.global_params - and setup.global_params["further_cuts"] - ): - further_cuts.update(setup.global_params["further_cuts"]) - CreateFakeStructure( - outFile_root, setup, args.var, key_filter_dict, further_cuts - ) - outFile_root.Close() - continue + + base_rdfs[key] = ROOT.RDataFrame(key, Utilities.ListToVector(all_infiles)) + ROOT.RDF.Experimental.AddProgressBar(base_rdfs[key]) further_cuts = {} if args.furtherCut: From d0429efa584af8409e2b92dfe188c7d3621fe678 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 6 Feb 2026 14:24:14 +0100 Subject: [PATCH 3/3] Formatting --- Analysis/HistProducerFromNTuple.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Analysis/HistProducerFromNTuple.py b/Analysis/HistProducerFromNTuple.py index 63ab1de9..dc951c2b 100644 --- a/Analysis/HistProducerFromNTuple.py +++ b/Analysis/HistProducerFromNTuple.py @@ -191,6 +191,7 @@ def SaveTmpFileUnc( tmp_file_root.Close() tmp_files.append(tmp_file) + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("inputFiles", nargs="+", type=str)