From dbf90d68f8c777f522ff9d0f3aed2222347b26c9 Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:27:43 +0100
Subject: [PATCH 1/6] Flexible store data flags

---
 strax/processing/peak_merging.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py
index 1e5009d88..e28b4324f 100644
--- a/strax/processing/peak_merging.py
+++ b/strax/processing/peak_merging.py
@@ -83,6 +83,10 @@ def _merge_peaks(
     # Do the merging. Could numbafy this to optimize, probably...
     buffer = np.zeros(max_buffer, dtype=np.float32)
     buffer_top = np.zeros(max_buffer, dtype=np.float32)
+
+    # Check which optional waveform fields exist in the dtype
+    has_data_top = "data_top" in peaks.dtype.names
+    has_data_start = "data_start" in peaks.dtype.names
 
     for new_i, new_p in enumerate(new_peaks):
         new_p["min_diff"] = 2147483647  # inf of int32
@@ -119,9 +123,10 @@ def _merge_peaks(
             n_after = p["length"] * upsample
             i0 = (p["time"] - new_p["time"]) // common_dt
             buffer[i0 : i0 + n_after] = np.repeat(p["data"][: p["length"]], upsample) / upsample
-            buffer_top[i0 : i0 + n_after] = (
-                np.repeat(p["data_top"][: p["length"]], upsample) / upsample
-            )
+            if has_data_top:
+                buffer_top[i0 : i0 + n_after] = (
+                    np.repeat(p["data_top"][: p["length"]], upsample) / upsample
+                )
 
             # Handle the other peak attributes
             new_p["area"] += p["area"]
@@ -138,13 +143,13 @@ def _merge_peaks(
         new_p["min_diff"] = min(new_p["min_diff"], p["min_diff"])
 
         max_data = np.array(max_data)
 
-        # Downsample the buffers into
-        # new_p['data'], new_p['data_top'], and new_p['data_start']
+        # Downsample the buffers into new_p['data'], and optionally
+        # new_p['data_top'] and new_p['data_start'] if those fields exist
         strax.store_downsampled_waveform(
            new_p,
            buffer,
-            True,
-            True,
+            has_data_top,
+            has_data_start,
             buffer_top,
         )

From 3f5b7e388c8c60389fc99acb422e6ab8c18f9ff8 Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:34:37 +0100
Subject: [PATCH 2/6] Make data_top and data_start fields independent in
 merge_peaks

Previously, _merge_peaks() hardcoded both store_data_top=True and
store_data_start=True when calling store_downsampled_waveform(), which
caused crashes when peaks had one field but not the other.

This fix:
- Checks which optional waveform fields exist in merge_peaks()
  (before numba compilation)
- Passes has_data_top and has_data_start flags to _merge_peaks()
- Conditionally fills buffer_top only if data_top exists
- Passes the detected flags to store_downsampled_waveform()

Benefits:
- Fixes a crash when peaks are missing the data_start field
- Makes the fields truly independent (data_top can be used without
  data_start)
- Backward compatible with existing code
- Enables RAM optimization in straxen online DAQ processing
---
 strax/processing/peak_merging.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py
index 1e5009d88..40fdc973d 100644
--- a/strax/processing/peak_merging.py
+++ b/strax/processing/peak_merging.py
@@ -36,6 +36,10 @@ def merge_peaks(
     constituent peaks, it being too time-consuming to revert to records/hits.
""" + # Check which optional waveform fields exist in the input dtype + # (must be done here, before calling numba-compiled function) + has_data_top = "data_top" in peaks.dtype.names + has_data_start = "data_start" in peaks.dtype.names new_peaks, endtime = _merge_peaks( peaks, @@ -43,6 +47,8 @@ def merge_peaks( end_merge_at, merged=merged, max_buffer=max_buffer, + has_data_top=has_data_top, + has_data_start=has_data_start, ) # If the endtime was in the peaks we have to recompute it here # because otherwise it will stay set to zero due to the buffer @@ -60,6 +66,8 @@ def _merge_peaks( end_merge_at, merged=None, max_buffer=int(1e5), + has_data_top=True, + has_data_start=True, ): """Merge specified peaks with their neighbors, return merged peaks. @@ -69,6 +77,8 @@ def _merge_peaks( :param max_buffer: Maximum number of samples in the sum_waveforms and other waveforms of the resulting peaks (after merging). Peaks must be constructed based on the properties of constituent peaks, it being too time-consuming to revert to records/hits. + :param has_data_top: Whether data_top field exists in peaks dtype + :param has_data_start: Whether data_start field exists in peaks dtype """ assert len(start_merge_at) == len(end_merge_at) @@ -119,9 +129,10 @@ def _merge_peaks( n_after = p["length"] * upsample i0 = (p["time"] - new_p["time"]) // common_dt buffer[i0 : i0 + n_after] = np.repeat(p["data"][: p["length"]], upsample) / upsample - buffer_top[i0 : i0 + n_after] = ( - np.repeat(p["data_top"][: p["length"]], upsample) / upsample - ) + if has_data_top: + buffer_top[i0 : i0 + n_after] = ( + np.repeat(p["data_top"][: p["length"]], upsample) / upsample + ) # Handle the other peak attributes new_p["area"] += p["area"] @@ -138,13 +149,13 @@ def _merge_peaks( new_p["min_diff"] = min(new_p["min_diff"], p["min_diff"]) max_data = np.array(max_data) - # Downsample the buffers into - # new_p['data'], new_p['data_top'], and new_p['data_start'] + # Downsample the buffers into new_p['data'], and optionally + # new_p['data_top'] and new_p['data_start'] if those fields exist strax.store_downsampled_waveform( new_p, buffer, - True, - True, + has_data_top, + has_data_start, buffer_top, ) From 85150717f68b2f465d7265e45a54e2b54f71e013 Mon Sep 17 00:00:00 2001 From: cfuselli Date: Fri, 13 Feb 2026 17:40:56 +0100 Subject: [PATCH 3/6] Remove redundant dtype checks inside numba function The has_data_top and has_data_start flags are already passed as parameters from the outer merge_peaks() function. The dtype checks inside _merge_peaks() were leftover from the first attempt and cause numba compilation errors since string operations are not allowed in numba-compiled code. --- strax/processing/peak_merging.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py index c111fc721..40fdc973d 100644 --- a/strax/processing/peak_merging.py +++ b/strax/processing/peak_merging.py @@ -93,10 +93,6 @@ def _merge_peaks( # Do the merging. Could numbafy this to optimize, probably... 
     buffer = np.zeros(max_buffer, dtype=np.float32)
     buffer_top = np.zeros(max_buffer, dtype=np.float32)
-
-    # Check which optional waveform fields exist in the dtype
-    has_data_top = "data_top" in peaks.dtype.names
-    has_data_start = "data_start" in peaks.dtype.names
 
     for new_i, new_p in enumerate(new_peaks):
         new_p["min_diff"] = 2147483647  # inf of int32

From 1ff1add53be263f0bd559bd38820c41559c162f1 Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:45:04 +0100
Subject: [PATCH 4/6] Fix store_downsampled_waveform when data_start field
 missing

The inner condition 'if p_length > len(p["data_start"])' accessed
p["data_start"] even when the field doesn't exist, causing a numba
compilation error. The logic is simplified to store
min(p_length, available_space) samples, which works whether the field
is larger or smaller than p_length.
---
 strax/processing/peak_building.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index 402c59fd5..6254e3a22 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -184,12 +184,11 @@ def store_downsampled_waveform(
             p["data_top"][: p["length"]] = waveform_buffer_top[: p["length"]]
         p["data"][: p["length"]] = waveform_buffer[: p["length"]]
 
-    # If the waveform is downsampled, we can store the first samples of the waveform
+    # Store the first samples of the waveform if requested
     if store_data_start:
-        if p_length > len(p["data_start"]):
-            p["data_start"] = waveform_buffer[: len(p["data_start"])]
-        else:
-            p["data_start"][:p_length] = waveform_buffer[:p_length]
+        # Choose the appropriate copy based on available space
+        n_store = min(p_length, len(p["data_start"]))
+        p["data_start"][:n_store] = waveform_buffer[:n_store]
 
 
 @export

From 31ced3fc7db27f6cc9a519fe067b20c47ff137bd Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:48:16 +0100
Subject: [PATCH 5/6] Use hardcoded size for data_start to avoid field access
 in numba

We cannot call len(p['data_start']) inside a numba-compiled function
because numba compiles for the specific dtype: if the dtype doesn't
have a data_start field, compilation fails even though the access is
guarded by 'if store_data_start:'.

Solution: use the hardcoded 200 (the standard data_start size in strax)
instead of accessing the field to get its length.
---
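
Editorial note, not part of the commit: the constraint described above
can be reproduced with a minimal numba sketch. The toy dtype and the
function name below are hypothetical stand-ins, not strax code:

    import numba
    import numpy as np

    # A toy peak dtype WITHOUT the optional "data_start" field
    peaks = np.zeros(1, dtype=[("length", np.int32), ("data", np.float32, 200)])

    @numba.njit
    def guarded_len(p, store_data_start):
        # numba type-checks the whole body against this record dtype, so
        # the field access below fails to compile even though
        # store_data_start is False at runtime:
        if store_data_start:
            return len(p["data_start"])
        return 0

    # guarded_len(peaks[0], False)  # raises numba.core.errors.TypingError

This is also why patches 2 and 3 perform the
'"data_start" in peaks.dtype.names' check in the un-jitted merge_peaks()
wrapper and pass the result down as a plain flag: string operations on
dtype names are rejected by numba, so the check must happen before
compilation.
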
 strax/processing/peak_building.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index 6254e3a22..2063b33bb 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -185,9 +185,11 @@ def store_downsampled_waveform(
         p["data"][: p["length"]] = waveform_buffer[: p["length"]]
 
     # Store the first samples of the waveform if requested
+    # Note: data_start is typically 200 samples, but we store min(p_length, 200)
     if store_data_start:
-        # Choose the appropriate copy based on available space
-        n_store = min(p_length, len(p["data_start"]))
+        # Avoid accessing p["data_start"] to get length (fails if field missing)
+        # data_start field is typically 200 samples in strax peak dtype
+        n_store = min(p_length, 200)
         p["data_start"][:n_store] = waveform_buffer[:n_store]
 
 

From 11cfa29d30d3fe9ed63679cdfc812be743c8c52f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 13 Feb 2026 17:14:10 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 strax/processing/peak_building.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index 2063b33bb..b48d375a1 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -189,7 +189,7 @@ def store_downsampled_waveform(
     if store_data_start:
         # Avoid accessing p["data_start"] to get length (fails if field missing)
         # data_start field is typically 200 samples in strax peak dtype
-        n_store = min(p_length, 200) 
+        n_store = min(p_length, 200)
         p["data_start"][:n_store] = waveform_buffer[:n_store]
 
 
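
Editorial note: the clipped copy that the series settles on,
n_store = min(p_length, 200), covers both cases that the old two-branch
code distinguished (waveform longer or shorter than the data_start
field). A standalone numpy sketch with toy data; the helper name is
hypothetical, not strax API:

    import numpy as np

    DATA_START_SIZE = 200  # standard data_start size in strax peak dtypes

    def store_first_samples(data_start, waveform_buffer, p_length):
        # Stand-in for the patched branch of store_downsampled_waveform():
        # copy at most DATA_START_SIZE samples, and at most p_length samples
        n_store = min(p_length, DATA_START_SIZE)
        data_start[:n_store] = waveform_buffer[:n_store]

    buf = np.arange(300, dtype=np.float32)

    long_wf = np.zeros(DATA_START_SIZE, dtype=np.float32)
    store_first_samples(long_wf, buf, 250)  # longer than the field: clipped to 200
    assert (long_wf == buf[:200]).all()

    short_wf = np.zeros(DATA_START_SIZE, dtype=np.float32)
    store_first_samples(short_wf, buf, 50)  # shorter than the field: first 50 stored
    assert (short_wf[:50] == buf[:50]).all() and (short_wf[50:] == 0).all()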