From dbf90d68f8c777f522ff9d0f3aed2222347b26c9 Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:27:43 +0100
Subject: [PATCH 1/6] Flexible store data flags

---
 strax/processing/peak_merging.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py
index 1e5009d88..e28b4324f 100644
--- a/strax/processing/peak_merging.py
+++ b/strax/processing/peak_merging.py
@@ -83,6 +83,10 @@ def _merge_peaks(
     # Do the merging. Could numbafy this to optimize, probably...
     buffer = np.zeros(max_buffer, dtype=np.float32)
     buffer_top = np.zeros(max_buffer, dtype=np.float32)
+
+    # Check which optional waveform fields exist in the dtype
+    has_data_top = "data_top" in peaks.dtype.names
+    has_data_start = "data_start" in peaks.dtype.names
 
     for new_i, new_p in enumerate(new_peaks):
         new_p["min_diff"] = 2147483647  # inf of int32
@@ -119,9 +123,10 @@ def _merge_peaks(
             n_after = p["length"] * upsample
             i0 = (p["time"] - new_p["time"]) // common_dt
             buffer[i0 : i0 + n_after] = np.repeat(p["data"][: p["length"]], upsample) / upsample
-            buffer_top[i0 : i0 + n_after] = (
-                np.repeat(p["data_top"][: p["length"]], upsample) / upsample
-            )
+            if has_data_top:
+                buffer_top[i0 : i0 + n_after] = (
+                    np.repeat(p["data_top"][: p["length"]], upsample) / upsample
+                )
 
             # Handle the other peak attributes
             new_p["area"] += p["area"]
@@ -138,13 +143,13 @@ def _merge_peaks(
         new_p["min_diff"] = min(new_p["min_diff"], p["min_diff"])
 
         max_data = np.array(max_data)
 
-        # Downsample the buffers into
-        # new_p['data'], new_p['data_top'], and new_p['data_start']
+        # Downsample the buffers into new_p['data'], and optionally
+        # new_p['data_top'] and new_p['data_start'] if those fields exist
         strax.store_downsampled_waveform(
            new_p,
            buffer,
-            True,
-            True,
+            has_data_top,
+            has_data_start,
             buffer_top,
         )

From 3f5b7e388c8c60389fc99acb422e6ab8c18f9ff8 Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:34:37 +0100
Subject: [PATCH 2/6] Make data_top and data_start fields independent in
 merge_peaks

Previously, _merge_peaks() hardcoded both store_data_top=True and
store_data_start=True when calling store_downsampled_waveform(), which
caused crashes when peaks had one field but not the other.

This fix:
- Checks which optional waveform fields exist in merge_peaks()
  (before numba compilation)
- Passes has_data_top and has_data_start flags to _merge_peaks()
- Conditionally fills buffer_top only if data_top exists
- Passes the detected flags to store_downsampled_waveform()

Benefits:
- Fixes a crash when peaks are missing the data_start field
- Makes the fields truly independent (data_top can be used without
  data_start)
- Backward compatible with existing code
- Enables RAM optimization in straxen online DAQ processing
---
 strax/processing/peak_merging.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py
index 1e5009d88..40fdc973d 100644
--- a/strax/processing/peak_merging.py
+++ b/strax/processing/peak_merging.py
@@ -36,6 +36,10 @@ def merge_peaks(
     constituent peaks, it being too time-consuming to revert to records/hits.
""" + # Check which optional waveform fields exist in the input dtype + # (must be done here, before calling numba-compiled function) + has_data_top = "data_top" in peaks.dtype.names + has_data_start = "data_start" in peaks.dtype.names new_peaks, endtime = _merge_peaks( peaks, @@ -43,6 +47,8 @@ def merge_peaks( end_merge_at, merged=merged, max_buffer=max_buffer, + has_data_top=has_data_top, + has_data_start=has_data_start, ) # If the endtime was in the peaks we have to recompute it here # because otherwise it will stay set to zero due to the buffer @@ -60,6 +66,8 @@ def _merge_peaks( end_merge_at, merged=None, max_buffer=int(1e5), + has_data_top=True, + has_data_start=True, ): """Merge specified peaks with their neighbors, return merged peaks. @@ -69,6 +77,8 @@ def _merge_peaks( :param max_buffer: Maximum number of samples in the sum_waveforms and other waveforms of the resulting peaks (after merging). Peaks must be constructed based on the properties of constituent peaks, it being too time-consuming to revert to records/hits. + :param has_data_top: Whether data_top field exists in peaks dtype + :param has_data_start: Whether data_start field exists in peaks dtype """ assert len(start_merge_at) == len(end_merge_at) @@ -119,9 +129,10 @@ def _merge_peaks( n_after = p["length"] * upsample i0 = (p["time"] - new_p["time"]) // common_dt buffer[i0 : i0 + n_after] = np.repeat(p["data"][: p["length"]], upsample) / upsample - buffer_top[i0 : i0 + n_after] = ( - np.repeat(p["data_top"][: p["length"]], upsample) / upsample - ) + if has_data_top: + buffer_top[i0 : i0 + n_after] = ( + np.repeat(p["data_top"][: p["length"]], upsample) / upsample + ) # Handle the other peak attributes new_p["area"] += p["area"] @@ -138,13 +149,13 @@ def _merge_peaks( new_p["min_diff"] = min(new_p["min_diff"], p["min_diff"]) max_data = np.array(max_data) - # Downsample the buffers into - # new_p['data'], new_p['data_top'], and new_p['data_start'] + # Downsample the buffers into new_p['data'], and optionally + # new_p['data_top'] and new_p['data_start'] if those fields exist strax.store_downsampled_waveform( new_p, buffer, - True, - True, + has_data_top, + has_data_start, buffer_top, ) From 85150717f68b2f465d7265e45a54e2b54f71e013 Mon Sep 17 00:00:00 2001 From: cfuselli Date: Fri, 13 Feb 2026 17:40:56 +0100 Subject: [PATCH 3/6] Remove redundant dtype checks inside numba function The has_data_top and has_data_start flags are already passed as parameters from the outer merge_peaks() function. The dtype checks inside _merge_peaks() were leftover from the first attempt and cause numba compilation errors since string operations are not allowed in numba-compiled code. --- strax/processing/peak_merging.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py index c111fc721..40fdc973d 100644 --- a/strax/processing/peak_merging.py +++ b/strax/processing/peak_merging.py @@ -93,10 +93,6 @@ def _merge_peaks( # Do the merging. Could numbafy this to optimize, probably... 
     buffer = np.zeros(max_buffer, dtype=np.float32)
     buffer_top = np.zeros(max_buffer, dtype=np.float32)
-
-    # Check which optional waveform fields exist in the dtype
-    has_data_top = "data_top" in peaks.dtype.names
-    has_data_start = "data_start" in peaks.dtype.names
 
     for new_i, new_p in enumerate(new_peaks):
         new_p["min_diff"] = 2147483647  # inf of int32

From 1ff1add53be263f0bd559bd38820c41559c162f1 Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:45:04 +0100
Subject: [PATCH 4/6] Fix store_downsampled_waveform when data_start field
 missing

The inner condition 'if p_length > len(p["data_start"])' accessed
p["data_start"] even when the field doesn't exist, causing a numba
compilation error. The logic is simplified to store
min(p_length, available_space) samples, which works whether the field
is larger or smaller than p_length.
---
 strax/processing/peak_building.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index 402c59fd5..6254e3a22 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -184,12 +184,11 @@ def store_downsampled_waveform(
             p["data_top"][: p["length"]] = waveform_buffer_top[: p["length"]]
         p["data"][: p["length"]] = waveform_buffer[: p["length"]]
 
-    # If the waveform is downsampled, we can store the first samples of the waveform
+    # Store the first samples of the waveform if requested
     if store_data_start:
-        if p_length > len(p["data_start"]):
-            p["data_start"] = waveform_buffer[: len(p["data_start"])]
-        else:
-            p["data_start"][:p_length] = waveform_buffer[:p_length]
+        # Choose the appropriate copy based on available space
+        n_store = min(p_length, len(p["data_start"]))
+        p["data_start"][:n_store] = waveform_buffer[:n_store]
 
 
 @export

From 31ced3fc7db27f6cc9a519fe067b20c47ff137bd Mon Sep 17 00:00:00 2001
From: cfuselli
Date: Fri, 13 Feb 2026 17:48:16 +0100
Subject: [PATCH 5/6] Use hardcoded size for data_start to avoid field access
 in numba

We cannot call len(p['data_start']) inside a numba-compiled function
because numba compiles for the specific dtype: if the dtype doesn't
have a data_start field, compilation fails even though the access is
guarded by 'if store_data_start:'.

Solution: use the hardcoded 200 (the standard data_start size in strax)
instead of accessing the field to get its length.
---
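
Editorial note, not part of the commit: the constraint described above
can be reproduced with a minimal numba sketch. The toy dtype and the
function name below are hypothetical stand-ins, not strax code:

    import numba
    import numpy as np

    # A toy peak dtype WITHOUT the optional "data_start" field
    peaks = np.zeros(1, dtype=[("length", np.int32), ("data", np.float32, 200)])

    @numba.njit
    def guarded_len(p, store_data_start):
        # numba type-checks the whole body against this record dtype, so
        # the field access below fails to compile even though
        # store_data_start is False at runtime:
        if store_data_start:
            return len(p["data_start"])
        return 0

    # guarded_len(peaks[0], False)  # raises numba.core.errors.TypingError

This is also why patches 2 and 3 perform the
'"data_start" in peaks.dtype.names' check in the un-jitted merge_peaks()
wrapper and pass the result down as a plain flag: string operations on
dtype names are rejected by numba, so the check must happen before
compilation.
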
 strax/processing/peak_building.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index 6254e3a22..2063b33bb 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -185,9 +185,11 @@ def store_downsampled_waveform(
         p["data"][: p["length"]] = waveform_buffer[: p["length"]]
 
     # Store the first samples of the waveform if requested
+    # Note: data_start is typically 200 samples, but we store min(p_length, 200)
     if store_data_start:
-        # Choose the appropriate copy based on available space
-        n_store = min(p_length, len(p["data_start"]))
+        # Avoid accessing p["data_start"] to get length (fails if field missing)
+        # data_start field is typically 200 samples in strax peak dtype
+        n_store = min(p_length, 200)
         p["data_start"][:n_store] = waveform_buffer[:n_store]
 
 

From 11cfa29d30d3fe9ed63679cdfc812be743c8c52f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 13 Feb 2026 17:14:10 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 strax/processing/peak_building.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index 2063b33bb..b48d375a1 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -189,7 +189,7 @@ def store_downsampled_waveform(
     if store_data_start:
         # Avoid accessing p["data_start"] to get length (fails if field missing)
         # data_start field is typically 200 samples in strax peak dtype
-        n_store = min(p_length, 200) 
+        n_store = min(p_length, 200)
         p["data_start"][:n_store] = waveform_buffer[:n_store]
 
 
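
Editorial note: the clipped copy that the series settles on,
n_store = min(p_length, 200), covers both cases that the old two-branch
code distinguished (waveform longer or shorter than the data_start
field). A standalone numpy sketch with toy data; the helper name is
hypothetical, not strax API:

    import numpy as np

    DATA_START_SIZE = 200  # standard data_start size in strax peak dtypes

    def store_first_samples(data_start, waveform_buffer, p_length):
        # Stand-in for the patched branch of store_downsampled_waveform():
        # copy at most DATA_START_SIZE samples, and at most p_length samples
        n_store = min(p_length, DATA_START_SIZE)
        data_start[:n_store] = waveform_buffer[:n_store]

    buf = np.arange(300, dtype=np.float32)

    long_wf = np.zeros(DATA_START_SIZE, dtype=np.float32)
    store_first_samples(long_wf, buf, 250)  # longer than the field: clipped to 200
    assert (long_wf == buf[:200]).all()

    short_wf = np.zeros(DATA_START_SIZE, dtype=np.float32)
    store_first_samples(short_wf, buf, 50)  # shorter than the field: first 50 stored
    assert (short_wf[:50] == buf[:50]).all() and (short_wf[50:] == 0).all()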