diff --git a/cosipy/response/FullDetectorResponse.py b/cosipy/response/FullDetectorResponse.py index 1463acc9..012d1d35 100644 --- a/cosipy/response/FullDetectorResponse.py +++ b/cosipy/response/FullDetectorResponse.py @@ -67,7 +67,7 @@ def open(cls, filename, dtype=None, pa_convention=None, cache_size=None): return cls._open_h5(filename, dtype, pa_convention, cache_size) else: raise ValueError( - "Unsupported file format. Only .h5 and .rsp.gz extensions are supported.") + "Unsupported file format. Only .h5 extension is supported.") @classmethod def _open_h5(cls, filename, dtype=None, pa_convention=None, cache_size=None): diff --git a/cosipy/response/RspConverter.py b/cosipy/response/RspConverter.py index 49f257cb..1c86e82d 100644 --- a/cosipy/response/RspConverter.py +++ b/cosipy/response/RspConverter.py @@ -19,13 +19,16 @@ class RspConverter(): """ - Converter between response files stored in .rsp.gz format and + Converter between response files stored in .rsp format and optimized HDF5 format on disk. - Use method convert_to_h5() to convert a .rsp.gz file to .h5. + Use method convert_to_h5() to convert a .rsp file to .h5. Use method convert_to_rsp() to convert a FullDetectorResponse - (backed by an .h5 file) to .rsp.gz. + (backed by an .h5 file) to .rsp. + + To read and write compressed .rsp files, add ".gz" to the end of + the .rsp filename. """ @@ -107,6 +110,33 @@ def __init__(self, self.norm = norm self.norm_params = norm_params + @staticmethod + def _open_rsp(rsp_filename, mode): + """ + Open an .rsp file with or without .gz extension. If .gz, + use gzip to open; otherwise, open as a regular file. + The resulting file object can be used in a context manager. + + Parameters + ---------- + rsp_filename : Path or string + name of file + mode : str + mode in which to open file + + Returns + ------- + open file or gzip stream object + + """ + + rsp_path = Path(rsp_filename) + + if rsp_path.suffix == ".gz": + return gzip.open(rsp_filename, mode) + else: + return open(rsp_filename, mode) + def convert_to_h5(self, rsp_filename, h5_filename = None, @@ -115,13 +145,13 @@ def convert_to_h5(self, elt_type = None): """ - Given a response file in .rsp.gz format, read it + Given a response file in .rsp format, read it and write it out as an HDF5 file Parameters ---------- rsp_filename: string - name of input file (must end with .rsp.gz) + name of input file (must end with .rsp or .rsp.gz) h5_filename : string (optional) name of output file (should end with .h5); if not specified, use base name of rsp_filename with .h5 extension @@ -142,20 +172,27 @@ def convert_to_h5(self, """ if h5_filename is None: - h5_filename = str(rsp_filename).replace(".rsp.gz", ".h5") + rsp_path = Path(rsp_filename) + + # strip any .rsp and .gz from end of path and add .h5 + h5_path = rsp_path.parent / rsp_path.stem + while h5_path.suffix in {".rsp", "gz"}: + h5_path = h5_path.with_suffix("") + + h5_filename = h5_path.parent / (h5_path.stem + ".h5") if Path(h5_filename).exists() and not overwrite: raise RuntimeError(f"Not overwriting existing HDF5 file {h5_filename}") - if elt_type is None: - elt_type = self._get_min_elt_type(rsp_filename) - # read all info from the .rsp file - with gzip.open(rsp_filename, "rt") as f: + with self._open_rsp(rsp_filename, "rt") as f: axes, hdr = self._read_response_header(f) eff_area = self._get_eff_area_correction(axes, hdr) + if elt_type is None: + elt_type = self._get_min_elt_type(rsp_filename) + nbins = hdr["nbins"] counts = self._read_counts(f, axes, nbins, elt_type) @@ -193,7 +230,7 @@ def _read_response_header(self, rsp_file): Parameters ---------- - rsp_file : file handle to open .rsp.gz file + rsp_file : file handle to open .rsp file Returns ------- @@ -363,9 +400,10 @@ def _validate_norm_params(self, norm, params): match norm: case 'Mono': - if len(params) > 0: - raise ValueError(f"Mono normalization takes zero params; {len(params)} given") - params = () + if len(params) > 1: + raise ValueError(f"Mono normalization takes a most one param; {len(params)} given") + # emono if given, else dummy value + params = () if len(params) == 0 else ( int(params[0]), ) case 'Linear': if len(params) != 2: @@ -436,58 +474,60 @@ def _get_eff_area_correction(self, axes, hdr): """ - ewidth = axes['Ei'].widths - - norm = hdr["norm"] - - # If we have one single bin, treat the Gaussian normalization - # like the mono one. Also check that the Gaussian spectrum is - # fully contained in that bin - if norm == "Gaussian" and len(ewidth) == 1: - + def gauss_int(x, mu, sigma): from scipy.special import erf + z = (x - mu)/(sigma * np.sqrt(2)) + return 0.5*(1 + erf(z)) - Gauss_mean = hdr["norm_params"][0] - - edges = axes['Ei'].edges - gauss_int = \ - 0.5 * (1 + erf( (edges[0] - Gauss_mean)/(4*np.sqrt(2)) ) ) + \ - 0.5 * (1 + erf( (edges[1] - Gauss_mean)/(4*np.sqrt(2)) ) ) - - assert gauss_int == 1, "The gaussian spectrum is not fully contained in this single bin!" - - if not self.quiet: - logger.info("Only one bin so we will use the Mono normalisation") + norm = hdr["norm"] + params = hdr["norm_params"] - norm = "Mono" + ei_axis = axes['Ei'] + e_lo = ei_axis.lower_bounds.value + e_hi = ei_axis.upper_bounds.value match norm: case "Linear": - - emin, emax = hdr["norm_params"] + emin, emax = params if not self.quiet: logger.info(f"normalization: linear with energy range [{emin}-{emax}]") - nperchannel_norm = ewidth / (emax - emin) + e_lo = np.minimum(emax, e_lo) + e_hi = np.minimum(emax, e_hi) + + e_lo = np.maximum(emin, e_lo) + e_hi = np.maximum(emin, e_hi) + + nperchannel_norm = (e_hi - e_lo) / (emax - emin) case "Mono" : if not self.quiet: logger.info("normalization: mono") - nperchannel_norm = np.array([1.]) + if params == (): + if ei_axis.nbins > 1: + raise ValueError("Cannot specify Mono norm without energy for Ei axis with multiple bins") + else: + # all energy is in the single bin + nperchannel_norm = np.array([1.]) + + # fill in a header value so we don't write "None" + hdr["norm_params"] = (ei_axis.centers[0],) + else: + emono = params[0] + + # set just the Ei bin containing the mono energy to 1 + nperchannel_norm = np.zeros(ei_axis.nbins) + nperchannel_norm[(e_lo <= emono) & (e_hi >= emono)] = 1. case "powerlaw": - emin, emax, alpha = hdr["norm_params"] + emin, emax, alpha = params if not self.quiet: logger.info(f"normalization: powerlaw with index {alpha} with energy range [{emin}-{emax}]keV") - # From powerlaw - e_lo = axes['Ei'].lower_bounds.value - e_hi = axes['Ei'].upper_bounds.value - e_lo = np.minimum(emax, e_lo) e_hi = np.minimum(emax, e_hi) @@ -501,14 +541,35 @@ def _get_eff_area_correction(self, axes, hdr): nperchannel_norm = (e_hi**a - e_lo**a) / (emax**a - emin**a) case "Gaussian" : - raise NotImplementedError("Gaussian normalization for multiple bins not yet implemented") + mean, sdev, cutoff = params + + emin = mean - cutoff * sdev + emax = mean + cutoff * sdev + + logger.info(f"normalization: Gaussian with energy range [{emin}-{emax}]keV") + + e_lo = np.minimum(emax, e_lo) + e_hi = np.minimum(emax, e_hi) + + e_lo = np.maximum(emin, e_lo) + e_hi = np.maximum(emin, e_hi) + + nperchannel_norm = (gauss_int(e_hi, mean, sdev) - gauss_int(e_lo, mean, sdev)) / (emax - emin) # If Nulambda is full-sky, its nbins will be 1, so division is a no-op. # We assume all FISBEL pixels have the same area. nperchannel = nperchannel_norm * hdr["nevents_sim"] / axes["NuLambda"].nbins + zero_weights = (nperchannel == 0.) + + if np.any(zero_weights): + logger.warning("Spectral normalization gives zero incident photons in some Ei bins; " + "eff_area correction for those bins will be set to zero!") + # Area - eff_area = hdr["area_sim"] / nperchannel + eff_area = np.zeros(len(nperchannel)) + eff_area[~zero_weights] = hdr["area_sim"] / nperchannel[~zero_weights] + return eff_area @@ -661,8 +722,9 @@ def _get_min_elt_type(self, rsp_filename): """ - with gzip.open(rsp_filename, "rt") as rsp_file: + with self._open_rsp(rsp_filename, "rt") as rsp_file: + nbins = None for line in rsp_file: # consume the file header line = line.split() @@ -670,6 +732,9 @@ def _get_min_elt_type(self, rsp_filename): nbins = int(line[1]) break + if nbins is None: + ValueError("Could not find start of data in .rsp file") + tq = tqdm(total=nbins, desc="Getting type for counts", disable=self.quiet) @@ -714,7 +779,7 @@ def _read_counts(self, rsp_file, axes, nbins, elt_type): Parameters ---------- rsp_file : file handle - open .rsp.gz file + open .rsp file nbins : int number of bins to read from file axes : Axes object @@ -813,7 +878,7 @@ def convert_to_rsp(self, overwrite = False): """ Convert a FullDetectorResponse object backed by an HDF5 file - into a textual .rsp.gz response. We reuse the header + into a textual .rsp or .rsp.gz response. We reuse the header information stored in the HDF5 file, along with its axes and counts. @@ -822,14 +887,15 @@ def convert_to_rsp(self, fullDetectorResponse : FullDetectorResponse object to be converted rsp_filename : string - path to write .rsp.gz file (should end with .rsp.gz) + path to write .rsp file; if extension is .rsp.gz, the file + will be gzipped. overwrite : bool if true, overwrite existing response if it exists """ if Path(rsp_filename).exists() and not overwrite: - raise RuntimeError(f"Not overwriting existing .rsp.gz file {rsp_filename}") + raise RuntimeError(f"Not overwriting existing file {rsp_filename}") # reorder axes if needed to match the expected order for an .rsp file axes = fullDetectorResponse._axes @@ -849,7 +915,7 @@ def convert_to_rsp(self, def _write_rsp(self, headers, axes, counts, rsp_filename): """ - Write an .rsp.gz file with all necessary info. + Write an .rsp file with all necessary info. Parameters ---------- @@ -860,7 +926,8 @@ def _write_rsp(self, headers, axes, counts, rsp_filename): counts : counts of Histogram rsp_filename : - name of response file to write (should be .rsp.gz). + path to write .rsp file; if extension is .rsp.gz, the file + will be gzipped. """ @@ -869,7 +936,7 @@ def _write_rsp(self, headers, axes, counts, rsp_filename): for desc in RspConverter.axis_name_map: axis_names[RspConverter.axis_name_map[desc]] = desc - with gzip.open(rsp_filename, "wt") as f: + with self._open_rsp(rsp_filename, "wt") as f: f.write("# computed reduced response\n") diff --git a/cosipy/test_data/test_full_detector_response_mono_no_norm.rsp.gz b/cosipy/test_data/test_full_detector_response_mono_no_norm.rsp.gz new file mode 100644 index 00000000..11093d1e Binary files /dev/null and b/cosipy/test_data/test_full_detector_response_mono_no_norm.rsp.gz differ diff --git a/tests/response/test_response_conversion.py b/tests/response/test_response_conversion.py index 8d0f483c..a95780f4 100644 --- a/tests/response/test_response_conversion.py +++ b/tests/response/test_response_conversion.py @@ -1,25 +1,33 @@ +import numpy as np + from cosipy import test_data -7 + from cosipy.response import FullDetectorResponse, RspConverter +from pytest import raises + rspgz_response_path = test_data.path / "test_full_detector_response.rsp.gz" rspgz_nonorm_response_path = test_data.path / "test_full_detector_response_no_norm.rsp.gz" +rspgz_mono_nonorm_response_path = test_data.path / "test_full_detector_response_mono_no_norm.rsp.gz" + h5_response_path = test_data.path / "test_full_detector_response.h5" def test_convert_rsp_to_h5(tmp_path): + import gzip + tmp_h5_filename = tmp_path / "fdr.h5" c = RspConverter(bufsize = 100000) + # test opening compressed .rsp and writing compressed .h5 c.convert_to_h5(rspgz_response_path, h5_filename = tmp_h5_filename, overwrite = True) - fdr = FullDetectorResponse.open(h5_response_path) fdr2 = FullDetectorResponse.open(tmp_h5_filename) @@ -31,7 +39,7 @@ def test_convert_rsp_to_h5(tmp_path): assert h1 == h2 - + # test opening compressed .rsp and writing uncompressed .h5 c.convert_to_h5(rspgz_response_path, h5_filename = tmp_h5_filename, overwrite = True, @@ -48,72 +56,173 @@ def test_convert_rsp_to_h5(tmp_path): assert h1 == h2 + # test opening uncompressed .rsp + with gzip.open(rspgz_response_path) as gzfile: + content = gzfile.read() + with open(tmp_path / "response.rsp", "wb") as rspfile: + rspfile.write(content) + + c.convert_to_h5(rspgz_response_path, + h5_filename = tmp_h5_filename, + overwrite = True) + + fdr = FullDetectorResponse.open(h5_response_path) + fdr2 = FullDetectorResponse.open(tmp_h5_filename) + + h1 = fdr.to_dr() + h2 = fdr2.to_dr() + + fdr.close() + fdr2.close() + + assert h1 == h2 + def test_default_norms(tmp_path): tmp_h5_filename = tmp_path / "fdr.h5" c = RspConverter(bufsize = 100000, norm = "Linear", - norm_params = [50, 1000]) + norm_params = [50, 10000]) c.convert_to_h5(rspgz_nonorm_response_path, h5_filename = tmp_h5_filename, overwrite = True) - fdr = FullDetectorResponse.open(tmp_h5_filename) - norm_info = fdr.headers["SP"] - assert norm_info == "Linear 50 1000" - fdr.close() + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "Linear 50 10000" c = RspConverter(bufsize = 100000, - norm = "Mono") + norm = "Mono", + norm_params = [511]) c.convert_to_h5(rspgz_nonorm_response_path, h5_filename = tmp_h5_filename, overwrite = True) - fdr = FullDetectorResponse.open(tmp_h5_filename) - norm_info = fdr.headers["SP"] - assert norm_info == "Mono" - fdr.close() + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "Mono 511" + + # bins with no defined spectral normalization should + # have their eff_area set to zero, not -inf or NaN + assert all(np.isfinite(fdr.eff_area_correction)) + + # mono norm with no energy should work for single-bin response + c = RspConverter(bufsize = 100000, + norm = "Mono", + norm_params = []) + + c.convert_to_h5(rspgz_mono_nonorm_response_path, + h5_filename = tmp_h5_filename, + overwrite = True) + + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "Mono" + + # bins with no defined spectral normalization should + # have their eff_area set to zero, not -inf or NaN + assert all(np.isfinite(fdr.eff_area_correction)) + + # if no monoenergetic energy is given, Mono is + # undefined with multiple Ei bins + with raises(ValueError): + c = RspConverter(bufsize = 100000, + norm = "Mono") + + c.convert_to_h5(rspgz_nonorm_response_path, + h5_filename = tmp_h5_filename, + overwrite = True) c = RspConverter(bufsize = 100000, norm = "powerlaw", - norm_params = [50, 1000, 0.9]) + norm_params = [50, 10000, 0.9]) c.convert_to_h5(rspgz_nonorm_response_path, h5_filename = tmp_h5_filename, overwrite = True) - fdr = FullDetectorResponse.open(tmp_h5_filename) - norm_info = fdr.headers["SP"] - assert norm_info == "powerlaw 50 1000 0.9" - fdr.close() + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "powerlaw 50 10000 0.9" c = RspConverter(bufsize = 100000, norm = "powerlaw", - norm_params = [50, 1000, 1]) + norm_params = [50, 10000, 1]) c.convert_to_h5(rspgz_nonorm_response_path, h5_filename = tmp_h5_filename, overwrite = True) - fdr = FullDetectorResponse.open(tmp_h5_filename) - norm_info = fdr.headers["SP"] - assert norm_info == "powerlaw 50 1000 1.0" - fdr.close() + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "powerlaw 50 10000 1.0" + + # powerlaw norm that does not span all of Ei + c = RspConverter(bufsize = 100000, + norm = "powerlaw", + norm_params = [500, 1000, 1]) + + c.convert_to_h5(rspgz_nonorm_response_path, + h5_filename = tmp_h5_filename, + overwrite = True) + + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "powerlaw 500 1000 1.0" + + # bins with no defined spectral normalization should + # have their eff_area set to zero, not -inf or NaN + assert all(np.isfinite(fdr.eff_area_correction)) + + # Gaussian norm + c = RspConverter(bufsize = 100000, + norm = "Gaussian", + norm_params = [100, 100, 3]) + + c.convert_to_h5(rspgz_nonorm_response_path, + h5_filename = tmp_h5_filename, + overwrite = True) + + with FullDetectorResponse.open(tmp_h5_filename) as fdr: + norm_info = fdr.headers["SP"] + assert norm_info == "Gaussian 100.0 100.0 3.0" + + # bins with no defined spectral normalization should + # have their eff_area set to zero, not -inf or NaN + assert all(np.isfinite(fdr.eff_area_correction)) - # cannot test Gaussian norm because it can only be used on - # a response with a single Ei bin def test_convert_h5_to_rsp(tmp_path): - tmp_rsp_filename = tmp_path / "fdr.rsp.gz" + tmp_rsp_filename = tmp_path / "fdr.rsp" + tmp_rspgz_filename = tmp_path / "fdr.rsp.gz" tmp_h5_filename = tmp_path / "fdr.h5" + c = RspConverter(bufsize = 100000) + + # test writing compressed .rsp.gz fdr = FullDetectorResponse.open(h5_response_path) - c = RspConverter(bufsize = 100000) + c.convert_to_rsp(fdr, tmp_rspgz_filename, overwrite=True) + + tmp_h5_filename = c.convert_to_h5(tmp_rspgz_filename, overwrite=True) + + fdr2 = FullDetectorResponse.open(tmp_h5_filename) + + + h1 = fdr.to_dr() + h2 = fdr2.to_dr() + + fdr.close() + fdr2.close() + + assert h1 == h2 + + # test writing uncompressed .rsp + fdr = FullDetectorResponse.open(h5_response_path) c.convert_to_rsp(fdr, tmp_rsp_filename, overwrite=True)