diff --git a/Makefile b/Makefile index 247b809..c06b0d8 100644 --- a/Makefile +++ b/Makefile @@ -42,8 +42,9 @@ TEST_OBJECTS = $(TDIR)/obj/seatest.o \ $(TDIR)/obj/run_tests.o LINK_COMMON = -lm -LINK_FFTW = -L$(FFTWROOT) -lfftw3 -lfftw3f -LINK_MKL = -L${MKLROOT}/lib/intel64 -lmkl_rt -Wl,--no-as-needed -lpthread -lm -ldl +LINK_FFTW = -L$(FFTW_HOME)/lib -lfftw3 -lfftw3f -Wl,-rpath,${FFTW_HOME}/lib +CFLAGS_FFTW = -I"${FFTW_HOME}/include" +LINK_MKL = -L${MKLROOT}/lib -lmkl_rt -Wl,--no-as-needed,-rpath,${MKLROOT}/lib -lpthread -lm -ldl CFLAGS_MKL = -m64 -I"${MKLROOT}/include" all: $(LDIR)/libradial_functional.so ${LDIR}/libksw_estimator.so ${LDIR}/libksw_fisher.so @@ -64,25 +65,25 @@ $(ODIR)/hyperspherical.o: $(HS_SDIR)/hyperspherical.c $(HS_IDIR)/*.h $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(HS_IDIR) $(LDIR)/libksw_estimator.so: $(EST_OBJECTS) $(LP_OBJECTS) $(IDIR)/ksw_estimator.h - $(CC) $(CFLAGS) $(CFLAGS_MKL) $(OMPFLAG) $(OPTFLAG) -shared -o $@ $< $(LP_OBJECTS) $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(CFLAGS_FFTW) $(OMPFLAG) $(OPTFLAG) -shared -o $@ $< $(LP_OBJECTS) $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp $(LDIR)/libksw_fisher.so: $(FIS_OBJECTS) $(LP_OBJECTS) $(IDIR)/ksw_fisher.h - $(CC) $(CFLAGS) $(CFLAGS_MKL) $(OMPFLAG) $(OPTFLAG) -shared -o $@ $< $(LP_OBJECTS) $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(CFLAGS_FFTW) $(OMPFLAG) $(OPTFLAG) -shared -o $@ $< $(LP_OBJECTS) $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp $(ODIR)/estimator.o: $(SDIR)/estimator.c $(IDIR)/*.h - $(CC) $(CFLAGS) $(CFLAGS_MKL) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(CFLAGS_FFTW) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(ODIR)/fisher.o: $(SDIR)/fisher.c $(IDIR)/*.h - $(CC) $(CFLAGS) $(CFLAGS_MKL) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(LINK_COMMON) $(LINK_MKL) + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(LINK_COMMON) $(ODIR)/ylmgen_c.o: $(LP_SDIR)/ylmgen_c.c $(LP_IDIR)/*.h - $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(LINK_COMMON) + $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(ODIR)/c_utils.o: $(LP_SDIR)/c_utils.c $(LP_IDIR)/*.h - $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(LINK_COMMON) + $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(ODIR)/walltime_c.o: $(LP_SDIR)/walltime_c.c $(LP_IDIR)/*.h - $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) $(LINK_COMMON) + $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) check: check_c check_python @@ -93,7 +94,7 @@ check_python: cd $(TDIR); python -m pytest python/ $(TDIR)/bin/run_tests: $(TEST_OBJECTS) $(LDIR)/libradial_functional.so $(LDIR)/libksw_estimator.so $(LDIR)/libksw_fisher.so - $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -o $@ $(TEST_OBJECTS) -L$(LDIR) -lradial_functional -lksw_estimator -lksw_fisher $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp -Wl,-rpath,$(LDIR) + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(CFLAGS_FFTW) $(OMPFLAG) $(OPTFLAG) -o $@ $(TEST_OBJECTS) -L$(LDIR) -lradial_functional -lksw_estimator -lksw_fisher $(LINK_COMMON) $(LINK_FFTW) $(LINK_MKL) -lgomp -Wl,-rpath,$(LDIR) $(TDIR)/obj/run_tests.o: $(TDIR)/src/run_tests.c $(TDIR)/include/seatest.h $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(TDIR)/include -I$(IDIR) -I$(LP_IDIR) @@ -105,10 +106,10 @@ $(TDIR)/obj/test_radial_functional.o: $(TDIR)/src/test_radial_functional.c $(IDI $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(HS_IDIR) -I$(TDIR)/include $(TDIR)/obj/test_estimator.o: $(TDIR)/src/test_estimator.c $(IDIR)/*.h - $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) -I$(TDIR)/include + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(CFLAGS_FFTW) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) -I$(TDIR)/include $(TDIR)/obj/test_fisher.o: $(TDIR)/src/test_fisher.c $(IDIR)/*.h - $(CC) $(CFLAGS) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) -I$(TDIR)/include + $(CC) $(CFLAGS) $(CFLAGS_MKL) $(CFLAGS_FFTW) $(OMPFLAG) $(OPTFLAG) -c -o $@ $< -I$(IDIR) -I$(LP_IDIR) -I$(TDIR)/include clean: rm -rf $(ODIR) diff --git a/README.md b/README.md index d06ccda..d9b8668 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,10 @@ An implementation of the Komatsu-Spergel-Wandelt bispectrum estimator for modern Start by making sure the MKL library is loaded in your environment. On most clusters this can be achieved by loading a predefined module. On the Princeton `della` and `tiger` clusters you can use `module load intel-mkl` (see [here](https://researchcomputing.princeton.edu/faq/how-to-build-using-intel-mkl) for more information). On `NERSC` you can use `load intel` (see [here](https://docs-dev.nersc.gov/cgpu/software/math/)). Once you have loaded the module, check if the `MKLROOT` environment variable has been set (`echo $MKLROOT`). +Then do the same for the FFTW library. Right now, the Makefile assumes that the `FFTW_HOME` environment variable is set. So that make sure that is the case and that `FFTW_HOME/lib` and `FFTW_HOME/include` point to the correct directories. -Once the MKL environment has been set, `git clone` this repository, go into the directory and run: +Once the MKL and FFTW environments have been set, `git clone` this repository, go into the directory and run: ``` diff --git a/cython/radial_functional.pxd b/cython/radial_functional.pxd index 0a1f10e..ebbc849 100644 --- a/cython/radial_functional.pxd +++ b/cython/radial_functional.pxd @@ -10,3 +10,4 @@ cdef extern from "radial_functional.h": int nr, int npol, int ncomp); + diff --git a/cython/radial_functional.pyx b/cython/radial_functional.pyx index 6c205b2..8883e89 100644 --- a/cython/radial_functional.pyx +++ b/cython/radial_functional.pyx @@ -70,8 +70,9 @@ def radial_func(f_k, tr_ell_k, k, radii, ells): npol, ncomp) return f_ell_r + -def check_and_return_shape(arr, exp_shape): +def check_and_return_shape(arr, exp_shape): ''' Return shape of input array if it matches expections. @@ -109,3 +110,91 @@ def check_and_return_shape(arr, exp_shape): idx, n_exp, n)) return arr.shape + +def radial_func_dL(f_k, tr_ell_k, k, radii, ells): + ''' + Compute f_ell^X(r) = (2/pi) int k^2 dk f(k) transfer^X_ell(k) j_ell(k r), + where f(k) is an arbitrary function of wavenumber k. + + Parameters + --------- + f_k : (nk, ncomp) array + Input functions. + tr_ell_k : (nell, nk, npol) array + Transfer functions. + k : (nk) array + Wavenumbers in 1/Mpc. + radii : (nr) array + Multipoles + + Returns + ------- + f_ell_r : (nr, nell, npol, ncomp, delta_L_list) array + Evaluated integral for all radii, multipoles, polarizations + and input function components. + + Raises + ------ + ValueError + If input array shapes are incorrect. + If the input arrays contain nans or infs. + ''' + + # Check input for nans and infs. + f_k = np.asarray_chkfinite(f_k, dtype=float, order='C') + tr_ell_k = np.asarray_chkfinite(tr_ell_k, dtype=float, order='C') + k = np.asarray_chkfinite(k, dtype=float, order='C') + radii = np.asarray_chkfinite(radii, dtype=float, order='C') + ells = np.asarray_chkfinite(ells, dtype=np.int32, order='C') + delta_L_list = np.asarray_chkfinite([-2, -1, 0, 1, 2], dtype=np.int32, order='C') + + # Check for input shapes. + nk, = check_and_return_shape(k, (None,)) + nr, = check_and_return_shape(radii, (None,)) + nell, = check_and_return_shape(ells, (None,)) + nk, ncomp = check_and_return_shape(f_k, (nk, None)) + nell, nk, npol = check_and_return_shape(tr_ell_k, (nell, nk, None)) + ndeltaL, = check_and_return_shape(delta_L_list, (None,)) + + # Create output array. + f_ell_r = np.empty((ndeltaL, nr, nell, npol, ncomp), dtype=float) + + cdef double [::1] f_k_ = f_k.reshape(-1) + cdef double [::1] tr_ell_k_ = tr_ell_k.reshape(-1) + cdef double [::1] k_ = k.reshape(-1) + cdef double [::1] radii_ = radii.reshape(-1) + cdef double [::1] f_ell_r_ = f_ell_r.reshape(-1) + cdef int [::1] ells_= ells + + # Define local variables + cdef Py_ssize_t i, index # loop indices (Py_ssize_t = signed size type) + cdef int L, delta_L # temporary variables for ell and its shifted version + + # Preallocate a temporary buffer to hold valid shifted ell values + cdef np.ndarray[np.int32_t, ndim=1, mode='c'] L_arr = np.empty(nell, dtype=np.int32) + cdef int[::1] L_buf = L_arr # contiguous int memoryview (to pass to the C function) + + # Loop over each delta_L value + for index in range(ndeltaL): + delta_L = delta_L_list[index] + + # build the shifted ell list (L = ell+delta_L), excluding negative results + for i in range(nell): + L = ells_[i] + delta_L + if L >= 0: + L_buf[i] = L + else: + L_buf[i] = 0 + + radial_func = compute_radial_func(&f_k_[0], + &tr_ell_k_[0], + &k_[0], + &radii_[0], + &f_ell_r_[index * (nr * nell * npol * ncomp)], + &L_buf[0], + nk, + nell, + nr, + npol, + ncomp) + return f_ell_r \ No newline at end of file diff --git a/include/radial_functional.h b/include/radial_functional.h index f58a81e..df9584a 100644 --- a/include/radial_functional.h +++ b/include/radial_functional.h @@ -81,3 +81,37 @@ void _trapezoidal_weights(double const *k, */ void * _malloc_checked(size_t size); + + +/* +* Compute f_ell^X(r) = (2/pi) int k^2 dk f(k) transfer^X_ell(k) j_ell(k r) +* where f(k) is an arbitrary function of wavenumber k. +* +* Arguments +* --------- +* f_k : (nk * ncomp) input functions. +* tr_ell_k : (nell * nk * npol) transfer function. +* k : (nk) array of monotonically increasing wavenumbers. +* radii : (nr) array of radii +* f_r_ell : (nr * nell * npol * ncomp) output array. +* ells : (nell) multipoles. +* nk : Number of wavenumbers. +* nell : Number of multipoles. +* nr : Number of radii. +* npol : Number of polarization components (1=T, 2= E, 3=B). +* ncomp : Number of input functions. +* delta_l : shift in ell for transfer functions +*/ + +void compute_radial_func_dL2(double const *f_k, + double const *tr_ell_k, + double *k, + double const *radii, + double *f_r_ell, + int *ells, + int nk, + int nell, + int nr, + int npol, + int ncomp, + int delta_l); // New: delta ell diff --git a/ksw/Afunctionals.py b/ksw/Afunctionals.py new file mode 100644 index 0000000..9441f3a --- /dev/null +++ b/ksw/Afunctionals.py @@ -0,0 +1,196 @@ +import numpy as np +from ducc0.misc import wigner3j_int +from ksw import utils, estimator_core +from estimator import KSW + +ksw = KSW.__new__(KSW) +ksw.lmax = 100 +ksw.dtype = np.float32 +ksw.cdtype = np.complex64 +ksw.pol = ["T", "E"] + + +#deltaL_list = {-1, +1} for scalar +#deltaL_list = {-2, -1, 0, +1, +2} for tensor + + +def wigner_J(S, L_list, deltaL_list, Jindex): + L_list = np.asarray(L_list, dtype=int) + deltaL_list = np.asarray(deltaL_list, dtype=int) + + out = np.zeros((len(L_list), len(deltaL_list)), dtype=np.float64) + + cJ = np.zeros((len(L_list), len(deltaL_list)), dtype=np.float64) + for iL, L in enumerate(L_list): + for idL, dL in enumerate(deltaL_list): + ell = L + dL + if ell < 0: + continue + + # --- selection rules --- + if abs(Jindex[1]) > L: + continue + if abs(Jindex[2]) > ell: + continue + + l1_min_J, vals_J = wigner3j_int(L, ell, Jindex[1], Jindex[2]) + iS = S - l1_min_J + if 0 <= iS < len(vals_J): + out[iL, idL] = vals_J[iS] + return out + +def w3j(S, n, L_list, deltaL_list): + + L_list = np.asarray(L_list, dtype=int) + deltaL_list = np.asarray(deltaL_list, dtype=int) + + Lmax = np.max(L_list) + out = np.zeros((len(L_list), 2*Lmax+1, len(deltaL_list)), dtype=np.float64) + + for idL, dL in enumerate(deltaL_list): + for iL, L in enumerate(L_list): + ell = L + dL + if ell < 0: + continue + + for M in range(-L, L+1): + m = - M - n + if abs(m) > ell: + continue + + # --- selection rules --- + if abs(M) > L: + continue + if abs(m) > ell: + continue + + l1_min, vals = wigner3j_int(L, ell, M, m) + iS = S - l1_min + if 0 <= iS < len(vals): + out[iL, M + Lmax, idL] = vals[iS] + return out + +def products_3j_array(S, n , L_list, deltaL_list, Jindex): + """ + Compute the product of two 3j symbols + J_(S, L, L+deltaL)^(Jindex) * (S L L+deltaL; n M m) + Parameters + ---------- + S : int + Total angular momentum quantum number + n: int + Magnetic quantum number + L_list : int + Orbital angular momentum quantum number + deltaL_list : int + choices for ell + Jindex: an array of shape(3, ) + The indices (n, M, m) for the first 3j symbol + + Returns: + ------- + np.ndarray + Array of products of 3j symbols with shape(N_L, N_M, N_deltaL_list) + """ + L_list = np.asarray(L_list, dtype=int) + Lmax = int(np.max(L_list)) + + w3j_J = wigner_J(S, L_list, deltaL_list, Jindex) + w3j_vals = w3j(S, n, L_list, deltaL_list) + + out = np.zeros((len(L_list), 2*Lmax+1, len(deltaL_list)), dtype=np.float64) + + for iL in range(len(L_list)): + for idL in range(len(deltaL_list)): + out[iL, :, idL] = w3j_J[iL, idL] * w3j_vals[iL, :, idL] + return out + + +def parity_x(x): + """ + Map x in {T, E, B} to parity code: + 0 for T/E (parity even) + 1 for B (parity odd) + """ + x = x.upper() + if x == "T" or x == "E": + return 0 + if x == "B": + return 1 + +def gamma_Z(x, Z, L, deltaL): + """ + gamma^{(Z)}_{x, L, ell}: + = 1, if Z = zeta + = 1 + (-1)^{p_x + L + ell}, if Z = h + + where p_x = 0 for T/E and 1 for B. + """ + Zlow = Z.lower() + if Zlow == "zeta": + return 1 + if Zlow == "h": + px = parity_x(x) + sgn = 1 + (-1) ** (px + 2 * L + deltaL) + return sgn + +def get_a_lm(alm, ell, m): + """ + access a_{lm} assuming a_ell_m stores m>=0 in the last axis. + + If m < 0, use reality condition: + a_{ell, -m} = (-1)^m * conj(a_{ell, m}) + """ + alm = utils.alm_return_2d(alm, ksw.npol, ksw.lmax) + a_ell_m = utils.alm2a_ell_m(alm) + a_ell_m = a_ell_m.astype(ksw.cdtype) + + if m >= 0: + return a_ell_m[ell, m] + else: + mp = -m + return ((-1)**mp) * np.conjugate(a_ell_m[ell, mp]) + +# thetas.max() = np.pi + +def A_LM(deltaL_list, L_list, n, x, Z, S, Jindex, alm, theta_batch=1): + + Lmax = np.max(L_list) + alm_list = np.zeros((len(L_list), 2*Lmax+1, len(deltaL_list)), dtype=np.complex128) + out = np.zeros((len(L_list), 2*Lmax+1, len(deltaL_list)), dtype=np.complex128) + + w3j_product = products_3j_array(S, n, L_list, deltaL_list, Jindex) + + thetas, theta_weights, nphi = KSW.get_coords(ksw) + tidx_start = 20 + thetas_batch = thetas[tidx_start:tidx_start+theta_batch] + y_m_ell = estimator_core.compute_ylm(thetas_batch, ksw.lmax, dtype=ksw.dtype) + + for iL, L in enumerate(L_list): + for idL, dL in enumerate(deltaL_list): + ell = L + dL + gamma = gamma_Z(x, Z, L, dL) + phase = (1j) ** dL + if ell < 0: + continue + for M in range(-L, L+1): + m = - M - n + if abs(m) > ell: + continue + if abs(M) > L: + continue + alm = get_a_lm(alm, ell=L+dL, m=-M-n) + alm_list[iL, M+Lmax, idL] = alm + # prefactors independent of M + prefactor = phase * w3j_product[iL, Lmax+M, idL] * gamma + out[iL, M+Lmax, idL] = prefactor * alm * y_m_ell[:, M+Lmax, iL] + return out + + + + + + + + + \ No newline at end of file diff --git a/ksw/cosmo.py b/ksw/cosmo.py index 32893d2..50c699d 100644 --- a/ksw/cosmo.py +++ b/ksw/cosmo.py @@ -6,7 +6,8 @@ import camb import h5py -from ksw import utils, radial_functional as rf +from ksw import utils +from ksw import radial_functional as rf class Cosmology: ''' @@ -74,7 +75,7 @@ def __init__(self, camb_params, verbose=False): verbose=verbose) self._setattr_camb('IntkAccuracyBoost', 5, subclass='Accuracy', verbose=verbose) - self._setattr_camb('lSampleBoost', 2, subclass='Accuracy', + self._setattr_camb('lSampleBoost', 2, subclass='Accuracy', verbose=verbose) self._setattr_camb('lAccuracyBoost', 2, subclass='Accuracy', verbose=verbose) @@ -142,7 +143,7 @@ def _setattr_camb(self, name, value, subclass=None, verbose=True): 'New value {} for param {} makes params invalid.'.format( value, name)) - def compute_transfer(self, lmax, verbose=True): + def compute_transfer(self, lmax, k_eta_fac=2.5, verbose=True): ''' Call CAMB to calculate radiation transfer functions. @@ -165,7 +166,7 @@ def compute_transfer(self, lmax, verbose=True): # CAMB crashes for too low lmax. raise ValueError('Pick lmax >= 300.') - k_eta_fac = 2.5 # Default used by CAMB. + #k_eta_fac = 2.5 # Default used by CAMB. self.camb_params.set_for_lmax(lmax, lens_margin=0, k_eta_fac=k_eta_fac) @@ -211,6 +212,79 @@ def compute_transfer(self, lmax, verbose=True): self.transfer['k'] = tr.q self.transfer['ells'] = ells # Probably sparse. + def compute_transfer_tensor(self, lmax, k_eta_fac=2.5, verbose=True): + """ + Call CAMB to calculate tensor transfer functions. + + Parameters + ---------- + lmax : int + Maximum multipole. + Verbose : bool, optional + Print if CAMB parameter values change. + + Raises + ------ + AttributeError + If CAMB parameters have not been initialized. + ValueError + If lmax is too low (lmax < 300). + """ + + if lmax < 300: + raise ValueError("Pick lmax >= 300.") + + self._setattr_camb('WantTensors', True, verbose=verbose) + lmax = max(lmax, 300) + #k_eta_fac = 2.5 + max_eta_k = k_eta_fac * lmax + max_eta_k = max(max_eta_k, 1000) + + self.camb_params.max_l = lmax + self.camb_params.max_l_tensor = lmax + self.camb_params.max_eta_k = max_eta_k + self.camb_params.max_eta_k_tensor = max_eta_k + + if not self.camb_params.validate(): + raise ValueError(f"Value {lmax} for lmax makes params invalid") + + # Actually run CAMB + data = camb.get_transfer_functions(self.camb_params) + self._camb_data = data + + # tensor instead of scalar + tr = data.get_cmb_transfer_data('tensor') + try: + ells = tr.L + except AttributeError: + ells = tr.l + ells = ells.astype(int) + + prefactor = np.sqrt((ells + 2) * (ells + 1) * ells * (ells - 1)) + tr.delta_p_l_k[0,...] *= prefactor[:,np.newaxis] + + tr.delta_p_l_k *= (self.camb_params.TCMB * 1e6) + + # Scale tensor transfer by 1/4 to correct for CAMB output. + tr.delta_p_l_k /= 4. + + nk = tr.q.size + nell = ells.size + # For tensor we have (T, E, B) + npol = tr.delta_p_l_k.shape[0] + tr_ell_k = np.empty((nell, nk, npol), dtype=float) + + tr_view = np.swapaxes(tr.delta_p_l_k, 0, 2) # (nk, nell, npol) + tr_view = np.swapaxes(tr_view, 0, 1) # (nell, nk, npol) + tr_ell_k[:] = np.ascontiguousarray(tr_view) + + self.transfer['tr_ell_k_tensor'] = tr_ell_k + self.transfer['k_tensor'] = tr.q + self.transfer['ells_tensor'] = ells + + self._setattr_camb('WantTensors', False, verbose=verbose) + + def compute_c_ell(self, lmax=None): ''' Calculate angular power spectra (Cls) using precomputed @@ -252,6 +326,7 @@ def compute_c_ell(self, lmax=None): self.c_ell['lenspotential']['ells'] = ells_lenspotential self.c_ell['lenspotential']['c_ell'] = c_ell_lenspotential + def add_prim_reduced_bispectrum(self, prim_shape, radii, name=None): ''' Compute the factors of the reduced bispectrum for a given @@ -300,7 +375,7 @@ def add_prim_reduced_bispectrum(self, prim_shape, radii, name=None): amps = np.asarray(prim_shape.amps) amps *= 2 * (2 * np.pi ** 2 * self.camb_params.InitPower.As) ** 2 * (3 / 5) - # Call C code. + # Call C code red_bisp = rf.radial_func(f_k, tr_ell_k, k, radii, ells_sparse) factors, rule, weights = self._parse_prim_reduced_bispec( @@ -1149,7 +1224,7 @@ def write(self, filename): f.create_dataset('rule', data=self.rule) f.create_dataset('weights', data=self.weights) f.create_dataset('ells_full', data=self.ells_full) - f.create_dataset('name', data=np.string_(self.name)) + f.create_dataset('name', data=np.bytes_(self.name)) @classmethod def init_from_file(cls, filename): diff --git a/ksw/estimator.py b/ksw/estimator.py index 7c04812..55de234 100644 --- a/ksw/estimator.py +++ b/ksw/estimator.py @@ -78,6 +78,8 @@ def __init__(self, red_bispectra, icov, lmax, pol, precision='single'): self.mc_gt = None self.mc_gt_sq = None + self.gt_local = [] + self.lmax = lmax self.pol = pol @@ -374,6 +376,104 @@ def step_batch(self, alm_loader, alm_files, comm=None, verbose=False, **kwargs): self.mc_idx += mc_idx + def step_batch_2pass(self, alm_loader, alm_files, comm=None, verbose=False, **kwargs): + ''' + Compute grad T its mean for a set of simulations, by loading and processing + several alms in parallel using MPI. + + Parameters + ---------- + alm_loader : callable + Function that returns alms on rank given filename as first argument. + alm_files : array_like + List of alm files to load. + comm : MPI communicator, optional + verbose : bool, optional + Print process. + kwargs : dict, optional + Optional keyword arguments passed to "_step". + ''' + + if comm is None: + comm = utils.FakeMPIComm() + + # Monte Carlo quantities local to rank. + mc_idx_loc = 0 + mc_gt_loc = None + + # Split alm_file loop over ranks. + for alm_file in alm_files[comm.Get_rank():len(alm_files):comm.Get_size()]: + + if verbose: + print(f'rank {comm.rank:3}: loading {alm_file}') + alm = alm_loader(alm_file) + if verbose: + print(f'rank {comm.rank:3}: done loading') + grad_t = self._step(alm, **kwargs) + + if mc_gt_loc is None: + mc_gt_loc = grad_t + else: + mc_gt_loc += grad_t + + # # NOTE + # if mc_gt_loc is None: + # mc_gt_loc = grad_t.copy() + # else: + # mc_gt_loc += grad_t.copy() + + # The copy here is important, otherwise each iteration of the + # loop adds grad to the first element of the list. + self.gt_local.append(grad_t.copy()) + + mc_idx_loc += 1 + + print(f'rank : {comm.rank:3} waiting in step_batch') + # To allow allreduce when number of ranks > alm files. + shape, dtype = utils.bcast_array_meta(mc_gt_loc, comm, root=0) + if mc_gt_loc is None: mc_gt_loc = np.zeros(shape, dtype=dtype) + if mc_idx_loc is None: mc_idx_loc = 0 + + mc_gt = utils.allreduce_array(mc_gt_loc, comm) + mc_idx = utils.allreduce(mc_idx_loc, comm) + print(f'rank : {comm.rank:3} after reduce in step_batch') + + # All ranks get to update the internal mc variable themselves. + if self.mc_gt is None: + self.mc_gt = mc_gt + else: + self.__mc_gt += mc_gt + + self.mc_idx += mc_idx + + def compute_fisher_2pass(self, comm): + ''' + Compute the Fisher information from the grad T maps computed + on all ranks. + + Returns + ------- + fisher : float, None + Fisher information. + ''' + + if self.mc_gt is None: + return None + + fisher_local = 0. + for gidx, gt in enumerate(self.gt_local): + + diff = gt - self.mc_gt + diff_icov = self.icov(diff) + dot = utils.contract_almxblm(diff, np.conj(diff_icov)) + print(f'{comm.rank=}, {gidx=}, fisher estimate={dot / 3}') + fisher_local += dot + + fisher = utils.allreduce(fisher_local, comm) + fisher /= (3 * self.mc_idx) + + return fisher + def compute_estimate_batch(self, alm_loader, alm_files, comm=None, verbose=False, **kwargs): ''' @@ -411,8 +511,6 @@ def compute_estimate_batch(self, alm_loader, alm_files, comm=None, lin_terms = np.zeros(len(alm_files)) fishers = np.zeros(len(alm_files)) - # NOTE, silly to recompute fisher for each aidx. - # Split alm_file loop over ranks. for aidx in range(comm.Get_rank(), len(alm_files), comm.Get_size()): @@ -709,7 +807,127 @@ def compute_ng_sim_batch(self, alm_loader, alm_files, alm_writer, if verbose: print(f'rank {comm.rank:3}: done writing {oalm_file}') - + + def write_state_2pass(self, filename, fisher, comm=None): + ''' + Write internal state, i.e. mc_gt, fisher and mc_idx, for the 2pass-mode + of the code. + + Parameters + ---------- + filename : str + Absolute path to output file. + fisher : float + Output from compute_fisher_2pass. + comm : MPI communicator, optional + If provided, rank 0 is assumed to do the writing, so must be present. + ''' + + if comm is None: + comm = utils.FakeMPIComm() + + if comm.Get_rank() == 0: + # Remove file extension to be consistent. + filename, _ = os.path.splitext(filename) + + mc_idx_to_save = np.asarray([self.mc_idx], dtype=np.int64) + + fisher_to_save = np.asarray([fisher], dtype=np.float64) + + if self.__mc_gt is None: + mc_gt_to_save = np.asarray([np.nan], dtype=self.cdtype) + else: + mc_gt_to_save = self.__mc_gt + + with h5py.File(filename + '.hdf5', 'w') as f: + f.create_dataset('mc_idx', data=mc_idx_to_save) + f.create_dataset('fisher', data=fisher_to_save) + f.create_dataset('mc_gt', data=mc_gt_to_save) + + def _read_state_2pass(self, filename, comm=None): + ''' + Read internal state, i.e. mc_gt, fisher and mc_idx, from hdf5 file. + + Parameters + ---------- + filename : str + Absolute path to output file. + comm : MPI communicator, optional + If provided, rank 0 is assumed to do the reading, result will be + broadcasted to all ranks. + + Returns + ------- + mc_idx : int + Counter for Monte Carlo estimates. + fisher : float, None + Fisher information. + mc_gt : (npol, nelem) complex array, None + Monte Carlo estimate. + ''' + + if comm is None: + comm = utils.FakeMPIComm() + + if comm.Get_rank() == 0: + # Remove file extension to be consistent. + filename, _ = os.path.splitext(filename) + + with h5py.File(filename + '.hdf5', 'r') as f: + mc_idx_read = f['mc_idx'][()] + fisher_read = f['fisher'][()] + mc_gt_read = f['mc_gt'][()] + + assert mc_idx_read.size == 1, (f'mc_idx has to be single int, got ' + f'{mc_idx.size}-sized array') + mc_idx_read = int(mc_idx_read[0]) + + else: + mc_idx_read = None + fisher_read = None + mc_gt_read = None + + mc_idx_read = utils.bcast(mc_idx_read, comm, root=0) + fisher_read = utils.bcast_array(fisher_read, comm, root=0) + mc_gt_read = utils.bcast_array(mc_gt_read, comm, root=0) + + if fisher_read.size == 1 and np.isnan(fisher_read)[0]: + fisher_read = None + else: + fisher_read = float(fisher_read[0]) + + if mc_gt_read.size == 1 and np.isnan(mc_gt_read)[0]: + mc_gt_read = None + + return mc_idx_read, fisher_read, mc_gt_read + + def start_from_read_state_2pass(self, filename, comm=None): + ''' + Return Fisher information and update estimator state with + mc_gt and mc_idx read from .hdf5 file. + + Parameters + ---------- + filename : str + Absolute path to output file. + comm : MPI communicator, optional + If provided, rank 0 is assumed to do the reading, result will be + broadcasted to all ranks. + + Returns + ------- + fisher : float + Fisher information. + ''' + + mc_idx_read, fisher_read, mc_gt_read = self._read_state_2pass( + filename, comm=comm) + + self.mc_idx = mc_idx_read + self.__mc_gt = mc_gt_read + + return fisher_read + def write_state(self, filename, comm=None): ''' Write internal state, i.e. mc_gt, mc_gt_sq and mc_idx, to hdf5 file. diff --git a/tests/python/test_cosmology.py b/tests/python/test_cosmology.py index a463566..a998a3f 100644 --- a/tests/python/test_cosmology.py +++ b/tests/python/test_cosmology.py @@ -1,6 +1,7 @@ import unittest import numpy as np from scipy.special import spherical_jn +from scipy.integrate import trapezoid import os import tempfile import pathlib @@ -36,7 +37,7 @@ def test_cosmology_init(self): self.assertIs(cosmo.camb_params.DoLensing, True) self.assertEqual(cosmo.camb_params.Accuracy.AccuracyBoost, 2) self.assertEqual(cosmo.camb_params.Accuracy.lSampleBoost, 2) - self.assertEqual(cosmo.camb_params.Accuracy.lAccuracyBoost, 2) + self.assertEqual(cosmo.camb_params.Accuracy.lAccuracyBoost, 2) self.assertIs(cosmo.camb_params.Accuracy.AccurateBB, True) self.assertIs(cosmo.camb_params.Accuracy.AccurateReionization, True) self.assertEqual(cosmo.camb_params.Accuracy.BessIntBoost, 30) @@ -125,6 +126,30 @@ def test_cosmology_compute_transfer(self): self.assertEqual(ells[0], 2) self.assertEqual(ells[-1], lmax) + def test_cosmology_compute_transfer_tensor(self): + + lmax = 300 + pars = camb.CAMBparams() + pars.set_cosmology(**self.cosmo_opts) + + cosmo = Cosmology(pars) + cosmo.compute_transfer_tensor(lmax) + + ells = cosmo.transfer['ells_tensor'] + k = cosmo.transfer['k_tensor'] + tr_ell_k = cosmo.transfer['tr_ell_k_tensor'] + + npol = 3 # tensor has T, E, B + nell = ells.size + nk = k.size + + self.assertEqual(tr_ell_k.shape, (nell, nk, npol)) + self.assertTrue(tr_ell_k.flags['C_CONTIGUOUS']) + self.assertTrue(tr_ell_k.flags['OWNDATA']) + self.assertEqual(ells[0], 2) + self.assertEqual(ells[-1], lmax) + + def test_cosmology_compute_transfer_err_value(self): lmax = 299 # Too low value. @@ -227,13 +252,13 @@ def test_cosmology_compute_transfer_power(self): for lidx in range(ells.size): # TT. - c_ell[lidx,0] = np.trapz(k ** 2 * p_k * tr[lidx,:,0] ** 2, k) + c_ell[lidx,0] = trapezoid(k ** 2 * p_k * tr[lidx,:,0] ** 2, k) # EE. - c_ell[lidx,1] = np.trapz(k ** 2 * p_k * tr[lidx,:,1] ** 2, k) + c_ell[lidx,1] = trapezoid(k ** 2 * p_k * tr[lidx,:,1] ** 2, k) # TE. - c_ell[lidx,3] = np.trapz(k ** 2 * p_k * tr[lidx,:,0] * tr[lidx,:,1], k) + c_ell[lidx,3] = trapezoid(k ** 2 * p_k * tr[lidx,:,0] * tr[lidx,:,1], k) c_ell *= (2 / np.pi) @@ -275,13 +300,13 @@ def test_cosmology_compute_transfer_power_ns(self): for lidx in range(ells.size): # TT. - c_ell[lidx,0] = np.trapz(k ** 2 * p_k * tr[lidx,:,0] ** 2, k) + c_ell[lidx,0] = trapezoid(k ** 2 * p_k * tr[lidx,:,0] ** 2, k) # EE. - c_ell[lidx,1] = np.trapz(k ** 2 * p_k * tr[lidx,:,1] ** 2, k) + c_ell[lidx,1] = trapezoid(k ** 2 * p_k * tr[lidx,:,1] ** 2, k) # TE. - c_ell[lidx,3] = np.trapz(k ** 2 * p_k * tr[lidx,:,0] * tr[lidx,:,1], k) + c_ell[lidx,3] = trapezoid(k ** 2 * p_k * tr[lidx,:,0] * tr[lidx,:,1], k) c_ell *= (2 / np.pi) @@ -375,7 +400,7 @@ def test_cosmology_add_prim_reduced_bispectrum(self): integrand = k ** 2 * spherical_jn(ell, radius * k) integrand *= tr_ell_k[lidx,:,pidx] * func - ans_expec = (2 / np.pi) * np.trapz(integrand, k) + ans_expec = (2 / np.pi) * trapezoid(integrand, k) lidx_full = ell - 2 ans = red_bisp.factors[cidx*nr+ridx,pidx,lidx_full] diff --git a/tests/python/test_radial_func.py b/tests/python/test_radial_func.py index dd6a1e3..d52e2c4 100644 --- a/tests/python/test_radial_func.py +++ b/tests/python/test_radial_func.py @@ -4,6 +4,7 @@ import unittest import numpy as np from scipy.special import spherical_jn +from scipy.integrate import trapezoid from ksw import radial_functional as rf @@ -154,7 +155,7 @@ def test_radial_func(self): exp_f_ell_r = np.zeros((nr, nell, npol, ncomp), dtype=float) for ridx, rp in enumerate(radii_prime): - exp_f_ell_r[0,0,0,ridx] = (2. / np.pi) * np.trapz( + exp_f_ell_r[0,0,0,ridx] = (2. / np.pi) * trapezoid( f_k[:,ridx] * spherical_jn(ell, k * radius) * k ** 2, k) f_ell_r = rf.radial_func(f_k, tr_ell_k, k, radii, ells) diff --git a/tests/python/test_utils.py b/tests/python/test_utils.py index feb8d1d..3241a1c 100644 --- a/tests/python/test_utils.py +++ b/tests/python/test_utils.py @@ -1,6 +1,7 @@ import unittest import numpy as np import healpy as hp +from scipy.integrate import trapezoid from mpi4py import MPI from ksw import utils @@ -17,7 +18,7 @@ def test_utils_get_trapz_weights(self): y = np.asarray([6, 3, 7, 3]) - self.assertAlmostEqual(np.sum(y * dx), np.trapz(y, x)) + self.assertAlmostEqual(np.sum(y * dx), trapezoid(y, x)) def test_utils_get_trapz_weights_err(self):