From c83360fe51b76d342c8638514594cde8cab1415a Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 11:16:51 +0000 Subject: [PATCH 01/11] Add wrapper for `open_dataset` --- src/sdf_xarray/__init__.py | 58 ++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 1737615..e56928e 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -195,6 +195,40 @@ def combine_datasets( ) +def open_dataset( + path: PathLike, + *, + drop_variables: list[str] | None = None, + keep_particles: bool = False, + probe_names: list[str] | None = None, +) -> xr.Dataset: + """Open an EPOCH SDF file as a `xarray.Dataset`. + + Parameters + ---------- + path + The path to the SDF file + drop_variables + A list of variables to drop from the dataset + keep_particles + If ``True``, also load particle data (this may use a lot of memory!) + probe_names + List of EPOCH probe names + + Examples + -------- + >>> ds = open_dataset("0000.sdf") + >>> ds["Electric_Field"]["Ex"].values # Access Electric_Field_Ex data + """ + + return xr.open_dataset( + path, + drop_variables=drop_variables, + keep_particles=keep_particles, + probe_names=probe_names, + ) + + def open_mfdataset( path_glob: Iterable | str | Path | Callable[..., Iterable[Path]], *, @@ -204,7 +238,7 @@ def open_mfdataset( data_vars: list[str] | None = None, chunks: T_Chunks = "auto", ) -> xr.Dataset: - """Open a set of EPOCH SDF files as one `xarray.Dataset` + """Open a set of EPOCH SDF files as one `xarray.Dataset`. EPOCH can output variables at different periods, so each individal SDF file from one EPOCH run may have different variables in it. 
In @@ -225,18 +259,18 @@ def open_mfdataset( Parameters ---------- - path_glob : + path_glob List of filenames or string glob pattern - separate_times : + separate_times If ``True``, create separate time dimensions for variables defined at different output frequencies - keep_particles : + keep_particles If ``True``, also load particle data (this may use a lot of memory!) - probe_names : + probe_names List of EPOCH probe names - data_vars : + data_vars List of data vars to load in (If not specified loads in all variables) - chunks : + chunks Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. By default chunks are automatically set so that they are the same size as the dimensions @@ -300,6 +334,7 @@ def open_mfdataset( def open_datatree( path: PathLike, *, + drop_variables: list[str] | None = None, keep_particles: bool = False, probe_names: list[str] | None = None, ) -> xr.DataTree: @@ -334,6 +369,8 @@ def open_datatree( ---------- path The path to the SDF file + drop_variables + A list of variables to drop from the dataset keep_particles If ``True``, also load particle data (this may use a lot of memory!) 
probe_names @@ -342,11 +379,14 @@ def open_datatree( Examples -------- >>> dt = open_datatree("0000.sdf") - >>> dt["Electric_Field"]["Ex"].values # Access all Electric_Field_Ex data + >>> dt["Electric_Field"]["Ex"].values # Access Electric_Field_Ex data """ return xr.open_datatree( - path, keep_particles=keep_particles, probe_names=probe_names + path, + drop_variables=drop_variables, + keep_particles=keep_particles, + probe_names=probe_names, ) From f97b4952f9d498785898968b8e298136fc98c275 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 11:17:04 +0000 Subject: [PATCH 02/11] Add `open_dataset` tests --- tests/test_dataset.py | 125 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 263da88..6757d2d 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -8,6 +8,7 @@ _process_latex_name, _resolve_glob, download, + open_dataset, open_mfdataset, ) @@ -19,6 +20,20 @@ def test_basic(): + with open_dataset(TEST_FILES_DIR / "0000.sdf") as df: + ex_field = "Electric_Field_Ex" + assert ex_field in df + x_coord = "X_Grid_mid" + assert x_coord in df[ex_field].coords + assert df[x_coord].attrs["long_name"] == "X" + + px_protons = "Particles_Px_proton" + assert px_protons not in df + x_coord = "X_Particles_proton" + assert x_coord not in df.coords + + +def test_xr_basic(): with xr.open_dataset(TEST_FILES_DIR / "0000.sdf") as df: ex_field = "Electric_Field_Ex" assert ex_field in df @@ -33,6 +48,15 @@ def test_basic(): def test_constant_name_and_units(): + with open_dataset(TEST_FILES_DIR / "0000.sdf") as df: + name = "Absorption_Total_Laser_Energy_Injected" + full_name = "Absorption/Total Laser Energy Injected" + assert name in df + assert df[name].units == "J" + assert df[name].attrs["full_name"] == full_name + + +def test_xr_constant_name_and_units(): with xr.open_dataset(TEST_FILES_DIR / "0000.sdf") as df: name = 
"Absorption_Total_Laser_Energy_Injected" full_name = "Absorption/Total Laser Energy Injected" @@ -42,12 +66,28 @@ def test_constant_name_and_units(): def test_preferred_chunks_metadata(): + with open_dataset(TEST_FILES_DIR / "0000.sdf") as df: + for var in df.data_vars: + assert "preferred_chunks" in df[var].encoding + + +def test_xr_preferred_chunks_metadata(): with xr.open_dataset(TEST_FILES_DIR / "0000.sdf") as df: for var in df.data_vars: assert "preferred_chunks" in df[var].encoding def test_coords(): + with open_dataset(TEST_FILES_DIR / "0010.sdf") as df: + px_electron = "dist_fn_x_px_electron" + assert px_electron in df + print(df[px_electron].coords) + x_coord = "Px_x_px_electron" + assert x_coord in df[px_electron].coords + assert df[x_coord].attrs["full_name"] == "Grid/x_px/electron" + + +def test_xr_coords(): with xr.open_dataset(TEST_FILES_DIR / "0010.sdf") as df: px_electron = "dist_fn_x_px_electron" assert px_electron in df @@ -58,6 +98,15 @@ def test_coords(): def test_particles(): + with open_dataset(TEST_FILES_DIR / "0010.sdf", keep_particles=True) as df: + px_protons = "Particles_Px_proton" + assert px_protons in df + x_coord = "X_Particles_proton" + assert x_coord in df[px_protons].coords + assert df[x_coord].attrs["long_name"] == "X" + + +def test_xr_particles(): with xr.open_dataset(TEST_FILES_DIR / "0010.sdf", keep_particles=True) as df: px_protons = "Particles_Px_proton" assert px_protons in df @@ -67,6 +116,12 @@ def test_particles(): def test_no_particles(): + with open_dataset(TEST_FILES_DIR / "0010.sdf", keep_particles=False) as df: + px_protons = "Particles_Px_proton" + assert px_protons not in df + + +def test_xr_no_particles(): with xr.open_dataset(TEST_FILES_DIR / "0010.sdf", keep_particles=False) as df: px_protons = "Particles_Px_proton" assert px_protons not in df @@ -394,6 +449,13 @@ def test_xr_3d_distribution_function(): assert df[distribution_function].shape == (16, 20, 20) +def test_drop_variables(): + with open_dataset( + 
TEST_FILES_DIR / "0000.sdf", drop_variables=["Electric_Field_Ex"] + ) as df: + assert "Electric_Field_Ex" not in df + + def test_xr_drop_variables(): with xr.open_dataset( TEST_FILES_DIR / "0000.sdf", drop_variables=["Electric_Field_Ex"] @@ -401,6 +463,15 @@ def test_xr_drop_variables(): assert "Electric_Field_Ex" not in df +def test_drop_variables_multiple(): + with open_dataset( + TEST_FILES_DIR / "0000.sdf", + drop_variables=["Electric_Field_Ex", "Electric_Field_Ey"], + ) as df: + assert "Electric_Field_Ex" not in df + assert "Electric_Field_Ey" not in df + + def test_xr_drop_variables_multiple(): with xr.open_dataset( TEST_FILES_DIR / "0000.sdf", @@ -410,6 +481,15 @@ def test_xr_drop_variables_multiple(): assert "Electric_Field_Ey" not in df +def test_drop_variables_original(): + with open_dataset( + TEST_FILES_DIR / "0000.sdf", + drop_variables=["Electric_Field/Ex", "Electric_Field/Ey"], + ) as df: + assert "Electric_Field_Ex" not in df + assert "Electric_Field_Ey" not in df + + def test_xr_drop_variables_original(): with xr.open_dataset( TEST_FILES_DIR / "0000.sdf", @@ -419,6 +499,15 @@ def test_xr_drop_variables_original(): assert "Electric_Field_Ey" not in df +def test_drop_variables_mixed(): + with open_dataset( + TEST_FILES_DIR / "0000.sdf", + drop_variables=["Electric_Field/Ex", "Electric_Field_Ey"], + ) as df: + assert "Electric_Field_Ex" not in df + assert "Electric_Field_Ey" not in df + + def test_xr_drop_variables_mixed(): with xr.open_dataset( TEST_FILES_DIR / "0000.sdf", @@ -428,6 +517,11 @@ def test_xr_drop_variables_mixed(): assert "Electric_Field_Ey" not in df +def test_erroring_drop_variables(): + with pytest.raises(KeyError): + open_dataset(TEST_FILES_DIR / "0000.sdf", drop_variables=["Electric_Field/E"]) + + def test_xr_erroring_drop_variables(): with pytest.raises(KeyError): xr.open_dataset( @@ -435,6 +529,18 @@ def test_xr_erroring_drop_variables(): ) +def test_loading_multiple_probes(): + with open_dataset( + TEST_2D_PARTICLE_DATA / 
"0002.sdf", + keep_particles=True, + probe_names=["Electron_Front_Probe", "Electron_Back_Probe"], + ) as df: + assert "X_Probe_Electron_Front_Probe" in df.coords + assert "X_Probe_Electron_Back_Probe" in df.coords + assert "ID_Electron_Front_Probe_Px" in df.dims + assert "ID_Electron_Back_Probe_Px" in df.dims + + def test_xr_loading_multiple_probes(): with xr.open_dataset( TEST_2D_PARTICLE_DATA / "0002.sdf", @@ -447,7 +553,24 @@ def test_xr_loading_multiple_probes(): assert "ID_Electron_Back_Probe_Px" in df.dims -def test_xr_oading_one_probe_drop_second_probe(): +def test_loading_one_probe_drop_second_probe(): + with open_dataset( + TEST_2D_PARTICLE_DATA / "0002.sdf", + keep_particles=True, + drop_variables=[ + "Electron_Back_Probe_Px", + "Electron_Back_Probe_Py", + "Electron_Back_Probe_Pz", + "Electron_Back_Probe_weight", + ], + probe_names=["Electron_Front_Probe"], + ) as df: + assert "X_Probe_Electron_Front_Probe" in df.coords + assert "ID_Electron_Front_Probe_Px" in df.dims + assert "ID_Electron_Back_Probe_Px" not in df.dims + + +def test_xr_loading_one_probe_drop_second_probe(): with xr.open_dataset( TEST_2D_PARTICLE_DATA / "0002.sdf", keep_particles=True, From 828c7e1c12223b7bcec3800fcc93b22e242abcd5 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 12:08:59 +0000 Subject: [PATCH 03/11] Remove old references to `xr.open_dataset` from docs --- docs/animation.rst | 2 +- docs/getting_started.rst | 4 ++-- docs/key_functionality.rst | 17 ++++++++--------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/docs/animation.rst b/docs/animation.rst index 62dec15..92dc31c 100644 --- a/docs/animation.rst +++ b/docs/animation.rst @@ -125,7 +125,7 @@ the animation. .. 
jupyter-execute:: - ds = xr.open_dataset("tutorial_dataset_3d/0005.sdf") + ds = sdfxr.open_dataset("tutorial_dataset_3d/0005.sdf") da = ds["Derived_Number_Density"] anim = da.epoch.animate(t = "X_Grid_mid") anim.show() diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 132242f..28c759b 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -60,9 +60,9 @@ Loading single files .. jupyter-execute:: - import xarray as xr + import sdf_xarray as sdfxr - xr.open_dataset("tutorial_dataset_1d/0010.sdf") + sdfxr.open_dataset("tutorial_dataset_1d/0010.sdf") Alternatively, you can load the data in as a `xarray.DataTree`, which organises the data hierarchically into ``groups`` (for example grouping related quantities such as the individual diff --git a/docs/key_functionality.rst b/docs/key_functionality.rst index ca32db9..bbfc3b6 100644 --- a/docs/key_functionality.rst +++ b/docs/key_functionality.rst @@ -15,14 +15,13 @@ Loading SDF files ----------------- There are several ways to load SDF files: -- To load a single file, use `xarray.open_dataset`, `sdf_xarray.open_datatree` or `xarray.open_datatree` -- To load multiple files, use `sdf_xarray.open_mfdataset`, `xarray.open_mfdataset` or `sdf_xarray.open_mfdatatree`. -- To access the raw contents of a single SDF file, use `sdf_xarray.sdf_interface.SDFFile`. +- To load a single file, use `sdf_xarray.open_dataset` or `sdf_xarray.open_datatree` +- To load multiple files, use `sdf_xarray.open_mfdataset` or `sdf_xarray.open_mfdatatree` +- To access the raw contents of a single SDF file, use `sdf_xarray.sdf_interface.SDFFile` -.. note:: - - When loading SDF files, variables related to ``boundaries``, ``cpu`` and ``output file`` are excluded as they are problematic. If you wish to load these in please use the - :ref:`loading-raw-files` approach. +When loading SDF files, variables related to ``boundaries``, ``cpu`` and ``output file`` are +excluded as they are problematic. 
If you wish to load these in please use the +:ref:`loading-raw-files` approach. .. tip:: @@ -39,9 +38,9 @@ Loading single files .. jupyter-execute:: - xr.open_dataset("tutorial_dataset_1d/0010.sdf") + sdfxr.open_dataset("tutorial_dataset_1d/0010.sdf") -Alternatively, you can load the data in as a `xarray.DataTree`, which organises the data +You can also load the data in as a `xarray.DataTree`, which organises the data hierarchically into ``groups`` (for example grouping related quantities such as the individual components of the electric and magnetic fields) while keeping each item as a `xarray.Dataset`. From c781479d0959c1d56e2a192179af52e421b1c740 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 12:09:15 +0000 Subject: [PATCH 04/11] Additional docs cleanup --- docs/key_functionality.rst | 24 ++++++++++++------------ docs/unit_conversion.rst | 15 +++++++-------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/docs/key_functionality.rst b/docs/key_functionality.rst index bbfc3b6..fa7aa21 100644 --- a/docs/key_functionality.rst +++ b/docs/key_functionality.rst @@ -71,22 +71,14 @@ is by using the `sdf_xarray.open_mfdataset`. If your simulation includes multiple ``output`` blocks that specify different variables for output at various time steps, variables not present at a specific step will default - to a nan value. To clean your dataset by removing these nan values we suggest using the - `xarray.DataArray.dropna` function or :ref:`loading-sparse-data`. + to a nan value. To remove these nan values we suggest using the `xarray.DataArray.dropna` + function or following our implementation in :ref:`loading-sparse-data`. ..
jupyter-execute:: sdfxr.open_mfdataset("tutorial_dataset_1d/*.sdf") -Alternatively, you can load the data in as a `xarray.DataTree`, which organises the data -hierarchically into ``groups`` (for example grouping related quantities such as the individual -components of the electric and magnetic fields) while keeping each item as a `xarray.Dataset`. - -.. jupyter-execute:: - - sdfxr.open_mfdatatree("tutorial_dataset_1d/*.sdf") - -Alternatively files can be loaded using `xarray.open_mfdataset` however when loading in +Alternatively, files can be loaded using `xarray.open_mfdataset` however when loading in all the files we have do some processing of the data so that we can correctly align it along the time dimension; This is done via the ``preprocess`` parameter utilising the `sdf_xarray.SDFPreprocess` function. @@ -100,6 +92,14 @@ the time dimension; This is done via the ``preprocess`` parameter utilising the preprocess=sdfxr.SDFPreprocess() ) +You can also load the data in as a `xarray.DataTree`, which organises the data +hierarchically into ``groups`` (for example grouping related quantities such as the individual +components of the electric and magnetic fields) while keeping each item as a `xarray.Dataset`. + +.. jupyter-execute:: + + sdfxr.open_mfdatatree("tutorial_dataset_1d/*.sdf") + .. _loading-sparse-data: Loading sparse data @@ -141,7 +141,7 @@ multiple files). .. jupyter-execute:: - xr.open_dataset("tutorial_dataset_1d/0010.sdf", keep_particles=True) + sdfxr.open_dataset("tutorial_dataset_1d/0010.sdf", keep_particles=True) Loading specific variables ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/unit_conversion.rst b/docs/unit_conversion.rst index b793598..aeae0b6 100644 --- a/docs/unit_conversion.rst +++ b/docs/unit_conversion.rst @@ -15,9 +15,10 @@ to femto-seconds or particle energy from Joules to electron-volts. .. 
jupyter-execute:: - from sdf_xarray import open_mfdataset + import sdf_xarray as sdfxr import matplotlib.pyplot as plt %matplotlib inline + plt.rcParams.update({ "axes.labelsize": 16, "xtick.labelsize": 14, @@ -45,7 +46,7 @@ We can use the |rescale_coords_accessor| method to convert X, Y, and Z coordinat fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) - ds = open_mfdataset("tutorial_dataset_2d/*.sdf") + ds = sdfxr.open_mfdataset("tutorial_dataset_2d/*.sdf") ds_in_microns = ds.epoch.rescale_coords(1e6, "µm", ["X_Grid_mid", "Y_Grid_mid"]) ds["Derived_Number_Density_Electron"].isel(time=0).plot(ax=ax1, x="X_Grid_mid", y="Y_Grid_mid") @@ -65,7 +66,7 @@ seconds (``s``) to femto-seconds (``fs``) by applying a multiplier of ``1e15``. .. jupyter-execute:: - ds = open_mfdataset("tutorial_dataset_2d/*.sdf") + ds = sdfxr.open_mfdataset("tutorial_dataset_2d/*.sdf") ds["time"] .. jupyter-execute:: @@ -97,15 +98,13 @@ Installation To install the pint libraries you can simply run the following optional dependency pip command which will install both the ``pint`` and ``pint-xarray`` -libraries. You can install these optional dependencies via pip: +libraries. Once installed the ``xarray.Dataset.pint`` accessor should become +accessible. You can install these optional dependencies via pip: .. code:: console $ pip install "sdf_xarray[pint]" -.. note:: - Once you install ``pint-xarray`` it is automatically picked up and loaded - by the code so you should have access to the ``xarray.Dataset.pint`` accessor. Quantifying DataArrays ~~~~~~~~~~~~~~~~~~~~~~ @@ -117,7 +116,7 @@ Joules and convert it to electron volts. .. 
jupyter-execute:: - ds = open_mfdataset("tutorial_dataset_1d/*.sdf") + ds = sdfxr.open_mfdataset("tutorial_dataset_1d/*.sdf") ds["Total_Particle_Energy_Electron"] Once you call `xarray.DataArray.pint.quantify` the type is inferred the original From 028d6b512319650b67ef5d731f53b9468ba9e45b Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 12:09:37 +0000 Subject: [PATCH 05/11] Add padding of `0.5 em` to bottom of jupyter cells in docs --- docs/_static/jupyter_padding.css | 3 +++ docs/conf.py | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 docs/_static/jupyter_padding.css diff --git a/docs/_static/jupyter_padding.css b/docs/_static/jupyter_padding.css new file mode 100644 index 0000000..375ebd4 --- /dev/null +++ b/docs/_static/jupyter_padding.css @@ -0,0 +1,3 @@ +.cell_output { + margin-bottom: 0.5em; +} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 017a6ab..25d0add 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -102,9 +102,7 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ["_static"] -html_css_files = [ - "force_render_dark_xarray_objects.css", -] +html_css_files = ["force_render_dark_xarray_objects.css", "jupyter_padding.css"] html_theme_options = { "repository_url": "https://github.com/epochpic/sdf-xarray", From 3b60e77812960ba15190e96e7be9190e0cb42bf5 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 12:22:02 +0000 Subject: [PATCH 06/11] Explain why certain variables are not loaded in in docstring of all `open_...()` --- src/sdf_xarray/__init__.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index e56928e..035411e 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -202,7 +202,9 @@ def open_dataset( keep_particles: bool = False, probe_names: list[str] | None = None, ) -> xr.Dataset: - """Open an EPOCH SDF file as a `xarray.Dataset`. + """Open an SDF file as a `xarray.Dataset`. Variables related to ``boundaries``, + ``cpu`` and ``output`` file are excluded as they are problematic. If you wish + to load these variables in see :ref:`loading-raw-files`. Parameters ---------- @@ -238,7 +240,10 @@ def open_mfdataset( data_vars: list[str] | None = None, chunks: T_Chunks = "auto", ) -> xr.Dataset: - """Open a set of EPOCH SDF files as one `xarray.Dataset`. + """Open a set of EPOCH SDF files as one `xarray.Dataset`. Variables + related to ``boundaries``, ``cpu`` and ``output`` file are excluded + as they are problematic. If you wish to load these variables in see + :ref:`loading-raw-files`. EPOCH can output variables at different periods, so each individal SDF file from one EPOCH run may have different variables in it. In @@ -339,6 +344,10 @@ def open_datatree( probe_names: list[str] | None = None, ) -> xr.DataTree: """ + Open an SDF file as a `xarray.DataTree`. Variables related to ``boundaries``, + ``cpu`` and ``output`` file are excluded as they are problematic. 
If you wish + to load these variables in see :ref:`loading-raw-files`. + An `xarray.DataTree` is constructed utilising the original names in the SDF file. This is due to the fact that these names include slashes which `xarray` can use to automatically build up a datatree. We do additionally replace @@ -398,7 +407,10 @@ def open_mfdatatree( probe_names: list[str] | None = None, data_vars: list[str] | None = None, ) -> xr.DataTree: - """Open a set of EPOCH SDF files as one `xarray.DataTree` + """Open a set of EPOCH SDF files as one `xarray.DataTree`. Variables + related to ``boundaries``, ``cpu`` and ``output`` file are excluded + as they are problematic. If you wish to load these variables in see + :ref:`loading-raw-files`. EPOCH can output variables at different periods, so each individal SDF file from one EPOCH run may have different variables in it. In From 876796933db855f8572b3dcfe3164fe8969666fc Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Tue, 3 Feb 2026 12:27:04 +0000 Subject: [PATCH 07/11] Change wording of missing variables in docs --- docs/key_functionality.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/key_functionality.rst b/docs/key_functionality.rst index fa7aa21..b27780c 100644 --- a/docs/key_functionality.rst +++ b/docs/key_functionality.rst @@ -20,8 +20,7 @@ There are several ways to load SDF files: - To access the raw contents of a single SDF file, use `sdf_xarray.sdf_interface.SDFFile` When loading SDF files, variables related to ``boundaries``, ``cpu`` and ``output file`` are -excluded as they are problematic. If you wish to load these in please use the -:ref:`loading-raw-files` approach. +excluded as they are problematic. If you wish to load these variables in see :ref:`loading-raw-files`. .. 
tip:: From 7a1c3f089f719c7300e05792fa628102ec8f2458 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Wed, 4 Feb 2026 12:09:22 +0000 Subject: [PATCH 08/11] Refactor summary in docs for sdf_xarray vs xarray --- docs/getting_started.rst | 91 +++++++++++++------------------------- docs/key_functionality.rst | 20 +-------- 2 files changed, 32 insertions(+), 79 deletions(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 28c759b..05b244b 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -30,77 +30,46 @@ or download this code locally: cd sdf-xarray pip install . -.. note:: - When loading SDF files, variables related to ``boundaries``, ``cpu`` and ``output file`` are excluded as they are problematic. If you wish to load these in please use the - :ref:`loading-raw-files-getting-started` approach. +Interaction +----------- -.. tip:: +There are two main ways to load EPOCH SDF files into xarray objects: using the dedicated +`sdf_xarray` functions or using the standard `xarray` interface with our custom engine. +For examples of how to use these functions see :ref:`loading-sdf-files`. - All code examples throughout this documentation are visualised using Jupyter notebooks - so that you can interactively explore `xarray.Dataset` objects. To do this on your machine - make sure that you have the necessary dependencies installed: +All code examples throughout this documentation are visualised using Jupyter notebooks +so that you can interactively explore the datasets. To do this on your machine make +sure that you have the necessary dependencies installed: - .. code-block:: bash - - pip install "sdf-xarray[jupyter]" - -Usage ------ - -``sdf-xarray`` is a backend for xarray, and so is usable directly from -`xarray`. There are several ways to load SDF files: - -- To load a single file, use `xarray.open_dataset`. -- To load multiple files, use `sdf_xarray.open_mfdataset` or `xarray.open_mfdataset`. 
-- To access the raw contents of a single SDF file, use `sdf_xarray.sdf_interface.SDFFile`. - -Loading single files --------------------- - -.. jupyter-execute:: - - import sdf_xarray as sdfxr - - sdfxr.open_dataset("tutorial_dataset_1d/0010.sdf") - -Alternatively, you can load the data in as a `xarray.DataTree`, which organises the data -hierarchically into ``groups`` (for example grouping related quantities such as the individual -components of the electric and magnetic fields) while keeping each item as a `xarray.Dataset`. - -.. jupyter-execute:: - - import sdf_xarray as sdfxr - - sdfxr.open_datatree("tutorial_dataset_1d/0010.sdf") - -Loading multiple files ----------------------- - -.. jupyter-execute:: - - import sdf_xarray as sdfxr - - sdfxr.open_mfdataset("tutorial_dataset_1d/*.sdf") +.. code-block:: bash -Alternatively, you can load the data in as a `xarray.DataTree`, which organises the data -hierarchically into ``groups`` (for example grouping related quantities such as the individual -components of the electric and magnetic fields) while keeping each item as a `xarray.Dataset`. + pip install "sdf-xarray[jupyter]" -.. jupyter-execute:: +.. important:: + + When loading SDF files, variables related to ``boundaries``, ``cpu`` and ``output file`` + are excluded as they are problematic. If you wish to load these variables in see + :ref:`loading-raw-files`. - import sdf_xarray as sdfxr - sdfxr.open_mfdatatree("tutorial_dataset_1d/*.sdf") +Using sdf_xarray (Recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _loading-raw-files-getting-started: +These functions are wrappers designed specifically for SDF data, providing the most +straightforward experience: -Loading raw files ------------------ +- **Single files**: Use `sdf_xarray.open_dataset` or `sdf_xarray.open_datatree` +- **Multiple files**: Use `sdf_xarray.open_mfdataset` or `sdf_xarray.open_mfdatatree` +- **Raw files**: use `sdf_xarray.sdf_interface.SDFFile` -.. 
jupyter-execute:: +Using xarray +~~~~~~~~~~~~ - import sdf_xarray as sdfxr +If you prefer using the native `xarray` functions, you can use `xarray.open_dataset`, +`xarray.open_datatree` and `xarray.open_mfdataset`. Note that `xarray` itself does not +provide an ``open_mfdatatree`` function. - raw_ds = sdfxr.SDFFile("tutorial_dataset_1d/0010.sdf") - raw_ds.variables.keys() +These functions should all work out of the box as long as `sdf_xarray` is installed on your +system. If you are having issues with it reading files, you might need to pass the parameter +``engine=sdf_engine`` when calling any of the above xarray functions. diff --git a/docs/key_functionality.rst b/docs/key_functionality.rst index b27780c..31b6ceb 100644 --- a/docs/key_functionality.rst +++ b/docs/key_functionality.rst @@ -11,26 +11,10 @@ Key Functionality import matplotlib.pyplot as plt %matplotlib inline +.. _loading-sdf-files: + Loading SDF files ----------------- -There are several ways to load SDF files: - -- To load a single file, use `sdf_xarray.open_dataset` or `sdf_xarray.open_datatree` -- To load multiple files, use `sdf_xarray.open_mfdataset` or `sdf_xarray.open_mfdatatree` -- To access the raw contents of a single SDF file, use `sdf_xarray.sdf_interface.SDFFile` - -When loading SDF files, variables related to ``boundaries``, ``cpu`` and ``output file`` are -excluded as they are problematic. If you wish to load these variables in see :ref:`loading-raw-files`. - -.. tip:: - - All code examples throughout this documentation are visualised using Jupyter notebooks - so that you can interactively explore `xarray.Dataset` objects. To do this on your machine - make sure that you have the necessary dependencies installed: - - ..
code-block:: bash - - pip install "sdf-xarray[jupyter]" Loading single files ~~~~~~~~~~~~~~~~~~~~ From 8ad27134a16f893c4580aabfd60d926b6d4b9514 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Wed, 4 Feb 2026 12:14:28 +0000 Subject: [PATCH 09/11] Fix ruff PLC0207 error --- src/sdf_xarray/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 035411e..e085150 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -646,7 +646,7 @@ def _norm_grid_name(grid_name: str) -> str: return grid_name.split("/", maxsplit=1)[-1] def _grid_species_name(grid_name: str) -> str: - return grid_name.split("/")[-1] + return grid_name.rsplit("/", maxsplit=1)[-1] def _process_grid_name(grid_name: str, transform_func) -> str: """Apply the given transformation function and then rename with underscores.""" From d16e1a33b385c3a71d90497cf366e2a625b0a764 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Wed, 4 Feb 2026 12:23:26 +0000 Subject: [PATCH 10/11] Add docstring to `XrTUIEntrpoint` class --- src/sdf_xarray/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index e085150..e73829d 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -884,6 +884,7 @@ def guess_can_open(self, filename_or_obj): class XrTUIEntrpoint: def open_mfdatatree(self, paths: list[Path]) -> xr.DataTree: + """Backend open_mfdatatree method used by `xr-tui `_""" return open_mfdatatree(paths) From 3839506e20f95bca2cdc9290e297b6c181c23b1e Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Sat, 7 Feb 2026 16:45:10 +0000 Subject: [PATCH 11/11] Add deck_path to `open_dataset` wrapper --- src/sdf_xarray/__init__.py | 2 ++ tests/test_dataset.py | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 2dbd886..3a72789 100644 --- a/src/sdf_xarray/__init__.py 
+++ b/src/sdf_xarray/__init__.py @@ -235,6 +235,7 @@ def open_dataset( drop_variables: list[str] | None = None, keep_particles: bool = False, probe_names: list[str] | None = None, + deck_path: PathLike | None = None, ) -> xr.Dataset: """Open an SDF file as a `xarray.Dataset`. Variables related to ``boundaries``, ``cpu`` and ``output`` file are excluded as they are problematic. If you wish @@ -262,6 +263,7 @@ def open_dataset( drop_variables=drop_variables, keep_particles=keep_particles, probe_names=probe_names, + deck_path=deck_path, ) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 0ea007d..a4da3e9 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -814,26 +814,26 @@ def test_open_mfdataset_data_vars_separate_times_multiple_times_keep_particles() def test_open_dataset_deck_path_default(): - with xr.open_dataset(TEST_FILES_DIR / "0000.sdf") as df: + with open_dataset(TEST_FILES_DIR / "0000.sdf") as df: assert "deck" in df.attrs def test_open_dataset_deck_path_failed(): with ( pytest.raises(FileNotFoundError), - xr.open_dataset(TEST_FILES_DIR / "0000.sdf", deck_path="non_existent.deck"), + open_dataset(TEST_FILES_DIR / "0000.sdf", deck_path="non_existent.deck"), ): pass def test_open_dataset_deck_path_relative(): - with xr.open_dataset(TEST_FILES_DIR / "0000.sdf", deck_path="input.deck") as df: + with open_dataset(TEST_FILES_DIR / "0000.sdf", deck_path="input.deck") as df: assert "deck" in df.attrs assert "constant" in df.attrs["deck"] def test_open_dataset_deck_path_absolute(): - with xr.open_dataset( + with open_dataset( TEST_FILES_DIR / "0000.sdf", deck_path=TEST_FILES_DIR / "input.deck" ) as df: assert "deck" in df.attrs @@ -841,7 +841,7 @@ def test_open_dataset_deck_path_absolute(): def test_open_dataset_deck_path_absolute_other_path(): - with xr.open_dataset( + with open_dataset( TEST_FILES_DIR / "0000.sdf", deck_path=TEST_3D_DIST_FN / "input.deck" ) as df: assert "deck" in df.attrs