From cfcb20aadedcaaa26f61eff4d409c9a7757b027c Mon Sep 17 00:00:00 2001 From: AndreaBrg Date: Fri, 14 Nov 2025 11:36:35 +0100 Subject: [PATCH 1/9] (#97) add day_night_flag for earthdata sources --- configs/config.template.yml | 2 -- src/satctl/model.py | 1 + src/satctl/sources/earthdata.py | 19 +++++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/configs/config.template.yml b/configs/config.template.yml index 6799891..4a7710e 100644 --- a/configs/config.template.yml +++ b/configs/config.template.yml @@ -38,8 +38,6 @@ sources: default_downloader: "s3" stac_url: "https://stac.dataspace.copernicus.eu/v1" search_limit: 100 - download_pool_conns: 1 - download_pool_size: 1 s2-l2a: default_authenticator: "s3" diff --git a/src/satctl/model.py b/src/satctl/model.py index cc62fd3..1d17c99 100644 --- a/src/satctl/model.py +++ b/src/satctl/model.py @@ -226,6 +226,7 @@ class Granule(BaseModel): assets: dict[str, Any] # 'Any' must be serializable info: ProductInfo local_path: Path | None = None + day_night_flag: str | None = None # Day/night acquisition flag: "day", "night", "both", "unspecified" @classmethod def from_file(cls, path: Path) -> "Granule": diff --git a/src/satctl/sources/earthdata.py b/src/satctl/sources/earthdata.py index 9571b10..1c0d395 100644 --- a/src/satctl/sources/earthdata.py +++ b/src/satctl/sources/earthdata.py @@ -71,6 +71,23 @@ def parse_umm_assets(umm_result: dict, asset_class: type[BaseModel] = EarthDataA } +def parse_day_night_flag(umm_result: dict) -> str | None: + """Parse DayNightFlag from UMM search result. + + Args: + umm_result (dict): UMM format result from earthaccess search + + Returns: + str | None: Lowercase flag value ("day", "night", "both", "unspecified") or None if missing + """ + try: + flag = umm_result["umm"]["DataGranule"]["DayNightFlag"] + return flag.lower() + except (KeyError, AttributeError): + # Field not present or structure different + return None + + class EarthDataSource(DataSource): """Base class for NASA EarthData sources. 
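For context on the hunk above: `parse_day_night_flag` relies only on the
`umm.DataGranule.DayNightFlag` path of an earthaccess search result; the rest of
the UMM payload is ignored. A minimal sketch of the expected input shape, using a
trimmed-down, illustrative dict rather than a real CMR response:

    from satctl.sources.earthdata import parse_day_night_flag

    # Hypothetical, minimal UMM result as returned by an earthaccess search.
    umm_result = {"umm": {"DataGranule": {"DayNightFlag": "Day"}}}

    assert parse_day_night_flag(umm_result) == "day"   # value is normalized to lowercase
    assert parse_day_night_flag({"umm": {}}) is None   # missing path falls back to None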
@@ -289,6 +306,7 @@ def _search_single_combination( "georeference": parse_umm_assets(georeference_result, EarthDataAsset), }, info=self._parse_item_name(radiance_id), + day_night_flag=parse_day_night_flag(radiance_result), ) ) @@ -353,6 +371,7 @@ def _get_granule_by_short_name(self, item_id: str, short_name: str) -> Granule: "georeference": parse_umm_assets(georeference_result, EarthDataAsset), }, info=self._parse_item_name(radiance_id), + day_night_flag=parse_day_night_flag(radiance_result), ) def download_item(self, item: Granule, destination: Path, downloader: Downloader) -> bool: From 3ad01350eafa5ba21f82d0da9d504bc1d0fc73cf Mon Sep 17 00:00:00 2001 From: AndreaBrg Date: Mon, 17 Nov 2025 16:34:24 +0100 Subject: [PATCH 2/9] (#100) moved search_limit parameter to SearchParams instead of sources' __init__ --- configs/config.template.yml | 8 -------- src/satctl/model.py | 1 + src/satctl/sources/earthdata.py | 10 ++++------ src/satctl/sources/modis.py | 8 -------- src/satctl/sources/mtg.py | 8 +------- src/satctl/sources/sentinel1.py | 8 +------- src/satctl/sources/sentinel2.py | 10 +--------- src/satctl/sources/sentinel3.py | 14 +------------- src/satctl/sources/viirs.py | 8 -------- tests/conftest.py | 2 ++ tests/test_modis.py | 1 - tests/test_mtg.py | 1 - tests/test_olci.py | 1 - tests/test_s1.py | 1 - tests/test_s2.py | 2 -- tests/test_slstr.py | 1 - tests/test_viirs.py | 1 - 17 files changed, 11 insertions(+), 74 deletions(-) diff --git a/configs/config.template.yml b/configs/config.template.yml index 4a7710e..eb42b45 100644 --- a/configs/config.template.yml +++ b/configs/config.template.yml @@ -37,54 +37,46 @@ sources: default_authenticator: "s3" default_downloader: "s3" stac_url: "https://stac.dataspace.copernicus.eu/v1" - search_limit: 100 s2-l2a: default_authenticator: "s3" default_downloader: "s3" default_composite: "true_color" stac_url: "https://stac.dataspace.copernicus.eu/v1" - search_limit: 100 s2-l1c: default_authenticator: "s3" default_downloader: "s3" default_composite: "true_color" stac_url: "https://stac.dataspace.copernicus.eu/v1" - search_limit: 100 s3-slstr: default_authenticator: "odata" default_downloader: "http" default_composite: "all_bands_m" stac_url: "https://stac.dataspace.copernicus.eu/v1" - search_limit: 100 s3-olci: default_authenticator: "odata" default_downloader: "http" default_composite: "all_bands" stac_url: "https://stac.dataspace.copernicus.eu/v1" - search_limit: 100 viirs-l1b: default_authenticator: "earthdata" default_downloader: "http" - search_limit: 100 satellite: ["vnp", "jp1", "jp2"] product_type: ["mod", "img"] modis-l1b: default_authenticator: "earthdata" default_downloader: "http" - search_limit: 100 platform: ["mod"] resolution: ["qkm", "hkm", "1km"] mtg-fci-l1c: default_authenticator: "eumetsat" default_downloader: "http" - search_limit: 2 collection_name: "EO:EUM:DAT:0662" reader: "fci_l1c_nc" default_composite: "natural_color" diff --git a/src/satctl/model.py b/src/satctl/model.py index 1d17c99..c7ec7f9 100644 --- a/src/satctl/model.py +++ b/src/satctl/model.py @@ -119,6 +119,7 @@ def area_geometry(self) -> Polygon | None: class SearchParams(AreaParams): start: datetime end: datetime + search_limit: int | None = None @model_validator(mode="after") def validate_dates(self): diff --git a/src/satctl/sources/earthdata.py b/src/satctl/sources/earthdata.py index 1c0d395..c32aefa 100644 --- a/src/satctl/sources/earthdata.py +++ b/src/satctl/sources/earthdata.py @@ -25,8 +25,6 @@ "3": "doi", } -DEFAULT_SEARCH_LIMIT = 100 - class 
EarthDataAsset(BaseModel): """Asset model for EarthData sources.""" @@ -111,7 +109,6 @@ def __init__( default_downloader: str | None = "http", default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize EarthData source. @@ -126,7 +123,6 @@ def __init__( version (str | None): Product version. Defaults to None. default_composite (str | None): Default composite name. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum search results per query. Defaults to 100. """ super().__init__( collection_name, @@ -140,7 +136,6 @@ def __init__( self.reader = reader self.short_name = short_name self.version = version - self.search_limit = search_limit warnings.filterwarnings(action="ignore", category=UserWarning) @abstractmethod @@ -267,9 +262,12 @@ def _search_single_combination( search_kwargs: dict[str, Any] = { "short_name": short_name, "temporal": (params.start.isoformat(), params.end.isoformat()), - "count": self.search_limit, } + # Add search limit if specified (omit for unlimited results) + if params.search_limit is not None: + search_kwargs["count"] = params.search_limit + # Add version if specified if version: search_kwargs["version"] = version diff --git a/src/satctl/sources/modis.py b/src/satctl/sources/modis.py index 8afa2b4..d0d788c 100644 --- a/src/satctl/sources/modis.py +++ b/src/satctl/sources/modis.py @@ -9,7 +9,6 @@ from satctl.downloaders import DownloadBuilder from satctl.model import Granule, ProductInfo, SearchParams from satctl.sources.earthdata import ( - DEFAULT_SEARCH_LIMIT, EarthDataSource, ParsedGranuleId, ) @@ -55,7 +54,6 @@ def __init__( default_downloader: str = "http", default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize MODIS data source. @@ -70,7 +68,6 @@ def __init__( default_downloader (str): Default downloader name to use when down_builder is None. Defaults to "http". default_composite (str | None): Default composite/band to load. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( collection_name, @@ -83,7 +80,6 @@ def __init__( default_downloader=default_downloader, default_composite=default_composite, default_resolution=default_resolution, - search_limit=search_limit, ) def _parse_granule_id(self, granule_id: str) -> ParsedGranuleId: @@ -201,7 +197,6 @@ class MODISL1BSource(MODISSource): downloader: HTTP downloader instance platform: List of satellite platforms - ["mod"] (Terra), ["myd"] (Aqua) resolution: List of resolutions - ["qkm"] (250m), ["hkm"] (500m), ["1km"] (1000m) - search_limit: Maximum number of granules to return in search results per combination Examples: # Single combination @@ -222,7 +217,6 @@ def __init__( *, platform: list[Literal["mod", "myd"]], resolution: list[Literal["qkm", "hkm", "1km"]], - search_limit: int = DEFAULT_SEARCH_LIMIT, auth_builder: AuthBuilder | None = None, down_builder: DownloadBuilder | None = None, default_authenticator: str = "earthdata", @@ -235,7 +229,6 @@ def __init__( Args: platform (list[Literal["mod", "myd"]]): List of satellite platforms to search resolution (list[Literal["qkm", "hkm", "1km"]]): List of resolutions to search - search_limit (int): Maximum number of items to return per search. Defaults to 100. 
auth_builder (AuthBuilder | None): Factory that creates an authenticator object on demand. Defaults to None. down_builder (DownloadBuilder | None): Factory that creates a downloader object on demand. Defaults to None. default_authenticator (str): Default authenticator name to use when auth_builder is None. Defaults to "earthdata". @@ -271,7 +264,6 @@ def __init__( default_authenticator=default_authenticator, default_downloader=default_downloader, version=primary["version"], - search_limit=search_limit, ) def search(self, params: SearchParams) -> list[Granule]: diff --git a/src/satctl/sources/mtg.py b/src/satctl/sources/mtg.py index a13a38b..6d73aff 100644 --- a/src/satctl/sources/mtg.py +++ b/src/satctl/sources/mtg.py @@ -22,9 +22,6 @@ log = logging.getLogger(__name__) -# Constants -DEFAULT_SEARCH_LIMIT = 100 - class MTGAsset(BaseModel): href: str @@ -46,7 +43,6 @@ def __init__( default_downloader: str = "http", default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize MTG data source. @@ -59,7 +55,6 @@ def __init__( default_downloader (str): Default downloader name to use when down_builder is None. Defaults to "s3". default_composite (str | None): Default composite/band to load. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( collection_name, @@ -71,7 +66,6 @@ def __init__( default_resolution=default_resolution, ) self.reader = reader - self.search_limit = search_limit warnings.filterwarnings(action="ignore", category=UserWarning) # Use synchronous dask scheduler for processing @@ -140,7 +134,7 @@ def search(self, params: SearchParams) -> list[Granule]: ] ) log.debug("Found %d items", len(items)) - return items[: self.search_limit] + return items if params.search_limit is None else items[: params.search_limit] def get_by_id(self, item_id: str, **kwargs) -> Granule: """Get specific MTG granule by ID. diff --git a/src/satctl/sources/sentinel1.py b/src/satctl/sources/sentinel1.py index 8894529..82f4893 100644 --- a/src/satctl/sources/sentinel1.py +++ b/src/satctl/sources/sentinel1.py @@ -77,7 +77,6 @@ def __init__( default_downloader: str | None = "s3", default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = 100, ): """Initialize Sentinel-1 data source. @@ -91,7 +90,6 @@ def __init__( default_downloader (str | None): Default downloader name to use when down_builder is None. Defaults to "s3". default_composite (str | None): Default composite/band to load. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum number of items to return per search. Defaults to 100. 
""" super().__init__( collection_name, @@ -104,7 +102,6 @@ def __init__( ) self.reader = reader self.stac_url = stac_url - self.search_limit = search_limit @abstractmethod def _parse_item_name(self, name: str) -> ProductInfo: @@ -149,7 +146,7 @@ def search(self, params: SearchParams) -> list[Granule]: collections=self.collections, intersects=params.area_geometry, datetime=(params.start, params.end), - max_items=self.search_limit, + max_items=params.search_limit, ) # Convert STAC items to internal Granule model @@ -490,7 +487,6 @@ def __init__( down_builder: DownloadBuilder | None = None, default_authenticator: str | None = "s3", default_downloader: str | None = "s3", - search_limit: int = 100, ): """Initialize Sentinel-1 GRD data source. @@ -501,7 +497,6 @@ def __init__( down_builder (DownloadBuilder | None): Factory that creates a downloader object on demand. Defaults to None. default_authenticator (str | None): Default authenticator name to use when auth_builder is None. Defaults to "s3". default_downloader (str | None): Default downloader name to use when down_builder is None. Defaults to "s3". - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( "sentinel-1-grd", @@ -513,7 +508,6 @@ def __init__( default_composite=composite, default_resolution=20, # Native GRD resolution in IW mode stac_url=stac_url, - search_limit=search_limit, ) def _parse_item_name(self, name: str) -> ProductInfo: diff --git a/src/satctl/sources/sentinel2.py b/src/satctl/sources/sentinel2.py index 24c9413..08dfca3 100644 --- a/src/satctl/sources/sentinel2.py +++ b/src/satctl/sources/sentinel2.py @@ -17,9 +17,6 @@ log = logging.getLogger(__name__) -# Constants -DEFAULT_SEARCH_LIMIT = 100 - class S2Asset(BaseModel): href: str @@ -67,7 +64,6 @@ def __init__( default_downloader: str | None, default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize Sentinel-2 source. @@ -81,7 +77,6 @@ def __init__( default_downloader (str | None): Default downloader name to use when down_builder is None. default_composite (str | None): Default composite name. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum search results. Defaults to 100. 
""" super().__init__( collection_name, @@ -94,7 +89,6 @@ def __init__( ) self.reader = reader self.stac_url = stac_url - self.search_limit = search_limit @abstractmethod def _parse_item_name(self, name: str) -> ProductInfo: @@ -125,7 +119,7 @@ def search(self, params: SearchParams) -> list[Granule]: collections=self.collections, intersects=params.area_geometry, datetime=(params.start, params.end), - max_items=self.search_limit, + max_items=params.search_limit, ) items = [ Granule( @@ -458,7 +452,6 @@ def __init__( default_composite=default_composite, default_resolution=default_resolution, stac_url=stac_url, - search_limit=search_limit, ) def _parse_item_name(self, name: str) -> ProductInfo: @@ -551,7 +544,6 @@ def __init__( default_composite=default_composite, default_resolution=default_resolution, stac_url=stac_url, - search_limit=search_limit, ) def _parse_item_name(self, name: str) -> ProductInfo: diff --git a/src/satctl/sources/sentinel3.py b/src/satctl/sources/sentinel3.py index fa209c4..c2faf56 100644 --- a/src/satctl/sources/sentinel3.py +++ b/src/satctl/sources/sentinel3.py @@ -17,9 +17,6 @@ log = logging.getLogger(__name__) -# Constants -DEFAULT_SEARCH_LIMIT = 100 - class S3Asset(BaseModel): href: str @@ -41,7 +38,6 @@ def __init__( default_downloader: str | None, default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize Sentinel-3 data source. @@ -55,7 +51,6 @@ def __init__( default_downloader (str | None): Default downloader name to use when down_builder is None. default_composite (str | None): Default composite/band to load. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( collection_name, @@ -68,7 +63,6 @@ def __init__( ) self.reader = reader self.stac_url = stac_url - self.search_limit = search_limit @abstractmethod def _parse_item_name(self, name: str) -> ProductInfo: @@ -99,7 +93,7 @@ def search(self, params: SearchParams) -> list[Granule]: collections=self.collections, intersects=params.area_geometry, datetime=(params.start, params.end), - max_items=self.search_limit, + max_items=params.search_limit, ) items = [ Granule( @@ -278,7 +272,6 @@ def __init__( default_downloader: str | None = "http", default_composite: str = "all_bands", default_resolution: int = 1000, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize Sentinel-3 SLSTR data source. @@ -290,7 +283,6 @@ def __init__( default_downloader (str | None): Default downloader name to use when down_builder is None. Defaults to "http". default_composite (str): Default composite/band to load. Defaults to "all_bands". default_resolution (int): Default resolution in meters. Defaults to 1000. - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( "sentinel-3-sl-1-rbt-ntc", @@ -302,7 +294,6 @@ def __init__( default_composite=default_composite, default_resolution=default_resolution, stac_url=stac_url, - search_limit=search_limit, ) def _parse_item_name(self, name: str) -> ProductInfo: @@ -347,7 +338,6 @@ def __init__( default_downloader: str | None = "http", default_composite: str = "all_bands", default_resolution: int = 300, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize Sentinel-3 OLCI data source. @@ -359,7 +349,6 @@ def __init__( default_downloader (str | None): Default downloader name to use when down_builder is None. 
Defaults to "http". default_composite (str): Default composite/band to load. Defaults to "all_bands". default_resolution (int): Default resolution in meters. Defaults to 300. - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( "sentinel-3-olci-1-efr-ntc", @@ -371,7 +360,6 @@ def __init__( default_composite=default_composite, default_resolution=default_resolution, stac_url=stac_url, - search_limit=search_limit, ) def _parse_item_name(self, name: str) -> ProductInfo: diff --git a/src/satctl/sources/viirs.py b/src/satctl/sources/viirs.py index 308ab35..43820a1 100644 --- a/src/satctl/sources/viirs.py +++ b/src/satctl/sources/viirs.py @@ -9,7 +9,6 @@ from satctl.downloaders import DownloadBuilder from satctl.model import Granule, ProductInfo, SearchParams from satctl.sources.earthdata import ( - DEFAULT_SEARCH_LIMIT, EarthDataSource, ParsedGranuleId, ) @@ -55,7 +54,6 @@ def __init__( default_downloader: str = "http", default_composite: str | None = None, default_resolution: int | None = None, - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize VIIRS data source. @@ -70,7 +68,6 @@ def __init__( default_downloader (str): Default downloader name to use when down_builder is None. Defaults to "http". default_composite (str | None): Default composite/band to load. Defaults to None. default_resolution (int | None): Default resolution in meters. Defaults to None. - search_limit (int): Maximum number of items to return per search. Defaults to 100. """ super().__init__( collection_name, @@ -83,7 +80,6 @@ def __init__( default_downloader=default_downloader, default_composite=default_composite, default_resolution=default_resolution, - search_limit=search_limit, ) def _parse_granule_id(self, granule_id: str) -> ParsedGranuleId: @@ -198,7 +194,6 @@ class VIIRSL1BSource(VIIRSSource): downloader: HTTP downloader instance satellite: List of satellite platforms - ["vnp"] (Suomi-NPP), ["jp1"] (NOAA-20/JPSS-1), ["jp2"] (NOAA-21/JPSS-2) product_type: List of product types - ["mod"] (M-bands, 750m), ["img"] (I-bands, 375m) - search_limit: Maximum number of granules to return in search results per combination Examples: # Single combination @@ -225,7 +220,6 @@ def __init__( default_resolution: int | None = None, satellite: list[Literal["vnp", "jp1", "jp2"]], product_type: list[Literal["mod", "img"]], - search_limit: int = DEFAULT_SEARCH_LIMIT, ): """Initialize VIIRS Level 1B data source. @@ -238,7 +232,6 @@ def __init__( default_resolution (int | None): Default resolution in meters. Defaults to None. satellite (list[Literal["vnp", "jp1", "jp2"]]): List of satellite platforms to search product_type (list[Literal["mod", "img"]]): List of product types to search - search_limit (int): Maximum number of items to return per search. Defaults to 100. 
""" # Generate all combinations (cartesian product) self.combinations: list[ProductCombination] = [] @@ -269,7 +262,6 @@ def __init__( default_resolution=default_resolution if default_resolution else primary["resolution"], short_name=primary["short_name"], version=primary["version"], - search_limit=search_limit, ) def search(self, params: SearchParams) -> list[Granule]: diff --git a/tests/conftest.py b/tests/conftest.py index 0d8b6bc..4a17674 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,6 +60,7 @@ def test_search_params(): path=geojson_path, start=datetime.strptime("2024-09-01", "%Y-%m-%d"), end=datetime.strptime("2024-09-04", "%Y-%m-%d"), + search_limit=1, # Limit results for testing ) @@ -83,6 +84,7 @@ def test_mtg_search_params(): path=geojson_path, start=datetime.strptime("2024-09-25", "%Y-%m-%d"), end=datetime.strptime("2024-09-26", "%Y-%m-%d"), + search_limit=1, # Limit results for testing ) diff --git a/tests/test_modis.py b/tests/test_modis.py index e3b404f..6dd8613 100644 --- a/tests/test_modis.py +++ b/tests/test_modis.py @@ -42,7 +42,6 @@ def test_auth_and_init(self) -> None: down_builder=configure_downloader("http"), platform=["mod"], # Terra satellite resolution=["1km"], # 1km resolution - search_limit=1, # Limit results for testing ) # Verify source is configured using helper diff --git a/tests/test_mtg.py b/tests/test_mtg.py index 8fbfc78..3ab2319 100644 --- a/tests/test_mtg.py +++ b/tests/test_mtg.py @@ -45,7 +45,6 @@ def test_auth_and_init(self) -> None: reader="fci_l1c_nc", # FCI Level 1C NetCDF reader default_composite="simple_fci_fire_mask", # Or whatever your default composite is default_resolution=2000, # 2km resolution for FCI - search_limit=1, # Limit results for testing ) # Verify source is configured using helper diff --git a/tests/test_olci.py b/tests/test_olci.py index 6c36214..f58f2f6 100644 --- a/tests/test_olci.py +++ b/tests/test_olci.py @@ -38,7 +38,6 @@ def test_auth_and_init(self) -> None: auth_builder=configure_authenticator("odata"), down_builder=configure_downloader("http"), stac_url="https://stac.dataspace.copernicus.eu/v1", - search_limit=1, # Limit results for testing ) # Verify source is configured using helper diff --git a/tests/test_s1.py b/tests/test_s1.py index 1c1edae..0703f30 100644 --- a/tests/test_s1.py +++ b/tests/test_s1.py @@ -44,7 +44,6 @@ def test_auth_and_init(self) -> None: down_builder=configure_downloader("s3"), stac_url="https://stac.dataspace.copernicus.eu/v1", composite="s1_dual_pol", # Or whatever your default SAR composite is - search_limit=1, # Limit results for testing ) # Verify source is configured using helper diff --git a/tests/test_s2.py b/tests/test_s2.py index 2632abb..9e8d688 100644 --- a/tests/test_s2.py +++ b/tests/test_s2.py @@ -38,7 +38,6 @@ def test_auth_and_init(self) -> None: auth_builder=configure_authenticator("s3"), down_builder=configure_downloader("s3"), stac_url="https://stac.dataspace.copernicus.eu/v1", - search_limit=1, # Limit results for testing ) # Verify source is configured using helper @@ -201,7 +200,6 @@ def test_auth_and_init(self) -> None: auth_builder=configure_authenticator("s3"), down_builder=configure_downloader("s3"), stac_url="https://stac.dataspace.copernicus.eu/v1", - search_limit=1, # Limit results for testing ) # Verify source is configured using helper diff --git a/tests/test_slstr.py b/tests/test_slstr.py index 02d7932..445efdb 100644 --- a/tests/test_slstr.py +++ b/tests/test_slstr.py @@ -41,7 +41,6 @@ def test_auth_and_init(self) -> None: 
auth_builder=configure_authenticator("odata"), down_builder=configure_downloader("http"), stac_url="https://stac.dataspace.copernicus.eu/v1", - search_limit=1, # Limit results for testing ) # Verify source is configured using helper diff --git a/tests/test_viirs.py b/tests/test_viirs.py index 0d3c15e..d8ea241 100644 --- a/tests/test_viirs.py +++ b/tests/test_viirs.py @@ -40,7 +40,6 @@ def test_auth_and_init(self) -> None: down_builder=configure_downloader("http"), satellite=["vnp"], # NPP satellite product_type=["mod"], # M-bands (750m) - search_limit=1, # Limit results for testing ) # Verify source is configured using helper From 696c0308abe0c55d07396bbac248d5d93eb592d0 Mon Sep 17 00:00:00 2001 From: Edoardo Arnaudo Date: Mon, 17 Nov 2025 16:56:59 +0100 Subject: [PATCH 3/9] Bump version to 0.3.3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bb6458f..d475266 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "satctl" -version = "0.3.2" +version = "0.3.3" description = "Toolkit to search, download and process Earth Observation data" readme = "README.md" authors = [ From fd672234cd81cf221a5e7cacae1ea1bf37de2def Mon Sep 17 00:00:00 2001 From: Edoardo Arnaudo Date: Mon, 17 Nov 2025 19:14:14 +0100 Subject: [PATCH 4/9] (#90) fix import order for satpy env, add lazy loading in load_scene, add SLSTR L1B rev.4 reader config --- src/satctl/__init__.py | 5 +- .../satpy/readers/slstr_l1b_rev4.yaml | 365 ++++++++++++++++++ src/satctl/sources/base.py | 11 +- src/satctl/sources/mtg.py | 34 +- src/satctl/sources/sentinel1.py | 45 --- src/satctl/sources/sentinel2.py | 30 +- src/satctl/sources/sentinel3.py | 15 +- 7 files changed, 413 insertions(+), 92 deletions(-) create mode 100644 src/satctl/_config_data/satpy/readers/slstr_l1b_rev4.yaml diff --git a/src/satctl/__init__.py b/src/satctl/__init__.py index 8cb8205..64dbb49 100644 --- a/src/satctl/__init__.py +++ b/src/satctl/__init__.py @@ -28,8 +28,6 @@ import os from importlib.resources import files -from satctl.sources import create_source, list_sources - # override the satpy config path, adding our own custom yaml configs # it is non-destructive, i.e. 
if the variable is already set, we append satpy_config_path = os.getenv("SATPY_CONFIG_PATH", None) @@ -40,4 +38,7 @@ satpy_config_path = str([satpy_config_path, local_config_path]) os.environ["SATPY_CONFIG_PATH"] = satpy_config_path +# need to delay the import, otherwise satpy messes with the env +from satctl.sources import create_source, list_sources # noqa: E402 + __all__ = ["create_source", "list_sources"] diff --git a/src/satctl/_config_data/satpy/readers/slstr_l1b_rev4.yaml b/src/satctl/_config_data/satpy/readers/slstr_l1b_rev4.yaml new file mode 100644 index 0000000..a3a1b92 --- /dev/null +++ b/src/satctl/_config_data/satpy/readers/slstr_l1b_rev4.yaml @@ -0,0 +1,365 @@ + +reader: + name: slstr_l1b_004 + short_name: SLSTR l1b (rev.004) + long_name: Sentinel-3 A and B SLSTR L1B, revision 4 + description: NC Reader for SLSTR data after Jan 2020 + status: Alpha + supports_fsspec: false + sensors: [slstr] + default_channels: [] + reader: !!python/name:satpy.readers.core.yaml_reader.FileYAMLReader + + data_identification_keys: + name: + required: true + wavelength: + type: !!python/name:satpy.dataset.dataid.WavelengthRange + resolution: + transitive: false + calibration: + enum: + - reflectance + - brightness_temperature + - radiance + - counts + transitive: true + view: + enum: + - nadir + - oblique + transitive: true + stripe: + enum: + - a + - b + - i + - f + modifiers: + default: [] + type: !!python/name:satpy.dataset.dataid.ModifierTuple + + coord_identification_keys: + name: + required: true + resolution: + transitive: false + view: + enum: + - nadir + - oblique + transitive: true + stripe: + enum: + - a + - b + - i + - f + +file_types: + esa_l1b_refl: + file_reader: !!python/name:satpy.readers.slstr_l1b.NCSLSTR1B + file_patterns: ['{mission_id:3s}_SL_{processing_level:1s}_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4s}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.SEN3/{dataset_name}_radiance_{stripe:1s}{view:1s}.nc'] + esa_l1b_tir: + file_reader: !!python/name:satpy.readers.slstr_l1b.NCSLSTR1B + file_patterns: ['{mission_id:3s}_SL_{processing_level:1s}_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4s}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.SEN3/{dataset_name}_BT_{stripe:1s}{view:1s}.nc'] + esa_angles: + file_reader: !!python/name:satpy.readers.slstr_l1b.NCSLSTRAngles + file_patterns: ['{mission_id:3s}_SL_{processing_level:1s}_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4s}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.SEN3/geometry_t{view:1s}.nc'] + esa_geo: + file_reader: !!python/name:satpy.readers.slstr_l1b.NCSLSTRGeo + file_patterns: ['{mission_id:3s}_SL_{processing_level:1s}_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4s}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.SEN3/geodetic_{stripe:1s}{view:1s}.nc'] + esa_l1b_flag: + file_reader: !!python/name:satpy.readers.slstr_l1b.NCSLSTRFlag + file_patterns: 
['{mission_id:3s}_SL_{processing_level:1s}_{datatype_id:_<6s}_{start_time:%Y%m%dT%H%M%S}_{end_time:%Y%m%dT%H%M%S}_{creation_time:%Y%m%dT%H%M%S}_{duration:4d}_{cycle:3d}_{relative_orbit:3d}_{frame:4s}_{centre:3s}_{mode:1s}_{timeliness:2s}_{collection:3s}.SEN3/flags_{stripe:1s}{view:1s}.nc']
+
+datasets:
+  longitude:
+    name: longitude
+    resolution: [500, 1000]
+    view: [nadir, oblique]
+    stripe: [a, b, i, f]
+    file_type: esa_geo
+    file_key: longitude_{stripe:1s}{view:1s}
+    standard_name: longitude
+    units: degree
+
+  latitude:
+    name: latitude
+    resolution: [500, 1000]
+    view: [nadir, oblique]
+    stripe: [a, b, i, f]
+    file_type: esa_geo
+    file_key: latitude_{stripe:1s}{view:1s}
+    standard_name: latitude
+    units: degree
+
+  elevation:
+    name: elevation
+    resolution: [500, 1000]
+    view: [nadir, oblique]
+    stripe: [a, b, i, f]
+    file_type: esa_geo
+    file_key: elevation_{stripe:1s}{view:1s}
+    standard_name: elevation
+    units: m
+
+  # The channels S1-S3 are available in nadir (default) and oblique view.
+  S1:
+    name: S1
+    sensor: slstr
+    wavelength: [0.545, 0.555, 0.565]
+    resolution: 500
+    view: [nadir, oblique]
+    stripe: [a, b]
+    calibration:
+      reflectance:
+        standard_name: toa_bidirectional_reflectance
+        units: "%"
+      radiance:
+        standard_name: toa_outgoing_radiance_per_unit_wavelength
+        units: W m-2 um-1 sr-1
+    coordinates: [longitude, latitude]
+    file_type: esa_l1b_refl
+
+  S2:
+    name: S2
+    sensor: slstr
+    wavelength: [0.649, 0.659, 0.669]
+    resolution: 500
+    view: [nadir, oblique]
+    stripe: [a, b]
+    calibration:
+      reflectance:
+        standard_name: toa_bidirectional_reflectance
+        units: "%"
+      radiance:
+        standard_name: toa_outgoing_radiance_per_unit_wavelength
+        units: W m-2 um-1 sr-1
+    coordinates: [longitude, latitude]
+    file_type: esa_l1b_refl
+
+  S3:
+    name: S3
+    sensor: slstr
+    wavelength: [0.855, 0.865, 0.875]
+    resolution: 500
+    view: [nadir, oblique]
+    stripe: [a, b]
+    calibration:
+      reflectance:
+        standard_name: toa_bidirectional_reflectance
+        units: "%"
+      radiance:
+        standard_name: toa_outgoing_radiance_per_unit_wavelength
+        units: W m-2 um-1 sr-1
+    coordinates: [longitude, latitude]
+    file_type: esa_l1b_refl
+
+  # The channels S4-S6 are available in nadir (default) and oblique view and for both in the
+  # a,b and c stripes.
+  S4:
+    name: S4
+    sensor: slstr
+    wavelength: [1.3675, 1.375, 1.3825]
+    resolution: 500
+    view: [nadir, oblique]
+    stripe: [a, b]
+    calibration:
+      reflectance:
+        standard_name: toa_bidirectional_reflectance
+        units: "%"
+      radiance:
+        standard_name: toa_outgoing_radiance_per_unit_wavelength
+        units: W m-2 um-1 sr-1
+    coordinates: [longitude, latitude]
+    file_type: esa_l1b_refl
+
+  S5:
+    name: S5
+    sensor: slstr
+    wavelength: [1.58, 1.61, 1.64]
+    resolution: 500
+    view: [nadir, oblique]
+    stripe: [a, b]
+    calibration:
+      reflectance:
+        standard_name: toa_bidirectional_reflectance
+        units: "%"
+      radiance:
+        standard_name: toa_outgoing_radiance_per_unit_wavelength
+        units: W m-2 um-1 sr-1
+    coordinates: [longitude, latitude]
+    file_type: esa_l1b_refl
+
+  S6:
+    name: S6
+    sensor: slstr
+    wavelength: [2.225, 2.25, 2.275]
+    resolution: 500
+    view: [nadir, oblique]
+    stripe: [a, b]
+    calibration:
+      reflectance:
+        standard_name: toa_bidirectional_reflectance
+        units: "%"
+      radiance:
+        standard_name: toa_outgoing_radiance_per_unit_wavelength
+        units: W m-2 um-1 sr-1
+    coordinates: [longitude, latitude]
+    file_type: esa_l1b_refl
+
+  # The channels S7-S9, F1 and F2 are available in nadir (default) and oblique view.
+ S7: + name: S7 + sensor: slstr + wavelength: [3.55, 3.74, 3.93] + resolution: 1000 + view: [nadir, oblique] + stripe: i + calibration: + brightness_temperature: + standard_name: toa_brightness_temperature + units: "K" + coordinates: [longitude, latitude] + file_type: esa_l1b_tir + + S8: + name: S8 + sensor: slstr + wavelength: [10.4, 10.85, 11.3] + resolution: 1000 + view: [nadir, oblique] + stripe: i + calibration: + brightness_temperature: + standard_name: toa_brightness_temperature + units: "K" + coordinates: [longitude, latitude] + file_type: esa_l1b_tir + + S9: + name: S9 + sensor: slstr + wavelength: [11.57, 12.0225, 12.475] + resolution: 1000 + view: [nadir, oblique] + stripe: i + calibration: + brightness_temperature: + standard_name: toa_brightness_temperature + units: "K" + coordinates: [longitude, latitude] + file_type: esa_l1b_tir + + F1: + name: F1 + sensor: slstr + wavelength: [3.55, 3.74, 3.93] + resolution: 1000 + view: [nadir, oblique] + stripe: f + calibration: + brightness_temperature: + standard_name: toa_brightness_temperature + units: "K" + coordinates: [longitude, latitude] + file_type: esa_l1b_tir + + F2: + name: F2 + sensor: slstr + wavelength: [10.4, 10.85, 11.3] + resolution: 1000 + view: [nadir, oblique] + stripe: i + calibration: + brightness_temperature: + standard_name: toa_brightness_temperature + units: "K" + coordinates: [longitude, latitude] + file_type: esa_l1b_tir + + + solar_zenith_angle: + name: solar_zenith_angle + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + standard_name: solar_zenith_angle + file_type: esa_angles + file_key: solar_zenith_t{view:1s} + + solar_azimuth_angle: + name: solar_azimuth_angle + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + standard_name: solar_azimuth_angle + file_type: esa_angles + file_key: solar_azimuth_t{view:1s} + + satellite_zenith_angle: + name: satellite_zenith_angle + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + standard_name: satellite_zenith_angle + file_type: esa_angles + file_key: sat_zenith_t{view:1s} + + satellite_azimuth_angle: + name: satellite_azimuth_angle + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + standard_name: satellite_azimuth_angle + file_type: esa_angles + file_key: sat_azimuth_t{view:1s} + +# CloudFlags are all bitfields. They are available in nadir (default) and oblique view for +# each of the a,b,c,i stripes. 
+ cloud: + name: cloud + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + stripe: [a, b, i, f] + file_type: esa_l1b_flag + file_key: cloud_{stripe:1s}{view:1s} + + confidence: + name: confidence + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + stripe: [a, b, i, f] + file_type: esa_l1b_flag + file_key: confidence_{stripe:1s}{view:1s} + + + pointing: + name: pointing + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + stripe: [a, b, i, f] + file_type: esa_l1b_flag + file_key: pointing_{stripe:1s}{view:1s} + + bayes: + name: bayes + sensor: slstr + resolution: [500, 1000] + coordinates: [longitude, latitude] + view: [nadir, oblique] + stripe: [a, b, i, f] + file_type: esa_l1b_flag + file_key: bayes_{stripe:1s}{view:1s} diff --git a/src/satctl/sources/base.py b/src/satctl/sources/base.py index e9eb9f1..bbafd1c 100644 --- a/src/satctl/sources/base.py +++ b/src/satctl/sources/base.py @@ -266,16 +266,18 @@ def download( def load_scene( self, item: Granule, + reader: str | None = None, datasets: list[str] | None = None, - generate: bool = False, + lazy: bool = False, **scene_options: Any, ) -> Scene: """Load a satpy Scene from granule files. Args: item (Granule): Granule to load + reader (str | None): Optional custom reader for extra customization. datasets (list[str] | None): List of datasets/composites to load. Defaults to None (uses default_composite). - generate (bool): Whether to generate composites. Defaults to False. + lazy (bool): Whether to lazily return the scene without loading datasets. Defaults to False. **scene_options (Any): Additional keyword arguments passed to Scene reader Returns: @@ -292,10 +294,11 @@ def load_scene( datasets = [self.default_composite] scene = Scene( filenames=self.get_files(item), - reader=self.reader, + reader=reader or self.reader, reader_kwargs=scene_options, ) - scene.load(datasets) + if not lazy: + scene.load(datasets) return scene def resample( diff --git a/src/satctl/sources/mtg.py b/src/satctl/sources/mtg.py index 6d73aff..d77807c 100644 --- a/src/satctl/sources/mtg.py +++ b/src/satctl/sources/mtg.py @@ -193,34 +193,32 @@ def get_files(self, item: Granule) -> list[Path | str]: def load_scene( self, item: Granule, + reader: str | None = None, datasets: list[str] | None = None, - generate: bool = False, - **scene_options: dict[str, Any], + lazy: bool = False, + **scene_options: Any, ) -> Scene: - """Load MTG data into a Satpy Scene. + """Load a Sentinel-2 scene with specified calibration. Args: item (Granule): Granule to load - datasets (list[str] | None): List of dataset names to load. Defaults to None. - generate (bool): Whether to generate composites. Defaults to False. - **scene_options (dict[str, Any]): Additional scene options + reader (str | None): Optional custom reader for extra customization. + datasets (list[str] | None): List of datasets/composites to load. Defaults to None (uses default_composite). + lazy (bool): Whether to lazily return the scene without loading datasets. Defaults to False. 
+            **scene_options (Any): Additional keyword arguments passed to Scene reader

         Returns:
-            Scene: Loaded Satpy scene with requested datasets
+            Scene: Loaded satpy Scene object

         Raises:
-            ValueError: If datasets is None and no default composite is configured
+            ValueError: If datasets is None and no default_composite is set
         """
-        if not datasets:
-            if self.default_composite is None:
-                raise ValueError(
-                    "Invalid configuration: datasets parameter is required when no default composite is set"
-                )
-            datasets = [self.default_composite]
-        scene = Scene(
-            filenames=self.get_files(item),
-            reader=self.reader,
-            reader_kwargs=scene_options,
+        scene = super().load_scene(
+            item,
+            reader=reader,
+            datasets=datasets,
+            lazy=True,
+            **scene_options,
         )
         # note: the data inside the FCI files is stored upside down.
         # The upper_right_corner='NE' argument flips it automatically in upright position
diff --git a/src/satctl/sources/sentinel1.py b/src/satctl/sources/sentinel1.py
index 82f4893..a1e4599 100644
--- a/src/satctl/sources/sentinel1.py
+++ b/src/satctl/sources/sentinel1.py
@@ -246,51 +246,6 @@ def validate(self, item: Granule) -> None:
                 "application/xml",
             ), f"Unexpected media type for asset {name}: {asset.media_type}"
 
-    def load_scene(
-        self,
-        item: Granule,
-        datasets: list[str] | None = None,
-        generate: bool = False,
-        calibration: str = "counts",
-        **scene_options: dict[str, Any],
-    ) -> Scene:
-        """Load a Sentinel-1 scene into a Satpy Scene object.
-
-        Note: The 'calibration' parameter is currently unused but retained for
-        API compatibility. SAR calibration is typically specified in the dataset
-        name or composite definition (e.g., 'sigma_nought', 'beta_nought').
-
-        Args:
-            item: Granule to load (must have local_path set)
-            datasets: List of datasets/composites to load (e.g., ['measurement_vv'])
-            generate: Whether to generate composites (unused)
-            calibration: Calibration type - retained for compatibility but not used
-                (actual calibration is specified in dataset queries)
-            **scene_options: Additional options passed to Scene reader_kwargs
-
-        Returns:
-            Loaded satpy Scene object with requested datasets
-
-        Raises:
-            ValueError: If datasets is None and no default composite is configured
-        """
-        if not datasets:
-            if self.default_composite is None:
-                raise ValueError("Please provide the source with a default composite, or provide custom composites")
-            datasets = [self.default_composite]
-
-        # Create scene with all files in SAFE directory
-        scene = Scene(
-            filenames=self.get_files(item),
-            reader=self.reader,
-            reader_kwargs=scene_options,
-        )
-
-        # Load datasets (calibration is handled by dataset definition, not this parameter)
-        # TODO: Remove unused calibration parameter in future version
-        scene.load(datasets, calibration=calibration)
-        return scene
-
     def download_item(self, item: Granule, destination: Path, downloader: Downloader) -> bool:
         """Download Sentinel-1 assets and reconstruct SAFE directory structure.
 
diff --git a/src/satctl/sources/sentinel2.py b/src/satctl/sources/sentinel2.py
index 08dfca3..25404e3 100644
--- a/src/satctl/sources/sentinel2.py
+++ b/src/satctl/sources/sentinel2.py
@@ -229,19 +229,19 @@ def validate(self, item: Granule) -> None:
     def load_scene(
         self,
         item: Granule,
+        reader: str | None = None,
         datasets: list[str] | None = None,
-        generate: bool = False,
-        calibration: str = "counts",
+        lazy: bool = False,
         **scene_options: Any,
     ) -> Scene:
         """Load a Sentinel-2 scene with specified calibration.
 
         Args:
             item (Granule): Granule to load
-            datasets (list[str] | None): List of datasets/composites to load. Defaults to None.
-            generate (bool): Whether to generate composites. Defaults to False.
-            calibration (str): Calibration type - 'counts' (DN 0-10000) or 'reflectance' (percentage 0-100%). Defaults to 'counts'.
-            **scene_options (Any): Additional keyword arguments passed to Scene reader
+            reader (str | None): Optional custom reader for extra customization.
+            datasets (list[str] | None): List of datasets/composites to load. Defaults to None (uses default_composite).
+            lazy (bool): Whether to lazily return the scene without loading datasets. Defaults to False.
+            **scene_options (Any): Additional keyword arguments passed to Scene reader

         Returns:
             Scene: Loaded satpy Scene object
@@ -249,19 +249,15 @@ def load_scene(
         Raises:
             ValueError: If datasets is None and no default_composite is set
         """
-        if not datasets:
-            if self.default_composite is None:
-                raise ValueError(
-                    "Invalid configuration: datasets parameter is required when no default composite is set"
-                )
-            datasets = [self.default_composite]
-        scene = Scene(
-            filenames=self.get_files(item),
-            reader=self.reader,
-            reader_kwargs=scene_options,
+        scene = super().load_scene(
+            item,
+            reader=reader,
+            datasets=datasets,
+            lazy=True,
+            **scene_options,
         )
         # Load with specified calibration
-        scene.load(datasets, calibration=calibration)
+        scene.load(datasets, calibration="counts")
         return scene
 
     def download_item(self, item: Granule, destination: Path, downloader: Downloader) -> bool:
diff --git a/src/satctl/sources/sentinel3.py b/src/satctl/sources/sentinel3.py
index c2faf56..c5a4bb8 100644
--- a/src/satctl/sources/sentinel3.py
+++ b/src/satctl/sources/sentinel3.py
@@ -232,18 +232,21 @@ def save_item(
         Returns:
             dict[str, list]: Dictionary mapping granule_id to list of output paths
         """
-        # Validate inputs using base class helper
         self._validate_save_inputs(item, params)
-
-        # Parse datasets using base class helper
         datasets_dict = self._prepare_datasets(writer, params)
-
-        # Filter existing files using base class helper
         datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force)
 
         # Load and resample scene
         log.debug("Loading and resampling scene")
-        scene = self.load_scene(item, datasets=list(datasets_dict.values()))
+
+        # workaround patch to fix broken SLSTR reader
+        # see https://github.com/pytroll/satpy/issues/3251
+        # TLDR: SLSTR revision 004 switches band F1 from stripe i to f
+        # current satpy reader does not allow for missing files
+        custom_reader = None
+        if item.info.instrument == "slstr" and item.granule_id.endswith("004"):
+            custom_reader = f"{self.reader}_rev4"
+        scene = self.load_scene(item, reader=custom_reader, datasets=list(datasets_dict.values()))
 
         # Define area using base class helper
         area_def = self.define_area(
From 8f4b0fb62c2845a976210456a2ad4087d02f0b5 Mon Sep 17 00:00:00 2001
From: Edoardo Arnaudo
Date: Mon, 17 Nov 2025 21:47:48 +0100
Subject: [PATCH 5/9] (#99) fix docstring typos, respect lazy parameter when
 loading scenes

---
 src/satctl/sources/mtg.py       | 5 +++--
 src/satctl/sources/sentinel2.py | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/satctl/sources/mtg.py b/src/satctl/sources/mtg.py
index d77807c..80d94ee 100644
--- a/src/satctl/sources/mtg.py
+++ b/src/satctl/sources/mtg.py
@@ -198,7 +198,7 @@ def load_scene(
         lazy: bool = False,
         **scene_options: Any,
     ) -> Scene:
-        """Load a Sentinel-2 scene with specified calibration.
+        """Load an MTG scene with specified calibration.
 
         Args:
             item (Granule): Granule to load
@@ -222,7 +222,8 @@ def load_scene(
         )
         # note: the data inside the FCI files is stored upside down.
         # The upper_right_corner='NE' argument flips it automatically in upright position
-        scene.load(datasets, upper_right_corner="NE")
+        if not lazy:
+            scene.load(datasets, upper_right_corner="NE")
         return scene
 
     def validate(self, item: Granule) -> None:
diff --git a/src/satctl/sources/sentinel2.py b/src/satctl/sources/sentinel2.py
index 25404e3..9126509 100644
--- a/src/satctl/sources/sentinel2.py
+++ b/src/satctl/sources/sentinel2.py
@@ -257,7 +257,8 @@ def load_scene(
             **scene_options,
         )
         # Load with specified calibration
-        scene.load(datasets, calibration="counts")
+        if not lazy:
+            scene.load(datasets, calibration="counts")
         return scene
 
     def download_item(self, item: Granule, destination: Path, downloader: Downloader) -> bool:
From db26229e5560440daf9be50b9e293dc1cff785e1 Mon Sep 17 00:00:00 2001
From: Edoardo Arnaudo
Date: Mon, 17 Nov 2025 21:55:29 +0100
Subject: [PATCH 6/9] Bump version to 0.3.4

---
 Makefile                        | 2 +-
 pyproject.toml                  | 2 +-
 src/satctl/sources/sentinel1.py | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 72d1130..4f721e3 100644
--- a/Makefile
+++ b/Makefile
@@ -77,7 +77,7 @@ endif
 	@NEW_VERSION=$$(uv version --short)
 	@echo "New version: v$$NEW_VERSION"
 	@echo "Creating git commit and tag..."
-	@git add pyproject.toml
+	@git add .
 	@git commit --no-verify -m "Bump version to $$NEW_VERSION"
 	@git tag -a "v$$NEW_VERSION" -m "Release v$$NEW_VERSION"
 	@git push
diff --git a/pyproject.toml b/pyproject.toml
index d475266..5704e90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "satctl"
-version = "0.3.3"
+version = "0.3.4"
 description = "Toolkit to search, download and process Earth Observation data"
 readme = "README.md"
 authors = [
diff --git a/src/satctl/sources/sentinel1.py b/src/satctl/sources/sentinel1.py
index a1e4599..ed70c5d 100644
--- a/src/satctl/sources/sentinel1.py
+++ b/src/satctl/sources/sentinel1.py
@@ -4,11 +4,10 @@
 from collections import defaultdict
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, cast
+from typing import cast
 
 from pydantic import BaseModel
 from pystac_client import Client
-from satpy.scene import Scene
 from xarray import DataArray
 
 from satctl.auth import AuthBuilder
From 6dbcaef5738c6e4705fb057cd94169977d6c5d0d Mon Sep 17 00:00:00 2001
From: AndreaBrg
Date: Wed, 19 Nov 2025 15:28:52 +0100
Subject: [PATCH 7/9] (#108) Add earthaccess to log suppression to reduce spam

---
 src/satctl/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/satctl/cli.py b/src/satctl/cli.py
index 5d5879c..0f72ca3 100644
--- a/src/satctl/cli.py
+++ b/src/satctl/cli.py
@@ -60,7 +60,7 @@ def main(
         reporter_cls=reporter_cls,
         suppressions={
             "error": ["urllib3", "requests", "satpy.readers.core.loading", "pyresample.area_config"],
-            "warning": ["satpy", "pyspectral", "boto3", "botocore", "s3transfer"],
+            "warning": ["satpy", "pyspectral", "boto3", "botocore", "s3transfer", "earthaccess"],
         },
     )
     cli_context.progress_reporter = create_reporter(reporter_name=progress)
From cd2d7ef45d9496ea8942da3d3b5220e8a1d9c737 Mon Sep 17 00:00:00 2001
From: AndreaBrg
Date: Thu, 20 Nov 2025 17:22:16 +0100
Subject: [PATCH 8/9] (#112) removed mtg patch, fixed by pinning package
 versions

---
 src/satctl/sources/mtg.py | 36 ++++++++++++++++--------------------
 1 file
changed, 16 insertions(+), 20 deletions(-)

diff --git a/src/satctl/sources/mtg.py b/src/satctl/sources/mtg.py
index 80d94ee..5e1ce2c 100644
--- a/src/satctl/sources/mtg.py
+++ b/src/satctl/sources/mtg.py
@@ -68,9 +68,6 @@ def __init__(
         self.reader = reader
         warnings.filterwarnings(action="ignore", category=UserWarning)
 
-        # Use synchronous dask scheduler for processing
-        dask.config.set(scheduler="synchronous")
-
     def _parse_item_name(self, name: str) -> ProductInfo:
@@ -293,24 +290,23 @@ def save_item(
         datasets_dict = self._prepare_datasets(writer, params)
         datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force)
 
-        with self._netcdf_lock:
-            # Load and resample scene
-            log.debug("Loading and resampling scene")
-            scene = self.load_scene(item, datasets=list(datasets_dict.values()))
-
-            # Define area using base class helper
-            area_def = self.define_area(
-                target_crs=params.target_crs_obj,
-                area=params.area_geometry,
-                scene=scene,
-                source_crs=params.source_crs_obj,
-                resolution=params.resolution,
-            )
-            scene = scene.compute()
-            scene = self.resample(scene, area_def=area_def)
+        # Load and resample scene
+        log.debug("Loading and resampling scene")
+        scene = self.load_scene(item, datasets=list(datasets_dict.values()))
+
+        # Define area using base class helper
+        area_def = self.define_area(
+            target_crs=params.target_crs_obj,
+            area=params.area_geometry,
+            scene=scene,
+            source_crs=params.source_crs_obj,
+            resolution=params.resolution,
+        )
+        scene = scene.compute()
+        scene = self.resample(scene, area_def=area_def)
 
-            # Write datasets using base class helper
-            res = self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer)
+        # Write datasets using base class helper
+        res = self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer)
 
         return res
 
From 8428ebb465280896f69d134e22341762c660b87f Mon Sep 17 00:00:00 2001
From: AndreaBrg
Date: Thu, 20 Nov 2025 19:02:41 +0100
Subject: [PATCH 9/9] (#112) - Harmonize save_item behaviour - Differentiate
 between failed conversions and skips due to already-existing files - Moved
 save_item to base source where possible

---
 src/satctl/sources/base.py      | 75 +++++++++++++++++++++++++++---
 src/satctl/sources/earthdata.py | 55 ----------------------
 src/satctl/sources/mtg.py       | 52 ++-------------------
 src/satctl/sources/sentinel1.py | 81 ---------------------------------
 src/satctl/sources/sentinel2.py | 41 -----------------
 src/satctl/sources/sentinel3.py | 27 +++++++++--
 6 files changed, 96 insertions(+), 235 deletions(-)

diff --git a/src/satctl/sources/base.py b/src/satctl/sources/base.py
index bbafd1c..6886d7b 100644
--- a/src/satctl/sources/base.py
+++ b/src/satctl/sources/base.py
@@ -150,7 +150,6 @@ def download_item(
         """
         ...
 
-    @abstractmethod
     def save_item(
         self,
         item: Granule,
@@ -169,9 +168,49 @@ def save_item(
             force (bool): If True, overwrite existing files. Defaults to False.
 
         Returns:
-            dict[str, list]: Dictionary mapping granule_id to list of output paths
+            dict[str, list]: Dictionary mapping granule_id to list of output paths.
+                Empty list means all files were skipped (already exist).
+
+        Raises:
+            FileNotFoundError: If granule data not downloaded
+            ValueError: If invalid configuration
+            Exception: If processing fails (scene loading, resampling, writing)
         """
-        ...
+ # Validate inputs using base class helper + self._validate_save_inputs(item, params) + + # Parse datasets using base class helper + datasets_dict = self._prepare_datasets(writer, params) + + # Filter existing files using base class helper + datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force) + + # Early return if no datasets to process (all files already exist) + if not datasets_dict: + log.info("Skipping %s - all datasets already exist", item.granule_id) + return {item.granule_id: []} + + # Load and resample scene + log.debug("Loading and resampling scene for %s", item.granule_id) + scene = self.load_scene(item, datasets=list(datasets_dict.values())) + + # Define area using base class helper + area_def = self.define_area( + target_crs=params.target_crs_obj, + area=params.area_geometry, + scene=scene, + source_crs=params.source_crs_obj, + resolution=params.resolution, + ) + scene = self.resample(scene, area_def=area_def) + + # Write datasets using base class helper + result = self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer) + + # Log success + num_files = len(result.get(item.granule_id, [])) + log.info("Successfully processed %s - wrote %d file(s)", item.granule_id, num_files) + return result @property def authenticator(self) -> Authenticator: @@ -474,6 +513,7 @@ def save( success = [] failure = [] + skipped = [] # Track skipped items separately for logging num_workers = num_workers or 1 batch_id = str(uuid.uuid4()) # this prevents pickle errors for unpicklable entities @@ -504,10 +544,32 @@ def save( } for future in as_completed(future_to_item_map): item = future_to_item_map[future] - if future.result(): - success.append(item) - else: + try: + result = future.result() + # Check if files were actually written + files_written = result.get(item.granule_id, []) + if files_written: + # Files were written - successful processing + success.append(item) + else: + # Empty list = skipped (all files already existed) + success.append(item) + skipped.append(item) + except Exception as e: + # Worker raised an exception = processing failed failure.append(item) + log.warning("Failed to process %s: %s", item.granule_id, str(e)) + + # Log summary + if skipped: + log.info( + "Batch complete: %d processed, %d skipped, %d failed", + len(success) - len(skipped), + len(skipped), + len(failure), + ) + else: + log.info("Batch complete: %d processed, %d failed", len(success), len(failure)) emit_event( ProgressEventType.BATCH_COMPLETED, @@ -519,6 +581,7 @@ def save( log.info("Interrupted, cleaning up...") if executor: executor.shutdown(wait=False, cancel_futures=True) + raise # Re-raise to allow outer handler to clean up finally: emit_event( ProgressEventType.BATCH_COMPLETED, diff --git a/src/satctl/sources/earthdata.py b/src/satctl/sources/earthdata.py index c32aefa..5e4ea7e 100644 --- a/src/satctl/sources/earthdata.py +++ b/src/satctl/sources/earthdata.py @@ -421,61 +421,6 @@ def download_item(self, item: Granule, destination: Path, downloader: Downloader item.to_file(granule_dir) return True - def save_item( - self, - item: Granule, - destination: Path, - writer: Writer, - params: ConversionParams, - force: bool = False, - ) -> dict[str, list]: - """Save granule item to output files after processing. 
- - Args: - item (Granule): Granule to process - destination (Path): Base destination directory - writer (Writer): Writer instance for output - params (ConversionParams): Conversion parameters - force (bool): If True, overwrite existing files. Defaults to False. - - Returns: - dict[str, list]: Dictionary mapping granule_id to list of output paths - """ - # Validate inputs using base class helper - self._validate_save_inputs(item, params) - - # Parse datasets using base class helper - datasets_dict = self._prepare_datasets(writer, params) - - # Filter existing files using base class helper - datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force) - - # Early return if no datasets to process - if not datasets_dict: - log.debug("All datasets already exist for %s, skipping", item.granule_id) - return {item.granule_id: []} - - # Get files for processing - files = self.get_files(item) - log.debug("Found %d files to process", len(files)) - - # Load and resample scene - log.debug("Loading and resampling scene") - scene = self.load_scene(item, datasets=list(datasets_dict.values())) - - # Define area using base class helper - area_def = self.define_area( - target_crs=params.target_crs_obj, - area=params.area_geometry, - scene=scene, - source_crs=params.source_crs_obj, - resolution=params.resolution, - ) - scene = self.resample(scene, area_def=area_def) - - # Write datasets using base class helper - return self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer) - def validate(self, item: Granule) -> None: """Validate a granule. diff --git a/src/satctl/sources/mtg.py b/src/satctl/sources/mtg.py index 5e1ce2c..4f300eb 100644 --- a/src/satctl/sources/mtg.py +++ b/src/satctl/sources/mtg.py @@ -1,12 +1,10 @@ import logging import re -import threading import warnings from datetime import datetime, timezone from pathlib import Path from typing import Any, cast -import dask.config import numpy as np from eumdac.datastore import DataStore from pydantic import BaseModel @@ -15,7 +13,7 @@ from satctl.auth import AuthBuilder from satctl.auth.eumetsat import EUMETSATAuthenticator from satctl.downloaders import DownloadBuilder, Downloader -from satctl.model import ConversionParams, Granule, ProductInfo, SearchParams +from satctl.model import Granule, ProductInfo, SearchParams from satctl.sources import DataSource from satctl.utils import extract_zip from satctl.writers import Writer @@ -30,8 +28,6 @@ class MTGAsset(BaseModel): class MTGSource(DataSource): """Source for EUMETSAT MTG product""" - _netcdf_lock = threading.Lock() - def __init__( self, collection_name: str, @@ -221,6 +217,8 @@ def load_scene( # The upper_right_corner='NE' argument flips it automatically in upright position if not lazy: scene.load(datasets, upper_right_corner="NE") + # Compute scene to avoid issues with resampling (MTG-specific requirement) + scene = scene.compute() return scene def validate(self, item: Granule) -> None: @@ -266,50 +264,6 @@ def download_item(self, item: Granule, destination: Path, downloader: Downloader log.warning("Failed to download: %s", item.granule_id) return result - def save_item( - self, - item: Granule, - destination: Path, - writer: Writer, - params: ConversionParams, - force: bool = False, - ) -> dict[str, list]: - """Save granule item to output files after processing. 
- - Args: - item (Granule): Granule to process - destination (Path): Base destination directory - writer (Writer): Writer instance for output - params (ConversionParams): Conversion parameters - force (bool): If True, overwrite existing files. Defaults to False. - - Returns: - dict[str, list]: Dictionary mapping granule_id to list of output paths - """ - self._validate_save_inputs(item, params) - datasets_dict = self._prepare_datasets(writer, params) - datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force) - - # Load and resample scene - log.debug("Loading and resampling scene") - scene = self.load_scene(item, datasets=list(datasets_dict.values())) - - # Define area using base class helper - area_def = self.define_area( - target_crs=params.target_crs_obj, - area=params.area_geometry, - scene=scene, - source_crs=params.source_crs_obj, - resolution=params.resolution, - ) - scene = scene.compute() - scene = self.resample(scene, area_def=area_def) - - # Write datasets using base class helper - res = self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer) - - return res - def _write_scene_datasets( self, scene: Scene, diff --git a/src/satctl/sources/sentinel1.py b/src/satctl/sources/sentinel1.py index ed70c5d..650f503 100644 --- a/src/satctl/sources/sentinel1.py +++ b/src/satctl/sources/sentinel1.py @@ -342,87 +342,6 @@ def download_item(self, item: Granule, destination: Path, downloader: Downloader return all_success - def save_item( - self, - item: Granule, - destination: Path, - writer: Writer, - params: ConversionParams, - force: bool = False, - ) -> dict[str, list]: - """Process and save Sentinel-1 granule to output files. - - Workflow: - 1. Validate inputs (local files exist, datasets specified) - 2. Load scene with SAR data using sar-c_safe reader - 3. Define target area (from params or full granule extent) - 4. Resample to target projection and resolution - 5. Write datasets to output files - - Args: - item: Granule to process (must have local_path set) - destination: Base destination directory for outputs - writer: Writer instance for file output (GeoTIFF, NetCDF, etc.) - params: Conversion parameters including: - - datasets: List of datasets to process - - area_geometry: Optional AOI for spatial subsetting - - target_crs: Target coordinate reference system - - resolution: Target spatial resolution - force: If True, overwrite existing output files. Defaults to False. 
- - Returns: - Dictionary mapping granule_id to list of output file paths - - Raises: - FileNotFoundError: If local_path doesn't exist - ValueError: If datasets is None and no default composite is configured - """ - # Validate that granule was downloaded - if item.local_path is None or not item.local_path.exists(): - raise FileNotFoundError(f"Invalid source file or directory: {item.local_path}") - - # Ensure datasets are specified - if params.datasets is None and self.default_composite is None: - raise ValueError("Missing datasets or default composite for storage") - - # Parse dataset names and prepare output filenames - datasets_dict = self._prepare_datasets(writer, params) - - # Filter existing files using base class helper - datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force) - - # Load scene with requested SAR datasets - log.debug("Loading and resampling scene") - scene = self.load_scene(item, datasets=list(datasets_dict.values())) - - # Define target area for resampling - area_def = self.define_area( - target_crs=params.target_crs_obj, - area=params.area_geometry, - scene=scene, - source_crs=params.source_crs_obj, - resolution=params.resolution, - ) - - # Resample to target area - scene = self.resample(scene, area_def=area_def) - - # Write each dataset to output file - paths: dict[str, list] = defaultdict(list) - output_dir = destination / item.granule_id - output_dir.mkdir(exist_ok=True, parents=True) - - for dataset_name, file_name in datasets_dict.items(): - output_path = output_dir / f"{file_name}.{writer.extension}" - paths[item.granule_id].append( - writer.write( - dataset=cast(DataArray, scene[dataset_name]), - output_path=output_path, - ) - ) - - return paths - class Sentinel1GRDSource(Sentinel1Source): """Source for Sentinel-1 Ground Range Detected (GRD) products. diff --git a/src/satctl/sources/sentinel2.py b/src/satctl/sources/sentinel2.py index 9126509..05f9e35 100644 --- a/src/satctl/sources/sentinel2.py +++ b/src/satctl/sources/sentinel2.py @@ -345,47 +345,6 @@ def download_item(self, item: Granule, destination: Path, downloader: Downloader log.warning("Failed to download all required assets for: %s", item.granule_id) return all_success - def save_item( - self, - item: Granule, - destination: Path, - writer: Writer, - params: ConversionParams, - force: bool = False, - ) -> dict[str, list]: - """Save granule item to output files after processing. - - Args: - item (Granule): Granule to process - destination (Path): Base destination directory - writer (Writer): Writer instance for output - params (ConversionParams): Conversion parameters - force (bool): If True, overwrite existing files. Defaults to False. 
- 
- Returns:
- dict[str, list]: Dictionary mapping granule_id to list of output paths
- """
- # Validate inputs using base class helper
- self._validate_save_inputs(item, params)
- # Parse datasets using base class helper
- datasets_dict = self._prepare_datasets(writer, params)
- # Filter existing files using base class helper
- datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force)
- # Load and resample scene
- log.debug("Loading and resampling scene")
- scene = self.load_scene(item, datasets=list(datasets_dict.values()))
- # Define area using base class helper
- area_def = self.define_area(
- target_crs=params.target_crs_obj,
- area=params.area_geometry,
- scene=scene,
- source_crs=params.source_crs_obj,
- resolution=params.resolution,
- )
- scene = self.resample(scene, area_def=area_def)
- # Write datasets using base class helper
- return self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer)
-

 class Sentinel2L2ASource(Sentinel2Source):
 """Source for Sentinel-2 MSI L2A product."""
diff --git a/src/satctl/sources/sentinel3.py b/src/satctl/sources/sentinel3.py
index c5a4bb8..bf22183 100644
--- a/src/satctl/sources/sentinel3.py
+++ b/src/satctl/sources/sentinel3.py
@@ -230,14 +230,30 @@ def save_item(
 force (bool): If True, overwrite existing files. Defaults to False.

 Returns:
- dict[str, list]: Dictionary mapping granule_id to list of output paths
+ dict[str, list]: Dictionary mapping granule_id to list of output paths.
+ An empty list means every output already existed and was skipped.
+
+ Raises:
+ FileNotFoundError: If the granule data has not been downloaded
+ ValueError: If datasets are missing and no default composite is configured
+ Exception: If processing fails (scene loading, resampling, or writing)
 """
+ # Validate inputs using base class helper
 self._validate_save_inputs(item, params)
+
+ # Parse datasets using base class helper
 datasets_dict = self._prepare_datasets(writer, params)
+
+ # Filter existing files using base class helper
 datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force)

+ # Early return if no datasets to process (all files already exist)
+ if not datasets_dict:
+ log.info("Skipping %s - all datasets already exist", item.granule_id)
+ return {item.granule_id: []}
+
 # Load and resample scene
- log.debug("Loading and resampling scene")
+ log.debug("Loading and resampling scene for %s", item.granule_id)

 # workaround patch to fix broken SLSTR reader
 # see https://github.com/pytroll/satpy/issues/3251
@@ -259,7 +275,12 @@
 scene = self.resample(scene, area_def=area_def)

 # Write datasets using base class helper
- return self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer)
+ result = self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer)
+
+ # Log success
+ num_files = len(result.get(item.granule_id, []))
+ log.info("Successfully processed %s - wrote %d file(s)", item.granule_id, num_files)
+ return result


 class SLSTRSource(Sentinel3Source):
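The save_item overrides removed above (EarthData, MTG, Sentinel-1, Sentinel-2) all repeated the same flow, which this patch consolidates into the DataSource base class. As a reading aid only, here is a minimal sketch of the consolidated template method (logging elided), assuming the helpers named in the diffs (_validate_save_inputs, _prepare_datasets, _filter_existing_files, define_area, resample, _write_scene_datasets) keep the signatures shown there:

    # Illustrative sketch only -- not part of the patch.
    def save_item(
        self,
        item: Granule,
        destination: Path,
        writer: Writer,
        params: ConversionParams,
        force: bool = False,
    ) -> dict[str, list]:
        # Raises FileNotFoundError / ValueError on invalid inputs
        self._validate_save_inputs(item, params)
        # Map requested dataset names to output file names
        datasets_dict = self._prepare_datasets(writer, params)
        # Drop datasets whose outputs already exist (unless force=True)
        datasets_dict = self._filter_existing_files(datasets_dict, destination, item.granule_id, writer, force)
        if not datasets_dict:
            # An empty list signals "skipped" to the batch save() loop
            return {item.granule_id: []}
        scene = self.load_scene(item, datasets=list(datasets_dict.values()))
        area_def = self.define_area(
            target_crs=params.target_crs_obj,
            area=params.area_geometry,
            scene=scene,
            source_crs=params.source_crs_obj,
            resolution=params.resolution,
        )
        scene = self.resample(scene, area_def=area_def)
        return self._write_scene_datasets(scene, datasets_dict, destination, item.granule_id, writer)

Sources with special requirements now override a narrower hook instead of carrying a full copy: MTGSource computes the scene inside load_scene, and Sentinel3Source keeps its save_item override only for the SLSTR reader workaround.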