diff --git a/CHANGELOG.md b/CHANGELOG.md index 96b8ce3f..3f393dd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,13 @@ and this project uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html) ## [Unreleased] +### Changed +- `search_datasets` now accepts a `has_granules` keyword argument. Use + `has_granules=False` to search for metadata about collections with no + associated granules. The default value set in `DataCollections` remains `True`. + ([#939](https://github.com/nsidc/earthaccess/issues/939)) + ([**@juliacollins**](https://github.com/juliacollins)) + ## [v0.13.0] - 2025-01-28 ### Changed diff --git a/earthaccess/api.py b/earthaccess/api.py index 6b758aa2..b6ba1a38 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -49,6 +49,7 @@ def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: * **doi**: DOI for a dataset * **daac**: e.g. NSIDC or PODAAC * **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc. + * **has_granules**: if true, only return collections with granules * **temporal**: a tuple representing temporal bounds in the form `(date_from, date_to)` * **bounding_box**: a tuple representing spatial bounds in the form diff --git a/earthaccess/search.py b/earthaccess/search.py index 3a2b458d..ef9ea83f 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -291,6 +291,28 @@ def debug(self, debug: bool = True) -> Self: self._debug = debug return self + def has_granules(self, has_granules: bool | None = True) -> Self: + """Match only collections with granules, without granules, or either. + + Parameters: + has_granules: + If `True`, only return collections with granules. If + `False`, only return collections without granules. + If `None`, return both types of collections. + + Returns: + self + """ + if has_granules is not None and not isinstance(has_granules, bool): + raise TypeError("has_granules must be of type bool or None") + + if has_granules is None and "has_granules" in self.params: + del self.params["has_granules"] + else: + self.params["has_granules"] = has_granules + + return self + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: """Only match granules that are hosted in the cloud. This is valid for public collections. diff --git a/tests/unit/test_collection_queries.py b/tests/unit/test_collection_queries.py index 603784de..73f3e148 100644 --- a/tests/unit/test_collection_queries.py +++ b/tests/unit/test_collection_queries.py @@ -54,6 +54,15 @@ def test_querybuilder_can_handle_doi(): assert query.params["doi"] == doi +def test_querybuilder_can_handle_has_granules(): + query = DataCollections().has_granules(False) + assert not query.params["has_granules"] + query = DataCollections().has_granules(True) + assert query.params["has_granules"] + query = DataCollections().has_granules(None) + assert "has_granules" not in query.params + + @pytest.mark.parametrize("start,end,expected", valid_single_dates) def test_query_can_parse_single_dates(start, end, expected): query = DataCollections().temporal(start, end)