Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: restart files optimization + non-parallel open #13

Merged
Merged 2 commits on Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/test_python/test_smoke_dust/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ def create_fake_restart_files(
cycle_dates: The series of dates to create the restart files for.
shape: Output grid shape.
"""
restart_dir = root_dir / "RESTART"
restart_dir.mkdir(exist_ok=True)
for date in cycle_dates:
restart_dir = root_dir / date / "RESTART"
restart_dir.mkdir(exist_ok=True, parents=True)
restart_file = restart_dir / f"{date[:8]}.{date[8:10]}0000.phy_data.nc"
with Dataset(restart_file, "w") as nc_ds:
nc_ds.createDimension("Time")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_python/test_smoke_dust/test_core/test_cycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_find_restart_files(
create_fake_grid_out(tmp_path, fake_grid_out_shape)
context = create_fake_context(tmp_path)
cycle = SmokeDustCycleTwo(context)
create_fake_restart_files(context.nwges_dir, cycle.cycle_dates, fake_grid_out_shape)
create_fake_restart_files(context.hourly_hwpdir, cycle.cycle_dates, fake_grid_out_shape)
create_fake_restart_files(
context.nwges_dir,
[
Expand Down
48 changes: 28 additions & 20 deletions ush/smoke_dust/core/cycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,32 +390,40 @@ def _find_restart_files_(
) -> tuple[Path, ...]:
root_dir = self._context.hourly_hwpdir
self.log(f"_find_restart_files_: {root_dir=}")
filenames = glob.glob("**/*phy_data*nc", root_dir=root_dir, recursive=True)
potential_restart_files = [
f"{cycle[:8]}.{cycle[8:10]}0000.phy_data.nc" for cycle in self.cycle_dates
]
self.log(f"_find_restart_files_: {potential_restart_files=}")
potential_restart_dirs = [root_dir / cycle / "RESTART" for cycle in self.cycle_dates]
restart_dirs = [
restart_dir for restart_dir in potential_restart_dirs if restart_dir.exists()
]
self.log(f"_find_restart_files_: {restart_dirs=}")
found_potentials = []
restart_files = []
for filename in filenames:
self.log(f"_find_restart_files_: {filename=}", level=logging.DEBUG)
path = root_dir / filename
if path.name in potential_restart_files and path.name not in found_potentials:
try:
resolved = path.resolve(strict=True)
except FileNotFoundError:
self.log(f"restart file link not resolvable: {path=}", level=logging.WARN)
continue
with open_nc(resolved) as nc_ds:
variables = nc_ds.variables.keys() # pylint: disable=no-member
if all(
expected_var in variables for expected_var in self.expected_restart_varnames
):
self.log(
f"_find_restart_files_: found restart path {path=}", level=logging.DEBUG
)
restart_files.append(path)
found_potentials.append(path.name)
for restart_dir in restart_dirs:
filenames = glob.glob("**/*phy_data*nc", root_dir=restart_dir, recursive=True)
for filename in filenames:
self.log(f"_find_restart_files_: {filename=}", level=logging.DEBUG)
path = restart_dir / filename
if path.name in potential_restart_files and path.name not in found_potentials:
try:
resolved = path.resolve(strict=True)
except FileNotFoundError:
self.log(f"restart file link not resolvable: {path=}", level=logging.WARN)
continue
with open_nc(resolved, parallel=False) as nc_ds:
variables = nc_ds.variables.keys() # pylint: disable=no-member
if all(
expected_var in variables
for expected_var in self.expected_restart_varnames
):
self.log(
f"_find_restart_files_: found restart path {path=}",
level=logging.DEBUG,
)
restart_files.append(path)
found_potentials.append(path.name)
return tuple(restart_files)


Expand Down