Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New SPLIT_CHECKPOINT option to replace read/write by face #2394

Merged
merged 29 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2e8314c
changes for per writer output
bena-nasa Sep 22, 2023
b17510b
more bug fixes
bena-nasa Sep 28, 2023
4362894
more bug fixes
bena-nasa Sep 28, 2023
cd6c99e
remove commented out code
bena-nasa Sep 29, 2023
feb072b
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Sep 29, 2023
ddba7ab
update changelog
bena-nasa Sep 29, 2023
3a8a1ce
remove uncommented code
bena-nasa Sep 29, 2023
1be68c9
rename routine
bena-nasa Sep 29, 2023
dae6a49
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
9593599
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
a4b672d
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
dd95da3
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
67c807d
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
23c6836
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
7ec5ac3
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
ff9d80e
Update base/FileIOShared.F90
bena-nasa Sep 29, 2023
3f9c5ee
cleanup error handling in FileIOShared.F90
bena-nasa Sep 29, 2023
e81fa85
restore check that grids match
bena-nasa Sep 29, 2023
6da4c67
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Jan 5, 2024
8936d6e
updates Tom requested
bena-nasa Jan 5, 2024
6820f22
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Jan 8, 2024
39e1319
fix bug in last commit
bena-nasa Jan 8, 2024
f48f922
get split checkpoint working with server
bena-nasa Feb 2, 2024
a9a7b87
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Feb 2, 2024
02a16d4
fix bug
bena-nasa Feb 2, 2024
22368f7
more updates for the split restart capability
bena-nasa Feb 2, 2024
5111692
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Feb 8, 2024
7835f5e
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Feb 9, 2024
82ca55d
Merge branch 'develop' into feature/bmauer/file_per_writer
bena-nasa Feb 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ file (COPY mapl_stub.pl DESTINATION ${esma_etc}/MAPL)

install (PROGRAMS
MAPL_GridCompSpecs_ACG.py
combine_restarts.py
split_restart.py
mapl_acg.pl
mapl_stub.pl
TYPE SYSCONF
Expand Down
151 changes: 151 additions & 0 deletions Apps/combine_restarts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python

#-------------
# Load modules
#-------------
from netCDF4 import Dataset
import numpy
import argparse
import yaml

def parse_args():
    """Parse command-line arguments.

    Returns a dict with:
      input  -- control (template) file written by split_restart.py
      output -- name of the single combined restart file to create
    """
    # NOTE: the original description ("Flatten a lat-lon to 1D") was
    # copy-pasted from another utility; this script combines split restarts.
    p = argparse.ArgumentParser(
        description='Combine split restart files back into a single restart file')
    p.add_argument('input', type=str, help='input control/template file')
    p.add_argument('output', type=str, help='output combined file')
    return vars(p.parse_args())

#------------------
# Opening the files
#------------------
comm_args = parse_args()
Input_template = comm_args['input']
Output_file = comm_args['output']

# The control file written by split_restart.py records how many split files
# exist and the per-face j dimension of the cubed sphere.
with open(Input_template, 'r') as f:
    input_yaml = yaml.safe_load(f)
num_files = input_yaml['num_files']
j_size = input_yaml['j_size']

# number of j rows stored in each split file (a cubed sphere has 6 faces)
j_per_file = j_size*6//num_files

# Use the first split file as a template for dimensions, coordinate
# variables, and attributes.  Split files are 0-indexed (split_restart.py
# writes suffixes _0 .. _N-1), so the template must be "_0", not "_1".
ncFid = Dataset(Input_template+"_"+str(0), mode='r')
ncFidOut = Dataset(Output_file, mode='w', format='NETCDF4')

#---------------------
# Extracting variables
#---------------------

# Every dimension other than time/lon/lat (e.g. lev, edge) is copied verbatim.
exclude_dims = ['time','lon','lat']
detected_dims = [dim for dim in ncFid.dimensions if dim not in exclude_dims]
cube_res = len(ncFid.dimensions['lon'])

# define dimensions

Xdim = ncFidOut.createDimension('lon',cube_res)
Ydim = ncFidOut.createDimension('lat',cube_res*6)

for dim in detected_dims:
    dim_out = ncFidOut.createDimension(dim,len(ncFid.dimensions[dim]))

new_time_dim = ncFidOut.createDimension('time',1)

# define coordinate variables

new_time = ncFidOut.createVariable('time','f8',('time'))
# copy every attribute of the input time variable (single pass; the original
# had this loop accidentally nested inside itself)
for att in ncFid.variables['time'].ncattrs():
    setattr(ncFidOut.variables['time'],att,getattr(ncFid.variables['time'],att))
new_time[:] = 0


vXdim = ncFidOut.createVariable('lon','f8',('lon'))
vYdim = ncFidOut.createVariable('lat','f8',('lat'))
setattr(ncFidOut.variables['lon'],'units','degrees_east')
setattr(ncFidOut.variables['lat'],'units','degrees_north')
setattr(ncFidOut.variables['lon'],'long_name','longitude')
setattr(ncFidOut.variables['lat'],'long_name','latitude')
vXdim[:] = range(1,cube_res+1)
vYdim[:] = range(1,(cube_res*6)+1)

# coordinate variables for the extra dimensions (e.g. lev) are index values
for dim in detected_dims:
    if dim in ncFid.variables:
        vLevOut = ncFidOut.createVariable(dim,'f8',(dim))
        for att in ncFid.variables[dim].ncattrs():
            setattr(ncFidOut.variables[dim],att,getattr(ncFid.variables[dim],att))
        vLevOut[:] = range(1,len(ncFid.dimensions[dim])+1)

# special handling if fvcore restart for AK/BK or PREF (1D on 'edge')
oned_vars = ['AK','BK','PREF']
for oned_var in oned_vars:
    if oned_var in ncFid.variables:
        float_type = ncFid.variables[oned_var].dtype
        ak = ncFidOut.createVariable(oned_var,float_type,('edge'))
        for att in ncFid.variables[oned_var].ncattrs():
            setattr(ncFidOut.variables[oned_var],att,getattr(ncFid.variables[oned_var],att))
        ak[:] = ncFid.variables[oned_var][:]

ncFid.close()

# variables that are coordinates or handled above, not field data
Exclude_Var = ['time','edge','lev','lon','lat','AK','BK','unknown_dim1','unknown_dim2']

for i in range(num_files):
    ncFid = Dataset(Input_template+"_"+str(i), mode='r')
    if i==0:
        # define every field variable once, from the first file's metadata
        # (shape/dtype come from the variable object; no need to read data)
        for var in ncFid.variables:
            if var not in Exclude_Var:
                dim_size = len(ncFid.variables[var].shape)
                float_type = ncFid.variables[var].dtype
                var_dims = ncFid.variables[var].dimensions

                if dim_size == 4:
                    tout = ncFidOut.createVariable(var,float_type,var_dims,fill_value=1.0e15,chunksizes=(1,1,cube_res,cube_res))
                elif dim_size == 3:
                    tout = ncFidOut.createVariable(var,float_type,var_dims,fill_value=1.0e15,chunksizes=(1,cube_res,cube_res))
                elif dim_size == 2:
                    tout = ncFidOut.createVariable(var,float_type,('lat','lon'),fill_value=1.0e15,chunksizes=(cube_res,cube_res))
                if dim_size in (2,3,4):
                    # copy all attributes except _FillValue (set at creation)
                    for att in ncFid.variables[var].ncattrs():
                        if att != "_FillValue":
                            setattr(ncFidOut.variables[var],att,getattr(ncFid.variables[var],att))

    # copy this file's slab of j rows into the combined output
    il = j_per_file*i
    iu = j_per_file*(i+1)
    for var in ncFid.variables:
        if var not in Exclude_Var:
            temp = ncFid.variables[var][:]
            dim_size = len(temp.shape)

            if dim_size == 4:
                ncFidOut.variables[var][:,:,il:iu,:] = temp[:,:,:,:]
            elif dim_size == 3:
                ncFidOut.variables[var][:,il:iu,:] = temp[:,:,:]
            elif dim_size == 2:
                ncFidOut.variables[var][il:iu,:] = temp[:,:]

    ncFid.close()

#-----------------
# Closing the file
#-----------------
ncFidOut.close()

160 changes: 160 additions & 0 deletions Apps/split_restart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python

#-------------
# Load modules
#-------------
from netCDF4 import Dataset
import numpy
import argparse
import sys

def parse_args():
    """Parse command-line arguments.

    Returns a dict with:
      input  -- the restart file to split
      output -- template name for the output control file and split files
      split  -- number of files to split the restart into
    """
    # NOTE: the original description ("Flatten a lat-lon to 1D") was
    # copy-pasted from another utility; this script splits a restart file.
    p = argparse.ArgumentParser(
        description='Split a cubed-sphere restart file into multiple files')
    p.add_argument('input', type=str, help='input file')
    p.add_argument('output', type=str, help='output file template')
    p.add_argument('split', type=int, help='number of files to split into')
    return vars(p.parse_args())

#------------------
# Opening the file
#------------------
comm_args = parse_args()
Input_file = comm_args['input']
Output_template = comm_args['output']
n_files = comm_args['split']

ncFid = Dataset(Input_file,mode='r')

# tile (catch/ocean) restarts are 1D and cannot be split this way;
# exit quietly (sys.exit is preferred over quit(), which needs the
# site module)
if 'tile' in ncFid.dimensions:
    sys.exit()

#---------------------
# Extracting variables
#---------------------

# Every dimension other than time/lon/lat (e.g. lev, edge) is copied verbatim.
exclude_dims = ['time','lon','lat']
detected_dims = [dim for dim in ncFid.dimensions if dim not in exclude_dims]

cube_res = len(ncFid.dimensions['lon'])

# variables that are coordinates or handled specially, not field data
Exclude_Var = ['time','edge','lev','lon','lat','AK','BK','unknown_dim1','unknown_dim2']

# the total j extent (6 faces) must divide evenly among the output files
if (cube_res*6) % n_files != 0:
    raise ValueError('number of files must evenly divide 6 times cube size')

# number of j rows written to each split file
y_size = cube_res*6//n_files

# create master (control) file read back by combine_restarts.py
with open(Output_template,mode='w') as f:
    f.write("num_files: "+str(n_files)+"\n"+"j_size: "+str(cube_res))

# create each file
for i in range(n_files):
    ncFidOut = Dataset(Output_template+"_"+str(i), mode='w',format='NETCDF4')
    # record which slab of the cubed sphere this file holds
    setattr(ncFidOut,'Split_Cubed_Sphere',i)

    # define dimensions

    Xdim = ncFidOut.createDimension('lon',cube_res)
    Ydim = ncFidOut.createDimension('lat',y_size)

    for dim in detected_dims:
        dim_out = ncFidOut.createDimension(dim,len(ncFid.dimensions[dim]))

    new_time_dim = ncFidOut.createDimension('time',1)

    # define coordinate variables

    new_time = ncFidOut.createVariable('time','f8',('time'))
    # copy every attribute of the input time variable (single pass; the
    # original had this loop accidentally nested inside itself)
    for att in ncFid.variables['time'].ncattrs():
        setattr(ncFidOut.variables['time'],att,getattr(ncFid.variables['time'],att))
    new_time[:] = 0


    vXdim = ncFidOut.createVariable('lon','f8',('lon'))
    vYdim = ncFidOut.createVariable('lat','f8',('lat'))
    setattr(ncFidOut.variables['lon'],'units','degrees_east')
    setattr(ncFidOut.variables['lat'],'units','degrees_north')
    setattr(ncFidOut.variables['lon'],'long_name','longitude')
    setattr(ncFidOut.variables['lat'],'long_name','latitude')
    # lat values continue the global 1-based row numbering across files
    y_start = i*y_size
    vXdim[:] = range(1,cube_res+1)
    vYdim[:] = range(1+y_start,y_size+1+y_start)

    # coordinate variables for the extra dimensions (e.g. lev) are index values
    for dim in detected_dims:
        if dim in ncFid.variables:
            vLevOut = ncFidOut.createVariable(dim,'f8',(dim))
            for att in ncFid.variables[dim].ncattrs():
                setattr(ncFidOut.variables[dim],att,getattr(ncFid.variables[dim],att))
            vLevOut[:] = range(1,len(ncFid.dimensions[dim])+1)

    # special handling if fvcore restart for AK/BK or PREF (1D on 'edge')
    oned_vars = ['AK','BK','PREF']
    for oned_var in oned_vars:
        if oned_var in ncFid.variables:
            float_type = ncFid.variables[oned_var].dtype
            ak = ncFidOut.createVariable(oned_var,float_type,('edge'))
            for att in ncFid.variables[oned_var].ncattrs():
                setattr(ncFidOut.variables[oned_var],att,getattr(ncFid.variables[oned_var],att))
            ak[:] = ncFid.variables[oned_var][:]

    # define field variables (shape/dtype come from the variable object;
    # no need to read the data here)
    for var in ncFid.variables:
        if var not in Exclude_Var:
            dim_size = len(ncFid.variables[var].shape)
            float_type = ncFid.variables[var].dtype
            var_dims = ncFid.variables[var].dimensions
            if dim_size == 4:
                tout = ncFidOut.createVariable(var,float_type,var_dims,fill_value=1.0e15,chunksizes=(1,1,cube_res,cube_res))
            elif dim_size == 3:
                tout = ncFidOut.createVariable(var,float_type,var_dims,fill_value=1.0e15,chunksizes=(1,cube_res,cube_res))
            elif dim_size == 2:
                tout = ncFidOut.createVariable(var,float_type,('lat','lon'),fill_value=1.0e15,chunksizes=(cube_res,cube_res))
            if dim_size in (2,3,4):
                # copy all attributes except _FillValue (set at creation)
                for att in ncFid.variables[var].ncattrs():
                    if att != "_FillValue":
                        setattr(ncFidOut.variables[var],att,getattr(ncFid.variables[var],att))

    # write this file's slab of j rows
    il = y_size*i
    iu = y_size*(i+1)
    for var in ncFid.variables:
        if var not in Exclude_Var:
            temp = ncFid.variables[var][:]
            dim_size = len(temp.shape)

            if dim_size == 4:
                ncFidOut.variables[var][:,:,:,:] = temp[:,:,il:iu,:]
            elif dim_size == 3:
                ncFidOut.variables[var][:,:,:] = temp[:,il:iu,:]
            elif dim_size == 2:
                ncFidOut.variables[var][:,:] = temp[il:iu,:]

    ncFidOut.close()

#-----------------
# Closing the file
#-----------------
ncFid.close()

7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Add python utilities to split and recombine restarts
- Add a new "SPLIT\_CHECKPOINT:" option that has replaced the write-by-face option. This writes a file per writer, with the base checkpoint name being a control file that records how many files were written. On reading, if this control file is provided as the restart file name, it will automatically trigger reading of the individual files
- Implemented a new algorithm to read tile files

### Changed
Expand Down Expand Up @@ -67,6 +69,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed failing tests for `field_utils`.
- Various fixes for NVHPC work

### Removed

### Deprecated
- The write-by-face option for checkpoint/restart has been deprecated. It has been replaced by a more generic file-per-writer option

## [2.43.2] - 2024-02-06

### Fixed
Expand Down
Loading
Loading