Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[develop] Modify retrieve_data.py to check a url instead of download. #776

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions ush/retrieve_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import glob
from textwrap import dedent
import time
import urllib.request
from copy import deepcopy

import yaml
Expand Down Expand Up @@ -99,6 +100,14 @@ def copy_file(source, destination, copy_cmd):
return False
return True

def check_file(url):

    """
    Check that a file exists at the expected URL. Return boolean value
    based on the response.

    Args:
        url: location of the file to probe, as a string accepted by
             urllib.request.urlopen

    Returns:
        True if the server answers with HTTP status 200; False if the
        status is anything else, or if the request fails with an HTTP
        error (e.g. 404) or a lower-level URL/connection error.
    """
    # Function-scope import so this block does not rely on urllib.error
    # being reachable only as a side effect of importing urllib.request.
    import urllib.error

    try:
        # The context manager closes the connection once the status has
        # been read; the response body itself is never read here.
        with urllib.request.urlopen(url) as response:
            return response.getcode() == 200
    except urllib.error.URLError:
        # urlopen raises (rather than returning a non-200 response) for
        # missing or unreachable files. HTTPError is a subclass of
        # URLError, so both cases are reported as "not available".
        return False

def download_file(url):

Expand Down Expand Up @@ -399,7 +408,11 @@ def get_requested_files(cla, file_templates, input_locs, method="disk", **kwargs

elif method == "download":

retrieved = download_file(input_loc)
if cla.check_file:
retrieved = check_file(input_loc)

else:
retrieved = download_file(input_loc)
# Wait a bit before trying the next download.
# Seems to reduce the occurrence of timeouts
# when downloading from AWS
Expand Down Expand Up @@ -853,7 +866,7 @@ def main(argv):
if not unavailable:
# All files are found. Stop looking!
# Write a variable definitions file for the data, if requested
if cla.summary_file:
if cla.summary_file and not cla.check_file:
write_summary_file(cla, data_store, file_templates)
break

Expand Down Expand Up @@ -1029,6 +1042,13 @@ def parse_args(argv):
help="Name of the summary file to be written to the output \
directory",
)
parser.add_argument(
"--check_file",
action="store_true",
help="Use this flag to check the existence of requested files, \
but don't try to download them. Works with download protocol \
only",
)
return parser.parse_args(argv)


Expand Down
16 changes: 4 additions & 12 deletions ush/test_retrieve_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,17 +454,13 @@ def test_ufs_ics_from_aws(self):
'--ics_or_lbcs', 'ICS',
'--debug',
'--file_type', 'nemsio',
'--check_file',
]
# fmt: on

# Testing that there is no failure
retrieve_data.main(args)

# Verify files exist in temp dir

path = os.path.join(tmp_dir, "*")
files_on_disk = glob.glob(path)
self.assertEqual(len(files_on_disk), 1)

def test_ufs_lbcs_from_aws(self):

"""Get UFS-CASE-STUDY LBCS from aws for 3 hour boundary conditions"""
Expand All @@ -484,13 +480,9 @@ def test_ufs_lbcs_from_aws(self):
'--ics_or_lbcs', 'LBCS',
'--debug',
'--file_type', 'nemsio',
'--check_file',
]
# fmt: on

# Testing that there is no failure
retrieve_data.main(args)

# Verify files exist in temp dir

path = os.path.join(tmp_dir, "*")
files_on_disk = glob.glob(path)
self.assertEqual(len(files_on_disk), 2)