diff --git a/environment.yml b/environment.yml index 9de637f..f640937 100644 --- a/environment.yml +++ b/environment.yml @@ -3,9 +3,9 @@ channels: - conda-forge dependencies: - geographiclib==2.0 - - h5py==3.9.0 + - h5py <=3.12.1 - matplotlib<=3.7.2 - - mpi4py<=3.1.4 + - mpi4py<=4.0.1 - numpy<=1.25.2 - obspy<=1.4.0, >=1.3.1 - pip diff --git a/examples/correlate.py b/examples/correlate.py index 93ad096..560e803 100755 --- a/examples/correlate.py +++ b/examples/correlate.py @@ -18,6 +18,7 @@ root = 'data' sc = Store_Client(client, root) + c = Correlator(options=params, store_client=sc) print('Correlator initiated') x = time() diff --git a/params_example.yaml b/params_example.yaml index 73c6317..f170c15 100644 --- a/params_example.yaml +++ b/params_example.yaml @@ -16,6 +16,18 @@ sds_dir : '/path/to/sds_root' # change if your filenames deviate from standard pattern sds_fmtstr : '{year}/{network}/{station}/{channel}.{sds_type}/{network}.{station}.{location}.{channel}.{sds_type}.{year}.{doy:03d}' +#### parameters for the waveform database, if it was not downloaded +#### Default values can be omitted +#### Values without leading '/' are relative to proj_dir +#### Values with leading '/' are absolute paths +# Path to stationxml files, default is "inventory/*.xml" +stationxml_file : '/path/to/stations/*.xml' +# sds root directory, default is "mseed" +sds_dir : '/path/to/sds_root' +# sds format string of waveform file names, +# change if your filenames deviate from standard pattern +sds_fmtstr : '{year}/{network}/{station}/{channel}.{sds_type}/{network}.{station}.{location}.{channel}.{sds_type}.{year}.{doy:03d}' + #### parameters that are network specific net: diff --git a/setup.cfg b/setup.cfg index 8215f8e..6f6dd8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,12 +22,12 @@ keywords = Seismology, Ambient Noise, Earth Sciences, Environmental Seismology, [options] package_dir = = src -python_requires = >=3.10 +python_requires = >=3.10, <3.12 install_requires = 
geographiclib==2.0 - h5py ==3.9.0 + h5py <=3.12.1 matplotlib <=3.7.2 - mpi4py <=3.1.4 + mpi4py<=4.0.1 numpy <=1.25.2 obspy<=1.4.0, >=1.3.1 diff --git a/src/seismic/correlate/correlate.py b/src/seismic/correlate/correlate.py index 2891009..eb49253 100644 --- a/src/seismic/correlate/correlate.py +++ b/src/seismic/correlate/correlate.py @@ -58,6 +58,12 @@ def __init__(self, options: dict | str, store_client: Store_Client = None): if isinstance(options, str): with open(options) as file: options = yaml.load(file, Loader=yaml.FullLoader) + elif isinstance(options, Store_Client): + raise DeprecationWarning( + "Order of arguments in Correlator has changed. " + + "The Store_Client has to be passed as the second argument. " + + "Can be None to init Local_Store_Client from options.") + # init MPI self.comm = MPI.COMM_WORLD self.psize = self.comm.Get_size() diff --git a/src/seismic/db/corr_hdf5.py b/src/seismic/db/corr_hdf5.py index 33b0a7a..b405901 100644 --- a/src/seismic/db/corr_hdf5.py +++ b/src/seismic/db/corr_hdf5.py @@ -127,12 +127,6 @@ def add_correlation( if isinstance(data, CorrTrace): data = [data] - if len(data) == 2: - isequal = np.all(np.isclose(data[0].data, data[1].data)) - else: - isequal = None - print(tag, len(data), isequal, [tr.id for tr in data]) - for tr in data: st = tr.stats path = hierarchy.format( @@ -146,7 +140,6 @@ def add_correlation( path, data=tr.data, compression=self.compression, compression_opts=self.compression_opts) convert_header_to_hdf5(ds, st) - print("Wrote", tag, tr.id, "to db.") except ValueError as e: print(tr.id, e) warnings.warn("The dataset %s is already in file and will be \ diff --git a/src/seismic/trace_data/waveform.py b/src/seismic/trace_data/waveform.py index 4d47c85..eebca5d 100644 --- a/src/seismic/trace_data/waveform.py +++ b/src/seismic/trace_data/waveform.py @@ -8,7 +8,7 @@ Peter Makus (makus@gfz-potsdam.de) Created: Thursday, 18th February 2021 02:30:02 pm -Last Modified: Monday, 25th November 2024 03:15:26 pm (J. 
Lehr) +Last Modified: Tuesday, 10th December 2024 03:51:56 pm ''' import fnmatch @@ -22,7 +22,6 @@ import numpy as np from obspy.clients.fdsn import Client as rClient from obspy.clients.fdsn.header import FDSNNoDataException -# from obspy.clients.filesystem.sds import Client as lClient from obspy.clients.filesystem import sds from obspy import read_inventory, UTCDateTime, read, Stream, Inventory from obspy.clients.fdsn.mass_downloader import RectangularDomain, \ @@ -30,7 +29,8 @@ from seismic.utils.raw_analysis import spct_series_welch -DEFAULT_SDS = "./mseed" +DEFAULT_SDS = "mseed" +DEFAULT_INVDIR = "inventory" class Store_Client(object): @@ -42,7 +42,6 @@ class Store_Client(object): Inventory data is stored in the folder `inventory` and attached to data that is read. """ - def __init__(self, Client: rClient, path: str, read_only: bool = False, sds_dir: str = DEFAULT_SDS): """ @@ -65,7 +64,7 @@ def __init__(self, Client: rClient, path: str, read_only: bool = False, os.makedirs(self.sds_root, exist_ok=True) assert os.path.isdir(self.sds_root), ("{} is not a directory").format( self.sds_root) - self.inv_dir = os.path.join(path, "inventory") + self.inv_dir = os.path.join(path, DEFAULT_INVDIR) if os.path.isdir(self.inv_dir) and os.listdir(self.inv_dir): self.inventory = self.read_inventory() else: @@ -517,15 +516,51 @@ def compute_spectrogram( class Local_Store_Client(Store_Client): + """ + Client to manage access to local data stored in an SDS-like structure. + + In contrast to the regular Store_Client non-default names can be set for + paths to stationxml-files and sds-root directory. It does not provide + access to online data archives. 
+ + The client is initialized from a configuration dictionary that must contain + the following keys: + - proj_dir: path to the project directory + - co: dictionary with keys 'read_start' and 'read_end' for the time range + - net: dictionary with keys 'network' and 'station' for the selection + + The following keys are accessed, if present: + - sds_dir: path to the sds root directory, defaults to 'mseed' + - stationxml_file: path to the stationxml file, defaults to + 'inventory/*.xml' + - sds_fmtstr: format string for the sds structure, defaults to the sds + If not present, the default values are used. + + Other keys are ignored. Thus, the configuration file for the entire + correlation setup can be used to initialize the client. + """ def __init__(self, config: dict): - + """ + :param config: Configuration dictionary. + :type config: dict + """ # Create project dir root = config["proj_dir"] os.makedirs(root, exist_ok=True) + + # Set default values if params not set in config + if "sds_dir" not in config: + config["sds_dir"] = DEFAULT_SDS + if "stationxml_file" not in config: + config["stationxml_file"] = os.path.join(DEFAULT_INVDIR, "*.xml") + config["stationxml_file"] = get_abs_sds_path( + root, config["stationxml_file"]) + if "sds_fmtstr" not in config: + config["sds_fmtstr"] = None sds_root = get_abs_sds_path(root, config["sds_dir"]) assert os.path.isdir(sds_root), "{} is not a directory".format( sds_root) - sdscl = sds.Client(sds_root) + sdscl = sds.Client(sds_root=sds_root) fmt_str = config["sds_fmtstr"] if fmt_str is None or fmt_str.lower() == "default": @@ -533,41 +568,62 @@ def __init__(self, config: dict): # Could check if fmt_str has correct format sdscl.FMTSTR = fmt_str + self.sds_fmtstr = fmt_str + self.sds_root = sds_root + super().__init__(sdscl, root, True, sds_root) self.lclient = self.rclient - self.sds_root = self.rclient.sds_root - self.inv_dir = config["stationxml_file"] + self._set_inventory(config) - self.sds_fmtstr = self.lclient.FMTSTR - def 
_set_inventory(self, config): - _inv = read_inventory(config["stationxml_file"]) - print("Channels in stationxml_file:", - len(_inv.get_contents()["channels"])) + def _set_inventory(self, config: dict): + """ + Read inventory from file system based on parameters in config. + """ + inv_all = read_inventory(config["stationxml_file"]) - _inv = _inv.select(starttime=UTCDateTime(config["co"]["read_start"]), - endtime=UTCDateTime(config["co"]["read_end"])) - print("Channels in time range:", len(_inv.get_contents()["channels"])) + inv_all = inv_all.select( + starttime=UTCDateTime(config["co"]["read_start"]), + endtime=UTCDateTime(config["co"]["read_end"])) inv = Inventory() - for n in config["net"]["network"]: - _inv_ = _inv.select(network=n) - print("Channels in netw", n, ":", - len(_inv_.get_contents()["channels"])) - for s in config["net"]["station"]: - inv += _inv_.select(station=s) - - print("Channels in selection:", len(inv.get_contents()["channels"])) + networks = config["net"]["network"] + if isinstance(networks, str): + networks = [networks] + stations = config["net"]["station"] + if isinstance(stations, str): + stations = [stations] + + for n in networks: + inv_all_ = inv_all.select(network=n) + for s in stations: + inv += inv_all_.select(station=s) + self.inventory = inv def read_inventory(self): - return self.inventory + """ + Returns the inventory attribute if set, otherwise an empty inventory. - # def _load_remote(self, network: str, station: str, - # location: str, channel: str, - # starttime: UTCDateTime, endtime: UTCDateTime, - # attach_response: bool) -> Stream: - # raise RuntimeWarning("Local sds-client cannot download remote data.") + It replaces the method of the parent class to mimic its behavior. The + method here does not actually read the inventory from the file system, + but returns the inventory object that was set during initialization + (or by calling :func:`~_set_inventory`). 
+ + :return: Inventory object + :rtype: Inventory + """ + try: + return self.inventory + except AttributeError: + # Happens during super() and if default invdir is present but empty + return Inventory() + # return self.inventory + + def download_waveforms_mdl(self, *args, **kwargs): + """Raises UserWarning that method is not implemented.""" + raise UserWarning("Method not implemented for Local_Store_Client." + + "Use Store_Client instead.") class FS_Client(object): diff --git a/tests/test_correlate.py b/tests/test_correlate.py index 1b260e1..8b4cd3e 100644 --- a/tests/test_correlate.py +++ b/tests/test_correlate.py @@ -89,6 +89,20 @@ def test_init_options_from_yaml( makedirs_mock.assert_has_calls(mkdir_calls) open_mock.assert_any_call(self.param_example) + @mock.patch('seismic.correlate.correlate.yaml.load') + @mock.patch('builtins.open') + @mock.patch('seismic.correlate.correlate.logging') + @mock.patch('seismic.correlate.correlate.os.makedirs') + def test_deprecation_of_args( + self, makedirs_mock, logging_mock, open_mock, yaml_mock): + yaml_mock.return_value = self.options + sc_mock = mock.Mock(Store_Client) + sc_mock.get_available_stations.return_value = [] + sc_mock._translate_wildcards.return_value = [] + # c = correlate.Correlator(sc_mock, self.param_example) + self.assertRaises(DeprecationWarning, + correlate.Correlator, sc_mock, self.param_example) + @mock.patch('seismic.correlate.correlate.yaml.load') @mock.patch('builtins.open') @mock.patch('seismic.correlate.correlate.logging') diff --git a/tests/test_waveform.py b/tests/test_waveform.py index ed67dd6..5bcfe57 100644 --- a/tests/test_waveform.py +++ b/tests/test_waveform.py @@ -4,7 +4,7 @@ Author: Peter Makus (makus@gfz-potsdam.de) Created: Monday, 15th March 2021 03:33:25 pm -Last Modified: Monday, 25th November 2024 03:15:26 pm (J. 
Lehr) +Last Modified: Tuesday, 10th December 2024 04:26:09 pm ''' import unittest @@ -12,11 +12,19 @@ import os import warnings import yaml +from copy import deepcopy import numpy as np -from obspy import UTCDateTime, Inventory - +from obspy import UTCDateTime, Inventory, read_inventory from seismic.trace_data import waveform +from seismic import trace_data + + +paramfile = os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), + 'params_example.yaml') +with open(paramfile, "r") as f: + config = yaml.load(f, Loader=yaml.FullLoader) paramfile = os.path.join( @@ -125,24 +133,125 @@ def test_no_available_data(self): class TestLocalStoreClient(TestStoreClient): + @mock.patch('seismic.trace_data.waveform.os.makedirs') + @mock.patch('seismic.trace_data.waveform.os.path.isdir') @mock.patch('seismic.trace_data.waveform.os.listdir') + @mock.patch.object(trace_data.waveform.Local_Store_Client, + "_set_inventory") + def test_init(self, mock_setinv, + mock_listdir, mock_isdir, mock_makedirs): + mock_isdir.return_value = True + mock_setinv.sife_effect = lambda x: x.__setattr__("inventory", + Inventory()) + for rv in [True, False]: + with self.subTest(rv=rv): + mock_listdir.return_value = rv + for m in [mock_setinv, mock_makedirs]: + m.reset_mock() + + test_config = deepcopy(config) + sc = waveform.Local_Store_Client(test_config) + + self.assertEqual( + os.path.normpath(sc.sds_root), os.path.normpath( + os.path.abspath(os.path.join( + test_config["proj_dir"], test_config["sds_dir"])))) + self.assertIsInstance(sc.rclient, waveform.sds.Client) + self.assertIsInstance(sc.lclient, waveform.sds.Client) + mock_makedirs.assert_called_once_with(test_config["proj_dir"], + exist_ok=True) + mock_setinv.assert_called_once() + self.assertTrue(hasattr(sc, "inventory")) + + @mock.patch('seismic.trace_data.waveform.os.makedirs') @mock.patch('seismic.trace_data.waveform.os.path.isdir') + @mock.patch('seismic.trace_data.waveform.os.listdir') + 
@mock.patch.object(trace_data.waveform.Local_Store_Client, + "_set_inventory") + def test_init_without_default_paths( + self, mock_setinv, mock_listdir, mock_isdir, mock_makedirs): + mock_isdir.return_value = True + mock_setinv.sife_effect = lambda x: x.__setattr__("inventory", + Inventory()) + for rv in [True, False]: + with self.subTest(rv=rv): + mock_listdir.return_value = rv + for m in [mock_setinv, mock_makedirs]: + m.reset_mock() + + test_config = deepcopy(config) + for k in ["sds_dir", "stationxml_file", "sds_fmtstr"]: + test_config.pop(k) + + sc = waveform.Local_Store_Client(test_config) + + self.assertEqual( + os.path.normpath(sc.sds_root), os.path.normpath( + os.path.abspath(os.path.join( + test_config["proj_dir"], test_config["sds_dir"])))) + self.assertIsInstance(sc.rclient, waveform.sds.Client) + self.assertIsInstance(sc.lclient, waveform.sds.Client) + mock_makedirs.assert_called_once_with(test_config["proj_dir"], + exist_ok=True) + mock_setinv.assert_called_once() + self.assertTrue(hasattr(sc, "inventory")) + self.assertTrue(all([ + k in test_config for k in ["sds_dir", "stationxml_file", + "sds_fmtstr"]])) + + @mock.patch('seismic.trace_data.waveform.os.makedirs') + @mock.patch('seismic.trace_data.waveform.os.path.isdir') + @mock.patch('seismic.trace_data.waveform.os.listdir') @mock.patch('seismic.trace_data.waveform.read_inventory') - # @mock.patch('seismic.trace_data.waveform.sds.os.path.isdir') - @mock.patch('obspy.clients.filesystem.sds.os.path.isdir') - # @mock.patch('seismic.trace_data.waveform.sds.Client') - def setUp(self, sds_exists_mock, read_inventory_mock, - isdir_mock, listdir_mock, - ): - isdir_mock.return_value = True - listdir_mock.return_value = False - read_inventory_mock.return_value = Inventory() - sds_exists_mock.return_value = True - self.outdir = os.path.abspath(os.path.expanduser( - os.fspath(config["sds_dir"]))) - self.net = 'mynet' - self.stat = 'mystat' - self.sc = waveform.Local_Store_Client(config) + def 
test__set_inventory(self, mock_readinv, + mock_listdir, mock_isdir, mock_makedirs): + mock_readinv.return_value = read_inventory() + mock_isdir.return_value = True + mock_listdir.return_value = True + TEST_CONFIG = {"proj_dir": "test_proj_dir", + "net": {"network": ["GR"], "station": ["FUR", "WET"], + "component": "Z"}, + "co": {"read_start": "2006-12-16 00:00:00", + "read_end": "2007-02-01 00:00:00"} + } + + for as_list in [True, False]: + with self.subTest(as_list=as_list): + test_config = deepcopy(TEST_CONFIG) + if as_list: + test_config["net"]["network"] = ["GR"] + test_config["net"]["station"] = ["WET"] + else: + test_config["net"]["network"] = "GR" + test_config["net"]["station"] = "WET" + sc = waveform.Local_Store_Client(test_config) + sc._set_inventory(test_config) + mock_readinv.assert_called_with(test_config["stationxml_file"]) + self.assertIsInstance(sc.inventory, Inventory) + self.assertEqual(len(sc.inventory), 0) + + @mock.patch('seismic.trace_data.waveform.os.makedirs') + @mock.patch('seismic.trace_data.waveform.os.path.isdir') + @mock.patch('seismic.trace_data.waveform.os.listdir') + @mock.patch.object(trace_data.waveform.Local_Store_Client, + "_set_inventory") + def test_read_inventory( + self, mock_setinv, mock_listdir, mock_isdir, mock_makedirs): + mock_isdir.return_value = True + mock_listdir.return_value = True + sc = waveform.Local_Store_Client(deepcopy(config)) + + # Run if attribute `inventory` is not set + delattr(sc, "inventory") + inv = sc.read_inventory() + self.assertIsInstance(inv, Inventory) + self.assertEqual(len(inv), 0) + + # Run if `inventory` is set + sc.__setattr__("inventory", read_inventory()) + inv = sc.read_inventory() + self.assertIsInstance(inv, Inventory) + self.assertEqual(len(inv), 2) if __name__ == "__main__":