Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The Tomorrow IO TMS converter #1590

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_subdirectory(ssec)
add_subdirectory(wrfda_ncdiag)
add_subdirectory(single_observation)
add_subdirectory(mrms)
add_subdirectory(tomorrow_io)

# Optional components
if(iodaconv_gnssro_ENABLED)
Expand Down
21 changes: 21 additions & 0 deletions src/tomorrow_io/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# (C) Copyright 2024 The Tomorrow Companies, Inc.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.

# Build the converter executable (Tomorrow.io TMS L1B-TC and NASA TROPICS L1B to IODA)
# from the sources and headers in this directory.
add_executable(convert_tio.x
creation.cc
creation_tio_sat_inst_specs.cc
copy.h
copy_function.cc
copy_helpers.cc
copy_tms.cc
copy_tms_datetime.cc
detect_tms_type.cc
product.cc
product.h
main_tio.cc
)
# The converter reads and writes observation data files through the ioda library.
target_link_libraries(convert_tio.x PUBLIC ioda)
# The sources use C++17 library features (e.g. std::optional in copy.h).
set_target_properties(convert_tio.x PROPERTIES CXX_STANDARD 17)

12 changes: 12 additions & 0 deletions src/tomorrow_io/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# The Tomorrow Microwave Sounder (TMS) converter

The source code here provides the ability to convert the Tomorrow.io TMS L1B-TC product
and the NASA TROPICS L1B product into IODA format.

Usage: `convert_tio.x input_file_1 [input_file_2 ...] output_file`

This converter is written in C++ and uses IODA to read and write the observation data files.
It provides an example of how to convert data to ioda using the C++ interface.

For details, contact [Ryan Honeyager](mailto:ryan.honeyager@tomorrow.io) (@rhoneyager-tomorrow).

63 changes: 63 additions & 0 deletions src/tomorrow_io/copy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#pragma once
/*
* (C) Copyright 2024 The Tomorrow Companies, Inc.
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
*/
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "ioda/ObsGroup.h"

namespace tio_converter {

/// Per-axis index ranges describing a hyperslab. Entry i optionally holds the
/// (first, last) indices selected along axis i; both bounds are inclusive.
/// An empty entry, or an entry missing because the vector is shorter than the
/// variable's dimensionality, selects the whole axis.
using DimensionRanges_t = std::vector<std::optional<std::pair<size_t, size_t>>>;

/// A variable paired with an optional hyperslab (per-axis index ranges) within it.
/// Used to describe both the source and the destination of copy().
struct VariableInfo {
// NOTE(review): presumably mutable so that copy() can write to the destination
// variable through a const VariableInfo reference - confirm against ioda's API.
mutable ioda::Variable var; ///< The variable.
/// Optionally defines a subset of indices along each axis. Bounds are inclusive on both sides.
/// Axes without an entry are used in full.
DimensionRanges_t range;
};

/// @brief Quantities derived from a VariableInfo that are needed to read or write
/// its selected hyperslab: dimensions, data type, selection geometry and buffer size.
struct VariableDerivedInfo {
  /// Supplementary information about the dimensions of the variable.
  DimensionRanges_t range;
  /// The dimensions of the variable.
  ioda::Dimensions dims;
  /// The type of the variable's data. Ex: unsigned little-endian 32-bit integer.
  ioda::Type type;
  /// The starting indices for a hyperslab selection.
  std::vector<ioda::Dimensions_t> selection_start;
  /// The span along each axis for a hyperslab selection.
  std::vector<ioda::Dimensions_t> selection_count;
  /// The number of data elements in this selection.
  /// Zero-initialized so that a default-constructed object never holds an indeterminate value.
  size_t selection_num_elements = 0;
  /// The size, in bytes, needed to store the variable's data, accounting for the selection.
  /// Zero-initialized so that a default-constructed object never holds an indeterminate value.
  size_t size_bytes = 0;
  /// Selection from the file
  ioda::Selection selection_ioda;
  /// Selection within memory (starts at 0,0,0,...)
  ioda::Selection selection_membuf;

  /// Creates an empty object that describes no variable and no selection.
  VariableDerivedInfo();
  /// Derives the selection information from a variable and the ranges stored with it.
  /// Intentionally left non-explicit to stay source-compatible with existing callers.
  VariableDerivedInfo(const VariableInfo &);
  /// Derives the selection information from a variable, using the supplied ranges
  /// instead of the ranges stored in the VariableInfo.
  VariableDerivedInfo(const VariableInfo &, const DimensionRanges_t &);
};

/// @brief Get the fill value assigned to a variable as a vector of bytes, and optionally
/// convert to a different type representation.
/// @details The variable's "_FillValue" attribute is used when it exists; otherwise the
/// fill value reported by the variable itself is used.
/// @param var is the variable to be queried.
/// @param as_type is the desired return value's data type. Normally this is the source
/// variable's data type, but optionally you can convert to a different representation.
/// Useful when converting between differing source and destination types.
/// @return The raw bytes of a single fill-value element. The vector's length is the size
/// of as_type when it is given, and the size of the variable's own type otherwise.
/// @throws std::logic_error if a conversion is requested for an unsupported type or fails.
std::vector<char> get_fill_value(const ioda::Variable &var, std::optional<ioda::Type> as_type = {});

/// @brief Generic function to copy a hyperslab of data from one variable to another.
/// @details The data are converted to the destination variable's data type while being
/// read. Elements equal to the source variable's fill value are replaced with the
/// destination variable's fill value before the data are written.
/// @param from is the specification of the source data. This includes the variable and the hyperslab.
/// @param to is the specification of the destination location.
void copy(const VariableInfo &from, const VariableInfo &to);

} // namespace tio_converter
56 changes: 56 additions & 0 deletions src/tomorrow_io/copy_function.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* (C) Copyright 2024 The Tomorrow Companies, Inc.
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
*/
#include <algorithm>
#include <cstring>
#include <exception>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "copy.h"
#include "hdf5.h"

namespace tio_converter {

/// @brief Copy a hyperslab of data from one variable to another.
/// @details The source data are read directly in the destination's data type, so the
/// type conversion happens during the read. Any element equal to the source
/// fill value (expressed in the destination type) is then replaced with the
/// destination's fill value, and the buffer is written to the destination hyperslab.
/// @param from is the source variable and the hyperslab to read.
/// @param to is the destination variable and the hyperslab to write.
/// @throws std::logic_error if the two hyperslabs do not contain the same number of
/// elements. The transfer buffer is shared between the read and the write, so a
/// mismatch would either overrun the buffer or write elements that were never read.
void copy(const VariableInfo &from, const VariableInfo &to) {
  using std::logic_error;
  using std::memcmp;
  using std::memcpy;
  using std::vector;

  VariableDerivedInfo from_info(from);
  VariableDerivedInfo to_info(to);

  if (from_info.selection_num_elements != to_info.selection_num_elements)
    throw logic_error("Source and destination selections differ in their number of elements.");

  // The buffer holds the selection in the destination's data type.
  vector<char> buffer(to_info.size_bytes);
  from.var.read(
      gsl::make_span(buffer.data(), buffer.size()),  // Read into the buffer
      to_info.type,                // Convert data into the destination data type (e.g. int, float, ...)
      from_info.selection_membuf,  // Needed to tell ioda how the data should be mapped into memory
      from_info.selection_ioda     // The hyperslab being read
  );

  // Both fill values are expressed in the destination's data type, so they can be
  // compared against buffer elements byte for byte.
  const vector<char> from_fill = get_fill_value(from.var, to_info.type);
  const vector<char> to_fill = get_fill_value(to.var);
  const size_t element_bytes = to_info.type.getSize();

  // Walk the buffer one element at a time and replace every occurrence of the source
  // fill value with the destination fill value. Nothing to do when they are identical.
  if (from_fill != to_fill) {
    for (size_t i = 0; i < buffer.size(); i += element_bytes) {
      if (!memcmp(buffer.data() + i, from_fill.data(), element_bytes))
        memcpy(buffer.data() + i, to_fill.data(), element_bytes);
    }
  }

  to.var.write(gsl::make_span(buffer.data(), buffer.size()),  // Write from this buffer
               to_info.type,                                   // Output variable type
               to_info.selection_membuf,                       // Data mapping in memory
               to_info.selection_ioda                          // The hyperslab being written
  );
}

} // namespace tio_converter
119 changes: 119 additions & 0 deletions src/tomorrow_io/copy_helpers.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* (C) Copyright 2024 The Tomorrow Companies, Inc.
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
*/
#include <algorithm>

#include "copy.h"
#include "hdf5.h"

namespace tio_converter {

namespace {
// Map an ioda type onto the matching predefined HDF5 type, chosen by type class,
// signedness and size in bytes. Only predefined HDF5 types are returned, and those
// hid_ts are static objects.
// NOTE (RH): ioda really should be extended to return the endianness of data.
// We assume little endian until this is fixed.
// Throws std::logic_error for any type that has no mapping here.
hid_t get_hdf5_type(const ioda::Type &typ) {
  const size_t width = typ.getSize();
  const ioda::TypeClass kind = typ.getClass();

  if (kind == ioda::TypeClass::Integer) {
    const bool is_signed = typ.isTypeSigned();
    switch (width) {
      case 1:
        return is_signed ? H5T_STD_I8LE : H5T_STD_U8LE;
      case 2:
        return is_signed ? H5T_STD_I16LE : H5T_STD_U16LE;
      case 4:
        return is_signed ? H5T_STD_I32LE : H5T_STD_U32LE;
      case 8:
        return is_signed ? H5T_STD_I64LE : H5T_STD_U64LE;
      default:
        break;
    }
  } else if (kind == ioda::TypeClass::Float) {
    switch (width) {
#ifdef H5T_NATIVE_FLOAT16  // Introduced in recent HDF5 versions
      case 2:
        return H5T_IEEE_F16LE;
#endif
      case 4:
        return H5T_IEEE_F32LE;
      case 8:
        return H5T_IEEE_F64LE;
      default:
        break;
    }
  }

  // Every other class / size combination is unsupported.
  throw std::logic_error("Unsupported object type.");
}
} // namespace

/// Creates an empty object that describes no variable and no selection.
VariableDerivedInfo::VariableDerivedInfo() = default;

/// Derives the selection information using the ranges stored alongside the variable.
VariableDerivedInfo::VariableDerivedInfo(const VariableInfo &vi)
    : VariableDerivedInfo(vi, vi.range) {}

/// @brief Derives dimensions, type, hyperslab selections and buffer size for a variable.
/// @param vi supplies the variable to be queried.
/// @param di supplies the inclusive index range along each axis. Axes with an empty
/// entry, or with no entry at all, are selected in full.
/// @throws std::logic_error if a range ends before it starts.
VariableDerivedInfo::VariableDerivedInfo(const VariableInfo &vi, const DimensionRanges_t &di) {
  using ioda::Dimensions_t;
  using ioda::SelectionOperator;
  using std::accumulate;
  using std::logic_error;
  using std::multiplies;
  using std::vector;

  range = di;
  dims = vi.var.getDimensions();
  type = vi.var.getType();

  const size_t rank = static_cast<size_t>(dims.dimensionality);
  // The in-memory buffer is addressed from the origin along every axis.
  vector<Dimensions_t> zero_starts(rank);
  selection_start.resize(rank);
  selection_count.resize(rank);

  for (size_t i = 0; i < rank; ++i) {
    if (range.size() > i && range[i]) {
      const auto &[first, last] = *range[i];
      // The bounds are unsigned, so an inverted range would otherwise wrap around
      // into an enormous element count.
      if (last < first) throw logic_error("Dimension range ends before it starts.");
      selection_start[i] = static_cast<Dimensions_t>(first);
      selection_count[i] = static_cast<Dimensions_t>(last - first + 1);  // Bounds are inclusive.
    } else {
      selection_start[i] = 0;
      selection_count[i] = dims.dimsCur[i];
    }
  }
  selection_ioda.extent(dims.dimsCur)
      .select({SelectionOperator::SET, selection_start, selection_count});
  selection_membuf.extent(selection_count)
      .select({SelectionOperator::SET, zero_starts, selection_count});

  // Determine the size of a buffer needed to read this data in its entirety.
  // This is just the product of selection_count. The initial value must be a size_t:
  // std::accumulate accumulates in the type of its initial value, and an int would
  // truncate the element count of large selections.
  selection_num_elements = accumulate(selection_count.begin(), selection_count.end(), size_t{1},
                                      multiplies<size_t>());
  const size_t size_of_element_bytes = type.getSize();
  size_bytes = selection_num_elements * size_of_element_bytes;
}

/// @brief Get the fill value assigned to a variable as raw bytes, optionally converted
/// to a different data type.
/// @param var is the variable to be queried.
/// @param as_type is the data type of the returned value. When empty, the value is
/// returned in the variable's own data type.
/// @return The bytes of a single fill-value element, sized for the returned data type.
/// @throws std::logic_error if either type has no HDF5 equivalent known to this file,
/// or if the HDF5 type conversion fails.
std::vector<char> get_fill_value(const ioda::Variable &var, std::optional<ioda::Type> as_type) {
  using std::logic_error;
  using std::max;
  using std::memcpy;
  using std::vector;

  const ioda::Type src_type = var.getType();
  const size_t len_bytes_src = src_type.getSize();
  vector<char> fill_bytes(len_bytes_src);
  // BUG (RH): ioda's getFillValue is very slightly buggy in that it reports a
  // spurious warning when reading the fill value of the TMS L1B MultiMask variable,
  // which has an unsigned char data type.
  // "ioda::Variable: hdf and netcdf fill value specifications do not match"
  // In this case, we can just read the _FillValue attribute directly.
  if (var.atts.exists("_FillValue")) {
    ioda::Attribute fvAttr = var.atts.open("_FillValue");
    // Read the attribute as the variable's data type, not as the attribute's own
    // stored type. The buffer is sized for the variable's type, so this keeps the read
    // within bounds and the bytes meaningful even if the two types differ.
    fvAttr.read(gsl::make_span(fill_bytes.data(), fill_bytes.size()), src_type);
  } else {
    const auto src_fill = var.getFillValue();
    memcpy(fill_bytes.data(), &(src_fill.fillValue_.ui64), len_bytes_src);
  }

  if (!as_type) return fill_bytes;

  const size_t len_bytes_to = as_type->getSize();
  const hid_t h5type_from = get_hdf5_type(src_type);
  const hid_t h5type_to = get_hdf5_type(*as_type);
  // H5Tconvert works in place, so the buffer must be able to hold the larger of the
  // two representations while the conversion runs.
  fill_bytes.resize(max(len_bytes_src, len_bytes_to));

  herr_t cvt_res = H5Tconvert(h5type_from, h5type_to,
                              1,  // Only one 'element' to be converted
                              fill_bytes.data(), nullptr, H5P_DEFAULT);
  if (cvt_res < 0) throw logic_error("Fill value type conversion failed.");
  fill_bytes.resize(len_bytes_to);
  return fill_bytes;
}

} // namespace tio_converter
Loading