Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missing topo fix #755

Merged
merged 5 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def get_columns(cls, columns: dict, values: Dict[str, Any]) -> dict:
'musx' : 'MusX',
'cs' : 'ChSlp',
'alt' : 'alt',
'mainstem' : 'mainstem',
}
else:
default_columns = {
Expand Down Expand Up @@ -180,6 +181,8 @@ class Columns(BaseModel, extra='forbid'):
cs: str
# string, gage ID
gages: Optional[str]
# string, mainstem ID
mainstem: Optional[str]


class WaterbodyParameters(BaseModel, extra='forbid'):
Expand Down
93 changes: 51 additions & 42 deletions src/troute-network/troute/AbstractRouting.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def update_routing_domain(self, dataframe, connections, waterbody_dataframe):
self._diffusive_network_data = {}
diffusive_domain_all = {}
rconn_diff0 = reverse_network(connections)
all_links = []

for tw in diffusive_domain:
headlink_mainstem, rfc_val, rpu_val = list(diffusive_domain.get(tw).values())
Expand All @@ -196,17 +197,39 @@ def update_routing_domain(self, dataframe, connections, waterbody_dataframe):
'links': links,
'rfc': rfc_val,
'rpu': rpu_val,
'upstream_boundary_link_mainstem': []
'upstream_boundary_link_mainstem': [],
'targets': targets,
}
all_links = all_links + links
else:
headlink_mainstem = headlink_mainstem[0]
twlink_mainstem = tw
diffusive_domain_all[twlink_mainstem] = self.diffusive_domain_by_both_ends_streamid(connections, headlink_mainstem, twlink_mainstem, rfc_val, rpu_val)

self._diffusive_domain = diffusive_domain_all

# Load topobathy data and remove any links for which topo data cannot be obtained
topobathy_df = self.topobathy_df
missing_topo_ids = list(set(all_links).difference(set(topobathy_df.index)))
topo_df_list = []
for key in missing_topo_ids:
topo_df_list.append(_fill_in_missing_topo_data(key, dataframe, topobathy_df))

new_topo_df = pd.concat(topo_df_list)
bad_links = list(set(missing_topo_ids).difference(set(new_topo_df.index)))
self._topobathy_df = pd.concat([self.topobathy_df,new_topo_df])

for tw in self._diffusive_domain:
mainstem_segs = self._diffusive_domain[tw]['links']
#mainstem_segs = self._diffusive_domain[tw]['links']

wbody_ids = waterbody_dataframe.index.tolist()
targets = self._diffusive_domain[tw]['targets'] + bad_links

links = list(reachable(rconn_diff0, sources=[tw], targets=targets).get(tw))
outlet_ids = [connections.get(id)[0] for id in wbody_ids]
wbody_and_outlet_ids = wbody_ids + outlet_ids + bad_links
mainstem_segs = list(set(links).difference(set(wbody_and_outlet_ids)))

# we want mainstem_segs start at a mainstem link right after the upstream boundary mainstem link, which is
# in turn not under any waterbody. This boundary mainstem link should be turned into a tributary segment.
upstream_boundary_mainstem_link = self._diffusive_domain[tw]['upstream_boundary_link_mainstem']
Expand Down Expand Up @@ -328,49 +351,11 @@ def topobathy_df(self):
elif topobathy_file.suffix == '.parquet':
seg_ids = []
for tw in self._diffusive_domain:
seg_ids = seg_ids + self._diffusive_network_data[tw]['mainstem_segs']
seg_ids = seg_ids + self._diffusive_domain[tw]['links']
seg_ids = ['wb-' + str(seg) for seg in seg_ids]
self._topobathy_df = read_parquet(topobathy_file, seg_ids).set_index('hy_id')
self._topobathy_df.index = self._topobathy_df.index.astype(int)

# If any diffusive mainstem segments don't have channel bathy data in topobathy_df,
# estimate it from adjacent segments with available bathy data
for tw in self._diffusive_domain:
for mainstem_segment in self._diffusive_network_data[tw]['mainstem_segs']:
if mainstem_segment not in self._topobathy_df.index:
# Temporary solution: when topobathy is not available, use the available topobathy of the closest upstream segment
temp_df = pd.DataFrame()
position_mainstem_segment = self._diffusive_network_data[tw]['mainstem_segs'].index(mainstem_segment)
position_upstream_mainstem_segment = position_mainstem_segment
while temp_df.empty:
try:
position_upstream_mainstem_segment -= 1
upstream_mainstem_segment = self._diffusive_network_data[tw]['mainstem_segs'][position_upstream_mainstem_segment]
temp_df = self._topobathy_df[self._topobathy_df.index==upstream_mainstem_segment]
except KeyError:
# Handle the KeyError, e.g., break the loop or log an error
LOG.debug(f"KeyError: while filling in missing channel x-sec topobathy data, mainstem segment '{mainstem_segment}' does not have its upstream mainstem segment")
return None

new_index = pd.Index([mainstem_segment]*len(temp_df))
temp_df.index = new_index

if topobathy_file.suffix == '.nc':
fill_in_topobathy_df = temp_df

elif topobathy_file.suffix == '.parquet':
cs_id_max = temp_df['cs_id'].max()
# Select topobathy data at the most downstream of an upstream mainstem segment
fill_in_topobathy_df = pd.DataFrame(temp_df[temp_df.cs_id==cs_id_max])
fill_in_topobathy_df.cs_id = fill_in_topobathy_df.cs_id.replace(cs_id_max,1)

fill_in_topobathy_df.index.name = self._topobathy_df.index.name
combined_df = pd.concat([self._topobathy_df, fill_in_topobathy_df])
self._topobathy_df = combined_df

# Among multiple xsec profiles, select one in the most upstream of stream segment
if topobathy_file.suffix == '.parquet':
self._topobathy_df = self._topobathy_df.loc[self._topobathy_df.groupby(level='hy_id').cs_id.idxmin()]
return self._topobathy_df


Expand Down Expand Up @@ -445,3 +430,27 @@ def unrefactored_topobathy_df(self):
return self._unrefactored_topobathy_df


def _fill_in_missing_topo_data(original_key, dataframe, topobathy_df):
    """
    Borrow cross-section data for a segment that has none from the nearest
    upstream segment on the same mainstem that does.

    Parameters
    ----------
    original_key : segment ID (a label of ``dataframe``'s index) that is
        missing from ``topobathy_df``.
    dataframe : pandas.DataFrame with 'mainstem' and 'downstream' columns.
        NOTE(review): after ``reset_index()`` the segment IDs are read from a
        column called 'key', so the index is assumed to be named 'key' --
        confirm against the caller.
    topobathy_df : pandas.DataFrame indexed by segment ID and holding a
        'cs_id' column.

    Returns
    -------
    pandas.DataFrame
        The row(s) of the donor segment whose 'cs_id' equals that segment's
        maximum 'cs_id', with 'cs_id' rewritten to 1 and the index (named
        'hy_id') set to ``original_key``. An empty DataFrame is returned when
        no upstream segment on the mainstem has an entry in ``topobathy_df``.
    """
    mainstem = dataframe.loc[original_key].mainstem
    mainstem_df = dataframe[dataframe['mainstem'] == mainstem].reset_index()

    # Walk upstream one step at a time and stop at the first segment that has
    # topobathy data. Candidates are examined in the same order in which the
    # full upstream list would have been built, so the chosen donor is the same.
    key = original_key
    new_key = None
    visited = {original_key}
    while new_key is None:
        is_upstream = mainstem_df['downstream'] == key
        upstream = mainstem_df.loc[is_upstream, 'key'].tolist()
        if not upstream:
            # Reached the head of the mainstem without finding a donor.
            break
        new_key = next((e for e in upstream if e in topobathy_df.index), None)
        key = upstream[0]
        if key in visited:
            # Cyclic connectivity: stop instead of looping forever.
            break
        visited.add(key)

    # Compare against None explicitly: a segment ID of 0 is a valid donor.
    if new_key is None:
        return pd.DataFrame()

    # The donor's own index is dropped; it is replaced by original_key below.
    temp_df = topobathy_df.loc[[new_key]].reset_index(drop=True)
    cs_id_max = temp_df['cs_id'].max()
    # Select topobathy data at the most downstream of an upstream mainstem segment
    temp_df = temp_df[temp_df['cs_id'] == cs_id_max].copy()
    temp_df['cs_id'] = temp_df['cs_id'].replace(cs_id_max, 1)
    temp_df['hy_id'] = original_key
    return temp_df.set_index('hy_id')
3 changes: 3 additions & 0 deletions src/troute-network/troute/NHDNetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ def read_geo_file(self,):
'cs' : 'ChSlp',
}
)
# Remove 'mainstem' col if it exists:
if 'mainstem' in cols:
del cols['mainstem']

# numeric code used to indicate network terminal segments
terminal_code = self.supernetwork_parameters.get("terminal_code", 0)
Expand Down
1 change: 1 addition & 0 deletions test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ network_topology_parameters:
musx : 'MusX'
cs : 'ChSlp'
alt: 'alt'
mainstem: 'mainstem'
waterbody_parameters:
#----------
break_network_at_waterbodies: True
Expand Down
1 change: 1 addition & 0 deletions test/LowerColorado_TX_v4/test_AnA_V4_HYFeature_noDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ network_topology_parameters:
musx : 'MusX'
cs : 'ChSlp'
alt: 'alt'
mainstem: 'mainstem'
waterbody_parameters:
#----------
break_network_at_waterbodies: True
Expand Down
Loading