Skip to content

Commit

Permalink
Merge pull request #251 from joshuacortez/chore/testing_notebooks
Browse files Browse the repository at this point in the history
Chore / Making notebooks up-to-date
  • Loading branch information
butchtm authored Aug 14, 2024
2 parents a6abea3 + 39ed72b commit caacdd3
Show file tree
Hide file tree
Showing 23 changed files with 1,127 additions and 1,509 deletions.
24 changes: 12 additions & 12 deletions geowrangler/area_zonal_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
# %% auto 0
__all__ = ['create_area_zonal_stats']

# %% ../notebooks/06_area_zonal_stats.ipynb 9
# %% ../notebooks/06_area_zonal_stats.ipynb 7
from typing import Any, Dict, List

import geopandas as gpd
import numpy as np
import geowrangler.vector_zonal_stats as vzs
from .vector_zonal_stats import GEO_INDEX_NAME

# %% ../notebooks/06_area_zonal_stats.ipynb 11
# %% ../notebooks/06_area_zonal_stats.ipynb 9
def extract_func(func):
# extra by default is none
extra = []
Expand All @@ -32,7 +32,7 @@ def extract_func(func):

return func, extra

# %% ../notebooks/06_area_zonal_stats.ipynb 13
# %% ../notebooks/06_area_zonal_stats.ipynb 11
def fix_area_agg(agg):
if "func" not in agg:
return agg # skip fix as agg spec is invalid
Expand All @@ -53,7 +53,7 @@ def fix_area_agg(agg):

return agg

# %% ../notebooks/06_area_zonal_stats.ipynb 16
# %% ../notebooks/06_area_zonal_stats.ipynb 14
def get_source_column(agg):

if "raw" in agg["extras"]:
Expand All @@ -74,35 +74,35 @@ def get_source_column(agg):
return intersect_aoi_column
return agg["column"] # everything else based on raw column

# %% ../notebooks/06_area_zonal_stats.ipynb 18
# %% ../notebooks/06_area_zonal_stats.ipynb 16
# Built-in aggregation spec: sums the "intersect_area" column into
# "intersect_area_sum". Note that "extras" is the plain string "raw" here
# (not a list), so membership tests against it are substring checks.
INTERSECT_AREA_AGG = dict(
    column="intersect_area",
    func="sum",
    output="intersect_area_sum",
    extras="raw",
)

# %% ../notebooks/06_area_zonal_stats.ipynb 19
# %% ../notebooks/06_area_zonal_stats.ipynb 17
def build_agg_area_dicts(aggs):
    """Build the output -> (source column, func) mapping for area aggregations.

    `INTERSECT_AREA_AGG` is always placed first, ahead of the caller's `aggs`.
    Each agg must provide the "output" and "func" keys, plus whatever
    `get_source_column` reads from it. The caller's list is not mutated: a
    new list is built locally.

    Returns a dict keyed by each agg's "output"; if two aggs share an
    output name, the later one wins.
    """
    all_aggs = [INTERSECT_AREA_AGG, *aggs]
    return {
        agg["output"]: (get_source_column(agg), agg["func"]) for agg in all_aggs
    }

# %% ../notebooks/06_area_zonal_stats.ipynb 22
# %% ../notebooks/06_area_zonal_stats.ipynb 20
def validate_area_aoi(aoi):
    """Reject an aoi whose CRS is geographic.

    Raises ValueError when `aoi.crs.is_geographic` is truthy; otherwise
    returns None.
    """
    # NOTE(review): an aoi with `crs` set to None would fail here with
    # AttributeError rather than ValueError -- confirm callers always set a CRS.
    if aoi.crs.is_geographic:
        raise ValueError(
            f"aoi has geographic crs: {aoi.crs}, areas maybe incorrectly computed"
        )

# %% ../notebooks/06_area_zonal_stats.ipynb 23
# %% ../notebooks/06_area_zonal_stats.ipynb 21
def validate_area_data(data):
    """Reject source data whose CRS is geographic.

    Raises ValueError when `data.crs.is_geographic` is truthy; otherwise
    returns None.
    """
    # NOTE(review): data with `crs` set to None would fail here with
    # AttributeError rather than ValueError -- confirm callers always set a CRS.
    if data.crs.is_geographic:
        raise ValueError(
            f"data has geographic crs: {data.crs}, areas maybe incorrectly computed"
        )

# %% ../notebooks/06_area_zonal_stats.ipynb 24
# %% ../notebooks/06_area_zonal_stats.ipynb 22
def expand_area_aggs(aggs):
expanded_aggs = []
for agg in aggs:
Expand All @@ -117,7 +117,7 @@ def expand_area_aggs(aggs):
expanded_aggs += [expanded_agg]
return expanded_aggs

# %% ../notebooks/06_area_zonal_stats.ipynb 25
# %% ../notebooks/06_area_zonal_stats.ipynb 23
def compute_intersect_stats(intersect, expanded_aggs):
# optimization - use df.apply to create all new columns simultaneously
for agg in expanded_aggs:
Expand All @@ -135,7 +135,7 @@ def compute_intersect_stats(intersect, expanded_aggs):
)
return intersect

# %% ../notebooks/06_area_zonal_stats.ipynb 26
# %% ../notebooks/06_area_zonal_stats.ipynb 24
def compute_imputed_stats(results, expanded_aggs):
# optimize with df.apply
# handle when intersect_area_sum is np.nan
Expand All @@ -147,7 +147,7 @@ def compute_imputed_stats(results, expanded_aggs):

return results

# %% ../notebooks/06_area_zonal_stats.ipynb 27
# %% ../notebooks/06_area_zonal_stats.ipynb 25
def create_area_zonal_stats(
aoi: gpd.GeoDataFrame, # Area of interest for which zonal stats are to be computed for
data: gpd.GeoDataFrame, # Source gdf of region/areas containing data to compute zonal stats from
Expand Down
6 changes: 3 additions & 3 deletions geowrangler/distance_zonal_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# %% auto 0
__all__ = ['create_distance_zonal_stats']

# %% ../notebooks/07_distance_zonal_stats.ipynb 9
# %% ../notebooks/07_distance_zonal_stats.ipynb 7
from typing import Any, Dict, List

import geopandas as gpd
Expand All @@ -12,7 +12,7 @@
import geowrangler.vector_zonal_stats as vzs
from .vector_zonal_stats import GEO_INDEX_NAME

# %% ../notebooks/07_distance_zonal_stats.ipynb 11
# %% ../notebooks/07_distance_zonal_stats.ipynb 9
INTERNAL_DISTANCE_COL = "__Geowrangleer_nearest_distance"


Expand All @@ -22,7 +22,7 @@ def build_agg_distance_dicts(expanded_aggs, distance_col):
agg_dicts[distance_col] = (INTERNAL_DISTANCE_COL, "mean")
return agg_dicts

# %% ../notebooks/07_distance_zonal_stats.ipynb 12
# %% ../notebooks/07_distance_zonal_stats.ipynb 10
def create_distance_zonal_stats(
aoi: gpd.GeoDataFrame, # Area of interest for which zonal stats are to be computed for
data: gpd.GeoDataFrame, # Source gdf of region/areas containing data to compute zonal stats from
Expand Down
8 changes: 4 additions & 4 deletions geowrangler/raster_to_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# %% auto 0
__all__ = ['GRID_ID', 'generate_mask', 'read_bands']

# %% ../notebooks/08_raster_to_dataframe.ipynb 10
# %% ../notebooks/08_raster_to_dataframe.ipynb 8
import json
from typing import Any, Dict, List

Expand All @@ -13,7 +13,7 @@
import rasterio as rio
import rasterio.mask

# %% ../notebooks/08_raster_to_dataframe.ipynb 13
# %% ../notebooks/08_raster_to_dataframe.ipynb 11
def _explode(gdf):
"""
Explodes a geodataframe
Expand All @@ -38,7 +38,7 @@ def _explode(gdf):

return gdf_out

# %% ../notebooks/08_raster_to_dataframe.ipynb 14
# %% ../notebooks/08_raster_to_dataframe.ipynb 12
GRID_ID = 1


Expand Down Expand Up @@ -116,7 +116,7 @@ def generate_mask(

return masks, grids, label_values

# %% ../notebooks/08_raster_to_dataframe.ipynb 16
# %% ../notebooks/08_raster_to_dataframe.ipynb 14
def read_bands(image_list: List[str], mask: str):

data = []
Expand Down
4 changes: 2 additions & 2 deletions geowrangler/spatialjoin_highest_intersection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
# %% auto 0
__all__ = ['get_highest_intersection']

# %% ../notebooks/12_spatialjoin_highest_intersection.ipynb 8
# %% ../notebooks/12_spatialjoin_highest_intersection.ipynb 6
import json
import os
import geopandas as gpd
import pandas as pd
import requests
from . import grids

# %% ../notebooks/12_spatialjoin_highest_intersection.ipynb 28
# %% ../notebooks/12_spatialjoin_highest_intersection.ipynb 23
def get_highest_intersection(
gdf1: gpd.GeoDataFrame, # gdf1 will be the basis of output geometry
gdf2: gpd.GeoDataFrame, # gdf2 data will all be included during intersection
Expand Down
6 changes: 3 additions & 3 deletions geowrangler/vector_to_raster_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# %% auto 0
__all__ = ['GRID_ID', 'generate_mask']

# %% ../notebooks/10_vector_to_raster_mask.ipynb 10
# %% ../notebooks/10_vector_to_raster_mask.ipynb 8
import json
from typing import Any, Dict

Expand All @@ -13,7 +13,7 @@
import rasterio as rio
import rasterio.mask

# %% ../notebooks/10_vector_to_raster_mask.ipynb 12
# %% ../notebooks/10_vector_to_raster_mask.ipynb 10
def _explode(gdf):
"""
Explodes a geodataframe
Expand All @@ -38,7 +38,7 @@ def _explode(gdf):

return gdf_out

# %% ../notebooks/10_vector_to_raster_mask.ipynb 13
# %% ../notebooks/10_vector_to_raster_mask.ipynb 11
GRID_ID = 1


Expand Down
34 changes: 17 additions & 17 deletions geowrangler/vector_zonal_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# %% auto 0
__all__ = ['create_zonal_stats', 'compute_quadkey', 'create_bingtile_zonal_stats']

# %% ../notebooks/02_vector_zonal_stats.ipynb 8
# %% ../notebooks/02_vector_zonal_stats.ipynb 6
GEO_INDEX_NAME = "__GeoWrangleer_aoi_index"

# %% ../notebooks/02_vector_zonal_stats.ipynb 9
# %% ../notebooks/02_vector_zonal_stats.ipynb 7
from functools import partial
from typing import Any, Dict, List

Expand All @@ -15,7 +15,7 @@
import numpy as np
import pandas as pd

# %% ../notebooks/02_vector_zonal_stats.ipynb 13
# %% ../notebooks/02_vector_zonal_stats.ipynb 11
def _fix_agg(
agg: Dict[str, Any], # A dict containing at the minimum a 'func' key
) -> Dict[str, Any]:
Expand Down Expand Up @@ -54,7 +54,7 @@ def _fix_agg(

return agg

# %% ../notebooks/02_vector_zonal_stats.ipynb 18
# %% ../notebooks/02_vector_zonal_stats.ipynb 16
def _check_agg(
agg: Dict[str, Any], # A dict containing at the minimum a 'func' key
i: int, # The index into the list of aggregations
Expand Down Expand Up @@ -95,7 +95,7 @@ def _check_agg(
f"fillna list {agg['fillna']} doesn't match func list {agg['func']} in agg[{i}] {agg}"
)

# %% ../notebooks/02_vector_zonal_stats.ipynb 21
# %% ../notebooks/02_vector_zonal_stats.ipynb 19
def _validate_aggs(
fixed_aggs: List[Dict[str, Any]], # A list of fixed agg specs
data: pd.DataFrame, # Source dataframe
Expand All @@ -111,7 +111,7 @@ def _validate_aggs(
)
outputs += agg["output"]

# %% ../notebooks/02_vector_zonal_stats.ipynb 26
# %% ../notebooks/02_vector_zonal_stats.ipynb 24
def _validate_aoi(
aoi: pd.DataFrame, # Source dataframe
) -> None:
Expand All @@ -120,7 +120,7 @@ def _validate_aoi(
"AOI has a pandas.MultiIndex. Please convert the index to a single level such as pd.RangeIndex"
)

# %% ../notebooks/02_vector_zonal_stats.ipynb 27
# %% ../notebooks/02_vector_zonal_stats.ipynb 25
def _expand_aggs(
aggs: List[Dict[str, Any]], # List of fixed valid aggs
) -> List[Dict[str, Any]]:
Expand All @@ -137,14 +137,14 @@ def _expand_aggs(
expanded_aggs += [expanded_agg]
return expanded_aggs

# %% ../notebooks/02_vector_zonal_stats.ipynb 29
# %% ../notebooks/02_vector_zonal_stats.ipynb 27
def _build_agg_args(
    aggs: List[Dict[str, Any]],  # A list of expanded aggs
) -> Dict:
    """Builds a dict of args with output as key and a tuple of column and func as value from a list of expanded aggs

    Every agg must carry the "output", "column" and "func" keys. When two
    aggs share the same "output", the later one replaces the earlier one.
    """
    return {agg["output"]: (agg["column"], agg["func"]) for agg in aggs}

# %% ../notebooks/02_vector_zonal_stats.ipynb 31
# %% ../notebooks/02_vector_zonal_stats.ipynb 29
def _prep_aoi(
aoi: pd.DataFrame, # Area of interest
) -> pd.DataFrame:
Expand All @@ -164,7 +164,7 @@ def _prep_aoi(
aoi = aoi.reset_index(level=0) # index added as new column named GEO_INDEX_NAME
return aoi

# %% ../notebooks/02_vector_zonal_stats.ipynb 36
# %% ../notebooks/02_vector_zonal_stats.ipynb 34
def _fillnas(
expanded_aggs: List[Dict[str, Any]], # list of expanded aggs
results: pd.DataFrame, # results dataframe to be filled with NAs if flag set
Expand All @@ -186,7 +186,7 @@ def _fillnas(

return results

# %% ../notebooks/02_vector_zonal_stats.ipynb 40
# %% ../notebooks/02_vector_zonal_stats.ipynb 38
def _aggregate_stats(
aoi: pd.DataFrame, # Area of interest
groups: pd.core.groupby.DataFrameGroupBy, # Source data aggregated into groups by GEO_INDEX_NAME
Expand All @@ -204,7 +204,7 @@ def _aggregate_stats(

return results

# %% ../notebooks/02_vector_zonal_stats.ipynb 46
# %% ../notebooks/02_vector_zonal_stats.ipynb 44
def create_zonal_stats(
aoi: gpd.GeoDataFrame, # Area of interest for which zonal stats are to be computed for
data: gpd.GeoDataFrame, # Source gdf containing data to compute zonal stats from
Expand Down Expand Up @@ -248,14 +248,14 @@ def create_zonal_stats(

return results

# %% ../notebooks/02_vector_zonal_stats.ipynb 62
# %% ../notebooks/02_vector_zonal_stats.ipynb 60
tms = morecantile.tms.get("WebMercatorQuad") # Tile Matrix for Bing Maps

# %% ../notebooks/02_vector_zonal_stats.ipynb 63
# %% ../notebooks/02_vector_zonal_stats.ipynb 61
def get_quadkey(geometry, zoom_level):
    """Return the quadkey of the tile that contains `geometry` at `zoom_level`.

    Only `geometry.x` and `geometry.y` are read, so a point-like object is
    expected. The lookup goes through the module-level `tms` tile matrix set.
    """
    tile = tms.tile(geometry.x, geometry.y, zoom_level)
    return tms.quadkey(tile)

# %% ../notebooks/02_vector_zonal_stats.ipynb 64
# %% ../notebooks/02_vector_zonal_stats.ipynb 62
def compute_quadkey(
data: gpd.GeoDataFrame, # The geodataframe
zoom_level: int, # The quadkey zoom level (1-23)
Expand All @@ -282,7 +282,7 @@ def compute_quadkey(

return data

# %% ../notebooks/02_vector_zonal_stats.ipynb 71
# %% ../notebooks/02_vector_zonal_stats.ipynb 69
def validate_aoi_quadkey(aoi, aoi_quadkey_column) -> None:

if aoi_quadkey_column not in list(aoi.columns.values):
Expand Down Expand Up @@ -310,7 +310,7 @@ def validate_data_quadkey(data, data_quadkey_column, min_zoom_level):
f"data quadkey levels cannot be less than aoi quadkey level {min_zoom_level}"
)

# %% ../notebooks/02_vector_zonal_stats.ipynb 72
# %% ../notebooks/02_vector_zonal_stats.ipynb 70
def create_bingtile_zonal_stats(
aoi: pd.DataFrame, # An aoi with quadkey column
data: pd.DataFrame, # Data with quadkey column
Expand Down
Loading

0 comments on commit caacdd3

Please sign in to comment.