Skip to content

Commit

Permalink
geopandas: Correctly handle columns with integer values bigger than t…
Browse files Browse the repository at this point in the history
…he largest 32-bit integer (#2841)

Co-authored-by: Michael Grund <23025878+michaelgrund@users.noreply.github.com>
Co-authored-by: Yvonne Fröhlich <94163266+yvonnefroehlich@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 4, 2023
1 parent 716f622 commit 78dfcf1
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 19 deletions.
7 changes: 5 additions & 2 deletions pygmt/helpers/tempfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ def tempfile_from_geojson(geojson):
os.remove(tmpfile.name) # ensure file is deleted first
ogrgmt_kwargs = {"filename": tmpfile.name, "driver": "OGR_GMT", "mode": "w"}
try:
# Map int/int64 to int32 since OGR_GMT only supports 32-bit integer
# OGR_GMT only supports 32-bit integers. We need to map int/int64
# types to int32/float types depending on whether the column has a
# 32-bit integer overflow issue. Related issues:
# https://github.com/geopandas/geopandas/issues/967#issuecomment-842877704
# https://github.com/GenericMappingTools/pygmt/issues/2497
if geojson.index.name is None:
Expand All @@ -140,7 +142,8 @@ def tempfile_from_geojson(geojson):
schema = gpd.io.file.infer_schema(geojson)
for col, dtype in schema["properties"].items():
if dtype in ("int", "int64"):
schema["properties"][col] = "int32"
overflow = geojson[col].abs().max() > 2**31 - 1
schema["properties"][col] = "float" if overflow else "int32"
ogrgmt_kwargs["schema"] = schema
# Using geopandas.to_file to directly export to OGR_GMT format
geojson.to_file(**ogrgmt_kwargs)
Expand Down
74 changes: 57 additions & 17 deletions pygmt/tests/test_geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,27 @@ def fixture_gdf():
index=["multipolygon", "polygon", "linestring"],
geometry=[multipolygon, polygon, linestring],
)
return gdf


@pytest.fixture(scope="module", name="gdf_ridge")
def fixture_gdf_ridge():
    """
    Load the @RidgeTest.shp shapefile as a geopandas.GeoDataFrame whose
    geometry has been buffered and converted back to lon/lat.
    """
    # Request the shapefile together with its companion files; the first
    # returned entry is the one handed to geopandas
    ridge_files = which(
        fname="@RidgeTest.shp @RidgeTest.shx @RidgeTest.dbf @RidgeTest.prj",
        download="c",
    )
    ridge = gpd.read_file(ridge_files[0])
    # Reproject to EPSG:3857, buffer the geometry there, then convert to
    # lon/lat to prevent @null in PROJ CRS
    projected = ridge.to_crs(crs="EPSG:3857")
    buffered = projected.buffer(distance=100000)
    ridge["geometry"] = buffered.to_crs(crs="OGC:CRS84")
    return ridge


Expand Down Expand Up @@ -144,40 +164,60 @@ def test_geopandas_plot3d_non_default_circle():
],
)
@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")
def test_geopandas_plot_int_dtypes(dtype):
def test_geopandas_plot_int_dtypes(gdf_ridge, dtype):
"""
Check that plotting a geopandas GeoDataFrame with integer columns works,
Check that plotting a geopandas.GeoDataFrame with integer columns works,
including int32 and int64 (non-nullable), Int32 and Int64 (nullable).
This is a regression test for
https://github.com/GenericMappingTools/pygmt/issues/2497
"""
# Read shapefile in geopandas.GeoDataFrame
shapefile = which(
fname="@RidgeTest.shp @RidgeTest.shx @RidgeTest.dbf @RidgeTest.prj",
download="c",
)
gdf = gpd.read_file(shapefile[0])
# Convert NPOINTS column to integer type
gdf_ridge["NPOINTS"] = gdf_ridge.NPOINTS.astype(dtype=dtype)

# Reproject geometry and change dtype of NPOINTS column
gdf["geometry"] = (
gdf.to_crs(crs="EPSG:3857")
.buffer(distance=100000)
.to_crs(crs="OGC:CRS84") # convert to lon/lat to prevent @null in PROJ CRS
# Plot figure with three polygons colored based on NPOINTS value
fig = Figure()
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
fig.plot(
data=gdf_ridge,
frame=True,
pen="1p,black",
fill="+z",
cmap=True,
aspatial="Z=NPOINTS",
)
gdf["NPOINTS"] = gdf.NPOINTS.astype(dtype=dtype)
fig.colorbar()
return fig


@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")
def test_geopandas_plot_int64_as_float(gdf_ridge):
"""
Check that big 64-bit integers are correctly mapped to float type in
geopandas.GeoDataFrame object.
"""
factor = 2**32
# Convert NPOINTS column to int64 type and make big integers
gdf_ridge["NPOINTS"] = gdf_ridge.NPOINTS.astype(dtype="int64")
gdf_ridge["NPOINTS"] *= factor

# Make sure the column's largest absolute value exceeds the largest 32-bit integer
assert gdf_ridge["NPOINTS"].abs().max() > 2**31 - 1

# Plot figure with three polygons colored based on NPOINTS value
fig = Figure()
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
makecpt(
cmap="lisbon", series=[10 * factor, 60 * factor, 10 * factor], continuous=True
)
fig.plot(
data=gdf,
data=gdf_ridge,
frame=True,
pen="1p,black",
close=True,
fill="+z",
cmap=True,
aspatial="Z=NPOINTS",
)
# Generate a CPT for 10-60 range and plot to reuse the baseline image
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
fig.colorbar()
return fig

0 comments on commit 78dfcf1

Please sign in to comment.