From 60eb5649db6298d105b9f3ca2660ac682f10b8d7 Mon Sep 17 00:00:00 2001
From: iosefa <ipercival@gmail.com>
Date: Thu, 26 Sep 2024 01:19:59 -1000
Subject: [PATCH 1/6] Add EPT reader support to read_lidar function

Extended the read_lidar function to support 'ept.json' file types by recognizing the EPT reader. Also included a new 'bounds' parameter for spatial cropping and streamlined the pipeline building process for better maintenance. Updated tests to match the new validation error message.
---
 pyforestscan/handlers.py | 51 ++++++++++++++--------------------------
 tests/test_handlers.py   |  2 +-
 2 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/pyforestscan/handlers.py b/pyforestscan/handlers.py
index cedd42c..a00d8e9 100644
--- a/pyforestscan/handlers.py
+++ b/pyforestscan/handlers.py
@@ -158,12 +158,13 @@ def validate_crs(crs_list):
     return True
 
 
-def read_lidar(input_file, srs, thin_radius=None, hag=False, hag_dtm=False, dtm=None, crop_poly=False, poly=None):
+def read_lidar(input_file, srs, bounds=None, thin_radius=None, hag=False, hag_dtm=False, dtm=None, crop_poly=False, poly=None):
     """
     Reads and processes a LiDAR point cloud file using PDAL based on specified options.
 
-    :param srs:
     :param input_file: str, The path to the input LiDAR file. Supported formats are .las, .laz, .copc, and .copc.laz.
+    :param srs: str, The Spatial Reference System (SRS) of the point cloud.
+    :param bounds:  Bounds within which to crop the data. Must be of the form: ([xmin, xmax], [ymin, ymax], [zmin, zmax])
     :param thin_radius: float, optional, The radius for thinning the point cloud. Must be a positive number.
     :param hag: bool, optional, If True, calculate Height Above Ground (HAG) using Delaunay triangulation.
     :param hag_dtm: bool, optional, If True, calculate Height Above Ground (HAG) using a DTM file.
@@ -183,14 +184,19 @@ def read_lidar(input_file, srs, thin_radius=None, hag=False, hag_dtm=False, dtm=
 
     las_extensions = ('.las', '.laz')
     copc_extensions = ('.copc', '.copc.laz')
+    ept_file = ('ept.json')
 
     file_lower = input_file.lower()
     if file_lower.endswith(las_extensions):
         reader = 'readers.las'
     elif file_lower.endswith(copc_extensions):
         reader = 'readers.copc'
+    elif file_lower.endswith(ept_file):
+        reader = 'readers.ept'
     else:
-        raise ValueError("The input file must be a .las, .laz, .copc, or .copc.laz file.")
+        raise ValueError(
+            "The input file must be a .las, .laz, .copc, .copc.laz file, or an ept.json file."
+        )
 
     if hag and hag_dtm:
         raise ValueError("Cannot use both 'hag' and 'hag_dtm' options at the same time.")
@@ -224,39 +230,16 @@ def read_lidar(input_file, srs, thin_radius=None, hag=False, hag_dtm=False, dtm=
         crs_list.append(crs_raster)
         pipeline_stages.append(_hag_raster(dtm))
 
-    metadata_pipeline = pdal.Pipeline(
-        json.dumps({
-            "pipeline": [
-                {
-                    "type": reader,
-                    "spatialreference": srs, #todo: make srs optional
-                    "filename": input_file
-                },
-                {
-                    "type": "filters.info"
-                }
-            ]
-        })
-    )
-    metadata_pipeline.execute()
-
-    try:
-        crs_pointcloud = metadata_pipeline.metadata['metadata']['readers.las']['spatialreference']
-    except KeyError:
-        try:
-            crs_pointcloud = metadata_pipeline.metadata['metadata']['readers.copc']['spatialreference']
-        except KeyError:
-            raise ValueError("Unable to retrieve spatial reference from the point cloud metadata.")
-    crs_list.append(crs_pointcloud)
-
-    validate_crs(crs_list)
-
+    base_pipeline = {
+        "type": reader,
+        "spatialreference": srs,
+        "filename": input_file
+    }
+    if bounds:
+        base_pipeline["bounds"] = f"{bounds}"
     main_pipeline_json = {
         "pipeline": [
-            {
-                "type": reader,
-                "filename": input_file
-            }
+            base_pipeline
         ] + pipeline_stages
     }
 
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index fe55878..c1c291a 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -221,7 +221,7 @@ def test_read_lidar_missing_dtm_file():
 def test_read_lidar_unsupported_input_extension():
     input_file = get_test_data_path("input.txt")
     srs = "EPSG:4326"
-    with pytest.raises(ValueError, match="The input file must be a .las, .laz, .copc, or .copc.laz file."):
+    with pytest.raises(ValueError, match="The input file must be a .las, .laz, .copc, .copc.laz file, or an ept.json file."):
         read_lidar(
             input_file=input_file,
             srs=srs

From 7f2226b68f7161bd5f5c95b175dd602d88524e82 Mon Sep 17 00:00:00 2001
From: iosefa <ipercival@gmail.com>
Date: Thu, 26 Sep 2024 01:38:27 -1000
Subject: [PATCH 2/6] Support WKT input for polygon geometry in crop function

Allow the `poly` parameter to accept either a file path or directly a WKT string for defining the polygon geometry used in cropping. This enhancement simplifies user input by recognizing and processing WKT strings directly.
---
 pyforestscan/handlers.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyforestscan/handlers.py b/pyforestscan/handlers.py
index a00d8e9..c7958dc 100644
--- a/pyforestscan/handlers.py
+++ b/pyforestscan/handlers.py
@@ -170,7 +170,7 @@ def read_lidar(input_file, srs, bounds=None, thin_radius=None, hag=False, hag_dt
     :param hag_dtm: bool, optional, If True, calculate Height Above Ground (HAG) using a DTM file.
     :param dtm: str, optional, The path to the DTM file used when hag_dtm is True. Must be a .tif file.
     :param crop_poly: bool, optional, If True, crop the point cloud using the polygon defined in the poly file.
-    :param poly: str, optional, The path to the polygon file used for cropping.
+    :param poly: str, optional, The path to the polygon file used for cropping OR the WKT of the Polygon geometry.
 
     :return: numpy.ndarray, The processed point cloud data or None if no data is retrieved.
 
@@ -207,8 +207,10 @@ def read_lidar(input_file, srs, bounds=None, thin_radius=None, hag=False, hag_dt
     if crop_poly:
         if not poly or not os.path.isfile(poly):
             raise FileNotFoundError(f"No such polygon file: '{poly}'")
-        polygon_wkt, crs_vector = load_polygon_from_file(poly)
-        crs_list.append(crs_vector)
+        if poly.strip().startswith(('POLYGON', 'MULTIPOLYGON')):
+            polygon_wkt = poly
+        else:
+            polygon_wkt, crs_vector = load_polygon_from_file(poly)
         pipeline_stages.append(_crop_polygon(polygon_wkt))
 
     if thin_radius is not None:

From 0c2d65edbaf15a747c5622a41755c6e4ee3e7ebf Mon Sep 17 00:00:00 2001
From: iosefa <ipercival@gmail.com>
Date: Thu, 26 Sep 2024 09:03:43 -1000
Subject: [PATCH 3/6] Update FHD calculation and documentation

Change the term from Frequency Histogram Diversity to Foliage Height Diversity for clarity. Enhanced function documentation to better explain the input parameters and the output format while highlighting areas with no voxel returns resulting in NaN values.
---
 pyforestscan/calculate.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/pyforestscan/calculate.py b/pyforestscan/calculate.py
index 802e601..a4d8eb6 100644
--- a/pyforestscan/calculate.py
+++ b/pyforestscan/calculate.py
@@ -148,16 +148,20 @@ def calculate_pai(pad, min_height=1, max_height=None):
 
 def calculate_fhd(voxel_returns):
     """
-    Calculate the Frequency Histogram Diversity (FHD) for a given set of voxel returns.
+    Calculate the Foliage Height Diversity (FHD) for a given set of voxel returns.
 
-    This function computes the Frequency Histogram Diversity by calculating the entropy
-    of the voxel return proportions along the z-axis.
+    This function computes Foliage Height Diversity by calculating the entropy
+    of the voxel return proportions along the z-axis, which represents vertical structure
+    in the canopy.
 
     :param voxel_returns:
-        A numpy array of shape (x, y, z) representing voxel returns.
+        A numpy array of shape (x, y, z) representing voxel returns, where x and y are spatial
+        dimensions, and z represents height bins (or layers along the vertical axis).
 
     :return:
-        A numpy array of shape (x, y"""
+        A numpy array of shape (x, y) representing the FHD values for each (x, y) location.
+        Areas with no voxel returns will have NaN values.
+    """
     sum_counts = np.sum(voxel_returns, axis=2, keepdims=True)
 
     with np.errstate(divide='ignore', invalid='ignore'):

From 87832952d38b6d037044e97f9d41dbad0fc9b252 Mon Sep 17 00:00:00 2001
From: iosefa <ipercival@gmail.com>
Date: Thu, 26 Sep 2024 11:03:20 -1000
Subject: [PATCH 4/6] Update version from 0.1.6 to 0.1.7

This change updates the version of the pyforestscan package to 0.1.7. The increment signifies minor improvements or fixes in the package.
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index dfa25e1..c52dbcf 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="pyforestscan",
-    version="0.1.6",
+    version="0.1.7",
     author="Joseph Emile Honour Percival",
     author_email="ipercival@gmail.com",
     description="Analyzing forest structure using aerial LiDAR data",

From 91cfd0e1ac1cf472e0427cdc3099f0fd555394a0 Mon Sep 17 00:00:00 2001
From: iosefa <ipercival@gmail.com>
Date: Thu, 26 Sep 2024 11:07:41 -1000
Subject: [PATCH 5/6] Refactor polygon cropping validation

Simplified the validation logic for polygon input when cropping to a polygon. Now, the code clearly separates the responsibilities of checking for provided polygons and verifying file existence, improving readability and maintainability.
---
 pyforestscan/handlers.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyforestscan/handlers.py b/pyforestscan/handlers.py
index c7958dc..9317578 100644
--- a/pyforestscan/handlers.py
+++ b/pyforestscan/handlers.py
@@ -205,11 +205,13 @@ def read_lidar(input_file, srs, bounds=None, thin_radius=None, hag=False, hag_dt
     crs_list = []
 
     if crop_poly:
-        if not poly or not os.path.isfile(poly):
-            raise FileNotFoundError(f"No such polygon file: '{poly}'")
+        if not poly:
+            raise ValueError(f"Must provide a polygon or polygon wkt if cropping to a polygon.")
         if poly.strip().startswith(('POLYGON', 'MULTIPOLYGON')):
             polygon_wkt = poly
         else:
+            if not poly or not os.path.isfile(poly):
+                raise FileNotFoundError(f"No such polygon file: '{poly}'")
             polygon_wkt, crs_vector = load_polygon_from_file(poly)
         pipeline_stages.append(_crop_polygon(polygon_wkt))
 

From 13a1eef7a85126b27cd220358e86876b38ed86bd Mon Sep 17 00:00:00 2001
From: iosefa <ipercival@gmail.com>
Date: Thu, 26 Sep 2024 11:59:48 -1000
Subject: [PATCH 6/6] Add minor_version and in_srs parameters to pipeline steps

These changes ensure that the reprojection filter correctly identifies the input SRS with the `in_srs` parameter. Additionally, specifying `minor_version` in the output step aligns the format with expected versioning.
---
 pyforestscan/handlers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyforestscan/handlers.py b/pyforestscan/handlers.py
index 9317578..d4533b5 100644
--- a/pyforestscan/handlers.py
+++ b/pyforestscan/handlers.py
@@ -283,12 +283,14 @@ def write_las(arrays, output_file, srs=None, compress=True):
     if srs:
         pipeline_steps.append({
             "type": "filters.reprojection",
+            "in_srs": srs,
             "out_srs": srs
         })
 
     pipeline_steps.append({
         "type": output_format,
         "filename": output_file,
+        "minor_version": "4",
         "extra_dims": "all"
     })