From 071ca8e304da31a4ad376c9aee79fa771c6cd46a Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Fri, 24 Jan 2025 11:05:12 +0100
Subject: [PATCH 01/14] feat: add percentile valid points in

---
 geoutils/raster/raster.py        | 3 +++
 tests/test_raster/test_raster.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index a6ecde43..aefe498b 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1920,6 +1920,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             "NMAD": nmad(data),
             "RMSE": np.sqrt(np.nanmean(np.square(data - np.nanmean(data)))),
             "Standard deviation": np.nanstd(data),
+            "Percentile valid points": (np.count_nonzero(~np.isnan(data)) / data.size) * 100,
         }
         return stats_dict
 
@@ -2004,6 +2005,8 @@ def get_stats(
             "stddev": "Standard deviation",
             "standarddev": "Standard deviation",
             "standarddeviation": "Standard deviation",
+            "percentvalidpoint": "Percentile valid point",
+            "validpoint": "Percentile valid point",
         }
         if isinstance(stats_name, list):
             result = {}
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index d464879c..a4a54c6e 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1998,13 +1998,14 @@ def test_stats(self, example: str, caplog) -> None:
             "NMAD",
             "RMSE",
             "Standard deviation",
+            "Percentile valid points",
         ]
         for name in expected_stats:
             assert name in stats
             assert stats.get(name) is not None
 
         # Single stat
-        stat = raster.get_stats(stats_name="Average")
+        stat = raster.get_stats(stats_name="average")
         assert isinstance(stat, np.floating)
 
         def percentile_95(data: NDArrayNum) -> np.floating[Any]:

From 6344f22a62b4336b64c85d2b6c1de61f1f5df11e Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Fri, 24 Jan 2025 16:35:43 +0100
Subject: [PATCH 02/14] fix: remove rmse from  and update docstrings

---
 geoutils/raster/raster.py        | 55 +++++++++++++++++++++++++++-----
 tests/test_raster/test_raster.py |  1 -
 2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index aefe498b..1faa8d62 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1897,7 +1897,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, RMSE, and standard deviation.
+        min, sum, sum of squares, 90th percentile, NMAD, standard deviation, and percentile valid points.
         """
         if self.count == 1:
             data = self.data
@@ -1918,7 +1918,6 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             "Sum of squares": np.nansum(np.square(data)),
             "90th percentile": np.nanpercentile(data, 90),
             "NMAD": nmad(data),
-            "RMSE": np.sqrt(np.nanmean(np.square(data - np.nanmean(data)))),
             "Standard deviation": np.nanstd(data),
             "Percentile valid points": (np.count_nonzero(~np.isnan(data)) / data.size) * 100,
         }
@@ -1928,7 +1927,18 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
     def get_stats(
         self,
         stats_name: (
-            Literal["mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std"]
+            Literal[
+                "mean",
+                "median",
+                "max",
+                "min",
+                "sum",
+                "sum of squares",
+                "90th percentile",
+                "nmad",
+                "std",
+                "percentile valid points",
+            ]
             | Callable[[NDArrayNum], np.floating[Any]]
         ),
         band: int = 1,
@@ -1940,7 +1950,16 @@ def get_stats(
         stats_name: (
             list[
                 Literal[
-                    "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std"
+                    "mean",
+                    "median",
+                    "max",
+                    "min",
+                    "sum",
+                    "sum of squares",
+                    "90th percentile",
+                    "nmad",
+                    "std",
+                    "percentile valid points",
                 ]
                 | Callable[[NDArrayNum], np.floating[Any]]
             ]
@@ -1952,11 +1971,31 @@ def get_stats(
     def get_stats(
         self,
         stats_name: (
-            Literal["mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std"]
+            Literal[
+                "mean",
+                "median",
+                "max",
+                "min",
+                "sum",
+                "sum of squares",
+                "90th percentile",
+                "nmad",
+                "std",
+                "percentile valid points",
+            ]
             | Callable[[NDArrayNum], np.floating[Any]]
             | list[
                 Literal[
-                    "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std"
+                    "mean",
+                    "median",
+                    "max",
+                    "min",
+                    "sum",
+                    "sum of squares",
+                    "90th percentile",
+                    "nmad",
+                    "std",
+                    "percentile valid points",
                 ]
                 | Callable[[NDArrayNum], np.floating[Any]]
             ]
@@ -1970,7 +2009,8 @@ def get_stats(
 
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
                    Accepted names include:
-                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std"
+                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "std",
+                   "percentile valid points".
                    You can also use common aliases for these names (e.g., "average", "maximum", "minimum", etc.).
                    Custom callables can also be provided.
         :param band: The index of the band for which to compute statistics. Default is 1.
@@ -2000,7 +2040,6 @@ def get_stats(
             "90percentile": "90th percentile",
             "percentile90": "90th percentile",
             "nmad": "NMAD",
-            "rmse": "RMSE",
             "std": "Standard deviation",
             "stddev": "Standard deviation",
             "standarddev": "Standard deviation",
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index a4a54c6e..068257f3 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1996,7 +1996,6 @@ def test_stats(self, example: str, caplog) -> None:
             "Sum of squares",
             "90th percentile",
             "NMAD",
-            "RMSE",
             "Standard deviation",
             "Percentile valid points",
         ]

From 70f4a1ea57015302b90d515ded3ab0c110ef89c7 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Tue, 4 Feb 2025 11:42:55 +0100
Subject: [PATCH 03/14] feat: add  in stats, typing

---
 doc/source/raster_class.md       |  5 +++--
 geoutils/raster/raster.py        | 33 ++++++++++++++++----------------
 tests/test_raster/test_raster.py |  9 +++++----
 3 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index ec3d2316..59c1b8e1 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -361,7 +361,8 @@ rast_reproj.to_xarray()
 
 ## Obtain Statistics
 The `get_stats()` method allows to extract key statistical information from a raster in a dictionary.
-Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, rmse, std.
+Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, std, count and
+percentage valid points.
 Callable functions are supported as well.
 
 ### Usage Examples:
@@ -377,7 +378,7 @@ rast.get_stats("mean")
 
 - Get multiple statistics in a dict:
 ```{code-cell} ipython3
-rast.get_stats(["mean", "max", "rmse"])
+rast.get_stats(["mean", "max", "std"])
 ```
 
 - Using a custom callable statistic:
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 1faa8d62..9542bcbe 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1897,7 +1897,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, standard deviation, and percentile valid points.
+        min, sum, sum of squares, 90th percentile, NMAD, standard deviation, count, and percentage valid points.
         """
         if self.count == 1:
             data = self.data
@@ -1909,6 +1909,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             data = data.compressed()
 
         # Compute the statistics
+        count = np.count_nonzero(np.isfinite(data))
         stats_dict = {
             "Mean": np.nanmean(data),
             "Median": np.nanmedian(data),
@@ -1919,7 +1920,8 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             "90th percentile": np.nanpercentile(data, 90),
             "NMAD": nmad(data),
             "Standard deviation": np.nanstd(data),
-            "Percentile valid points": (np.count_nonzero(~np.isnan(data)) / data.size) * 100,
+            "Valid count": count,
+            "Percentage valid points": (count / data.size) * 100,
         }
         return stats_dict
 
@@ -1937,7 +1939,8 @@ def get_stats(
                 "90th percentile",
                 "nmad",
                 "std",
-                "percentile valid points",
+                "count",
+                "percentage valid points",
             ]
             | Callable[[NDArrayNum], np.floating[Any]]
         ),
@@ -1959,7 +1962,8 @@ def get_stats(
                     "90th percentile",
                     "nmad",
                     "std",
-                    "percentile valid points",
+                    "count",
+                    "percentage valid points",
                 ]
                 | Callable[[NDArrayNum], np.floating[Any]]
             ]
@@ -1981,7 +1985,8 @@ def get_stats(
                 "90th percentile",
                 "nmad",
                 "std",
-                "percentile valid points",
+                "count",
+                "percentage valid points",
             ]
             | Callable[[NDArrayNum], np.floating[Any]]
             | list[
@@ -1995,7 +2000,8 @@ def get_stats(
                     "90th percentile",
                     "nmad",
                     "std",
-                    "percentile valid points",
+                    "count",
+                    "percentage valid points",
                 ]
                 | Callable[[NDArrayNum], np.floating[Any]]
             ]
@@ -2009,8 +2015,8 @@ def get_stats(
 
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
                    Accepted names include:
-                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "std",
-                   "percentile valid points".
+                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "std", "count",
+                   "percentage valid points".
                    You can also use common aliases for these names (e.g., "average", "maximum", "minimum", etc.).
                    Custom callables can also be provided.
         :param band: The index of the band for which to compute statistics. Default is 1.
@@ -2026,7 +2032,6 @@ def get_stats(
         # Define the metric aliases and their actual names
         stats_aliases = {
             "mean": "Mean",
-            "average": "Mean",
             "median": "Median",
             "max": "Max",
             "maximum": "Max",
@@ -2035,17 +2040,13 @@ def get_stats(
             "sum": "Sum",
             "sumofsquares": "Sum of squares",
             "sum2": "Sum of squares",
-            "percentile": "90th percentile",
             "90thpercentile": "90th percentile",
             "90percentile": "90th percentile",
-            "percentile90": "90th percentile",
             "nmad": "NMAD",
             "std": "Standard deviation",
-            "stddev": "Standard deviation",
-            "standarddev": "Standard deviation",
-            "standarddeviation": "Standard deviation",
-            "percentvalidpoint": "Percentile valid point",
-            "validpoint": "Percentile valid point",
+            "count": "Valid count",
+            "percentagevalidpoint": "Percentage valid point",
+            "validpoint": "Percentage valid point",
         }
         if isinstance(stats_name, list):
             result = {}
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index 068257f3..47e6ca6b 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1997,14 +1997,15 @@ def test_stats(self, example: str, caplog) -> None:
             "90th percentile",
             "NMAD",
             "Standard deviation",
-            "Percentile valid points",
+            "Count",
+            "Percentage valid points",
         ]
         for name in expected_stats:
             assert name in stats
             assert stats.get(name) is not None
 
         # Single stat
-        stat = raster.get_stats(stats_name="average")
+        stat = raster.get_stats(stats_name="mean")
         assert isinstance(stat, np.floating)
 
         def percentile_95(data: NDArrayNum) -> np.floating[Any]:
@@ -2016,8 +2017,8 @@ def percentile_95(data: NDArrayNum) -> np.floating[Any]:
         assert isinstance(stat, np.floating)
 
         # Selected stats and callable
-        stats_name = ["mean", "maximum", "std", "percentile_95"]
-        stats = raster.get_stats(stats_name=["mean", "maximum", "std", percentile_95])
+        stats_name = ["mean", "max", "std", "percentile_95"]
+        stats = raster.get_stats(stats_name=["mean", "max", "std", percentile_95])
         for name in stats_name:
             assert name in stats
             assert stats.get(name) is not None

From 517f56b5af873eae18cbc88cdddfe5390398751b Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Tue, 4 Feb 2025 16:51:01 +0100
Subject: [PATCH 04/14] feat: add counts whithout mask in stats

---
 doc/source/raster_class.md       |  2 +-
 geoutils/raster/raster.py        | 35 +++++++++++++++++---------------
 tests/test_raster/test_raster.py |  4 +++-
 3 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index 59c1b8e1..39a693a3 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -361,7 +361,7 @@ rast_reproj.to_xarray()
 
 ## Obtain Statistics
 The `get_stats()` method allows to extract key statistical information from a raster in a dictionary.
-Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, std, count and
+Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, std, valid points and
 percentage valid points.
 Callable functions are supported as well.
 
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 9542bcbe..99dd95a9 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1897,19 +1897,21 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, standard deviation, count, and percentage valid points.
+        min, sum, sum of squares, 90th percentile, NMAD, standard deviation, valid points, and percentage valid points.
         """
+
         if self.count == 1:
             data = self.data
         else:
             data = self.data[band - 1]
 
-        # If data is a MaskedArray, use the compressed version (without masked values)
-        if isinstance(data, np.ma.MaskedArray):
-            data = data.compressed()
+        # Use the compressed version (without masked values)
+        valid_points_no_mask = np.count_nonzero(np.isfinite(data))
+        percentage_valid_points_no_mask = valid_points_no_mask / data.size * 100
+        data = data.compressed()
 
         # Compute the statistics
-        count = np.count_nonzero(np.isfinite(data))
+        valid_points = np.count_nonzero(np.isfinite(data))
         stats_dict = {
             "Mean": np.nanmean(data),
             "Median": np.nanmedian(data),
@@ -1920,8 +1922,10 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             "90th percentile": np.nanpercentile(data, 90),
             "NMAD": nmad(data),
             "Standard deviation": np.nanstd(data),
-            "Valid count": count,
-            "Percentage valid points": (count / data.size) * 100,
+            "Valid points": valid_points,
+            "Percentage valid points": (valid_points / data.size) * 100,
+            "Valid points no mask": valid_points_no_mask,
+            "Percentage valid points no mask": percentage_valid_points_no_mask,
         }
         return stats_dict
 
@@ -1939,7 +1943,7 @@ def get_stats(
                 "90th percentile",
                 "nmad",
                 "std",
-                "count",
+                "valid points",
                 "percentage valid points",
             ]
             | Callable[[NDArrayNum], np.floating[Any]]
@@ -1962,7 +1966,7 @@ def get_stats(
                     "90th percentile",
                     "nmad",
                     "std",
-                    "count",
+                    "valid points",
                     "percentage valid points",
                 ]
                 | Callable[[NDArrayNum], np.floating[Any]]
@@ -1985,7 +1989,7 @@ def get_stats(
                 "90th percentile",
                 "nmad",
                 "std",
-                "count",
+                "valid points",
                 "percentage valid points",
             ]
             | Callable[[NDArrayNum], np.floating[Any]]
@@ -2000,7 +2004,7 @@ def get_stats(
                     "90th percentile",
                     "nmad",
                     "std",
-                    "count",
+                    "valid points",
                     "percentage valid points",
                 ]
                 | Callable[[NDArrayNum], np.floating[Any]]
@@ -2015,8 +2019,8 @@ def get_stats(
 
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
                    Accepted names include:
-                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "std", "count",
-                   "percentage valid points".
+                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "std",
+                   "valid points", "percentage valid points".
                    You can also use common aliases for these names (e.g., "average", "maximum", "minimum", etc.).
                    Custom callables can also be provided.
         :param band: The index of the band for which to compute statistics. Default is 1.
@@ -2044,9 +2048,8 @@ def get_stats(
             "90percentile": "90th percentile",
             "nmad": "NMAD",
             "std": "Standard deviation",
-            "count": "Valid count",
-            "percentagevalidpoint": "Percentage valid point",
-            "validpoint": "Percentage valid point",
+            "validpoints": "Valid points",
+            "percentagevalidpoints": "Percentage valid point",
         }
         if isinstance(stats_name, list):
             result = {}
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index 47e6ca6b..9e50e9cb 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1997,8 +1997,10 @@ def test_stats(self, example: str, caplog) -> None:
             "90th percentile",
             "NMAD",
             "Standard deviation",
-            "Count",
+            "Valid points",
             "Percentage valid points",
+            "Valid points no mask",
+            "Percentage valid points no mask",
         ]
         for name in expected_stats:
             assert name in stats

From f0035aadbcb2ada0a50fb46b7e57eee796a4c756 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Mon, 10 Feb 2025 16:53:13 +0100
Subject: [PATCH 05/14] feat: add rmse in raster stats

---
 doc/source/raster_class.md       |  5 +++--
 geoutils/raster/raster.py        | 13 ++++++++++---
 tests/test_raster/test_raster.py |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index 39a693a3..d31ef5a9 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -361,8 +361,9 @@ rast_reproj.to_xarray()
 
 ## Obtain Statistics
 The `get_stats()` method allows to extract key statistical information from a raster in a dictionary.
-Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, std, valid points and
-percentage valid points.
+Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, rmse, std, valid points
+and percentage valid points.
+The RMSE is only relevant when the raster represents a difference of two objects.
 Callable functions are supported as well.
 
 ### Usage Examples:
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 99dd95a9..e37d0d76 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1897,7 +1897,8 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, standard deviation, valid points, and percentage valid points.
+        min, sum, sum of squares, 90th percentile, NMAD, RMSE, standard deviation, valid points, and percentage
+        valid points.
         """
 
         if self.count == 1:
@@ -1921,6 +1922,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             "Sum of squares": np.nansum(np.square(data)),
             "90th percentile": np.nanpercentile(data, 90),
             "NMAD": nmad(data),
+            "RMSE": np.sqrt(np.nanmean(np.square(data))),
             "Standard deviation": np.nanstd(data),
             "Valid points": valid_points,
             "Percentage valid points": (valid_points / data.size) * 100,
@@ -1942,6 +1944,7 @@ def get_stats(
                 "sum of squares",
                 "90th percentile",
                 "nmad",
+                "rmse",
                 "std",
                 "valid points",
                 "percentage valid points",
@@ -1965,6 +1968,7 @@ def get_stats(
                     "sum of squares",
                     "90th percentile",
                     "nmad",
+                    "rmse",
                     "std",
                     "valid points",
                     "percentage valid points",
@@ -1988,6 +1992,7 @@ def get_stats(
                 "sum of squares",
                 "90th percentile",
                 "nmad",
+                "rmse",
                 "std",
                 "valid points",
                 "percentage valid points",
@@ -2003,6 +2008,7 @@ def get_stats(
                     "sum of squares",
                     "90th percentile",
                     "nmad",
+                    "rmse",
                     "std",
                     "valid points",
                     "percentage valid points",
@@ -2019,9 +2025,8 @@ def get_stats(
 
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
                    Accepted names include:
-                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "std",
+                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std",
                    "valid points", "percentage valid points".
-                   You can also use common aliases for these names (e.g., "average", "maximum", "minimum", etc.).
                    Custom callables can also be provided.
         :param band: The index of the band for which to compute statistics. Default is 1.
 
@@ -2047,6 +2052,8 @@ def get_stats(
             "90thpercentile": "90th percentile",
             "90percentile": "90th percentile",
             "nmad": "NMAD",
+            "rmse": "RMSE",
+            "rms": "RMSE",
             "std": "Standard deviation",
             "validpoints": "Valid points",
             "percentagevalidpoints": "Percentage valid point",
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index 9e50e9cb..b3c70208 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1996,6 +1996,7 @@ def test_stats(self, example: str, caplog) -> None:
             "Sum of squares",
             "90th percentile",
             "NMAD",
+            "RMSE",
             "Standard deviation",
             "Valid points",
             "Percentage valid points",

From d186d68975fc4fc22e9e31a1ada1768f8b1184f7 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Mon, 10 Feb 2025 16:55:38 +0100
Subject: [PATCH 06/14] fix doctestitem

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index ec21a591..b552ee72 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 """Configuration of pytest."""
 
-from pytest import DoctestItem
+from _pytest.doctest import DoctestItem
 
 
 # To order test modules logically during execution

From 22c981892b0f59d2516af5a6ddd39f1156c119e1 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Tue, 11 Feb 2025 14:39:44 +0100
Subject: [PATCH 07/14] fix: correct valid points in stats, correct typing

---
 doc/source/raster_class.md       |   4 +-
 geoutils/raster/raster.py        | 117 +++++++------------------------
 tests/test_raster/test_raster.py |  15 ++--
 3 files changed, 36 insertions(+), 100 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index d31ef5a9..59feeb38 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -361,8 +361,8 @@ rast_reproj.to_xarray()
 
 ## Obtain Statistics
 The `get_stats()` method allows to extract key statistical information from a raster in a dictionary.
-Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, rmse, std, valid points
-and percentage valid points.
+Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, rmse, std, valid points,
+percentage valid points, valid points all data, percentage valid points all data.
 The RMSE is only relevant when the raster represents a difference of two objects.
 Callable functions are supported as well.
 
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index e37d0d76..5e72ced7 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1897,8 +1897,8 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, RMSE, standard deviation, valid points, and percentage
-        valid points.
+        min, sum, sum of squares, 90th percentile, NMAD, RMSE, standard deviation, valid points, percentage
+        valid points, valid points all data, percentage valid points all data.
         """
 
         if self.count == 1:
@@ -1906,116 +1906,46 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         else:
             data = self.data[band - 1]
 
-        # Use the compressed version (without masked values)
-        valid_points_no_mask = np.count_nonzero(np.isfinite(data))
-        percentage_valid_points_no_mask = valid_points_no_mask / data.size * 100
-        data = data.compressed()
-
         # Compute the statistics
-        valid_points = np.count_nonzero(np.isfinite(data))
+        compressed_data = data.compressed()
+        valid_points = np.count_nonzero(np.isfinite(compressed_data))
+        valid_points_all_data = np.count_nonzero(np.isfinite(data))
         stats_dict = {
-            "Mean": np.nanmean(data),
-            "Median": np.nanmedian(data),
-            "Max": np.nanmax(data),
-            "Min": np.nanmin(data),
-            "Sum": np.nansum(data),
-            "Sum of squares": np.nansum(np.square(data)),
-            "90th percentile": np.nanpercentile(data, 90),
-            "NMAD": nmad(data),
-            "RMSE": np.sqrt(np.nanmean(np.square(data))),
-            "Standard deviation": np.nanstd(data),
-            "Valid points": valid_points,
+            "Mean": np.nanmean(compressed_data),
+            "Median": np.nanmedian(compressed_data),
+            "Max": np.nanmax(compressed_data),
+            "Min": np.nanmin(compressed_data),
+            "Sum": np.nansum(compressed_data),
+            "Sum of squares": np.nansum(np.square(compressed_data)),
+            "90th percentile": np.nanpercentile(compressed_data, 90),
+            "NMAD": nmad(compressed_data),
+            "RMSE": np.sqrt(np.nanmean(np.square(compressed_data))),
+            "Standard deviation": np.nanstd(compressed_data),
+            "Valid points": np.count_nonzero(np.isfinite(compressed_data)),
             "Percentage valid points": (valid_points / data.size) * 100,
-            "Valid points no mask": valid_points_no_mask,
-            "Percentage valid points no mask": percentage_valid_points_no_mask,
+            "Valid points all data": valid_points_all_data,
+            "Percentage valid points all data": (valid_points_all_data / data.size) * 100,
         }
         return stats_dict
 
     @overload
     def get_stats(
         self,
-        stats_name: (
-            Literal[
-                "mean",
-                "median",
-                "max",
-                "min",
-                "sum",
-                "sum of squares",
-                "90th percentile",
-                "nmad",
-                "rmse",
-                "std",
-                "valid points",
-                "percentage valid points",
-            ]
-            | Callable[[NDArrayNum], np.floating[Any]]
-        ),
+        stats_name: str | Callable[[NDArrayNum], np.floating[Any]],
         band: int = 1,
     ) -> np.floating[Any]: ...
 
     @overload
     def get_stats(
         self,
-        stats_name: (
-            list[
-                Literal[
-                    "mean",
-                    "median",
-                    "max",
-                    "min",
-                    "sum",
-                    "sum of squares",
-                    "90th percentile",
-                    "nmad",
-                    "rmse",
-                    "std",
-                    "valid points",
-                    "percentage valid points",
-                ]
-                | Callable[[NDArrayNum], np.floating[Any]]
-            ]
-            | None
-        ) = None,
+        stats_name: list[str | Callable[[NDArrayNum], np.floating[Any]]] | None = None,
         band: int = 1,
     ) -> dict[str, np.floating[Any]]: ...
 
     def get_stats(
         self,
         stats_name: (
-            Literal[
-                "mean",
-                "median",
-                "max",
-                "min",
-                "sum",
-                "sum of squares",
-                "90th percentile",
-                "nmad",
-                "rmse",
-                "std",
-                "valid points",
-                "percentage valid points",
-            ]
-            | Callable[[NDArrayNum], np.floating[Any]]
-            | list[
-                Literal[
-                    "mean",
-                    "median",
-                    "max",
-                    "min",
-                    "sum",
-                    "sum of squares",
-                    "90th percentile",
-                    "nmad",
-                    "rmse",
-                    "std",
-                    "valid points",
-                    "percentage valid points",
-                ]
-                | Callable[[NDArrayNum], np.floating[Any]]
-            ]
-            | None
+            str | Callable[[NDArrayNum], np.floating[Any]] | list[str | Callable[[NDArrayNum], np.floating[Any]]] | None
         ) = None,
         band: int = 1,
     ) -> np.floating[Any] | dict[str, np.floating[Any]]:
@@ -2055,8 +1985,11 @@ def get_stats(
             "rmse": "RMSE",
             "rms": "RMSE",
             "std": "Standard deviation",
+            "standarddeviation": "Standard deviation",
             "validpoints": "Valid points",
-            "percentagevalidpoints": "Percentage valid point",
+            "percentagevalidpoints": "Percentage valid points",
+            "validpointsalldata": "Valid points all data",
+            "percentagevalidpointsalldata": "Percentage valid points all data",
         }
         if isinstance(stats_name, list):
             result = {}
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index b3c70208..d4c7a50c 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1985,8 +1985,6 @@ def test_split_bands(self) -> None:
     def test_stats(self, example: str, caplog) -> None:
         raster = gu.Raster(example)
 
-        # Full stats
-        stats = raster.get_stats()
         expected_stats = [
             "Mean",
             "Median",
@@ -2000,17 +1998,22 @@ def test_stats(self, example: str, caplog) -> None:
             "Standard deviation",
             "Valid points",
             "Percentage valid points",
-            "Valid points no mask",
-            "Percentage valid points no mask",
+            "Valid points all data",
+            "Percentage valid points all data",
         ]
+
+        # Full stats
+        stats = raster.get_stats()
         for name in expected_stats:
             assert name in stats
             assert stats.get(name) is not None
 
         # Single stat
-        stat = raster.get_stats(stats_name="mean")
-        assert isinstance(stat, np.floating)
+        for name in expected_stats:
+            stat = raster.get_stats(stats_name=name)
+            assert np.isfinite(stat)
 
+        # Callable
         def percentile_95(data: NDArrayNum) -> np.floating[Any]:
             if isinstance(data, np.ma.MaskedArray):
                 data = data.compressed()

From 122fde01d8aa49e18a8599f10b2e33e539b5254b Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Tue, 18 Feb 2025 11:48:23 +0100
Subject: [PATCH 08/14] fix: modify the way to handle masks in stats

---
 geoutils/raster/raster.py        | 46 ++++++++++++++++----------------
 geoutils/stats.py                |  6 ++---
 tests/test_raster/test_raster.py |  6 ++---
 3 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 5e72ced7..6ae72add 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1897,8 +1897,8 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, RMSE, standard deviation, valid points, percentage
-        valid points, valid points all data, percentage valid points all data.
+        min, sum, sum of squares, 90th percentile, NMAD, RMSE, standard deviation, valid count, total count,
+        percentage valid points, size.
         """
 
         if self.count == 1:
@@ -1907,24 +1907,24 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             data = self.data[band - 1]
 
         # Compute the statistics
-        compressed_data = data.compressed()
-        valid_points = np.count_nonzero(np.isfinite(compressed_data))
-        valid_points_all_data = np.count_nonzero(np.isfinite(data))
+        mdata = np.ma.filled(data.astype(float), np.nan)
+        valid_count = np.count_nonzero(~data.mask)
+        total_count = np.count_nonzero(np.isfinite(data))
         stats_dict = {
-            "Mean": np.nanmean(compressed_data),
-            "Median": np.nanmedian(compressed_data),
-            "Max": np.nanmax(compressed_data),
-            "Min": np.nanmin(compressed_data),
-            "Sum": np.nansum(compressed_data),
-            "Sum of squares": np.nansum(np.square(compressed_data)),
-            "90th percentile": np.nanpercentile(compressed_data, 90),
-            "NMAD": nmad(compressed_data),
-            "RMSE": np.sqrt(np.nanmean(np.square(compressed_data))),
-            "Standard deviation": np.nanstd(compressed_data),
-            "Valid points": np.count_nonzero(np.isfinite(compressed_data)),
-            "Percentage valid points": (valid_points / data.size) * 100,
-            "Valid points all data": valid_points_all_data,
-            "Percentage valid points all data": (valid_points_all_data / data.size) * 100,
+            "Mean": np.ma.mean(data),
+            "Median": np.ma.median(data),
+            "Max": np.ma.max(data),
+            "Min": np.ma.min(data),
+            "Sum": np.ma.sum(data),
+            "Sum of squares": np.ma.sum(np.square(data)),
+            "90th percentile": np.nanpercentile(mdata, 90),
+            "NMAD": nmad(data),
+            "RMSE": np.sqrt(np.ma.mean(np.square(data))),
+            "Standard deviation": np.ma.std(data),
+            "Valid count": valid_count,
+            "Total count": total_count,
+            "Percentage valid points": (valid_count / total_count) * 100,
+            "Size": data.size,
         }
         return stats_dict
 
@@ -1956,7 +1956,7 @@ def get_stats(
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
                    Accepted names include:
                    - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std",
-                   "valid points", "percentage valid points".
+                   "valid count", "total count", "percentage valid points", "size".
                    Custom callables can also be provided.
         :param band: The index of the band for which to compute statistics. Default is 1.
 
@@ -1986,10 +1986,10 @@ def get_stats(
             "rms": "RMSE",
             "std": "Standard deviation",
             "standarddeviation": "Standard deviation",
-            "validpoints": "Valid points",
+            "validcount": "Valid count",
+            "totalcount": "Total count",
             "percentagevalidpoints": "Percentage valid points",
-            "validpointsalldata": "Valid points all data",
-            "percentagevalidpointsalldata": "Percentage valid points all data",
+            "size": "Size",
         }
         if isinstance(stats_name, list):
             result = {}
diff --git a/geoutils/stats.py b/geoutils/stats.py
index 9ac61ca0..61e9a94d 100644
--- a/geoutils/stats.py
+++ b/geoutils/stats.py
@@ -38,7 +38,5 @@ def nmad(data: NDArrayNum, nfact: float = 1.4826) -> np.floating[Any]:
     :returns nmad: (normalized) median absolute deviation of data.
     """
     if isinstance(data, np.ma.masked_array):
-        data_arr = data.compressed()
-    else:
-        data_arr = np.asarray(data)
-    return nfact * np.nanmedian(np.abs(data_arr - np.nanmedian(data_arr)))
+        return nfact * np.ma.median(np.abs(data - np.ma.median(data)))
+    return nfact * np.nanmedian(np.abs(data - np.nanmedian(data)))
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index d4c7a50c..e4d09aa0 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1996,10 +1996,10 @@ def test_stats(self, example: str, caplog) -> None:
             "NMAD",
             "RMSE",
             "Standard deviation",
-            "Valid points",
+            "Valid count",
+            "Total count",
             "Percentage valid points",
-            "Valid points all data",
-            "Percentage valid points all data",
+            "Size",
         ]
 
         # Full stats

From 409ef4da9003af1a6edbaba00cf5af1f524cb6a7 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Tue, 18 Feb 2025 11:49:14 +0100
Subject: [PATCH 09/14] docs: complete doc about raster.get_stats

---
 doc/source/api.md          |  9 +++++++++
 doc/source/raster_class.md | 21 +++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/doc/source/api.md b/doc/source/api.md
index 10eb044c..face8593 100644
--- a/doc/source/api.md
+++ b/doc/source/api.md
@@ -107,6 +107,15 @@ documentation.
     Raster.plot
 ```
 
+### Get statistics
+
+```{eval-rst}
+.. autosummary::
+    :toctree: gen_modules/
+
+    Raster.get_stats
+```
+
 ### Get or update data methods
 
 ```{eval-rst}
diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index 59feeb38..b5feaaff 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -360,10 +360,23 @@ rast_reproj.to_xarray()
 ```
 
 ## Obtain Statistics
-The `get_stats()` method allows to extract key statistical information from a raster in a dictionary.
-Supported statistics are : mean, median, max, mean, sum, sum of squares, 90th percentile, nmad, rmse, std, valid points,
-percentage valid points, valid points all data, percentage valid points all data.
-The RMSE is only relevant when the raster represents a difference of two objects.
+The {func}`~geoutils.Raster.get_stats` method allows to extract key statistical information from a raster in a dictionary.
+Supported statistics are :
+- **Mean:** arithmetic mean of the data, ignoring masked values.
+- **Median:** middle value when the valid data points are sorted in increasing order, ignoring masked values.
+- **Max:** maximum value among the data, ignoring masked values.
+- **Min:** minimum value among the data, ignoring masked values.
+- **Sum:** sum of all data, ignoring masked values.
+- **Sum of squares:** sum of the squares of all data, ignoring masked values.
+- **90th percentile:** point below which 90% of the data falls, ignoring masked values.
+- **NMAD (Normalized Median Absolute Deviation):** robust measure of variability in the data, less sensitive to outliers compared to standard deviation. Ignore masked values.
+- **RMSE (Root Mean Square Error):** commonly used to express the magnitude of errors or variability and can give insight into the spread of the data. Only relevant when the raster represents a difference of two objects. Ignore masked values.
+- **Std (Standard deviation):** Measures the spread or dispersion of the data around the mean, ignoring masked values.
+- **Valid count:** The number of valid (unmasked) data points in the array. It counts the non-masked elements.
+- **Total count:** The total number of finite data points in the array, including masked elements.
+- **Percentage valid points:** The percentage of unmasked data points out of the total number of finite points. Useful for classification statistics.
+- **Size:** Total size of the raster.
+
 Callable functions are supported as well.
 
 ### Usage Examples:

From b401b541f929b20accedd5a7ba58efba9ff88f21 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Fri, 21 Feb 2025 10:53:50 +0100
Subject: [PATCH 10/14] feat: add LE90 in stats

---
 doc/source/raster_class.md |  1 +
 geoutils/raster/raster.py  | 10 ++++++----
 geoutils/stats.py          | 26 ++++++++++++++++++++++++++
 tests/test_stats.py        | 36 +++++++++++++++++++++++++++++++++++-
 4 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index b5feaaff..b55f2110 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -369,6 +369,7 @@ Supported statistics are :
 - **Sum:** sum of all data, ignoring masked values.
 - **Sum of squares:** sum of the squares of all data, ignoring masked values.
 - **90th percentile:** point below which 90% of the data falls, ignoring masked values.
+- **LE90 (Linear Error with 90% confidence):** Difference between the 95th and 5th percentiles of a dataset, representing the range within which 90% of the data points lie. Ignore masked values.
 - **NMAD (Normalized Median Absolute Deviation):** robust measure of variability in the data, less sensitive to outliers compared to standard deviation. Ignore masked values.
 - **RMSE (Root Mean Square Error):** commonly used to express the magnitude of errors or variability and can give insight into the spread of the data. Only relevant when the raster represents a difference of two objects. Ignore masked values.
 - **Std (Standard deviation):** Measures the spread or dispersion of the data around the mean, ignoring masked values.
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 6ae72add..eff18409 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -89,7 +89,7 @@
     decode_sensor_metadata,
     parse_and_convert_metadata_from_filename,
 )
-from geoutils.stats import nmad
+from geoutils.stats import linear_error, nmad
 from geoutils.vector.vector import Vector
 
 # If python38 or above, Literal is builtin. Otherwise, use typing_extensions
@@ -1897,7 +1897,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
         :param band: The index of the band for which to compute statistics. Default is 1.
 
         :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, NMAD, RMSE, standard deviation, valid count, total count,
+        min, sum, sum of squares, 90th percentile, LE90, NMAD, RMSE, standard deviation, valid count, total count,
         percentage valid points, size.
         """
 
@@ -1918,6 +1918,7 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
             "Sum": np.ma.sum(data),
             "Sum of squares": np.ma.sum(np.square(data)),
             "90th percentile": np.nanpercentile(mdata, 90),
+            "LE90": linear_error(mdata, interval=90),
             "NMAD": nmad(data),
             "RMSE": np.sqrt(np.ma.mean(np.square(data))),
             "Standard deviation": np.ma.std(data),
@@ -1955,8 +1956,8 @@ def get_stats(
 
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
                    Accepted names include:
-                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "nmad", "rmse", "std",
-                   "valid count", "total count", "percentage valid points", "size".
+                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "LE90", "nmad", "rmse",
+                    "std", "valid count", "total count", "percentage valid points", "size".
                    Custom callables can also be provided.
         :param band: The index of the band for which to compute statistics. Default is 1.
 
@@ -1981,6 +1982,7 @@ def get_stats(
             "sum2": "Sum of squares",
             "90thpercentile": "90th percentile",
             "90percentile": "90th percentile",
+            "le90": "LE90",
             "nmad": "NMAD",
             "rmse": "RMSE",
             "rms": "RMSE",
diff --git a/geoutils/stats.py b/geoutils/stats.py
index 61e9a94d..2d19d9b0 100644
--- a/geoutils/stats.py
+++ b/geoutils/stats.py
@@ -40,3 +40,29 @@ def nmad(data: NDArrayNum, nfact: float = 1.4826) -> np.floating[Any]:
     if isinstance(data, np.ma.masked_array):
         return nfact * np.ma.median(np.abs(data - np.ma.median(data)))
     return nfact * np.nanmedian(np.abs(data - np.nanmedian(data)))
+
+
+def linear_error(data: NDArrayNum, interval: float = 90) -> np.floating[Any]:
+    """
+    Compute the linear error (LE) for a given dataset, representing the range of differences between the upper and
+    lower percentiles of the data. By default, this calculates the 90% confidence interval (LE90).
+
+    :param data: A numpy array or masked array of data, typically representing the differences (errors) in elevation or
+    another quantity.
+    :param interval: The confidence interval to compute, specified as a percentage. For example, an interval of 90 will
+    compute the range between the 5th and 95th percentiles (LE90). This value must be between 0 and 100.
+
+    return: The computed linear error, which is the difference between the upper and lower percentiles.
+
+    raises: ValueError if the `interval` is not between 0 and 100.
+    """
+    # Validate the interval
+    if not (0 < interval <= 100):
+        raise ValueError("Interval must be between 0 and 100")
+
+    if isinstance(data, np.ma.masked_array):
+        mdata = np.ma.filled(data.astype(float), np.nan)
+    else:
+        mdata = data
+    le = np.nanpercentile(mdata, 50 + interval / 2) - np.nanpercentile(mdata, 50 - interval / 2)
+    return le
diff --git a/tests/test_stats.py b/tests/test_stats.py
index dbfb5118..5eaa7016 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -2,10 +2,11 @@
 Test functions for stats
 """
 
+import numpy as np
 import scipy
 
 from geoutils import Raster, examples
-from geoutils.stats import nmad
+from geoutils.stats import linear_error, nmad
 
 
 class TestStats:
@@ -28,3 +29,36 @@ def test_nmad(self) -> None:
         nmad_2 = nmad(self.landsat_raster.data, nfact=2)
 
         assert nmad_1 * 2 == nmad_2
+
+    def test_linear_error(self) -> None:
+        """Test linear error (LE) functionality runs on any type of input"""
+
+        # Compute LE on the landsat raster data for a default interval (LE90)
+        le_ma = linear_error(self.landsat_raster.data)
+        le_nan_array = linear_error(self.landsat_raster.get_nanarray())
+
+        # Assert the LE90 is computed the same for masked array and NaN array
+        assert le_ma == le_nan_array
+
+        # Check that the function works for different intervals
+        le90 = linear_error(self.landsat_raster.data, interval=90)
+        le50 = linear_error(self.landsat_raster.data, interval=50)
+
+        # Verify that LE50 (interquartile range) is smaller than LE90
+        assert le50 < le90
+
+        # Test a known dataset
+        test_data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+        le90_test = linear_error(test_data, interval=90)
+        le50_test = linear_error(test_data, interval=50)
+
+        assert le90_test == 9
+        assert le50_test == 5
+
+        # Test masked arrays with invalid data (should ignore NaNs/masked values)
+        masked_data = np.ma.masked_array(test_data, mask=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
+        le90_masked = linear_error(masked_data, interval=90)
+        le50_masked = linear_error(masked_data, interval=50)
+
+        assert le90_masked == 4.5
+        assert le50_masked == 2.5

From 1041f540d643061f71d614a232e0e4526fd64004 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Mon, 24 Feb 2025 14:13:13 +0100
Subject: [PATCH 11/14] docs: update

---
 doc/source/raster_class.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index b55f2110..61cdd366 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -260,14 +260,14 @@ The {func}`~geoutils.Raster.icrop` function accepts only a bounding box in pixel
 By default, {func}`~geoutils.Raster.crop` and {func}`~geoutils.Raster.icrop` return a new Raster unless the inplace parameter is set to True, in which case the cropping operation is performed directly on the original raster object.
 For more details, see the {ref}`specific section and function descriptions in the API<api-geo-handle>`.
 
-### Example for `crop`
+### Example for {func}`~geoutils.Raster.crop`
 ```{code-cell} ipython3
 # Crop raster to smaller bounds
 rast_crop = rast.crop(bbox=(0.3, 0.3, 1, 1))
 print(rast_crop.bounds)
 ```
 
-### Example for `icrop`
+### Example for {func}`~geoutils.Raster.icrop`
 ```{code-cell} ipython3
 # Crop raster using pixel coordinates
 rast_icrop = rast.icrop(bbox=(2, 2, 6, 6))

From 8192338b164e35f5e46c073423847a7f4f98ba7f Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Mon, 24 Feb 2025 15:03:45 +0100
Subject: [PATCH 12/14] feat: add mask parameter in get_stats()

---
 geoutils/raster/raster.py        | 23 ++++++++++++++++++-----
 tests/test_raster/test_raster.py |  6 ++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index eff18409..410331a3 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1890,7 +1890,7 @@ def set_mask(self, mask: NDArrayBool | Mask) -> None:
         else:
             self.data[mask_arr > 0] = np.ma.masked
 
-    def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
+    def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str, np.floating[Any]]:
         """
         Calculate common statistics for a specified band in the raster.
 
@@ -1908,8 +1908,9 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
 
         # Compute the statistics
         mdata = np.ma.filled(data.astype(float), np.nan)
-        valid_count = np.count_nonzero(~data.mask)
-        total_count = np.count_nonzero(np.isfinite(data))
+        valid_count = np.count_nonzero(~self.get_mask())
+        if total_count is None:
+            total_count = valid_count
         stats_dict = {
             "Mean": np.ma.mean(data),
             "Median": np.ma.median(data),
@@ -1933,14 +1934,18 @@ def _statistics(self, band: int = 1) -> dict[str, np.floating[Any]]:
     def get_stats(
         self,
         stats_name: str | Callable[[NDArrayNum], np.floating[Any]],
+        inlier_mask: Mask | NDArrayBool | None = None,
         band: int = 1,
+        total_count: int | None = None,
     ) -> np.floating[Any]: ...
 
     @overload
     def get_stats(
         self,
         stats_name: list[str | Callable[[NDArrayNum], np.floating[Any]]] | None = None,
+        inlier_mask: Mask | NDArrayBool | None = None,
         band: int = 1,
+        total_count: int | None = None,
     ) -> dict[str, np.floating[Any]]: ...
 
     def get_stats(
@@ -1948,7 +1953,9 @@ def get_stats(
         stats_name: (
             str | Callable[[NDArrayNum], np.floating[Any]] | list[str | Callable[[NDArrayNum], np.floating[Any]]] | None
         ) = None,
+        inlier_mask: Mask | NDArrayBool | None = None,
         band: int = 1,
+        total_count: int | None = None,
     ) -> np.floating[Any] | dict[str, np.floating[Any]]:
         """
         Retrieve specified statistics or all available statistics for the raster data. Allows passing custom callables
@@ -1959,13 +1966,19 @@ def get_stats(
                    - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "LE90", "nmad", "rmse",
                     "std", "valid count", "total count", "percentage valid points", "size".
                    Custom callables can also be provided.
+        :param inlier_mask: A boolean mask to filter values for statistical calculations.
         :param band: The index of the band for which to compute statistics. Default is 1.
-
+        :param total_count: The total number of finite data points in the array.
         :returns: The requested statistic or a dictionary of statistics if multiple or all are requested.
         """
         if not self.is_loaded:
             self.load()
-        stats_dict = self._statistics(band=band)
+        if inlier_mask is not None:
+            total_count = np.count_nonzero(~self.get_mask())
+            dem_masked = self.copy()
+            dem_masked.set_mask(inlier_mask)
+            return dem_masked.get_stats(stats_name=stats_name, band=band, total_count=total_count)
+        stats_dict = self._statistics(band=band, total_count=total_count)
         if stats_name is None:
             return stats_dict
 
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index e4d09aa0..dda0c3e3 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1993,6 +1993,7 @@ def test_stats(self, example: str, caplog) -> None:
             "Sum",
             "Sum of squares",
             "90th percentile",
+            "LE90",
             "NMAD",
             "RMSE",
             "Standard deviation",
@@ -2008,6 +2009,11 @@ def test_stats(self, example: str, caplog) -> None:
             assert name in stats
             assert stats.get(name) is not None
 
+        # With mask
+        inlier_mask = raster.get_mask()
+        stats_masked = raster.get_stats(inlier_mask=inlier_mask)
+        assert stats_masked == stats
+
         # Single stat
         for name in expected_stats:
             stat = raster.get_stats(stats_name=name)

From 8f78710020583c507a9ade39d6e63f11c4d1ebc6 Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Tue, 25 Feb 2025 17:36:44 +0100
Subject: [PATCH 13/14] fix: disable total_count and percentage valid points in
 get_stats() if no inlier mask is passed

---
 doc/source/raster_class.md       | 15 +++++++++++--
 geoutils/raster/raster.py        | 36 ++++++++++++++++++++------------
 tests/test_raster/test_raster.py |  6 ++++--
 3 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index 61cdd366..f0227be6 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -374,10 +374,13 @@ Supported statistics are :
 - **RMSE (Root Mean Square Error):** commonly used to express the magnitude of errors or variability and can give insight into the spread of the data. Only relevant when the raster represents a difference of two objects. Ignore masked values.
 - **Std (Standard deviation):** Measures the spread or dispersion of the data around the mean, ignoring masked values.
 - **Valid count:** The number of valid (unmasked) data points in the array. It counts the non-masked elements.
-- **Total count:** The total number of finite data points in the array, including masked elements.
-- **Percentage valid points:** The percentage of unmasked data points out of the total number of finite points. Useful for classification statistics.
 - **Size:** Total size of the raster.
 
+If an inlier mask is passed:
+- **Total count:** The number of unmasked data points in the array before applying the inlier mask.
+- **Percentage valid points:** The ration between `valid_count` and `total_count` Useful for classification statistics.
+
+
 Callable functions are supported as well.
 
 ### Usage Examples:
@@ -402,3 +405,11 @@ def custom_stat(data):
     return np.nansum(data > 100)  # Count the number of pixels above 100
 rast.get_stats(custom_stat)
 ```
+
+- Passing an inlier mask:
+```{code-cell} ipython3
+inlier_mask = np.array([[False, False , True],
+                        [False, True , False],
+                        [True, False , True]])
+rast.get_stats(inlier_mask=inlier_mask)
+```
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 410331a3..9fc67363 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1896,9 +1896,7 @@ def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str
 
         :param band: The index of the band for which to compute statistics. Default is 1.
 
-        :returns: A dictionary containing the calculated statistics for the selected band, including mean, median, max,
-        min, sum, sum of squares, 90th percentile, LE90, NMAD, RMSE, standard deviation, valid count, total count,
-        percentage valid points, size.
+        :returns: A dictionary containing the calculated statistics for the selected band.
         """
 
         if self.count == 1:
@@ -1909,8 +1907,6 @@ def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str
         # Compute the statistics
         mdata = np.ma.filled(data.astype(float), np.nan)
         valid_count = np.count_nonzero(~self.get_mask())
-        if total_count is None:
-            total_count = valid_count
         stats_dict = {
             "Mean": np.ma.mean(data),
             "Median": np.ma.median(data),
@@ -1924,10 +1920,17 @@ def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str
             "RMSE": np.sqrt(np.ma.mean(np.square(data))),
             "Standard deviation": np.ma.std(data),
             "Valid count": valid_count,
-            "Total count": total_count,
-            "Percentage valid points": (valid_count / total_count) * 100,
             "Size": data.size,
         }
+
+        if total_count is not None:
+            stats_dict.update(
+                {
+                    "Total count": total_count,
+                    "Percentage valid points": (valid_count / total_count) * 100,
+                }
+            )
+
         return stats_dict
 
     @overload
@@ -1962,10 +1965,11 @@ def get_stats(
         to calculate custom stats.
 
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
-                   Accepted names include:
-                   - "mean", "median", "max", "min", "sum", "sum of squares", "90th percentile", "LE90", "nmad", "rmse",
-                    "std", "valid count", "total count", "percentage valid points", "size".
-                   Custom callables can also be provided.
+            Accepted names include:
+            `mean`, `median`, `max`, `min`, `sum`, `sum of squares`, `90th percentile`, `LE90`, `nmad`, `rmse`,
+            `std`, `valid count`, `size`, and if an inlier mask is passed : `total count`,
+            `percentage valid points`.
+            Custom callables can also be provided.
         :param inlier_mask: A boolean mask to filter values for statistical calculations.
         :param band: The index of the band for which to compute statistics. Default is 1.
         :param total_count: The total number of finite data points in the array.
@@ -2002,10 +2006,16 @@ def get_stats(
             "std": "Standard deviation",
             "standarddeviation": "Standard deviation",
             "validcount": "Valid count",
-            "totalcount": "Total count",
-            "percentagevalidpoints": "Percentage valid points",
             "size": "Size",
         }
+        if total_count is not None:
+            stats_aliases.update(
+                {
+                    "totalcount": "Total count",
+                    "percentagevalidpoints": "Percentage valid points",
+                }
+            )
+
         if isinstance(stats_name, list):
             result = {}
             for name in stats_name:
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index dda0c3e3..963f6e9b 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1998,8 +1998,6 @@ def test_stats(self, example: str, caplog) -> None:
             "RMSE",
             "Standard deviation",
             "Valid count",
-            "Total count",
-            "Percentage valid points",
             "Size",
         ]
 
@@ -2012,6 +2010,10 @@ def test_stats(self, example: str, caplog) -> None:
         # With mask
         inlier_mask = raster.get_mask()
         stats_masked = raster.get_stats(inlier_mask=inlier_mask)
+        for name in ["Total count", "Percentage valid points"]:
+            assert name in stats_masked
+            assert stats_masked.get(name) is not None
+            stats_masked.pop(name)
         assert stats_masked == stats
 
         # Single stat

From 8aebf0bfb01df3b21c11b304213d7b6eed815c0e Mon Sep 17 00:00:00 2001
From: vschaffn <valentin.schaffner@cs-soprasteria.com>
Date: Wed, 26 Feb 2025 14:16:23 +0100
Subject: [PATCH 14/14] fix: refactor stats

---
 doc/source/raster_class.md       | 14 +++++----
 geoutils/raster/raster.py        | 50 ++++++++++++++++++++------------
 tests/test_raster/test_raster.py | 10 +++++--
 3 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/doc/source/raster_class.md b/doc/source/raster_class.md
index f0227be6..ef0d0e28 100644
--- a/doc/source/raster_class.md
+++ b/doc/source/raster_class.md
@@ -372,13 +372,17 @@ Supported statistics are :
 - **LE90 (Linear Error with 90% confidence):** Difference between the 95th and 5th percentiles of a dataset, representing the range within which 90% of the data points lie. Ignore masked values.
 - **NMAD (Normalized Median Absolute Deviation):** robust measure of variability in the data, less sensitive to outliers compared to standard deviation. Ignore masked values.
 - **RMSE (Root Mean Square Error):** commonly used to express the magnitude of errors or variability and can give insight into the spread of the data. Only relevant when the raster represents a difference of two objects. Ignore masked values.
-- **Std (Standard deviation):** Measures the spread or dispersion of the data around the mean, ignoring masked values.
-- **Valid count:** The number of valid (unmasked) data points in the array. It counts the non-masked elements.
-- **Size:** Total size of the raster.
+- **Std (Standard deviation):** measures the spread or dispersion of the data around the mean, ignoring masked values.
+- **Valid count:** number of finite data points in the array. It counts the non-masked elements.
+- **Total count:** total size of the raster.
+- **Percentage valid points:** ratio between **Valid count** and **Total count**.
+
 
 If an inlier mask is passed:
-- **Total count:** The number of unmasked data points in the array before applying the inlier mask.
-- **Percentage valid points:** The ration between `valid_count` and `total_count` Useful for classification statistics.
+- **Total inlier count:** number of data points in the inlier mask.
+- **Valid inlier count:** number of unmasked data points in the array after applying the inlier mask.
+- **Percentage inlier points:** ratio between **Valid inlier count** and **Valid count**. Useful for classification statistics.
+- **Percentage valid inlier points:** ratio between **Valid inlier count** and **Total inlier count**.
 
 
 Callable functions are supported as well.
diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py
index 9fc67363..2746d673 100644
--- a/geoutils/raster/raster.py
+++ b/geoutils/raster/raster.py
@@ -1890,11 +1890,12 @@ def set_mask(self, mask: NDArrayBool | Mask) -> None:
         else:
             self.data[mask_arr > 0] = np.ma.masked
 
-    def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str, np.floating[Any]]:
+    def _statistics(self, band: int = 1, counts: tuple[int, int] | None = None) -> dict[str, np.floating[Any]]:
         """
         Calculate common statistics for a specified band in the raster.
 
         :param band: The index of the band for which to compute statistics. Default is 1.
+        :param counts: (number of finite data points in the array, number of valid points in inlier_mask).
 
         :returns: A dictionary containing the calculated statistics for the selected band.
         """
@@ -1906,7 +1907,7 @@ def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str
 
         # Compute the statistics
         mdata = np.ma.filled(data.astype(float), np.nan)
-        valid_count = np.count_nonzero(~self.get_mask())
+        valid_count = np.count_nonzero(~self.get_mask()) if counts is None else counts[0]
         stats_dict = {
             "Mean": np.ma.mean(data),
             "Median": np.ma.median(data),
@@ -1920,14 +1921,18 @@ def _statistics(self, band: int = 1, total_count: int | None = None) -> dict[str
             "RMSE": np.sqrt(np.ma.mean(np.square(data))),
             "Standard deviation": np.ma.std(data),
             "Valid count": valid_count,
-            "Size": data.size,
+            "Total count": data.size,
+            "Percentage valid points": (valid_count / data.size) * 100,
         }
 
-        if total_count is not None:
+        if counts is not None:
+            valid_inlier_count = np.count_nonzero(~self.get_mask())
             stats_dict.update(
                 {
-                    "Total count": total_count,
-                    "Percentage valid points": (valid_count / total_count) * 100,
+                    "Valid inlier count": valid_inlier_count,
+                    "Total inlier count": counts[1],
+                    "Percentage inlier points": (valid_inlier_count / counts[0]) * 100,
+                    "Percentage valid inlier points": (valid_inlier_count / counts[1]) * 100,
                 }
             )
 
@@ -1939,7 +1944,7 @@ def get_stats(
         stats_name: str | Callable[[NDArrayNum], np.floating[Any]],
         inlier_mask: Mask | NDArrayBool | None = None,
         band: int = 1,
-        total_count: int | None = None,
+        counts: tuple[int, int] | None = None,
     ) -> np.floating[Any]: ...
 
     @overload
@@ -1948,7 +1953,7 @@ def get_stats(
         stats_name: list[str | Callable[[NDArrayNum], np.floating[Any]]] | None = None,
         inlier_mask: Mask | NDArrayBool | None = None,
         band: int = 1,
-        total_count: int | None = None,
+        counts: tuple[int, int] | None = None,
     ) -> dict[str, np.floating[Any]]: ...
 
     def get_stats(
@@ -1958,7 +1963,7 @@ def get_stats(
         ) = None,
         inlier_mask: Mask | NDArrayBool | None = None,
         band: int = 1,
-        total_count: int | None = None,
+        counts: tuple[int, int] | None = None,
     ) -> np.floating[Any] | dict[str, np.floating[Any]]:
         """
         Retrieve specified statistics or all available statistics for the raster data. Allows passing custom callables
@@ -1967,22 +1972,26 @@ def get_stats(
         :param stats_name: Name or list of names of the statistics to retrieve. If None, all statistics are returned.
             Accepted names include:
             `mean`, `median`, `max`, `min`, `sum`, `sum of squares`, `90th percentile`, `LE90`, `nmad`, `rmse`,
-            `std`, `valid count`, `size`, and if an inlier mask is passed : `total count`,
-            `percentage valid points`.
+            `std`, `valid count`, `total count`, `percentage valid points` and if an inlier mask is passed :
+            `valid inlier count`, `total inlier count`, `percentage inlier point`, `percentage valid inlier points`.
             Custom callables can also be provided.
         :param inlier_mask: A boolean mask to filter values for statistical calculations.
         :param band: The index of the band for which to compute statistics. Default is 1.
-        :param total_count: The total number of finite data points in the array.
+        :param counts: (number of finite data points in the array, number of valid points in inlier_mask). DO NOT USE.
         :returns: The requested statistic or a dictionary of statistics if multiple or all are requested.
         """
         if not self.is_loaded:
             self.load()
         if inlier_mask is not None:
-            total_count = np.count_nonzero(~self.get_mask())
+            valid_points = np.count_nonzero(~self.get_mask())
+            if isinstance(inlier_mask, Mask):
+                inlier_points = np.count_nonzero(~inlier_mask.data)
+            else:
+                inlier_points = np.count_nonzero(~inlier_mask)
             dem_masked = self.copy()
             dem_masked.set_mask(inlier_mask)
-            return dem_masked.get_stats(stats_name=stats_name, band=band, total_count=total_count)
-        stats_dict = self._statistics(band=band, total_count=total_count)
+            return dem_masked.get_stats(stats_name=stats_name, band=band, counts=(valid_points, inlier_points))
+        stats_dict = self._statistics(band=band, counts=counts)
         if stats_name is None:
             return stats_dict
 
@@ -2006,13 +2015,16 @@ def get_stats(
             "std": "Standard deviation",
             "standarddeviation": "Standard deviation",
             "validcount": "Valid count",
-            "size": "Size",
+            "totalcount": "Total count",
+            "percentagevalidpoints": "Percentage valid points",
         }
-        if total_count is not None:
+        if counts is not None:
             stats_aliases.update(
                 {
-                    "totalcount": "Total count",
-                    "percentagevalidpoints": "Percentage valid points",
+                    "validinliercount": "Valid inlier count",
+                    "totalinliercount": "Total inlier count",
+                    "percentagevalidinlierpoints": "Percentage valid inlier points",
+                    "percentageinlierpoints": "Percentage inlier points",
                 }
             )
 
diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py
index 963f6e9b..84d148f6 100644
--- a/tests/test_raster/test_raster.py
+++ b/tests/test_raster/test_raster.py
@@ -1998,7 +1998,8 @@ def test_stats(self, example: str, caplog) -> None:
             "RMSE",
             "Standard deviation",
             "Valid count",
-            "Size",
+            "Total count",
+            "Percentage valid points",
         ]
 
         # Full stats
@@ -2010,7 +2011,12 @@ def test_stats(self, example: str, caplog) -> None:
         # With mask
         inlier_mask = raster.get_mask()
         stats_masked = raster.get_stats(inlier_mask=inlier_mask)
-        for name in ["Total count", "Percentage valid points"]:
+        for name in [
+            "Valid inlier count",
+            "Total inlier count",
+            "Percentage inlier points",
+            "Percentage valid inlier points",
+        ]:
             assert name in stats_masked
             assert stats_masked.get(name) is not None
             stats_masked.pop(name)