diff --git a/.zenodo.json b/.zenodo.json index 73494ea9..459d6495 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -34,6 +34,9 @@ "name": "Rondeau-Genesse, Gabriel", "affiliation": "Ouranos", "orcid": "0000-0003-3389-9406" + }, + { + "name": "Guthrie, Martin" } ], "keywords": [ diff --git a/AUTHORS.rst b/AUTHORS.rst index b88c2726..13d89566 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -21,3 +21,4 @@ Contributors * Louise Arnal `@lou-a `_ * Pascal Bourgault `@aulemahal `_ * Gabriel Rondeau-Genesse `@RondeauG `_ +* Martin Guthrie `@martinguthrie93 `_ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index afe1b236..9d44d644 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,7 +5,7 @@ Changelog `Unreleased `_ (latest) ------------------------------------------------------------- -Contributors: Trevor James Smith (:user:`Zeitsperre`). +Contributors: Trevor James Smith (:user:`Zeitsperre`), Martin Guthrie (:user:`martinguthrie93`). Internal changes ^^^^^^^^^^^^^^^^ @@ -18,6 +18,8 @@ Internal changes Fixes ^^^^^ * `tox-gh` now fully supports Python3.14. (PR #600) +* `find_geometry_from_coord` now reprojects the query point into the CRS of the target ``GeoDataFrame`` before the point-in-polygon test, and accepts an optional `point_crs` argument. This prevents silent lookup failures (or wrong matches) when the routing product is stored in a projected CRS. (issue #614, PR #615) +* `geom_prop` now accepts an optional `crs` argument and emits a warning when the geometry lies in a geographic CRS, since ``area``, ``perimeter`` and ``gravelius`` are only physically meaningful in an equal-area projection. The previous guard was inverted and stayed silent for the risky (decimal-degree) case. (issue #614, PR #615) .. _changes_0.21.0: diff --git a/src/ravenpy/utilities/analysis.py b/src/ravenpy/utilities/analysis.py index 5eee1447..86182958 100644 --- a/src/ravenpy/utilities/analysis.py +++ b/src/ravenpy/utilities/analysis.py @@ -29,7 +29,10 @@ LOGGER = logging.getLogger("RavenPy") -def geom_prop(geom: Union[Polygon, MultiPolygon, GeometryCollection]) -> dict: +def geom_prop( + geom: Union[Polygon, MultiPolygon, GeometryCollection], + crs: Optional[Union[str, int]] = None, +) -> dict: """ Return a dictionary of geometry properties. @@ -37,6 +40,10 @@ def geom_prop(geom: Union[Polygon, MultiPolygon, GeometryCollection]) -> dict: ---------- geom : Polygon or MultiPolygon or GeometryCollection Geometry to analyze. + crs : str or int, optional + Coordinate reference system of `geom`. When provided, it is used to determine + whether the geometry lies in a geographic (angular) CRS. When omitted, a + heuristic based on the centroid coordinates is used instead. Returns ------- @@ -45,12 +52,32 @@ def geom_prop(geom: Union[Polygon, MultiPolygon, GeometryCollection]) -> dict: Notes ----- - Some of the properties should be computed using an equal-area projection. + Area, perimeter and the Gravelius shape index are only physically meaningful when + computed from a projected, equal-area CRS. When `geom` is detected to be in a + geographic CRS, these values are returned in angular units (e.g. square degrees) + and a warning is emitted; reproject the geometry to an equal-area CRS beforehand + for reliable results. """ geom = shape(geom) lon, lat = geom.centroid.x, geom.centroid.y - if (lon > 180) or (lon < -180) or (lat > 90) or (lat < -90): - LOGGER.warning("Shape centroid is not in decimal degrees.") + + if crs is not None: + from pyproj import CRS + + is_geographic = CRS.from_user_input(crs).is_geographic + else: + # Heuristic: coordinates that fall within valid longitude/latitude bounds are + # most likely decimal degrees (a geographic CRS). Projected coordinates are + # typically expressed in metres and fall well outside this range. + is_geographic = (-180 <= lon <= 180) and (-90 <= lat <= 90) + + if is_geographic: + LOGGER.warning( + "Geometry appears to be in a geographic CRS; 'area', 'perimeter' and " + "'gravelius' are computed in angular units and are not physically " + "meaningful. Reproject to an equal-area CRS for reliable results." + ) + area = geom.area length = geom.length gravelius = length / 2 / math.sqrt(math.pi * area) diff --git a/src/ravenpy/utilities/geo.py b/src/ravenpy/utilities/geo.py index 8ab2136f..a3b32351 100644 --- a/src/ravenpy/utilities/geo.py +++ b/src/ravenpy/utilities/geo.py @@ -302,24 +302,53 @@ def upstream_ids(bdf, bid): return sub[sub[basin_field].isin(up)] if sub is not None else df[df[basin_field].isin(up)] -def find_geometry_from_coord(lon: float, lat: float, df: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame: +def find_geometry_from_coord( + lon: float, + lat: float, + df: geopandas.GeoDataFrame, + point_crs: Union[str, int, CRS] = WGS84, +) -> geopandas.GeoDataFrame: """ Return the geometry containing the given coordinates. + Parameters + ---------- lon : float - Longitude. + Longitude of the point, expressed in `point_crs`. lat : float - Latitude. + Latitude of the point, expressed in `point_crs`. df : GeoDataFrame Data. + point_crs : str or int or pyproj.CRS + Coordinate reference system of the (`lon`, `lat`) point. Default: 4326 (WGS84). Returns ------- GeoDataFrame Record whose geometry contains the point. + + Notes + ----- + The point is reprojected to the CRS of `df` before the point-in-polygon test. + `GeoDataFrame.contains` performs a purely Cartesian comparison and does not + reconcile mismatched coordinate reference systems, so without this alignment a + point given in geographic coordinates would silently fail to match (or match the + wrong feature) when `df` is stored in a projected CRS. """ p = Point(lon, lat) + # Align the point's CRS with that of the GeoDataFrame before testing containment. + if df.crs is not None: + source = CRS.from_user_input(point_crs) + target = CRS.from_user_input(df.crs) + if not source.equals(target): + p = geom_transform(p, source_crs=source, target_crs=target) + else: + LOGGER.warning( + "The GeoDataFrame has no CRS defined; assuming its coordinates are expressed in the same CRS as the provided point (%s).", + point_crs, + ) + c = df.contains(p) n = c.sum() if n == 0: diff --git a/tests/test_geo_utilities.py b/tests/test_geo_utilities.py index 3445b423..473820b9 100644 --- a/tests/test_geo_utilities.py +++ b/tests/test_geo_utilities.py @@ -322,3 +322,67 @@ def test_feature_contains(self, yangtze): point = -69.0, 45 assert isinstance(self.checks.feature_contains(point, vector), dict) assert isinstance(self.checks.feature_contains(self.sgeo.Point(point), vector), dict) + + +class TestCrsAlignment: + """Regression tests ensuring CRS-aware behavior of geospatial helpers.""" + + geo = pytest.importorskip("ravenpy.utilities.geo") + analysis = pytest.importorskip("ravenpy.utilities.analysis") + gpd = pytest.importorskip("geopandas") + sgeo = pytest.importorskip("shapely.geometry") + + # A gauge location (lon/lat, WGS84) inside the basin box below (Matapedia region). + LON, LAT = -67.12542, 48.10417 + SUB_ID = 175000128 + + def _basin_gdf(self, crs): + """Return a one-feature GeoDataFrame in the requested CRS.""" + poly = self.sgeo.box(-67.30, 48.00, -66.90, 48.20) # lon/lat box + gdf = self.gpd.GeoDataFrame({"SubId": [self.SUB_ID]}, geometry=[poly], crs="EPSG:4326") + return gdf.to_crs(crs) + + def test_find_geometry_from_coord_projected_crs(self): + # With a projected GeoDataFrame (metres), a raw geographic point would not + # match without reprojection. The helper must align CRSs and find the basin. + gdf = self._basin_gdf("EPSG:3348") + found = self.geo.find_geometry_from_coord(self.LON, self.LAT, gdf) + assert found["SubId"].iloc[0] == self.SUB_ID + + def test_find_geometry_from_coord_geographic_crs(self): + # Backward-compatible behavior when the GeoDataFrame is already in WGS84. + gdf = self._basin_gdf("EPSG:4326") + found = self.geo.find_geometry_from_coord(self.LON, self.LAT, gdf) + assert found["SubId"].iloc[0] == self.SUB_ID + + def test_find_geometry_from_coord_custom_point_crs(self): + # The point may be supplied in a projected CRS via `point_crs`. + gdf = self._basin_gdf("EPSG:4326") + pt = self.gpd.GeoSeries([self.sgeo.Point(self.LON, self.LAT)], crs="EPSG:4326").to_crs("EPSG:3348").iloc[0] + found = self.geo.find_geometry_from_coord(pt.x, pt.y, gdf, point_crs="EPSG:3348") + assert found["SubId"].iloc[0] == self.SUB_ID + + def test_find_geometry_from_coord_point_outside(self): + gdf = self._basin_gdf("EPSG:3348") + with pytest.raises(ValueError, match="not in any geometry"): + self.geo.find_geometry_from_coord(0.0, 0.0, gdf) + + def test_geom_prop_geographic_warning(self, caplog): + import logging + + poly = self.sgeo.box(-67.30, 48.00, -66.90, 48.20) + with caplog.at_level(logging.WARNING, logger="RavenPy"): + props = self.analysis.geom_prop(poly, crs="EPSG:4326") + assert any("geographic CRS" in record.message for record in caplog.records) + # Numeric outputs are unchanged (still computed in the geometry's own units). + assert props["area"] == pytest.approx(poly.area) + assert props["perimeter"] == pytest.approx(poly.length) + + def test_geom_prop_projected_no_warning(self, caplog): + import logging + + poly = self.sgeo.box(-67.30, 48.00, -66.90, 48.20) + projected = self.gpd.GeoSeries([poly], crs="EPSG:4326").to_crs("EPSG:3348").iloc[0] + with caplog.at_level(logging.WARNING, logger="RavenPy"): + self.analysis.geom_prop(projected, crs="EPSG:3348") + assert not any("geographic CRS" in record.message for record in caplog.records)