Skip to content

Commit b771e88

Browse files
authored
feat(geoarrow-schema)!: Better distinction of creating GeoArrowType depending on extension metadata (#1275)
### Change list

- Creates new `GeoArrowType::from_extension_field`, which only works if the field explicitly has a GeoArrow extension name set.
- Creates new `GeoArrowType::from_arrow_field`, which takes over the old behavior of `TryFrom<&Field>`, allowing inference from string and binary columns.
- **Breaking**: Changes `TryFrom<&Field> for GeoArrowType` to require explicit GeoArrow metadata on the field.

Closes #1274
1 parent f841654 commit b771e88

File tree

14 files changed

+132
-119
lines changed

14 files changed

+132
-119
lines changed

rust/geoarrow-array/src/array/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ pub fn from_arrow_array(
5050
) -> GeoArrowResult<Arc<dyn GeoArrowArray>> {
5151
use GeoArrowType::*;
5252

53-
let result: Arc<dyn GeoArrowArray> = match GeoArrowType::try_from(field)? {
53+
let result: Arc<dyn GeoArrowArray> = match GeoArrowType::from_arrow_field(field)? {
5454
Point(_) => Arc::new(PointArray::try_from((array, field))?),
5555
LineString(_) => Arc::new(LineStringArray::try_from((array, field))?),
5656
Polygon(_) => Arc::new(PolygonArray::try_from((array, field))?),

rust/geoarrow-array/src/geozero/export/data_source/mod.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -414,13 +414,8 @@ fn process_geometry_n<P: GeomProcessor>(
414414
fn geometry_columns(schema: &Schema) -> Vec<usize> {
415415
let mut geom_indices = vec![];
416416
for (field_idx, field) in schema.fields().iter().enumerate() {
417-
// We first check that an extension type name is set and then check that we can coerce to a
418-
// GeoArrowType so that we don't accept columns that are _compatible_ with geoarrow storage
419-
// but aren't set as geoarrow extension types.
420-
if let Some(_ext_name) = field.extension_type_name() {
421-
if let Ok(_geoarrow_type) = GeoArrowType::try_from(field.as_ref()) {
422-
geom_indices.push(field_idx);
423-
}
417+
if GeoArrowType::from_extension_field(field.as_ref()).is_ok() {
418+
geom_indices.push(field_idx);
424419
}
425420
}
426421
geom_indices

rust/geoarrow-array/src/trait_.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,8 @@ mod test {
630630
for (list_size, dim) in test_cases.into_iter() {
631631
let array = FixedSizeListBuilder::new(Float64Builder::new(), list_size).finish();
632632
let t =
633-
GeoArrowType::try_from(&Field::new("", array.data_type().clone(), true)).unwrap();
633+
GeoArrowType::from_arrow_field(&Field::new("", array.data_type().clone(), true))
634+
.unwrap();
634635
assert_eq!(
635636
t,
636637
GeoArrowType::Point(
@@ -686,7 +687,8 @@ mod test {
686687
for (fields, builders, dim) in test_cases.into_iter() {
687688
let array = StructBuilder::new(fields, builders).finish();
688689
let t =
689-
GeoArrowType::try_from(&Field::new("", array.data_type().clone(), true)).unwrap();
690+
GeoArrowType::from_arrow_field(&Field::new("", array.data_type().clone(), true))
691+
.unwrap();
690692
assert_eq!(
691693
t,
692694
GeoArrowType::Point(

rust/geoarrow-schema/src/datatype.rs

Lines changed: 88 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -287,73 +287,71 @@ impl GeoArrowType {
287287
WktView(t) => WktView(t.with_metadata(meta)),
288288
}
289289
}
290-
}
291-
292-
macro_rules! impl_into_geoarrowtype {
293-
($source_type:ident, $variant:expr) => {
294-
impl From<$source_type> for GeoArrowType {
295-
fn from(value: $source_type) -> Self {
296-
$variant(value)
297-
}
298-
}
299-
};
300-
}
301-
302-
impl_into_geoarrowtype!(PointType, GeoArrowType::Point);
303-
impl_into_geoarrowtype!(LineStringType, GeoArrowType::LineString);
304-
impl_into_geoarrowtype!(PolygonType, GeoArrowType::Polygon);
305-
impl_into_geoarrowtype!(MultiPointType, GeoArrowType::MultiPoint);
306-
impl_into_geoarrowtype!(MultiLineStringType, GeoArrowType::MultiLineString);
307-
impl_into_geoarrowtype!(MultiPolygonType, GeoArrowType::MultiPolygon);
308-
impl_into_geoarrowtype!(GeometryCollectionType, GeoArrowType::GeometryCollection);
309-
impl_into_geoarrowtype!(BoxType, GeoArrowType::Rect);
310-
impl_into_geoarrowtype!(GeometryType, GeoArrowType::Geometry);
311290

312-
impl TryFrom<&Field> for GeoArrowType {
313-
type Error = GeoArrowError;
291+
/// Create a new [`GeoArrowType`] from an Arrow [`Field`], requiring GeoArrow metadata to be
292+
/// set.
293+
///
294+
/// If the field does not have at least a GeoArrow extension name, an error will be returned.
295+
///
296+
/// See also [`GeoArrowType::from_arrow_field`].
297+
pub fn from_extension_field(field: &Field) -> GeoArrowResult<Self> {
298+
let extension_name = field.extension_type_name().ok_or(GeoArrowError::InvalidGeoArrow(
299+
"Expected GeoArrow extension metadata, but found none, and `require_geoarrow_metadata` is `true`.".to_string(),
300+
))?;
314301

315-
fn try_from(field: &Field) -> GeoArrowResult<Self> {
316302
use GeoArrowType::*;
317-
if let Some(extension_name) = field.extension_type_name() {
318-
let data_type = match extension_name {
319-
PointType::NAME => Point(field.extension_type()),
320-
LineStringType::NAME => LineString(field.extension_type()),
321-
PolygonType::NAME => Polygon(field.extension_type()),
322-
MultiPointType::NAME => MultiPoint(field.extension_type()),
323-
MultiLineStringType::NAME => MultiLineString(field.extension_type()),
324-
MultiPolygonType::NAME => MultiPolygon(field.extension_type()),
325-
GeometryCollectionType::NAME => GeometryCollection(field.extension_type()),
326-
BoxType::NAME => Rect(field.extension_type()),
327-
GeometryType::NAME => Geometry(field.extension_type()),
328-
WkbType::NAME | "ogc.wkb" => match field.data_type() {
329-
DataType::Binary => Wkb(field.extension_type()),
330-
DataType::LargeBinary => LargeWkb(field.extension_type()),
331-
DataType::BinaryView => WkbView(field.extension_type()),
332-
_ => {
333-
return Err(GeoArrowError::InvalidGeoArrow(format!(
334-
"Expected binary type for geoarrow.wkb, got '{}'",
335-
field.data_type()
336-
)));
337-
}
338-
},
339-
WktType::NAME => match field.data_type() {
340-
DataType::Utf8 => Wkt(field.extension_type()),
341-
DataType::LargeUtf8 => LargeWkt(field.extension_type()),
342-
DataType::Utf8View => WktView(field.extension_type()),
343-
_ => {
344-
return Err(GeoArrowError::InvalidGeoArrow(format!(
345-
"Expected string type for geoarrow.wkt, got '{}'",
346-
field.data_type()
347-
)));
348-
}
349-
},
350-
name => {
303+
let data_type = match extension_name {
304+
PointType::NAME => Point(field.try_extension_type()?),
305+
LineStringType::NAME => LineString(field.try_extension_type()?),
306+
PolygonType::NAME => Polygon(field.try_extension_type()?),
307+
MultiPointType::NAME => MultiPoint(field.try_extension_type()?),
308+
MultiLineStringType::NAME => MultiLineString(field.try_extension_type()?),
309+
MultiPolygonType::NAME => MultiPolygon(field.try_extension_type()?),
310+
GeometryCollectionType::NAME => GeometryCollection(field.try_extension_type()?),
311+
BoxType::NAME => Rect(field.try_extension_type()?),
312+
GeometryType::NAME => Geometry(field.try_extension_type()?),
313+
WkbType::NAME => match field.data_type() {
314+
DataType::Binary => Wkb(field.try_extension_type()?),
315+
DataType::LargeBinary => LargeWkb(field.try_extension_type()?),
316+
DataType::BinaryView => WkbView(field.try_extension_type()?),
317+
_ => {
351318
return Err(GeoArrowError::InvalidGeoArrow(format!(
352-
"Expected GeoArrow type, got Arrow extension type with name: '{name}'.",
319+
"Expected binary type for a field with extension name 'geoarrow.wkb', got '{}'",
320+
field.data_type()
353321
)));
354322
}
355-
};
356-
Ok(data_type)
323+
},
324+
WktType::NAME => match field.data_type() {
325+
DataType::Utf8 => Wkt(field.try_extension_type()?),
326+
DataType::LargeUtf8 => LargeWkt(field.try_extension_type()?),
327+
DataType::Utf8View => WktView(field.try_extension_type()?),
328+
_ => {
329+
return Err(GeoArrowError::InvalidGeoArrow(format!(
330+
"Expected string type for a field with extension name 'geoarrow.wkt', got '{}'",
331+
field.data_type()
332+
)));
333+
}
334+
},
335+
name => {
336+
return Err(GeoArrowError::InvalidGeoArrow(format!(
337+
"Expected a GeoArrow extension name, got an Arrow extension type with name: '{name}'.",
338+
)));
339+
}
340+
};
341+
Ok(data_type)
342+
}
343+
344+
/// Create a new [`GeoArrowType`] from an Arrow [`Field`], inferring the GeoArrow type if
345+
/// GeoArrow metadata is not present.
346+
///
347+
/// This will first try [`GeoArrowType::from_extension_field`], and if that fails, will try to
348+
/// infer the GeoArrow type from the field's [DataType]. This only works for Point, WKB, and
349+
/// WKT types, as those are the only types that can be unambiguously inferred from an Arrow
350+
/// [DataType].
351+
pub fn from_arrow_field(field: &Field) -> GeoArrowResult<Self> {
352+
use GeoArrowType::*;
353+
if let Ok(geo_type) = Self::from_extension_field(field) {
354+
Ok(geo_type)
357355
} else {
358356
let metadata = Arc::new(Metadata::try_from(field)?);
359357
let data_type = match field.data_type() {
@@ -389,7 +387,36 @@ impl TryFrom<&Field> for GeoArrowType {
389387
DataType::Utf8View => WktView(WktType::new(metadata)),
390388
_ => return Err(GeoArrowError::InvalidGeoArrow("Only FixedSizeList, Struct, Binary, LargeBinary, BinaryView, String, LargeString, and StringView arrays are unambigously typed for a GeoArrow type and can be used without extension metadata.\nEnsure your array input has GeoArrow metadata.".to_string())),
391389
};
390+
392391
Ok(data_type)
393392
}
394393
}
395394
}
395+
396+
macro_rules! impl_into_geoarrowtype {
397+
($source_type:ident, $variant:expr) => {
398+
impl From<$source_type> for GeoArrowType {
399+
fn from(value: $source_type) -> Self {
400+
$variant(value)
401+
}
402+
}
403+
};
404+
}
405+
406+
impl_into_geoarrowtype!(PointType, GeoArrowType::Point);
407+
impl_into_geoarrowtype!(LineStringType, GeoArrowType::LineString);
408+
impl_into_geoarrowtype!(PolygonType, GeoArrowType::Polygon);
409+
impl_into_geoarrowtype!(MultiPointType, GeoArrowType::MultiPoint);
410+
impl_into_geoarrowtype!(MultiLineStringType, GeoArrowType::MultiLineString);
411+
impl_into_geoarrowtype!(MultiPolygonType, GeoArrowType::MultiPolygon);
412+
impl_into_geoarrowtype!(GeometryCollectionType, GeoArrowType::GeometryCollection);
413+
impl_into_geoarrowtype!(BoxType, GeoArrowType::Rect);
414+
impl_into_geoarrowtype!(GeometryType, GeoArrowType::Geometry);
415+
416+
impl TryFrom<&Field> for GeoArrowType {
417+
type Error = GeoArrowError;
418+
419+
fn try_from(field: &Field) -> GeoArrowResult<Self> {
420+
Self::from_extension_field(field)
421+
}
422+
}

rust/geodatafusion/src/udf/geo/processing/centroid.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use datafusion::logical_expr::{
99
};
1010
use geoarrow_array::GeoArrowArray;
1111
use geoarrow_array::array::from_arrow_array;
12-
use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PointType};
12+
use geoarrow_schema::{CoordType, Dimension, Metadata, PointType};
1313

1414
use crate::data_types::any_single_geometry_type_input;
1515
use crate::error::GeoDataFusionResult;
@@ -79,9 +79,8 @@ fn return_field_impl(
7979
args: ReturnFieldArgs,
8080
coord_type: CoordType,
8181
) -> GeoDataFusionResult<FieldRef> {
82-
let input_type = GeoArrowType::try_from(args.arg_fields[0].as_ref())?;
83-
let output_type =
84-
PointType::new(Dimension::XY, input_type.metadata().clone()).with_coord_type(coord_type);
82+
let metadata = Arc::new(Metadata::try_from(args.arg_fields[0].as_ref()).unwrap_or_default());
83+
let output_type = PointType::new(Dimension::XY, metadata).with_coord_type(coord_type);
8584
Ok(Arc::new(output_type.to_field("", true)))
8685
}
8786

rust/geodatafusion/src/udf/geo/processing/convex_hull.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use datafusion::logical_expr::{
99
};
1010
use geoarrow_array::GeoArrowArray;
1111
use geoarrow_array::array::from_arrow_array;
12-
use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PolygonType};
12+
use geoarrow_schema::{CoordType, Dimension, Metadata, PolygonType};
1313

1414
use crate::data_types::any_single_geometry_type_input;
1515
use crate::error::GeoDataFusionResult;
@@ -79,9 +79,8 @@ fn return_field_impl(
7979
args: ReturnFieldArgs,
8080
coord_type: CoordType,
8181
) -> GeoDataFusionResult<FieldRef> {
82-
let input_type = GeoArrowType::try_from(args.arg_fields[0].as_ref())?;
83-
let output_type =
84-
PolygonType::new(Dimension::XY, input_type.metadata().clone()).with_coord_type(coord_type);
82+
let metadata = Arc::new(Metadata::try_from(args.arg_fields[0].as_ref()).unwrap_or_default());
83+
let output_type = PolygonType::new(Dimension::XY, metadata).with_coord_type(coord_type);
8584
Ok(Arc::new(output_type.to_field("", true)))
8685
}
8786

rust/geodatafusion/src/udf/geo/processing/oriented_envelope.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use datafusion::logical_expr::{
99
};
1010
use geoarrow_array::GeoArrowArray;
1111
use geoarrow_array::array::from_arrow_array;
12-
use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PolygonType};
12+
use geoarrow_schema::{CoordType, Dimension, Metadata, PolygonType};
1313

1414
use crate::data_types::any_single_geometry_type_input;
1515
use crate::error::GeoDataFusionResult;
@@ -79,9 +79,8 @@ fn return_field_impl(
7979
args: ReturnFieldArgs,
8080
coord_type: CoordType,
8181
) -> GeoDataFusionResult<FieldRef> {
82-
let input_type = GeoArrowType::try_from(args.arg_fields[0].as_ref())?;
83-
let output_type =
84-
PolygonType::new(Dimension::XY, input_type.metadata().clone()).with_coord_type(coord_type);
82+
let metadata = Arc::new(Metadata::try_from(args.arg_fields[0].as_ref()).unwrap_or_default());
83+
let output_type = PolygonType::new(Dimension::XY, metadata).with_coord_type(coord_type);
8584
Ok(Arc::new(output_type.to_field("", true)))
8685
}
8786

rust/geodatafusion/src/udf/geo/processing/point_on_surface.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use datafusion::logical_expr::{
99
};
1010
use geoarrow_array::GeoArrowArray;
1111
use geoarrow_array::array::from_arrow_array;
12-
use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PointType};
12+
use geoarrow_schema::{CoordType, Dimension, Metadata, PointType};
1313

1414
use crate::data_types::any_single_geometry_type_input;
1515
use crate::error::GeoDataFusionResult;
@@ -79,9 +79,8 @@ fn return_field_impl(
7979
args: ReturnFieldArgs,
8080
coord_type: CoordType,
8181
) -> GeoDataFusionResult<FieldRef> {
82-
let input_type = GeoArrowType::try_from(args.arg_fields[0].as_ref())?;
83-
let output_type =
84-
PointType::new(Dimension::XY, input_type.metadata().clone()).with_coord_type(coord_type);
82+
let metadata = Arc::new(Metadata::try_from(args.arg_fields[0].as_ref()).unwrap_or_default());
83+
let output_type = PointType::new(Dimension::XY, metadata).with_coord_type(coord_type);
8584
Ok(Arc::new(output_type.to_field("", true)))
8685
}
8786

rust/geodatafusion/src/udf/geohash/box2d_from_geohash.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use datafusion::logical_expr::{
1313
use geoarrow_array::GeoArrowArray;
1414
use geoarrow_array::array::RectArray;
1515
use geoarrow_array::builder::RectBuilder;
16-
use geoarrow_schema::{BoxType, Dimension, GeoArrowType};
16+
use geoarrow_schema::{BoxType, Dimension, Metadata};
1717

1818
use crate::error::GeoDataFusionResult;
1919

@@ -81,8 +81,8 @@ impl ScalarUDFImpl for Box2DFromGeoHash {
8181
}
8282

8383
fn return_field_impl(args: ReturnFieldArgs) -> GeoDataFusionResult<FieldRef> {
84-
let input_type = GeoArrowType::try_from(args.arg_fields[0].as_ref())?;
85-
let output_type = BoxType::new(Dimension::XY, input_type.metadata().clone());
84+
let metadata = Arc::new(Metadata::try_from(args.arg_fields[0].as_ref()).unwrap_or_default());
85+
let output_type = BoxType::new(Dimension::XY, metadata);
8686
Ok(Arc::new(output_type.to_field("", true)))
8787
}
8888

rust/geodatafusion/src/udf/geohash/point_from_geohash.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use datafusion::logical_expr::{
1313
use geoarrow_array::GeoArrowArray;
1414
use geoarrow_array::array::PointArray;
1515
use geoarrow_array::builder::PointBuilder;
16-
use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PointType};
16+
use geoarrow_schema::{CoordType, Dimension, Metadata, PointType};
1717

1818
use crate::error::GeoDataFusionResult;
1919

@@ -86,9 +86,8 @@ fn return_field_impl(
8686
args: ReturnFieldArgs,
8787
coord_type: CoordType,
8888
) -> GeoDataFusionResult<FieldRef> {
89-
let input_type = GeoArrowType::try_from(args.arg_fields[0].as_ref())?;
90-
let output_type =
91-
PointType::new(Dimension::XY, input_type.metadata().clone()).with_coord_type(coord_type);
89+
let metadata = Arc::new(Metadata::try_from(args.arg_fields[0].as_ref()).unwrap_or_default());
90+
let output_type = PointType::new(Dimension::XY, metadata).with_coord_type(coord_type);
9291
Ok(Arc::new(output_type.to_field("", true)))
9392
}
9493

0 commit comments

Comments
 (0)