@@ -232,18 +232,26 @@ def set(self, *, df: pd.DataFrame, source: str | Path, mimetype: MimeType) -> No
232
232
###################################################################################################
233
233
234
234
def coerce_to_schema (self , * , df : pd .DataFrame , schema : Type [SchemaDefinition ]) -> pd .DataFrame :
235
+ # https://modin.readthedocs.io/en/stable/flow/modin/core/dataframe/pandas/partitioning/partition.html#modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition.to_numpy
235
236
validate = {"matched" : [], "unmatched" : [], "coerced" : []}
236
237
columns = self .get_header_columns (df = df )
237
238
for c in columns :
238
239
prospect = schema .fields .get (name = c .name )
239
240
if prospect :
240
- if c .dtype == prospect .dtype :
241
- validate ["matched" ].append (c .name )
242
- else :
243
- # Try coerce the column to the type
244
- df [c .name ] = self .coerce_column_to_dtype (column = df [c .name ], coerce = prospect .dtype )
245
- validate ["coerced" ].append (c .name )
246
- df [c .name ] = df [c .name ].astype (FieldType (prospect .dtype ).astype )
241
+ try :
242
+ if c .dtype == prospect .dtype :
243
+ validate ["matched" ].append (c .name )
244
+ else :
245
+ # Try coerce the column to the type
246
+ df [c .name ] = self .coerce_column_to_dtype (column = _pd .Series (df [c .name ].to_numpy ()), coerce = prospect .dtype )
247
+ validate ["coerced" ].append (c .name )
248
+ df [c .name ] = _pd .Series (df [c .name ].to_numpy ()).astype (FieldType (prospect .dtype ).astype )
249
+ except TypeError :
250
+ from_type = c .dtype
251
+ if isinstance (df [c .name ][0 ], list ):
252
+ from_type = "array"
253
+ e = f"Column '{ c .name } ' in Data cannot be converted from type '{ from_type } ' to '{ prospect .dtype } '."
254
+ raise TypeError (e )
247
255
else :
248
256
validate ["unmatched" ].append (c .name )
249
257
if validate ["unmatched" ]:
@@ -797,10 +805,12 @@ def parse_bool(self, x: str) -> bool:
797
805
798
806
def parse_string_list(self, x: str) -> List[str]:
    """Coerce a cell that should hold a list of strings from literal to actual.

    Args:
        x: Either an actual list, or a string holding a Python list literal
            such as ``"['a', 'b']"``. Tuple and set literals are accepted too.

    Returns:
        A list where each non-missing item has been passed through ``str`` and
        stripped of surrounding whitespace, and each missing item (as judged by
        ``pd.isna``) is ``None``. Returns ``None`` when the list is empty.

    Raises:
        TypeError: If ``x`` is neither a string nor a list, or if the string
            evaluates to anything other than a list, tuple or set.
        ValueError: Propagated from ``ast.literal_eval`` when the string is
            not a valid Python literal.
        SyntaxError: Propagated from ``ast.literal_eval`` when the string
            cannot be parsed at all.
    """
    if not isinstance(x, (str, list)):
        # If it's not already a list, then it needs to be a list stored as a string
        raise TypeError(f"Expected a list or a list literal, got '{type(x).__name__}'.")
    if isinstance(x, str):
        x = ast.literal_eval(x)
        # A literal such as "'abc'" or "{'a': 1}" is iterable but is not a list;
        # iterating it would silently yield characters or dict keys.
        if not isinstance(x, (list, tuple, set)):
            raise TypeError(f"Expected a list literal, got '{type(x).__name__}'.")
    items = [None if pd.isna(t) else str(t).strip() for t in x]
    # An empty list is reported as a missing value rather than as [].
    return items or None
0 commit comments