Skip to content

Commit 79caf08

Browse files
committed
Type coercions produce more helpful exception errors
1 parent 988f22b commit 79caf08

File tree

1 file changed

+21
-11
lines changed

1 file changed

+21
-11
lines changed

whyqd/parsers/datasource.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -232,18 +232,26 @@ def set(self, *, df: pd.DataFrame, source: str | Path, mimetype: MimeType) -> No
232232
###################################################################################################
233233

234234
def coerce_to_schema(self, *, df: pd.DataFrame, schema: Type[SchemaDefinition]) -> pd.DataFrame:
235+
# https://modin.readthedocs.io/en/stable/flow/modin/core/dataframe/pandas/partitioning/partition.html#modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition.to_numpy
235236
validate = {"matched": [], "unmatched": [], "coerced": []}
236237
columns = self.get_header_columns(df=df)
237238
for c in columns:
238239
prospect = schema.fields.get(name=c.name)
239240
if prospect:
240-
if c.dtype == prospect.dtype:
241-
validate["matched"].append(c.name)
242-
else:
243-
# Try coerce the column to the type
244-
df[c.name] = self.coerce_column_to_dtype(column=df[c.name], coerce=prospect.dtype)
245-
validate["coerced"].append(c.name)
246-
df[c.name] = df[c.name].astype(FieldType(prospect.dtype).astype)
241+
try:
242+
if c.dtype == prospect.dtype:
243+
validate["matched"].append(c.name)
244+
else:
245+
# Try coerce the column to the type
246+
df[c.name] = self.coerce_column_to_dtype(column=_pd.Series(df[c.name].to_numpy()), coerce=prospect.dtype)
247+
validate["coerced"].append(c.name)
248+
df[c.name] = _pd.Series(df[c.name].to_numpy()).astype(FieldType(prospect.dtype).astype)
249+
except TypeError:
250+
from_type = c.dtype
251+
if isinstance(df[c.name][0], list):
252+
from_type = "array"
253+
e = f"Column '{c.name}' in Data cannot be converted from type '{from_type}' to '{prospect.dtype}'."
254+
raise TypeError(e)
247255
else:
248256
validate["unmatched"].append(c.name)
249257
if validate["unmatched"]:
@@ -797,10 +805,12 @@ def parse_bool(self, x: str) -> bool:
797805

798806
def parse_string_list(self, x: str) -> List[str]:
799807
"""Coerce a column, which should contain a list of strings, from literal to actual."""
800-
if not isinstance(x, str):
801-
return x
802-
x = ast.literal_eval(x)
803-
x = [t.strip() for t in x]
808+
if not isinstance(x, (str, list)):
809+
# If it's not already a list, then it needs to be a list stored as a string
810+
raise TypeError
811+
if isinstance(x, str):
812+
x = ast.literal_eval(x)
813+
x = [str(t).strip() if not pd.isna(t) else None for t in x]
804814
if not x:
805815
return None
806816
return x

0 commit comments

Comments
 (0)