1
+ from functools import singledispatch
2
+ try :
3
+ from types import NoneType
4
+ except ImportError :
5
+ # python < 3.10
6
+ NoneType = type (None )
7
+
1
8
import pandas as pd
2
9
import numpy as np
3
10
from pydantic import BaseModel , create_model
4
11
12
+
5
13
class NoAvailablePTypeError (Exception ):
6
14
"""
7
15
Throw an error if we cannot create
@@ -18,72 +26,173 @@ def __init__(
18
26
19
27
class InvalidPTypeError(Exception):
    """
    Throw an error if ptype cannot be recognised

    Parameters
    ----------
    message : str
        Text describing the failure. It is stored on the instance as
        ``message`` and forwarded to ``Exception.__init__``, so it is also
        what ``str(error)`` shows.
    """

    def __init__(
        self,
        message="`ptype_data` must be a pd.DataFrame, a pydantic BaseModel, np.ndarray or dict",
    ):
        self.message = message
        super().__init__(self.message)
31
38
32
39
33
- def vetiver_create_ptype (ptype_data , save_ptype : bool ):
40
# Error-message template for data whose type has no ptype handler.
# ``{_data_type}`` is filled in with ``str.format``. A trailing backslash joins
# a line with the following one, so each paragraph renders as a single line.
CREATE_PTYPE_TPL = """\
Failed to create a data prototype (ptype) from data of \
type {_data_type}. If your datatype is not one of \
(pd.DataFrame, pydantic.BaseModel, np.ndarray, dict), \
you should write a function to create the ptype. Here is \
a template for such a function:

    from pydantic import create_model
    from vetiver.ptype import vetiver_create_ptype

    @vetiver_create_ptype.register
    def _(data: {_data_type}):
        data_dict = ... # convert data to a dictionary
        ptype = create_model("ptype", **data_dict)
        return ptype

If your datatype is a common type, please consider submitting \
a pull request.
"""
59
+
60
@singledispatch
def vetiver_create_ptype(data):
    """Create zero row structure to save data types

    This is the fallback implementation of a ``functools.singledispatch``
    function. It runs only when no handler is registered for the type of
    `data`, and it always raises.

    Parameters
    ----------
    data : object
        An object with information (data) whose layout is to be determined.

    Returns
    -------
    ptype : pydantic.main.BaseModel
        Data prototype

    Raises
    ------
    InvalidPTypeError
        Always, from this fallback. The message names the type of `data`
        and shows a template for registering a handler for it.
    """
    # Use the type's name as it would be written in source code. Formatting
    # the type object itself gives "<class '...'>", which turns the
    # "def _(data: ...)" line of the template into invalid code.
    data_type = type(data).__qualname__
    raise InvalidPTypeError(
        message=CREATE_PTYPE_TPL.format(_data_type=data_type)
    )
49
78
50
- if save_ptype == False :
51
- pass
52
- elif save_ptype == True :
53
- try :
54
- if isinstance (ptype_data , np .ndarray ):
55
- ptype = _array_to_ptype (ptype_data [1 ])
56
- elif isinstance (ptype_data , dict ):
57
- ptype = _dict_to_ptype (ptype_data )
58
- elif isinstance (ptype_data .construct (), BaseModel ):
59
- ptype = ptype_data
60
- except AttributeError : # cannot construct basemodel
61
- if isinstance (ptype_data , pd .DataFrame ):
62
- ptype = _df_to_ptype (ptype_data .iloc [1 , :])
63
- else :
64
- raise InvalidPTypeError
65
79
80
@vetiver_create_ptype.register
def _(data: pd.DataFrame):
    """
    Create ptype for a pandas dataframe

    The prototype gets one field per column. Each field is defined from the
    value in the first row of that column.

    Parameters
    ----------
    data : DataFrame
        Pandas dataframe

    Returns
    -------
    ptype : pydantic.main.BaseModel
        Model class named "ptype" created by ``create_model``

    Examples
    --------
    >>> from pydantic import BaseModel
    >>> df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    >>> prototype = vetiver_create_ptype(df)
    >>> issubclass(prototype, BaseModel)
    True
    >>> prototype()
    ptype(x=1, y=4)

    The data prototype created for the dataframe is equivalent to:

    >>> class another_prototype(BaseModel):
    ...     class Config:
    ...         title = 'ptype'
    ...     x: int = 1
    ...     y: int = 4

    >>> another_prototype()
    another_prototype(x=1, y=4)
    >>> another_prototype() == prototype()
    True

    Changing the title using `class Config` also ensures that the
    json/schemas match.

    >>> another_prototype.schema() == prototype.schema()
    True
    """
    first_row = data.iloc[0, :].to_dict()
    # Keyword names must be strings, so column labels that are not (for
    # example the integers of a default column index) are converted before
    # the dictionary is unpacked.
    fields = {str(label): value for label, value in first_row.items()}
    ptype = create_model("ptype", **fields)
    return ptype
67
122
68
123
69
- def _df_to_ptype (train_data ):
124
@vetiver_create_ptype.register
def _(data: np.ndarray):
    """
    Create ptype for a numpy array

    The first row of the array supplies the fields: the field names are the
    positions in that row written as strings, and each field is defined from
    the value at that position.

    Parameters
    ----------
    data : ndarray
        2-Dimensional numpy array

    Returns
    -------
    ptype : pydantic.main.BaseModel
        Model class named "ptype" created by ``create_model``

    Examples
    --------
    >>> arr = np.array([[1, 4], [2, 5], [3, 6]])
    >>> prototype = vetiver_create_ptype(arr)
    >>> prototype()
    ptype(0=1, 1=4)

    >>> arr2 = np.array([[1, 'a'], [2, 'b'], [3, 'c']], dtype=object)
    >>> prototype2 = vetiver_create_ptype(arr2)
    >>> prototype2()
    ptype(0=1, 1='a')
    """

    def _item(value):
        # pydantic needs python objects. .item() converts a numpy
        # scalar type to a python equivalent, and if the ndarray
        # is dtype=object, it may have python objects
        try:
            return value.item()
        except AttributeError:
            return value

    # pydantic requires strings as indices
    dict_data = {
        str(position): _item(value) for position, value in enumerate(data[0])
    }
    ptype = create_model("ptype", **dict_data)
    return ptype
75
160
76
161
77
- def _array_to_ptype (train_data ):
78
- dict_data = dict (enumerate (train_data , 0 ))
162
@vetiver_create_ptype.register
def _(data: dict):
    """
    Create ptype for a dict

    The dictionary is unpacked into ``create_model``: every key is passed
    as a keyword name and every value as the matching keyword value.

    Parameters
    ----------
    data : dict
        Dictionary

    Returns
    -------
    ptype
        Whatever ``create_model("ptype", ...)`` returns for these fields
    """
    ptype = create_model("ptype", **data)
    return ptype
83
173
84
- return ptype
85
174
175
# singledispatch picks the handler from the class of the argument. A model
# *instance* has class BaseModel (or a subclass), whereas a model *class* has
# the metaclass ``type(BaseModel)`` as its class, so both are registered.
@vetiver_create_ptype.register(BaseModel)
@vetiver_create_ptype.register(type(BaseModel))
def _(data):
    """
    Create ptype for a pydantic BaseModel object

    The argument already is a prototype, so it is returned unchanged.

    Parameters
    ----------
    data : pydantic.BaseModel
        Pydantic BaseModel, given either as a model class or as an
        instance of one

    Returns
    -------
    ptype
        The same object that was passed in
    """
    return data
88
186
89
- return create_model ("ptype" ,** train_data )
187
+
188
@vetiver_create_ptype.register
def _(data: NoneType):
    """
    Create ptype for None

    Nothing is built when the data is None; the result is None as well.

    Parameters
    ----------
    data : None
        None

    Returns
    -------
    None
    """
    return None
0 commit comments