@@ -196,6 +196,44 @@ def convert_type(self, mysql_type, parameters):
196
196
if mysql_type == 'point' :
197
197
return 'Tuple(x Float32, y Float32)'
198
198
199
+ # Correctly handle numeric types
200
+ if mysql_type .startswith ('numeric' ):
201
+ # Determine if parameters are specified via parentheses:
202
+ if '(' in mysql_type and ')' in mysql_type :
203
+ # Expecting a type definition like "numeric(precision, scale)"
204
+ pattern = r"numeric\((\d+)\s*,\s*(\d+)\)"
205
+ match = re .search (pattern , mysql_type )
206
+ if not match :
207
+ raise ValueError (f"Invalid numeric type definition: { mysql_type } " )
208
+
209
+ precision = int (match .group (1 ))
210
+ scale = int (match .group (2 ))
211
+ else :
212
+ # If no parentheses are provided, assume defaults.
213
+ precision = 10 # or other default as defined by your standards
214
+ scale = 0
215
+
216
+ # If no fractional part, consider mapping to integer type (if desired)
217
+ if scale == 0 :
218
+ if is_unsigned :
219
+ if precision <= 9 :
220
+ return "UInt32"
221
+ elif precision <= 18 :
222
+ return "UInt64"
223
+ else :
224
+ # For very large precisions, fallback to Decimal
225
+ return f"Decimal({ precision } , { scale } )"
226
+ else :
227
+ if precision <= 9 :
228
+ return "Int32"
229
+ elif precision <= 18 :
230
+ return "Int64"
231
+ else :
232
+ return f"Decimal({ precision } , { scale } )"
233
+ else :
234
+ # For types with a defined fractional part, use a Decimal mapping.
235
+ return f"Decimal({ precision } , { scale } )"
236
+
199
237
if mysql_type == 'int' :
200
238
if is_unsigned :
201
239
return 'UInt32'
@@ -472,7 +510,69 @@ def convert_alter_query(self, mysql_query, db_name):
472
510
473
511
raise Exception (f'operation { op_name } not implement, query: { subquery } ' )
474
512
513
@classmethod
def _tokenize_alter_query(cls, sql_line):
    """Split a column definition into ``[name, type, *options]``.

    ``sql_line`` is the text of a single column definition, for example
    ``"price DECIMAL(10,2) NOT NULL DEFAULT 1.5"``.  It is first cut into
    lexical tokens, then every token between the column name and the
    first column-option keyword is glued back together (separated by
    single spaces) to form the data type.  Multi-word types such as
    ``INT UNSIGNED`` or ``DOUBLE PRECISION`` therefore end up in one
    element.

    Args:
        sql_line: the column definition as a single string.

    Returns:
        A list of strings.  Element 0 is the column name, element 1 is
        the merged type (omitted when an option keyword directly follows
        the name), and the remaining elements are the option tokens in
        their original order.  An empty list is returned when
        ``sql_line`` contains no tokens.

    Limitations:
        A parenthesised suffix is matched up to the first ``)``, so
        nested parentheses or a ``)`` inside a quoted ENUM value are not
        kept together.
    """
    # Alternatives are tried left to right, so order matters:
    #  * the decimal-literal branch must precede the word branch,
    #    otherwise "1.5" is cut into "1" and ".5";
    #  * the word branch swallows an immediately following "(...)" so
    #    that VARCHAR(254) / NUMERIC(5, 2) stay in one piece.
    token_pattern = re.compile(r'''
        (
            `[^`]+`(?:\([^)]*\))?      |  # backquoted identifier, optional (...) suffix
            \d+\.\d+(?:[eE][+-]?\d+)?  |  # decimal literal, kept whole
            \w+(?:\([^)]*\))?          |  # plain word, optional (...) suffix
            '(?:\\'|''|[^'])*'         |  # single-quoted string (backslash or doubled-quote escapes)
            "(?:\\"|""|[^"])*"         |  # double-quoted string (backslash or doubled-quote escapes)
            [^\s]+                        # fallback: any run of non-whitespace
        )
    ''', re.VERBOSE)
    tokens = token_pattern.findall(sql_line)
    if not tokens:
        return tokens

    # Keywords that mark the beginning of the column options; everything
    # between the column name and the first of these is the data type.
    constraint_keywords = frozenset((
        "DEFAULT", "NOT", "NULL", "AUTO_INCREMENT", "PRIMARY", "UNIQUE",
        "COMMENT", "COLLATE", "REFERENCES", "ON", "CHECK", "CONSTRAINT",
        "AFTER", "BEFORE", "GENERATED", "VIRTUAL", "STORED", "FIRST",
        "ALWAYS", "AS", "IDENTITY", "INVISIBLE", "PERSISTED",
    ))

    # The first token is always taken as the column name.
    column_name = tokens[0]

    # Index of the first option keyword after the name (or end of list).
    options_start = next(
        (
            idx
            for idx in range(1, len(tokens))
            if tokens[idx].upper() in constraint_keywords
        ),
        len(tokens),
    )

    merged_type = " ".join(tokens[1:options_start])
    param_tokens = tokens[options_start:]

    if merged_type:
        return [column_name, merged_type] + param_tokens
    return [column_name] + param_tokens
+ return [column_name ] + param_tokens
572
+
475
573
def __convert_alter_table_add_column (self , db_name , table_name , tokens ):
574
+ tokens = self ._tokenize_alter_query (' ' .join (tokens ))
575
+
476
576
if len (tokens ) < 2 :
477
577
raise Exception ('wrong tokens count' , tokens )
478
578
0 commit comments