@@ -298,193 +298,48 @@ def _log_model_stats(self, model: PreTrainedModel, stage: str = ""):
             logger.log_info(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024 * 1024):.2f} MB")

     def convert_to_gguf(self, output_path: str):
-        """Convert model to GGUF format with separate quantization step."""
+        """
+        Convert model to GGUF format using the LlamaCppConverter for robustness.
+        """
+        from .llama_cpp_utils import LlamaCppConverter
+
         if not CT_AVAILABLE:
-            raise ImportError("CTransformers is required for GGUF conversion")
-
-        temp_dir = None
-        temp_gguf = None
+            raise ImportError("CTransformers is required for GGUF conversion. Install with: pip install ctransformers")
+
         try:
             logger.log_info("\n" + "=" * 80)
-            logger.log_info("🚀 Starting GGUF Conversion Process".center(80))
+            logger.log_info("🚀 Starting GGUF Conversion Process via LlamaCppConverter".center(80))
             logger.log_info("=" * 80 + "\n")
+
+            output_dir = os.path.dirname(output_path)
+            custom_name = os.path.basename(output_path)
+
+            converter = LlamaCppConverter(verbose=True)

-            # Model Information
-            logger.log_info("📊 Model Information:")
-            logger.log_info("-" * 40)
-            model_type = self.model.config.model_type if hasattr(self.model, 'config') else None
-            supported_types = ["llama", "mistral", "falcon", "mpt", "gpt_neox", "pythia", "stablelm"]
-
-            if model_type in supported_types:
-                logger.log_info(f"• Architecture: {model_type.upper()}")
-            else:
-                logger.log_info(f"• Architecture: Unknown (using default LLAMA)")
-                model_type = "llama"
-
-            total_params = sum(p.numel() for p in self.model.parameters())
-            logger.log_info(f"• Total Parameters: {total_params:,}")
-            model_size = sum(p.numel() * p.element_size() for p in self.model.parameters()) / (1024 ** 3)
-            logger.log_info(f"• Model Size: {model_size:.2f} GB")
-            logger.log_info("")
-
-            # Conversion Settings
-            logger.log_info("⚙️ Conversion Settings:")
-            logger.log_info("-" * 40)
-            logger.log_info(f"• Output Path: {output_path}")
-            logger.log_info(f"• Quantization Type: {self.quant_type}")
-            logger.log_info(f"• Target Bits: {self.bits}")
-            logger.log_info(f"• Group Size: {self.group_size}")
-            logger.log_info("")
-
-            # Save temporary checkpoint
-            temp_dir = f"{output_path}_temp_hf"
-            logger.log_info("💾 Saving Temporary Checkpoint:")
-            logger.log_info("-" * 40)
-            logger.log_info(f"• Checkpoint Path: {temp_dir}")
-            self.model.save_pretrained(temp_dir, safe_serialization=True)
-            logger.log_info("• Checkpoint saved successfully")
-            logger.log_info("")
-
-            # Find llama.cpp tools
-            logger.log_info("🔍 Locating GGUF Conversion Tools:")
-            logger.log_info("-" * 40)
-
-            try:
-                import llama_cpp
-                llama_cpp_path = os.path.dirname(llama_cpp.__file__)
-                convert_script = os.path.join(llama_cpp_path, "convert.py")
-                quantize_bin = os.path.join(llama_cpp_path, "quantize")
-                if not os.path.exists(convert_script):
-                    raise FileNotFoundError("convert.py not found")
-                if not os.path.exists(quantize_bin):
-                    raise FileNotFoundError("quantize binary not found")
-                logger.log_info(f"• Found convert.py: {convert_script}")
-                logger.log_info(f"• Found quantize: {quantize_bin}")
-            except (ImportError, FileNotFoundError) as e:
-                logger.log_error(f"• Failed to locate llama.cpp tools: {e}")
-                try:
-                    logger.log_info("• Attempting to install llama-cpp-python...")
-                    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "llama-cpp-python"])
-                    import llama_cpp
-                    llama_cpp_path = os.path.dirname(llama_cpp.__file__)
-                    convert_script = os.path.join(llama_cpp_path, "convert.py")
-                    quantize_bin = os.path.join(llama_cpp_path, "quantize")
-                    logger.log_info("• Successfully installed and located tools")
-                except Exception as inst_err:
-                    raise RuntimeError(
-                        f"Could not find or install llama-cpp-python: {inst_err}\n"
-                        "Install manually: pip install llama-cpp-python --upgrade"
-                    ) from e
-
-            # Convert to FP16 GGUF
-            logger.log_info("🛠️ Converting to FP16 GGUF:")
-            logger.log_info("-" * 40)
-            temp_gguf = f"{output_path}_temp_f16.gguf"
-            cmd_convert = [
-                sys.executable,
-                convert_script,
-                temp_dir,
-                "--outfile", temp_gguf,
-                "--outtype", "f16",
-                "--model-type", model_type
-            ]
-
-            logger.log_info(f"• Command: {' '.join(cmd_convert)}")
-            with tqdm(total=100, desc="Converting to FP16", unit="%") as pbar:
-                process = subprocess.Popen(
-                    cmd_convert,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    universal_newlines=True
-                )
-
-                while True:
-                    output = process.stdout.readline()
-                    if output == '' and process.poll() is not None:
-                        break
-                    if output and "Converting" in output:
-                        try:
-                            progress = int(output.split("%")[0].split()[-1])
-                            pbar.n = progress
-                            pbar.refresh()
-                        except:
-                            pass
-                        logger.log_info(f"• {output.strip()}")
-
-                return_code = process.wait()
-                if return_code != 0:
-                    error_output = process.stderr.read()
-                    raise RuntimeError(f"FP16 GGUF conversion failed:\n{error_output}")
-
-            # Quantize to target type
-            logger.log_info("\n🔄 Quantizing GGUF:")
-            logger.log_info("-" * 40)
-            cmd_quantize = [
-                quantize_bin,
-                temp_gguf,
-                output_path,
-                self.quant_type.lower()  # llama.cpp expects lowercase
-            ]
-
-            logger.log_info(f"• Command: {' '.join(cmd_quantize)}")
-            with tqdm(total=100, desc="Quantizing GGUF", unit="%") as pbar:
-                process = subprocess.Popen(
-                    cmd_quantize,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    universal_newlines=True
-                )
-
-                while True:
-                    output = process.stdout.readline()
-                    if output == '' and process.poll() is not None:
-                        break
-                    if output and "%" in output:
-                        try:
-                            progress = int(output.split("%")[0].split()[-1])
-                            pbar.n = progress
-                            pbar.refresh()
-                        except:
-                            pass
-                        logger.log_info(f"• {output.strip()}")
-
-                return_code = process.wait()
-                if return_code != 0:
-                    error_output = process.stderr.read()
-                    raise RuntimeError(f"GGUF quantization failed:\n{error_output}")
-
-            # Verify results
-            if os.path.exists(output_path):
-                logger.log_info("\n✅ Conversion Results:")
-                logger.log_info("-" * 40)
-
-                file_size = os.path.getsize(output_path) / (1024 ** 3)
-                logger.log_info(f"• GGUF File Size: {file_size:.2f} GB")
-
-                compression_ratio = model_size / file_size
-                logger.log_info(f"• Compression Ratio: {compression_ratio:.2f}x")
-                logger.log_info(f"• Output Path: {output_path}")
-
-                logger.log_info("\n" + "=" * 80)
-                logger.log_info("✨ GGUF Conversion Completed Successfully! ✨".center(80))
-                logger.log_info("=" * 80 + "\n")
-            else:
-                raise RuntimeError(f"GGUF file was not created at {output_path}")
-
+            gguf_path = converter.convert_to_gguf(
+                model=self.model,
+                output_dir=output_dir,
+                bits=self.bits,
+                group_size=self.group_size,
+                save_tokenizer=True,  # It's good practice to save the tokenizer
+                custom_name=custom_name,
+                quant_type=self.quant_type
+            )
+
+            if not os.path.exists(gguf_path):
+                raise RuntimeError(f"GGUF file was not created at {gguf_path}")
+
+            logger.log_info("\n" + "=" * 80)
+            logger.log_info("✨ GGUF Conversion Completed Successfully! ✨".center(80))
+            logger.log_info(f"📄 GGUF file saved to: {gguf_path}".center(80))
+            logger.log_info("=" * 80 + "\n")
+
         except Exception as e:
-            logger.log_error("\n❌ Conversion Failed:")
+            logger.log_error("\n❌ GGUF Conversion Failed:")
             logger.log_error("-" * 40)
             logger.log_error(f"• Error: {str(e)}")
             raise RuntimeError(f"Failed to convert model to GGUF: {str(e)}") from e
-
         finally:
-            if temp_dir and os.path.exists(temp_dir):
-                logger.log_info("\n🧹 Cleaning Up:")
-                logger.log_info("-" * 40)
-                logger.log_info("• Removing temporary files...")
-                shutil.rmtree(temp_dir, ignore_errors=True)
-            if temp_gguf and os.path.exists(temp_gguf):
-                os.remove(temp_gguf)
             self._clear_memory()

     def _clear_memory(self):
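
The diff imports LlamaCppConverter from .llama_cpp_utils, but that module is not part of the commit shown here. Below is a minimal sketch of the interface the call site implies: the class name and the keyword arguments (model, output_dir, bits, group_size, save_tokenizer, custom_name, quant_type) come from the diff, while everything in the body is an assumption about how such a wrapper could stage an HF checkpoint and invoke llama.cpp's conversion tooling.

    # llama_cpp_utils.py -- hypothetical sketch inferred from the call site above;
    # not the actual module from this repository.
    import os
    import subprocess
    import sys
    import tempfile
    from typing import Optional


    class LlamaCppConverter:
        def __init__(self, verbose: bool = False):
            self.verbose = verbose

        def convert_to_gguf(self, model, output_dir: str, bits: int = 4,
                            group_size: int = 128, save_tokenizer: bool = True,
                            custom_name: Optional[str] = None,
                            quant_type: str = "Q4_K_M") -> str:
            """Stage `model` as an HF checkpoint, run llama.cpp's converter on
            it, and return the path of the resulting GGUF file."""
            os.makedirs(output_dir, exist_ok=True)
            gguf_path = os.path.join(output_dir, custom_name or "model.gguf")

            with tempfile.TemporaryDirectory() as checkpoint_dir:
                # Stage a safetensors checkpoint for the converter to read.
                model.save_pretrained(checkpoint_dir, safe_serialization=True)
                # Tokenizer handling and mapping bits/group_size onto a llama.cpp
                # preset are elided. Recent llama.cpp trees ship the converter as
                # convert_hf_to_gguf.py (older ones as convert.py), so locating
                # the script is left as a placeholder here.
                cmd = [
                    sys.executable, "convert_hf_to_gguf.py", checkpoint_dir,
                    "--outfile", gguf_path,
                    "--outtype", "f16",
                ]
                subprocess.run(cmd, check=True, capture_output=not self.verbose)
                # K-quants such as Q4_K_M would need a second pass through the
                # llama-quantize binary, as the removed code did with `quantize`.
            return gguf_path

Pushing the two-step convert-then-quantize pipeline behind one object is what lets this method shrink from 193 lines to 48: it keeps only logging, delegation, and error handling, while checkpoint staging, tool discovery, and temp-file cleanup move into the converter.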
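
For completeness, a hypothetical call site; `quantizer` stands in for an instance of the class this commit modifies, with a model already loaded, and the filename is purely illustrative:

    # The directory part becomes output_dir and the basename becomes
    # custom_name, per the os.path.dirname/basename split in the diff.
    quantizer.convert_to_gguf("models/my-model.Q4_K_M.gguf")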