@@ -298,193 +298,52 @@ def _log_model_stats(self, model: PreTrainedModel, stage: str = ""):
298298 logger .log_info (f"GPU Memory Reserved: { torch .cuda .memory_reserved () / (1024 * 1024 ):.2f} MB" )
299299
300300 def convert_to_gguf (self , output_path : str ):
301- """Convert model to GGUF format with separate quantization step."""
301+ """
302+ Convert model to GGUF format using the LlamaCppConverter for robustness.
303+ """
+        from .llama_cpp_utils import LlamaCppConverter
+
         if not CT_AVAILABLE:
-            raise ImportError("CTransformers is required for GGUF conversion")
-
-        temp_dir = None
-        temp_gguf = None
+            raise ImportError("CTransformers is required for GGUF conversion. Install with: pip install ctransformers")
+
         try:
             logger.log_info("\n" + "=" * 80)
-            logger.log_info("🚀 Starting GGUF Conversion Process".center(80))
+            logger.log_info("🚀 Starting GGUF Conversion Process via LlamaCppConverter".center(80))
             logger.log_info("=" * 80 + "\n")
+
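+            # LlamaCppConverter takes a directory plus a file name, so split the requested path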
+            output_dir = os.path.dirname(output_path)
+            custom_name = os.path.basename(output_path)
+
+            converter = LlamaCppConverter(verbose=True)
 
-            # Model Information
-            logger.log_info("📊 Model Information:")
-            logger.log_info("-" * 40)
-            model_type = self.model.config.model_type if hasattr(self.model, 'config') else None
-            supported_types = ["llama", "mistral", "falcon", "mpt", "gpt_neox", "pythia", "stablelm"]
-
-            if model_type in supported_types:
-                logger.log_info(f"• Architecture: {model_type.upper()}")
-            else:
-                logger.log_info(f"• Architecture: Unknown (using default LLAMA)")
-                model_type = "llama"
-
-            total_params = sum(p.numel() for p in self.model.parameters())
-            logger.log_info(f"• Total Parameters: {total_params:,}")
-            model_size = sum(p.numel() * p.element_size() for p in self.model.parameters()) / (1024 ** 3)
-            logger.log_info(f"• Model Size: {model_size:.2f} GB")
-            logger.log_info("")
-
-            # Conversion Settings
-            logger.log_info("⚙️ Conversion Settings:")
-            logger.log_info("-" * 40)
-            logger.log_info(f"• Output Path: {output_path}")
-            logger.log_info(f"• Quantization Type: {self.quant_type}")
-            logger.log_info(f"• Target Bits: {self.bits}")
-            logger.log_info(f"• Group Size: {self.group_size}")
-            logger.log_info("")
-
-            # Save temporary checkpoint
-            temp_dir = f"{output_path}_temp_hf"
-            logger.log_info("💾 Saving Temporary Checkpoint:")
-            logger.log_info("-" * 40)
-            logger.log_info(f"• Checkpoint Path: {temp_dir}")
-            self.model.save_pretrained(temp_dir, safe_serialization=True)
-            logger.log_info("• Checkpoint saved successfully")
-            logger.log_info("")
-
-            # Find llama.cpp tools
-            logger.log_info("🔍 Locating GGUF Conversion Tools:")
-            logger.log_info("-" * 40)
-
-            try:
-                import llama_cpp
-                llama_cpp_path = os.path.dirname(llama_cpp.__file__)
-                convert_script = os.path.join(llama_cpp_path, "convert.py")
-                quantize_bin = os.path.join(llama_cpp_path, "quantize")
-                if not os.path.exists(convert_script):
-                    raise FileNotFoundError("convert.py not found")
-                if not os.path.exists(quantize_bin):
-                    raise FileNotFoundError("quantize binary not found")
-                logger.log_info(f"• Found convert.py: {convert_script}")
-                logger.log_info(f"• Found quantize: {quantize_bin}")
-            except (ImportError, FileNotFoundError) as e:
-                logger.log_error(f"• Failed to locate llama.cpp tools: {e}")
-                try:
-                    logger.log_info("• Attempting to install llama-cpp-python...")
-                    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "llama-cpp-python"])
-                    import llama_cpp
-                    llama_cpp_path = os.path.dirname(llama_cpp.__file__)
-                    convert_script = os.path.join(llama_cpp_path, "convert.py")
-                    quantize_bin = os.path.join(llama_cpp_path, "quantize")
-                    logger.log_info("• Successfully installed and located tools")
-                except Exception as inst_err:
-                    raise RuntimeError(
-                        f"Could not find or install llama-cpp-python: {inst_err}\n"
-                        "Install manually: pip install llama-cpp-python --upgrade"
-                    ) from e
-
-            # Convert to FP16 GGUF
-            logger.log_info("🛠️ Converting to FP16 GGUF:")
-            logger.log_info("-" * 40)
-            temp_gguf = f"{output_path}_temp_f16.gguf"
-            cmd_convert = [
-                sys.executable,
-                convert_script,
-                temp_dir,
-                "--outfile", temp_gguf,
-                "--outtype", "f16",
-                "--model-type", model_type
-            ]
-
-            logger.log_info(f"• Command: {' '.join(cmd_convert)}")
-            with tqdm(total=100, desc="Converting to FP16", unit="%") as pbar:
-                process = subprocess.Popen(
-                    cmd_convert,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    universal_newlines=True
-                )
-
-                while True:
-                    output = process.stdout.readline()
-                    if output == '' and process.poll() is not None:
-                        break
-                    if output and "Converting" in output:
-                        try:
-                            progress = int(output.split("%")[0].split()[-1])
-                            pbar.n = progress
-                            pbar.refresh()
-                        except:
-                            pass
-                    logger.log_info(f"• {output.strip()}")
-
-            return_code = process.wait()
-            if return_code != 0:
-                error_output = process.stderr.read()
-                raise RuntimeError(f"FP16 GGUF conversion failed:\n{error_output}")
-
-            # Quantize to target type
-            logger.log_info("\n🔄 Quantizing GGUF:")
-            logger.log_info("-" * 40)
-            cmd_quantize = [
-                quantize_bin,
-                temp_gguf,
-                output_path,
-                self.quant_type.lower()  # llama.cpp expects lowercase
-            ]
-
-            logger.log_info(f"• Command: {' '.join(cmd_quantize)}")
-            with tqdm(total=100, desc="Quantizing GGUF", unit="%") as pbar:
-                process = subprocess.Popen(
-                    cmd_quantize,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    universal_newlines=True
-                )
-
-                while True:
-                    output = process.stdout.readline()
-                    if output == '' and process.poll() is not None:
-                        break
-                    if output and "%" in output:
-                        try:
-                            progress = int(output.split("%")[0].split()[-1])
-                            pbar.n = progress
-                            pbar.refresh()
-                        except:
-                            pass
-                    logger.log_info(f"• {output.strip()}")
-
-            return_code = process.wait()
-            if return_code != 0:
-                error_output = process.stderr.read()
-                raise RuntimeError(f"GGUF quantization failed:\n{error_output}")
-
-            # Verify results
-            if os.path.exists(output_path):
-                logger.log_info("\n✅ Conversion Results:")
-                logger.log_info("-" * 40)
-
-                file_size = os.path.getsize(output_path) / (1024 ** 3)
-                logger.log_info(f"• GGUF File Size: {file_size:.2f} GB")
-
-                compression_ratio = model_size / file_size
-                logger.log_info(f"• Compression Ratio: {compression_ratio:.2f}x")
-                logger.log_info(f"• Output Path: {output_path}")
-
-                logger.log_info("\n" + "=" * 80)
-                logger.log_info("✨ GGUF Conversion Completed Successfully! ✨".center(80))
-                logger.log_info("=" * 80 + "\n")
-            else:
-                raise RuntimeError(f"GGUF file was not created at {output_path}")
-
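+            # Hand the loaded model to the converter, which performs conversion and quantization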
+            gguf_path = converter.convert_to_gguf(
+                model=self.model,
+                output_dir=output_dir,
+                bits=self.bits,
+                group_size=self.group_size,
+                save_tokenizer=True,  # It's good practice to save the tokenizer
+                custom_name=custom_name,
+                quant_type=self.quant_type
+            )
+
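+            # The converter returns the path it wrote; verify the file actually exists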
+            if not os.path.exists(gguf_path):
+                raise RuntimeError(f"GGUF file was not created at {gguf_path}")
+
+            logger.log_info("\n" + "=" * 80)
+            logger.log_info("✨ GGUF Conversion Completed Successfully! ✨".center(80))
+            logger.log_info(f"📄 GGUF file saved to: {gguf_path}".center(80))
+            logger.log_info("=" * 80 + "\n")
+
         except Exception as e:
-            logger.log_error("\n❌ Conversion Failed:")
+            logger.log_error("\n❌ GGUF Conversion Failed:")
             logger.log_error("-" * 40)
             logger.log_error(f"• Error: {str(e)}")
             raise RuntimeError(f"Failed to convert model to GGUF: {str(e)}") from e
-
         finally:
-            if temp_dir and os.path.exists(temp_dir):
-                logger.log_info("\n🧹 Cleaning Up:")
-                logger.log_info("-" * 40)
-                logger.log_info("• Removing temporary files...")
-                shutil.rmtree(temp_dir, ignore_errors=True)
-            if temp_gguf and os.path.exists(temp_gguf):
-                os.remove(temp_gguf)
             self._clear_memory()
 
     def _clear_memory(self):