How to Convert DeepSeek-R1-UD-IQ1_M GGUF Back to Safetensors?

#38
by Cheryl33990 - opened

Hello everyone,
I need to use the DeepSeek-R1-UD-IQ1_M model in Safetensors format.
Previously, I was using llama.cpp to convert Safetensors → GGUF, but now I need to convert it back (GGUF → Safetensors),
and I am unsure how to proceed.

I found a related discussion:
https://github.com/ggml-org/llama.cpp/discussions/9410
However, it only supports BF16 and FP16, while I need support for IQ1_M quantization.
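
For context, here is a small check I run to confirm which quantization types the file actually contains, so I know what a converter has to handle. The file path is just a placeholder for my local copy:

from collections import Counter
from gguf import GGUFReader

# Placeholder path -- replace with the actual GGUF file (or first shard).
reader = GGUFReader("DeepSeek-R1-UD-IQ1_M.gguf")

# Count how many tensors use each quantization type (IQ1_M, IQ1_S, F32, ...).
counts = Counter(t.tensor_type.name for t in reader.tensors)
for qtype_name, n in sorted(counts.items()):
    print(f"{qtype_name}: {n} tensors")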

Current Setup

I have copied the gguf folder from llama.cpp into my working directory.

I am using the following script (gguf_to_safetensors.py), which I modified for the conversion:

import argparse
import torch
import numpy as np
from safetensors.torch import save_file
from typing import Tuple
from gguf import GGUFReader, dequantize
from gguf.constants import GGMLQuantizationType, Keys

def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, list]:
    """Load GGUF file and extract metadata and tensors."""
    reader = GGUFReader(gguf_path)
    tensors_metadata = []
    for tensor in reader.tensors:
        tensor_metadata = {
            'name': tensor.name,
            'shape': tuple(tensor.shape.tolist()),
            'n_elements': tensor.n_elements,
            'n_bytes': tensor.n_bytes,
            'data_offset': tensor.data_offset,
            'type': tensor.tensor_type,
        }
        tensors_metadata.append(tensor_metadata)
    return reader, tensors_metadata

def convert_gguf_to_safetensors(gguf_path: str, output_path: str, use_bf16: bool, use_int8: bool) -> None:
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted {len(tensors_metadata)} tensors from GGUF file")

    tensors_dict: dict[str, torch.Tensor] = {}

    for i, tensor_info in enumerate(tensors_metadata):
        tensor_name = tensor_info['name']
        tensor_data = reader.get_tensor(i)
        tensor_type = tensor_info['type']
        
        weights = None
        weights_hf = None
        
        try:
            if tensor_type == GGMLQuantizationType.IQ1_S or tensor_type == GGMLQuantizationType.IQ1_M:
                print(f"Keeping {tensor_name} as INT8 to maintain IQ1_M quantization")
                weights = np.frombuffer(tensor_data.data, dtype=np.int8).copy()  # keep as INT8, do not dequantize
            elif use_int8:
                print(f"Converting {tensor_name} to INT8")
                weights = np.frombuffer(tensor_data.data, dtype=np.int8).copy()
            else:
                weights = dequantize(tensor_data.data, tensor_data.tensor_type).astype(np.float16).copy()
            
            if tensor_type == GGMLQuantizationType.IQ1_M:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.int8)  # store directly as INT8
            elif use_int8:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.int8)
            elif use_bf16:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.bfloat16)
            else:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float16)
            
            weights_hf = weights_tensor
        except Exception as e:
            print(f"Error during conversion for tensor '{tensor_name}': {e}")
            weights = np.zeros(tensor_info['shape'], dtype=np.float16)  # fall back to zeros to avoid crashing
            weights_tensor = torch.from_numpy(weights).to(torch.float16)
            weights_hf = weights_tensor

        print(f"Processed tensor: {tensor_name} | Shape: {weights_hf.shape} | Type: {weights_tensor.dtype}")
        del weights_tensor
        del weights
        del tensor_data
        torch.cuda.empty_cache()

        tensors_dict[tensor_name] = weights_hf
        del weights_hf

        if i % 10 == 0:
            print(f"Processing {i+1}/{len(tensors_metadata)} tensors...")
            torch.cuda.empty_cache()

    metadata = {"modelspec.architecture": f"{reader.get_field(Keys.General.FILE_TYPE)}", "description": "Model converted from gguf."}

    save_file(tensors_dict, output_path, metadata=metadata)
    print("Conversion complete!")

def main():
    parser = argparse.ArgumentParser(description="Convert GGUF files to safetensors format.")
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Path to the output safetensors file.")
    parser.add_argument("--bf16", action="store_true", help="Convert tensors to BF16 format instead of FP16.")
    parser.add_argument("--int8", action="store_true", help="Convert tensors to INT8 format.")
    
    args = parser.parse_args()
    
    convert_gguf_to_safetensors(args.input, args.output, args.bf16, args.int8)

if __name__ == "__main__":
    main()
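
I invoke it like this (the paths are placeholders for my local files):

python gguf_to_safetensors.py --input DeepSeek-R1-UD-IQ1_M.gguf --output model.safetensors --bf16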

Issues:

  • This script works for FP16 and BF16, but I am not sure how to properly handle IQ1_M quantization.
  • The dequantize() function works for some cases, but I am unsure whether it correctly handles IQ1_M (see the quick check right after this list).
  • I tried modifying gguf.quants but got errors when importing specific quantization functions.
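
This is the quick check I am running to see whether my installed gguf-py build can dequantize IQ1_M at all. I am assuming (but have not confirmed) that unsupported types raise an exception rather than silently returning wrong data; the file path is a placeholder:

from gguf import GGUFReader, dequantize
from gguf.constants import GGMLQuantizationType

reader = GGUFReader("DeepSeek-R1-UD-IQ1_M.gguf")  # placeholder path

# Try to dequantize the first IQ1_M tensor found.
for tensor in reader.tensors:
    if tensor.tensor_type == GGMLQuantizationType.IQ1_M:
        try:
            out = dequantize(tensor.data, tensor.tensor_type)
            print(f"{tensor.name}: dequantized OK -> dtype={out.dtype}, elements={out.size}")
        except Exception as exc:
            print(f"{tensor.name}: IQ1_M dequantization failed: {exc}")
        break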

Questions:

  1. How should I correctly dequantize IQ1_M quantized tensors back to a usable format in Safetensors? (A rough sketch of what I have in mind is below this list.)
  2. Is there a function in gguf that properly handles IQ1_M dequantization?
  3. Would I need a custom dequantization function for IQ1_M? If so, any guidance?
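
For question 1, this is the direction I am leaning towards: dequantize every tensor to float32, restore the original shape, and store it as BF16. It is only a sketch, untested on the full model, and it assumes gguf-py can dequantize every quantization type present in the file; the paths are placeholders:

import numpy as np
import torch
from gguf import GGUFReader, dequantize
from safetensors.torch import save_file

def gguf_to_safetensors_full_dequant(gguf_path: str, output_path: str) -> None:
    reader = GGUFReader(gguf_path)
    tensors: dict[str, torch.Tensor] = {}
    for tensor in reader.tensors:
        # Dequantize to float (raises if the installed gguf-py version
        # does not support this quantization type).
        data = dequantize(tensor.data, tensor.tensor_type)
        # GGUF stores dimensions in reverse order relative to the original
        # row-major layout, so reverse them before reshaping.
        shape = tuple(reversed(tensor.shape.tolist()))
        array = np.array(data, dtype=np.float32).reshape(shape)
        tensors[tensor.name] = torch.from_numpy(array).to(torch.bfloat16)
    save_file(tensors, output_path, metadata={"description": "Dequantized from GGUF."})

# Placeholder paths:
# gguf_to_safetensors_full_dequant("DeepSeek-R1-UD-IQ1_M.gguf", "model.safetensors")

I realize this keeps every dequantized tensor in memory and would write one enormous file for a model of this size, so it would need sharding in practice, but I mainly want to know whether the dequantization step itself is correct for IQ1_M.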

Any help would be greatly appreciated! Thanks in advance! 🙏
