How to Convert DeepSeek-R1-UD-IQ1_M GGUF Back to Safetensors?
Hello everyone,
I need to use the DeepSeek-R1-UD-IQ1_M model in Safetensors format.
Previously, I was using llama.cpp to convert Safetensors → GGUF, but now I need to convert it back (GGUF → Safetensors),
and I am unsure how to proceed.
I found a related discussion:
https://github.com/ggml-org/llama.cpp/discussions/9410
However, the approach described there only supports BF16 and FP16 output, and I need to handle IQ1_M quantization.
Current Setup
I have copied the gguf folder from llama.cpp into my working directory.
I am using the following script (gguf_to_safetensors.py), which I modified for the conversion:
import os
import argparse
import torch
import numpy as np
from safetensors.torch import save_file
from safetensors import safe_open
from typing import Dict, Tuple
from gguf import GGUFReader, dequantize
from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType, Keys


def load_gguf_and_extract_metadata(gguf_path: str) -> Tuple[GGUFReader, list]:
    """Load GGUF file and extract metadata and tensors."""
    reader = GGUFReader(gguf_path)
    tensors_metadata = []
    for tensor in reader.tensors:
        tensor_metadata = {
            'name': tensor.name,
            'shape': tuple(tensor.shape.tolist()),
            'n_elements': tensor.n_elements,
            'n_bytes': tensor.n_bytes,
            'data_offset': tensor.data_offset,
            'type': tensor.tensor_type,
        }
        tensors_metadata.append(tensor_metadata)
    return reader, tensors_metadata


def convert_gguf_to_safetensors(gguf_path: str, output_path: str, use_bf16: bool, use_int8: bool) -> None:
    reader, tensors_metadata = load_gguf_and_extract_metadata(gguf_path)
    print(f"Extracted {len(tensors_metadata)} tensors from GGUF file")

    tensors_dict: Dict[str, torch.Tensor] = {}
    for i, tensor_info in enumerate(tensors_metadata):
        tensor_name = tensor_info['name']
        tensor_data = reader.get_tensor(i)
        tensor_type = tensor_info['type']
        weights = None
        weights_hf = None
        try:
            if tensor_type == GGMLQuantizationType.IQ1_S or tensor_type == GGMLQuantizationType.IQ1_M:
                print(f"Keeping {tensor_name} as INT8 to maintain IQ1_M quantization")
                # Keep the raw quantized bytes as INT8 instead of dequantizing
                weights = np.frombuffer(tensor_data.data, dtype=np.int8).copy()
            elif use_int8:
                print(f"Converting {tensor_name} to INT8")
                weights = np.frombuffer(tensor_data.data, dtype=np.int8).copy()
            else:
                weights = dequantize(tensor_data.data, tensor_data.tensor_type).astype(np.float16).copy()

            if tensor_type == GGMLQuantizationType.IQ1_M:
                # Store the raw block bytes directly as INT8
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.int8)
            elif use_int8:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.int8)
            elif use_bf16:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.bfloat16)
            else:
                weights_tensor = torch.from_numpy(weights).to(dtype=torch.float16)
            weights_hf = weights_tensor
        except Exception as e:
            print(f"Error during conversion for tensor '{tensor_name}': {e}")
            # Fall back to zeros so a single bad tensor does not crash the whole run
            weights = np.zeros(tensor_info['shape'], dtype=np.float16)
            weights_tensor = torch.from_numpy(weights).to(torch.float16)
            weights_hf = weights_tensor

        print(f"Processed tensor: {tensor_name} | Shape: {weights_hf.shape} | Type: {weights_tensor.dtype}")
        del weights_tensor
        del weights
        del tensor_data
        torch.cuda.empty_cache()
        tensors_dict[tensor_name] = weights_hf
        del weights_hf
        if i % 10 == 0:
            print(f"Processing {i+1}/{len(tensors_metadata)} tensors...")
            torch.cuda.empty_cache()

    metadata = {
        "modelspec.architecture": f"{reader.get_field(Keys.General.FILE_TYPE)}",
        "description": "Model converted from gguf.",
    }
    save_file(tensors_dict, output_path, metadata=metadata)
    print("Conversion complete!")


def main():
    parser = argparse.ArgumentParser(description="Convert GGUF files to safetensors format.")
    parser.add_argument("--input", required=True, help="Path to the input GGUF file.")
    parser.add_argument("--output", required=True, help="Path to the output safetensors file.")
    parser.add_argument("--bf16", action="store_true", help="Convert tensors to BF16 format instead of FP16.")
    parser.add_argument("--int8", action="store_true", help="Convert tensors to INT8 format.")
    args = parser.parse_args()

    convert_gguf_to_safetensors(args.input, args.output, args.bf16, args.int8)


if __name__ == "__main__":
    main()
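For reference, I invoke the script like this (the GGUF path is just a placeholder for wherever the model file lives):

python gguf_to_safetensors.py --input /path/to/DeepSeek-R1-UD-IQ1_M.gguf --output DeepSeek-R1-UD-IQ1_M.safetensors --bf16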
Issues:
- This script works for FP16 and BF16, but I am not sure how to properly handle IQ1_M quantization.
- The dequantize() function works for some quantization types, but I am unsure whether it correctly handles IQ1_M (a minimal probe for this is included right after this list).
- I tried modifying gguf.quants but got errors when importing specific quantization functions.
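The probe mentioned above is a minimal sketch that only uses the same gguf calls as the script: it tries gguf-py's dequantize on the first IQ1_M tensor and reports whether it succeeds. The .gguf file name is a placeholder.

# Minimal probe: does the installed gguf-py dequantize IQ1_M at all?
from gguf import GGUFReader, dequantize
from gguf.constants import GGMLQuantizationType

reader = GGUFReader("DeepSeek-R1-UD-IQ1_M.gguf")  # placeholder path
for t in reader.tensors:
    if t.tensor_type == GGMLQuantizationType.IQ1_M:
        try:
            deq = dequantize(t.data, t.tensor_type)
            print(f"{t.name}: dequantized OK -> shape {deq.shape}, dtype {deq.dtype}")
        except Exception as e:
            print(f"{t.name}: dequantize failed: {e}")
        break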
Questions:
- How should I correctly dequantize IQ1_M quantized tensors back to a usable format in Safetensors? (A rough sketch of what I have in mind follows this list.)
- Is there a function in gguf that properly handles IQ1_M dequantization?
- Would I need a custom dequantization function for IQ1_M? If so, any guidance?
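To make the first question concrete, this is roughly the branch I imagine replacing the INT8 special case with inside the conversion loop, assuming gguf-py's dequantize actually implements IQ1_M. I am also assuming GGUF records dimensions in GGML order (innermost dimension first), so the reshape back to the numpy/PyTorch layout may be needed; please correct me if that is wrong.

# Hypothetical replacement for the IQ1_M branch in the loop above, reusing
# tensor_data, tensor_info and use_bf16 from gguf_to_safetensors.py.
# Assumes the installed gguf-py can dequantize IQ1_M.
deq = dequantize(tensor_data.data, tensor_data.tensor_type).astype(np.float32)
# GGUF seems to store dims in GGML order (innermost first), so reverse them
# to recover the row-major numpy/PyTorch shape before saving.
deq = deq.reshape(tuple(reversed(tensor_info['shape'])))
weights_tensor = torch.from_numpy(deq).to(torch.bfloat16 if use_bf16 else torch.float16)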
Any help would be greatly appreciated! Thanks in advance! 🙏