FantasiaFoundry committed on
Commit
c41c9ab
·
verified ·
1 Parent(s): 70a5722
Files changed (1) hide show
  1. gguf-imat.py +0 -157
gguf-imat.py DELETED
@@ -1,157 +0,0 @@
1
- import os
2
- import requests
3
- import zipfile
4
- import subprocess
5
- import shutil
6
- from huggingface_hub import snapshot_download
7
-
8
- # Function to clone or update the llama.cpp repository with shallow cloning
9
def clone_or_update_llama_cpp():
    """Clone llama.cpp next to this script, or update an existing checkout.

    The process working directory is switched to the script's directory and
    is left there on return. When no ``llama.cpp`` entry exists there, the
    repository is shallow-cloned (``--depth 1``); otherwise ``git pull`` is
    run inside it.

    A non-zero git exit status is reported on stdout instead of being
    silently followed by the "ready" message. Nothing is raised for it, so
    an existing checkout can still be used when the update fails.
    """
    print("Preparing...")
    base_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(base_dir)  # Move to the base directory of the script

    repo_dir = os.path.join(base_dir, "llama.cpp")
    if not os.path.exists(repo_dir):
        git_command = ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"]
        result = subprocess.run(git_command, cwd=base_dir)
    else:
        # cwd= runs the pull inside the checkout without moving this process
        # in and out of the directory.
        git_command = ["git", "pull"]
        result = subprocess.run(git_command, cwd=repo_dir)

    if result.returncode != 0:
        print(f"Warning: '{' '.join(git_command)}' exited with code {result.returncode}.")
        return
    print("The 'llama.cpp' repository is ready.")
20
-
21
- # Function to download and extract the latest release of llama.cpp
22
def download_llama_release():
    """Download and unpack the latest prebuilt llama.cpp Windows CUDA build.

    The release tag is the last path segment of the URL that
    ``.../releases/latest`` resolves to. The matching
    ``llama-<tag>-bin-win-cublas-cu12.2.0-x64.zip`` archive is saved under
    ``<script dir>/bin/dl`` and extracted into ``<script dir>/bin``.

    Side effect: the process working directory is changed to ``bin/dl`` and
    left there.

    Returns:
        The release tag string on success, or ``None`` when either HTTP
        request does not answer with status 200 (a message is printed).
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(dl_dir, exist_ok=True)
    os.chdir(dl_dir)

    latest_release_url = "https://github.com/ggerganov/llama.cpp/releases/latest"
    response = requests.get(latest_release_url)
    if response.status_code != 200:
        print("Failed to fetch the latest release information.")
        return None

    # The final URL after redirects ends with the tag of the latest release.
    latest_release_tag = response.url.split("/")[-1]
    archive_name = f"llama-{latest_release_tag}-bin-win-cublas-cu12.2.0-x64.zip"
    download_url = (
        "https://github.com/ggerganov/llama.cpp/releases/download/"
        f"{latest_release_tag}/{archive_name}"
    )

    # Announce the download before it starts, not after it has finished.
    print("Downloading latest 'llama.cpp' prebuilt Windows binaries...")
    response = requests.get(download_url)
    if response.status_code != 200:
        print("Failed to download the release file.")
        return None

    # Use an absolute path so the archive location does not depend on cwd.
    archive_path = os.path.join(dl_dir, archive_name)
    with open(archive_path, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall(bin_dir)
    print("Download and extraction completed successfully.")
    return latest_release_tag
47
-
48
- # Function to download and extract cudart if necessary
49
def download_cudart_if_necessary(latest_release_tag):
    """Fetch the CUDA runtime DLLs for the prebuilt binaries unless present.

    The download is skipped when ``cublas64_12.dll``, ``cublasLt64_12.dll``
    and ``cudart64_12.dll`` all exist in ``<script dir>/bin``. Otherwise
    ``cudart-llama-bin-win-cu12.2.0-x64.zip`` is downloaded for the given
    release into ``bin/dl`` and extracted into ``bin``.

    Args:
        latest_release_tag: llama.cpp release tag used to build the download
            URL. A falsy value (``None`` or ``""``) means no tag is known;
            in that case nothing is requested and a failure message is
            printed.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    cudart_dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(cudart_dl_dir, exist_ok=True)

    cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
    cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]

    # Check if all required files exist
    if all(os.path.exists(os.path.join(bin_dir, name)) for name in cudart_extracted_files):
        print("Cuda resources already exist. Skipping download.")
        return

    # Without a tag the URL would contain the literal text "None"; fail early
    # instead of sending a request that cannot succeed.
    if not latest_release_tag:
        print("Failed to download the cudart release file: no release tag available.")
        return

    print("Preparing 'cuda' resources...")
    cudart_download_url = (
        "https://github.com/ggerganov/llama.cpp/releases/download/"
        f"{latest_release_tag}/cudart-llama-bin-win-cu12.2.0-x64.zip"
    )
    response = requests.get(cudart_download_url)
    if response.status_code != 200:
        print("Failed to download the cudart release file.")
        return

    with open(cudart_zip_file, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(cudart_zip_file, "r") as zip_ref:
        zip_ref.extractall(bin_dir)
    print("Download and extraction of cudart completed successfully.")
73
-
74
- # Function to collect user input and download the specified model repository
75
def download_model_repo():
    """Prompt for a Hugging Face model ID, download it, and start conversion.

    The model is stored in ``<script dir>/models/<name>``, where ``<name>``
    is the last ``/``-separated segment of the entered ID. When that folder
    already exists the download (and the revision prompt) is skipped. In
    both cases the model is then handed to ``convert_model_to_gguf_f16``.

    Raises:
        ValueError: If no usable folder name can be derived from the entered
            model ID (empty input, only slashes, ``.`` or ``..``).
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.join(base_dir, "models")
    os.makedirs(models_dir, exist_ok=True)

    raw_model_id = input("Enter the model ID to download (e.g., huggingface/transformers): ")
    # Drop surrounding whitespace and stray slashes so "user/model/ " still
    # resolves to "model".
    model_id = raw_model_id.strip().strip("/")
    model_name = model_id.split("/")[-1]

    # An empty name would make model_dir the whole models folder, which
    # "already exists" and is removed by the conversion step afterwards.
    if not model_name or model_name in (".", ".."):
        raise ValueError(f"Invalid model ID: {raw_model_id!r}")

    model_dir = os.path.join(models_dir, model_name)

    # Download the model repository if it doesn't exist
    if not os.path.exists(model_dir):
        revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"

        print("Downloading model repository...")
        snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
        print("Model repository downloaded successfully.")
    else:
        print("Model already exists.")

    # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
    convert_model_to_gguf_f16(base_dir, model_dir, model_name)
97
-
98
- # Function to convert the downloaded model to GGUF F16 format
99
def convert_model_to_gguf_f16(base_dir, model_dir, model_name):
    """Convert a model to GGUF F16, build its imatrix, then quantize it.

    Steps, each skipped when its output already exists:
      1. Run ``llama.cpp/convert.py`` to write
         ``models/<model_name>-GGUF/<model_name>-F16.gguf``, then delete
         ``model_dir``.
      2. Run ``bin/imatrix.exe`` against ``imatrix/imatrix.txt`` and move the
         resulting ``imatrix.dat`` into the GGUF folder.
    Finally ``quantize_models`` is called.

    If the conversion or the imatrix run fails, a message is printed and the
    function returns without running the later steps. In particular, the
    downloaded model is only deleted after a successful conversion.

    Args:
        base_dir: Directory containing ``llama.cpp``, ``bin``, ``imatrix``
            and ``models``.
        model_dir: Directory of the downloaded model; removed once the F16
            file has been produced.
        model_name: Name used for the output folder and file names.
    """
    convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")

    os.makedirs(gguf_dir, exist_ok=True)

    # Execute the conversion command if F16 file doesn't exist
    if not os.path.exists(gguf_model_path):
        result = subprocess.run(
            ["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"]
        )
        if result.returncode != 0 or not os.path.exists(gguf_model_path):
            # Keep the download so the conversion can be retried without
            # fetching the model again.
            print(f"Conversion to GGUF F16 failed; keeping '{model_dir}'.")
            return

        # Delete the original model directory
        shutil.rmtree(model_dir)
        print(f"Original model directory '{model_dir}' deleted.")

    # Execute the imatrix command if imatrix.dat doesn't exist
    imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
    imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
    imatrix_txt = os.path.join(base_dir, "imatrix", "imatrix.txt")
    if not os.path.exists(imatrix_output):
        result = subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"])
        # No output path is passed to the tool; this flow expects it to leave
        # imatrix.dat in the current working directory.
        generated_imatrix = os.path.abspath("imatrix.dat")
        if result.returncode != 0 or not os.path.exists(generated_imatrix):
            print("Failed to generate imatrix.dat; skipping quantization.")
            return
        # Move the imatrix.dat file to the GGUF folder
        shutil.move(generated_imatrix, gguf_dir)
        print("imatrix.dat generated successfully.")

    # Quantize the models
    quantize_models(base_dir, model_name)
127
-
128
- # Function to quantize models with different options
129
def quantize_models(base_dir, model_name):
    """Quantize the F16 GGUF model into each supported imatrix variant.

    For every quantization type, ``bin/quantize.exe`` is run with the
    ``imatrix.dat`` found in ``models/<model_name>-GGUF`` and writes
    ``<model_name>-<type>-imat.gguf`` into that same folder. Each run is
    reported as success or failure based on the quantizer's exit code; a
    failure does not stop the remaining variants.

    Args:
        base_dir: Directory containing the ``bin`` and ``models`` folders.
        model_name: Name used for the GGUF folder and file names.
    """
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
    # These paths are the same for every variant, so build them once.
    quantize_command = os.path.join(base_dir, "bin", "quantize.exe")
    imatrix_path = os.path.join(gguf_dir, "imatrix.dat")

    quantization_options = [
        "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M",
        "Q5_K_S", "Q6_K", "Q8_0", "IQ3_M", "IQ3_S", "IQ3_XS", "IQ3_XXS",
    ]

    for quant_option in quantization_options:
        quantized_gguf_name = f"{model_name}-{quant_option}-imat.gguf"
        quantized_gguf_path = os.path.join(gguf_dir, quantized_gguf_name)

        result = subprocess.run(
            [quantize_command, "--imatrix", imatrix_path,
             f16_gguf_path, quantized_gguf_path, quant_option],
            cwd=gguf_dir,
        )
        if result.returncode == 0:
            print(f"Model quantized with {quant_option} option.")
        else:
            print(f"Quantization with {quant_option} option failed (exit code {result.returncode}).")
147
-
148
- # Main function to execute the steps
149
def main():
    """Run the whole pipeline in order.

    Prepares the llama.cpp checkout, downloads the prebuilt binaries,
    downloads the CUDA runtime files for that release, then prompts for a
    model and processes it.
    """
    clone_or_update_llama_cpp()
    latest_release_tag = download_llama_release()
    if latest_release_tag is None:
        # The release lookup failed, so there is no tag to build the cudart
        # download URL from.
        print("Skipping 'cuda' resources: latest release tag is unavailable.")
    else:
        download_cudart_if_necessary(latest_release_tag)
    download_model_repo()
    print("Finished.")


if __name__ == "__main__":
    main()