Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
•
2327177
1
Parent(s):
7e27981
Update threads=psutil.cpu_count(logical=False)
Browse files
- app.py +19 -36
- requirements.txt +2 -1
app.py
CHANGED
@@ -5,10 +5,11 @@ import os
|
|
5 |
import time
|
6 |
from dataclasses import asdict, dataclass
|
7 |
from pathlib import Path
|
8 |
-
from urllib.parse import urlparse
|
9 |
from types import SimpleNamespace
|
|
|
10 |
|
11 |
import gradio as gr
|
|
|
12 |
from about_time import about_time
|
13 |
|
14 |
# from ctransformers import AutoConfig, AutoModelForCausalLM
|
@@ -16,6 +17,12 @@ from ctransformers import AutoModelForCausalLM
|
|
16 |
from huggingface_hub import hf_hub_download
|
17 |
from loguru import logger
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
os.environ["TZ"] = "Asia/Shanghai"
|
20 |
try:
|
21 |
time.tzset() # type: ignore # pylint: disable=no-member
|
@@ -230,36 +237,6 @@ def generate(
|
|
230 |
**asdict(generation_config),
|
231 |
)
|
232 |
|
233 |
-
_ = '''
|
234 |
-
_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
|
235 |
-
|
236 |
-
# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
|
237 |
-
_ = """
|
238 |
-
mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
|
239 |
-
mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
|
240 |
-
mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
|
241 |
-
mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
|
242 |
-
mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
|
243 |
-
"""
|
244 |
-
MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
|
245 |
-
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
246 |
-
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
247 |
-
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
248 |
-
|
249 |
-
# https://huggingface.co/TheBloke/WizardLM-13B-V1.0-Uncensored-GGML
|
250 |
-
MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin" # 8.4G
|
251 |
-
# '''
|
252 |
-
|
253 |
-
URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
|
254 |
-
MODEL_FILENAME = Path(URL).name
|
255 |
-
REPO_ID = "/".join(urlparse(url).path.strip('/').split('/')[:2]) # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
|
256 |
-
|
257 |
-
DESTINATION_FOLDER = "models"
|
258 |
-
|
259 |
-
logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
|
260 |
-
download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
|
261 |
-
logger.info("done dl")
|
262 |
-
|
263 |
# if "mpt" in model_filename:
|
264 |
# config = AutoConfig.from_pretrained("mosaicml/mpt-30b-cha t", context_length=8192)
|
265 |
# llm = AutoModelForCausalLM.from_pretrained(
|
@@ -278,7 +255,16 @@ llm = AutoModelForCausalLM.from_pretrained(
|
|
278 |
)
|
279 |
# """
|
280 |
|
281 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
logger.info("load llm")
|
283 |
|
284 |
_ = Path("models", MODEL_FILENAME).absolute().as_posix()
|
@@ -288,14 +274,11 @@ LLM = AutoModelForCausalLM.from_pretrained(
|
|
288 |
REPO_ID, # DESTINATION_FOLDER, # model_path_or_repo_id: str required
|
289 |
model_file=_,
|
290 |
model_type="llama", # "starcoder", AutoConfig.from_pretrained("TheBloke/WizardLM-13B-V1.0-Uncensored-GGML")
|
291 |
-
threads=
|
292 |
)
|
293 |
|
294 |
logger.info("done load llm")
|
295 |
|
296 |
-
cpu_count = os.cpu_count() // 2 # type: ignore
|
297 |
-
logger.debug(f"{cpu_count=}")
|
298 |
-
|
299 |
GENERATION_CONFIG = GenerationConfig(
|
300 |
temperature=0.2,
|
301 |
top_k=0,
|
|
|
5 |
import time
|
6 |
from dataclasses import asdict, dataclass
|
7 |
from pathlib import Path
|
|
|
8 |
from types import SimpleNamespace
|
9 |
+
from urllib.parse import urlparse
|
10 |
|
11 |
import gradio as gr
|
12 |
+
import psutil
|
13 |
from about_time import about_time
|
14 |
|
15 |
# from ctransformers import AutoConfig, AutoModelForCausalLM
|
|
|
17 |
from huggingface_hub import hf_hub_download
|
18 |
from loguru import logger
|
19 |
|
20 |
+
URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
|
21 |
+
MODEL_FILENAME = Path(URL).name
|
22 |
+
REPO_ID = "/".join(urlparse(URL).path.strip('/').split('/')[:2]) # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
|
23 |
+
|
24 |
+
DESTINATION_FOLDER = "models"
|
25 |
+
|
26 |
os.environ["TZ"] = "Asia/Shanghai"
|
27 |
try:
|
28 |
time.tzset() # type: ignore # pylint: disable=no-member
|
|
|
237 |
**asdict(generation_config),
|
238 |
)
|
239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
# if "mpt" in model_filename:
|
241 |
# config = AutoConfig.from_pretrained("mosaicml/mpt-30b-cha t", context_length=8192)
|
242 |
# llm = AutoModelForCausalLM.from_pretrained(
|
|
|
255 |
)
|
256 |
# """
|
257 |
|
258 |
+
logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
|
259 |
+
download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
|
260 |
+
logger.info("done dl")
|
261 |
+
|
262 |
+
logger.debug(f"{os.cpu_count()=} {psutil.cpu_count(logical=False)=}")
|
263 |
+
cpu_count = os.cpu_count() // 2 # type: ignore
|
264 |
+
cpu_count = psutil.cpu_count(logical=False)
|
265 |
+
|
266 |
+
logger.debug(f"{cpu_count=}")
|
267 |
+
|
268 |
logger.info("load llm")
|
269 |
|
270 |
_ = Path("models", MODEL_FILENAME).absolute().as_posix()
|
|
|
274 |
REPO_ID, # DESTINATION_FOLDER, # model_path_or_repo_id: str required
|
275 |
model_file=_,
|
276 |
model_type="llama", # "starcoder", AutoConfig.from_pretrained("TheBloke/WizardLM-13B-V1.0-Uncensored-GGML")
|
277 |
+
threads=cpu_count,
|
278 |
)
|
279 |
|
280 |
logger.info("done load llm")
|
281 |
|
|
|
|
|
|
|
282 |
GENERATION_CONFIG = GenerationConfig(
|
283 |
temperature=0.2,
|
284 |
top_k=0,
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ transformers==4.30.2
|
|
3 |
huggingface_hub
|
4 |
gradio
|
5 |
loguru
|
6 |
-
about-time
|
|
|
|
3 |
huggingface_hub
|
4 |
gradio
|
5 |
loguru
|
6 |
+
about-time
|
7 |
+
psutil
|