Ethan Chang committed
Commit · 6a3dbe6 · 1 Parent(s): 0f566b9
Modified pathing for tinyllama, added in huggingface downloader for tinyllama
code/modules/chat/chat_model_loader.py
CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
@@ -14,6 +16,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+            filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
+            cache_dir=model_cache_path
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] == "openai":
             llm = ChatOpenAI(
@@ -21,7 +31,8 @@ class ChatModelLoader:
             )
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
             n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-            model_path = self.config["llm_params"]["local_llm_params"]["model"]
+            model_path = self._verify_model_cache(self.config["llm_params"]["local_llm_params"]["model"])
+            print(model_path)
             llm = LlamaCpp(
                 model_path=model_path,
                 n_batch=n_batch,
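For context, the new _verify_model_cache helper downloads the quantized TinyLlama GGUF from the TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF repo into the configured cache directory, then globs the hub cache layout for the resulting file. A minimal standalone sketch of the same flow (the function name and the error handling here are added for illustration; the commit itself simply returns the first glob match):

from pathlib import Path
from huggingface_hub import hf_hub_download

def resolve_tinyllama_gguf(cache_dir: str) -> str:
    # Download the quantized TinyLlama weights, or reuse the cached copy if already present.
    hf_hub_download(
        repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
        cache_dir=cache_dir,
    )
    # The hub cache stores files under <cache_dir>/models--<org>--<repo>/snapshots/<revision>/,
    # so glob for the .gguf file and hand its path to LlamaCpp.
    matches = list(Path(cache_dir).glob("*/snapshots/*/*.gguf"))
    if not matches:
        raise FileNotFoundError(f"No .gguf file found under {cache_dir}")
    return str(matches[0])

Note that hf_hub_download itself returns the local path of the downloaded file, so the glob-based lookup could in principle be replaced by that return value; the commit keeps the glob.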
code/modules/config/constants.py
CHANGED
@@ -78,5 +78,5 @@ Question: {question}
 
 # Model Paths
 
-LLAMA_PATH = "../storage/models/tinyllama
+LLAMA_PATH = "../storage/models/tinyllama"
 MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"
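With this change, LLAMA_PATH points at a cache directory rather than a single .gguf file, since ChatModelLoader now passes the configured model value to hf_hub_download as cache_dir. A hedged sketch of the config shape the loader reads (key names taken from the diff above; how the project actually builds this config, e.g. from YAML, is assumed):

# Hypothetical config dict wiring LLAMA_PATH into ChatModelLoader.load_chat_model.
LLAMA_PATH = "../storage/models/tinyllama"

config = {
    "llm_params": {
        "llm_loader": "local_llm",       # selects the LlamaCpp branch
        "local_llm_params": {
            "model": LLAMA_PATH,         # used by _verify_model_cache as the hf_hub_download cache directory
        },
    },
}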