Ethan Chang committed on
Commit 6a3dbe6 · 1 Parent(s): 0f566b9

Modified pathing for tinyllama, added Hugging Face downloader for tinyllama

code/modules/chat/chat_model_loader.py CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
@@ -14,6 +16,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+            filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
+            cache_dir=model_cache_path
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] == "openai":
             llm = ChatOpenAI(
@@ -21,7 +31,8 @@ class ChatModelLoader:
             )
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
             n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-            model_path = self.config["llm_params"]["local_llm_params"]["model"]
+            model_path = self._verify_model_cache(self.config["llm_params"]["local_llm_params"]["model"])
+            print(model_path)
             llm = LlamaCpp(
                 model_path=model_path,
                 n_batch=n_batch,
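
For context, a minimal usage sketch (not part of the commit) of how the updated local_llm branch might be exercised. Only the config keys visible in the diff are used; the import paths (assuming code/ is on sys.path) and the ChatModelLoader(config) constructor signature are assumptions.

    from modules.chat.chat_model_loader import ChatModelLoader
    from modules.config.constants import LLAMA_PATH

    # Hypothetical minimal config; only the keys read in the diff are shown.
    config = {
        "llm_params": {
            "llm_loader": "local_llm",                  # select the LlamaCpp branch
            "local_llm_params": {"model": LLAMA_PATH},  # now a cache directory, not a .gguf file
        }
    }

    loader = ChatModelLoader(config)
    llm = loader.load_chat_model()  # downloads the GGUF on first run, then loads it with LlamaCpp

The real config presumably also carries the remaining LlamaCpp keyword arguments that are truncated in the hunk above.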
code/modules/config/constants.py CHANGED
@@ -78,5 +78,5 @@ Question: {question}
 
 # Model Paths
 
-LLAMA_PATH = "../storage/models/tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
+LLAMA_PATH = "../storage/models/tinyllama"
 MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"
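
As a companion note, a standalone sketch (assuming huggingface_hub is installed, network access is available, and the relative path resolves from the app's working directory) of how the new LLAMA_PATH directory gets populated and resolved. The directory layout is the standard huggingface_hub cache structure (models--<org>--<repo>/snapshots/<revision>/<file>), which is what the */snapshots/*/*.gguf glob in _verify_model_cache relies on.

    from pathlib import Path
    from huggingface_hub import hf_hub_download

    cache_dir = "../storage/models/tinyllama"  # the new LLAMA_PATH: a cache directory, not a file path

    # Populates <cache_dir>/models--TheBloke--TinyLlama-1.1B-Chat-v1.0-GGUF/snapshots/<rev>/tinyllama-1.1b-chat-v1.0.Q5_0.gguf
    hf_hub_download(
        repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
        cache_dir=cache_dir,
    )

    # Same resolution step as _verify_model_cache: pick the cached .gguf back out of that layout.
    gguf_path = str(next(Path(cache_dir).glob("*/snapshots/*/*.gguf")))
    print(gguf_path)  # pass this to LlamaCpp(model_path=...)

Since hf_hub_download already returns the resolved local file path, the glob is effectively a verification step; returning the download call's result directly would be an equivalent simplification.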