herMaster committed
Commit b397e77
Parent(s): 2bf314c

reverting the changes

Files changed (1)
  1. app.py +17 -17
app.py CHANGED
@@ -6,7 +6,7 @@ from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.llms import LlamaCpp
+# from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
 # from langchain.llms import CTransformers
@@ -26,23 +26,23 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
 print("loading the LLM......................................")
 
-llm = LlamaCpp(
-    model_path="./llama-2-7b-chat.Q3_K_S.gguf",
-    temperature = 0.2,
-    n_ctx=2048,
-    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
-    max_tokens = 500,
-    callback_manager=callback_manager,
-    verbose=True,
-)
+# llm = LlamaCpp(
+#     model_path="./llama-2-7b-chat.Q3_K_S.gguf",
+#     temperature = 0.2,
+#     n_ctx=2048,
+#     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
+#     max_tokens = 500,
+#     callback_manager=callback_manager,
+#     verbose=True,
+# )
 
-# llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-#                                            model_file="llama-2-7b-chat.Q3_K_S.gguf",
-#                                            model_type="llama",
-#                                            temperature = 0.2,
-#                                            repetition_penalty = 1.5,
-#                                            max_new_tokens = 300,
-#                                            )
+llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                           model_file="llama-2-7b-chat.Q3_K_S.gguf",
+                                           model_type="llama",
+                                           temperature = 0.2,
+                                           repetition_penalty = 1.5,
+                                           max_new_tokens = 300,
+                                           )
 
 
 
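Note on the reverted state: this commit comments out the LangChain LlamaCpp loader and re-enables the CTransformers AutoModelForCausalLM path, but the import for AutoModelForCausalLM is not visible in these hunks (only the commented-out `# from langchain.llms import CTransformers` line remains at app.py line 12). Below is a minimal, self-contained sketch of the path being re-enabled, assuming the `ctransformers` package supplies AutoModelForCausalLM; the import and the prompt string are inferred for illustration, not shown in this diff.

# Minimal sketch of the CTransformers path this commit re-enables.
# Assumes `pip install ctransformers`; this import is inferred, since it
# does not appear in the diff hunks above.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",           # HF Hub repo with quantized GGUF weights
    model_file="llama-2-7b-chat.Q3_K_S.gguf",  # which quantization file to load
    model_type="llama",                        # architecture hint for ctransformers
    temperature=0.2,
    repetition_penalty=1.5,
    max_new_tokens=300,
)

# The returned model object is callable and generates text directly.
print(llm("Summarize the uploaded PDF in one sentence."))  # illustrative prompt

One practical difference between the two paths: LlamaCpp expects the .gguf file on local disk (model_path="./llama-2-7b-chat.Q3_K_S.gguf"), while CTransformers' from_pretrained can fetch the model file from the Hugging Face Hub on first use, which may be why the revert favors it in a hosted Space.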