manojpatil committed
Commit 86033d5
1 Parent(s): e1a910b

Update app.py

Files changed (1)
  1. app.py +5 -4
app.py CHANGED
@@ -44,8 +44,8 @@ vectorstore = Milvus(connection_args=connection_args, collection_name=collection
 
 #downloading the model
 
-url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin"
-output_file = "llama-2-7b-chat.ggmlv3.q2_K.bin" # The filename you want to save the downloaded file as
+url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
+output_file = "llama-2-7b-chat.Q2_K.gguf" # The filename you want to save the downloaded file as
 
 response = requests.get(url)
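Context for this hunk: llama.cpp replaced its GGML file format with GGUF in August 2023, and newer llama-cpp-python builds load only .gguf files, which is presumably what motivated swapping llama-2-7b-chat.ggmlv3.q2_K.bin for llama-2-7b-chat.Q2_K.gguf. One caveat in the surrounding code: a bare requests.get(url) buffers the multi-gigabyte model in memory before it is written out. A minimal sketch of a chunked download under the same url and output_file, with the chunk size and timeout as illustrative assumptions:

import requests

url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
output_file = "llama-2-7b-chat.Q2_K.gguf"  # must match the path handed to LlamaCpp later

# stream=True downloads in chunks instead of holding the ~3 GB file in memory at once
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(output_file, "wb") as f:
        for chunk in response.iter_content(chunk_size=1 << 20):  # 1 MiB per chunk
            f.write(chunk)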
 
@@ -65,7 +65,7 @@ for item in items:
 #initialize replicate llm
 llm = Replicate(
     model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
-    input={"temperature": 0.1,
+    model_kwargs={"temperature": 0.1,
            "max_length": 256,
            "top_p": 1},
 )
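The rename from input= to model_kwargs= tracks LangChain's Replicate wrapper: early versions took the model parameters as input, while later releases expect model_kwargs. A minimal sketch of the updated initialization, assuming a REPLICATE_API_TOKEN in the environment (the prompt string is illustrative):

from langchain.llms import Replicate

# The wrapper reads REPLICATE_API_TOKEN from the environment.
llm = Replicate(
    model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
    model_kwargs={
        "temperature": 0.1,  # near-deterministic sampling
        "max_length": 256,   # cap on generated tokens
        "top_p": 1,          # no nucleus truncation
    },
)
print(llm("Summarize Llama 2 in one sentence."))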
@@ -192,7 +192,7 @@ def stream(input_text,prompt,context1,context2) -> Generator:
 # Initialize the LLM we'll be using
 
 llm = LlamaCpp(
-    model_path="llama-2-7b-chat.ggmlv3.q2_K.bin", # model path
+    model_path="llama-2-7b-chat.Q2_K.gguf", # model path
     callbacks=[QueueCallback(q)],
    verbose=True,
     n_ctx=4000,
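LlamaCpp is LangChain's wrapper around llama-cpp-python; model_path must match the output_file written by the download step or loading fails, and n_ctx is the context window in tokens. A minimal sketch of the same constructor, substituting LangChain's stock stdout handler for the app's custom QueueCallback:

from langchain.llms import LlamaCpp
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q2_K.gguf",        # the file saved by the download step
    callbacks=[StreamingStdOutCallbackHandler()],  # prints tokens as they are generated
    verbose=True,
    n_ctx=4000,                                    # context window, in tokens
)
llm("Say hello.")  # the callback streams the reply to stdout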
@@ -219,6 +219,7 @@ def stream(input_text,prompt,context1,context2) -> Generator:
             if next_token is job_done:
                 break
             content += next_token
+            print(next_token)
             yield next_token
         except Empty:
             continue
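The added print(next_token) logs each token as the generator drains it from the queue. The surrounding loop is a producer/consumer pattern: the LLM thread's QueueCallback puts tokens on a Queue, stream() yields them until a job_done sentinel arrives, and Empty timeouts are ignored while the producer is still working. A self-contained sketch of that pattern with a stand-in producer (fake_llm and stream_tokens are illustrative names, not from app.py):

from queue import Queue, Empty
from threading import Thread
from typing import Generator

job_done = object()  # unique sentinel; an `is` check can never collide with a token

def fake_llm(q: Queue) -> None:
    # Stand-in for the LLM thread that QueueCallback feeds in app.py.
    for token in ["Hello", ", ", "world", "!"]:
        q.put(token)
    q.put(job_done)

def stream_tokens() -> Generator[str, None, None]:
    q: Queue = Queue()
    Thread(target=fake_llm, args=(q,), daemon=True).start()
    content = ""
    while True:
        try:
            next_token = q.get(timeout=1)
            if next_token is job_done:
                break
            content += next_token
            print(next_token)  # mirrors the debug print added in this commit
            yield next_token
        except Empty:
            continue  # producer still running; poll again

print("".join(stream_tokens()))  # prints each token, then: Hello, world!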
 