Spaces: Runtime error
manojpatil committed · Commit 86033d5 · 1 Parent(s): e1a910b
Update app.py
app.py CHANGED
@@ -44,8 +44,8 @@ vectorstore = Milvus(connection_args=connection_args, collection_name=collection
 
 #downloading the model
 
-url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.
-output_file = "
+url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
+output_file = "lllama-2-7b-chat.Q2_K.gguf" # The filename you want to save the downloaded file as
 
 response = requests.get(url)
 
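The hunk shows the new URL and target filename but cuts off before the response bytes are written out. A minimal sketch of how the download step could be completed, streaming to disk so the multi-gigabyte GGUF file never has to sit in memory at once (the stream=True flag, chunk size, timeout, and error check are assumptions, not part of the commit):

import requests

url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
output_file = "lllama-2-7b-chat.Q2_K.gguf"  # filename exactly as written in the commit

# Stream the response in chunks rather than reading response.content,
# so the ~3 GB model file is written to disk incrementally.
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(output_file, "wb") as f:
        for chunk in response.iter_content(chunk_size=8 * 1024 * 1024):
            f.write(chunk)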
@@ -65,7 +65,7 @@ for item in items:
 #intialize replicate llm
 llm = Replicate(
     model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
-
+    model_kwargs={"temperature": 0.1,
                   "max_length": 256,
                   "top_p": 1},
 )
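This hunk moves the generation parameters into model_kwargs, which the langchain Replicate wrapper forwards to the hosted model on each call. A minimal usage sketch, assuming a valid REPLICATE_API_TOKEN is available (the token value and prompt below are placeholders, not from the commit):

import os
from langchain.llms import Replicate

os.environ["REPLICATE_API_TOKEN"] = "r8_..."  # placeholder, not a real token

llm = Replicate(
    model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
    model_kwargs={"temperature": 0.1,  # near-deterministic sampling
                  "max_length": 256,   # cap on generated tokens
                  "top_p": 1},         # no nucleus truncation
)

print(llm("Summarize what Milvus is in one sentence."))  # single hosted completion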
@@ -192,7 +192,7 @@ def stream(input_text,prompt,context1,context2) -> Generator:
 # Initialize the LLM we'll be using
 
 llm = LlamaCpp(
-    model_path="llama-2-7b-chat.
+    model_path="llama-2-7b-chat.Q2_K.gguf", # model path
     callbacks=[QueueCallback(q)],
     verbose=True,
     n_ctx=4000,
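Worth noting: the download hunk above saves the weights as "lllama-2-7b-chat.Q2_K.gguf" (three l's), while this hunk points LlamaCpp at "llama-2-7b-chat.Q2_K.gguf", so the model file would not be found at load time; a plausible cause of the Space's runtime error. A minimal guard, assuming the intent is for both steps to share one filename (the check and message are illustrative):

import os

model_path = "llama-2-7b-chat.Q2_K.gguf"

# Fail fast with a readable message instead of letting llama.cpp
# error out later when the path does not exist.
if not os.path.exists(model_path):
    raise FileNotFoundError(
        f"{model_path} not found; make sure output_file in the download "
        "step uses the same name (the commit writes it with three l's)."
    )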
@@ -219,6 +219,7 @@ def stream(input_text,prompt,context1,context2) -> Generator:
             if next_token is job_done:
                 break
             content += next_token
+            print(next_token)
             yield next_token
         except Empty:
             continue
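This hunk adds a debug print inside the consumer loop of the token stream. The producer side (QueueCallback and the job_done sentinel) sits outside the hunk; a self-contained sketch of the queue-based streaming pattern the fragments imply, with the callback class and thread wiring reconstructed as assumptions rather than copied from the commit:

from queue import Queue, Empty
from threading import Thread
from langchain.callbacks.base import BaseCallbackHandler

job_done = object()  # sentinel marking the end of generation

class QueueCallback(BaseCallbackHandler):
    """Pushes each token onto a queue as the LLM emits it."""
    def __init__(self, q: Queue):
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs) -> None:
        self.q.put(job_done)

def stream_tokens(llm, prompt: str):
    q: Queue = Queue()
    # Run generation on a worker thread; tokens arrive via the callback.
    Thread(target=lambda: llm(prompt, callbacks=[QueueCallback(q)])).start()
    content = ""
    while True:
        try:
            next_token = q.get(timeout=1)
            if next_token is job_done:
                break
            content += next_token
            print(next_token)  # the debug print added in this commit
            yield next_token
        except Empty:
            continue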