manojpatil committed
Commit 86033d5
1 Parent(s): e1a910b

Update app.py

Files changed (1)
  1. app.py +5 -4
app.py CHANGED
@@ -44,8 +44,8 @@ vectorstore = Milvus(connection_args=connection_args, collection_name=collection
 
 #downloading the model
 
-url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin"
-output_file = "llama-2-7b-chat.ggmlv3.q2_K.bin" # The filename you want to save the downloaded file as
+url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
+output_file = "llama-2-7b-chat.Q2_K.gguf" # The filename you want to save the downloaded file as
 
 response = requests.get(url)
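Context for this hunk: llama.cpp replaced its GGML file format with GGUF in August 2023, and newer llama-cpp-python builds load only .gguf files, which is presumably what motivated swapping llama-2-7b-chat.ggmlv3.q2_K.bin for llama-2-7b-chat.Q2_K.gguf. One caveat in the surrounding code: a bare requests.get(url) buffers the multi-gigabyte model in memory before it is written out. A minimal sketch of a chunked download under the same url and output_file, with the chunk size and timeout as illustrative assumptions:

import requests

url = "https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
output_file = "llama-2-7b-chat.Q2_K.gguf"  # must match the path handed to LlamaCpp later

# stream=True downloads in chunks instead of holding the ~3 GB file in memory at once
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(output_file, "wb") as f:
        for chunk in response.iter_content(chunk_size=1 << 20):  # 1 MiB per chunk
            f.write(chunk)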
 
@@ -65,7 +65,7 @@ for item in items:
 #initialize replicate llm
 llm = Replicate(
     model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
-    input={"temperature": 0.1,
+    model_kwargs={"temperature": 0.1,
            "max_length": 256,
            "top_p": 1},
 )
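The rename from input= to model_kwargs= tracks LangChain's Replicate wrapper: early versions took the model parameters as input, while later releases expect model_kwargs. A minimal sketch of the updated initialization, assuming a REPLICATE_API_TOKEN in the environment (the prompt string is illustrative):

from langchain.llms import Replicate

# The wrapper reads REPLICATE_API_TOKEN from the environment.
llm = Replicate(
    model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
    model_kwargs={
        "temperature": 0.1,  # near-deterministic sampling
        "max_length": 256,   # cap on generated tokens
        "top_p": 1,          # no nucleus truncation
    },
)
print(llm("Summarize Llama 2 in one sentence."))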
@@ -192,7 +192,7 @@ def stream(input_text,prompt,context1,context2) -> Generator:
 # Initialize the LLM we'll be using
 
 llm = LlamaCpp(
-    model_path="llama-2-7b-chat.ggmlv3.q2_K.bin", # model path
+    model_path="llama-2-7b-chat.Q2_K.gguf", # model path
     callbacks=[QueueCallback(q)],
    verbose=True,
     n_ctx=4000,
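LlamaCpp is LangChain's wrapper around llama-cpp-python; model_path must match the output_file written by the download step or loading fails, and n_ctx is the context window in tokens. A minimal sketch of the same constructor, substituting LangChain's stock stdout handler for the app's custom QueueCallback:

from langchain.llms import LlamaCpp
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q2_K.gguf",        # the file saved by the download step
    callbacks=[StreamingStdOutCallbackHandler()],  # prints tokens as they are generated
    verbose=True,
    n_ctx=4000,                                    # context window, in tokens
)
llm("Say hello.")  # the callback streams the reply to stdout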
@@ -219,6 +219,7 @@ def stream(input_text,prompt,context1,context2) -> Generator:
             if next_token is job_done:
                 break
             content += next_token
+            print(next_token)
             yield next_token
         except Empty:
             continue
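The added print(next_token) logs each token as the generator drains it from the queue. The surrounding loop is a producer/consumer pattern: the LLM thread's QueueCallback puts tokens on a Queue, stream() yields them until a job_done sentinel arrives, and Empty timeouts are ignored while the producer is still working. A self-contained sketch of that pattern with a stand-in producer (fake_llm and stream_tokens are illustrative names, not from app.py):

from queue import Queue, Empty
from threading import Thread
from typing import Generator

job_done = object()  # unique sentinel; an `is` check can never collide with a token

def fake_llm(q: Queue) -> None:
    # Stand-in for the LLM thread that QueueCallback feeds in app.py.
    for token in ["Hello", ", ", "world", "!"]:
        q.put(token)
    q.put(job_done)

def stream_tokens() -> Generator[str, None, None]:
    q: Queue = Queue()
    Thread(target=fake_llm, args=(q,), daemon=True).start()
    content = ""
    while True:
        try:
            next_token = q.get(timeout=1)
            if next_token is job_done:
                break
            content += next_token
            print(next_token)  # mirrors the debug print added in this commit
            yield next_token
        except Empty:
            continue  # producer still running; poll again

print("".join(stream_tokens()))  # prints each token, then: Hello, world!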
 