File size: 2,427 Bytes
2e66cb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# One-time model download, kept commented out so the (large) checkpoint is not
# re-fetched on every run. NOTE(review): the URL fetches
# nous-hermes-13b.ggmlv3.q4_0.bin, but func() below loads "./model.bin" —
# presumably the file is renamed manually after download; confirm.
# import requests

# url = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin"

# response = requests.get(url)

# with open("nous-hermes-13b.ggmlv3.q4_0.bin", "wb") as f:
#     f.write(response.content)


# Startup marker: confirms the module top-level (imports) completed.
print("DONE")

def func(user):
    """Answer a single user message with the local LlamaCpp model.

    Gradio calls this once per submitted message.

    Args:
        user: The raw text the user typed into the Gradio textbox.

    Returns:
        The model's generated answer as a string.
    """
    template = """
    Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
    Question: {question}
    
    Answer: """

    prompt = PromptTemplate(template=template, input_variables=["question"])

    # Path to the quantized GGML model on disk. Previously this constant was
    # defined but ignored (the path was re-hard-coded in the LlamaCpp call).
    local_path = "./model.bin"

    # NOTE(review): the model is re-loaded from disk on every request; hoisting
    # llm/llm_chain to module level would avoid that — verify memory budget first.
    llm = LlamaCpp(model_path=local_path)
    llm_chain = LLMChain(prompt=prompt, llm=llm)

    # Run inference exactly once. The original body called llm_chain.run()
    # twice (once discarding the result), doubling the cost of every request.
    return llm_chain.run(user)

# Wire func() to a minimal Gradio UI: one text input, one text output.
iface = gr.Interface(fn=func, inputs="text", outputs="text")
# Blocks and serves the web app (default: http://127.0.0.1:7860).
iface.launch()

# --- Older variant of the same app, kept commented out for reference: it used
# --- the nous-hermes model path and passed streaming=True to LLMChain.
# --- Remove once the active version above is confirmed working.
# import gradio as gr
# from langchain.llms import LlamaCpp
# from langchain import PromptTemplate, LLMChain
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# print("DONE")

# def func(user):
#     template = """
#     Your name is John and not a assistant but more like a chatbot. Respond precise not more words and act like a human. for example: user: How are you? You: I'm doing good how about you?. user: hello You: Hello how you doing?. Don't say How can I assist you today?.
#     Question: {question}
    
#     Answer: """
    
#     prompt = PromptTemplate(template=template, input_variables=["question"])
    
#     local_path = "./nous-hermes-13b.ggmlv3.q4_0.bin"
    
#     llm = LlamaCpp(model_path=local_path)
#     llm_chain = LLMChain(prompt=prompt, llm=llm, streaming=True)  # Enable streaming mode
#     question = user
#     llm_chain.run(question)

#     return llm_chain.run(question)

# iface = gr.Interface(fn=func, inputs="text", outputs="text")
# iface.launch()