flav1010 committed · verified
Commit 3595957 · 1 Parent(s): 7f2baef

Update app.py

Files changed (1):
  1. app.py +30 -47
app.py CHANGED
@@ -1,66 +1,50 @@
-
+import spaces
 import gradio as gr
-from langchain.llms import HuggingFaceHub
 from huggingface_hub import InferenceClient
-from langchain.agents import load_tools, initialize_agent
-#from langchain.schema.agent import AgentType
-from langchain.agents import AgentType, initialize_agent,load_tools
-import wikipedia
-
 
-# Initialize Hugging Face inference client
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
 client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
-
-# Initialize the LLM using LangChain's HuggingFaceHub
-llm = HuggingFaceHub(
-    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
-    huggingfacehub_api_token=HUGGINGFACE_API_TOKEN
-)
-# Load Wikipedia tool and initialize the agent
-tools = load_tools(["wikipedia"], llm=llm)
-agent = initialize_agent(
-    tools,
-    llm,
-    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-    verbose=False
-)
-
+# llm = HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B-Instruct", model_kwargs={"temperature": 1})
 
 
 @spaces.GPU
-
 def respond(
-    message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p,
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
-    for user, assistant in history:
-        if user:
-            messages.append({"role": "user", "content": user})
-        if assistant:
-            messages.append({"role": "assistant", "content": assistant})
+
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
     messages.append({"role": "user", "content": message})
 
-    # Use LangChain to obtain the model's response
-    chat_input = {
-        "messages": messages,
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "stream": True,
-    }
+    response = ""
 
-    # Get response from the agent using Wikipedia
-    wikipedia_response = agent.run(message)
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
 
-    # Use Hugging Face LLM to elaborate on the Wikipedia response
-    chat_input["messages"].append({"role": "assistant", "content": wikipedia_response})
-    response = wikipedia_response
-    for message in client.chat_completion(**chat_input):
-        token = message["choices"][0]["delta"]["content"]
         response += token
         yield response
 
-
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -79,5 +63,4 @@ demo = gr.ChatInterface(
 
 
 if __name__ == "__main__":
-    demo.queue()
     demo.launch(share=True)
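
The commit drops the LangChain Wikipedia agent and streams tokens directly from `huggingface_hub.InferenceClient`. The sketch below shows that streaming pattern in isolation so it can be tried outside Gradio; it assumes a valid Hugging Face token in the environment with access to the gated Llama 3 model, and the `if token:` guard is an extra safeguard (stream chunks can carry an empty delta), not part of the committed file.

# Minimal sketch of the new streaming path, runnable outside Gradio.
# Assumes HF_TOKEN is set and the account has access to the gated model.
from huggingface_hub import InferenceClient

client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

messages = [
    {"role": "system", "content": "You are a friendly chatbot."},
    {"role": "user", "content": "Say hello in one sentence."},
]

response = ""
for chunk in client.chat_completion(
    messages,
    max_tokens=64,
    stream=True,
    temperature=0.7,
    top_p=0.95,
):
    token = chunk.choices[0].delta.content
    if token:  # guard not in the committed file: some chunks have an empty delta
        response += token
print(response)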
 
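Old lines 67–78 (the body of `additional_inputs` and the closing of `gr.ChatInterface`) are unchanged and elided between the two hunks, so the actual inputs are not visible here. Given `respond`'s signature, a stock Gradio wiring of the four extra parameters would look roughly like the following; the labels, ranges, and defaults are illustrative assumptions, not the file's contents. The second hunk also drops the explicit `demo.queue()` call, which recent Gradio versions make redundant (queueing is enabled by default).

# Hypothetical reconstruction of the elided additional_inputs wiring.
# Labels, ranges, and defaults are assumptions; app.py's real values are not
# shown in this diff.
import gradio as gr

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # stand-in for the streaming generator defined in app.py
    yield f"({system_message}) echo: {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()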