j2moreno committed
Commit 69618cb
1 Parent(s): e8f9a1a
Files changed (1)
  1. app.py +14 -59
app.py CHANGED
@@ -1,12 +1,4 @@
 import gradio as gr
-
-# def greet(name):
-#     return "Hello " + name + "!!"
-
-# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-# iface.launch()
-
-# import spaces
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline, set_seed
 
@@ -20,15 +12,21 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 SEED = 42
 
-default_text = "Ask me about Leonardo Moreno"
-title = "Who is Leonardo Moreno"
+title = "Who is Leonardo Moreno?"
 
 ### This Space demonstrates model [Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta, a Llama 2 model with 70B parameters fine-tuned for chat instructions. This space is running on Inference Endpoints using text-generation-inference library. If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://ui.endpoints.huggingface.co/).
 
 description = """
-This Space is... [placeholder]
+This Space demonstrates a finetuned [TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) that was trained on information about Leonardo Moreno. **Ask it anything about Leonardo Moreno**
+
+Model: [j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned](https://huggingface.co/j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned)
+
+Leonardo Moreno contacts:
+- [LinkedIn](https://www.linkedin.com/in/jose-leonardo-moreno-/)
+- [Github](https://github.com/j2moreno)
+
+**Warning:** This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate a reply.
 """
-css = """.toast-wrap { display: none !important } """
 # examples=[
 #     ['Who is Leonardo Moreno?'],
 #     ['Describe Leonardo Moreno\'s professional background.'],
@@ -37,27 +35,9 @@ css = """.toast-wrap { display: none !important } """
 #     ['How has Leonardo Moreno integrated AI in his work?'],
 # ]
 
-# def vote(data: gr.LikeData):
-#     if data.liked:
-#         print("You upvoted this response: " + data.value)
-#     else:
-#         print("You downvoted this response: " + data.value)
-
 # @spaces.GPU
 def generate_response(message, history):
     set_seed(SEED)
-    # data = {
-    #     "role": "user",
-    #     "content": message
-    # }
-    # tokenized_prompt = tokenizer(data["content"], return_tensors="pt", padding=True, truncation=True, max_length=128)
-    # print(tokenized_prompt)
-
-    # output_sequences = model.generate(**tokenized_prompt, max_length=1024, num_return_sequences=1)
-    # decoded_output = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)[0]
-    # print(decoded_output)
-
-    # yield decoded_output
     temperature=0.4
     top_p=0.95
     top_k=50
@@ -79,36 +59,11 @@ def generate_response(message, history):
     print(outputs[0]["generated_text"])
     return outputs[0]["generated_text"]
 
-# chatbot_stream = gr.Chatbot()
-chat_interface_stream = gr.ChatInterface(generate_response,
+if __name__ == "__main__":
+    gr.ChatInterface(generate_response,
                      title=title,
                      description=description,
-                     textbox=gr.Textbox(),
-                     # chatbot=chatbot_stream,
-                     css=css,
-                     # examples=examples,
+                     #examples=examples,
                      #cache_examples=True,
                      #additional_inputs=additional_inputs,
-                     )
-
-# Gradio Demo
-with gr.Blocks() as demo:
-    # streaming chatbot
-    # chatbot_stream.like(vote, None, None)
-    chat_interface_stream.render()
-
-if __name__ == "__main__":
-    demo.queue().launch(share=True)
-
-
-# messages = [
-#     {
-#         "role": "system",
-#         "content": "You are a friendly chatbot who always responds in the style of a thug",
-#     },
-#     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
-# ]
-# model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
-# input_length = model_inputs.shape[1]
-# generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=20)
-# print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
+                     ).launch()
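
Note: the last hunk elides the body of generate_response between the sampling parameters (new lines 41-43) and the final print/return (new lines 59-60), so the actual prompt construction is not visible in this diff. The sketch below is only an illustration of one plausible way that gap could be filled using the pipe, tokenizer, SEED, and set_seed already defined in app.py; the helper name and the max_new_tokens value are assumptions, not the committed code.

# Illustrative sketch only (not the committed code): turn the Gradio
# message/history into a chat-formatted prompt and call the
# text-generation pipeline defined earlier in app.py.
def generate_response_sketch(message, history):
    set_seed(SEED)
    # gr.ChatInterface passes history as a list of (user, assistant) pairs.
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    # Format the conversation with the model's chat template, then generate.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    outputs = pipe(
        prompt,
        max_new_tokens=256,   # assumed value; the diff does not show it
        do_sample=True,
        temperature=0.4,
        top_p=0.95,
        top_k=50,
    )
    print(outputs[0]["generated_text"])
    return outputs[0]["generated_text"]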