Update
app.py CHANGED
@@ -1,12 +1,4 @@
 import gradio as gr
-
-# def greet(name):
-#     return "Hello " + name + "!!"
-
-# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-# iface.launch()
-
-# import spaces
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline, set_seed
 
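The unchanged region between these two hunks is elided; the next hunk's header shows it ends with `pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)`. A minimal sketch of what that setup plausibly looks like, assuming the fine-tuned checkpoint named in the new description; the dtype argument and everything else not shown in the diff are assumptions, not the file's actual code:

```python
# Hedged reconstruction of the elided, unchanged setup (roughly app.py lines 4-19).
# Only the final pipeline(...) call is confirmed by the hunk header; the checkpoint
# id is taken from the new description, and everything else is an assumption.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

checkpoint = "j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float32)

# Confirmed by the hunk header below: the pipeline object the app generates with.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```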
@@ -20,15 +12,21 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 SEED = 42
 
-
-title = "Who is Leonardo Moreno"
+title = "Who is Leonardo Moreno?"
 
 ### This Space demonstrates model [Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta, a Llama 2 model with 70B parameters fine-tuned for chat instructions. This space is running on Inference Endpoints using text-generation-inference library. If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://ui.endpoints.huggingface.co/).
 
 description = """
-This Space
+This Space demonstrates a fine-tuned [TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) trained on information about Leonardo Moreno. **Ask it anything about Leonardo Moreno.**
+
+Model: [j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned](https://huggingface.co/j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned)
+
+Leonardo Moreno contacts:
+- [LinkedIn](https://www.linkedin.com/in/jose-leonardo-moreno-/)
+- [GitHub](https://github.com/j2moreno)
+
+**Warning:** This Space runs on Hugging Face's free CPU Basic hardware, so replies can take some time to generate.
 """
-css = """.toast-wrap { display: none !important } """
 # examples=[
 # ['Who is Leonardo Moreno?'],
 # ['Describe Leonardo Moreno\'s professional background.'],
@@ -37,27 +35,9 @@ css = """.toast-wrap { display: none !important } """
 # ['How has Leonardo Moreno integrated AI in his work?'],
 # ]
 
-# def vote(data: gr.LikeData):
-#     if data.liked:
-#         print("You upvoted this response: " + data.value)
-#     else:
-#         print("You downvoted this response: " + data.value)
-
 # @spaces.GPU
 def generate_response(message, history):
     set_seed(SEED)
-    # data = {
-    #     "role": "user",
-    #     "content": message
-    # }
-    # tokenized_prompt = tokenizer(data["content"], return_tensors="pt", padding=True, truncation=True, max_length=128)
-    # print(tokenized_prompt)
-
-    # output_sequences = model.generate(**tokenized_prompt, max_length=1024, num_return_sequences=1)
-    # decoded_output = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)[0]
-    # print(decoded_output)
-
-    # yield decoded_output
     temperature=0.4
     top_p=0.95
     top_k=50
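New lines 44-58 of `generate_response` sit between this hunk and the next and are not shown. Judging from the confirmed context lines (`set_seed(SEED)`, the three sampling constants, and the `outputs[0]["generated_text"]` access in the next hunk), a hedged sketch of what that hidden body most likely does; the chat-template call and the generation kwargs are assumptions, not the file's actual code:

```python
# Hedged sketch of the elided middle of generate_response (new lines 44-58).
# Confirmed by the diff: set_seed(SEED), temperature/top_p/top_k, and that the
# result is read as outputs[0]["generated_text"]. The chat-template call and
# max_new_tokens are assumptions; tokenizer, pipe, SEED, and set_seed are the
# module-level names from app.py.
def generate_response(message, history):
    set_seed(SEED)
    temperature = 0.4
    top_p = 0.95
    top_k = 50

    # Assumed: wrap the user message in the model's chat template.
    messages = [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Assumed: sample from the text-generation pipeline with the constants above.
    outputs = pipe(
        prompt,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=256,
    )
    print(outputs[0]["generated_text"])
    return outputs[0]["generated_text"]
```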
@@ -79,36 +59,11 @@ def generate_response(message, history):
     print(outputs[0]["generated_text"])
     return outputs[0]["generated_text"]
 
-
-chat_interface_stream = gr.ChatInterface(generate_response,
+if __name__ == "__main__":
+    gr.ChatInterface(generate_response,
         title=title,
         description=description,
-
-        # chatbot=chatbot_stream,
-        css=css,
-        # examples=examples,
+        #examples=examples,
         #cache_examples=True,
         #additional_inputs=additional_inputs,
-        )
-
-# Gradio Demo
-with gr.Blocks() as demo:
-    # streaming chatbot
-    # chatbot_stream.like(vote, None, None)
-    chat_interface_stream.render()
-
-if __name__ == "__main__":
-    demo.queue().launch(share=True)
-
-
-# messages = [
-#     {
-#         "role": "system",
-#         "content": "You are a friendly chatbot who always responds in the style of a thug",
-#     },
-#     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
-# ]
-# model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
-# input_length = model_inputs.shape[1]
-# generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=20)
-# print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
+        ).launch()
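The hunk above also deletes a trailing comment block that hand-sketched the chat-template generation pattern. For reference, a runnable version of that deleted snippet; the messages and the `generate` call are the originals from the comments, while loading the base TinyLlama chat checkpoint and running on CPU (the original moved inputs to "cuda") are assumptions:

```python
# Runnable version of the deleted scratchpad (old lines 104-114). The messages
# and generate() call are the originals; the checkpoint choice and CPU execution
# are assumptions made so the snippet runs standalone.
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # assumed stand-in model
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a thug",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
model_inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
input_length = model_inputs.shape[1]
generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=20)
# Decode only the newly generated tokens, not the prompt.
print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
```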