update
app.py CHANGED
@@ -16,6 +16,7 @@ repo_id = "j2moreno/test-model"
 
 model = AutoModelForCausalLM.from_pretrained(repo_id).to(device)
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 SEED = 42
 
@@ -43,24 +44,47 @@ examples=[
     # print("You downvoted this response: " + data.value)
 
 # @spaces.GPU
-def generate_response(message,
+def generate_response(message, history):
    set_seed(SEED)
-
-
-
-
-
-
-
-
-
-
-
+    # data = {
+    #     "role": "user",
+    #     "content": message
+    # }
+    # tokenized_prompt = tokenizer(data["content"], return_tensors="pt", padding=True, truncation=True, max_length=128)
+    # print(tokenized_prompt)
+
+    # output_sequences = model.generate(**tokenized_prompt, max_length=1024, num_return_sequences=1)
+    # decoded_output = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)[0]
+    # print(decoded_output)
+
+    # yield decoded_output
+    temperature=0.4
+    top_p=0.95
+    top_k=50
+    max_new_tokens=256
+
+    message_template = [
+        {
+            "role": "system",
+            "content": "You are a highly knowledgeable and friendly chatbot equipped with extensive information across various domains. Your goal is to understand and respond to user inquiries with accuracy and clarity. You're adept at providing detailed explanations, concise summaries, and insightful responses. Your interactions are always respectful, helpful, and focused on delivering the most relevant information to the user.",
+        },
+        {"role": "user", "content": message},
+    ]
+
+    # Set tokenize correctly. Otherwise ticking the box breaks it.
+    prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
+    outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True,
+                   temperature=temperature, top_k=top_k, top_p=top_p, repetition_penalty=1.10)
+
+    print(outputs[0]["generated_text"])
+    return outputs[0]["generated_text"]
+
+# chatbot_stream = gr.Chatbot()
 chat_interface_stream = gr.ChatInterface(generate_response,
                                          title=title,
                                          description=description,
                                          textbox=gr.Textbox(),
-
+                                         # chatbot=chatbot_stream,
                                          css=css,
                                          examples=examples,
                                          #cache_examples=True,
@@ -74,4 +98,17 @@ with gr.Blocks() as demo:
     chat_interface_stream.render()
 
 if __name__ == "__main__":
-    demo.queue().launch(share=True)
+    demo.queue().launch(share=True)
+
+
+    # messages = [
+    #     {
+    #         "role": "system",
+    #         "content": "You are a friendly chatbot who always responds in the style of a thug",
+    #     },
+    #     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
+    # ]
+    # model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
+    # input_length = model_inputs.shape[1]
+    # generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=20)
+    # print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])