JamesBentley committed on
Commit 96056dc · verified · 1 Parent(s): 651f0a0

Update app.py

Files changed (1)
  1. app.py +97 -35
app.py CHANGED
@@ -1,35 +1,97 @@
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
- torch.random.manual_seed(0)
- model = AutoModelForCausalLM.from_pretrained(
-     "microsoft/Phi-3-mini-4k-instruct",
-     device_map="cuda",
-     torch_dtype="auto",
-     trust_remote_code=True,
- )
-
- tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-
- messages = [
-     {"role": "system", "content": "You are a helpful AI assistant."},
-     {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
-     {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
-     {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
- ]
-
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
- )
-
- generation_args = {
-     "max_new_tokens": 500,
-     "return_full_text": False,
-     "temperature": 0.0,
-     "do_sample": False,
- }
-
- output = pipe(messages, **generation_args)
- print(output[0]['generated_text'])
+ import gradio as gr
+ import torch
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     TextIteratorStreamer,
+ )
+ import os
+ from threading import Thread
+ import spaces
+ import time
+
+ token = os.environ["HF_TOKEN"]
+
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "microsoft/Phi-3-mini-4k-instruct", token=token,trust_remote_code=True
+ )
+ tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", token=token)
+ terminators = [
+     tok.eos_token_id,
+ ]
+
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+     print(f"Using GPU: {torch.cuda.get_device_name(device)}")
+ else:
+     device = torch.device("cpu")
+     print("Using CPU")
+
+ model = model.to(device)
+ # Dispatch Errors
+
+
+ @spaces.GPU(duration=60)
+ def chat(message, history, temperature,do_sample, max_tokens):
+     chat = []
+     for item in history:
+         chat.append({"role": "user", "content": item[0]})
+         if item[1] is not None:
+             chat.append({"role": "assistant", "content": item[1]})
+     chat.append({"role": "user", "content": message})
+     messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+     model_inputs = tok([messages], return_tensors="pt").to(device)
+     streamer = TextIteratorStreamer(
+         tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
+     )
+     generate_kwargs = dict(
+         model_inputs,
+         streamer=streamer,
+         max_new_tokens=max_tokens,
+         do_sample=True,
+         temperature=temperature,
+         eos_token_id=terminators,
+     )
+
+     if temperature == 0:
+         generate_kwargs['do_sample'] = False
+
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     partial_text = ""
+     for new_text in streamer:
+         partial_text += new_text
+         yield partial_text
+
+
+     yield partial_text
+
+
+ demo = gr.ChatInterface(
+     fn=chat,
+     examples=[["Write me a poem about Machine Learning."]],
+     # multimodal=False,
+     additional_inputs_accordion=gr.Accordion(
+         label="⚙️ Parameters", open=False, render=False
+     ),
+     additional_inputs=[
+         gr.Slider(
+             minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
+         ),
+         gr.Checkbox(label="Sampling",value=True),
+         gr.Slider(
+             minimum=128,
+             maximum=4096,
+             step=1,
+             value=512,
+             label="Max new tokens",
+             render=False,
+         ),
+     ],
+     stop_btn="Stop Generation",
+     title="Chat With LLMs",
+     description="Now Running [microsoft/Phi-3-mini-4k-instruct](https://huggingface.com/microsoft/Phi-3-mini-4k-instruct)"
+ )
+ demo.launch()
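
For reference, a minimal sketch of how the streaming chat() generator added in this commit could be exercised directly, outside the Gradio UI. It assumes chat(), the model, and the tokenizer are already defined in the current session exactly as in the new app.py (for example, by running the snippet before demo.launch(), which otherwise blocks); the prompt, history, and parameter values below are illustrative only, not part of the commit.

# Illustrative sketch only: drive the chat() generator from this commit's app.py.
# Assumes chat(), tok, and model are already defined as in app.py above; the
# message, history, and parameter values here are made-up examples.
history = []  # Gradio-style history: list of [user_message, assistant_reply] pairs

last = ""
for partial in chat(
    "Write me a poem about Machine Learning.",  # message
    history,                                    # prior turns (none yet)
    temperature=0.9,
    do_sample=True,
    max_tokens=128,
):
    last = partial  # each yield is the accumulated response so far

print(last)  # full generated reply once the streamer is exhausted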