alexkueck commited on
Commit
28d01d6
·
1 Parent(s): 8f40683

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Minimal demo: stream a text-generation response from a Hugging Face
Inference Endpoint and print the tokens to stdout as they arrive."""

import os

from huggingface_hub import InferenceClient

# HF Inference Endpoints parameters.
# NOTE(review): never commit a real token — it is read from the environment
# here, falling back to the original placeholders for documentation purposes.
endpoint_url = os.environ.get(
    "HF_ENDPOINT_URL", "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"
)
hf_token = os.environ.get("HF_TOKEN", "hf_YOUR_TOKEN")

# Streaming client bound to the endpoint.
client = InferenceClient(endpoint_url, token=hf_token)

# Generation parameters: low temperature + nucleus/top-k sampling.
gen_kwargs = dict(
    max_new_tokens=512,
    top_k=30,
    top_p=0.9,
    temperature=0.2,
    repetition_penalty=1.02,
    stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
)

# Prompt
prompt = "What can you do in Nuremberg, Germany? Give me 3 Tips"

# details=True is required to get token-level metadata (.token.special below).
stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

# Print each generated token as it streams in.
for r in stream:
    # Skip special tokens (BOS/EOS markers etc.) — not user-visible text.
    if r.token.special:
        continue
    # Best-effort client-side stop: only matches when a stop sequence arrives
    # as a single token; the server normally truncates on stop_sequences itself.
    if r.token.text in gen_kwargs["stop_sequences"]:
        break
    # flush=True so partial output is visible immediately while streaming.
    print(r.token.text, end="", flush=True)