from huggingface_hub import InferenceClient
import os
import gradio as gr
import random
import time

# HF Inference Endpoints parameters
endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"
# Equivalent raw request against the endpoint (e.g. via curl):
#   -X POST \
#   -d '{"inputs":"My name is Teven and I am"}' \
#   -H "Authorization: Bearer <hf_token>" \
#   -H "Content-Type: application/json"

hf_token = os.getenv("TOKEN_HF")

# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
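
# NOTE: generate_prompt_with_history() is called below but is neither defined
# nor imported in this file; the original app presumably ships it in a helper
# module. The version here is only a minimal sketch under that assumption: it
# folds the chat history into a "User:/Assistant:" prompt and truncates from
# the left so the most recent turns fit into max_length characters (a real
# implementation would truncate by token count via the tokenizer instead).
def generate_prompt_with_history(text, history, tokenizer=None, max_length=2048):
    prompt = ""
    for user_turn, bot_turn in history:
        prompt += f"User: {user_turn}\nAssistant: {bot_turn or ''}\n"
    prompt += f"User: {text}\nAssistant:"
    # crude character-based truncation so the newest turns are kept
    if len(prompt) > max_length:
        prompt = prompt[-max_length:]
    return prompt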



########################################################################
# Use the chat model on the endpoint to generate text...
def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens):
    # nothing to do for an empty message
    if text == "":
        yield chatbotGr, history, "Empty context."
        return
    # make sure the streaming client was created successfully
    try:
        client
    except NameError:
        yield [[text, "No Model Found"]], [], "No Endpoint Found"
        return

    # generation parameters
    gen_kwargs = dict(
        max_new_tokens=max_length_tokens,
        top_k=30,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    # build the prompt from the new message and the chat history
    # (the original also passed an undefined `tokenizer` here; dropped so the
    # call matches bot() below)
    prompt = generate_prompt_with_history(text, history, max_length=max_context_length_tokens)

    # The original file stops at this point; the streaming loop below is a
    # sketch that follows the same pattern as bot() further down.
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
    chatbotGr = chatbotGr + [[text, ""]]
    history = history + [[text, ""]]
    for r in stream:
        # skip special tokens and stop at a stop sequence
        if r.token.special:
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break
        chatbotGr[-1][1] += r.token.text
        history[-1][1] += r.token.text
        yield chatbotGr, history, "Generating..."
    yield chatbotGr, history, "Generate: Success"

#######################################################################
# Gradio UI

with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

with gr.Blocks(css=customCSS) as demo:  # apply the custom CSS read above
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # clear the textbox and append the new user turn (answer still pending)
        return "", history + [[user_message, None]]

    def bot(history):
        # generation parameters
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        # the pending user message was stored by user() as history[-1][0];
        # 2048 is an assumed context budget (the original referenced an
        # undefined max_context_length_tokens here)
        prompt = generate_prompt_with_history(history[-1][0], history[:-1], max_length=2048)
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

        # stream the answer token by token into the last chat turn
        history[-1][1] = ""
        for r in stream:
            # skip special tokens and stop at a stop sequence
            if r.token.special:
                continue
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            history[-1][1] += r.token.text
            time.sleep(0.05)
            yield history


    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
    
demo.queue()
demo.launch()