laragrl committed (verified)
Commit 90575a7 · 1 Parent(s): 49cef03

Create app.py


Create app.py and load models: LeoLM, Occiglot, Llama2

Files changed (1)
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ # Model checkpoints to compare (note: meta-llama/Llama-2-13b-hf is gated and
+ # requires a Hugging Face account with approved access to the repository)
+ model_names = {
+     "LeoLM_13B": "LeoLM/leo-hessianai-13b",
+     "Occiglot_7B": "occiglot/occiglot-7b-de-en",
+     "LLaMA2_13B": "meta-llama/Llama-2-13b-hf"
+ }
+
+ # Load tokenizers and models up front; holding all three in float16 at once
+ # requires substantial GPU memory, and device_map="auto" needs the accelerate package
+ tokenizers = {name: AutoTokenizer.from_pretrained(model) for name, model in model_names.items()}
+ models = {name: AutoModelForCausalLM.from_pretrained(model, device_map="auto", torch_dtype=torch.float16) for name, model in model_names.items()}
+
+ # Generate a response with the selected model
+ def generate_response(model_choice, prompt):
+     tokenizer = tokenizers[model_choice]
+     model = models[model_choice]
+     # Place inputs on the model's device rather than hard-coding "cuda",
+     # since device_map="auto" decides where the weights live
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     # Unpack the encoding so the attention mask is passed along with the input ids
+     outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Comparing LLMs: LeoLM, Occiglot, and LLaMA 2")
+     with gr.Row():
+         model_choice = gr.Radio(list(model_names.keys()), label="Select a model")
+         prompt = gr.Textbox(label="Ask a question", placeholder="Was sind die Hauptursachen für Bluthochdruck?")
+         output = gr.Textbox(label="Answer")
+
+     submit_button = gr.Button("Generate answer")
+     submit_button.click(generate_response, inputs=[model_choice, prompt], outputs=output)
+
+ # Launch the app
+ demo.launch()
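
As a side note, the handler can be smoke-tested without the UI, for example from an interactive session once the models have loaded, or by calling it before demo.launch() (which blocks). A minimal sketch, reusing only names defined in app.py:

    # Minimal sketch: invoke the handler directly, bypassing Gradio.
    # "Occiglot_7B" is one of the keys defined in model_names above.
    print(generate_response("Occiglot_7B", "Was sind die Hauptursachen für Bluthochdruck?"))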