"""Gradio demo that serves sampled text generation from ai-forever/mGPT-13B."""

# NOTE: import order is kept exactly as in the original script.
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
import torch

MODEL_ID = "ai-forever/mGPT-13B"

# Stays None when no CUDA device is visible at start-up, so predict() can
# report the problem clearly instead of dying with a NameError on an
# undefined global.
pipe = None

if torch.cuda.is_available():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        load_in_8bit=True,
        device_map="auto",
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


@spaces.GPU(duration=600)
def predict(prompt, temperature, max_length):
    """Generate a sampled continuation of ``prompt``.

    Args:
        prompt: Text passed to the text-generation pipeline.
        temperature: Sampling temperature forwarded to the pipeline.
        max_length: Value forwarded as the pipeline's ``max_length``; it is
            coerced to ``int`` before use.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.

    Raises:
        gr.Error: If the model was not loaded because CUDA was unavailable
            when the script started.
    """
    if pipe is None:
        raise gr.Error(
            "The model is not loaded: no CUDA device was available at start-up."
        )

    outputs = pipe(
        prompt,
        temperature=temperature,
        # UI sliders / API clients may deliver a float such as 50.0.
        max_length=int(max_length),
        top_p=0.95,
        top_k=50,
        do_sample=True,
    )
    return outputs[0]["generated_text"]


demo = gr.Interface(
    fn=predict,
    title="mGPT-13B Demo",
    inputs=[
        "text",
        gr.Slider(minimum=0.01, maximum=1.0, value=0.7, label="temperature"),
        # Explicit step so the slider only offers whole-number lengths.
        gr.Slider(minimum=1, maximum=1024, value=50, step=1, label="max_length"),
    ],
    outputs=["text"],
)

demo.launch()