import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

if torch.cuda.is_available():
    # Load mGPT-13B in 8-bit so it fits on a single GPU. With
    # device_map="auto", accelerate places the weights itself, so no
    # explicit device is passed to the pipeline below (passing device=0
    # alongside device_map raises an error).
    tokenizer = AutoTokenizer.from_pretrained("ai-forever/mGPT-13B")
    model = AutoModelForCausalLM.from_pretrained(
        "ai-forever/mGPT-13B",
        load_in_8bit=True,
        device_map="auto",
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

@spaces.GPU
def predict(text):
    # The pipeline returns a list of dicts; extract the generated string
    # so the Gradio text output receives plain text.
    return pipe(text)[0]["generated_text"]

demo = gr.Interface(
    fn=predict,
    inputs=["text"],
    outputs=["text"],
)
demo.launch()