import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
import torch

if torch.cuda.is_available():
    tokenizer = AutoTokenizer.from_pretrained("ai-forever/mGPT-13B")
    # 8-bit quantization keeps the 13B model within a single GPU's memory.
    # device_map="auto" lets accelerate place the weights, so the pipeline
    # below must not be given an explicit device.
    model = AutoModelForCausalLM.from_pretrained(
        "ai-forever/mGPT-13B", load_in_8bit=True, device_map="auto"
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

@spaces.GPU
def predict(text):
    # The pipeline returns a list of dicts; return only the generated string.
    return pipe(text)[0]["generated_text"]

demo = gr.Interface(
    fn=predict,
    inputs=["text"],
    outputs=["text"],
)

demo.launch()