"""Gradio demo serving the Aityz/Aityz-3B instruction-following causal LM."""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

# Loaded once at startup; downloads weights on first run.
tokenizer = AutoTokenizer.from_pretrained('Aityz/Aityz-3B')
model = AutoModelForCausalLM.from_pretrained('Aityz/Aityz-3B')


def generate(instruction, input=None, maxtokens: int = 20):
    """Generate a model response for an Alpaca-style prompt.

    Args:
        instruction: The task description typed by the user.
        input: Optional supplementary context. Gradio sends "" when the
            textbox is empty, so truthiness (not an `is None` check) decides
            which prompt template to use. (Name shadows the builtin `input`;
            kept for backward compatibility of the signature.)
        maxtokens: Upper bound on newly generated tokens. Gradio sliders
            deliver floats, so it is cast to int before use.

    Returns:
        The generated continuation only (prompt stripped), with special
        tokens removed.
    """
    # Build the Alpaca-style prompt, with or without an "### Input:" section.
    if input:
        ln = f'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: {instruction} ### Input: {input} \n### Response:'
    else:
        ln = f'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: {instruction} ### Response:'

    inputs = tokenizer(ln, return_tensors="pt")
    prompt_len = inputs['input_ids'].shape[1]

    # Inference only — no autograd bookkeeping needed.
    with torch.no_grad():
        output = model.generate(
            inputs=inputs['input_ids'],
            max_new_tokens=int(maxtokens),
        )

    # Slice at the *token* level rather than guessing a character offset:
    # generate() returns prompt + continuation, so everything past
    # prompt_len is the new text. skip_special_tokens drops e.g. </s>.
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)


inter = gr.Interface(
    fn=generate,
    inputs=["textbox", "textbox", gr.Slider(1, 1000, value=20)],
    outputs="textbox",
)
inter.launch(share=False)