"""Gradio demo serving the Aityz/Aityz-3B instruction-following causal LM."""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

# Loaded once at startup; downloads weights on first run.
tokenizer = AutoTokenizer.from_pretrained('Aityz/Aityz-3B')
model = AutoModelForCausalLM.from_pretrained('Aityz/Aityz-3B')


def generate(instruction, input=None, maxtokens: int = 20):
    """Generate a model response for an Alpaca-style prompt.

    Args:
        instruction: The task description typed by the user.
        input: Optional supplementary context. Gradio sends "" when the
            textbox is empty, so truthiness (not an `is None` check) decides
            which prompt template to use. (Name shadows the builtin `input`;
            kept for backward compatibility of the signature.)
        maxtokens: Upper bound on newly generated tokens. Gradio sliders
            deliver floats, so it is cast to int before use.

    Returns:
        The generated continuation only (prompt stripped), with special
        tokens removed.
    """
    # Build the Alpaca-style prompt, with or without an "### Input:" section.
    if input:
        ln = f'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: {instruction} ### Input: {input} \n### Response:'
    else:
        ln = f'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: {instruction} ### Response:'

    inputs = tokenizer(ln, return_tensors="pt")
    prompt_len = inputs['input_ids'].shape[1]

    # Inference only — no autograd bookkeeping needed.
    with torch.no_grad():
        output = model.generate(
            inputs=inputs['input_ids'],
            max_new_tokens=int(maxtokens),
        )

    # Slice at the *token* level rather than guessing a character offset:
    # generate() returns prompt + continuation, so everything past
    # prompt_len is the new text. skip_special_tokens drops e.g. </s>.
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)


inter = gr.Interface(
    fn=generate,
    inputs=["textbox", "textbox", gr.Slider(1, 1000, value=20)],
    outputs="textbox",
)
inter.launch(share=False)