"""Load a PEFT (LoRA-adapted) LLaMA-2 causal-LM and its tokenizer.

Exposes three module-level names for downstream use:
    peft_model_dir  -- path to the saved PEFT adapter + base-model config
    trained_model   -- the merged/adapter-wrapped causal LM, ready for generate()
    tokenizer       -- the tokenizer saved alongside the adapter
"""

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch  # needed if the torch_dtype option below is re-enabled

# Directory containing the saved PEFT adapter (and base-model reference).
peft_model_dir = "models/rhshah/MediumGEN_LLama2"

# Load the adapter-wrapped base LLM. low_cpu_mem_usage streams weights in
# to avoid a full extra copy in host RAM during initialization.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    low_cpu_mem_usage=True,
    # Optional memory-saving settings — enable if the host/GPU is constrained:
    # torch_dtype=torch.float16,
    # load_in_4bit=True,
)

# Tokenizer saved with the adapter; must match the base model's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(peft_model_dir)