 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# import gradio as gr
# # Load the model from Hugging Face
# model = gr.load("models/rhshah/MediumGEN_LLama2")
# import time
# # Define the function to use the model
# def predict(input):
#     time.sleep(10)
#     return "model(input)"

# # Create the Gradio interface
# iface = gr.Interface(fn=predict, inputs="text", outputs="text")

# # Launch the interface
# iface.launch()

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

# Checkpoint directory holding the fine-tuned PEFT adapter and its tokenizer.
# (Presumably a LoRA-style adapter on top of a LLaMA-2 base — confirm upstream.)
peft_model_dir = "models/rhshah/MediumGEN_LLama2"

# Load the adapter together with its base causal LM.
# low_cpu_mem_usage streams weights during load to keep peak RAM down.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    low_cpu_mem_usage=True,
)

# Tokenizer saved alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(peft_model_dir)