# Loads the Llama-2 7B chat base model, applies the FinGPT forecaster LoRA
# adapter on top of it, and (when run as a script) launches the `demo` app.

import torch
from datasets import load_dataset
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub ids, named once so the model and tokenizer cannot drift apart.
BASE_MODEL_ID = 'meta-llama/Llama-2-7b-chat-hf'
ADAPTER_ID = 'FinGPT/fingpt-forecaster_dow30_llama2-7b_lora'

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    # NOTE(review): this flag permits running model code shipped in the Hub
    # repo; confirm it is actually required for this checkpoint.
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,  # optional if you have enough VRAM
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

# Wrap the base model with the LoRA adapter weights, then switch to eval mode.
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model = model.eval()

# The guard previously compared against "__app__", which `__name__` never
# equals, so the launch call could not run.
if __name__ == "__main__":
    # NOTE(review): `demo` is not defined in the visible source; it must be
    # created before this point (presumably a Gradio app object - TODO confirm).
    demo.queue(max_size=20).launch()