How to use:

!pip install peft accelerate bitsandbytes
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel, PeftConfig

# Hub repository ids: the PEFT adapter, the base checkpoint the adapter is
# applied to, and the repository the tokenizer is loaded from.
ADAPTER_REPO = "Vijayendra/QST-Llama-8b"
BASE_MODEL_REPO = "unsloth/llama-3-8b-bnb-4bit"
TOKENIZER_REPO = "Vijayendra/llama3.0-8B-merged-4bit"

# Read the adapter's PEFT configuration (not referenced again in this snippet).
config = PeftConfig.from_pretrained(ADAPTER_REPO)

# Load the base causal LM first, then wrap it with the adapter.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_REPO)
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)

# Fall back to the EOS token for padding when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define the inference function with TextStreamer
def generate_answer_with_stream(model, tokenizer, text, max_new_tokens=1024, temperature=0.5, top_k=40, top_p=0.9):
    """Stream a sampled completion for ``text`` through a ``TextStreamer``.

    ``text`` is embedded in a fixed prompt template, tokenized into PyTorch
    tensors, moved to ``model.device`` and handed to ``model.generate`` with
    sampling enabled and a repetition penalty of 1.2. The value returned by
    ``generate`` is not captured: this function returns ``None`` and the
    generated tokens are surfaced only through the streamer.

    Args:
        model: Object exposing ``device`` and ``generate``.
        tokenizer: Tokenizer that is called on the prompt; its
            ``eos_token_id`` and ``pad_token_id`` are forwarded to
            ``generate`` and it is also given to the ``TextStreamer``.
        text: Text interpolated into the prompt template.
        max_new_tokens: Forwarded unchanged to ``model.generate``.
        temperature: Forwarded unchanged to ``model.generate``.
        top_k: Forwarded unchanged to ``model.generate``.
        top_p: Forwarded unchanged to ``model.generate``.
    """
    # Build the prompt and encode it on the same device as the model.
    encoded = tokenizer(
        f"Answer the following question\n\n{text}\n\nQuestion:",
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(model.device)

    # Everything except the input ids is collected here and passed by keyword.
    generation_kwargs = dict(
        streamer=TextStreamer(tokenizer),  # emits output as it is generated
        attention_mask=encoded.attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=1.2,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Gradients are not needed for inference.
    with torch.no_grad():
        model.generate(encoded.input_ids, **generation_kwargs)

# Sample question to run through the model
question = "What is quantum mechanics?"

# Run generation; the answer is emitted through the streamer as it is produced
generate_answer_with_stream(model=model, tokenizer=tokenizer, text=question)
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.

Model tree for Vijayendra/QST-Llama-8b

Finetuned
(2574)
this model

Dataset used to train Vijayendra/QST-Llama-8b