import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the tokenizer and model from Hugging Face.
# Molmo checkpoints ship custom modeling code, so trust_remote_code is required;
# device_map="auto" (an addition here) lets the 4-bit bitsandbytes weights be
# placed on the available GPU automatically.
model_name = "impactframes/molmo-7B-D-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
)
# Example input prompt
prompt = "What is the meaning of life?"
# Tokenize the input and move it to the model's device
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Generate output; passing **inputs forwards the attention mask along with the
# input ids, and max_new_tokens bounds the generated tokens rather than the
# total sequence length
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=100)
# Decode the output
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
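
# Optional: Molmo is a vision-language model, and the base model card
# (allenai/Molmo-7B-D-0924) pairs the text prompt with an image through
# AutoProcessor and the model's custom generate_from_batch helper. Below is a
# minimal sketch of that path, assuming this quantized checkpoint keeps the
# base model's processor and custom code; "example.jpg" is a hypothetical
# local image path.
#
# from PIL import Image
# from transformers import AutoProcessor, GenerationConfig
#
# processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
# mm_inputs = processor.process(
#     images=[Image.open("example.jpg")],
#     text="Describe this image.",
# )
# # Batch of size 1: move tensors to the model's device and add a batch dim.
# mm_inputs = {k: v.to(model.device).unsqueeze(0) for k, v in mm_inputs.items()}
# output = model.generate_from_batch(
#     mm_inputs,
#     GenerationConfig(max_new_tokens=100, stop_strings="<|endoftext|>"),
#     tokenizer=processor.tokenizer,
# )
# # Decode only the newly generated tokens, skipping the prompt.
# new_tokens = output[0, mm_inputs["input_ids"].size(1):]
# print(processor.tokenizer.decode(new_tokens, skip_special_tokens=True))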