import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and model from Hugging Face.
# Molmo ships custom modeling code, so trust_remote_code=True is required;
# the bnb-4bit repack also needs the bitsandbytes package installed.
model_name = "impactframes/molmo-7B-D-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
)

# Example input prompt
prompt = "What is the meaning of life?"

# Tokenize the input and move it to the model's device
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate output (pass the attention mask along with the input ids,
# and cap new tokens rather than total length)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=100)

# Decode the output
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
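
# ---------------------------------------------------------------------------
# Molmo is a vision-language model, and the snippet above only exercises its
# text path. Below is a minimal sketch of image+text inference following the
# pattern published on the upstream allenai/Molmo-7B-D-0924 model card.
# NOTE: processor.process and model.generate_from_batch are Molmo's custom
# methods loaded via trust_remote_code; whether this 4-bit repack exposes the
# same interface is an assumption, and the image URL is a placeholder.
# ---------------------------------------------------------------------------
import requests
from PIL import Image
from transformers import AutoProcessor, GenerationConfig

processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Build multimodal inputs: one image plus a text prompt.
image = Image.open(requests.get("https://example.com/dog.jpg", stream=True).raw)
mm_inputs = processor.process(images=[image], text="Describe this image.")

# Move tensors to the model's device and add a batch dimension.
mm_inputs = {k: v.to(model.device).unsqueeze(0) for k, v in mm_inputs.items()}

# Molmo's custom batched generation entry point.
output = model.generate_from_batch(
    mm_inputs,
    GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
    tokenizer=processor.tokenizer,
)

# Decode only the newly generated tokens (strip the prompt portion).
new_tokens = output[0, mm_inputs["input_ids"].size(1):]
print(processor.tokenizer.decode(new_tokens, skip_special_tokens=True))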