---
base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
library_name: peft
license: mit
language:
- en
pipeline_tag: text-generation
---

# Model Card for Vijayendra/DeepSeek-Llama3.1-8B-DeepThinker-v1

## How to Use

```python
# Optional: install bitsandbytes if you want quantized loading (notebook syntax shown)
# !pip install bitsandbytes

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# šŸ† Model name on Hugging Face
MODEL_NAME = "Vijayendra/DeepSeek-Llama3.1-8B-DeepThinker-v1"

# šŸ›  Load model & tokenizer from Hugging Face
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # safeguard: Llama-family tokenizers often ship without a pad token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",           # Automatically assigns model layers to available GPUs/CPUs
    torch_dtype=torch.float16    # Use 16-bit precision for memory efficiency
)  # Note: do not call .to() afterwards; device_map="auto" already handles placement

# šŸ›  Define inference function
def generate_response(model, tokenizer, prompt, max_new_tokens=2048, temperature=0.7):
    # Tokenize input and move it to the model's device
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    # Generate response
    with torch.no_grad():
        generated_tokens = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,  # Pass the attention mask explicitly
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            top_k=40,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id
        )

    # Decode response
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

# šŸ† Test questions
questions = [
    "What happened yesterday?",
    "If an unstoppable force hits an immovable object, what happens?",
    "The sun orbits the Earth once every 365 days. Is this true?"
]

# šŸ† Generate and print responses
for i, question in enumerate(questions, 1):
    response = generate_response(model, tokenizer, question)
    print(f"\nšŸŸ¢ Question {i}: {question}")
    print(f"šŸ”µ Response: {response}")
```

### Framework versions

- PEFT 0.14.0
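
### Optional: 4-bit quantized loading (bitsandbytes)

The snippet above installs `bitsandbytes` but loads the weights in plain fp16. If you are short on GPU memory, a 4-bit quantized load is an option. The sketch below is an illustrative example using the standard `transformers` `BitsAndBytesConfig` path; the specific NF4 settings are assumptions chosen for memory efficiency, not values prescribed by this model card.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "Vijayendra/DeepSeek-Llama3.1-8B-DeepThinker-v1"

# Illustrative 4-bit NF4 configuration; adjust to your hardware and quality needs
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # same pad-token safeguard as above

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,  # load weights in 4-bit instead of fp16
    device_map="auto",
)
```

The `generate_response()` helper from the section above can be used with this quantized model unchanged.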