import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Load the fine-tuned model and tokenizer from the local checkpoint
model_path = "/home/vscode/streamlit_space-1/trained/checkpoint-1000"
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Set the pad token to be the same as the eos token
tokenizer.pad_token = tokenizer.eos_token

# Initialize a streamer so generated tokens are printed as they are produced
streamer = TextStreamer(tokenizer)

# Move the model to the GPU if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def generate_text(prompt, max_length=2500):
    """Generate text from a prompt, streaming tokens to stdout as they arrive."""
    # Tokenize the input prompt and move the tensors to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(
        device
    )
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            pad_token_id=tokenizer.eos_token_id,
            streamer=streamer,
        )
    # Decode the full generated sequence into readable text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


# Example usage
if __name__ == "__main__":
    # Provide a specific task for the model to perform
    prompt = (
        "As a language model, your task is to generate a Python script that retrieves the current date and time. "
        "Please provide a clear and concise script that accomplishes this task."
    )
    result = generate_text(prompt)
    print("Generated Python Script:")
    print(result)
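

# ---------------------------------------------------------------------------
# Optional variant (a sketch, not part of the original script). The
# generate() call above uses greedy decoding, so repeated runs produce the
# same output. If more varied completions are wanted, transformers supports
# sampling via do_sample / temperature / top_p. The values below are
# illustrative assumptions, not tuned settings.
def generate_text_sampled(prompt, max_length=2500):
    """Like generate_text, but with nucleus sampling instead of greedy decoding."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(
        device
    )
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=max_length,
            do_sample=True,  # enable sampling instead of greedy decoding
            temperature=0.7,  # assumed value; lower values are more deterministic
            top_p=0.9,  # nucleus sampling threshold (assumed value)
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)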