import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    model_name = "prithivMLmods/QwQ-LCoT-14B-Conversational"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",          # Automatically place layers on CPU/GPU
        torch_dtype=torch.float16,  # Half precision so the 14B model fits in memory
    )
    return tokenizer, model

# Load resources
tokenizer, model = load_model()

# Streamlit app UI
st.title("QwQ-LCoT Chatbot")
st.write("A conversational AI powered by QwQ-LCoT-14B. Ask me anything!")

# User input
user_input = st.text_input("You: ", "")

if st.button("Send"):
    if user_input.strip():
        with st.spinner("Generating response..."):
            # Tokenize input and move tensors to the model's device
            inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
            # Generate a response (do_sample=True so temperature takes effect)
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
            )
            # Decode the generated tokens back to text
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Display response
            st.text_area("Bot:", value=response, height=150)