Spaces:

AIFS
/

Prometh-MOEM-24B

Runtime error

File size: 1,473 Bytes

1c63b9f
a79e48c
 
 
5e835ad
904ef74
5e835ad
cd21998
5e835ad
28b4b1c
a79e48c
 
 
5e835ad
 
28b4b1c
9c1f9ef
a79e48c
 
 
 
 
 
5e835ad
a79e48c
 
5e835ad
 
a79e48c
 
 
 
5e835ad

import os
import gradio as gr
from transformers import AutoTokenizer, pipeline

# Load the tokenizer and the text-generation pipeline once, at import time.
# The Hub access token is read from the HF_TOKEN environment variable;
# os.getenv returns None when the variable is not set.
model_name = "AIFS/Prometh-MOEM-V.01"
hf_token = os.getenv("HF_TOKEN")

# `token` is the current name of the deprecated `use_auth_token` argument.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

text_generation_pipeline = pipeline(
    "text-generation",
    model=model_name,
    # Reuse the tokenizer loaded above so the pipeline does not load a second
    # copy and prompts are tokenized by the same object that built them.
    tokenizer=tokenizer,
    # torch_dtype="auto" lets transformers pick the dtype recorded with the
    # checkpoint; load_in_4bit requests 4-bit quantized weights
    # (NOTE(review): this relies on bitsandbytes being installed -- confirm).
    model_kwargs={"torch_dtype": "auto", "load_in_4bit": True},
    token=hf_token,
)

def generate_text(user_input: str) -> str:
    """Generate a model reply to a single user message.

    The message is wrapped as a one-turn chat, rendered with the tokenizer's
    chat template, and passed to the text-generation pipeline with sampling
    enabled.

    Args:
        user_input: The text typed by the user.

    Returns:
        Only the newly generated reply (the prompt is not echoed back), or an
        empty string when ``user_input`` is empty or whitespace-only.
    """
    # Nothing to answer: skip the (expensive) model call for blank input.
    if not user_input or not user_input.strip():
        return ""

    messages = [{"role": "user", "content": user_input}]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    outputs = text_generation_pipeline(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        # By default the pipeline returns prompt + completion; without this
        # the chat-template markup and the user's own question would be shown
        # in the output box ahead of the answer.
        return_full_text=False,
    )
    return outputs[0]["generated_text"]

# Build the web UI: a two-line question box feeding generate_text, with the
# result shown in a plain text box.
question_box = gr.Textbox(lines=2, placeholder="Type your question here...")
answer_box = gr.Textbox()

iface = gr.Interface(
    fn=generate_text,
    inputs=question_box,
    outputs=answer_box,
    title="Prometh-MOEM Text Generation",
    description="A text generation model that understands your queries and generates concise, informative responses.",
)

# Start the app with request queuing enabled. Current Gradio releases no
# longer accept `launch(enable_queue=...)`; queuing is switched on by calling
# `.queue()` on the interface before launching it.
iface.queue().launch()