# Hugging Face Space app: Lugha Tausi — Swahili chat assistant (Gradio).
import torch | |
from transformers import pipeline | |
import gradio as gr | |
import PyPDF2 | |
import os | |
from huggingface_hub import login, HfFolder | |
from getpass import getpass | |
# Space configuration: resolve the Space's working directory (HF_HOME if set).
SPACE_DIR = os.environ.get("HF_HOME", os.getcwd())

# Load and preprocess the PDF content used as programming-assistance context.
# On any failure the app still starts, just with an empty context string.
try:
    pdf_path = os.path.join(SPACE_DIR, "LTDOCS.pdf")
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # extract_text() can return None for image-only/empty pages;
        # coalesce to '' so ' '.join(...) does not raise TypeError.
        pdf_content = ' '.join(page.extract_text() or '' for page in pdf_reader.pages)
    pdf_content = pdf_content.lower().strip()
except Exception as e:
    pdf_content = ""
    print(f"Error loading PDF: {e}")
def init_huggingface_auth():
    """Authenticate with the Hugging Face Hub in a Space-friendly way.

    Reads the token from the HUGGINGFACE_TOKEN environment variable
    (no interactive prompt).

    Returns:
        bool: True when login succeeded; False when the token is missing
        or the login call raised.
    """
    token = os.environ.get("HUGGINGFACE_TOKEN")
    # Guard clause: nothing to do without a token.
    if token is None or token == "":
        print("No HF token found in environment")
        return False
    try:
        login(token=token, add_to_git_credential=False)
    except Exception as e:
        print(f"Login error: {e}")
        return False
    print("HF authentication successful")
    return True
# Attempt Hub login at startup; the app continues even when it fails.
authenticated = init_huggingface_auth()
if not authenticated:
    print("Warning: Authentication failed")
# Build the text-generation pipeline once at startup.
# NOTE(review): half-precision weights reduce memory; device_map="auto"
# picks GPU when available, otherwise CPU — confirm float16 is intended
# for CPU-only Spaces.
try:
    _model_kwargs = {
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    }
    pipe = pipeline(
        "text-generation",
        model="google/gemma-2-2b-jpn-it",
        device_map="auto",
        model_kwargs=_model_kwargs,
    )
except Exception as e:
    # Surface the failure and abort: the app is useless without the model.
    print(f"Model loading error: {e}")
    raise
# System prompt prepended to every conversation. Embeds the extracted PDF
# text (empty string when loading failed) as programming-assistance context.
SYSTEM_PROMPT = f"""You are Foton, Swahili AI assistant. Tasks:
1. Swahili translations
2. Teach Swahili vocabulary/grammar
3. Explain cultural context
4. Help practice conversations
5. Programming assistance using: {pdf_content}
Maintain friendly, patient demeanor with cultural context.
"""

# Greeting shown as the bot's first chat message (Swahili: "Welcome to Lugha
# Tausi! I am Foton, your Swahili assistant...").
# NOTE(review): the trailing "π" looks like a mis-decoded emoji — confirm the
# intended glyph before changing it.
WELCOME_MESSAGE = "**Karibu Lugha Tausi!** Mimi ni Foton, msaidizi wako wa Kiswahili. Niko hapa kukusaidia kujifunza na kuzungumza Kiswahili. **Ninaweza kukusaidiaje leo?** π"
def format_messages(history):
    """Convert Gradio chat history into an OpenAI-style message list.

    Prepends the system prompt, then flattens each history entry. Accepts
    both the legacy pair format ``(user_text, bot_text)`` and the
    ``{"role": ..., "content": ...}`` dict format used when the Chatbot
    component is configured with ``type="messages"`` (as it is in this app);
    the original version silently dropped dict entries.

    Args:
        history: list of 2-tuples or role/content dicts.

    Returns:
        list[dict]: messages with "role"/"content" keys, system prompt first.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for entry in history:
        if isinstance(entry, dict):
            # Already in messages format — pass role/content through.
            messages.append({"role": entry["role"], "content": entry["content"]})
        elif isinstance(entry, tuple):
            user, bot = entry
            messages.extend([
                {"role": "user", "content": user},
                {"role": "assistant", "content": bot},
            ])
    return messages
def generate_response(message, history):
    """Run the language model on the chat history plus a new user message.

    Args:
        message: latest user input string.
        history: prior chat turns, forwarded to format_messages().

    Returns:
        str: the model's reply, or a Swahili apology string on any error.
    """
    try:
        conversation = format_messages(history)
        conversation.append({"role": "user", "content": message})
        # Flatten the structured messages into a plain "role: content" prompt.
        lines = [f"{turn['role']}: {turn['content']}" for turn in conversation]
        prompt = "\n".join(lines)
        result = pipe(
            prompt,
            max_new_tokens=256,
            temperature=0.1,
            top_p=0.9,
            do_sample=True,
            return_full_text=False,
        )
        return result[0]["generated_text"].strip()
    except Exception as e:
        # Log and degrade gracefully with a user-facing Swahili apology.
        print(f"Generation error: {e}")
        return "Samahani, nimekutana na tatizo. Tafadhali jaribu tena baadaye."
# Create Gradio interface with Space optimizations.
# The Chatbot is configured with type="messages", so every history entry must
# be a {"role": ..., "content": ...} dict — the original seeded it with a
# [user, bot] pair and appended tuples, which the messages format rejects.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Lugha Tausi - Swahili Assistant")
    chatbot = gr.Chatbot(
        value=[{"role": "assistant", "content": WELCOME_MESSAGE}],
        height=600,
        show_label=False,
        # (user_avatar, bot_avatar); NOTE(review): bot avatar file is named
        # "user.png" — confirm that is the intended asset.
        avatar_images=(None, "user.png"),
        bubble_full_width=False,
        show_share_button=False,
        type="messages"
    )
    msg = gr.Textbox(placeholder="Andika ujumbe wako hapa...", show_label=False)
    clear = gr.Button("Futa Mazungumzo")

    def respond(message, chat_history):
        """Append the user turn and the model's reply in messages format."""
        bot_response = generate_response(message, chat_history)
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_response})
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Reset the conversation instantly (queue disabled).
    clear.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    # Gradio's `auth` expects a (username, password) tuple or a callable —
    # a raw string is invalid. Parse "user:pass" from SPACE_AUTH when set,
    # otherwise launch without authentication (original passed the raw
    # env value straight through).
    auth_env = os.getenv("SPACE_AUTH")
    auth = tuple(auth_env.split(":", 1)) if auth_env and ":" in auth_env else None
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        auth=auth,
        show_error=True
    )