Spaces:

FESG1234
/

Learningsw

Running

App Files Files Community

FESG1234 committed 1 day ago

Commit

0b2c8a7

verified ·

1 Parent(s): b37e700

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -164

app.py CHANGED Viewed

@@ -1,192 +1,138 @@
 import torch
-from transformers import pipeline
 import gradio as gr
 import PyPDF2
 import os
 from huggingface_hub import login
-# Space configuration
 SPACE_DIR = os.environ.get("HF_HOME", os.getcwd())
 def init_huggingface_auth():
-    """Space-friendly authentication"""
     token = os.getenv("HUGGINGFACE_TOKEN")
-    if not token:
-        print("No HF token found in environment")
-        return False
-    try:
-        login(token=token, add_to_git_credential=False)
-        print("HF authentication successful")
-        return True
-    except Exception as e:
-        print(f"Login error: {e}")
-        return False
 if not init_huggingface_auth():
-    print("Warning: Authentication failed")
-# Load and preprocess the PDF content
-pdf_path = os.path.join(SPACE_DIR, "LTDOCS.pdf")
-with open(pdf_path, 'rb') as file:
-    pdf_reader = PyPDF2.PdfReader(file)
-    pdf_content = ' '.join([page.extract_text() for page in pdf_reader.pages])
-    pdf_content = pdf_content.lower().strip()
-# Initialize the pipeline
-pipe = pipeline(
-    "text-generation",
-    model="google/gemma-2-2b-jpn-it",
-    model_kwargs={"torch_dtype": torch.bfloat16},
-    device="cpu",  # replace with "mps" to run on a Mac device
-)
-# System prompt and welcome message
-SYSTEM_PROMPT = f"""You Foton the chat bot assistant of the Company Lugha taussi, an AI language assistant specialized in African languages, with a focus on Swahili. Your primary tasks are:
-1. Providing accurate translations between Swahili and other languages
-2. Teaching Swahili vocabulary and grammar
-3. Explaining cultural context behind Swahili expressions
-4. Helping users practice Swahili conversation
-5. Based on the programing doc  for lughah Tausi Programing which is in swahili , the following information is relevant: {pdf_content} .assist users in programing and installing lugha tausi programing language"
-Always maintain a friendly and patient demeanor, and provide cultural context when relevant speak mostly swahili and change when asked.
-"""
-WELCOME_MESSAGE = "**Karibu Lugha Tausi!** Mimi ni Foton, msaidizi wako wa kibinafsi wa Kiswahili. Niko hapa kukusaidia kujifunza, kuelewa, na kuzungumza Kiswahili. **Ninaweza kukusaidiaje leo?** Hebu tuanze! 😊"
-# CSS for custom styling
-CUSTOM_CSS = """
-.container {
-    max-width: 800px;
-    margin: auto;
-    padding: 20px;
-}
-.header {
-    text-align: center;
-    margin-bottom: 30px;
-}
-.icon {
-    width: 80px;
-    height: 80px;
-    margin: 0 auto 15px;
-    display: block;
-}
-.title {
-    font-size: 2.5em;
-    font-weight: bold;
-    margin-bottom: 10px;
-}
-.description {
-    font-size: 1.2em;
-    color: #666;
-    margin-bottom: 20px;
-}
-"""
-def format_chat_message(messages, system_prompt=SYSTEM_PROMPT):
-    """Format the chat messages with system prompt"""
-    formatted_prompt = f"{system_prompt}\n\n"
-    for message in messages:
-        if isinstance(message, tuple):
-            role, content = message
-            if role == "user":
-                formatted_prompt += f"User: {content}\nLugha Tausi: "
-            elif role == "assistant":
-                formatted_prompt += f"{content}\n"
-    return formatted_prompt
-def chat_response(message, history):
-    """Generate response for Gradio chat interface"""
-    messages = []
-    for user_msg, bot_msg in history:
-        messages.append(("user", user_msg))
-        messages.append(("assistant", bot_msg))
-    messages.append(("user", message))
-    formatted_input = format_chat_message(messages)
-    outputs = pipe(
-        formatted_input,
-        return_full_text=False,
-        max_new_tokens=256,
-        temperature=0.1,
-        top_p=0.9,
-        do_sample=True
     )
-    return outputs[0]["generated_text"].strip()
-# Create Gradio interface with custom theme and styling
-with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
-    with gr.Column(elem_classes="container"):
-        # Header section with icon, title and description
-        with gr.Column(elem_classes="header"):
-            gr.Image(
-                os.path.join(SPACE_DIR, "foton.webp"),
-                elem_classes="icon",
-                show_label=False,  # Removes the label
-                interactive=False,  # Disables interaction including download
-            )
-            gr.Markdown("# Foton - Lugha Tausi Assistant", elem_classes="title")
-            gr.Markdown(
-                """Your personal Swahili language assistant, powered by AI.
-                Specialized in translations, teaching, and cultural context.
-                Let's explore the beauty of Swahili together! 🌍✨""",
-                elem_classes="description"
-            )
-        # Chat interface
-        chatbot = gr.Chatbot(
-            value=[(None, WELCOME_MESSAGE)],
-            height=500,
-            show_label=False,
-            elem_classes="chatbox"
-        )
-        with gr.Row():
-            msg = gr.Textbox(
-                placeholder="Type your message here...",
-                show_label=False,
-                scale=9
             )
-            clear = gr.Button("Clear Chat", scale=1)
-    def user_input(message, history):
-        return "", history + [(message, None)]
-    def bot_response(history):
-        if len(history) == 0:
-            history.append((None, WELCOME_MESSAGE))
-            return history
-        user_message = history[-1][0]
-        bot_message = chat_response(user_message, history[:-1])
-        history[-1] = (user_message, bot_message)
-        return history
-    def clear_chat():
-        return [], [(None, WELCOME_MESSAGE)]
-    # Set up the message flow
-    msg.submit(
-        user_input,
-        [msg, chatbot],
-        [msg, chatbot],
-        queue=False
-    ).then(
-        bot_response,
-        chatbot,
-        chatbot
     )
-    clear.click(
-        clear_chat,
-        None,
-        [chatbot],
-        queue=False
-    )
-# Launch the interface
 if __name__ == "__main__":
-    demo.launch(share=True, ssr_mode=False)

 import torch
+from transformers import pipeline, AutoTokenizer, AutoModel
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
 import gradio as gr
 import PyPDF2
 import os
 from huggingface_hub import login
+from typing import List, Tuple
+# Configuration
+# Base directory: the HF_HOME environment variable when set, otherwise the
+# current working directory.
 SPACE_DIR = os.environ.get("HF_HOME", os.getcwd())
+# PDF whose text is chunked and indexed for retrieval.
+PDF_PATH = os.path.join(SPACE_DIR, "LTDOCS.pdf")
+# Embedding model used to build the FAISS index.
+# NOTE(review): the "-en" suffix suggests an English-focused model while the
+# prompt asks for Swahili answers -- confirm retrieval quality on the PDF.
+EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
+# Model loaded into the text-generation pipeline.
+MODEL_NAME = "google/gemma-2-2b-jpn-it"
+# Run on the GPU when CUDA is available, otherwise on the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Hugging Face authentication
 def init_huggingface_auth():
+    """Log in to the Hugging Face Hub with the token from the environment.
+
+    Reads the ``HUGGINGFACE_TOKEN`` environment variable and passes it to
+    ``login`` without storing it as a git credential.
+
+    Returns:
+        True when ``login`` completes without raising; False when the
+        variable is unset or empty, or when ``login`` raises.
+    """
     token = os.getenv("HUGGINGFACE_TOKEN")
+    if token:
+        try:
+            login(token=token, add_to_git_credential=False)
+            print("Authentification HF réussie")
+            return True
+        except Exception as e:
+            # Any login failure is reported and turned into a False result.
+            print(f"Erreur d'authentification: {e}")
+    return False
+# Best effort: a failed login only prints a warning; startup continues.
 if not init_huggingface_auth():
+    print("Avertissement: Authentification échouée")
+# Load and process the PDF
+def load_and_process_pdf() -> List[str]:
+    """Extract the text of ``PDF_PATH`` and split it into overlapping chunks.
+
+    Returns:
+        The list of chunk strings produced by the splitter
+        (``chunk_size=512``, ``chunk_overlap=128``, lengths measured with
+        ``len``).
+    """
+    with open(PDF_PATH, 'rb') as file:
+        pdf_reader = PyPDF2.PdfReader(file)
+        # Join the extracted text of every page with a newline between pages.
+        text = "\n".join([page.extract_text() for page in pdf_reader.pages])
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=512,
+        chunk_overlap=128,
+        length_function=len,
+        # Separators listed from the coarsest (blank line) to the finest (space).
+        separators=["\n\n", "\n", ".", "!", "?", ";", ",", " "]
+    )
+    return text_splitter.split_text(text)
+# Model initialization
+def initialize_models():
+    """Build the retrieval index and the text-generation pipeline.
+
+    Creates the embedding wrapper for ``EMBEDDING_MODEL``, indexes the PDF
+    chunks from ``load_and_process_pdf`` in a FAISS store, and creates a
+    "text-generation" pipeline for ``MODEL_NAME`` on ``DEVICE``.
+
+    Returns:
+        A ``(vector_store, generator)`` tuple.
+    """
+    embeddings = HuggingFaceEmbeddings(
+        model_name=EMBEDDING_MODEL,
+        model_kwargs={'device': DEVICE},
+        encode_kwargs={'normalize_embeddings': True}
+    )
+    chunks = load_and_process_pdf()
+    vector_store = FAISS.from_texts(chunks, embeddings)
+    generator = pipeline(
+        "text-generation",
+        model=MODEL_NAME,
+        tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME),
+        model_kwargs={"torch_dtype": torch.bfloat16},
+        device=DEVICE
+    )
+    return vector_store, generator
+# Built once when the module is executed; rag_response reads these
+# module-level objects on every call.
+vector_store, generator = initialize_models()
+# Prompt engineering
+# Template filled with str.format: {context} receives the retrieved
+# documentation chunks and {question} receives the user's query.
+SYSTEM_PROMPT = """Vous êtes Foton, assistant virtuel expert en programmation Lugha Tausi.
+Répondez en swahili sauf demande contraire. Basez-vous strictement sur la documentation fournie.
+Documentation:
+{context}
+Question: {question}
+Réponse:"""
+# Initial chatbot message displayed as the first entry of the conversation.
+WELCOME_MESSAGE = "**Karibu Lugha Tausi!** Mimi ni Foton, msaidizi wako wa kibinafsi. Niko hapa kukusaidia kwa masuala yoyote ya programu. **Ninaweza kukusaidiaje leo?**"
+# Fonction de génération améliorée
+def rag_response(query: str, history: List[Tuple[str, str]] = []) -> str:
+    # Recherche contextuelle
+    docs = vector_store.similarity_search(query, k=3)
+    context = "\n".join([d.page_content for d in docs])
+    # Construction du prompt
+    messages = [{"role": "user", "content": SYSTEM_PROMPT.format(context=context, question=query)}]
+    # Génération avec contrôle de qualité
+    response = generator(
+        messages,
+        max_new_tokens=512,
+        temperature=0.3,
+        top_p=0.95,
+        repetition_penalty=1.1,
+        do_sample=True,
+        num_return_sequences=1
     )
+    # Post-traitement
+    answer = response[0]['generated_text'].split("Réponse:")[-1].strip()
+    return answer
+# Interface Gradio améliorée
+with gr.Blocks(theme=gr.themes.Soft(), css=gr.themes.Soft()._get_theme_css()) as demo:
+    gr.Markdown("# Foton - Msaidizi wa Lugha Tausi")
+    with gr.Row():
+        with gr.Column(scale=2):
+            gr.Image("foton.webp", label="Foton", width=200)
+        with gr.Column(scale=8):
+            chatbot = gr.Chatbot(
+                value=[(None, WELCOME_MESSAGE)],
+                bubble_full_width=False,
+                height=600
             )
+    msg = gr.Textbox(
+        placeholder="Andika ujumbe wako hapa...",
+        label="Pitia swali lako",
+        container=False
     )
+    clear = gr.Button("Safisha Mazungumzo")
+    def respond(message, chat_history):
+        response = rag_response(message)
+        chat_history.append((message, response))
+        return "", chat_history
+    msg.submit(respond, [msg, chatbot], [msg, chatbot])
+    clear.click(lambda: None, None, chatbot, queue=False)
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)