vilarin committed on
Commit c4c656e · verified · 1 Parent(s): 2bea947

Update app.py

Files changed (1)
  1. app.py +117 -98
app.py CHANGED
@@ -1,82 +1,75 @@
  import torch
  from PIL import Image
  import gradio as gr
- #import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
  import os
  from threading import Thread
-

  HF_TOKEN = os.environ.get("HF_TOKEN", None)
- MODEL_ID = "CohereForAI/aya-23-8B"
- MODEL_ID2 = "CohereForAI/aya-23-35B"
- MODELS = os.environ.get("MODELS")
- MODEL_NAME = MODELS.split("/")[-1]
-
- TITLE = "<h1><center>Aya-23-Chatbox</center></h1>"
-
- DESCRIPTION = f'<h3><center>MODEL: <a href="https://hf.co/{MODELS}">{MODEL_NAME}</a></center></h3>'
-
- CSS = """
- .duplicate-button {
-     margin: auto !important;
-     color: white !important;
-     background: black !important;
-     border-radius: 100vh !important;
- }
- """
-
-
- #QUANTIZE
- QUANTIZE_4BIT = True
- USE_GRAD_CHECKPOINTING = True
- TRAIN_BATCH_SIZE = 2
- TRAIN_MAX_SEQ_LENGTH = 512
- USE_FLASH_ATTENTION = False
- GRAD_ACC_STEPS = 16
-
- quantization_config = None
-
- if QUANTIZE_4BIT:
-     quantization_config = BitsAndBytesConfig(
-         load_in_4bit=True,
-         bnb_4bit_quant_type="nf4",
-         bnb_4bit_use_double_quant=True,
-         bnb_4bit_compute_dtype=torch.bfloat16,
      )

- attn_implementation = None
- if USE_FLASH_ATTENTION:
-     attn_implementation="flash_attention_2"
-
- model = AutoModelForCausalLM.from_pretrained(
-     MODELS,
-     quantization_config=quantization_config,
-     attn_implementation=attn_implementation,
-     torch_dtype=torch.bfloat16,
-     device_map="auto",
- )
- tokenizer = AutoTokenizer.from_pretrained(MODELS)
-
- #@spaces.GPU()
- def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int):
-     print(f'message is - {message}')
-     print(f'history is - {history}')
-     conversation = []
-     for prompt, answer in history:
-         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-     conversation.append({"role": "user", "content": message})
-
-     print(f"Conversation is -\n{conversation}")

-     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)

      streamer = TextIteratorStreamer(tokenizer, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces':False,})

      generate_kwargs = dict(
          input_ids=input_ids,
          streamer=streamer,
-         max_new_tokens=max_new_tokens,
          do_sample=True,
          temperature=temperature,
      )
@@ -89,45 +82,71 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
          buffer += new_text
          yield buffer


-
- chatbot = gr.Chatbot(height=450)
-
- with gr.Blocks(css=CSS) as demo:
-     gr.HTML(TITLE)
-     gr.HTML(DESCRIPTION)
-     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
-     gr.ChatInterface(
-         fn=stream_chat,
-         chatbot=chatbot,
-         fill_height=True,
-         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-         additional_inputs=[
-             gr.Slider(
                  minimum=0,
                  maximum=1,
                  step=0.1,
-                 value=0.8,
-                 label="Temperature",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=128,
-                 maximum=4096,
-                 step=1,
-                 value=1024,
-                 label="Max new tokens",
-                 render=False,
-             ),
-         ],
-         examples=[
-             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
-             ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
-             ["Tell me a random fun fact about the Roman Empire."],
-             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
-         ],
-         cache_examples=False,
-     )


  if __name__ == "__main__":
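The removed handler assembles the chat prompt as a list of role/content messages and lets tokenizer.apply_chat_template render and tokenize it in one step. A minimal standalone sketch of that step, assuming the model and tokenizer loaded by the removed file are still in scope; the history and message values are invented for illustration:

```python
# Illustrative only: mirrors the prompt-building step of the removed stream_chat().
history = [("Hi!", "Hello! How can I help?")]   # (user, assistant) pairs from gr.ChatInterface
message = "Summarize your last answer in one sentence."

conversation = []
for prompt, answer in history:
    conversation.extend([
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": answer},
    ])
conversation.append({"role": "user", "content": message})

# tokenize=True plus add_generation_prompt=True returns input IDs ready for model.generate()
input_ids = tokenizer.apply_chat_template(
    conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
```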
 
@@ -1,82 +1,75 @@
  import torch
  from PIL import Image
  import gradio as gr
+ import spaces
+ from transformers import LlamaForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import os
  from threading import Thread
+ from polyglot.detect import Detector

  HF_TOKEN = os.environ.get("HF_TOKEN", None)
+ MODEL = "LLaMAX/LLaMAX3-8B-Alpaca"
+
+ TITLE = "<h1><center>LLaMAX3-8B-Translation</center></h1>"
+
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
+ model = LlamaForCausalLM.from_pretrained(
+     MODEL,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+     quantization_config=quantization_config)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL)
+
+
+ def lang_detector(text):
+     min_chars = 5
+     if len(text) < min_chars:
+         return "Input text too short"
+     try:
+         detector = Detector(text).language
+         lang_info = str(detector)
+         code = re.search(r"name: (\w+)", lang_info).group(1)
+         return code
+     except Exception as e:
+         return f"ERROR:{str(e)}"
+
+ def Prompt_template(query, src_language, trg_language):
+     instruction = f'Translate the following sentences from {src_language} to {trg_language}.'
+     prompt = (
+         'Below is an instruction that describes a task, paired with an input that provides further context. '
+         'Write a response that appropriately completes the request.\n'
+         f'### Instruction:\n{instruction}\n'
+         f'### Input:\n{query}\n### Response:'
      )
+     return prompt

+ # Unfinished
+ def chunk_text():
+     pass

+ @spaces.GPU()
+ def translate(
+     source_text: str,
+     source_lang: str,
+     target_lang: str,
+     max_chunk: int,
+     max_length: int,
+     temperature: float):
+
+     print(f'Text is - {source_text}')
+
+     prompt = Prompt_template(source_text, source_lang, target_lang)
+     inputs = tokenizer(prompt, return_tensors="pt")
+
+     input_ids = inputs.to(model.device)

      streamer = TextIteratorStreamer(tokenizer, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces':False,})

      generate_kwargs = dict(
          input_ids=input_ids,
          streamer=streamer,
+         max_length=max_length,
          do_sample=True,
          temperature=temperature,
      )
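The diff collapses the middle of both handlers (old lines 83-88, new lines 76-81), which is where generation actually happens: model.generate is launched on a background Thread with the generate_kwargs built above, and the handler then iterates the TextIteratorStreamer, yielding the growing text back to Gradio. A minimal sketch of that idiom under those assumptions; the hidden lines themselves are not shown in this commit:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, input_ids, **generate_kwargs):
    # Decoded text fragments appear on the streamer as generate() produces tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so it runs on a worker thread while we consume the stream.
    Thread(target=model.generate,
           kwargs=dict(input_ids=input_ids, streamer=streamer, **generate_kwargs)).start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # Gradio re-renders the accumulated string on each yield
```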
 
@@ -89,45 +82,71 @@
          buffer += new_text
          yield buffer

+ CSS = """
+ h1 {
+     text-align: center;
+     display: block;
+     height: 10vh;
+     align-content: center;
+ }
+ footer {
+     visibility: hidden;
+ }
+ """

+ chatbot = gr.Chatbot(height=600)
+
+ with gr.Blocks(theme="soft", css=CSS) as demo:
+     gr.Markdown(TITLE)
+     with gr.Row():
+         with gr.Column(scale=1):
+             source_lang = gr.Textbox(
+                 label="Source Lang(Auto-Detect)",
+                 value="English",
+             )
+             target_lang = gr.Textbox(
+                 label="Target Lang",
+                 value="Spanish",
+             )
+             max_chunk = gr.Slider(
+                 label="Max tokens Per Chunk",
+                 minimum=512,
+                 maximum=2046,
+                 value=1000,
+                 step=8,
+             )
+             max_length = gr.Slider(
+                 label="Context Window",
+                 minimum=512,
+                 maximum=8192,
+                 value=4096,
+                 step=8,
+             )
+             temperature = gr.Slider(
+                 label="Temperature",
                  minimum=0,
                  maximum=1,
+                 value=0.3,
                  step=0.1,
+             )
+         with gr.Column(scale=4):
+             gr.Markdown(DESCRIPTION)
+             source_text = gr.Textbox(
+                 label="Source Text",
+                 value="How we live is so different from how we ought to live that he who studies "+\
+                     "what ought to be done rather than what is done will learn the way to his downfall "+\
+                     "rather than to his preservation.",
+                 lines=10,
+             )
+             output_text = gr.Textbox(
+                 label="Output Text",
+                 lines=10,
+             )
+     with gr.Row():
+         submit = gr.Button(value="Submit")
+         clear = gr.ClearButton([source_text, output_text])
+
+     submit.click(fn=huanik, inputs=[source_lang, target_lang, source_text, max_chunk, max_length, temperature], outputs=[output_text])


  if __name__ == "__main__":
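For reference, a small usage sketch of the new helpers (not part of the commit): Prompt_template wraps the text in the Alpaca-style block that LLaMAX/LLaMAX3-8B-Alpaca expects, and lang_detector guesses the source language with polyglot. It assumes the two functions above are in scope, that polyglot is installed, and that import re is available, since lang_detector calls re.search:

```python
# Usage sketch; the input text and detected language are illustrative.
text = "Wie wir leben, unterscheidet sich stark davon, wie wir leben sollten."
src = lang_detector(text)                      # e.g. "German", or an "ERROR:..." string
prompt = Prompt_template(text, src, "English")
print(prompt)
# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# ### Instruction:
# Translate the following sentences from German to English.
# ### Input:
# Wie wir leben, unterscheidet sich stark davon, wie wir leben sollten.
# ### Response:
```

On the UI side, gr.Button.click passes the inputs components to the handler positionally, so whichever function is bound as fn must accept the six values in the order of that list.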