futranbg committed on
Commit
e5db612
1 Parent(s): ff26b87

Update app.py

Files changed (1)
  app.py +28 -46
app.py CHANGED
@@ -1,26 +1,9 @@
  import os
  import time
  import gradio as gr
- from langchain.llms import HuggingFaceHub
+ from huggingface_hub import InferenceClient
 
- llama_repo = os.getenv('HF_MODEL_LLAMA_REPO')
- starchat_repo = os.getenv('HF_MODEL_STARCHAT_REPO')
- bloom_repo = os.getenv('HF_MODEL_BLOOM_REPO')
-
- llamma_template = """<s>[INST]<<SYS>>I want you to act as document language translator. You do translation {source} texts in document into then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>[/INST]
- [INST]Begin of the document:
- {query}
- End of the document.[/INST]
- {target} translated document:
- """
-
- starchat_template = """<|system|>I want you to act as document language translator. You do translation {source} texts in document into then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>
- Begin of the document:
- {query}
- End of the document<|end|>
- <|assistant|>
- {target} translated document:
- """
+ bloom_repo = "bigscience/bloom"
 
  bloom_template = """Text translation.
  {source} text:
@@ -28,25 +11,17 @@ bloom_template = """Text translation.
  {target} translated text:
  <s>"""
 
- model_kwargs={
-     "max_new_tokens":2048,
-     "temperature": 0.01,
-     "truncate": 4096,
-     "seed" : 42,
-     "stop" : ["</s>","<|endoftext|>","<|end|>"],
- }
-
- bloom_model_kwargs={
-     "max_new_tokens":1000,
-     "temperature": 0.01,
-     # "truncate": 1512,
-     "seed" : 42,
-     "stop" : ["</s>","<|endoftext|>","<|end|>"],
- }
+ bloom_model_kwargs=dict(
+     max_new_tokens=1000,
+     temperature=0.3,
+     # truncate=1512,
+     seed=42,
+     stop_sequences=["</s>","<|endoftext|>","<|end|>"],
+     top_p=0.95,
+     repetition_penalty=1.1,
+ )
 
- llm1 = HuggingFaceHub(repo_id=llama_repo, task="text-generation", model_kwargs=model_kwargs)
- llm2 = HuggingFaceHub(repo_id=starchat_repo, task="text-generation", model_kwargs=model_kwargs)
- llm3 = HuggingFaceHub(repo_id=bloom_repo, task="text-generation", model_kwargs=bloom_model_kwargs)
+ client = InferenceClient(model=bloom_repo, token=os.environ.get("HUGGINGFACEHUB_API_TOKEN", None))
 
  def split_text_into_chunks(text, chunk_size=1000):
      lines = text.split('\n')
@@ -65,21 +40,28 @@ def split_text_into_chunks(text, chunk_size=1000):
      return chunks
 
  def translation(source, target, text):
-     response = ""
+     output = ""
+     result = ""
      chunks = split_text_into_chunks(text)
      for chunk in chunks:
          try:
              input_prompt = bloom_template.replace("{source}", source)
              input_prompt = input_prompt.replace("{target}", target)
              input_prompt = input_prompt.replace("{query}", chunk)
-             stchunk = llm3(input_prompt)
-             for eot in bloom_model_kwargs['stop']:
-                 stchunk = stchunk.replace(eot,"")
-             response += stchunk
+             stream = client.text_generation(input_prompt, stream=True, details=True, return_full_text=False, **bloom_model_kwargs)
+             for response in stream:
+                 output += response.token.text
+                 for stop_str in bloom_model_kwargs['stop_sequences']:
+                     if output.endswith(stop_str):
+                         output = output[:-len(stop_str)]
+                 yield output.replace("<newline>","\n")
+             #yield output.replace("<newline>","\n")
+             result += output
          except Exception as e:
              print(f"ERROR: LLM show {e}")
-             time.sleep(5)
-     if response == "": response = text
-     return response.replace("<newline>","\n").strip()
+             time.sleep(1)
+     #yield result.replace("<newline>","\n").strip()
+     if result == "": result = text
+     return result.replace("<newline>","\n").strip()
 
- gr.Interface(translation, inputs=["text","text","text"], outputs="text").launch()
+ gr.Interface(translation, inputs=["text","text","text"], outputs="text").queue(concurrency_count=100).launch()
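The heart of this commit is the swap from langchain's HuggingFaceHub wrapper to a direct huggingface_hub.InferenceClient. A minimal sketch of the streaming call pattern the new code relies on (repo id and token variable taken from the commit; the prompt is a made-up placeholder):

import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="bigscience/bloom",
                         token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"))

prompt = "Text translation.\nEnglish text:\n<s>Hello world</s>\nFrench translated text:\n<s>"

# With stream=True and details=True, text_generation yields one event per
# generated token; the token text lives in response.token.text.
for response in client.text_generation(prompt, stream=True, details=True,
                                       max_new_tokens=100,
                                       stop_sequences=["</s>"]):
    print(response.token.text, end="", flush=True)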
 
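Only the first and last lines of split_text_into_chunks fall inside the diff context, so the chunking logic itself is not shown here. A plausible reading, assuming a simple character-budget splitter that keeps whole lines together (a sketch, not the committed implementation):

def split_text_into_chunks(text, chunk_size=1000):
    lines = text.split('\n')
    chunks, current = [], ""
    for line in lines:
        # Assumed rule: close the current chunk once appending the next
        # line would exceed the chunk_size character budget.
        if current and len(current) + 1 + len(line) > chunk_size:
            chunks.append(current)
            current = line
        else:
            current = line if not current else current + '\n' + line
    if current:
        chunks.append(current)
    return chunks

Keeping each chunk near 1000 characters matters here because the new call leaves truncate commented out, so each prompt has to fit the model's context window on its own.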
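Because the new translation body contains yield, Python treats it as a generator: Gradio streams each yielded string into the output textbox as tokens arrive, while the trailing return result... only sets the generator's StopIteration value, which Gradio never displays. Streaming output is also why the app now enables the queue via gradio 3.x's .queue(concurrency_count=100). A sketch of the same loop with the final value yielded rather than returned, and with the per-chunk buffer reset so finished chunks are not appended to result twice (an adjustment for illustration, not part of this commit):

def translation(source, target, text):
    result = ""
    for chunk in split_text_into_chunks(text):
        output = ""  # reset per chunk; the committed code accumulates across chunks
        prompt = (bloom_template.replace("{source}", source)
                                .replace("{target}", target)
                                .replace("{query}", chunk))
        # stop-sequence trimming from the commit omitted for brevity
        for response in client.text_generation(prompt, stream=True, details=True,
                                               return_full_text=False,
                                               **bloom_model_kwargs):
            output += response.token.text
            yield (result + output).replace("<newline>", "\n")
        result += output
    yield (result if result else text).replace("<newline>", "\n").strip()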