import os
import gradio as gr
from langchain.llms import HuggingFaceHub

# Hugging Face Hub repository ids of the three models, taken from the
# environment. Each name is None when its variable is not set.
llama_repo = os.environ.get('HF_MODEL_LLAMA_REPO')
starchat_repo = os.environ.get('HF_MODEL_STARCHAT_REPO')
bloom_repo = os.environ.get('HF_MODEL_BLOOM_REPO')

# Prompt templates, one per model family. Each one carries the literal
# placeholders {source} (source language), {target} (target language) and
# {query} (the document to translate), which are filled in with str.replace.

# Llama-2 chat style prompt ([INST] / <<SYS>> markers). The instruction now
# names the target language; it previously read "into then you return".
llamma_template = """<s>[INST]<<SYS>>I want you to act as document language translator. You do translation {source} texts in document into {target} then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>[/INST]
[INST]Begin of the document:
{query}
End of the document.[/INST]
{target} translated document:

"""

# StarChat dialogue prompt. Turns are delimited by <|system|>, <|user|> and
# <|assistant|> and each is closed with <|end|>; the system turn used to be
# closed with the Llama-only marker <</SYS>>.
starchat_template = """<|system|>I want you to act as document language translator. You do translation {source} texts in document into {target} then you return to me the translated document AND DO NOTHING ELSE.<|end|>
<|user|>
Begin of the document:
{query}
End of the document<|end|>
<|assistant|>
{target} translated document:

"""

# Plain completion prompt for BLOOM; "======" separates the input text from
# the answer and is also listed as a stop sequence for this model.
bloom_template = """Translate bellow {source} text into {target}:
{query}
======
{target} translated text:

"""
# Generation settings passed to the Llama and StarChat endpoints.
model_kwargs = dict(
    max_new_tokens=2048,
    temperature=0.01,
    truncate=4096,
    seed=256,
    stop=["</s>", "<|endoftext|>", "<|end|>"],
)

# Generation settings for the BLOOM endpoint: smaller token limits, plus the
# "======" separator from its prompt template as an extra stop sequence.
bloom_model_kwargs = dict(
    max_new_tokens=1000,
    temperature=0.01,
    truncate=1000,
    seed=256,
    stop=["</s>", "<|endoftext|>", "<|end|>", "======"],
)

# One text-generation LLM wrapper per configured Hub repository:
#   llm1 -> llama_repo, llm2 -> starchat_repo (both use model_kwargs),
#   llm3 -> bloom_repo (uses bloom_model_kwargs).
llm1 = HuggingFaceHub(repo_id=llama_repo, task="text-generation", model_kwargs=model_kwargs)
llm2 = HuggingFaceHub(repo_id=starchat_repo, task="text-generation", model_kwargs=model_kwargs)
llm3 = HuggingFaceHub(repo_id=bloom_repo, task="text-generation", model_kwargs=bloom_model_kwargs)

# NOTE: a former `client = InferenceClient()` statement was removed here.
# InferenceClient is never imported in this file, so the statement raised
# NameError as soon as the module was loaded, and `client` was not used.
    
def _fill_prompt(template, source, target, text):
    """Return *template* with {source}, {target} and {query} substituted."""
    # {query} goes last so placeholder-looking text inside the user's
    # document is never expanded by the earlier substitutions.
    return (
        template.replace("{source}", source)
        .replace("{target}", target)
        .replace("{query}", text)
    )


def translation(source, target, text):
    """Translate *text* from the *source* language into the *target* language.

    The BLOOM model (``llm3``) is tried first. If that attempt raises, the
    error is printed and the request is retried once with the StarChat
    prompt on the StarChat model (``llm2``); the retry previously sent the
    StarChat prompt to the Llama model (``llm1``).

    Args:
        source: Name of the language the text is written in.
        target: Name of the language to translate into.
        text: The document to translate.

    Returns:
        The model's completion. Empty or whitespace-only *text* is returned
        unchanged without calling any model.

    Raises:
        Exception: Whatever the fallback model call raises, if it also fails.
    """
    # Nothing to translate: avoid a remote call for blank input.
    if not text or not text.strip():
        return text

    try:
        return llm3(_fill_prompt(bloom_template, source, target, text))
    except Exception as e:
        print(f"ERROR: LLM show {e}")

    fallback_prompt = _fill_prompt(starchat_template, source, target, text)
    # StarChat may echo its end-of-turn token; strip it from the answer.
    return llm2(fallback_prompt).replace("<|end|>", "")

# Web UI: three text boxes (source language, target language, document)
# mapped onto translation(), with the result shown as text.
demo = gr.Interface(
    fn=translation,
    inputs=["text", "text", "text"],
    outputs="text",
)
demo.launch()