import os

import gradio as gr
from langchain.llms import HuggingFaceHub

# Model repositories come from the environment so they can be swapped without code changes.
llama_repo = os.getenv('HF_MODEL_LLAMA_REPO')
starchat_repo = os.getenv('HF_MODEL_STARCHAT_REPO')
bloom_repo = os.getenv('HF_MODEL_BLOOM_REPO')

# Llama-2 chat prompt, using the standard <<SYS>> system-prompt markers.
llama_template = """[INST]<<SYS>>I want you to act as a document language translator. You translate the {source} text in the document into {target}, then you return the translated document to me AND DO NOTHING ELSE.<</SYS>>[/INST]
[INST]Begin of the document:
{query}
End of the document.[/INST]
{target} translated document:
"""

# StarChat prompt, using the <|system|>/<|user|>/<|assistant|> turn markers.
starchat_template = """<|system|>I want you to act as a document language translator. You translate the {source} text in the document into {target}, then you return the translated document to me AND DO NOTHING ELSE.<|end|>
<|user|>
Begin of the document:
{query}
End of the document<|end|>
<|assistant|>
{target} translated document:
"""

# BLOOM is not instruction-tuned, so it gets a plain completion-style prompt.
bloom_template = """Text translation.
{source} text:
{query}
{target} translated text:
"""

model_kwargs = {
    "max_new_tokens": 2048,
    "temperature": 0.01,
    "truncate": 4096,
    "seed": 42,
    "stop": ["</s>", "<|endoftext|>", "<|end|>"],
}
bloom_model_kwargs = {
    "max_new_tokens": 1000,
    "temperature": 0.01,
    # "truncate": 1512,
    "seed": 42,
    "stop": ["</s>", "<|endoftext|>", "<|end|>"],
}

# Three endpoints are configured; translation() below currently uses only llm3 (BLOOM).
llm1 = HuggingFaceHub(repo_id=llama_repo, task="text-generation", model_kwargs=model_kwargs)
llm2 = HuggingFaceHub(repo_id=starchat_repo, task="text-generation", model_kwargs=model_kwargs)
llm3 = HuggingFaceHub(repo_id=bloom_repo, task="text-generation", model_kwargs=bloom_model_kwargs)


def split_text_into_chunks(text, chunk_size=800):
    """Split text into chunks of at most ~chunk_size characters, breaking on line boundaries."""
    lines = text.splitlines()
    chunks = []
    temp_chunk = ""
    for line in lines:
        # If adding the current line doesn't exceed the chunk size, add the line to the chunk.
        if len(temp_chunk) + len(line) <= chunk_size:
            temp_chunk += line + '\n'
        else:
            # If adding the line exceeds the chunk size, store the current chunk and start a new one.
            chunks.append(temp_chunk)
            temp_chunk = line + '\n'
    # Don't forget the last (possibly partial) chunk; skip it if it is empty.
    if temp_chunk:
        chunks.append(temp_chunk)
    return chunks


def translation(source, target, text):
    """Translate `text` from `source` to `target` chunk by chunk with the BLOOM model."""
    response = ""
    chunks = split_text_into_chunks(text)
    for chunk in chunks:
        try:
            # Fill the prompt template; str.replace is used instead of str.format
            # so that braces inside the document itself are left untouched.
            input_prompt = bloom_template.replace("{source}", source)
            input_prompt = input_prompt.replace("{target}", target)
            input_prompt = input_prompt.replace("{query}", chunk.strip())
            stchunk = llm3(input_prompt)
            # Strip any stop sequences the model may have echoed back.
            for eot in bloom_model_kwargs['stop']:
                stchunk = stchunk.replace(eot, "")
            response += stchunk + "\n"
        except Exception as e:
            print(f"ERROR: LLM raised {e}")
    # Fall back to the original text if every chunk failed.
    if response == "":
        response = text
    return response.strip()


gr.Interface(translation, inputs=["text", "text", "text"], outputs="text").launch()
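# Usage sketch (illustrative; assumes the HF_MODEL_* environment variables above
# point at valid Hugging Face model repos and that HUGGINGFACEHUB_API_TOKEN is
# set, which langchain's HuggingFaceHub wrapper requires for API access):
#
#     result = translation("English", "French", "Hello.\nHow are you?")
#     print(result)
#
# The Gradio interface exposes the same function: source language, target
# language, and the document text as inputs, with the translated document as output.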