"""Gradio demo with two tabs: zh/ja/en translation and multilingual summarization.

Both tasks are served by mT5 checkpoints that are loaded once at import time.
"""
import re

import gradio as gr
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    MT5ForConditionalGeneration,
    Text2TextGenerationPipeline,
)

TRANSLATION_MODEL_NAME = "K024/mt5-zh-ja-en-trimmed"
SUMMARY_MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Maps each task label shown in the UI to the prompt prefix that is prepended
# to the source text.  Insertion order is also the order of the dropdown
# choices.
TRANSLATION_PREFIXES = {
    "中译日": "zh2ja:",
    "中译英": "zh2en:",
    "日译中": "ja2zh:",
    "英译中": "en2zh:",
    "日译英": "ja2en:",
    "英译日": "en2ja:",
}

# Any run of whitespace (spaces, tabs, newlines) is collapsed to one space
# before summarization.
_WHITESPACE_RE = re.compile(r"\s+")

# Translation task setup.
trans_mdl = MT5ForConditionalGeneration.from_pretrained(TRANSLATION_MODEL_NAME)
trans_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL_NAME)
trans_pipe = Text2TextGenerationPipeline(model=trans_mdl, tokenizer=trans_tokenizer)

# Summarization task setup.
sum_mdl = AutoModelForSeq2SeqLM.from_pretrained(SUMMARY_MODEL_NAME)
sum_tokenizer = AutoTokenizer.from_pretrained(SUMMARY_MODEL_NAME)


def translation_job(job, text):
    """Translate ``text`` according to the selected translation task.

    Args:
        job: Task label; must be one of the keys of ``TRANSLATION_PREFIXES``.
        text: Source text to translate.

    Returns:
        The text generated by the translation pipeline, or an empty string
        when ``text`` is empty or whitespace-only.

    Raises:
        gr.Error: If ``job`` is not a known task label (for example when no
            task has been selected in the dropdown).
    """
    prefix = TRANSLATION_PREFIXES.get(job)
    if prefix is None:
        # Surface a readable message in the UI instead of a bare KeyError.
        raise gr.Error("请先选择翻译任务")
    if not text or not text.strip():
        return ""

    prompt = prefix + text
    print(prompt)  # Echo the final prompt to the console for debugging.
    response = trans_pipe(prompt, max_length=100, num_beams=4)
    return response[0]["generated_text"]


def sum_job(text):
    """Summarize ``text`` with the multilingual summarization model.

    Args:
        text: Article text to summarize.

    Returns:
        The decoded summary, or an empty string when ``text`` is empty or
        whitespace-only.
    """
    if not text or not text.strip():
        return ""

    # Normalize whitespace in the source text before tokenizing.
    cleaned = _WHITESPACE_RE.sub(" ", text.strip())
    encoded = sum_tokenizer(
        [cleaned],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    # The input is padded to max_length, so hand the attention mask to
    # generate() explicitly rather than relying on it being inferred from the
    # pad tokens.
    output_ids = sum_mdl.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]
    return sum_tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )


with gr.Blocks() as app:
    # Chinese / English / Japanese translation tab.
    with gr.Tab("中英日三语翻译"):
        job_name = gr.Dropdown(
            list(TRANSLATION_PREFIXES),
            label="翻译任务选择",
            info="单选",
        )
        source_text = gr.Textbox(lines=1, label="翻译文本", placeholder="请输入要翻译的文本")
        trans_result = gr.Textbox(lines=1, label="翻译结果")
        trans_btn = gr.Button("翻译")

    # Multilingual automatic summarization tab.
    with gr.Tab("多语言自动摘要"):
        article_text = gr.Textbox(lines=8, label="待总结文本", placeholder="请输入要进行摘要的文本")
        sum_result = gr.Textbox(lines=2, label="摘要结果")
        sum_btn = gr.Button("摘要")

    # Event listeners are registered while the Blocks context is still open.
    trans_btn.click(translation_job, inputs=[job_name, source_text], outputs=trans_result)
    sum_btn.click(sum_job, inputs=article_text, outputs=sum_result)

app.launch()