Spaces:
Runtime error
Runtime error
stevengrove
committed on
Commit
•
4ca98ba
1
Parent(s):
5f71fb3
add prompt support
Browse files- app.py +206 -116
- prompts/interview.json +75 -0
app.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
import re
|
|
|
2 |
import argparse
|
3 |
|
4 |
import openai
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
|
8 |
SYSTEM_PROMPT = """You are a tool for filtering out paragraphs from the interview dialogues given by user.""" # noqa: E501
|
@@ -11,113 +13,176 @@ USER_FORMAT = """Interview Dialogues:
|
|
11 |
{input_txt}
|
12 |
|
13 |
Please select the rounds containing one of following tags: {pos_tags}.
|
14 |
-
Note that you should ONLY outputs a list of the speaker name, speaking time, tag and reason
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
if
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
|
117 |
if __name__ == '__main__':
|
118 |
parser = argparse.ArgumentParser()
|
|
|
|
|
|
|
|
|
119 |
args = parser.parse_args()
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
with gr.Blocks() as demo:
|
122 |
with gr.Row():
|
123 |
with gr.Column(scale=0.3):
|
@@ -128,21 +193,39 @@ if __name__ == '__main__':
|
|
128 |
elem_id='api_key_textbox',
|
129 |
placeholder='Enter your OPENAI API Key')
|
130 |
with gr.Row():
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
elem_id='
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
with gr.Row():
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
with gr.Row():
|
144 |
input_txt = gr.Textbox(
|
145 |
lines=4,
|
|
|
146 |
label='Input',
|
147 |
elem_id='input_textbox',
|
148 |
placeholder='Enter text and press submit')
|
@@ -152,17 +235,24 @@ if __name__ == '__main__':
|
|
152 |
clear = gr.Button('Clear')
|
153 |
with gr.Column(scale=0.7):
|
154 |
output_txt = gr.Textbox(
|
|
|
155 |
label='Output',
|
156 |
elem_id='output_textbox')
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
158 |
submit.click(
|
159 |
-
|
160 |
-
[api_key,
|
|
|
161 |
[output_txt])
|
162 |
clear.click(
|
163 |
lambda: ['', '', ''],
|
164 |
None,
|
165 |
-
|
166 |
|
167 |
demo.queue(concurrency_count=6)
|
168 |
demo.launch()
|
|
|
1 |
import re
|
2 |
+
import json
|
3 |
import argparse
|
4 |
|
5 |
import openai
|
6 |
import gradio as gr
|
7 |
+
from functools import partial
|
8 |
|
9 |
|
10 |
SYSTEM_PROMPT = """You are a tool for filtering out paragraphs from the interview dialogues given by user.""" # noqa: E501
|
|
|
13 |
{input_txt}
|
14 |
|
15 |
Please select the rounds containing one of following tags: {pos_tags}.
|
16 |
+
Note that you should ONLY outputs a list of the speaker name, speaking time, tag and reason for each selected round. Do NOT output the content. Each output item should be like "speaker_name speaking_time: tag, reason".""" # noqa: E501
|
17 |
+
|
18 |
+
|
19 |
+
class GPT4News():
    """Interview-transcript toolchain driven by JSON prompt templates.

    Splits a transcript into model-sized chunks, sends each chunk to the
    OpenAI chat API with the prompt selected by ``function_name``, and
    merges/filters/translates the responses.

    Args:
        prompt_formats: list of prompt dicts (one per function); each dict
            provides 'name', 'system', 'system_keys', 'user', 'user_keys',
            'post_filter', 'split_length' and 'split_round'.
    """

    # Upper bound on API retries; the original implementation retried via
    # unbounded recursion and could overflow the stack on persistent errors.
    MAX_RETRIES = 5

    def __init__(self, prompt_formats):
        # Index prompts by their 'name' for O(1) lookup.
        self.name2prompt = {x['name']: x for x in prompt_formats}

    def preprocess(self, function_name, input_txt):
        """Normalize speaker tags and split *input_txt* into bounded chunks.

        Returns:
            list[str]: chunks of at most 'split_length' characters and at
            most 'split_round' speaker rounds each, with every round
            rendered as "Speaker N hh:mm: utterance" on its own line.
        """
        max_length = self.name2prompt[function_name]['split_length']
        max_convs = self.name2prompt[function_name]['split_round']

        # Translate Chinese speaker tags ("说话人N hh:mm") into English ones.
        input_txt = re.sub(r'(说话人)(\d+ \d\d:\d\d)', r'Speaker \2', input_txt)
        speaker_pattern = re.compile(r'(Speaker \d+ \d\d:\d\d)')
        parts = [x.strip().replace('\n', ' ')
                 for x in speaker_pattern.split(input_txt)]

        # Pair every speaker tag with the utterance that follows it;
        # utterances longer than max_length are pruned into several pieces.
        conversations = []
        for idx, txt in enumerate(parts):
            if not speaker_pattern.match(txt):
                continue
            if idx < len(parts) - 1 and \
                    not speaker_pattern.match(parts[idx + 1]):
                conv = [txt, parts[idx + 1]]
            else:
                # Tag without an utterance; also covers a tag that is the
                # final element (the original left `conv` unbound here and
                # could raise NameError or reuse a stale value).
                conv = [txt, '']
            while len(''.join(conv)) > max_length:
                pruned_len = max_length - len(conv[0])
                if pruned_len <= 0:
                    # The tag alone exceeds max_length; pruning cannot make
                    # progress (the original looped forever here).
                    break
                conversations.append([txt, conv[1][:pruned_len]])
                conv = [txt, conv[-1][pruned_len:]]
            conversations.append(conv)

        # Greedily pack rounds into chunks, opening a new chunk whenever
        # the length or round-count budget would be exceeded.
        input_txt_list = ['']
        for conv in conversations:
            conv_length = len(''.join(conv))
            if len(input_txt_list[-1]) + conv_length >= max_length:
                input_txt_list.append('')
            elif len(speaker_pattern.findall(input_txt_list[-1])) >= max_convs:
                input_txt_list.append('')
            input_txt_list[-1] += ''.join(conv)

        # Put every speaker tag on its own line as "Speaker N hh:mm: ".
        processed_txt_list = []
        for chunk in input_txt_list:
            chunk = speaker_pattern.sub(r'\n\1: ', chunk)
            processed_txt_list.append(chunk.strip())
        return processed_txt_list

    def chatgpt(self, messages, temperature=0.0):
        """Send *messages* to the chat API, retrying transient failures.

        Raises:
            Exception: the last API error after MAX_RETRIES failed attempts
            (the original recursed without bound instead of giving up).
        """
        last_err = None
        for _ in range(self.MAX_RETRIES):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=messages,
                    temperature=temperature
                )
                return completion.choices[0].message.content
            except Exception as err:
                # Log and retry; the openai client raises many error types.
                print(err)
                last_err = err
        raise last_err

    def llm(self, function_name, temperature, **kwargs):
        """Format the prompt for *function_name* and query the model."""
        prompt = self.name2prompt[function_name]
        # Each template declares which kwargs it consumes.
        user_kwargs = {key: kwargs[key] for key in prompt['user_keys']}
        user = prompt['user'].format(**user_kwargs)
        system_kwargs = {key: kwargs[key] for key in prompt['system_keys']}
        system = prompt['system'].format(**system_kwargs)
        messages = [
            {'role': 'system',
             'content': system},
            {'role': 'user',
             'content': user}]
        response = self.chatgpt(messages, temperature=temperature)
        print(f'SYSTEM:\n\n{system}')
        print(f'USER:\n\n{user}')
        print(f'RESPONSE:\n\n{response}')
        return response

    def translate(self, txt, output_lang):
        """Translate *txt* into *output_lang*; English is returned as-is."""
        if output_lang == 'English':
            return txt
        system = 'Translate the following text into {}:\n\n{}'.format(
            output_lang, txt)
        messages = [{'role': 'system', 'content': system}]
        response = self.chatgpt(messages)
        print(f'SYSTEM:\n\n{system}')
        print(f'RESPONSE:\n\n{response}')
        return response

    def postprocess(self, function_name, input_txt, output_txt_list,
                    output_lang):
        """Merge per-chunk model outputs and optionally filter them.

        When the prompt's 'post_filter' flag is false the chunks are simply
        joined and translated.  Otherwise only rounds whose speaker tag also
        occurs in *input_txt* are kept, dropping speakers the model made up.
        """
        if not self.name2prompt[function_name]['post_filter']:
            output_txt = '\n\n'.join(output_txt_list)
            output_txt = self.translate(output_txt, output_lang)
            return output_txt

        speaker_pattern = re.compile(r'(Speaker \d+ \d\d:\d\d)')
        output_txt = []
        for txt in output_txt_list:
            # Keep only chunks that contain at least one speaker tag.
            if len(speaker_pattern.findall(txt)) > 0:
                output_txt.append(txt)
        output_txt = ''.join(output_txt)
        speakers = set(speaker_pattern.findall(input_txt))
        output_txt = speaker_pattern.split(output_txt)

        results = []
        for idx, txt in enumerate(output_txt):
            if not speaker_pattern.match(txt):
                continue
            if txt not in speakers:
                continue  # speaker never appeared in the input
            if idx < len(output_txt) - 1 and \
                    not speaker_pattern.match(output_txt[idx + 1]):
                res = txt + output_txt[idx + 1]
            else:
                # Tag with no trailing text; also covers the final element
                # (the original left `res` unbound/stale in that case).
                res = txt
            res = self.translate(res, output_lang)
            results.append(res.strip())
        return '\n\n'.join(results)

    def __call__(self, api_key, function_name, temperature, output_lang,
                 input_txt, tags):
        """Run the selected function end to end and return the output text."""
        if api_key is None or api_key == '':
            return 'OPENAI API Key is not set.'
        if function_name is None or function_name == '':
            return 'Function is not selected.'
        openai.api_key = api_key
        input_txt_list = self.preprocess(function_name, input_txt)
        input_txt = '\n'.join(input_txt_list)
        output_txt_list = []
        for txt in input_txt_list:
            llm_kwargs = dict(input_txt=txt,
                              tags=tags)
            output_txt = self.llm(function_name, temperature, **llm_kwargs)
            output_txt_list.append(output_txt)
        output_txt = self.postprocess(
            function_name, input_txt, output_txt_list, output_lang)
        return output_txt

    @property
    def function_names(self):
        """Names of all configured functions (a dict key view)."""
        return self.name2prompt.keys()
|
156 |
+
|
157 |
+
|
158 |
+
def function_name_select_callback(componments, name2prompt, function_name):
    """Return gradio visibility updates for *componments*: a component is
    shown iff its name is among the selected prompt's 'user_keys'."""
    visible_keys = name2prompt[function_name]['user_keys']
    return [gr.update(visible=name in visible_keys) for name in componments]
|
165 |
|
166 |
|
167 |
if __name__ == '__main__':
|
168 |
parser = argparse.ArgumentParser()
|
169 |
+
parser.add_argument('--prompt', type=str, default='prompts/interview.json',
|
170 |
+
help='path to the prompt file')
|
171 |
+
parser.add_argument('--temperature', type=float, default='0.7',
|
172 |
+
help='temperature for the llm model')
|
173 |
args = parser.parse_args()
|
174 |
|
175 |
+
prompt_formats = json.load(open(args.prompt, 'r'))
|
176 |
+
gpt4news = GPT4News(prompt_formats)
|
177 |
+
|
178 |
+
languages = ['Arabic', 'Bengali', 'Chinese (Simplified)',
|
179 |
+
'Chinese (Traditional)', 'Dutch', 'English', 'French',
|
180 |
+
'German', 'Hindi', 'Italian', 'Japanese', 'Korean',
|
181 |
+
'Portuguese', 'Punjabi', 'Russian', 'Spanish', 'Turkish',
|
182 |
+
'Urdu']
|
183 |
+
default_func = sorted(gpt4news.function_names)[0]
|
184 |
+
default_user_keys = gpt4news.name2prompt[default_func]['user_keys']
|
185 |
+
|
186 |
with gr.Blocks() as demo:
|
187 |
with gr.Row():
|
188 |
with gr.Column(scale=0.3):
|
|
|
193 |
elem_id='api_key_textbox',
|
194 |
placeholder='Enter your OPENAI API Key')
|
195 |
with gr.Row():
|
196 |
+
function_name = gr.Dropdown(
|
197 |
+
sorted(gpt4news.function_names),
|
198 |
+
value=default_func,
|
199 |
+
elem_id='function_dropdown',
|
200 |
+
label='Function',
|
201 |
+
info='choose a function to run')
|
202 |
+
with gr.Row():
|
203 |
+
output_lang = gr.Dropdown(
|
204 |
+
languages,
|
205 |
+
value='English',
|
206 |
+
elem_id='output_lang_dropdown',
|
207 |
+
label='Output Language',
|
208 |
+
info='choose a language to output')
|
209 |
with gr.Row():
|
210 |
+
temperature = gr.Slider(
|
211 |
+
minimum=0.0,
|
212 |
+
maximum=1.0,
|
213 |
+
value=args.temperature,
|
214 |
+
step=0.1,
|
215 |
+
interactive=True,
|
216 |
+
label='Temperature',
|
217 |
+
info='higher temperature means more creative')
|
218 |
+
with gr.Row():
|
219 |
+
tags = gr.Textbox(
|
220 |
+
lines=1,
|
221 |
+
visible='tags' in default_user_keys,
|
222 |
+
label='Tags',
|
223 |
+
elem_id='tags_textbox',
|
224 |
+
placeholder='Enter tags split by semicolon')
|
225 |
with gr.Row():
|
226 |
input_txt = gr.Textbox(
|
227 |
lines=4,
|
228 |
+
visible='input_txt' in default_user_keys,
|
229 |
label='Input',
|
230 |
elem_id='input_textbox',
|
231 |
placeholder='Enter text and press submit')
|
|
|
235 |
clear = gr.Button('Clear')
|
236 |
with gr.Column(scale=0.7):
|
237 |
output_txt = gr.Textbox(
|
238 |
+
lines=8,
|
239 |
label='Output',
|
240 |
elem_id='output_textbox')
|
241 |
+
function_name.select(
|
242 |
+
partial(function_name_select_callback, ['input_txt', 'tags'],
|
243 |
+
gpt4news.name2prompt),
|
244 |
+
[function_name],
|
245 |
+
[input_txt, tags]
|
246 |
+
)
|
247 |
submit.click(
|
248 |
+
gpt4news,
|
249 |
+
[api_key, function_name, temperature, output_lang,
|
250 |
+
input_txt, tags],
|
251 |
[output_txt])
|
252 |
clear.click(
|
253 |
lambda: ['', '', ''],
|
254 |
None,
|
255 |
+
tags, input_txt)
|
256 |
|
257 |
demo.queue(concurrency_count=6)
|
258 |
demo.launch()
|
prompts/interview.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"name": "searching",
|
4 |
+
"system": "You are a tool for filtering out paragraphs from the interview dialogues given by user.",
|
5 |
+
"system_keys": [],
|
6 |
+
"user": "Interview Dialogues:\n{input_txt}\n\nPlease select the rounds containing one of the following tags: {tags}. Note that you should ONLY output a list of the speaker name, speaking time, tag and reason for each selected round. Do NOT output the content. Each output item should be like \"speaker_name speaking_time: tag, reason\".",
|
7 |
+
"user_keys": [
|
8 |
+
"input_txt",
|
9 |
+
"tags"
|
10 |
+
],
|
11 |
+
"post_filter": true,
|
12 |
+
"split_length": 4000,
|
13 |
+
"split_round": 4
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"name": "proofreading",
|
17 |
+
"system": "You are a proofreading tool used to improve the wording, grammar, and logical issues in a given interview record. Note that the output should maintain the original meaning, as well as keeping the speaker's name and interview time unchanged.",
|
18 |
+
"system_keys": [],
|
19 |
+
"user": "{input_txt}\n\n------\nPlease proofread the interview record and output the improved version. Note that the output should maintain the original meaning, as well as keeping the speaker's name and interview time unchanged.",
|
20 |
+
"user_keys": [
|
21 |
+
"input_txt"
|
22 |
+
],
|
23 |
+
"post_filter": true,
|
24 |
+
"split_length": 4000,
|
25 |
+
"split_round": 4
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"name": "summarization",
|
29 |
+
"system": "You are a text summarization tool used to summarize the meaning of each round of conversation in an interview record.",
|
30 |
+
"system_keys": [],
|
31 |
+
"user": "{input_txt}\n\n------\nPlease summarize the meaning of each round of conversation in an interview record. Note that the output should be concise and contains key information. The output should be like \"speaker_name speaking_time: summarization\"",
|
32 |
+
"user_keys": [
|
33 |
+
"input_txt"
|
34 |
+
],
|
35 |
+
"post_filter": true,
|
36 |
+
"split_length": 4000,
|
37 |
+
"split_round": 4
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"name": "summary to news",
|
41 |
+
"system": "You are a news writer who writes news articles based on the given summary of interview records.",
|
42 |
+
"system_keys": [],
|
43 |
+
"user": "{input_txt}\n\n------\nPlease write a news article based on the given summary of interview records.",
|
44 |
+
"user_keys": [
|
45 |
+
"input_txt"
|
46 |
+
],
|
47 |
+
"post_filter": false,
|
48 |
+
"split_length": 10000000,
|
49 |
+
"split_round": 10000
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"name": "summary to twitter",
|
53 |
+
"system": "You are a Twitter author who writes tweets based on the given summary of interview records.",
|
54 |
+
"system_keys": [],
|
55 |
+
"user": "{input_txt}\n\n------\nPlease write a tweet based on the given summary of interview records. Note that the number of words in the output MUST be less than 140.",
|
56 |
+
"user_keys": [
|
57 |
+
"input_txt"
|
58 |
+
],
|
59 |
+
"post_filter": false,
|
60 |
+
"split_length": 10000000,
|
61 |
+
"split_round": 10000
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"name": "summary to weibo",
|
65 |
+
"system": "You are a Weibo author who writes eye-catching short articles based on the given summary of interview records.",
|
66 |
+
"system_keys": [],
|
67 |
+
"user": "{input_txt}\n\n------\nPlease write an eye-catching short article based on the given summary of interview records. Note that the number of words in the output MUST be less than 140.",
|
68 |
+
"user_keys": [
|
69 |
+
"input_txt"
|
70 |
+
],
|
71 |
+
"post_filter": false,
|
72 |
+
"split_length": 10000000,
|
73 |
+
"split_round": 10000
|
74 |
+
}
|
75 |
+
]
|