sadzxctv committed on
Commit
12d9ade
1 Parent(s): 6da03bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -42
app.py CHANGED
@@ -23,16 +23,19 @@ llm = None
23
  llm_model = None
24
 
25
  @spaces.GPU(duration=120)
26
- def translate_text(
27
- input_text: str,
28
- model: str,
29
- system_message: str,
30
- max_tokens: int,
31
- temperature: float,
32
- top_p: float,
33
- top_k: int,
34
- repeat_penalty: float
35
- ) -> str:
 
 
 
36
  global llm
37
  global llm_model
38
 
@@ -51,45 +54,50 @@ def translate_text(
51
  agent = LlamaCppAgent(
52
  provider,
53
  system_prompt=f"{system_message}",
54
- predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
55
  debug_output=True
56
  )
57
 
58
  settings = provider.get_provider_default_settings()
59
- settings.temperature = temperature
60
  settings.top_k = top_k
61
- settings.top_p = top_p
62
  settings.max_tokens = max_tokens
63
- settings.repeat_penalty = repeat_penalty
64
- settings.stream = False
65
 
66
  messages = BasicChatHistory()
67
- user_message = {
68
- 'role': Roles.user,
69
- 'content': input_text
70
- }
71
- messages.add_message(user_message)
 
 
 
 
 
 
 
72
 
73
- response = agent.get_chat_response(
74
- input_text,
75
  llm_sampling_settings=settings,
76
  chat_history=messages,
77
- returns_streaming_generator=False,
78
  print_output=False
79
  )
80
 
81
- return response
 
 
 
82
 
83
- description = """<p align="center">Enter Japanese text to translate it into Simplified Chinese.</p>"""
84
 
85
- demo = gr.Interface(
86
- fn=translate_text,
87
- inputs=[
88
- gr.Textbox(
89
- lines=2,
90
- placeholder="請輸入日文",
91
- label="Input Japanese Text"
92
- ),
93
  gr.Dropdown([
94
  'sakura-14b-qwen2beta-v0.9.2-q6k.gguf'
95
  ],
@@ -97,7 +105,7 @@ demo = gr.Interface(
97
  label="Model"
98
  ),
99
  gr.Textbox(value="你是一个轻小说翻译模型,可以流畅通顺地以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,不擅自添加原文中没有的代词。", label="System message"),
100
- gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens"),
101
  gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
102
  gr.Slider(
103
  minimum=0.1,
@@ -109,7 +117,7 @@ demo = gr.Interface(
109
  gr.Slider(
110
  minimum=0,
111
  maximum=100,
112
- value=1,
113
  step=1,
114
  label="Top-k",
115
  ),
@@ -121,13 +129,18 @@ demo = gr.Interface(
121
  label="Repetition penalty",
122
  ),
123
  ],
124
- outputs=gr.Textbox(
125
- lines=2,
126
- label="Translated Chinese Text"
127
- ),
128
- title="Japanese to Chinese Translator",
129
- description=description
 
 
 
 
 
130
  )
131
 
132
  if __name__ == "__main__":
133
- demo.launch()
 
23
  llm_model = None
24
 
25
  @spaces.GPU(duration=120)
26
+ def respond(
27
+ message,
28
+ history: list[tuple[str, str]],
29
+ model,
30
+ system_message,
31
+ max_tokens,
32
+ temperature,
33
+ top_p,
34
+ top_k,
35
+ repeat_penalty,
36
+ ):
37
+ chat_template = MessagesFormatterType.GEMMA_2
38
+
39
  global llm
40
  global llm_model
41
 
 
54
  agent = LlamaCppAgent(
55
  provider,
56
  system_prompt=f"{system_message}",
57
+ predefined_messages_formatter_type=chat_template,
58
  debug_output=True
59
  )
60
 
61
  settings = provider.get_provider_default_settings()
62
+ settings.temperature = 0.1
63
  settings.top_k = top_k
64
+ settings.top_p = 0.3
65
  settings.max_tokens = max_tokens
66
+ settings.repeat_penalty = 1.0
67
+ settings.stream = True
68
 
69
  messages = BasicChatHistory()
70
+
71
+ for msn in history:
72
+ user = {
73
+ 'role': Roles.user,
74
+ 'content': msn[0]
75
+ }
76
+ assistant = {
77
+ 'role': Roles.assistant,
78
+ 'content': msn[1]
79
+ }
80
+ messages.add_message(user)
81
+ messages.add_message(assistant)
82
 
83
+ stream = agent.get_chat_response(
84
+ message,
85
  llm_sampling_settings=settings,
86
  chat_history=messages,
87
+ returns_streaming_generator=True,
88
  print_output=False
89
  )
90
 
91
+ outputs = ""
92
+ for output in stream:
93
+ outputs += output
94
+ yield outputs
95
 
96
+ description = """<p align="center">Defaults to Sakura-14B-Qwen2beta (you can switch from additional inputs)</p>"""
97
 
98
+ demo = gr.ChatInterface(
99
+ respond,
100
+ additional_inputs=[
 
 
 
 
 
101
  gr.Dropdown([
102
  'sakura-14b-qwen2beta-v0.9.2-q6k.gguf'
103
  ],
 
105
  label="Model"
106
  ),
107
  gr.Textbox(value="你是一个轻小说翻译模型,可以流畅通顺地以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,不擅自添加原文中没有的代词。", label="System message"),
108
+ gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
109
  gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
110
  gr.Slider(
111
  minimum=0.1,
 
117
  gr.Slider(
118
  minimum=0,
119
  maximum=100,
120
+ value=40,
121
  step=1,
122
  label="Top-k",
123
  ),
 
129
  label="Repetition penalty",
130
  ),
131
  ],
132
+ retry_btn="Retry",
133
+ undo_btn="Undo",
134
+ clear_btn="Clear",
135
+ submit_btn="Send",
136
+ title="Chat with Sakura-14B-Qwen2beta using llama.cpp",
137
+ description=description,
138
+ chatbot=gr.Chatbot(
139
+ scale=1,
140
+ likeable=False,
141
+ show_copy_button=True
142
+ )
143
  )
144
 
145
  if __name__ == "__main__":
146
+ demo.launch()