vilarin commited on
Commit
0ec7560
1 Parent(s): 352ea7e

Upload process.py

Browse files
Files changed (1) hide show
  1. app/webui/process.py +18 -4
app/webui/process.py CHANGED
@@ -7,6 +7,8 @@ from app.webui.patch import calculate_chunk_size, multichunk_initial_translation
7
 
8
  from llama_index.core.node_parser import SentenceSplitter
9
 
 
 
10
  def tokenize(text):
11
  # Use nltk to tokenize the text
12
  words = simple_tokenizer(text)
@@ -52,7 +54,6 @@ def translator(
52
  country: str,
53
  max_tokens:int = 1000,
54
  ):
55
-
56
  """Translate the source_text from source_lang to target_lang."""
57
  num_tokens_in_text = num_tokens_in_string(source_text)
58
 
@@ -61,16 +62,17 @@ def translator(
61
  if num_tokens_in_text < max_tokens:
62
  ic("Translating text as single chunk")
63
 
64
- #Note: use yield from B() if put yield in function B()
65
  init_translation = one_chunk_initial_translation(
66
  source_lang, target_lang, source_text
67
  )
68
 
69
-
70
  reflection = one_chunk_reflect_on_translation(
71
  source_lang, target_lang, source_text, init_translation, country
72
  )
73
 
 
74
  final_translation = one_chunk_improve_translation(
75
  source_lang, target_lang, source_text, init_translation, reflection
76
  )
@@ -80,6 +82,7 @@ def translator(
80
  else:
81
  ic("Translating text as multiple chunks")
82
 
 
83
  token_size = calculate_chunk_size(
84
  token_count=num_tokens_in_text, token_limit=max_tokens
85
  )
@@ -91,14 +94,17 @@ def translator(
91
  chunk_size=token_size,
92
  )
93
 
 
94
  source_text_chunks = text_parser.split_text(source_text)
95
 
 
96
  translation_1_chunks = multichunk_initial_translation(
97
  source_lang, target_lang, source_text_chunks
98
  )
99
 
100
  init_translation = "".join(translation_1_chunks)
101
 
 
102
  reflection_chunks = multichunk_reflect_on_translation(
103
  source_lang,
104
  target_lang,
@@ -109,6 +115,7 @@ def translator(
109
 
110
  reflection = "".join(reflection_chunks)
111
 
 
112
  translation_2_chunks = multichunk_improve_translation(
113
  source_lang,
114
  target_lang,
@@ -143,7 +150,7 @@ def translator_sec(
143
  if num_tokens_in_text < max_tokens:
144
  ic("Translating text as single chunk")
145
 
146
- #Note: use yield from B() if put yield in function B()
147
  init_translation = one_chunk_initial_translation(
148
  source_lang, target_lang, source_text
149
  )
@@ -153,10 +160,12 @@ def translator_sec(
153
  except Exception as e:
154
  raise gr.Error(f"An unexpected error occurred: {e}")
155
 
 
156
  reflection = one_chunk_reflect_on_translation(
157
  source_lang, target_lang, source_text, init_translation, country
158
  )
159
 
 
160
  final_translation = one_chunk_improve_translation(
161
  source_lang, target_lang, source_text, init_translation, reflection
162
  )
@@ -166,6 +175,7 @@ def translator_sec(
166
  else:
167
  ic("Translating text as multiple chunks")
168
 
 
169
  token_size = calculate_chunk_size(
170
  token_count=num_tokens_in_text, token_limit=max_tokens
171
  )
@@ -177,8 +187,10 @@ def translator_sec(
177
  chunk_size=token_size,
178
  )
179
 
 
180
  source_text_chunks = text_parser.split_text(source_text)
181
 
 
182
  translation_1_chunks = multichunk_initial_translation(
183
  source_lang, target_lang, source_text_chunks
184
  )
@@ -190,6 +202,7 @@ def translator_sec(
190
  except Exception as e:
191
  raise gr.Error(f"An unexpected error occurred: {e}")
192
 
 
193
  reflection_chunks = multichunk_reflect_on_translation(
194
  source_lang,
195
  target_lang,
@@ -200,6 +213,7 @@ def translator_sec(
200
 
201
  reflection = "".join(reflection_chunks)
202
 
 
203
  translation_2_chunks = multichunk_improve_translation(
204
  source_lang,
205
  target_lang,
 
7
 
8
  from llama_index.core.node_parser import SentenceSplitter
9
 
10
+ progress=gr.Progress()
11
+
12
  def tokenize(text):
13
  # Use nltk to tokenize the text
14
  words = simple_tokenizer(text)
 
54
  country: str,
55
  max_tokens:int = 1000,
56
  ):
 
57
  """Translate the source_text from source_lang to target_lang."""
58
  num_tokens_in_text = num_tokens_in_string(source_text)
59
 
 
62
  if num_tokens_in_text < max_tokens:
63
  ic("Translating text as single chunk")
64
 
65
+ progress((1,3), desc="First translation...")
66
  init_translation = one_chunk_initial_translation(
67
  source_lang, target_lang, source_text
68
  )
69
 
70
 + progress((2,3), desc="Reflection...")
71
  reflection = one_chunk_reflect_on_translation(
72
  source_lang, target_lang, source_text, init_translation, country
73
  )
74
 
75
+ progress((3,3), desc="Second translation...")
76
  final_translation = one_chunk_improve_translation(
77
  source_lang, target_lang, source_text, init_translation, reflection
78
  )
 
82
  else:
83
  ic("Translating text as multiple chunks")
84
 
85
+ progress((1,5), desc="Calculate chunk size...")
86
  token_size = calculate_chunk_size(
87
  token_count=num_tokens_in_text, token_limit=max_tokens
88
  )
 
94
  chunk_size=token_size,
95
  )
96
 
97
 + progress((2,5), desc="Split source text...")
98
  source_text_chunks = text_parser.split_text(source_text)
99
 
100
+ progress((3,5), desc="First translation...")
101
  translation_1_chunks = multichunk_initial_translation(
102
  source_lang, target_lang, source_text_chunks
103
  )
104
 
105
  init_translation = "".join(translation_1_chunks)
106
 
107
+ progress((4,5), desc="Reflection...")
108
  reflection_chunks = multichunk_reflect_on_translation(
109
  source_lang,
110
  target_lang,
 
115
 
116
  reflection = "".join(reflection_chunks)
117
 
118
+ progress((5,5), desc="Second translation...")
119
  translation_2_chunks = multichunk_improve_translation(
120
  source_lang,
121
  target_lang,
 
150
  if num_tokens_in_text < max_tokens:
151
  ic("Translating text as single chunk")
152
 
153
+ progress((1,3), desc="First translation...")
154
  init_translation = one_chunk_initial_translation(
155
  source_lang, target_lang, source_text
156
  )
 
160
  except Exception as e:
161
  raise gr.Error(f"An unexpected error occurred: {e}")
162
 
163
 + progress((2,3), desc="Reflection...")
164
  reflection = one_chunk_reflect_on_translation(
165
  source_lang, target_lang, source_text, init_translation, country
166
  )
167
 
168
+ progress((3,3), desc="Second translation...")
169
  final_translation = one_chunk_improve_translation(
170
  source_lang, target_lang, source_text, init_translation, reflection
171
  )
 
175
  else:
176
  ic("Translating text as multiple chunks")
177
 
178
+ progress((1,5), desc="Calculate chunk size...")
179
  token_size = calculate_chunk_size(
180
  token_count=num_tokens_in_text, token_limit=max_tokens
181
  )
 
187
  chunk_size=token_size,
188
  )
189
 
190
 + progress((2,5), desc="Split source text...")
191
  source_text_chunks = text_parser.split_text(source_text)
192
 
193
+ progress((3,5), desc="First translation...")
194
  translation_1_chunks = multichunk_initial_translation(
195
  source_lang, target_lang, source_text_chunks
196
  )
 
202
  except Exception as e:
203
  raise gr.Error(f"An unexpected error occurred: {e}")
204
 
205
+ progress((4,5), desc="Reflection...")
206
  reflection_chunks = multichunk_reflect_on_translation(
207
  source_lang,
208
  target_lang,
 
213
 
214
  reflection = "".join(reflection_chunks)
215
 
216
+ progress((5,5), desc="Second translation...")
217
  translation_2_chunks = multichunk_improve_translation(
218
  source_lang,
219
  target_lang,