Tuchuanhuhuhu committed
Commit 72e1ed6 · 1 Parent(s): 9c45970

Use tiktoken to count input tokens precisely

Files changed (1):
  1. utils.py +5 -4
utils.py CHANGED
@@ -51,7 +51,7 @@ def postprocess(
 def count_token(input_str):
     print("计算输入Token计数中……")
     encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
-    length = len(encoding.encode("tiktoken is great!"))
+    length = len(encoding.encode(input_str))
     print("计算完成!")
     return length
 
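Note on the first hunk: the old line tokenized the hardcoded sample string "tiktoken is great!", so count_token reported the same length for every input; the fix encodes the actual input_str. A minimal self-contained sketch of the corrected logic (assumes the tiktoken package is installed):

import tiktoken

def count_token(input_str):
    # Pick the tokenizer that matches the target chat model.
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    # encode() returns a list of token IDs; its length is the token count.
    return len(encoding.encode(input_str))

print(count_token("tiktoken is great!"))  # 6 with the cl100k_base encoding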
@@ -138,7 +138,8 @@ def stream_predict(openai_api_key, system_prompt, history, inputs, chatbot, prev
     history.append(construct_user(inputs))
     user_token_count = 0
     if len(previous_token_count) == 0:
-        user_token_count = count_token(inputs) + count_token(system_prompt)
+        system_prompt_token_count = count_token(system_prompt)
+        user_token_count = count_token(inputs) + system_prompt_token_count
     else:
         user_token_count = count_token(inputs)
     print(f"输入token计数: {user_token_count}")
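Note on the second hunk: the system prompt's tokens are charged only when previous_token_count is empty, i.e. on the first turn of a conversation; later turns count only the new user input. A sketch of that rule in isolation (input_token_count is a hypothetical helper name, not in the commit; count_token is the function above):

def input_token_count(inputs, system_prompt, previous_token_count):
    # First turn: the system prompt enters the context, so count it once.
    if len(previous_token_count) == 0:
        system_prompt_token_count = count_token(system_prompt)
        return count_token(inputs) + system_prompt_token_count
    # Later turns: only the new user input adds tokens.
    return count_token(inputs)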
@@ -200,7 +201,7 @@ def predict_all(openai_api_key, system_prompt, history, inputs, chatbot, previou
 
 
 def predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature, stream=False, should_check_token_count = True): # repetition_penalty, top_k
-    print(colorama.Fore.BLUE + f"输入为:{inputs}" + colorama.Style.RESET_ALL)
+    print("输入为:" +colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
     if stream:
         print("使用流式传输")
         iter = stream_predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature)
@@ -211,7 +212,7 @@ def predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count
         chatbot, history, status_text, token_count = predict_all(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature)
         yield chatbot, history, status_text, token_count
     print(f"传输完毕。当前token计数为{token_count}")
-    print(colorama.Fore.BLUE + f"回答为:{history[-1]['content']}" + colorama.Style.RESET_ALL)
+    print("回答为:" +colorama.Fore.BLUE + f"{history[-1]['content']}" + colorama.Style.RESET_ALL)
     if stream:
         max_token = max_token_streaming
     else:
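Note on the last two hunks: the label text moves outside the ANSI color codes, so only the interpolated value prints in blue while the label keeps the terminal's default color. A minimal sketch of the pattern (assumes colorama is installed; init() enables ANSI handling on Windows and is harmless elsewhere):

import colorama

colorama.init()
inputs = "Hello"
# Only the value between Fore.BLUE and Style.RESET_ALL is colored.
print("Input: " + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)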