Witold Wydmański committed on
Commit
bb65ea5
·
1 Parent(s): b36655e

fix: reduce number of tokens per chunk to allow full gpt-3 output

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -13,27 +13,26 @@ MODES = {
13
  "Custom": "",
14
  }
15
 
16
- SUMMARY_PROMPT = "Summarize the following meeting in very great detail. The summary should include all the important points discussed in the meeting."
17
 
18
  def summarize_part(text, api_key):
19
  response = openai.ChatCompletion.create(
20
  model="gpt-3.5-turbo",
21
  messages=[
22
- { "role": "system", "content": f"You are a meeting organizer. You want to summarize a meeting. You are given the following transcript of the meeting. {SUMMARY_PROMPT}" },
23
- { "role": "user", "content": text },
24
  ],
25
  api_key=api_key,
26
  )
27
  return response["choices"][0]["message"]["content"]
28
 
29
  def shorten_text(text, api_key):
30
- # Split into chunks so that each chunk is less than 3000 words (not characters!)
31
- # Overlap by halves.
32
  chunks = []
33
  words = text.split()
34
  for i in range(0, len(words), 1500):
35
  chunk = ""
36
- while len(enc.encode(chunk)) < 4000 and i < len(words):
37
  chunk += words[i] + " "
38
  i += 1
39
  chunks.append(chunk)
@@ -41,7 +40,7 @@ def shorten_text(text, api_key):
41
  with ThreadPool(4) as pool:
42
  shortened = pool.starmap(summarize_part, zip(chunks, [api_key]*len(chunks)))
43
 
44
- return "".join(shortened)
45
 
46
  def modify_text(text, api_key, command, custom_command=None):
47
  if command == "Custom":
 
13
  "Custom": "",
14
  }
15
 
16
+ SUMMARY_PROMPT = "Summarize the following meeting in very great detail, in English. The summary should include all the important points discussed in the meeting."
17
 
18
  def summarize_part(text, api_key):
19
  response = openai.ChatCompletion.create(
20
  model="gpt-3.5-turbo",
21
  messages=[
22
+ { "role": "system", "content": f"You are a meeting organizer. You want to succintly summarize a meeting. {SUMMARY_PROMPT}" },
23
+ { "role": "user", "content": "Summarize the following transcript in English: " + text },
24
  ],
25
  api_key=api_key,
26
  )
27
  return response["choices"][0]["message"]["content"]
28
 
29
  def shorten_text(text, api_key):
30
+ """ Split text into chunks of 3000 tokens and summarize each chunk. """
 
31
  chunks = []
32
  words = text.split()
33
  for i in range(0, len(words), 1500):
34
  chunk = ""
35
+ while len(enc.encode(chunk)) < 3000 and i < len(words):
36
  chunk += words[i] + " "
37
  i += 1
38
  chunks.append(chunk)
 
40
  with ThreadPool(4) as pool:
41
  shortened = pool.starmap(summarize_part, zip(chunks, [api_key]*len(chunks)))
42
 
43
+ return ". ".join(shortened)
44
 
45
  def modify_text(text, api_key, command, custom_command=None):
46
  if command == "Custom":