Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import openai
|
3 |
+
import concurrent.futures
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def convert_to_dict(text):
    """Parse a timestamped transcript into a ``{seconds: line}`` dictionary.

    Each input line is expected to look like
    ``"6.08 seconds - Yeah, the Jack Carr one was pretty fun."``.

    Args:
        text: Multiline string of ``"<seconds> seconds - <text>"`` lines.

    Returns:
        dict mapping float seconds to the transcript text of that line.
        Lines that do not match the pattern are silently skipped.
    """
    # Accept both "6.08" and whole-second "6" timestamps (generalized from
    # the original r"\d+\.\d+", which rejected integer-second lines).
    pattern = re.compile(r"(\d+(?:\.\d+)?) seconds - (.*)")
    result = {}
    for line in text.splitlines():
        match = pattern.match(line)
        if match:
            # Note: do not rebind `text` here — the original shadowed the
            # parameter with the matched group.
            result[float(match.group(1))] = match.group(2)
    return result
|
27 |
+
|
28 |
+
def process_dict(text, batch_size=20):
    """Batch a transcript into combined text chunks keyed by start time.

    The raw transcript is first parsed with ``convert_to_dict``; then
    consecutive entries are merged, ``batch_size`` at a time, into single
    space-joined strings.

    Args:
        text: Raw transcript string (see ``convert_to_dict`` for the
            expected line format). (The original docstring documented a
            nonexistent ``dict_in`` parameter.)
        batch_size: Number of transcript lines merged into each chunk.

    Returns:
        dict mapping the seconds of the first line of each batch to the
        space-joined text of that batch; a trailing partial batch is kept.
    """
    dict_in = convert_to_dict(text)
    result = {}
    batch = []        # texts accumulated for the chunk under construction
    batch_key = None  # timestamp of the first line in the current chunk
    # Renamed loop variable: the original rebound `text` inside the loop,
    # shadowing the function parameter.
    for seconds, line_text in dict_in.items():
        if batch_key is None:
            batch_key = seconds
        batch.append(line_text)
        if len(batch) == batch_size:
            result[batch_key] = " ".join(batch)
            batch = []
            batch_key = None
    if batch:
        # Flush the final partial batch (fewer than batch_size lines).
        result[batch_key] = " ".join(batch)
    return result
|
55 |
+
|
56 |
+
|
57 |
+
def call3(chunk):
    """Summarize one transcript chunk into at most three bullet points.

    Sends *chunk* to the gpt-3.5-turbo chat endpoint with a fixed system
    prompt and returns the model's reply text. Network I/O — requires a
    configured ``openai`` API key.
    """
    system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": str(chunk)},
        ],
    )
    return completion["choices"][0]["message"]["content"]
|
67 |
+
|
68 |
+
|
69 |
+
def run_gpt_3(dict_in, function=None):
    """Apply *function* to every value of *dict_in* concurrently.

    Args:
        dict_in: Dictionary mapping keys to values; each value is passed
            individually to *function*.
        function: Callable taking one value and returning a result.
            Defaults to ``call3``. Resolved lazily: the original
            ``function=call3`` default was evaluated at definition time,
            which ties this function's definition to ``call3`` existing.

    Returns:
        dict with the same keys as *dict_in*, mapped to ``function(value)``
        results in the same order as the input.
    """
    if function is None:
        function = call3
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map preserves input order, matching the original
        # submit-all-then-collect-results-in-order pattern.
        results = list(executor.map(function, dict_in.values()))
    return dict(zip(dict_in.keys(), results))
|
85 |
+
|
86 |
+
|
87 |
+
def call4(chunk):
    """Turn a combined podcast gist into formatted show notes.

    Sends *chunk* to the gpt-3.5-turbo chat endpoint with a fixed system
    prompt asking for hook / short summary / topic sections, and returns
    the model's reply text. Network I/O — requires a configured ``openai``
    API key.
    """
    system_prompt = """You are a podcast summarizer. You will be given the gist of a long podcast, and you will output this format.
Hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
Give a Short Summary: Include main talking points and key phrases that will appeal to your
ideal listener.
Topics discussed in this episode: For this part, You will act as Youtube Video sectioning algorithm, and output similarly, using the given info.

and, Lastly, remember to output in an easily parsable format.

"""
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": str(chunk)},
        ],
    )
    return completion["choices"][0]["message"]["content"]
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
def clean_and_concatenate_dict_values(dict_in):
    """Flatten a ``{key: text}`` dict into one ``"key: text"`` line per entry.

    Each value is stripped of surrounding whitespace and has its bullet
    markers (``"- "``) removed before being prefixed with its key — used to
    prepare the per-chunk summaries before the final summarization call.

    Args:
        dict_in: Dictionary mapping keys to text values.

    Returns:
        A single string with one ``"key: value"`` line per entry (each line
        newline-terminated); empty string for an empty dict.
    """
    lines = []
    for key, raw_value in dict_in.items():
        cleaned = raw_value.strip().replace("- ", "")
        lines.append(f"{key}: {cleaned}\n")
    return "".join(lines)
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
+
|
149 |
+
# text = """
|
150 |
+
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
|
151 |
+
# 11.32 seconds - He's super nice.
|
152 |
+
# 16.56 seconds - I'm really enjoying this book.
|
153 |
+
# 21.80 seconds - I can't wait to see what happens next.
|
154 |
+
# 27.04 seconds - This is a great read.
|
155 |
+
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
|
156 |
+
# """
|
157 |
+
|
158 |
+
# result = convert_to_dict(text)
|
159 |
+
# new_result = process_dict(result)
|
160 |
+
|
161 |
+
# # print(list(new_result.values())[7])
|
162 |
+
|
163 |
+
# new_result
|