Spaces:

arkaprav0
/

gpt-transcript-plugin

Sleeping

File size: 4,958 Bytes

import re
import openai
import concurrent.futures
import os
import sys



def convert_to_dict(text):
  """Parses transcript lines such as "6.08 seconds - Yeah, the Jack Carr one was pretty fun.".

  Each line is matched against the shape "<number> seconds - <text>". Lines
  that do not match are skipped. The timestamp may be a decimal ("6.08") or a
  whole number ("6"), and whitespace before the timestamp is ignored.

  Args:
    text: A string holding one "<seconds> seconds - <text>" entry per line.

  Returns:
    A dictionary mapping each timestamp (as a float) to the text that follows
    " - " on that line. If a timestamp appears more than once, the last line
    with that timestamp wins.
  """
  # Compiled once, outside the loop. The fractional part is optional so that
  # whole-second timestamps are not silently dropped.
  line_pattern = re.compile(r"\s*(\d+(?:\.\d+)?) seconds - (.*)")

  result = {}
  for line in text.splitlines():
    match = line_pattern.match(line)
    if match:
      # Assign straight into the result instead of rebinding the `text`
      # parameter inside the loop.
      result[float(match.group(1))] = match.group(2)
  return result

def process_dict(text, batch_size=20):
  """Groups transcript entries into batches and joins each batch into one string.

  Args:
    text: Either raw transcript text in the "<seconds> seconds - <text>" line
      format (it is parsed with convert_to_dict), or an already-parsed
      dictionary mapping seconds to text.
    batch_size: The number of entries to combine into a single string. A batch
      is closed only when it holds exactly this many entries, so a value
      below 1 never closes a batch and every entry ends up in one string.

  Returns:
    A new dictionary mapping the seconds of the first entry in each batch to
    the space-joined texts of the entries in that batch. The final batch may
    hold fewer than batch_size entries.
  """
  # Accept a parsed dictionary as well as raw text so callers that already
  # ran the parsing step do not fail on it.
  dict_in = text if isinstance(text, dict) else convert_to_dict(text)

  result = {}
  current_batch = []
  current_key = None
  for seconds, entry_text in dict_in.items():
    if not current_batch:
      # First entry of a new batch: its timestamp labels the whole batch.
      current_key = seconds
    current_batch.append(entry_text)
    if len(current_batch) == batch_size:
      result[current_key] = " ".join(current_batch)
      current_batch = []
  if current_batch:
    # Flush the trailing, partially filled batch.
    result[current_key] = " ".join(current_batch)
  return result


def call3(chunk):
  """Asks gpt-3.5-turbo for the most important topics of one transcript chunk.

  The system prompt requests at most three concise bullet points.

  Args:
    chunk: The transcript chunk; it is converted with str() before sending.

  Returns:
    The message content of the first choice in the chat completion response.
  """
  system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
  conversation = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": str(chunk)},
  ]
  completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    temperature=0,
    messages=conversation,
  )
  first_choice = completion['choices'][0]
  return first_choice['message']['content']


def run_gpt_3(dict_in, function=call3):
  """Applies a function to every value of a dictionary using a thread pool.

  Every value is submitted to the pool up front; the results are then
  collected in the dictionary's iteration order.

  Args:
    dict_in: A dictionary mapping keys to values.
    function: A callable taking one value and returning its result.

  Returns:
    A dictionary with the same keys as dict_in, each mapped to the result of
    calling function on the corresponding value.
  """
  with concurrent.futures.ThreadPoolExecutor() as pool:
    # Submit everything first so the calls overlap, then wait on each future.
    pending = {key: pool.submit(function, value) for key, value in dict_in.items()}
    outcomes = {key: task.result() for key, task in pending.items()}

  return outcomes


def call4(chunk):
  """Asks gpt-3.5-turbo to turn timestamped chunk gists into podcast show notes.

  The system prompt requests a Python-dictionary-shaped answer with a hook, a
  summary and timestamped chapters. The reply is returned as raw text and is
  not parsed here.

  Args:
    chunk: The timestamped gist text; it is converted with str() before sending.

  Returns:
    The message content of the first choice in the chat completion response.
  """
  system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
          Output in a python dictionary format whose structure is this:
          {
            hook: "the hook"
            summary: "summary"
            chapters: {
              timestamp : "chapter"
              timestamp : "chapter"
            }
        }when
        hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
          Ex.One serendipitous relationship led him to start a company & change his life forever.
        summary: Include main talking points and key phrases that will appeal to your
          ideal listener. keep it concise.
        chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.


  

  """
  conversation = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": str(chunk)},
  ]
  completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    temperature=0.3,
    messages=conversation,
  )
  first_choice = completion['choices'][0]
  return first_choice['message']['content']



def clean_and_concatenate_dict_values(dict_in):
  """Cleans the values of a dictionary and concatenates them into one string.

  Used to prepare the per-chunk summaries before they are sent to call4.

  Args:
    dict_in: A dictionary mapping keys to string values.

  Returns:
    A long string with one "<key>: <cleaned value>" entry per dictionary item,
    each terminated by a newline. An empty dictionary yields an empty string.
  """
  entries = []
  for key, value in dict_in.items():
    # Trim surrounding whitespace, then drop the "- " bullet markers.
    # NOTE: replace() removes every "- " occurrence in the value, not only
    # the ones at the start of a line.
    cleaned = value.strip().replace("- ", "")
    entries.append(f"{key}: {cleaned}\n")

  # Join once at the end instead of growing a string with += in the loop.
  return "".join(entries)






    
  
  










  
# text = """
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
# 11.32 seconds - He's super nice.
# 16.56 seconds - I'm really enjoying this book.
# 21.80 seconds - I can't wait to see what happens next.
# 27.04 seconds - This is a great read.
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
# """

# result = convert_to_dict(text)
# new_result = process_dict(text)  # process_dict parses the raw text itself

# # print(list(new_result.values())[7])

# new_result