Spaces:
Sleeping
Sleeping
File size: 4,958 Bytes
30c86e2 60a26c4 30c86e2 60a26c4 8af421d 60a26c4 30c86e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import re
import openai
import concurrent.futures
import os
import sys
def convert_to_dict(text):
    """Parse transcript lines such as "6.08 seconds - Yeah, the Jack Carr one was pretty fun.".

    Args:
        text: A string holding one "<seconds> seconds - <sentence>" entry per line.
    Returns:
        A dictionary mapping each timestamp (as a float) to the sentence that
        follows it. Lines that do not start with a decimal timestamp in this
        format are skipped; if a timestamp repeats, the last sentence wins.
    """
    line_pattern = re.compile(r"(\d+\.\d+) seconds - (.*)")
    # Match every line up front; non-matching lines yield None and are dropped below.
    candidates = (line_pattern.match(line) for line in text.splitlines())
    return {
        float(found.group(1)): found.group(2)
        for found in candidates
        if found
    }
def process_dict(text, batch_size=20):
    """Group transcript entries into batches and join each batch into one string.

    Args:
        text: Either the raw transcript string (parsed with convert_to_dict) or
            an already-parsed dictionary mapping seconds to text.
        batch_size: The number of entries to combine into a single string.
            A value below 1 never triggers a flush, so all entries end up in
            one batch.
    Returns:
        A new dictionary mapping the seconds of the first entry in each batch
        to the space-joined text of every entry in that batch. The final batch
        may hold fewer than batch_size entries.
    """
    # Accept a pre-parsed dict as well as raw text, so callers that already
    # ran convert_to_dict do not fail on text.splitlines().
    entries = text if isinstance(text, dict) else convert_to_dict(text)

    result = {}
    batch = []
    batch_key = None
    for seconds, sentence in entries.items():
        # "is None" (not truthiness) so a 0.0 timestamp is a valid batch key.
        if batch_key is None:
            batch_key = seconds
        batch.append(sentence)
        if len(batch) == batch_size:
            result[batch_key] = " ".join(batch)
            batch = []
            batch_key = None
    # Flush the trailing, partially filled batch.
    if batch:
        result[batch_key] = " ".join(batch)
    return result
def call3(chunk):
    """Send one transcript chunk to gpt-3.5-turbo with a topic-extraction prompt.

    Args:
        chunk: The transcript chunk; it is converted with str() before sending.
    Returns:
        The message content of the first choice in the API response.
    """
    system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def run_gpt_3(dict_in, function=call3):
    """Apply a function to every value of a dictionary using a thread pool.

    Args:
        dict_in: A dictionary mapping keys to values.
        function: A callable taking one value and returning a result.
    Returns:
        A dictionary with the same keys as dict_in, each mapped to the result
        of calling function on the corresponding value.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Submit every call before waiting on any of them so they can overlap.
        pending = [
            (key, pool.submit(function, value))
            for key, value in dict_in.items()
        ]
        # Collect in submission order; result() re-raises any exception from the call.
        return {key: task.result() for key, task in pending}
def call4(chunk):
    """Send the timestamped chunk gists to gpt-3.5-turbo with a show-notes prompt.

    The system prompt asks for a hook, a summary and timestamped chapters.

    Args:
        chunk: The combined chunk summaries; converted with str() before sending.
    Returns:
        The message content of the first choice in the API response.
    """
    # The prompt body is kept flush-left: its whitespace is part of the string sent.
    system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
Output in a python dictionary format whose structure is this:
{
hook: "the hook"
summary: "summary"
chapters: {
timestamp : "chapter"
timestamp : "chapter"
}
}when
hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
summary: Include main talking points and key phrases that will appeal to your
ideal listener. keep it concise.
chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.3,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def clean_and_concatenate_dict_values(dict_in):
    """Flatten a dictionary of strings into one "key: value" line per entry.

    Presumably used to prepare the per-chunk summaries for call4.

    Args:
        dict_in: A dictionary mapping keys to string values.
    Returns:
        A single string in which every entry appears as "<key>: <value>"
        followed by a newline. Each value is stripped of surrounding
        whitespace and has every "- " sequence removed.
    """
    rendered = []
    for key, value in dict_in.items():
        # Trim the edges first, then drop every "- " bullet marker.
        cleaned = value.strip().replace("- ", "")
        rendered.append(f"{key}: {cleaned}\n")
    return "".join(rendered)
# text = """
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
# 11.32 seconds - He's super nice.
# 16.56 seconds - I'm really enjoying this book.
# 21.80 seconds - I can't wait to see what happens next.
# 27.04 seconds - This is a great read.
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
# """
# result = convert_to_dict(text)
# new_result = process_dict(text)  # process_dict parses the raw transcript text itself
# # print(list(new_result.values())[7])
# new_result