Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import openai
|
3 |
+
import concurrent.futures
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def convert_to_dict(text):
    """Parse a timestamped transcript into a ``{seconds: line}`` dictionary.

    Each input line is expected to look like
    ``"6.08 seconds - Yeah, the Jack Carr one was pretty fun."``.

    Args:
        text: Multiline string of ``"<seconds> seconds - <text>"`` lines.

    Returns:
        dict mapping float seconds to the transcript text of that line.
        Lines that do not match the pattern are silently skipped.
    """
    # Accept both "6.08" and whole-second "6" timestamps (generalized from
    # the original r"\d+\.\d+", which rejected integer-second lines).
    pattern = re.compile(r"(\d+(?:\.\d+)?) seconds - (.*)")
    result = {}
    for line in text.splitlines():
        match = pattern.match(line)
        if match:
            # Note: do not rebind `text` here — the original shadowed the
            # parameter with the matched group.
            result[float(match.group(1))] = match.group(2)
    return result
|
27 |
+
|
28 |
+
def process_dict(text, batch_size=20):
    """Batch a transcript into combined text chunks keyed by start time.

    The raw transcript is first parsed with ``convert_to_dict``; then
    consecutive entries are merged, ``batch_size`` at a time, into single
    space-joined strings.

    Args:
        text: Raw transcript string (see ``convert_to_dict`` for the
            expected line format). (The original docstring documented a
            nonexistent ``dict_in`` parameter.)
        batch_size: Number of transcript lines merged into each chunk.

    Returns:
        dict mapping the seconds of the first line of each batch to the
        space-joined text of that batch; a trailing partial batch is kept.
    """
    dict_in = convert_to_dict(text)
    result = {}
    batch = []        # texts accumulated for the chunk under construction
    batch_key = None  # timestamp of the first line in the current chunk
    # Renamed loop variable: the original rebound `text` inside the loop,
    # shadowing the function parameter.
    for seconds, line_text in dict_in.items():
        if batch_key is None:
            batch_key = seconds
        batch.append(line_text)
        if len(batch) == batch_size:
            result[batch_key] = " ".join(batch)
            batch = []
            batch_key = None
    if batch:
        # Flush the final partial batch (fewer than batch_size lines).
        result[batch_key] = " ".join(batch)
    return result
|
55 |
+
|
56 |
+
|
57 |
+
def call3(chunk):
    """Summarize one transcript chunk into at most three bullet points.

    Sends *chunk* to the gpt-3.5-turbo chat endpoint with a fixed system
    prompt and returns the model's reply text. Network I/O — requires a
    configured ``openai`` API key.
    """
    system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": str(chunk)},
        ],
    )
    return completion["choices"][0]["message"]["content"]
|
67 |
+
|
68 |
+
|
69 |
+
def run_gpt_3(dict_in, function=None):
    """Apply *function* to every value of *dict_in* concurrently.

    Args:
        dict_in: Dictionary mapping keys to values; each value is passed
            individually to *function*.
        function: Callable taking one value and returning a result.
            Defaults to ``call3``. Resolved lazily: the original
            ``function=call3`` default was evaluated at definition time,
            which ties this function's definition to ``call3`` existing.

    Returns:
        dict with the same keys as *dict_in*, mapped to ``function(value)``
        results in the same order as the input.
    """
    if function is None:
        function = call3
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map preserves input order, matching the original
        # submit-all-then-collect-results-in-order pattern.
        results = list(executor.map(function, dict_in.values()))
    return dict(zip(dict_in.keys(), results))
|
85 |
+
|
86 |
+
|
87 |
+
def call4(chunk):
    """Turn a combined podcast gist into formatted show notes.

    Sends *chunk* to the gpt-3.5-turbo chat endpoint with a fixed system
    prompt asking for hook / short summary / topic sections, and returns
    the model's reply text. Network I/O — requires a configured ``openai``
    API key.
    """
    system_prompt = """You are a podcast summarizer. You will be given the gist of a long podcast, and you will output this format.
Hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
Give a Short Summary: Include main talking points and key phrases that will appeal to your
ideal listener.
Topics discussed in this episode: For this part, You will act as Youtube Video sectioning algorithm, and output similarly, using the given info.

and, Lastly, remember to output in an easily parsable format.

"""
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": str(chunk)},
        ],
    )
    return completion["choices"][0]["message"]["content"]
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
def clean_and_concatenate_dict_values(dict_in):
    """Flatten a ``{key: text}`` dict into one ``"key: text"`` line per entry.

    Each value is stripped of surrounding whitespace and has its bullet
    markers (``"- "``) removed before being prefixed with its key — used to
    prepare the per-chunk summaries before the final summarization call.

    Args:
        dict_in: Dictionary mapping keys to text values.

    Returns:
        A single string with one ``"key: value"`` line per entry (each line
        newline-terminated); empty string for an empty dict.
    """
    lines = []
    for key, raw_value in dict_in.items():
        cleaned = raw_value.strip().replace("- ", "")
        lines.append(f"{key}: {cleaned}\n")
    return "".join(lines)
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
+
|
149 |
+
# text = """
|
150 |
+
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
|
151 |
+
# 11.32 seconds - He's super nice.
|
152 |
+
# 16.56 seconds - I'm really enjoying this book.
|
153 |
+
# 21.80 seconds - I can't wait to see what happens next.
|
154 |
+
# 27.04 seconds - This is a great read.
|
155 |
+
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
|
156 |
+
# """
|
157 |
+
|
158 |
+
# result = convert_to_dict(text)
|
159 |
+
# new_result = process_dict(result)
|
160 |
+
|
161 |
+
# # print(list(new_result.values())[7])
|
162 |
+
|
163 |
+
# new_result
|