arkaprav0 committed on
Commit
30c86e2
·
1 Parent(s): a322f91

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +163 -0
utils.py CHANGED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import openai
3
+ import concurrent.futures
4
+ import os
5
+ import sys
6
+
7
+
8
+
9
def convert_to_dict(text):
    """Parse transcript lines of the form "<seconds> seconds - <text>" into a dictionary.

    Example line: "6.08 seconds - Yeah, the Jack Carr one was pretty fun."

    Args:
        text: A string holding one transcript entry per line.

    Returns:
        A dictionary mapping each timestamp (as a float) to the text that
        follows it. Lines that do not match the expected format are skipped.
        If the same timestamp appears more than once, the last line wins.
    """
    # Accept whole-second timestamps ("6 seconds - ...") and leading
    # indentation as well as the "6.08 seconds - ..." form; such lines were
    # previously dropped without any warning.
    line_pattern = re.compile(r"\s*(\d+(?:\.\d+)?) seconds - (.*)")

    result = {}
    for line in text.splitlines():
        match = line_pattern.match(line)
        if match:
            # Use dedicated names so the ``text`` parameter is not rebound
            # while its lines are still being iterated.
            seconds = float(match.group(1))
            spoken = match.group(2)
            result[seconds] = spoken
    return result
27
+
28
def process_dict(text, batch_size=20):
    """Group transcript entries into batches and join the text of each batch.

    Args:
        text: Either the raw transcript string (parsed with
            ``convert_to_dict``) or an already-parsed dictionary mapping
            seconds to text.
        batch_size: The number of consecutive entries to combine into a
            single string. If no batch ever reaches this size (for example
            when it is below 1), all entries end up in one batch.

    Returns:
        A new dictionary mapping the seconds of the first entry in each batch
        to the space-joined text of the entries in that batch. The last batch
        may contain fewer than ``batch_size`` entries.
    """
    # A pre-parsed dictionary is used as-is; anything else is treated as raw
    # transcript text. Previously a dictionary argument failed inside
    # convert_to_dict because it has no ``splitlines``.
    entries = text if isinstance(text, dict) else convert_to_dict(text)

    result = {}
    batch = []
    batch_start = None
    for seconds, snippet in entries.items():
        if not batch:
            # First entry of a new batch supplies the key for the whole batch.
            batch_start = seconds
        batch.append(snippet)
        if len(batch) == batch_size:
            result[batch_start] = " ".join(batch)
            batch = []
    if batch:
        # Flush the trailing, partially filled batch.
        result[batch_start] = " ".join(batch)
    return result
55
+
56
+
57
def call3(chunk):
    """Ask gpt-3.5-turbo for the most important topics in one transcript chunk.

    Args:
        chunk: The transcript chunk; it is converted with ``str`` before
            being sent as the user message.

    Returns:
        The message content of the first choice in the API response.
    """
    system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    # temperature=0 is passed through unchanged from the original request.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=conversation,
    )
    first_choice = response['choices'][0]
    return first_choice['message']['content']
67
+
68
+
69
def run_gpt_3(dict_in, function=call3):
    """Apply ``function`` to every value of a dictionary using a thread pool.

    Each value is submitted as its own task; the call returns only after
    every task has finished.

    Args:
        dict_in: A dictionary mapping keys to values.
        function: A callable that takes one value and returns a result.

    Returns:
        A dictionary with the same keys as ``dict_in``, each mapped to the
        result of calling ``function`` on the corresponding value.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Submit everything first so the tasks can run concurrently, then
        # collect the results in the input dictionary's order.
        pending = {key: pool.submit(function, value) for key, value in dict_in.items()}
        outputs = {key: task.result() for key, task in pending.items()}

    return outputs
85
+
86
+
87
def call4(chunk):
    """Ask gpt-3.5-turbo to turn a podcast gist into show notes.

    The system prompt requests a hook, a short summary, and a list of topics
    discussed, in an easily parsable format.

    Args:
        chunk: The gist of the podcast; it is converted with ``str`` before
            being sent as the user message.

    Returns:
        The message content of the first choice in the API response.
    """
    system_prompt = """You are a podcast summarizer. You will be given the gist of a long podcast, and you will output this format.
Hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
Give a Short Summary: Include main talking points and key phrases that will appeal to your
ideal listener.
Topics discussed in this episode: For this part, You will act as Youtube Video sectioning algorithm, and output similarly, using the given info.

and, Lastly, remember to output in an easily parsable format.

"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=conversation,
    )
    first_choice = response['choices'][0]
    return first_choice['message']['content']
106
+
107
+
108
+
109
def clean_and_concatenate_dict_values(dict_in):
    """Clean the values of a dictionary and join them into one string.

    The result is meant to be passed on to ``call4``.

    Args:
        dict_in: A dictionary mapping keys to string values.

    Returns:
        A single string with one "key: value" entry per dictionary item, each
        terminated by a newline. Every value has surrounding whitespace
        stripped and every occurrence of "- " removed (not only leading
        bullet markers).
    """
    entries = []
    for key, value in dict_in.items():
        cleaned = value.strip().replace("- ", "")
        entries.append(f"{key}: {cleaned}\n")

    # Join once at the end instead of growing a string with += in the loop.
    return "".join(entries)
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+ # text = """
150
+ # 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
151
+ # 11.32 seconds - He's super nice.
152
+ # 16.56 seconds - I'm really enjoying this book.
153
+ # 21.80 seconds - I can't wait to see what happens next.
154
+ # 27.04 seconds - This is a great read.
155
+ # 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
156
+ # """
157
+
158
+ # result = convert_to_dict(text)
159
+ # new_result = process_dict(result)
160
+
161
+ # # print(list(new_result.values())[7])
162
+
163
+ # new_result