File size: 4,958 Bytes
30c86e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60a26c4
30c86e2
60a26c4
 
 
 
 
 
 
 
 
 
 
 
 
 
8af421d
60a26c4
 
 
30c86e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import re
import openai
import concurrent.futures
import os
import sys



# One transcript line, e.g. "6.08 seconds - Yeah, the Jack Carr one was pretty fun.".
# The fractional part is optional so whole-second stamps ("6 seconds - ...") parse too.
_TRANSCRIPT_LINE = re.compile(r"(\d+(?:\.\d+)?) seconds - (.*)")


def convert_to_dict(text):
  """Parses transcript lines of the form "<seconds> seconds - <text>" into a dictionary.

  Each line is matched from its first character; lines that do not follow the
  format (blank lines, headers, indented lines) are skipped. If the same
  timestamp occurs more than once, the last occurrence wins.

  Args:
    text: A string with one entry per line, e.g.
      "6.08 seconds - Yeah, the Jack Carr one was pretty fun.".

  Returns:
    A dictionary mapping the seconds (as float) to the text of that line, in
    the order the lines appear.
  """
  result = {}
  for line in text.splitlines():
    match = _TRANSCRIPT_LINE.match(line)
    if match:
      # Use distinct local names so the `text` parameter is never rebound.
      result[float(match.group(1))] = match.group(2)
  return result

def process_dict(text, batch_size=20):
  """Groups transcript entries into batches and joins each batch into one string.

  Args:
    text: Either the raw transcript text (lines such as
      "6.08 seconds - Yeah, ...", parsed with convert_to_dict) or an
      already-parsed dictionary mapping seconds to text.
    batch_size: The number of consecutive entries to combine into a single string.

  Returns:
    A new dictionary mapping the seconds of the first entry in each batch to
    the texts of that batch joined with single spaces. The final batch may
    hold fewer than batch_size entries.
  """
  # Accept a parsed dictionary as well as raw text, so both call styles work.
  entries = text if isinstance(text, dict) else convert_to_dict(text)

  result = {}
  batch = []
  batch_start = None
  for seconds, line in entries.items():
    if not batch:
      # The first entry of a batch supplies the key for the whole batch.
      batch_start = seconds
    batch.append(line)
    if len(batch) == batch_size:
      result[batch_start] = " ".join(batch)
      batch = []
  if batch:
    # Flush the trailing, partially filled batch.
    result[batch_start] = " ".join(batch)
  return result


def call3(chunk):
  """Requests a short bullet-point topic summary of one transcript chunk.

  Sends the chunk to the "gpt-3.5-turbo" chat model at temperature 0 together
  with a system prompt asking for the 3 most important topics (or fewer).

  Args:
    chunk: The transcript chunk; it is converted with str() before sending.

  Returns:
    The message content of the first choice in the API response.
  """
  system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
  conversation = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": str(chunk)},
  ]
  completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      temperature=0,
      messages=conversation,
  )
  first_choice = completion['choices'][0]
  return first_choice['message']['content']


def run_gpt_3(dict_in, function=call3):
  """Applies a function to every value of a dictionary using a thread pool.

  All values are submitted to the pool first; the results are then collected
  in the dictionary's own order.

  Args:
    dict_in: A dictionary mapping keys to values.
    function: A callable taking one value and returning its result.

  Returns:
    A dictionary with the same keys as dict_in, each mapped to the result of
    calling function on the corresponding value.
  """
  with concurrent.futures.ThreadPoolExecutor() as pool:
    # Submit everything before waiting on any single result.
    pending = {key: pool.submit(function, value) for key, value in dict_in.items()}
    return {key: task.result() for key, task in pending.items()}


def call4(chunk):
  """Requests podcast show notes (hook, summary, chapters) for the chunk gists.

  Sends the input to the "gpt-3.5-turbo" chat model at temperature 0.3 with a
  system prompt asking for a python-dictionary-shaped answer.

  Args:
    chunk: The timestamped gists; converted with str() before sending.

  Returns:
    The message content of the first choice in the API response, as returned
    by the model (it is not parsed here).
  """
  system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
          Output in a python dictionary format whose structure is this:
          {
            hook: "the hook"
            summary: "summary"
            chapters: {
              timestamp : "chapter"
              timestamp : "chapter"
            }
        }when
        hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
          Ex.One serendipitous relationship led him to start a company & change his life forever.
        summary: Include main talking points and key phrases that will appeal to your
          ideal listener. keep it concise.
        chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.


  

  """
  conversation = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": str(chunk)},
  ]
  completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      temperature=0.3,
      messages=conversation,
  )
  first_choice = completion['choices'][0]
  return first_choice['message']['content']



def clean_and_concatenate_dict_values(dict_in):
  """Cleans the values of a dictionary and concatenates them into one string.

  Presumably used to prepare the per-chunk summaries before they are sent to
  call4. Each value is stripped of surrounding whitespace and every "- "
  sequence (the bullet marker) is removed from it.

  Args:
    dict_in: A dictionary mapping keys to string values.

  Returns:
    A single string with one "key: value" entry per dictionary item, each
    terminated by a newline. An empty dictionary yields "".
  """
  cleaned = (
      (key, value.strip().replace("- ", ""))
      for key, value in dict_in.items()
  )
  # Join once instead of growing a string with += on every iteration.
  return "".join(f"{key}: {value}\n" for key, value in cleaned)






    
  
  










  
# text = """
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
# 11.32 seconds - He's super nice.
# 16.56 seconds - I'm really enjoying this book.
# 21.80 seconds - I can't wait to see what happens next.
# 27.04 seconds - This is a great read.
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
# """

# result = convert_to_dict(text)
# new_result = process_dict(text)  # process_dict parses the raw text itself

# # print(list(new_result.values())[7])

# new_result