# content_generator/src/summarizer.py
import os
import json
import openai
import pandas as pd
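
# Note: this module uses the legacy (pre-1.0) openai Python SDK interface
# (openai.ChatCompletion / openai.error).
#
# Expected input shape, inferred from the loop in summarize() below; the exact
# file name and any keys beyond 'title', 'text', and 'url' are assumptions,
# not something this module defines:
# {
#     "some_source": [
#         {"title": "...", "text": "...", "url": "..."},
#         ...
#     ]
# }
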
def summarize(filename, gpt_key, model_name):
    openai.api_key = gpt_key

    # Load the previously created all-data JSON file (source -> list of articles).
    with open(filename) as f:
        allDataFile = json.load(f)

    finaldf = pd.DataFrame()
    for source, articles in allDataFile.items():
        for article in articles:
            title = article['title']
            text = article['text']
            combined_text = 'title: ' + title + '\n' + text
            try:
                # GPT-3.5 API call for summarization
                response = openai.ChatCompletion.create(
                    model=model_name,
                    messages=[{
                        "role": "system",
                        "content": "You are a helpful assistant."
                    }, {
                        "role": "user",
                        "content": (
                            "Please summarize this news article text or youtube video "
                            "transcript in four sentences or less. If no article/transcript "
                            "is present, or it is unclear what the transcript is talking "
                            f"about, output 'Unable to summarize.'. {combined_text}"
                        )
                    }])
                summarizedData = response['choices'][0]['message']['content']
                print(f"SUMMARY: {summarizedData}\n\n")
                # GPT-3.5 API call for talking points based on the generated summary
                follow_up = openai.ChatCompletion.create(
                    model=model_name,
                    messages=[{
                        "role": "system",
                        "content": "You are a helpful assistant."
                    }, {
                        "role": "user",
                        "content": (
                            "Using this article, give me five sequential talking points "
                            "that I can use to make a shortform video. Do not use more "
                            "than 100 words. If the summarized article says 'Unable to "
                            f"summarize,' output 'No talking points available'. {summarizedData}"
                        )
                    }])
                talking_pointsData = follow_up['choices'][0]['message']['content']
                print(f"TALKING POINTS: {talking_pointsData}\n\n")
                articleinfo = pd.DataFrame.from_records([{
                    "title": article["title"],
                    "source": source,
                    "url": article["url"],
                    "summarized_text": summarizedData,
                    "talking_points": talking_pointsData
                }])
                finaldf = pd.concat([finaldf, articleinfo], ignore_index=True)
            except openai.error.InvalidRequestError as e:
                # Skip articles the API rejects (e.g. prompts exceeding the model's context limit).
                print(f"An error occurred: {e}")
                continue

    csvname = 'data.csv'
    finaldf.to_csv(csvname, index=False)
    return csvname
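

if __name__ == "__main__":
    # Minimal usage sketch, not part of the module's original interface. It assumes
    # the aggregated JSON file is named 'all_data.json', that the OpenAI key is
    # available in the OPENAI_API_KEY environment variable, and that 'gpt-3.5-turbo'
    # matches the model name the rest of the pipeline passes in.
    output_csv = summarize(
        filename="all_data.json",
        gpt_key=os.environ["OPENAI_API_KEY"],
        model_name="gpt-3.5-turbo",
    )
    print(f"Wrote summaries and talking points to {output_csv}")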