Spaces:

Imageye
/

Youtube_Quiz_Maker

Sleeping

App Files Community

Imageye committed on Jun 27, 2024

Commit

9f2532f

verified ·

1 Parent(s): 998048f

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -145

app.py CHANGED Viewed

@@ -1,132 +1,97 @@
 import streamlit as st
 from youtube_transcript_api import YouTubeTranscriptApi
 import re
 import tempfile
 import os
 import warnings
-from groq import Groq
-# Set up Groq client
-client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# Supported file types for Groq API
-SUPPORTED_FILE_TYPES = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
-# Function to transcribe audio using Groq Whisper API
 def transcribe_audio(file_path):
-    file_extension = os.path.splitext(file_path)[1][1:]
-    if file_extension not in SUPPORTED_FILE_TYPES:
-        return f"Error: Unsupported file type '{file_extension}'. Please upload a valid file."
-    try:
-        with open(file_path, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(file_path, file.read()),
-                model="whisper-large-v3",
-            )
-        return transcription.text
-    except Exception as e:
-        return f"Error during transcription: {e}"
-# Function to get transcript from YouTube
 def get_transcript(url):
     try:
         video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
-        if not video_id_match:
             return "Error: Invalid YouTube URL"
-        video_id = video_id_match.group(1)
         transcript = YouTubeTranscriptApi.get_transcript(video_id)
         transcript_text = ' '.join([entry['text'] for entry in transcript])
         return transcript_text
     except Exception as e:
         return str(e)
-# Function to summarize text using Groq API
 def summarize_text(text):
-    try:
-        response = client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"Summarize the following text:\n\n{text}"
-                }
-            ],
-            model="llama3-8b-8192",
-        )
-        summary = response.choices[0].message.content.strip()
-        return summary
-    except Exception as e:
-        return f"Error summarizing text: {e}"
-# Function to generate quiz questions using Groq API
 def generate_quiz_questions(text):
-    try:
-        response = client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"Generate quiz questions for the following text:\n\n{text}"
-                }
-            ],
-            model="llama3-8b-8192",
-        )
-        quiz_questions = response.choices[0].message.content.strip()
-        return quiz_questions
-    except Exception as e:
-        return f"Error generating quiz questions: {e}"
-# Function to parse quiz questions from generated text
 def parse_quiz_questions(quiz_text):
     questions = []
     question_blocks = quiz_text.split("\n\n")
-    current_question = None
-    current_choices = []
-    correct_answer = None
     for block in question_blocks:
         lines = block.strip().split("\n")
-        if lines:
-            if re.match(r'^\d+\.', lines[0]):  # This line is a question number
-                if current_question and current_choices and correct_answer:
-                    questions.append({
-                        "question": current_question,
-                        "choices": current_choices,
-                        "correct_answer": correct_answer
-                    })
-                current_question = lines[0]
-                current_choices = lines[1:5]
-                correct_answer = lines[-1].split(": ")[-1].strip() if len(lines) > 5 else None
-            else:  # This line is an answer
-                correct_answer = lines[-1].split(": ")[-1].strip()
-    # Add the last question if it exists
-    if current_question and current_choices and correct_answer:
-        questions.append({
-            "question": current_question,
-            "choices": current_choices,
-            "correct_answer": correct_answer
-        })
     return questions
-# Function to generate explanation for quiz answers using Groq API
 def generate_explanation(question, correct_answer, user_answer):
-    try:
-        response = client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
-                }
-            ],
-            model="llama3-8b-8192",
-        )
-        explanation = response.choices[0].message.content.strip()
-        return explanation
-    except Exception as e:
-        return f"Error generating explanation: {e}"
-# Function to check answers and provide feedback
 def check_answers(questions, user_answers):
     feedback = []
     correct_count = 0
@@ -152,15 +117,14 @@ def check_answers(questions, user_answers):
             })
     return feedback
-# Function to handle uploaded files
 def handle_uploaded_file(uploaded_file):
-    file_path = tempfile.mktemp(suffix=os.path.splitext(uploaded_file.name)[1])
-    with open(file_path, "wb") as f:
-        f.write(uploaded_file.read())
-    return file_path
-# Streamlit app layout and functionality
 st.title("YouTube Transcript Quiz Generator")
 st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")
 option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
@@ -178,52 +142,52 @@ if option == "YouTube URL":
                 quiz_text = generate_quiz_questions(transcript_text)
                 questions = parse_quiz_questions(quiz_text)
-                if not questions:
-                    st.error("No valid quiz questions could be generated.")
-                else:
-                    st.session_state.summary = summary
-                    st.session_state.questions = questions
-                    st.session_state.user_answers = {}
-                    st.session_state.generated_quiz = True
-            else:
-                st.error(transcript_text)
-if option == "Upload audio/video file":
-    uploaded_file = st.file_uploader("Choose an audio or video file", type=SUPPORTED_FILE_TYPES)
     if uploaded_file:
-        if st.button("Generate Quiz"):
-            tmp_file_path = handle_uploaded_file(uploaded_file)
-            transcript_text = transcribe_audio(tmp_file_path)
-            os.remove(tmp_file_path)
-            if "Error" not in transcript_text:
-                summary = summarize_text(transcript_text)
-                quiz_text = generate_quiz_questions(transcript_text)
-                questions = parse_quiz_questions(quiz_text)
-                if not questions:
-                    st.error("No valid quiz questions could be generated.")
-                else:
-                    st.session_state.summary = summary
-                    st.session_state.questions = questions
-                    st.session_state.user_answers = {}
-                    st.session_state.generated_quiz = True
-            else:
-                st.error(transcript_text)
 if st.session_state.generated_quiz:
-    st.write("## Summary")
-    st.write(st.session_state.summary)
-    st.write("## Quiz Questions")
-    for i, question in enumerate(st.session_state.questions):
-        st.write(f"### Question {i+1}")
-        st.write(question['question'])
-        st.session_state.user_answers[f"question_{i+1}"] = st.radio(
-            label="",
-            options=question['choices'],
-            key=f"question_{i+1}"
-        )
     if st.button("Submit Answers"):
         if "questions" in st.session_state and st.session_state.questions:
             with st.spinner('Processing your answers...'):

+import openai
 import streamlit as st
 from youtube_transcript_api import YouTubeTranscriptApi
 import re
 import tempfile
 import os
+from pydub import AudioSegment
+import logging
 import warnings
+def convert_to_supported_format(file_path):
+    audio = AudioSegment.from_file(file_path)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp:
+        audio.export(temp.name, format="wav")
+        return temp.name
 def transcribe_audio(file_path):
+    logging.info(f"Transcribing audio file: {file_path}")
+    file_path = convert_to_supported_format(file_path)
+    logging.info(f"Converted file path: {file_path}")
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        with open(file_path, "rb") as audio_file:
+            transcript = openai.Audio.transcribe("whisper-1", audio_file)
+    os.remove(file_path)  # Clean up temporary file
+    return transcript["text"]
 def get_transcript(url):
     try:
         video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
+        if video_id_match:
+            video_id = video_id_match.group(1)
+        else:
             return "Error: Invalid YouTube URL"
         transcript = YouTubeTranscriptApi.get_transcript(video_id)
         transcript_text = ' '.join([entry['text'] for entry in transcript])
         return transcript_text
     except Exception as e:
         return str(e)
 def summarize_text(text):
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
+        ],
+        max_tokens=150
+    )
+    summary = response['choices'][0]['message']['content'].strip()
+    return summary
 def generate_quiz_questions(text):
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that generates quiz questions. Your task is to generate ten quiz questions and four multiple choice answers for each question from the given text. Make sure to mark the correct answer with an asterisk (*) at the beginning of the answer line. Use the following format for each question:\n\n1. Question\n   a) Answer 1\n   b) Answer 2\n   c) Answer 3\n   d) Answer 4\n\n2. Question\n   a) Answer 1\n   b) Answer 2\n   c) Answer 3\n   d) Answer 4\n\n..."},
+            {"role": "user", "content": f"Generate quiz questions from the following text:\n\n{text}"}
+        ],
+        max_tokens=300
+    )
+    quiz_questions = response['choices'][0]['message']['content'].strip()
+    return quiz_questions
 def parse_quiz_questions(quiz_text):
     questions = []
     question_blocks = quiz_text.split("\n\n")
     for block in question_blocks:
         lines = block.strip().split("\n")
+        if len(lines) >= 5:
+            question = lines[0].split(". ")[1]
+            choices = [line.split(") ")[1].strip() for line in lines[1:5]]
+            correct_answer_lines = [line for line in lines[1:5] if "*" in line]
+            if correct_answer_lines:
+                correct_answer = correct_answer_lines[0].split(") ")[1].replace("*", "").strip()
+            else:
+                correct_answer = "No correct answer provided"
+            questions.append({"question": question, "choices": choices, "correct_answer": correct_answer})
     return questions
 def generate_explanation(question, correct_answer, user_answer):
+    prompt = f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt}
+        ],
+        max_tokens=150
+    )
+    explanation = response['choices'][0]['message']['content'].strip()
+    return explanation
 def check_answers(questions, user_answers):
     feedback = []
     correct_count = 0
             })
     return feedback
 def handle_uploaded_file(uploaded_file):
+    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+        tmp_file.write(uploaded_file.read())
+        tmp_file_path = tmp_file.name
+    return tmp_file_path
 st.title("YouTube Transcript Quiz Generator")
 st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")
 option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
                 quiz_text = generate_quiz_questions(transcript_text)
                 questions = parse_quiz_questions(quiz_text)
+                st.write("## Summary")
+                st.write(summary)
+                st.write("## Quiz Questions")
+                st.session_state.questions = questions
+                st.session_state.user_answers = {}
+                st.session_state.generated_quiz = True
+                for i, question in enumerate(questions):
+                    st.write(f"### Question {i+1}")
+                    st.write(question['question'])
+                    st.session_state.user_answers[f"question_{i+1}"] = st.radio(
+                        label="",
+                        options=question['choices'],
+                        key=f"question_{i+1}"
+                    )
+elif option == "Upload audio/video file":
+    uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
     if uploaded_file:
+        tmp_file_path = handle_uploaded_file(uploaded_file)
+        transcript_text = transcribe_audio(tmp_file_path)
+        os.remove(tmp_file_path)
+        if "Error" not in transcript_text:
+            summary = summarize_text(transcript_text)
+            quiz_text = generate_quiz_questions(transcript_text)
+            questions = parse_quiz_questions(quiz_text)
+            st.write("## Summary")
+            st.write(summary)
+            st.write("## Quiz Questions")
+            st.session_state.questions = questions
+            st.session_state.user_answers = {}
+            st.session_state.generated_quiz = True
+            for i, question in enumerate(questions):
+                st.write(f"### Question {i+1}")
+                st.write(question['question'])
+                st.session_state.user_answers[f"question_{i+1}"] = st.radio(
+                    label="",
+                    options=question['choices'],
+                    key=f"question_{i+1}"
+                )
 if st.session_state.generated_quiz:
     if st.button("Submit Answers"):
         if "questions" in st.session_state and st.session_state.questions:
             with st.spinner('Processing your answers...'):