import streamlit as st
import requests
import base64
import os
from moviepy.editor import VideoFileClip
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import CouldNotRetrieveTranscript
import whisper
import ffmpeg
import re
import tempfile
from huggingface_hub import InferenceClient

st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

PROMPT = """Act as the author and provide a comprehensive, detailed article in the same language as the transcript, in markdown format, with an H1 main title (example "# ") and broken down into H2 subtitles (example "## ") for the following transcript.
You must follow the rules:
- Write the article in markdown format
- Create a main title for the article as markdown H1 and break the article into subtitles where each subtitle is markdown H2
- Article must be in the same language as the transcript
- Summary should be informative and act as a replacement for the original transcript, to the point that the user doesn't have to go back to read the transcript
- Summary should not mention the author or speaker at all; it should act as your independent writing without referencing the original transcript or speaker
- You can use bullet points within the article

Transcript: {} \n\n Article:"""

LLM = {
    "llama3-8b": {
        "prompt": f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

""",
        "endpoint": "meta-llama/Meta-Llama-3-8B-Instruct",
    }
}


@st.cache_resource()
def load_whisper(model):
    # Load the Whisper model once and reuse it across reruns.
    return whisper.load_model(model)


@st.cache_data
def download_video(url):
    # Download a YouTube video with pytube, or stream any other URL to disk.
    if "youtube" in url or "youtu.be" in url:
        yt = YouTube(url)
        video = yt.streams.get_highest_resolution()
        filename = video.download()
    else:
        response = requests.get(url, stream=True)
        filename = url.split("/")[-1]
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    file.write(chunk)
    return filename


@st.cache_data
def convert_to_audio(video_filename):
    # Extract the audio track of the downloaded video as an MP3 file.
    video = VideoFileClip(video_filename)
    audio_filename = video_filename.replace(".mp4", ".mp3")
    audio = video.audio
    audio.write_audiofile(audio_filename, codec="mp3")
    return audio_filename


@st.cache_data
def summarise(prompt, llm):
    # Format the transcript into the chat template of the selected LLM and
    # generate the article through the Hugging Face Inference API.
    model = InferenceClient(LLM[llm]["endpoint"])
    user_message = LLM[llm]["prompt"].format(prompt)
    return model.text_generation(user_message, max_new_tokens=1024)


def delete_files(video_filename, audio_filename):
    delete_file(video_filename)
    delete_file(audio_filename)


def delete_file(filename):
    if os.path.exists(filename):
        os.remove(filename)
        st.info(f"File '{os.path.basename(filename)}' deleted from the server.")


@st.cache_data
def transcribe_whisper(_model, audio_filepath):
    # The leading underscore stops Streamlit from trying to hash the model object.
    return _model.transcribe(audio_filepath)["text"]


def get_media_download_link(media_type, file_path):
    # Embed the file as a base64 data URI so it can be downloaded from the browser.
    with open(file_path, "rb") as file:
        contents = file.read()
    encoded = base64.b64encode(contents).decode("utf-8")
    media_href = f"data:file/{media_type};base64,{encoded}"
    st.markdown(
        f'<a href="{media_href}" download="{os.path.basename(file_path)}">'
        f"Download {os.path.basename(file_path)}</a>",
        unsafe_allow_html=True,
    )


@st.cache_data
def generate_summaries(_summarizer, text, min_length=50, max_length=500):
    # Not wired into the UI: summarises a transcript paragraph by paragraph with a
    # summarisation pipeline passed in as `_summarizer`.
    paragraphs = text.split("\n\n")
    summaries = []
    for paragraph in paragraphs:
        summary = _summarizer(
            paragraph, max_length=max_length, min_length=min_length, do_sample=False
        )
        summaries.append(summary[0]["summary_text"].strip())
    return "\n\n".join(summaries)
def main():
    st.title("VidScripter")
    st.write("#### A One Stop Solution to Video Transcription")
    c1, c2 = st.columns(2)
    c1.write(
        """
- Enter the video URL in the text input box.
- Click the **Fetch** button to fetch the video.
- Once the video is fetched, you can perform the following actions:
    - Fetch the transcript from the YouTube API (if available) by clicking the **Fetch Transcript** button.
    - Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
- The transcript will be displayed in a text area below.
- A summary of the transcript will also be generated by the selected LLM.
- The summary will be displayed in a text area below.
- You can download the video, audio, transcript, or summary by clicking the respective download buttons.
"""
    )
    whisper_model = load_whisper("base")
    url = c2.text_input("Enter the video URL")
    llm = c2.selectbox("Select LLM", list(LLM.keys()), index=0)
    fetch_button = c2.button("Fetch")
    st.session_state.setdefault("load_state", False)
    if fetch_button or st.session_state.load_state:
        st.session_state.load_state = True
        if url:
            process_video(url, whisper_model, llm)


def process_video(url, whisper_model, llm):
    # Only YouTube URLs carry a video id usable with the transcript API.
    video_id = (
        YouTube(url).video_id if "youtube" in url or "youtu.be" in url else None
    )
    try:
        video_filename = download_video(url)
        st.success("Video fetched successfully")
    except Exception:
        video_filename = None
        st.warning("Video could not be fetched")
    try:
        audio_filename = (
            convert_to_audio(video_filename) if video_filename is not None else None
        )
        if video_filename is not None:
            st.success("Audio converted successfully")
        else:
            st.info("No video to convert into audio")
    except Exception:
        audio_filename = None
        st.warning("Audio could not be converted")
    text_filename = (
        os.path.basename(video_filename).replace(".mp4", ".txt")
        if video_filename is not None
        else "transcript.txt"
    )
    emp = st.empty()
    col1, col2, col3, col4 = st.columns(4)
    if "youtube" in url or "youtu.be" in url:
        process_youtube_video(video_id, col3, emp, text_filename, llm)
    process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)
    with col1:
        if video_filename is not None and st.button("Download Video"):
            with st.spinner("Encoding Video"):
                get_media_download_link("video", video_filename)
    with col2:
        if audio_filename is not None and st.button("Download Audio"):
            with st.spinner("Encoding Audio"):
                get_media_download_link("audio", audio_filename)


def process_youtube_video(video_id, col, emp, text_filename, llm):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts = list(transcript_list)
        if transcripts:
            transcript_options = {
                f"{transcript.language} ({transcript.language_code})": transcript
                for transcript in transcripts
            }
            transcript_option = emp.selectbox(
                "Select a transcript", list(transcript_options.keys())
            )
            selected_transcript = transcript_options[transcript_option]
            st.session_state.setdefault("api_transcript", False)
            if col.button("Fetch Transcript") or st.session_state.api_transcript:
                st.session_state.api_transcript = True
                transcript_chunks = selected_transcript.fetch()
                transcript_text = "\n".join(
                    re.sub(r"\s+", " ", chunk["text"]) for chunk in transcript_chunks
                )
                c1, c2 = st.columns(2)
                with c1:
                    modified_text = st.text_area(
                        "Transcript", transcript_text, height=500
                    )
                    st.download_button(
                        "Download Transcript", modified_text, text_filename
                    )
                with c2:
                    summarization = summarise(modified_text, llm)
                    summarized_text = st.text_area(
                        "Summarized Transcript", summarization, height=500
                    )
                    st.download_button(
                        "Download Summary", summarized_text, text_filename
                    )
    except CouldNotRetrieveTranscript:
        emp.warning("Could Not Retrieve API Transcripts for this video.")
    except Exception as e:
        emp.warning(f"Error Fetching API Transcripts for this video. {e}")
def process_whisper_transcript(whisper_model, audio_filename, col, text_filename):
    if audio_filename is not None:
        st.session_state.setdefault("whisper_transcript", False)
        if col.button("Transcribe (Whisper)") or st.session_state.whisper_transcript:
            st.session_state.whisper_transcript = True
            whisper_text = transcribe_whisper(whisper_model, audio_filename)
            modified_text = st.text_area("Transcript", whisper_text, height=500)
            st.download_button("Download", modified_text, text_filename)


if __name__ == "__main__":
    main()
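
# Minimal run sketch (assumption: the script is saved as app.py; adjust the
# filename to match your checkout):
#
#   streamlit run app.py
#
# Note that Whisper invokes the ffmpeg command-line tool when transcribing, so
# ffmpeg must be available on the PATH in addition to the packages imported above.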