seawolf2357 committed on
Commit
f2ef6a6
•
1 Parent(s): 4eedd66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -6,6 +6,7 @@ import asyncio
6
  from huggingface_hub import InferenceClient
7
  from googleapiclient.discovery import build
8
  from youtube_transcript_api import YouTubeTranscriptApi
 
9
  from dotenv import load_dotenv
10
 
11
  # 환경 변수 로드
@@ -55,7 +56,7 @@ class MyClient(discord.Client):
55
  try:
56
  video_id = extract_video_id(message.content)
57
  if video_id:
58
- transcript = await get_video_transcript(video_id)
59
  comments = await get_video_comments(video_id)
60
  if comments and transcript:
61
  replies = await generate_replies(comments, transcript)
@@ -89,18 +90,28 @@ def extract_video_id(url):
89
  logging.debug(f'Extracted video ID: {video_id}')
90
  return video_id
91
 
92
- async def get_video_transcript(video_id):
93
  """
94
  YouTube 비디오의 자막을 가져옵니다.
95
  """
96
  try:
97
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
98
- transcript_text = " ".join([entry['text'] for entry in transcript])
99
- logging.debug(f'Fetched transcript: {transcript_text}')
100
- return transcript_text
101
  except Exception as e:
102
- logging.error(f'Error fetching transcript: {e}')
103
- return None
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  async def get_video_comments(video_id):
106
  """
 
6
  from huggingface_hub import InferenceClient
7
  from googleapiclient.discovery import build
8
  from youtube_transcript_api import YouTubeTranscriptApi
9
+ from youtube_transcript_api.formatters import TextFormatter
10
  from dotenv import load_dotenv
11
 
12
  # 환경 변수 로드
 
56
  try:
57
  video_id = extract_video_id(message.content)
58
  if video_id:
59
+ transcript = await get_best_available_transcript(video_id)
60
  comments = await get_video_comments(video_id)
61
  if comments and transcript:
62
  replies = await generate_replies(comments, transcript)
 
90
  logging.debug(f'Extracted video ID: {video_id}')
91
  return video_id
92
 
93
+ async def get_best_available_transcript(video_id):
94
  """
95
  YouTube 비디오의 자막을 가져옵니다.
96
  """
97
  try:
98
+ # μ˜μ–΄ μžλ§‰ μ‹œλ„
99
+ transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
 
 
100
  except Exception as e:
101
+ logging.warning(f'Error fetching English transcript: {e}')
102
+ try:
103
+ # μ˜μ–΄ μžλ§‰μ΄ μ—†μœΌλ©΄ λ‹€λ₯Έ μ–Έμ–΄ μžλ§‰μ„ μ‹œλ„
104
+ transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
105
+ transcript = transcripts.find_manually_created_transcript(['ko', 'ja', 'zh-Hans', 'zh-Hant']).fetch()
106
+ except Exception as e:
107
+ logging.error(f'Error fetching alternative transcript: {e}')
108
+ return None
109
+
110
+ # μžλ§‰ ν¬λ§·νŒ…
111
+ formatter = TextFormatter()
112
+ transcript_text = formatter.format_transcript(transcript)
113
+ logging.debug(f'Fetched transcript: {transcript_text}')
114
+ return transcript_text
115
 
116
  async def get_video_comments(video_id):
117
  """