Spaces:

haepada
/

roots

Running

App Files Files Community

roots / app.py

haepada

Update app.py

6045e4a verified 4 months ago

raw

history blame

8.63 kB

	import gradio as gr
	import numpy as np
	import librosa
	from transformers import pipeline
	from datetime import datetime
	import os

	# Initialise the inference pipelines once, at import time, so every
	# request handler shares the same loaded models.

	# Speech-to-text (Korean speech recognition model).
	speech_recognizer = pipeline(
	    task="automatic-speech-recognition",
	    model="kresnik/wav2vec2-large-xlsr-korean",
	)

	# Audio classifier whose top label is reported as the "voice emotion".
	# NOTE(review): the checkpoint name suggests a speech-commands (keyword)
	# classifier rather than an emotion model; its labels may never match the
	# happy/sad/angry/neutral colour table used when building the art prompt
	# — confirm the intended model.
	emotion_classifier = pipeline(
	    task="audio-classification",
	    model="MIT/ast-finetuned-speech-commands-v2",
	)

	# Text sentiment classifier applied to reflections and transcriptions.
	text_analyzer = pipeline(
	    task="sentiment-analysis",
	    model="nlptown/bert-base-multilingual-uncased-sentiment",
	)

	def create_interface():
	    """Build the "디지털 굿판" Gradio app and return its Blocks object.

	    The UI consists of four tabs (입장, 청신, 기원, 송신). The event
	    handlers are nested functions defined after the layout and wired to
	    the widgets at the end of the function.

	    Returns:
	        The ``gr.Blocks`` instance; the caller is expected to ``launch()`` it.

	    NOTE(review): the row/column nesting below is a best reading of the
	    intended layout — confirm against the rendered page.
	    """
	    with gr.Blocks(theme=gr.themes.Soft()) as app:
	        # Per-session state shared between the handlers.
	        state = gr.State({
	            "user_name": "",
	            "reflections": [],
	            "voice_analysis": None,
	            "final_prompt": "",
	            "generated_images": [],  # storage for generated images
	        })

	        # Header
	        header = gr.Markdown("# 디지털 굿판")
	        user_display = gr.Markdown("")

	        # Every tab gets an explicit id: Gradio can only switch tabs
	        # programmatically (``selected=...``) by id, not by label.
	        with gr.Tabs() as tabs:
	            # Entrance
	            with gr.Tab("입장", id="입장"):
	                gr.Markdown("""# 디지털 굿판에 오신 것을 환영합니다""")
	                name_input = gr.Textbox(label="이름을 알려주세요")
	                start_btn = gr.Button("여정 시작하기")

	            # Listening + written reflections
	            with gr.Tab("청신", id="청신"):
	                with gr.Row():
	                    audio = gr.Audio(
	                        value="assets/main_music.mp3",
	                        type="filepath",
	                        label="온천천의 소리",
	                    )
	                    with gr.Column():
	                        reflection_input = gr.Textbox(
	                            label="현재 순간의 감상을 적어주세요",
	                            lines=3,
	                        )
	                        save_btn = gr.Button("감상 저장하기")
	                        reflections_display = gr.Dataframe(
	                            headers=["시간", "감상", "감정 분석"],
	                            label="기록된 감상들",
	                        )

	            # Spoken wish + analysis
	            with gr.Tab("기원", id="기원"):
	                gr.Markdown("## 기원 - 목소리로 전하기")
	                with gr.Row():
	                    # Voice input
	                    voice_input = gr.Audio(
	                        label="나누고 싶은 이야기를 들려주세요",
	                        sources=["microphone"],
	                        type="filepath",
	                    )

	                    # Analysis results
	                    with gr.Column():
	                        transcribed_text = gr.Textbox(
	                            label="인식된 텍스트",
	                            interactive=False,
	                        )
	                        voice_emotion = gr.Textbox(
	                            label="음성 감정 분석",
	                            interactive=False,
	                        )
	                        text_emotion = gr.Textbox(
	                            label="텍스트 감정 분석",
	                            interactive=False,
	                        )
	                        analysis_details = gr.JSON(
	                            label="상세 분석 결과",
	                        )

	            # Visualisation
	            with gr.Tab("송신", id="송신"):
	                gr.Markdown("## 송신 - 시각화 결과")
	                with gr.Column():
	                    # NOTE(review): no handler lists this textbox as an
	                    # output, so it stays empty even though the prompt is
	                    # stored in state["final_prompt"] — confirm intent.
	                    final_prompt = gr.Textbox(
	                        label="생성된 프롬프트",
	                        interactive=False,
	                    )
	                    gallery = gr.Gallery(
	                        label="시각화 결과",
	                        columns=2,
	                    )
	                    share_btn = gr.Button("결과 공유하기")

	        def generate_art_prompt(text, analysis, reflections):
	            """Compose the art prompt from the analysis results.

	            Args:
	                text: Transcribed speech; accepted but not used in the prompt.
	                analysis: Dict with "voice_emotion", "text_sentiment" and
	                    "acoustic_features" entries, as built by
	                    ``analyze_voice_comprehensive``.
	                reflections: Saved reflections; only whether any exist matters.

	            Returns:
	                The prompt string.
	            """
	            # Local names deliberately differ from the ``voice_emotion`` /
	            # ``text_emotion`` widgets defined in the enclosing scope.
	            voice_label = analysis["voice_emotion"]["label"]
	            sentiment_label = analysis["text_sentiment"]["label"]
	            energy = analysis["acoustic_features"]["energy"]

	            # Colour palette per voice-emotion label.
	            emotion_colors = {
	                "happy": "따뜻한 노란색과 주황색",
	                "sad": "깊은 파랑색과 보라색",
	                "angry": "강렬한 빨강색과 검정색",
	                "neutral": "부드러운 회색과 베이지색",
	            }
	            palette = emotion_colors.get(voice_label, '자연스러운 색상')

	            # Base prompt
	            prompt = f"한국 전통 민화 스타일의 추상화, {palette} 사용. "
	            prompt += f"음성의 감정({voice_label})과 텍스트의 감정({sentiment_label})이 조화를 이루며, "
	            prompt += f"에너지 레벨({energy:.2f})을 통해 화면의 동적인 느낌을 표현. "

	            # Acknowledge earlier reflections, if any were saved.
	            if reflections:
	                prompt += "이전 감상들의 정서를 배경에 은은하게 담아내기. "

	            return prompt

	        def analyze_voice_comprehensive(audio_path, state):
	            """Transcribe a recording and analyse its acoustics and sentiment.

	            Args:
	                audio_path: File path supplied by the microphone widget, or
	                    None when there is no recording.
	                state: Session state dict; "final_prompt" is overwritten on
	                    success.

	            Returns:
	                A 5-tuple matching the wired outputs: (state, transcription
	                or message, voice-emotion text, text-sentiment text,
	                details dict). On failure the message carries the error
	                text and the remaining fields are empty.
	            """
	            if audio_path is None:
	                return state, "음성 입력이 필요합니다.", "", "", {}

	            try:
	                # Load the audio resampled to 16 kHz.
	                y, sr = librosa.load(audio_path, sr=16000)

	                # 1. Speech-to-text
	                transcription = speech_recognizer(y)
	                spoken_text = transcription["text"]

	                # 2. Acoustic features
	                # piptrack returns (pitches, magnitudes); both are 0 at bins
	                # that are not local maxima, so average only non-zero pitches.
	                pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
	                voiced = pitches[pitches > 0]
	                mean_pitch = float(voiced.mean()) if voiced.size else 0.0

	                features = {
	                    "energy": float(np.mean(librosa.feature.rms(y=y))),
	                    "pitch": mean_pitch,
	                    # Keyword arguments are required by newer librosa, and
	                    # sr must be passed or 22050 Hz is assumed.
	                    "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
	                    "zero_crossing_rate": float(
	                        np.mean(librosa.feature.zero_crossing_rate(y))
	                    ),
	                }

	                # 3. Voice classification: keep the first result returned.
	                voice_emotions = emotion_classifier(y)
	                primary_emotion = voice_emotions[0]

	                # 4. Sentiment of the transcribed text
	                text_sentiment = text_analyzer(spoken_text)[0]

	                # Combined result shown in the JSON widget.
	                analysis_result = {
	                    "acoustic_features": features,
	                    "voice_emotion": primary_emotion,
	                    "text_sentiment": text_sentiment,
	                }

	                # Build and remember the art prompt.
	                prompt = generate_art_prompt(
	                    spoken_text, analysis_result, state["reflections"]
	                )
	                state["final_prompt"] = prompt

	                return (
	                    state,
	                    spoken_text,
	                    f"음성 감정: {primary_emotion['label']} ({primary_emotion['score']:.2f})",
	                    f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
	                    analysis_result,
	                )
	            except Exception as e:
	                # UI boundary: report the failure in the textbox instead of
	                # letting the handler raise.
	                return state, f"오류 발생: {str(e)}", "", "", {}

	        def save_reflection(text, state):
	            """Store a written reflection together with its sentiment.

	            Args:
	                text: Reflection text from the textbox; None or blank input
	                    is ignored.
	                state: Session state dict; a [time, text, sentiment] row is
	                    appended to state["reflections"].

	            Returns:
	                (state, reflections list) for the state and the dataframe.
	            """
	            # Guard against None as well as empty / whitespace-only input.
	            if not text or not text.strip():
	                return state, state["reflections"]

	            current_time = datetime.now().strftime("%H:%M:%S")
	            sentiment = text_analyzer(text)[0]
	            new_reflection = [
	                current_time,
	                text,
	                f"{sentiment['label']} ({sentiment['score']:.2f})",
	            ]

	            state["reflections"].append(new_reflection)
	            return state, state["reflections"]

	        def start_journey(name):
	            """Greet the user and switch to the 청신 tab.

	            Args:
	                name: Name typed on the entrance tab.

	            Returns:
	                (welcome markdown, update selecting the tab with id "청신").
	            """
	            welcome_text = f"# 환영합니다, {name}님의 디지털 굿판"
	            return welcome_text, gr.update(selected="청신")

	        # Event wiring
	        start_btn.click(
	            fn=start_journey,
	            inputs=[name_input],
	            outputs=[user_display, tabs],
	        )

	        save_btn.click(
	            fn=save_reflection,
	            inputs=[reflection_input, state],
	            outputs=[state, reflections_display],
	        )

	        voice_input.change(
	            fn=analyze_voice_comprehensive,
	            inputs=[voice_input, state],
	            outputs=[
	                state,
	                transcribed_text,
	                voice_emotion,
	                text_emotion,
	                analysis_details,
	            ],
	        )

	    return app

	# Run the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo = create_interface()
	    demo.launch()