Spaces:
Running
Running
File size: 8,730 Bytes
7af90a1 9865d2f 7af90a1 9865d2f 7af90a1 d87c1dc 29a601b 0b20754 7af90a1 8fc93c5 d87c1dc 1e577a8 d87c1dc 2ba42f6 d87c1dc 1e577a8 d87c1dc 2ba42f6 d87c1dc 1e577a8 d87c1dc 2ba42f6 d87c1dc 2ba42f6 d87c1dc 2ba42f6 d87c1dc 2ba42f6 d87c1dc 8fc93c5 d87c1dc 9865d2f f088de8 9865d2f 7af90a1 7ee9f7b 2ba42f6 7af90a1 d87c1dc 9865d2f 7af90a1 9865d2f 6027b0f b6f13f9 9865d2f 7af90a1 9865d2f 7af90a1 6027b0f b6f13f9 7af90a1 9865d2f 7af90a1 9865d2f 2ba42f6 c5def6f 2ba42f6 b6f13f9 2ba42f6 777ca80 2ba42f6 777ca80 2ba42f6 5583945 2ba42f6 777ca80 2ba42f6 777ca80 5583945 777ca80 5583945 777ca80 c093078 5583945 2ba42f6 7af90a1 2ba42f6 7af90a1 05b2058 7af90a1 2ba42f6 7af90a1 9865d2f 7af90a1 2ba42f6 7af90a1 9865d2f 7af90a1 d87c1dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
import gradio as gr
from gradio_client import Client
import uuid
import warnings
import numpy as np
import json
import os
from gradio_client import Client, FileData, handle_file
import tempfile
import scipy.io.wavfile as wavfile
# Suppress every Python warning for the lifetime of this process.
warnings.filterwarnings("ignore")
# Shared gradio_client handle used by all handlers below. The target is read
# from the `src` environment variable; if it is unset, this raises KeyError at
# import time.
client = Client(os.environ['src'])
# Extra CSS passed to gr.Blocks: sets `justify-content: flex-start` on the
# `.gradio-container` element.
custom_css = """
.gradio-container {
justify-content: flex-start !important;
}
"""
def chat_function(message, history, session_id):
    """Forward a text message to the remote app and build the UI update tuple.

    The remote endpoint at ``fn_index=0`` is called with the message, the
    current chat history and the session ID. Its result must unpack into
    exactly four items; only the second (updated history) and the third
    (audio path) are used, the other two are discarded.

    Args:
        message: Text typed by the user.
        history: Current chatbot history, passed through to the remote call.
        session_id: Session identifier, passed through to the remote call.

    Returns:
        A 6-tuple ``("", history, audio_path, session_id, label, label)``
        where ``label`` is ``"Current Session ID: <session_id>"``. The label
        appears twice because two separate display components show it.
    """
    _, updated_history, reply_audio, _ = client.predict(
        message,
        history,
        session_id,
        fn_index=0,
    )
    session_label = f"Current Session ID: {session_id}"
    return "", updated_history, reply_audio, session_id, session_label, session_label
def set_session(user_id):
    """Ask the remote app for a session ID and build the UI update tuple.

    The remote endpoint at ``fn_index=1`` is called with ``user_id``. Its
    result must unpack into exactly two items; only the first (the session ID
    to use) is kept. Presumably the backend generates a fresh ID when
    ``user_id`` is blank -- confirm against the backend.

    Args:
        user_id: Session ID text entered by the user (may be empty).

    Returns:
        A 4-tuple ``(new_id, "", label, label)`` where ``label`` is
        ``"Current Session ID: <new_id>"``.
    """
    assigned_id, _ = client.predict(user_id, fn_index=1)
    session_label = f"Current Session ID: {assigned_id}"
    return assigned_id, "", session_label, session_label
def handle_audio(audio_data, history, session_id):
    """Send a recorded clip to the remote app and build the UI update tuple.

    Args:
        audio_data: ``None`` when nothing was recorded, otherwise a
            ``(sample_rate, samples)`` pair that is written out as a WAV file.
        history: Current chatbot history, passed through to the remote call.
        session_id: Session identifier, passed through to the remote call.

    Returns:
        A 5-tuple ``(audio_path, history, session_id, label, label)``.
        On success the first three items come from the remote
        ``/handle_audio`` endpoint and ``label`` shows the returned session
        ID. With no recording, or on any failure, the incoming history and
        session ID are returned unchanged with ``None`` as the audio path;
        failures use an error label and are logged to stdout/stderr.
    """
    if audio_data is None:
        session_label = f"Current Session ID: {session_id}"
        return None, history, session_id, session_label, session_label
    try:
        sample_rate, audio_array = audio_data
        # Write into a private temporary directory instead of an open
        # NamedTemporaryFile: reopening a still-open named temp file by path
        # is platform dependent (it fails on Windows). The directory and the
        # WAV file inside it are removed when the `with` block exits.
        with tempfile.TemporaryDirectory() as scratch_dir:
            wav_path = os.path.join(scratch_dir, "recording.wav")
            wavfile.write(wav_path, sample_rate, audio_array)
            audio = {"path": wav_path, "meta": {"_type": "gradio.FileData"}}
            # Keep the remote call inside the `with` so the file still exists
            # while the client reads it.
            result = client.predict(
                audio,
                history,
                session_id,
                api_name="/handle_audio",
            )
        audio_path, new_history, new_session_id = result
        display_text = f"Current Session ID: {new_session_id}"
        return audio_path, new_history, new_session_id, display_text, display_text
    except Exception as e:
        # UI boundary: report the failure in the session labels rather than
        # letting the handler raise.
        print(f"Error processing audio: {str(e)}")
        import traceback
        traceback.print_exc()
        error_text = f"Error processing audio. Session ID: {session_id}"
        return None, history, session_id, error_text, error_text
def respond(message, chat_history, session_id):
    """Handle a submitted text message, requiring a session ID first.

    Args:
        message: Text typed by the user.
        chat_history: Current chatbot history.
        session_id: Current session ID; any falsy value counts as "not set".

    Returns:
        The 6-tuple produced by ``chat_function`` when a session ID is set.
        Otherwise ``("", chat_history, None, session_id, warning, warning)``,
        where ``warning`` is a bilingual prompt to set a session ID; the
        history is returned unchanged and no remote call is made.
    """
    if session_id:
        return chat_function(message, chat_history, session_id)

    warning = "🔴🔴 Please set a session ID first in the Options tab | オプションタブに移動して、セッションIDを設定してください。🔴🔴"
    return "", chat_history, None, session_id, warning, warning
def create_frontend_demo():
    """Build the Gradio Blocks UI and wire its events; does not launch it.

    The UI has two tabs: "Chat" (session label, chatbot, text box, audio
    output, microphone input) and "Options" (session label, session ID box,
    buttons, bilingual notes). Event handlers call ``respond``,
    ``set_session`` and ``handle_audio``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # NOTE(review): the theme string below looks mangled (an e-mail
    # obfuscation placeholder where a theme reference is expected) -- restore
    # the intended value before relying on it.
    # NOTE(review): confirm the intended nesting of the Row/Column containers.
    with gr.Blocks(css=custom_css, theme="Respair/[email protected]") as demo:
        # Holds the current session ID; starts empty, which `respond` treats
        # as "no session set".
        session_id_state = gr.State("")
        with gr.Tabs() as tabs:
            with gr.Tab("Chat"):
                gr.Markdown("""Please set a session ID, or generate a new one in the Options tab, it helps to retain the history of your conversation.
<br>オプションタブでセッションIDを設定するか新しく生成してください。会話の履歴を保持するために必要です。""")
                # Session label shown on the Chat tab; updated by every handler.
                chat_session_display = gr.Markdown("Current Session ID: None", label="Session ID")
                chatbot = gr.Chatbot(
                    label="Conversation History",
                    height=400,
                    avatar_images=["photo_2024-03-01_22-30-42.jpg", "colored_blured.png"],
                    placeholder="Start chatting with Aira..."
                )
                with gr.Column():
                    msg = gr.Textbox(
                        show_label=False,
                        placeholder="Enter text and press enter",
                        container=True,
                    )
                    # Plays the audio file path returned by the handlers.
                    audio_output = gr.Audio(
                        label="Aira's Response",
                        type="filepath",
                        streaming=False,
                        autoplay=True,
                    )
                    with gr.Row():
                        # type="numpy": `handle_audio` unpacks the value as a
                        # (sample_rate, samples) pair.
                        audio_input = gr.Audio(
                            sources=["microphone"],
                            type="numpy",
                            label="Audio Input",
                            streaming=False
                        )
            with gr.Tab("Options"):
                with gr.Column():
                    # Second copy of the session label, shown on the Options tab.
                    options_session_display = gr.Markdown("Current Session ID: None", label="Session ID")
                    session_input = gr.Textbox(
                        value="",
                        label="Session ID (leave blank and press the button for an entirely new session) | セッションID(新規セッションの場合は空欄のままボタンを押してください)",
                    )
                    gen_id_btn = gr.Button("Set Session ID")
                    # `set_session` writes an empty string here.
                    session_msg = gr.Markdown("")
                    clear_btn = gr.Button("Clear Conversation")
                    gr.Markdown("""
### English
This is a personal project I wanted to do for a while.
Aira's voice was designed to be unique; it doesn't belong to any real person out there.
Her design is also based on a vtuber project I did a few years ago, though I didn't put
a lot of effort into it this time (you can see the lazy brush strokes in her pfp).
You can talk to her in English or Japanese, but she will only respond in Japanese
(Subs over dubs, bros) ask her to give you a Subtitle if you can't talk in Japanese.
The majority of the latency depends on the HF's inference api.
The language modelling part is not fine-tuned, it's an off-the-shelf one, please beware of that.
**Session Guide:**
1. Enter your Session ID above or leave blank for a new one
2. Click 'Set Session ID' to confirm
3. Use 'Clear Conversation' to reset the chat
4. Your conversation history is saved based on your Session ID
I'll try to keep this demo up for as long as I can afford.
---
### 日本語
愛良の声は独特なものとして設計されました。実在する人物の声ではありません。
彼女のデザインは数年前に制作したVTuberプロジェクトがベースになっています。今回はそれほど力を入れていませんが(プロフィール画像の雑な筆致は見え見えですね)。
**セッションIDの使い方:**
1. 上記にセッションIDを入力するか、新規の場合は空欄のままにしてください
2.「Set Session ID」をクリックして確定
3. 「Clear Conversation」で会話をリセット
4. 会話履歴はセッションIDに基づいて保存されます
レイテンシーの大部分はHugging Faceの推論APIに依存しています。
言語モデルの部分は微調整されておらず、既製のものを使用しているのでご注意ください。
できる限り長くこのデモを継続できるよう努めます。
""")
        # Text submit: `respond` returns six values, one per listed output.
        msg.submit(
            respond,
            inputs=[msg, chatbot, session_id_state],
            outputs=[msg, chatbot, audio_output, session_id_state, chat_session_display, options_session_display]
        )
        # Set-session button: `set_session` returns four values.
        gen_id_btn.click(
            set_session,
            inputs=[session_input],
            outputs=[session_id_state, session_msg, chat_session_display, options_session_display]
        )
        # Runs when a microphone recording stops: `handle_audio` returns five values.
        audio_input.stop_recording(
            handle_audio,
            inputs=[audio_input, chatbot, session_id_state],
            outputs=[audio_output, chatbot, session_id_state, chat_session_display, options_session_display]
        )
        # Replaces the chatbot value with an empty list; no remote call is made here.
        clear_btn.click(
            lambda: [],
            None,
            [chatbot]
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    # show_error=True is passed to launch(); see the Gradio docs for how
    # handler errors are then surfaced to the user.
    demo = create_frontend_demo()
    demo.launch(show_error=True)