Spaces:

Delik
/

pyannote-speaker-diarization-3.1

Running on Zero

File size: 1,260 Bytes

f30c373
7c9216a
18e78ec
ba685bf
aad12fa
fc03567
 
 
 
 
 
 
 
1c57ed2
8289149
fc03567
 
 
aad12fa
 
 
 
76efec6
0d076a1
aad12fa
0d076a1
8289149
fc03567
 
 
 
0d076a1
76efec6
8289149
0d076a1
db1ee1f
aad12fa
1429210
db1ee1f
ff42726
2129f6b
0d076a1
1429210
db1ee1f
2129f6b
0d076a1

import os
import shutil
import tempfile

import gradio as gr
from pyannote.audio import Pipeline

# Load the pretrained speaker-diarization pipeline once at import time.
# The access token is read from the "api" environment variable; the lookup
# stays inside the try so a missing variable is reported like any other
# initialization failure. On failure `pipeline` remains None.
pipeline = None
try:
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1", use_auth_token=os.environ["api"]
    )
except Exception as exc:
    print(f"Error initializing pipeline: {exc}")

def process_audio(audio):
    """Run speaker diarization on an uploaded audio file.

    Args:
        audio: Filesystem path of the uploaded audio, or ``None`` when
            nothing was uploaded.

    Returns:
        ``str()`` of the pipeline's result on success, otherwise a message
        starting with ``"Error"`` when the pipeline is unavailable, no audio
        was supplied, or the pipeline raised while processing.
    """
    if pipeline is None:
        return "Error: Pipeline not initialized"

    # Clicking "Process" without uploading anything passes None here.
    if not audio:
        return "Error: No audio provided"

    # Work on a private copy inside a per-call scratch directory: a fixed
    # "temp.wav" in the working directory would be clobbered by concurrent
    # requests, and the context manager removes the copy on every exit path,
    # including the early return taken when the pipeline fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        temp_path = os.path.join(scratch_dir, "temp.wav")
        shutil.copyfile(audio, temp_path)

        # Use the diarization pipeline to process the audio
        try:
            diarization = pipeline(temp_path)
        except Exception as e:
            return f"Error processing audio: {e}"

        # Return the diarization output
        return str(diarization)

# Build the web UI: an audio upload, a trigger button, and a text box that
# displays whatever process_audio returns.
with gr.Blocks() as demo:
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    process_button = gr.Button("Process")
    diarization_output = gr.Textbox(label="Diarization Output")

    # On click, hand the uploaded file's path to process_audio and show
    # its return value in the text box.
    process_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=diarization_output,
    )

# Start the Gradio app.
demo.launch()