# EEYD / app.py
# alibabasglab's picture
# Update app.py
# d6b381d verified
import torch
import soundfile as sf
import gradio as gr
import spaces
from extract_everything import extract_everything
@spaces.GPU(duration=30)
def fn_extract_everything(input_audio, input_text_prompt):
    """Extract the sound described by a text prompt and save it as a WAV file.

    Args:
        input_audio: The audio input from the UI (a file path, since the
            audio component is configured with ``type="filepath"``).
        input_text_prompt: Free-form text describing the sound to extract.

    Returns:
        Path of a newly created WAV file containing the extracted audio.

    Raises:
        gr.Error: If no input audio was provided.
    """
    import tempfile

    # Fail with a readable message instead of handing ``None`` to the model
    # when the user presses "Extract" before choosing any audio.
    if not input_audio:
        raise gr.Error("Please upload an audio file or select an example first.")

    # The model is built inside the GPU-decorated call, as in the original.
    extract_everything_model = extract_everything()

    # The model returns three values; only the middle one (the extracted
    # signal) is used here.
    _, output_wav, _ = extract_everything_model(input_audio, input_text_prompt)

    # Write to a unique temporary file rather than one fixed name in the
    # working directory, so simultaneous requests cannot overwrite each
    # other's result. The file is intentionally not deleted here because the
    # UI still has to read it after this function returns.
    with tempfile.NamedTemporaryFile(
        prefix="extracted_audio_", suffix=".wav", delete=False
    ) as tmp_file:
        output_path = tmp_file.name

    # 16000 is the sample rate the original code writes with
    # (presumably the model's output rate; confirm against the model).
    sf.write(output_path, output_wav, 16000)
    return output_path
def clear_all():
    """Build the updates that reset the two input widgets.

    Returns:
        A pair of Gradio updates: the first sets its target's value to
        ``None``, the second sets its target's value to an empty string.
    """
    reset_audio = gr.update(value=None)
    reset_prompt = gr.update(value="")
    return reset_audio, reset_prompt
# Build the web UI: an audio input, a text prompt, Clear/Extract buttons,
# an output player, and a list of clickable examples.
with gr.Blocks(title="OmniSoniX") as demo:
    gr.Markdown("# OmniSoniX: Text-Driven Universal Target Audio Extraction")
    gr.Markdown(
        "Extract any sound using free-form text prompts. To try it, simply "
        "click one of the examples, or upload your own audio/video "
        "(preferably shorter than 20 seconds due to the GPU usage limits "
        "here; if you encounter a Hugging Face error, try again after a few "
        "seconds)."
    )

    audio_input = gr.Audio(label="Input Audio", type="filepath")
    text_prompt = gr.Textbox(
        label="Describe the sound to extract",
        placeholder="e.g., 'vocal', 'dog barking', 'female speech'",
    )

    with gr.Row():
        clear_btn = gr.Button("Clear")
        extract_btn = gr.Button("Extract")

    extracted_out = gr.Audio(label="Extracted Audio", type="filepath")

    # Example (audio file, prompt) pairs wired to the two input widgets.
    # Example caching is disabled, so nothing is precomputed at startup.
    gr.Examples(
        examples=[
            ["examples/noisy_speech.wav", "noise"],
            ["examples/song_chinese.wav", "vocal"],
            ["examples/song_english.wav", "drum"],
            ["examples/bird_speech.wav", "bird chirp"],
            ["examples/keyboard_water.wav", "A person types on a keyboard"],
            ["examples/siren_speech.wav", "Ambulance siren"],
            ["examples/low_volumn_speech.wav", "Low volume speech"],
            ["examples/male_speech.wav", "Male speech"],
            ["examples/czech_speech.wav", "Czech speech"],
            ["examples/slower_speech.wav", "slower voice"],
            ["examples/happy_speech.wav", "happy speech"],
        ],
        inputs=[audio_input, text_prompt],
        outputs=[extracted_out],
        fn=fn_extract_everything,
        cache_examples=False,
    )

    # Run the extraction on the current inputs and play back the result.
    extract_btn.click(
        fn=fn_extract_everything,
        inputs=[audio_input, text_prompt],
        outputs=[extracted_out],
    )

    # Reset both input widgets.
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[audio_input, text_prompt],
    )
    # Also reset the output player so a stale result from a previous run is
    # not left on screen after "Clear". This is a separate listener so it
    # does not depend on how many values clear_all returns.
    clear_btn.click(
        fn=lambda: None,
        inputs=[],
        outputs=[extracted_out],
    )

demo.launch()