Spaces:
Running
on
Zero
Running
on
Zero
| import torch | |
| import soundfile as sf | |
| import gradio as gr | |
| import spaces | |
| from extract_everything import extract_everything | |
# Sample rate (Hz) handed to soundfile when writing the extracted track.
_OUTPUT_SAMPLE_RATE = 16000


def fn_extract_everything(input_audio, input_text_prompt):
    """Extract the sound described by a text prompt and return a WAV path.

    Args:
        input_audio: Audio input forwarded unchanged to the extraction
            model (the UI's audio component is configured with
            ``type="filepath"``).
        input_text_prompt: Free-form description of the sound to extract.

    Returns:
        Path of a newly written WAV file holding the extracted audio.

    Raises:
        gr.Error: If no audio input was provided.
    """
    import tempfile

    # Fail with a readable UI message instead of an opaque model error when
    # the user presses "Extract" before providing any audio.
    if input_audio is None:
        raise gr.Error("Please upload or select an audio file first.")

    # NOTE(review): the model object is rebuilt on every request, as in the
    # original code. Hoisting it to module scope may be cheaper, but whether
    # that is safe depends on how extract_everything manages devices - confirm.
    extract_everything_model = extract_everything()
    # The model returns (original, extracted, residual); only the extracted
    # track is exposed by this demo.
    _, output_wav, _ = extract_everything_model(input_audio, input_text_prompt)

    # Write to a unique temp file so that overlapping or successive requests
    # never overwrite each other's result (previously a single fixed
    # 'extracted_audio.wav' in the working directory was reused).
    with tempfile.NamedTemporaryFile(
        prefix="extracted_audio_", suffix=".wav", delete=False
    ) as tmp:
        output_path = tmp.name
    sf.write(output_path, output_wav, _OUTPUT_SAMPLE_RATE)
    return output_path
def clear_all():
    """Build the updates that blank the audio input and the prompt textbox.

    Returns:
        A 2-tuple of ``gr.update`` objects: the first sets its target's value
        to ``None``, the second sets its target's value to an empty string.
    """
    reset_audio = gr.update(value=None)
    reset_prompt = gr.update(value="")
    return reset_audio, reset_prompt
# Introductory blurb rendered under the page title.
_INTRO_TEXT = (
    "Extract any sound using free-form text prompts.To try it, simply click one of the examples, or upload your own audio/video (Preferably less than 20 seconds or less due to the GPU usage limits here, try again after a few seconds if encounters huggingface error.)"
)

# (audio file, text prompt) pairs offered as ready-made examples.
_EXAMPLE_ROWS = [
    ["examples/noisy_speech.wav", "noise"],
    ["examples/song_chinese.wav", "vocal"],
    ["examples/song_english.wav", "drum"],
    ["examples/bird_speech.wav", "bird chirp"],
    ["examples/keyboard_water.wav", "A person types on a keyboard"],
    ["examples/siren_speech.wav", "Ambulance siren"],
    ["examples/low_volumn_speech.wav", "Low volume speech"],
    ["examples/male_speech.wav", "Male speech"],
    ["examples/czech_speech.wav", "Czech speech"],
    ["examples/slower_speech.wav", "slower voice"],
    ["examples/happy_speech.wav", "happy speech"],
]

with gr.Blocks(title="OmniSoniX") as demo:
    # Page header.
    gr.Markdown("# OmniSoniX: Text-Driven Universal Target Audio Extraction")
    gr.Markdown(_INTRO_TEXT)

    # User inputs: the audio to process and a description of the target sound.
    audio_input = gr.Audio(type="filepath", label="Input Audio")
    text_prompt = gr.Textbox(
        placeholder="e.g., 'vocal', 'dog barking', 'female speech'",
        label="Describe the sound to extract",
    )

    # Action buttons share a single row.
    with gr.Row():
        clear_btn = gr.Button("Clear")
        extract_btn = gr.Button("Extract")

    # Player for the extraction result.
    extracted_out = gr.Audio(type="filepath", label="Extracted Audio")

    # Ready-made examples wired to the same inputs/outputs as the Extract
    # button; results are not pre-computed (cache_examples=False).
    gr.Examples(
        examples=_EXAMPLE_ROWS,
        fn=fn_extract_everything,
        inputs=[audio_input, text_prompt],
        outputs=[extracted_out],
        cache_examples=False,
    )

    # "Extract" runs the model on the current inputs.
    extract_btn.click(
        fn=fn_extract_everything,
        inputs=[audio_input, text_prompt],
        outputs=[extracted_out],
    )

    # "Clear" resets both input widgets.
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[audio_input, text_prompt],
    )

demo.launch()