Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,9 @@ from huggingface_hub import snapshot_download
|
|
| 8 |
# ---------------------------------
|
| 9 |
# SINGLE-TURN MODEL SETUP
|
| 10 |
# ---------------------------------
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
MODEL_BASE_THINK = os.path.join(MODEL_BASE_SINGLE, 'stage35')
|
| 13 |
|
| 14 |
model_single = llava.load(MODEL_BASE_SINGLE, model_base=None, devices=[0])
|
|
@@ -83,11 +85,12 @@ def multi_turn_chat(user_input, audio_file, history, current_audio):
|
|
| 83 |
# ---------------------------------
|
| 84 |
# INTERFACE
|
| 85 |
# ---------------------------------
|
| 86 |
-
with gr.Blocks(css=".gradio-container { max-width:
|
| 87 |
gr.Markdown("""
|
| 88 |
|
| 89 |
-
|
| 90 |
<div align="center">
|
|
|
|
|
|
|
| 91 |
<h2><strong>Audio Flamingo 3</strong></h2>
|
| 92 |
<p><em>Advancing Audio Intelligence with Fully Open Large Audio-Language Models</em></p>
|
| 93 |
</div>
|
|
|
|
| 8 |
# ---------------------------------
|
| 9 |
# SINGLE-TURN MODEL SETUP
|
| 10 |
# ---------------------------------
|
| 11 |
+
api_key = os.getenv("my_secret")
|
| 12 |
+
|
| 13 |
+
MODEL_BASE_SINGLE = snapshot_download(repo_id="nvidia/audio-flamingo-3", local_dir="./", token=api_key)
|
| 14 |
MODEL_BASE_THINK = os.path.join(MODEL_BASE_SINGLE, 'stage35')
|
| 15 |
|
| 16 |
model_single = llava.load(MODEL_BASE_SINGLE, model_base=None, devices=[0])
|
|
|
|
| 85 |
# ---------------------------------
|
| 86 |
# INTERFACE
|
| 87 |
# ---------------------------------
|
| 88 |
+
with gr.Blocks(css=".gradio-container { max-width: 100% !important; margin: 0 auto !important; padding: 0 2rem; }") as demo:
|
| 89 |
gr.Markdown("""
|
| 90 |
|
|
|
|
| 91 |
<div align="center">
|
| 92 |
+
<img src="https://github.com/NVIDIA/audio-flamingo/raw/audio_flamingo_3/static/logo-no-bg.png" alt="Audio Flamingo 3 Logo" width="120" style="margin-bottom: 10px;">
|
| 93 |
+
|
| 94 |
<h2><strong>Audio Flamingo 3</strong></h2>
|
| 95 |
<p><em>Advancing Audio Intelligence with Fully Open Large Audio-Language Models</em></p>
|
| 96 |
</div>
|