Update app.py
app.py CHANGED
@@ -53,7 +53,7 @@ def convert_files(files):
 
 
 ################################################
-# Model Inference with
+# Model Inference with ModernVBERT and Qwen
 ################################################
 @spaces.GPU
 def index_gpu(images, ds):
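For context on the decorator kept unchanged in this hunk: `@spaces.GPU` (from the `spaces` package) attaches a ZeroGPU device only for the duration of the decorated call. A minimal runnable sketch of the pattern, with a dummy model standing in for the Space's actual retriever:

import spaces
import torch

# Dummy stand-in for the page retriever; the real Space loads its model at import time.
model = torch.nn.Linear(4, 4)

@spaces.GPU  # ZeroGPU: a GPU is allocated only while this function runs
def index_gpu(images, ds):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    for image in images:
        # Hypothetical embedding step; the real app encodes each rendered PDF page here.
        ds.append(torch.randn(4, device=device))
    return f"Uploaded and converted {len(images)} pages", ds, images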
@@ -81,17 +81,17 @@ def index_gpu(images, ds):
     return f"Uploaded and converted {len(images)} pages", ds, images
 
 
-def query_gemini(query, images, api_key):
-    """Calls
+def query_qwen(query, images, api_key):
+    """Calls Qwen model with the query and image data."""
     if api_key:
         try:
             # Convert images to base64 strings
             base64_images = [encode_image_to_base64(image[0]) for image in images]
 
-            # Initialize the OpenAI client with the
+            # Initialize the OpenAI client with the Hugging Face token
             client = OpenAI(
                 api_key=api_key.strip(),
-                base_url="https://
+                base_url="https://router.huggingface.co/v1",
             )
             PROMPT = """
             You are a smart assistant designed to answer questions about a PDF document.
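`encode_image_to_base64` is called above but defined outside this diff; a plausible helper consistent with sending the pages as data URLs (the PNG format and exact signature are assumptions):

import base64
from io import BytesIO

from PIL import Image

def encode_image_to_base64(image: Image.Image) -> str:
    # Serialize the PIL page image to PNG in memory, then base64-encode it so it
    # can be embedded in an OpenAI-style image_url content part.
    buffer = BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

# Example: a blank page stands in for a rendered PDF page.
print(encode_image_to_base64(Image.new("RGB", (64, 64), "white"))[:32])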
@@ -105,9 +105,9 @@ def query_gemini(query, images, api_key):
             PDF pages:
             """
 
-            # Get the response from the
+            # Get the response from the Qwen inference API
             response = client.chat.completions.create(
-                model="
+                model="Qwen/Qwen3-VL-30B-A3B-Instruct",
                 reasoning_effort="none",
                 messages=[
                     {
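The diff skips the rest of the `messages` payload, so only the model name and `reasoning_effort` are visible here. Below is a hedged sketch of the standard way base64 page images are attached to an OpenAI-compatible chat call against the Hugging Face router; the prompt text and image list are placeholders rather than the Space's actual values:

from openai import OpenAI

# Placeholders; in the app these come from the API-token textbox and query_qwen().
api_key = "hf_..."
base64_images: list[str] = []  # outputs of encode_image_to_base64 for each retrieved page

client = OpenAI(api_key=api_key, base_url="https://router.huggingface.co/v1")

# Build one user message whose content mixes text and image_url parts.
content = [{"type": "text", "text": "Answer the question using the PDF pages below."}]
for b64 in base64_images:
    content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}})

response = client.chat.completions.create(
    model="Qwen/Qwen3-VL-30B-A3B-Instruct",
    messages=[{"role": "user", "content": content}],
)
print(response.choices[0].message.content)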
@@ -132,10 +132,10 @@ def query_gemini(query, images, api_key):
 
         # Handle errors from the API
         except Exception as e:
-            return "API connection error! Please check your API
+            return "API connection error! Please check your API token and try again."
 
-    # If no API
-    return "Enter your
+    # If no API token is provided, return a message indicating that the user should enter their token
+    return "Enter your Hugging Face token to get a custom response."
 
 
 ################################################
@@ -175,8 +175,8 @@ def search(query: str, ds, images, k, api_key):
         img_copy = img.copy()
         results.append((img_copy, f"Page {idx}"))
 
-    # Generate response
-    ai_response =
+    # Generate response
+    ai_response = query_qwen(query, results, api_key)
 
     return results, ai_response
 
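For clarity on the data flow this hunk touches: each retrieved page is kept as an `(image, caption)` tuple, which `gr.Gallery` can display directly and which `query_qwen` unpacks via `image[0]` before encoding. A toy illustration, with blank images standing in for rendered PDF pages:

from PIL import Image

# Toy stand-ins for rendered PDF pages returned by the retriever.
retrieved_pages = [Image.new("RGB", (64, 64), "white") for _ in range(2)]

# Same structure as in search(): (PIL image, caption) tuples.
results = [(img.copy(), f"Page {idx}") for idx, img in enumerate(retrieved_pages)]

# gr.Gallery accepts `results` as-is; query_qwen reads image[0] from each tuple
# before base64-encoding it for the chat request.
print(type(results[0][0]).__name__, results[0][1])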
@@ -186,11 +186,11 @@ def search(query: str, ds, images, k, api_key):
 ################################################
 with gr.Blocks(theme=gr.themes.Ocean()) as demo:
     gr.Markdown(
-        "# Multimodal RAG with
+        "# Multimodal RAG with ModernVBERT & Qwen 📚"
     )
     gr.Markdown(
-        """Demo to test
-
+        """Demo to test ColModernVBERT (ModernVBERT) on PDF documents.
+    ModernVBERT is a model implemented from the paper [ModernVBERT: Towards Smaller Visual Document Retrievers](https://arxiv.org/abs/2510.01149).
     This demo allows you to upload PDF files and search for the most relevant pages based on your query.
     Refresh the page if you change documents!
     ⚠️ This demo uses a model trained exclusively on A4 PDFs in portrait mode, containing English text. Performance is expected to drop for other page formats and languages.
@@ -213,8 +213,8 @@ with gr.Blocks(theme=gr.themes.Ocean()) as demo:
         with gr.Column(scale=3):
             gr.Markdown("## 3️⃣ Search")
             api_key = gr.Textbox(
-                placeholder="Enter your
-                label="API
+                placeholder="Enter your Hugging Face token here (must be valid)",
+                label="API token",
             )
             query = gr.Textbox(placeholder="Enter your query here", label="Query")
             k = gr.Slider(
@@ -233,7 +233,7 @@ with gr.Blocks(theme=gr.themes.Ocean()) as demo:
                 label="Retrieved Documents", height=600, show_label=True
             )
 
-            gr.Markdown("## 5️⃣
+            gr.Markdown("## 5️⃣ Qwen Response")
             output_text = gr.Textbox(
                 label="AI Response",
                 placeholder="Generated response based on retrieved documents",
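Putting the UI hunks together, the layout being edited follows the usual Gradio Blocks pattern: a token textbox, a query textbox, a top-k slider, a gallery for retrieved pages, and an answer textbox. The sketch below is a self-contained approximation; the callback body, button, and event wiring are assumptions, since the full file is not part of this diff:

import gradio as gr

def search(query, k, api_key):
    # Placeholder callback: the real app retrieves the top-k page images and
    # calls query_qwen(); here we simply echo the inputs.
    return [], f"Would search for {query!r} with k={int(k)}"

with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    gr.Markdown("# Multimodal RAG with ModernVBERT & Qwen 📚")
    api_key = gr.Textbox(placeholder="Enter your Hugging Face token here (must be valid)", label="API token")
    query = gr.Textbox(placeholder="Enter your query here", label="Query")
    k = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of results")
    search_btn = gr.Button("Search")
    gallery = gr.Gallery(label="Retrieved Documents", height=600, show_label=True)
    output_text = gr.Textbox(label="AI Response", placeholder="Generated response based on retrieved documents")
    search_btn.click(search, inputs=[query, k, api_key], outputs=[gallery, output_text])

if __name__ == "__main__":
    demo.launch()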