sitammeur committed · Commit 3345f41 · verified · 1 Parent(s): e9061b2

Update app.py

Files changed (1):
  1. app.py +18 -18
app.py CHANGED

@@ -53,7 +53,7 @@ def convert_files(files):
 
 
 ################################################
-# Model Inference with ColPali and Gemini
+# Model Inference with ModernVBERT and Qwen
 ################################################
 @spaces.GPU
 def index_gpu(images, ds):
@@ -81,17 +81,17 @@ def index_gpu(images, ds):
     return f"Uploaded and converted {len(images)} pages", ds, images
 
 
-def query_gemini(query, images, api_key):
-    """Calls Google's Gemini model with the query and image data."""
+def query_qwen(query, images, api_key):
+    """Calls Qwen model with the query and image data."""
     if api_key:
         try:
             # Convert images to base64 strings
             base64_images = [encode_image_to_base64(image[0]) for image in images]
 
-            # Initialize the OpenAI client with the Gemini API key
+            # Initialize the OpenAI client with the Hugging Face token
             client = OpenAI(
                 api_key=api_key.strip(),
-                base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+                base_url="https://router.huggingface.co/v1",
             )
             PROMPT = """
             You are a smart assistant designed to answer questions about a PDF document.
@@ -105,9 +105,9 @@ def query_gemini(query, images, api_key):
             PDF pages:
             """
 
-            # Get the response from the Gemini API
+            # Get the response from the Qwen inference API
             response = client.chat.completions.create(
-                model="gemini-2.5-flash-lite",
+                model="Qwen/Qwen3-VL-30B-A3B-Instruct",
                 reasoning_effort="none",
                 messages=[
                     {
@@ -132,10 +132,10 @@ def query_gemini(query, images, api_key):
 
         # Handle errors from the API
         except Exception as e:
-            return "API connection error! Please check your API key and try again."
+            return "API connection error! Please check your API token and try again."
 
-    # If no API key is provided, return a message indicating that the user should enter their key
-    return "Enter your Gemini API key to get a custom response."
+    # If no API token is provided, return a message indicating that the user should enter their token
+    return "Enter your Hugging Face token to get a custom response."
 
 
 ################################################
@@ -175,8 +175,8 @@ def search(query: str, ds, images, k, api_key):
         img_copy = img.copy()
         results.append((img_copy, f"Page {idx}"))
 
-    # Generate response from Gemini
-    ai_response = query_gemini(query, results, api_key)
+    # Generate response
+    ai_response = query_qwen(query, results, api_key)
 
     return results, ai_response
 
@@ -186,11 +186,11 @@ def search(query: str, ds, images, k, api_key):
 ################################################
 with gr.Blocks(theme=gr.themes.Ocean()) as demo:
     gr.Markdown(
-        "# Multimodal RAG with ColVision & Gemini 📚"
+        "# Multimodal RAG with ModernVBERT & Qwen 📚"
     )
     gr.Markdown(
-        """Demo to test ColQwen2.5 (ColPali) on PDF documents.
-        ColPali is a model implemented from the [ColPali paper](https://arxiv.org/abs/2407.01449).
+        """Demo to test ColModernVBERT (ModernVBERT) on PDF documents.
+        ModernVBERT is a model implemented from the paper [ModernVBERT: Towards Smaller Visual Document Retrievers](https://arxiv.org/abs/2510.01149).
         This demo allows you to upload PDF files and search for the most relevant pages based on your query.
         Refresh the page if you change documents!
        ⚠️ This demo uses a model trained exclusively on A4 PDFs in portrait mode, containing English text. Performance is expected to drop for other page formats and languages.
@@ -213,8 +213,8 @@ with gr.Blocks(theme=gr.themes.Ocean()) as demo:
     with gr.Column(scale=3):
         gr.Markdown("## 3️⃣ Search")
         api_key = gr.Textbox(
-            placeholder="Enter your Gemini API key here (must be valid)",
-            label="API key",
+            placeholder="Enter your Hugging Face token here (must be valid)",
+            label="API token",
         )
         query = gr.Textbox(placeholder="Enter your query here", label="Query")
         k = gr.Slider(
@@ -233,7 +233,7 @@ with gr.Blocks(theme=gr.themes.Ocean()) as demo:
             label="Retrieved Documents", height=600, show_label=True
         )
 
-        gr.Markdown("## 5️⃣ Gemini Response")
+        gr.Markdown("## 5️⃣ Qwen Response")
         output_text = gr.Textbox(
             label="AI Response",
             placeholder="Generated response based on retrieved documents",