ladybug11 committed on
Commit
66d5109
·
1 Parent(s): 059772d
Files changed (3) hide show
  1. app.py +144 -40
  2. modal_video_processing.py +41 -14
  3. quote_generator_gemini.py +2 -1
app.py CHANGED
@@ -194,33 +194,73 @@ def search_pexels_video_tool(style: str, niche: str) -> dict:
194
  }
195
 
196
  @tool
197
- def generate_voice_narration_tool(quote_text: str, output_path: str) -> dict:
198
  """
199
- Generate voice narration for the quote using ElevenLabs.
 
 
200
 
201
  Args:
202
- quote_text: The quote text to narrate
 
203
  output_path: Path where to save the audio file
204
 
205
  Returns:
206
- Dictionary with success status and output path
207
  """
208
 
209
  try:
210
- # Generate audio using ElevenLabs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  audio = elevenlabs_client.text_to_speech.convert(
212
- text=quote_text,
213
- voice_id="pNInz6obpgDQGcFmaJgB", # Adam voice - clear and motivational
214
  model_id="eleven_multilingual_v2",
215
  voice_settings=VoiceSettings(
216
- stability=0.5,
217
- similarity_boost=0.75,
218
- style=0.5,
219
  use_speaker_boost=True
220
  )
221
  )
222
 
223
- # Save audio to file
224
  with open(output_path, 'wb') as f:
225
  for chunk in audio:
226
  f.write(chunk)
@@ -228,14 +268,16 @@ def generate_voice_narration_tool(quote_text: str, output_path: str) -> dict:
228
  return {
229
  "success": True,
230
  "output_path": output_path,
231
- "message": "Voice narration created successfully!"
 
232
  }
233
 
234
  except Exception as e:
235
  return {
236
  "success": False,
237
  "output_path": None,
238
- "message": f"Error creating voice: {str(e)}"
 
239
  }
240
 
241
  @tool
@@ -265,9 +307,13 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
265
 
266
  print("πŸš€ Processing on Modal (fast!)...")
267
 
268
- # For now, skip audio in Modal (would need to upload to cloud storage)
269
- # We'll process without audio for speed
270
- audio_url = None
 
 
 
 
271
 
272
  # Call Modal endpoint
273
  response = requests.post(
@@ -275,9 +321,9 @@ def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, a
275
  json={
276
  "video_url": video_url,
277
  "quote_text": quote_text,
278
- "audio_url": audio_url
279
  },
280
- timeout=300 # Increased to 5 minutes
281
  )
282
 
283
  if response.status_code == 200:
@@ -459,9 +505,9 @@ def initialize_agent():
459
 
460
  # Create agent with custom tools
461
  agent = CodeAgent(
462
- tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_narration_tool, create_quote_video_tool],
463
  model=model,
464
- additional_authorized_imports=["requests", "openai", "random", "tempfile", "os"],
465
  max_steps=15
466
  )
467
 
@@ -476,16 +522,29 @@ def initialize_agent():
476
  # Initialize agent
477
  agent, agent_error = initialize_agent()
478
 
479
- def mcp_agent_pipeline(niche, style, num_variations=1):
480
  """
481
  MCP-POWERED AUTONOMOUS AGENT PIPELINE
482
  Uses smolagents with proper MCP server integration
483
  Generates multiple video variations with Gemini-powered quotes
 
484
  """
485
 
 
 
 
 
 
 
 
 
486
  status_log = []
487
  status_log.append("πŸ€– **MCP AGENT STARTING**\n")
488
 
 
 
 
 
489
  if agent_error:
490
  status_log.append(f"❌ Agent initialization failed: {agent_error}")
491
  status_log.append("\nπŸ”„ Falling back to direct tool execution...\n")
@@ -523,9 +582,35 @@ def mcp_agent_pipeline(niche, style, num_variations=1):
523
 
524
  status_log.append("")
525
 
526
- # STEP 4: Create multiple video variations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
528
  status_log.append(f" ⏳ Creating {len(video_results)} video variations...")
 
 
529
 
530
  output_dir = "/tmp/quote_videos"
531
  gallery_dir = "/data/gallery_videos" # HF persistent storage
@@ -544,7 +629,7 @@ def mcp_agent_pipeline(niche, style, num_variations=1):
544
  video_result["video_url"],
545
  quote,
546
  output_path,
547
- None # No audio
548
  )
549
 
550
  if creation_result["success"]:
@@ -569,10 +654,12 @@ def mcp_agent_pipeline(niche, style, num_variations=1):
569
 
570
  status_log.append("")
571
 
572
- # STEP 5: Integration status
573
  status_log.append("πŸ”— **AI INTEGRATIONS:**")
574
  status_log.append(" βœ… Gemini API - Quote generation with variety tracking")
575
  status_log.append(" βœ… Pexels API - Video search")
 
 
576
  status_log.append(" βœ… Modal Compute - Fast video processing")
577
  if mcp_enabled:
578
  status_log.append(" βœ… MCP Server - abidlabs-mcp-tools.hf.space")
@@ -590,7 +677,7 @@ def mcp_agent_pipeline(niche, style, num_variations=1):
590
  status_log.append(f"\n❌ Pipeline error: {str(e)}")
591
  return "\n".join(status_log), []
592
 
593
- def fallback_pipeline(niche, style, num_variations=1):
594
  """Fallback pipeline if MCP agent fails"""
595
  status_log = []
596
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
@@ -604,6 +691,20 @@ def fallback_pipeline(niche, style, num_variations=1):
604
 
605
  status_log.append(f" βœ… Quote generated\n")
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  # Search videos
608
  status_log.append(f"πŸ” Searching for {num_variations} videos...")
609
  video_results = []
@@ -636,7 +737,7 @@ def fallback_pipeline(niche, style, num_variations=1):
636
  video_result["video_url"],
637
  quote,
638
  output_path,
639
- None # No audio
640
  )
641
 
642
  if creation_result["success"]:
@@ -666,21 +767,23 @@ def fallback_pipeline(niche, style, num_variations=1):
666
  with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
667
  gr.Markdown("""
668
  # 🎬 AIQuoteClipGenerator
669
- ### MCP-Powered with Gemini AI Integration
670
 
671
  **Key Features:**
672
  - 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
 
 
 
673
  - πŸ”— **MCP Server:** smolagents framework integration
674
- - πŸ› οΈ **4 Custom MCP Tools:** Quote (Gemini) + Video search + Voice + Video creation
675
- - πŸ€– **Agent Reasoning:** Autonomous task execution
676
  - ⚑ **Modal Processing:** 4-8x faster video creation
677
  - 🎨 **Multiple Variations:** Get different video styles
678
 
679
  **Prize Eligibility:**
680
- - βœ… Gemini API Integration ($10K Creative category)
681
- - βœ… OpenAI Fallback (API Integration $1K credits)
682
  - βœ… Modal Innovation Award ($2.5K)
683
- - βœ… ElevenLabs Voice Award (~$2K + AirPods)
684
  """)
685
 
686
  # Example Gallery - Instagram-style grid
@@ -757,6 +860,12 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
757
  info="Generate multiple versions to choose from"
758
  )
759
 
 
 
 
 
 
 
760
  generate_btn = gr.Button("πŸ€– Run MCP Agent with Gemini", variant="primary", size="lg")
761
 
762
  with gr.Column():
@@ -784,15 +893,10 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
784
  **Category:** Productivity Tools
785
  **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
786
 
787
- **Prize Targets:**
788
- - Google Gemini Creative Award ($10K)
789
- - Modal Innovation Award ($2.5K)
790
- - OpenAI API Integration ($1K credits)
791
- - ElevenLabs Voice Award (~$2K + AirPods)
792
  """)
793
 
794
- def process_and_display(niche, style, num_variations):
795
- status, videos = mcp_agent_pipeline(niche, style, num_variations)
796
 
797
  # Return up to 3 videos, None for unused slots
798
  v1 = videos[0] if len(videos) > 0 else None
@@ -806,7 +910,7 @@ with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.the
806
 
807
  generate_btn.click(
808
  process_and_display,
809
- inputs=[niche, style, num_variations],
810
  outputs=[
811
  output, video1, video2, video3,
812
  gallery_video1, gallery_video2, gallery_video3,
 
194
  }
195
 
196
  @tool
197
+ def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
198
  """
199
+ Generate insightful voice commentary explaining the deeper meaning of the quote.
200
+ Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
201
+ This adds VALUE - not just reading what's already on screen.
202
 
203
  Args:
204
+ quote_text: The quote to explain
205
+ niche: The niche/category for context
206
  output_path: Path where to save the audio file
207
 
208
  Returns:
209
+ Dictionary with success status, output path, and the explanation text
210
  """
211
 
212
  try:
213
+ # Step 1: Generate explanation using Gemini
214
+ import google.generativeai as genai
215
+
216
+ explanation_prompt = f"""Given this {niche} quote:
217
+
218
+ "{quote_text}"
219
+
220
+ Write a brief, insightful voice-over commentary that explains the deeper meaning or practical wisdom.
221
+
222
+ Requirements:
223
+ - 2-3 sentences maximum
224
+ - Around 25-35 words total
225
+ - Spoken naturally (like a wise mentor)
226
+ - Add insight that isn't obvious from reading
227
+ - Make it thought-provoking
228
+ - Don't start with "This quote..." - dive into the insight
229
+
230
+ Example:
231
+ Quote: "Between stimulus and response there is a space."
232
+ Good: "In that pause lies your freedom. That's where you choose who you become, not who your habits make you."
233
+
234
+ Return ONLY the commentary, nothing else."""
235
+
236
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
237
+ model = genai.GenerativeModel('gemini-pro')
238
+
239
+ response = model.generate_content(
240
+ explanation_prompt,
241
+ generation_config={
242
+ "temperature": 0.7,
243
+ "max_output_tokens": 100
244
+ }
245
+ )
246
+
247
+ explanation = response.text.strip().strip('"').strip("'")
248
+ print(f"πŸ“ Commentary: {explanation}")
249
+
250
+ # Step 2: Generate voice using ElevenLabs
251
  audio = elevenlabs_client.text_to_speech.convert(
252
+ text=explanation,
253
+ voice_id="pNInz6obpgDQGcFmaJgB", # Adam - thoughtful and clear
254
  model_id="eleven_multilingual_v2",
255
  voice_settings=VoiceSettings(
256
+ stability=0.6, # Stable for narration
257
+ similarity_boost=0.8,
258
+ style=0.6, # Expressive for commentary
259
  use_speaker_boost=True
260
  )
261
  )
262
 
263
+ # Save audio
264
  with open(output_path, 'wb') as f:
265
  for chunk in audio:
266
  f.write(chunk)
 
268
  return {
269
  "success": True,
270
  "output_path": output_path,
271
+ "explanation": explanation,
272
+ "message": "Voice commentary created!"
273
  }
274
 
275
  except Exception as e:
276
  return {
277
  "success": False,
278
  "output_path": None,
279
+ "explanation": None,
280
+ "message": f"Error creating commentary: {str(e)}"
281
  }
282
 
283
  @tool
 
307
 
308
  print("πŸš€ Processing on Modal (fast!)...")
309
 
310
+ # Prepare audio data if present
311
+ audio_b64 = None
312
+ if audio_path and os.path.exists(audio_path):
313
+ with open(audio_path, 'rb') as f:
314
+ audio_bytes = f.read()
315
+ audio_b64 = base64.b64encode(audio_bytes).decode()
316
+ print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
317
 
318
  # Call Modal endpoint
319
  response = requests.post(
 
321
  json={
322
  "video_url": video_url,
323
  "quote_text": quote_text,
324
+ "audio_b64": audio_b64 # Pass audio as base64
325
  },
326
+ timeout=300
327
  )
328
 
329
  if response.status_code == 200:
 
505
 
506
  # Create agent with custom tools
507
  agent = CodeAgent(
508
+ tools=[generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool],
509
  model=model,
510
+ additional_authorized_imports=["requests", "openai", "random", "tempfile", "os", "google.generativeai"],
511
  max_steps=15
512
  )
513
 
 
522
  # Initialize agent
523
  agent, agent_error = initialize_agent()
524
 
525
+ def mcp_agent_pipeline(niche, style, num_variations=1, add_voice=False):
526
  """
527
  MCP-POWERED AUTONOMOUS AGENT PIPELINE
528
  Uses smolagents with proper MCP server integration
529
  Generates multiple video variations with Gemini-powered quotes
530
+ Optionally adds ElevenLabs voice narration
531
  """
532
 
533
+ # Smart auto-enable voice for contemplative content
534
+ auto_voice_niches = ["Stoicism", "Mindfulness", "Leadership"]
535
+ if niche in auto_voice_niches and not add_voice:
536
+ add_voice = True
537
+ voice_reason = "🎀 Voice auto-enabled: Perfect for contemplative content!"
538
+ else:
539
+ voice_reason = None
540
+
541
  status_log = []
542
  status_log.append("πŸ€– **MCP AGENT STARTING**\n")
543
 
544
+ if voice_reason:
545
+ status_log.append(voice_reason)
546
+ status_log.append("")
547
+
548
  if agent_error:
549
  status_log.append(f"❌ Agent initialization failed: {agent_error}")
550
  status_log.append("\nπŸ”„ Falling back to direct tool execution...\n")
 
582
 
583
  status_log.append("")
584
 
585
+ # STEP 4: Generate voice commentary if enabled
586
+ audio_path = None
587
+ explanation_text = None
588
+ if add_voice:
589
+ status_log.append("🎀 **GEMINI + ELEVENLABS: generate_voice_commentary_tool**")
590
+ status_log.append(" ⏳ Creating insightful commentary...")
591
+
592
+ import tempfile
593
+ audio_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
594
+ audio_path = audio_file.name
595
+
596
+ voice_result = generate_voice_commentary_tool(quote, niche, audio_path)
597
+
598
+ if voice_result["success"]:
599
+ explanation_text = voice_result.get("explanation")
600
+ status_log.append(" βœ… Commentary created!")
601
+ status_log.append(f" πŸ’­ Insight: \"{explanation_text}\"")
602
+ status_log.append(f" πŸ”Š Using Gemini (explanation) + ElevenLabs (voice)")
603
+ else:
604
+ status_log.append(f" ⚠️ Commentary failed: {voice_result.get('message', 'Unknown error')}")
605
+ audio_path = None
606
+
607
+ status_log.append("")
608
+
609
+ # STEP 5: Create multiple video variations
610
  status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
611
  status_log.append(f" ⏳ Creating {len(video_results)} video variations...")
612
+ if add_voice and audio_path:
613
+ status_log.append(" 🎀 Including voice narration...")
614
 
615
  output_dir = "/tmp/quote_videos"
616
  gallery_dir = "/data/gallery_videos" # HF persistent storage
 
629
  video_result["video_url"],
630
  quote,
631
  output_path,
632
+ audio_path # Use voice if enabled
633
  )
634
 
635
  if creation_result["success"]:
 
654
 
655
  status_log.append("")
656
 
657
+ # STEP 6: Integration status
658
  status_log.append("πŸ”— **AI INTEGRATIONS:**")
659
  status_log.append(" βœ… Gemini API - Quote generation with variety tracking")
660
  status_log.append(" βœ… Pexels API - Video search")
661
+ if add_voice:
662
+ status_log.append(" βœ… ElevenLabs - Premium AI voice synthesis")
663
  status_log.append(" βœ… Modal Compute - Fast video processing")
664
  if mcp_enabled:
665
  status_log.append(" βœ… MCP Server - abidlabs-mcp-tools.hf.space")
 
677
  status_log.append(f"\n❌ Pipeline error: {str(e)}")
678
  return "\n".join(status_log), []
679
 
680
+ def fallback_pipeline(niche, style, num_variations=1, add_voice=False):
681
  """Fallback pipeline if MCP agent fails"""
682
  status_log = []
683
  status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
 
691
 
692
  status_log.append(f" βœ… Quote generated\n")
693
 
694
+ # Generate voice commentary if enabled
695
+ audio_path = None
696
+ if add_voice:
697
+ status_log.append("🎀 Generating commentary...")
698
+ import tempfile
699
+ audio_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
700
+ audio_path = audio_file.name
701
+ voice_result = generate_voice_commentary_tool(quote, niche, audio_path)
702
+ if voice_result["success"]:
703
+ status_log.append(f" βœ… Commentary: {voice_result.get('explanation')}\n")
704
+ else:
705
+ audio_path = None
706
+ status_log.append(" ⚠️ Commentary failed\n")
707
+
708
  # Search videos
709
  status_log.append(f"πŸ” Searching for {num_variations} videos...")
710
  video_results = []
 
737
  video_result["video_url"],
738
  quote,
739
  output_path,
740
+ audio_path # Use voice if enabled
741
  )
742
 
743
  if creation_result["success"]:
 
767
  with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
768
  gr.Markdown("""
769
  # 🎬 AIQuoteClipGenerator
770
+ ### MCP-Powered with Gemini AI + ElevenLabs Voice Commentary
771
 
772
  **Key Features:**
773
  - 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
774
+ - 🎀 **Voice Commentary:** AI explains the deeper meaning (not just reading the quote!)
775
+ - 🧠 **Dual Gemini Use:** Quote generation + Explanation generation
776
+ - πŸ”Š **ElevenLabs Voice:** Premium AI voice synthesis
777
  - πŸ”— **MCP Server:** smolagents framework integration
778
+ - πŸ› οΈ **4 Custom MCP Tools:** Quote + Video search + Commentary + Video creation
 
779
  - ⚑ **Modal Processing:** 4-8x faster video creation
780
  - 🎨 **Multiple Variations:** Get different video styles
781
 
782
  **Prize Eligibility:**
783
+ - βœ… Gemini API Integration ($10K Creative category) - Used TWICE!
784
+ - βœ… ElevenLabs Voice Award (~$2K + AirPods 4 Pro) - Adds real value!
785
  - βœ… Modal Innovation Award ($2.5K)
786
+ - βœ… OpenAI Fallback ($1K credits)
787
  """)
788
 
789
  # Example Gallery - Instagram-style grid
 
860
  info="Generate multiple versions to choose from"
861
  )
862
 
863
+ add_voice = gr.Checkbox(
864
+ label="🎀 Add Voice Commentary (Gemini + ElevenLabs)",
865
+ value=False,
866
+ info="AI explains the quote's deeper meaning with voice - adds real value! Auto-enabled for Stoicism/Mindfulness."
867
+ )
868
+
869
  generate_btn = gr.Button("πŸ€– Run MCP Agent with Gemini", variant="primary", size="lg")
870
 
871
  with gr.Column():
 
893
  **Category:** Productivity Tools
894
  **Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
895
 
 
 
 
 
 
896
  """)
897
 
898
+ def process_and_display(niche, style, num_variations, add_voice):
899
+ status, videos = mcp_agent_pipeline(niche, style, num_variations, add_voice)
900
 
901
  # Return up to 3 videos, None for unused slots
902
  v1 = videos[0] if len(videos) > 0 else None
 
910
 
911
  generate_btn.click(
912
  process_and_display,
913
+ inputs=[niche, style, num_variations, add_voice],
914
  outputs=[
915
  output, video1, video2, video3,
916
  gallery_video1, gallery_video2, gallery_video3,
modal_video_processing.py CHANGED
@@ -26,7 +26,7 @@ image = modal.Image.debian_slim(python_version="3.11").pip_install(
26
  keep_warm=1, # Keep 1 container warm to eliminate cold starts!
27
  container_idle_timeout=300, # Keep alive for 5 minutes
28
  )
29
- def process_quote_video(video_url: str, quote_text: str, audio_url: str = None) -> bytes:
30
  """
31
  Process quote video on Modal's fast infrastructure.
32
  Downloads video, adds text overlay, optionally adds audio, returns video bytes.
@@ -34,7 +34,7 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
34
  Args:
35
  video_url: URL of background video
36
  quote_text: Quote to overlay
37
- audio_url: Optional URL of audio file
38
 
39
  Returns:
40
  bytes: Processed video file as bytes
@@ -45,6 +45,7 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
45
  from PIL import Image, ImageDraw, ImageFont
46
  import numpy as np
47
  import time
 
48
 
49
  start_time = time.time()
50
  print(f"🎬 Starting video processing on Modal...")
@@ -146,11 +147,37 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
146
  final_video = CompositeVideoClip([video, text_clip])
147
  print(f"βœ… Composited in {time.time() - composite_start:.1f}s")
148
 
149
- # Skip audio for speed
150
- if audio_url:
151
- print("⚠️ Skipping audio for speed optimization")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- # Export with FASTEST possible settings
154
  print("πŸ“¦ Exporting video...")
155
  export_start = time.time()
156
  output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
@@ -159,13 +186,13 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
159
  output_file.name,
160
  codec='libx264',
161
  audio_codec='aac',
162
- fps=10, # Even lower - 10fps is fine for quote videos
163
  preset='ultrafast',
164
  threads=2,
165
  verbose=False,
166
  logger=None,
167
- bitrate="400k", # Lower bitrate
168
- ffmpeg_params=['-crf', '30', '-g', '30'] # Even lower quality, larger GOP
169
  )
170
 
171
  print(f"βœ… Video exported in {time.time() - export_start:.1f}s")
@@ -192,18 +219,18 @@ def process_quote_video(video_url: str, quote_text: str, audio_url: str = None)
192
  @modal.web_endpoint(method="POST")
193
  def process_video_endpoint(data: dict):
194
  """
195
- Web endpoint to process videos.
196
- Accepts JSON with video_url, quote_text, and optional audio_url.
197
  """
198
  video_url = data.get("video_url")
199
  quote_text = data.get("quote_text")
200
- audio_url = data.get("audio_url")
201
 
202
  if not video_url or not quote_text:
203
  return {"error": "Missing video_url or quote_text"}, 400
204
 
205
  try:
206
- video_bytes = process_quote_video.remote(video_url, quote_text, audio_url)
207
 
208
  # Return video bytes as base64
209
  import base64
@@ -225,6 +252,6 @@ if __name__ == "__main__":
225
  result = process_quote_video.remote(
226
  video_url="https://videos.pexels.com/video-files/3843433/3843433-uhd_2732_1440_25fps.mp4",
227
  quote_text="Test quote for local testing",
228
- audio_url=None
229
  )
230
  print(f"Got video: {len(result)} bytes")
 
26
  keep_warm=1, # Keep 1 container warm to eliminate cold starts!
27
  container_idle_timeout=300, # Keep alive for 5 minutes
28
  )
29
+ def process_quote_video(video_url: str, quote_text: str, audio_b64: str = None) -> bytes:
30
  """
31
  Process quote video on Modal's fast infrastructure.
32
  Downloads video, adds text overlay, optionally adds audio, returns video bytes.
 
34
  Args:
35
  video_url: URL of background video
36
  quote_text: Quote to overlay
37
+ audio_b64: Optional base64-encoded audio file
38
 
39
  Returns:
40
  bytes: Processed video file as bytes
 
45
  from PIL import Image, ImageDraw, ImageFont
46
  import numpy as np
47
  import time
48
+ import base64
49
 
50
  start_time = time.time()
51
  print(f"🎬 Starting video processing on Modal...")
 
147
  final_video = CompositeVideoClip([video, text_clip])
148
  print(f"βœ… Composited in {time.time() - composite_start:.1f}s")
149
 
150
+ # Add audio if provided
151
+ if audio_b64:
152
+ print("🎀 Adding voice commentary audio...")
153
+ audio_start = time.time()
154
+ try:
155
+ # Decode base64 audio
156
+ audio_bytes = base64.b64decode(audio_b64)
157
+
158
+ # Save to temp file
159
+ temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
160
+ with open(temp_audio.name, 'wb') as f:
161
+ f.write(audio_bytes)
162
+
163
+ # Load audio clip
164
+ audio_clip = AudioFileClip(temp_audio.name)
165
+
166
+ # Use the shorter duration between video and audio
167
+ audio_duration = min(audio_clip.duration, final_video.duration)
168
+ audio_clip = audio_clip.subclip(0, audio_duration)
169
+
170
+ # Set audio on video
171
+ final_video = final_video.set_audio(audio_clip)
172
+
173
+ print(f"βœ… Audio added in {time.time() - audio_start:.1f}s")
174
+
175
+ # Cleanup audio temp file
176
+ os.unlink(temp_audio.name)
177
+ except Exception as e:
178
+ print(f"⚠️ Audio failed: {e}, continuing without audio")
179
 
180
+ # Export with optimized settings
181
  print("πŸ“¦ Exporting video...")
182
  export_start = time.time()
183
  output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
 
186
  output_file.name,
187
  codec='libx264',
188
  audio_codec='aac',
189
+ fps=10, # Lower fps for speed
190
  preset='ultrafast',
191
  threads=2,
192
  verbose=False,
193
  logger=None,
194
+ bitrate="400k",
195
+ ffmpeg_params=['-crf', '30', '-g', '30']
196
  )
197
 
198
  print(f"βœ… Video exported in {time.time() - export_start:.1f}s")
 
219
  @modal.web_endpoint(method="POST")
220
  def process_video_endpoint(data: dict):
221
  """
222
+ Web endpoint to process videos with optional audio.
223
+ Accepts JSON with video_url, quote_text, and optional audio_b64.
224
  """
225
  video_url = data.get("video_url")
226
  quote_text = data.get("quote_text")
227
+ audio_b64 = data.get("audio_b64") # Changed from audio_url
228
 
229
  if not video_url or not quote_text:
230
  return {"error": "Missing video_url or quote_text"}, 400
231
 
232
  try:
233
+ video_bytes = process_quote_video.remote(video_url, quote_text, audio_b64)
234
 
235
  # Return video bytes as base64
236
  import base64
 
252
  result = process_quote_video.remote(
253
  video_url="https://videos.pexels.com/video-files/3843433/3843433-uhd_2732_1440_25fps.mp4",
254
  quote_text="Test quote for local testing",
255
+ audio_b64=None
256
  )
257
  print(f"Got video: {len(result)} bytes")
quote_generator_gemini.py CHANGED
@@ -283,4 +283,5 @@ if __name__ == "__main__":
283
 
284
  except Exception as e:
285
  print(f"Error: {e}")
286
- print("\nMake sure GEMINI_API_KEY is set in environment variables")
 
 
283
 
284
  except Exception as e:
285
  print(f"Error: {e}")
286
+ print("\nMake sure GEMINI_API_KEY is set in environment variables")
287
+