import gradio as gr
import os
import requests
import random
import tempfile
from openai import OpenAI
from smolagents import CodeAgent, MCPClient, tool
from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from elevenlabs import ElevenLabs, VoiceSettings
# Import our new Gemini quote generator
from quote_generator_gemini import HybridQuoteGenerator
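# Configuration comes from environment variables:
#   OPENAI_API_KEY, PEXELS_API_KEY, ELEVENLABS_API_KEY, GEMINI_API_KEY, HF_TOKEN
#   MODAL_ENDPOINT_URL (optional) - Modal endpoint for fast video rendering
#   ENABLE_LOCAL_PROCESSING=1 (optional) - opt into the slow local MoviePy path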
# Initialize clients
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
# Initialize Hybrid Quote Generator (Gemini + OpenAI fallback)
hybrid_quote_generator = HybridQuoteGenerator(
gemini_key=os.getenv("GEMINI_API_KEY"),
openai_client=openai_client
)
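# generate_quote() returns a dict with "success", "quote", "source"
# ("gemini" or "openai"), plus optional "stats" and "error" keys
# (the keys consumed by generate_quote_tool below).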
# Initialize MCP Client (connecting to existing MCP server)
try:
mcp_client = MCPClient("https://abidlabs-mcp-tools.hf.space")
mcp_enabled = True
except Exception as e:
print(f"MCP initialization warning: {e}")
mcp_enabled = False
# Define custom tools for the MCP agent
@tool
def generate_quote_tool(niche: str, style: str) -> str:
"""
Generate a powerful inspirational quote using Gemini AI with variety tracking.
Falls back to OpenAI if Gemini is unavailable.
Args:
niche: The category of quote (Motivation, Business, Fitness, etc.)
style: The visual style (Cinematic, Nature, Urban, Minimal, Abstract)
Returns:
A powerful, unique quote string
"""
try:
result = hybrid_quote_generator.generate_quote(niche, style, prefer_gemini=True)
if result["success"]:
quote = result["quote"]
source = result["source"]
# Log which generator was used
if source == "gemini":
stats = result.get("stats", {})
print(f"✨ Generated with Gemini (Total: {stats.get('total_quotes_generated', 0)})")
else:
print(f"✨ Generated with OpenAI (fallback)")
return quote
else:
error_msg = result.get("error", "Unknown error")
return f"Error generating quote: {error_msg}"
except Exception as e:
return f"Error generating quote: {str(e)}"
@tool
def search_pexels_video_tool(style: str, niche: str) -> dict:
"""
Search and fetch a matching video from Pexels based on style and niche.
Args:
style: Visual style (Cinematic, Nature, Urban, Minimal, Abstract)
niche: Content niche (Motivation, Business, Fitness, etc.)
Returns:
Dictionary with video_url, search_query, and pexels_url
"""
# Intelligent search strategy mapping
search_strategies = {
"Motivation": {
"Cinematic": ["person climbing mountain", "running sunrise", "achievement success"],
"Nature": ["sunrise mountain peak", "ocean waves powerful", "forest light"],
"Urban": ["city skyline dawn", "person running city", "urban success"],
"Minimal": ["minimal motivation", "single person silhouette", "clean inspiring"],
"Abstract": ["light rays hope", "particles rising", "abstract energy"]
},
"Business/Entrepreneurship": {
"Cinematic": ["business cityscape", "office modern", "handshake deal"],
"Nature": ["growth plant", "river flowing", "sunrise new beginning"],
"Urban": ["city business", "office skyline", "modern workspace"],
"Minimal": ["desk minimal", "workspace clean", "simple office"],
"Abstract": ["network connections", "growth chart", "abstract progress"]
},
"Fitness": {
"Cinematic": ["athlete training", "gym workout", "running outdoor"],
"Nature": ["outdoor workout", "mountain hiking", "beach running"],
"Urban": ["city running", "urban fitness", "street workout"],
"Minimal": ["gym minimal", "simple workout", "clean fitness"],
"Abstract": ["energy motion", "strength power", "dynamic movement"]
},
"Mindfulness": {
"Cinematic": ["meditation sunset", "peaceful landscape", "calm water"],
"Nature": ["forest peaceful", "calm lake", "zen garden"],
"Urban": ["city peaceful morning", "quiet street", "urban calm"],
"Minimal": ["minimal zen", "simple meditation", "clean peaceful"],
"Abstract": ["calm waves", "gentle motion", "soft particles"]
},
"Stoicism": {
"Cinematic": ["ancient architecture", "statue philosopher", "timeless landscape"],
"Nature": ["mountain strong", "oak tree", "stone nature"],
"Urban": ["classical building", "statue city", "ancient modern"],
"Minimal": ["stone minimal", "simple strong", "pillar minimal"],
"Abstract": ["marble texture", "stone abstract", "timeless pattern"]
},
"Leadership": {
"Cinematic": ["team meeting", "leader speaking", "group collaboration"],
"Nature": ["eagle flying", "lion pride", "mountain top"],
"Urban": ["office leadership", "boardroom", "city leadership"],
"Minimal": ["chess pieces", "simple leadership", "clean professional"],
"Abstract": ["network leader", "connection points", "guiding light"]
},
"Love & Relationships": {
"Cinematic": ["couple sunset", "romance beautiful", "love cinematic"],
"Nature": ["couple nature", "romantic sunset", "peaceful together"],
"Urban": ["couple city", "romance urban", "love city lights"],
"Minimal": ["hands holding", "simple love", "minimal romance"],
"Abstract": ["hearts flowing", "love particles", "connection abstract"]
}
}
# Get queries for this niche + style combination
queries = search_strategies.get(niche, {}).get(style, ["aesthetic nature"])
try:
headers = {"Authorization": PEXELS_API_KEY}
# Pick a random query for variety
query = random.choice(queries)
url = f"https://api.pexels.com/videos/search?query={query}&per_page=15&orientation=portrait"
response = requests.get(url, headers=headers)
data = response.json()
if "videos" in data and len(data["videos"]) > 0:
# Pick a random video from results
video = random.choice(data["videos"][:10])
video_files = video.get("video_files", [])
# Find portrait/vertical video
portrait_videos = [vf for vf in video_files if vf.get("width", 0) < vf.get("height", 0)]
if portrait_videos:
selected = random.choice(portrait_videos)
return {
"video_url": selected.get("link"),
"search_query": query,
"pexels_url": video.get("url"),
"success": True
}
            # Fallback: use the first available video file
if video_files:
return {
"video_url": video_files[0].get("link"),
"search_query": query,
"pexels_url": video.get("url"),
"success": True
}
return {
"video_url": None,
"search_query": query,
"pexels_url": None,
"success": False,
"error": "No suitable videos found"
}
except Exception as e:
return {
"video_url": None,
"search_query": "",
"pexels_url": None,
"success": False,
"error": str(e)
}
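# Illustrative successful return value (URLs elided):
# {"video_url": "https://...", "search_query": "sunrise mountain peak",
#  "pexels_url": "https://www.pexels.com/video/...", "success": True}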
@tool
def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
"""
Generate insightful voice commentary explaining the deeper meaning of the quote.
Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
This adds VALUE - not just reading what's already on screen.
Args:
quote_text: The quote to explain
niche: The niche/category for context
output_path: Path where to save the audio file
Returns:
Dictionary with success status, output path, and the explanation text
"""
try:
# Step 1: Generate explanation using Gemini
import google.generativeai as genai
explanation_prompt = f"""Given this {niche} quote:
"{quote_text}"
Write a brief, insightful voice-over commentary that explains the deeper meaning or practical wisdom.
Requirements:
- 2-3 sentences maximum
- Around 25-35 words total
- Spoken naturally (like a wise mentor)
- Add insight that isn't obvious from reading
- Make it thought-provoking
- Don't start with "This quote..." - dive into the insight
Example:
Quote: "Between stimulus and response there is a space."
Good: "In that pause lies your freedom. That's where you choose who you become, not who your habits make you."
Return ONLY the commentary, nothing else."""
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-1.5-flash') # Updated model name
response = model.generate_content(
explanation_prompt,
generation_config={
"temperature": 0.7,
"max_output_tokens": 100
}
)
explanation = response.text.strip().strip('"').strip("'")
print(f"πŸ“ Commentary: {explanation}")
# Step 2: Generate voice using ElevenLabs
audio = elevenlabs_client.text_to_speech.convert(
text=explanation,
voice_id="pNInz6obpgDQGcFmaJgB", # Adam - thoughtful and clear
model_id="eleven_multilingual_v2",
voice_settings=VoiceSettings(
stability=0.6, # Stable for narration
similarity_boost=0.8,
style=0.6, # Expressive for commentary
use_speaker_boost=True
)
)
# Save audio
with open(output_path, 'wb') as f:
for chunk in audio:
f.write(chunk)
return {
"success": True,
"output_path": output_path,
"explanation": explanation,
"message": "Voice commentary created!"
}
except Exception as e:
return {
"success": False,
"output_path": None,
"explanation": None,
"message": f"Error creating commentary: {str(e)}"
}
@tool
def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str = None) -> dict:
"""
Create a final quote video by overlaying text on the background video.
    Uses Modal for fast processing (4-8x faster); the local MoviePy fallback is opt-in via ENABLE_LOCAL_PROCESSING=1.
Optionally adds voice narration audio.
Args:
video_url: URL of the background video from Pexels
quote_text: The quote text to overlay
output_path: Path where to save the final video
audio_path: Optional path to audio file for voice narration
Returns:
Dictionary with success status and output path
"""
# Check if Modal is configured
modal_endpoint = os.getenv("MODAL_ENDPOINT_URL")
if modal_endpoint:
try:
            import base64  # requests is already imported at module level
            print("🚀 Processing on Modal (fast!)...")
# Prepare audio data if present
audio_b64 = None
if audio_path and os.path.exists(audio_path):
with open(audio_path, 'rb') as f:
audio_bytes = f.read()
audio_b64 = base64.b64encode(audio_bytes).decode()
print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
# Call Modal endpoint with longer timeout
response = requests.post(
modal_endpoint,
json={
"video_url": video_url,
"quote_text": quote_text,
"audio_b64": audio_b64 # Pass audio as base64
},
timeout=120 # 2 minute timeout
)
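            # Expected response body on success (keys as consumed below):
            # {"success": true, "video": "<base64-encoded mp4>", "size_mb": 1.23}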
if response.status_code == 200:
result = response.json()
if result.get("success"):
# Decode video bytes
video_b64 = result["video"]
video_bytes = base64.b64decode(video_b64)
# Save to output path
with open(output_path, 'wb') as f:
f.write(video_bytes)
print(f"βœ… Modal processing complete! {result['size_mb']:.2f}MB")
return {
"success": True,
"output_path": output_path,
"message": f"Video created via Modal in ~20s ({result['size_mb']:.2f}MB)"
}
else:
print(f"⚠️ Modal returned error: {result.get('error', 'Unknown')}")
else:
print(f"⚠️ Modal HTTP error: {response.status_code}")
# If Modal failed, fall through to local processing
print("⚠️ Modal failed, falling back to local processing...")
except requests.Timeout:
print(f"⚠️ Modal timeout after 120s, falling back to local...")
except Exception as e:
print(f"⚠️ Modal error: {e}, falling back to local processing...")
else:
print("ℹ️ MODAL_ENDPOINT_URL not configured, using local processing")
    # LOCAL PROCESSING (fallback)
    # MoviePy rendering can hang on constrained HF Spaces hardware, so it is
    # gated behind an explicit opt-in flag (ENABLE_LOCAL_PROCESSING is a
    # convention of this app, not a Modal/HF setting); by default we return
    # an error instead of risking a hang.
    if os.getenv("ENABLE_LOCAL_PROCESSING") != "1":
        return {
            "success": False,
            "output_path": None,
            "message": "Local processing disabled - please configure Modal for video generation. Deploy Modal with: modal deploy modal_video_processing.py"
        }
    print("🔧 Processing locally (may be slow)...")
try:
import time
processing_start = time.time()
# Step 1: Download the video
response = requests.get(video_url, stream=True, timeout=30)
response.raise_for_status()
# Create temporary file for downloaded video
temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
with open(temp_video.name, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
# Step 2: Load video with MoviePy
video = VideoFileClip(temp_video.name)
# Get video dimensions
w, h = video.size
# Step 3: Create text overlay using PIL
def make_text_frame(t):
"""Generate a text frame using PIL"""
# Create transparent image
img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
# Calculate font size (2.5% of video height - smaller for better aesthetic)
font_size = int(h * 0.025)
# Try to load a font, fall back to default if needed
            try:
                # Try common fonts available on Linux
                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
            except OSError:
                try:
                    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size)
                except OSError:
                    # Fall back to PIL's built-in bitmap font
                    font = ImageFont.load_default()
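            # NOTE: load_default() ignores font_size on older Pillow releases
            # (it returns a small bitmap font); Pillow 10.1+ accepts
            # load_default(size=...) if a larger fallback is needed.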
# Wrap text to fit width (60% of video width for better proportions)
max_width = int(w * 0.6)
# Manual text wrapping with better line length
words = quote_text.split()
lines = []
current_line = []
for word in words:
test_line = ' '.join(current_line + [word])
# Get text bbox to check width
bbox = draw.textbbox((0, 0), test_line, font=font)
text_width = bbox[2] - bbox[0]
if text_width <= max_width:
current_line.append(word)
else:
if current_line:
lines.append(' '.join(current_line))
current_line = [word]
else:
lines.append(word)
if current_line:
lines.append(' '.join(current_line))
# Calculate total text height with better line spacing
line_spacing = int(font_size * 0.4)
text_block_height = len(lines) * (font_size + line_spacing)
# Start y position (centered vertically)
y = (h - text_block_height) // 2
# Draw each line centered
for line in lines:
# Get text size
bbox = draw.textbbox((0, 0), line, font=font)
text_width = bbox[2] - bbox[0]
# Center horizontally
x = (w - text_width) // 2
# Draw black outline (stroke) - thinner for smaller text
outline_width = max(2, int(font_size * 0.08))
for adj_x in range(-outline_width, outline_width + 1):
for adj_y in range(-outline_width, outline_width + 1):
draw.text((x + adj_x, y + adj_y), line, font=font, fill='black')
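                # Pillow >= 6.2 could draw this outline in a single call via
                # draw.text(..., stroke_width=outline_width, stroke_fill='black'),
                # avoiding this O(outline_width^2) loop.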
# Draw white text on top
draw.text((x, y), line, font=font, fill='white')
y += font_size + line_spacing
return np.array(img)
        # Step 4: Create a static text overlay (the text doesn't animate,
        # so one rendered frame is reused for the clip's full duration)
        text_clip = ImageClip(make_text_frame(0), duration=video.duration)
# Step 5: Composite video with text
final_video = CompositeVideoClip([video, text_clip])
# Step 5.5: Add voice narration if provided
if audio_path and os.path.exists(audio_path):
try:
print("🎀 Adding audio track...")
audio_clip = AudioFileClip(audio_path)
# Use the shorter duration between video and audio
audio_duration = min(audio_clip.duration, final_video.duration)
audio_clip = audio_clip.subclip(0, audio_duration)
final_video = final_video.set_audio(audio_clip)
print("βœ… Audio added successfully")
except Exception as audio_error:
print(f"⚠️ Could not add audio: {audio_error}")
print("⚠️ Continuing without audio...")
# Continue without audio rather than failing
# Step 6: Export final video
print("πŸ“¦ Exporting video (this may take 30-60s)...")
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
temp_audiofile='temp-audio.m4a',
remove_temp=True,
fps=24,
preset='ultrafast', # Faster encoding
threads=4,
logger=None, # Suppress verbose output
verbose=False
)
print(f"βœ… Video export complete! ({time.time() - processing_start:.1f}s total)")
# Cleanup
video.close()
final_video.close()
os.unlink(temp_video.name)
return {
"success": True,
"output_path": output_path,
"message": "Video created successfully!"
}
except Exception as e:
return {
"success": False,
"output_path": None,
"message": f"Error creating video: {str(e)}"
}
# Initialize the MCP-powered agent
def initialize_agent():
"""Initialize the CodeAgent with MCP capabilities"""
    try:
        # smolagents expects a model wrapper, not a raw huggingface_hub
        # InferenceClient; the wrapper class was renamed between releases.
        try:
            from smolagents import InferenceClientModel as AgentModel  # newer releases
        except ImportError:
            from smolagents import HfApiModel as AgentModel  # older releases
        model = AgentModel(token=os.getenv("HF_TOKEN"))
        # Custom tools, plus any tools exposed by the MCP server
        tools = [generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool]
        if mcp_enabled:
            try:
                tools.extend(mcp_client.get_tools())
            except Exception as mcp_error:
                print(f"MCP tools unavailable: {mcp_error}")
        agent = CodeAgent(
            tools=tools,
            model=model,
            additional_authorized_imports=["requests", "openai", "random", "tempfile", "os", "google.generativeai"],
            max_steps=15
        )
        return agent, None
except Exception as e:
return None, f"Agent initialization error: {str(e)}"
# Initialize agent
agent, agent_error = initialize_agent()
def mcp_agent_pipeline(niche, style, num_variations=1):
    """
    MCP-POWERED PIPELINE

    Runs the tool chain step by step: Gemini-powered quote generation ->
    Pexels video search -> video rendering, producing up to num_variations
    variations. Falls back to fallback_pipeline() if the agent failed to
    initialize.
    """
status_log = []
status_log.append("πŸ€– **MCP AGENT STARTING**\n")
if agent_error:
status_log.append(f"❌ Agent initialization failed: {agent_error}")
status_log.append("\nπŸ”„ Falling back to direct tool execution...\n")
return fallback_pipeline(niche, style, num_variations)
try:
# STEP 1: Agent receives task
status_log.append("πŸ“‹ **TASK RECEIVED:**")
status_log.append(f" β†’ Generate {niche} quote with {style} aesthetic")
status_log.append(f" β†’ Create {num_variations} video variations")
status_log.append("")
# STEP 2: Agent executes quote generation with Gemini
status_log.append("🧠 **GEMINI AI: generate_quote_tool**")
quote = generate_quote_tool(niche, style)
if "Error" in quote:
return "\n".join(status_log) + f"\n❌ Failed: {quote}", []
status_log.append(f" βœ… Generated: \"{quote[:100]}...\"" if len(quote) > 100 else f" βœ… Generated: \"{quote}\"\n")
# STEP 3: Search for multiple videos
status_log.append(f"πŸ” **MCP TOOL: search_pexels_video_tool (x{num_variations})**")
status_log.append(f" ⏳ Finding {num_variations} different videos...")
video_results = []
for i in range(num_variations):
video_result = search_pexels_video_tool(style, niche)
if video_result["success"]:
video_results.append(video_result)
status_log.append(f" βœ… Video {i+1}: {video_result['search_query']}")
if not video_results:
return "\n".join(status_log) + "\n❌ No videos found", []
status_log.append("")
# STEP 4: Create multiple video variations
status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
status_log.append(f" ⏳ Creating {len(video_results)} video variations in parallel...")
output_dir = "/tmp/quote_videos"
gallery_dir = "/data/gallery_videos"
os.makedirs(output_dir, exist_ok=True)
os.makedirs(gallery_dir, exist_ok=True)
import time
timestamp = int(time.time())
# Use threading for parallel Modal calls
import threading
import queue
results_queue = queue.Queue()
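        # queue.Queue is thread-safe, so the worker threads below can push
        # results concurrently without extra locking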
def create_single_video(index, video_result):
output_filename = f"quote_video_v{index+1}_{timestamp}.mp4"
output_path = os.path.join(output_dir, output_filename)
creation_result = create_quote_video_tool(
video_result["video_url"],
quote,
output_path,
            None  # no voice commentary audio for batch variations
)
results_queue.put((index, creation_result, output_path))
# Start all threads
threads = []
for i, video_result in enumerate(video_results):
thread = threading.Thread(target=create_single_video, args=(i, video_result))
thread.start()
threads.append(thread)
# Wait for all to complete
for thread in threads:
thread.join()
# Collect results
created_videos = []
all_results = []
while not results_queue.empty():
all_results.append(results_queue.get())
# Sort by index
all_results.sort(key=lambda x: x[0])
# Process results
for index, creation_result, output_path in all_results:
if creation_result["success"]:
created_videos.append(output_path)
status_log.append(f" βœ… Variation {index+1} created!")
                # Copy to gallery (best-effort; /data is HF persistent storage)
                import shutil
                gallery_filename = f"gallery_{timestamp}_v{index+1}.mp4"
                gallery_path = os.path.join(gallery_dir, gallery_filename)
                try:
                    shutil.copy2(output_path, gallery_path)
                except OSError:
                    pass
            else:
                error_msg = creation_result.get("message", "Unknown error")
                status_log.append(f" ⚠️ Variation {index+1} failed: {error_msg}")
if not created_videos:
status_log.append("\n❌ All video creations failed")
return "\n".join(status_log), []
status_log.append("")
# STEP 5: Integration status
status_log.append("πŸ”— **AI INTEGRATIONS:**")
status_log.append(" βœ… Gemini API - Quote generation with variety tracking")
status_log.append(" βœ… Pexels API - Video search")
status_log.append(" βœ… Modal Compute - Fast video processing")
if mcp_enabled:
status_log.append(" βœ… MCP Server - abidlabs-mcp-tools.hf.space")
status_log.append("")
# STEP 6: Success!
status_log.append("✨ **PIPELINE COMPLETE!**")
status_log.append(f" 🎬 Created {len(created_videos)} unique video variations")
status_log.append(f" πŸ“₯ Choose your favorite and download!")
final_status = "\n".join(status_log)
return final_status, created_videos
except Exception as e:
status_log.append(f"\n❌ Pipeline error: {str(e)}")
return "\n".join(status_log), []
def fallback_pipeline(niche, style, num_variations=1):
"""Fallback pipeline if MCP agent fails"""
status_log = []
status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
# Generate quote
status_log.append("🧠 Generating quote with Gemini...")
quote = generate_quote_tool(niche, style)
if "Error" in quote:
return "\n".join(status_log) + f"\n❌ {quote}", []
status_log.append(f" βœ… Quote generated\n")
# Search videos
status_log.append(f"πŸ” Searching for {num_variations} videos...")
video_results = []
for i in range(num_variations):
video_result = search_pexels_video_tool(style, niche)
if video_result["success"]:
video_results.append(video_result)
if not video_results:
return "\n".join(status_log) + "\n❌ No videos found", []
status_log.append(f" βœ… Found {len(video_results)} videos\n")
# Create videos
status_log.append("🎬 Creating videos...")
output_dir = "/tmp/quote_videos"
gallery_dir = "/data/gallery_videos" # HF persistent storage
os.makedirs(output_dir, exist_ok=True)
os.makedirs(gallery_dir, exist_ok=True)
import time
timestamp = int(time.time())
created_videos = []
for i, video_result in enumerate(video_results):
output_filename = f"quote_video_v{i+1}_{timestamp}.mp4"
output_path = os.path.join(output_dir, output_filename)
creation_result = create_quote_video_tool(
video_result["video_url"],
quote,
output_path,
None # No audio
)
if creation_result["success"]:
created_videos.append(creation_result["output_path"])
            # Copy to gallery (best-effort)
            import shutil
            gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
            gallery_path = os.path.join(gallery_dir, gallery_filename)
            try:
                shutil.copy2(creation_result["output_path"], gallery_path)
            except OSError:
                pass
else:
error_msg = creation_result.get("message", "Unknown error")
status_log.append(f" ❌ Video {i+1} error: {error_msg}")
if not created_videos:
return "\n".join(status_log) + "\n❌ Video creation failed", []
status_log.append(f" βœ… Created {len(created_videos)} videos!\n")
status_log.append("🎬 **COMPLETE!**")
return "\n".join(status_log), created_videos
# Gradio Interface
with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# 🎬 AIQuoteClipGenerator
### MCP-Powered with Gemini AI
**Key Features:**
- 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
- 🔗 **MCP Server:** smolagents framework integration
- 🛠️ **4 Custom MCP Tools:** Quote + Video Search + Voice Commentary + Video Creation
- 🤖 **Agent Reasoning:** Autonomous task execution
- ⚡ **Modal Processing:** 4-8x faster video creation
- 🎨 **Multiple Variations:** Get different video styles
**Prize Eligibility:**
- ✅ Gemini API Integration ($10K Creative category)
- ✅ Modal Innovation Award ($2.5K)
- ✅ OpenAI Fallback ($1K credits)
""")
# Example Gallery - Instagram-style grid
with gr.Accordion("πŸ“Έ Example Gallery - Recent Videos", open=True):
gr.Markdown("See what others have created! Updates automatically after generation.")
# First row - 3 videos
with gr.Row():
gallery_video1 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video2 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video3 = gr.Video(label="", height=300, show_label=False, interactive=False)
# Second row - 3 videos
with gr.Row():
gallery_video4 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video5 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video6 = gr.Video(label="", height=300, show_label=False, interactive=False)
# Function to load gallery videos
def load_gallery_videos():
gallery_output_dir = "/data/gallery_videos"
os.makedirs(gallery_output_dir, exist_ok=True)
import glob
existing_videos = sorted(glob.glob(f"{gallery_output_dir}/*.mp4"),
key=os.path.getmtime, reverse=True)[:6]
# Return 6 videos (None for empty slots)
videos = [None] * 6
for i, video_path in enumerate(existing_videos):
if i < 6:
videos[i] = video_path
return videos
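    # NOTE: /data persists across restarts only if the Space has persistent
    # storage enabled; otherwise the gallery starts empty on each boot.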
gr.Markdown("---")
gr.Markdown("## 🎯 Generate Your Own Quote Video")
with gr.Row():
with gr.Column():
gr.Markdown("### 🎯 Input")
niche = gr.Dropdown(
choices=[
"Motivation",
"Business/Entrepreneurship",
"Fitness",
"Mindfulness",
"Stoicism",
"Leadership",
"Love & Relationships"
],
label="πŸ“‚ Select Niche",
value="Motivation"
)
style = gr.Dropdown(
choices=[
"Cinematic",
"Nature",
"Urban",
"Minimal",
"Abstract"
],
label="🎨 Visual Style",
value="Cinematic"
)
num_variations = gr.Slider(
minimum=1,
maximum=3,
value=1,
step=1,
label="🎬 Number of Video Variations",
info="Generate multiple versions to choose from"
)
generate_btn = gr.Button("πŸ€– Run MCP Agent with Gemini", variant="primary", size="lg")
with gr.Column():
gr.Markdown("### πŸ“Š MCP Agent Activity Log")
output = gr.Textbox(label="Agent Status", lines=20, show_label=False)
with gr.Row():
gr.Markdown("### ✨ Your Quote Videos")
with gr.Row():
video1 = gr.Video(label="Video 1", visible=True, height=500)
video2 = gr.Video(label="Video 2", visible=False, height=500)
video3 = gr.Video(label="Video 3", visible=False, height=500)
gr.Markdown("""
---
### ✨ Features
- 🌟 **Gemini AI** - Eliminates repetitive quotes with smart history tracking
- 🎨 **Multiple Variations** - Get 1-3 different videos to choose from
- ⚑ **Modal Processing** - 4-8x faster with serverless compute
- 🎯 **4 MCP Tools** - Quote (Gemini), Video Search, Voice, Video Creation
### 🏆 Hackathon: MCP 1st Birthday
**Track:** Track 2 - MCP in Action
**Category:** Productivity Tools
**Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
**Prize Targets:**
- Google Gemini Creative Award ($10K)
- Modal Innovation Award ($2.5K)
- OpenAI API Integration ($1K credits)
- ElevenLabs Voice Award (~$2K + AirPods)
""")
def process_and_display(niche, style, num_variations):
status, videos = mcp_agent_pipeline(niche, style, num_variations)
# Return up to 3 videos, None for unused slots
v1 = videos[0] if len(videos) > 0 else None
v2 = videos[1] if len(videos) > 1 else None
v3 = videos[2] if len(videos) > 2 else None
# Load updated gallery (6 videos)
gallery_vids = load_gallery_videos()
return [status, v1, v2, v3] + gallery_vids
generate_btn.click(
process_and_display,
inputs=[niche, style, num_variations],
outputs=[
output, video1, video2, video3,
gallery_video1, gallery_video2, gallery_video3,
gallery_video4, gallery_video5, gallery_video6
]
)
# Load gallery on page load
demo.load(
load_gallery_videos,
outputs=[
gallery_video1, gallery_video2, gallery_video3,
gallery_video4, gallery_video5, gallery_video6
]
)
if __name__ == "__main__":
demo.launch(allowed_paths=["/data/gallery_videos"])