import gradio as gr
import os
import requests
import random
import tempfile
from openai import OpenAI
from smolagents import CodeAgent, MCPClient, tool
from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, AudioFileClip
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from elevenlabs import ElevenLabs, VoiceSettings
# Import our new Gemini quote generator
from quote_generator_gemini import HybridQuoteGenerator
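# Configuration comes from environment variables:
#   OPENAI_API_KEY, PEXELS_API_KEY, ELEVENLABS_API_KEY, GEMINI_API_KEY, HF_TOKEN
#   MODAL_ENDPOINT_URL (optional) - Modal endpoint for fast video rendering
#   ENABLE_LOCAL_PROCESSING=1 (optional) - opt into the slow local MoviePy path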
# Initialize clients
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
# Initialize Hybrid Quote Generator (Gemini + OpenAI fallback)
hybrid_quote_generator = HybridQuoteGenerator(
gemini_key=os.getenv("GEMINI_API_KEY"),
openai_client=openai_client
)
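# generate_quote() returns a dict with "success", "quote", "source"
# ("gemini" or "openai"), plus optional "stats" and "error" keys
# (the keys consumed by generate_quote_tool below).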
# Initialize MCP Client (connecting to existing MCP server)
try:
mcp_client = MCPClient("https://abidlabs-mcp-tools.hf.space")
mcp_enabled = True
except Exception as e:
print(f"MCP initialization warning: {e}")
mcp_enabled = False
# Define custom tools for the MCP agent
@tool
def generate_quote_tool(niche: str, style: str) -> str:
"""
Generate a powerful inspirational quote using Gemini AI with variety tracking.
Falls back to OpenAI if Gemini is unavailable.
Args:
niche: The category of quote (Motivation, Business, Fitness, etc.)
style: The visual style (Cinematic, Nature, Urban, Minimal, Abstract)
Returns:
A powerful, unique quote string
"""
try:
result = hybrid_quote_generator.generate_quote(niche, style, prefer_gemini=True)
if result["success"]:
quote = result["quote"]
source = result["source"]
# Log which generator was used
if source == "gemini":
stats = result.get("stats", {})
print(f"✨ Generated with Gemini (Total: {stats.get('total_quotes_generated', 0)})")
else:
print(f"✨ Generated with OpenAI (fallback)")
return quote
else:
error_msg = result.get("error", "Unknown error")
return f"Error generating quote: {error_msg}"
except Exception as e:
return f"Error generating quote: {str(e)}"
@tool
def search_pexels_video_tool(style: str, niche: str) -> dict:
"""
Search and fetch a matching video from Pexels based on style and niche.
Args:
style: Visual style (Cinematic, Nature, Urban, Minimal, Abstract)
niche: Content niche (Motivation, Business, Fitness, etc.)
Returns:
Dictionary with video_url, search_query, and pexels_url
"""
# Intelligent search strategy mapping
search_strategies = {
"Motivation": {
"Cinematic": ["person climbing mountain", "running sunrise", "achievement success"],
"Nature": ["sunrise mountain peak", "ocean waves powerful", "forest light"],
"Urban": ["city skyline dawn", "person running city", "urban success"],
"Minimal": ["minimal motivation", "single person silhouette", "clean inspiring"],
"Abstract": ["light rays hope", "particles rising", "abstract energy"]
},
"Business/Entrepreneurship": {
"Cinematic": ["business cityscape", "office modern", "handshake deal"],
"Nature": ["growth plant", "river flowing", "sunrise new beginning"],
"Urban": ["city business", "office skyline", "modern workspace"],
"Minimal": ["desk minimal", "workspace clean", "simple office"],
"Abstract": ["network connections", "growth chart", "abstract progress"]
},
"Fitness": {
"Cinematic": ["athlete training", "gym workout", "running outdoor"],
"Nature": ["outdoor workout", "mountain hiking", "beach running"],
"Urban": ["city running", "urban fitness", "street workout"],
"Minimal": ["gym minimal", "simple workout", "clean fitness"],
"Abstract": ["energy motion", "strength power", "dynamic movement"]
},
"Mindfulness": {
"Cinematic": ["meditation sunset", "peaceful landscape", "calm water"],
"Nature": ["forest peaceful", "calm lake", "zen garden"],
"Urban": ["city peaceful morning", "quiet street", "urban calm"],
"Minimal": ["minimal zen", "simple meditation", "clean peaceful"],
"Abstract": ["calm waves", "gentle motion", "soft particles"]
},
"Stoicism": {
"Cinematic": ["ancient architecture", "statue philosopher", "timeless landscape"],
"Nature": ["mountain strong", "oak tree", "stone nature"],
"Urban": ["classical building", "statue city", "ancient modern"],
"Minimal": ["stone minimal", "simple strong", "pillar minimal"],
"Abstract": ["marble texture", "stone abstract", "timeless pattern"]
},
"Leadership": {
"Cinematic": ["team meeting", "leader speaking", "group collaboration"],
"Nature": ["eagle flying", "lion pride", "mountain top"],
"Urban": ["office leadership", "boardroom", "city leadership"],
"Minimal": ["chess pieces", "simple leadership", "clean professional"],
"Abstract": ["network leader", "connection points", "guiding light"]
},
"Love & Relationships": {
"Cinematic": ["couple sunset", "romance beautiful", "love cinematic"],
"Nature": ["couple nature", "romantic sunset", "peaceful together"],
"Urban": ["couple city", "romance urban", "love city lights"],
"Minimal": ["hands holding", "simple love", "minimal romance"],
"Abstract": ["hearts flowing", "love particles", "connection abstract"]
}
}
# Get queries for this niche + style combination
queries = search_strategies.get(niche, {}).get(style, ["aesthetic nature"])
try:
headers = {"Authorization": PEXELS_API_KEY}
# Pick a random query for variety
query = random.choice(queries)
url = f"https://api.pexels.com/videos/search?query={query}&per_page=15&orientation=portrait"
response = requests.get(url, headers=headers)
data = response.json()
if "videos" in data and len(data["videos"]) > 0:
# Pick a random video from results
video = random.choice(data["videos"][:10])
video_files = video.get("video_files", [])
# Find portrait/vertical video
portrait_videos = [vf for vf in video_files if vf.get("width", 0) < vf.get("height", 0)]
if portrait_videos:
selected = random.choice(portrait_videos)
return {
"video_url": selected.get("link"),
"search_query": query,
"pexels_url": video.get("url"),
"success": True
}
            # Fallback: use the first available video file
if video_files:
return {
"video_url": video_files[0].get("link"),
"search_query": query,
"pexels_url": video.get("url"),
"success": True
}
return {
"video_url": None,
"search_query": query,
"pexels_url": None,
"success": False,
"error": "No suitable videos found"
}
except Exception as e:
return {
"video_url": None,
"search_query": "",
"pexels_url": None,
"success": False,
"error": str(e)
}
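# Illustrative successful return value (URLs elided):
# {"video_url": "https://...", "search_query": "sunrise mountain peak",
#  "pexels_url": "https://www.pexels.com/video/...", "success": True}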
@tool
def generate_voice_commentary_tool(quote_text: str, niche: str, output_path: str) -> dict:
"""
Generate insightful voice commentary explaining the deeper meaning of the quote.
Uses Gemini to create thoughtful explanation, then ElevenLabs to voice it.
This adds VALUE - not just reading what's already on screen.
Args:
quote_text: The quote to explain
niche: The niche/category for context
output_path: Path where to save the audio file
Returns:
Dictionary with success status, output path, and the explanation text
"""
try:
# Step 1: Generate explanation using Gemini
import google.generativeai as genai
explanation_prompt = f"""Given this {niche} quote:
"{quote_text}"
Write a brief, insightful voice-over commentary that explains the deeper meaning or practical wisdom.
Requirements:
- 2-3 sentences maximum
- Around 25-35 words total
- Spoken naturally (like a wise mentor)
- Add insight that isn't obvious from reading
- Make it thought-provoking
- Don't start with "This quote..." - dive into the insight
Example:
Quote: "Between stimulus and response there is a space."
Good: "In that pause lies your freedom. That's where you choose who you become, not who your habits make you."
Return ONLY the commentary, nothing else."""
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-1.5-flash') # Updated model name
response = model.generate_content(
explanation_prompt,
generation_config={
"temperature": 0.7,
"max_output_tokens": 100
}
)
explanation = response.text.strip().strip('"').strip("'")
print(f"πŸ“ Commentary: {explanation}")
# Step 2: Generate voice using ElevenLabs
audio = elevenlabs_client.text_to_speech.convert(
text=explanation,
voice_id="pNInz6obpgDQGcFmaJgB", # Adam - thoughtful and clear
model_id="eleven_multilingual_v2",
voice_settings=VoiceSettings(
stability=0.6, # Stable for narration
similarity_boost=0.8,
style=0.6, # Expressive for commentary
use_speaker_boost=True
)
)
# Save audio
with open(output_path, 'wb') as f:
for chunk in audio:
f.write(chunk)
return {
"success": True,
"output_path": output_path,
"explanation": explanation,
"message": "Voice commentary created!"
}
except Exception as e:
return {
"success": False,
"output_path": None,
"explanation": None,
"message": f"Error creating commentary: {str(e)}"
}
@tool
def create_quote_video_tool(video_url: str, quote_text: str, output_path: str, audio_path: str = None) -> dict:
"""
Create a final quote video by overlaying text on the background video.
    Uses Modal for fast processing (4-8x faster); the local MoviePy fallback is opt-in via ENABLE_LOCAL_PROCESSING=1.
Optionally adds voice narration audio.
Args:
video_url: URL of the background video from Pexels
quote_text: The quote text to overlay
output_path: Path where to save the final video
audio_path: Optional path to audio file for voice narration
Returns:
Dictionary with success status and output path
"""
# Check if Modal is configured
modal_endpoint = os.getenv("MODAL_ENDPOINT_URL")
if modal_endpoint:
try:
            import base64  # requests is already imported at module level
            print("🚀 Processing on Modal (fast!)...")
# Prepare audio data if present
audio_b64 = None
if audio_path and os.path.exists(audio_path):
with open(audio_path, 'rb') as f:
audio_bytes = f.read()
audio_b64 = base64.b64encode(audio_bytes).decode()
print(f" 🎀 Including voice commentary audio ({len(audio_bytes)} bytes)")
# Call Modal endpoint with longer timeout
response = requests.post(
modal_endpoint,
json={
"video_url": video_url,
"quote_text": quote_text,
"audio_b64": audio_b64 # Pass audio as base64
},
timeout=120 # 2 minute timeout
)
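            # Expected response body on success (keys as consumed below):
            # {"success": true, "video": "<base64-encoded mp4>", "size_mb": 1.23}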
if response.status_code == 200:
result = response.json()
if result.get("success"):
# Decode video bytes
video_b64 = result["video"]
video_bytes = base64.b64decode(video_b64)
# Save to output path
with open(output_path, 'wb') as f:
f.write(video_bytes)
print(f"βœ… Modal processing complete! {result['size_mb']:.2f}MB")
return {
"success": True,
"output_path": output_path,
"message": f"Video created via Modal in ~20s ({result['size_mb']:.2f}MB)"
}
else:
print(f"⚠️ Modal returned error: {result.get('error', 'Unknown')}")
else:
print(f"⚠️ Modal HTTP error: {response.status_code}")
# If Modal failed, fall through to local processing
print("⚠️ Modal failed, falling back to local processing...")
except requests.Timeout:
print(f"⚠️ Modal timeout after 120s, falling back to local...")
except Exception as e:
print(f"⚠️ Modal error: {e}, falling back to local processing...")
else:
print("ℹ️ MODAL_ENDPOINT_URL not configured, using local processing")
    # LOCAL PROCESSING (fallback)
    # MoviePy rendering can hang on constrained HF Spaces hardware, so it is
    # gated behind an explicit opt-in flag (ENABLE_LOCAL_PROCESSING is a
    # convention of this app, not a Modal/HF setting); by default we return
    # an error instead of risking a hang.
    if os.getenv("ENABLE_LOCAL_PROCESSING") != "1":
        return {
            "success": False,
            "output_path": None,
            "message": "Local processing disabled - please configure Modal for video generation. Deploy Modal with: modal deploy modal_video_processing.py"
        }
    print("🔧 Processing locally (may be slow)...")
try:
import time
processing_start = time.time()
# Step 1: Download the video
response = requests.get(video_url, stream=True, timeout=30)
response.raise_for_status()
# Create temporary file for downloaded video
temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
with open(temp_video.name, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
# Step 2: Load video with MoviePy
video = VideoFileClip(temp_video.name)
# Get video dimensions
w, h = video.size
# Step 3: Create text overlay using PIL
def make_text_frame(t):
"""Generate a text frame using PIL"""
# Create transparent image
img = Image.new('RGBA', (w, h), (0, 0, 0, 0))
draw = ImageDraw.Draw(img)
# Calculate font size (2.5% of video height - smaller for better aesthetic)
font_size = int(h * 0.025)
# Try to load a font, fall back to default if needed
            try:
                # Try common fonts available on Linux
                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
            except OSError:
                try:
                    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size)
                except OSError:
                    # Fall back to PIL's built-in bitmap font
                    font = ImageFont.load_default()
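            # NOTE: load_default() ignores font_size on older Pillow releases
            # (it returns a small bitmap font); Pillow 10.1+ accepts
            # load_default(size=...) if a larger fallback is needed.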
# Wrap text to fit width (60% of video width for better proportions)
max_width = int(w * 0.6)
# Manual text wrapping with better line length
words = quote_text.split()
lines = []
current_line = []
for word in words:
test_line = ' '.join(current_line + [word])
# Get text bbox to check width
bbox = draw.textbbox((0, 0), test_line, font=font)
text_width = bbox[2] - bbox[0]
if text_width <= max_width:
current_line.append(word)
else:
if current_line:
lines.append(' '.join(current_line))
current_line = [word]
else:
lines.append(word)
if current_line:
lines.append(' '.join(current_line))
# Calculate total text height with better line spacing
line_spacing = int(font_size * 0.4)
text_block_height = len(lines) * (font_size + line_spacing)
# Start y position (centered vertically)
y = (h - text_block_height) // 2
# Draw each line centered
for line in lines:
# Get text size
bbox = draw.textbbox((0, 0), line, font=font)
text_width = bbox[2] - bbox[0]
# Center horizontally
x = (w - text_width) // 2
# Draw black outline (stroke) - thinner for smaller text
outline_width = max(2, int(font_size * 0.08))
for adj_x in range(-outline_width, outline_width + 1):
for adj_y in range(-outline_width, outline_width + 1):
draw.text((x + adj_x, y + adj_y), line, font=font, fill='black')
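                # Pillow >= 6.2 could draw this outline in a single call via
                # draw.text(..., stroke_width=outline_width, stroke_fill='black'),
                # avoiding this O(outline_width^2) loop.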
# Draw white text on top
draw.text((x, y), line, font=font, fill='white')
y += font_size + line_spacing
return np.array(img)
        # Step 4: Create a static text overlay (the text doesn't animate,
        # so one rendered frame is reused for the clip's full duration)
        text_clip = ImageClip(make_text_frame(0), duration=video.duration)
# Step 5: Composite video with text
final_video = CompositeVideoClip([video, text_clip])
# Step 5.5: Add voice narration if provided
if audio_path and os.path.exists(audio_path):
try:
print("🎀 Adding audio track...")
audio_clip = AudioFileClip(audio_path)
# Use the shorter duration between video and audio
audio_duration = min(audio_clip.duration, final_video.duration)
audio_clip = audio_clip.subclip(0, audio_duration)
final_video = final_video.set_audio(audio_clip)
print("βœ… Audio added successfully")
except Exception as audio_error:
print(f"⚠️ Could not add audio: {audio_error}")
print("⚠️ Continuing without audio...")
# Continue without audio rather than failing
# Step 6: Export final video
print("πŸ“¦ Exporting video (this may take 30-60s)...")
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
temp_audiofile='temp-audio.m4a',
remove_temp=True,
fps=24,
preset='ultrafast', # Faster encoding
threads=4,
logger=None, # Suppress verbose output
verbose=False
)
print(f"βœ… Video export complete! ({time.time() - processing_start:.1f}s total)")
# Cleanup
video.close()
final_video.close()
os.unlink(temp_video.name)
return {
"success": True,
"output_path": output_path,
"message": "Video created successfully!"
}
except Exception as e:
return {
"success": False,
"output_path": None,
"message": f"Error creating video: {str(e)}"
}
# Initialize the MCP-powered agent
def initialize_agent():
"""Initialize the CodeAgent with MCP capabilities"""
    try:
        # smolagents expects a model wrapper, not a raw huggingface_hub
        # InferenceClient; the wrapper class was renamed between releases.
        try:
            from smolagents import InferenceClientModel as AgentModel  # newer releases
        except ImportError:
            from smolagents import HfApiModel as AgentModel  # older releases
        model = AgentModel(token=os.getenv("HF_TOKEN"))
        # Custom tools, plus any tools exposed by the MCP server
        tools = [generate_quote_tool, search_pexels_video_tool, generate_voice_commentary_tool, create_quote_video_tool]
        if mcp_enabled:
            try:
                tools.extend(mcp_client.get_tools())
            except Exception as mcp_error:
                print(f"MCP tools unavailable: {mcp_error}")
        agent = CodeAgent(
            tools=tools,
            model=model,
            additional_authorized_imports=["requests", "openai", "random", "tempfile", "os", "google.generativeai"],
            max_steps=15
        )
        return agent, None
except Exception as e:
return None, f"Agent initialization error: {str(e)}"
# Initialize agent
agent, agent_error = initialize_agent()
def mcp_agent_pipeline(niche, style, num_variations=1):
    """
    MCP-POWERED PIPELINE

    Runs the tool chain step by step: Gemini-powered quote generation ->
    Pexels video search -> video rendering, producing up to num_variations
    variations. Falls back to fallback_pipeline() if the agent failed to
    initialize.
    """
status_log = []
status_log.append("πŸ€– **MCP AGENT STARTING**\n")
if agent_error:
status_log.append(f"❌ Agent initialization failed: {agent_error}")
status_log.append("\nπŸ”„ Falling back to direct tool execution...\n")
return fallback_pipeline(niche, style, num_variations)
try:
# STEP 1: Agent receives task
status_log.append("πŸ“‹ **TASK RECEIVED:**")
status_log.append(f" β†’ Generate {niche} quote with {style} aesthetic")
status_log.append(f" β†’ Create {num_variations} video variations")
status_log.append("")
# STEP 2: Agent executes quote generation with Gemini
status_log.append("🧠 **GEMINI AI: generate_quote_tool**")
quote = generate_quote_tool(niche, style)
if "Error" in quote:
return "\n".join(status_log) + f"\n❌ Failed: {quote}", []
status_log.append(f" βœ… Generated: \"{quote[:100]}...\"" if len(quote) > 100 else f" βœ… Generated: \"{quote}\"\n")
# STEP 3: Search for multiple videos
status_log.append(f"πŸ” **MCP TOOL: search_pexels_video_tool (x{num_variations})**")
status_log.append(f" ⏳ Finding {num_variations} different videos...")
video_results = []
for i in range(num_variations):
video_result = search_pexels_video_tool(style, niche)
if video_result["success"]:
video_results.append(video_result)
status_log.append(f" βœ… Video {i+1}: {video_result['search_query']}")
if not video_results:
return "\n".join(status_log) + "\n❌ No videos found", []
status_log.append("")
# STEP 4: Create multiple video variations
status_log.append(f"🎬 **MCP TOOL: create_quote_video_tool (x{len(video_results)})**")
status_log.append(f" ⏳ Creating {len(video_results)} video variations in parallel...")
output_dir = "/tmp/quote_videos"
gallery_dir = "/data/gallery_videos"
os.makedirs(output_dir, exist_ok=True)
os.makedirs(gallery_dir, exist_ok=True)
import time
timestamp = int(time.time())
# Use threading for parallel Modal calls
import threading
import queue
results_queue = queue.Queue()
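        # queue.Queue is thread-safe, so the worker threads below can push
        # results concurrently without extra locking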
def create_single_video(index, video_result):
output_filename = f"quote_video_v{index+1}_{timestamp}.mp4"
output_path = os.path.join(output_dir, output_filename)
creation_result = create_quote_video_tool(
video_result["video_url"],
quote,
output_path,
            None  # no voice commentary audio for batch variations
)
results_queue.put((index, creation_result, output_path))
# Start all threads
threads = []
for i, video_result in enumerate(video_results):
thread = threading.Thread(target=create_single_video, args=(i, video_result))
thread.start()
threads.append(thread)
# Wait for all to complete
for thread in threads:
thread.join()
# Collect results
created_videos = []
all_results = []
while not results_queue.empty():
all_results.append(results_queue.get())
# Sort by index
all_results.sort(key=lambda x: x[0])
# Process results
for index, creation_result, output_path in all_results:
if creation_result["success"]:
created_videos.append(output_path)
status_log.append(f" βœ… Variation {index+1} created!")
                # Copy to gallery (best-effort; /data is HF persistent storage)
                import shutil
                gallery_filename = f"gallery_{timestamp}_v{index+1}.mp4"
                gallery_path = os.path.join(gallery_dir, gallery_filename)
                try:
                    shutil.copy2(output_path, gallery_path)
                except OSError:
                    pass
            else:
                error_msg = creation_result.get("message", "Unknown error")
                status_log.append(f" ⚠️ Variation {index+1} failed: {error_msg}")
if not created_videos:
status_log.append("\n❌ All video creations failed")
return "\n".join(status_log), []
status_log.append("")
# STEP 5: Integration status
status_log.append("πŸ”— **AI INTEGRATIONS:**")
status_log.append(" βœ… Gemini API - Quote generation with variety tracking")
status_log.append(" βœ… Pexels API - Video search")
status_log.append(" βœ… Modal Compute - Fast video processing")
if mcp_enabled:
status_log.append(" βœ… MCP Server - abidlabs-mcp-tools.hf.space")
status_log.append("")
# STEP 6: Success!
status_log.append("✨ **PIPELINE COMPLETE!**")
status_log.append(f" 🎬 Created {len(created_videos)} unique video variations")
status_log.append(f" πŸ“₯ Choose your favorite and download!")
final_status = "\n".join(status_log)
return final_status, created_videos
except Exception as e:
status_log.append(f"\n❌ Pipeline error: {str(e)}")
return "\n".join(status_log), []
def fallback_pipeline(niche, style, num_variations=1):
"""Fallback pipeline if MCP agent fails"""
status_log = []
status_log.append("πŸ”„ **FALLBACK MODE (Direct Tool Execution)**\n")
# Generate quote
status_log.append("🧠 Generating quote with Gemini...")
quote = generate_quote_tool(niche, style)
if "Error" in quote:
return "\n".join(status_log) + f"\n❌ {quote}", []
status_log.append(f" βœ… Quote generated\n")
# Search videos
status_log.append(f"πŸ” Searching for {num_variations} videos...")
video_results = []
for i in range(num_variations):
video_result = search_pexels_video_tool(style, niche)
if video_result["success"]:
video_results.append(video_result)
if not video_results:
return "\n".join(status_log) + "\n❌ No videos found", []
status_log.append(f" βœ… Found {len(video_results)} videos\n")
# Create videos
status_log.append("🎬 Creating videos...")
output_dir = "/tmp/quote_videos"
gallery_dir = "/data/gallery_videos" # HF persistent storage
os.makedirs(output_dir, exist_ok=True)
os.makedirs(gallery_dir, exist_ok=True)
import time
timestamp = int(time.time())
created_videos = []
for i, video_result in enumerate(video_results):
output_filename = f"quote_video_v{i+1}_{timestamp}.mp4"
output_path = os.path.join(output_dir, output_filename)
creation_result = create_quote_video_tool(
video_result["video_url"],
quote,
output_path,
None # No audio
)
if creation_result["success"]:
created_videos.append(creation_result["output_path"])
            # Copy to gallery (best-effort)
            import shutil
            gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
            gallery_path = os.path.join(gallery_dir, gallery_filename)
            try:
                shutil.copy2(creation_result["output_path"], gallery_path)
            except OSError:
                pass
else:
error_msg = creation_result.get("message", "Unknown error")
status_log.append(f" ❌ Video {i+1} error: {error_msg}")
if not created_videos:
return "\n".join(status_log) + "\n❌ Video creation failed", []
status_log.append(f" βœ… Created {len(created_videos)} videos!\n")
status_log.append("🎬 **COMPLETE!**")
return "\n".join(status_log), created_videos
# Gradio Interface
with gr.Blocks(title="AIQuoteClipGenerator - MCP + Gemini Edition", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# 🎬 AIQuoteClipGenerator
### MCP-Powered with Gemini AI
**Key Features:**
- 🌟 **Gemini AI:** No more repetitive quotes! Smart variety tracking
- 🔗 **MCP Server:** smolagents framework integration
- 🛠️ **4 Custom MCP Tools:** Quote + Video Search + Voice Commentary + Video Creation
- 🤖 **Agent Reasoning:** Autonomous task execution
- ⚡ **Modal Processing:** 4-8x faster video creation
- 🎨 **Multiple Variations:** Get different video styles
**Prize Eligibility:**
- ✅ Gemini API Integration ($10K Creative category)
- ✅ Modal Innovation Award ($2.5K)
- ✅ OpenAI Fallback ($1K credits)
""")
# Example Gallery - Instagram-style grid
with gr.Accordion("πŸ“Έ Example Gallery - Recent Videos", open=True):
gr.Markdown("See what others have created! Updates automatically after generation.")
# First row - 3 videos
with gr.Row():
gallery_video1 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video2 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video3 = gr.Video(label="", height=300, show_label=False, interactive=False)
# Second row - 3 videos
with gr.Row():
gallery_video4 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video5 = gr.Video(label="", height=300, show_label=False, interactive=False)
gallery_video6 = gr.Video(label="", height=300, show_label=False, interactive=False)
# Function to load gallery videos
def load_gallery_videos():
gallery_output_dir = "/data/gallery_videos"
os.makedirs(gallery_output_dir, exist_ok=True)
import glob
existing_videos = sorted(glob.glob(f"{gallery_output_dir}/*.mp4"),
key=os.path.getmtime, reverse=True)[:6]
# Return 6 videos (None for empty slots)
videos = [None] * 6
for i, video_path in enumerate(existing_videos):
if i < 6:
videos[i] = video_path
return videos
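    # NOTE: /data persists across restarts only if the Space has persistent
    # storage enabled; otherwise the gallery starts empty on each boot.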
gr.Markdown("---")
gr.Markdown("## 🎯 Generate Your Own Quote Video")
with gr.Row():
with gr.Column():
gr.Markdown("### 🎯 Input")
niche = gr.Dropdown(
choices=[
"Motivation",
"Business/Entrepreneurship",
"Fitness",
"Mindfulness",
"Stoicism",
"Leadership",
"Love & Relationships"
],
label="πŸ“‚ Select Niche",
value="Motivation"
)
style = gr.Dropdown(
choices=[
"Cinematic",
"Nature",
"Urban",
"Minimal",
"Abstract"
],
label="🎨 Visual Style",
value="Cinematic"
)
num_variations = gr.Slider(
minimum=1,
maximum=3,
value=1,
step=1,
label="🎬 Number of Video Variations",
info="Generate multiple versions to choose from"
)
generate_btn = gr.Button("πŸ€– Run MCP Agent with Gemini", variant="primary", size="lg")
with gr.Column():
gr.Markdown("### πŸ“Š MCP Agent Activity Log")
output = gr.Textbox(label="Agent Status", lines=20, show_label=False)
with gr.Row():
gr.Markdown("### ✨ Your Quote Videos")
with gr.Row():
video1 = gr.Video(label="Video 1", visible=True, height=500)
video2 = gr.Video(label="Video 2", visible=False, height=500)
video3 = gr.Video(label="Video 3", visible=False, height=500)
gr.Markdown("""
---
### ✨ Features
- 🌟 **Gemini AI** - Eliminates repetitive quotes with smart history tracking
- 🎨 **Multiple Variations** - Get 1-3 different videos to choose from
- ⚑ **Modal Processing** - 4-8x faster with serverless compute
- 🎯 **4 MCP Tools** - Quote (Gemini), Video Search, Voice, Video Creation
### 🏆 Hackathon: MCP 1st Birthday
**Track:** Track 2 - MCP in Action
**Category:** Productivity Tools
**Built with:** Gradio + smolagents + Gemini + OpenAI + Pexels + Modal + ElevenLabs + MCP
**Prize Targets:**
- Google Gemini Creative Award ($10K)
- Modal Innovation Award ($2.5K)
- OpenAI API Integration ($1K credits)
- ElevenLabs Voice Award (~$2K + AirPods)
""")
def process_and_display(niche, style, num_variations):
status, videos = mcp_agent_pipeline(niche, style, num_variations)
# Return up to 3 videos, None for unused slots
v1 = videos[0] if len(videos) > 0 else None
v2 = videos[1] if len(videos) > 1 else None
v3 = videos[2] if len(videos) > 2 else None
# Load updated gallery (6 videos)
gallery_vids = load_gallery_videos()
return [status, v1, v2, v3] + gallery_vids
generate_btn.click(
process_and_display,
inputs=[niche, style, num_variations],
outputs=[
output, video1, video2, video3,
gallery_video1, gallery_video2, gallery_video3,
gallery_video4, gallery_video5, gallery_video6
]
)
# Load gallery on page load
demo.load(
load_gallery_videos,
outputs=[
gallery_video1, gallery_video2, gallery_video3,
gallery_video4, gallery_video5, gallery_video6
]
)
if __name__ == "__main__":
demo.launch(allowed_paths=["/data/gallery_videos"])