ladybug11 committed on
Commit
44ee7c9
·
1 Parent(s): b59bb3d

update grid

Browse files
Files changed (1) hide show
  1. app.py +100 -120
app.py CHANGED
@@ -1,24 +1,31 @@
1
- import gradio as gr
2
  import os
3
- import requests
4
  import random
5
- import tempfile
6
  import base64
 
7
  from typing import Tuple, List, Dict, Any
8
 
 
 
9
  from openai import OpenAI
10
  from smolagents import CodeAgent, MCPClient, tool
11
  from huggingface_hub import InferenceClient
12
  from elevenlabs import ElevenLabs, VoiceSettings
13
 
14
- # Import our Gemini + OpenAI hybrid quote generator
15
  from quote_generator_gemini import HybridQuoteGenerator
16
 
17
- # ==== CLIENTS / GLOBALS =======================================================
 
 
18
 
19
  openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
20
  PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
21
- elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
 
 
 
 
 
 
22
 
23
  # Hybrid quote generator (Gemini primary, OpenAI fallback)
24
  hybrid_quote_generator = HybridQuoteGenerator(
@@ -37,11 +44,13 @@ except Exception as e:
37
  # Modal endpoint for fast video rendering
38
  MODAL_ENDPOINT_URL = os.getenv("MODAL_ENDPOINT_URL")
39
 
40
- # ==== CONTEXT ENGINEERING: PERSONAS & TRENDS ==================================
41
 
 
 
 
42
 
43
  def get_persona_instruction(persona: str) -> str:
44
- """Return a short style instruction for the selected persona."""
45
  persona = (persona or "").lower()
46
  if persona == "coach":
47
  return (
@@ -68,8 +77,7 @@ def get_persona_instruction(persona: str) -> str:
68
 
69
  def get_trend_insights(niche: str) -> Dict[str, Any]:
70
  """
71
- Lightweight 'RAG' over static trend knowledge per niche.
72
- Returns topics + hooks + a short summary string.
73
  """
74
  niche = niche or "Motivation"
75
 
@@ -77,9 +85,8 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
77
  "Motivation": {
78
  "label": "soft life vs discipline era",
79
  "summary": (
80
- "Right now motivational content leans into 'soft life' aesthetics while still "
81
- "talking about discipline, systems, and quiet consistency. People want ambition "
82
- "without burnout and routines that feel gentle but effective."
83
  ),
84
  "topics": [
85
  {
@@ -92,53 +99,49 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
92
  },
93
  {
94
  "topic": "Reset Routine Hacks",
95
- "hook": "Feeling stuck? Here’s a 10-minute reset that gets you moving again.",
96
- },
97
- {
98
- "topic": "Motivation in 60 Seconds",
99
- "hook": "Your 60-second reminder that small moves count more than perfect plans.",
100
  },
101
  ],
102
  },
103
  "Business/Entrepreneurship": {
104
  "label": "one-person brands & slow growth",
105
  "summary": (
106
- "Founders are tired of hustle theatre. Trending content focuses on one-person "
107
- "brands, slow compounding, transparent revenue, and honest behind-the-scenes."
108
  ),
109
  "topics": [
110
  {
111
  "topic": "Build in Public Moments",
112
- "hook": "Here’s the part of building nobody shows youβ€”but everyone feels.",
113
  },
114
  {
115
  "topic": "Tiny Experiments",
116
- "hook": "Instead of a 5-year plan, here’s one experiment you can run this week.",
117
  },
118
  ],
119
  },
120
  "Fitness": {
121
  "label": "sustainable glow-up",
122
  "summary": (
123
- "Fitness trends lean toward sustainable glow-ups: lower-impact routines, walking, "
124
- "strength, and realistic body expectations."
125
  ),
126
  "topics": [
127
  {
128
  "topic": "Gentle Discipline Workouts",
129
- "hook": "A workout routine for the days you β€˜don’t feel like it’ but still care.",
130
  },
131
  {
132
  "topic": "Slow Glow-Up",
133
- "hook": "Your glow-up doesn’t need to be loud. Here’s the quiet version.",
134
  },
135
  ],
136
  },
137
  "Mindfulness": {
138
  "label": "nervous system & soft resets",
139
  "summary": (
140
- "Mindfulness content is shifting toward nervous system regulation, tiny resets, "
141
- "and practical grounding instead of abstract spirituality."
142
  ),
143
  "topics": [
144
  {
@@ -154,13 +157,13 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
154
  "Stoicism": {
155
  "label": "quiet strength",
156
  "summary": (
157
- "Stoic content focuses on quiet strength, emotional regulation, and not reacting "
158
- "to every notification, comment, or impulse."
159
  ),
160
  "topics": [
161
  {
162
  "topic": "Reaction Discipline",
163
- "hook": "You can’t control peopleβ€”but you can control the delay before you answer.",
164
  },
165
  {
166
  "topic": "Modern Stoic Moments",
@@ -171,13 +174,13 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
171
  "Leadership": {
172
  "label": "servant leadership & clarity",
173
  "summary": (
174
- "Leadership trends highlight servant leadership, psychological safety, and clear, "
175
- "simple direction instead of corporate buzzwords."
176
  ),
177
  "topics": [
178
  {
179
  "topic": "Clarity Over Charisma",
180
- "hook": "People don’t need a hero. They need one simple, clear next step.",
181
  },
182
  {
183
  "topic": "Leader as Mirror",
@@ -188,8 +191,8 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
188
  "Love & Relationships": {
189
  "label": "self-worth & secure attachment",
190
  "summary": (
191
- "Relationship content leans into self-worth, boundaries, and secure attachmentβ€”not "
192
- "only romance but choosing emotionally safe people."
193
  ),
194
  "topics": [
195
  {
@@ -207,8 +210,8 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
207
  default = {
208
  "label": "modern glow-up & gentle discipline",
209
  "summary": (
210
- "Short-form content leans into gentle discipline, realistic routines, and soft glow-ups "
211
- "instead of extreme hustle or perfection."
212
  ),
213
  "topics": [
214
  {
@@ -221,8 +224,9 @@ def get_trend_insights(niche: str) -> Dict[str, Any]:
221
  return trends.get(niche, default)
222
 
223
 
224
- # ==== TOOLS (FOR AGENT + DIRECT USE) ==========================================
225
-
 
226
 
227
  @tool
228
  def generate_quote_tool(niche: str, style: str, persona: str) -> str:
@@ -238,7 +242,6 @@ def generate_quote_tool(niche: str, style: str, persona: str) -> str:
238
  Returns:
239
  A unique quote string.
240
  """
241
- # Combine style + persona into a richer conditioning string
242
  persona_instruction = get_persona_instruction(persona)
243
  combined_style = f"{style} | persona={persona} | tone={persona_instruction}"
244
 
@@ -276,14 +279,8 @@ def search_pexels_video_tool(style: str, niche: str, trend_label: str = "") -> d
276
  trend_label: Short description of the current trend for this niche.
277
 
278
  Returns:
279
- Dictionary with:
280
- - success: bool
281
- - video_url: URL of selected MP4
282
- - search_query: query used
283
- - pexels_url: original Pexels page
284
- - error: message if any
285
  """
286
- # Base search strategy
287
  base_queries = {
288
  "Motivation": {
289
  "Cinematic": ["running sunrise", "cliff sunrise", "city at dawn"],
@@ -339,7 +336,6 @@ def search_pexels_video_tool(style: str, niche: str, trend_label: str = "") -> d
339
  niche_map = base_queries.get(niche, base_queries["Motivation"])
340
  queries = niche_map.get(style, niche_map["Cinematic"])
341
 
342
- # Light tweak: if trend label contains certain words, bias the query
343
  trend_label_lower = (trend_label or "").lower()
344
  if "soft life" in trend_label_lower:
345
  queries = queries + ["soft life aesthetic", "cozy morning light"]
@@ -359,9 +355,7 @@ def search_pexels_video_tool(style: str, niche: str, trend_label: str = "") -> d
359
  video_files = video.get("video_files", [])
360
 
361
  portrait_videos = [
362
- vf
363
- for vf in video_files
364
- if vf.get("width", 0) < vf.get("height", 0)
365
  ]
366
 
367
  if portrait_videos:
@@ -413,8 +407,8 @@ def create_quote_video_tool(
413
  video_url: Direct URL to a Pexels MP4 file.
414
  quote_text: The quote text to overlay on the video.
415
  output_path: Local path where the MP4 should be saved.
416
- audio_b64: Optional base64-encoded audio for narration (mp3).
417
- text_style: Layout style for quote text (classic_center, lower_third_serif, typewriter_top).
418
 
419
  Returns:
420
  Dictionary with success flag, message, and output_path if successful.
@@ -475,8 +469,9 @@ def create_quote_video_tool(
475
  }
476
 
477
 
478
- # ==== OPTIONAL: AGENT INITIALIZATION (MCP-FLAVORED) ===========================
479
-
 
480
 
481
  def initialize_agent():
482
  """Initialize the CodeAgent with MCP capabilities (if available)."""
@@ -516,8 +511,10 @@ def initialize_agent():
516
 
517
  agent, agent_error = initialize_agent()
518
 
519
- # ==== VOICE GENERATION (OpenAI explanation + ElevenLabs TTS) ==================
520
 
 
 
 
521
 
522
  def get_voice_config(voice_profile: str) -> Tuple[str, VoiceSettings]:
523
  """
@@ -528,7 +525,7 @@ def get_voice_config(voice_profile: str) -> Tuple[str, VoiceSettings]:
528
  # Calm female (Rachel)
529
  if "rachel" in vp or "female" in vp:
530
  return (
531
- "21m00Tcm4TlvDq8ikWAM", # Rachel (from ElevenLabs docs)
532
  VoiceSettings(
533
  stability=0.5,
534
  similarity_boost=0.9,
@@ -537,7 +534,7 @@ def get_voice_config(voice_profile: str) -> Tuple[str, VoiceSettings]:
537
  ),
538
  )
539
 
540
- # Warm male (Adam)
541
  return (
542
  "pNInz6obpgDQGcFmaJgB", # Adam
543
  VoiceSettings(
@@ -558,7 +555,6 @@ def generate_voice_commentary(
558
  ) -> Tuple[str, str]:
559
  """
560
  Generate a short explanatory commentary + ElevenLabs audio (as base64).
561
- Voice is always generated if ElevenLabs is available.
562
 
563
  Returns:
564
  (commentary_text, audio_b64) – audio_b64 may be "" if error.
@@ -566,7 +562,6 @@ def generate_voice_commentary(
566
  if not elevenlabs_client:
567
  return "", ""
568
 
569
- # 1) Use OpenAI to generate brief commentary
570
  persona_instruction = get_persona_instruction(persona)
571
  prompt = f"""
572
  You are creating a short voice-over commentary for a TikTok/Instagram quote video.
@@ -590,6 +585,7 @@ Requirements:
590
  Return ONLY the commentary text, nothing else.
591
  """
592
 
 
593
  try:
594
  completion = openai_client.chat.completions.create(
595
  model="gpt-4o-mini",
@@ -600,7 +596,6 @@ Return ONLY the commentary text, nothing else.
600
  max_tokens=120,
601
  temperature=0.7,
602
  )
603
-
604
  commentary = completion.choices[0].message.content.strip()
605
  except Exception as e:
606
  print(f"⚠️ Error generating commentary text: {e}")
@@ -609,24 +604,23 @@ Return ONLY the commentary text, nothing else.
609
  # 2) ElevenLabs TTS
610
  try:
611
  voice_id, voice_settings = get_voice_config(voice_profile)
612
-
613
  audio_stream = elevenlabs_client.text_to_speech.convert(
614
  text=commentary,
615
  voice_id=voice_id,
616
  model_id="eleven_multilingual_v2",
617
  voice_settings=voice_settings,
618
  )
619
-
620
  audio_bytes = b"".join(chunk for chunk in audio_stream)
621
  audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
622
  return commentary, audio_b64
623
  except Exception as e:
624
  print(f"⚠️ Error generating ElevenLabs audio: {e}")
625
- return commentary, "" # keep commentary for logs even if audio fails
626
-
627
 
628
- # ==== PIPELINE (ADVANCED AGENT-LIKE FLOW) =====================================
629
 
 
 
 
630
 
631
  def mcp_agent_pipeline(
632
  niche: str,
@@ -638,10 +632,10 @@ def mcp_agent_pipeline(
638
  ) -> Tuple[str, List[str]]:
639
  """
640
  MCP-flavored autonomous pipeline with:
641
- - Context engineering (persona, trends)
642
- - Trend-informed 'RAG' context injection
643
  - Quote generation via hybrid Gemini/OpenAI
644
- - ElevenLabs narration (always on if available)
645
  - Modal-based video creation (1–3 variations)
646
  """
647
 
@@ -652,7 +646,7 @@ def mcp_agent_pipeline(
652
  status_log.append(f"⚠️ Agent initialization failed: {agent_error}")
653
  status_log.append(" Falling back to direct tool execution.\n")
654
 
655
- # STEP 0: Context & trends
656
  status_log.append("🧩 **Step 0 – Building context**")
657
  status_log.append(f" β€’ Niche: `{niche}`")
658
  status_log.append(f" β€’ Visual style: `{style}`")
@@ -665,20 +659,19 @@ def mcp_agent_pipeline(
665
  trend_summary = trend_info.get("summary", "")
666
  topics_for_log = ", ".join(t["topic"] for t in trend_info.get("topics", [])[:3])
667
 
668
- status_log.append("πŸ“ˆ **Step 1 – Trend-aware context (mini-RAG)**")
669
  status_log.append(f" β€’ Trend theme: {trend_label}")
670
  status_log.append(f" β€’ Topics: {topics_for_log}")
671
  status_log.append(f" β€’ Summary: {trend_summary}\n")
672
 
673
- # Simple β€œfusion score” heuristic
674
  fusion_score = random.randint(78, 97)
675
  status_log.append(
676
  f"🎯 **Context Fusion Score:** {fusion_score}/100 "
677
  "(niche + trend + persona alignment)\n"
678
  )
679
 
680
- # STEP 2: Generate quote
681
- status_log.append("🧠 **Step 2 – Generating quote (Gemini + variety tracking)**")
682
  quote = generate_quote_tool(niche, style, persona)
683
  if quote.startswith("Error"):
684
  status_log.append(f" ❌ Quote generation error: {quote}")
@@ -687,8 +680,8 @@ def mcp_agent_pipeline(
687
  preview = quote if len(quote) <= 140 else quote[:140] + "..."
688
  status_log.append(f" βœ… Quote: β€œ{preview}”\n")
689
 
690
- # STEP 3: Voice commentary (always attempted)
691
- status_log.append("πŸ”Š **Step 3 – Generating voice-over explanation (OpenAI + ElevenLabs)**")
692
  commentary, audio_b64 = generate_voice_commentary(
693
  quote_text=quote,
694
  niche=niche,
@@ -697,13 +690,13 @@ def mcp_agent_pipeline(
697
  voice_profile=voice_profile,
698
  )
699
  if audio_b64:
700
- status_log.append(" βœ… Voice-over created and encoded as base64")
701
  else:
702
  status_log.append(" ⚠️ Voice generation failed or ElevenLabs unavailable")
703
  if commentary:
704
  status_log.append(f" πŸ“ Commentary preview: {commentary[:120]}...\n")
705
 
706
- # STEP 4: Search Pexels videos
707
  status_log.append("πŸŽ₯ **Step 4 – Searching Pexels for background videos**")
708
  status_log.append(f" Target variations: {num_variations}\n")
709
 
@@ -713,11 +706,12 @@ def mcp_agent_pipeline(
713
  if vr.get("success"):
714
  video_results.append(vr)
715
  status_log.append(
716
- f" βœ… Variation {i+1}: query=`{vr['search_query']}` url={vr['pexels_url']}"
717
  )
718
  else:
719
  status_log.append(
720
- f" ⚠️ Variation {i+1} video search failed: {vr.get('error', 'unknown error')}"
 
721
  )
722
 
723
  if not video_results:
@@ -726,7 +720,7 @@ def mcp_agent_pipeline(
726
 
727
  status_log.append("")
728
 
729
- # STEP 5: Create videos via Modal
730
  status_log.append("🎬 **Step 5 – Rendering quote videos on Modal**")
731
  output_dir = "/tmp/quote_videos"
732
  gallery_dir = "/data/gallery_videos"
@@ -737,7 +731,6 @@ def mcp_agent_pipeline(
737
  import shutil
738
 
739
  timestamp = int(time.time())
740
-
741
  created_videos: List[str] = []
742
 
743
  for i, vr in enumerate(video_results):
@@ -754,9 +747,8 @@ def mcp_agent_pipeline(
754
 
755
  if creation_result.get("success"):
756
  created_videos.append(out_path)
757
- status_log.append(f" βœ… Variation {i+1} rendered successfully")
758
 
759
- # Copy to gallery (we keep ALL; scrolling handled by Gradio gallery)
760
  gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
761
  gallery_path = os.path.join(gallery_dir, gallery_filename)
762
  try:
@@ -773,12 +765,12 @@ def mcp_agent_pipeline(
773
  status_log.append("\n❌ All video renderings failed.")
774
  return "\n".join(status_log), []
775
 
776
- # STEP 6: Wrap up
777
  status_log.append("\nπŸ”— **Integrations used:**")
778
- status_log.append(" β€’ Gemini (via HybridQuoteGenerator) – quote + variety tracking")
779
- status_log.append(" β€’ OpenAI – spoken-style explanation for voice-over")
780
  status_log.append(" β€’ ElevenLabs – voice narration")
781
- status_log.append(" β€’ Pexels – background stock video search")
782
  status_log.append(" β€’ Modal – fast video rendering")
783
  if mcp_enabled:
784
  status_log.append(" β€’ MCP server – available for extended tools")
@@ -789,19 +781,19 @@ def mcp_agent_pipeline(
789
  return "\n".join(status_log), created_videos
790
 
791
 
792
- # ==== GALLERY UTIL (SCROLLABLE, KEEPS ALL) ====================================
793
-
 
794
 
795
  def load_gallery_videos() -> List[str]:
796
  """
797
  Load all videos from persistent gallery folder (sorted newest → oldest).
798
- Gradio's Gallery will handle scrolling.
799
  """
800
  gallery_output_dir = "/data/gallery_videos"
801
  os.makedirs(gallery_output_dir, exist_ok=True)
802
 
803
  import glob
804
-
805
  existing_videos = sorted(
806
  glob.glob(f"{gallery_output_dir}/*.mp4"),
807
  key=os.path.getmtime,
@@ -811,7 +803,9 @@ def load_gallery_videos() -> List[str]:
811
  return existing_videos
812
 
813
 
814
- # ==== GRADIO UI ===============================================================
 
 
815
 
816
  with gr.Blocks(
817
  title="AIQuoteClipGenerator - MCP + Gemini Edition",
@@ -820,23 +814,24 @@ with gr.Blocks(
820
  gr.Markdown(
821
  """
822
  # 🎬 AIQuoteClipGenerator
823
- ### MCP-flavored agent β€’ Gemini + OpenAI + ElevenLabs + Modal
824
 
825
  An autonomous mini-studio that generates trend-aware quote videos with voice-over,
826
  cinematic stock footage, and MCP-style agent reasoning.
827
  """
828
  )
829
 
 
830
  with gr.Accordion("πŸ“Έ Example Gallery – All Generated Videos", open=True):
831
  gr.Markdown("Scroll to explore all the clips you've generated so far.")
832
  gallery = gr.Gallery(
833
  label=None,
834
- elem_id="gallery",
835
  show_label=False,
836
- columns=[3],
837
- rows=[2],
838
- height=540,
839
- object_fit="cover",
 
840
  preview=True,
841
  )
842
 
@@ -862,13 +857,7 @@ with gr.Blocks(
862
  )
863
 
864
  style = gr.Dropdown(
865
- choices=[
866
- "Cinematic",
867
- "Nature",
868
- "Urban",
869
- "Minimal",
870
- "Abstract",
871
- ],
872
  label="🎨 Visual Style",
873
  value="Cinematic",
874
  )
@@ -880,11 +869,7 @@ with gr.Blocks(
880
  )
881
 
882
  text_style = gr.Dropdown(
883
- choices=[
884
- "classic_center",
885
- "lower_third_serif",
886
- "typewriter_top",
887
- ],
888
  label="πŸ–‹ Text Layout Style",
889
  value="classic_center",
890
  )
@@ -908,7 +893,8 @@ with gr.Blocks(
908
  )
909
 
910
  generate_btn = gr.Button(
911
- "πŸ€– Run Agent Pipeline", variant="primary"
 
912
  )
913
 
914
  with gr.Column():
@@ -959,13 +945,7 @@ with gr.Blocks(
959
 
960
  gallery_vids = load_gallery_videos()
961
 
962
- return [
963
- status,
964
- v1,
965
- v2,
966
- v3,
967
- gallery_vids,
968
- ]
969
 
970
  generate_btn.click(
971
  process_and_display,
 
 
1
  import os
 
2
  import random
 
3
  import base64
4
+ import tempfile
5
  from typing import Tuple, List, Dict, Any
6
 
7
+ import gradio as gr
8
+ import requests
9
  from openai import OpenAI
10
  from smolagents import CodeAgent, MCPClient, tool
11
  from huggingface_hub import InferenceClient
12
  from elevenlabs import ElevenLabs, VoiceSettings
13
 
 
14
  from quote_generator_gemini import HybridQuoteGenerator
15
 
16
+ # =============================================================================
17
+ # GLOBAL CLIENTS / CONFIG
18
+ # =============================================================================
19
 
20
  openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
21
  PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
22
+
23
+ # ElevenLabs client (optional)
24
+ try:
25
+ elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
26
+ except Exception as e:
27
+ print(f"ElevenLabs init warning: {e}")
28
+ elevenlabs_client = None
29
 
30
  # Hybrid quote generator (Gemini primary, OpenAI fallback)
31
  hybrid_quote_generator = HybridQuoteGenerator(
 
44
  # Modal endpoint for fast video rendering
45
  MODAL_ENDPOINT_URL = os.getenv("MODAL_ENDPOINT_URL")
46
 
 
47
 
48
+ # =============================================================================
49
+ # CONTEXT ENGINEERING: PERSONA + TRENDS
50
+ # =============================================================================
51
 
52
  def get_persona_instruction(persona: str) -> str:
53
+ """Short style instruction for the selected persona."""
54
  persona = (persona or "").lower()
55
  if persona == "coach":
56
  return (
 
77
 
78
  def get_trend_insights(niche: str) -> Dict[str, Any]:
79
  """
80
+ Lightweight 'trend RAG' – returns niche-specific themes & hooks.
 
81
  """
82
  niche = niche or "Motivation"
83
 
 
85
  "Motivation": {
86
  "label": "soft life vs discipline era",
87
  "summary": (
88
+ "Motivational content leans into 'soft life' aesthetics while still "
89
+ "talking about discipline, systems, and quiet consistency."
 
90
  ),
91
  "topics": [
92
  {
 
99
  },
100
  {
101
  "topic": "Reset Routine Hacks",
102
+ "hook": "A 10-minute reset to get you unstuck.",
 
 
 
 
103
  },
104
  ],
105
  },
106
  "Business/Entrepreneurship": {
107
  "label": "one-person brands & slow growth",
108
  "summary": (
109
+ "Founders are tired of hustle theatre. Trending content focuses on "
110
+ "one-person brands, slow compounding, and honest behind-the-scenes."
111
  ),
112
  "topics": [
113
  {
114
  "topic": "Build in Public Moments",
115
+ "hook": "Here’s the part of building nobody showsβ€”but everyone feels.",
116
  },
117
  {
118
  "topic": "Tiny Experiments",
119
+ "hook": "One small experiment you can run this week instead of a 5-year plan.",
120
  },
121
  ],
122
  },
123
  "Fitness": {
124
  "label": "sustainable glow-up",
125
  "summary": (
126
+ "Fitness trends lean toward sustainable glow-ups: walking, strength, "
127
+ "and realistic body expectations."
128
  ),
129
  "topics": [
130
  {
131
  "topic": "Gentle Discipline Workouts",
132
+ "hook": "A routine for the days you β€˜don’t feel like it’ but still care.",
133
  },
134
  {
135
  "topic": "Slow Glow-Up",
136
+ "hook": "The quiet glow-up that happens when you stop quitting.",
137
  },
138
  ],
139
  },
140
  "Mindfulness": {
141
  "label": "nervous system & soft resets",
142
  "summary": (
143
+ "Mindfulness content is shifting toward nervous system regulation, tiny "
144
+ "resets, and practical grounding."
145
  ),
146
  "topics": [
147
  {
 
157
  "Stoicism": {
158
  "label": "quiet strength",
159
  "summary": (
160
+ "Stoic content focuses on quiet strength, emotional regulation, and not "
161
+ "reacting to every notification, comment, or impulse."
162
  ),
163
  "topics": [
164
  {
165
  "topic": "Reaction Discipline",
166
+ "hook": "You can’t control peopleβ€”but you can control the pause before you answer.",
167
  },
168
  {
169
  "topic": "Modern Stoic Moments",
 
174
  "Leadership": {
175
  "label": "servant leadership & clarity",
176
  "summary": (
177
+ "Leadership trends highlight servant leadership, psychological safety, "
178
+ "and simple, clear direction."
179
  ),
180
  "topics": [
181
  {
182
  "topic": "Clarity Over Charisma",
183
+ "hook": "People don’t need a hero. They need one clear next step.",
184
  },
185
  {
186
  "topic": "Leader as Mirror",
 
191
  "Love & Relationships": {
192
  "label": "self-worth & secure attachment",
193
  "summary": (
194
+ "Relationship content leans into self-worth, boundaries, and secure "
195
+ "attachmentβ€”not just romance but emotional safety."
196
  ),
197
  "topics": [
198
  {
 
210
  default = {
211
  "label": "modern glow-up & gentle discipline",
212
  "summary": (
213
+ "Short-form content leans into gentle discipline, realistic routines, "
214
+ "and soft glow-ups instead of extreme hustle."
215
  ),
216
  "topics": [
217
  {
 
224
  return trends.get(niche, default)
225
 
226
 
227
+ # =============================================================================
228
+ # TOOLS
229
+ # =============================================================================
230
 
231
  @tool
232
  def generate_quote_tool(niche: str, style: str, persona: str) -> str:
 
242
  Returns:
243
  A unique quote string.
244
  """
 
245
  persona_instruction = get_persona_instruction(persona)
246
  combined_style = f"{style} | persona={persona} | tone={persona_instruction}"
247
 
 
279
  trend_label: Short description of the current trend for this niche.
280
 
281
  Returns:
282
+ Dictionary with success, video_url, search_query, pexels_url, error (if any).
 
 
 
 
 
283
  """
 
284
  base_queries = {
285
  "Motivation": {
286
  "Cinematic": ["running sunrise", "cliff sunrise", "city at dawn"],
 
336
  niche_map = base_queries.get(niche, base_queries["Motivation"])
337
  queries = niche_map.get(style, niche_map["Cinematic"])
338
 
 
339
  trend_label_lower = (trend_label or "").lower()
340
  if "soft life" in trend_label_lower:
341
  queries = queries + ["soft life aesthetic", "cozy morning light"]
 
355
  video_files = video.get("video_files", [])
356
 
357
  portrait_videos = [
358
+ vf for vf in video_files if vf.get("width", 0) < vf.get("height", 0)
 
 
359
  ]
360
 
361
  if portrait_videos:
 
407
  video_url: Direct URL to a Pexels MP4 file.
408
  quote_text: The quote text to overlay on the video.
409
  output_path: Local path where the MP4 should be saved.
410
+ audio_b64: Base64-encoded audio for narration.
411
+ text_style: Layout style for quote text.
412
 
413
  Returns:
414
  Dictionary with success flag, message, and output_path if successful.
 
469
  }
470
 
471
 
472
+ # =============================================================================
473
+ # AGENT (MCP-FLAVORED)
474
+ # =============================================================================
475
 
476
  def initialize_agent():
477
  """Initialize the CodeAgent with MCP capabilities (if available)."""
 
511
 
512
  agent, agent_error = initialize_agent()
513
 
 
514
 
515
+ # =============================================================================
516
+ # VOICE GENERATION (OpenAI commentary + ElevenLabs TTS)
517
+ # =============================================================================
518
 
519
  def get_voice_config(voice_profile: str) -> Tuple[str, VoiceSettings]:
520
  """
 
525
  # Calm female (Rachel)
526
  if "rachel" in vp or "female" in vp:
527
  return (
528
+ "21m00Tcm4TlvDq8ikWAM", # Rachel
529
  VoiceSettings(
530
  stability=0.5,
531
  similarity_boost=0.9,
 
534
  ),
535
  )
536
 
537
+ # Warm male (Adam) – default
538
  return (
539
  "pNInz6obpgDQGcFmaJgB", # Adam
540
  VoiceSettings(
 
555
  ) -> Tuple[str, str]:
556
  """
557
  Generate a short explanatory commentary + ElevenLabs audio (as base64).
 
558
 
559
  Returns:
560
  (commentary_text, audio_b64) – audio_b64 may be "" if error.
 
562
  if not elevenlabs_client:
563
  return "", ""
564
 
 
565
  persona_instruction = get_persona_instruction(persona)
566
  prompt = f"""
567
  You are creating a short voice-over commentary for a TikTok/Instagram quote video.
 
585
  Return ONLY the commentary text, nothing else.
586
  """
587
 
588
+ # 1) Commentary via OpenAI
589
  try:
590
  completion = openai_client.chat.completions.create(
591
  model="gpt-4o-mini",
 
596
  max_tokens=120,
597
  temperature=0.7,
598
  )
 
599
  commentary = completion.choices[0].message.content.strip()
600
  except Exception as e:
601
  print(f"⚠️ Error generating commentary text: {e}")
 
604
  # 2) ElevenLabs TTS
605
  try:
606
  voice_id, voice_settings = get_voice_config(voice_profile)
 
607
  audio_stream = elevenlabs_client.text_to_speech.convert(
608
  text=commentary,
609
  voice_id=voice_id,
610
  model_id="eleven_multilingual_v2",
611
  voice_settings=voice_settings,
612
  )
 
613
  audio_bytes = b"".join(chunk for chunk in audio_stream)
614
  audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
615
  return commentary, audio_b64
616
  except Exception as e:
617
  print(f"⚠️ Error generating ElevenLabs audio: {e}")
618
+ return commentary, ""
 
619
 
 
620
 
621
+ # =============================================================================
622
+ # PIPELINE (MCP-STYLE)
623
+ # =============================================================================
624
 
625
  def mcp_agent_pipeline(
626
  niche: str,
 
632
  ) -> Tuple[str, List[str]]:
633
  """
634
  MCP-flavored autonomous pipeline with:
635
+ - Context engineering (niche + persona + trends)
636
+ - Trend-informed context
637
  - Quote generation via hybrid Gemini/OpenAI
638
+ - ElevenLabs narration
639
  - Modal-based video creation (1–3 variations)
640
  """
641
 
 
646
  status_log.append(f"⚠️ Agent initialization failed: {agent_error}")
647
  status_log.append(" Falling back to direct tool execution.\n")
648
 
649
+ # Step 0: context & trends
650
  status_log.append("🧩 **Step 0 – Building context**")
651
  status_log.append(f" β€’ Niche: `{niche}`")
652
  status_log.append(f" β€’ Visual style: `{style}`")
 
659
  trend_summary = trend_info.get("summary", "")
660
  topics_for_log = ", ".join(t["topic"] for t in trend_info.get("topics", [])[:3])
661
 
662
+ status_log.append("πŸ“ˆ **Step 1 – Trend-aware context**")
663
  status_log.append(f" β€’ Trend theme: {trend_label}")
664
  status_log.append(f" β€’ Topics: {topics_for_log}")
665
  status_log.append(f" β€’ Summary: {trend_summary}\n")
666
 
 
667
  fusion_score = random.randint(78, 97)
668
  status_log.append(
669
  f"🎯 **Context Fusion Score:** {fusion_score}/100 "
670
  "(niche + trend + persona alignment)\n"
671
  )
672
 
673
+ # Step 2: quote
674
+ status_log.append("🧠 **Step 2 – Generating quote**")
675
  quote = generate_quote_tool(niche, style, persona)
676
  if quote.startswith("Error"):
677
  status_log.append(f" ❌ Quote generation error: {quote}")
 
680
  preview = quote if len(quote) <= 140 else quote[:140] + "..."
681
  status_log.append(f" βœ… Quote: β€œ{preview}”\n")
682
 
683
+ # Step 3: voice commentary
684
+ status_log.append("πŸ”Š **Step 3 – Generating voice-over (OpenAI + ElevenLabs)**")
685
  commentary, audio_b64 = generate_voice_commentary(
686
  quote_text=quote,
687
  niche=niche,
 
690
  voice_profile=voice_profile,
691
  )
692
  if audio_b64:
693
+ status_log.append(" βœ… Voice-over created")
694
  else:
695
  status_log.append(" ⚠️ Voice generation failed or ElevenLabs unavailable")
696
  if commentary:
697
  status_log.append(f" πŸ“ Commentary preview: {commentary[:120]}...\n")
698
 
699
+ # Step 4: Pexels videos
700
  status_log.append("πŸŽ₯ **Step 4 – Searching Pexels for background videos**")
701
  status_log.append(f" Target variations: {num_variations}\n")
702
 
 
706
  if vr.get("success"):
707
  video_results.append(vr)
708
  status_log.append(
709
+ f" βœ… Variation {i+1}: query=`{vr['search_query']}` url={vr['pexels_url']}"
710
  )
711
  else:
712
  status_log.append(
713
+ f" ⚠️ Variation {i+1} video search failed: "
714
+ f"{vr.get('error', 'unknown error')}"
715
  )
716
 
717
  if not video_results:
 
720
 
721
  status_log.append("")
722
 
723
+ # Step 5: Modal rendering
724
  status_log.append("🎬 **Step 5 – Rendering quote videos on Modal**")
725
  output_dir = "/tmp/quote_videos"
726
  gallery_dir = "/data/gallery_videos"
 
731
  import shutil
732
 
733
  timestamp = int(time.time())
 
734
  created_videos: List[str] = []
735
 
736
  for i, vr in enumerate(video_results):
 
747
 
748
  if creation_result.get("success"):
749
  created_videos.append(out_path)
750
+ status_log.append(f" βœ… Variation {i+1} rendered")
751
 
 
752
  gallery_filename = f"gallery_{timestamp}_v{i+1}.mp4"
753
  gallery_path = os.path.join(gallery_dir, gallery_filename)
754
  try:
 
765
  status_log.append("\n❌ All video renderings failed.")
766
  return "\n".join(status_log), []
767
 
768
+ # Wrap-up
769
  status_log.append("\nπŸ”— **Integrations used:**")
770
+ status_log.append(" β€’ Gemini – quote + variety tracking")
771
+ status_log.append(" β€’ OpenAI – spoken-style commentary")
772
  status_log.append(" β€’ ElevenLabs – voice narration")
773
+ status_log.append(" β€’ Pexels – stock video search")
774
  status_log.append(" β€’ Modal – fast video rendering")
775
  if mcp_enabled:
776
  status_log.append(" β€’ MCP server – available for extended tools")
 
781
  return "\n".join(status_log), created_videos
782
 
783
 
784
+ # =============================================================================
785
+ # GALLERY (SCROLLABLE GRID)
786
+ # =============================================================================
787
 
788
  def load_gallery_videos() -> List[str]:
789
  """
790
  Load all videos from persistent gallery folder (sorted newest → oldest).
791
+ Gradio's Gallery will handle scrolling in a grid layout.
792
  """
793
  gallery_output_dir = "/data/gallery_videos"
794
  os.makedirs(gallery_output_dir, exist_ok=True)
795
 
796
  import glob
 
797
  existing_videos = sorted(
798
  glob.glob(f"{gallery_output_dir}/*.mp4"),
799
  key=os.path.getmtime,
 
803
  return existing_videos
804
 
805
 
806
+ # =============================================================================
807
+ # GRADIO UI
808
+ # =============================================================================
809
 
810
  with gr.Blocks(
811
  title="AIQuoteClipGenerator - MCP + Gemini Edition",
 
814
  gr.Markdown(
815
  """
816
  # 🎬 AIQuoteClipGenerator
817
+ ### MCP-style agent β€’ Gemini + OpenAI + ElevenLabs + Modal
818
 
819
  An autonomous mini-studio that generates trend-aware quote videos with voice-over,
820
  cinematic stock footage, and MCP-style agent reasoning.
821
  """
822
  )
823
 
824
+ # Scrollable IG-style gallery
825
  with gr.Accordion("πŸ“Έ Example Gallery – All Generated Videos", open=True):
826
  gr.Markdown("Scroll to explore all the clips you've generated so far.")
827
  gallery = gr.Gallery(
828
  label=None,
 
829
  show_label=False,
830
+ columns=3, # 3 per row β†’ IG grid
831
+ rows=2, # ~2 rows visible before scroll
832
+ height=540, # scrolling kicks in after this
833
+ object_fit="cover", # crop previews to fill tiles
834
+ allow_preview=True,
835
  preview=True,
836
  )
837
 
 
857
  )
858
 
859
  style = gr.Dropdown(
860
+ choices=["Cinematic", "Nature", "Urban", "Minimal", "Abstract"],
 
 
 
 
 
 
861
  label="🎨 Visual Style",
862
  value="Cinematic",
863
  )
 
869
  )
870
 
871
  text_style = gr.Dropdown(
872
+ choices=["classic_center", "lower_third_serif", "typewriter_top"],
 
 
 
 
873
  label="πŸ–‹ Text Layout Style",
874
  value="classic_center",
875
  )
 
893
  )
894
 
895
  generate_btn = gr.Button(
896
+ "πŸ€– Run Agent Pipeline",
897
+ variant="primary",
898
  )
899
 
900
  with gr.Column():
 
945
 
946
  gallery_vids = load_gallery_videos()
947
 
948
+ return status, v1, v2, v3, gallery_vids
 
 
 
 
 
 
949
 
950
  generate_btn.click(
951
  process_and_display,