| """ | |
| MAKER Framework - Hugging Face Space | |
| ===================================== | |
| Reliable AI Agent with Web Search & File Upload | |
| Based on: https://arxiv.org/abs/2511.09030 | |
| """ | |
| import gradio as gr | |
| import asyncio | |
| import json | |
| import re | |
| import base64 | |
| from collections import Counter | |
| from dataclasses import dataclass, field | |
| from typing import Any, Callable, Optional | |
| from pathlib import Path | |
# ============================================================================
# MAKER Core (Embedded)
# ============================================================================
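# The agent below maps the three MAKER pillars onto the classes in this file:
#   1. K-voting      -> VotingConfig + the sampling loop in MAKERAgent.execute
#   2. Red-flagging  -> RedFlagConfig + MAKERAgent._check_red_flags
#   3. Decomposition -> each execute() call is meant to handle one atomic step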
@dataclass
class VotingConfig:
    k: int = 3                      # required lead over the runner-up answer
    max_samples: int = 30           # hard cap on total LLM samples
    temperature_first: float = 0.0  # first sample is greedy
    temperature_rest: float = 0.1   # later rounds add slight diversity
    parallel_samples: int = 3       # samples drawn per voting round
@dataclass
class RedFlagConfig:
    max_response_chars: int = 3000
    min_response_length: int = 5
    banned_patterns: list = field(default_factory=lambda: [r"I don't know", r"I cannot"])
class LLMClient:
    """Universal LLM client."""

    def __init__(self, provider: str, api_key: str, model: str = None):
        self.provider = provider.lower()
        self.api_key = api_key
        self.model = model
        self._client = None
        self._setup_client()
    def _setup_client(self):
        if self.provider == "openai":
            from openai import AsyncOpenAI
            self._client = AsyncOpenAI(api_key=self.api_key)
            self.model = self.model or "gpt-4o-mini"
        elif self.provider == "anthropic":
            from anthropic import AsyncAnthropic
            self._client = AsyncAnthropic(api_key=self.api_key)
            self.model = self.model or "claude-sonnet-4-20250514"
        # Groq, Together, and OpenRouter all expose OpenAI-compatible endpoints.
        elif self.provider == "groq":
            from openai import AsyncOpenAI
            self._client = AsyncOpenAI(api_key=self.api_key, base_url="https://api.groq.com/openai/v1")
            self.model = self.model or "llama-3.3-70b-versatile"
        elif self.provider == "together":
            from openai import AsyncOpenAI
            self._client = AsyncOpenAI(api_key=self.api_key, base_url="https://api.together.xyz/v1")
            self.model = self.model or "meta-llama/Llama-3.3-70B-Instruct-Turbo"
        elif self.provider == "openrouter":
            from openai import AsyncOpenAI
            self._client = AsyncOpenAI(api_key=self.api_key, base_url="https://openrouter.ai/api/v1")
            self.model = self.model or "openai/gpt-4o-mini"
        else:
            raise ValueError(f"Unknown provider: {self.provider}")
    async def generate(self, prompt: str, temperature: float = 0.0, max_tokens: int = 1000) -> str:
        if self.provider == "anthropic":
            r = await self._client.messages.create(
                model=self.model, max_tokens=max_tokens,
                temperature=temperature,
                messages=[{"role": "user", "content": prompt}]
            )
            return r.content[0].text
        else:
            r = await self._client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature, max_tokens=max_tokens
            )
            return r.choices[0].message.content
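# Usage sketch (hypothetical key; model falls back to the provider default):
#   llm = LLMClient("groq", "gsk_...")
#   text = await llm.generate("Say hi", temperature=0.0)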
class WebSearch:
    """Web search using DuckDuckGo (free)."""

    @staticmethod
    async def search(query: str, num_results: int = 5) -> list:
        try:
            from duckduckgo_search import DDGS
            results = []
            with DDGS() as ddgs:
                for r in ddgs.text(query, max_results=num_results):
                    results.append({
                        "title": r.get("title", ""),
                        "url": r.get("href", ""),
                        "snippet": r.get("body", "")
                    })
            return results
        except Exception as e:
            return [{"title": "Error", "url": "", "snippet": str(e)}]
class FileHandler:
    """Handle file uploads."""

    @staticmethod
    async def load_file(file_path: str) -> dict:
        path = Path(file_path)
        ext = path.suffix.lower()
        try:
            if ext in {'.txt', '.md', '.json', '.py', '.js', '.html', '.css', '.csv'}:
                content = path.read_text(encoding='utf-8', errors='replace')
                return {"type": "text", "name": path.name, "content": content[:50000]}
            elif ext == '.pdf':
                try:
                    import pymupdf
                    doc = pymupdf.open(str(path))
                    text = "\n\n".join([page.get_text() for page in doc])
                    doc.close()
                    return {"type": "pdf", "name": path.name, "content": text[:50000]}
                except ImportError:
                    return {"type": "error", "name": path.name, "content": "PDF requires: pip install pymupdf"}
            elif ext == '.docx':
                try:
                    from docx import Document
                    doc = Document(str(path))
                    text = "\n\n".join([p.text for p in doc.paragraphs])
                    return {"type": "docx", "name": path.name, "content": text[:50000]}
                except ImportError:
                    return {"type": "error", "name": path.name, "content": "DOCX requires: pip install python-docx"}
            elif ext in {'.png', '.jpg', '.jpeg', '.gif', '.webp'}:
                content = path.read_bytes()
                b64 = base64.b64encode(content).decode('utf-8')
                return {"type": "image", "name": path.name, "base64": b64}
            else:
                content = path.read_text(encoding='utf-8', errors='replace')
                return {"type": "text", "name": path.name, "content": content[:50000]}
        except Exception as e:
            return {"type": "error", "name": path.name, "content": str(e)}
class MAKERAgent:
    """MAKER Framework Agent."""

    def __init__(self, llm: LLMClient, voting: VotingConfig = None, red_flags: RedFlagConfig = None):
        self.llm = llm
        self.voting = voting or VotingConfig()
        self.red_flags = red_flags or RedFlagConfig()
        self.stats = {"samples": 0, "red_flags": 0, "tool_calls": 0}
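    # Red-flagging: suspicious outputs (oversized, near-empty, or hedging) are
    # discarded outright rather than repaired; a flagged response never votes.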
    def _check_red_flags(self, response: str) -> bool:
        if len(response) > self.red_flags.max_response_chars:
            return True
        if len(response) < self.red_flags.min_response_length:
            return True
        for pattern in self.red_flags.banned_patterns:
            if re.search(pattern, response, re.IGNORECASE):
                return True
        return False
    def _parse_json(self, response: str) -> Optional[dict]:
        # Strip optional markdown code fences before parsing.
        response = re.sub(r'^```(?:json)?\s*', '', response.strip())
        response = re.sub(r'\s*```$', '', response)
        try:
            result = json.loads(response)
            return result if isinstance(result, dict) else None
        except json.JSONDecodeError:
            return None
    def _serialize(self, result) -> str:
        # Canonical string form so semantically identical dicts share one vote key.
        if isinstance(result, dict):
            return json.dumps(result, sort_keys=True)
        return str(result)
    async def execute(self, prompt: str, expected_keys: list = None, use_tools: bool = False,
                      file_context: str = None, progress_callback: Callable = None) -> dict:
        full_prompt = ""
        if file_context:
            full_prompt += f"CONTEXT FROM FILES:\n{file_context}\n\n"
        full_prompt += prompt
        if use_tools:
            full_prompt += '\n\nTo search web: {"tool": "web_search", "query": "..."}'
        full_prompt += "\n\nRespond with valid JSON only."
        votes: Counter = Counter()
        results_map = {}
        samples, flagged = 0, 0
        tool_results = []
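        # The first sample is greedy (temperature_first); later voting rounds
        # add a little temperature so resampling can surface alternatives.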
        if progress_callback:
            progress_callback(0.1, "Getting first sample...")
        response = await self.llm.generate(full_prompt, temperature=self.voting.temperature_first)
        samples += 1
        self.stats["samples"] += 1
        # Handle tool calls
        if use_tools:
            parsed = self._parse_json(response)
            if parsed and parsed.get("tool") == "web_search":
                query = parsed.get("query", "")
                if progress_callback:
                    progress_callback(0.2, f"Searching: {query}...")
                search_results = await WebSearch.search(query)
                tool_results.append({"query": query, "results": search_results})
                self.stats["tool_calls"] += 1
                search_text = "\n".join([f"- {r['title']}: {r['snippet']}" for r in search_results[:5]])
                full_prompt += f"\n\nSEARCH RESULTS:\n{search_text}\n\nNow provide final JSON answer."
                response = await self.llm.generate(full_prompt, temperature=self.voting.temperature_first)
                samples += 1
                self.stats["samples"] += 1
        # Parse response
        if self._check_red_flags(response):
            flagged += 1
            self.stats["red_flags"] += 1
        else:
            parsed = self._parse_json(response)
            if parsed and (not expected_keys or all(k in parsed for k in expected_keys)):
                key = self._serialize(parsed)
                votes[key] += 1
                results_map[key] = parsed
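        # First-to-ahead-by-K voting: keep sampling until the leading answer is
        # at least k votes ahead of the runner-up, or the sample budget runs out.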
        # Voting loop
        round_num = 1
        while samples < self.voting.max_samples:
            if votes:
                top = votes.most_common(2)
                top_count = top[0][1]
                second_count = top[1][1] if len(top) > 1 else 0
                if top_count - second_count >= self.voting.k:
                    break
            round_num += 1
            if progress_callback:
                progress_callback(0.2 + 0.6 * (samples / self.voting.max_samples), f"Voting round {round_num}...")
            for _ in range(self.voting.parallel_samples):
                if samples >= self.voting.max_samples:
                    break
                response = await self.llm.generate(full_prompt, temperature=self.voting.temperature_rest)
                samples += 1
                self.stats["samples"] += 1
                if self._check_red_flags(response):
                    flagged += 1
                    self.stats["red_flags"] += 1
                    continue
                parsed = self._parse_json(response)
                if parsed and (not expected_keys or all(k in parsed for k in expected_keys)):
                    key = self._serialize(parsed)
                    votes[key] += 1
                    if key not in results_map:
                        results_map[key] = parsed
        if progress_callback:
            progress_callback(1.0, "Complete!")
        if votes:
            top_key, top_count = votes.most_common(1)[0]
            return {
                "success": True, "result": results_map[top_key],
                "votes": top_count, "total_samples": samples,
                "red_flagged": flagged, "vote_distribution": dict(votes),
                "tool_results": tool_results
            }
        return {"success": False, "result": None, "votes": 0, "total_samples": samples,
                "red_flagged": flagged, "vote_distribution": {}, "tool_results": tool_results}
# ============================================================================
# Custom CSS
# ============================================================================
CUSTOM_CSS = """
.gradio-container {
    max-width: 1200px !important;
}
.header-title {
    background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5rem !important;
    font-weight: 800 !important;
    text-align: center;
}
.header-sub {
    color: #64748b !important;
    text-align: center;
    margin-bottom: 1.5rem !important;
}
.primary-btn {
    background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%) !important;
    border: none !important;
    font-weight: 600 !important;
    border-radius: 8px !important;
}
.primary-btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 4px 12px rgba(99, 102, 241, 0.4) !important;
}
"""
# ============================================================================
# State & Functions
# ============================================================================
current_agent = None
loaded_files = {}

def create_agent(provider, api_key, model, k_votes):
    global current_agent
    if not api_key:
        return "❌ Please enter API key"
    try:
        llm = LLMClient(provider, api_key, model if model else None)
        current_agent = MAKERAgent(llm, VotingConfig(k=int(k_votes)))
        return f"✅ Agent ready: {provider} / {llm.model}"
    except Exception as e:
        return f"❌ Error: {e}"
async def run_query_async(prompt, use_search, use_files, expected_keys, progress=gr.Progress()):
    global current_agent, loaded_files
    if not current_agent:
        return {"error": "Create agent first"}, "❌ No agent", ""
    file_context = None
    if use_files and loaded_files:
        parts = [f"=== {n} ===\n{i.get('content', '')[:10000]}"
                 for n, i in loaded_files.items() if i["type"] != "image"]
        file_context = "\n\n".join(parts) if parts else None
    keys = [k.strip() for k in expected_keys.split(",") if k.strip()] if expected_keys else None

    def update_progress(pct, msg):
        progress(pct, desc=msg)

    try:
        result = await current_agent.execute(prompt, keys, use_search, file_context, update_progress)
        stats = f"""### Stats
- **Success**: {'✅' if result['success'] else '❌'}
- **Votes**: {result['votes']}
- **Samples**: {result['total_samples']}
- **Red-flagged**: {result['red_flagged']}"""
        if result['vote_distribution']:
            top_votes = sorted(result['vote_distribution'].items(), key=lambda x: -x[1])[:3]
            votes = "### Vote Distribution\n" + "\n".join([f"- {v} votes: {k[:80]}..." for k, v in top_votes])
        else:
            votes = ""
        return result['result'], stats, votes
    except Exception as e:
        return {"error": str(e)}, f"❌ {e}", ""
def run_query(prompt, use_search, use_files, expected_keys, progress=gr.Progress()):
    return asyncio.run(run_query_async(prompt, use_search, use_files, expected_keys, progress))
def handle_files(files):
    global loaded_files
    if not files:
        loaded_files = {}
        return "No files"
    loaded_files = {}
    results = []
    for f in files:
        info = asyncio.run(FileHandler.load_file(f.name))
        loaded_files[info['name']] = info
        if info['type'] == 'error':
            results.append(f"❌ {info['name']}: {info['content']}")
        elif info['type'] == 'image':
            results.append(f"🖼️ {info['name']}")
        else:
            results.append(f"✅ {info['name']} ({len(info.get('content', ''))} chars)")
    return "\n".join(results)
# ============================================================================
# UI
# ============================================================================
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft(), title="MAKER Agent") as demo:
    gr.HTML("""
    <div style="text-align: center; padding: 20px 0;">
        <h1 class="header-title">🧠 MAKER Agent</h1>
        <p class="header-sub">Reliable AI with Voting & Red-Flagging | Based on arxiv.org/abs/2511.09030</p>
    </div>
    """)
    with gr.Tabs():
        # Setup Tab
        with gr.Tab("⚙️ Setup"):
            gr.Markdown("### Configure your LLM provider")
            with gr.Row():
                with gr.Column():
                    provider = gr.Dropdown(
                        ["openai", "anthropic", "groq", "together", "openrouter"],
                        value="openai", label="Provider"
                    )
                    api_key = gr.Textbox(label="API Key", type="password", placeholder="sk-...")
                    model = gr.Textbox(label="Model (optional)", placeholder="Leave blank for default")
                with gr.Column():
                    k_votes = gr.Slider(1, 10, value=3, step=1, label="K (votes needed to win)",
                                        info="Higher = more reliable but slower")
                    gr.Markdown("""
### How MAKER Works
1. **Voting**: Samples multiple responses; the winner must lead by K votes
2. **Red-Flagging**: Discards suspicious outputs (too long, malformed)
3. **Tools**: Optional web search for current information
""")
            setup_btn = gr.Button("🚀 Create Agent", elem_classes="primary-btn")
            setup_status = gr.Markdown("👉 Enter your API key and click Create Agent to start")
            setup_btn.click(create_agent, [provider, api_key, model, k_votes], setup_status)
        # Query Tab
        with gr.Tab("💬 Query"):
            gr.Markdown("### Ask a question")
            with gr.Row():
                with gr.Column(scale=2):
                    prompt = gr.Textbox(
                        label="Your Query",
                        lines=4,
                        placeholder="Ask anything... The agent will use voting to ensure reliable answers.\n\nExample: What are the key factors for startup success? Return as JSON with keys: factors, explanation"
                    )
                    with gr.Row():
                        use_search = gr.Checkbox(label="🔍 Enable Web Search", info="Search DuckDuckGo for current info")
                        use_files = gr.Checkbox(label="📁 Use Uploaded Files", info="Include file content in context")
                    expected_keys = gr.Textbox(
                        label="Expected JSON keys (optional)",
                        placeholder="answer, confidence, sources",
                        info="Comma-separated list of required keys in response"
                    )
                    run_btn = gr.Button("▶️ Run Query", elem_classes="primary-btn")
                with gr.Column(scale=1):
                    gr.Markdown("""### Example Queries
**Simple Analysis:**
```
What factors lead to startup success?
```
**With Web Search:**
```
What are the latest AI news this week?
```
**With Expected Keys:**
```
Analyze the pros and cons of remote work.
Expected keys: pros, cons, recommendation
```
""")
            gr.Markdown("---")
            gr.Markdown("### Results")
            with gr.Row():
                with gr.Column(scale=2):
                    result_json = gr.JSON(label="Response")
                with gr.Column(scale=1):
                    stats_md = gr.Markdown("*Run a query to see stats*")
                    votes_md = gr.Markdown("")
            run_btn.click(
                run_query,
                [prompt, use_search, use_files, expected_keys],
                [result_json, stats_md, votes_md]
            )
        # Files Tab
        with gr.Tab("📁 Files"):
            gr.Markdown("### Upload files for analysis")
            gr.Markdown("Supported formats: PDF, DOCX, TXT, MD, JSON, CSV, PNG, JPG")
            file_upload = gr.File(
                label="Upload Files",
                file_count="multiple",
                file_types=[".pdf", ".docx", ".txt", ".md", ".json", ".csv", ".png", ".jpg", ".jpeg"]
            )
            file_status = gr.Markdown("*No files uploaded*")
            file_upload.change(handle_files, file_upload, file_status)
            gr.Markdown("""
### How to use files
1. Upload your files above
2. Go to the **Query** tab
3. Check **"Use Uploaded Files"**
4. Ask questions about your documents!
""")
        # About Tab
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
## About MAKER Framework
**MAKER** achieves near-zero errors by running tasks as Massively Decomposed Agentic Processes (MDAPs), built on three pillars:
| Pillar | Description |
|--------|-------------|
| **Maximal Decomposition** | Break tasks into single-step atomic operations |
| **K-Voting** | Sample multiple times; the winner must lead by K votes |
| **Red-Flagging** | Discard suspicious outputs (don't try to repair them) |
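### The Voting Rule in Code
A minimal sketch of the first-to-ahead-by-K rule (it mirrors the margin check inside `MAKERAgent.execute` in this Space):
```python
from collections import Counter

def winner(votes: Counter, k: int):
    if not votes:
        return None
    top = votes.most_common(2)
    lead = top[0][1] - (top[1][1] if len(top) > 1 else 0)
    return top[0][0] if lead >= k else None  # None -> keep sampling

assert winner(Counter({"A": 4, "B": 1}), k=3) == "A"
```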
### Key Insight
> *"Reliability is an engineering problem, not a model problem."*
Instead of waiting for better models, you can achieve near-zero errors TODAY using smaller, cheaper models with statistical voting.
### Results from the Paper
The researchers achieved **1,000,000 steps with ZERO errors** using gpt-4.1-mini!
### Links
- 📄 **Paper**: [arxiv.org/abs/2511.09030](https://arxiv.org/abs/2511.09030)
- 🎥 **Video Explanation**: [YouTube](https://youtube.com/watch?v=TJ-vWGCosdQ)
### Supported LLM Providers
| Provider | Example Models |
|----------|----------------|
| OpenAI | gpt-4o-mini, gpt-4o |
| Anthropic | claude-sonnet, claude-opus |
| Groq | llama-3.3-70b (very fast!) |
| Together | Llama, Mistral, Qwen |
| OpenRouter | 100+ models |
""")
| gr.HTML(""" | |
| <div style="text-align:center; color:#64748b; padding:20px; border-top: 1px solid #e2e8f0; margin-top: 20px;"> | |
| MAKER Framework | Based on <a href="https://arxiv.org/abs/2511.09030" style="color:#6366f1">arxiv.org/abs/2511.09030</a> | |
| </div> | |
| """) | |
if __name__ == "__main__":
    demo.launch()