Spaces:

algorithmicsuperintelligence
/

prompt-optimizer

Running

App Files Community

codelion committed 23 days ago

Commit

769e325

verified ·

1 Parent(s): 1a53d87

Upload app.py

Browse files

Files changed (1) hide show

app.py +12 -3

app.py CHANGED Viewed

@@ -646,6 +646,10 @@ def evaluate(prompt: str) -> dict:
         print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
         return {{
             "combined_score": accuracy,
             "accuracy": accuracy,
@@ -773,9 +777,9 @@ Your improved prompt here
         "evolution": {
             "population_size": 10,  # Smaller population but more iterations
             "num_islands": 1,  # Single island for simpler evolution
-            "elite_ratio": 0.2,  # Keep top 20% (2 best prompts)
-            "explore_ratio": 0.5,  # More exploration to find better prompts
-            "exploit_ratio": 0.3,  # Less exploitation
         },
         "database": {
             "log_prompts": True,  # Save prompts used to generate each program
@@ -921,14 +925,19 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
             if os.path.exists(best_prompt_path):
                 with open(best_prompt_path, "r") as f:
                     best_prompt = f.read()
             else:
                 # Fallback: try without the "best" subdirectory
                 best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
                 if os.path.exists(best_prompt_path_alt):
                     with open(best_prompt_path_alt, "r") as f:
                         best_prompt = f.read()
                 else:
                     best_prompt = initial_prompt
             # Final evaluation: Use same 50 samples as initial eval for fair comparison
             progress(0.85, desc="Evaluating best prompt on 50 samples (same as initial)...")

         print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
+        # DEBUG: Log the prompt being evaluated and its score
+        prompt_preview = prompt[:80].replace('\\n', ' ') if len(prompt) > 80 else prompt.replace('\\n', ' ')
+        print(f"[EVAL DEBUG] Prompt: '{{prompt_preview}}...' → Score: {{accuracy:.2%}}")
         return {{
             "combined_score": accuracy,
             "accuracy": accuracy,
         "evolution": {
             "population_size": 10,  # Smaller population but more iterations
             "num_islands": 1,  # Single island for simpler evolution
+            "elite_ratio": 0.3,  # Keep top 30% (3 best prompts)
+            "explore_ratio": 0.2,  # Less random exploration
+            "exploit_ratio": 0.5,  # More exploitation of good prompts
         },
         "database": {
             "log_prompts": True,  # Save prompts used to generate each program
             if os.path.exists(best_prompt_path):
                 with open(best_prompt_path, "r") as f:
                     best_prompt = f.read()
+                print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path}")
+                print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
             else:
                 # Fallback: try without the "best" subdirectory
                 best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
                 if os.path.exists(best_prompt_path_alt):
                     with open(best_prompt_path_alt, "r") as f:
                         best_prompt = f.read()
+                    print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path_alt}")
+                    print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
                 else:
                     best_prompt = initial_prompt
+                    print(f"\n[SELECTION] WARNING: No best_program.txt found, using initial prompt")
             # Final evaluation: Use same 50 samples as initial eval for fair comparison
             progress(0.85, desc="Evaluating best prompt on 50 samples (same as initial)...")