codelion committed on
Commit
769e325
·
verified ·
1 Parent(s): 1a53d87

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -646,6 +646,10 @@ def evaluate(prompt: str) -> dict:
646
 
647
  print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
648
 
 
 
 
 
649
  return {{
650
  "combined_score": accuracy,
651
  "accuracy": accuracy,
@@ -773,9 +777,9 @@ Your improved prompt here
773
  "evolution": {
774
  "population_size": 10, # Smaller population but more iterations
775
  "num_islands": 1, # Single island for simpler evolution
776
- "elite_ratio": 0.2, # Keep top 20% (2 best prompts)
777
- "explore_ratio": 0.5, # More exploration to find better prompts
778
- "exploit_ratio": 0.3, # Less exploitation
779
  },
780
  "database": {
781
  "log_prompts": True, # Save prompts used to generate each program
@@ -921,14 +925,19 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
921
  if os.path.exists(best_prompt_path):
922
  with open(best_prompt_path, "r") as f:
923
  best_prompt = f.read()
 
 
924
  else:
925
  # Fallback: try without the "best" subdirectory
926
  best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
927
  if os.path.exists(best_prompt_path_alt):
928
  with open(best_prompt_path_alt, "r") as f:
929
  best_prompt = f.read()
 
 
930
  else:
931
  best_prompt = initial_prompt
 
932
 
933
  # Final evaluation: Use same 50 samples as initial eval for fair comparison
934
  progress(0.85, desc="Evaluating best prompt on 50 samples (same as initial)...")
 
646
 
647
  print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
648
 
649
+ # DEBUG: Log the prompt being evaluated and its score
650
+ prompt_preview = prompt[:80].replace('\\n', ' ') if len(prompt) > 80 else prompt.replace('\\n', ' ')
651
+ print(f"[EVAL DEBUG] Prompt: '{{prompt_preview}}...' → Score: {{accuracy:.2%}}")
652
+
653
  return {{
654
  "combined_score": accuracy,
655
  "accuracy": accuracy,
 
777
  "evolution": {
778
  "population_size": 10, # Smaller population but more iterations
779
  "num_islands": 1, # Single island for simpler evolution
780
+ "elite_ratio": 0.3, # Keep top 30% (3 best prompts)
781
+ "explore_ratio": 0.2, # Less random exploration
782
+ "exploit_ratio": 0.5, # More exploitation of good prompts
783
  },
784
  "database": {
785
  "log_prompts": True, # Save prompts used to generate each program
 
925
  if os.path.exists(best_prompt_path):
926
  with open(best_prompt_path, "r") as f:
927
  best_prompt = f.read()
928
+ print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path}")
929
+ print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
930
  else:
931
  # Fallback: try without the "best" subdirectory
932
  best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
933
  if os.path.exists(best_prompt_path_alt):
934
  with open(best_prompt_path_alt, "r") as f:
935
  best_prompt = f.read()
936
+ print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path_alt}")
937
+ print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
938
  else:
939
  best_prompt = initial_prompt
940
+ print(f"\n[SELECTION] WARNING: No best_program.txt found, using initial prompt")
941
 
942
  # Final evaluation: Use same 50 samples as initial eval for fair comparison
943
  progress(0.85, desc="Evaluating best prompt on 50 samples (same as initial)...")