Upload app.py
Browse files
app.py
CHANGED
|
@@ -646,6 +646,10 @@ def evaluate(prompt: str) -> dict:
|
|
| 646 |
|
| 647 |
print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
|
| 648 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
return {{
|
| 650 |
"combined_score": accuracy,
|
| 651 |
"accuracy": accuracy,
|
|
@@ -773,9 +777,9 @@ Your improved prompt here
|
|
| 773 |
"evolution": {
|
| 774 |
"population_size": 10, # Smaller population but more iterations
|
| 775 |
"num_islands": 1, # Single island for simpler evolution
|
| 776 |
-
"elite_ratio": 0.
|
| 777 |
-
"explore_ratio": 0.
|
| 778 |
-
"exploit_ratio": 0.
|
| 779 |
},
|
| 780 |
"database": {
|
| 781 |
"log_prompts": True, # Save prompts used to generate each program
|
|
@@ -921,14 +925,19 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 921 |
if os.path.exists(best_prompt_path):
|
| 922 |
with open(best_prompt_path, "r") as f:
|
| 923 |
best_prompt = f.read()
|
|
|
|
|
|
|
| 924 |
else:
|
| 925 |
# Fallback: try without the "best" subdirectory
|
| 926 |
best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
|
| 927 |
if os.path.exists(best_prompt_path_alt):
|
| 928 |
with open(best_prompt_path_alt, "r") as f:
|
| 929 |
best_prompt = f.read()
|
|
|
|
|
|
|
| 930 |
else:
|
| 931 |
best_prompt = initial_prompt
|
|
|
|
| 932 |
|
| 933 |
# Final evaluation: Use same 50 samples as initial eval for fair comparison
|
| 934 |
progress(0.85, desc="Evaluating best prompt on 50 samples (same as initial)...")
|
|
|
|
| 646 |
|
| 647 |
print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
|
| 648 |
|
| 649 |
+
# DEBUG: Log the prompt being evaluated and its score
|
| 650 |
+
prompt_preview = prompt[:80].replace('\\n', ' ') if len(prompt) > 80 else prompt.replace('\\n', ' ')
|
| 651 |
+
print(f"[EVAL DEBUG] Prompt: '{{prompt_preview}}...' → Score: {{accuracy:.2%}}")
|
| 652 |
+
|
| 653 |
return {{
|
| 654 |
"combined_score": accuracy,
|
| 655 |
"accuracy": accuracy,
|
|
|
|
| 777 |
"evolution": {
|
| 778 |
"population_size": 10, # Smaller population but more iterations
|
| 779 |
"num_islands": 1, # Single island for simpler evolution
|
| 780 |
+
"elite_ratio": 0.3, # Keep top 30% (3 best prompts)
|
| 781 |
+
"explore_ratio": 0.2, # Less random exploration
|
| 782 |
+
"exploit_ratio": 0.5, # More exploitation of good prompts
|
| 783 |
},
|
| 784 |
"database": {
|
| 785 |
"log_prompts": True, # Save prompts used to generate each program
|
|
|
|
| 925 |
if os.path.exists(best_prompt_path):
|
| 926 |
with open(best_prompt_path, "r") as f:
|
| 927 |
best_prompt = f.read()
|
| 928 |
+
print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path}")
|
| 929 |
+
print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
|
| 930 |
else:
|
| 931 |
# Fallback: try without the "best" subdirectory
|
| 932 |
best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
|
| 933 |
if os.path.exists(best_prompt_path_alt):
|
| 934 |
with open(best_prompt_path_alt, "r") as f:
|
| 935 |
best_prompt = f.read()
|
| 936 |
+
print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path_alt}")
|
| 937 |
+
print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
|
| 938 |
else:
|
| 939 |
best_prompt = initial_prompt
|
| 940 |
+
print(f"\n[SELECTION] WARNING: No best_program.txt found, using initial prompt")
|
| 941 |
|
| 942 |
# Final evaluation: Use same 50 samples as initial eval for fair comparison
|
| 943 |
progress(0.85, desc="Evaluating best prompt on 50 samples (same as initial)...")
|