Upload app.py
app.py
CHANGED
@@ -256,28 +256,49 @@ def collect_prompt_history(output_dir: str) -> List[Dict]:
     try:
         prompts = []
 
-        #
-
-        # Also check for logs to get scores
-        log_dir = os.path.join(output_dir, "logs")
-
-            with open(pfile, 'r') as f:
-                prompt_content = f.read()
-
-                "file": pfile
-            })
-        except:
-            continue
+        # OpenEvolve saves programs in checkpoint directories as JSON files
+        # Structure: output_dir/checkpoints/checkpoint_{iteration}/programs/{program_id}.json
+        checkpoints_dir = os.path.join(output_dir, "checkpoints")
+        if not os.path.exists(checkpoints_dir):
+            return []
+
+        # Find all checkpoint directories
+        checkpoint_dirs = sorted(glob.glob(os.path.join(checkpoints_dir, "checkpoint_*")))
+
+        for checkpoint_dir in checkpoint_dirs:
+            programs_dir = os.path.join(checkpoint_dir, "programs")
+            if not os.path.exists(programs_dir):
+                continue
+
+            # Read all program JSON files
+            program_files = glob.glob(os.path.join(programs_dir, "*.json"))
+
+            for pfile in program_files:
+                try:
+                    with open(pfile, 'r') as f:
+                        program_data = json.load(f)
+
+                    # Extract the code (prompt) from the program data
+                    prompt_content = program_data.get("code", "")
+                    prog_id = program_data.get("id", os.path.basename(pfile).replace(".json", ""))
+                    iteration = program_data.get("iteration_found", 0)
+                    metrics = program_data.get("metrics", {})
+
+                    prompts.append({
+                        "prompt": prompt_content,
+                        "id": prog_id,
+                        "file": pfile,
+                        "iteration": iteration,
+                        "metrics": metrics
+                    })
+                except Exception as e:
+                    print(f"Error reading program file {pfile}: {e}")
+                    continue
+
+        # Sort by iteration
+        prompts.sort(key=lambda x: x.get("iteration", 0))
+
         return prompts
     except Exception as e:
         print(f"Error collecting prompt history: {e}")
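For reference, collect_prompt_history assumes the checkpoint layout described in the comments above and reads the code, id, iteration_found, and metrics fields from each program file. A minimal sketch of one such file, with purely illustrative values:

import json

# Hypothetical contents of output_dir/checkpoints/checkpoint_4/programs/a1b2c3d4.json
# (field names follow the .get() calls above; the values are made up for illustration)
example_program = {
    "id": "a1b2c3d4",
    "code": "You are a careful assistant. Show your reasoning, then give the final answer.",
    "iteration_found": 4,
    "metrics": {"accuracy": 0.62},
}

# collect_prompt_history() would map this file to an entry like:
#   {"prompt": <code>, "id": "a1b2c3d4", "file": ".../a1b2c3d4.json",
#    "iteration": 4, "metrics": {"accuracy": 0.62}}
print(json.dumps(example_program, indent=2))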
@@ -554,6 +575,7 @@ def create_config_file(model: str, work_dir: str):
             "temperature": 0.7,
         },
         "max_iterations": 10,
+        "checkpoint_interval": 2,  # Save checkpoints every 2 iterations to preserve prompt history
         "diff_based_evolution": False,  # Use full rewrite mode for prompts (not diff/patch mode)
         "evolution": {
             "population_size": 10,
@@ -562,6 +584,9 @@ def create_config_file(model: str, work_dir: str):
             "explore_ratio": 0.3,
             "exploit_ratio": 0.6,
         },
+        "database": {
+            "log_prompts": True,  # Save prompts used to generate each program
+        },
         "evaluator": {
             "timeout": 3600,  # 1 hour timeout (effectively disabled, but prevents NoneType arithmetic errors)
             "cascade_evaluation": False,  # Disable cascade to prevent signal errors
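Combined with the checkpoint_interval added in the previous hunk, the generated config would contain roughly the entries below (a sketch with surrounding keys abbreviated; with max_iterations at 10 and checkpoint_interval at 2, checkpoints would be expected at iterations 2, 4, 6, 8, and 10, which is the history that collect_prompt_history reads back):

# Sketch of the relevant slice of the generated config dict; unrelated keys omitted.
config_excerpt = {
    "max_iterations": 10,
    "checkpoint_interval": 2,   # expect checkpoints/checkpoint_2, _4, _6, _8, _10
    "diff_based_evolution": False,
    "database": {
        "log_prompts": True,    # keep the generating prompts alongside each program
    },
    "evaluator": {
        "timeout": 3600,
        "cascade_evaluation": False,
    },
}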
@@ -693,13 +718,19 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
 
     progress(0.85, desc="Evaluating best evolved prompt on 20 samples...")
 
-    # Get the best prompt
-    best_prompt_path = os.path.join(output_dir, "best_program.txt")
+    # Get the best prompt (OpenEvolve saves to output_dir/best/best_program.txt)
+    best_prompt_path = os.path.join(output_dir, "best", "best_program.txt")
     if os.path.exists(best_prompt_path):
         with open(best_prompt_path, "r") as f:
             best_prompt = f.read()
     else:
-
+        # Fallback: try without the "best" subdirectory
+        best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
+        if os.path.exists(best_prompt_path_alt):
+            with open(best_prompt_path_alt, "r") as f:
+                best_prompt = f.read()
+        else:
+            best_prompt = initial_prompt
 
     # Evaluate best prompt (using 20 samples like initial eval for consistency)
     final_eval = evaluate_prompt(
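The path lookup with fallback in this last hunk could also be written as a small helper. This is a hypothetical refactoring for illustration, not code present in app.py:

import os

def resolve_best_prompt(output_dir: str, initial_prompt: str) -> str:
    """Return the best evolved prompt, or the initial prompt if none was written."""
    candidates = [
        os.path.join(output_dir, "best", "best_program.txt"),  # location used above
        os.path.join(output_dir, "best_program.txt"),          # fallback without "best"
    ]
    for path in candidates:
        if os.path.exists(path):
            with open(path, "r") as f:
                return f.read()
    return initial_prompt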