codelion committed on
Commit 4f668f2 · verified · 1 Parent(s): 0533c5a

Upload app.py

Files changed (1)
  1. app.py +51 -20
app.py CHANGED
@@ -256,28 +256,49 @@ def collect_prompt_history(output_dir: str) -> List[Dict]:
     try:
         prompts = []
 
-        # Look for all program files
-        program_files = sorted(glob.glob(os.path.join(output_dir, "program_*.txt")))
-
-        # Also check for logs to get scores
-        log_dir = os.path.join(output_dir, "logs")
+        # OpenEvolve saves programs in checkpoint directories as JSON files
+        # Structure: output_dir/checkpoints/checkpoint_{iteration}/programs/{program_id}.json
+        checkpoints_dir = os.path.join(output_dir, "checkpoints")
 
-        for pfile in program_files:
-            try:
-                with open(pfile, 'r') as f:
-                    prompt_content = f.read()
+        if not os.path.exists(checkpoints_dir):
+            return []
 
-                # Extract program ID from filename
-                prog_id = os.path.basename(pfile).replace("program_", "").replace(".txt", "")
+        # Find all checkpoint directories
+        checkpoint_dirs = sorted(glob.glob(os.path.join(checkpoints_dir, "checkpoint_*")))
 
-                prompts.append({
-                    "prompt": prompt_content,
-                    "id": prog_id,
-                    "file": pfile
-                })
-            except:
+        for checkpoint_dir in checkpoint_dirs:
+            programs_dir = os.path.join(checkpoint_dir, "programs")
+            if not os.path.exists(programs_dir):
                 continue
 
+            # Read all program JSON files
+            program_files = glob.glob(os.path.join(programs_dir, "*.json"))
+
+            for pfile in program_files:
+                try:
+                    with open(pfile, 'r') as f:
+                        program_data = json.load(f)
+
+                    # Extract the code (prompt) from the program data
+                    prompt_content = program_data.get("code", "")
+                    prog_id = program_data.get("id", os.path.basename(pfile).replace(".json", ""))
+                    iteration = program_data.get("iteration_found", 0)
+                    metrics = program_data.get("metrics", {})
+
+                    prompts.append({
+                        "prompt": prompt_content,
+                        "id": prog_id,
+                        "file": pfile,
+                        "iteration": iteration,
+                        "metrics": metrics
+                    })
+                except Exception as e:
+                    print(f"Error reading program file {pfile}: {e}")
+                    continue
+
+        # Sort by iteration
+        prompts.sort(key=lambda x: x.get("iteration", 0))
+
         return prompts
     except Exception as e:
         print(f"Error collecting prompt history: {e}")
@@ -554,6 +575,7 @@ def create_config_file(model: str, work_dir: str):
             "temperature": 0.7,
         },
         "max_iterations": 10,
+        "checkpoint_interval": 2,  # Save checkpoints every 2 iterations to preserve prompt history
         "diff_based_evolution": False,  # Use full rewrite mode for prompts (not diff/patch mode)
         "evolution": {
             "population_size": 10,
@@ -562,6 +584,9 @@
             "explore_ratio": 0.3,
             "exploit_ratio": 0.6,
         },
+        "database": {
+            "log_prompts": True,  # Save prompts used to generate each program
+        },
         "evaluator": {
             "timeout": 3600,  # 1 hour timeout (effectively disabled, but prevents NoneType arithmetic errors)
             "cascade_evaluation": False,  # Disable cascade to prevent signal errors
@@ -693,13 +718,19 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
 
     progress(0.85, desc="Evaluating best evolved prompt on 20 samples...")
 
-    # Get the best prompt
-    best_prompt_path = os.path.join(output_dir, "best_program.txt")
+    # Get the best prompt (OpenEvolve saves to output_dir/best/best_program.txt)
+    best_prompt_path = os.path.join(output_dir, "best", "best_program.txt")
     if os.path.exists(best_prompt_path):
         with open(best_prompt_path, "r") as f:
             best_prompt = f.read()
     else:
-        best_prompt = initial_prompt
+        # Fallback: try without the "best" subdirectory
+        best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
+        if os.path.exists(best_prompt_path_alt):
+            with open(best_prompt_path_alt, "r") as f:
+                best_prompt = f.read()
+        else:
+            best_prompt = initial_prompt
 
     # Evaluate best prompt (using 20 samples like initial eval for consistency)
     final_eval = evaluate_prompt(
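Note: the nested fallback above could equivalently be written as a loop over candidate locations; a sketch under the same assumptions (os imported, output_dir and initial_prompt in scope), not the committed code:

    # Try output_dir/best/ first, then the flat layout, else keep the initial prompt.
    best_prompt = initial_prompt
    for candidate in (
        os.path.join(output_dir, "best", "best_program.txt"),
        os.path.join(output_dir, "best_program.txt"),
    ):
        if os.path.exists(candidate):
            with open(candidate, "r") as f:
                best_prompt = f.read()
            break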