codelion committed on
Commit
062d7ee
·
verified ·
1 Parent(s): bc07d04

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -7
app.py CHANGED
@@ -516,10 +516,22 @@ def evaluate(prompt: str) -> dict:
516
  """
517
  Evaluate a prompt using 50 fixed samples - SAME as initial and final evaluation.
518
 
 
519
  Using the same 50 samples ensures evolution optimizes for the exact test set.
520
  Includes early stopping and rate limit handling.
521
  """
522
  try:
 
 
 
 
 
 
 
 
 
 
 
523
  # IMPORTANT: Use fixed seed for consistent sampling across all evaluations
524
  random.seed(42)
525
 
@@ -569,8 +581,8 @@ def evaluate(prompt: str) -> dict:
569
  if isinstance(target, dict):
570
  target = str(target)
571
 
572
- # Format the prompt
573
- formatted_prompt = prompt.replace("{{input}}", str(input_text))
574
 
575
  # Call the model with retry logic for transient failures
576
  max_retries = 3
@@ -646,8 +658,8 @@ def evaluate(prompt: str) -> dict:
646
 
647
  print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
648
 
649
- # DEBUG: Log the prompt being evaluated and its score
650
- prompt_preview = prompt[:80].replace('\\n', ' ') if len(prompt) > 80 else prompt.replace('\\n', ' ')
651
  print(f"[EVAL DEBUG] Prompt: '{{prompt_preview}}...' → Score: {{accuracy:.2%}}")
652
 
653
  return {{
@@ -826,10 +838,14 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
826
  work_dir = tempfile.mkdtemp(prefix="openevolve_")
827
 
828
  try:
829
- # Save initial prompt
 
830
  initial_prompt_path = os.path.join(work_dir, "initial_prompt.txt")
831
  with open(initial_prompt_path, "w") as f:
 
 
832
  f.write(initial_prompt)
 
833
 
834
  # Create evaluator
835
  progress(0.1, desc="Creating evaluator...")
@@ -929,16 +945,22 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
929
  best_prompt_path = os.path.join(output_dir, "best", "best_program.txt")
930
  if os.path.exists(best_prompt_path):
931
  with open(best_prompt_path, "r") as f:
932
- best_prompt = f.read()
 
 
933
  print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path}")
 
934
  print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
935
  else:
936
  # Fallback: try without the "best" subdirectory
937
  best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
938
  if os.path.exists(best_prompt_path_alt):
939
  with open(best_prompt_path_alt, "r") as f:
940
- best_prompt = f.read()
 
 
941
  print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path_alt}")
 
942
  print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
943
  else:
944
  best_prompt = initial_prompt
 
516
  """
517
  Evaluate a prompt using 50 fixed samples - SAME as initial and final evaluation.
518
 
519
+ OpenEvolve passes a file path, so we need to read the prompt from the file.
520
  Using the same 50 samples ensures evolution optimizes for the exact test set.
521
  Includes early stopping and rate limit handling.
522
  """
523
  try:
524
+ # CRITICAL: OpenEvolve passes a FILE PATH, not the prompt text!
525
+ # Check if prompt is a file path and read it
526
+ if os.path.exists(prompt):
527
+ with open(prompt, 'r') as f:
528
+ prompt_text = f.read()
529
+ # Strip EVOLVE-BLOCK markers if present
530
+ prompt_text = prompt_text.replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
531
+ else:
532
+ # If not a file path, use as-is (for backward compatibility)
533
+ prompt_text = prompt
534
+
535
  # IMPORTANT: Use fixed seed for consistent sampling across all evaluations
536
  random.seed(42)
537
 
 
581
  if isinstance(target, dict):
582
  target = str(target)
583
 
584
+ # Format the prompt (use prompt_text that we read from file)
585
+ formatted_prompt = prompt_text.replace("{{input}}", str(input_text))
586
 
587
  # Call the model with retry logic for transient failures
588
  max_retries = 3
 
658
 
659
  print(f"Final: {{correct}}/{{total}} = {{accuracy:.2%}}")
660
 
661
+ # DEBUG: Log the prompt being evaluated and its score (use prompt_text, not file path)
662
+ prompt_preview = prompt_text[:80].replace('\\n', ' ') if len(prompt_text) > 80 else prompt_text.replace('\\n', ' ')
663
  print(f"[EVAL DEBUG] Prompt: '{{prompt_preview}}...' → Score: {{accuracy:.2%}}")
664
 
665
  return {{
 
838
  work_dir = tempfile.mkdtemp(prefix="openevolve_")
839
 
840
  try:
841
+ # Save initial prompt with EVOLVE-BLOCK markers for OpenEvolve
842
+ # These markers tell OpenEvolve which part to optimize
843
  initial_prompt_path = os.path.join(work_dir, "initial_prompt.txt")
844
  with open(initial_prompt_path, "w") as f:
845
+ # Wrap prompt in evolve markers so OpenEvolve knows what to optimize
846
+ f.write("# EVOLVE-BLOCK-START\n")
847
  f.write(initial_prompt)
848
+ f.write("\n# EVOLVE-BLOCK-END\n")
849
 
850
  # Create evaluator
851
  progress(0.1, desc="Creating evaluator...")
 
945
  best_prompt_path = os.path.join(output_dir, "best", "best_program.txt")
946
  if os.path.exists(best_prompt_path):
947
  with open(best_prompt_path, "r") as f:
948
+ best_prompt_raw = f.read()
949
+ # Strip EVOLVE-BLOCK markers that we added
950
+ best_prompt = best_prompt_raw.replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
951
  print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path}")
952
+ print(f"[SELECTION] Raw prompt length: {len(best_prompt_raw)} chars")
953
  print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
954
  else:
955
  # Fallback: try without the "best" subdirectory
956
  best_prompt_path_alt = os.path.join(output_dir, "best_program.txt")
957
  if os.path.exists(best_prompt_path_alt):
958
  with open(best_prompt_path_alt, "r") as f:
959
+ best_prompt_raw = f.read()
960
+ # Strip EVOLVE-BLOCK markers
961
+ best_prompt = best_prompt_raw.replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
962
  print(f"\n[SELECTION] OpenEvolve selected best prompt from: {best_prompt_path_alt}")
963
+ print(f"[SELECTION] Raw prompt length: {len(best_prompt_raw)} chars")
964
  print(f"[SELECTION] Best prompt: '{best_prompt[:100].replace(chr(10), ' ')}...'")
965
  else:
966
  best_prompt = initial_prompt