codelion committed on
Commit
1a53d87
·
verified ·
1 Parent(s): 69d4c01

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -504,7 +504,7 @@ def parse_evolution_history(output_dir: str) -> str:
504
 
505
  def create_evaluator_file(dataset_name: str, split: str, model: str,
506
  input_field: str, target_field: str, work_dir: str):
507
- """Create an evaluator.py file for OpenEvolve that uses 75 samples for better signal."""
508
  evaluator_code = f'''
509
  import os
510
  import random
@@ -514,9 +514,9 @@ from openai import OpenAI
514
 
515
  def evaluate(prompt: str) -> dict:
516
  """
517
- Evaluate a prompt using 75 fixed samples for stronger evolution signal.
518
 
519
- 75 samples balances signal strength (vs 50) with API rate limits (vs 150).
520
  Includes early stopping and rate limit handling.
521
  """
522
  try:
@@ -535,8 +535,8 @@ def evaluate(prompt: str) -> dict:
535
  else:
536
  raise
537
 
538
- # Sample 75 samples with seed 42 for good signal without excessive API calls
539
- num_samples = 75
540
  if len(dataset) > num_samples:
541
  # Use SAME sampling logic as initial/final eval
542
  indices = random.sample(range(len(dataset)), num_samples)
@@ -973,7 +973,7 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
973
  - **Model**: {model}
974
  - **Initial Eval**: 50 samples
975
  - **Final Eval**: 50 samples (same samples for fair comparison)
976
- - **Evolution**: 75 samples per variant (balanced signal vs API limits)
977
  - **Iterations**: 10
978
 
979
  ### Results
 
504
 
505
  def create_evaluator_file(dataset_name: str, split: str, model: str,
506
  input_field: str, target_field: str, work_dir: str):
507
+ """Create an evaluator.py file for OpenEvolve that uses same 50 samples as initial/final eval."""
508
  evaluator_code = f'''
509
  import os
510
  import random
 
514
 
515
  def evaluate(prompt: str) -> dict:
516
  """
517
+ Evaluate a prompt using 50 fixed samples - SAME as initial and final evaluation.
518
 
519
+ Using the same 50 samples ensures evolution optimizes for the exact test set.
520
  Includes early stopping and rate limit handling.
521
  """
522
  try:
 
535
  else:
536
  raise
537
 
538
+ # Sample 50 samples with seed 42 - SAME as initial/final evaluation for consistency!
539
+ num_samples = 50
540
  if len(dataset) > num_samples:
541
  # Use SAME sampling logic as initial/final eval
542
  indices = random.sample(range(len(dataset)), num_samples)
 
973
  - **Model**: {model}
974
  - **Initial Eval**: 50 samples
975
  - **Final Eval**: 50 samples (same samples for fair comparison)
976
+ - **Evolution**: 50 samples per variant (SAME samples as initial/final for consistency!)
977
  - **Iterations**: 10
978
 
979
  ### Results