codelion committed on
Commit
8e39f71
·
verified ·
1 Parent(s): 399f83d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -545,10 +545,10 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
545
  progress(0.15, desc="Creating configuration...")
546
  config_path = create_config_file(model, work_dir)
547
 
548
- # Run initial evaluation
549
- progress(0.2, desc="Running initial evaluation on 100 samples...")
550
  initial_eval = evaluate_prompt(
551
- initial_prompt, dataset_name, dataset_split, 100,
552
  model, input_field, target_field
553
  )
554
 
@@ -598,7 +598,7 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
598
  # Parse evolution history for visualization
599
  evolution_viz = parse_evolution_history(output_dir)
600
 
601
- progress(0.85, desc="Evaluating best evolved prompt...")
602
 
603
  # Get the best prompt
604
  best_prompt_path = os.path.join(output_dir, "best_program.txt")
@@ -608,9 +608,9 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
608
  else:
609
  best_prompt = initial_prompt
610
 
611
- # Evaluate best prompt
612
  final_eval = evaluate_prompt(
613
- best_prompt, dataset_name, dataset_split, 100,
614
  model, input_field, target_field
615
  )
616
 
@@ -641,7 +641,8 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
641
  ### Summary
642
  - **Dataset**: {dataset_name} ({dataset_split} split)
643
  - **Model**: {model}
644
- - **Evaluation**: Staged (20 samples → 100 if score ≥ 0.5)
 
645
  - **Iterations**: 10
646
 
647
  ### Results
 
545
  progress(0.15, desc="Creating configuration...")
546
  config_path = create_config_file(model, work_dir)
547
 
548
+ # Run initial evaluation (using 20 samples to save API calls)
549
+ progress(0.2, desc="Running initial evaluation on 20 samples...")
550
  initial_eval = evaluate_prompt(
551
+ initial_prompt, dataset_name, dataset_split, 20,
552
  model, input_field, target_field
553
  )
554
 
 
598
  # Parse evolution history for visualization
599
  evolution_viz = parse_evolution_history(output_dir)
600
 
601
+ progress(0.85, desc="Evaluating best evolved prompt on 20 samples...")
602
 
603
  # Get the best prompt
604
  best_prompt_path = os.path.join(output_dir, "best_program.txt")
 
608
  else:
609
  best_prompt = initial_prompt
610
 
611
+ # Evaluate best prompt (using 20 samples like initial eval for consistency)
612
  final_eval = evaluate_prompt(
613
+ best_prompt, dataset_name, dataset_split, 20,
614
  model, input_field, target_field
615
  )
616
 
 
641
  ### Summary
642
  - **Dataset**: {dataset_name} ({dataset_split} split)
643
  - **Model**: {model}
644
+ - **Initial/Final Eval**: 20 samples each
645
+ - **Evolution Eval**: Staged (20 → 100 if score ≥ 0.5)
646
  - **Iterations**: 10
647
 
648
  ### Results