Upload app.py
Browse files
app.py
CHANGED
|
@@ -545,10 +545,10 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 545 |
progress(0.15, desc="Creating configuration...")
|
| 546 |
config_path = create_config_file(model, work_dir)
|
| 547 |
|
| 548 |
-
# Run initial evaluation
|
| 549 |
-
progress(0.2, desc="Running initial evaluation on
|
| 550 |
initial_eval = evaluate_prompt(
|
| 551 |
-
initial_prompt, dataset_name, dataset_split,
|
| 552 |
model, input_field, target_field
|
| 553 |
)
|
| 554 |
|
|
@@ -598,7 +598,7 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 598 |
# Parse evolution history for visualization
|
| 599 |
evolution_viz = parse_evolution_history(output_dir)
|
| 600 |
|
| 601 |
-
progress(0.85, desc="Evaluating best evolved prompt...")
|
| 602 |
|
| 603 |
# Get the best prompt
|
| 604 |
best_prompt_path = os.path.join(output_dir, "best_program.txt")
|
|
@@ -608,9 +608,9 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 608 |
else:
|
| 609 |
best_prompt = initial_prompt
|
| 610 |
|
| 611 |
-
# Evaluate best prompt
|
| 612 |
final_eval = evaluate_prompt(
|
| 613 |
-
best_prompt, dataset_name, dataset_split,
|
| 614 |
model, input_field, target_field
|
| 615 |
)
|
| 616 |
|
|
@@ -641,7 +641,8 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 641 |
### Summary
|
| 642 |
- **Dataset**: {dataset_name} ({dataset_split} split)
|
| 643 |
- **Model**: {model}
|
| 644 |
-
- **
|
|
|
|
| 645 |
- **Iterations**: 10
|
| 646 |
|
| 647 |
### Results
|
|
|
|
| 545 |
progress(0.15, desc="Creating configuration...")
|
| 546 |
config_path = create_config_file(model, work_dir)
|
| 547 |
|
| 548 |
+
# Run initial evaluation (using 20 samples to save API calls)
|
| 549 |
+
progress(0.2, desc="Running initial evaluation on 20 samples...")
|
| 550 |
initial_eval = evaluate_prompt(
|
| 551 |
+
initial_prompt, dataset_name, dataset_split, 20,
|
| 552 |
model, input_field, target_field
|
| 553 |
)
|
| 554 |
|
|
|
|
| 598 |
# Parse evolution history for visualization
|
| 599 |
evolution_viz = parse_evolution_history(output_dir)
|
| 600 |
|
| 601 |
+
progress(0.85, desc="Evaluating best evolved prompt on 20 samples...")
|
| 602 |
|
| 603 |
# Get the best prompt
|
| 604 |
best_prompt_path = os.path.join(output_dir, "best_program.txt")
|
|
|
|
| 608 |
else:
|
| 609 |
best_prompt = initial_prompt
|
| 610 |
|
| 611 |
+
# Evaluate best prompt (using 20 samples like initial eval for consistency)
|
| 612 |
final_eval = evaluate_prompt(
|
| 613 |
+
best_prompt, dataset_name, dataset_split, 20,
|
| 614 |
model, input_field, target_field
|
| 615 |
)
|
| 616 |
|
|
|
|
| 641 |
### Summary
|
| 642 |
- **Dataset**: {dataset_name} ({dataset_split} split)
|
| 643 |
- **Model**: {model}
|
| 644 |
+
- **Initial/Final Eval**: 20 samples each
|
| 645 |
+
- **Evolution Eval**: Staged (20 → 100 if score ≥ 0.5)
|
| 646 |
- **Iterations**: 10
|
| 647 |
|
| 648 |
### Results
|