Spaces:

Inoob
/

ReverseBenchmark

Sleeping

App Files Files Community

Inoob committed on Mar 3

Commit

b1af8c9

verified ·

1 Parent(s): b32a0cf

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -34

app.py CHANGED Viewed

@@ -77,40 +77,41 @@ for x in samples:
         # For demonstration purposes, we'll reverse the input as the model output
         # Replace this part with your model's actual output
         model_output_full = st.text_input("Model Ouput:", "")
-        # Extract the text between <back> and </back> tags
-        tag1 = model_output_full.find("<back>")
-        tag2 = model_output_full.find("</back>")
-        model_output = model_output_full[tag1 + 6: tag2]
-        st.subheader("Model Output")
-        st.write(model_output)
-        # Tokenize both outputs for BLEU calculation
-        reference_tokens = nltk.word_tokenize(true_output)
-        candidate_tokens = nltk.word_tokenize(model_output)
-        # Compute BLEU score (using the single reference)
-        bleu_score = sentence_bleu([reference_tokens], candidate_tokens)
-        st.write("**BLEU Score:**", bleu_score)
-        # Compute ROUGE scores
-        rouge_scores = rouge.get_scores(model_output, true_output)
-        st.write("**ROUGE Scores:**")
-        st.json(rouge_scores)
-        # Compute character-level accuracy and precision
-        accuracy_metric = char_accuracy(true_output, model_output)
-        precision_metric = char_precision(true_output, model_output)
-        st.write("**Character Accuracy:**", accuracy_metric)
-        st.write("**Character Precision:**", precision_metric)
-        st.markdown("---")
-        # Append metrics to lists
-        acc.append(accuracy_metric)
-        pres.append(precision_metric)
-        bleu.append(bleu_score)
-        rouges.append(rouge_scores)
 # Allow the user to download the metrics
 if st.button("Download Metrics"):

         # For demonstration purposes, we'll reverse the input as the model output
         # Replace this part with your model's actual output
         model_output_full = st.text_input("Model Ouput:", "")
+        if st.Button("Submit"):
+            # Extract the text between <back> and </back> tags
+            tag1 = model_output_full.find("<back>")
+            tag2 = model_output_full.find("</back>")
+            model_output = model_output_full[tag1 + 6: tag2]
+            st.subheader("Model Output")
+            st.write(model_output)
+            # Tokenize both outputs for BLEU calculation
+            reference_tokens = nltk.word_tokenize(true_output)
+            candidate_tokens = nltk.word_tokenize(model_output)
+            # Compute BLEU score (using the single reference)
+            bleu_score = sentence_bleu([reference_tokens], candidate_tokens)
+            st.write("**BLEU Score:**", bleu_score)
+            # Compute ROUGE scores
+            rouge_scores = rouge.get_scores(model_output, true_output)
+            st.write("**ROUGE Scores:**")
+            st.json(rouge_scores)
+            # Compute character-level accuracy and precision
+            accuracy_metric = char_accuracy(true_output, model_output)
+            precision_metric = char_precision(true_output, model_output)
+            st.write("**Character Accuracy:**", accuracy_metric)
+            st.write("**Character Precision:**", precision_metric)
+            st.markdown("---")
+            # Append metrics to lists
+            acc.append(accuracy_metric)
+            pres.append(precision_metric)
+            bleu.append(bleu_score)
+            rouges.append(rouge_scores)
 # Allow the user to download the metrics
 if st.button("Download Metrics"):