Spaces:

evaluate-metric
/

squad_v2

Running

App Files Community

lvwerra HF Staff committed on Sep 22, 2022

Commit

b2436ac

1 Parent(s): 01ab7ce

Update Space (evaluate main: e4a27243)

Browse files

Files changed (2) hide show

requirements.txt +1 -1
squad_v2.py +23 -4

requirements.txt CHANGED Viewed

	@@ -1 +1 @@
1	- git+https://github.com/huggingface/evaluate@~~80448674f5447a9682afe051db243c4a13bfe4ff~~


1	+ git+https://github.com/huggingface/evaluate@e4a2724377909fe2aeb4357e3971e5a569673b39

squad_v2.py CHANGED Viewed

@@ -13,6 +13,8 @@
 # limitations under the License.
 """ SQuAD v2 metric. """
 import datasets
 import evaluate
@@ -87,13 +89,26 @@ Examples:
 """
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class SquadV2(evaluate.Metric):
-    def _info(self):
         return evaluate.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
                     "predictions": {
@@ -113,7 +128,7 @@ class SquadV2(evaluate.Metric):
             reference_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
         )
-    def _compute(self, predictions, references, no_answer_threshold=1.0):
         no_answer_probabilities = {p["id"]: p["no_answer_probability"] for p in predictions}
         dataset = [{"paragraphs": [{"qas": references}]}]
         predictions = {p["id"]: p["prediction_text"] for p in predictions}
@@ -123,8 +138,12 @@ class SquadV2(evaluate.Metric):
         no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
         exact_raw, f1_raw = get_raw_scores(dataset, predictions)
-        exact_thresh = apply_no_ans_threshold(exact_raw, no_answer_probabilities, qid_to_has_ans, no_answer_threshold)
-        f1_thresh = apply_no_ans_threshold(f1_raw, no_answer_probabilities, qid_to_has_ans, no_answer_threshold)
         out_eval = make_eval_dict(exact_thresh, f1_thresh)
         if has_ans_qids:

 # limitations under the License.
 """ SQuAD v2 metric. """
+from dataclasses import dataclass
 import datasets
 import evaluate
 """
+@dataclass
+class SquadV2Config(evaluate.info.Config):
+    name: str = "default"
+    no_answer_threshold: float = 1.0
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class SquadV2(evaluate.Metric):
+    CONFIG_CLASS = SquadV2Config
+    ALLOWED_CONFIG_NAMES = ["default"]
+    def _info(self, config):
         return evaluate.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
+            config=config,
             features=datasets.Features(
                 {
                     "predictions": {
             reference_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
         )
+    def _compute(self, predictions, references):
         no_answer_probabilities = {p["id"]: p["no_answer_probability"] for p in predictions}
         dataset = [{"paragraphs": [{"qas": references}]}]
         predictions = {p["id"]: p["prediction_text"] for p in predictions}
         no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
         exact_raw, f1_raw = get_raw_scores(dataset, predictions)
+        exact_thresh = apply_no_ans_threshold(
+            exact_raw, no_answer_probabilities, qid_to_has_ans, self.config.no_answer_threshold
+        )
+        f1_thresh = apply_no_ans_threshold(
+            f1_raw, no_answer_probabilities, qid_to_has_ans, self.config.no_answer_threshold
+        )
         out_eval = make_eval_dict(exact_thresh, f1_thresh)
         if has_ans_qids: