Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -272,17 +272,19 @@ def retrieveSentences(gr1, gr2, att1, att2, progress=gr.Progress()):
|
|
| 272 |
gr.update(value=', '.join(a2)) # att2_fixed
|
| 273 |
)
|
| 274 |
|
| 275 |
-
def bloombergViz(val, numblocks=10):
    """Render a fraction as a row of colored HTML squares.

    Args:
        val: Fraction to visualize, treated as a share of 1. It is rounded to
            a whole percent before the number of dark squares is computed.
            Values outside [0, 1] are clamped so the row never holds more
            than ``numblocks`` squares.
        numblocks: Total number of squares in the row.

    Returns:
        An HTML string of 20x20px inline-block ``<div>`` squares, each
        followed by a space: the dark (#555) squares first, then the light
        (#999) ones.
    """
    filled = "<div style='height:20px;width:20px;background-color:#555;display:inline-block'></div> "
    unfilled = "<div style='height:20px;width:20px;background-color:#999;display:inline-block'></div> "
    percent = round(val * 100)
    # Clamp: without it a val above 1 (or below 0) makes one count negative,
    # which multiplies to "" while the other count exceeds numblocks.
    numFilled = min(max(round((percent / 100) * numblocks), 0), numblocks)
    numUnFilled = numblocks - numFilled
    return numFilled * filled + numUnFilled * unfilled
|
| 283 |
|
| 284 |
-
def att_bloombergViz(att, val, numblocks):
    """Build the HTML card for one attribute: its label, percentage and bar.

    Args:
        att: Attribute label shown in the card. It is HTML-escaped before
            being placed into the markup.
        val: Fraction passed to ``bloombergViz`` and also shown as a whole
            percent (``round(val*100)``).
        numblocks: Number of squares requested from ``bloombergViz``.

    Returns:
        An HTML string: a bordered ``<div>`` containing ``"<att>: <pct>%"``,
        a line break and the bar, followed by a trailing ``<br>``.
    """
    import html  # local import so the block does not depend on file-level imports

    viz = bloombergViz(val, numblocks)
    # Escape the label so characters such as < or & in an attribute term are
    # displayed literally instead of being parsed as markup.
    label = html.escape(str(att))
    attHTML = f"<div style='border-style:solid;border-color:#999;border-radius:12px'>{label}: {round(val*100)}%<br>{viz}</div><br>"
    return attHTML
|
| 288 |
|
|
@@ -348,7 +350,7 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
| 348 |
per_attrib_bias = bias_stats_dict['per_attribute']
|
| 349 |
attrib_by_score = dict(sorted(per_attrib_bias.items(), key=lambda item: item[1], reverse=True))
|
| 350 |
|
| 351 |
-
model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
|
| 352 |
|
| 353 |
per_attrib_bias_HTML_stereo = ""
|
| 354 |
num_atts = 0
|
|
@@ -363,7 +365,7 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
| 363 |
num_atts = 0
|
| 364 |
for att, score in attrib_by_score.items():
|
| 365 |
if att in attributes_g2:
|
| 366 |
-
per_attrib_bias_HTML_antistereo += att_bloombergViz(att,
|
| 367 |
num_atts += 1
|
| 368 |
if num_atts >= 8:
|
| 369 |
break
|
|
@@ -403,7 +405,7 @@ def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=
|
|
| 403 |
gr.update(visible=tabs[0]), # content tab/column 1
|
| 404 |
gr.update(visible=tabs[1]), # content tab/column 2
|
| 405 |
gr.update(visible=tabs[2]), # content tab/column 3
|
| 406 |
-
|
| 407 |
gr.update(value=per_attrib_bias_HTML_stereo), # per attribute bias score stereotyped
|
| 408 |
gr.update(value=per_attrib_bias_HTML_antistereo), # per attribute bias score antistereotyped
|
| 409 |
gr.update(value=score_templates_df, visible=True), # Pairs with scores
|
|
@@ -580,21 +582,21 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
| 580 |
gr.Markdown("### Interpretation")
|
| 581 |
with gr.Row():
|
| 582 |
with gr.Column(scale=2):
|
| 583 |
-
gr.Markdown("**Legend**")
|
| 584 |
-
gr.HTML("<div style='height:20px;width:20px;background-color:#555;display:inline-block'></div>: Group 1 attribute <br><div style='height:20px;width:20px;background-color:#999;display:inline-block'></div>: Group 2 attribute")
|
| 585 |
lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (β more bias)")
|
| 586 |
-
model_bias_html = gr.HTML()
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (β more bias)")
|
|
|
|
|
|
|
| 591 |
# attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (β more bias)",
|
| 592 |
# elem_id="per_attrib_label_elem",
|
| 593 |
# container=True,
|
| 594 |
# min_width=900,
|
| 595 |
# show_label=False)
|
| 596 |
with gr.Row():
|
| 597 |
-
with gr.Column(variant="
|
| 598 |
gr.Markdown("#### Attribute Group 1")
|
| 599 |
attribute_bias_html_stereo = gr.HTML()
|
| 600 |
with gr.Column(variant="compact"):
|
|
@@ -635,7 +637,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
| 635 |
# Test bias
|
| 636 |
bias_btn.click(fn=startBiasTest,
|
| 637 |
inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
|
| 638 |
-
outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3,
|
| 639 |
test_pairs, interpretation_msg, group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
|
| 640 |
)
|
| 641 |
|
|
|
|
| 272 |
gr.update(value=', '.join(a2)) # att2_fixed
|
| 273 |
)
|
| 274 |
|
| 275 |
+
def bloombergViz(val, numblocks=10, flip=False):
    """Render a fraction as a row of colored HTML squares.

    Args:
        val: Fraction to visualize, treated as a share of 1. It is rounded to
            a whole percent before the number of leading squares is computed.
            Values outside [0, 1] are clamped so the row never holds more
            than ``numblocks`` squares.
        numblocks: Total number of squares in the row.
        flip: When true, swap the two colors: the leading ``val`` share is
            drawn light (#999) and the remainder dark (#555).

    Returns:
        An HTML string of 20x20px inline-block ``<div>`` squares, each
        followed by a space.
    """
    filled = "<div style='height:20px;width:20px;background-color:#555;display:inline-block'></div> "
    unfilled = "<div style='height:20px;width:20px;background-color:#999;display:inline-block'></div> "
    percent = round(val * 100)
    # Clamp: without it a val above 1 (or below 0) makes one count negative,
    # which multiplies to "" while the other count exceeds numblocks.
    numFilled = min(max(round((percent / 100) * numblocks), 0), numblocks)
    numUnFilled = numblocks - numFilled
    if flip:
        return numFilled * unfilled + numUnFilled * filled
    return numFilled * filled + numUnFilled * unfilled
|
| 285 |
|
| 286 |
+
def att_bloombergViz(att, val, numblocks, flip=False):
    """Build the HTML card for one attribute: its label, percentage and bar.

    Args:
        att: Attribute label shown in the card. It is HTML-escaped before
            being placed into the markup.
        val: Fraction passed to ``bloombergViz`` and also shown as a whole
            percent (``round(val*100)``).
        numblocks: Number of squares requested from ``bloombergViz``.
        flip: Forwarded to ``bloombergViz`` to swap the two bar colors.

    Returns:
        An HTML string: a bordered ``<div>`` containing ``"<att>: <pct>%"``,
        a line break and the bar, followed by a trailing ``<br>``.
    """
    import html  # local import so the block does not depend on file-level imports

    viz = bloombergViz(val, numblocks, flip)
    # Escape the label so characters such as < or & in an attribute term are
    # displayed literally instead of being parsed as markup.
    label = html.escape(str(att))
    attHTML = f"<div style='border-style:solid;border-color:#999;border-radius:12px'>{label}: {round(val*100)}%<br>{viz}</div><br>"
    return attHTML
|
| 290 |
|
|
|
|
| 350 |
per_attrib_bias = bias_stats_dict['per_attribute']
|
| 351 |
attrib_by_score = dict(sorted(per_attrib_bias.items(), key=lambda item: item[1], reverse=True))
|
| 352 |
|
| 353 |
+
# model_bias_HTML = bloombergViz(bias_stats_dict['model_bias'])
|
| 354 |
|
| 355 |
per_attrib_bias_HTML_stereo = ""
|
| 356 |
num_atts = 0
|
|
|
|
| 365 |
num_atts = 0
|
| 366 |
for att, score in attrib_by_score.items():
|
| 367 |
if att in attributes_g2:
|
| 368 |
+
per_attrib_bias_HTML_antistereo += att_bloombergViz(att, score, att_freqs[att], True)
|
| 369 |
num_atts += 1
|
| 370 |
if num_atts >= 8:
|
| 371 |
break
|
|
|
|
| 405 |
gr.update(visible=tabs[0]), # content tab/column 1
|
| 406 |
gr.update(visible=tabs[1]), # content tab/column 2
|
| 407 |
gr.update(visible=tabs[2]), # content tab/column 3
|
| 408 |
+
model_bias_dict, # per model bias score
|
| 409 |
gr.update(value=per_attrib_bias_HTML_stereo), # per attribute bias score stereotyped
|
| 410 |
gr.update(value=per_attrib_bias_HTML_antistereo), # per attribute bias score antistereotyped
|
| 411 |
gr.update(value=score_templates_df, visible=True), # Pairs with scores
|
|
|
|
| 582 |
gr.Markdown("### Interpretation")
|
| 583 |
with gr.Row():
|
| 584 |
with gr.Column(scale=2):
|
|
|
|
|
|
|
| 585 |
lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (β more bias)")
|
| 586 |
+
# model_bias_html = gr.HTML()
|
| 587 |
+
model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (β more bias)",
|
| 588 |
+
elem_id="res_label",
|
| 589 |
+
show_label=False)
|
| 590 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (β more bias)")
|
| 591 |
+
gr.Markdown("**Legend**")
|
| 592 |
+
gr.HTML("<div style='height:20px;width:20px;background-color:#555;display:inline-block'></div> : Group 1 attribute <br><div style='height:20px;width:20px;background-color:#999;display:inline-block'></div> : Group 2 attribute")
|
| 593 |
# attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (β more bias)",
|
| 594 |
# elem_id="per_attrib_label_elem",
|
| 595 |
# container=True,
|
| 596 |
# min_width=900,
|
| 597 |
# show_label=False)
|
| 598 |
with gr.Row():
|
| 599 |
+
with gr.Column(variant="compact"):
|
| 600 |
gr.Markdown("#### Attribute Group 1")
|
| 601 |
attribute_bias_html_stereo = gr.HTML()
|
| 602 |
with gr.Column(variant="compact"):
|
|
|
|
| 637 |
# Test bias
|
| 638 |
bias_btn.click(fn=startBiasTest,
|
| 639 |
inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
|
| 640 |
+
outputs=[bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label, attribute_bias_html_stereo, attribute_bias_html_antistereo,
|
| 641 |
test_pairs, interpretation_msg, group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
|
| 642 |
)
|
| 643 |
|