import json
from typing import Dict, Tuple
from pathlib import PurePosixPath

import pandas as pd
import numpy as np
import altair as alt
import streamlit as st

st.set_page_config(page_title="Twinkle Eval Analyzer", page_icon=":star2:", layout="wide")
st.title("✨ Twinkle Eval Analyzer (.json / .jsonl)")
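# Run locally with `streamlit run app.py` (assuming this script is saved as app.py).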
# ----------------- Helpers -----------------
def _decode_bytes_to_text(b: bytes) -> str:
    # Try common encodings (UTF-8/16 variants plus the Traditional Chinese
    # Big5/CP950 codepages) before falling back to lossy UTF-8 decoding.
    for enc in ("utf-8", "utf-16", "utf-16le", "utf-16be", "big5", "cp950"):
        try:
            return b.decode(enc)
        except Exception:
            continue
    return b.decode("utf-8", errors="ignore")
def read_twinkle_doc(file) -> Dict:
    raw = file.read()
    text = _decode_bytes_to_text(raw) if isinstance(raw, bytes) else raw
    text = text.strip()
    obj = None  # initialized so a fully unparseable file raises ValueError below, not NameError
    try:
        obj = json.loads(text)
    except Exception:
        # JSONL fallback: take the first line that parses as JSON.
        for line in text.splitlines():
            line = line.strip().rstrip(",")
            if not line:
                continue
            try:
                obj = json.loads(line)
                break
            except Exception:
                continue
    if not isinstance(obj, dict):
        raise ValueError("The file is not a valid Twinkle Eval JSON object.")
    if "timestamp" not in obj or "config" not in obj or "dataset_results" not in obj:
        raise ValueError("Missing required fields: timestamp, config, dataset_results.")
    return obj
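# Minimal document shape this parser accepts, inferred from the fields read above
# and in extract_records (illustrative; all values are made up):
# {
#   "timestamp": "2024-01-01T00:00:00",
#   "config": {"model": {"name": "my-model"}},
#   "dataset_results": {
#     "datasets/tmmlu/": {
#       "average_accuracy": 0.8125,
#       "results": [{"file": "datasets/tmmlu/physics.jsonl", "accuracy_mean": 0.8125}]
#     }
#   }
# }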
def extract_records(doc: Dict) -> Tuple[pd.DataFrame, Dict[str, float]]:
    model = doc.get("config", {}).get("model", {}).get("name", "<unknown>")
    timestamp = doc.get("timestamp", "<no-ts>")
    source_label = f"{model} @ {timestamp}"
    rows = []
    avg_map = {}
    for ds_path, ds_payload in doc.get("dataset_results", {}).items():
        # Strip a leading "datasets/" prefix so dataset labels stay short.
        ds_name = ds_path.split("datasets/")[-1].strip("/") if ds_path.startswith("datasets/") else ds_path
        avg_meta = ds_payload.get("average_accuracy") if isinstance(ds_payload, dict) else None
        results = ds_payload.get("results", []) if isinstance(ds_payload, dict) else []
        for item in results:
            if not isinstance(item, dict):
                continue
            file_path = item.get("file")
            acc_mean = item.get("accuracy_mean")
            if file_path is None or acc_mean is None:
                continue
            fname = PurePosixPath(file_path).name
            category = fname.rsplit(".", 1)[0]  # the file stem doubles as the category label
            rows.append({
                "dataset": ds_name,
                "category": category,
                "file": fname,
                "accuracy_mean": float(acc_mean),
                "source_label": source_label,
            })
        # No precomputed average in the document: derive one from the per-file means.
        if avg_meta is None and results:
            vals = [float(it["accuracy_mean"]) for it in results
                    if isinstance(it, dict) and it.get("accuracy_mean") is not None]
            if vals:
                avg_meta = float(np.mean(vals))
        if avg_meta is not None:
            avg_map[ds_name] = avg_meta
    return pd.DataFrame(rows), avg_map
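# Illustrative output (values made up): extract_records returns one tidy row per
# (dataset, category, file) observation, e.g.
#   dataset  category  file           accuracy_mean  source_label
#   tmmlu    physics   physics.jsonl  0.8125         my-model @ 2024-01-01T00:00:00
# plus a dataset-level average map such as {"tmmlu": 0.8125}.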
def load_all(files) -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]]]:
    # Concatenate all uploaded runs so charts can compare sources side by side.
    frames = []
    meta = {}
    for f in files or []:
        try:
            doc = read_twinkle_doc(f)
        except Exception as e:
            st.error(f"❌ Could not read {getattr(f, 'name', 'file')}: {e}")
            continue
        df, avg_map = extract_records(doc)
        if not df.empty:
            frames.append(df)
            src = df["source_label"].iloc[0]
            meta[src] = avg_map
    if not frames:
        return pd.DataFrame(columns=["dataset", "category", "file", "accuracy_mean", "source_label"]), {}
    return pd.concat(frames, ignore_index=True), meta
# ----------------- Sidebar -----------------
with st.sidebar:
    files = st.file_uploader("Select Twinkle Eval files", type=["json", "jsonl"], accept_multiple_files=True)
    df_all, meta_all = load_all(files)
    normalize_0_100 = st.checkbox("Display on a 0-100 scale", value=False)
    page_size = st.selectbox("Categories per chart", [10, 20, 30, 50, 100], index=1)
    sort_mode = st.selectbox("Sort order", ["Overall mean, high to low", "Overall mean, low to high", "Alphabetical"])
if df_all.empty:
    st.info("Please upload Twinkle Eval files.")
    st.stop()

all_datasets = sorted(df_all["dataset"].unique().tolist())
selected_dataset = st.selectbox("Select a dataset", options=all_datasets)
work = df_all[df_all["dataset"] == selected_dataset].copy()

# Optionally rescale accuracies from 0-1 to 0-100 for display.
metric_plot = "accuracy_mean" + (" (x100)" if normalize_0_100 else "")
work[metric_plot] = work["accuracy_mean"] * (100.0 if normalize_0_100 else 1.0)

# Order categories by their mean across all uploaded runs, or alphabetically.
order_df = work.groupby("category")[metric_plot].mean().reset_index()
if sort_mode == "Overall mean, high to low":
    order_df = order_df.sort_values(metric_plot, ascending=False)
elif sort_mode == "Overall mean, low to high":
    order_df = order_df.sort_values(metric_plot, ascending=True)
else:
    order_df = order_df.sort_values("category", ascending=True)
cat_order = order_df["category"].tolist()
work["category"] = pd.Categorical(work["category"], categories=cat_order, ordered=True)
# Paginate the categories so each chart stays readable.
n = len(cat_order)
pages = int(np.ceil(n / page_size))
for p in range(pages):
    start, end = p * page_size, min((p + 1) * page_size, n)
    subset_cats = cat_order[start:end]
    sub = work[work["category"].isin(subset_cats)]
    st.subheader(f"📊 {selected_dataset} | categories {start+1}-{end} / {n}")
    base = alt.Chart(sub).encode(
        x=alt.X("category:N", sort=subset_cats),
        y=alt.Y(f"{metric_plot}:Q"),
        color=alt.Color("source_label:N"),
        tooltip=["source_label", "file", alt.Tooltip(metric_plot, format=".3f")],
    )
    # xOffset draws one bar per source side by side within each category.
    bars = base.mark_bar().encode(xOffset="source_label")
    st.altair_chart(bars.properties(height=420), use_container_width=True)
    # observed=True keeps only this page's categories; otherwise the categorical
    # index would list every category of the dataset with NaN rows.
    pivot = sub.pivot_table(index="category", columns="source_label", values=metric_plot, observed=True)
    st.dataframe(pivot, use_container_width=True)
    st.download_button(
        label=f"Download this page as CSV ({start+1}-{end})",
        data=pivot.reset_index().to_csv(index=False).encode("utf-8"),
        file_name=f"twinkle_{selected_dataset}_{start+1}_{end}.csv",
        mime="text/csv",
    )