File size: 6,136 Bytes
691a2b5
 
 
 
0f37227
691a2b5
 
0f37227
691a2b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f37227
691a2b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import json
import io
from typing import List, Dict, Tuple

import pandas as pd
import numpy as np
import altair as alt
import streamlit as st
from pathlib import PurePosixPath

# Configure the Streamlit page: title, icon, and the wide layout.
st.set_page_config(page_title="Twinkle Eval Analyzer", page_icon=":star2:", layout="wide")

# Main heading rendered at the top of the app.
st.title("✨ Twinkle Eval Analyzer (.json / .jsonl)")

# ----------------- Helpers -----------------

def _decode_bytes_to_text(b: bytes) -> str:
    """Decode raw file bytes into text, guessing the encoding.

    The candidate order depends on what the bytes look like:

    * A UTF-16 byte-order mark means the UTF-16 codecs are tried first.
    * NUL bytes without a BOM are treated as BOM-less UTF-16; the position of
      the NUL in the first code unit picks the endianness to try first.
    * Otherwise UTF-8 is tried first (``utf-8-sig`` so a UTF-8 BOM is dropped
      instead of leaking into the text as ``\\ufeff``), then Big5 / CP950,
      and only then BOM-less UTF-16. UTF-16 accepts almost any even-length
      byte string, so trying it earlier would shadow the Big5 codecs.

    Args:
        b: Raw bytes read from the file.

    Returns:
        The decoded text. If every candidate fails, the bytes are decoded as
        UTF-8 with undecodable bytes dropped.
    """
    utf16_family = ("utf-16", "utf-16le", "utf-16be")
    legacy_cjk = ("big5", "cp950")

    if b.startswith((b"\xff\xfe", b"\xfe\xff")):
        # Explicit UTF-16 BOM: the "utf-16" codec consumes it and picks the
        # byte order itself.
        candidates = utf16_family + ("utf-8-sig",) + legacy_cjk
    elif b"\x00" in b:
        # Raw NUL bytes are not expected in UTF-8 / Big5 JSON text, but every
        # ASCII character carries one in UTF-16. A leading NUL means the high
        # byte comes first (big-endian).
        if b[:1] == b"\x00":
            candidates = ("utf-16be", "utf-16le", "utf-8-sig") + legacy_cjk
        else:
            candidates = ("utf-16le", "utf-16be", "utf-8-sig") + legacy_cjk
    else:
        candidates = ("utf-8-sig",) + legacy_cjk + utf16_family

    for enc in candidates:
        try:
            return b.decode(enc)
        except UnicodeDecodeError:
            continue
    # Last resort: never fail, just drop whatever cannot be decoded.
    return b.decode("utf-8", errors="ignore")

def read_twinkle_doc(file) -> Dict:
    """Read one Twinkle Eval result document from a file-like object.

    The whole content is first parsed as a single JSON value. If that fails,
    the content is scanned line by line (JSON Lines style; a trailing comma on
    a line is tolerated) and the first line that parses is used.

    Args:
        file: Object with a ``read()`` method returning ``bytes`` or ``str``.

    Returns:
        The parsed document as a dict.

    Raises:
        ValueError: If no JSON object could be parsed, the parsed value is not
            a dict, or one of the keys ``timestamp``, ``config``,
            ``dataset_results`` is missing.
    """
    raw = file.read()
    text = _decode_bytes_to_text(raw) if isinstance(raw, bytes) else raw
    # A leading BOM is not whitespace for str.strip() and makes json.loads
    # reject the text, so remove it explicitly.
    text = text.strip().lstrip("\ufeff").strip()

    # Stays None when nothing parses, so the type check below reports a clean
    # ValueError instead of failing on an unbound name.
    obj = None
    try:
        obj = json.loads(text)
    except ValueError:
        for line in text.splitlines():
            candidate = line.strip().rstrip(",")
            if not candidate:
                continue
            try:
                obj = json.loads(candidate)
            except ValueError:
                continue
            break

    if not isinstance(obj, dict):
        raise ValueError("檔案不是有效的 Twinkle Eval JSON 物件。")
    if "timestamp" not in obj or "config" not in obj or "dataset_results" not in obj:
        raise ValueError("缺少必要欄位")
    return obj

def extract_records(doc: Dict) -> Tuple[pd.DataFrame, Dict[str, float]]:
    """Flatten one Twinkle Eval document into per-file accuracy rows.

    Reads ``doc["config"]["model"]["name"]`` and ``doc["timestamp"]`` to build
    a ``"<model> @ <timestamp>"`` source label, then walks
    ``doc["dataset_results"]``. Each entry of a dataset's ``results`` list
    that has both ``file`` and ``accuracy_mean`` becomes one row. Malformed
    parts (non-dict payloads or items, null or non-numeric accuracies,
    non-string file paths) are skipped rather than raising.

    Args:
        doc: Parsed Twinkle Eval document.

    Returns:
        A tuple ``(frame, averages)``:

        * ``frame`` has the columns ``dataset``, ``category`` (file name
          without its last extension), ``file`` (base name),
          ``accuracy_mean`` and ``source_label``.
        * ``averages`` maps dataset name to the payload's
          ``average_accuracy``; when that is absent, to the mean of the
          usable ``accuracy_mean`` values in its results.
    """
    config = doc.get("config")
    model_cfg = config.get("model") if isinstance(config, dict) else None
    model = model_cfg.get("name", "<unknown>") if isinstance(model_cfg, dict) else "<unknown>"
    timestamp = doc.get("timestamp", "<no-ts>")
    source_label = f"{model} @ {timestamp}"

    dataset_results = doc.get("dataset_results")
    if not isinstance(dataset_results, dict):
        dataset_results = {}

    rows = []
    avg_map = {}
    for ds_path, ds_payload in dataset_results.items():
        if not isinstance(ds_payload, dict):
            continue
        ds_name = ds_path.split("datasets/")[-1].strip("/") if ds_path.startswith("datasets/") else ds_path
        avg_meta = ds_payload.get("average_accuracy")
        results = ds_payload.get("results")
        if not isinstance(results, list):
            results = []

        # Every usable accuracy counts toward the fallback average, even when
        # the item has no file path and therefore yields no row.
        accuracies = []
        for item in results:
            if not isinstance(item, dict):
                continue
            acc_mean = item.get("accuracy_mean")
            if acc_mean is None:
                continue
            try:
                acc_value = float(acc_mean)
            except (TypeError, ValueError):
                continue
            accuracies.append(acc_value)

            file_path = item.get("file")
            if not isinstance(file_path, str):
                continue
            fname = PurePosixPath(file_path).name
            rows.append({
                "dataset": ds_name,
                "category": fname.rsplit(".", 1)[0],
                "file": fname,
                "accuracy_mean": acc_value,
                "source_label": source_label,
            })

        if avg_meta is None and accuracies:
            avg_meta = float(np.mean(accuracies))
        if avg_meta is not None:
            avg_map[ds_name] = avg_meta
    return pd.DataFrame(rows), avg_map

def load_all(files) -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]]]:
    """Load every uploaded file and combine the extracted rows.

    Each file is parsed with ``read_twinkle_doc`` and flattened with
    ``extract_records``. A file that fails at either step is reported through
    ``st.error`` and skipped, so one bad upload does not abort the others.
    Files that yield no rows are ignored.

    Args:
        files: Iterable of file-like objects, or ``None`` / empty.

    Returns:
        A tuple ``(frame, meta)``. ``frame`` is the concatenation of all
        per-file frames (an empty frame with the expected columns when nothing
        was loaded). ``meta`` maps each file's source label to its
        dataset-average mapping; a later file with the same label replaces the
        earlier entry.
    """
    frames = []
    meta = {}
    for f in files or []:
        # Broad catch on purpose: this is the per-file UI boundary, and any
        # failure should be shown to the user rather than crash the app.
        try:
            doc = read_twinkle_doc(f)
            df, avg_map = extract_records(doc)
        except Exception as e:
            st.error(f"❌ 無法讀取 {getattr(f, 'name', '檔案')}：{e}")
            continue
        if df.empty:
            continue
        frames.append(df)
        meta[df["source_label"].iloc[0]] = avg_map
    if not frames:
        return pd.DataFrame(columns=["dataset", "category", "file", "accuracy_mean", "source_label"]), {}
    return pd.concat(frames, ignore_index=True), meta

# ----------------- Sidebar -----------------

# Everything created inside this block is rendered in the sidebar.
with st.sidebar:
    # Upload widget accepting one or more .json / .jsonl files.
    files = st.file_uploader("選擇 Twinkle Eval 檔案", type=["json", "jsonl"], accept_multiple_files=True)
    # Parse the uploads into one combined table plus per-source dataset averages.
    # NOTE(review): meta_all is not referenced again in the visible code.
    df_all, meta_all = load_all(files)
    # Display options consumed by the charting code further down:
    # multiply accuracies by 100, categories per chart, and category ordering.
    normalize_0_100 = st.checkbox("以 0–100 顯示", value=False)
    page_size = st.selectbox("每張圖顯示幾個類別", [10, 20, 30, 50, 100], index=1)
    sort_mode = st.selectbox("排序方式", ["依整體平均由高到低", "依整體平均由低到高", "依字母排序"])

# No rows were loaded: show a hint and stop this script run here.
if df_all.empty:
    st.info("請上傳 Twinkle Eval 檔案")
    st.stop()

# Let the user pick one dataset and keep only its rows.
all_datasets = sorted(df_all["dataset"].unique().tolist())
selected_dataset = st.selectbox("選擇資料集", options=all_datasets)
work = df_all.loc[df_all["dataset"] == selected_dataset].copy()

# The plotted column's name and scale follow the 0-100 display toggle.
if normalize_0_100:
    metric_plot = "accuracy_mean (x100)"
    work[metric_plot] = work["accuracy_mean"] * 100.0
else:
    metric_plot = "accuracy_mean"
    work[metric_plot] = work["accuracy_mean"] * 1.0

# Order categories by their mean metric across sources, or by name.
if sort_mode == "依整體平均由高到低":
    sort_column, sort_ascending = metric_plot, False
elif sort_mode == "依整體平均由低到高":
    sort_column, sort_ascending = metric_plot, True
else:
    sort_column, sort_ascending = "category", True
order_df = (
    work.groupby("category")[metric_plot]
    .mean()
    .reset_index()
    .sort_values(sort_column, ascending=sort_ascending)
)

# An ordered categorical makes later grouping follow the chosen order.
cat_order = order_df["category"].tolist()
work["category"] = pd.Categorical(work["category"], categories=cat_order, ordered=True)

# Number of chart pages needed (ceiling division).
n = len(cat_order)
pages = (n + page_size - 1) // page_size

# The dataset name may contain path separators; keep them out of the
# suggested download file name.
safe_dataset_name = str(selected_dataset).replace("/", "_").replace("\\", "_")

# Render one grouped bar chart, one pivot table and one CSV download per page
# of categories.
for p in range(pages):
    start, end = p * page_size, min((p + 1) * page_size, n)
    subset_cats = cat_order[start:end]
    sub = work[work["category"].isin(subset_cats)]
    st.subheader(f"📊 {selected_dataset}|類別 {start+1}-{end} / {n}")

    # Bars are grouped by category on x and offset/coloured per source so the
    # uploaded files can be compared side by side.
    base = alt.Chart(sub).encode(
        x=alt.X("category:N", sort=subset_cats),
        y=alt.Y(f"{metric_plot}:Q"),
        color=alt.Color("source_label:N"),
        tooltip=["source_label", "file", alt.Tooltip(metric_plot, format=".3f")]
    )
    bars = base.mark_bar().encode(xOffset="source_label")
    st.altair_chart(bars.properties(height=420), use_container_width=True)

    # "category" is categorical here; pass observed explicitly so only this
    # page's categories are grouped and the result does not depend on the
    # pandas version's default for that argument.
    pivot = sub.pivot_table(
        index="category",
        columns="source_label",
        values=metric_plot,
        observed=True,
    )
    st.dataframe(pivot, use_container_width=True)
    st.download_button(
        label=f"下載此頁 CSV ({start+1}-{end})",
        data=pivot.reset_index().to_csv(index=False).encode("utf-8"),
        file_name=f"twinkle_{safe_dataset_name}_{start+1}_{end}.csv",
        mime="text/csv"
    )