Spaces:
Runtime error
Runtime error
app.py
CHANGED
|
@@ -16,9 +16,9 @@ from background import process_arxiv_ids
|
|
| 16 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 17 |
|
| 18 |
gemini_api_key, hf_token, dataset_repo_id, request_arxiv_repo_id, restart_repo_id = get_secrets()
|
| 19 |
-
initialize_repos(dataset_repo_id, request_arxiv_repo_id, hf_token)
|
| 20 |
|
| 21 |
-
titles, date_dict, requested_arxiv_ids_df, arxivid2data = initialize_data(dataset_repo_id, request_arxiv_repo_id)
|
| 22 |
|
| 23 |
from ui import (
|
| 24 |
get_paper_by_year, get_paper_by_month, get_paper_by_day,
|
|
@@ -26,15 +26,42 @@ from ui import (
|
|
| 26 |
before_chat_begin, chat_stream, chat_reset
|
| 27 |
)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
|
| 40 |
cur_arxiv_id = gr.Textbox(selected_paper['arxiv_id'], visible=False)
|
|
|
|
| 16 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 17 |
|
| 18 |
gemini_api_key, hf_token, dataset_repo_id, request_arxiv_repo_id, restart_repo_id = get_secrets()
|
| 19 |
+
empty_src_dataset = initialize_repos(dataset_repo_id, request_arxiv_repo_id, hf_token)
|
| 20 |
|
| 21 |
+
titles, date_dict, requested_arxiv_ids_df, arxivid2data = initialize_data(dataset_repo_id, request_arxiv_repo_id, empty_src_dataset)
|
| 22 |
|
| 23 |
from ui import (
|
| 24 |
get_paper_by_year, get_paper_by_month, get_paper_by_day,
|
|
|
|
| 26 |
before_chat_begin, chat_stream, chat_reset
|
| 27 |
)
|
| 28 |
|
| 29 |
+
# Choose the paper shown when the UI first loads: the first paper of the most
# recent day present in date_dict, or a blank placeholder when there is no data.
if date_dict:
    # date_dict is indexed as date_dict[year][month][day] -> list of papers.
    # The keys are strings, so the last element of each sorted key list is the
    # most recent year / month / day. Inside this branch date_dict is known to
    # be non-empty, so no per-level emptiness fallback is needed.
    sorted_year = sorted(date_dict)
    last_year = sorted_year[-1]
    sorted_month = sorted(date_dict[last_year])
    last_month = sorted_month[-1]
    sorted_day = sorted(date_dict[last_year][last_month])
    last_day = sorted_day[-1]
    last_papers = date_dict[last_year][last_month][last_day]
    selected_paper = last_papers[0]
    visible = True
else:
    # No papers available: fall back to a fixed placeholder date and an
    # all-empty paper record so later lookups such as
    # selected_paper['arxiv_id'] still succeed.
    sorted_year = ["2024"]
    last_year = sorted_year[-1]
    sorted_month = ["01"]
    last_month = sorted_month[-1]
    sorted_day = ["01"]
    last_day = sorted_day[-1]

    selected_paper = {
        "title": "",
        "summary": "",
        "arxiv_id": "",
        "target_date": "2024-01-01",
    }
    # Ten question slots, each with the same nine empty fields.
    # NOTE(review): the "breath" spelling is kept exactly as in the original
    # keys; presumably it matches the dataset's column names - confirm before
    # renaming.
    for idx in range(10):
        for suffix in (
            "question",
            "answer:eli5",
            "answer:expert",
            "additional_depth_q:follow up question",
            "additional_depth_q:answers:eli5",
            "additional_depth_q:answers:expert",
            "additional_breath_q:follow up question",
            "additional_breath_q:answers:eli5",
            "additional_breath_q:answers:expert",
        ):
            selected_paper[f"{idx}_{suffix}"] = ""

    last_papers = [selected_paper]
    # False only in this no-data case; the consumer of this flag is outside
    # this block.
    visible = False
|
| 65 |
|
| 66 |
with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
|
| 67 |
cur_arxiv_id = gr.Textbox(selected_paper['arxiv_id'], visible=False)
|
init.py
CHANGED
|
@@ -35,35 +35,37 @@ def _initialize_paper_info(source_ds):
|
|
| 35 |
arxivid2data = {}
|
| 36 |
count = 0
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
for paper in papers:
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
year, month, day = date.split("-")
|
| 56 |
-
papers = date2qna[date]
|
| 57 |
-
for paper in papers:
|
| 58 |
-
title2qna[paper["title"]] = paper
|
| 59 |
-
arxivid2data[paper['arxiv_id']] = {"idx": count, "paper": paper}
|
| 60 |
-
date_dict[year][month][day].append(paper)
|
| 61 |
-
|
| 62 |
-
titles = [f"[{v['arxiv_id']}] {k}" for k, v in title2qna.items()]
|
| 63 |
-
|
| 64 |
-
return titles, date_dict, arxivid2data
|
| 65 |
-
|
| 66 |
-
def initialize_data(source_data_repo_id, request_data_repo_id):
|
| 67 |
global date_dict, arxivid2data
|
| 68 |
global requested_arxiv_ids_df
|
| 69 |
|
|
@@ -86,6 +88,10 @@ def initialize_repos(
|
|
| 86 |
):
|
| 87 |
if create_hf_hub(source_data_repo_id, hf_token) is False:
|
| 88 |
print(f"{source_data_repo_id} repository already exists")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
if create_hf_hub(request_data_repo_id, hf_token) is False:
|
| 91 |
print(f"{request_data_repo_id} repository already exists")
|
|
|
|
| 35 |
arxivid2data = {}
|
| 36 |
count = 0
|
| 37 |
|
| 38 |
+
if len(source_ds["train"]) > 1:
|
| 39 |
+
for data in source_ds["train"]:
|
| 40 |
+
date = data["target_date"].strftime("%Y-%m-%d")
|
| 41 |
+
arxiv_id = data["arxiv_id"]
|
| 42 |
+
|
| 43 |
+
if date in date2qna:
|
| 44 |
+
papers = copy.deepcopy(date2qna[date])
|
| 45 |
+
for paper in papers:
|
| 46 |
+
if paper["title"] == data["title"]:
|
| 47 |
+
if _count_nans(paper) > _count_nans(data):
|
| 48 |
+
date2qna[date].remove(paper)
|
| 49 |
+
|
| 50 |
+
date2qna[date].append(data)
|
| 51 |
+
del papers
|
| 52 |
+
else:
|
| 53 |
+
date2qna[date] = [data]
|
| 54 |
+
|
| 55 |
+
for date in date2qna:
|
| 56 |
+
year, month, day = date.split("-")
|
| 57 |
+
papers = date2qna[date]
|
| 58 |
for paper in papers:
|
| 59 |
+
title2qna[paper["title"]] = paper
|
| 60 |
+
arxivid2data[paper['arxiv_id']] = {"idx": count, "paper": paper}
|
| 61 |
+
date_dict[year][month][day].append(paper)
|
| 62 |
+
|
| 63 |
+
titles = [f"[{v['arxiv_id']}] {k}" for k, v in title2qna.items()]
|
| 64 |
+
return titles, date_dict, arxivid2data
|
| 65 |
+
else:
|
| 66 |
+
return [], {}, {}
|
| 67 |
+
|
| 68 |
+
def initialize_data(source_data_repo_id, request_data_repo_id, empty_src_dataset):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
global date_dict, arxivid2data
|
| 70 |
global requested_arxiv_ids_df
|
| 71 |
|
|
|
|
| 88 |
):
|
| 89 |
if create_hf_hub(source_data_repo_id, hf_token) is False:
|
| 90 |
print(f"{source_data_repo_id} repository already exists")
|
| 91 |
+
else:
|
| 92 |
+
dummy_row = {"title": ["dummy"]}
|
| 93 |
+
ds = Dataset.from_dict(dummy_row)
|
| 94 |
+
ds.push_to_hub(source_data_repo_id, token=hf_token)
|
| 95 |
|
| 96 |
if create_hf_hub(request_data_repo_id, hf_token) is False:
|
| 97 |
print(f"{request_data_repo_id} repository already exists")
|