Space status: Runtime error
Commit: Update app.py (Browse files)
Changed file: app.py
|
@@ -214,7 +214,6 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
| 214 |
"n_gram_range": (1, 3),
|
| 215 |
"top_n_words": 15,
|
| 216 |
"verbose": True,
|
| 217 |
-
"diversity": 0.5,
|
| 218 |
"stop_words": ARABIC_STOP_WORDS
|
| 219 |
}
|
| 220 |
|
|
@@ -318,8 +317,12 @@ def main():
|
|
| 318 |
)
|
| 319 |
|
| 320 |
if topic_strategy == "Manual":
|
|
|
|
| 321 |
n_documents = len(df)
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
n_topics = st.slider(
|
| 325 |
"Number of Topics",
|
|
@@ -328,6 +331,13 @@ def main():
|
|
| 328 |
value=min(20, max_topics),
|
| 329 |
help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
|
| 330 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
with col2:
|
| 333 |
top_n = st.number_input(
|
|
|
|
| 214 |
"n_gram_range": (1, 3),
|
| 215 |
"top_n_words": 15,
|
| 216 |
"verbose": True,
|
|
|
|
| 217 |
"stop_words": ARABIC_STOP_WORDS
|
| 218 |
}
|
| 219 |
|
|
|
|
| 317 |
)
|
| 318 |
|
| 319 |
if topic_strategy == "Manual":
|
| 320 |
+
# Calculate reasonable max topics based on dataset size
|
| 321 |
n_documents = len(df)
|
| 322 |
+
if n_documents < 1000:
|
| 323 |
+
max_topics = min(50, n_documents // 20)
|
| 324 |
+
else:
|
| 325 |
+
max_topics = min(500, int(np.log10(n_documents) * 100))
|
| 326 |
|
| 327 |
n_topics = st.slider(
|
| 328 |
"Number of Topics",
|
|
|
|
| 331 |
value=min(20, max_topics),
|
| 332 |
help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
|
| 333 |
)
|
| 334 |
+
|
| 335 |
+
st.info(f"""
|
| 336 |
+
💡 For your dataset of {n_documents:,} documents:
|
| 337 |
+
- Minimum topics: 2
|
| 338 |
+
- Maximum topics: {max_topics}
|
| 339 |
+
- Recommended range: {max(2, max_topics//5)}-{max_topics//2}
|
| 340 |
+
""")
|
| 341 |
|
| 342 |
with col2:
|
| 343 |
top_n = st.number_input(
|