Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -214,7 +214,6 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
| 214 |
"n_gram_range": (1, 3),
|
| 215 |
"top_n_words": 15,
|
| 216 |
"verbose": True,
|
| 217 |
-
"stop_words": ARABIC_STOP_WORDS
|
| 218 |
}
|
| 219 |
|
| 220 |
if topic_strategy == "Manual" and n_topics is not None:
|
|
@@ -223,6 +222,11 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
| 223 |
topic_model_params["nr_topics"] = "auto"
|
| 224 |
|
| 225 |
topic_model = BERTopic(**topic_model_params)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
for country, group in df.groupby('country'):
|
| 228 |
progress_text = f"Processing poems for {country}..."
|
|
@@ -319,10 +323,7 @@ def main():
|
|
| 319 |
if topic_strategy == "Manual":
|
| 320 |
# Calculate reasonable max topics based on dataset size
|
| 321 |
n_documents = len(df)
|
| 322 |
-
|
| 323 |
-
max_topics = min(50, n_documents // 20)
|
| 324 |
-
else:
|
| 325 |
-
max_topics = min(500, int(np.log10(n_documents) * 100))
|
| 326 |
|
| 327 |
n_topics = st.slider(
|
| 328 |
"Number of Topics",
|
|
|
|
| 214 |
"n_gram_range": (1, 3),
|
| 215 |
"top_n_words": 15,
|
| 216 |
"verbose": True,
|
|
|
|
| 217 |
}
|
| 218 |
|
| 219 |
if topic_strategy == "Manual" and n_topics is not None:
|
|
|
|
| 222 |
topic_model_params["nr_topics"] = "auto"
|
| 223 |
|
| 224 |
topic_model = BERTopic(**topic_model_params)
|
| 225 |
+
|
| 226 |
+
# Create vectorizer with stop words
|
| 227 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 228 |
+
vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
|
| 229 |
+
topic_model.vectorizer_model = vectorizer
|
| 230 |
|
| 231 |
for country, group in df.groupby('country'):
|
| 232 |
progress_text = f"Processing poems for {country}..."
|
|
|
|
| 323 |
if topic_strategy == "Manual":
|
| 324 |
# Calculate reasonable max topics based on dataset size
|
| 325 |
n_documents = len(df)
|
| 326 |
+
max_topics = max(2, min(50, n_documents // 20))
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
n_topics = st.slider(
|
| 329 |
"Number of Topics",
|