Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -271,138 +271,135 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
|
|
| 271 |
|
| 272 |
return summaries, topic_model
|
| 273 |
|
| 274 |
-
#
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
| 281 |
|
| 282 |
-
# Main app interface
|
| 283 |
-
st.title("๐ Arabic Poem Analysis")
|
| 284 |
-
st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")
|
| 285 |
|
| 286 |
-
# File upload
|
| 287 |
-
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
|
| 288 |
|
| 289 |
-
if uploaded_file is not None:
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
# Validate columns
|
| 298 |
-
required_columns = ['country', 'poem']
|
| 299 |
-
if not all(col in df.columns for col in required_columns):
|
| 300 |
-
st.error("File must contain 'country' and 'poem' columns.")
|
| 301 |
-
st.stop()
|
| 302 |
-
|
| 303 |
-
# Clean data
|
| 304 |
-
df['country'] = df['country'].str.strip()
|
| 305 |
-
df = df.dropna(subset=['country', 'poem'])
|
| 306 |
-
|
| 307 |
-
# Add topic modeling controls
|
| 308 |
-
st.subheader("Topic Modeling Settings")
|
| 309 |
-
col1, col2 = st.columns(2)
|
| 310 |
-
|
| 311 |
-
with col1:
|
| 312 |
-
topic_strategy = st.radio(
|
| 313 |
-
"Topic Number Strategy",
|
| 314 |
-
["Auto", "Manual"],
|
| 315 |
-
help="Choose whether to let the model determine the optimal number of topics or set it manually"
|
| 316 |
-
)
|
| 317 |
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
)
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
max_value=100,
|
| 346 |
-
value=10
|
| 347 |
-
)
|
| 348 |
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
|
|
|
|
|
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
else:
|
| 399 |
-
st.info("๐ Upload a file to get started!")
|
| 400 |
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
|
|
|
|
|
|
|
|
|
| 408 |
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
return summaries, topic_model
|
| 273 |
|
| 274 |
+
# Main application logic
|
| 275 |
+
def main():
    """Run the Streamlit front end for the Arabic poem analysis app.

    Steps, in order:

    1. Call ``load_models()`` and stop the script with an error message if
       it raises.
    2. Ask for a CSV or Excel upload that has ``country`` and ``poem``
       columns, and drop rows missing either value.
    3. Show the topic-modelling controls (strategy, number of topics,
       number of results to display, minimum topic size).
    4. When "Process Data" is pressed, call ``process_and_summarize`` and
       render the per-country summaries and the global topic list in tabs.

    When no file has been uploaded, a small example of the expected file
    layout is displayed instead.
    """
    # Load models.  The returned objects are not used inside this function;
    # the call is kept so that a loading failure is reported before any of
    # the upload UI is drawn.
    try:
        bert_tokenizer, bert_model, emotion_classifier = load_models()
        st.success("Models loaded successfully!")
    except Exception as e:
        st.error(f"Error loading models: {str(e)}")
        st.stop()

    # Main app interface
    # NOTE(review): the leading "๐" is what survives of a mis-decoded emoji;
    # the original glyph cannot be recovered from this text -- restore it
    # from the real app.py.
    st.title("๐ Arabic Poem Analysis")
    st.write("Upload a CSV or Excel file containing Arabic poems with columns `country` and `poem`.")

    # File upload
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])

    if uploaded_file is not None:
        try:
            # Read the file.  The extension is compared case-insensitively so
            # that a name such as "POEMS.CSV" is not handed to read_excel.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            # Validate columns
            required_columns = ['country', 'poem']
            if not all(col in df.columns for col in required_columns):
                st.error("File must contain 'country' and 'poem' columns.")
                st.stop()

            # Clean data
            df['country'] = df['country'].str.strip()
            df = df.dropna(subset=['country', 'poem'])

            # Nothing left to analyse: say so explicitly instead of failing
            # later with an opaque numeric error in the slider set-up.
            if df.empty:
                st.error("No rows with both 'country' and 'poem' values were found.")
                st.stop()

            # Add topic modeling controls
            st.subheader("Topic Modeling Settings")
            col1, col2 = st.columns(2)

            # Stays None unless the user picks the "Manual" strategy.
            n_topics = None

            with col1:
                topic_strategy = st.radio(
                    "Topic Number Strategy",
                    ["Auto", "Manual"],
                    help="Choose whether to let the model determine the optimal number of topics or set it manually"
                )

                if topic_strategy == "Manual":
                    n_documents = len(df)
                    min_topics = 2
                    # The upper bound grows with log10 of the dataset size and
                    # is capped at 500.  For a single document log10 is 0,
                    # which would place the bound below the slider minimum,
                    # so keep it strictly above ``min_topics``.
                    max_topics = max(
                        min_topics + 1,
                        min(500, int(np.log10(n_documents) * 100)),
                    )

                    n_topics = st.slider(
                        "Number of Topics",
                        min_value=min_topics,
                        max_value=max_topics,
                        value=min(20, max_topics),
                        help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
                    )

            with col2:
                top_n = st.number_input(
                    "Number of top topics/emotions to display:",
                    min_value=1,
                    max_value=100,
                    value=10
                )

                min_topic_size = st.slider(
                    "Minimum Topic Size",
                    min_value=10,
                    max_value=100,
                    value=30,
                    help="Minimum number of documents required to form a topic"
                )

            if st.button("Process Data"):
                with st.spinner("Processing your data..."):
                    summaries, topic_model = process_and_summarize(
                        df,
                        top_n=top_n,
                        topic_strategy=topic_strategy,
                        n_topics=n_topics,
                        min_topic_size=min_topic_size
                    )

                if summaries:
                    st.success("Analysis complete!")

                    # Display results in tabs
                    tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])

                    with tab1:
                        for summary in summaries:
                            # "๐" below: same mis-decoded emoji as in the title.
                            with st.expander(f"๐ {summary['country']} ({summary['total_poems']} poems)"):
                                # Distinct names so the settings columns
                                # created above are not shadowed.
                                topics_col, emotions_col = st.columns(2)

                                with topics_col:
                                    st.subheader("Top Topics")
                                    for topic in summary['top_topics']:
                                        st.write(f"• {topic['topic']}: {topic['count']} poems")

                                with emotions_col:
                                    st.subheader("Emotions")
                                    for emotion in summary['top_emotions']:
                                        st.write(f"• {emotion['emotion']}: {emotion['count']} poems")

                    with tab2:
                        st.subheader("Global Topic Distribution")
                        topic_info = topic_model.get_topic_info()
                        for _, row in topic_info.iterrows():
                            # Topic id -1 is shown under a generic label
                            # instead of its keyword list.
                            if row['Topic'] == -1:
                                topic_name = "Miscellaneous"
                            else:
                                words = topic_model.get_topic(row['Topic'])
                                topic_name = " | ".join(word for word, _ in words[:5])
                            st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")

        except Exception as e:
            st.error(f"Error processing file: {str(e)}")

    else:
        # "๐" below: same mis-decoded emoji as in the title.
        st.info("๐ Upload a file to get started!")

        # Example format
        st.write("### Expected File Format:")
        example_df = pd.DataFrame({
            'country': ['Egypt', 'Palestine'],
            'poem': ['قصيدة مصرية', 'قصيدة فلسطينية']
        })
        st.dataframe(example_df)
|
| 403 |
|
| 404 |
+
# Script entry point: build the app UI when this file is executed as the
# main module.
if __name__ == "__main__":
    main()
|