Update rss_feed.py
modules/rss_feed.py  CHANGED  (+23 -2)
@@ -7,6 +7,10 @@ import requests
 from datetime import datetime, timedelta
 from urllib.parse import urlparse
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import random
+# Import the performance tracker
+from utils.performance import PerformanceTracker
+performance_tracker = PerformanceTracker()

 logger = logging.getLogger("misinformation_detector")

@@ -335,8 +339,8 @@ def retrieve_evidence_from_rss(claim, max_results=10, category_feeds=None):
             other_feeds = [feed for feed in feeds_to_use if feed not in fact_check_feeds]

             # Take all fact-checking feeds plus a random selection of others
-            import random
             selected_feeds = fact_check_feeds + random.sample(other_feeds, min(max(0, 10 - len(fact_check_feeds)), len(other_feeds)))
+
         else:
             selected_feeds = feeds_to_use

@@ -403,8 +407,25 @@ def retrieve_evidence_from_rss(claim, max_results=10, category_feeds=None):
         logger.info(f"Retrieved {len(top_entries)} relevant RSS items from {len(feeds)} feeds in {time.time() - start_time:.2f}s")

         # Return just the text portion
-        return [entry["text"] for entry in top_entries]
+        rss_results = [entry["text"] for entry in top_entries]
+
+        # Log evidence retrieval performance
+        success = bool(rss_results)
+        source_count = {"rss": len(rss_results)}
+        try:
+            performance_tracker.log_evidence_retrieval(success, source_count)
+        except Exception as e:
+            logger.error(f"Error logging RSS evidence retrieval: {e}")
+
+        return rss_results

     except Exception as e:
         logger.error(f"Error in RSS retrieval: {str(e)}")
+
+        # Log failed evidence retrieval
+        try:
+            performance_tracker.log_evidence_retrieval(False, {"rss": 0})
+        except Exception as log_error:
+            logger.error(f"Error logging failed RSS evidence retrieval: {log_error}")
+
         return []
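The commit moves import random from inside retrieve_evidence_from_rss up to module level, creates a module-level PerformanceTracker from utils.performance, and records whether RSS evidence retrieval succeeded, on both the normal return path and the error path. utils/performance.py itself is not included in this commit, so the class below is only a minimal sketch that is compatible with the two call sites in the diff: the log_evidence_retrieval(success, source_count) signature comes from the diff, while the internal counters, the lock, and the summary() helper are illustrative assumptions.

# Minimal sketch only: the real utils/performance.py is not shown in this
# commit. Everything except the log_evidence_retrieval(success, source_count)
# signature is an assumption made for illustration.
import threading
from collections import defaultdict

class PerformanceTracker:
    def __init__(self):
        self._lock = threading.Lock()
        self._attempts = 0
        self._successes = 0
        self._items_by_source = defaultdict(int)

    def log_evidence_retrieval(self, success, source_count):
        # success: bool flag; source_count: dict such as {"rss": 7}
        with self._lock:
            self._attempts += 1
            if success:
                self._successes += 1
            for source, count in (source_count or {}).items():
                self._items_by_source[source] += count

    def summary(self):
        # Hypothetical reporting helper; not referenced by rss_feed.py
        with self._lock:
            rate = self._successes / self._attempts if self._attempts else 0.0
            return {
                "attempts": self._attempts,
                "success_rate": rate,
                "items_by_source": dict(self._items_by_source),
            }

With this wiring, every call to retrieve_evidence_from_rss logs exactly one attempt: a successful run that returns, say, 7 items records success=True with {"rss": 7}, while the except branch records a failure with {"rss": 0}. In the feed-selection hunk, min(max(0, 10 - len(fact_check_feeds)), len(other_feeds)) bounds how many non-fact-checking feeds are sampled; with 3 fact-checking feeds and 20 other feeds, random.sample draws min(max(0, 7), 20) = 7 of the others, for 10 selected feeds in total.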