Upload 4 files
Browse files- services/__init__.py +0 -0
- services/history_service.py +969 -0
- services/prompt_parser.py +179 -0
- services/wikidata_service.py +706 -0
services/__init__.py
ADDED
|
File without changes
|
services/history_service.py
ADDED
|
@@ -0,0 +1,969 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import math
|
| 5 |
+
import sqlite3
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, Iterable, List, Optional, Tuple
|
| 8 |
+
|
| 9 |
+
# Import Wikidata service for remote lookups
|
| 10 |
+
try:
|
| 11 |
+
from services.wikidata_service import (
|
| 12 |
+
search_events_by_geo_time as wikidata_search,
|
| 13 |
+
get_event_detail as wikidata_get_detail,
|
| 14 |
+
search_events_by_name as wikidata_search_by_name,
|
| 15 |
+
)
|
| 16 |
+
WIKIDATA_AVAILABLE = True
|
| 17 |
+
except ImportError:
|
| 18 |
+
WIKIDATA_AVAILABLE = False
|
| 19 |
+
print("[history_service] Wikidata service not available, using curated data only")
|
| 20 |
+
|
| 21 |
+
# Filesystem layout: the database lives in <repo root>/data/.
ROOT_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = ROOT_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)  # NOTE: import-time side effect (creates the data dir)
DB_PATH = DATA_DIR / "meridian_history.db"

# Wikidata settings
ENABLE_WIKIDATA_FALLBACK = True
# Minimum match confidence a Wikidata hit needs before it is merged into results.
WIKIDATA_CONFIDENCE_THRESHOLD = 0.5

# Bumped whenever EVENT_EXTRA_COLUMNS changes; recorded in schema_meta by ensure_schema().
EVENT_SCHEMA_VERSION = 2
# Columns added after schema v1. ensure_schema() ALTERs any missing ones into
# the events table; JSON-encoded fields (themes, actors, ...) are stored as TEXT.
EVENT_EXTRA_COLUMNS: Dict[str, str] = {
    "slug": "TEXT",
    "summary": "TEXT",
    "narrative": "TEXT",
    "start_year": "INTEGER",
    "end_year": "INTEGER",
    "month": "INTEGER",
    "day": "INTEGER",
    "themes": "TEXT",
    "actors": "TEXT",
    "artifacts": "TEXT",
    "visual_motifs": "TEXT",
    "facets": "TEXT",
    "sources": "TEXT",
    "time_range": "TEXT",
    "geo_anchor": "TEXT",
    "confidence": "REAL",
    "relationships": "TEXT",
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _get_connection() -> sqlite3.Connection:
    """Open a connection to the history database with name-addressable rows."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers access columns by name and build dicts cheaply.
    connection.row_factory = sqlite3.Row
    return connection
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _serialize(value: object) -> str:
|
| 59 |
+
return json.dumps(value, ensure_ascii=False)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _deserialize(value: Optional[str], default):
|
| 63 |
+
if value is None or value == "":
|
| 64 |
+
return default
|
| 65 |
+
try:
|
| 66 |
+
return json.loads(value)
|
| 67 |
+
except json.JSONDecodeError:
|
| 68 |
+
return default
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def ensure_schema() -> None:
    """Create or migrate the history database schema.

    Creates the ``events`` and ``schema_meta`` tables if absent, ALTERs in
    any columns from EVENT_EXTRA_COLUMNS that a pre-existing table is
    missing, records the current schema version, and ensures the lookup
    indexes exist.

    Fix: the connection is now closed via try/finally, so a failing DDL
    statement can no longer leak the SQLite handle.
    """
    conn = _get_connection()
    try:
        cursor = conn.cursor()

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE,
                year INTEGER,
                lat REAL,
                lon REAL
            )
            """
        )

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS schema_meta (
                key TEXT PRIMARY KEY,
                value TEXT
            )
            """
        )

        # Lightweight migration: add any columns introduced after schema v1.
        cursor.execute("PRAGMA table_info(events)")
        existing_columns = {row["name"] for row in cursor.fetchall()}
        for column, column_type in EVENT_EXTRA_COLUMNS.items():
            if column not in existing_columns:
                # Identifiers cannot be bound as SQL parameters; both values
                # come from the trusted module-level EVENT_EXTRA_COLUMNS map.
                cursor.execute(f"ALTER TABLE events ADD COLUMN {column} {column_type}")

        # Record the schema version (upsert keeps a single row per key).
        cursor.execute(
            """
            INSERT INTO schema_meta(key, value)
            VALUES('event_schema_version', ?)
            ON CONFLICT(key) DO UPDATE SET value=excluded.value
            """,
            (str(EVENT_SCHEMA_VERSION),),
        )

        cursor.execute("CREATE INDEX IF NOT EXISTS idx_events_year ON events(year)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_events_coordinates ON events(lat, lon)")

        conn.commit()
    finally:
        conn.close()
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def seed_curated_events(force_refresh: bool = False) -> None:
    """Insert the curated event catalogue into the database.

    Existing rows are preserved (INSERT OR IGNORE on the unique ``name``
    column); pass ``force_refresh=True`` to wipe the table and reseed.

    Fix: the connection is now closed via try/finally, so a malformed
    curated record can no longer leak the SQLite handle.
    """
    conn = _get_connection()
    try:
        cursor = conn.cursor()

        if force_refresh:
            cursor.execute("DELETE FROM events")

        for event in CURATED_EVENTS:
            cursor.execute(
                """
                INSERT OR IGNORE INTO events (
                    name, slug, year, start_year, end_year, month, day,
                    lat, lon, summary, narrative, themes, actors, artifacts,
                    visual_motifs, facets, sources, time_range, geo_anchor,
                    confidence, relationships
                ) VALUES (
                    :name, :slug, :year, :start_year, :end_year, :month, :day,
                    :lat, :lon, :summary, :narrative, :themes, :actors, :artifacts,
                    :visual_motifs, :facets, :sources, :time_range, :geo_anchor,
                    :confidence, :relationships
                )
                """,
                {
                    "name": event["name"],
                    # Derive a slug from the name when one isn't provided.
                    "slug": event.get("slug") or event["name"].lower().replace(" ", "_"),
                    "year": event.get("year"),
                    # Point-in-time events default start/end to the single year.
                    "start_year": event.get("start_year", event.get("year")),
                    "end_year": event.get("end_year", event.get("year")),
                    "month": event.get("month"),
                    "day": event.get("day"),
                    "lat": event.get("lat"),
                    "lon": event.get("lon"),
                    "summary": event.get("summary"),
                    "narrative": event.get("narrative"),
                    # Structured fields are stored as JSON text columns.
                    "themes": _serialize(event.get("themes", [])),
                    "actors": _serialize(event.get("actors", [])),
                    "artifacts": _serialize(event.get("artifacts", [])),
                    "visual_motifs": _serialize(event.get("visual_motifs", [])),
                    "facets": _serialize(event.get("facets", {})),
                    "sources": _serialize(event.get("sources", [])),
                    "time_range": _serialize(event.get("time_range", {})),
                    "geo_anchor": _serialize(event.get("geo_anchor", {})),
                    "confidence": event.get("confidence", 0.85),
                    "relationships": _serialize(event.get("relationships", {})),
                },
            )

        conn.commit()
    finally:
        conn.close()
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def initialize_history(force_refresh: bool = False) -> None:
    """Prepare the history store: migrate the schema, then seed curated events.

    Args:
        force_refresh: When True, the events table is wiped before reseeding.
    """
    ensure_schema()
    seed_curated_events(force_refresh=force_refresh)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def load_events_from_db() -> List[dict]:
    """Return every stored event as a dict with its JSON columns decoded."""
    conn = _get_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM events")
    rows = cursor.fetchall()
    conn.close()

    # JSON-encoded columns and the factory producing each field's fallback.
    # A factory (rather than a shared literal) gives every row a fresh default.
    json_fields = (
        ("themes", list),
        ("actors", list),
        ("artifacts", list),
        ("visual_motifs", list),
        ("facets", dict),
        ("sources", list),
        ("time_range", dict),
        ("geo_anchor", dict),
        ("relationships", dict),
    )

    hydrated: List[dict] = []
    for row in rows:
        event = dict(row)
        for field, factory in json_fields:
            event[field] = _deserialize(event.get(field), factory())
        hydrated.append(event)
    return hydrated
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
| 201 |
+
radius = 6371.0
|
| 202 |
+
phi1, phi2 = math.radians(lat1), math.radians(lat2)
|
| 203 |
+
delta_phi = math.radians(lat2 - lat1)
|
| 204 |
+
delta_lambda = math.radians(lon2 - lon1)
|
| 205 |
+
|
| 206 |
+
a = (
|
| 207 |
+
math.sin(delta_phi / 2) ** 2
|
| 208 |
+
+ math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2) ** 2
|
| 209 |
+
)
|
| 210 |
+
c = 2 * math.atan2(math.sqrt(a), math.sqrt(max(0.0, 1 - a)))
|
| 211 |
+
return radius * c
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _compute_match_score(
|
| 215 |
+
event: dict,
|
| 216 |
+
lat: float,
|
| 217 |
+
lon: float,
|
| 218 |
+
year: int,
|
| 219 |
+
year_weight: float = 1.0,
|
| 220 |
+
) -> Tuple[float, float, float, float]:
|
| 221 |
+
"""
|
| 222 |
+
Compute match score for an event based on distance and year.
|
| 223 |
+
|
| 224 |
+
Args:
|
| 225 |
+
event: Event dictionary
|
| 226 |
+
lat: Query latitude
|
| 227 |
+
lon: Query longitude
|
| 228 |
+
year: Query year
|
| 229 |
+
year_weight: Weight for year matching (0.0-2.0)
|
| 230 |
+
- 0.0 = distance only
|
| 231 |
+
- 1.0 = balanced (default)
|
| 232 |
+
- 2.0 = strongly prefer year matches
|
| 233 |
+
|
| 234 |
+
Returns:
|
| 235 |
+
Tuple of (distance_km, year_delta, confidence, match_score)
|
| 236 |
+
"""
|
| 237 |
+
event_year = event.get("year") or year
|
| 238 |
+
event_lat = event.get("lat") or lat
|
| 239 |
+
event_lon = event.get("lon") or lon
|
| 240 |
+
|
| 241 |
+
distance = haversine_distance(lat, lon, event_lat, event_lon)
|
| 242 |
+
year_delta = abs(event_year - year)
|
| 243 |
+
base_confidence = event.get("confidence", 0.8)
|
| 244 |
+
|
| 245 |
+
# Year-weighted scoring:
|
| 246 |
+
# - Exact year match (delta=0): massive bonus
|
| 247 |
+
# - Within 5 years: strong bonus
|
| 248 |
+
# - Within 10 years: moderate bonus
|
| 249 |
+
# - Beyond 10 years: penalty increases
|
| 250 |
+
|
| 251 |
+
if year_delta == 0:
|
| 252 |
+
year_score = -50 * year_weight # Big bonus for exact year
|
| 253 |
+
elif year_delta <= 2:
|
| 254 |
+
year_score = -30 * year_weight # Strong bonus for ±2 years
|
| 255 |
+
elif year_delta <= 5:
|
| 256 |
+
year_score = -15 * year_weight # Good bonus for ±5 years
|
| 257 |
+
elif year_delta <= 10:
|
| 258 |
+
year_score = 0 # Neutral for ±10 years
|
| 259 |
+
else:
|
| 260 |
+
year_score = year_delta * 3 * year_weight # Penalty for distant years
|
| 261 |
+
|
| 262 |
+
# Distance scoring (normalized):
|
| 263 |
+
# - Within 50km: strong bonus
|
| 264 |
+
# - Within 200km: moderate bonus
|
| 265 |
+
# - Beyond 500km: penalty
|
| 266 |
+
if distance < 50:
|
| 267 |
+
distance_score = -20 * (1 - year_weight * 0.3) # Bonus, reduced if year-weighted
|
| 268 |
+
elif distance < 200:
|
| 269 |
+
distance_score = distance * 0.1
|
| 270 |
+
else:
|
| 271 |
+
distance_score = distance * 0.2 * (1 - year_weight * 0.3) # Reduced penalty if year-weighted
|
| 272 |
+
|
| 273 |
+
# Combined score (lower is better)
|
| 274 |
+
match_score = distance_score + year_score
|
| 275 |
+
|
| 276 |
+
# Confidence calculation
|
| 277 |
+
confidence = base_confidence
|
| 278 |
+
if year_delta == 0:
|
| 279 |
+
confidence += 0.15
|
| 280 |
+
elif year_delta <= 5:
|
| 281 |
+
confidence += 0.08
|
| 282 |
+
|
| 283 |
+
if distance < 100:
|
| 284 |
+
confidence += 0.1
|
| 285 |
+
elif distance < 300:
|
| 286 |
+
confidence += 0.05
|
| 287 |
+
|
| 288 |
+
confidence = max(0.0, min(0.99, confidence))
|
| 289 |
+
|
| 290 |
+
return distance, year_delta, confidence, match_score
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def get_events_by_coordinates(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 250.0,
    limit: int = 5,
    include_wikidata: bool = True,
    year_weight: float = 1.5,
) -> List[dict]:
    """
    Get historical events near coordinates and year.

    First searches the curated local database, then optionally queries Wikidata
    for additional results if enabled and local results are insufficient.
    Each returned dict carries a "source" key ("curated" or "wikidata") plus
    match metadata (distance_km, year_delta, match_confidence, match_score;
    lower match_score is better).

    Args:
        lat: Latitude
        lon: Longitude
        year: Target year (negative for BCE)
        radius_km: Search radius in kilometers
        limit: Maximum number of results
        include_wikidata: Whether to include Wikidata results
        year_weight: How much to prioritize year matches (0.0-2.0)
            - 0.0 = distance only (ignore year)
            - 1.0 = balanced
            - 1.5 = prefer year matches (default)
            - 2.0 = strongly prefer year matches

    Returns:
        List of event dictionaries sorted by relevance
    """
    # Step 1: Search curated local database
    events = load_events_from_db()
    matches: List[dict] = []

    # Widen the radius when year-weighted so close-in-time events further
    # away are still considered.
    effective_radius = radius_km * (1 + year_weight * 0.5) if year_weight > 1.0 else radius_km

    for event in events:
        distance, year_delta, confidence, match_score = _compute_match_score(
            event, lat, lon, year, year_weight=year_weight
        )

        # Keep an event if it is within the widened radius OR within 10 years
        # of the target.
        if distance > effective_radius and year_delta > 10:
            continue

        # Second cut: even temporally-close events (10 < delta <= 15) must be
        # inside the *base* radius; beyond 15 years AND beyond radius -> drop.
        if year_delta > 15 and distance > radius_km:
            continue

        # Copy so match metadata doesn't mutate the loaded event dicts.
        match = dict(event)
        match["distance_km"] = round(distance, 2)
        match["year_delta"] = year_delta
        match["match_confidence"] = round(confidence, 3)
        match["match_score"] = match_score
        match["source"] = "curated"
        matches.append(match)

    # Lower match_score is better.
    matches.sort(key=lambda item: item["match_score"])
    curated_results = matches[:limit]

    # Step 2: If enabled and curated data didn't fill the quota, top up from
    # Wikidata (best-effort: failures are logged and swallowed).
    if (
        include_wikidata
        and ENABLE_WIKIDATA_FALLBACK
        and WIKIDATA_AVAILABLE
        and len(curated_results) < limit
    ):
        try:
            print(f"[history_service] Querying Wikidata for additional events...")
            wikidata_results = wikidata_search(
                lat=lat,
                lon=lon,
                year=year,
                radius_km=radius_km,
                limit=limit * 2,  # Over-fetch; some hits are filtered out below
            )

            # Merge Wikidata results, de-duplicating against curated hits by
            # lowercased name.
            curated_names = {m.get("name", "").lower() for m in curated_results}

            for wd_event in wikidata_results:
                # Skip if we already have this event from curated data
                event_name = wd_event.get("name", "").lower()
                if event_name in curated_names:
                    continue

                # Skip low-confidence results
                confidence = wd_event.get("match_confidence", wd_event.get("confidence", 0))
                if confidence < WIKIDATA_CONFIDENCE_THRESHOLD:
                    continue

                # Tag the provenance; missing metadata gets pessimistic
                # defaults (year_delta=99, distance=999) so such hits rank low.
                wd_event["source"] = "wikidata"
                wd_year_delta = wd_event.get("year_delta", 99)
                wd_distance = wd_event.get("distance_km", 999)

                # Same year-bonus tiers as _compute_match_score.
                if wd_year_delta == 0:
                    year_score = -50 * year_weight
                elif wd_year_delta <= 2:
                    year_score = -30 * year_weight
                elif wd_year_delta <= 5:
                    year_score = -15 * year_weight
                elif wd_year_delta <= 10:
                    year_score = 0
                else:
                    year_score = wd_year_delta * 3 * year_weight

                # Confidence acts as an extra bonus so trusted hits rise.
                distance_score = wd_distance * 0.1 * (1 - year_weight * 0.3)
                wd_event["match_score"] = distance_score + year_score - confidence * 10

                curated_results.append(wd_event)
                curated_names.add(event_name)

                if len(curated_results) >= limit:
                    break

            # Re-sort the combined curated + Wikidata list.
            curated_results.sort(key=lambda item: item.get("match_score", 999))

        except Exception as e:
            # Best-effort remote lookup: log and fall back to curated results.
            print(f"[history_service] Wikidata lookup failed: {e}")

    return curated_results[:limit]
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
def search_events_globally(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 500.0,
    limit: int = 10,
    year_weight: float = 1.5,
) -> List[dict]:
    """Broad-radius event search that always consults Wikidata.

    Convenience wrapper over get_events_by_coordinates for callers without
    precise coordinates; year-weighted scoring is applied by default.
    """
    return get_events_by_coordinates(
        lat=lat,
        lon=lon,
        year=year,
        radius_km=radius_km,
        limit=limit,
        include_wikidata=True,
        year_weight=year_weight,
    )
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def get_event_by_slug(slug: str) -> Optional[dict]:
    """Fetch a single event row by its slug, decoding JSON columns; None if absent."""
    conn = _get_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM events WHERE slug = ?", (slug,))
    row = cursor.fetchone()
    conn.close()

    if not row:
        return None

    event = dict(row)
    # Decode every JSON-encoded column; factories give fresh per-call defaults.
    for field, factory in (
        ("themes", list),
        ("actors", list),
        ("artifacts", list),
        ("visual_motifs", list),
        ("facets", dict),
        ("sources", list),
        ("time_range", dict),
        ("geo_anchor", dict),
        ("relationships", dict),
    ):
        event[field] = _deserialize(event.get(field), factory())
    return event
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
def get_event_by_name(name: str, include_wikidata: bool = True) -> Optional[dict]:
    """Look up an event by human-readable name.

    The curated store is consulted first via the name-derived slug; Wikidata
    is a best-effort fallback when enabled and available.
    """
    curated = get_event_by_slug(name.lower().replace(" ", "_"))
    if curated:
        curated["source"] = "curated"
        return curated

    wikidata_enabled = include_wikidata and ENABLE_WIKIDATA_FALLBACK and WIKIDATA_AVAILABLE
    if wikidata_enabled:
        try:
            hits = wikidata_search_by_name(name, limit=1)
            if hits:
                hits[0]["source"] = "wikidata"
                return hits[0]
        except Exception as e:
            # Remote lookup is best-effort: log and fall through to None.
            print(f"[history_service] Wikidata name search failed: {e}")

    return None
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def get_event_by_qid(qid: str) -> Optional[dict]:
    """Fetch full event detail from Wikidata by QID.

    Returns None when the Wikidata service is unavailable or the lookup fails.
    """
    if not WIKIDATA_AVAILABLE:
        return None

    try:
        detail = wikidata_get_detail(qid)
    except Exception as e:
        # Best-effort remote call: log and signal absence.
        print(f"[history_service] Wikidata QID lookup failed: {e}")
        return None
    return detail
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
def get_artifacts_for_year(year: int, limit: int = 4) -> List[dict]:
    """Select curated artifacts whose era spans *year*.

    Artifacts without a complete era range always match; if nothing matches
    the year, the whole catalogue is used as a fallback.
    """
    selected: List[dict] = []
    for artifact in CURATED_ARTIFACTS:
        era_start, era_end = artifact.get("era", [None, None])
        undated = era_start is None or era_end is None
        if undated or era_start <= year <= era_end:
            selected.append(artifact)
    if not selected:
        selected = CURATED_ARTIFACTS[:]
    return selected[:limit]
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
def summarize_event(event: dict) -> str:
    """Return the best available description: summary, else narrative, else name."""
    for key in ("summary", "narrative"):
        text = event.get(key)
        if text:
            return text
    return event.get("name")
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
def ensure_iterable(value: Optional[Iterable[str]]) -> List[str]:
    """Coerce an optional iterable into a concrete list (None becomes [])."""
    return [] if value is None else list(value)
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
# Hand-curated historical events.  Each record uses one schema:
#   name / slug                       -- display name and lookup key (slug is the
#                                        lowercased, underscored name)
#   year, start_year, end_year,
#   month, day                        -- calendar anchor; negative year = BCE
#   lat / lon                         -- event coordinates (decimal degrees)
#   summary / narrative               -- one-line description vs. scene text
#   themes / actors / artifacts /
#   visual_motifs                     -- enrichment word lists
#   facets                            -- era/region/type tags; region values appear
#                                        as keys of REGIONAL_CONTEXT
#   sources                           -- attribution {label, url} entries
#   time_range                        -- ISO-ish start/end of the depicted window
#   geo_anchor                        -- lat/lon plus a radius_km search extent
#   confidence                        -- curator confidence score in [0, 1]
#   relationships                     -- causes/consequences/parallel event links
CURATED_EVENTS: List[dict] = [
    {
        "name": "Fall of the Berlin Wall",
        "slug": "fall_of_the_berlin_wall",
        "year": 1989,
        "start_year": 1989,
        "end_year": 1989,
        "month": 11,
        "day": 9,
        "lat": 52.5163,
        "lon": 13.3777,
        "summary": "East and West Berliners gather at the Brandenburg Gate as border checkpoints open and the concrete wall begins to fall.",
        "narrative": (
            "A sea of Berliners clamber atop graffiti-streaked concrete slabs, cheering as border guards lift the barriers. "
            "People pass champagne bottles, wield sledgehammers, and chip away fragments while floodlights and television crews illuminate the night."
        ),
        "themes": ["political", "reunification", "cold war"],
        "actors": ["East German civilians", "West Berlin residents", "border guards", "international journalists"],
        "artifacts": ["Graffiti-covered concrete", "Champagne bottles", "Trabant cars", "Floodlights", "Metal barricades"],
        "visual_motifs": ["floodlit night sky", "cold autumn breath", "television cameras", "crowded concrete wall"],
        "facets": {"era": "late 20th century", "region": "western_europe", "type": "political upheaval"},
        "sources": [{"label": "Wikipedia", "url": "https://en.wikipedia.org/wiki/Fall_of_the_Berlin_Wall"}],
        "time_range": {"start": "1989-11-09T18:00:00", "end": "1989-11-10T02:00:00"},
        "geo_anchor": {"lat": 52.5163, "lon": 13.3777, "radius_km": 4},
        "confidence": 0.96,
        "relationships": {"consequences": ["German reunification 1990"]},
    },
    {
        "name": "D-Day Landing at Omaha Beach",
        "slug": "d_day_landing_at_omaha_beach",
        "year": 1944,
        "start_year": 1944,
        "end_year": 1944,
        "month": 6,
        "day": 6,
        "lat": 49.4144,
        "lon": -0.8322,
        "summary": "Allied assault troops storm Omaha Beach under heavy German fire at dawn during Operation Overlord.",
        "narrative": (
            "Pre-dawn haze lifts as landing craft ramps crash open and American soldiers sprint through waist-high surf toward fortified bluffs. "
            "Machine-gun tracers stitch the air, artillery craters erupt in wet sand, and medics tend to the wounded beside hedgehog obstacles."
        ),
        "themes": ["military", "WWII", "allied victory"],
        "actors": ["US 1st Infantry Division", "US 29th Infantry Division", "German Atlantic Wall defenders", "Combat medics"],
        "artifacts": ["Higgins landing craft", "Browning machine guns", "M1 helmets", "Beach obstacles", "Signal flares"],
        "visual_motifs": ["morning fog", "breaking waves", "artillery smoke", "olive drab uniforms"],
        "facets": {"era": "mid 20th century", "region": "western_europe", "type": "amphibious assault"},
        "sources": [{"label": "National WWII Museum", "url": "https://www.nationalww2museum.org"}],
        "time_range": {"start": "1944-06-06T05:30:00", "end": "1944-06-06T10:00:00"},
        "geo_anchor": {"lat": 49.4144, "lon": -0.8322, "radius_km": 12},
        "confidence": 0.94,
        "relationships": {"parallel": ["Sword Beach landings", "Utah Beach landings"]},
    },
    {
        "name": "Signing of the Declaration of Independence",
        "slug": "signing_of_the_declaration_of_independence",
        "year": 1776,
        "start_year": 1776,
        "end_year": 1776,
        "month": 7,
        "day": 4,
        "lat": 39.9489,
        "lon": -75.1500,
        "summary": "Delegates of the Continental Congress sign the Declaration inside Independence Hall, Philadelphia.",
        "narrative": (
            "Sunlight streams through tall sash windows onto polished wood floors as delegates in powdered wigs lean over parchment. "
            "Quill pens scratch, wax seals glisten, and brass bellows stir a warm July breeze through the Assembly Room."
        ),
        "themes": ["political", "founding documents", "revolution"],
        "actors": ["Thomas Jefferson", "John Hancock", "Continental Congress delegates"],
        "artifacts": ["Quill pens", "Parchment scrolls", "Wax seals", "Mahogany desks"],
        "visual_motifs": ["golden afternoon light", "colonial interior", "powder wigs", "rich green drapery"],
        "facets": {"era": "late 18th century", "region": "north_america", "type": "political charter"},
        "sources": [{"label": "US National Archives", "url": "https://www.archives.gov/founding-docs/declaration"}],
        "time_range": {"start": "1776-07-04T10:00:00", "end": "1776-07-04T15:00:00"},
        "geo_anchor": {"lat": 39.9489, "lon": -75.1500, "radius_km": 1},
        "confidence": 0.9,
        "relationships": {"causes": ["Continental Congress debates"], "consequences": ["American Revolutionary War escalation"]},
    },
    {
        "name": "Battle of Waterloo",
        "slug": "battle_of_waterloo",
        "year": 1815,
        "start_year": 1815,
        "end_year": 1815,
        "month": 6,
        "day": 18,
        "lat": 50.6794,
        "lon": 4.4125,
        "summary": "Coalition forces defeat Napoleon Bonaparte near Waterloo, ending the Hundred Days campaign.",
        "narrative": (
            "Under rain-darkened skies, British squares brace against French cavalry charges across muddy Belgian fields. "
            "Cannon smoke drifts low, cuirassiers clash with bayonet lines, and signal flags ripple above the La Haye Sainte farmhouse."
        ),
        "themes": ["military", "napoleonic wars"],
        "actors": ["British infantry", "Dutch-Belgian troops", "French Imperial Guard", "Prussian reinforcements"],
        "artifacts": ["Cuirass armor", "Sabers", "Field cannon", "Signal flags"],
        "visual_motifs": ["storm clouds", "muddy terrain", "cavalry charge", "gunpowder smoke"],
        "facets": {"era": "early 19th century", "region": "western_europe", "type": "decisive battle"},
        "sources": [{"label": "Waterloo Battlefield", "url": "https://www.waterloo1815.be"}],
        "time_range": {"start": "1815-06-18T11:30:00", "end": "1815-06-18T20:30:00"},
        "geo_anchor": {"lat": 50.6794, "lon": 4.4125, "radius_km": 8},
        "confidence": 0.88,
        "relationships": {"consequences": ["Exile of Napoleon to Saint Helena"]},
    },
    {
        "name": "Hiroshima Atomic Bombing",
        "slug": "hiroshima_atomic_bombing",
        "year": 1945,
        "start_year": 1945,
        "end_year": 1945,
        "month": 8,
        "day": 6,
        "lat": 34.3853,
        "lon": 132.4553,
        "summary": "The United States detonates an atomic bomb over Hiroshima, Japan, causing widespread destruction.",
        "narrative": (
            "Moments after the blinding flash, a mushroom cloud towers above shattered city blocks. "
            "Wooden houses ignite, survivors stagger through debris-clogged streets, and the iconic Genbaku Dome stands amid the devastation."
        ),
        "themes": ["military", "WWII", "nuclear warfare"],
        "actors": ["Civilians", "First responders", "US bomber crew (distant)"],
        "artifacts": ["Genbaku Dome", "Debris-laden streets", "Shattered windows", "Charred telegraph poles"],
        "visual_motifs": ["mushroom cloud", "ashen fallout", "burning skyline", "silhouetted survivors"],
        "facets": {"era": "mid 20th century", "region": "east_asia", "type": "aerial bombardment"},
        "sources": [{"label": "Hiroshima Peace Memorial Museum", "url": "https://hpmmuseum.jp/?lang=en"}],
        "time_range": {"start": "1945-08-06T08:15:00", "end": "1945-08-06T12:00:00"},
        "geo_anchor": {"lat": 34.3853, "lon": 132.4553, "radius_km": 15},
        "confidence": 0.87,
        "relationships": {"consequences": ["Surrender of Japan 1945"]},
    },
    {
        "name": "Tiananmen Square Protests",
        "slug": "tiananmen_square_protests",
        "year": 1989,
        "start_year": 1989,
        "end_year": 1989,
        "month": 6,
        "day": 4,
        "lat": 39.9042,
        "lon": 116.4074,
        "summary": "Chinese citizens hold pro-democracy demonstrations in Beijing's Tiananmen Square before military suppression.",
        "narrative": (
            "In early dawn haze, students link arms facing a line of armored vehicles. "
            "The Goddess of Democracy statue rises above banners, bicycle couriers weave through tents, and the Gate of Heavenly Peace looms in the background."
        ),
        "themes": ["political", "protest", "democracy"],
        "actors": ["Student demonstrators", "People's Liberation Army soldiers", "Beijing residents"],
        "artifacts": ["Goddess of Democracy statue", "Banners and loudspeakers", "Tents", "Armored personnel carriers"],
        "visual_motifs": ["morning haze", "stone square", "red flags", "human chain"],
        "facets": {"era": "late 20th century", "region": "east_asia", "type": "protest movement"},
        "sources": [{"label": "BBC Timeline", "url": "https://www.bbc.com/news/world-asia-china-12661772"}],
        "time_range": {"start": "1989-06-03T22:00:00", "end": "1989-06-04T07:00:00"},
        "geo_anchor": {"lat": 39.9042, "lon": 116.4074, "radius_km": 6},
        "confidence": 0.88,
        "relationships": {"parallel": ["1989 global protest movements"]},
    },
    {
        "name": "Apollo 11 Moon Launch",
        "slug": "apollo_11_moon_launch",
        "year": 1969,
        "start_year": 1969,
        "end_year": 1969,
        "month": 7,
        "day": 16,
        "lat": 28.5729,
        "lon": -80.6490,
        "summary": "NASA launches Apollo 11 from Kennedy Space Center, beginning the first crewed mission to land on the Moon.",
        "narrative": (
            "Spectators line the Causeway as the Saturn V rockets skyward, engines roaring and painting the morning sky orange. "
            "Camera crews pan across mission control staff, astronauts in white suits wave before boarding, and the vehicle assembly building looms nearby."
        ),
        "themes": ["space exploration", "science", "Cold War"],
        "actors": ["Neil Armstrong", "Buzz Aldrin", "Michael Collins", "Mission control engineers"],
        "artifacts": ["Saturn V rocket", "Launch gantry", "Mission patches", "Telemetry consoles"],
        "visual_motifs": ["plume of fire", "sunrise glow", "American flags", "NASA vehicles"],
        "facets": {"era": "late 20th century", "region": "north_america", "type": "space mission"},
        "sources": [{"label": "NASA History", "url": "https://www.nasa.gov/specials/apollo50th/"}],
        "time_range": {"start": "1969-07-16T09:32:00", "end": "1969-07-16T10:00:00"},
        "geo_anchor": {"lat": 28.5729, "lon": -80.6490, "radius_km": 10},
        "confidence": 0.89,
        "relationships": {"consequences": ["Apollo 11 moon landing"]},
    },
    {
        "name": "Wright Brothers First Flight",
        "slug": "wright_brothers_first_flight",
        "year": 1903,
        "start_year": 1903,
        "end_year": 1903,
        "month": 12,
        "day": 17,
        "lat": 36.0177,
        "lon": -75.6694,
        "summary": "Orville and Wilbur Wright achieve the first powered, sustained flight at Kitty Hawk, North Carolina.",
        "narrative": (
            "On windswept dunes, Orville lies prone on the Flyer as Wilbur steadies a wingtip. "
            "A small crowd of lifesavers braces the launch rail, camera ready, as the biplane lifts into the cold December air for twelve seconds."
        ),
        "themes": ["aviation", "innovation"],
        "actors": ["Orville Wright", "Wilbur Wright", "Kill Devil Hills lifesavers"],
        "artifacts": ["Wright Flyer", "Launch rail", "Oil-stained overalls", "Box camera"],
        "visual_motifs": ["wind-scoured dunes", "frosty breath", "canvas wings", "wooden spars"],
        "facets": {"era": "early 20th century", "region": "north_america", "type": "technological milestone"},
        "sources": [{"label": "Smithsonian Air & Space", "url": "https://airandspace.si.edu"}],
        "time_range": {"start": "1903-12-17T10:35:00", "end": "1903-12-17T10:47:00"},
        "geo_anchor": {"lat": 36.0177, "lon": -75.6694, "radius_km": 3},
        "confidence": 0.86,
        "relationships": {"consequences": ["Development of powered flight"]},
    },
    {
        "name": "Grito de Dolores",
        "slug": "grito_de_dolores",
        "year": 1810,
        "start_year": 1810,
        "end_year": 1810,
        "month": 9,
        "day": 16,
        "lat": 21.1561,
        "lon": -100.9326,
        "summary": "Father Miguel Hidalgo y Costilla calls for Mexican independence with the famous Grito de Dolores.",
        "narrative": (
            "Before dawn, church bells ring out as Father Hidalgo addresses villagers in the plaza, torchlight illuminating insurgent banners. "
            "Peasants clutch farming tools turned weapons while women distribute ammunition from woven baskets."
        ),
        "themes": ["revolution", "latin america"],
        "actors": ["Father Miguel Hidalgo", "Town villagers", "Criollo supporters"],
        "artifacts": ["Church bell rope", "Guadalupe banner", "Torches", "Improvised spears"],
        "visual_motifs": ["torchlit plaza", "colonial church facade", "Mexican flag colors", "dawn sky"],
        "facets": {"era": "early 19th century", "region": "central_america", "type": "independence movement"},
        "sources": [{"label": "Mexican History", "url": "https://www.gob.mx"}],
        "time_range": {"start": "1810-09-16T05:00:00", "end": "1810-09-16T07:00:00"},
        "geo_anchor": {"lat": 21.1561, "lon": -100.9326, "radius_km": 5},
        "confidence": 0.82,
        "relationships": {"consequences": ["Mexican War of Independence"]},
    },
    {
        "name": "Storming of the Bastille",
        "slug": "storming_of_the_bastille",
        "year": 1789,
        "start_year": 1789,
        "end_year": 1789,
        "month": 7,
        "day": 14,
        "lat": 48.8530,
        "lon": 2.3692,
        "summary": "Parisian revolutionaries seize the Bastille fortress, igniting the French Revolution.",
        "narrative": (
            "Parisians wielding pikes and muskets swarm the Bastille's stone courtyard as smoke billows from cannon fire. "
            "National Guardsmen drag royal cannons into position while prisoners emerge to cheering crowds waving tricolor cockades."
        ),
        "themes": ["revolution", "political upheaval"],
        "actors": ["Parisian crowds", "National Guardsmen", "Royal soldiers"],
        "artifacts": ["Tricolor cockades", "Iron portcullis", "Cannons", "Stone battlements"],
        "visual_motifs": ["smoke-filled courtyard", "stormy summer sky", "stone fortress", "crowd surge"],
        "facets": {"era": "late 18th century", "region": "western_europe", "type": "revolutionary uprising"},
        "sources": [{"label": "French Archives", "url": "https://www.archives-nationales.culture.gouv.fr"}],
        "time_range": {"start": "1789-07-14T09:00:00", "end": "1789-07-14T17:00:00"},
        "geo_anchor": {"lat": 48.8530, "lon": 2.3692, "radius_km": 3},
        "confidence": 0.84,
        "relationships": {"consequences": ["Declaration of the Rights of Man"]},
    },
    {
        "name": "Assassination of Julius Caesar",
        "slug": "assassination_of_julius_caesar",
        # Negative year encodes BCE (44 BC).
        "year": -44,
        "start_year": -44,
        "end_year": -44,
        "month": 3,
        "day": 15,
        "lat": 41.8933,
        "lon": 12.4729,
        "summary": "Julius Caesar is stabbed by Roman senators inside the Theatre of Pompey during the Ides of March.",
        "narrative": (
            "Late morning sunlight filters through the marble portico as Caesar takes his seat. "
            "Senators in scarlet-trimmed togas encircle him; daggers flash, and the dictator staggers toward the statue of Pompey "
            "beneath frescoed arches and hanging laurel wreaths."
        ),
        "themes": ["political", "assassination", "ancient rome"],
        "actors": ["Julius Caesar", "Marcus Junius Brutus", "Gaius Cassius Longinus", "Roman senators"],
        "artifacts": ["Marble curule chair", "Bronze daggers", "Laurel wreaths", "Blood-stained togas"],
        "visual_motifs": ["marble columns", "sunbeam through smoke", "collapsing laurel crown"],
        "facets": {"era": "classical antiquity", "region": "western_europe", "type": "political assassination"},
        "sources": [{"label": "Ancient Rome", "url": "https://en.wikipedia.org/wiki/Assassination_of_Julius_Caesar"}],
        "time_range": {"start": "-0044-03-15T11:00:00", "end": "-0044-03-15T12:00:00"},
        "geo_anchor": {"lat": 41.8933, "lon": 12.4729, "radius_km": 2},
        "confidence": 0.9,
        "relationships": {"consequences": ["Liberators' civil war"]},
    },
]
|
| 820 |
+
|
| 821 |
+
|
| 822 |
+
# Curated museum-style artifacts.  "era" is an inclusive (start, end) year
# pair used by get_artifacts_for_year() to decide whether an artifact is
# period-appropriate for a requested year.
CURATED_ARTIFACTS: List[dict] = [
    {"title": "Graffiti fragment of the Berlin Wall", "culture": "German", "period": "Cold War", "era": (1961, 1990)},
    {"title": "Allied M1 Helmet", "culture": "American", "period": "World War II", "era": (1941, 1945)},
    {"title": "Continental Congress inkwell", "culture": "American", "period": "Revolutionary", "era": (1765, 1783)},
    {"title": "French cuirassier armor", "culture": "French", "period": "Napoleonic", "era": (1800, 1815)},
    {"title": "Goddess of Democracy maquette", "culture": "Chinese", "period": "Late 20th century", "era": (1980, 1990)},
    {"title": "Saturn V mission patch", "culture": "American", "period": "Space Age", "era": (1960, 1975)},
    {"title": "Wright Flyer blueprint", "culture": "American", "period": "Early Aviation", "era": (1899, 1905)},
    {"title": "Bastille prison key", "culture": "French", "period": "Revolutionary", "era": (1789, 1799)},
]
|
| 832 |
+
|
| 833 |
+
# Visual-style vocabulary keyed by half-open year ranges [start, end):
# get_era_vocabulary() picks the entry where start <= year < end.  Ranges
# are contiguous and non-overlapping, covering -5000 through 2030.
ERA_VISUAL_VOCABULARY: Dict[Tuple[int, int], dict] = {
    (-5000, 1700): {
        "architecture": "stone structures, timber framing, open marketplaces",
        "clothing": "homespun fabrics, cloaks, leather sandals",
        "technology": "handcrafted tools, smoke from hearth fires, animal-drawn transport",
        "transport": "horses, carts, foot traffic",
        "mood": "earthy textures, smoke and torchlight",
    },
    (1700, 1850): {
        "architecture": "Georgian and neoclassical facades, stone avenues, colonial interiors",
        "clothing": "powdered wigs, waistcoats, breeches, corseted gowns",
        "technology": "printing presses, quill ink, carronade cannons",
        "transport": "horse-drawn carriages, sailing ships, infantry columns",
        "mood": "oil-painted lighting, warm candle glow and shadow",
    },
    (1850, 1918): {
        "architecture": "industrial brick mills, iron train stations, Victorian terraces",
        "clothing": "bowler hats, uniforms with brass buttons, layered dresses",
        "technology": "steam locomotives, telegraph poles, gas lanterns",
        "transport": "steam trains, horse omnibuses, early bicycles",
        "mood": "coal smoke haze, sepia-toned atmosphere",
    },
    (1918, 1950): {
        "architecture": "art deco facades, reinforced bunkers, concrete civic plazas",
        "clothing": "military uniforms, flapper dresses, utilitarian workwear",
        "technology": "radio towers, field telephones, propeller aircraft",
        "transport": "steel warships, troop trucks, streetcars",
        "mood": "black-and-white newsreel grit, halation from searchlights",
    },
    # Also serves as the default for years outside every range
    # (see get_era_vocabulary()).
    (1950, 1990): {
        "architecture": "mid-century modern lines, brutalist government blocks, neon signage",
        "clothing": "denim jackets, tailored suits, Cold War uniforms",
        "technology": "cathode-ray cameras, satellite dishes, analog broadcast vans",
        "transport": "boxy sedans, subway trains, patrol jeeps",
        "mood": "sodium-vapor glow, vivid chromatic contrasts",
    },
    (1990, 2030): {
        "architecture": "glass high-rises, LED billboards, postmodern cultural centers",
        "clothing": "synthetic fabrics, streetwear, modern uniforms",
        "technology": "smart devices, digital screens, drones",
        "transport": "light rail, electric cars, bicycles with LED lights",
        "mood": "clean highlights, cinematic depth of field, vibrant color grading",
    },
}
|
| 877 |
+
|
| 878 |
+
# Region-specific scene-setting details, keyed by the lowercase region tags
# used in the curated events' facets["region"] field.  Looked up by
# get_region_context(); unknown keys yield an empty dict there.
REGIONAL_CONTEXT: Dict[str, dict] = {
    "western_europe": {
        "architecture": "historic stone plazas, cathedrals, tram-lined boulevards",
        "climate": "temperate weather with layered clouds and soft rain",
    },
    "eastern_europe": {
        "architecture": "Soviet-era apartment blocks, neoclassical government buildings",
        "climate": "continental climate with sharp seasonal contrast",
    },
    "north_america": {
        "architecture": "brick row houses, colonial meeting halls, steel skyscrapers",
        "climate": "varied weather, from humid summers to snowy winters",
    },
    "east_asia": {
        "architecture": "pagoda rooftops, dense urban districts, neon signage",
        "climate": "humid subtropical seasons with monsoon rains",
    },
    "central_america": {
        "architecture": "stucco plazas, colonial churches, cobblestone streets",
        "climate": "warm highland mornings with misty horizons",
    },
    "western_asia": {
        "architecture": "stone citadels, market arcades, desert courtyards",
        "climate": "arid sunlight, dust carried on dry winds",
    },
}
|
| 904 |
+
|
| 905 |
+
|
| 906 |
+
def get_era_vocabulary(year: int) -> dict:
    """Return the ERA_VISUAL_VOCABULARY entry whose [start, end) span holds *year*.

    Years outside every configured span fall back to the (1950, 1990) entry.
    """
    modern_default = ERA_VISUAL_VOCABULARY[(1950, 1990)]
    return next(
        (
            vocab
            for (start, end), vocab in ERA_VISUAL_VOCABULARY.items()
            if start <= year < end
        ),
        modern_default,
    )
|
| 912 |
+
|
| 913 |
+
|
| 914 |
+
def get_region_context(region_key: Optional[str]) -> dict:
    """Return REGIONAL_CONTEXT details for *region_key* (case-insensitive).

    None/empty keys and unknown regions both yield an empty dict.
    """
    if region_key:
        return REGIONAL_CONTEXT.get(region_key.lower(), {})
    return {}
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
def format_event_digest(event: dict) -> dict:
    """Project *event* down to the compact digest shape used in responses.

    List-valued fields are normalised with ensure_iterable; facets defaults
    to an empty dict.  Key order mirrors the original digest layout.
    """
    digest: dict = {}
    for key in ("name", "slug", "year", "start_year", "end_year", "month",
                "day", "lat", "lon", "summary"):
        digest[key] = event.get(key)
    digest["themes"] = ensure_iterable(event.get("themes"))
    digest["facets"] = event.get("facets", {})
    for key in ("distance_km", "year_delta", "match_confidence"):
        digest[key] = event.get(key)
    digest["sources"] = ensure_iterable(event.get("sources"))
    return digest
|
| 939 |
+
|
| 940 |
+
|
| 941 |
+
def build_event_context(event: dict) -> dict:
    """Assemble the full scene-building context for a single event.

    Wraps the compact digest plus the narrative/actor/artifact detail; the
    reported confidence prefers match_confidence over the curated score.
    """
    confidence = event.get("match_confidence", event.get("confidence"))
    context = {"event": format_event_digest(event)}
    context["narrative"] = event.get("narrative")
    context["actors"] = ensure_iterable(event.get("actors"))
    context["artifacts"] = ensure_iterable(event.get("artifacts"))
    context["visual_motifs"] = ensure_iterable(event.get("visual_motifs"))
    context["relationships"] = event.get("relationships", {})
    context["time_range"] = event.get("time_range")
    context["geo_anchor"] = event.get("geo_anchor")
    context["confidence"] = confidence
    return context
|
| 953 |
+
|
| 954 |
+
|
| 955 |
+
def get_events_response(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 250.0,
    limit: int = 5,
) -> dict:
    """Run a coordinate/year event search and wrap it in the API envelope.

    The envelope echoes the query parameters and carries the matching
    events as compact digests.
    """
    found = get_events_by_coordinates(lat, lon, year, radius_km=radius_km, limit=limit)
    digests = [format_event_digest(item) for item in found]
    return {
        "query": {"lat": lat, "lon": lon, "year": year, "radius_km": radius_km, "limit": limit},
        "count": len(digests),
        "events": digests,
    }
|
| 968 |
+
|
| 969 |
+
|
services/prompt_parser.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Month-name -> month-number lookup.  DATE_PATTERN below is built by joining
# these keys into a regex alternation, so every key here is a recognised
# month spelling.  Full names come first: the regex engine takes the first
# alternative that matches, and we want "march" matched before "mar".
# Previously only "sept" was accepted as an abbreviation; the standard
# three-letter forms (and "sep") are included for consistent coverage.
MONTHS = {
    "january": 1,
    "february": 2,
    "march": 3,
    "april": 4,
    "may": 5,
    "june": 6,
    "july": 7,
    "august": 8,
    "september": 9,
    "sept": 9,
    "october": 10,
    "november": 11,
    "december": 12,
    # Three-letter abbreviations (kept after the full names; "may" above
    # already covers its own abbreviation).
    "jan": 1,
    "feb": 2,
    "mar": 3,
    "apr": 4,
    "jun": 6,
    "jul": 7,
    "aug": 8,
    "sep": 9,
    "oct": 10,
    "nov": 11,
    "dec": 12,
}
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Sign multipliers for hemisphere letters: north/east are positive,
# south/west negative (applied by _apply_hemisphere).
HEMISPHERE_MULTIPLIERS = {
    "n": 1,
    "s": -1,
    "e": 1,
    "w": -1,
}


# A decimal number, an optional degree marker, then a hemisphere letter,
# e.g. "48.85° N" or "2.35 E".  Group 1 = value, group 2 = hemisphere.
COORD_PATTERN = re.compile(
    r"([-+]?\d+(?:\.\d+)?)\s*(?:°|deg|degrees)?\s*([NnSsEeWw])"
)

# "lat: 48.85" / "longitude = 2.35" style key-value coordinates.  Exactly
# one of the two capture groups is non-empty for any given match.
LAT_LON_WORD_PATTERN = re.compile(
    r"(?:latitude|lat)\s*[:=]?\s*([-+]?\d+(?:\.\d+)?)|(?:longitude|lon)\s*[:=]?\s*([-+]?\d+(?:\.\d+)?)",
    re.IGNORECASE,
)

# Full date like "July 4th, 1776 AD": a month name drawn from MONTHS, a
# 1-2 digit day with optional ordinal suffix, a year, and an optional era
# marker.  Built at import time from MONTHS.keys().
DATE_PATTERN = re.compile(
    r"\b("
    + "|".join(MONTHS.keys())
    + r")\s+(\d{1,2})(?:st|nd|rd|th)?(?:,\s*|\s+)(-?\d{1,4})(?:\s*(BCE|BC|CE|AD))?",
    re.IGNORECASE,
)

# Bare year with optional era marker, e.g. "1944" or "44 BC" — the
# fallback when no full date is present.
YEAR_ONLY_PATTERN = re.compile(r"\b(-?\d{1,4})\s*(BCE|BC|CE|AD)?\b", re.IGNORECASE)

# 24-hour style times such as "14:30 hours" or "9h".
HOUR_PATTERN = re.compile(r"\b(\d{1,2})(?::(\d{2}))?\s*(?:hours?|h)\b", re.IGNORECASE)

# 12-hour clock times introduced by "at"/"around", e.g. "at 7:30 pm".
# NOTE(review): name says "seasonal" but the pattern matches am/pm clock
# times — presumably a leftover name; kept for compatibility.
SEASONAL_HOUR_PATTERN = re.compile(
    r"\b(?:at|around)\s*(\d{1,2})(?::(\d{2}))?\s*(?:am|pm)\b", re.IGNORECASE
)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@dataclass
class ParsedPrompt:
    """Structured fields extracted from a free-text prompt.

    Fields the parser could not recover stay None; ``confidence`` is a
    rough 0.0-1.0 score accumulated from the individual extraction steps.
    """

    # Decimal-degree coordinates (positive = N/E, negative = S/W).
    lat: Optional[float] = None
    lon: Optional[float] = None
    # Calendar fields; year may be negative for BCE dates.
    year: Optional[int] = None
    month: Optional[int] = None
    day: Optional[int] = None
    # Time of day on a 24-hour clock.
    hour: Optional[int] = None
    minute: Optional[int] = None
    # Sum of the per-parser confidence contributions, capped at 1.0
    # by parse_prompt_context().
    confidence: float = 0.0
    # The original prompt text, passed through unchanged.
    residual_text: str = ""
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _apply_hemisphere(value: float, hemisphere: str) -> float:
    """Negate *value* for south/west hemisphere letters; pass through otherwise."""
    sign = HEMISPHERE_MULTIPLIERS.get(hemisphere.lower(), 1)
    return sign * value
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _parse_coordinates(text: str) -> tuple[Optional[float], Optional[float], float]:
    """Extract (lat, lon, confidence) from free text.

    Hemisphere-suffixed values ("48.85 N 2.35 E") are tried first; the
    "lat:"/"longitude =" key-value phrasing then fills in whichever axis is
    still missing.  Confidence: 0.25 per axis found via hemisphere markers
    (0.5 for a full pair, as before), 0.2 per axis recovered from the
    key-value fallback, capped at 0.6.

    Fix: previously a lone hemisphere-marked value (e.g. only "48.85 N")
    was parsed into a candidate and then discarded because both axes were
    required; it is now kept, matching the fallback's per-axis behaviour.
    """
    lat: Optional[float] = None
    lon: Optional[float] = None
    confidence = 0.0

    # Pass 1: hemisphere-suffixed numbers.  First N/S hit becomes latitude,
    # first E/W hit becomes longitude.
    for value_str, hemisphere in COORD_PATTERN.findall(text):
        hemi = hemisphere.lower()
        adjusted = _apply_hemisphere(float(value_str), hemi)
        if hemi in ("n", "s") and lat is None:
            lat = adjusted
            confidence += 0.25
        elif hemi in ("e", "w") and lon is None:
            lon = adjusted
            confidence += 0.25

    # Pass 2: "latitude: X" / "longitude: Y" phrasing for any missing axis.
    if lat is None or lon is None:
        word_matches = LAT_LON_WORD_PATTERN.findall(text)
        lat_words = [float(item[0]) for item in word_matches if item[0]]
        lon_words = [float(item[1]) for item in word_matches if item[1]]
        if lat is None and lat_words:
            lat = lat_words[0]
            confidence += 0.2
        if lon is None and lon_words:
            lon = lon_words[0]
            confidence += 0.2

    return lat, lon, min(confidence, 0.6)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def _convert_year(raw_year: str, era: Optional[str]) -> int:
|
| 110 |
+
year = int(raw_year)
|
| 111 |
+
if era:
|
| 112 |
+
era = era.upper()
|
| 113 |
+
if era in ("BCE", "BC"):
|
| 114 |
+
return -abs(year)
|
| 115 |
+
return year
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _parse_date(text: str) -> tuple[Optional[int], Optional[int], Optional[int], float]:
    """Extract (year, month, day, confidence) from free text.

    A full "Month D, YYYY [era]" date wins with confidence 0.4; otherwise
    the first plausible standalone year scores 0.2.  Returns all-None with
    0.0 confidence when nothing matches.

    Fix: the year-only fallback previously matched digit runs embedded in
    decimal numbers — e.g. the "48" in "48.8530" or the "35" in "2.35"
    (both within the plausible range) — misreading coordinates as years.
    Candidates adjacent to a decimal point with a digit on the other side
    are now skipped; a year at the end of a sentence ("... in 1944.") is
    still accepted.
    """
    match = DATE_PATTERN.search(text)
    if match:
        month_name, day_str, year_str, era = match.groups()
        month = MONTHS.get(month_name.lower())
        day = int(day_str)
        year = _convert_year(year_str, era)
        return year, month, day, 0.4

    # Fallback: year-only pattern.
    for candidate in YEAR_ONLY_PATTERN.finditer(text):
        start, end = candidate.span(1)
        # Skip the fractional part of a decimal ("digit." immediately before).
        if start > 1 and text[start - 1] == "." and text[start - 2].isdigit():
            continue
        # Skip the integer part of a decimal (".digit" immediately after).
        if end + 1 < len(text) and text[end] == "." and text[end + 1].isdigit():
            continue
        year_str, era = candidate.groups()
        year = _convert_year(year_str, era)
        if -5000 <= year <= 3000:  # plausible historical range
            return year, None, None, 0.2
    return None, None, None, 0.0
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _parse_hour(text: str) -> tuple[Optional[int], Optional[int], float]:
    """Extract (hour, minute, confidence) from free-form text.

    A plain clock-time match scores 0.2; a seasonal/meridiem-style match
    scores 0.15, with 12-hour notation normalised to 24-hour; otherwise
    everything is None with zero confidence.
    """
    clock = HOUR_PATTERN.search(text)
    if clock:
        minute = int(clock.group(2)) if clock.group(2) else 0
        return int(clock.group(1)), minute, 0.2

    seasonal = SEASONAL_HOUR_PATTERN.search(text)
    if seasonal:
        hour = int(seasonal.group(1))
        minute = int(seasonal.group(2)) if seasonal.group(2) else 0
        matched = seasonal.group(0).lower()
        # Normalise 12-hour notation: "pm" shifts into the afternoon,
        # "12 am" wraps to midnight.
        if "pm" in matched and hour < 12:
            hour += 12
        if "am" in matched and hour == 12:
            hour = 0
        return hour, minute, 0.15

    return None, None, 0.0
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def parse_prompt_context(prompt: Optional[str]) -> ParsedPrompt:
    """Pull coordinate, date and time-of-day hints out of a free-form prompt.

    Each extractor contributes its own confidence; the combined score is
    capped at 1.0. The untouched prompt is kept as residual_text so the
    caller can still use it downstream.
    """
    if not prompt:
        return ParsedPrompt(residual_text="")

    lat, lon, coord_conf = _parse_coordinates(prompt)
    year, month, day, date_conf = _parse_date(prompt)
    hour, minute, hour_conf = _parse_hour(prompt)

    combined_confidence = min(coord_conf + date_conf + hour_conf, 1.0)
    return ParsedPrompt(
        lat=lat,
        lon=lon,
        year=year,
        month=month,
        day=day,
        hour=hour,
        minute=minute,
        confidence=combined_confidence,
        residual_text=prompt,
    )
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
|
services/wikidata_service.py
ADDED
|
@@ -0,0 +1,706 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Wikidata service for finding historical events.
|
| 3 |
+
|
| 4 |
+
Queries Wikidata for events at specific coordinates and times.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import hashlib
|
| 10 |
+
import json
|
| 11 |
+
import math
|
| 12 |
+
import os
|
| 13 |
+
import ssl
|
| 14 |
+
import time
|
| 15 |
+
from dataclasses import dataclass, field
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 19 |
+
import urllib.request
|
| 20 |
+
import urllib.parse
|
| 21 |
+
import urllib.error
|
| 22 |
+
|
| 23 |
+
# Note: We use urllib with SSL fallback to avoid permission issues with requests/certifi
USE_REQUESTS = False

# Cache configuration
ROOT_DIR = Path(__file__).resolve().parent.parent
CACHE_DIR = ROOT_DIR / "data" / "wikidata_cache"
# NOTE: creates the cache directory as an import-time side effect.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
CACHE_TTL_SECONDS = 86400 * 7  # 7 days

WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"

# Rate limiting (module-level state shared by all callers in this process)
_last_request_time: float = 0.0
MIN_REQUEST_INTERVAL = 1.5  # seconds between requests
|
| 39 |
+
@dataclass
class WikidataEvent:
    """Represents a historical event from Wikidata."""

    qid: str  # Wikidata item identifier, e.g. "Q784"
    name: str  # event label from the label service
    description: str = ""
    year: Optional[int] = None  # negative for BCE
    month: Optional[int] = None
    day: Optional[int] = None
    lat: Optional[float] = None
    lon: Optional[float] = None
    participants: List[str] = field(default_factory=list)
    location_name: str = ""
    event_type: str = ""
    wikipedia_url: str = ""
    image_url: str = ""
    distance_km: float = 0.0  # distance from the query point, km
    year_delta: int = 0  # |event year - query year|
    confidence: float = 0.0  # heuristic relevance score (capped at 0.98)
    source: str = "wikidata"

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict, including legacy compatibility keys.

        The extra keys at the bottom ("summary", "narrative", "actors",
        etc.) mirror the shape expected by pre-existing consumers, so both
        new and old call sites can read the same payload.
        """
        return {
            "qid": self.qid,
            "name": self.name,
            "description": self.description,
            "year": self.year,
            "month": self.month,
            "day": self.day,
            "lat": self.lat,
            "lon": self.lon,
            "participants": self.participants,
            "location_name": self.location_name,
            "event_type": self.event_type,
            "wikipedia_url": self.wikipedia_url,
            "image_url": self.image_url,
            "distance_km": self.distance_km,
            "year_delta": self.year_delta,
            "confidence": self.confidence,
            "source": self.source,
            # Compatibility fields for existing code
            "summary": self.description,
            "narrative": self.description,
            "actors": self.participants,
            "themes": [self.event_type] if self.event_type else [],
            "artifacts": [],
            "visual_motifs": [],
            "facets": {"type": self.event_type},
            "sources": [{"label": "Wikidata", "url": f"https://www.wikidata.org/wiki/{self.qid}"}],
            "match_confidence": self.confidence,
        }
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _cache_key(lat: float, lon: float, year: int, radius_km: float) -> str:
|
| 93 |
+
"""Generate a cache key for the query parameters."""
|
| 94 |
+
raw = f"{lat:.2f}_{lon:.2f}_{year}_{radius_km:.0f}"
|
| 95 |
+
return hashlib.md5(raw.encode()).hexdigest()
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _get_cached(cache_key: str) -> Optional[List[Dict]]:
    """Return cached events for cache_key, or None if absent/expired/corrupt.

    Expired entries are deleted on sight so the cache directory does not
    accumulate stale files.
    """
    cache_path = CACHE_DIR / f"{cache_key}.json"
    if not cache_path.exists():
        return None

    try:
        with open(cache_path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)

        if time.time() - payload.get("timestamp", 0) > CACHE_TTL_SECONDS:
            cache_path.unlink(missing_ok=True)
            return None

        return payload.get("events", [])
    except (json.JSONDecodeError, OSError):
        return None
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _save_cache(cache_key: str, events: List[Dict]) -> None:
    """Persist events under cache_key; write failures are silently ignored."""
    cache_path = CACHE_DIR / f"{cache_key}.json"
    payload = {"timestamp": time.time(), "events": events}
    try:
        with open(cache_path, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, ensure_ascii=False)
    except OSError:
        # Best-effort cache: a failed write only costs a future re-query.
        pass
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _rate_limit() -> None:
    """Block just long enough to keep MIN_REQUEST_INTERVAL between requests."""
    global _last_request_time
    remaining = MIN_REQUEST_INTERVAL - (time.time() - _last_request_time)
    if remaining > 0:
        time.sleep(remaining)
    _last_request_time = time.time()
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def _haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
| 138 |
+
"""Calculate distance in km between two points."""
|
| 139 |
+
R = 6371.0
|
| 140 |
+
phi1, phi2 = math.radians(lat1), math.radians(lat2)
|
| 141 |
+
dphi = math.radians(lat2 - lat1)
|
| 142 |
+
dlam = math.radians(lon2 - lon1)
|
| 143 |
+
a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
|
| 144 |
+
return R * 2 * math.atan2(math.sqrt(a), math.sqrt(max(0, 1 - a)))
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def _build_sparql_query(lat: float, lon: float, year: int, radius_km: float, limit: int = 20) -> str:
|
| 148 |
+
"""
|
| 149 |
+
Build SPARQL query for historical events near coordinates and year.
|
| 150 |
+
|
| 151 |
+
This query searches for:
|
| 152 |
+
- Events (Q1656682) that occurred at a location
|
| 153 |
+
- Battles, treaties, revolutions, etc.
|
| 154 |
+
- Events with point in time or start time within the year range
|
| 155 |
+
"""
|
| 156 |
+
# Wikidata uses negative years for BCE
|
| 157 |
+
year_start = year - 15
|
| 158 |
+
year_end = year + 15
|
| 159 |
+
|
| 160 |
+
# Convert radius to degrees (rough approximation)
|
| 161 |
+
degree_radius = radius_km / 111.0
|
| 162 |
+
|
| 163 |
+
query = f"""
|
| 164 |
+
SELECT DISTINCT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel ?participantLabel ?typeLabel ?article ?image
|
| 165 |
+
WHERE {{
|
| 166 |
+
# Find events with coordinates
|
| 167 |
+
?event wdt:P31/wdt:P279* wd:Q1656682 . # instance of event or subclass
|
| 168 |
+
|
| 169 |
+
# Get coordinates - either direct or via location
|
| 170 |
+
OPTIONAL {{
|
| 171 |
+
?event wdt:P625 ?directCoord .
|
| 172 |
+
}}
|
| 173 |
+
OPTIONAL {{
|
| 174 |
+
?event wdt:P276 ?location .
|
| 175 |
+
?location wdt:P625 ?locationCoord .
|
| 176 |
+
}}
|
| 177 |
+
BIND(COALESCE(?directCoord, ?locationCoord) AS ?coord)
|
| 178 |
+
|
| 179 |
+
# Filter by coordinate bounding box
|
| 180 |
+
FILTER(BOUND(?coord))
|
| 181 |
+
BIND(geof:latitude(?coord) AS ?lat)
|
| 182 |
+
BIND(geof:longitude(?coord) AS ?lon)
|
| 183 |
+
FILTER(?lat >= {lat - degree_radius} && ?lat <= {lat + degree_radius})
|
| 184 |
+
FILTER(?lon >= {lon - degree_radius} && ?lon <= {lon + degree_radius})
|
| 185 |
+
|
| 186 |
+
# Get date
|
| 187 |
+
OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
|
| 188 |
+
OPTIONAL {{ ?event wdt:P580 ?startTime . }}
|
| 189 |
+
BIND(COALESCE(?pointInTime, ?startTime) AS ?date)
|
| 190 |
+
|
| 191 |
+
# Filter by year range
|
| 192 |
+
FILTER(BOUND(?date))
|
| 193 |
+
FILTER(YEAR(?date) >= {year_start} && YEAR(?date) <= {year_end})
|
| 194 |
+
|
| 195 |
+
# Optional: participants
|
| 196 |
+
OPTIONAL {{ ?event wdt:P710 ?participant . }}
|
| 197 |
+
|
| 198 |
+
# Optional: event type
|
| 199 |
+
OPTIONAL {{ ?event wdt:P31 ?type . }}
|
| 200 |
+
|
| 201 |
+
# Optional: Wikipedia article
|
| 202 |
+
OPTIONAL {{
|
| 203 |
+
?article schema:about ?event ;
|
| 204 |
+
schema:isPartOf <https://en.wikipedia.org/> .
|
| 205 |
+
}}
|
| 206 |
+
|
| 207 |
+
# Optional: image
|
| 208 |
+
OPTIONAL {{ ?event wdt:P18 ?image . }}
|
| 209 |
+
|
| 210 |
+
# Location label
|
| 211 |
+
OPTIONAL {{ ?event wdt:P276 ?loc . }}
|
| 212 |
+
|
| 213 |
+
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en,de,fr,es,it" . }}
|
| 214 |
+
}}
|
| 215 |
+
ORDER BY ABS(YEAR(?date) - {year})
|
| 216 |
+
LIMIT {limit}
|
| 217 |
+
"""
|
| 218 |
+
return query
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def _build_fallback_query(lat: float, lon: float, year: int, limit: int = 15) -> str:
    """
    Simpler fallback query that searches for any notable events in the year range.
    Uses text search and broader event types.

    NOTE(review): `lat` and `lon` are accepted but never interpolated into
    the query — the fallback filters by time only (+/-20 years) and leaves
    coordinates optional/unfiltered, so results can be anywhere in the
    world. Distance is presumably applied later by the caller via
    `_results_to_events`; confirm this is intended.
    """
    year_start = year - 20
    year_end = year + 20

    query = f"""
    SELECT DISTINCT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel ?article
    WHERE {{
      # Broader event types
      VALUES ?eventType {{
        wd:Q178561    # battle
        wd:Q131569    # treaty
        wd:Q7278      # political revolution
        wd:Q8076      # assassination
        wd:Q3882219   # coronation
        wd:Q1318295   # military offensive
        wd:Q2001676   # massacre
        wd:Q18669875  # historical event
        wd:Q13418847  # historical period
        wd:Q3024240   # historical event
      }}
      ?event wdt:P31 ?eventType .

      # Date filter
      OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
      OPTIONAL {{ ?event wdt:P580 ?startTime . }}
      BIND(COALESCE(?pointInTime, ?startTime) AS ?date)
      FILTER(BOUND(?date))
      FILTER(YEAR(?date) >= {year_start} && YEAR(?date) <= {year_end})

      # Get coordinates via location
      OPTIONAL {{
        ?event wdt:P276 ?location .
        ?location wdt:P625 ?coord .
      }}
      OPTIONAL {{
        ?event wdt:P625 ?directCoord .
      }}
      BIND(COALESCE(?coord, ?directCoord) AS ?finalCoord)

      # Wikipedia article
      OPTIONAL {{
        ?article schema:about ?event ;
                 schema:isPartOf <https://en.wikipedia.org/> .
      }}

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    ORDER BY ABS(YEAR(?date) - {year})
    LIMIT {limit}
    """
    return query
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def _execute_sparql(query: str) -> Optional[Dict]:
    """Execute a SPARQL query against the Wikidata endpoint.

    A certificate-verified HTTPS request is attempted first; only if TLS
    verification itself fails (e.g. a broken local CA store) is the
    request retried without verification, as a last resort. All other
    failures are logged and reported as None.

    Note: the previous implementation had a `USE_REQUESTS` branch that
    referenced the never-imported `requests`/`certifi` modules (it would
    have raised NameError if enabled) and always disabled certificate
    verification; both issues are fixed here.

    Args:
        query: SPARQL query text.

    Returns:
        The parsed JSON response dict, or None on any error.
    """
    _rate_limit()

    headers = {
        "Accept": "application/sparql-results+json",
        "User-Agent": "Meridian-Historical-App/1.0 (https://github.com/meridian; [email protected])",
    }
    params = urllib.parse.urlencode({"query": query})
    url = f"{WIKIDATA_SPARQL_ENDPOINT}?{params}"

    # Verified context first; unverified only as a fallback for broken
    # local certificate stores.
    verified = ssl.create_default_context()
    unverified = ssl.create_default_context()
    unverified.check_hostname = False
    unverified.verify_mode = ssl.CERT_NONE

    for context in (verified, unverified):
        try:
            req = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(req, timeout=30, context=context) as response:
                return json.loads(response.read().decode("utf-8"))
        except ssl.SSLError:
            # Certificate problem: retry once without verification.
            continue
        except urllib.error.URLError as e:
            if context is verified and isinstance(getattr(e, "reason", None), ssl.SSLError):
                # urlopen often wraps certificate failures in URLError.
                continue
            print(f"[Wikidata] SPARQL query failed (urllib): {e}")
            return None
        except (json.JSONDecodeError, TimeoutError) as e:
            print(f"[Wikidata] SPARQL query failed (urllib): {e}")
            return None

    print("[Wikidata] SPARQL query failed: TLS verification error on both attempts")
    return None
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def _parse_wikidata_date(date_str: str) -> Tuple[Optional[int], Optional[int], Optional[int]]:
|
| 322 |
+
"""Parse Wikidata date string to year, month, day."""
|
| 323 |
+
if not date_str:
|
| 324 |
+
return None, None, None
|
| 325 |
+
|
| 326 |
+
# Handle BCE dates (negative years)
|
| 327 |
+
# Wikidata format: -0044-03-15T00:00:00Z for 44 BCE
|
| 328 |
+
try:
|
| 329 |
+
if date_str.startswith("-"):
|
| 330 |
+
# BCE date
|
| 331 |
+
parts = date_str[1:].split("T")[0].split("-")
|
| 332 |
+
year = -int(parts[0])
|
| 333 |
+
month = int(parts[1]) if len(parts) > 1 else None
|
| 334 |
+
day = int(parts[2]) if len(parts) > 2 else None
|
| 335 |
+
return year, month, day
|
| 336 |
+
else:
|
| 337 |
+
# CE date
|
| 338 |
+
parts = date_str.split("T")[0].split("-")
|
| 339 |
+
year = int(parts[0])
|
| 340 |
+
month = int(parts[1]) if len(parts) > 1 else None
|
| 341 |
+
day = int(parts[2]) if len(parts) > 2 else None
|
| 342 |
+
return year, month, day
|
| 343 |
+
except (ValueError, IndexError):
|
| 344 |
+
return None, None, None
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def _parse_coordinates(coord_str: str) -> Tuple[Optional[float], Optional[float]]:
|
| 348 |
+
"""Parse Wikidata coordinate string to lat, lon."""
|
| 349 |
+
if not coord_str:
|
| 350 |
+
return None, None
|
| 351 |
+
|
| 352 |
+
# Format: Point(lon lat)
|
| 353 |
+
try:
|
| 354 |
+
if coord_str.startswith("Point("):
|
| 355 |
+
inner = coord_str[6:-1]
|
| 356 |
+
lon_str, lat_str = inner.split()
|
| 357 |
+
return float(lat_str), float(lon_str)
|
| 358 |
+
except (ValueError, IndexError):
|
| 359 |
+
pass
|
| 360 |
+
|
| 361 |
+
return None, None
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
def _results_to_events(
    results: Dict,
    query_lat: float,
    query_lon: float,
    query_year: int,
) -> List[WikidataEvent]:
    """Convert SPARQL results to WikidataEvent objects.

    SPARQL returns one row per (event, participant, type, ...) combination,
    so rows are deduplicated by QID: the first row seen for a QID sets all
    scalar fields; subsequent rows only contribute additional participants.

    Confidence is a heuristic built from a 0.7 base plus bonuses for
    spatial proximity, temporal proximity, and having a Wikipedia article,
    capped at 0.98.

    Args:
        results: Raw SPARQL JSON response ({"results": {"bindings": [...]}}).
        query_lat: Latitude of the original query point.
        query_lon: Longitude of the original query point.
        query_year: Target year of the original query.

    Returns:
        Events sorted by a combined relevance score (closer in space and
        time, and higher confidence, sorts first).
    """
    events_map: Dict[str, WikidataEvent] = {}

    bindings = results.get("results", {}).get("bindings", [])

    for binding in bindings:
        # Extract QID from the entity URI (last path segment)
        event_uri = binding.get("event", {}).get("value", "")
        if not event_uri:
            continue
        qid = event_uri.split("/")[-1]

        # Get or create event (first row wins for scalar fields)
        if qid not in events_map:
            name = binding.get("eventLabel", {}).get("value", "Unknown Event")
            description = binding.get("eventDescription", {}).get("value", "")

            # Parse date
            date_str = binding.get("date", {}).get("value", "")
            year, month, day = _parse_wikidata_date(date_str)

            # Parse coordinates
            coord_str = binding.get("coord", {}).get("value", "")
            lat, lon = _parse_coordinates(coord_str)

            # Location name
            location_name = binding.get("locationLabel", {}).get("value", "")

            # Event type
            event_type = binding.get("typeLabel", {}).get("value", "")

            # Wikipedia URL
            wikipedia_url = binding.get("article", {}).get("value", "")

            # Image URL
            image_url = binding.get("image", {}).get("value", "")

            # Calculate distance and confidence
            distance_km = 0.0
            if lat is not None and lon is not None:
                distance_km = _haversine(query_lat, query_lon, lat, lon)

            # NOTE(review): `year or query_year` treats year 0 as missing
            # (falsy); harmless for most historical data but worth confirming.
            year_delta = abs((year or query_year) - query_year)

            # Confidence scoring: 0.7 base + proximity/article bonuses
            confidence = 0.7
            if distance_km < 50:
                confidence += 0.15
            elif distance_km < 150:
                confidence += 0.1

            if year_delta == 0:
                confidence += 0.15
            elif year_delta <= 5:
                confidence += 0.1

            if wikipedia_url:
                confidence += 0.05

            confidence = min(confidence, 0.98)

            events_map[qid] = WikidataEvent(
                qid=qid,
                name=name,
                description=description,
                year=year,
                month=month,
                day=day,
                lat=lat,
                lon=lon,
                location_name=location_name,
                event_type=event_type,
                wikipedia_url=wikipedia_url,
                image_url=image_url,
                distance_km=round(distance_km, 2),
                year_delta=year_delta,
                confidence=round(confidence, 3),
            )

        # Add participant if present (deduplicated, order-preserving)
        participant = binding.get("participantLabel", {}).get("value", "")
        if participant and participant not in events_map[qid].participants:
            events_map[qid].participants.append(participant)

    # Sort by relevance (lower distance + year_delta = better; higher
    # confidence lowers the score via the -20x weight)
    events = list(events_map.values())
    events.sort(key=lambda e: e.distance_km + e.year_delta * 5 - e.confidence * 20)

    return events
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def search_events_by_geo_time(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 300.0,
    limit: int = 10,
    use_cache: bool = True,
) -> List[Dict]:
    """
    Search Wikidata for historical events near coordinates and year.

    Args:
        lat: Latitude
        lon: Longitude
        year: Target year (negative for BCE)
        radius_km: Search radius in kilometers
        limit: Maximum number of results
        use_cache: Whether to use cached results

    Returns:
        List of event dictionaries compatible with existing code
    """
    # Serve from the on-disk cache when possible.
    cache_key = _cache_key(lat, lon, year, radius_km)
    if use_cache:
        cached = _get_cached(cache_key)
        if cached is not None:
            print(f"[Wikidata] Cache hit for {lat:.2f}, {lon:.2f}, {year}")
            return cached[:limit]

    print(f"[Wikidata] Querying for events near {lat:.2f}, {lon:.2f}, year {year}")

    # Primary geo+time query; over-fetch so post-filtering still fills `limit`.
    events: List[WikidataEvent] = []
    primary = _execute_sparql(_build_sparql_query(lat, lon, year, radius_km, limit * 2))
    if primary:
        events = _results_to_events(primary, lat, lon, year)

    # Broader time-only query when the primary one comes back empty.
    if not events:
        print("[Wikidata] Primary query returned no results, trying fallback...")
        secondary = _execute_sparql(_build_fallback_query(lat, lon, year, limit * 2))
        if secondary:
            events = _results_to_events(secondary, lat, lon, year)

    event_dicts = [event.to_dict() for event in events[:limit]]

    if use_cache and event_dicts:
        _save_cache(cache_key, event_dicts)

    print(f"[Wikidata] Found {len(event_dicts)} events")
    return event_dicts
|
| 517 |
+
|
| 518 |
+
|
| 519 |
+
def get_event_detail(qid: str) -> Optional[Dict]:
    """
    Fetch detailed information about a specific Wikidata event.

    Beyond the basic fields, this also collects causes (P828) and
    effects (P1542) across all result rows into the returned dict.

    NOTE(review): `qid` is interpolated into the SPARQL text unvalidated —
    callers should only pass well-formed QIDs (e.g. "Q784").

    Args:
        qid: Wikidata QID (e.g., "Q784")

    Returns:
        Event dictionary with full details, or None if not found
    """
    query = f"""
    SELECT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel
           ?participantLabel ?typeLabel ?article ?image ?causeLabel ?effectLabel
    WHERE {{
      BIND(wd:{qid} AS ?event)

      OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
      OPTIONAL {{ ?event wdt:P580 ?startTime . }}
      BIND(COALESCE(?pointInTime, ?startTime) AS ?date)

      OPTIONAL {{ ?event wdt:P625 ?coord . }}
      OPTIONAL {{
        ?event wdt:P276 ?location .
        ?location wdt:P625 ?locCoord .
      }}

      OPTIONAL {{ ?event wdt:P710 ?participant . }}
      OPTIONAL {{ ?event wdt:P31 ?type . }}
      OPTIONAL {{ ?event wdt:P828 ?cause . }}
      OPTIONAL {{ ?event wdt:P1542 ?effect . }}
      OPTIONAL {{ ?event wdt:P18 ?image . }}

      OPTIONAL {{
        ?article schema:about ?event ;
                 schema:isPartOf <https://en.wikipedia.org/> .
      }}

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    LIMIT 50
    """

    results = _execute_sparql(query)
    if not results:
        return None

    # (0, 0, 0) as the query point/year: distance_km and year_delta in the
    # result are relative to the origin here and not meaningful.
    events = _results_to_events(results, 0, 0, 0)
    if events:
        event = events[0]
        event_dict = event.to_dict()

        # Extract causes and effects from results (sets deduplicate the
        # cross-product rows the SPARQL OPTIONALs produce)
        bindings = results.get("results", {}).get("bindings", [])
        causes = set()
        effects = set()
        for binding in bindings:
            cause = binding.get("causeLabel", {}).get("value", "")
            effect = binding.get("effectLabel", {}).get("value", "")
            if cause:
                causes.add(cause)
            if effect:
                effects.add(effect)

        event_dict["causes"] = list(causes)
        event_dict["effects"] = list(effects)
        event_dict["relationships"] = {
            "causes": list(causes),
            "consequences": list(effects),
        }

        return event_dict

    return None
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
def search_events_by_name(name: str, limit: int = 5) -> List[Dict]:
    """
    Search Wikidata for events by name.

    Performs a case-insensitive substring match against English labels of
    events (Q1656682 and subclasses).

    NOTE(review): only double quotes are escaped before interpolation into
    the SPARQL literal; a name containing a backslash could still break
    the query — confirm inputs are trusted or extend the escaping.

    Args:
        name: Event name to search for
        limit: Maximum results

    Returns:
        List of matching events
    """
    # Escape special characters
    escaped_name = name.replace('"', '\\"')

    query = f"""
    SELECT DISTINCT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel ?article
    WHERE {{
      ?event wdt:P31/wdt:P279* wd:Q1656682 .
      ?event rdfs:label ?label .
      FILTER(LANG(?label) = "en")
      FILTER(CONTAINS(LCASE(?label), LCASE("{escaped_name}")))

      OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
      OPTIONAL {{ ?event wdt:P580 ?startTime . }}
      BIND(COALESCE(?pointInTime, ?startTime) AS ?date)

      OPTIONAL {{ ?event wdt:P625 ?coord . }}
      OPTIONAL {{
        ?event wdt:P276 ?location .
        ?location wdt:P625 ?locCoord .
      }}

      OPTIONAL {{
        ?article schema:about ?event ;
                 schema:isPartOf <https://en.wikipedia.org/> .
      }}

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    LIMIT {limit}
    """

    results = _execute_sparql(query)
    if not results:
        return []

    # (0, 0, 0) query point/year: distance_km and year_delta in the output
    # are relative to the origin and not meaningful for a name search.
    events = _results_to_events(results, 0, 0, 0)
    return [e.to_dict() for e in events]
|
| 642 |
+
|
| 643 |
+
|
| 644 |
+
# MCP-compatible function exports
|
| 645 |
+
def mcp_search_historical_events(
    latitude: float,
    longitude: float,
    year: int,
    radius_km: float = 300.0,
    limit: int = 10,
) -> Dict:
    """
    MCP tool: Search for historical events by coordinates and year.

    This queries Wikidata for events that occurred near the specified
    location and time, returning structured event data suitable for
    prompt generation. Never raises: failures come back as a
    {"success": False, "error": ...} payload.
    """
    try:
        matches = search_events_by_geo_time(
            lat=latitude,
            lon=longitude,
            year=year,
            radius_km=radius_km,
            limit=limit,
        )
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
    return {
        "success": True,
        "query": {
            "latitude": latitude,
            "longitude": longitude,
            "year": year,
            "radius_km": radius_km,
        },
        "count": len(matches),
        "events": matches,
        "source": "wikidata",
    }
|
| 684 |
+
|
| 685 |
+
|
| 686 |
+
def mcp_get_event_by_qid(qid: str) -> Dict:
    """
    MCP tool: Get detailed information about a Wikidata event.

    Never raises: lookup failures and missing events both come back as a
    {"success": False, "error": ...} payload.
    """
    try:
        detail = get_event_detail(qid)
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
    if not detail:
        return {
            "success": False,
            "error": f"Event {qid} not found",
        }
    return {
        "success": True,
        "event": detail,
    }
|
| 706 |
+
|