sammoftah committed on
Commit
51f3e0e
·
verified ·
1 Parent(s): 34d8aa1

Upload 4 files

Browse files
services/__init__.py ADDED
File without changes
services/history_service.py ADDED
@@ -0,0 +1,969 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ import sqlite3
6
+ from pathlib import Path
7
+ from typing import Dict, Iterable, List, Optional, Tuple
8
+
9
+ # Import Wikidata service for remote lookups
10
+ try:
11
+ from services.wikidata_service import (
12
+ search_events_by_geo_time as wikidata_search,
13
+ get_event_detail as wikidata_get_detail,
14
+ search_events_by_name as wikidata_search_by_name,
15
+ )
16
+ WIKIDATA_AVAILABLE = True
17
+ except ImportError:
18
+ WIKIDATA_AVAILABLE = False
19
+ print("[history_service] Wikidata service not available, using curated data only")
20
+
21
# Filesystem layout: the SQLite database lives in "<two levels above this file>/data".
ROOT_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = ROOT_DIR.joinpath("data")
# Import-time side effect: make sure the data directory exists.
DATA_DIR.mkdir(parents=True, exist_ok=True)
DB_PATH = DATA_DIR.joinpath("meridian_history.db")

# Wikidata settings
ENABLE_WIKIDATA_FALLBACK = True
WIKIDATA_CONFIDENCE_THRESHOLD = 0.5

EVENT_SCHEMA_VERSION = 2

# Columns added to the base "events" table, mapped to their SQLite type.
# Insertion order is preserved and is the order in which missing columns
# are added by ensure_schema().
EVENT_EXTRA_COLUMNS: Dict[str, str] = dict(
    (
        ("slug", "TEXT"),
        ("summary", "TEXT"),
        ("narrative", "TEXT"),
        ("start_year", "INTEGER"),
        ("end_year", "INTEGER"),
        ("month", "INTEGER"),
        ("day", "INTEGER"),
        ("themes", "TEXT"),
        ("actors", "TEXT"),
        ("artifacts", "TEXT"),
        ("visual_motifs", "TEXT"),
        ("facets", "TEXT"),
        ("sources", "TEXT"),
        ("time_range", "TEXT"),
        ("geo_anchor", "TEXT"),
        ("confidence", "REAL"),
        ("relationships", "TEXT"),
    )
)
50
+
51
+
52
def _get_connection() -> sqlite3.Connection:
    """Open a new SQLite connection to the database at ``DB_PATH``.

    The connection uses ``sqlite3.Row`` as its row factory, so result rows
    can be read by column name. The caller must close the connection.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
56
+
57
+
58
+ def _serialize(value: object) -> str:
59
+ return json.dumps(value, ensure_ascii=False)
60
+
61
+
62
+ def _deserialize(value: Optional[str], default):
63
+ if value is None or value == "":
64
+ return default
65
+ try:
66
+ return json.loads(value)
67
+ except json.JSONDecodeError:
68
+ return default
69
+
70
+
71
def ensure_schema() -> None:
    """Create or upgrade the SQLite schema used by the history service.

    On a single connection this:

    1. creates the base ``events`` table and the ``schema_meta`` key/value
       table if they do not exist;
    2. adds every column from ``EVENT_EXTRA_COLUMNS`` that ``events`` lacks;
    3. stores ``EVENT_SCHEMA_VERSION`` under the ``event_schema_version`` key;
    4. creates the year and coordinate indexes if missing.

    The connection is always closed, including when a statement raises.
    """
    conn = _get_connection()
    try:
        cursor = conn.cursor()

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE,
                year INTEGER,
                lat REAL,
                lon REAL
            )
            """
        )

        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS schema_meta (
                key TEXT PRIMARY KEY,
                value TEXT
            )
            """
        )

        # Add new columns if missing
        cursor.execute("PRAGMA table_info(events)")
        existing_columns = {row["name"] for row in cursor.fetchall()}
        for column, column_type in EVENT_EXTRA_COLUMNS.items():
            if column not in existing_columns:
                # Identifiers cannot be bound as SQL parameters; the names
                # come from the module constant, not from user input.
                cursor.execute(f"ALTER TABLE events ADD COLUMN {column} {column_type}")

        # Update schema version (upsert on the primary key)
        cursor.execute(
            """
            INSERT INTO schema_meta(key, value)
            VALUES('event_schema_version', ?)
            ON CONFLICT(key) DO UPDATE SET value=excluded.value
            """,
            (str(EVENT_SCHEMA_VERSION),),
        )

        cursor.execute("CREATE INDEX IF NOT EXISTS idx_events_year ON events(year)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_events_coordinates ON events(lat, lon)")

        conn.commit()
    finally:
        # Release the handle even if a statement above failed.
        conn.close()
118
+
119
+
120
def seed_curated_events(force_refresh: bool = False) -> None:
    """Insert the entries of ``CURATED_EVENTS`` into the ``events`` table.

    Rows are written with ``INSERT OR IGNORE``, so an event that conflicts
    with an existing row (e.g. on a unique ``name``) is skipped rather than
    updated. List/dict fields are stored as JSON text.

    Args:
        force_refresh: When true, delete every row from ``events`` before
            inserting, within the same transaction.

    The connection is always closed, including when an insert raises.
    """
    conn = _get_connection()
    try:
        cursor = conn.cursor()

        if force_refresh:
            cursor.execute("DELETE FROM events")

        for event in CURATED_EVENTS:
            year = event.get("year")
            cursor.execute(
                """
                INSERT OR IGNORE INTO events (
                    name, slug, year, start_year, end_year, month, day,
                    lat, lon, summary, narrative, themes, actors, artifacts,
                    visual_motifs, facets, sources, time_range, geo_anchor,
                    confidence, relationships
                ) VALUES (
                    :name, :slug, :year, :start_year, :end_year, :month, :day,
                    :lat, :lon, :summary, :narrative, :themes, :actors, :artifacts,
                    :visual_motifs, :facets, :sources, :time_range, :geo_anchor,
                    :confidence, :relationships
                )
                """,
                {
                    "name": event["name"],
                    # Derive a slug from the name when none is supplied.
                    "slug": event.get("slug") or event["name"].lower().replace(" ", "_"),
                    "year": year,
                    # Single-year events default their range to that year.
                    "start_year": event.get("start_year", year),
                    "end_year": event.get("end_year", year),
                    "month": event.get("month"),
                    "day": event.get("day"),
                    "lat": event.get("lat"),
                    "lon": event.get("lon"),
                    "summary": event.get("summary"),
                    "narrative": event.get("narrative"),
                    "themes": _serialize(event.get("themes", [])),
                    "actors": _serialize(event.get("actors", [])),
                    "artifacts": _serialize(event.get("artifacts", [])),
                    "visual_motifs": _serialize(event.get("visual_motifs", [])),
                    "facets": _serialize(event.get("facets", {})),
                    "sources": _serialize(event.get("sources", [])),
                    "time_range": _serialize(event.get("time_range", {})),
                    "geo_anchor": _serialize(event.get("geo_anchor", {})),
                    "confidence": event.get("confidence", 0.85),
                    "relationships": _serialize(event.get("relationships", {})),
                },
            )

        conn.commit()
    finally:
        # Release the handle even if an insert above failed.
        conn.close()
169
+
170
+
171
def initialize_history(force_refresh: bool = False) -> None:
    """Prepare the history database: ensure the schema, then seed curated events.

    Args:
        force_refresh: Passed through to ``seed_curated_events``.
    """
    ensure_schema()
    seed_curated_events(force_refresh=force_refresh)
174
+
175
+
176
def load_events_from_db() -> List[dict]:
    """Load every row of the ``events`` table as a plain dictionary.

    JSON-encoded columns are decoded; a missing, empty or undecodable value
    becomes an empty list or dict, depending on the field.

    Returns:
        One dict per row, in the order returned by ``SELECT * FROM events``.
    """
    conn = _get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM events")
        rows = cursor.fetchall()
    finally:
        # Close the handle even if the query fails.
        conn.close()

    # JSON column -> factory for its fallback value. A factory (not a shared
    # instance) is used so each event gets its own fresh empty container.
    json_fields = (
        ("themes", list),
        ("actors", list),
        ("artifacts", list),
        ("visual_motifs", list),
        ("facets", dict),
        ("sources", list),
        ("time_range", dict),
        ("geo_anchor", dict),
        ("relationships", dict),
    )

    events = []
    for row in rows:
        event = dict(row)
        for field, make_default in json_fields:
            event[field] = _deserialize(event.get(field), make_default())
        events.append(event)

    return events
198
+
199
+
200
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance between two points via the haversine formula.

    Inputs are converted with ``math.radians`` (i.e. treated as degrees); the
    result is scaled by a sphere radius of 6371.0 (kilometres).
    """
    earth_radius = 6371.0
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    d_lat = math.radians(lat2 - lat1)
    d_lon = math.radians(lon2 - lon1)

    lat_term = math.sin(d_lat / 2) ** 2
    lon_term = math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(d_lon / 2) ** 2
    hav = lat_term + lon_term

    # Clamp 1 - hav at zero so rounding error cannot feed sqrt a negative.
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(max(0.0, 1 - hav)))
    return earth_radius * central_angle
212
+
213
+
214
def _compute_match_score(
    event: dict,
    lat: float,
    lon: float,
    year: int,
    year_weight: float = 1.0,
) -> Tuple[float, float, float, float]:
    """
    Compute match score for an event based on distance and year.

    Missing (``None``) event fields fall back to the query value, so a
    missing year or coordinate contributes a zero delta/distance. Legitimate
    zero values (latitude 0.0, longitude 0.0, year 0) are used as-is rather
    than being mistaken for "missing".

    Args:
        event: Event dictionary; reads ``year``, ``lat``, ``lon``, ``confidence``
        lat: Query latitude
        lon: Query longitude
        year: Query year
        year_weight: Weight for year matching (0.0-2.0, not enforced)
            - 0.0 = distance only
            - 1.0 = balanced (default)
            - 2.0 = strongly prefer year matches

    Returns:
        Tuple of (distance_km, year_delta, confidence, match_score).
        ``match_score`` is lower-is-better; ``confidence`` is clamped to
        the range 0.0-0.99.
    """
    event_year = event.get("year")
    if event_year is None:
        event_year = year
    event_lat = event.get("lat")
    if event_lat is None:
        event_lat = lat
    event_lon = event.get("lon")
    if event_lon is None:
        event_lon = lon

    distance = haversine_distance(lat, lon, event_lat, event_lon)
    year_delta = abs(event_year - year)

    # A NULL confidence column yields None from .get(); treat it as absent.
    base_confidence = event.get("confidence")
    if base_confidence is None:
        base_confidence = 0.8

    # Year-weighted scoring (negative = bonus, positive = penalty):
    # - Exact year match (delta=0): massive bonus
    # - Within 2 / 5 years: strong / good bonus
    # - Within 10 years: neutral
    # - Beyond 10 years: penalty grows linearly with the delta
    if year_delta == 0:
        year_score = -50 * year_weight
    elif year_delta <= 2:
        year_score = -30 * year_weight
    elif year_delta <= 5:
        year_score = -15 * year_weight
    elif year_delta <= 10:
        year_score = 0
    else:
        year_score = year_delta * 3 * year_weight

    # Distance scoring. The (1 - year_weight * 0.3) factor shrinks the
    # influence of distance as year_weight grows.
    # NOTE(review): the factor turns negative for year_weight > ~3.33, which
    # would flip bonus and penalty; fine within the documented 0.0-2.0 range.
    distance_damping = 1 - year_weight * 0.3
    if distance < 50:
        distance_score = -20 * distance_damping  # Bonus, reduced if year-weighted
    elif distance < 200:
        distance_score = distance * 0.1
    else:
        distance_score = distance * 0.2 * distance_damping  # Reduced penalty if year-weighted

    # Combined score (lower is better)
    match_score = distance_score + year_score

    # Confidence: start from the event's own confidence and add small
    # boosts for temporal and spatial closeness.
    confidence = base_confidence
    if year_delta == 0:
        confidence += 0.15
    elif year_delta <= 5:
        confidence += 0.08

    if distance < 100:
        confidence += 0.1
    elif distance < 300:
        confidence += 0.05

    confidence = max(0.0, min(0.99, confidence))

    return distance, year_delta, confidence, match_score
291
+
292
+
293
def get_events_by_coordinates(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 250.0,
    limit: int = 5,
    include_wikidata: bool = True,
    year_weight: float = 1.5,
) -> List[dict]:
    """
    Get historical events near coordinates and year.

    First searches the curated local database. If that yields fewer than
    ``limit`` results and Wikidata is requested, enabled and importable,
    the remaining slots are filled from Wikidata and the combined list is
    re-sorted. A Wikidata failure is logged and the results gathered so far
    are returned.

    A curated event is kept when any of these holds:
        - it lies within ``radius_km``;
        - its year is within 10 years of ``year`` (regardless of distance);
        - its year is within 15 years and it lies within the widened radius
          (``radius_km * (1 + year_weight * 0.5)`` when ``year_weight > 1.0``).

    Args:
        lat: Latitude
        lon: Longitude
        year: Target year (negative for BCE)
        radius_km: Search radius in kilometers
        limit: Maximum number of results
        include_wikidata: Whether to include Wikidata results
        year_weight: How much to prioritize year matches (0.0-2.0)
            - 0.0 = distance only (ignore year)
            - 1.0 = balanced
            - 1.5 = prefer year matches (default)
            - 2.0 = strongly prefer year matches

    Returns:
        List of event dictionaries sorted by ``match_score`` (lower first).
        Each carries ``source`` ("curated" or "wikidata"); curated entries
        also carry ``distance_km``, ``year_delta`` and ``match_confidence``.
    """
    # Step 1: Search curated local database
    events = load_events_from_db()
    matches: List[dict] = []

    # Use larger radius when year-weighted to find more year matches
    if year_weight > 1.0:
        effective_radius = radius_km * (1 + year_weight * 0.5)
    else:
        effective_radius = radius_km

    for event in events:
        distance, year_delta, confidence, match_score = _compute_match_score(
            event, lat, lon, year, year_weight=year_weight
        )

        # Drop events that are both outside the widened radius and more
        # than 10 years away.
        if distance > effective_radius and year_delta > 10:
            continue

        # Drop events more than 15 years away unless inside the base radius.
        if year_delta > 15 and distance > radius_km:
            continue

        match = dict(event)
        match["distance_km"] = round(distance, 2)
        match["year_delta"] = year_delta
        match["match_confidence"] = round(confidence, 3)
        match["match_score"] = match_score
        match["source"] = "curated"
        matches.append(match)

    matches.sort(key=lambda item: item["match_score"])
    curated_results = matches[:limit]

    # Step 2: If enabled and we have few/no curated results, query Wikidata
    if (
        include_wikidata
        and ENABLE_WIKIDATA_FALLBACK
        and WIKIDATA_AVAILABLE
        and len(curated_results) < limit
    ):
        try:
            print("[history_service] Querying Wikidata for additional events...")
            wikidata_results = wikidata_search(
                lat=lat,
                lon=lon,
                year=year,
                radius_km=radius_km,
                limit=limit * 2,  # Get extra to filter
            )

            # Merge Wikidata results, avoiding duplicates by name.
            # `or ""` guards against a name that is present but None, which
            # would otherwise raise and abort the whole merge.
            curated_names = {(m.get("name") or "").lower() for m in curated_results}

            for wd_event in wikidata_results:
                # Skip if we already have this event from curated data
                event_name = (wd_event.get("name") or "").lower()
                if event_name in curated_names:
                    continue

                # Skip low-confidence results; None counts as "not provided".
                confidence = wd_event.get("match_confidence")
                if confidence is None:
                    confidence = wd_event.get("confidence")
                if confidence is None:
                    confidence = 0
                if confidence < WIKIDATA_CONFIDENCE_THRESHOLD:
                    continue

                # Add source marker and compute year-weighted score
                wd_event["source"] = "wikidata"
                wd_year_delta = wd_event.get("year_delta")
                if wd_year_delta is None:
                    wd_year_delta = 99
                wd_distance = wd_event.get("distance_km")
                if wd_distance is None:
                    wd_distance = 999

                # Year-weighted scoring for Wikidata results
                if wd_year_delta == 0:
                    year_score = -50 * year_weight
                elif wd_year_delta <= 2:
                    year_score = -30 * year_weight
                elif wd_year_delta <= 5:
                    year_score = -15 * year_weight
                elif wd_year_delta <= 10:
                    year_score = 0
                else:
                    year_score = wd_year_delta * 3 * year_weight

                distance_score = wd_distance * 0.1 * (1 - year_weight * 0.3)
                wd_event["match_score"] = distance_score + year_score - confidence * 10

                curated_results.append(wd_event)
                curated_names.add(event_name)

                if len(curated_results) >= limit:
                    break

            # Re-sort combined results
            curated_results.sort(key=lambda item: item.get("match_score", 999))

        except Exception as e:
            # Best-effort fallback: keep whatever was collected so far.
            print(f"[history_service] Wikidata lookup failed: {e}")

    return curated_results[:limit]
419
+
420
+
421
def search_events_globally(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 500.0,
    limit: int = 10,
    year_weight: float = 1.5,
) -> List[dict]:
    """
    Search for historical events with a broader default radius and limit.

    Delegates to ``get_events_by_coordinates`` with ``include_wikidata=True``.
    Intended for cases where the user doesn't have precise coordinates;
    year-weighted scoring is used by default to prioritize temporal matches.
    """
    query = {
        "lat": lat,
        "lon": lon,
        "year": year,
        "radius_km": radius_km,
        "limit": limit,
        "include_wikidata": True,
        "year_weight": year_weight,
    }
    return get_events_by_coordinates(**query)
444
+
445
+
446
def get_event_by_slug(slug: str) -> Optional[dict]:
    """Fetch a single event from the ``events`` table by its slug.

    Args:
        slug: Value matched exactly against the ``slug`` column.

    Returns:
        The event as a dict with its JSON columns decoded (missing or
        undecodable values become an empty list/dict), or ``None`` when no
        row matches.
    """
    conn = _get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM events WHERE slug = ?", (slug,))
        row = cursor.fetchone()
    finally:
        # Close the handle even if the query fails.
        conn.close()

    if not row:
        return None

    event = dict(row)
    # JSON column -> factory for the fallback used when decoding fails.
    json_fields = (
        ("themes", list),
        ("actors", list),
        ("artifacts", list),
        ("visual_motifs", list),
        ("facets", dict),
        ("sources", list),
        ("time_range", dict),
        ("geo_anchor", dict),
        ("relationships", dict),
    )
    for field, make_default in json_fields:
        event[field] = _deserialize(event.get(field), make_default())
    return event
467
+
468
+
469
def get_event_by_name(name: str, include_wikidata: bool = True) -> Optional[dict]:
    """
    Look up an event by name: curated data first, then Wikidata.

    The curated lookup goes through the slug derived from ``name``
    (lower-cased, spaces replaced by underscores). The returned dict is
    tagged with ``source`` set to "curated" or "wikidata". Returns ``None``
    when nothing is found or the Wikidata search fails.
    """
    # Try curated data first
    derived_slug = name.lower().replace(" ", "_")
    curated = get_event_by_slug(derived_slug)
    if curated:
        curated["source"] = "curated"
        return curated

    # Try Wikidata if enabled
    wikidata_allowed = include_wikidata and ENABLE_WIKIDATA_FALLBACK and WIKIDATA_AVAILABLE
    if not wikidata_allowed:
        return None

    try:
        hits = wikidata_search_by_name(name, limit=1)
        if hits:
            top_hit = hits[0]
            top_hit["source"] = "wikidata"
            return top_hit
    except Exception as e:
        print(f"[history_service] Wikidata name search failed: {e}")

    return None
490
+
491
+
492
def get_event_by_qid(qid: str) -> Optional[dict]:
    """
    Fetch detailed event information from Wikidata for the given QID.

    Returns ``None`` when the Wikidata service could not be imported or the
    lookup raises; the failure is printed rather than propagated.
    """
    if WIKIDATA_AVAILABLE:
        try:
            return wikidata_get_detail(qid)
        except Exception as e:
            print(f"[history_service] Wikidata QID lookup failed: {e}")
    return None
504
+
505
+
506
def get_artifacts_for_year(year: int, limit: int = 4) -> List[dict]:
    """Return up to ``limit`` curated artifacts applicable to ``year``.

    An artifact applies when its ``era`` pair (start, end) contains ``year``
    inclusively, or when either bound is missing/``None``. If no artifact
    applies, the full ``CURATED_ARTIFACTS`` collection is used instead.
    """

    def _applies(artifact: dict) -> bool:
        era_start, era_end = artifact.get("era", [None, None])
        if era_start is None or era_end is None:
            # Artifacts without a bounded era are always eligible.
            return True
        return era_start <= year <= era_end

    selected = [artifact for artifact in CURATED_ARTIFACTS if _applies(artifact)]
    if not selected:
        selected = CURATED_ARTIFACTS[:]
    return selected[:limit]
518
+
519
+
520
def summarize_event(event: dict) -> str:
    """Return the first non-empty of ``summary``, ``narrative``, then ``name``.

    When ``summary`` and ``narrative`` are both empty or missing, whatever
    ``event.get("name")`` yields is returned, which may be ``None``.
    """
    for field in ("summary", "narrative"):
        text = event.get(field)
        if text:
            return text
    return event.get("name")
523
+
524
+
525
def ensure_iterable(value: Optional[Iterable[str]]) -> List[str]:
    """Normalize an optional collection of strings into a list.

    Args:
        value: ``None``, a single string, or any iterable of strings.

    Returns:
        ``[]`` for ``None``; a one-element list for a single string (a bare
        ``str`` is itself iterable, and ``list("abc")`` would split it into
        characters); otherwise ``list(value)``.
    """
    if value is None:
        return []
    if isinstance(value, str):
        # Treat a lone string as one item, not as a sequence of characters.
        return [value]
    return list(value)
529
+
530
+
531
+ CURATED_EVENTS: List[dict] = [
532
+ {
533
+ "name": "Fall of the Berlin Wall",
534
+ "slug": "fall_of_the_berlin_wall",
535
+ "year": 1989,
536
+ "start_year": 1989,
537
+ "end_year": 1989,
538
+ "month": 11,
539
+ "day": 9,
540
+ "lat": 52.5163,
541
+ "lon": 13.3777,
542
+ "summary": "East and West Berliners gather at the Brandenburg Gate as border checkpoints open and the concrete wall begins to fall.",
543
+ "narrative": (
544
+ "A sea of Berliners clamber atop graffiti-streaked concrete slabs, cheering as border guards lift the barriers. "
545
+ "People pass champagne bottles, wield sledgehammers, and chip away fragments while floodlights and television crews illuminate the night."
546
+ ),
547
+ "themes": ["political", "reunification", "cold war"],
548
+ "actors": ["East German civilians", "West Berlin residents", "border guards", "international journalists"],
549
+ "artifacts": ["Graffiti-covered concrete", "Champagne bottles", "Trabant cars", "Floodlights", "Metal barricades"],
550
+ "visual_motifs": ["floodlit night sky", "cold autumn breath", "television cameras", "crowded concrete wall"],
551
+ "facets": {"era": "late 20th century", "region": "western_europe", "type": "political upheaval"},
552
+ "sources": [{"label": "Wikipedia", "url": "https://en.wikipedia.org/wiki/Fall_of_the_Berlin_Wall"}],
553
+ "time_range": {"start": "1989-11-09T18:00:00", "end": "1989-11-10T02:00:00"},
554
+ "geo_anchor": {"lat": 52.5163, "lon": 13.3777, "radius_km": 4},
555
+ "confidence": 0.96,
556
+ "relationships": {"consequences": ["German reunification 1990"]},
557
+ },
558
+ {
559
+ "name": "D-Day Landing at Omaha Beach",
560
+ "slug": "d_day_landing_at_omaha_beach",
561
+ "year": 1944,
562
+ "start_year": 1944,
563
+ "end_year": 1944,
564
+ "month": 6,
565
+ "day": 6,
566
+ "lat": 49.4144,
567
+ "lon": -0.8322,
568
+ "summary": "Allied assault troops storm Omaha Beach under heavy German fire at dawn during Operation Overlord.",
569
+ "narrative": (
570
+ "Pre-dawn haze lifts as landing craft ramps crash open and American soldiers sprint through waist-high surf toward fortified bluffs. "
571
+ "Machine-gun tracers stitch the air, artillery craters erupt in wet sand, and medics tend to the wounded beside hedgehog obstacles."
572
+ ),
573
+ "themes": ["military", "WWII", "allied victory"],
574
+ "actors": ["US 1st Infantry Division", "US 29th Infantry Division", "German Atlantic Wall defenders", "Combat medics"],
575
+ "artifacts": ["Higgins landing craft", "Browning machine guns", "M1 helmets", "Beach obstacles", "Signal flares"],
576
+ "visual_motifs": ["morning fog", "breaking waves", "artillery smoke", "olive drab uniforms"],
577
+ "facets": {"era": "mid 20th century", "region": "western_europe", "type": "amphibious assault"},
578
+ "sources": [{"label": "National WWII Museum", "url": "https://www.nationalww2museum.org"}],
579
+ "time_range": {"start": "1944-06-06T05:30:00", "end": "1944-06-06T10:00:00"},
580
+ "geo_anchor": {"lat": 49.4144, "lon": -0.8322, "radius_km": 12},
581
+ "confidence": 0.94,
582
+ "relationships": {"parallel": ["Sword Beach landings", "Utah Beach landings"]},
583
+ },
584
+ {
585
+ "name": "Signing of the Declaration of Independence",
586
+ "slug": "signing_of_the_declaration_of_independence",
587
+ "year": 1776,
588
+ "start_year": 1776,
589
+ "end_year": 1776,
590
+ "month": 7,
591
+ "day": 4,
592
+ "lat": 39.9489,
593
+ "lon": -75.1500,
594
+ "summary": "Delegates of the Continental Congress sign the Declaration inside Independence Hall, Philadelphia.",
595
+ "narrative": (
596
+ "Sunlight streams through tall sash windows onto polished wood floors as delegates in powdered wigs lean over parchment. "
597
+ "Quill pens scratch, wax seals glisten, and brass bellows stir a warm July breeze through the Assembly Room."
598
+ ),
599
+ "themes": ["political", "founding documents", "revolution"],
600
+ "actors": ["Thomas Jefferson", "John Hancock", "Continental Congress delegates"],
601
+ "artifacts": ["Quill pens", "Parchment scrolls", "Wax seals", "Mahogany desks"],
602
+ "visual_motifs": ["golden afternoon light", "colonial interior", "powder wigs", "rich green drapery"],
603
+ "facets": {"era": "late 18th century", "region": "north_america", "type": "political charter"},
604
+ "sources": [{"label": "US National Archives", "url": "https://www.archives.gov/founding-docs/declaration"}],
605
+ "time_range": {"start": "1776-07-04T10:00:00", "end": "1776-07-04T15:00:00"},
606
+ "geo_anchor": {"lat": 39.9489, "lon": -75.1500, "radius_km": 1},
607
+ "confidence": 0.9,
608
+ "relationships": {"causes": ["Continental Congress debates"], "consequences": ["American Revolutionary War escalation"]},
609
+ },
610
+ {
611
+ "name": "Battle of Waterloo",
612
+ "slug": "battle_of_waterloo",
613
+ "year": 1815,
614
+ "start_year": 1815,
615
+ "end_year": 1815,
616
+ "month": 6,
617
+ "day": 18,
618
+ "lat": 50.6794,
619
+ "lon": 4.4125,
620
+ "summary": "Coalition forces defeat Napoleon Bonaparte near Waterloo, ending the Hundred Days campaign.",
621
+ "narrative": (
622
+ "Under rain-darkened skies, British squares brace against French cavalry charges across muddy Belgian fields. "
623
+ "Cannon smoke drifts low, cuirassiers clash with bayonet lines, and signal flags ripple above the La Haye Sainte farmhouse."
624
+ ),
625
+ "themes": ["military", "napoleonic wars"],
626
+ "actors": ["British infantry", "Dutch-Belgian troops", "French Imperial Guard", "Prussian reinforcements"],
627
+ "artifacts": ["Cuirass armor", "Sabers", "Field cannon", "Signal flags"],
628
+ "visual_motifs": ["storm clouds", "muddy terrain", "cavalry charge", "gunpowder smoke"],
629
+ "facets": {"era": "early 19th century", "region": "western_europe", "type": "decisive battle"},
630
+ "sources": [{"label": "Waterloo Battlefield", "url": "https://www.waterloo1815.be"}],
631
+ "time_range": {"start": "1815-06-18T11:30:00", "end": "1815-06-18T20:30:00"},
632
+ "geo_anchor": {"lat": 50.6794, "lon": 4.4125, "radius_km": 8},
633
+ "confidence": 0.88,
634
+ "relationships": {"consequences": ["Exile of Napoleon to Saint Helena"]},
635
+ },
636
+ {
637
+ "name": "Hiroshima Atomic Bombing",
638
+ "slug": "hiroshima_atomic_bombing",
639
+ "year": 1945,
640
+ "start_year": 1945,
641
+ "end_year": 1945,
642
+ "month": 8,
643
+ "day": 6,
644
+ "lat": 34.3853,
645
+ "lon": 132.4553,
646
+ "summary": "The United States detonates an atomic bomb over Hiroshima, Japan, causing widespread destruction.",
647
+ "narrative": (
648
+ "Moments after the blinding flash, a mushroom cloud towers above shattered city blocks. "
649
+ "Wooden houses ignite, survivors stagger through debris-clogged streets, and the iconic Genbaku Dome stands amid the devastation."
650
+ ),
651
+ "themes": ["military", "WWII", "nuclear warfare"],
652
+ "actors": ["Civilians", "First responders", "US bomber crew (distant)"],
653
+ "artifacts": ["Genbaku Dome", "Debris-laden streets", "Shattered windows", "Charred telegraph poles"],
654
+ "visual_motifs": ["mushroom cloud", "ashen fallout", "burning skyline", "silhouetted survivors"],
655
+ "facets": {"era": "mid 20th century", "region": "east_asia", "type": "aerial bombardment"},
656
+ "sources": [{"label": "Hiroshima Peace Memorial Museum", "url": "https://hpmmuseum.jp/?lang=en"}],
657
+ "time_range": {"start": "1945-08-06T08:15:00", "end": "1945-08-06T12:00:00"},
658
+ "geo_anchor": {"lat": 34.3853, "lon": 132.4553, "radius_km": 15},
659
+ "confidence": 0.87,
660
+ "relationships": {"consequences": ["Surrender of Japan 1945"]},
661
+ },
662
+ {
663
+ "name": "Tiananmen Square Protests",
664
+ "slug": "tiananmen_square_protests",
665
+ "year": 1989,
666
+ "start_year": 1989,
667
+ "end_year": 1989,
668
+ "month": 6,
669
+ "day": 4,
670
+ "lat": 39.9042,
671
+ "lon": 116.4074,
672
+ "summary": "Chinese citizens hold pro-democracy demonstrations in Beijing's Tiananmen Square before military suppression.",
673
+ "narrative": (
674
+ "In early dawn haze, students link arms facing a line of armored vehicles. "
675
+ "The Goddess of Democracy statue rises above banners, bicycle couriers weave through tents, and the Gate of Heavenly Peace looms in the background."
676
+ ),
677
+ "themes": ["political", "protest", "democracy"],
678
+ "actors": ["Student demonstrators", "People's Liberation Army soldiers", "Beijing residents"],
679
+ "artifacts": ["Goddess of Democracy statue", "Banners and loudspeakers", "Tents", "Armored personnel carriers"],
680
+ "visual_motifs": ["morning haze", "stone square", "red flags", "human chain"],
681
+ "facets": {"era": "late 20th century", "region": "east_asia", "type": "protest movement"},
682
+ "sources": [{"label": "BBC Timeline", "url": "https://www.bbc.com/news/world-asia-china-12661772"}],
683
+ "time_range": {"start": "1989-06-03T22:00:00", "end": "1989-06-04T07:00:00"},
684
+ "geo_anchor": {"lat": 39.9042, "lon": 116.4074, "radius_km": 6},
685
+ "confidence": 0.88,
686
+ "relationships": {"parallel": ["1989 global protest movements"]},
687
+ },
688
+ {
689
+ "name": "Apollo 11 Moon Launch",
690
+ "slug": "apollo_11_moon_launch",
691
+ "year": 1969,
692
+ "start_year": 1969,
693
+ "end_year": 1969,
694
+ "month": 7,
695
+ "day": 16,
696
+ "lat": 28.5729,
697
+ "lon": -80.6490,
698
+ "summary": "NASA launches Apollo 11 from Kennedy Space Center, beginning the first crewed mission to land on the Moon.",
699
+ "narrative": (
700
+ "Spectators line the Causeway as the Saturn V rockets skyward, engines roaring and painting the morning sky orange. "
701
+ "Camera crews pan across mission control staff, astronauts in white suits wave before boarding, and the vehicle assembly building looms nearby."
702
+ ),
703
+ "themes": ["space exploration", "science", "Cold War"],
704
+ "actors": ["Neil Armstrong", "Buzz Aldrin", "Michael Collins", "Mission control engineers"],
705
+ "artifacts": ["Saturn V rocket", "Launch gantry", "Mission patches", "Telemetry consoles"],
706
+ "visual_motifs": ["plume of fire", "sunrise glow", "American flags", "NASA vehicles"],
707
+ "facets": {"era": "late 20th century", "region": "north_america", "type": "space mission"},
708
+ "sources": [{"label": "NASA History", "url": "https://www.nasa.gov/specials/apollo50th/"}],
709
+ "time_range": {"start": "1969-07-16T09:32:00", "end": "1969-07-16T10:00:00"},
710
+ "geo_anchor": {"lat": 28.5729, "lon": -80.6490, "radius_km": 10},
711
+ "confidence": 0.89,
712
+ "relationships": {"consequences": ["Apollo 11 moon landing"]},
713
+ },
714
+ {
715
+ "name": "Wright Brothers First Flight",
716
+ "slug": "wright_brothers_first_flight",
717
+ "year": 1903,
718
+ "start_year": 1903,
719
+ "end_year": 1903,
720
+ "month": 12,
721
+ "day": 17,
722
+ "lat": 36.0177,
723
+ "lon": -75.6694,
724
+ "summary": "Orville and Wilbur Wright achieve the first powered, sustained flight at Kitty Hawk, North Carolina.",
725
+ "narrative": (
726
+ "On windswept dunes, Orville lies prone on the Flyer as Wilbur steadies a wingtip. "
727
+ "A small crowd of lifesavers braces the launch rail, camera ready, as the biplane lifts into the cold December air for twelve seconds."
728
+ ),
729
+ "themes": ["aviation", "innovation"],
730
+ "actors": ["Orville Wright", "Wilbur Wright", "Kill Devil Hills lifesavers"],
731
+ "artifacts": ["Wright Flyer", "Launch rail", "Oil-stained overalls", "Box camera"],
732
+ "visual_motifs": ["wind-scoured dunes", "frosty breath", "canvas wings", "wooden spars"],
733
+ "facets": {"era": "early 20th century", "region": "north_america", "type": "technological milestone"},
734
+ "sources": [{"label": "Smithsonian Air & Space", "url": "https://airandspace.si.edu"}],
735
+ "time_range": {"start": "1903-12-17T10:35:00", "end": "1903-12-17T10:47:00"},
736
+ "geo_anchor": {"lat": 36.0177, "lon": -75.6694, "radius_km": 3},
737
+ "confidence": 0.86,
738
+ "relationships": {"consequences": ["Development of powered flight"]},
739
+ },
740
+ {
741
+ "name": "Grito de Dolores",
742
+ "slug": "grito_de_dolores",
743
+ "year": 1810,
744
+ "start_year": 1810,
745
+ "end_year": 1810,
746
+ "month": 9,
747
+ "day": 16,
748
+ "lat": 21.1561,
749
+ "lon": -100.9326,
750
+ "summary": "Father Miguel Hidalgo y Costilla calls for Mexican independence with the famous Grito de Dolores.",
751
+ "narrative": (
752
+ "Before dawn, church bells ring out as Father Hidalgo addresses villagers in the plaza, torchlight illuminating insurgent banners. "
753
+ "Peasants clutch farming tools turned weapons while women distribute ammunition from woven baskets."
754
+ ),
755
+ "themes": ["revolution", "latin america"],
756
+ "actors": ["Father Miguel Hidalgo", "Town villagers", "Criollo supporters"],
757
+ "artifacts": ["Church bell rope", "Guadalupe banner", "Torches", "Improvised spears"],
758
+ "visual_motifs": ["torchlit plaza", "colonial church facade", "Mexican flag colors", "dawn sky"],
759
+ "facets": {"era": "early 19th century", "region": "central_america", "type": "independence movement"},
760
+ "sources": [{"label": "Mexican History", "url": "https://www.gob.mx"}],
761
+ "time_range": {"start": "1810-09-16T05:00:00", "end": "1810-09-16T07:00:00"},
762
+ "geo_anchor": {"lat": 21.1561, "lon": -100.9326, "radius_km": 5},
763
+ "confidence": 0.82,
764
+ "relationships": {"consequences": ["Mexican War of Independence"]},
765
+ },
766
+ {
767
+ "name": "Storming of the Bastille",
768
+ "slug": "storming_of_the_bastille",
769
+ "year": 1789,
770
+ "start_year": 1789,
771
+ "end_year": 1789,
772
+ "month": 7,
773
+ "day": 14,
774
+ "lat": 48.8530,
775
+ "lon": 2.3692,
776
+ "summary": "Parisian revolutionaries seize the Bastille fortress, igniting the French Revolution.",
777
+ "narrative": (
778
+ "Parisians wielding pikes and muskets swarm the Bastille's stone courtyard as smoke billows from cannon fire. "
779
+ "National Guardsmen drag royal cannons into position while prisoners emerge to cheering crowds waving tricolor cockades."
780
+ ),
781
+ "themes": ["revolution", "political upheaval"],
782
+ "actors": ["Parisian crowds", "National Guardsmen", "Royal soldiers"],
783
+ "artifacts": ["Tricolor cockades", "Iron portcullis", "Cannons", "Stone battlements"],
784
+ "visual_motifs": ["smoke-filled courtyard", "stormy summer sky", "stone fortress", "crowd surge"],
785
+ "facets": {"era": "late 18th century", "region": "western_europe", "type": "revolutionary uprising"},
786
+ "sources": [{"label": "French Archives", "url": "https://www.archives-nationales.culture.gouv.fr"}],
787
+ "time_range": {"start": "1789-07-14T09:00:00", "end": "1789-07-14T17:00:00"},
788
+ "geo_anchor": {"lat": 48.8530, "lon": 2.3692, "radius_km": 3},
789
+ "confidence": 0.84,
790
+ "relationships": {"consequences": ["Declaration of the Rights of Man"]},
791
+ },
792
+ {
793
+ "name": "Assassination of Julius Caesar",
794
+ "slug": "assassination_of_julius_caesar",
795
+ "year": -44,
796
+ "start_year": -44,
797
+ "end_year": -44,
798
+ "month": 3,
799
+ "day": 15,
800
+ "lat": 41.8933,
801
+ "lon": 12.4729,
802
+ "summary": "Julius Caesar is stabbed by Roman senators inside the Theatre of Pompey during the Ides of March.",
803
+ "narrative": (
804
+ "Late morning sunlight filters through the marble portico as Caesar takes his seat. "
805
+ "Senators in scarlet-trimmed togas encircle him; daggers flash, and the dictator staggers toward the statue of Pompey "
806
+ "beneath frescoed arches and hanging laurel wreaths."
807
+ ),
808
+ "themes": ["political", "assassination", "ancient rome"],
809
+ "actors": ["Julius Caesar", "Marcus Junius Brutus", "Gaius Cassius Longinus", "Roman senators"],
810
+ "artifacts": ["Marble curule chair", "Bronze daggers", "Laurel wreaths", "Blood-stained togas"],
811
+ "visual_motifs": ["marble columns", "sunbeam through smoke", "collapsing laurel crown"],
812
+ "facets": {"era": "classical antiquity", "region": "western_europe", "type": "political assassination"},
813
+ "sources": [{"label": "Ancient Rome", "url": "https://en.wikipedia.org/wiki/Assassination_of_Julius_Caesar"}],
814
+ "time_range": {"start": "-0044-03-15T11:00:00", "end": "-0044-03-15T12:00:00"},
815
+ "geo_anchor": {"lat": 41.8933, "lon": 12.4729, "radius_km": 2},
816
+ "confidence": 0.9,
817
+ "relationships": {"consequences": ["Liberators' civil war"]},
818
+ },
819
+ ]
820
+
821
+
822
# Curated artifact records. Every entry has a display "title", "culture" and
# "period" labels, and an "era" tuple holding two years.
# NOTE(review): "era" looks like a (start_year, end_year) span; whether the end
# year is inclusive depends on the consumer, which is not visible here.
CURATED_ARTIFACTS: List[dict] = [
    {"title": "Graffiti fragment of the Berlin Wall", "culture": "German", "period": "Cold War", "era": (1961, 1990)},
    {"title": "Allied M1 Helmet", "culture": "American", "period": "World War II", "era": (1941, 1945)},
    {"title": "Continental Congress inkwell", "culture": "American", "period": "Revolutionary", "era": (1765, 1783)},
    {"title": "French cuirassier armor", "culture": "French", "period": "Napoleonic", "era": (1800, 1815)},
    {"title": "Goddess of Democracy maquette", "culture": "Chinese", "period": "Late 20th century", "era": (1980, 1990)},
    {"title": "Saturn V mission patch", "culture": "American", "period": "Space Age", "era": (1960, 1975)},
    {"title": "Wright Flyer blueprint", "culture": "American", "period": "Early Aviation", "era": (1899, 1905)},
    {"title": "Bastille prison key", "culture": "French", "period": "Revolutionary", "era": (1789, 1799)},
]
832
+
833
# Visual vocabulary per era, keyed by a (start_year, end_year) tuple.
# get_era_vocabulary() treats each key as the half-open range
# start_year <= year < end_year, so adjacent buckets share a boundary year
# without overlapping. Every bucket provides the same five descriptive keys:
# "architecture", "clothing", "technology", "transport" and "mood".
ERA_VISUAL_VOCABULARY: Dict[Tuple[int, int], dict] = {
    (-5000, 1700): {
        "architecture": "stone structures, timber framing, open marketplaces",
        "clothing": "homespun fabrics, cloaks, leather sandals",
        "technology": "handcrafted tools, smoke from hearth fires, animal-drawn transport",
        "transport": "horses, carts, foot traffic",
        "mood": "earthy textures, smoke and torchlight",
    },
    (1700, 1850): {
        "architecture": "Georgian and neoclassical facades, stone avenues, colonial interiors",
        "clothing": "powdered wigs, waistcoats, breeches, corseted gowns",
        "technology": "printing presses, quill ink, carronade cannons",
        "transport": "horse-drawn carriages, sailing ships, infantry columns",
        "mood": "oil-painted lighting, warm candle glow and shadow",
    },
    (1850, 1918): {
        "architecture": "industrial brick mills, iron train stations, Victorian terraces",
        "clothing": "bowler hats, uniforms with brass buttons, layered dresses",
        "technology": "steam locomotives, telegraph poles, gas lanterns",
        "transport": "steam trains, horse omnibuses, early bicycles",
        "mood": "coal smoke haze, sepia-toned atmosphere",
    },
    (1918, 1950): {
        "architecture": "art deco facades, reinforced bunkers, concrete civic plazas",
        "clothing": "military uniforms, flapper dresses, utilitarian workwear",
        "technology": "radio towers, field telephones, propeller aircraft",
        "transport": "steel warships, troop trucks, streetcars",
        "mood": "black-and-white newsreel grit, halation from searchlights",
    },
    (1950, 1990): {
        "architecture": "mid-century modern lines, brutalist government blocks, neon signage",
        "clothing": "denim jackets, tailored suits, Cold War uniforms",
        "technology": "cathode-ray cameras, satellite dishes, analog broadcast vans",
        "transport": "boxy sedans, subway trains, patrol jeeps",
        "mood": "sodium-vapor glow, vivid chromatic contrasts",
    },
    (1990, 2030): {
        "architecture": "glass high-rises, LED billboards, postmodern cultural centers",
        "clothing": "synthetic fabrics, streetwear, modern uniforms",
        "technology": "smart devices, digital screens, drones",
        "transport": "light rail, electric cars, bicycles with LED lights",
        "mood": "clean highlights, cinematic depth of field, vibrant color grading",
    },
}
877
+
878
# Regional scene hints keyed by a lowercase, underscore-separated region
# identifier (the same spelling the curated events use in facets["region"],
# e.g. "north_america"). get_region_context() lowercases its argument before
# looking it up here. Each entry describes typical "architecture" and "climate".
REGIONAL_CONTEXT: Dict[str, dict] = {
    "western_europe": {
        "architecture": "historic stone plazas, cathedrals, tram-lined boulevards",
        "climate": "temperate weather with layered clouds and soft rain",
    },
    "eastern_europe": {
        "architecture": "Soviet-era apartment blocks, neoclassical government buildings",
        "climate": "continental climate with sharp seasonal contrast",
    },
    "north_america": {
        "architecture": "brick row houses, colonial meeting halls, steel skyscrapers",
        "climate": "varied weather, from humid summers to snowy winters",
    },
    "east_asia": {
        "architecture": "pagoda rooftops, dense urban districts, neon signage",
        "climate": "humid subtropical seasons with monsoon rains",
    },
    "central_america": {
        "architecture": "stucco plazas, colonial churches, cobblestone streets",
        "climate": "warm highland mornings with misty horizons",
    },
    "western_asia": {
        "architecture": "stone citadels, market arcades, desert courtyards",
        "climate": "arid sunlight, dust carried on dry winds",
    },
}
904
+
905
+
906
def get_era_vocabulary(year: int) -> dict:
    """Return the visual vocabulary for the era that contains ``year``.

    Each key of ``ERA_VISUAL_VOCABULARY`` is treated as the half-open range
    ``start <= year < end``. A year that falls outside every range is mapped
    to the nearest era instead of a fixed bucket, so far-future years get the
    latest vocabulary and very ancient years get the earliest one.

    Args:
        year: Target year; negative values denote BCE.

    Returns:
        The vocabulary dict stored in ``ERA_VISUAL_VOCABULARY`` (not a copy).
    """
    for (start, end), vocab in ERA_VISUAL_VOCABULARY.items():
        if start <= year < end:
            return vocab

    def _years_outside(span):
        # Distance from ``year`` to the closest year that the span covers.
        start, end = span
        return start - year if year < start else year - (end - 1)

    # No range matched: clamp to whichever era lies closest on the timeline.
    return ERA_VISUAL_VOCABULARY[min(ERA_VISUAL_VOCABULARY, key=_years_outside)]
912
+
913
+
914
def get_region_context(region_key: Optional[str]) -> dict:
    """Look up the regional scene hints for ``region_key``.

    The key is lowercased before the lookup. A missing, empty or unknown key
    yields an empty dict; a known key yields the entry stored in
    ``REGIONAL_CONTEXT`` itself (not a copy).
    """
    return REGIONAL_CONTEXT.get(region_key.lower(), {}) if region_key else {}
918
+
919
+
920
def format_event_digest(event: dict) -> dict:
    """Project an event record onto the compact digest returned to clients.

    Scalar fields are copied with ``dict.get`` (so absent keys become
    ``None``), ``themes`` and ``sources`` are passed through
    ``ensure_iterable``, and ``facets`` defaults to an empty dict.
    """
    # Fields copied verbatim, in the order they appear in the digest.
    digest = {
        key: event.get(key)
        for key in (
            "name",
            "slug",
            "year",
            "start_year",
            "end_year",
            "month",
            "day",
            "lat",
            "lon",
            "summary",
        )
    }
    digest["themes"] = ensure_iterable(event.get("themes"))
    digest["facets"] = event.get("facets", {})
    # Match metadata; absent on events that did not come from a search.
    for key in ("distance_km", "year_delta", "match_confidence"):
        digest[key] = event.get(key)
    digest["sources"] = ensure_iterable(event.get("sources"))
    return digest
939
+
940
+
941
def build_event_context(event: dict) -> dict:
    """Assemble the full context for one event.

    The result nests the compact digest under ``"event"`` and adds the
    narrative, the list-like fields (normalised through ``ensure_iterable``),
    relationships, time range and geo anchor. ``"confidence"`` is taken from
    ``match_confidence`` when that key is present, otherwise from
    ``confidence``.
    """
    context = {
        "event": format_event_digest(event),
        "narrative": event.get("narrative"),
    }
    for key in ("actors", "artifacts", "visual_motifs"):
        context[key] = ensure_iterable(event.get(key))
    context["relationships"] = event.get("relationships", {})
    context["time_range"] = event.get("time_range")
    context["geo_anchor"] = event.get("geo_anchor")
    stored_confidence = event.get("confidence")
    context["confidence"] = event.get("match_confidence", stored_confidence)
    return context
953
+
954
+
955
def get_events_response(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 250.0,
    limit: int = 5,
) -> dict:
    """Run a coordinate/year event search and wrap the result for a response.

    Returns a dict with the echoed ``query`` parameters, the number of
    matches under ``count`` and the digests of the matched events under
    ``events``.
    """
    found = get_events_by_coordinates(lat, lon, year, radius_km=radius_km, limit=limit)
    echoed_query = {
        "lat": lat,
        "lon": lon,
        "year": year,
        "radius_km": radius_km,
        "limit": limit,
    }
    response = {"query": echoed_query, "count": len(found)}
    response["events"] = list(map(format_event_digest, found))
    return response
968
+
969
+
services/prompt_parser.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+
8
# English month names (plus the "sept" abbreviation) mapped to 1-based month
# numbers. The keys are also joined into DATE_PATTERN's alternation below.
MONTHS = {
    "january": 1,
    "february": 2,
    "march": 3,
    "april": 4,
    "may": 5,
    "june": 6,
    "july": 7,
    "august": 8,
    "september": 9,
    "sept": 9,
    "october": 10,
    "november": 11,
    "december": 12,
}


# Sign applied to a coordinate for each hemisphere letter (south/west negative).
HEMISPHERE_MULTIPLIERS = {
    "n": 1,
    "s": -1,
    "e": 1,
    "w": -1,
}


# "<number>[°|deg|degrees] <hemisphere>". Group 1 is the number, group 2 the
# single hemisphere letter. The letter must either finish its token ("48N",
# "48° N,") or start a spelled-out direction ("48 north"); without that guard
# the first letter of any following word was taken as a hemisphere, so
# "1789 events" parsed as 1789°E.
COORD_PATTERN = re.compile(
    r"([-+]?\d+(?:\.\d+)?)\s*(?:°|deg|degrees)?\s*"
    r"([NnSsEeWw])(?i:orth|outh|ast|est)?(?![A-Za-z])"
)

# "lat"/"latitude" or "lon"/"longitude" followed by an optional ":" or "=" and
# a number. Group 1 holds a latitude, group 2 a longitude (the other is empty).
# The leading \b keeps words that merely end in "lat"/"lon" ("flat 3",
# "salon 12") from being read as coordinates.
LAT_LON_WORD_PATTERN = re.compile(
    r"\b(?:latitude|lat)\s*[:=]?\s*([-+]?\d+(?:\.\d+)?)"
    r"|\b(?:longitude|lon)\s*[:=]?\s*([-+]?\d+(?:\.\d+)?)",
    re.IGNORECASE,
)

# "<month name> <day>[st|nd|rd|th][,] <year> [era]" -> month, day, year, era.
DATE_PATTERN = re.compile(
    r"\b("
    + "|".join(MONTHS.keys())
    + r")\s+(\d{1,2})(?:st|nd|rd|th)?(?:,\s*|\s+)(-?\d{1,4})(?:\s*(BCE|BC|CE|AD))?",
    re.IGNORECASE,
)

# A bare 1-4 digit year with an optional era suffix -> year, era.
# The lookbehind replaces a leading \b, which could never sit in front of a
# "-" that follows whitespace and therefore made the optional minus sign dead.
# Together with the lookahead it also stops the pattern from matching pieces
# of decimal numbers such as the "48" or "8530" in "48.8530".
YEAR_ONLY_PATTERN = re.compile(
    r"(?<![\w.])(-?\d{1,4})(?!\.?\d)\s*(BCE|BC|CE|AD)?\b",
    re.IGNORECASE,
)

# "<hour>[:<minute>] hours|hour|h" -> hour, minute.
HOUR_PATTERN = re.compile(r"\b(\d{1,2})(?::(\d{2}))?\s*(?:hours?|h)\b", re.IGNORECASE)

# "at|around <hour>[:<minute>] am|pm" -> hour, minute (12-hour clock).
SEASONAL_HOUR_PATTERN = re.compile(
    r"\b(?:at|around)\s*(\d{1,2})(?::(\d{2}))?\s*(?:am|pm)\b", re.IGNORECASE
)
56
+
57
+
58
@dataclass
class ParsedPrompt:
    """Location and time hints extracted from a free-text prompt.

    Every hint is optional; a field stays ``None`` when the prompt did not
    contain it.
    """

    # Coordinates as found by _parse_coordinates; hemisphere-tagged values are
    # signed through _apply_hemisphere (south and west negative).
    lat: Optional[float] = None
    lon: Optional[float] = None
    # Year as produced by _convert_year: negative for BC/BCE.
    year: Optional[int] = None
    # Month (1-12 via MONTHS) and day; only set when a full date was matched.
    month: Optional[int] = None
    day: Optional[int] = None
    # Time of day as returned by _parse_hour.
    hour: Optional[int] = None
    minute: Optional[int] = None
    # Sum of the coordinate, date and hour confidences, capped at 1.0 by
    # parse_prompt_context.
    confidence: float = 0.0
    # parse_prompt_context stores the prompt here unchanged ("" when empty).
    residual_text: str = ""
69
+
70
+
71
def _apply_hemisphere(value: float, hemisphere: str) -> float:
    """Sign a coordinate magnitude according to its hemisphere letter.

    The hemisphere letter decides the sign, so any sign already on ``value``
    is dropped first. Multiplying the raw value turned an input such as
    "-75.67 W" into +75.67, i.e. the wrong side of the globe.

    Args:
        value: Coordinate in degrees as written in the text.
        hemisphere: Hemisphere letter in any case ("N", "S", "E", "W").

    Returns:
        The signed coordinate, or ``value`` unchanged when the letter is not
        a key of ``HEMISPHERE_MULTIPLIERS``.
    """
    multiplier = HEMISPHERE_MULTIPLIERS.get(hemisphere.lower())
    if multiplier is None:
        return value
    return abs(value) * multiplier
74
+
75
+
76
def _parse_coordinates(text: str) -> tuple[Optional[float], Optional[float], float]:
    """Extract a latitude/longitude pair from free text.

    Hemisphere notation ("48.85 N, 2.35 E") is tried first and is only used
    when both a latitude and a longitude are found. Otherwise the keyword
    notation ("lat 48.85", "lon: 2.35") fills whichever value is missing.
    Numbers outside the valid range (|lat| > 90, |lon| > 180) are ignored, so
    an unrelated number cannot occupy a coordinate slot and hide a valid
    value that appears later in the text.

    Returns:
        ``(lat, lon, confidence)``. Either coordinate may be ``None``.
        Confidence is 0.5 for a hemisphere pair, 0.2 per keyword value, and
        is capped at 0.6.
    """
    lat = lon = None
    confidence = 0.0

    lat_candidate = lon_candidate = None
    for value_str, hemisphere in COORD_PATTERN.findall(text):
        hemi = hemisphere.lower()
        adjusted = _apply_hemisphere(float(value_str), hemi)
        if hemi in ("n", "s"):
            # Keep the first latitude that is physically possible.
            if lat_candidate is None and abs(adjusted) <= 90.0:
                lat_candidate = adjusted
        elif hemi in ("e", "w"):
            if lon_candidate is None and abs(adjusted) <= 180.0:
                lon_candidate = adjusted

    if lat_candidate is not None and lon_candidate is not None:
        lat, lon = lat_candidate, lon_candidate
        confidence += 0.5

    if lat is None or lon is None:
        word_matches = LAT_LON_WORD_PATTERN.findall(text)
        lat_words = [
            value
            for value in (float(item[0]) for item in word_matches if item[0])
            if abs(value) <= 90.0
        ]
        lon_words = [
            value
            for value in (float(item[1]) for item in word_matches if item[1])
            if abs(value) <= 180.0
        ]
        if lat is None and lat_words:
            lat = lat_words[0]
            confidence += 0.2
        if lon is None and lon_words:
            lon = lon_words[0]
            confidence += 0.2

    return lat, lon, min(confidence, 0.6)
107
+
108
+
109
def _convert_year(raw_year: str, era: Optional[str]) -> int:
    """Turn a matched year string and optional era suffix into a signed year.

    A "BC" or "BCE" suffix (any case) forces the result to be negative; any
    other suffix, or none, leaves the parsed integer as it is.
    """
    number = int(raw_year)
    before_common_era = bool(era) and era.upper() in ("BCE", "BC")
    return -abs(number) if before_common_era else number
116
+
117
+
118
def _parse_date(text: str) -> tuple[Optional[int], Optional[int], Optional[int], float]:
    """Extract a date from free text.

    A full "<month> <day>, <year>" date wins. Otherwise the first standalone
    number in the plausible range -5000..3000 is taken as the year. Numbers
    that are part of a decimal ("48.8530"), a clock time ("14:00") or a
    degree value ("48°") are skipped: prompts routinely carry coordinates and
    times next to the year, and taking the first integer blindly turned
    "48.85 N, 2.35 E, 1789" into the year 48.

    Returns:
        ``(year, month, day, confidence)`` with confidence 0.4 for a full
        date, 0.2 for a year only, and ``(None, None, None, 0.0)`` otherwise.
    """
    match = DATE_PATTERN.search(text)
    if match:
        month_name, day_str, year_str, era = match.groups()
        month = MONTHS.get(month_name.lower())
        day = int(day_str)
        year = _convert_year(year_str, era)
        return year, month, day, 0.4

    def _is_fragment(start: int, end: int) -> bool:
        # Fractional part or minutes: "<digit>." or "<digit>:" right before.
        if start >= 2 and text[start - 1] in (".", ":") and text[start - 2].isdigit():
            return True
        # Integer part or hours: ".<digit>" or ":<digit>" right after.
        if text[end:end + 1] in (".", ":") and text[end + 1:end + 2].isdigit():
            return True
        # Degree value such as "48°N".
        return text[end:end + 1] == "°"

    # Fallback: year-only pattern
    for candidate in YEAR_ONLY_PATTERN.finditer(text):
        if _is_fragment(candidate.start(1), candidate.end(1)):
            continue
        year_str, era = candidate.groups()
        year = _convert_year(year_str, era)
        if -5000 <= year <= 3000:  # plausible historical range
            return year, None, None, 0.2
    return None, None, None, 0.0
134
+
135
+
136
def _parse_hour(text: str) -> tuple[Optional[int], Optional[int], float]:
    """Extract a time of day from free text.

    The 24-hour notation ("10:35 hours", "14h") is tried first, then the
    12-hour notation ("at 5 pm", "around 11:30 am"). A match that does not
    describe a real clock time (hour above 23 or minute above 59, such as
    the duration in "48 hours later") is skipped and the next match is tried.

    Returns:
        ``(hour, minute, confidence)`` on a 24-hour clock, with confidence
        0.2 for the 24-hour notation and 0.15 for am/pm, or
        ``(None, None, 0.0)`` when no valid time is found.
    """
    for match in HOUR_PATTERN.finditer(text):
        hour = int(match.group(1))
        minute = int(match.group(2)) if match.group(2) else 0
        if hour <= 23 and minute <= 59:
            return hour, minute, 0.2

    for match in SEASONAL_HOUR_PATTERN.finditer(text):
        hour = int(match.group(1))
        minute = int(match.group(2)) if match.group(2) else 0
        # The pattern always ends with the am/pm marker.
        meridiem = match.group(0)[-2:].lower()
        if meridiem == "pm" and hour < 12:
            hour += 12
        if meridiem == "am" and hour == 12:
            hour = 0
        if hour <= 23 and minute <= 59:
            return hour, minute, 0.15

    return None, None, 0.0
155
+
156
+
157
def parse_prompt_context(prompt: Optional[str]) -> ParsedPrompt:
    """Parse coordinates, date and time of day out of a prompt.

    An empty or missing prompt yields a default ``ParsedPrompt``. Otherwise
    the three sub-parsers run on the same text, their confidences are added
    and capped at 1.0, and the prompt itself is kept in ``residual_text``.
    """
    if not prompt:
        return ParsedPrompt(residual_text="")

    lat, lon, coord_conf = _parse_coordinates(prompt)
    year, month, day, date_conf = _parse_date(prompt)
    hour, minute, hour_conf = _parse_hour(prompt)

    fields = {
        "lat": lat,
        "lon": lon,
        "year": year,
        "month": month,
        "day": day,
        "hour": hour,
        "minute": minute,
    }
    combined = coord_conf + date_conf + hour_conf
    return ParsedPrompt(confidence=min(combined, 1.0), residual_text=prompt, **fields)
177
+
178
+
179
+
services/wikidata_service.py ADDED
@@ -0,0 +1,706 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Wikidata service for finding historical events.
3
+
4
+ Queries Wikidata for events at specific coordinates and times.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ import json
11
+ import math
12
+ import os
13
+ import ssl
14
+ import time
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Optional, Tuple
19
+ import urllib.request
20
+ import urllib.parse
21
+ import urllib.error
22
+
23
# Note: We use urllib with SSL fallback to avoid permission issues with requests/certifi
# When True, _execute_sparql tries the requests-based branch before urllib.
USE_REQUESTS = False

# Cache configuration
# ROOT_DIR is two directory levels above this file; cached query results are
# stored as JSON files under <ROOT_DIR>/data/wikidata_cache.
ROOT_DIR = Path(__file__).resolve().parent.parent
CACHE_DIR = ROOT_DIR / "data" / "wikidata_cache"
# Import-time side effect: the cache directory is created if it is missing.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
CACHE_TTL_SECONDS = 86400 * 7  # 7 days

# SPARQL endpoint that _execute_sparql sends its GET requests to.
WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"

# Rate limiting
# time.time() value recorded by _rate_limit for the most recent request;
# 0.0 until the first request is made.
_last_request_time: float = 0.0
MIN_REQUEST_INTERVAL = 1.5  # seconds between requests
37
+
38
+
39
@dataclass
class WikidataEvent:
    """A historical event row assembled from Wikidata query results."""

    # Identity and description
    qid: str
    name: str
    description: str = ""
    # Date parts; any of them may be unknown
    year: Optional[int] = None
    month: Optional[int] = None
    day: Optional[int] = None
    # Position in decimal degrees, if known
    lat: Optional[float] = None
    lon: Optional[float] = None
    participants: List[str] = field(default_factory=list)
    location_name: str = ""
    event_type: str = ""
    wikipedia_url: str = ""
    image_url: str = ""
    # Match metadata relative to the query that found the event
    distance_km: float = 0.0
    year_delta: int = 0
    confidence: float = 0.0
    source: str = "wikidata"

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the event, including aliases expected by older callers.

        The first group of keys mirrors the dataclass fields one to one. The
        second group re-exposes the same data under the key names used by
        the curated event records (``summary``, ``actors``, ``themes``, ...).
        """
        mirrored = (
            "qid",
            "name",
            "description",
            "year",
            "month",
            "day",
            "lat",
            "lon",
            "participants",
            "location_name",
            "event_type",
            "wikipedia_url",
            "image_url",
            "distance_km",
            "year_delta",
            "confidence",
            "source",
        )
        payload: Dict[str, Any] = {attr: getattr(self, attr) for attr in mirrored}

        # Compatibility fields for existing code
        wikidata_page = f"https://www.wikidata.org/wiki/{self.qid}"
        payload.update(
            {
                "summary": self.description,
                "narrative": self.description,
                "actors": self.participants,
                "themes": [self.event_type] if self.event_type else [],
                "artifacts": [],
                "visual_motifs": [],
                "facets": {"type": self.event_type},
                "sources": [{"label": "Wikidata", "url": wikidata_page}],
                "match_confidence": self.confidence,
            }
        )
        return payload
90
+
91
+
92
def _cache_key(lat: float, lon: float, year: int, radius_km: float) -> str:
    """Derive the cache file stem for one set of query parameters.

    Coordinates are rounded to two decimals and the radius to a whole number
    before hashing, so nearby queries share a cache entry. The result is the
    hexadecimal MD5 digest of the joined parameters.
    """
    parts = (f"{lat:.2f}", f"{lon:.2f}", f"{year}", f"{radius_km:.0f}")
    fingerprint = "_".join(parts)
    return hashlib.md5(fingerprint.encode()).hexdigest()
96
+
97
+
98
def _get_cached(cache_key: str) -> Optional[List[Dict]]:
    """Retrieve cached results if they exist and aren't expired.

    A cache file that is missing, unreadable, not valid JSON or not of the
    shape written by ``_save_cache`` is treated as a cache miss instead of
    raising, so a damaged file can never break a search. Entries older than
    ``CACHE_TTL_SECONDS`` are deleted on access.

    Returns:
        The cached list of event dicts, or ``None`` on a miss.
    """
    cache_file = CACHE_DIR / f"{cache_key}.json"

    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None

    # Valid JSON is not necessarily a valid cache entry (e.g. "null" or "[]").
    if not isinstance(data, dict):
        return None
    cached_time = data.get("timestamp", 0)
    events = data.get("events", [])
    if not isinstance(cached_time, (int, float)) or not isinstance(events, list):
        return None

    if time.time() - cached_time > CACHE_TTL_SECONDS:
        try:
            cache_file.unlink(missing_ok=True)
        except OSError:
            # Best effort: an undeletable stale file is just ignored again
            # on the next lookup.
            pass
        return None

    return events
116
+
117
+
118
def _save_cache(cache_key: str, events: List[Dict]) -> None:
    """Save results to cache.

    Caching is best effort: failures are reported and otherwise ignored. The
    payload is written to a temporary file and then moved into place, so an
    interrupted write cannot leave a truncated cache file behind.

    Args:
        cache_key: File stem produced by ``_cache_key``.
        events: JSON-serialisable event dicts to store.
    """
    cache_file = CACHE_DIR / f"{cache_key}.json"
    # The process id keeps concurrent writers from sharing a temporary file.
    tmp_file = cache_file.with_name(f"{cache_file.name}.{os.getpid()}.tmp")
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump({"timestamp": time.time(), "events": events}, f, ensure_ascii=False)
        tmp_file.replace(cache_file)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: the events could not be serialised to JSON.
        print(f"[Wikidata] Could not write cache file {cache_file.name}: {e}")
        try:
            tmp_file.unlink(missing_ok=True)
        except OSError:
            # Nothing more to do; a leftover temporary file is harmless.
            pass
126
+
127
+
128
def _rate_limit() -> None:
    """Ensure we don't exceed Wikidata rate limits.

    Sleeps just long enough to keep ``MIN_REQUEST_INTERVAL`` seconds between
    consecutive requests, then records the current time as the time of the
    latest request.
    """
    global _last_request_time
    elapsed = time.time() - _last_request_time
    if elapsed < MIN_REQUEST_INTERVAL:
        # time.time() follows the wall clock, so ``elapsed`` turns negative
        # when the system clock is set back. Capping the wait keeps such a
        # jump from blocking the caller for the size of the adjustment.
        time.sleep(min(MIN_REQUEST_INTERVAL, MIN_REQUEST_INTERVAL - elapsed))
    _last_request_time = time.time()
135
+
136
+
137
def _haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance in kilometres between two lat/lon points.

    Uses the haversine formula with an Earth radius of 6371 km. All
    arguments are in decimal degrees.
    """
    earth_radius_km = 6371.0
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2

    haversine = (
        math.sin(half_dlat) ** 2
        + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(half_dlon) ** 2
    )
    # max() guards the square root against a tiny negative rounding error.
    half_angle = math.atan2(math.sqrt(haversine), math.sqrt(max(0, 1 - haversine)))
    return earth_radius_km * 2 * half_angle
145
+
146
+
147
def _build_sparql_query(lat: float, lon: float, year: int, radius_km: float, limit: int = 20) -> str:
    """
    Build SPARQL query for historical events near coordinates and year.

    This query searches for:
    - Items that are an instance of event (Q1656682) or of one of its subclasses
    - With coordinates, either their own or those of their location
    - With a point in time or start time within 15 years of ``year``

    The search area is a latitude/longitude bounding box around the query
    point. The longitude half-width is widened by 1 / cos(latitude), because
    a degree of longitude covers less ground away from the equator; using
    the latitude half-width for both axes made the box too narrow east-west
    at higher latitudes.

    NOTE(review): the box does not wrap around the +/-180 degree meridian.

    Args:
        lat: Latitude of the query point in decimal degrees.
        lon: Longitude of the query point in decimal degrees.
        year: Target year (negative for BCE).
        radius_km: Approximate search radius in kilometres.
        limit: Maximum number of result rows.

    Returns:
        The SPARQL query text.
    """
    # Wikidata uses negative years for BCE
    year_start = year - 15
    year_end = year + 15

    # Convert radius to degrees (rough approximation: ~111 km per degree)
    lat_radius = radius_km / 111.0
    # Floor the cosine so the box stays finite next to the poles.
    cos_lat = max(math.cos(math.radians(lat)), 0.01)
    lon_radius = min(lat_radius / cos_lat, 180.0)

    lat_min = max(lat - lat_radius, -90.0)
    lat_max = min(lat + lat_radius, 90.0)
    lon_min = lon - lon_radius
    lon_max = lon + lon_radius

    query = f"""
    SELECT DISTINCT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel ?participantLabel ?typeLabel ?article ?image
    WHERE {{
      # Find events with coordinates
      ?event wdt:P31/wdt:P279* wd:Q1656682 . # instance of event or subclass

      # Get coordinates - either direct or via location
      OPTIONAL {{
        ?event wdt:P625 ?directCoord .
      }}
      OPTIONAL {{
        ?event wdt:P276 ?location .
        ?location wdt:P625 ?locationCoord .
      }}
      BIND(COALESCE(?directCoord, ?locationCoord) AS ?coord)

      # Filter by coordinate bounding box
      FILTER(BOUND(?coord))
      BIND(geof:latitude(?coord) AS ?lat)
      BIND(geof:longitude(?coord) AS ?lon)
      FILTER(?lat >= {lat_min:.6f} && ?lat <= {lat_max:.6f})
      FILTER(?lon >= {lon_min:.6f} && ?lon <= {lon_max:.6f})

      # Get date
      OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
      OPTIONAL {{ ?event wdt:P580 ?startTime . }}
      BIND(COALESCE(?pointInTime, ?startTime) AS ?date)

      # Filter by year range
      FILTER(BOUND(?date))
      FILTER(YEAR(?date) >= {year_start} && YEAR(?date) <= {year_end})

      # Optional: participants
      OPTIONAL {{ ?event wdt:P710 ?participant . }}

      # Optional: event type
      OPTIONAL {{ ?event wdt:P31 ?type . }}

      # Optional: Wikipedia article
      OPTIONAL {{
        ?article schema:about ?event ;
                 schema:isPartOf <https://en.wikipedia.org/> .
      }}

      # Optional: image
      OPTIONAL {{ ?event wdt:P18 ?image . }}

      # Location label: bind ?location even when the location itself has no
      # coordinates, so that ?locationLabel can be filled in
      OPTIONAL {{ ?event wdt:P276 ?location . }}

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en,de,fr,es,it" . }}
    }}
    ORDER BY ABS(YEAR(?date) - {year})
    LIMIT {limit}
    """
    return query
219
+
220
+
221
def _build_fallback_query(lat: float, lon: float, year: int, limit: int = 15) -> str:
    """
    Build a simpler fallback query for notable events within 20 years of ``year``.

    Instead of walking the subclass tree it matches items that are a direct
    instance of one of a fixed list of event classes. There is no geographic
    filter: ``lat`` and ``lon`` are accepted for symmetry with
    ``_build_sparql_query`` but are not used in the query text.

    The merged coordinate is bound to ``?coord``, the variable that is
    selected. It used to be bound to an unselected ``?finalCoord``, so events
    that only carry their own coordinates came back without a position.

    Args:
        lat: Latitude of the query point (unused).
        lon: Longitude of the query point (unused).
        year: Target year (negative for BCE).
        limit: Maximum number of result rows.

    Returns:
        The SPARQL query text.
    """
    year_start = year - 20
    year_end = year + 20

    query = f"""
    SELECT DISTINCT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel ?article
    WHERE {{
      # Broader event types
      VALUES ?eventType {{
        wd:Q178561    # battle
        wd:Q131569    # treaty
        wd:Q7278      # political revolution
        wd:Q8076      # assassination
        wd:Q3882219   # coronation
        wd:Q1318295   # military offensive
        wd:Q2001676   # massacre
        wd:Q18669875  # historical event
        wd:Q13418847  # historical period
        wd:Q3024240   # historical event
      }}
      ?event wdt:P31 ?eventType .

      # Date filter
      OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
      OPTIONAL {{ ?event wdt:P580 ?startTime . }}
      BIND(COALESCE(?pointInTime, ?startTime) AS ?date)
      FILTER(BOUND(?date))
      FILTER(YEAR(?date) >= {year_start} && YEAR(?date) <= {year_end})

      # Get coordinates via location, else directly from the event
      OPTIONAL {{
        ?event wdt:P276 ?location .
        ?location wdt:P625 ?locationCoord .
      }}
      OPTIONAL {{
        ?event wdt:P625 ?directCoord .
      }}
      BIND(COALESCE(?locationCoord, ?directCoord) AS ?coord)

      # Wikipedia article
      OPTIONAL {{
        ?article schema:about ?event ;
                 schema:isPartOf <https://en.wikipedia.org/> .
      }}

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    ORDER BY ABS(YEAR(?date) - {year})
    LIMIT {limit}
    """
    return query
276
+
277
+
278
def _execute_sparql(query: str) -> Optional[Dict]:
    """Execute SPARQL query against Wikidata endpoint.

    The request is rate limited through ``_rate_limit`` and sent as a GET
    with a 30 second timeout.

    TLS certificates are verified. Verification used to be switched off for
    every request, which lets anyone on the network path alter the results.
    On hosts whose certificate store is broken, the old behaviour can be
    enabled on purpose by setting the environment variable
    ``WIKIDATA_ALLOW_INSECURE_SSL`` to ``1``, ``true`` or ``yes``; the
    unverified retry only happens after certificate verification has failed.

    Args:
        query: SPARQL query text.

    Returns:
        The decoded JSON response, or ``None`` when the request fails or the
        response is not valid JSON.
    """
    _rate_limit()

    headers = {
        "Accept": "application/sparql-results+json",
        "User-Agent": "Meridian-Historical-App/1.0 (https://github.com/meridian; [email protected])"
    }

    # Try using requests library first (better SSL handling)
    # NOTE(review): neither `requests` nor `certifi` is imported in the part
    # of this module that is visible here; with USE_REQUESTS enabled the
    # resulting NameError is caught below and the urllib path is used.
    if USE_REQUESTS:
        try:
            response = requests.get(
                WIKIDATA_SPARQL_ENDPOINT,
                params={"query": query},
                headers=headers,
                timeout=30,
                verify=certifi.where(),
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"[Wikidata] SPARQL query failed (requests): {e}")
            # Fall through to urllib fallback

    params = urllib.parse.urlencode({"query": query})
    url = f"{WIKIDATA_SPARQL_ENDPOINT}?{params}"
    req = urllib.request.Request(url, headers=headers)

    def _fetch(context):
        # Send the request with the given SSL context and decode the body.
        with urllib.request.urlopen(req, timeout=30, context=context) as response:
            return json.loads(response.read().decode("utf-8"))

    try:
        try:
            return _fetch(ssl.create_default_context())
        except urllib.error.URLError as e:
            cert_failure = isinstance(e.reason, ssl.SSLCertVerificationError)
            opt_in = os.environ.get("WIKIDATA_ALLOW_INSECURE_SSL", "").strip().lower()
            if not (cert_failure and opt_in in ("1", "true", "yes")):
                raise
            print("[Wikidata] WARNING: certificate verification failed; "
                  "retrying WITHOUT verification (WIKIDATA_ALLOW_INSECURE_SSL is set)")
            insecure_context = ssl.create_default_context()
            insecure_context.check_hostname = False
            insecure_context.verify_mode = ssl.CERT_NONE
            return _fetch(insecure_context)
    except (OSError, ValueError) as e:
        # OSError covers URLError, HTTPError, timeouts and connection errors
        # raised while reading the body; ValueError covers invalid JSON and
        # undecodable bytes.
        print(f"[Wikidata] SPARQL query failed (urllib): {e}")
        return None
319
+
320
+
321
def _parse_wikidata_date(date_str: str) -> Tuple[Optional[int], Optional[int], Optional[int]]:
    """Parse Wikidata date string to year, month, day.

    Accepts strings such as "1789-07-14T00:00:00Z", the BCE form
    "-0044-03-15T00:00:00Z" and values with an explicit "+" sign. Anything
    from the "T" onwards is ignored. A month or day written as "00", which
    marks an unknown part of an imprecise date, is returned as ``None``
    instead of 0.

    Returns:
        ``(year, month, day)``; the year is negative for BCE. Month and day
        are ``None`` when absent or zero, and all three are ``None`` when
        the string is empty or cannot be parsed.
    """
    if not date_str:
        return None, None, None

    # Take the sign off first so the leading "-" of a BCE year is not
    # mistaken for a field separator.
    is_bce = date_str.startswith("-")
    unsigned = date_str[1:] if date_str[0] in ("+", "-") else date_str
    parts = unsigned.split("T")[0].split("-")

    try:
        year = int(parts[0])
        month = int(parts[1]) if len(parts) > 1 else None
        day = int(parts[2]) if len(parts) > 2 else None
    except (ValueError, IndexError):
        return None, None, None

    if is_bce:
        year = -year
    # "or None" maps the 0 placeholder of imprecise dates to "unknown".
    return year, month or None, day or None
345
+
346
+
347
def _parse_coordinates(coord_str: str) -> Tuple[Optional[float], Optional[float]]:
    """Parse a Wikidata "Point(lon lat)" literal into ``(lat, lon)``.

    Note the swap: the literal lists longitude first, the result latitude
    first. ``(None, None)`` is returned for an empty string, a string that
    does not start with "Point(", or a literal without exactly two numeric
    fields.
    """
    prefix = "Point("
    if not coord_str or not coord_str.startswith(prefix):
        return None, None

    # Drop the prefix and the final character (the closing parenthesis).
    fields = coord_str[len(prefix):-1].split()
    if len(fields) != 2:
        return None, None

    lon_text, lat_text = fields
    try:
        return float(lat_text), float(lon_text)
    except ValueError:
        return None, None
362
+
363
+
364
def _results_to_events(
    results: Dict,
    query_lat: float,
    query_lon: float,
    query_year: int,
) -> List[WikidataEvent]:
    """Convert SPARQL results to WikidataEvent objects.

    Result rows are grouped by QID. The first row of an event supplies its
    fields; later rows only contribute additional participants.

    Confidence starts at 0.7 and gains a bonus for proximity, for closeness
    in time and for having a Wikipedia article, capped at 0.98. The
    proximity and time bonuses are only granted when the coordinates or the
    year are actually known. Previously a missing value fell back to
    distance 0 / delta 0 and was scored as a perfect match.

    Args:
        results: Decoded SPARQL JSON response.
        query_lat: Latitude of the query point.
        query_lon: Longitude of the query point.
        query_year: Year that was searched for.

    Returns:
        Events sorted by relevance, best first. ``distance_km`` and
        ``year_delta`` remain 0 for events without coordinates or date.
    """
    events_map: Dict[str, WikidataEvent] = {}

    bindings = results.get("results", {}).get("bindings", [])

    def _value(binding, key, default=""):
        # SPARQL JSON wraps each bound variable as {"value": ..., ...}.
        return binding.get(key, {}).get("value", default)

    for binding in bindings:
        # Extract QID (last path segment of the entity URI)
        event_uri = _value(binding, "event")
        if not event_uri:
            continue
        qid = event_uri.split("/")[-1]

        event = events_map.get(qid)
        if event is None:
            year, month, day = _parse_wikidata_date(_value(binding, "date"))
            lat, lon = _parse_coordinates(_value(binding, "coord"))
            wikipedia_url = _value(binding, "article")

            has_location = lat is not None and lon is not None
            distance_km = _haversine(query_lat, query_lon, lat, lon) if has_location else 0.0
            # "is not None" rather than truthiness: a parsed year of 0 is a
            # value, not a missing date.
            year_delta = abs(year - query_year) if year is not None else 0

            # Confidence scoring
            confidence = 0.7
            if has_location:
                if distance_km < 50:
                    confidence += 0.15
                elif distance_km < 150:
                    confidence += 0.1

            if year is not None:
                if year_delta == 0:
                    confidence += 0.15
                elif year_delta <= 5:
                    confidence += 0.1

            if wikipedia_url:
                confidence += 0.05

            confidence = min(confidence, 0.98)

            event = WikidataEvent(
                qid=qid,
                name=_value(binding, "eventLabel", "Unknown Event"),
                description=_value(binding, "eventDescription"),
                year=year,
                month=month,
                day=day,
                lat=lat,
                lon=lon,
                location_name=_value(binding, "locationLabel"),
                event_type=_value(binding, "typeLabel"),
                wikipedia_url=wikipedia_url,
                image_url=_value(binding, "image"),
                distance_km=round(distance_km, 2),
                year_delta=year_delta,
                confidence=round(confidence, 3),
            )
            events_map[qid] = event

        # Add participant if present
        participant = _value(binding, "participantLabel")
        if participant and participant not in event.participants:
            event.participants.append(participant)

    # Sort by relevance (lower distance + year_delta = better)
    events = list(events_map.values())
    events.sort(key=lambda e: e.distance_km + e.year_delta * 5 - e.confidence * 20)

    return events
459
+
460
+
461
def search_events_by_geo_time(
    lat: float,
    lon: float,
    year: int,
    radius_km: float = 300.0,
    limit: int = 10,
    use_cache: bool = True,
) -> List[Dict]:
    """
    Search Wikidata for historical events near coordinates and year.

    The primary geo/time query is tried first; if it yields no events a
    fallback query is executed. Events are converted to dictionaries via
    ``WikidataEvent.to_dict()``.

    Args:
        lat: Latitude
        lon: Longitude
        year: Target year (negative for BCE)
        radius_km: Search radius in kilometers
        limit: Maximum number of results
        use_cache: Whether to use cached results

    Returns:
        List of event dictionaries compatible with existing code
        (at most ``limit`` entries).
    """
    # NOTE: the cache key is built from lat/lon/year/radius only -- ``limit``
    # is not part of it -- so one cached entry serves every ``limit`` value.
    cache_key = _cache_key(lat, lon, year, radius_km)
    if use_cache:
        cached = _get_cached(cache_key)
        if cached is not None:
            print(f"[Wikidata] Cache hit for {lat:.2f}, {lon:.2f}, {year}")
            return cached[:limit]

    print(f"[Wikidata] Querying for events near {lat:.2f}, {lon:.2f}, year {year}")

    # Try primary query first. Twice the requested number of rows is fetched;
    # presumably because several result rows can collapse into one event.
    query = _build_sparql_query(lat, lon, year, radius_km, limit * 2)
    results = _execute_sparql(query)

    events: List[WikidataEvent] = []
    if results:
        events = _results_to_events(results, lat, lon, year)

    # If no results, try fallback query
    if not events:
        print("[Wikidata] Primary query returned no results, trying fallback...")
        fallback_query = _build_fallback_query(lat, lon, year, limit * 2)
        fallback_results = _execute_sparql(fallback_query)
        if fallback_results:
            events = _results_to_events(fallback_results, lat, lon, year)

    # Convert everything we fetched and cache the *untruncated* list. Because
    # the cache key ignores ``limit``, caching only ``events[:limit]`` would
    # make a later call with a larger ``limit`` receive a short list.
    all_event_dicts = [e.to_dict() for e in events]

    if use_cache and all_event_dicts:
        _save_cache(cache_key, all_event_dicts)

    event_dicts = all_event_dicts[:limit]
    print(f"[Wikidata] Found {len(event_dicts)} events")
    return event_dicts
517
+
518
+
519
def get_event_detail(qid: str) -> Optional[Dict]:
    """
    Fetch detailed information about a specific Wikidata event.

    Args:
        qid: Wikidata QID (e.g., "Q784"). Surrounding whitespace is ignored.

    Returns:
        Event dictionary with full details, or None if not found. None is
        also returned, without issuing a query, when ``qid`` is not a
        well-formed QID ("Q" followed by ASCII digits).

        In addition to the regular event fields the dictionary carries
        ``causes``, ``effects`` and ``relationships`` (with ``causes`` and
        ``consequences`` keys), each a sorted list of labels.

        NOTE: the event is built with a (0, 0, year 0) query reference, so
        any distance / year-delta / confidence fields in the result are
        relative to that placeholder and are not meaningful here.
    """
    import re  # local import: only needed for QID validation

    # ``qid`` is interpolated into the SPARQL text below, so it must be
    # validated strictly to prevent query injection from tool input.
    if not isinstance(qid, str):
        return None
    qid = qid.strip()
    if not re.fullmatch(r"Q[0-9]+", qid):
        return None

    query = f"""
    SELECT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel
           ?participantLabel ?typeLabel ?article ?image ?causeLabel ?effectLabel
    WHERE {{
        BIND(wd:{qid} AS ?event)

        OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
        OPTIONAL {{ ?event wdt:P580 ?startTime . }}
        BIND(COALESCE(?pointInTime, ?startTime) AS ?date)

        OPTIONAL {{ ?event wdt:P625 ?coord . }}
        OPTIONAL {{
            ?event wdt:P276 ?location .
            ?location wdt:P625 ?locCoord .
        }}

        OPTIONAL {{ ?event wdt:P710 ?participant . }}
        OPTIONAL {{ ?event wdt:P31 ?type . }}
        OPTIONAL {{ ?event wdt:P828 ?cause . }}
        OPTIONAL {{ ?event wdt:P1542 ?effect . }}
        OPTIONAL {{ ?event wdt:P18 ?image . }}

        OPTIONAL {{
            ?article schema:about ?event ;
                     schema:isPartOf <https://en.wikipedia.org/> .
        }}

        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    LIMIT 50
    """

    results = _execute_sparql(query)
    if not results:
        return None

    events = _results_to_events(results, 0, 0, 0)
    if not events:
        return None

    event_dict = events[0].to_dict()

    # Cause/effect labels are spread over the result rows (one row per
    # combination of optional values), so collect them de-duplicated.
    causes = set()
    effects = set()
    for binding in results.get("results", {}).get("bindings", []):
        cause = binding.get("causeLabel", {}).get("value", "")
        effect = binding.get("effectLabel", {}).get("value", "")
        if cause:
            causes.add(cause)
        if effect:
            effects.add(effect)

    # Sort so the output order is deterministic across runs.
    event_dict["causes"] = sorted(causes)
    event_dict["effects"] = sorted(effects)
    event_dict["relationships"] = {
        "causes": sorted(causes),
        "consequences": sorted(effects),
    }

    return event_dict
592
+
593
+
594
def search_events_by_name(name: str, limit: int = 5) -> List[Dict]:
    """
    Search Wikidata for events by name.

    Performs a case-insensitive substring match of ``name`` against the
    English labels of items that are (a subclass of) occurrence (Q1656682).

    Args:
        name: Event name to search for
        limit: Maximum results

    Returns:
        List of matching events. Empty when ``name`` is empty/blank, when
        ``limit`` is not positive, or when the query yields nothing.

        NOTE: events are built with a (0, 0, year 0) query reference, so any
        distance / year-delta / confidence fields are relative to that
        placeholder and are not meaningful here.
    """
    # An empty needle would match every label: skip the (very expensive)
    # query instead of sending it.
    if not name or not name.strip():
        return []

    max_results = int(limit)
    if max_results <= 0:
        return []

    # Escape for a double-quoted SPARQL string literal. The backslash must be
    # handled first; otherwise an input such as \" would become \\" and
    # terminate the literal early, allowing query injection. Raw line breaks
    # are not permitted inside a short string literal either.
    escaped_name = (
        name.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = f"""
    SELECT DISTINCT ?event ?eventLabel ?eventDescription ?date ?coord ?locationLabel ?article
    WHERE {{
        ?event wdt:P31/wdt:P279* wd:Q1656682 .
        ?event rdfs:label ?label .
        FILTER(LANG(?label) = "en")
        FILTER(CONTAINS(LCASE(?label), LCASE("{escaped_name}")))

        OPTIONAL {{ ?event wdt:P585 ?pointInTime . }}
        OPTIONAL {{ ?event wdt:P580 ?startTime . }}
        BIND(COALESCE(?pointInTime, ?startTime) AS ?date)

        OPTIONAL {{ ?event wdt:P625 ?coord . }}
        OPTIONAL {{
            ?event wdt:P276 ?location .
            ?location wdt:P625 ?locCoord .
        }}

        OPTIONAL {{
            ?article schema:about ?event ;
                     schema:isPartOf <https://en.wikipedia.org/> .
        }}

        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    LIMIT {max_results}
    """

    results = _execute_sparql(query)
    if not results:
        return []

    events = _results_to_events(results, 0, 0, 0)
    return [e.to_dict() for e in events]
642
+
643
+
644
+ # MCP-compatible function exports
645
def mcp_search_historical_events(
    latitude: float,
    longitude: float,
    year: int,
    radius_km: float = 300.0,
    limit: int = 10,
) -> Dict:
    """
    MCP tool: look up historical events around a coordinate and year.

    Delegates to ``search_events_by_geo_time`` and wraps the outcome in a
    response envelope. This function does not raise: any exception from the
    lookup is reported as ``{"success": False, "error": <message>}``.

    On success the envelope contains ``success``, an echo of the ``query``
    parameters, the ``count`` of events, the ``events`` list itself and the
    ``source`` tag ``"wikidata"``.
    """
    # Echo of the request parameters, returned to the caller on success.
    request_echo = {
        "latitude": latitude,
        "longitude": longitude,
        "year": year,
        "radius_km": radius_km,
    }

    try:
        found = search_events_by_geo_time(
            lat=latitude,
            lon=longitude,
            year=year,
            radius_km=radius_km,
            limit=limit,
        )
        response = {"success": True}
        response["query"] = request_echo
        response["count"] = len(found)
        response["events"] = found
        response["source"] = "wikidata"
        return response
    except Exception as exc:
        # Tool boundary: surface the failure as data rather than raising.
        return {"success": False, "error": str(exc)}
684
+
685
+
686
def mcp_get_event_by_qid(qid: str) -> Dict:
    """
    MCP tool: fetch the detail record of a single Wikidata event.

    Delegates to ``get_event_detail``. Returns
    ``{"success": True, "event": <dict>}`` when a record is found, and
    ``{"success": False, "error": <message>}`` when nothing is found or the
    lookup raises. This function does not raise.
    """
    try:
        detail = get_event_detail(qid)
        if not detail:
            # Covers both ``None`` and an empty record.
            return {"success": False, "error": f"Event {qid} not found"}
        return {"success": True, "event": detail}
    except Exception as exc:
        # Tool boundary: surface the failure as data rather than raising.
        return {"success": False, "error": str(exc)}
706
+