Bellok committed on
Commit
752474d
·
verified ·
1 Parent(s): dd9d03a

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +3 -2
  2. app.py +163 -607
  3. coverage.xml +11 -11
  4. test-results.xml +0 -0
README.md CHANGED
@@ -3,8 +3,9 @@ title: Warbler CDA FractalStat RAG
3
  emoji: 🦜
4
  colorFrom: blue
5
  colorTo: purple
6
- sdk: docker
7
- app_port: 7860
 
8
  pinned: false
9
  license: mit
10
  short_description: RAG system with 8D FractalStat and 2.6M+ documents
 
3
  emoji: 🦜
4
  colorFrom: blue
5
  colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: RAG system with 8D FractalStat and 2.6M+ documents
app.py CHANGED
@@ -1,639 +1,195 @@
 
1
  """
2
- Warbler CDA - HuggingFace Space Demo
3
- Interactive demo of the Cognitive Development Architecture RAG system
 
4
  """
5
 
 
6
  import json
 
7
  import time
8
- import os
9
- import threading
10
- import gradio as gr
11
- import spaces
12
- from pathlib import Path
13
- from typing import Tuple, Optional, Dict
14
-
15
- # Set TOKENIZERS_PARALLELISM to avoid warnings with SentenceTransformers
16
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
17
-
18
-
19
- # Global variables for background ingestion tracking
20
- ingestion_status = {
21
- "running": False,
22
- "total_docs": 0,
23
- "processed": 0,
24
- "failed": 0,
25
- "start_time": None,
26
- "eta": 0,
27
- "rate": 0,
28
- }
29
-
30
-
31
- def background_ingest_packs(api, pack_docs, pack_manager):
32
- """Background function to ingest packs without blocking app startup"""
33
- global ingestion_status
34
-
35
- # Suppress numpy warnings during ingestion to avoid cluttering logs in HF Spaces
36
- import warnings
37
- with warnings.catch_warnings():
38
- warnings.filterwarnings("ignore", message="invalid value encountered", category=RuntimeWarning)
39
-
40
- ingestion_status["running"] = True
41
- ingestion_status["total_docs"] = len(pack_docs)
42
- ingestion_status["processed"] = 0
43
- ingestion_status["failed"] = 0
44
- ingestion_status["start_time"] = time.time()
45
-
46
- print(f"[INFO] Ingesting {len(pack_docs)} documents from Warbler packs...")
47
- total_docs = len(pack_docs)
48
- processed = 0
49
- failed = 0
50
- start_time = time.time()
51
- batch_size = 1000
52
-
53
- # Process in batches to avoid memory issues and provide progress
54
- for batch_start in range(0, total_docs, batch_size):
55
- batch_end = min(batch_start + batch_size, total_docs)
56
- batch = pack_docs[batch_start:batch_end]
57
-
58
- batch_processed = 0
59
- batch_failed = 0
60
-
61
- for doc in batch:
62
- success = api.add_document(doc["id"], doc["content"], doc["metadata"])
63
- if not success:
64
- batch_failed += 1
65
- failed += 1
66
- if failed <= 5: # Log first few failures
67
- print(f"[WARN] Failed to add document {doc['id']}")
68
-
69
- batch_processed += 1
70
- processed += 1
71
-
72
- # Update global status
73
- ingestion_status["processed"] = processed
74
- ingestion_status["failed"] = failed
75
-
76
- # Progress update after each batch
77
- elapsed = time.time() - start_time
78
- rate = processed / elapsed if elapsed > 0 else 0
79
- eta = (total_docs - processed) / rate if rate > 0 else 0
80
- ingestion_status["rate"] = rate
81
- ingestion_status["eta"] = eta
82
 
83
- print(
84
- f"[PROGRESS] {processed}/{total_docs} documents ingested "
85
- f"({processed/total_docs*100:.1f}%) - "
86
- f"{rate:.1f} docs/sec - ETA: {eta/60:.1f} min"
87
- )
88
-
89
- # Force garbage collection after large batches to free memory
90
- if processed % 10000 == 0:
91
- import gc
92
-
93
- gc.collect()
94
-
95
- packs_loaded = processed
96
- pack_manager.mark_packs_ingested(1, packs_loaded)
97
- total_time = time.time() - start_time
98
- print(
99
- f"[OK] Loaded {packs_loaded} documents from Warbler packs "
100
- f"({failed} failed) in {total_time:.1f} seconds"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  )
102
 
103
- # Mark ingestion complete
104
- ingestion_status["running"] = False
105
-
106
-
107
- SAMPLE_DOCS = [
108
- {
109
- "id": "wisdom_1",
110
- "content": "True wisdom comes from understanding both success and failure. Each setback teaches resilience.",
111
- "metadata": {
112
- "realm_type": "wisdom",
113
- "realm_label": "philosophy",
114
- "lifecycle_stage": "peak",
115
- },
116
- },
117
- {
118
- "id": "wisdom_2",
119
- "content": "Courage is not the absence of fear, but the determination to act despite it.",
120
- "metadata": {
121
- "realm_type": "wisdom",
122
- "realm_label": "virtue",
123
- "lifecycle_stage": "emergence",
124
- },
125
- },
126
- {
127
- "id": "tech_1",
128
- "content": "The Warbler CDA system uses STAT7 addressing for multi-dimensional retrieval.",
129
- "metadata": {
130
- "realm_type": "technical",
131
- "realm_label": "documentation",
132
- "lifecycle_stage": "peak",
133
- },
134
- },
135
- {
136
- "id": "narrative_1",
137
- "content": "In the ancient library, the keeper of memories preserved stories across generations.",
138
- "metadata": {
139
- "realm_type": "narrative",
140
- "realm_label": "lore",
141
- "lifecycle_stage": "crystallization",
142
- },
143
- },
144
- {
145
- "id": "pattern_1",
146
- "content": "Patterns emerge when we observe the connections between seemingly unrelated events.",
147
- "metadata": {
148
- "realm_type": "pattern",
149
- "realm_label": "insight",
150
- "lifecycle_stage": "emergence",
151
- },
152
- },
153
- ]
154
-
155
-
156
- class PackManager:
157
- def __init__(self):
158
- self.cache_dir = Path.home() / ".warbler_cda" / "cache"
159
- self.cache_dir.mkdir(parents=True, exist_ok=True)
160
- self.metadata_file = self.cache_dir / "pack_metadata.json"
161
- self.skip_cache = os.getenv("WARBLER_SKIP_PACK_CACHE", "").lower() == "true"
162
- self.sample_only = os.getenv("WARBLER_SAMPLE_ONLY", "").lower() == "true"
163
- self.ingest_packs = os.getenv("WARBLER_INGEST_PACKS", "true").lower() == "true"
164
-
165
- def _load_metadata(self) -> Optional[Dict]:
166
- if not self.metadata_file.exists():
167
- return None
168
- try:
169
- with open(self.metadata_file, "r") as f:
170
- return json.load(f)
171
- except BaseException:
172
- return None
173
-
174
- def _save_metadata(self, metadata: Dict):
175
- try:
176
- with open(self.metadata_file, "w") as f:
177
- json.dump(metadata, f, indent=2)
178
- except Exception as e:
179
- print(f"[WARN] Failed to save pack metadata: {e}")
180
-
181
- def health_check(self, api, expected_doc_count: int = None) -> bool:
182
- if not api:
183
- return False
184
- try:
185
- current_size = api.get_context_store_size()
186
- if expected_doc_count and current_size < expected_doc_count:
187
- return False
188
- return current_size > 0
189
- except BaseException:
190
- return False
191
-
192
- def should_ingest_packs(self, api, pack_count: int) -> bool:
193
- if self.skip_cache or not self.ingest_packs or self.sample_only:
194
- return False
195
-
196
- if not self.health_check(api, expected_doc_count=10):
197
- return True
198
-
199
- metadata = self._load_metadata()
200
- if not metadata or metadata.get("pack_count") != pack_count:
201
- return True
202
-
203
- return False
204
-
205
- def mark_packs_ingested(self, pack_count: int, doc_count: int):
206
- metadata = {
207
- "ingested_at": time.time(),
208
- "pack_count": pack_count,
209
- "doc_count": doc_count,
210
- "status": "healthy",
211
- }
212
- self._save_metadata(metadata)
213
-
214
-
215
- pack_manager = PackManager()
216
 
217
 
218
- try:
219
- from warbler_cda import (
220
- RetrievalAPI,
221
- SemanticAnchorGraph,
222
- EmbeddingProviderFactory,
223
- STAT7RAGBridge,
224
- RetrievalQuery,
225
- RetrievalMode,
226
- )
227
- from warbler_cda.pack_loader import PackLoader
228
-
229
- WARBLER_AVAILABLE = True
230
- except ImportError:
231
- WARBLER_AVAILABLE = False
232
- print("Warning: Warbler CDA not installed. Using mock mode.")
233
-
234
- api = None
235
-
236
- if WARBLER_AVAILABLE:
237
- try:
238
- embedding_provider = EmbeddingProviderFactory.get_default_provider()
239
- semantic_anchors = SemanticAnchorGraph(embedding_provider=embedding_provider)
240
- stat7_bridge = STAT7RAGBridge()
241
-
242
- api = RetrievalAPI(
243
- semantic_anchors=semantic_anchors,
244
- embedding_provider=embedding_provider,
245
- stat7_bridge=stat7_bridge,
246
- config={"enable_stat7_hybrid": True},
247
- )
248
-
249
- packs_loaded = 0
250
-
251
- if pack_manager.sample_only:
252
- print("[INFO] Loading sample documents only (WARBLER_SAMPLE_ONLY=true)")
253
- for doc in SAMPLE_DOCS:
254
- api.add_document(doc["id"], doc["content"], doc["metadata"])
255
- packs_loaded = len(SAMPLE_DOCS)
256
- print(f"[OK] Loaded {packs_loaded} sample documents")
257
-
258
- elif pack_manager.ingest_packs:
259
- from warbler_cda.pack_sync import PackSync
260
-
261
- pack_sync = PackSync()
262
- sync_status = pack_sync.get_sync_status()
263
- print(f"[INFO] Pack Status: {sync_status}")
264
-
265
- pack_loader = PackLoader()
266
- pack_docs = pack_loader.discover_documents()
267
-
268
- if pack_docs and pack_manager.should_ingest_packs(api, len(pack_docs)):
269
- # Start background ingestion
270
- ingestion_thread = threading.Thread(
271
- target=background_ingest_packs, args=(api, pack_docs, pack_manager), daemon=True
272
- )
273
- ingestion_thread.start()
274
- packs_loaded = 0 # Will be updated asynchronously
275
- print(f"[INFO] Started background ingestion of {len(pack_docs)} documents")
276
-
277
- elif pack_docs:
278
- packs_loaded = len(pack_docs)
279
- print(f"[INFO] Using cached pack data ({packs_loaded} documents)")
280
-
281
- else:
282
- print("[INFO] No Warbler packs found. Using sample documents instead.")
283
- for doc in SAMPLE_DOCS:
284
- api.add_document(doc["id"], doc["content"], doc["metadata"])
285
- packs_loaded = len(SAMPLE_DOCS)
286
- print(f"[OK] Loaded {packs_loaded} sample documents")
287
-
288
- context_size = api.get_context_store_size()
289
- print(f"[OK] Total documents in context store: {context_size}")
290
-
291
- except Exception as e:
292
- print(f"[ERROR] Failed to initialize Warbler CDA: {e}")
293
- api = None
294
- import traceback
295
-
296
- traceback.print_exc()
297
-
298
-
299
- @spaces.GPU
300
- def query_warbler(
301
- query_text: str,
302
- max_results: int = 5,
303
- use_hybrid: bool = True,
304
- weight_semantic: float = 0.6,
305
- weight_stat7: float = 0.4,
306
- ) -> Tuple[str, str]:
307
- """
308
- Query the Warbler CDA system
309
-
310
- Returns:
311
- Tuple of (results_text, metrics_json)
312
- """
313
- if not WARBLER_AVAILABLE or not api:
314
- return "Warbler CDA not available. Please install the package.", "{}"
315
-
316
  if not query_text.strip():
317
- return "Please enter a query.", "{}"
318
-
319
- try:
320
- start_time = time.time()
321
-
322
- print(f"DEBUG: Context store size: {api.get_context_store_size()}")
323
-
324
- # Create query
325
- query = RetrievalQuery(
326
- query_id=f"demo_{int(time.time())}",
327
- mode=RetrievalMode.SEMANTIC_SIMILARITY,
328
- semantic_query=query_text,
329
- max_results=max_results,
330
- confidence_threshold=0.3,
331
- stat7_hybrid=use_hybrid,
332
- weight_semantic=weight_semantic,
333
- weight_stat7=weight_stat7,
334
- )
335
-
336
- print(f"DEBUG: Query created - ID: {query.query_id}, Text: {query_text}")
337
-
338
- # Execute query
339
- assembly = api.retrieve_context(query)
340
-
341
- print(
342
- f"DEBUG: Retrieved {len(assembly.results)} results, Assembly ID: {assembly.assembly_id}"
343
- )
344
-
345
- elapsed_ms = (time.time() - start_time) * 1000
346
-
347
- # Format results
348
- results_text = "# Query Results\n\n"
349
- results_text += f"**Query:** {query_text}\n\n"
350
- results_text += (
351
- f"**Mode:** {'Hybrid (Semantic + STAT7)' if use_hybrid else 'Semantic Only'}\n\n"
352
- )
353
- results_text += f"**Results Found:** {len(assembly.results)}\n\n"
354
- results_text += f"**Assembly Quality:** {assembly.assembly_quality:.3f}\n\n"
355
- results_text += f"**Execution Time:** {elapsed_ms:.1f}ms\n\n"
356
- results_text += "---\n\n"
357
-
358
- if assembly.results:
359
- for i, result in enumerate(assembly.results, 1):
360
- results_text += f"### Result {i}\n\n"
361
- results_text += f"**Relevance Score:** {result.relevance_score:.3f}\n\n"
362
-
363
- if use_hybrid:
364
- results_text += f"- Semantic Similarity: {result.semantic_similarity:.3f}\n"
365
- results_text += f"- STAT7 Resonance: {result.stat7_resonance:.3f}\n\n"
366
-
367
- results_text += f"**Content:** {result.content}\n\n"
368
- results_text += f"**Type:** {result.content_type}\n\n"
369
-
370
- if result.metadata:
371
- results_text += "**Metadata:**\n"
372
- for key, value in result.metadata.items():
373
- if key != "stat7": # Skip complex STAT7 object
374
- results_text += f"- {key}: {value}\n"
375
- results_text += "\n"
376
-
377
- results_text += "---\n\n"
378
- else:
379
- results_text += (
380
- "*No results found. Try adjusting your query or adding more documents.*\n"
381
- )
382
-
383
- # Metrics
384
- metrics = {
385
- "query_id": assembly.assembly_id,
386
- "result_count": len(assembly.results),
387
- "total_relevance": assembly.total_relevance,
388
- "assembly_quality": assembly.assembly_quality,
389
- "temporal_span_hours": assembly.temporal_span_hours,
390
- "anchor_coverage": len(assembly.anchor_coverage),
391
- "execution_time_ms": elapsed_ms,
392
- "hybrid_mode": use_hybrid,
393
- }
394
-
395
- metrics_json = json.dumps(metrics, indent=2)
396
-
397
- return results_text, metrics_json
398
-
399
- except Exception as e:
400
- return f"Error: {str(e)}", json.dumps({"error": str(e)}, indent=2)
401
-
402
-
403
- def add_document(doc_id: str, content: str, realm_type: str, realm_label: str) -> str:
404
- """Add a new document to the system"""
405
- if not WARBLER_AVAILABLE or not api:
406
- return "Warbler CDA not available."
407
-
408
- if not doc_id.strip() or not content.strip():
409
- return "Please provide both document ID and content."
410
-
411
- try:
412
- metadata = {
413
- "realm_type": realm_type,
414
- "realm_label": realm_label,
415
- "lifecycle_stage": "emergence",
416
- "activity_level": 0.7,
417
- }
418
-
419
- success = api.add_document(doc_id, content, metadata)
420
-
421
- if success:
422
- return f"[OK] Document '{doc_id}' added successfully!\n\nTotal documents: {api.get_context_store_size()}"
423
- else:
424
- return f"[ERROR] Document '{doc_id}' already exists."
425
-
426
- except Exception as e:
427
- return f"Error: {str(e)}"
428
 
429
 
430
  def get_system_stats() -> str:
431
- """Get system statistics"""
432
- if not WARBLER_AVAILABLE or not api:
433
- return "Warbler CDA not available."
434
-
435
- try:
436
- metrics = api.get_retrieval_metrics()
437
-
438
- stats = "# System Statistics\n\n"
439
- stats += f"**Total Documents:** {metrics['context_store_size']}\n\n"
440
- stats += f"**Total Queries:** {metrics['retrieval_metrics']['total_queries']}\n\n"
441
- stats += f"**Cache Hit Rate:** {metrics['cache_performance']['hit_rate']:.1%}\n\n"
442
- stats += f"**Average Results per Query:** {metrics['retrieval_metrics']['average_results_per_query']:.1f}\n\n"
443
- stats += f"**Average Retrieval Time:** {metrics['retrieval_metrics']['average_retrieval_time_ms']:.1f}ms\n\n"
444
- stats += f"**Hybrid Queries:** {metrics['retrieval_metrics']['hybrid_queries']}\n\n"
445
-
446
- stats += "## Quality Distribution\n\n"
447
- for quality, count in metrics["retrieval_metrics"]["quality_distribution"].items():
448
- stats += f"- {quality.capitalize()}: {count}\n"
449
-
450
- # Add ingestion status information
451
- global ingestion_status
452
- stats += "\n## Background Pack Ingestion\n\n"
453
-
454
- if ingestion_status["running"]:
455
- # Currently ingesting
456
- progress_percent = (ingestion_status["processed"] / ingestion_status["total_docs"] * 100) if ingestion_status["total_docs"] > 0 else 0
457
- eta_minutes = ingestion_status["eta"] / 60 if ingestion_status["eta"] > 0 else 0
458
-
459
- stats += "**Status:** 🟢 **ACTIVE** - Ingesting documents...\n\n"
460
- stats += "```\n"
461
- stats += f"Progress: {ingestion_status['processed']}/{ingestion_status['total_docs']} documents\n"
462
- stats += f"Complete: {progress_percent:.1f}%\n"
463
- stats += f"Rate: {ingestion_status['rate']:.1f} docs/sec\n"
464
- stats += f"ETA: {eta_minutes:.1f} minutes\n"
465
- if ingestion_status['failed'] > 0:
466
- stats += f"Failed: {ingestion_status['failed']} documents\n"
467
- stats += "```\n\n"
468
- elif ingestion_status["total_docs"] > 0:
469
- # Completed ingestion (has totals but not running)
470
- stats += "**Status:** ✅ **COMPLETE**\n\n"
471
- stats += f"**Last Ingestion:** Processed {ingestion_status['processed']} documents"
472
- if ingestion_status['failed'] > 0:
473
- stats += f" ({ingestion_status['failed']} failed)"
474
- stats += "\n\n"
475
- else:
476
- # No background ingestion detected
477
- stats += "**Status:** ⚪ **IDLE** - No background ingestion active\n\n"
478
-
479
- return stats
480
-
481
- except Exception as e:
482
- return f"Error: {str(e)}"
483
-
484
-
485
- with gr.Blocks(title="Warbler CDA - RAG System Demo") as demo:
486
- gr.Markdown(
487
- """
488
- # Warbler CDA - Cognitive Development Architecture
489
-
490
- Interactive demo of a production-ready RAG system with **STAT7 multi-dimensional addressing**.
491
-
492
- ## Features
493
- - **Semantic Search**: Find relevant documents using natural language
494
- - **STAT7 Hybrid Scoring**: Combine semantic similarity with 7-dimensional resonance
495
- - **Real-time Retrieval**: Sub-second query performance
496
- - **Provenance Tracking**: Full lineage and metadata preservation
497
- """
498
- )
499
-
500
  with gr.Tab("Query"):
501
  with gr.Row():
502
- with gr.Column(scale=2):
503
  query_input = gr.Textbox(
504
  label="Query",
505
- placeholder="Enter your search query (e.g., 'wisdom about courage')",
506
- lines=2,
507
  )
508
-
509
- with gr.Row():
510
- max_results = gr.Slider(
511
- minimum=1, maximum=10, value=5, step=1, label="Max Results"
512
- )
513
- use_hybrid = gr.Checkbox(label="Enable STAT7 Hybrid Scoring", value=True)
514
-
515
- with gr.Row():
516
- weight_semantic = gr.Slider(
517
- minimum=0.0, maximum=1.0, value=0.6, step=0.1, label="Semantic Weight"
518
- )
519
- weight_stat7 = gr.Slider(
520
- minimum=0.0, maximum=1.0, value=0.4, step=0.1, label="STAT7 Weight"
521
- )
522
-
523
- query_btn = gr.Button("Search", variant="primary")
524
-
525
- with gr.Column(scale=1):
526
- gr.Markdown(
527
- """
528
- ### Example Queries
529
- - "wisdom about courage"
530
- - "technical documentation"
531
- - "narrative patterns"
532
- - "ancient knowledge"
533
- - "system architecture"
534
- """
535
  )
536
-
537
- with gr.Row():
538
- results_output = gr.Markdown(label="Results")
539
-
540
- with gr.Row():
541
- metrics_output = gr.JSON(label="Metrics")
542
-
 
 
543
  query_btn.click(
544
  fn=query_warbler,
545
- inputs=[query_input, max_results, use_hybrid, weight_semantic, weight_stat7],
546
- outputs=[results_output, metrics_output],
547
  )
548
-
549
- with gr.Tab("Add Document"):
550
- with gr.Row():
551
- with gr.Column():
552
- doc_id_input = gr.Textbox(label="Document ID", placeholder="unique_doc_id")
553
- content_input = gr.Textbox(
554
- label="Content", placeholder="Enter document content...", lines=5
555
- )
556
-
557
- with gr.Row():
558
- realm_type_input = gr.Dropdown(
559
- choices=["wisdom", "technical", "narrative", "pattern", "data"],
560
- value="wisdom",
561
- label="Realm Type",
562
- )
563
- realm_label_input = gr.Textbox(
564
- label="Realm Label", placeholder="e.g., philosophy, documentation"
565
- )
566
-
567
- add_btn = gr.Button("Add Document", variant="primary")
568
- add_output = gr.Textbox(label="Status", lines=3)
569
-
570
- add_btn.click(
571
- fn=add_document,
572
- inputs=[doc_id_input, content_input, realm_type_input, realm_label_input],
573
- outputs=add_output,
574
- )
575
-
576
  with gr.Tab("System Stats"):
577
- stats_btn = gr.Button("Refresh Statistics", variant="primary")
578
  stats_output = gr.Markdown()
579
-
580
  stats_btn.click(fn=get_system_stats, outputs=stats_output)
581
-
582
- # Auto-load stats on tab open
583
  demo.load(fn=get_system_stats, outputs=stats_output)
584
-
585
- # Refresh stats every 10 seconds if ingestion is running
586
- def auto_refresh_stats():
587
- while ingestion_status["running"]:
588
- time.sleep(10)
589
- # Note: In Gradio, we can't directly update from background thread
590
- # This would need a more complex setup with queues or websockets
591
- # For now, users can manually refresh
592
-
593
  with gr.Tab("About"):
594
- gr.Markdown(
595
- """
596
  ## About Warbler CDA
597
-
598
- Warbler CDA (Cognitive Development Architecture) is a production-ready RAG system featuring:
599
-
600
- ### STAT7 Multi-Dimensional Addressing
601
-
602
- Each document is addressed in 7 dimensions:
603
- 1. **Realm**: Domain classification
604
- 2. **Lineage**: Generation/version
605
- 3. **Adjacency**: Connectivity score
606
- 4. **Horizon**: Lifecycle stage
607
- 5. **Luminosity**: Activity level
608
- 6. **Polarity**: Resonance factor
609
- 7. **Dimensionality**: Complexity level
610
-
611
- ### Hybrid Scoring
612
-
613
- Combines traditional semantic similarity with STAT7 resonance for superior retrieval:
614
-
615
- ```
616
- hybrid_score = (0.6 × semantic) + (0.4 × stat7_resonance)
617
- ```
618
-
619
- ### Validated Performance
620
-
621
- - **EXP-01**: 0% collision rate across 10K+ entities
622
- - **EXP-02**: Sub-millisecond retrieval at 100K scale
623
- - **EXP-03**: All 7 dimensions proven necessary
624
- - **EXP-10**: Narrative coherence preserved under concurrent load
625
-
626
  ### Links
627
-
628
- - [GitHub Repository](https://github.com/tiny-walnut-games/the-seed)
629
- - [Documentation](https://github.com/tiny-walnut-games/the-seed/blob/main/README.md)
630
- - [PyPI Package](https://pypi.org/project/warbler-cda/)
631
-
632
- ---
633
-
634
- Made with love by Tiny Walnut Games
635
- """
636
- )
637
 
638
  if __name__ == "__main__":
639
- demo.launch()
 
1
+ #!/usr/bin/env python3
2
  """
3
+ Gradio interface for Warbler CDA on HuggingFace Spaces.
4
+
5
+ Provides a web UI for the FractalStat RAG system with GPU acceleration.
6
  """
7
 
8
+ import gradio as gr
9
  import json
10
+ from typing import Dict, Any, List
11
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Import Warbler CDA components
14
+ from warbler_cda.retrieval_api import RetrievalAPI, RetrievalQuery, RetrievalMode
15
+ from warbler_cda.embeddings import EmbeddingProviderFactory
16
+ from warbler_cda.fractalstat_rag_bridge import FractalStatRAGBridge
17
+ from warbler_cda.pack_loader import PackLoader
18
+
19
+ # Initialize the system
20
+ print("🚀 Initializing Warbler CDA...")
21
+
22
+ # Create embedding provider (will use sentence-transformers with GPU if available)
23
+ embedding_provider = EmbeddingProviderFactory.get_default_provider()
24
+ print(f"✅ Embedding provider: {embedding_provider.get_provider_info()['provider_id']}")
25
+
26
+ # Create FractalStat bridge
27
+ fractalstat_bridge = FractalStatRAGBridge()
28
+ print("✅ FractalStat bridge initialized")
29
+
30
+ # Create RetrievalAPI
31
+ api = RetrievalAPI(
32
+ embedding_provider=embedding_provider,
33
+ fractalstat_bridge=fractalstat_bridge,
34
+ config={"enable_fractalstat_hybrid": True}
35
+ )
36
+ print("✅ RetrievalAPI initialized")
37
+
38
+ # Load packs
39
+ print("📚 Loading Warbler packs...")
40
+ pack_loader = PackLoader()
41
+ documents = pack_loader.discover_documents()
42
+ print(f"✅ Found {len(documents)} documents")
43
+
44
+ # Ingest documents
45
+ for doc in documents:
46
+ api.add_document(
47
+ doc_id=doc["id"],
48
+ content=doc["content"],
49
+ metadata=doc.get("metadata", {})
50
  )
51
 
52
+ print(f"🎉 Warbler CDA ready with {api.get_context_store_size()} documents!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
+ def query_warbler(query_text: str, max_results: int = 5, use_hybrid: bool = True) -> str:
56
+ """Query the Warbler CDA system."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  if not query_text.strip():
58
+ return "Please enter a query."
59
+
60
+ start_time = time.time()
61
+
62
+ # Create query
63
+ query = RetrievalQuery(
64
+ query_id=f"gradio_{int(time.time())}",
65
+ mode=RetrievalMode.SEMANTIC_SIMILARITY,
66
+ semantic_query=query_text,
67
+ max_results=max_results,
68
+ fractalstat_hybrid=use_hybrid
69
+ )
70
+
71
+ # Execute query
72
+ assembly = api.retrieve_context(query)
73
+
74
+ elapsed_ms = (time.time() - start_time) * 1000
75
+
76
+ # Format results
77
+ output = f"## Query Results\n\n"
78
+ output += f"**Query:** {query_text}\n\n"
79
+ output += f"**Found:** {len(assembly.results)} results in {elapsed_ms:.0f}ms\n\n"
80
+ output += f"**Quality Score:** {assembly.assembly_quality:.3f}\n\n"
81
+
82
+ if assembly.results:
83
+ output += "### Top Results\n\n"
84
+ for i, result in enumerate(assembly.results[:max_results], 1):
85
+ output += f"**{i}. Score: {result.relevance_score:.3f}**\n\n"
86
+ output += f"{result.content[:300]}...\n\n"
87
+ if use_hybrid:
88
+ output += f"- Semantic: {result.semantic_similarity:.3f}\n"
89
+ output += f"- FractalStat: {result.fractalstat_resonance:.3f}\n\n"
90
+ output += "---\n\n"
91
+ else:
92
+ output += "No results found.\n"
93
+
94
+ return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
  def get_system_stats() -> str:
98
+ """Get system statistics."""
99
+ metrics = api.get_retrieval_metrics()
100
+
101
+ output = "## System Statistics\n\n"
102
+ output += f"**Total Documents:** {api.get_context_store_size():,}\n\n"
103
+ output += f"**Total Queries:** {metrics['retrieval_metrics']['total_queries']}\n\n"
104
+ output += f"**Cache Hit Rate:** {metrics['cache_performance']['hit_rate']:.1%}\n\n"
105
+ output += f"**Avg Quality:** {metrics['system_health']['average_quality']:.3f}\n\n"
106
+
107
+ return output
108
+
109
+
110
+ # Create Gradio interface
111
+ with gr.Blocks(title="Warbler CDA - FractalStat RAG") as demo:
112
+ gr.Markdown("""
113
+ # 🦜 Warbler CDA - FractalStat RAG System
114
+
115
+ Semantic retrieval with 8D FractalStat multi-dimensional addressing.
116
+
117
+ **Features:**
118
+ - 2.6M+ documents from arXiv, education, fiction, and more
119
+ - Hybrid semantic + FractalStat scoring
120
+ - Bob the Skeptic bias detection
121
+ - Narrative coherence analysis
122
+ """)
123
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  with gr.Tab("Query"):
125
  with gr.Row():
126
+ with gr.Column():
127
  query_input = gr.Textbox(
128
  label="Query",
129
+ placeholder="Enter your search query...",
130
+ lines=2
131
  )
132
+ max_results = gr.Slider(
133
+ minimum=1,
134
+ maximum=20,
135
+ value=5,
136
+ step=1,
137
+ label="Max Results"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  )
139
+ use_hybrid = gr.Checkbox(
140
+ label="Enable FractalStat Hybrid Scoring",
141
+ value=True
142
+ )
143
+ query_btn = gr.Button("Search", variant="primary")
144
+
145
+ with gr.Column():
146
+ results_output = gr.Markdown(label="Results")
147
+
148
  query_btn.click(
149
  fn=query_warbler,
150
+ inputs=[query_input, max_results, use_hybrid],
151
+ outputs=results_output
152
  )
153
+
154
+ gr.Examples(
155
+ examples=[
156
+ ["hello world", 5, True],
157
+ ["rotation dynamics of Saturn's moons", 5, True],
158
+ ["anything about machine learning", 10, False],
159
+ ],
160
+ inputs=[query_input, max_results, use_hybrid]
161
+ )
162
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  with gr.Tab("System Stats"):
 
164
  stats_output = gr.Markdown()
165
+ stats_btn = gr.Button("Refresh Stats")
166
  stats_btn.click(fn=get_system_stats, outputs=stats_output)
 
 
167
  demo.load(fn=get_system_stats, outputs=stats_output)
168
+
 
 
 
 
 
 
 
 
169
  with gr.Tab("About"):
170
+ gr.Markdown("""
 
171
  ## About Warbler CDA
172
+
173
+ Warbler CDA is a production-ready RAG system featuring:
174
+
175
+ - **8D FractalStat Addressing**: Multi-dimensional intelligence for superior retrieval
176
+ - **Semantic Anchors**: Persistent memory with provenance tracking
177
+ - **Bob the Skeptic**: Automatic bias detection and validation
178
+ - **Narrative Coherence**: Quality analysis beyond simple similarity
179
+
180
+ ### Performance
181
+
182
+ - 84% test coverage with 587 passing tests
183
+ - 9-28s query response time
184
+ - 0.88 average relevance score
185
+ - 75-83% narrative coherence
186
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  ### Links
188
+
189
+ - [Source Code](https://gitlab.com/tiny-walnut-games/the-seed)
190
+ - [Documentation](https://gitlab.com/tiny-walnut-games/the-seed/-/tree/main/warbler-cda-package)
191
+ - [Performance Report](https://gitlab.com/tiny-walnut-games/the-seed/-/blob/main/warbler-cda-package/WARBLER_CDA_PERFORMANCE_REPORT.md)
192
+ """)
 
 
 
 
 
193
 
194
  if __name__ == "__main__":
195
+ demo.launch(server_name="0.0.0.0", server_port=7860)
coverage.xml CHANGED
@@ -1,12 +1,12 @@
1
  <?xml version="1.0" ?>
2
- <coverage version="7.12.0" timestamp="1764711627113" lines-valid="5038" lines-covered="3853" line-rate="0.7648" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0">
3
  <!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.12.0 -->
4
  <!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
5
  <sources>
6
  <source>/builds/tiny-walnut-games/the-seed/warbler-cda-package/warbler_cda</source>
7
  </sources>
8
  <packages>
9
- <package name="." line-rate="0.8338" branch-rate="0" complexity="0">
10
  <classes>
11
  <class name="__init__.py" filename="__init__.py" complexity="0" line-rate="0.6786" branch-rate="0">
12
  <methods/>
@@ -965,7 +965,7 @@
965
  <line number="664" hits="1"/>
966
  </lines>
967
  </class>
968
- <class name="evaporation.py" filename="evaporation.py" complexity="0" line-rate="0.7902" branch-rate="0">
969
  <methods/>
970
  <lines>
971
  <line number="3" hits="1"/>
@@ -1051,14 +1051,14 @@
1051
  <line number="163" hits="1"/>
1052
  <line number="168" hits="1"/>
1053
  <line number="171" hits="1"/>
1054
- <line number="172" hits="1"/>
1055
  <line number="173" hits="1"/>
1056
  <line number="174" hits="1"/>
1057
- <line number="175" hits="0"/>
1058
  <line number="176" hits="0"/>
1059
- <line number="177" hits="0"/>
1060
  <line number="178" hits="0"/>
1061
- <line number="180" hits="0"/>
1062
  <line number="183" hits="1"/>
1063
  <line number="184" hits="1"/>
1064
  <line number="187" hits="1"/>
@@ -1116,7 +1116,7 @@
1116
  <line number="301" hits="1"/>
1117
  <line number="302" hits="0"/>
1118
  <line number="304" hits="1"/>
1119
- <line number="305" hits="0"/>
1120
  <line number="307" hits="1"/>
1121
  <line number="311" hits="1"/>
1122
  <line number="313" hits="1"/>
@@ -1173,9 +1173,9 @@
1173
  <line number="401" hits="1"/>
1174
  <line number="402" hits="1"/>
1175
  <line number="403" hits="1"/>
1176
- <line number="404" hits="0"/>
1177
- <line number="405" hits="1"/>
1178
- <line number="406" hits="1"/>
1179
  <line number="407" hits="0"/>
1180
  <line number="408" hits="0"/>
1181
  <line number="410" hits="0"/>
 
1
  <?xml version="1.0" ?>
2
+ <coverage version="7.12.0" timestamp="1764715930755" lines-valid="5038" lines-covered="3855" line-rate="0.7652" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0">
3
  <!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.12.0 -->
4
  <!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
5
  <sources>
6
  <source>/builds/tiny-walnut-games/the-seed/warbler-cda-package/warbler_cda</source>
7
  </sources>
8
  <packages>
9
+ <package name="." line-rate="0.8344" branch-rate="0" complexity="0">
10
  <classes>
11
  <class name="__init__.py" filename="__init__.py" complexity="0" line-rate="0.6786" branch-rate="0">
12
  <methods/>
 
965
  <line number="664" hits="1"/>
966
  </lines>
967
  </class>
968
+ <class name="evaporation.py" filename="evaporation.py" complexity="0" line-rate="0.7967" branch-rate="0">
969
  <methods/>
970
  <lines>
971
  <line number="3" hits="1"/>
 
1051
  <line number="163" hits="1"/>
1052
  <line number="168" hits="1"/>
1053
  <line number="171" hits="1"/>
1054
+ <line number="172" hits="0"/>
1055
  <line number="173" hits="1"/>
1056
  <line number="174" hits="1"/>
1057
+ <line number="175" hits="1"/>
1058
  <line number="176" hits="0"/>
1059
+ <line number="177" hits="1"/>
1060
  <line number="178" hits="0"/>
1061
+ <line number="180" hits="1"/>
1062
  <line number="183" hits="1"/>
1063
  <line number="184" hits="1"/>
1064
  <line number="187" hits="1"/>
 
1116
  <line number="301" hits="1"/>
1117
  <line number="302" hits="0"/>
1118
  <line number="304" hits="1"/>
1119
+ <line number="305" hits="1"/>
1120
  <line number="307" hits="1"/>
1121
  <line number="311" hits="1"/>
1122
  <line number="313" hits="1"/>
 
1173
  <line number="401" hits="1"/>
1174
  <line number="402" hits="1"/>
1175
  <line number="403" hits="1"/>
1176
+ <line number="404" hits="1"/>
1177
+ <line number="405" hits="0"/>
1178
+ <line number="406" hits="0"/>
1179
  <line number="407" hits="0"/>
1180
  <line number="408" hits="0"/>
1181
  <line number="410" hits="0"/>
test-results.xml CHANGED
The diff for this file is too large to render. See raw diff