"""Interactively delete Chroma embedding directories that the database no longer references."""

import sqlite3
import shutil
from contextlib import closing
from pathlib import Path
import sys

# Add project root to path so that ``config.settings`` resolves when this
# file is executed directly as a script.
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))

from config.settings import Settings  # noqa: E402  (must follow the sys.path tweak)


def _find_collection_dirs(chroma_path):
    """Return the UUID-shaped *directories* directly under ``chroma_path``, sorted by name."""
    # The glob also matches regular files whose names contain four dashes;
    # shutil.rmtree() would fail on those, so keep directories only.
    return sorted(p for p in chroma_path.glob("*-*-*-*-*") if p.is_dir())


def _load_active_ids(sqlite_file):
    """Read the IDs that are still referenced by the Chroma metadata database.

    Returns:
        A ``(collection_ids, segment_ids)`` tuple of sets. ``segment_ids`` is
        empty when the database has no ``segments`` table.

    Raises:
        sqlite3.OperationalError: if the database cannot be opened (e.g. the
            file does not exist) or the ``collections`` table is missing.
    """
    # Open read-only through a URI: a plain sqlite3.connect(path) silently
    # creates an empty database file when the path does not exist, and this
    # script never needs to write to the database anyway.
    db_uri = f"{sqlite_file.resolve().as_uri()}?mode=ro"
    with closing(sqlite3.connect(db_uri, uri=True)) as conn:
        collection_ids = {row[0] for row in conn.execute("SELECT id FROM collections")}

        # NOTE(review): Chroma's persistent store appears to name its on-disk
        # index directories after vector *segment* IDs rather than collection
        # IDs - confirm against the Chroma version in use. Treating segment
        # IDs as active as well can only make this script more conservative.
        has_segments = conn.execute(
            "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'segments'"
        ).fetchone()
        if has_segments:
            segment_ids = {row[0] for row in conn.execute("SELECT id FROM segments")}
        else:
            segment_ids = set()

    return collection_ids, segment_ids


def cleanup_old_embeddings():
    """Clean up old embedding directories.

    Lists every UUID-named directory under the configured Chroma path, reads
    the IDs still recorded in ``chroma.sqlite3``, and asks for confirmation
    on stdin before deleting each directory whose name matches none of them.

    Raises:
        sqlite3.OperationalError: if ``chroma.sqlite3`` cannot be opened or
            queried. Nothing is deleted in that case.
    """
    chroma_path = Settings.get_chroma_path()

    # List all UUID directories
    collection_dirs = _find_collection_dirs(chroma_path)
    print("\nFound collection directories:")
    for dir_path in collection_dirs:
        print(f"- {dir_path.name}")

    # Get current collection info from database
    collection_ids, segment_ids = _load_active_ids(chroma_path / "chroma.sqlite3")

    print("\nActive collection IDs:")
    for collection_id in sorted(collection_ids):
        print(f"- {collection_id}")

    if segment_ids:
        print("\nActive segment IDs:")
        for segment_id in sorted(segment_ids):
            print(f"- {segment_id}")

    active_ids = collection_ids | segment_ids

    # Find directories that don't match any active ID and offer to delete them
    for dir_path in collection_dirs:
        if dir_path.name in active_ids:
            continue

        print(f"\nFound unused collection directory: {dir_path.name}")
        response = input("Delete this directory? (y/N): ")
        # Anything other than an explicit "y" keeps the directory.
        if response.strip().lower() == 'y':
            shutil.rmtree(dir_path)
            print(f"Deleted: {dir_path}")
        else:
            print("Skipped deletion")


if __name__ == "__main__":
    cleanup_old_embeddings()