James Edmunds
Checkpoint! local working probably. new embeddings, added to readme, additional scripts, updated process_lyrics and upload_embeddings and added some testscripts.
d147321 | import sqlite3 | |
| import shutil | |
| from pathlib import Path | |
| import sys | |
# Put the repository root (the directory above this script's folder) on the
# import path so the project-local import that follows can be resolved when
# this file is executed directly.
_this_dir = Path(__file__).parent
project_root = _this_dir.parent
sys.path.append(str(project_root))
| from config.settings import Settings | |
def cleanup_old_embeddings():
    """Interactively delete Chroma index directories the database no longer references.

    Scans the directory returned by ``Settings.get_chroma_path()`` for
    UUID-shaped sub-directories, reads the IDs still recorded in
    ``chroma.sqlite3`` and, for every directory whose name is not among
    them, asks on stdin whether it should be removed. A directory is only
    deleted after an explicit ``y`` answer.

    Returns:
        None. Progress and results are printed to stdout.

    Raises:
        sqlite3.Error: If the database cannot be queried.
        OSError: If a confirmed directory cannot be removed.
    """
    chroma_path = Settings.get_chroma_path()

    # The pattern only checks for four dashes, so it also matches plain files;
    # keep directories only because shutil.rmtree() fails on anything else.
    collection_dirs = [
        path for path in chroma_path.glob("*-*-*-*-*") if path.is_dir()
    ]
    print("\nFound collection directories:")
    for dir_path in collection_dirs:
        print(f"- {dir_path.name}")

    sqlite_file = chroma_path / "chroma.sqlite3"
    if not sqlite_file.is_file():
        # sqlite3.connect() would silently create an empty database file here
        # and the query below would then fail on the missing table.
        print(f"\nNo Chroma database found at {sqlite_file}; nothing to clean up.")
        return

    # Read everything we need up front and close the connection before
    # prompting, so the database is not held open while waiting on the user.
    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT id FROM collections")
        active_ids = {row[0] for row in cursor.fetchall()}

        # NOTE(review): Chroma's persistent client appears to name its on-disk
        # index directories after vector *segment* IDs rather than collection
        # IDs. Treat segment IDs as in use as well so live data is never
        # offered for deletion; confirm against the installed chromadb version.
        cursor.execute(
            "SELECT name FROM sqlite_master "
            "WHERE type = 'table' AND name = 'segments'"
        )
        if cursor.fetchone() is not None:
            cursor.execute("SELECT id FROM segments")
            segment_ids = {row[0] for row in cursor.fetchall()}
        else:
            segment_ids = set()
    finally:
        conn.close()

    print("\nActive collection IDs:")
    for collection_id in sorted(active_ids):
        print(f"- {collection_id}")

    if segment_ids:
        print("\nActive segment IDs:")
        for segment_id in sorted(segment_ids):
            print(f"- {segment_id}")

    referenced_ids = active_ids | segment_ids

    # Offer every directory that nothing in the database refers to.
    for dir_path in collection_dirs:
        if dir_path.name in referenced_ids:
            continue

        print(f"\nFound unused collection directory: {dir_path.name}")
        response = input("Delete this directory? (y/N): ")
        if response.strip().lower() == 'y':
            shutil.rmtree(dir_path)
            print(f"Deleted: {dir_path}")
        else:
            print("Skipped deletion")
# Run the interactive cleanup only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    cleanup_old_embeddings()