# LyrGen2/scripts/cleanup_old_embeddings.py
# Author: James Edmunds
# Commit d147321: Checkpoint - probably working locally. New embeddings, README
# additions, additional scripts, updated process_lyrics and upload_embeddings,
# and some test scripts added.
import sqlite3
import shutil
from pathlib import Path
import sys
# Add the project root to the import path. The root is taken to be the parent
# of the directory containing this file; appending it to sys.path lets the
# `config.settings` import below resolve when this file is run as a script.
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))
from config.settings import Settings
def cleanup_old_embeddings():
    """Interactively delete Chroma directories that no active record references.

    Scans the Chroma persistence directory (``Settings.get_chroma_path()``)
    for UUID-style sub-directories, reads the IDs recorded in
    ``chroma.sqlite3``, and asks for confirmation before removing each
    directory whose name matches none of them.

    A directory counts as in use when its name equals a row ID in either the
    ``collections`` table or the ``segments`` table. Chroma's persistent index
    directories are named after segment IDs rather than collection IDs, so
    checking ``collections`` alone would offer live data for deletion.
    NOTE(review): confirm the directory naming against the installed Chroma
    version; including segment IDs only ever makes the cleanup more cautious.

    Returns:
        None. Progress is printed to stdout; deletions happen on disk.

    Raises:
        sqlite3.Error: If the database cannot be read (for example, the
            ``collections`` table is missing).
    """
    chroma_path = Settings.get_chroma_path()

    # The glob pattern can also match plain files; only directories are
    # candidates, because shutil.rmtree() raises on anything else.
    collection_dirs = [
        path for path in chroma_path.glob("*-*-*-*-*") if path.is_dir()
    ]
    print("\nFound collection directories:")
    for dir_path in collection_dirs:
        print(f"- {dir_path.name}")

    sqlite_file = chroma_path / "chroma.sqlite3"
    if not sqlite_file.is_file():
        # sqlite3.connect() would silently create an empty database file here
        # and the query below would then fail on the missing table.
        print(f"\nDatabase not found: {sqlite_file}; nothing was deleted.")
        return

    # Read every ID up front so the connection is not held open while the
    # script waits on interactive prompts.
    conn = sqlite3.connect(sqlite_file)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id FROM collections")
        active_ids = {row[0] for row in cursor.fetchall()}

        try:
            cursor.execute("SELECT id FROM segments")
        except sqlite3.OperationalError:
            # No segments table in this schema: fall back to collection IDs.
            segment_ids = set()
        else:
            segment_ids = {row[0] for row in cursor.fetchall()}
    finally:
        conn.close()

    print("\nActive collection IDs:")
    for collection_id in active_ids:
        print(f"- {collection_id}")
    if segment_ids:
        print("\nActive segment IDs:")
        for segment_id in segment_ids:
            print(f"- {segment_id}")

    # Only directories referenced by neither table are offered for deletion.
    in_use_ids = active_ids | segment_ids
    for dir_path in collection_dirs:
        if dir_path.name in in_use_ids:
            continue
        print(f"\nFound unused collection directory: {dir_path.name}")
        response = input("Delete this directory? (y/N): ")
        # Anything other than an explicit "y" keeps the directory.
        if response.strip().lower() == 'y':
            shutil.rmtree(dir_path)
            print(f"Deleted: {dir_path}")
        else:
            print("Skipped deletion")
# Script entry point: run the interactive cleanup only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    cleanup_old_embeddings()