James Edmunds committed on
Commit
57df620
·
1 Parent(s): 9567ff8

Stage 1 HF Integration. Added upload_embeddings.py

Browse files
Files changed (1) hide show
  1. scripts/upload_embeddings.py +36 -0
scripts/upload_embeddings.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Upload embeddings to HuggingFace"""
2
+ import sys
3
+ from pathlib import Path
4
+ from huggingface_hub import HfApi
5
+ from dotenv import load_dotenv
6
+
7
+ # Add parent directory to path
8
+ sys.path.append(str(Path(__file__).parent.parent))
9
+ from config.settings import Settings
10
+
11
+
12
def main():
    """Upload the local embeddings directory to the configured HuggingFace dataset.

    Reads ``Settings.EMBEDDINGS_DIR``, ``Settings.HF_DATASET`` and
    ``Settings.HF_TOKEN``, prints the combined size of the files found, and
    pushes the directory with ``HfApi.upload_folder`` using
    ``repo_type="dataset"``.

    Returns early, without contacting the Hub, when the directory contains
    no files.

    Raises:
        ValueError: If ``Settings.EMBEDDINGS_DIR`` is not an existing
            directory.
    """
    print("Starting upload process...")

    embeddings_dir = Path(Settings.EMBEDDINGS_DIR)

    # Validate up front so a wrong path is reported clearly, before any
    # network call, instead of showing up as an empty 0.00 MB listing.
    if not embeddings_dir.is_dir():
        raise ValueError(f"Embeddings directory not found: {embeddings_dir}")

    # Walk the tree once; the listing feeds both the empty check and the
    # size report.
    files = [path for path in embeddings_dir.rglob("*") if path.is_file()]
    if not files:
        print(f"No files found in {embeddings_dir}; nothing to upload.")
        return

    # Print size info
    total_size = sum(path.stat().st_size for path in files)
    print(f"Found embeddings: {total_size / 1024 / 1024:.2f} MB")

    api = HfApi(token=Settings.HF_TOKEN)

    print(f"Uploading to {Settings.HF_DATASET}...")
    api.upload_folder(
        folder_path=str(embeddings_dir),
        repo_id=Settings.HF_DATASET,
        repo_type="dataset",
    )

    print("Upload complete!")
32
+
33
+
34
if __name__ == "__main__":
    # Script entry point: load variables from a .env file into the process
    # environment, then run the upload.
    # NOTE(review): config.settings is imported at module top, i.e. before
    # this load_dotenv() call. If Settings reads environment variables at
    # import time, values from .env would not be picked up -- confirm
    # against config/settings.py.
    load_dotenv()
    main()