EtashGuha committed on
Commit
f436e9b
·
verified ·
1 Parent(s): d744b34

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # sft_a1_magicoder__Qwen3-8B
18
 
19
- This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the /e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--perturbed-docker-exp-magicoder-tasks-2_glm_4.7_traces_jupiter_upsampled_10k/snapshots/1d42cc3dc0818f1642c4f0120875810d492cd923_thinking_preprocessed dataset.
20
 
21
  ## Model description
22
 
 
16
 
17
  # sft_a1_magicoder__Qwen3-8B
18
 
19
+ This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the /e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--Magicoder-Evol-Instruct-110K-sandboxes-1_10k_glm_4.7_traces_jupiter/snapshots/5aeec71d5cdc4cd588e71eed903e3ab2f7b35051_thinking_preprocessed dataset.
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "achieved_tflops_per_gpu": 0.0034522879542614945,
3
- "achieved_tflops_per_gpu_theoretical": 1151.8875513598773,
4
  "epoch": 7.0,
5
  "loss_nan_ranks": 0,
6
- "loss_rank_avg": 0.20906896889209747,
7
- "mfu_percent": 0.00024397794729763214,
8
- "mfu_percent_theoretical": 81.40548066147542,
9
- "total_flos": 702151473364992.0,
10
- "train_loss": 0.11156423039722288,
11
- "train_runtime": 12711.7053,
12
- "train_samples_per_second": 5.432,
13
- "train_steps_per_second": 0.34,
14
- "valid_targets_mean": 3750.0,
15
- "valid_targets_min": 750
16
  }
 
1
  {
2
+ "achieved_tflops_per_gpu": 0.002110187398451811,
3
+ "achieved_tflops_per_gpu_theoretical": 650.4776760620482,
4
  "epoch": 7.0,
5
  "loss_nan_ranks": 0,
6
+ "loss_rank_avg": 0.2384590357542038,
7
+ "mfu_percent": 0.00014912985148069337,
8
+ "mfu_percent_theoretical": 45.97015378530376,
9
+ "total_flos": 688651439570944.0,
10
+ "train_loss": 0.26049303336732077,
11
+ "train_runtime": 20396.6316,
12
+ "train_samples_per_second": 3.064,
13
+ "train_steps_per_second": 0.192,
14
+ "valid_targets_mean": 4082.4,
15
+ "valid_targets_min": 866
16
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e345b6afa60fa4829f5948b59ba8c50e55cb24728f0a0680678a76131d155f6
3
  size 4902257696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daffa32e7444e736a1c517d62c2a55d9f96260371213ac37e97ac9827b55b478
3
  size 4902257696
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7f01ba11e70af275526f9878acfb0967b7aa77386c0cfe13ffdf81cd0fa2854
3
  size 4915960368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfe4bb96e03e07782586ca63884f59543a475c5932ac4ccf440848b3736ff540
3
  size 4915960368
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d25dd5e86fc1746044ec8925f77ec7e7454177ae06c88238f8d14bd506c9a86
3
  size 4983068496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc931313941b618ca7de44a63927fa3a1e43ed4b723a103c01493d683889d17b
3
  size 4983068496
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e94d06ce0ca1539eb63055462f2bfb734ce12783f11f9ce8f55e7a63078b4bfc
3
  size 1580230264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ab55fd4c4fd1cdb8d41f5aee283940231d28eba52cc99ebf70cf3cbf11b598d
3
  size 1580230264
run_summary.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "agent_name": "1d42cc3dc0818f1642c4f0120875810d492cd923_thinking_preprocessed",
3
  "training_start": null,
4
  "training_end": null,
5
  "created_by": "raoof1",
6
  "base_model_name": "Qwen/Qwen3-8B",
7
- "dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--perturbed-docker-exp-magicoder-tasks-2_glm_4.7_traces_jupiter_upsampled_10k/snapshots/1d42cc3dc0818f1642c4f0120875810d492cd923_thinking_preprocessed",
8
  "training_type": "SFT",
9
  "training_parameters": "https://huggingface.co/DCAgent/a1-magicoder/blob/main/config.json",
10
  "wandb_link": null,
 
1
  {
2
+ "agent_name": "5aeec71d5cdc4cd588e71eed903e3ab2f7b35051_thinking_preprocessed",
3
  "training_start": null,
4
  "training_end": null,
5
  "created_by": "raoof1",
6
  "base_model_name": "Qwen/Qwen3-8B",
7
+ "dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--Magicoder-Evol-Instruct-110K-sandboxes-1_10k_glm_4.7_traces_jupiter/snapshots/5aeec71d5cdc4cd588e71eed903e3ab2f7b35051_thinking_preprocessed",
8
  "training_type": "SFT",
9
  "training_parameters": "https://huggingface.co/DCAgent/a1-magicoder/blob/main/config.json",
10
  "wandb_link": null,
train_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "achieved_tflops_per_gpu": 0.0034522879542614945,
3
- "achieved_tflops_per_gpu_theoretical": 1151.8875513598773,
4
  "epoch": 7.0,
5
  "loss_nan_ranks": 0,
6
- "loss_rank_avg": 0.20906896889209747,
7
- "mfu_percent": 0.00024397794729763214,
8
- "mfu_percent_theoretical": 81.40548066147542,
9
- "total_flos": 702151473364992.0,
10
- "train_loss": 0.11156423039722288,
11
- "train_runtime": 12711.7053,
12
- "train_samples_per_second": 5.432,
13
- "train_steps_per_second": 0.34,
14
- "valid_targets_mean": 3750.0,
15
- "valid_targets_min": 750
16
  }
 
1
  {
2
+ "achieved_tflops_per_gpu": 0.002110187398451811,
3
+ "achieved_tflops_per_gpu_theoretical": 650.4776760620482,
4
  "epoch": 7.0,
5
  "loss_nan_ranks": 0,
6
+ "loss_rank_avg": 0.2384590357542038,
7
+ "mfu_percent": 0.00014912985148069337,
8
+ "mfu_percent_theoretical": 45.97015378530376,
9
+ "total_flos": 688651439570944.0,
10
+ "train_loss": 0.26049303336732077,
11
+ "train_runtime": 20396.6316,
12
+ "train_samples_per_second": 3.064,
13
+ "train_steps_per_second": 0.192,
14
+ "valid_targets_mean": 4082.4,
15
+ "valid_targets_min": 866
16
  }
trainer_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
training_loss.png CHANGED