Commit 0bfc910 (verified), committed by ekinakyurek · 1 parent: f7c7b65

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full list.
Files changed (50):
  1. 00576224/adapter_config.json +1 -0
  2. 00576224/adapter_model.bin +3 -0
  3. 00576224/config.json +1 -0
  4. 00576224/log_1730923439.txt +190 -0
  5. 00576224/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  6. 00576224/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  7. 009d5c81/adapter_config.json +1 -0
  8. 009d5c81/adapter_model.bin +3 -0
  9. 009d5c81/config.json +1 -0
  10. 009d5c81/log_1730923494.txt +250 -0
  11. 009d5c81/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  12. 009d5c81/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  13. 00dbd492/adapter_config.json +1 -0
  14. 00dbd492/adapter_model.bin +3 -0
  15. 00dbd492/config.json +1 -0
  16. 00dbd492/log_1730923997.txt +250 -0
  17. 00dbd492/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  18. 00dbd492/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  19. 03560426/adapter_config.json +1 -0
  20. 03560426/adapter_model.bin +3 -0
  21. 03560426/config.json +1 -0
  22. 03560426/log_1730924338.txt +250 -0
  23. 03560426/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  24. 03560426/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  25. 05a7bcf2/adapter_config.json +1 -0
  26. 05a7bcf2/adapter_model.bin +3 -0
  27. 05a7bcf2/config.json +1 -0
  28. 05a7bcf2/log_1730924590.txt +96 -0
  29. 05a7bcf2/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  30. 05a7bcf2/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  31. 0607ce86/adapter_config.json +1 -0
  32. 0607ce86/adapter_model.bin +3 -0
  33. 0607ce86/config.json +1 -0
  34. 0607ce86/log_1730924807.txt +144 -0
  35. 0607ce86/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  36. 0607ce86/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  37. 0692e18c/adapter_config.json +1 -0
  38. 0692e18c/adapter_model.bin +3 -0
  39. 0692e18c/config.json +1 -0
  40. 0692e18c/log_1730925029.txt +250 -0
  41. 0692e18c/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  42. 0692e18c/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  43. 070dd51e/adapter_config.json +1 -0
  44. 070dd51e/adapter_model.bin +3 -0
  45. 070dd51e/config.json +1 -0
  46. 070dd51e/log_1730925150.txt +72 -0
  47. 070dd51e/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +0 -0
  48. 070dd51e/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl +1 -0
  49. 08573cc6/adapter_config.json +1 -0
  50. 08573cc6/adapter_model.bin +3 -0
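The listing above repeats one layout per task id: each directory (e.g. 00576224/) holds a LoRA adapter (adapter_config.json, adapter_model.bin), the base-model config.json, a training log, and two prediction .jsonl files. Since the commit message says the folder was uploaded with huggingface_hub, a single task directory can be fetched the same way. A minimal sketch, assuming a placeholder repo id because the repository name is not part of this diff:

```python
from huggingface_hub import snapshot_download

# Placeholder: the actual repo id is not shown in this diff.
REPO_ID = "<namespace>/<repo-name>"

# Fetch only one task directory instead of the whole upload.
# Pass repo_type="dataset" if the repository is a dataset repo.
local_path = snapshot_download(
    repo_id=REPO_ID,
    allow_patterns=["00576224/*"],
)
print(local_path)
```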
00576224/adapter_config.json ADDED
@@ -0,0 +1 @@
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
00576224/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09a9243bac781e0b58f343a1e5b02a378d78f2686fc758ebb5de76f75f07bd5f
+ size 596042770
00576224/config.json ADDED
@@ -0,0 +1 @@
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
00576224/log_1730923439.txt ADDED
@@ -0,0 +1,190 @@
+ Step 1 | loss:0.12513844668865204 lr:5.263157894736842e-06 tokens_per_second_per_gpu:489.1639760078218
2
+ Step 2 | loss:0.27767521142959595 lr:1.0526315789473684e-05 tokens_per_second_per_gpu:211.5141760606634
3
+ Step 3 | loss:0.08814200013875961 lr:1.5789473684210526e-05 tokens_per_second_per_gpu:1034.8852521431254
4
+ Step 4 | loss:0.14448179304599762 lr:2.105263157894737e-05 tokens_per_second_per_gpu:603.851086921295
5
+ Step 5 | loss:0.17749035358428955 lr:2.6315789473684212e-05 tokens_per_second_per_gpu:346.6801253216191
6
+ Step 6 | loss:0.22013026475906372 lr:3.157894736842105e-05 tokens_per_second_per_gpu:330.39612038517987
7
+ Step 7 | loss:0.10268905758857727 lr:3.6842105263157895e-05 tokens_per_second_per_gpu:4722.104566287469
8
+ Step 8 | loss:0.018015574663877487 lr:4.210526315789474e-05 tokens_per_second_per_gpu:6431.808558531696
9
+ Step 9 | loss:0.08885238319635391 lr:4.736842105263158e-05 tokens_per_second_per_gpu:571.6879149150644
10
+ Step 10 | loss:0.05220437049865723 lr:5.2631578947368424e-05 tokens_per_second_per_gpu:4693.365676410151
11
+ Step 11 | loss:0.27746930718421936 lr:5.789473684210527e-05 tokens_per_second_per_gpu:1778.005353013356
12
+ Step 12 | loss:0.07933241873979568 lr:6.31578947368421e-05 tokens_per_second_per_gpu:4633.485321885326
13
+ Step 13 | loss:0.053572043776512146 lr:6.842105263157895e-05 tokens_per_second_per_gpu:4663.791103674315
14
+ Step 14 | loss:0.03598742559552193 lr:7.368421052631579e-05 tokens_per_second_per_gpu:6399.908531613933
15
+ Step 15 | loss:0.11993309110403061 lr:7.894736842105263e-05 tokens_per_second_per_gpu:2789.6131114882687
16
+ Step 16 | loss:0.02057805471122265 lr:8.421052631578948e-05 tokens_per_second_per_gpu:4668.259126390028
17
+ Step 17 | loss:0.08534551411867142 lr:8.947368421052632e-05 tokens_per_second_per_gpu:2704.2567977002714
18
+ Step 18 | loss:0.04972148686647415 lr:9.473684210526316e-05 tokens_per_second_per_gpu:2865.2209746652698
19
+ Step 19 | loss:0.10108952969312668 lr:0.0001 tokens_per_second_per_gpu:2700.8888204910354
20
+ Step 20 | loss:0.0645143985748291 lr:9.999156208166614e-05 tokens_per_second_per_gpu:4832.97408792502
21
+ Step 21 | loss:0.02110893465578556 lr:9.996625117460318e-05 tokens_per_second_per_gpu:6440.741150678779
22
+ Step 22 | loss:0.02561924234032631 lr:9.992407582166581e-05 tokens_per_second_per_gpu:4606.905708939646
23
+ Step 23 | loss:0.021514398977160454 lr:9.986505025774138e-05 tokens_per_second_per_gpu:4623.379873491071
24
+ Step 24 | loss:0.016372518613934517 lr:9.978919440494539e-05 tokens_per_second_per_gpu:6425.975738887444
25
+ Step 25 | loss:0.006173485890030861 lr:9.969653386589748e-05 tokens_per_second_per_gpu:6420.184616940538
26
+ Step 26 | loss:0.08447691053152084 lr:9.958709991508012e-05 tokens_per_second_per_gpu:2812.9036846256577
27
+ Step 27 | loss:0.04044446721673012 lr:9.946092948828289e-05 tokens_per_second_per_gpu:2045.035723823389
28
+ Step 28 | loss:0.00761376041918993 lr:9.931806517013612e-05 tokens_per_second_per_gpu:6419.125727543057
29
+ Step 29 | loss:2.9212271329015493e-05 lr:9.915855517973776e-05 tokens_per_second_per_gpu:2828.156433864423
30
+ Step 30 | loss:0.019834276288747787 lr:9.89824533543787e-05 tokens_per_second_per_gpu:4602.148315466991
31
+ Step 31 | loss:0.02634788677096367 lr:9.878981913137179e-05 tokens_per_second_per_gpu:2814.4680355231203
32
+ Step 32 | loss:0.01563478261232376 lr:9.85807175279907e-05 tokens_per_second_per_gpu:6430.664192724959
33
+ Step 33 | loss:0.00018869338964577764 lr:9.835521911952555e-05 tokens_per_second_per_gpu:4621.488151696804
34
+ Step 34 | loss:0.012434639967978 lr:9.811340001546251e-05 tokens_per_second_per_gpu:2720.019388899271
35
+ Step 35 | loss:0.009905942715704441 lr:9.785534183379572e-05 tokens_per_second_per_gpu:1779.500183931238
36
+ Step 36 | loss:0.0009810677729547024 lr:9.75811316734796e-05 tokens_per_second_per_gpu:6436.066182459395
37
+ Step 37 | loss:3.712317629833706e-05 lr:9.729086208503174e-05 tokens_per_second_per_gpu:6385.345573591526
38
+ Step 38 | loss:0.051087636500597 lr:9.698463103929542e-05 tokens_per_second_per_gpu:1740.9540895086413
39
+ Step 39 | loss:0.055866535753011703 lr:9.666254189437286e-05 tokens_per_second_per_gpu:6430.774494881437
40
+ Step 40 | loss:0.042876582592725754 lr:9.632470336074009e-05 tokens_per_second_per_gpu:2725.4300337513587
41
+ Step 41 | loss:4.068629277753644e-05 lr:9.59712294645554e-05 tokens_per_second_per_gpu:4793.682682767743
42
+ Step 42 | loss:0.005137309432029724 lr:9.560223950917353e-05 tokens_per_second_per_gpu:4886.87236586204
43
+ Step 43 | loss:9.51029869611375e-06 lr:9.521785803487889e-05 tokens_per_second_per_gpu:6433.24017207722
44
+ Step 44 | loss:0.0006581911002285779 lr:9.481821477685101e-05 tokens_per_second_per_gpu:6394.068307927573
45
+ Step 45 | loss:0.011923214420676231 lr:9.440344462137689e-05 tokens_per_second_per_gpu:4651.927148948125
46
+ Step 46 | loss:0.003881220705807209 lr:9.397368756032445e-05 tokens_per_second_per_gpu:4624.039551597291
47
+ Step 47 | loss:0.0006434835959225893 lr:9.352908864389312e-05 tokens_per_second_per_gpu:6421.54598110918
48
+ Step 48 | loss:0.013022343628108501 lr:9.306979793165681e-05 tokens_per_second_per_gpu:4593.720201739378
49
+ Step 49 | loss:0.016436750069260597 lr:9.259597044191636e-05 tokens_per_second_per_gpu:2712.1897246523313
50
+ Step 50 | loss:0.005530699156224728 lr:9.210776609937829e-05 tokens_per_second_per_gpu:4650.439436447809
51
+ Step 51 | loss:0.03162027895450592 lr:9.160534968117752e-05 tokens_per_second_per_gpu:4658.941153618514
52
+ Step 52 | loss:0.001070328988134861 lr:9.108889076126226e-05 tokens_per_second_per_gpu:6420.722511431488
53
+ Step 53 | loss:0.001636895933188498 lr:9.055856365316011e-05 tokens_per_second_per_gpu:4676.1058043584
54
+ Step 54 | loss:0.0023436627816408873 lr:9.001454735114421e-05 tokens_per_second_per_gpu:4685.836913259768
55
+ Step 55 | loss:0.002947223139926791 lr:8.945702546981969e-05 tokens_per_second_per_gpu:3607.0607627985087
56
+ Step 56 | loss:1.1357014045643155e-05 lr:8.88861861821507e-05 tokens_per_second_per_gpu:6388.666042322892
57
+ Step 57 | loss:0.0018116658320650458 lr:8.83022221559489e-05 tokens_per_second_per_gpu:2814.6286149842804
58
+ Step 58 | loss:0.0025165448896586895 lr:8.770533048884482e-05 tokens_per_second_per_gpu:2828.894264031168
59
+ Step 59 | loss:0.028220675885677338 lr:8.709571264176409e-05 tokens_per_second_per_gpu:2825.978751158324
60
+ Step 60 | loss:0.03158221021294594 lr:8.647357437093105e-05 tokens_per_second_per_gpu:340.0394546222093
61
+ Step 61 | loss:0.010333526879549026 lr:8.583912565842257e-05 tokens_per_second_per_gpu:4617.829336225042
62
+ Step 62 | loss:0.0030926747713238 lr:8.519258064129558e-05 tokens_per_second_per_gpu:4818.569941848527
63
+ Step 63 | loss:0.004414541646838188 lr:8.453415753931222e-05 tokens_per_second_per_gpu:4601.682609455322
64
+ Step 64 | loss:0.016952676698565483 lr:8.386407858128706e-05 tokens_per_second_per_gpu:2834.2569108388043
65
+ Step 65 | loss:0.004038005601614714 lr:8.318256993008107e-05 tokens_per_second_per_gpu:6423.033432306101
66
+ Step 66 | loss:0.00045517730177380145 lr:8.2489861606268e-05 tokens_per_second_per_gpu:6401.850928014607
67
+ Step 67 | loss:9.486277122050524e-06 lr:8.178618741049842e-05 tokens_per_second_per_gpu:6394.543131261295
68
+ Step 68 | loss:2.5972610728786094e-06 lr:8.107178484458824e-05 tokens_per_second_per_gpu:6360.161820455056
69
+ Step 69 | loss:0.0017736494773998857 lr:8.034689503135783e-05 tokens_per_second_per_gpu:4900.146728652612
70
+ Step 70 | loss:0.00011349606938892975 lr:7.961176263324901e-05 tokens_per_second_per_gpu:6405.960111262532
71
+ Step 71 | loss:2.416427832940826e-06 lr:7.886663576974733e-05 tokens_per_second_per_gpu:6396.551017054138
72
+ Step 72 | loss:4.147927029407583e-06 lr:7.811176593363772e-05 tokens_per_second_per_gpu:4918.184814476602
73
+ Step 73 | loss:3.7847003113711253e-06 lr:7.734740790612136e-05 tokens_per_second_per_gpu:2882.40260371104
74
+ Step 74 | loss:0.0003083910560235381 lr:7.65738196708228e-05 tokens_per_second_per_gpu:6420.82991308512
75
+ Step 75 | loss:0.0027848940808326006 lr:7.579126232671621e-05 tokens_per_second_per_gpu:4897.586010719973
76
+ Step 76 | loss:0.0022790227085351944 lr:7.500000000000001e-05 tokens_per_second_per_gpu:2721.072563920397
77
+ Step 77 | loss:1.4661931118098437e-06 lr:7.420029975494995e-05 tokens_per_second_per_gpu:6449.610643613004
78
+ Step 78 | loss:0.0009201367502100766 lr:7.33924315037804e-05 tokens_per_second_per_gpu:6393.140054586642
79
+ Step 79 | loss:0.00021389646281022578 lr:7.257666791554448e-05 tokens_per_second_per_gpu:2836.579024426254
80
+ Step 80 | loss:0.0001596698275534436 lr:7.175328432410366e-05 tokens_per_second_per_gpu:4649.107091415815
81
+ Step 81 | loss:0.003130219876766205 lr:7.092255863519806e-05 tokens_per_second_per_gpu:2827.0497468671183
82
+ Step 82 | loss:0.0005868118023499846 lr:7.008477123264848e-05 tokens_per_second_per_gpu:4712.871502714318
83
+ Step 83 | loss:0.00012436187535058707 lr:6.924020488372229e-05 tokens_per_second_per_gpu:6448.857423217293
84
+ Step 84 | loss:6.51985828881152e-05 lr:6.838914464369467e-05 tokens_per_second_per_gpu:3556.6975550492407
85
+ Step 85 | loss:0.00011639451258815825 lr:6.753187775963773e-05 tokens_per_second_per_gpu:4861.603860869885
86
+ Step 86 | loss:0.03337816894054413 lr:6.666869357346978e-05 tokens_per_second_per_gpu:2818.0086060190574
87
+ Step 87 | loss:0.0003359356487635523 lr:6.579988342429763e-05 tokens_per_second_per_gpu:4629.657454767926
88
+ Step 88 | loss:0.0014997980324551463 lr:6.492574055008473e-05 tokens_per_second_per_gpu:4881.239518855494
89
+ Step 89 | loss:7.738002750556916e-06 lr:6.404655998867848e-05 tokens_per_second_per_gpu:4669.80564291533
90
+ Step 90 | loss:8.703746425453573e-05 lr:6.316263847822997e-05 tokens_per_second_per_gpu:6417.726691214301
91
+ Step 91 | loss:0.0739043727517128 lr:6.227427435703997e-05 tokens_per_second_per_gpu:4612.9635786516965
92
+ Step 92 | loss:0.016813863068819046 lr:6.138176746286468e-05 tokens_per_second_per_gpu:2823.8781025034764
93
+ Step 93 | loss:6.928161747055128e-05 lr:6.0485419031715516e-05 tokens_per_second_per_gpu:2859.723339670579
94
+ Step 94 | loss:0.02898344211280346 lr:5.958553159618693e-05 tokens_per_second_per_gpu:6426.714621734662
95
+ Step 95 | loss:0.0013739264104515314 lr:5.868240888334653e-05 tokens_per_second_per_gpu:2844.9955209254304
96
+ Step 96 | loss:2.435055421301513e-06 lr:5.7776355712222166e-05 tokens_per_second_per_gpu:6418.51945491529
97
+ Step 97 | loss:1.5492834108954412e-06 lr:5.686767789092041e-05 tokens_per_second_per_gpu:6416.743087828441
98
+ Step 98 | loss:9.047378171089804e-07 lr:5.595668211341118e-05 tokens_per_second_per_gpu:6357.369361637581
99
+ Step 99 | loss:0.0036852010525763035 lr:5.504367585601342e-05 tokens_per_second_per_gpu:6390.133020958593
100
+ Step 100 | loss:0.0021667189430445433 lr:5.4128967273616625e-05 tokens_per_second_per_gpu:4597.133995440272
101
+ Step 101 | loss:0.005624657031148672 lr:5.3212865095673514e-05 tokens_per_second_per_gpu:4662.51872029268
102
+ Step 102 | loss:0.003217491786926985 lr:5.229567852199859e-05 tokens_per_second_per_gpu:4652.101588464225
103
+ Step 103 | loss:0.0016954037128016353 lr:5.1377717118408105e-05 tokens_per_second_per_gpu:4695.5548734896165
104
+ Step 104 | loss:0.0013828268274664879 lr:5.0459290712236326e-05 tokens_per_second_per_gpu:6429.358015641005
105
+ Step 105 | loss:9.068298822967336e-06 lr:4.9540709287763685e-05 tokens_per_second_per_gpu:6391.431356242531
106
+ Step 106 | loss:0.0033326654229313135 lr:4.8622282881591906e-05 tokens_per_second_per_gpu:4868.965084559455
107
+ Step 107 | loss:0.003369354410097003 lr:4.7704321478001415e-05 tokens_per_second_per_gpu:4667.411675619285
108
+ Step 108 | loss:0.0030244227964431047 lr:4.6787134904326504e-05 tokens_per_second_per_gpu:2852.53285533292
109
+ Step 109 | loss:0.001989662414416671 lr:4.5871032726383386e-05 tokens_per_second_per_gpu:4678.622547190236
110
+ Step 110 | loss:1.4466125321632717e-05 lr:4.4956324143986596e-05 tokens_per_second_per_gpu:4810.972524572794
111
+ Step 111 | loss:3.1691836284153396e-06 lr:4.404331788658882e-05 tokens_per_second_per_gpu:3587.3725472140322
112
+ Step 112 | loss:0.0011431199964135885 lr:4.3132322109079596e-05 tokens_per_second_per_gpu:6415.560684389564
113
+ Step 113 | loss:0.005362535361200571 lr:4.222364428777786e-05 tokens_per_second_per_gpu:2752.632997217397
114
+ Step 114 | loss:0.0075534312054514885 lr:4.131759111665349e-05 tokens_per_second_per_gpu:4368.799613301608
115
+ Step 115 | loss:0.0011615206021815538 lr:4.0414468403813095e-05 tokens_per_second_per_gpu:4674.262697274069
116
+ Step 116 | loss:0.0009375385707244277 lr:3.951458096828449e-05 tokens_per_second_per_gpu:6372.567470355085
117
+ Step 117 | loss:0.003302461002022028 lr:3.861823253713535e-05 tokens_per_second_per_gpu:6400.079595816121
118
+ Step 118 | loss:1.4625480844188132e-06 lr:3.772572564296005e-05 tokens_per_second_per_gpu:6417.671241359475
119
+ Step 119 | loss:0.001941256457939744 lr:3.683736152177005e-05 tokens_per_second_per_gpu:2841.5228444613617
120
+ Step 120 | loss:0.002696903655305505 lr:3.595344001132154e-05 tokens_per_second_per_gpu:6433.570437260473
121
+ Step 121 | loss:0.05962380766868591 lr:3.5074259449915284e-05 tokens_per_second_per_gpu:2843.32026878976
122
+ Step 122 | loss:1.4319573892862536e-05 lr:3.420011657570238e-05 tokens_per_second_per_gpu:2811.763598035594
123
+ Step 123 | loss:0.04734177514910698 lr:3.333130642653024e-05 tokens_per_second_per_gpu:1780.8220458373457
124
+ Step 124 | loss:0.002887164708226919 lr:3.2468122240362284e-05 tokens_per_second_per_gpu:4639.027286752893
125
+ Step 125 | loss:4.599939074978465e-06 lr:3.1610855356305354e-05 tokens_per_second_per_gpu:4698.550787904085
126
+ Step 126 | loss:0.004566989839076996 lr:3.0759795116277725e-05 tokens_per_second_per_gpu:4697.269678423468
127
+ Step 127 | loss:0.0036403313279151917 lr:2.991522876735154e-05 tokens_per_second_per_gpu:4632.184769962814
128
+ Step 128 | loss:0.003928751684725285 lr:2.9077441364801938e-05 tokens_per_second_per_gpu:4629.39022456499
129
+ Step 129 | loss:0.0016177849611267447 lr:2.824671567589635e-05 tokens_per_second_per_gpu:6414.063868110892
130
+ Step 130 | loss:0.0006506482604891062 lr:2.7423332084455544e-05 tokens_per_second_per_gpu:4862.7184002824115
131
+ Step 131 | loss:0.008410315029323101 lr:2.660756849621962e-05 tokens_per_second_per_gpu:2860.926802536824
132
+ Step 132 | loss:1.534285729576368e-05 lr:2.5799700245050074e-05 tokens_per_second_per_gpu:4644.185636025985
133
+ Step 133 | loss:0.012605580501258373 lr:2.500000000000001e-05 tokens_per_second_per_gpu:2828.5641390430605
134
+ Step 134 | loss:9.261790182790719e-06 lr:2.4208737673283815e-05 tokens_per_second_per_gpu:4883.91824384238
135
+ Step 135 | loss:0.004368562716990709 lr:2.3426180329177215e-05 tokens_per_second_per_gpu:4648.604585515982
136
+ Step 136 | loss:0.006988442502915859 lr:2.2652592093878666e-05 tokens_per_second_per_gpu:4880.214896300479
137
+ Step 137 | loss:0.004174124449491501 lr:2.1888234066362302e-05 tokens_per_second_per_gpu:4639.287041089598
138
+ Step 138 | loss:4.192010237602517e-06 lr:2.1133364230252688e-05 tokens_per_second_per_gpu:2827.9441333131067
139
+ Step 139 | loss:4.253053702996112e-06 lr:2.0388237366751006e-05 tokens_per_second_per_gpu:1344.97427429136
140
+ Step 140 | loss:0.0013539784122258425 lr:1.9653104968642173e-05 tokens_per_second_per_gpu:4658.711630506871
141
+ Step 141 | loss:0.0006630073767155409 lr:1.8928215155411773e-05 tokens_per_second_per_gpu:6409.573963090514
142
+ Step 142 | loss:0.002947476925328374 lr:1.821381258950161e-05 tokens_per_second_per_gpu:6428.605302530668
143
+ Step 143 | loss:0.0018973237602040172 lr:1.7510138393732028e-05 tokens_per_second_per_gpu:6394.570954520929
144
+ Step 144 | loss:3.7145280202821596e-06 lr:1.681743006991894e-05 tokens_per_second_per_gpu:4675.013611001729
145
+ Step 145 | loss:0.042256902903318405 lr:1.6135921418712956e-05 tokens_per_second_per_gpu:2843.6277884070064
146
+ Step 146 | loss:0.003359108464792371 lr:1.5465842460687784e-05 tokens_per_second_per_gpu:4600.518508424465
147
+ Step 147 | loss:0.005931657738983631 lr:1.4807419358704433e-05 tokens_per_second_per_gpu:2860.5951701361273
148
+ Step 148 | loss:4.868064024776686e-06 lr:1.4160874341577446e-05 tokens_per_second_per_gpu:6413.965658588304
149
+ Step 149 | loss:0.023996124044060707 lr:1.3526425629068967e-05 tokens_per_second_per_gpu:2840.5765071242326
150
+ Step 150 | loss:0.0011750105768442154 lr:1.2904287358235928e-05 tokens_per_second_per_gpu:6419.940118376599
151
+ Step 151 | loss:0.0019033460412174463 lr:1.2294669511155193e-05 tokens_per_second_per_gpu:4623.908438531533
152
+ Step 152 | loss:0.001131798722781241 lr:1.1697777844051105e-05 tokens_per_second_per_gpu:4702.877983028494
153
+ Step 153 | loss:3.302582263131626e-05 lr:1.111381381784931e-05 tokens_per_second_per_gpu:6421.469748265485
154
+ Step 154 | loss:0.005529465619474649 lr:1.0542974530180327e-05 tokens_per_second_per_gpu:2728.8512653353687
155
+ Step 155 | loss:0.00396890239790082 lr:9.985452648855803e-06 tokens_per_second_per_gpu:2799.251290868056
156
+ Step 156 | loss:0.005971190519630909 lr:9.441436346839894e-06 tokens_per_second_per_gpu:2706.4303003540135
157
+ Step 157 | loss:8.517595233570319e-07 lr:8.911109238737747e-06 tokens_per_second_per_gpu:6381.4581134098125
158
+ Step 158 | loss:0.002489866456016898 lr:8.3946503188225e-06 tokens_per_second_per_gpu:4650.469394964638
159
+ Step 159 | loss:0.0034828418865799904 lr:7.89223390062172e-06 tokens_per_second_per_gpu:4662.585502769738
160
+ Step 160 | loss:0.002724783029407263 lr:7.404029558083653e-06 tokens_per_second_per_gpu:6430.701081956164
161
+ Step 161 | loss:0.0018676317995414138 lr:6.9302020683432055e-06 tokens_per_second_per_gpu:4662.483149651056
162
+ Step 162 | loss:9.775993930816185e-06 lr:6.470911356106885e-06 tokens_per_second_per_gpu:6427.367120140001
163
+ Step 163 | loss:0.01138626504689455 lr:6.026312439675552e-06 tokens_per_second_per_gpu:4650.724654138391
164
+ Step 164 | loss:0.0014588163467124104 lr:5.596555378623125e-06 tokens_per_second_per_gpu:2851.8214822531313
165
+ Step 165 | loss:0.00135196337942034 lr:5.181785223148999e-06 tokens_per_second_per_gpu:4793.982009234704
166
+ Step 166 | loss:0.000627187080681324 lr:4.782141965121128e-06 tokens_per_second_per_gpu:4692.159156595695
167
+ Step 167 | loss:8.381396128243068e-07 lr:4.397760490826481e-06 tokens_per_second_per_gpu:3617.0074058089476
168
+ Step 168 | loss:0.0012127950321882963 lr:4.028770535444615e-06 tokens_per_second_per_gpu:6421.134820675016
169
+ Step 169 | loss:4.570974124362692e-05 lr:3.675296639259912e-06 tokens_per_second_per_gpu:6406.2440034019
170
+ Step 170 | loss:0.0016346625052392483 lr:3.3374581056271447e-06 tokens_per_second_per_gpu:4832.4576941429605
171
+ Step 171 | loss:0.0013866540975868702 lr:3.0153689607045845e-06 tokens_per_second_per_gpu:4669.9487983815
172
+ Step 172 | loss:0.0026889871805906296 lr:2.7091379149682685e-06 tokens_per_second_per_gpu:4663.827255565714
173
+ Step 173 | loss:0.0013674983056262136 lr:2.4188683265204127e-06 tokens_per_second_per_gpu:4628.5059242993375
174
+ Step 174 | loss:0.002935702446848154 lr:2.1446581662042942e-06 tokens_per_second_per_gpu:4691.468123682847
175
+ Step 175 | loss:0.004059434402734041 lr:1.8865999845374793e-06 tokens_per_second_per_gpu:4653.043818434914
176
+ Step 176 | loss:0.0026409674901515245 lr:1.6447808804744668e-06 tokens_per_second_per_gpu:4878.260104369465
177
+ Step 177 | loss:0.0015465031610801816 lr:1.419282472009309e-06 tokens_per_second_per_gpu:6418.617830043843
178
+ Step 178 | loss:0.00817189458757639 lr:1.210180868628219e-06 tokens_per_second_per_gpu:4792.315895091995
179
+ Step 179 | loss:0.0015504483599215746 lr:1.0175466456213034e-06 tokens_per_second_per_gpu:4871.149434461099
180
+ Step 180 | loss:0.00481545040383935 lr:8.414448202622494e-07 tokens_per_second_per_gpu:2730.1446594882127
181
+ Step 181 | loss:0.002081669634208083 lr:6.819348298638839e-07 tokens_per_second_per_gpu:4669.819980068593
182
+ Step 182 | loss:0.0024928648490458727 lr:5.390705117171047e-07 tokens_per_second_per_gpu:4678.283477984429
183
+ Step 183 | loss:0.005283087957650423 lr:4.129000849198872e-07 tokens_per_second_per_gpu:2814.5930062504976
184
+ Step 184 | loss:0.0012949762167409062 lr:3.034661341025258e-07 tokens_per_second_per_gpu:2840.571429191292
185
+ Step 185 | loss:0.0006076831487007439 lr:2.1080559505462505e-07 tokens_per_second_per_gpu:6385.848503137581
186
+ Step 186 | loss:9.34275612962665e-06 lr:1.3494974225863322e-07 tokens_per_second_per_gpu:6429.516357906251
187
+ Step 187 | loss:0.006051680073142052 lr:7.59241783341913e-08 tokens_per_second_per_gpu:2718.71073842405
188
+ Step 188 | loss:0.005551817826926708 lr:3.374882539681767e-08 tokens_per_second_per_gpu:2686.2392676993068
189
+ Step 189 | loss:2.3192162643681513e-06 lr:8.437918333864536e-09 tokens_per_second_per_gpu:6412.661227271177
190
+ Step 190 | loss:5.593451078311773e-06 lr:0.0 tokens_per_second_per_gpu:4855.294777618601
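Each log_*.txt records one optimizer step per line in a fixed format (Step N | loss:... lr:... tokens_per_second_per_gpu:...), with the learning rate warming up to 1e-4 and decaying back to 0 over the run. A small parsing sketch, assuming the raw log files contain exactly these lines without the leading "+" diff prefix shown above:

```python
import re

STEP_RE = re.compile(
    r"Step (\d+) \| loss:(\S+) lr:(\S+) tokens_per_second_per_gpu:(\S+)"
)

def parse_log(path):
    """Parse one per-task training log into a list of step records."""
    records = []
    with open(path) as f:
        for line in f:
            m = STEP_RE.search(line)
            if m:
                step, loss, lr, tps = m.groups()
                records.append({
                    "step": int(step),
                    "loss": float(loss),
                    "lr": float(lr),
                    "tokens_per_sec_per_gpu": float(tps),
                })
    return records

history = parse_log("00576224/log_1730923439.txt")
print(history[-1])  # final step, lr decayed to 0.0
```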
00576224/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
00576224/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[9 9 7 7]\n [9 9 7 7]\n [5 5 9 9]\n [5 5 9 9]] -> [[9 9 7 7 5 5 9 9 9 9 7 7]\n [9 9 7 7 5 5 9 9 9 9 7 7]\n [5 5 9 9 9 9 7 7 5 5 9 9]\n [5 5 9 9 9 9 7 7 5 5 9 9]\n [9 9 7 7 5 5 9 9 9 9 7 7]\n [9 9 7 7 5 5 9 9 9 9 7 7]\n [5 5 9 9 9 9 7 7 5 5 9 9]\n [5 5 9 9 9 9 7 7 5 5 9 9]\n [9 9 7 7 5 5 9 9 9 9 7 7]\n [9 9 7 7 5 5 9 9 9 9 7 7]\n [5 5 9 9 9 9 7 7 5 5 9 9]\n [5 5 9 9 9 9 7 7 5 5 9 9]]#\n"}, {"role": "user", "content": "[[5 5 8 8]\n [5 5 8 8]\n [1 1 6 6]\n [1 1 6 6]] -> "}], "output": {"role": "assistant", "content": "[[5 5 8 8 1 1 6 6 5 5 8 8]\n [5 5 8 8 1 1 6 6 5 5 8 8]\n [1 1 6 6 5 5 8 8 1 1 6 6]\n [1 1 6 6 5 5 8 8 1 1 6 6]\n [5 5 8 8 1 1 6 6 5 5 8 8]\n [5 5 8 8 1 1 6 6 5 5 8 8]\n [1 1 6 6 5 5 8 8 1 1 6 6]\n [1 1 6 6 5 5 8 8 1 1 6 6]\n [5 5 8 8 1 1 6 6 5 5 8 8]\n [5 5 8 8 1 1 6 6 5 5 8 8]\n [1 1 6 6 5 5 8 8 1 1 6 6]\n [1 1 6 6 5 5 8 8 1 1 6 6]]#"}, "total_tokens": 735}
009d5c81/adapter_config.json ADDED
@@ -0,0 +1 @@
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
009d5c81/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f2c68ec85e03d63667c5e9be47dc156ac4b5c834c5b996739f9ba2886acdc4a
+ size 596042770
009d5c81/config.json ADDED
@@ -0,0 +1 @@
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
009d5c81/log_1730923494.txt ADDED
@@ -0,0 +1,250 @@
+ Step 1 | loss:0.010427827946841717 lr:4.000000000000001e-06 tokens_per_second_per_gpu:5008.803920961876
2
+ Step 2 | loss:0.012005687691271305 lr:8.000000000000001e-06 tokens_per_second_per_gpu:4543.881741505023
3
+ Step 3 | loss:0.009577717632055283 lr:1.2e-05 tokens_per_second_per_gpu:6734.236745220564
4
+ Step 4 | loss:0.00831858441233635 lr:1.6000000000000003e-05 tokens_per_second_per_gpu:6769.381361399619
5
+ Step 5 | loss:0.011338017880916595 lr:2e-05 tokens_per_second_per_gpu:6808.394730665254
6
+ Step 6 | loss:0.012518859468400478 lr:2.4e-05 tokens_per_second_per_gpu:6826.8391987912155
7
+ Step 7 | loss:0.009475968778133392 lr:2.8000000000000003e-05 tokens_per_second_per_gpu:6732.7444724965135
8
+ Step 8 | loss:0.009848631918430328 lr:3.2000000000000005e-05 tokens_per_second_per_gpu:6760.5034121812705
9
+ Step 9 | loss:0.04560374096035957 lr:3.6e-05 tokens_per_second_per_gpu:3810.7806500671277
10
+ Step 10 | loss:0.012298542074859142 lr:4e-05 tokens_per_second_per_gpu:6847.044814584687
11
+ Step 11 | loss:0.011072923429310322 lr:4.4000000000000006e-05 tokens_per_second_per_gpu:4733.153092515809
12
+ Step 12 | loss:0.05228746309876442 lr:4.8e-05 tokens_per_second_per_gpu:7031.137102265347
13
+ Step 13 | loss:0.00804749596863985 lr:5.2000000000000004e-05 tokens_per_second_per_gpu:6787.49204446146
14
+ Step 14 | loss:0.01451866701245308 lr:5.6000000000000006e-05 tokens_per_second_per_gpu:7111.1075156944025
15
+ Step 15 | loss:0.006390584632754326 lr:6e-05 tokens_per_second_per_gpu:6842.452370635766
16
+ Step 16 | loss:0.02179665118455887 lr:6.400000000000001e-05 tokens_per_second_per_gpu:6823.107724400367
17
+ Step 17 | loss:0.0033636281732469797 lr:6.800000000000001e-05 tokens_per_second_per_gpu:6808.574304054001
18
+ Step 18 | loss:0.009917511604726315 lr:7.2e-05 tokens_per_second_per_gpu:7069.924856487332
19
+ Step 19 | loss:0.004517083056271076 lr:7.6e-05 tokens_per_second_per_gpu:6764.437095893996
20
+ Step 20 | loss:0.006020938046276569 lr:8e-05 tokens_per_second_per_gpu:6923.754124218399
21
+ Step 21 | loss:0.0030509368516504765 lr:8.4e-05 tokens_per_second_per_gpu:6805.189456766779
22
+ Step 22 | loss:0.00804334506392479 lr:8.800000000000001e-05 tokens_per_second_per_gpu:7069.793757152901
23
+ Step 23 | loss:0.003392538521438837 lr:9.200000000000001e-05 tokens_per_second_per_gpu:6836.617304348815
24
+ Step 24 | loss:0.0021571789402514696 lr:9.6e-05 tokens_per_second_per_gpu:6766.132565380621
25
+ Step 25 | loss:0.0030511049553751945 lr:0.0001 tokens_per_second_per_gpu:6778.980036945585
26
+ Step 26 | loss:0.006041023414582014 lr:9.999512620046522e-05 tokens_per_second_per_gpu:6937.8041427283015
27
+ Step 27 | loss:0.0059476569294929504 lr:9.998050575201771e-05 tokens_per_second_per_gpu:6505.623481762069
28
+ Step 28 | loss:0.005402639042586088 lr:9.995614150494293e-05 tokens_per_second_per_gpu:7043.925655167204
29
+ Step 29 | loss:0.005851719994097948 lr:9.992203820909906e-05 tokens_per_second_per_gpu:3346.9873526000856
30
+ Step 30 | loss:0.0020611840300261974 lr:9.987820251299122e-05 tokens_per_second_per_gpu:6830.3186216681825
31
+ Step 31 | loss:0.0037254351191222668 lr:9.982464296247522e-05 tokens_per_second_per_gpu:6925.960355958691
32
+ Step 32 | loss:0.0023793913424015045 lr:9.976136999909156e-05 tokens_per_second_per_gpu:6839.809402276852
33
+ Step 33 | loss:0.003644440555945039 lr:9.968839595802982e-05 tokens_per_second_per_gpu:4677.784518884225
34
+ Step 34 | loss:0.0015612317947670817 lr:9.96057350657239e-05 tokens_per_second_per_gpu:6737.955977517824
35
+ Step 35 | loss:0.0035902569070458412 lr:9.951340343707852e-05 tokens_per_second_per_gpu:6856.4531858965265
36
+ Step 36 | loss:0.0032603067811578512 lr:9.941141907232765e-05 tokens_per_second_per_gpu:6810.5289537738745
37
+ Step 37 | loss:0.003907656762748957 lr:9.929980185352526e-05 tokens_per_second_per_gpu:7054.548892382373
38
+ Step 38 | loss:0.0011479186359792948 lr:9.917857354066931e-05 tokens_per_second_per_gpu:6799.882482842793
39
+ Step 39 | loss:0.005801335908472538 lr:9.904775776745958e-05 tokens_per_second_per_gpu:7075.814535317771
40
+ Step 40 | loss:0.0031425445340573788 lr:9.890738003669029e-05 tokens_per_second_per_gpu:7034.364400477758
41
+ Step 41 | loss:0.0025631876196712255 lr:9.875746771527816e-05 tokens_per_second_per_gpu:6793.955462066984
42
+ Step 42 | loss:0.001818945980630815 lr:9.859805002892732e-05 tokens_per_second_per_gpu:6905.6879619222145
43
+ Step 43 | loss:0.004916809033602476 lr:9.842915805643155e-05 tokens_per_second_per_gpu:6934.969671944544
44
+ Step 44 | loss:0.02129308320581913 lr:9.825082472361557e-05 tokens_per_second_per_gpu:6855.163517028159
45
+ Step 45 | loss:0.0223652645945549 lr:9.806308479691595e-05 tokens_per_second_per_gpu:7096.430714609596
46
+ Step 46 | loss:0.002566624665632844 lr:9.786597487660337e-05 tokens_per_second_per_gpu:6838.96004185691
47
+ Step 47 | loss:0.0028638732619583607 lr:9.765953338964735e-05 tokens_per_second_per_gpu:6808.621479851141
48
+ Step 48 | loss:0.0011649236548691988 lr:9.744380058222483e-05 tokens_per_second_per_gpu:6894.453426767675
49
+ Step 49 | loss:0.018686098977923393 lr:9.721881851187406e-05 tokens_per_second_per_gpu:7133.293399589013
50
+ Step 50 | loss:0.003414908656850457 lr:9.698463103929542e-05 tokens_per_second_per_gpu:7046.301558692089
51
+ Step 51 | loss:0.013546042144298553 lr:9.674128381980072e-05 tokens_per_second_per_gpu:7042.4875394006
52
+ Step 52 | loss:0.002427030121907592 lr:9.648882429441257e-05 tokens_per_second_per_gpu:6794.84895780147
53
+ Step 53 | loss:0.0018121228786185384 lr:9.622730168061567e-05 tokens_per_second_per_gpu:6938.909780768277
54
+ Step 54 | loss:0.0024043091107159853 lr:9.595676696276172e-05 tokens_per_second_per_gpu:7058.786650613044
55
+ Step 55 | loss:0.0004099662182852626 lr:9.567727288213005e-05 tokens_per_second_per_gpu:6782.433544703813
56
+ Step 56 | loss:0.0035382509231567383 lr:9.538887392664544e-05 tokens_per_second_per_gpu:6340.576769430522
57
+ Step 57 | loss:0.0026507212314754725 lr:9.50916263202557e-05 tokens_per_second_per_gpu:7129.957745497268
58
+ Step 58 | loss:0.0014208294451236725 lr:9.478558801197065e-05 tokens_per_second_per_gpu:6951.299428853478
59
+ Step 59 | loss:0.013981368392705917 lr:9.447081866456489e-05 tokens_per_second_per_gpu:7071.6907771911765
60
+ Step 60 | loss:0.003813214600086212 lr:9.414737964294636e-05 tokens_per_second_per_gpu:7104.69149462406
61
+ Step 61 | loss:0.0021942683961242437 lr:9.381533400219318e-05 tokens_per_second_per_gpu:6948.905083627324
62
+ Step 62 | loss:0.000814873434137553 lr:9.347474647526095e-05 tokens_per_second_per_gpu:6853.1087155786445
63
+ Step 63 | loss:0.0035422611981630325 lr:9.312568346036288e-05 tokens_per_second_per_gpu:7080.4084352494
64
+ Step 64 | loss:0.0011617210693657398 lr:9.276821300802534e-05 tokens_per_second_per_gpu:6883.420349062332
65
+ Step 65 | loss:0.0025042295455932617 lr:9.24024048078213e-05 tokens_per_second_per_gpu:7075.091915582967
66
+ Step 66 | loss:0.004087530542165041 lr:9.202833017478422e-05 tokens_per_second_per_gpu:6823.599166612906
67
+ Step 67 | loss:0.003984375856816769 lr:9.164606203550497e-05 tokens_per_second_per_gpu:6792.725634094935
68
+ Step 68 | loss:0.004546380136162043 lr:9.125567491391476e-05 tokens_per_second_per_gpu:7089.574978639151
69
+ Step 69 | loss:0.0024830603506416082 lr:9.085724491675642e-05 tokens_per_second_per_gpu:6837.292820955085
70
+ Step 70 | loss:0.0029061806853860617 lr:9.045084971874738e-05 tokens_per_second_per_gpu:6827.2410775373755
71
+ Step 71 | loss:0.0034427084028720856 lr:9.003656854743667e-05 tokens_per_second_per_gpu:6864.66198527211
72
+ Step 72 | loss:0.001464420696720481 lr:8.961448216775954e-05 tokens_per_second_per_gpu:6822.178329585212
73
+ Step 73 | loss:0.002612335840240121 lr:8.9184672866292e-05 tokens_per_second_per_gpu:6838.830248369909
74
+ Step 74 | loss:0.0045876093208789825 lr:8.874722443520899e-05 tokens_per_second_per_gpu:7056.158575948568
75
+ Step 75 | loss:0.002774039749056101 lr:8.83022221559489e-05 tokens_per_second_per_gpu:6802.963020641426
76
+ Step 76 | loss:0.002137867035344243 lr:8.784975278258783e-05 tokens_per_second_per_gpu:6817.5861010485
77
+ Step 77 | loss:0.002842382062226534 lr:8.73899045249266e-05 tokens_per_second_per_gpu:6785.7430365621685
78
+ Step 78 | loss:0.002142858225852251 lr:8.692276703129421e-05 tokens_per_second_per_gpu:6804.014333258267
79
+ Step 79 | loss:0.0015875640092417598 lr:8.644843137107059e-05 tokens_per_second_per_gpu:6724.91936376274
80
+ Step 80 | loss:0.0018124596681445837 lr:8.596699001693255e-05 tokens_per_second_per_gpu:6846.558579000179
81
+ Step 81 | loss:0.00355800474062562 lr:8.547853682682604e-05 tokens_per_second_per_gpu:7075.9772933281865
82
+ Step 82 | loss:0.0021482682786881924 lr:8.498316702566828e-05 tokens_per_second_per_gpu:6868.955295302446
83
+ Step 83 | loss:0.0021761327516287565 lr:8.44809771867835e-05 tokens_per_second_per_gpu:6751.233050487658
84
+ Step 84 | loss:0.001975941937416792 lr:8.397206521307584e-05 tokens_per_second_per_gpu:6761.111402996535
85
+ Step 85 | loss:0.0025848988443613052 lr:8.345653031794292e-05 tokens_per_second_per_gpu:6802.3682461658045
86
+ Step 86 | loss:0.004574115853756666 lr:8.293447300593402e-05 tokens_per_second_per_gpu:7113.086878800607
87
+ Step 87 | loss:0.007184212561696768 lr:8.240599505315655e-05 tokens_per_second_per_gpu:7069.960020819823
88
+ Step 88 | loss:0.002063318155705929 lr:8.18711994874345e-05 tokens_per_second_per_gpu:6721.899285472525
89
+ Step 89 | loss:0.0022942281793802977 lr:8.133019056822304e-05 tokens_per_second_per_gpu:6872.463710717629
90
+ Step 90 | loss:0.005228151101619005 lr:8.07830737662829e-05 tokens_per_second_per_gpu:7091.47543806555
91
+ Step 91 | loss:0.002778491470962763 lr:8.022995574311876e-05 tokens_per_second_per_gpu:6918.719198741909
92
+ Step 92 | loss:0.00034355948446318507 lr:7.967094433018508e-05 tokens_per_second_per_gpu:6767.886403882791
93
+ Step 93 | loss:0.0017243159236386418 lr:7.910614850786448e-05 tokens_per_second_per_gpu:6924.718945423088
94
+ Step 94 | loss:0.0015514715341851115 lr:7.85356783842216e-05 tokens_per_second_per_gpu:6741.568062307173
95
+ Step 95 | loss:0.00177426275331527 lr:7.795964517353735e-05 tokens_per_second_per_gpu:6826.294128026231
96
+ Step 96 | loss:0.0010320349829271436 lr:7.737816117462752e-05 tokens_per_second_per_gpu:6790.442234767555
97
+ Step 97 | loss:0.002792761428281665 lr:7.679133974894983e-05 tokens_per_second_per_gpu:6793.460903280145
98
+ Step 98 | loss:0.0012509945081546903 lr:7.619929529850397e-05 tokens_per_second_per_gpu:6748.048046446525
99
+ Step 99 | loss:0.0003892534878104925 lr:7.560214324352858e-05 tokens_per_second_per_gpu:6856.93868103825
100
+ Step 100 | loss:0.001663127215579152 lr:7.500000000000001e-05 tokens_per_second_per_gpu:6936.128233988353
101
+ Step 101 | loss:0.0029043490067124367 lr:7.439298295693665e-05 tokens_per_second_per_gpu:6888.205778313194
102
+ Step 102 | loss:0.0008506955346092582 lr:7.378121045351378e-05 tokens_per_second_per_gpu:6781.707978527126
103
+ Step 103 | loss:0.001238394994288683 lr:7.316480175599309e-05 tokens_per_second_per_gpu:6791.616479659178
104
+ Step 104 | loss:0.0013181371614336967 lr:7.254387703447154e-05 tokens_per_second_per_gpu:6771.346624293808
105
+ Step 105 | loss:0.0016408805968239903 lr:7.191855733945387e-05 tokens_per_second_per_gpu:6954.431686441531
106
+ Step 106 | loss:0.0009435561369173229 lr:7.128896457825364e-05 tokens_per_second_per_gpu:6755.779447471709
107
+ Step 107 | loss:0.0022064659278839827 lr:7.06552214912271e-05 tokens_per_second_per_gpu:6887.610212397179
108
+ Step 108 | loss:0.00030679383780807257 lr:7.001745162784477e-05 tokens_per_second_per_gpu:6949.994220774615
109
+ Step 109 | loss:0.0016244077123701572 lr:6.937577932260515e-05 tokens_per_second_per_gpu:6790.821867412899
110
+ Step 110 | loss:0.0016033818246796727 lr:6.873032967079561e-05 tokens_per_second_per_gpu:6756.3403573268215
111
+ Step 111 | loss:0.0005207827198319137 lr:6.808122850410461e-05 tokens_per_second_per_gpu:6799.655995077981
112
+ Step 112 | loss:0.0052481950260698795 lr:6.742860236609077e-05 tokens_per_second_per_gpu:7087.606787117775
113
+ Step 113 | loss:0.006328810937702656 lr:6.677257848751277e-05 tokens_per_second_per_gpu:6934.034005304637
114
+ Step 114 | loss:0.0011291435221210122 lr:6.611328476152557e-05 tokens_per_second_per_gpu:6871.8333951491195
115
+ Step 115 | loss:0.00048354084719903767 lr:6.545084971874738e-05 tokens_per_second_per_gpu:6780.266863755895
116
+ Step 116 | loss:0.00043865066254511476 lr:6.478540250220234e-05 tokens_per_second_per_gpu:6915.482403663006
117
+ Step 117 | loss:0.0021259665954858065 lr:6.411707284214384e-05 tokens_per_second_per_gpu:6816.025366953438
118
+ Step 118 | loss:0.008526140823960304 lr:6.344599103076329e-05 tokens_per_second_per_gpu:7075.479872722786
119
+ Step 119 | loss:0.0015032440423965454 lr:6.277228789678953e-05 tokens_per_second_per_gpu:6793.758984555935
120
+ Step 120 | loss:0.003990808967500925 lr:6.209609477998338e-05 tokens_per_second_per_gpu:7047.216593349506
121
+ Step 121 | loss:0.004816874396055937 lr:6.141754350553279e-05 tokens_per_second_per_gpu:6818.7207761915215
122
+ Step 122 | loss:0.00147228071000427 lr:6.073676635835317e-05 tokens_per_second_per_gpu:6814.826293721152
123
+ Step 123 | loss:0.0006123994244262576 lr:6.005389605729824e-05 tokens_per_second_per_gpu:6846.409181850501
124
+ Step 124 | loss:0.00288543407805264 lr:5.9369065729286245e-05 tokens_per_second_per_gpu:6908.781832888838
125
+ Step 125 | loss:0.0011689026141539216 lr:5.868240888334653e-05 tokens_per_second_per_gpu:7073.299779415434
126
+ Step 126 | loss:0.00034374496317468584 lr:5.799405938459175e-05 tokens_per_second_per_gpu:6807.663781246732
127
+ Step 127 | loss:0.00042975673568435013 lr:5.730415142812059e-05 tokens_per_second_per_gpu:6809.353222422066
128
+ Step 128 | loss:0.005754610989242792 lr:5.661281951285613e-05 tokens_per_second_per_gpu:7068.458683328511
129
+ Step 129 | loss:0.0002537728287279606 lr:5.5920198415325064e-05 tokens_per_second_per_gpu:6754.827923560454
130
+ Step 130 | loss:0.0005576722323894501 lr:5.522642316338268e-05 tokens_per_second_per_gpu:6788.842293043924
131
+ Step 131 | loss:0.004440902732312679 lr:5.453162900988902e-05 tokens_per_second_per_gpu:7048.482917691646
132
+ Step 132 | loss:0.0004139615339227021 lr:5.383595140634093e-05 tokens_per_second_per_gpu:6942.025881187428
133
+ Step 133 | loss:0.0024621011689305305 lr:5.313952597646568e-05 tokens_per_second_per_gpu:7043.749870494549
134
+ Step 134 | loss:0.0013672587228938937 lr:5.244248848978067e-05 tokens_per_second_per_gpu:6929.812635857822
135
+ Step 135 | loss:0.0027264454402029514 lr:5.174497483512506e-05 tokens_per_second_per_gpu:6835.052810061578
136
+ Step 136 | loss:0.0012849484337493777 lr:5.104712099416785e-05 tokens_per_second_per_gpu:7064.693295972902
137
+ Step 137 | loss:0.0008558609406463802 lr:5.034906301489808e-05 tokens_per_second_per_gpu:6747.508059166015
138
+ Step 138 | loss:0.00029380517662502825 lr:4.965093698510193e-05 tokens_per_second_per_gpu:6754.683125182554
139
+ Step 139 | loss:0.0007402076153084636 lr:4.895287900583216e-05 tokens_per_second_per_gpu:6845.057294076618
140
+ Step 140 | loss:0.0002960671845357865 lr:4.825502516487497e-05 tokens_per_second_per_gpu:6748.741245167262
141
+ Step 141 | loss:0.000997895491309464 lr:4.755751151021934e-05 tokens_per_second_per_gpu:6942.016348126688
142
+ Step 142 | loss:0.0009276614873670042 lr:4.6860474023534335e-05 tokens_per_second_per_gpu:6767.839635599327
143
+ Step 143 | loss:0.0004551708116196096 lr:4.616404859365907e-05 tokens_per_second_per_gpu:6888.172090585795
144
+ Step 144 | loss:0.0005538578261621296 lr:4.5468370990111006e-05 tokens_per_second_per_gpu:6768.255774804881
145
+ Step 145 | loss:0.0009637348703108728 lr:4.477357683661734e-05 tokens_per_second_per_gpu:6818.526926226486
146
+ Step 146 | loss:0.0027866477612406015 lr:4.407980158467495e-05 tokens_per_second_per_gpu:7056.942774868622
147
+ Step 147 | loss:0.002221394097432494 lr:4.3387180487143876e-05 tokens_per_second_per_gpu:6808.126183156505
148
+ Step 148 | loss:0.0017781126080080867 lr:4.269584857187943e-05 tokens_per_second_per_gpu:7079.1447666266
149
+ Step 149 | loss:0.002728288760408759 lr:4.2005940615408264e-05 tokens_per_second_per_gpu:7061.440566873951
150
+ Step 150 | loss:0.00043648516293615103 lr:4.131759111665349e-05 tokens_per_second_per_gpu:6838.369778287981
151
+ Step 151 | loss:0.005630104336887598 lr:4.063093427071376e-05 tokens_per_second_per_gpu:7121.344205876374
152
+ Step 152 | loss:0.001904865843243897 lr:3.9946103942701777e-05 tokens_per_second_per_gpu:6800.521178331733
153
+ Step 153 | loss:0.0029734631534665823 lr:3.926323364164684e-05 tokens_per_second_per_gpu:6822.838847173219
154
+ Step 154 | loss:0.0009773087222129107 lr:3.858245649446721e-05 tokens_per_second_per_gpu:6759.756627635941
155
+ Step 155 | loss:0.00153159792535007 lr:3.790390522001662e-05 tokens_per_second_per_gpu:7051.6791390615945
156
+ Step 156 | loss:0.0017198395216837525 lr:3.7227712103210486e-05 tokens_per_second_per_gpu:7066.022989144581
157
+ Step 157 | loss:0.0016727091278880835 lr:3.655400896923672e-05 tokens_per_second_per_gpu:6811.702307534606
158
+ Step 158 | loss:0.003214507596567273 lr:3.588292715785617e-05 tokens_per_second_per_gpu:6922.819964740895
159
+ Step 159 | loss:0.0010125575354322791 lr:3.5214597497797684e-05 tokens_per_second_per_gpu:7060.987353340303
160
+ Step 160 | loss:0.0010515168542042375 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:6804.303949781229
161
+ Step 161 | loss:0.001024384517222643 lr:3.388671523847445e-05 tokens_per_second_per_gpu:6841.669938396447
162
+ Step 162 | loss:0.0018848608015105128 lr:3.322742151248725e-05 tokens_per_second_per_gpu:6751.846793117959
163
+ Step 163 | loss:0.0013347417116165161 lr:3.257139763390925e-05 tokens_per_second_per_gpu:7027.891719500398
164
+ Step 164 | loss:0.0006750918691977859 lr:3.1918771495895396e-05 tokens_per_second_per_gpu:6785.860509976884
165
+ Step 165 | loss:0.0011055140057578683 lr:3.12696703292044e-05 tokens_per_second_per_gpu:7077.9940436212155
166
+ Step 166 | loss:0.0007286557811312377 lr:3.062422067739485e-05 tokens_per_second_per_gpu:7043.7316350507
167
+ Step 167 | loss:0.0037401369772851467 lr:2.9982548372155263e-05 tokens_per_second_per_gpu:7083.24516489987
168
+ Step 168 | loss:0.0010677126701921225 lr:2.934477850877292e-05 tokens_per_second_per_gpu:6932.174790116868
169
+ Step 169 | loss:0.00019011566473636776 lr:2.8711035421746367e-05 tokens_per_second_per_gpu:6749.252022014583
170
+ Step 170 | loss:0.00033960933797061443 lr:2.8081442660546125e-05 tokens_per_second_per_gpu:6863.493168543277
171
+ Step 171 | loss:0.0015284103574231267 lr:2.7456122965528475e-05 tokens_per_second_per_gpu:6828.519459914139
172
+ Step 172 | loss:0.0036087161861360073 lr:2.6835198244006927e-05 tokens_per_second_per_gpu:7082.15377515148
173
+ Step 173 | loss:0.0005151398945599794 lr:2.6218789546486234e-05 tokens_per_second_per_gpu:6752.773445394205
174
+ Step 174 | loss:0.00032851702417247 lr:2.560701704306336e-05 tokens_per_second_per_gpu:6789.504094228726
175
+ Step 175 | loss:0.001663737464696169 lr:2.500000000000001e-05 tokens_per_second_per_gpu:6815.591288989559
176
+ Step 176 | loss:0.0011574072996154428 lr:2.4397856756471432e-05 tokens_per_second_per_gpu:6828.92087090997
177
+ Step 177 | loss:0.0018865606980398297 lr:2.3800704701496053e-05 tokens_per_second_per_gpu:7081.598663947833
178
+ Step 178 | loss:0.0011174243409186602 lr:2.3208660251050158e-05 tokens_per_second_per_gpu:7082.039230725635
179
+ Step 179 | loss:0.00067571533145383 lr:2.2621838825372493e-05 tokens_per_second_per_gpu:6798.832947386817
180
+ Step 180 | loss:0.00047556660138070583 lr:2.2040354826462668e-05 tokens_per_second_per_gpu:6924.668744916678
181
+ Step 181 | loss:0.00037496539880521595 lr:2.1464321615778422e-05 tokens_per_second_per_gpu:6800.830937853566
182
+ Step 182 | loss:0.0004152611654717475 lr:2.0893851492135537e-05 tokens_per_second_per_gpu:6947.978079695834
183
+ Step 183 | loss:0.0009621500503271818 lr:2.0329055669814934e-05 tokens_per_second_per_gpu:6783.273996315187
184
+ Step 184 | loss:0.0012321381364017725 lr:1.977004425688126e-05 tokens_per_second_per_gpu:6811.429980135828
185
+ Step 185 | loss:0.00025866113719530404 lr:1.9216926233717085e-05 tokens_per_second_per_gpu:6783.257337206322
186
+ Step 186 | loss:0.002340645994991064 lr:1.866980943177699e-05 tokens_per_second_per_gpu:7067.855191085557
187
+ Step 187 | loss:0.0013471072306856513 lr:1.8128800512565513e-05 tokens_per_second_per_gpu:7093.102910885423
188
+ Step 188 | loss:0.0012069465592503548 lr:1.7594004946843456e-05 tokens_per_second_per_gpu:6876.0927182988735
189
+ Step 189 | loss:0.001518066506832838 lr:1.7065526994065973e-05 tokens_per_second_per_gpu:6902.493203679121
190
+ Step 190 | loss:0.0015219164779409766 lr:1.6543469682057106e-05 tokens_per_second_per_gpu:6756.480865419263
191
+ Step 191 | loss:0.0019189255544915795 lr:1.602793478692419e-05 tokens_per_second_per_gpu:7088.821810042651
192
+ Step 192 | loss:0.0005621357704512775 lr:1.551902281321651e-05 tokens_per_second_per_gpu:6830.200919745289
193
+ Step 193 | loss:0.001488498062826693 lr:1.5016832974331724e-05 tokens_per_second_per_gpu:6753.454997326393
194
+ Step 194 | loss:0.0016284087905660272 lr:1.4521463173173965e-05 tokens_per_second_per_gpu:6869.125853864198
195
+ Step 195 | loss:0.002182665513828397 lr:1.4033009983067452e-05 tokens_per_second_per_gpu:6832.429235304975
196
+ Step 196 | loss:0.0011356642935425043 lr:1.3551568628929434e-05 tokens_per_second_per_gpu:6842.570545775131
197
+ Step 197 | loss:0.0014715568395331502 lr:1.3077232968705805e-05 tokens_per_second_per_gpu:7079.698799487813
198
+ Step 198 | loss:0.0011941406410187483 lr:1.2610095475073414e-05 tokens_per_second_per_gpu:6788.546335926008
199
+ Step 199 | loss:0.002553859492763877 lr:1.2150247217412186e-05 tokens_per_second_per_gpu:7072.1766037679445
200
+ Step 200 | loss:0.002418858464807272 lr:1.1697777844051105e-05 tokens_per_second_per_gpu:7070.26639598808
201
+ Step 201 | loss:0.00022807034838479012 lr:1.1252775564791024e-05 tokens_per_second_per_gpu:6755.340153978266
202
+ Step 202 | loss:0.0022267724853008986 lr:1.0815327133708015e-05 tokens_per_second_per_gpu:7076.30069714757
203
+ Step 203 | loss:0.0009444208699278533 lr:1.0385517832240471e-05 tokens_per_second_per_gpu:6897.970980091377
204
+ Step 204 | loss:0.0031882880721241236 lr:9.963431452563332e-06 tokens_per_second_per_gpu:7073.835945134803
205
+ Step 205 | loss:0.000676371215377003 lr:9.549150281252633e-06 tokens_per_second_per_gpu:7080.9184577339265
206
+ Step 206 | loss:0.0008925383444875479 lr:9.142755083243576e-06 tokens_per_second_per_gpu:6930.198401475367
207
+ Step 207 | loss:0.002081420738250017 lr:8.744325086085248e-06 tokens_per_second_per_gpu:6957.562423264895
208
+ Step 208 | loss:0.00026374580920673907 lr:8.353937964495029e-06 tokens_per_second_per_gpu:6745.388585455252
209
+ Step 209 | loss:0.001403855043463409 lr:7.971669825215788e-06 tokens_per_second_per_gpu:6765.415505030002
210
+ Step 210 | loss:0.0006239673239178956 lr:7.597595192178702e-06 tokens_per_second_per_gpu:6754.522948906783
211
+ Step 211 | loss:0.0012805211590602994 lr:7.2317869919746705e-06 tokens_per_second_per_gpu:6908.773129201858
212
+ Step 212 | loss:0.0016043313080444932 lr:6.874316539637127e-06 tokens_per_second_per_gpu:6819.215629540058
213
+ Step 213 | loss:0.00026408283156342804 lr:6.52525352473905e-06 tokens_per_second_per_gpu:6856.267463665356
214
+ Step 214 | loss:0.0010434025898575783 lr:6.184665997806832e-06 tokens_per_second_per_gpu:6752.933544010319
215
+ Step 215 | loss:0.0016748131019994617 lr:5.852620357053651e-06 tokens_per_second_per_gpu:7069.560336048503
216
+ Step 216 | loss:0.0002644062624312937 lr:5.529181335435124e-06 tokens_per_second_per_gpu:6783.721789910482
217
+ Step 217 | loss:0.0012606634991243482 lr:5.214411988029355e-06 tokens_per_second_per_gpu:6788.511260014302
218
+ Step 218 | loss:0.0011555441888049245 lr:4.908373679744316e-06 tokens_per_second_per_gpu:6812.071941439878
219
+ Step 219 | loss:0.0010011408012360334 lr:4.611126073354571e-06 tokens_per_second_per_gpu:6790.057154106953
220
+ Step 220 | loss:0.001161118852905929 lr:4.322727117869951e-06 tokens_per_second_per_gpu:6838.034017608517
221
+ Step 221 | loss:0.0008470311295241117 lr:4.043233037238281e-06 tokens_per_second_per_gpu:6739.783261886782
222
+ Step 222 | loss:0.0003358850080985576 lr:3.772698319384349e-06 tokens_per_second_per_gpu:6810.188296253875
223
+ Step 223 | loss:0.0017017056234180927 lr:3.511175705587433e-06 tokens_per_second_per_gpu:7134.196991419764
224
+ Step 224 | loss:0.00045213307021185756 lr:3.258716180199278e-06 tokens_per_second_per_gpu:6820.160351750362
225
+ Step 225 | loss:0.0012671389849856496 lr:3.0153689607045845e-06 tokens_per_second_per_gpu:6791.659265496022
226
+ Step 226 | loss:0.0012812168570235372 lr:2.7811814881259503e-06 tokens_per_second_per_gpu:6810.583773959078
227
+ Step 227 | loss:0.0007159471279010177 lr:2.5561994177751737e-06 tokens_per_second_per_gpu:6743.998299794295
228
+ Step 228 | loss:0.0021349831949919462 lr:2.340466610352654e-06 tokens_per_second_per_gpu:7066.600262207837
229
+ Step 229 | loss:0.00030012783827260137 lr:2.134025123396638e-06 tokens_per_second_per_gpu:6814.5203125288635
230
+ Step 230 | loss:0.00024607600062154233 lr:1.9369152030840556e-06 tokens_per_second_per_gpu:6785.876556055032
231
+ Step 231 | loss:0.0006418603588826954 lr:1.7491752763844293e-06 tokens_per_second_per_gpu:7075.07446026204
232
+ Step 232 | loss:0.00033673577127046883 lr:1.5708419435684462e-06 tokens_per_second_per_gpu:6945.267938724145
233
+ Step 233 | loss:0.00025602109963074327 lr:1.4019499710726913e-06 tokens_per_second_per_gpu:6790.554445807879
234
+ Step 234 | loss:0.00047656253445893526 lr:1.2425322847218368e-06 tokens_per_second_per_gpu:6771.395451788634
235
+ Step 235 | loss:0.00046232191380113363 lr:1.0926199633097157e-06 tokens_per_second_per_gpu:6758.807390887128
236
+ Step 236 | loss:0.0018393310019746423 lr:9.522422325404235e-07 tokens_per_second_per_gpu:6817.929599404819
237
+ Step 237 | loss:0.0011434406042099 lr:8.214264593307098e-07 tokens_per_second_per_gpu:6823.818443994626
238
+ Step 238 | loss:0.0012595762964338064 lr:7.001981464747565e-07 tokens_per_second_per_gpu:6804.744850083202
239
+ Step 239 | loss:0.0027921022847294807 lr:5.885809276723608e-07 tokens_per_second_per_gpu:7088.774800407417
240
+ Step 240 | loss:0.0010998341022059321 lr:4.865965629214819e-07 tokens_per_second_per_gpu:6783.880260917502
241
+ Step 241 | loss:0.0020137943793088198 lr:3.9426493427611177e-07 tokens_per_second_per_gpu:7076.471847159209
242
+ Step 242 | loss:0.0006402755388990045 lr:3.1160404197018154e-07 tokens_per_second_per_gpu:7070.256800749601
243
+ Step 243 | loss:0.0020476654171943665 lr:2.386300009084408e-07 tokens_per_second_per_gpu:6931.143869152546
244
+ Step 244 | loss:0.0016461233608424664 lr:1.753570375247815e-07 tokens_per_second_per_gpu:6873.38200128151
245
+ Step 245 | loss:0.0015378650277853012 lr:1.2179748700879012e-07 tokens_per_second_per_gpu:6931.3903853247375
246
+ Step 246 | loss:0.0012600197223946452 lr:7.796179090094891e-08 tokens_per_second_per_gpu:6836.136965216229
247
+ Step 247 | loss:0.0010782753815874457 lr:4.385849505708084e-08 tokens_per_second_per_gpu:6771.003891387363
248
+ Step 248 | loss:0.0006726465653628111 lr:1.949424798228239e-08 tokens_per_second_per_gpu:7068.659410114673
249
+ Step 249 | loss:0.0013102944940328598 lr:4.873799534788059e-09 tokens_per_second_per_gpu:7036.706840644525
250
+ Step 250 | loss:0.0007836272125132382 lr:0.0 tokens_per_second_per_gpu:6834.360445101376
009d5c81/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
009d5c81/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0 0 0 0 0]\n [0 0 0 3 3 0 3 3 3 0 0 0 0 0]\n [0 0 0 0 3 3 0 3 0 0 0 0 0 0]\n [0 0 3 3 0 0 3 0 0 0 0 0 0 0]\n [0 0 3 0 3 3 0 0 0 0 0 0 0 0]\n [0 0 0 3 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 6 0 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 6 0 0 0]\n [0 0 0 0 0 0 0 0 0 6 6 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 8 0 0 0 0 0 0 0 0]\n [0 0 0 8 8 0 8 8 8 0 0 0 0 0]\n [0 0 0 0 8 8 0 8 0 0 0 0 0 0]\n [0 0 8 8 0 0 8 0 0 0 0 0 0 0]\n [0 0 8 0 8 8 0 0 0 0 0 0 0 0]\n [0 0 0 8 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 3 3 0 3 3 0 0 0 0 0 0 0]\n [0 0 0 3 3 3 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 3 3 3 0 0 0 0 0]\n [0 0 3 3 0 0 3 0 3 3 0 0 0 0]\n [0 0 0 3 3 3 3 0 0 0 0 0 0 0]\n [0 0 0 3 0 3 0 0 0 0 0 0 0 0]\n [0 3 3 3 0 3 3 3 0 0 0 0 0 0]\n [0 3 0 0 0 0 0 3 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 6 6 6 0]\n [0 0 0 0 0 0 0 0 0 0 0 6 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 9 9 0 9 9 0 0 0 0 0 0 0]\n [0 0 0 9 9 9 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 9 9 9 9 0 0 0 0 0]\n [0 0 9 9 0 0 9 0 9 9 0 0 0 0]\n [0 0 0 9 9 9 9 0 0 0 0 0 0 0]\n [0 0 0 9 0 9 0 0 0 0 0 0 0 0]\n [0 9 9 9 0 9 9 9 0 0 0 0 0 0]\n [0 9 0 0 0 0 0 9 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 3 3 0 3 0 3 3 0 0 0 0]\n [0 0 0 3 0 3 0 3 0 3 0 0 0 0]\n [0 0 0 3 3 0 3 0 3 3 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 0 6 0 0 0]\n [0 0 0 0 0 0 0 0 0 6 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 6 6 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 8 8 0 8 0 8 8 0 0 0 0]\n [0 0 0 8 0 8 0 8 0 8 0 0 0 0]\n [0 0 0 8 8 0 8 0 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 3 3 0 0 0 0 0 0 0]\n [0 0 0 3 3 3 0 0 0 0 0 0 0 0]\n [0 3 0 3 0 0 0 0 0 0 0 0 0 0]\n [0 3 3 3 0 3 3 3 3 0 0 0 0 0]\n [3 3 0 3 3 3 0 0 3 3 0 0 0 0]\n [0 3 0 0 0 3 3 0 0 0 0 0 0 0]\n [0 3 3 0 0 0 3 3 0 0 0 0 0 0]\n [0 0 3 3 3 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 6 6 6 0 0 0 0]\n [0 0 0 0 0 0 0 6 0 6 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 5 5 0 0 0 0 0 0 0]\n [0 0 0 5 5 5 0 0 0 0 0 0 0 0]\n [0 5 0 5 0 0 0 0 0 0 0 0 0 0]\n [0 5 5 5 0 5 5 5 5 0 
0 0 0 0]\n [5 5 0 5 5 5 0 0 5 5 0 0 0 0]\n [0 5 0 0 0 5 5 0 0 0 0 0 0 0]\n [0 5 5 0 0 0 5 5 0 0 0 0 0 0]\n [0 0 5 5 5 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 3 0 0 0 3 0 0 0]\n [0 0 0 0 3 0 3 0 3 0 3 0 3 0]\n [0 0 0 0 3 3 3 3 3 3 3 3 3 0]\n [0 0 0 0 3 0 3 0 3 0 3 0 3 0]\n [0 0 0 0 3 0 0 0 3 0 0 0 3 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 6 6 6 0 0 0 0 0 0 0]\n [0 0 0 0 0 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 0 0 0 9 0 0 0 9 0 0 0]\n [0 0 0 0 9 0 9 0 9 0 9 0 9 0]\n [0 0 0 0 9 9 9 9 9 9 9 9 9 0]\n [0 0 0 0 9 0 9 0 9 0 9 0 9 0]\n [0 0 0 0 9 0 0 0 9 0 0 0 9 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 4129}
00dbd492/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
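Each task directory pairs an adapter_config.json like the one above with an adapter_model.bin LoRA checkpoint (rank 128, targeting the attention and MLP projections plus lm_head). A minimal sketch, under assumptions, of attaching such an adapter to its base model with the peft library; the base-model path is copied from base_model_name_or_path above and is assumed to resolve to a local checkpoint, and the adapter directory name is illustrative:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Assumed paths: base path taken from the adapter config above,
# adapter path is one of the per-task folders in this upload.
BASE_MODEL = "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/"
ADAPTER_DIR = "00dbd492"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16)

# Wraps the base model with the LoRA weights stored in adapter_model.bin.
model = PeftModel.from_pretrained(base, ADAPTER_DIR)
model.eval()
```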
00dbd492/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f27c8f3e53b3fe8a2c1b07344473faa831ac4d423b1d792d7203f59aa81b4aa6
3
+ size 596042770
00dbd492/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
00dbd492/log_1730923997.txt ADDED
@@ -0,0 +1,250 @@
1
+ Step 1 | loss:0.013372980058193207 lr:4.000000000000001e-06 tokens_per_second_per_gpu:3874.8165404626934
2
+ Step 2 | loss:0.008318759500980377 lr:8.000000000000001e-06 tokens_per_second_per_gpu:3994.9100071089683
3
+ Step 3 | loss:0.020588943734765053 lr:1.2e-05 tokens_per_second_per_gpu:2622.9347670806737
4
+ Step 4 | loss:0.015207692980766296 lr:1.6000000000000003e-05 tokens_per_second_per_gpu:7117.680403972301
5
+ Step 5 | loss:0.015772100538015366 lr:2e-05 tokens_per_second_per_gpu:2841.3650484667232
6
+ Step 6 | loss:0.014235018752515316 lr:2.4e-05 tokens_per_second_per_gpu:7144.18862882571
7
+ Step 7 | loss:0.04776996001601219 lr:2.8000000000000003e-05 tokens_per_second_per_gpu:6944.746126975525
8
+ Step 8 | loss:0.01470720861107111 lr:3.2000000000000005e-05 tokens_per_second_per_gpu:7099.559212838606
9
+ Step 9 | loss:0.011193573474884033 lr:3.6e-05 tokens_per_second_per_gpu:7085.40196576822
10
+ Step 10 | loss:0.010021017864346504 lr:4e-05 tokens_per_second_per_gpu:7123.146230649066
11
+ Step 11 | loss:0.01647418551146984 lr:4.4000000000000006e-05 tokens_per_second_per_gpu:7026.652050826819
12
+ Step 12 | loss:0.011999926529824734 lr:4.8e-05 tokens_per_second_per_gpu:7097.950716141434
13
+ Step 13 | loss:0.022550508379936218 lr:5.2000000000000004e-05 tokens_per_second_per_gpu:5038.530118848792
14
+ Step 14 | loss:0.01995103433728218 lr:5.6000000000000006e-05 tokens_per_second_per_gpu:4248.733461591392
15
+ Step 15 | loss:0.006620384287089109 lr:6e-05 tokens_per_second_per_gpu:7131.65967860037
16
+ Step 16 | loss:0.00740306731313467 lr:6.400000000000001e-05 tokens_per_second_per_gpu:7127.628243449412
17
+ Step 17 | loss:0.0060096438974142075 lr:6.800000000000001e-05 tokens_per_second_per_gpu:4974.283310307197
18
+ Step 18 | loss:0.006891552358865738 lr:7.2e-05 tokens_per_second_per_gpu:7093.440515229761
19
+ Step 19 | loss:0.03327202796936035 lr:7.6e-05 tokens_per_second_per_gpu:3179.4929076391018
20
+ Step 20 | loss:0.009643638506531715 lr:8e-05 tokens_per_second_per_gpu:3810.467892928971
21
+ Step 21 | loss:0.005601323209702969 lr:8.4e-05 tokens_per_second_per_gpu:3646.923119975776
22
+ Step 22 | loss:0.007782038766890764 lr:8.800000000000001e-05 tokens_per_second_per_gpu:7135.31195854449
23
+ Step 23 | loss:0.00458392733708024 lr:9.200000000000001e-05 tokens_per_second_per_gpu:7043.86940911673
24
+ Step 24 | loss:0.004309103358536959 lr:9.6e-05 tokens_per_second_per_gpu:4565.439191471691
25
+ Step 25 | loss:0.01496342197060585 lr:0.0001 tokens_per_second_per_gpu:7025.824794922582
26
+ Step 26 | loss:0.004186063073575497 lr:9.999512620046522e-05 tokens_per_second_per_gpu:7152.228127227936
27
+ Step 27 | loss:0.0035033023450523615 lr:9.998050575201771e-05 tokens_per_second_per_gpu:6816.616531689177
28
+ Step 28 | loss:0.00774459820240736 lr:9.995614150494293e-05 tokens_per_second_per_gpu:6955.943011111193
29
+ Step 29 | loss:0.0031369992066174746 lr:9.992203820909906e-05 tokens_per_second_per_gpu:6952.491947617002
30
+ Step 30 | loss:0.0030230407137423754 lr:9.987820251299122e-05 tokens_per_second_per_gpu:6775.238885333704
31
+ Step 31 | loss:0.003314398694783449 lr:9.982464296247522e-05 tokens_per_second_per_gpu:7106.728739581611
32
+ Step 32 | loss:0.001633325358852744 lr:9.976136999909156e-05 tokens_per_second_per_gpu:3764.4525024505992
33
+ Step 33 | loss:0.005330185871571302 lr:9.968839595802982e-05 tokens_per_second_per_gpu:3603.47294425594
34
+ Step 34 | loss:0.0014852150343358517 lr:9.96057350657239e-05 tokens_per_second_per_gpu:7117.485152356746
35
+ Step 35 | loss:0.003130739787593484 lr:9.951340343707852e-05 tokens_per_second_per_gpu:6990.789773012925
36
+ Step 36 | loss:0.004151569213718176 lr:9.941141907232765e-05 tokens_per_second_per_gpu:7033.465646617793
37
+ Step 37 | loss:0.013509933836758137 lr:9.929980185352526e-05 tokens_per_second_per_gpu:7094.785297432972
38
+ Step 38 | loss:0.001217296696268022 lr:9.917857354066931e-05 tokens_per_second_per_gpu:6891.074270619146
39
+ Step 39 | loss:0.018984084948897362 lr:9.904775776745958e-05 tokens_per_second_per_gpu:6937.616604164713
40
+ Step 40 | loss:0.0024438444525003433 lr:9.890738003669029e-05 tokens_per_second_per_gpu:7102.9656227885835
41
+ Step 41 | loss:0.004817514214664698 lr:9.875746771527816e-05 tokens_per_second_per_gpu:6932.069229790838
42
+ Step 42 | loss:0.00136182748246938 lr:9.859805002892732e-05 tokens_per_second_per_gpu:7147.055721597306
43
+ Step 43 | loss:0.001384297152981162 lr:9.842915805643155e-05 tokens_per_second_per_gpu:6985.959203114564
44
+ Step 44 | loss:0.002320047002285719 lr:9.825082472361557e-05 tokens_per_second_per_gpu:7128.594394261389
45
+ Step 45 | loss:0.0039440966211259365 lr:9.806308479691595e-05 tokens_per_second_per_gpu:7092.24097995752
46
+ Step 46 | loss:0.002282568486407399 lr:9.786597487660337e-05 tokens_per_second_per_gpu:7099.248524495624
47
+ Step 47 | loss:0.007890361361205578 lr:9.765953338964735e-05 tokens_per_second_per_gpu:7125.180928719208
48
+ Step 48 | loss:0.0063784425146877766 lr:9.744380058222483e-05 tokens_per_second_per_gpu:3185.985379749494
49
+ Step 49 | loss:0.0014725268119946122 lr:9.721881851187406e-05 tokens_per_second_per_gpu:2882.7688858859356
50
+ Step 50 | loss:0.0023472707252949476 lr:9.698463103929542e-05 tokens_per_second_per_gpu:7020.361247128179
51
+ Step 51 | loss:0.005551986861974001 lr:9.674128381980072e-05 tokens_per_second_per_gpu:7138.453096495373
52
+ Step 52 | loss:0.0020946969743818045 lr:9.648882429441257e-05 tokens_per_second_per_gpu:7095.370745145354
53
+ Step 53 | loss:0.0014590605860576034 lr:9.622730168061567e-05 tokens_per_second_per_gpu:6938.182521117374
54
+ Step 54 | loss:0.0026314163114875555 lr:9.595676696276172e-05 tokens_per_second_per_gpu:7146.813608067687
55
+ Step 55 | loss:0.004251094534993172 lr:9.567727288213005e-05 tokens_per_second_per_gpu:7020.545527205833
56
+ Step 56 | loss:0.002550149569287896 lr:9.538887392664544e-05 tokens_per_second_per_gpu:6238.048724097262
57
+ Step 57 | loss:0.0020003155805170536 lr:9.50916263202557e-05 tokens_per_second_per_gpu:6745.993080512584
58
+ Step 58 | loss:0.010202882811427116 lr:9.478558801197065e-05 tokens_per_second_per_gpu:1990.4750697143486
59
+ Step 59 | loss:0.005923750344663858 lr:9.447081866456489e-05 tokens_per_second_per_gpu:6820.061144228702
60
+ Step 60 | loss:0.0033114587422460318 lr:9.414737964294636e-05 tokens_per_second_per_gpu:7085.365520634816
61
+ Step 61 | loss:0.002277028514072299 lr:9.381533400219318e-05 tokens_per_second_per_gpu:6987.3647924218085
62
+ Step 62 | loss:0.0060898070223629475 lr:9.347474647526095e-05 tokens_per_second_per_gpu:6994.6666444410985
63
+ Step 63 | loss:0.0019283033907413483 lr:9.312568346036288e-05 tokens_per_second_per_gpu:6814.584929042839
64
+ Step 64 | loss:0.0009065241320058703 lr:9.276821300802534e-05 tokens_per_second_per_gpu:7062.157178065032
65
+ Step 65 | loss:0.001274686655960977 lr:9.24024048078213e-05 tokens_per_second_per_gpu:7123.374265454482
66
+ Step 66 | loss:0.002168378559872508 lr:9.202833017478422e-05 tokens_per_second_per_gpu:7069.50761988174
67
+ Step 67 | loss:0.0021304618567228317 lr:9.164606203550497e-05 tokens_per_second_per_gpu:7052.259079224
68
+ Step 68 | loss:0.002201642142608762 lr:9.125567491391476e-05 tokens_per_second_per_gpu:7136.391561456483
69
+ Step 69 | loss:0.0009723631665110588 lr:9.085724491675642e-05 tokens_per_second_per_gpu:7064.5492701151925
70
+ Step 70 | loss:0.003060169517993927 lr:9.045084971874738e-05 tokens_per_second_per_gpu:6941.652866473926
71
+ Step 71 | loss:0.0032122512347996235 lr:9.003656854743667e-05 tokens_per_second_per_gpu:7022.946844400017
72
+ Step 72 | loss:0.0020385109819471836 lr:8.961448216775954e-05 tokens_per_second_per_gpu:6927.626839447722
73
+ Step 73 | loss:0.001373852021060884 lr:8.9184672866292e-05 tokens_per_second_per_gpu:7065.016192215358
74
+ Step 74 | loss:0.004395594820380211 lr:8.874722443520899e-05 tokens_per_second_per_gpu:7113.838441720636
75
+ Step 75 | loss:0.0020413161255419254 lr:8.83022221559489e-05 tokens_per_second_per_gpu:7119.761473963676
76
+ Step 76 | loss:0.003172144526615739 lr:8.784975278258783e-05 tokens_per_second_per_gpu:7129.411685133083
77
+ Step 77 | loss:0.0022230115719139576 lr:8.73899045249266e-05 tokens_per_second_per_gpu:7063.9619534628155
78
+ Step 78 | loss:0.0008520846022292972 lr:8.692276703129421e-05 tokens_per_second_per_gpu:7034.131822093049
79
+ Step 79 | loss:0.001710437354631722 lr:8.644843137107059e-05 tokens_per_second_per_gpu:7058.615724528316
80
+ Step 80 | loss:0.0006989283137954772 lr:8.596699001693255e-05 tokens_per_second_per_gpu:6876.49493969523
81
+ Step 81 | loss:0.0011765643721446395 lr:8.547853682682604e-05 tokens_per_second_per_gpu:6893.020499058587
82
+ Step 82 | loss:0.002165679819881916 lr:8.498316702566828e-05 tokens_per_second_per_gpu:7026.9744119362285
83
+ Step 83 | loss:0.001775385346263647 lr:8.44809771867835e-05 tokens_per_second_per_gpu:6819.755051209656
84
+ Step 84 | loss:0.002057864563539624 lr:8.397206521307584e-05 tokens_per_second_per_gpu:7161.343267645968
85
+ Step 85 | loss:0.0026688796933740377 lr:8.345653031794292e-05 tokens_per_second_per_gpu:7056.729456010942
86
+ Step 86 | loss:0.0009387475438416004 lr:8.293447300593402e-05 tokens_per_second_per_gpu:7046.82909670213
87
+ Step 87 | loss:0.0029537666123360395 lr:8.240599505315655e-05 tokens_per_second_per_gpu:7163.705186401793
88
+ Step 88 | loss:0.001403899397701025 lr:8.18711994874345e-05 tokens_per_second_per_gpu:7086.841256326294
89
+ Step 89 | loss:0.0009538537124171853 lr:8.133019056822304e-05 tokens_per_second_per_gpu:6745.587328039051
90
+ Step 90 | loss:0.0026184518355876207 lr:8.07830737662829e-05 tokens_per_second_per_gpu:7151.122838997032
91
+ Step 91 | loss:0.0038881609216332436 lr:8.022995574311876e-05 tokens_per_second_per_gpu:7109.856026332342
92
+ Step 92 | loss:0.0007222425774671137 lr:7.967094433018508e-05 tokens_per_second_per_gpu:7084.979094367599
93
+ Step 93 | loss:0.00215703877620399 lr:7.910614850786448e-05 tokens_per_second_per_gpu:7127.295756871386
94
+ Step 94 | loss:0.00035772137925960124 lr:7.85356783842216e-05 tokens_per_second_per_gpu:6902.882920407662
95
+ Step 95 | loss:0.00417992053553462 lr:7.795964517353735e-05 tokens_per_second_per_gpu:6972.93791991583
96
+ Step 96 | loss:0.001009405474178493 lr:7.737816117462752e-05 tokens_per_second_per_gpu:7052.199205708232
97
+ Step 97 | loss:0.0009588804095983505 lr:7.679133974894983e-05 tokens_per_second_per_gpu:7000.472939006606
98
+ Step 98 | loss:0.0018452832009643316 lr:7.619929529850397e-05 tokens_per_second_per_gpu:7084.619581608012
99
+ Step 99 | loss:0.0035648574121296406 lr:7.560214324352858e-05 tokens_per_second_per_gpu:7087.564819676298
100
+ Step 100 | loss:0.0012323096161708236 lr:7.500000000000001e-05 tokens_per_second_per_gpu:6959.057675476407
101
+ Step 101 | loss:0.0014938520034775138 lr:7.439298295693665e-05 tokens_per_second_per_gpu:7144.913116406752
102
+ Step 102 | loss:0.001955854007974267 lr:7.378121045351378e-05 tokens_per_second_per_gpu:7067.2556906500395
103
+ Step 103 | loss:0.0031611681915819645 lr:7.316480175599309e-05 tokens_per_second_per_gpu:6974.208066732836
104
+ Step 104 | loss:0.00555458152666688 lr:7.254387703447154e-05 tokens_per_second_per_gpu:6906.162536864967
105
+ Step 105 | loss:0.001002943841740489 lr:7.191855733945387e-05 tokens_per_second_per_gpu:6972.908661671081
106
+ Step 106 | loss:0.0017082667909562588 lr:7.128896457825364e-05 tokens_per_second_per_gpu:7005.0246815049695
107
+ Step 107 | loss:0.0013688785256817937 lr:7.06552214912271e-05 tokens_per_second_per_gpu:7103.879138546548
108
+ Step 108 | loss:0.0005387114943005145 lr:7.001745162784477e-05 tokens_per_second_per_gpu:6982.492730096482
109
+ Step 109 | loss:0.0016628771554678679 lr:6.937577932260515e-05 tokens_per_second_per_gpu:7014.960024636566
110
+ Step 110 | loss:0.004161191172897816 lr:6.873032967079561e-05 tokens_per_second_per_gpu:6953.228299164842
111
+ Step 111 | loss:0.0009758872911334038 lr:6.808122850410461e-05 tokens_per_second_per_gpu:6855.019034410367
112
+ Step 112 | loss:0.0013757736887782812 lr:6.742860236609077e-05 tokens_per_second_per_gpu:6921.972251687803
113
+ Step 113 | loss:0.0019739512354135513 lr:6.677257848751277e-05 tokens_per_second_per_gpu:7069.092554140731
114
+ Step 114 | loss:0.0011348786065354943 lr:6.611328476152557e-05 tokens_per_second_per_gpu:7158.150929958733
115
+ Step 115 | loss:0.003932917956262827 lr:6.545084971874738e-05 tokens_per_second_per_gpu:6947.480109452886
116
+ Step 116 | loss:0.0009977880399674177 lr:6.478540250220234e-05 tokens_per_second_per_gpu:7127.033721864732
117
+ Step 117 | loss:0.00743027264252305 lr:6.411707284214384e-05 tokens_per_second_per_gpu:7096.241446538374
118
+ Step 118 | loss:0.002439243718981743 lr:6.344599103076329e-05 tokens_per_second_per_gpu:6935.2267354820715
119
+ Step 119 | loss:0.0004214672080706805 lr:6.277228789678953e-05 tokens_per_second_per_gpu:7091.56909795498
120
+ Step 120 | loss:0.0007211238844320178 lr:6.209609477998338e-05 tokens_per_second_per_gpu:6898.000938353513
121
+ Step 121 | loss:0.0007646355079486966 lr:6.141754350553279e-05 tokens_per_second_per_gpu:7099.772787485942
122
+ Step 122 | loss:0.0011237451108172536 lr:6.073676635835317e-05 tokens_per_second_per_gpu:6775.0731863648925
123
+ Step 123 | loss:0.0006198790506459773 lr:6.005389605729824e-05 tokens_per_second_per_gpu:7153.354180313162
124
+ Step 124 | loss:0.00046897237189114094 lr:5.9369065729286245e-05 tokens_per_second_per_gpu:6944.312583773988
125
+ Step 125 | loss:0.0015241801738739014 lr:5.868240888334653e-05 tokens_per_second_per_gpu:7065.43870966363
126
+ Step 126 | loss:0.00150835991371423 lr:5.799405938459175e-05 tokens_per_second_per_gpu:6991.128441325819
127
+ Step 127 | loss:0.0031010867096483707 lr:5.730415142812059e-05 tokens_per_second_per_gpu:7042.518093887544
128
+ Step 128 | loss:0.000658935634419322 lr:5.661281951285613e-05 tokens_per_second_per_gpu:6984.4888204604495
129
+ Step 129 | loss:0.0019208983285352588 lr:5.5920198415325064e-05 tokens_per_second_per_gpu:7114.971657321872
130
+ Step 130 | loss:0.010894868522882462 lr:5.522642316338268e-05 tokens_per_second_per_gpu:6949.150946368696
131
+ Step 131 | loss:0.0006857437547296286 lr:5.453162900988902e-05 tokens_per_second_per_gpu:7138.962920295958
132
+ Step 132 | loss:0.0025876301806420088 lr:5.383595140634093e-05 tokens_per_second_per_gpu:6911.379588236837
133
+ Step 133 | loss:0.0007797284051775932 lr:5.313952597646568e-05 tokens_per_second_per_gpu:7010.2993322206075
134
+ Step 134 | loss:0.0006535501452162862 lr:5.244248848978067e-05 tokens_per_second_per_gpu:7117.641226887859
135
+ Step 135 | loss:0.0011649364605545998 lr:5.174497483512506e-05 tokens_per_second_per_gpu:7042.7284180611705
136
+ Step 136 | loss:0.00197750236839056 lr:5.104712099416785e-05 tokens_per_second_per_gpu:6937.186163094593
137
+ Step 137 | loss:0.0006281325477175415 lr:5.034906301489808e-05 tokens_per_second_per_gpu:6890.483427881456
138
+ Step 138 | loss:0.0004475094028748572 lr:4.965093698510193e-05 tokens_per_second_per_gpu:7061.311318509494
139
+ Step 139 | loss:0.001448856433853507 lr:4.895287900583216e-05 tokens_per_second_per_gpu:6794.672713275055
140
+ Step 140 | loss:0.0017595699755474925 lr:4.825502516487497e-05 tokens_per_second_per_gpu:6787.3119728029815
141
+ Step 141 | loss:0.0038012624718248844 lr:4.755751151021934e-05 tokens_per_second_per_gpu:7027.214726395109
142
+ Step 142 | loss:0.0019520800560712814 lr:4.6860474023534335e-05 tokens_per_second_per_gpu:7069.12945612731
143
+ Step 143 | loss:0.001292936853133142 lr:4.616404859365907e-05 tokens_per_second_per_gpu:6698.106579498869
144
+ Step 144 | loss:0.0011428159195929766 lr:4.5468370990111006e-05 tokens_per_second_per_gpu:6902.521683648508
145
+ Step 145 | loss:0.004733377136290073 lr:4.477357683661734e-05 tokens_per_second_per_gpu:7006.496317744093
146
+ Step 146 | loss:0.00043399244896136224 lr:4.407980158467495e-05 tokens_per_second_per_gpu:7114.975104224623
147
+ Step 147 | loss:0.0014300920302048326 lr:4.3387180487143876e-05 tokens_per_second_per_gpu:7081.525811614731
148
+ Step 148 | loss:0.000798733439296484 lr:4.269584857187943e-05 tokens_per_second_per_gpu:7045.750572126043
149
+ Step 149 | loss:0.002541664056479931 lr:4.2005940615408264e-05 tokens_per_second_per_gpu:6900.663567071848
150
+ Step 150 | loss:0.0009505233610980213 lr:4.131759111665349e-05 tokens_per_second_per_gpu:6986.384852225816
151
+ Step 151 | loss:0.001285750069655478 lr:4.063093427071376e-05 tokens_per_second_per_gpu:7168.225778742038
152
+ Step 152 | loss:0.0008513851789757609 lr:3.9946103942701777e-05 tokens_per_second_per_gpu:7039.891508284199
153
+ Step 153 | loss:0.0036429339088499546 lr:3.926323364164684e-05 tokens_per_second_per_gpu:7007.155142247289
154
+ Step 154 | loss:0.001225275918841362 lr:3.858245649446721e-05 tokens_per_second_per_gpu:7075.071287563538
155
+ Step 155 | loss:0.0014096405357122421 lr:3.790390522001662e-05 tokens_per_second_per_gpu:6981.197578253649
156
+ Step 156 | loss:0.0010392990661785007 lr:3.7227712103210486e-05 tokens_per_second_per_gpu:7098.612489109119
157
+ Step 157 | loss:0.0027088394854217768 lr:3.655400896923672e-05 tokens_per_second_per_gpu:7156.940597865128
158
+ Step 158 | loss:0.003336328314617276 lr:3.588292715785617e-05 tokens_per_second_per_gpu:6945.79586399277
159
+ Step 159 | loss:0.0022836641874164343 lr:3.5214597497797684e-05 tokens_per_second_per_gpu:6948.45775123944
160
+ Step 160 | loss:0.00043441716115921736 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:7117.049542009377
161
+ Step 161 | loss:0.0015876208199188113 lr:3.388671523847445e-05 tokens_per_second_per_gpu:6917.912796460095
162
+ Step 162 | loss:0.0003647822595667094 lr:3.322742151248725e-05 tokens_per_second_per_gpu:7135.103724169449
163
+ Step 163 | loss:0.0021626269444823265 lr:3.257139763390925e-05 tokens_per_second_per_gpu:6914.728402143451
164
+ Step 164 | loss:0.002703474136069417 lr:3.1918771495895396e-05 tokens_per_second_per_gpu:6896.78496264613
165
+ Step 165 | loss:0.0022170422598719597 lr:3.12696703292044e-05 tokens_per_second_per_gpu:7088.043016537944
166
+ Step 166 | loss:0.0008771849097684026 lr:3.062422067739485e-05 tokens_per_second_per_gpu:6884.6708126055555
167
+ Step 167 | loss:0.0016094500897452235 lr:2.9982548372155263e-05 tokens_per_second_per_gpu:6842.5530033440355
168
+ Step 168 | loss:0.0004359333252068609 lr:2.934477850877292e-05 tokens_per_second_per_gpu:7025.009382698733
169
+ Step 169 | loss:0.00035868945997208357 lr:2.8711035421746367e-05 tokens_per_second_per_gpu:6818.46959405271
170
+ Step 170 | loss:0.0010377708822488785 lr:2.8081442660546125e-05 tokens_per_second_per_gpu:6963.642834178167
171
+ Step 171 | loss:0.004563743248581886 lr:2.7456122965528475e-05 tokens_per_second_per_gpu:7009.664219092231
172
+ Step 172 | loss:0.002375879557803273 lr:2.6835198244006927e-05 tokens_per_second_per_gpu:7145.9721486139315
173
+ Step 173 | loss:0.0013227115850895643 lr:2.6218789546486234e-05 tokens_per_second_per_gpu:7027.731133241178
174
+ Step 174 | loss:0.001065325690433383 lr:2.560701704306336e-05 tokens_per_second_per_gpu:7145.854333408454
175
+ Step 175 | loss:0.00026451688609085977 lr:2.500000000000001e-05 tokens_per_second_per_gpu:6977.66593747055
176
+ Step 176 | loss:0.0007138861110433936 lr:2.4397856756471432e-05 tokens_per_second_per_gpu:7035.885887306675
177
+ Step 177 | loss:0.000287461094558239 lr:2.3800704701496053e-05 tokens_per_second_per_gpu:6875.0926933782675
178
+ Step 178 | loss:0.0005073968786746264 lr:2.3208660251050158e-05 tokens_per_second_per_gpu:7110.455798158638
179
+ Step 179 | loss:0.0009222722728736699 lr:2.2621838825372493e-05 tokens_per_second_per_gpu:7084.7714553618725
180
+ Step 180 | loss:0.0010540970833972096 lr:2.2040354826462668e-05 tokens_per_second_per_gpu:6988.182011407202
181
+ Step 181 | loss:0.00043954243301413953 lr:2.1464321615778422e-05 tokens_per_second_per_gpu:7011.86383701614
182
+ Step 182 | loss:0.0011906925356015563 lr:2.0893851492135537e-05 tokens_per_second_per_gpu:6885.3141883194785
183
+ Step 183 | loss:0.002102498197928071 lr:2.0329055669814934e-05 tokens_per_second_per_gpu:7080.036222220117
184
+ Step 184 | loss:0.0005148081108927727 lr:1.977004425688126e-05 tokens_per_second_per_gpu:7076.435828090621
185
+ Step 185 | loss:0.0012309339363127947 lr:1.9216926233717085e-05 tokens_per_second_per_gpu:7017.950566668733
186
+ Step 186 | loss:0.0015539468731731176 lr:1.866980943177699e-05 tokens_per_second_per_gpu:7132.538506473828
187
+ Step 187 | loss:0.001363910036161542 lr:1.8128800512565513e-05 tokens_per_second_per_gpu:6969.457191706217
188
+ Step 188 | loss:0.0008457832736894488 lr:1.7594004946843456e-05 tokens_per_second_per_gpu:7114.060352573214
189
+ Step 189 | loss:0.0006026898045092821 lr:1.7065526994065973e-05 tokens_per_second_per_gpu:7100.238133807311
190
+ Step 190 | loss:0.002620010171085596 lr:1.6543469682057106e-05 tokens_per_second_per_gpu:6917.934596811246
191
+ Step 191 | loss:0.0005382331437431276 lr:1.602793478692419e-05 tokens_per_second_per_gpu:7129.193352348408
192
+ Step 192 | loss:0.0011343047954142094 lr:1.551902281321651e-05 tokens_per_second_per_gpu:7089.743379925699
193
+ Step 193 | loss:0.0001948566932696849 lr:1.5016832974331724e-05 tokens_per_second_per_gpu:6958.511568194661
194
+ Step 194 | loss:0.0032963843550533056 lr:1.4521463173173965e-05 tokens_per_second_per_gpu:7153.5391956230305
195
+ Step 195 | loss:0.0013983447570353746 lr:1.4033009983067452e-05 tokens_per_second_per_gpu:7092.817204790224
196
+ Step 196 | loss:0.0004038666083943099 lr:1.3551568628929434e-05 tokens_per_second_per_gpu:7034.465560501549
197
+ Step 197 | loss:0.0005019771633669734 lr:1.3077232968705805e-05 tokens_per_second_per_gpu:7133.788517058802
198
+ Step 198 | loss:0.002158497227355838 lr:1.2610095475073414e-05 tokens_per_second_per_gpu:6935.653682473058
199
+ Step 199 | loss:0.0004485348763410002 lr:1.2150247217412186e-05 tokens_per_second_per_gpu:7035.408824162851
200
+ Step 200 | loss:0.0010387710062786937 lr:1.1697777844051105e-05 tokens_per_second_per_gpu:6995.588587661693
201
+ Step 201 | loss:0.0014105384470894933 lr:1.1252775564791024e-05 tokens_per_second_per_gpu:7165.8917280504775
202
+ Step 202 | loss:0.0008372652227990329 lr:1.0815327133708015e-05 tokens_per_second_per_gpu:6824.845028945261
203
+ Step 203 | loss:0.0016373236430808902 lr:1.0385517832240471e-05 tokens_per_second_per_gpu:6983.390434038561
204
+ Step 204 | loss:0.0010298008564859629 lr:9.963431452563332e-06 tokens_per_second_per_gpu:7092.856782372603
205
+ Step 205 | loss:0.0005104406154714525 lr:9.549150281252633e-06 tokens_per_second_per_gpu:6979.889176254553
206
+ Step 206 | loss:0.00044149774475954473 lr:9.142755083243576e-06 tokens_per_second_per_gpu:6729.542240077146
207
+ Step 207 | loss:0.0016503415536135435 lr:8.744325086085248e-06 tokens_per_second_per_gpu:7045.859341655128
208
+ Step 208 | loss:0.0021091720554977655 lr:8.353937964495029e-06 tokens_per_second_per_gpu:6960.631753252208
209
+ Step 209 | loss:0.0014547959435731173 lr:7.971669825215788e-06 tokens_per_second_per_gpu:7140.638028664293
210
+ Step 210 | loss:0.0011278915917500854 lr:7.597595192178702e-06 tokens_per_second_per_gpu:7112.024889474323
211
+ Step 211 | loss:0.0008361373329535127 lr:7.2317869919746705e-06 tokens_per_second_per_gpu:6825.989889307472
212
+ Step 212 | loss:0.0011150042992085218 lr:6.874316539637127e-06 tokens_per_second_per_gpu:6953.677938236041
213
+ Step 213 | loss:0.00025254415231756866 lr:6.52525352473905e-06 tokens_per_second_per_gpu:6989.570204640159
214
+ Step 214 | loss:0.0016249557957053185 lr:6.184665997806832e-06 tokens_per_second_per_gpu:7133.165202912919
215
+ Step 215 | loss:0.001143998233601451 lr:5.852620357053651e-06 tokens_per_second_per_gpu:7051.853256206377
216
+ Step 216 | loss:0.000365725951269269 lr:5.529181335435124e-06 tokens_per_second_per_gpu:7109.042950385973
217
+ Step 217 | loss:0.0015886126784607768 lr:5.214411988029355e-06 tokens_per_second_per_gpu:7028.875248251393
218
+ Step 218 | loss:0.0013761294540017843 lr:4.908373679744316e-06 tokens_per_second_per_gpu:7108.213051111388
219
+ Step 219 | loss:0.0018573033157736063 lr:4.611126073354571e-06 tokens_per_second_per_gpu:7125.91419319748
220
+ Step 220 | loss:0.0019727428443729877 lr:4.322727117869951e-06 tokens_per_second_per_gpu:6949.702618624644
221
+ Step 221 | loss:0.00046489882515743375 lr:4.043233037238281e-06 tokens_per_second_per_gpu:7082.179158830416
222
+ Step 222 | loss:0.001256947172805667 lr:3.772698319384349e-06 tokens_per_second_per_gpu:7154.753459188333
223
+ Step 223 | loss:0.0007704919553361833 lr:3.511175705587433e-06 tokens_per_second_per_gpu:7084.612916916432
224
+ Step 224 | loss:0.00048463058192282915 lr:3.258716180199278e-06 tokens_per_second_per_gpu:7100.661102645909
225
+ Step 225 | loss:0.0015112065011635423 lr:3.0153689607045845e-06 tokens_per_second_per_gpu:6986.426070544913
226
+ Step 226 | loss:0.0014505032449960709 lr:2.7811814881259503e-06 tokens_per_second_per_gpu:7057.156034274591
227
+ Step 227 | loss:0.0017806094838306308 lr:2.5561994177751737e-06 tokens_per_second_per_gpu:6967.176853458735
228
+ Step 228 | loss:0.0016619979869574308 lr:2.340466610352654e-06 tokens_per_second_per_gpu:6959.813413974034
229
+ Step 229 | loss:0.0012311894679442048 lr:2.134025123396638e-06 tokens_per_second_per_gpu:7026.055783363448
230
+ Step 230 | loss:0.0019572905730456114 lr:1.9369152030840556e-06 tokens_per_second_per_gpu:7062.787034722274
231
+ Step 231 | loss:0.0016982824308797717 lr:1.7491752763844293e-06 tokens_per_second_per_gpu:6831.063215360656
232
+ Step 232 | loss:0.0021078097634017467 lr:1.5708419435684462e-06 tokens_per_second_per_gpu:7086.32389932558
233
+ Step 233 | loss:0.0013445017393678427 lr:1.4019499710726913e-06 tokens_per_second_per_gpu:6977.155383959046
234
+ Step 234 | loss:0.00036836747312918305 lr:1.2425322847218368e-06 tokens_per_second_per_gpu:7094.678174910597
235
+ Step 235 | loss:0.0005533919320441782 lr:1.0926199633097157e-06 tokens_per_second_per_gpu:7031.788985841722
236
+ Step 236 | loss:0.0016440795734524727 lr:9.522422325404235e-07 tokens_per_second_per_gpu:7046.472693651047
237
+ Step 237 | loss:0.0013803904876112938 lr:8.214264593307098e-07 tokens_per_second_per_gpu:6975.3985161252995
238
+ Step 238 | loss:0.0009708477882668376 lr:7.001981464747565e-07 tokens_per_second_per_gpu:7049.375475406457
239
+ Step 239 | loss:0.00199791113846004 lr:5.885809276723608e-07 tokens_per_second_per_gpu:7017.553232092998
240
+ Step 240 | loss:0.0015766591532155871 lr:4.865965629214819e-07 tokens_per_second_per_gpu:6955.9168267022715
241
+ Step 241 | loss:0.0014465312706306577 lr:3.9426493427611177e-07 tokens_per_second_per_gpu:6931.547567024191
242
+ Step 242 | loss:0.0013422713382169604 lr:3.1160404197018154e-07 tokens_per_second_per_gpu:7060.875940299212
243
+ Step 243 | loss:0.0006091100513003767 lr:2.386300009084408e-07 tokens_per_second_per_gpu:7146.698945228412
244
+ Step 244 | loss:0.0005727412644773722 lr:1.753570375247815e-07 tokens_per_second_per_gpu:7135.779924518519
245
+ Step 245 | loss:0.0005283570499159396 lr:1.2179748700879012e-07 tokens_per_second_per_gpu:7015.242118553269
246
+ Step 246 | loss:0.00038219685666263103 lr:7.796179090094891e-08 tokens_per_second_per_gpu:7149.8793604615785
247
+ Step 247 | loss:0.0014308899408206344 lr:4.385849505708084e-08 tokens_per_second_per_gpu:7180.676959868798
248
+ Step 248 | loss:0.00210582185536623 lr:1.949424798228239e-08 tokens_per_second_per_gpu:7132.91353743717
249
+ Step 249 | loss:0.0010692310752347112 lr:4.873799534788059e-09 tokens_per_second_per_gpu:7011.347544756431
250
+ Step 250 | loss:0.002054369542747736 lr:0.0 tokens_per_second_per_gpu:7147.0550602441635
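The learning-rate column in these logs is consistent with a 25-step linear warmup to 1e-4 followed by cosine decay to zero over the remaining 225 steps (e.g. step 100 logs exactly 7.5e-05 and step 250 logs 0.0). The training code itself is not part of this upload, so the following is only a reconstruction from the logged values, with the constants read off the logs above:

```python
import math

MAX_LR, WARMUP, TOTAL = 1e-4, 25, 250  # values read off the logs above

def lr_at(step):
    """Reconstructed schedule: linear warmup, then cosine decay to 0."""
    if step <= WARMUP:
        return MAX_LR * step / WARMUP
    progress = (step - WARMUP) / (TOTAL - WARMUP)
    return MAX_LR * 0.5 * (1.0 + math.cos(math.pi * progress))

# Spot checks against the logged values.
assert math.isclose(lr_at(26), 9.999512620046522e-05, rel_tol=1e-6)
assert math.isclose(lr_at(100), 7.5e-05, rel_tol=1e-6)
assert abs(lr_at(250)) < 1e-12
```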
00dbd492/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
00dbd492/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [5 5 5 5 5 0 0 0 0 0 0 0 0 0 0]\n [5 0 0 0 5 0 0 0 0 0 0 0 0 0 0]\n [5 0 5 0 5 0 0 0 0 0 0 0 0 0 0]\n [5 0 0 0 5 0 0 0 0 0 0 0 0 0 0]\n [5 5 5 5 5 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 5 5 5 5 5 5 5 5 5]\n [0 0 0 0 0 0 5 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 0 0 0 5 0 0 0 5]\n [0 0 0 0 0 0 5 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 5 5 5 5 5 5 5 5]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [5 5 5 5 5 0 0 0 0 0 0 0 0 0 0]\n [5 1 1 1 5 0 0 0 0 0 0 0 0 0 0]\n [5 1 5 1 5 0 0 0 0 0 0 0 0 0 0]\n [5 1 1 1 5 0 0 0 0 0 0 0 0 0 0]\n [5 5 5 5 5 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 5 5 5 5 5 5 5 5 5]\n [0 0 0 0 0 0 5 6 6 6 6 6 6 6 5]\n [0 0 0 0 0 0 5 6 6 6 6 6 6 6 5]\n [0 0 0 0 0 0 5 6 6 6 6 6 6 6 5]\n [0 0 0 0 0 0 5 6 6 6 5 6 6 6 5]\n [0 0 0 0 0 0 5 6 6 6 6 6 6 6 5]\n [0 0 0 0 0 0 5 6 6 6 6 6 6 6 5]\n [0 0 0 0 0 0 5 6 6 6 6 6 6 6 5]\n [0 0 0 0 0 0 5 5 5 5 5 5 5 5 5]]#\n[[0 0 5 5 5 5 5]\n [0 0 5 0 0 0 5]\n [0 0 5 0 5 0 5]\n [0 0 5 0 0 0 5]\n [0 0 5 5 5 5 5]\n [0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0]] -> [[0 0 5 5 5 5 5]\n [0 0 5 1 1 1 5]\n [0 0 5 1 5 1 5]\n [0 0 5 1 1 1 5]\n [0 0 5 5 5 5 5]\n [0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 5 0 0 0 5]\n [0 0 0 0 0 0 0 0 5 0 5 0 5]\n [0 0 0 0 0 0 0 0 5 0 0 0 5]\n [5 5 5 5 5 5 5 0 5 5 5 5 5]\n [5 0 0 0 0 0 5 0 0 0 0 0 0]\n [5 0 0 0 0 0 5 0 0 0 0 0 0]\n [5 0 0 5 0 0 5 0 0 0 0 0 0]\n [5 0 0 0 0 0 5 0 0 0 0 0 0]\n [5 0 0 0 0 0 5 0 0 0 0 0 0]\n [5 5 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 5 1 1 1 5]\n [0 0 0 0 0 0 0 0 5 1 5 1 5]\n [0 0 0 0 0 0 0 0 5 1 1 1 5]\n [5 5 5 5 5 5 5 0 5 5 5 5 5]\n [5 9 9 9 9 9 5 0 0 0 0 0 0]\n [5 9 9 9 9 9 5 0 0 0 0 0 0]\n [5 9 9 5 9 9 5 0 0 0 0 0 0]\n [5 9 9 9 9 9 5 0 0 0 0 0 0]\n [5 9 9 9 9 9 5 0 0 0 0 0 0]\n [5 5 5 5 5 5 5 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 5 5 5 5 5 5 5]\n [0 0 5 0 0 0 0 0 5]\n [0 0 5 0 0 0 0 0 5]\n [0 0 5 0 0 5 0 0 5]\n [0 0 5 0 0 0 0 0 5]\n [0 0 5 0 0 0 0 0 5]\n [0 0 5 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 5 5 5 5 5 5 5]\n [0 0 5 9 9 9 9 9 5]\n [0 0 5 9 9 9 9 9 5]\n [0 0 5 9 9 5 9 9 5]\n [0 0 5 9 9 9 9 9 5]\n [0 0 5 9 9 9 9 9 5]\n [0 0 5 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 2251}
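The td_True_*.jsonl files each hold a single chat-formatted record like the one above, with the ARC grids serialized as space-separated rows inside python-style bracketed arrays. A small sketch, assuming a local copy of the file, of reading such a record and recovering the grids as lists of integer rows (the helper name is illustrative):

```python
import json
import re

JSONL_PATH = "00dbd492/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl"  # illustrative

def parse_grids(text):
    """Turn every '[0 0 5 ...]' row in the text into a list of ints."""
    rows = re.findall(r"\[([\d ]+)\]", text)
    return [[int(v) for v in row.split()] for row in rows]

with open(JSONL_PATH) as f:
    record = json.loads(f.readline())  # one record per file

# The last "user" turn carries the test input grid; the "output" message
# carries the expected completion.
test_prompt = record["input"][-1]["content"]
target = record["output"]["content"]

print(parse_grids(test_prompt))
print(parse_grids(target))
```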
03560426/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
03560426/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d9f5021b82bfba90c24ad2434ed887038bcffa33da1e106575dfddea653cc6
3
+ size 596042770
03560426/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
03560426/log_1730924338.txt ADDED
@@ -0,0 +1,250 @@
1
+ Step 1 | loss:0.09594444930553436 lr:4.000000000000001e-06 tokens_per_second_per_gpu:1715.6769661527067
2
+ Step 2 | loss:0.032612573355436325 lr:8.000000000000001e-06 tokens_per_second_per_gpu:4257.384016888243
3
+ Step 3 | loss:0.03703868016600609 lr:1.2e-05 tokens_per_second_per_gpu:7118.213139384143
4
+ Step 4 | loss:0.04869644343852997 lr:1.6000000000000003e-05 tokens_per_second_per_gpu:2861.776388430861
5
+ Step 5 | loss:0.04404347762465477 lr:2e-05 tokens_per_second_per_gpu:2890.405749137227
6
+ Step 6 | loss:0.04101202264428139 lr:2.4e-05 tokens_per_second_per_gpu:7102.234102434205
7
+ Step 7 | loss:0.048599839210510254 lr:2.8000000000000003e-05 tokens_per_second_per_gpu:7020.581060628121
8
+ Step 8 | loss:0.024283362552523613 lr:3.2000000000000005e-05 tokens_per_second_per_gpu:6993.611958028155
9
+ Step 9 | loss:0.03192838281393051 lr:3.6e-05 tokens_per_second_per_gpu:7122.84532645759
10
+ Step 10 | loss:0.07498080283403397 lr:4e-05 tokens_per_second_per_gpu:6758.487752244918
11
+ Step 11 | loss:0.05588093400001526 lr:4.4000000000000006e-05 tokens_per_second_per_gpu:2187.1191955052795
12
+ Step 12 | loss:0.06042847782373428 lr:4.8e-05 tokens_per_second_per_gpu:7091.115951473602
13
+ Step 13 | loss:0.07163452357053757 lr:5.2000000000000004e-05 tokens_per_second_per_gpu:6771.857666986925
14
+ Step 14 | loss:0.020770439878106117 lr:5.6000000000000006e-05 tokens_per_second_per_gpu:3408.1900982120937
15
+ Step 15 | loss:0.07795064151287079 lr:6e-05 tokens_per_second_per_gpu:6720.726208902146
16
+ Step 16 | loss:0.049128226935863495 lr:6.400000000000001e-05 tokens_per_second_per_gpu:7154.987550845856
17
+ Step 17 | loss:0.03991858661174774 lr:6.800000000000001e-05 tokens_per_second_per_gpu:7038.071954264581
18
+ Step 18 | loss:0.020218411460518837 lr:7.2e-05 tokens_per_second_per_gpu:7052.16330095677
19
+ Step 19 | loss:0.049349959939718246 lr:7.6e-05 tokens_per_second_per_gpu:7135.243307802797
20
+ Step 20 | loss:0.024519197642803192 lr:8e-05 tokens_per_second_per_gpu:7070.415186103101
21
+ Step 21 | loss:0.06849371641874313 lr:8.4e-05 tokens_per_second_per_gpu:6798.501835862438
22
+ Step 22 | loss:0.03306471183896065 lr:8.800000000000001e-05 tokens_per_second_per_gpu:7143.20493993916
23
+ Step 23 | loss:0.038712278008461 lr:9.200000000000001e-05 tokens_per_second_per_gpu:7062.176042971551
24
+ Step 24 | loss:0.022722184658050537 lr:9.6e-05 tokens_per_second_per_gpu:7093.97275491179
25
+ Step 25 | loss:0.014121896587312222 lr:0.0001 tokens_per_second_per_gpu:7069.144169807532
26
+ Step 26 | loss:0.0176176019012928 lr:9.999512620046522e-05 tokens_per_second_per_gpu:7131.631923799358
27
+ Step 27 | loss:0.00988552626222372 lr:9.998050575201771e-05 tokens_per_second_per_gpu:6891.637589561682
28
+ Step 28 | loss:0.010788653045892715 lr:9.995614150494293e-05 tokens_per_second_per_gpu:7146.763009204502
29
+ Step 29 | loss:0.012993809767067432 lr:9.992203820909906e-05 tokens_per_second_per_gpu:7135.075134041281
30
+ Step 30 | loss:0.011962542310357094 lr:9.987820251299122e-05 tokens_per_second_per_gpu:7080.446021298551
31
+ Step 31 | loss:0.0072456710040569305 lr:9.982464296247522e-05 tokens_per_second_per_gpu:7100.213845627682
32
+ Step 32 | loss:0.036037687212228775 lr:9.976136999909156e-05 tokens_per_second_per_gpu:2155.485116316584
33
+ Step 33 | loss:0.010557749308645725 lr:9.968839595802982e-05 tokens_per_second_per_gpu:6916.82365083494
34
+ Step 34 | loss:0.022763660177588463 lr:9.96057350657239e-05 tokens_per_second_per_gpu:6963.677786918989
35
+ Step 35 | loss:0.05381365120410919 lr:9.951340343707852e-05 tokens_per_second_per_gpu:6749.30636485485
36
+ Step 36 | loss:0.033899154514074326 lr:9.941141907232765e-05 tokens_per_second_per_gpu:7095.564843066931
37
+ Step 37 | loss:0.004967940039932728 lr:9.929980185352526e-05 tokens_per_second_per_gpu:6990.531549510169
38
+ Step 38 | loss:0.01169795822352171 lr:9.917857354066931e-05 tokens_per_second_per_gpu:7128.511183238077
39
+ Step 39 | loss:0.012766576372087002 lr:9.904775776745958e-05 tokens_per_second_per_gpu:7079.308564480445
40
+ Step 40 | loss:0.01314773503690958 lr:9.890738003669029e-05 tokens_per_second_per_gpu:7046.018317224989
41
+ Step 41 | loss:0.0301198773086071 lr:9.875746771527816e-05 tokens_per_second_per_gpu:6641.960318371145
42
+ Step 42 | loss:0.02045576646924019 lr:9.859805002892732e-05 tokens_per_second_per_gpu:7143.374203812877
43
+ Step 43 | loss:0.009153805673122406 lr:9.842915805643155e-05 tokens_per_second_per_gpu:7057.907999612955
44
+ Step 44 | loss:0.014376576989889145 lr:9.825082472361557e-05 tokens_per_second_per_gpu:7106.24093855879
45
+ Step 45 | loss:0.006419774144887924 lr:9.806308479691595e-05 tokens_per_second_per_gpu:6975.467572936728
46
+ Step 46 | loss:0.010145753622055054 lr:9.786597487660337e-05 tokens_per_second_per_gpu:7086.584362075627
47
+ Step 47 | loss:0.01086120679974556 lr:9.765953338964735e-05 tokens_per_second_per_gpu:7082.111393455114
48
+ Step 48 | loss:0.013914630748331547 lr:9.744380058222483e-05 tokens_per_second_per_gpu:7071.756851147732
49
+ Step 49 | loss:0.006967062130570412 lr:9.721881851187406e-05 tokens_per_second_per_gpu:7096.142942882559
50
+ Step 50 | loss:0.004446886479854584 lr:9.698463103929542e-05 tokens_per_second_per_gpu:7079.807724349228
51
+ Step 51 | loss:0.011698620393872261 lr:9.674128381980072e-05 tokens_per_second_per_gpu:6734.770564917194
52
+ Step 52 | loss:0.0073717557825148106 lr:9.648882429441257e-05 tokens_per_second_per_gpu:7099.255240592363
53
+ Step 53 | loss:0.04363773763179779 lr:9.622730168061567e-05 tokens_per_second_per_gpu:1271.1740731348273
54
+ Step 54 | loss:0.0032293389085680246 lr:9.595676696276172e-05 tokens_per_second_per_gpu:7009.180949621221
55
+ Step 55 | loss:0.014490296132862568 lr:9.567727288213005e-05 tokens_per_second_per_gpu:7143.885987070321
56
+ Step 56 | loss:0.02843150869011879 lr:9.538887392664544e-05 tokens_per_second_per_gpu:4646.515178976232
57
+ Step 57 | loss:0.011716767214238644 lr:9.50916263202557e-05 tokens_per_second_per_gpu:6930.332067235958
58
+ Step 58 | loss:0.011800938285887241 lr:9.478558801197065e-05 tokens_per_second_per_gpu:6956.873287159913
59
+ Step 59 | loss:0.019172390922904015 lr:9.447081866456489e-05 tokens_per_second_per_gpu:7094.0306617429105
60
+ Step 60 | loss:0.0029313103295862675 lr:9.414737964294636e-05 tokens_per_second_per_gpu:7074.439718451712
61
+ Step 61 | loss:0.01510224211961031 lr:9.381533400219318e-05 tokens_per_second_per_gpu:7057.053816060703
62
+ Step 62 | loss:0.012170720845460892 lr:9.347474647526095e-05 tokens_per_second_per_gpu:7054.9581135919825
63
+ Step 63 | loss:0.002090180292725563 lr:9.312568346036288e-05 tokens_per_second_per_gpu:7041.351263053978
64
+ Step 64 | loss:0.008515679277479649 lr:9.276821300802534e-05 tokens_per_second_per_gpu:7107.134029255465
65
+ Step 65 | loss:0.005716190207749605 lr:9.24024048078213e-05 tokens_per_second_per_gpu:7033.337463035152
66
+ Step 66 | loss:0.003724724519997835 lr:9.202833017478422e-05 tokens_per_second_per_gpu:6753.27843297091
67
+ Step 67 | loss:0.0013471748679876328 lr:9.164606203550497e-05 tokens_per_second_per_gpu:7069.459297300209
68
+ Step 68 | loss:0.028791621327400208 lr:9.125567491391476e-05 tokens_per_second_per_gpu:6762.340892360801
69
+ Step 69 | loss:0.034610774368047714 lr:9.085724491675642e-05 tokens_per_second_per_gpu:6740.150911219675
70
+ Step 70 | loss:0.008526662364602089 lr:9.045084971874738e-05 tokens_per_second_per_gpu:7074.943892812442
71
+ Step 71 | loss:0.007055385038256645 lr:9.003656854743667e-05 tokens_per_second_per_gpu:7093.802250036825
72
+ Step 72 | loss:0.004274093080312014 lr:8.961448216775954e-05 tokens_per_second_per_gpu:7177.604704465473
73
+ Step 73 | loss:0.01708085462450981 lr:8.9184672866292e-05 tokens_per_second_per_gpu:7097.049507784967
74
+ Step 74 | loss:0.004166878294199705 lr:8.874722443520899e-05 tokens_per_second_per_gpu:7110.845091689312
75
+ Step 75 | loss:0.004323659930378199 lr:8.83022221559489e-05 tokens_per_second_per_gpu:7087.7417758858555
76
+ Step 76 | loss:0.0018252398585900664 lr:8.784975278258783e-05 tokens_per_second_per_gpu:7066.170856740396
77
+ Step 77 | loss:0.030012302100658417 lr:8.73899045249266e-05 tokens_per_second_per_gpu:6788.34487321683
78
+ Step 78 | loss:0.0072570620104670525 lr:8.692276703129421e-05 tokens_per_second_per_gpu:7055.591958327719
79
+ Step 79 | loss:0.01024627685546875 lr:8.644843137107059e-05 tokens_per_second_per_gpu:7099.121611303732
80
+ Step 80 | loss:0.023203443735837936 lr:8.596699001693255e-05 tokens_per_second_per_gpu:6928.721677860912
81
+ Step 81 | loss:0.025124238803982735 lr:8.547853682682604e-05 tokens_per_second_per_gpu:7085.958076572892
82
+ Step 82 | loss:0.0048821973614394665 lr:8.498316702566828e-05 tokens_per_second_per_gpu:7065.727309648535
83
+ Step 83 | loss:0.023713823407888412 lr:8.44809771867835e-05 tokens_per_second_per_gpu:5447.8865534438755
84
+ Step 84 | loss:0.010438942350447178 lr:8.397206521307584e-05 tokens_per_second_per_gpu:7054.727469347494
85
+ Step 85 | loss:0.012677349150180817 lr:8.345653031794292e-05 tokens_per_second_per_gpu:7059.575956676728
86
+ Step 86 | loss:0.004624150227755308 lr:8.293447300593402e-05 tokens_per_second_per_gpu:7050.472275899744
87
+ Step 87 | loss:0.0012963004410266876 lr:8.240599505315655e-05 tokens_per_second_per_gpu:7097.777791176698
88
+ Step 88 | loss:0.004414051305502653 lr:8.18711994874345e-05 tokens_per_second_per_gpu:6981.526522990102
89
+ Step 89 | loss:0.023303508758544922 lr:8.133019056822304e-05 tokens_per_second_per_gpu:6472.518825923673
90
+ Step 90 | loss:0.0030153817497193813 lr:8.07830737662829e-05 tokens_per_second_per_gpu:7017.439179194748
91
+ Step 91 | loss:0.00913572683930397 lr:8.022995574311876e-05 tokens_per_second_per_gpu:6943.81647207651
92
+ Step 92 | loss:0.0011044674320146441 lr:7.967094433018508e-05 tokens_per_second_per_gpu:7048.682661139392
93
+ Step 93 | loss:0.004775516223162413 lr:7.910614850786448e-05 tokens_per_second_per_gpu:6905.493230117229
94
+ Step 94 | loss:0.008886449970304966 lr:7.85356783842216e-05 tokens_per_second_per_gpu:7115.512181418436
95
+ Step 95 | loss:0.0027299695648252964 lr:7.795964517353735e-05 tokens_per_second_per_gpu:7081.430962658991
96
+ Step 96 | loss:0.00905381515622139 lr:7.737816117462752e-05 tokens_per_second_per_gpu:7077.885327106005
97
+ Step 97 | loss:0.0003037667484022677 lr:7.679133974894983e-05 tokens_per_second_per_gpu:7058.211221380861
98
+ Step 98 | loss:0.0015372553607448936 lr:7.619929529850397e-05 tokens_per_second_per_gpu:7059.540422425649
99
+ Step 99 | loss:0.00771813839673996 lr:7.560214324352858e-05 tokens_per_second_per_gpu:7163.497036762075
100
+ Step 100 | loss:0.0002618357539176941 lr:7.500000000000001e-05 tokens_per_second_per_gpu:7129.741724390563
101
+ Step 101 | loss:0.002139240736141801 lr:7.439298295693665e-05 tokens_per_second_per_gpu:7063.026386610476
102
+ Step 102 | loss:0.000588633178267628 lr:7.378121045351378e-05 tokens_per_second_per_gpu:7085.274584805542
103
+ Step 103 | loss:0.0009644604288041592 lr:7.316480175599309e-05 tokens_per_second_per_gpu:7079.005873332365
104
+ Step 104 | loss:0.0014253875706344843 lr:7.254387703447154e-05 tokens_per_second_per_gpu:7110.536329650884
105
+ Step 105 | loss:0.008904769085347652 lr:7.191855733945387e-05 tokens_per_second_per_gpu:7165.9044236089685
106
+ Step 106 | loss:0.01313498429954052 lr:7.128896457825364e-05 tokens_per_second_per_gpu:7044.567480443534
107
+ Step 107 | loss:0.009586299769580364 lr:7.06552214912271e-05 tokens_per_second_per_gpu:7065.759004065457
108
+ Step 108 | loss:0.004150373861193657 lr:7.001745162784477e-05 tokens_per_second_per_gpu:7144.795253698613
109
+ Step 109 | loss:0.0025894774589687586 lr:6.937577932260515e-05 tokens_per_second_per_gpu:7092.035088440755
110
+ Step 110 | loss:0.009777382016181946 lr:6.873032967079561e-05 tokens_per_second_per_gpu:7093.150806060385
111
+ Step 111 | loss:0.0069180880673229694 lr:6.808122850410461e-05 tokens_per_second_per_gpu:6953.824523940086
112
+ Step 112 | loss:0.004603222478181124 lr:6.742860236609077e-05 tokens_per_second_per_gpu:7045.455581600914
113
+ Step 113 | loss:0.0011307838140055537 lr:6.677257848751277e-05 tokens_per_second_per_gpu:7113.028856346813
114
+ Step 114 | loss:0.00957533810287714 lr:6.611328476152557e-05 tokens_per_second_per_gpu:6866.532520421302
115
+ Step 115 | loss:0.0010501754004508257 lr:6.545084971874738e-05 tokens_per_second_per_gpu:7045.1286895041985
116
+ Step 116 | loss:0.0003030570223927498 lr:6.478540250220234e-05 tokens_per_second_per_gpu:6919.206378095726
117
+ Step 117 | loss:0.012309281155467033 lr:6.411707284214384e-05 tokens_per_second_per_gpu:6959.89602153374
118
+ Step 118 | loss:0.0011735755251720548 lr:6.344599103076329e-05 tokens_per_second_per_gpu:6998.818230826136
119
+ Step 119 | loss:0.006594390608370304 lr:6.277228789678953e-05 tokens_per_second_per_gpu:7027.889308059036
120
+ Step 120 | loss:0.004384372849017382 lr:6.209609477998338e-05 tokens_per_second_per_gpu:7043.651450873184
121
+ Step 121 | loss:0.004672420211136341 lr:6.141754350553279e-05 tokens_per_second_per_gpu:7101.4110384159385
122
+ Step 122 | loss:0.005256533622741699 lr:6.073676635835317e-05 tokens_per_second_per_gpu:7102.926567716092
123
+ Step 123 | loss:0.006718686316162348 lr:6.005389605729824e-05 tokens_per_second_per_gpu:7031.4624230505915
124
+ Step 124 | loss:0.007981338538229465 lr:5.9369065729286245e-05 tokens_per_second_per_gpu:7118.1753882686
125
+ Step 125 | loss:0.004765128251165152 lr:5.868240888334653e-05 tokens_per_second_per_gpu:6946.728970297603
126
+ Step 126 | loss:0.006511871702969074 lr:5.799405938459175e-05 tokens_per_second_per_gpu:7068.353681650787
127
+ Step 127 | loss:0.006299371365457773 lr:5.730415142812059e-05 tokens_per_second_per_gpu:7125.328240536463
128
+ Step 128 | loss:0.002500183880329132 lr:5.661281951285613e-05 tokens_per_second_per_gpu:6762.9994900944475
129
+ Step 129 | loss:0.0003447072522249073 lr:5.5920198415325064e-05 tokens_per_second_per_gpu:7116.810989279679
130
+ Step 130 | loss:0.0038090553134679794 lr:5.522642316338268e-05 tokens_per_second_per_gpu:7068.002336185429
131
+ Step 131 | loss:0.0015808121534064412 lr:5.453162900988902e-05 tokens_per_second_per_gpu:7067.758646043467
132
+ Step 132 | loss:0.005941121373325586 lr:5.383595140634093e-05 tokens_per_second_per_gpu:7091.837221774894
133
+ Step 133 | loss:0.0010753199458122253 lr:5.313952597646568e-05 tokens_per_second_per_gpu:7068.938359991409
134
+ Step 134 | loss:0.0007225701119750738 lr:5.244248848978067e-05 tokens_per_second_per_gpu:6743.037281274063
135
+ Step 135 | loss:0.008606262505054474 lr:5.174497483512506e-05 tokens_per_second_per_gpu:7060.966363726559
136
+ Step 136 | loss:0.004445431753993034 lr:5.104712099416785e-05 tokens_per_second_per_gpu:7051.867724934648
137
+ Step 137 | loss:0.0019743351731449366 lr:5.034906301489808e-05 tokens_per_second_per_gpu:6682.908717884122
138
+ Step 138 | loss:0.009234325028955936 lr:4.965093698510193e-05 tokens_per_second_per_gpu:7139.389343634964
139
+ Step 139 | loss:0.005030487664043903 lr:4.895287900583216e-05 tokens_per_second_per_gpu:7080.114364087144
140
+ Step 140 | loss:0.0006191696156747639 lr:4.825502516487497e-05 tokens_per_second_per_gpu:6177.17880138713
141
+ Step 141 | loss:0.003408745164051652 lr:4.755751151021934e-05 tokens_per_second_per_gpu:6959.878964928582
142
+ Step 142 | loss:0.0030980852898210287 lr:4.6860474023534335e-05 tokens_per_second_per_gpu:7048.244691756786
143
+ Step 143 | loss:0.0019596803467720747 lr:4.616404859365907e-05 tokens_per_second_per_gpu:6965.323695321251
144
+ Step 144 | loss:0.009168603457510471 lr:4.5468370990111006e-05 tokens_per_second_per_gpu:6709.272693327204
145
+ Step 145 | loss:0.00331300450488925 lr:4.477357683661734e-05 tokens_per_second_per_gpu:7122.655300155168
146
+ Step 146 | loss:0.0020876226481050253 lr:4.407980158467495e-05 tokens_per_second_per_gpu:7059.668810962822
147
+ Step 147 | loss:0.0013308243360370398 lr:4.3387180487143876e-05 tokens_per_second_per_gpu:6982.222854743132
148
+ Step 148 | loss:0.0034321194980293512 lr:4.269584857187943e-05 tokens_per_second_per_gpu:6684.743597530625
149
+ Step 149 | loss:0.0019766073673963547 lr:4.2005940615408264e-05 tokens_per_second_per_gpu:7067.469060841531
150
+ Step 150 | loss:0.0008945669396780431 lr:4.131759111665349e-05 tokens_per_second_per_gpu:7066.655473123749
151
+ Step 151 | loss:0.0008740133489482105 lr:4.063093427071376e-05 tokens_per_second_per_gpu:7175.781860123878
152
+ Step 152 | loss:0.0032591235358268023 lr:3.9946103942701777e-05 tokens_per_second_per_gpu:7008.007102721075
153
+ Step 153 | loss:0.0033099993597716093 lr:3.926323364164684e-05 tokens_per_second_per_gpu:7084.441039783992
154
+ Step 154 | loss:0.0014826861442998052 lr:3.858245649446721e-05 tokens_per_second_per_gpu:6662.47060324718
155
+ Step 155 | loss:0.0008808820857666433 lr:3.790390522001662e-05 tokens_per_second_per_gpu:7067.196327829486
156
+ Step 156 | loss:0.0009888733038678765 lr:3.7227712103210486e-05 tokens_per_second_per_gpu:6989.206805860369
157
+ Step 157 | loss:0.0038229695055633783 lr:3.655400896923672e-05 tokens_per_second_per_gpu:6902.019722464749
158
+ Step 158 | loss:7.960521907079965e-05 lr:3.588292715785617e-05 tokens_per_second_per_gpu:6887.668597374931
159
+ Step 159 | loss:0.0016212619375437498 lr:3.5214597497797684e-05 tokens_per_second_per_gpu:6971.546283451361
160
+ Step 160 | loss:9.730933379614726e-05 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:7001.316944454005
161
+ Step 161 | loss:0.0019280611304566264 lr:3.388671523847445e-05 tokens_per_second_per_gpu:7058.511728463046
162
+ Step 162 | loss:0.0032107133883982897 lr:3.322742151248725e-05 tokens_per_second_per_gpu:7074.473400661386
163
+ Step 163 | loss:0.0020377160981297493 lr:3.257139763390925e-05 tokens_per_second_per_gpu:7079.718647038059
164
+ Step 164 | loss:0.0058026742190122604 lr:3.1918771495895396e-05 tokens_per_second_per_gpu:6904.0378816734565
165
+ Step 165 | loss:0.0010289129568263888 lr:3.12696703292044e-05 tokens_per_second_per_gpu:7115.230322496159
166
+ Step 166 | loss:0.0020757780876010656 lr:3.062422067739485e-05 tokens_per_second_per_gpu:7143.786317408253
167
+ Step 167 | loss:0.005250038579106331 lr:2.9982548372155263e-05 tokens_per_second_per_gpu:6904.71251059718
168
+ Step 168 | loss:0.0008269779500551522 lr:2.934477850877292e-05 tokens_per_second_per_gpu:7127.128386332989
169
+ Step 169 | loss:0.007161479443311691 lr:2.8711035421746367e-05 tokens_per_second_per_gpu:6973.786818409613
170
+ Step 170 | loss:0.0016270504565909505 lr:2.8081442660546125e-05 tokens_per_second_per_gpu:6944.131316659634
171
+ Step 171 | loss:0.0005630644736811519 lr:2.7456122965528475e-05 tokens_per_second_per_gpu:6968.731351596765
172
+ Step 172 | loss:0.000764863274525851 lr:2.6835198244006927e-05 tokens_per_second_per_gpu:6751.367808378523
173
+ Step 173 | loss:0.005358004476875067 lr:2.6218789546486234e-05 tokens_per_second_per_gpu:7121.830729623779
174
+ Step 174 | loss:4.0301574699697085e-06 lr:2.560701704306336e-05 tokens_per_second_per_gpu:6938.362346146086
175
+ Step 175 | loss:0.0015505176270380616 lr:2.500000000000001e-05 tokens_per_second_per_gpu:6982.030611321107
176
+ Step 176 | loss:0.004141073673963547 lr:2.4397856756471432e-05 tokens_per_second_per_gpu:7141.479902718891
177
+ Step 177 | loss:0.006702089682221413 lr:2.3800704701496053e-05 tokens_per_second_per_gpu:7087.489435882164
178
+ Step 178 | loss:0.004516590852290392 lr:2.3208660251050158e-05 tokens_per_second_per_gpu:7110.237668215957
179
+ Step 179 | loss:0.0002626439672894776 lr:2.2621838825372493e-05 tokens_per_second_per_gpu:7056.144125914929
180
+ Step 180 | loss:0.002430600579828024 lr:2.2040354826462668e-05 tokens_per_second_per_gpu:6759.34132815909
181
+ Step 181 | loss:4.081410588696599e-05 lr:2.1464321615778422e-05 tokens_per_second_per_gpu:7053.257854517555
182
+ Step 182 | loss:0.00031062396010383964 lr:2.0893851492135537e-05 tokens_per_second_per_gpu:6915.0048024862435
183
+ Step 183 | loss:1.496891854912974e-05 lr:2.0329055669814934e-05 tokens_per_second_per_gpu:7029.941830363428
184
+ Step 184 | loss:0.0008824768592603505 lr:1.977004425688126e-05 tokens_per_second_per_gpu:7049.465102859838
185
+ Step 185 | loss:0.0025399732403457165 lr:1.9216926233717085e-05 tokens_per_second_per_gpu:7065.522537463739
186
+ Step 186 | loss:0.0011831234442070127 lr:1.866980943177699e-05 tokens_per_second_per_gpu:7057.038829715127
187
+ Step 187 | loss:8.935239748097956e-05 lr:1.8128800512565513e-05 tokens_per_second_per_gpu:7045.273026663626
188
+ Step 188 | loss:0.00030020231497474015 lr:1.7594004946843456e-05 tokens_per_second_per_gpu:7101.548701998664
189
+ Step 189 | loss:0.002447166247293353 lr:1.7065526994065973e-05 tokens_per_second_per_gpu:7132.322499430969
190
+ Step 190 | loss:0.00178128806874156 lr:1.6543469682057106e-05 tokens_per_second_per_gpu:7079.7816493973
191
+ Step 191 | loss:0.0005427153082564473 lr:1.602793478692419e-05 tokens_per_second_per_gpu:7049.6550887142785
192
+ Step 192 | loss:0.0004498407943174243 lr:1.551902281321651e-05 tokens_per_second_per_gpu:7151.590671504066
193
+ Step 193 | loss:0.0021533083636313677 lr:1.5016832974331724e-05 tokens_per_second_per_gpu:7073.773690899949
194
+ Step 194 | loss:4.6476934585371055e-06 lr:1.4521463173173965e-05 tokens_per_second_per_gpu:6513.352836122793
195
+ Step 195 | loss:0.0017906646244227886 lr:1.4033009983067452e-05 tokens_per_second_per_gpu:7078.314068986232
196
+ Step 196 | loss:0.00018793635535985231 lr:1.3551568628929434e-05 tokens_per_second_per_gpu:7114.828339260472
197
+ Step 197 | loss:0.0031530391424894333 lr:1.3077232968705805e-05 tokens_per_second_per_gpu:7111.472157868944
198
+ Step 198 | loss:0.00028054098947905004 lr:1.2610095475073414e-05 tokens_per_second_per_gpu:6189.90796946071
199
+ Step 199 | loss:0.0005241130711510777 lr:1.2150247217412186e-05 tokens_per_second_per_gpu:7043.012850462871
200
+ Step 200 | loss:0.004094945732504129 lr:1.1697777844051105e-05 tokens_per_second_per_gpu:7028.667699320902
201
+ Step 201 | loss:0.002229253528639674 lr:1.1252775564791024e-05 tokens_per_second_per_gpu:7107.463195987544
202
+ Step 202 | loss:0.00019255683582741767 lr:1.0815327133708015e-05 tokens_per_second_per_gpu:7068.113178903538
203
+ Step 203 | loss:6.1030459619360045e-06 lr:1.0385517832240471e-05 tokens_per_second_per_gpu:6902.548601670786
204
+ Step 204 | loss:5.9569669247139245e-05 lr:9.963431452563332e-06 tokens_per_second_per_gpu:7057.201818382818
205
+ Step 205 | loss:0.005544851999729872 lr:9.549150281252633e-06 tokens_per_second_per_gpu:7088.885520114002
206
+ Step 206 | loss:0.0030818574596196413 lr:9.142755083243576e-06 tokens_per_second_per_gpu:7127.871132256084
207
+ Step 207 | loss:0.00011336655006743968 lr:8.744325086085248e-06 tokens_per_second_per_gpu:6866.277677188948
208
+ Step 208 | loss:0.00016025043441914022 lr:8.353937964495029e-06 tokens_per_second_per_gpu:7129.579521478313
209
+ Step 209 | loss:1.730686199152842e-05 lr:7.971669825215788e-06 tokens_per_second_per_gpu:7113.120857257116
210
+ Step 210 | loss:0.00028120417846366763 lr:7.597595192178702e-06 tokens_per_second_per_gpu:7073.1185268988165
211
+ Step 211 | loss:0.002427683211863041 lr:7.2317869919746705e-06 tokens_per_second_per_gpu:7114.428420333685
212
+ Step 212 | loss:0.0007908017141744494 lr:6.874316539637127e-06 tokens_per_second_per_gpu:7088.792782859684
213
+ Step 213 | loss:0.00452659884467721 lr:6.52525352473905e-06 tokens_per_second_per_gpu:6896.324693599925
214
+ Step 214 | loss:0.0025756561663001776 lr:6.184665997806832e-06 tokens_per_second_per_gpu:7069.378359232183
215
+ Step 215 | loss:0.00045055814553052187 lr:5.852620357053651e-06 tokens_per_second_per_gpu:7099.053127386129
216
+ Step 216 | loss:0.004190291278064251 lr:5.529181335435124e-06 tokens_per_second_per_gpu:6755.487017523827
217
+ Step 217 | loss:0.002386178122833371 lr:5.214411988029355e-06 tokens_per_second_per_gpu:7142.102004793783
218
+ Step 218 | loss:0.00011885088315466419 lr:4.908373679744316e-06 tokens_per_second_per_gpu:6976.9724357603955
219
+ Step 219 | loss:0.000391733949072659 lr:4.611126073354571e-06 tokens_per_second_per_gpu:7070.465521832886
220
+ Step 220 | loss:2.282398236275185e-05 lr:4.322727117869951e-06 tokens_per_second_per_gpu:7067.273556888841
221
+ Step 221 | loss:0.00013819478044752032 lr:4.043233037238281e-06 tokens_per_second_per_gpu:7124.199888989333
222
+ Step 222 | loss:6.782953278161585e-05 lr:3.772698319384349e-06 tokens_per_second_per_gpu:7049.3117894643665
223
+ Step 223 | loss:0.000757302506826818 lr:3.511175705587433e-06 tokens_per_second_per_gpu:7097.271216873272
224
+ Step 224 | loss:0.0013424844946712255 lr:3.258716180199278e-06 tokens_per_second_per_gpu:7039.021132485852
225
+ Step 225 | loss:0.00032940192613750696 lr:3.0153689607045845e-06 tokens_per_second_per_gpu:7037.537330538644
226
+ Step 226 | loss:0.0014422229724004865 lr:2.7811814881259503e-06 tokens_per_second_per_gpu:7091.09106833669
227
+ Step 227 | loss:0.0010956695768982172 lr:2.5561994177751737e-06 tokens_per_second_per_gpu:6604.557403083021
228
+ Step 228 | loss:0.0011385928373783827 lr:2.340466610352654e-06 tokens_per_second_per_gpu:6979.094215726923
229
+ Step 229 | loss:0.0018219526391476393 lr:2.134025123396638e-06 tokens_per_second_per_gpu:6895.723399935125
230
+ Step 230 | loss:0.008147644810378551 lr:1.9369152030840556e-06 tokens_per_second_per_gpu:6753.505543554295
231
+ Step 231 | loss:0.0006599955377168953 lr:1.7491752763844293e-06 tokens_per_second_per_gpu:7076.2268471422185
232
+ Step 232 | loss:0.0014754146104678512 lr:1.5708419435684462e-06 tokens_per_second_per_gpu:6802.986797947515
233
+ Step 233 | loss:0.0003230568836443126 lr:1.4019499710726913e-06 tokens_per_second_per_gpu:6944.848647833805
234
+ Step 234 | loss:0.0002109357010340318 lr:1.2425322847218368e-06 tokens_per_second_per_gpu:7087.99972871592
235
+ Step 235 | loss:0.0013953811721876264 lr:1.0926199633097157e-06 tokens_per_second_per_gpu:7110.8439529001935
236
+ Step 236 | loss:4.525236727204174e-05 lr:9.522422325404235e-07 tokens_per_second_per_gpu:7099.461659197926
237
+ Step 237 | loss:0.0006170326378196478 lr:8.214264593307098e-07 tokens_per_second_per_gpu:7072.285288625411
238
+ Step 238 | loss:0.0010877117747440934 lr:7.001981464747565e-07 tokens_per_second_per_gpu:7062.581629530725
239
+ Step 239 | loss:0.000273244600975886 lr:5.885809276723608e-07 tokens_per_second_per_gpu:6751.770199818327
240
+ Step 240 | loss:0.0007284900639206171 lr:4.865965629214819e-07 tokens_per_second_per_gpu:7044.183969309484
241
+ Step 241 | loss:4.254740633768961e-05 lr:3.9426493427611177e-07 tokens_per_second_per_gpu:7088.501677999479
242
+ Step 242 | loss:0.0011083302088081837 lr:3.1160404197018154e-07 tokens_per_second_per_gpu:6983.718705962598
243
+ Step 243 | loss:0.00521168252453208 lr:2.386300009084408e-07 tokens_per_second_per_gpu:7067.875052697799
244
+ Step 244 | loss:0.0009710222948342562 lr:1.753570375247815e-07 tokens_per_second_per_gpu:6997.507768003162
245
+ Step 245 | loss:0.0012455569813027978 lr:1.2179748700879012e-07 tokens_per_second_per_gpu:7113.441984033089
246
+ Step 246 | loss:0.0002956873213406652 lr:7.796179090094891e-08 tokens_per_second_per_gpu:7098.860712192067
247
+ Step 247 | loss:0.0043370844796299934 lr:4.385849505708084e-08 tokens_per_second_per_gpu:7060.336069246184
248
+ Step 248 | loss:0.0003348605241626501 lr:1.949424798228239e-08 tokens_per_second_per_gpu:7053.891355799027
249
+ Step 249 | loss:7.63071802794002e-05 lr:4.873799534788059e-09 tokens_per_second_per_gpu:6912.572067251458
250
+ Step 250 | loss:0.00046650483272969723 lr:0.0 tokens_per_second_per_gpu:7091.354368744028
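Each log_*.txt file above is a plain-text training log with one line per optimizer step. A minimal parsing sketch (not part of the uploaded files; the example path is one of the logs in this commit) that only assumes the "Step N | loss:... lr:... tokens_per_second_per_gpu:..." format visible here:

import re

LOG_LINE = re.compile(
    r"Step (?P<step>\d+) \| "
    r"loss:(?P<loss>\S+) "
    r"lr:(?P<lr>\S+) "
    r"tokens_per_second_per_gpu:(?P<tps>\S+)"
)

def parse_log(path):
    # Yields (step, loss, lr, tokens_per_second_per_gpu) for every matching line.
    with open(path) as f:
        for line in f:
            m = LOG_LINE.search(line)
            if m:
                yield int(m["step"]), float(m["loss"]), float(m["lr"]), float(m["tps"])

# e.g. steps = list(parse_log("05a7bcf2/log_1730924590.txt"))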
03560426/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
03560426/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4]\n [0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4]\n [0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4]\n [0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4]\n [0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4]\n [0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 4 4 4 4 4]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]] -> [[4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0]\n [4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0]\n [4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0]\n [4 4 4 4 4 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0]\n [4 4 4 4 4 4 1 1 1 1 0 0 0 0 0 0 0 0 0 0]\n [4 4 4 4 4 4 1 1 1 1 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 1 1 6 6 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 1 1 6 6 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 6 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 6 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 6 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 6 6 6 6 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5]] -> [[8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 6 6 6 6 6 6 6 6 5 5 5 5 5 5 0 0 0 0]\n [8 8 6 6 6 6 6 6 6 6 5 5 5 5 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 8 8 8 8 8 8 8 8]] -> "}], "output": {"role": "assistant", "content": "[[7 7 7 7 7 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [7 7 7 7 7 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [7 7 7 7 6 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0]\n [7 7 7 7 6 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 6 6 5 5 5 5 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 6 6 5 5 5 5 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 5 5 8 8 8 8 8 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 5 5 8 8 8 8 8 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 4985}
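Each td_*.jsonl file stores one JSON object per line, with an "input" message list (system prompt plus user grid), an "output" assistant message, and a "total_tokens" count, as in the record above. A minimal reading sketch (not part of the uploaded files):

import json

def load_records(path):
    # One JSON object per non-empty line.
    with open(path) as f:
        for line in f:
            if line.strip():
                yield json.loads(line)

# e.g.
# for rec in load_records("03560426/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl"):
#     messages = rec["input"]            # [{"role": "system", ...}, {"role": "user", ...}]
#     answer = rec["output"]["content"]  # assistant grid text ending in "#"
#     print(rec["total_tokens"], len(messages))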
05a7bcf2/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
05a7bcf2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ea37bff6d800eb8d009e71edae1bf66e2508be90518c96f59ef05097643a12
3
+ size 596042770
05a7bcf2/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
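Each task folder pairs a LoRA adapter (adapter_config.json plus adapter_model.bin; rank 128 on the attention and MLP projections and lm_head) with the Llama config of the 8B base model it was trained from. A minimal loading sketch (not part of the uploaded files); it assumes the checkpoint named by base_model_name_or_path, a local path in this config, is replaced with your own copy of the base model:

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# base_model_name_or_path in adapter_config.json points at a local checkpoint;
# substitute the location of your copy of the 8B base model.
base = AutoModelForCausalLM.from_pretrained(
    "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/",
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(base, "05a7bcf2")  # folder holding adapter_config.json + adapter_model.bin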
05a7bcf2/log_1730924590.txt ADDED
@@ -0,0 +1,96 @@
1
+ Step 1 | loss:0.05179863050580025 lr:1.1111111111111112e-05 tokens_per_second_per_gpu:4860.945113786926
2
+ Step 2 | loss:0.0376623161137104 lr:2.2222222222222223e-05 tokens_per_second_per_gpu:6834.301743985016
3
+ Step 3 | loss:0.0394112728536129 lr:3.3333333333333335e-05 tokens_per_second_per_gpu:6792.444651679777
4
+ Step 4 | loss:0.029476292431354523 lr:4.4444444444444447e-05 tokens_per_second_per_gpu:6804.175745991314
5
+ Step 5 | loss:0.051123712211847305 lr:5.555555555555556e-05 tokens_per_second_per_gpu:6802.744032109486
6
+ Step 6 | loss:0.03351309150457382 lr:6.666666666666667e-05 tokens_per_second_per_gpu:6826.979193318932
7
+ Step 7 | loss:0.061334047466516495 lr:7.777777777777778e-05 tokens_per_second_per_gpu:6821.014173507077
8
+ Step 8 | loss:0.01792784593999386 lr:8.888888888888889e-05 tokens_per_second_per_gpu:6834.16073598883
9
+ Step 9 | loss:0.04884471371769905 lr:0.0001 tokens_per_second_per_gpu:6814.469749504075
10
+ Step 10 | loss:0.023769527673721313 lr:9.996740476948385e-05 tokens_per_second_per_gpu:6808.148625150188
11
+ Step 11 | loss:0.01683230884373188 lr:9.98696615758975e-05 tokens_per_second_per_gpu:6817.3899062996725
12
+ Step 12 | loss:0.044611480087041855 lr:9.970689785771798e-05 tokens_per_second_per_gpu:6809.765549920982
13
+ Step 13 | loss:0.01763676479458809 lr:9.947932582778188e-05 tokens_per_second_per_gpu:6823.3497548714695
14
+ Step 14 | loss:0.03391048312187195 lr:9.918724219660013e-05 tokens_per_second_per_gpu:6821.922471578913
15
+ Step 15 | loss:0.010380257852375507 lr:9.883102778550434e-05 tokens_per_second_per_gpu:6801.825966163852
16
+ Step 16 | loss:0.03979611024260521 lr:9.841114703012817e-05 tokens_per_second_per_gpu:6812.3183912732675
17
+ Step 17 | loss:0.015345641411840916 lr:9.792814737487207e-05 tokens_per_second_per_gpu:6805.431099856669
18
+ Step 18 | loss:0.028081906959414482 lr:9.738265855914013e-05 tokens_per_second_per_gpu:6806.589515901607
19
+ Step 19 | loss:0.011652050539851189 lr:9.677539179628005e-05 tokens_per_second_per_gpu:6823.728472070371
20
+ Step 20 | loss:0.013236177153885365 lr:9.610713884629666e-05 tokens_per_second_per_gpu:6809.5806175346925
21
+ Step 21 | loss:0.009158747270703316 lr:9.537877098354786e-05 tokens_per_second_per_gpu:6816.071944583631
22
+ Step 22 | loss:0.013639681972563267 lr:9.459123786076912e-05 tokens_per_second_per_gpu:6815.612443990215
23
+ Step 23 | loss:0.018701117485761642 lr:9.374556627090749e-05 tokens_per_second_per_gpu:6795.6975633367065
24
+ Step 24 | loss:0.03290664032101631 lr:9.284285880837946e-05 tokens_per_second_per_gpu:6816.0876800959
25
+ Step 25 | loss:0.011792514473199844 lr:9.188429243149824e-05 tokens_per_second_per_gpu:6799.6734433673755
26
+ Step 26 | loss:0.00887240655720234 lr:9.087111692794459e-05 tokens_per_second_per_gpu:6820.68937424808
27
+ Step 27 | loss:0.005111854523420334 lr:8.980465328528219e-05 tokens_per_second_per_gpu:6811.4291322974195
28
+ Step 28 | loss:0.00833337102085352 lr:8.868629196864182e-05 tokens_per_second_per_gpu:6792.721061225793
29
+ Step 29 | loss:0.004015392158180475 lr:8.751749110782012e-05 tokens_per_second_per_gpu:6802.514837515657
30
+ Step 30 | loss:0.027002353221178055 lr:8.629977459615655e-05 tokens_per_second_per_gpu:6815.317941212828
31
+ Step 31 | loss:0.004000282846391201 lr:8.503473010366713e-05 tokens_per_second_per_gpu:6819.934152465108
32
+ Step 32 | loss:0.008173882961273193 lr:8.37240070070257e-05 tokens_per_second_per_gpu:6803.155938800446
33
+ Step 33 | loss:0.012675347737967968 lr:8.236931423909138e-05 tokens_per_second_per_gpu:6816.343440706613
34
+ Step 34 | loss:0.006324874237179756 lr:8.097241806078615e-05 tokens_per_second_per_gpu:6809.894525951748
35
+ Step 35 | loss:0.005416011903434992 lr:7.953513975822755e-05 tokens_per_second_per_gpu:6808.5298231387305
36
+ Step 36 | loss:0.006035041529685259 lr:7.805935326811912e-05 tokens_per_second_per_gpu:6811.065492973223
37
+ Step 37 | loss:0.009669274091720581 lr:7.654698273449435e-05 tokens_per_second_per_gpu:6800.272962369546
38
+ Step 38 | loss:0.00583279225975275 lr:7.500000000000001e-05 tokens_per_second_per_gpu:6814.067125679608
39
+ Step 39 | loss:0.005747811868786812 lr:7.342042203498951e-05 tokens_per_second_per_gpu:6806.249276713293
40
+ Step 40 | loss:0.003706105286255479 lr:7.181030830777837e-05 tokens_per_second_per_gpu:6834.5469767175955
41
+ Step 41 | loss:0.0025935648009181023 lr:7.017175809949044e-05 tokens_per_second_per_gpu:6811.799248930741
42
+ Step 42 | loss:0.005205724388360977 lr:6.850690776699573e-05 tokens_per_second_per_gpu:6789.21067948351
43
+ Step 43 | loss:0.016056399792432785 lr:6.681792795750875e-05 tokens_per_second_per_gpu:6804.181083822605
44
+ Step 44 | loss:0.004058449529111385 lr:6.510702077847863e-05 tokens_per_second_per_gpu:6809.4810600403825
45
+ Step 45 | loss:0.002956643933430314 lr:6.337641692646106e-05 tokens_per_second_per_gpu:6816.696084771359
46
+ Step 46 | loss:0.003887909697368741 lr:6.162837277871553e-05 tokens_per_second_per_gpu:6786.40257028325
47
+ Step 47 | loss:0.014375696890056133 lr:5.9865167451320005e-05 tokens_per_second_per_gpu:6811.3195278545345
48
+ Step 48 | loss:0.0021371471229940653 lr:5.808909982763825e-05 tokens_per_second_per_gpu:6810.95771134
49
+ Step 49 | loss:0.002866639057174325 lr:5.6302485561014475e-05 tokens_per_second_per_gpu:6819.357207569117
50
+ Step 50 | loss:0.0011557599063962698 lr:5.4507654055603275e-05 tokens_per_second_per_gpu:6805.72853194876
51
+ Step 51 | loss:0.018013911321759224 lr:5.270694542927088e-05 tokens_per_second_per_gpu:6809.846715099813
52
+ Step 52 | loss:0.0026188462506979704 lr:5.090270746252802e-05 tokens_per_second_per_gpu:6800.943318320658
53
+ Step 53 | loss:0.011280328966677189 lr:4.909729253747197e-05 tokens_per_second_per_gpu:6813.292069306693
54
+ Step 54 | loss:0.0020988117903470993 lr:4.729305457072913e-05 tokens_per_second_per_gpu:6795.184923444009
55
+ Step 55 | loss:0.0011527270544320345 lr:4.549234594439674e-05 tokens_per_second_per_gpu:6810.567359537698
56
+ Step 56 | loss:0.0011581148719415069 lr:4.3697514438985536e-05 tokens_per_second_per_gpu:6408.456566137512
57
+ Step 57 | loss:0.019491899758577347 lr:4.1910900172361764e-05 tokens_per_second_per_gpu:6799.5670455212785
58
+ Step 58 | loss:0.0003687155549414456 lr:4.0134832548680006e-05 tokens_per_second_per_gpu:6813.552709174246
59
+ Step 59 | loss:0.0015409629559144378 lr:3.8371627221284495e-05 tokens_per_second_per_gpu:6811.492922559958
60
+ Step 60 | loss:0.0013514460297301412 lr:3.6623583073538966e-05 tokens_per_second_per_gpu:6822.011500047504
61
+ Step 61 | loss:0.000942587445024401 lr:3.489297922152136e-05 tokens_per_second_per_gpu:6811.367142849822
62
+ Step 62 | loss:0.00012637079635169357 lr:3.3182072042491244e-05 tokens_per_second_per_gpu:6806.890541489057
63
+ Step 63 | loss:0.0020158663392066956 lr:3.149309223300428e-05 tokens_per_second_per_gpu:6792.902753614923
64
+ Step 64 | loss:0.001647979486733675 lr:2.982824190050958e-05 tokens_per_second_per_gpu:6810.383664910744
65
+ Step 65 | loss:0.009610266424715519 lr:2.8189691692221627e-05 tokens_per_second_per_gpu:6815.221687372939
66
+ Step 66 | loss:0.0009663584060035646 lr:2.65795779650105e-05 tokens_per_second_per_gpu:6808.829119755595
67
+ Step 67 | loss:0.0011374299647286534 lr:2.500000000000001e-05 tokens_per_second_per_gpu:6817.545267622491
68
+ Step 68 | loss:0.011504337191581726 lr:2.3453017265505673e-05 tokens_per_second_per_gpu:6820.517336016243
69
+ Step 69 | loss:0.0005847160937264562 lr:2.194064673188089e-05 tokens_per_second_per_gpu:6816.399234756072
70
+ Step 70 | loss:0.010215891525149345 lr:2.0464860241772455e-05 tokens_per_second_per_gpu:6819.561227096747
71
+ Step 71 | loss:0.003488093614578247 lr:1.902758193921385e-05 tokens_per_second_per_gpu:6808.3365992539
72
+ Step 72 | loss:0.0006929687806405127 lr:1.7630685760908622e-05 tokens_per_second_per_gpu:6810.176107640547
73
+ Step 73 | loss:0.0006874012760818005 lr:1.6275992992974308e-05 tokens_per_second_per_gpu:6819.088817701193
74
+ Step 74 | loss:0.0008788794511929154 lr:1.4965269896332885e-05 tokens_per_second_per_gpu:6801.777743303528
75
+ Step 75 | loss:0.002412602072581649 lr:1.3700225403843469e-05 tokens_per_second_per_gpu:6804.103610169244
76
+ Step 76 | loss:0.0038095307536423206 lr:1.2482508892179884e-05 tokens_per_second_per_gpu:6806.588981155734
77
+ Step 77 | loss:0.0053641716949641705 lr:1.1313708031358183e-05 tokens_per_second_per_gpu:6792.592091749703
78
+ Step 78 | loss:0.0010660523548722267 lr:1.0195346714717813e-05 tokens_per_second_per_gpu:6811.368266515568
79
+ Step 79 | loss:0.008615836501121521 lr:9.12888307205541e-06 tokens_per_second_per_gpu:6810.978602048898
80
+ Step 80 | loss:0.0005627855425700545 lr:8.115707568501768e-06 tokens_per_second_per_gpu:6815.071657971353
81
+ Step 81 | loss:0.002700572367757559 lr:7.157141191620548e-06 tokens_per_second_per_gpu:6793.027561263215
82
+ Step 82 | loss:0.0012557301670312881 lr:6.2544337290925185e-06 tokens_per_second_per_gpu:6802.445255615107
83
+ Step 83 | loss:0.0015329713933169842 lr:5.408762139230888e-06 tokens_per_second_per_gpu:6804.99038483307
84
+ Step 84 | loss:0.003167090704664588 lr:4.621229016452156e-06 tokens_per_second_per_gpu:6783.223055759846
85
+ Step 85 | loss:0.0002928520552814007 lr:3.892861153703342e-06 tokens_per_second_per_gpu:6814.443503922145
86
+ Step 86 | loss:0.0007159900269471109 lr:3.2246082037199532e-06 tokens_per_second_per_gpu:6807.390574807507
87
+ Step 87 | loss:0.0011301033664494753 lr:2.6173414408598827e-06 tokens_per_second_per_gpu:6811.879242181891
88
+ Step 88 | loss:0.003690943820402026 lr:2.0718526251279346e-06 tokens_per_second_per_gpu:6815.908566203344
89
+ Step 89 | loss:0.0003694476035889238 lr:1.5888529698718346e-06 tokens_per_second_per_gpu:6784.198178900837
90
+ Step 90 | loss:0.00046446218038909137 lr:1.1689722144956671e-06 tokens_per_second_per_gpu:6810.386739473312
91
+ Step 91 | loss:0.0004260542045813054 lr:8.127578033998662e-07 tokens_per_second_per_gpu:6813.714003933362
92
+ Step 92 | loss:0.001164629589766264 lr:5.206741722181386e-07 tokens_per_second_per_gpu:6820.454820154012
93
+ Step 93 | loss:0.0006978681194595993 lr:2.9310214228202013e-07 tokens_per_second_per_gpu:6791.87785883621
94
+ Step 94 | loss:0.0012666977709159255 lr:1.3033842410251075e-07 tokens_per_second_per_gpu:6809.304188230645
95
+ Step 95 | loss:0.0017049101879820228 lr:3.259523051615254e-08 tokens_per_second_per_gpu:6801.652062440923
96
+ Step 96 | loss:0.0002462772827129811 lr:0.0 tokens_per_second_per_gpu:6830.59360532396
05a7bcf2/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
05a7bcf2/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]\n [0 0 2 0 0 2 2 2 0 0 2 0 2 0 0 0 2 2 0 0 2 0 2 0 0 2 2 0 0 2]\n [0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 4 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 4 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 4 4 4 0 0 0 0 0 0 4 4 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 4 4 4 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 2 2 2 0 0 0 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 2 2 0 0 0]\n [0 0 0 0 8 2 2 0 0 0 2 8 2 0 0 0 0 0 0 8 2 0 0 0 0 2 2 0 0 0]\n [0 0 0 0 8 8 2 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [2 2 2 2 8 8 8 2 2 2 8 8 8 2 2 2 2 2 2 8 8 2 2 2 2 8 8 2 2 2]\n [0 0 2 0 8 8 8 2 0 0 8 8 8 0 0 0 2 2 0 8 8 0 2 0 0 8 8 0 0 2]\n [0 0 0 0 8 8 8 2 0 0 8 8 8 0 0 0 2 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [0 0 0 0 8 8 8 0 0 0 8 8 8 0 0 0 0 0 0 8 8 0 0 0 0 8 8 0 0 0]\n [8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8]\n [0 0 0 0 4 4 4 0 0 0 4 4 4 0 0 0 0 0 0 4 4 0 0 0 0 4 4 0 0 0]\n [0 
0 0 0 4 4 4 0 0 0 4 4 4 0 0 0 0 0 0 4 4 0 0 0 0 4 4 0 0 0]\n [0 0 0 0 3 3 3 0 0 0 4 4 4 0 0 0 0 0 0 4 4 0 0 0 0 4 4 0 0 0]\n [0 0 0 0 3 3 0 0 0 0 4 4 4 0 0 0 0 0 0 4 4 0 0 0 0 4 4 0 0 0]\n [0 0 0 0 3 3 0 0 0 0 4 4 4 0 0 0 0 0 0 4 4 0 0 0 0 3 3 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 3 3 3 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 3 3 3 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 4 4 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 4 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 4 4 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 4 4 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 4 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 4 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 4 4 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 4 4 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 4 3 3 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [2 2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 3 0 0 0 0 0]\n [2 2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 3 3 0 0 0 0]\n [2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 3 3 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 
0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [2 2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 4 4 4 3 0 0]\n [2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 4 4 4 3 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 4 4 3 3 0 0]\n [2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 4 4 4 4 4 3 3 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 7383}
0607ce86/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
0607ce86/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccdd2dd61451a98b935288c9cbc2b0a269411dceb212f210425c9f8c1c0899b2
3
+ size 596042770
0607ce86/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
0607ce86/log_1730924807.txt ADDED
@@ -0,0 +1,144 @@
1
+ Step 1 | loss:0.004792299121618271 lr:7.142857142857143e-06 tokens_per_second_per_gpu:4518.468949459207
2
+ Step 2 | loss:0.012903495691716671 lr:1.4285714285714285e-05 tokens_per_second_per_gpu:6955.524642420276
3
+ Step 3 | loss:0.012679360806941986 lr:2.1428571428571428e-05 tokens_per_second_per_gpu:6974.477801839077
4
+ Step 4 | loss:0.0050378115847706795 lr:2.857142857142857e-05 tokens_per_second_per_gpu:3835.632166536646
5
+ Step 5 | loss:0.011082684621214867 lr:3.571428571428572e-05 tokens_per_second_per_gpu:7015.004592322919
6
+ Step 6 | loss:0.01366399321705103 lr:4.2857142857142856e-05 tokens_per_second_per_gpu:3804.4801853547688
7
+ Step 7 | loss:0.004555062390863895 lr:5e-05 tokens_per_second_per_gpu:6924.165176437273
8
+ Step 8 | loss:0.0031856889836490154 lr:5.714285714285714e-05 tokens_per_second_per_gpu:3836.63610072261
9
+ Step 9 | loss:0.003530680201947689 lr:6.428571428571429e-05 tokens_per_second_per_gpu:6928.8767409095635
10
+ Step 10 | loss:0.0029834117740392685 lr:7.142857142857143e-05 tokens_per_second_per_gpu:7054.289075345248
11
+ Step 11 | loss:0.008434215560555458 lr:7.857142857142858e-05 tokens_per_second_per_gpu:7028.746804300251
12
+ Step 12 | loss:0.0019024465000256896 lr:8.571428571428571e-05 tokens_per_second_per_gpu:7004.27387196034
13
+ Step 13 | loss:0.0013153237523511052 lr:9.285714285714286e-05 tokens_per_second_per_gpu:3747.579511976763
14
+ Step 14 | loss:0.0016037473687902093 lr:0.0001 tokens_per_second_per_gpu:6868.013298613831
15
+ Step 15 | loss:0.0023987351451069117 lr:9.998540070400966e-05 tokens_per_second_per_gpu:3746.109581608771
16
+ Step 16 | loss:0.00439112912863493 lr:9.994161134161634e-05 tokens_per_second_per_gpu:7051.889328967001
17
+ Step 17 | loss:0.0005964749143458903 lr:9.986865748457457e-05 tokens_per_second_per_gpu:7070.284134761689
18
+ Step 18 | loss:0.0004346532514318824 lr:9.976658173588244e-05 tokens_per_second_per_gpu:6955.9594991868125
19
+ Step 19 | loss:0.0004751915985252708 lr:9.96354437049027e-05 tokens_per_second_per_gpu:6969.409647366765
20
+ Step 20 | loss:0.00026490577147342265 lr:9.947531997255256e-05 tokens_per_second_per_gpu:7026.113799572298
21
+ Step 21 | loss:1.5243083907989785e-05 lr:9.928630404658255e-05 tokens_per_second_per_gpu:7068.505665499448
22
+ Step 22 | loss:0.0005737241008318961 lr:9.906850630697068e-05 tokens_per_second_per_gpu:7038.416000389648
23
+ Step 23 | loss:0.002770385704934597 lr:9.882205394146361e-05 tokens_per_second_per_gpu:6906.939970128213
24
+ Step 24 | loss:9.889339708024636e-05 lr:9.85470908713026e-05 tokens_per_second_per_gpu:7076.4088338579795
25
+ Step 25 | loss:0.004420747049152851 lr:9.824377766717759e-05 tokens_per_second_per_gpu:6974.264297517452
26
+ Step 26 | loss:0.0005106984172016382 lr:9.791229145545831e-05 tokens_per_second_per_gpu:6966.986782986819
27
+ Step 27 | loss:0.00022695916413795203 lr:9.755282581475769e-05 tokens_per_second_per_gpu:6925.302847230216
28
+ Step 28 | loss:0.000734488305170089 lr:9.716559066288715e-05 tokens_per_second_per_gpu:7036.274228152679
29
+ Step 29 | loss:6.996609158704814e-07 lr:9.675081213427076e-05 tokens_per_second_per_gpu:7085.078541386148
30
+ Step 30 | loss:0.002097910735756159 lr:9.630873244788883e-05 tokens_per_second_per_gpu:7030.355570058724
31
+ Step 31 | loss:1.957287310005995e-07 lr:9.583960976582913e-05 tokens_per_second_per_gpu:7046.601472234343
32
+ Step 32 | loss:0.005069150123745203 lr:9.534371804252728e-05 tokens_per_second_per_gpu:7076.385620015693
33
+ Step 33 | loss:7.62033505452564e-06 lr:9.482134686478519e-05 tokens_per_second_per_gpu:6960.136663212914
34
+ Step 34 | loss:2.63245260612166e-06 lr:9.42728012826605e-05 tokens_per_second_per_gpu:7099.353936058267
35
+ Step 35 | loss:0.002219924470409751 lr:9.36984016313259e-05 tokens_per_second_per_gpu:7063.492131862085
36
+ Step 36 | loss:3.3589412851142697e-06 lr:9.309848334400246e-05 tokens_per_second_per_gpu:6960.442071551715
37
+ Step 37 | loss:0.0004606418660841882 lr:9.247339675607605e-05 tokens_per_second_per_gpu:6893.408028057143
38
+ Step 38 | loss:6.152480182208819e-06 lr:9.182350690051133e-05 tokens_per_second_per_gpu:7056.333279794816
39
+ Step 39 | loss:8.942539716372266e-05 lr:9.114919329468282e-05 tokens_per_second_per_gpu:7064.486994457691
40
+ Step 40 | loss:0.0007815973367542028 lr:9.045084971874738e-05 tokens_per_second_per_gpu:6907.760467124012
41
+ Step 41 | loss:4.1791929106693715e-05 lr:8.972888398568772e-05 tokens_per_second_per_gpu:7046.59756862717
42
+ Step 42 | loss:0.0017932152841240168 lr:8.898371770316111e-05 tokens_per_second_per_gpu:6909.559407459882
43
+ Step 43 | loss:0.0011351365828886628 lr:8.821578602729242e-05 tokens_per_second_per_gpu:6951.361299365132
44
+ Step 44 | loss:4.510201506491285e-06 lr:8.742553740855506e-05 tokens_per_second_per_gpu:6896.120717094731
45
+ Step 45 | loss:5.263660932541825e-05 lr:8.661343332988869e-05 tokens_per_second_per_gpu:6887.339235528718
46
+ Step 46 | loss:1.1140290553157683e-05 lr:8.577994803720606e-05 tokens_per_second_per_gpu:7087.980082812125
47
+ Step 47 | loss:6.47643901174888e-06 lr:8.492556826244687e-05 tokens_per_second_per_gpu:7084.290273647461
48
+ Step 48 | loss:9.070141686606803e-07 lr:8.405079293933986e-05 tokens_per_second_per_gpu:7080.424185701019
49
+ Step 49 | loss:0.0019970741122961044 lr:8.315613291203976e-05 tokens_per_second_per_gpu:7044.6594430534315
50
+ Step 50 | loss:4.678936238633469e-05 lr:8.224211063680853e-05 tokens_per_second_per_gpu:6961.4151908691565
51
+ Step 51 | loss:1.2322523616603576e-05 lr:8.130925987691569e-05 tokens_per_second_per_gpu:7070.698271942826
52
+ Step 52 | loss:1.0367139111622237e-05 lr:8.035812539093557e-05 tokens_per_second_per_gpu:6907.463530719349
53
+ Step 53 | loss:6.867420552225667e-07 lr:7.938926261462366e-05 tokens_per_second_per_gpu:7031.305434342307
54
+ Step 54 | loss:4.9571219278732315e-05 lr:7.840323733655778e-05 tokens_per_second_per_gpu:6969.609919294838
55
+ Step 55 | loss:2.820162808347959e-05 lr:7.740062536773352e-05 tokens_per_second_per_gpu:6900.29281985781
56
+ Step 56 | loss:2.8738515993609326e-06 lr:7.638201220530665e-05 tokens_per_second_per_gpu:6430.862795900378
57
+ Step 57 | loss:4.696042196883354e-06 lr:7.534799269067953e-05 tokens_per_second_per_gpu:7027.365158927265
58
+ Step 58 | loss:1.300309486396145e-05 lr:7.42991706621303e-05 tokens_per_second_per_gpu:6951.896803131622
59
+ Step 59 | loss:1.5782376067363657e-05 lr:7.323615860218843e-05 tokens_per_second_per_gpu:6967.182906826198
60
+ Step 60 | loss:4.52238236903213e-06 lr:7.215957727996207e-05 tokens_per_second_per_gpu:7039.3509248598275
61
+ Step 61 | loss:1.7398016325387289e-06 lr:7.107005538862646e-05 tokens_per_second_per_gpu:6956.770026254611
62
+ Step 62 | loss:4.455490852706134e-05 lr:6.996822917828477e-05 tokens_per_second_per_gpu:6966.337500807775
63
+ Step 63 | loss:8.927419230531086e-07 lr:6.885474208441603e-05 tokens_per_second_per_gpu:7040.308891231726
64
+ Step 64 | loss:1.174200588138774e-05 lr:6.773024435212678e-05 tokens_per_second_per_gpu:6898.833043360964
65
+ Step 65 | loss:1.8381995232630288e-06 lr:6.659539265642643e-05 tokens_per_second_per_gpu:7061.917802620802
66
+ Step 66 | loss:2.2081801489548525e-06 lr:6.545084971874738e-05 tokens_per_second_per_gpu:6944.550985626046
67
+ Step 67 | loss:4.22269877162762e-05 lr:6.429728391993446e-05 tokens_per_second_per_gpu:7091.5878807660365
68
+ Step 68 | loss:2.3210809558804613e-06 lr:6.313536890992935e-05 tokens_per_second_per_gpu:6962.9924973239395
69
+ Step 69 | loss:3.1893891900836024e-06 lr:6.19657832143779e-05 tokens_per_second_per_gpu:7045.1220561244145
70
+ Step 70 | loss:0.00023930698807816952 lr:6.078920983839031e-05 tokens_per_second_per_gpu:6960.861822584388
71
+ Step 71 | loss:1.130143232330738e-06 lr:5.960633586768543e-05 tokens_per_second_per_gpu:6899.961643808216
72
+ Step 72 | loss:1.6112105640786467e-06 lr:5.841785206735192e-05 tokens_per_second_per_gpu:6965.057634860687
73
+ Step 73 | loss:2.0564432361425133e-06 lr:5.7224452478461064e-05 tokens_per_second_per_gpu:7074.571887162948
74
+ Step 74 | loss:8.124079613480717e-05 lr:5.602683401276615e-05 tokens_per_second_per_gpu:7046.836142768574
75
+ Step 75 | loss:1.010556957226072e-06 lr:5.482569604572576e-05 tokens_per_second_per_gpu:6908.564844520081
76
+ Step 76 | loss:2.592751570773544e-06 lr:5.3621740008088126e-05 tokens_per_second_per_gpu:6971.9321043059845
77
+ Step 77 | loss:5.325124220689759e-06 lr:5.2415668976275355e-05 tokens_per_second_per_gpu:6965.100494382521
78
+ Step 78 | loss:0.00024011747154872864 lr:5.1208187261806615e-05 tokens_per_second_per_gpu:6964.165691178264
79
+ Step 79 | loss:7.75813589370955e-07 lr:5e-05 tokens_per_second_per_gpu:6938.621796099295
80
+ Step 80 | loss:2.1455480236909352e-06 lr:4.87918127381934e-05 tokens_per_second_per_gpu:6963.372165086193
81
+ Step 81 | loss:0.0003063591429963708 lr:4.758433102372466e-05 tokens_per_second_per_gpu:6917.706143442057
82
+ Step 82 | loss:1.5376626834040508e-06 lr:4.6378259991911886e-05 tokens_per_second_per_gpu:7051.631908002588
83
+ Step 83 | loss:2.9501327958314505e-07 lr:4.5174303954274244e-05 tokens_per_second_per_gpu:6772.079826510534
84
+ Step 84 | loss:3.040148044419766e-07 lr:4.397316598723385e-05 tokens_per_second_per_gpu:7060.876339702193
85
+ Step 85 | loss:1.3994391565574915e-06 lr:4.277554752153895e-05 tokens_per_second_per_gpu:6899.650038977372
86
+ Step 86 | loss:4.592549885273911e-05 lr:4.1582147932648074e-05 tokens_per_second_per_gpu:6946.918505295365
87
+ Step 87 | loss:9.003700256471348e-07 lr:4.039366413231458e-05 tokens_per_second_per_gpu:7089.637885490974
88
+ Step 88 | loss:5.667653795171645e-07 lr:3.92107901616097e-05 tokens_per_second_per_gpu:6955.233061277162
89
+ Step 89 | loss:2.5703700430312892e-06 lr:3.803421678562213e-05 tokens_per_second_per_gpu:7045.854343354433
90
+ Step 90 | loss:8.465374412480742e-05 lr:3.6864631090070655e-05 tokens_per_second_per_gpu:7040.102101848447
91
+ Step 91 | loss:7.399895366688725e-06 lr:3.570271608006555e-05 tokens_per_second_per_gpu:7096.806662512389
92
+ Step 92 | loss:5.275537091620208e-07 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:6952.699977338772
93
+ Step 93 | loss:1.5645102848793613e-06 lr:3.340460734357359e-05 tokens_per_second_per_gpu:7088.014462474767
94
+ Step 94 | loss:9.298463510276633e-07 lr:3.226975564787322e-05 tokens_per_second_per_gpu:7070.872293588375
95
+ Step 95 | loss:4.712554471097974e-07 lr:3.114525791558398e-05 tokens_per_second_per_gpu:6959.590874922212
96
+ Step 96 | loss:2.8378622118907515e-07 lr:3.003177082171523e-05 tokens_per_second_per_gpu:7054.584883174489
97
+ Step 97 | loss:6.349221166601637e-07 lr:2.8929944611373554e-05 tokens_per_second_per_gpu:7086.716693252626
98
+ Step 98 | loss:8.236501162173226e-05 lr:2.784042272003794e-05 tokens_per_second_per_gpu:6895.087426251085
99
+ Step 99 | loss:0.00022709825134370476 lr:2.6763841397811573e-05 tokens_per_second_per_gpu:7082.962550211235
100
+ Step 100 | loss:1.0754414461189299e-06 lr:2.57008293378697e-05 tokens_per_second_per_gpu:6958.6647294891545
101
+ Step 101 | loss:1.960502686415566e-06 lr:2.4652007309320498e-05 tokens_per_second_per_gpu:6965.047105335359
102
+ Step 102 | loss:1.0624105470924405e-06 lr:2.361798779469336e-05 tokens_per_second_per_gpu:7067.047384793512
103
+ Step 103 | loss:2.5157325467262126e-07 lr:2.259937463226651e-05 tokens_per_second_per_gpu:7089.271154029672
104
+ Step 104 | loss:8.222309020311513e-07 lr:2.1596762663442218e-05 tokens_per_second_per_gpu:7052.033531868413
105
+ Step 105 | loss:0.00019905969384126365 lr:2.061073738537635e-05 tokens_per_second_per_gpu:6968.846518884659
106
+ Step 106 | loss:1.422281798113545e-06 lr:1.9641874609064443e-05 tokens_per_second_per_gpu:6972.498559040641
107
+ Step 107 | loss:2.492096882633632e-06 lr:1.8690740123084316e-05 tokens_per_second_per_gpu:6961.414059723269
108
+ Step 108 | loss:3.576358358259313e-05 lr:1.7757889363191483e-05 tokens_per_second_per_gpu:6974.772507783813
109
+ Step 109 | loss:9.369013014293159e-07 lr:1.684386708796025e-05 tokens_per_second_per_gpu:6896.961054191133
110
+ Step 110 | loss:8.888050047062279e-07 lr:1.5949207060660138e-05 tokens_per_second_per_gpu:6903.65724348431
111
+ Step 111 | loss:1.915802556595736e-07 lr:1.5074431737553157e-05 tokens_per_second_per_gpu:6765.210621936858
112
+ Step 112 | loss:6.041101983100816e-07 lr:1.422005196279395e-05 tokens_per_second_per_gpu:6936.884964345948
113
+ Step 113 | loss:4.0249375388157205e-07 lr:1.338656667011134e-05 tokens_per_second_per_gpu:7058.38947020656
114
+ Step 114 | loss:6.367684477481816e-07 lr:1.257446259144494e-05 tokens_per_second_per_gpu:7032.751535755158
115
+ Step 115 | loss:2.8260386898182333e-05 lr:1.178421397270758e-05 tokens_per_second_per_gpu:6953.469266312482
116
+ Step 116 | loss:1.567266167512571e-06 lr:1.1016282296838887e-05 tokens_per_second_per_gpu:7085.262586220905
117
+ Step 117 | loss:2.264130216644844e-06 lr:1.0271116014312293e-05 tokens_per_second_per_gpu:6952.288695018227
118
+ Step 118 | loss:6.490680971182883e-05 lr:9.549150281252633e-06 tokens_per_second_per_gpu:6964.214384286497
119
+ Step 119 | loss:7.784604463267897e-07 lr:8.850806705317183e-06 tokens_per_second_per_gpu:6961.247695089686
120
+ Step 120 | loss:8.729821843189711e-07 lr:8.176493099488663e-06 tokens_per_second_per_gpu:6903.2489231021245
121
+ Step 121 | loss:8.295385214296402e-07 lr:7.526603243923957e-06 tokens_per_second_per_gpu:6953.335007126678
122
+ Step 122 | loss:1.3326631460586214e-06 lr:6.901516655997536e-06 tokens_per_second_per_gpu:6962.136519664098
123
+ Step 123 | loss:1.290653131036379e-06 lr:6.301598368674105e-06 tokens_per_second_per_gpu:6961.8538852270485
124
+ Step 124 | loss:0.0001666852185735479 lr:5.727198717339511e-06 tokens_per_second_per_gpu:6964.692810445289
125
+ Step 125 | loss:1.8864420781028457e-07 lr:5.178653135214812e-06 tokens_per_second_per_gpu:7084.47222098494
126
+ Step 126 | loss:1.4434226613957435e-06 lr:4.65628195747273e-06 tokens_per_second_per_gpu:6965.933476281354
127
+ Step 127 | loss:0.0004926001420244575 lr:4.16039023417088e-06 tokens_per_second_per_gpu:7070.743126358531
128
+ Step 128 | loss:6.65172024127969e-07 lr:3.691267552111183e-06 tokens_per_second_per_gpu:7058.634358221531
129
+ Step 129 | loss:2.423998375888914e-05 lr:3.249187865729264e-06 tokens_per_second_per_gpu:6960.316882297749
130
+ Step 130 | loss:2.3096629320207285e-06 lr:2.8344093371128424e-06 tokens_per_second_per_gpu:6967.707629269878
131
+ Step 131 | loss:8.259024184553709e-07 lr:2.4471741852423237e-06 tokens_per_second_per_gpu:7091.881830761651
132
+ Step 132 | loss:0.00036341266240924597 lr:2.087708544541689e-06 tokens_per_second_per_gpu:7028.734700444715
133
+ Step 133 | loss:5.048845196142793e-05 lr:1.7562223328224325e-06 tokens_per_second_per_gpu:7054.698139756758
134
+ Step 134 | loss:1.331735461462813e-06 lr:1.4529091286973995e-06 tokens_per_second_per_gpu:6891.879518942568
135
+ Step 135 | loss:0.00016570062143728137 lr:1.1779460585363944e-06 tokens_per_second_per_gpu:6894.350001130334
136
+ Step 136 | loss:8.915412763599306e-07 lr:9.314936930293283e-07 tokens_per_second_per_gpu:6967.057064696187
137
+ Step 137 | loss:3.679159135572263e-06 lr:7.136959534174592e-07 tokens_per_second_per_gpu:6951.214306958658
138
+ Step 138 | loss:1.686560040070617e-06 lr:5.246800274474439e-07 tokens_per_second_per_gpu:7062.8543555851875
139
+ Step 139 | loss:2.881310194879916e-07 lr:3.6455629509730136e-07 tokens_per_second_per_gpu:7034.988814681111
140
+ Step 140 | loss:1.2103055269108154e-06 lr:2.334182641175686e-07 tokens_per_second_per_gpu:6775.31819688721
141
+ Step 141 | loss:8.536469249520451e-05 lr:1.3134251542544774e-07 tokens_per_second_per_gpu:6948.348891929506
142
+ Step 142 | loss:1.8825789993570652e-07 lr:5.838865838366792e-08 tokens_per_second_per_gpu:7050.198371432318
143
+ Step 143 | loss:4.7263534952435293e-07 lr:1.4599295990352924e-08 tokens_per_second_per_gpu:7049.281042448092
144
+ Step 144 | loss:8.21501889731735e-05 lr:0.0 tokens_per_second_per_gpu:7089.175553410856
0607ce86/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
0607ce86/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 3 0 0 0 0 0 0]\n [0 0 6 6 6 6 6 6 0 0 6 6 6 6 3 6 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 3 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 3 0 0 0]\n [0 3 8 8 3 3 8 8 0 0 8 8 3 3 8 3 0 0 0 3 0 0]\n [0 3 8 8 3 3 8 8 0 0 8 8 3 3 8 8 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0]\n [0 3 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 6 6 6 6 6 6 0 0 6 6 3 6 6 6 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 3 8 3 3 8 3 0 3 0 0 0 3]\n [0 0 8 8 3 3 3 8 0 0 3 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 3 3 3 3 8 0 0 0 0 0 0]\n [3 3 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 3 0 0 0]\n [0 0 0 0 0 0 0 0 0 3 0 0 3 0 0 0 0 0 0 0 0 0]\n [0 0 6 6 3 6 6 6 0 3 6 6 6 3 6 6 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 3 0]\n [0 0 8 3 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 3 0 0 3 0 0 0 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 6 6 6 6 6 6 0 0 6 6 6 6 6 6 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 6 6 6 6 6 6 0 0 6 6 6 6 6 6 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 6 6 6 6 6 6 0 0 6 6 6 6 6 6 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 8 8 3 3 8 8 0 0 8 8 3 3 8 8 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n[[0 3 0 0 0 3 3 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 1 1 1 2 3 3 0 3 0 0]\n [0 1 1 3 3 3 0 1 1 2 3 3 0 1 1 2 3 3 3 0 3 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 1 3 3 0 0 0 3]\n [0 1 3 3 3 1 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 8 8 8 8 8 0 8 8 8 8 8 0 8 8 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 3 1 3 3 3 0 3 1 2 3 3 0 1 1 2 3 3 0 0 3 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 3 2 1 3 0 1 1 2 3 3 0 0 0 0]\n [1 1 1 2 3 3 0 1 1 2 3 3 3 1 3 2 3 3 0 0 0 0]\n [0 8 1 8 8 3 0 8 8 8 8 8 0 1 8 8 8 8 0 0 0 0]\n [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 3 0]\n [0 1 1 3 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 1 3 3 0 1 1 2 3 1 0 0 0 0]\n [1 1 1 2 3 3 0 1 1 2 3 1 0 1 1 2 3 3 0 0 0 0]\n [3 8 8 8 3 3 1 8 8 8 8 8 0 8 8 8 8 8 0 0 1 0]\n [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 3 3 0 3 0 3 0 1 1 0 3 0 0 0 0 0 0 0 0 0 1]\n [0 0 3 0 0 1 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 8 8 8 8 8 0 8 8 8 8 8 0 8 8 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 8 8 8 8 8 0 8 8 8 8 8 0 8 8 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 1 1 2 3 3 0 1 1 2 3 3 0 1 1 2 3 3 0 0 0 0]\n [0 8 8 8 8 8 0 8 8 8 8 8 0 8 8 8 8 8 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3]\n [0 2 2 2 2 2 0 2 2 2 2 0 0 2 2 2 2 0 0 0 0 3]\n [0 3 2 2 3 0 0 3 2 2 3 0 3 3 2 2 3 0 0 3 0 0]\n [0 2 3 3 3 0 0 3 3 3 3 0 0 3 3 3 3 0 0 0 0 0]\n [0 2 3 1 3 0 0 2 2 1 2 0 0 3 1 1 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0]\n [0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0]\n [0 2 2 2 2 3 0 2 2 2 2 0 0 2 2 2 2 2 0 0 0 0]\n [0 3 2 2 3 0 0 3 2 2 3 0 3 3 2 2 3 2 0 0 0 0]\n [0 3 3 3 3 0 0 3 3 3 3 0 0 3 3 3 3 0 0 0 0 0]\n [0 3 3 1 3 0 0 3 1 1 3 0 0 3 1 1 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 0 0 3 2 3 2 0 0 2 3 2 2 0 0 0 0 0]\n [0 3 2 2 3 0 0 3 2 3 3 0 0 3 2 3 3 0 0 0 0 0]\n [3 3 3 3 3 0 0 3 3 3 3 0 0 3 2 3 3 0 0 2 0 0]\n [0 3 1 1 3 0 3 3 1 1 3 0 0 3 1 1 3 0 0 0 0 0]\n [0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 3 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 0 0 0 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 0]\n [0 0 0 3 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 0 0 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0]\n [0 3 2 2 3 0 0 3 2 2 3 0 0 3 2 2 3 0 0 0 0 0]\n [0 3 3 3 3 0 0 3 3 3 3 0 0 3 3 3 3 0 0 0 0 0]\n [0 3 1 1 3 0 0 3 1 1 3 0 0 3 1 1 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 0 0 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0]\n [0 3 2 2 3 0 0 3 2 2 3 0 0 3 2 2 3 0 0 0 0 0]\n [0 3 3 3 3 0 0 3 3 3 3 0 0 3 3 3 3 0 0 0 0 0]\n [0 3 1 1 3 0 0 3 1 1 3 0 0 3 1 1 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 2 2 2 2 0 0 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0]\n [0 3 2 2 3 0 0 3 2 2 3 0 0 3 2 2 3 0 0 0 0 0]\n [0 3 3 3 3 0 0 3 3 3 3 0 0 3 3 3 3 0 0 0 0 0]\n [0 3 1 1 3 0 0 3 1 1 3 0 0 3 1 1 3 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 6005}
0692e18c/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
0692e18c/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073b6d5f936685ac2736e3c1a67af872075b1166f04665ea29d120447d7acbaa
3
+ size 596042770
0692e18c/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
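Taken together, the adapter_config.json and config.json above describe a rank-128 LoRA adapter (lora_alpha 16, applied to the attention and MLP projections plus lm_head) sitting on top of an 8B Llama-architecture base model. A minimal sketch of how such a per-task folder could be loaded with transformers + peft follows; this is an assumption about downstream use, not code from this upload, and the base-model path is simply the local path recorded in adapter_config.json.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Path copied from adapter_config.json; point this at wherever the base checkpoint lives.
BASE = "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/"
ADAPTER = "0692e18c"  # one of the per-task folders in this upload

base_model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(BASE)

# Attach the task-specific LoRA weights (r=128) on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, ADAPTER)
model.eval()

If a plain checkpoint is preferred, model.merge_and_unload() would fold the adapter into the base weights.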
0692e18c/log_1730925029.txt ADDED
@@ -0,0 +1,250 @@
1
+ Step 1 | loss:0.05883090943098068 lr:4.000000000000001e-06 tokens_per_second_per_gpu:2611.2751764532522
2
+ Step 2 | loss:0.1065659150481224 lr:8.000000000000001e-06 tokens_per_second_per_gpu:934.6274612544344
3
+ Step 3 | loss:0.10369853675365448 lr:1.2e-05 tokens_per_second_per_gpu:1633.880920446792
4
+ Step 4 | loss:0.06993220746517181 lr:1.6000000000000003e-05 tokens_per_second_per_gpu:2003.4908633730493
5
+ Step 5 | loss:0.0536116324365139 lr:2e-05 tokens_per_second_per_gpu:7071.469899338988
6
+ Step 6 | loss:0.06969004124403 lr:2.4e-05 tokens_per_second_per_gpu:7059.030951476349
7
+ Step 7 | loss:0.12010835856199265 lr:2.8000000000000003e-05 tokens_per_second_per_gpu:5949.667488090579
8
+ Step 8 | loss:0.051114361733198166 lr:3.2000000000000005e-05 tokens_per_second_per_gpu:7016.083742408133
9
+ Step 9 | loss:0.04927004128694534 lr:3.6e-05 tokens_per_second_per_gpu:6961.956915402864
10
+ Step 10 | loss:0.08280401676893234 lr:4e-05 tokens_per_second_per_gpu:1167.2202124089874
11
+ Step 11 | loss:0.08619477599859238 lr:4.4000000000000006e-05 tokens_per_second_per_gpu:1139.6981710490575
12
+ Step 12 | loss:0.07909607887268066 lr:4.8e-05 tokens_per_second_per_gpu:7098.479896993361
13
+ Step 13 | loss:0.05969027802348137 lr:5.2000000000000004e-05 tokens_per_second_per_gpu:7094.291274042624
14
+ Step 14 | loss:0.06413069367408752 lr:5.6000000000000006e-05 tokens_per_second_per_gpu:6855.497619317191
15
+ Step 15 | loss:0.0635824203491211 lr:6e-05 tokens_per_second_per_gpu:7149.597258818894
16
+ Step 16 | loss:0.07948478311300278 lr:6.400000000000001e-05 tokens_per_second_per_gpu:6649.963046324791
17
+ Step 17 | loss:0.0760459378361702 lr:6.800000000000001e-05 tokens_per_second_per_gpu:6535.773747266753
18
+ Step 18 | loss:0.05200653523206711 lr:7.2e-05 tokens_per_second_per_gpu:6886.203505416455
19
+ Step 19 | loss:0.05042438581585884 lr:7.6e-05 tokens_per_second_per_gpu:7118.639800231809
20
+ Step 20 | loss:0.05962265655398369 lr:8e-05 tokens_per_second_per_gpu:6906.562284368418
21
+ Step 21 | loss:0.06424694508314133 lr:8.4e-05 tokens_per_second_per_gpu:6511.331095251028
22
+ Step 22 | loss:0.0616440623998642 lr:8.800000000000001e-05 tokens_per_second_per_gpu:1586.2845500246515
23
+ Step 23 | loss:0.06509910523891449 lr:9.200000000000001e-05 tokens_per_second_per_gpu:6546.337467431212
24
+ Step 24 | loss:0.06912975758314133 lr:9.6e-05 tokens_per_second_per_gpu:661.4638977396842
25
+ Step 25 | loss:0.04250991344451904 lr:0.0001 tokens_per_second_per_gpu:6657.945357791943
26
+ Step 26 | loss:0.04667964577674866 lr:9.999512620046522e-05 tokens_per_second_per_gpu:5958.279583441191
27
+ Step 27 | loss:0.03431122750043869 lr:9.998050575201771e-05 tokens_per_second_per_gpu:4794.290307232642
28
+ Step 28 | loss:0.07390312105417252 lr:9.995614150494293e-05 tokens_per_second_per_gpu:6607.071950662243
29
+ Step 29 | loss:0.039681028574705124 lr:9.992203820909906e-05 tokens_per_second_per_gpu:6552.995122018929
30
+ Step 30 | loss:0.03120642900466919 lr:9.987820251299122e-05 tokens_per_second_per_gpu:6945.21487409806
31
+ Step 31 | loss:0.03754393011331558 lr:9.982464296247522e-05 tokens_per_second_per_gpu:6547.6040707603825
32
+ Step 32 | loss:0.0607815682888031 lr:9.976136999909156e-05 tokens_per_second_per_gpu:6445.429115622666
33
+ Step 33 | loss:0.022207770496606827 lr:9.968839595802982e-05 tokens_per_second_per_gpu:6928.181601878918
34
+ Step 34 | loss:0.011908555403351784 lr:9.96057350657239e-05 tokens_per_second_per_gpu:7062.763301040281
35
+ Step 35 | loss:0.013044281862676144 lr:9.951340343707852e-05 tokens_per_second_per_gpu:5892.149529787437
36
+ Step 36 | loss:0.015627676621079445 lr:9.941141907232765e-05 tokens_per_second_per_gpu:7135.602800487149
37
+ Step 37 | loss:0.016328388825058937 lr:9.929980185352526e-05 tokens_per_second_per_gpu:7081.303577017091
38
+ Step 38 | loss:0.005116661079227924 lr:9.917857354066931e-05 tokens_per_second_per_gpu:7032.88413547479
39
+ Step 39 | loss:0.020706845447421074 lr:9.904775776745958e-05 tokens_per_second_per_gpu:7051.139499141364
40
+ Step 40 | loss:0.023872001096606255 lr:9.890738003669029e-05 tokens_per_second_per_gpu:6903.941838072107
41
+ Step 41 | loss:0.013512889854609966 lr:9.875746771527816e-05 tokens_per_second_per_gpu:6883.324140607382
42
+ Step 42 | loss:0.023559028282761574 lr:9.859805002892732e-05 tokens_per_second_per_gpu:5220.333405477438
43
+ Step 43 | loss:0.009499269537627697 lr:9.842915805643155e-05 tokens_per_second_per_gpu:6883.461614545596
44
+ Step 44 | loss:0.006331244483590126 lr:9.825082472361557e-05 tokens_per_second_per_gpu:6617.3389363356155
45
+ Step 45 | loss:0.015614012256264687 lr:9.806308479691595e-05 tokens_per_second_per_gpu:7068.646531980263
46
+ Step 46 | loss:0.04139475151896477 lr:9.786597487660337e-05 tokens_per_second_per_gpu:6651.108418816515
47
+ Step 47 | loss:0.01379504892975092 lr:9.765953338964735e-05 tokens_per_second_per_gpu:7110.698789407686
48
+ Step 48 | loss:0.00873752310872078 lr:9.744380058222483e-05 tokens_per_second_per_gpu:6883.427074840145
49
+ Step 49 | loss:0.0025170650333166122 lr:9.721881851187406e-05 tokens_per_second_per_gpu:6530.497468449956
50
+ Step 50 | loss:0.049308665096759796 lr:9.698463103929542e-05 tokens_per_second_per_gpu:6657.147320144416
51
+ Step 51 | loss:0.013455690816044807 lr:9.674128381980072e-05 tokens_per_second_per_gpu:6629.482383767396
52
+ Step 52 | loss:0.015905072912573814 lr:9.648882429441257e-05 tokens_per_second_per_gpu:6450.48251411474
53
+ Step 53 | loss:0.002275326522067189 lr:9.622730168061567e-05 tokens_per_second_per_gpu:6824.529962920934
54
+ Step 54 | loss:0.01587088219821453 lr:9.595676696276172e-05 tokens_per_second_per_gpu:6521.132932898683
55
+ Step 55 | loss:0.005622063297778368 lr:9.567727288213005e-05 tokens_per_second_per_gpu:6531.4447554583185
56
+ Step 56 | loss:0.004901554435491562 lr:9.538887392664544e-05 tokens_per_second_per_gpu:5633.388308641928
57
+ Step 57 | loss:0.0019407781073823571 lr:9.50916263202557e-05 tokens_per_second_per_gpu:6906.319790444152
58
+ Step 58 | loss:0.0060058278031647205 lr:9.478558801197065e-05 tokens_per_second_per_gpu:6856.7635894640125
59
+ Step 59 | loss:0.005499544087797403 lr:9.447081866456489e-05 tokens_per_second_per_gpu:6642.833062245913
60
+ Step 60 | loss:0.013067233376204967 lr:9.414737964294636e-05 tokens_per_second_per_gpu:6643.617905289443
61
+ Step 61 | loss:0.00852174125611782 lr:9.381533400219318e-05 tokens_per_second_per_gpu:6553.863668850757
62
+ Step 62 | loss:0.007614427246153355 lr:9.347474647526095e-05 tokens_per_second_per_gpu:7052.1698789666825
63
+ Step 63 | loss:0.01105488557368517 lr:9.312568346036288e-05 tokens_per_second_per_gpu:7058.621720464415
64
+ Step 64 | loss:0.013129975646734238 lr:9.276821300802534e-05 tokens_per_second_per_gpu:6528.830458609242
65
+ Step 65 | loss:0.0036676388699561357 lr:9.24024048078213e-05 tokens_per_second_per_gpu:7099.631236412344
66
+ Step 66 | loss:0.0017698833253234625 lr:9.202833017478422e-05 tokens_per_second_per_gpu:7080.763740321347
67
+ Step 67 | loss:0.0006935734418220818 lr:9.164606203550497e-05 tokens_per_second_per_gpu:6518.712752628263
68
+ Step 68 | loss:0.006906419526785612 lr:9.125567491391476e-05 tokens_per_second_per_gpu:6591.018648894371
69
+ Step 69 | loss:0.002882704371586442 lr:9.085724491675642e-05 tokens_per_second_per_gpu:7116.528546515005
70
+ Step 70 | loss:0.00394488824531436 lr:9.045084971874738e-05 tokens_per_second_per_gpu:6827.659097483362
71
+ Step 71 | loss:0.0019539296627044678 lr:9.003656854743667e-05 tokens_per_second_per_gpu:6636.826259591227
72
+ Step 72 | loss:0.0005638383445329964 lr:8.961448216775954e-05 tokens_per_second_per_gpu:6894.328447107831
73
+ Step 73 | loss:0.0041533648036420345 lr:8.9184672866292e-05 tokens_per_second_per_gpu:6878.894826933022
74
+ Step 74 | loss:0.0009008919005282223 lr:8.874722443520899e-05 tokens_per_second_per_gpu:7076.127400677781
75
+ Step 75 | loss:0.002822647336870432 lr:8.83022221559489e-05 tokens_per_second_per_gpu:6604.422504566621
76
+ Step 76 | loss:0.0012791752815246582 lr:8.784975278258783e-05 tokens_per_second_per_gpu:6662.954752142708
77
+ Step 77 | loss:0.0034571527503430843 lr:8.73899045249266e-05 tokens_per_second_per_gpu:6878.229928630995
78
+ Step 78 | loss:0.0003115411091130227 lr:8.692276703129421e-05 tokens_per_second_per_gpu:6621.282027338098
79
+ Step 79 | loss:0.004982416983693838 lr:8.644843137107059e-05 tokens_per_second_per_gpu:7065.362582716634
80
+ Step 80 | loss:0.005641724448651075 lr:8.596699001693255e-05 tokens_per_second_per_gpu:6818.3028179613775
81
+ Step 81 | loss:0.007741517852991819 lr:8.547853682682604e-05 tokens_per_second_per_gpu:7068.3698595558735
82
+ Step 82 | loss:0.004250807221978903 lr:8.498316702566828e-05 tokens_per_second_per_gpu:6417.045004042305
83
+ Step 83 | loss:0.0003256700874771923 lr:8.44809771867835e-05 tokens_per_second_per_gpu:5288.4226300841865
84
+ Step 84 | loss:0.002000636188313365 lr:8.397206521307584e-05 tokens_per_second_per_gpu:6634.974566286588
85
+ Step 85 | loss:0.000544965558219701 lr:8.345653031794292e-05 tokens_per_second_per_gpu:6634.014989792351
86
+ Step 86 | loss:0.001567141618579626 lr:8.293447300593402e-05 tokens_per_second_per_gpu:6860.262958802247
87
+ Step 87 | loss:0.0006420569843612611 lr:8.240599505315655e-05 tokens_per_second_per_gpu:6905.004579443605
88
+ Step 88 | loss:0.007808868307620287 lr:8.18711994874345e-05 tokens_per_second_per_gpu:6632.698013944389
89
+ Step 89 | loss:0.00021798825764562935 lr:8.133019056822304e-05 tokens_per_second_per_gpu:6439.540558933112
90
+ Step 90 | loss:0.00026628863997757435 lr:8.07830737662829e-05 tokens_per_second_per_gpu:6905.334910011535
91
+ Step 91 | loss:0.009381849318742752 lr:8.022995574311876e-05 tokens_per_second_per_gpu:6520.647037620224
92
+ Step 92 | loss:0.0001644226722419262 lr:7.967094433018508e-05 tokens_per_second_per_gpu:6899.679949135617
93
+ Step 93 | loss:0.00557605130597949 lr:7.910614850786448e-05 tokens_per_second_per_gpu:6619.658986620156
94
+ Step 94 | loss:0.0020753894932568073 lr:7.85356783842216e-05 tokens_per_second_per_gpu:6527.52192209492
95
+ Step 95 | loss:0.0009785438887774944 lr:7.795964517353735e-05 tokens_per_second_per_gpu:6851.988618005506
96
+ Step 96 | loss:0.00023535460059065372 lr:7.737816117462752e-05 tokens_per_second_per_gpu:6892.176184929317
97
+ Step 97 | loss:0.00012474882532842457 lr:7.679133974894983e-05 tokens_per_second_per_gpu:6853.329438980445
98
+ Step 98 | loss:0.003538035321980715 lr:7.619929529850397e-05 tokens_per_second_per_gpu:6631.218921573058
99
+ Step 99 | loss:0.00324582913890481 lr:7.560214324352858e-05 tokens_per_second_per_gpu:5222.440706457179
100
+ Step 100 | loss:6.167021638248116e-05 lr:7.500000000000001e-05 tokens_per_second_per_gpu:6919.006783887625
101
+ Step 101 | loss:0.0016939690103754401 lr:7.439298295693665e-05 tokens_per_second_per_gpu:6406.025615551767
102
+ Step 102 | loss:0.0006084628403186798 lr:7.378121045351378e-05 tokens_per_second_per_gpu:6877.084720206567
103
+ Step 103 | loss:0.00017363041115459055 lr:7.316480175599309e-05 tokens_per_second_per_gpu:5929.288778810249
104
+ Step 104 | loss:0.0005308957188390195 lr:7.254387703447154e-05 tokens_per_second_per_gpu:7095.644998839233
105
+ Step 105 | loss:0.0019598398357629776 lr:7.191855733945387e-05 tokens_per_second_per_gpu:6873.67040419656
106
+ Step 106 | loss:0.0008263597846962512 lr:7.128896457825364e-05 tokens_per_second_per_gpu:6628.67102415756
107
+ Step 107 | loss:0.0006074818084016442 lr:7.06552214912271e-05 tokens_per_second_per_gpu:6630.428254088887
108
+ Step 108 | loss:0.0003068344376515597 lr:7.001745162784477e-05 tokens_per_second_per_gpu:6420.439101038877
109
+ Step 109 | loss:0.0009827185422182083 lr:6.937577932260515e-05 tokens_per_second_per_gpu:6656.780873474644
110
+ Step 110 | loss:0.0019185517448931932 lr:6.873032967079561e-05 tokens_per_second_per_gpu:6634.594462348685
111
+ Step 111 | loss:0.0037080980837345123 lr:6.808122850410461e-05 tokens_per_second_per_gpu:5337.909562507922
112
+ Step 112 | loss:1.8280934455106035e-05 lr:6.742860236609077e-05 tokens_per_second_per_gpu:6890.276452233108
113
+ Step 113 | loss:6.343572022160515e-05 lr:6.677257848751277e-05 tokens_per_second_per_gpu:6916.200469048678
114
+ Step 114 | loss:8.518458344042301e-05 lr:6.611328476152557e-05 tokens_per_second_per_gpu:6642.759167552707
115
+ Step 115 | loss:0.0010669255862012506 lr:6.545084971874738e-05 tokens_per_second_per_gpu:7106.338781109161
116
+ Step 116 | loss:7.879980694269761e-05 lr:6.478540250220234e-05 tokens_per_second_per_gpu:6450.590534138903
117
+ Step 117 | loss:1.610443359822966e-05 lr:6.411707284214384e-05 tokens_per_second_per_gpu:6948.888592453719
118
+ Step 118 | loss:7.019325130386278e-05 lr:6.344599103076329e-05 tokens_per_second_per_gpu:6843.619329003747
119
+ Step 119 | loss:0.00010053990263259038 lr:6.277228789678953e-05 tokens_per_second_per_gpu:6470.339743921217
120
+ Step 120 | loss:0.0006487674545496702 lr:6.209609477998338e-05 tokens_per_second_per_gpu:6632.729896144266
121
+ Step 121 | loss:5.3303869208320975e-05 lr:6.141754350553279e-05 tokens_per_second_per_gpu:6661.920876119739
122
+ Step 122 | loss:0.0001957487256731838 lr:6.073676635835317e-05 tokens_per_second_per_gpu:7117.271578375329
123
+ Step 123 | loss:0.00020888530707452446 lr:6.005389605729824e-05 tokens_per_second_per_gpu:6532.323786611221
124
+ Step 124 | loss:0.00016734490054659545 lr:5.9369065729286245e-05 tokens_per_second_per_gpu:6470.860378475325
125
+ Step 125 | loss:0.0001500799844507128 lr:5.868240888334653e-05 tokens_per_second_per_gpu:6887.746656319498
126
+ Step 126 | loss:3.6837009247392416e-05 lr:5.799405938459175e-05 tokens_per_second_per_gpu:6652.201306620712
127
+ Step 127 | loss:0.0021684449166059494 lr:5.730415142812059e-05 tokens_per_second_per_gpu:6880.78207607067
128
+ Step 128 | loss:1.8878876289818436e-05 lr:5.661281951285613e-05 tokens_per_second_per_gpu:6631.064782221875
129
+ Step 129 | loss:3.306664075353183e-05 lr:5.5920198415325064e-05 tokens_per_second_per_gpu:7111.88461098746
130
+ Step 130 | loss:0.00012198043259559199 lr:5.522642316338268e-05 tokens_per_second_per_gpu:6466.124990035078
131
+ Step 131 | loss:0.000832759658806026 lr:5.453162900988902e-05 tokens_per_second_per_gpu:6581.671788023008
132
+ Step 132 | loss:0.007134541869163513 lr:5.383595140634093e-05 tokens_per_second_per_gpu:6967.797162172086
133
+ Step 133 | loss:2.075553493341431e-05 lr:5.313952597646568e-05 tokens_per_second_per_gpu:6549.833573793661
134
+ Step 134 | loss:2.6473575417185202e-05 lr:5.244248848978067e-05 tokens_per_second_per_gpu:7127.917559570654
135
+ Step 135 | loss:2.2700472982251085e-05 lr:5.174497483512506e-05 tokens_per_second_per_gpu:6574.015968536689
136
+ Step 136 | loss:6.514487904496491e-05 lr:5.104712099416785e-05 tokens_per_second_per_gpu:6429.155021212451
137
+ Step 137 | loss:1.191654064314207e-05 lr:5.034906301489808e-05 tokens_per_second_per_gpu:7140.5249904908105
138
+ Step 138 | loss:0.0005834389012306929 lr:4.965093698510193e-05 tokens_per_second_per_gpu:6452.364510347203
139
+ Step 139 | loss:8.847516437526792e-05 lr:4.895287900583216e-05 tokens_per_second_per_gpu:7117.959177378009
140
+ Step 140 | loss:0.004411947913467884 lr:4.825502516487497e-05 tokens_per_second_per_gpu:3948.052493511066
141
+ Step 141 | loss:5.6236054660985246e-05 lr:4.755751151021934e-05 tokens_per_second_per_gpu:6942.522635416498
142
+ Step 142 | loss:0.00029896333580836654 lr:4.6860474023534335e-05 tokens_per_second_per_gpu:6867.760612053949
143
+ Step 143 | loss:0.010003290139138699 lr:4.616404859365907e-05 tokens_per_second_per_gpu:6446.631164919947
144
+ Step 144 | loss:7.106986595317721e-05 lr:4.5468370990111006e-05 tokens_per_second_per_gpu:6926.631459505046
145
+ Step 145 | loss:0.0005215980345383286 lr:4.477357683661734e-05 tokens_per_second_per_gpu:5213.824134304672
146
+ Step 146 | loss:9.424990275874734e-05 lr:4.407980158467495e-05 tokens_per_second_per_gpu:6651.622279874753
147
+ Step 147 | loss:0.00032575210207141936 lr:4.3387180487143876e-05 tokens_per_second_per_gpu:7009.78129432356
148
+ Step 148 | loss:5.463398338179104e-05 lr:4.269584857187943e-05 tokens_per_second_per_gpu:7021.721905466142
149
+ Step 149 | loss:0.00023407567641697824 lr:4.2005940615408264e-05 tokens_per_second_per_gpu:6632.982085085049
150
+ Step 150 | loss:4.2366555135231465e-05 lr:4.131759111665349e-05 tokens_per_second_per_gpu:7113.6613049929965
151
+ Step 151 | loss:1.727652306726668e-05 lr:4.063093427071376e-05 tokens_per_second_per_gpu:6929.620848245407
152
+ Step 152 | loss:0.0006041711894795299 lr:3.9946103942701777e-05 tokens_per_second_per_gpu:6591.247445364165
153
+ Step 153 | loss:6.702107202727348e-05 lr:3.926323364164684e-05 tokens_per_second_per_gpu:6542.112709633285
154
+ Step 154 | loss:0.00010898215259658173 lr:3.858245649446721e-05 tokens_per_second_per_gpu:6617.720480843186
155
+ Step 155 | loss:0.0005894604837521911 lr:3.790390522001662e-05 tokens_per_second_per_gpu:7121.269133178284
156
+ Step 156 | loss:0.00046646883129142225 lr:3.7227712103210486e-05 tokens_per_second_per_gpu:6810.810132136087
157
+ Step 157 | loss:0.0008714331779628992 lr:3.655400896923672e-05 tokens_per_second_per_gpu:6559.711084234603
158
+ Step 158 | loss:0.0009323527337983251 lr:3.588292715785617e-05 tokens_per_second_per_gpu:6659.847774845367
159
+ Step 159 | loss:9.004903404274955e-05 lr:3.5214597497797684e-05 tokens_per_second_per_gpu:6905.063456852771
160
+ Step 160 | loss:0.00015947382780723274 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:6655.478787142777
161
+ Step 161 | loss:0.0004274751408956945 lr:3.388671523847445e-05 tokens_per_second_per_gpu:6942.9743567778205
162
+ Step 162 | loss:0.00037595766480080783 lr:3.322742151248725e-05 tokens_per_second_per_gpu:6677.666313251598
163
+ Step 163 | loss:3.244326580897905e-05 lr:3.257139763390925e-05 tokens_per_second_per_gpu:6653.075937390707
164
+ Step 164 | loss:0.00030996996792964637 lr:3.1918771495895396e-05 tokens_per_second_per_gpu:6644.397243135655
165
+ Step 165 | loss:5.752700235461816e-05 lr:3.12696703292044e-05 tokens_per_second_per_gpu:6960.209716963056
166
+ Step 166 | loss:7.959095091791824e-05 lr:3.062422067739485e-05 tokens_per_second_per_gpu:6912.57406313066
167
+ Step 167 | loss:0.00031512955320067704 lr:2.9982548372155263e-05 tokens_per_second_per_gpu:6889.145567809192
168
+ Step 168 | loss:0.0016873697750270367 lr:2.934477850877292e-05 tokens_per_second_per_gpu:6455.525014033346
169
+ Step 169 | loss:6.595908780582249e-05 lr:2.8711035421746367e-05 tokens_per_second_per_gpu:3940.9819865040454
170
+ Step 170 | loss:0.0001208314424729906 lr:2.8081442660546125e-05 tokens_per_second_per_gpu:6533.838468920102
171
+ Step 171 | loss:2.291880446136929e-05 lr:2.7456122965528475e-05 tokens_per_second_per_gpu:7036.136911717446
172
+ Step 172 | loss:3.8961392419878393e-05 lr:2.6835198244006927e-05 tokens_per_second_per_gpu:6419.633418996145
173
+ Step 173 | loss:0.0002535284438636154 lr:2.6218789546486234e-05 tokens_per_second_per_gpu:6614.819622468586
174
+ Step 174 | loss:4.4378077291185036e-05 lr:2.560701704306336e-05 tokens_per_second_per_gpu:7058.27307364323
175
+ Step 175 | loss:0.0015160999028012156 lr:2.500000000000001e-05 tokens_per_second_per_gpu:5946.335424162427
176
+ Step 176 | loss:4.20267679146491e-05 lr:2.4397856756471432e-05 tokens_per_second_per_gpu:7076.1643700977365
177
+ Step 177 | loss:0.0009513521217741072 lr:2.3800704701496053e-05 tokens_per_second_per_gpu:6827.403499245773
178
+ Step 178 | loss:2.0644696633098647e-05 lr:2.3208660251050158e-05 tokens_per_second_per_gpu:7118.0654958975465
179
+ Step 179 | loss:5.462503031594679e-05 lr:2.2621838825372493e-05 tokens_per_second_per_gpu:7067.663128977011
180
+ Step 180 | loss:0.00020294466230552644 lr:2.2040354826462668e-05 tokens_per_second_per_gpu:6596.390810214846
181
+ Step 181 | loss:0.00020127856987528503 lr:2.1464321615778422e-05 tokens_per_second_per_gpu:7013.25305818966
182
+ Step 182 | loss:2.1506995835807174e-05 lr:2.0893851492135537e-05 tokens_per_second_per_gpu:6878.958473899288
183
+ Step 183 | loss:0.0016845290083438158 lr:2.0329055669814934e-05 tokens_per_second_per_gpu:6876.458445904233
184
+ Step 184 | loss:0.0001503072853665799 lr:1.977004425688126e-05 tokens_per_second_per_gpu:6608.593972229724
185
+ Step 185 | loss:7.932235166663304e-05 lr:1.9216926233717085e-05 tokens_per_second_per_gpu:6596.182449041913
186
+ Step 186 | loss:7.315298717003316e-05 lr:1.866980943177699e-05 tokens_per_second_per_gpu:6651.494518162662
187
+ Step 187 | loss:2.1691537767765112e-05 lr:1.8128800512565513e-05 tokens_per_second_per_gpu:6906.556298073519
188
+ Step 188 | loss:0.00034803853486664593 lr:1.7594004946843456e-05 tokens_per_second_per_gpu:6855.023616501254
189
+ Step 189 | loss:0.0003062945615965873 lr:1.7065526994065973e-05 tokens_per_second_per_gpu:6494.110561964963
190
+ Step 190 | loss:2.6088669983437285e-05 lr:1.6543469682057106e-05 tokens_per_second_per_gpu:7103.355385947416
191
+ Step 191 | loss:3.2427298719994724e-05 lr:1.602793478692419e-05 tokens_per_second_per_gpu:7049.27116226414
192
+ Step 192 | loss:1.9283334040665068e-05 lr:1.551902281321651e-05 tokens_per_second_per_gpu:6621.940479831454
193
+ Step 193 | loss:1.4837847629678436e-05 lr:1.5016832974331724e-05 tokens_per_second_per_gpu:6632.270038448392
194
+ Step 194 | loss:4.618912498699501e-05 lr:1.4521463173173965e-05 tokens_per_second_per_gpu:6450.603211542761
195
+ Step 195 | loss:8.731346315471455e-05 lr:1.4033009983067452e-05 tokens_per_second_per_gpu:6874.2009861572205
196
+ Step 196 | loss:0.0007837693556211889 lr:1.3551568628929434e-05 tokens_per_second_per_gpu:6438.693332145625
197
+ Step 197 | loss:7.728889613645151e-05 lr:1.3077232968705805e-05 tokens_per_second_per_gpu:6603.994857786765
198
+ Step 198 | loss:2.348179441469256e-05 lr:1.2610095475073414e-05 tokens_per_second_per_gpu:4789.884799591809
199
+ Step 199 | loss:0.00016038610192481428 lr:1.2150247217412186e-05 tokens_per_second_per_gpu:6558.0853059261235
200
+ Step 200 | loss:3.3631153200985864e-05 lr:1.1697777844051105e-05 tokens_per_second_per_gpu:7087.831642948178
201
+ Step 201 | loss:0.00014618111890740693 lr:1.1252775564791024e-05 tokens_per_second_per_gpu:6565.384879539554
202
+ Step 202 | loss:2.701554330997169e-05 lr:1.0815327133708015e-05 tokens_per_second_per_gpu:6893.453313682802
203
+ Step 203 | loss:0.0008529752376489341 lr:1.0385517832240471e-05 tokens_per_second_per_gpu:6517.5355654338255
204
+ Step 204 | loss:1.588598024682142e-05 lr:9.963431452563332e-06 tokens_per_second_per_gpu:7059.871188934064
205
+ Step 205 | loss:0.0008057854720391333 lr:9.549150281252633e-06 tokens_per_second_per_gpu:6548.372620521682
206
+ Step 206 | loss:0.00015476223779842257 lr:9.142755083243576e-06 tokens_per_second_per_gpu:6431.588636343005
207
+ Step 207 | loss:8.664749475428835e-05 lr:8.744325086085248e-06 tokens_per_second_per_gpu:6502.919716342713
208
+ Step 208 | loss:1.2338457963778637e-05 lr:8.353937964495029e-06 tokens_per_second_per_gpu:6861.501104968888
209
+ Step 209 | loss:0.00013570269220508635 lr:7.971669825215788e-06 tokens_per_second_per_gpu:6863.267024088076
210
+ Step 210 | loss:0.002080573933199048 lr:7.597595192178702e-06 tokens_per_second_per_gpu:5904.508517994963
211
+ Step 211 | loss:0.0004988689906895161 lr:7.2317869919746705e-06 tokens_per_second_per_gpu:6622.7369112184415
212
+ Step 212 | loss:0.00018352782353758812 lr:6.874316539637127e-06 tokens_per_second_per_gpu:7081.97858016051
213
+ Step 213 | loss:0.00037246820284053683 lr:6.52525352473905e-06 tokens_per_second_per_gpu:5181.607671540491
214
+ Step 214 | loss:1.9034436263609678e-05 lr:6.184665997806832e-06 tokens_per_second_per_gpu:6877.447332775893
215
+ Step 215 | loss:0.00025970637216232717 lr:5.852620357053651e-06 tokens_per_second_per_gpu:6619.443237103561
216
+ Step 216 | loss:9.822969877859578e-05 lr:5.529181335435124e-06 tokens_per_second_per_gpu:6551.990720176112
217
+ Step 217 | loss:4.414553040987812e-05 lr:5.214411988029355e-06 tokens_per_second_per_gpu:6911.87462585134
218
+ Step 218 | loss:0.0006779417162761092 lr:4.908373679744316e-06 tokens_per_second_per_gpu:6631.687627177202
219
+ Step 219 | loss:1.7940288671525195e-05 lr:4.611126073354571e-06 tokens_per_second_per_gpu:7020.880700776409
220
+ Step 220 | loss:6.450503133237362e-05 lr:4.322727117869951e-06 tokens_per_second_per_gpu:6897.571010537485
221
+ Step 221 | loss:0.0009318335796706378 lr:4.043233037238281e-06 tokens_per_second_per_gpu:6619.221030435229
222
+ Step 222 | loss:0.00016723918088246137 lr:3.772698319384349e-06 tokens_per_second_per_gpu:7075.477545011553
223
+ Step 223 | loss:8.360719948541373e-05 lr:3.511175705587433e-06 tokens_per_second_per_gpu:6512.199896253443
224
+ Step 224 | loss:1.4453452422458213e-05 lr:3.258716180199278e-06 tokens_per_second_per_gpu:6842.419957161844
225
+ Step 225 | loss:0.0017041980754584074 lr:3.0153689607045845e-06 tokens_per_second_per_gpu:6609.6983750661375
226
+ Step 226 | loss:5.68675714021083e-05 lr:2.7811814881259503e-06 tokens_per_second_per_gpu:6838.0884856370085
227
+ Step 227 | loss:0.002710741711780429 lr:2.5561994177751737e-06 tokens_per_second_per_gpu:3278.290224999214
228
+ Step 228 | loss:0.0017590811476111412 lr:2.340466610352654e-06 tokens_per_second_per_gpu:5959.538228032885
229
+ Step 229 | loss:0.00035566239967010915 lr:2.134025123396638e-06 tokens_per_second_per_gpu:6550.852848790178
230
+ Step 230 | loss:4.236725089867832e-06 lr:1.9369152030840556e-06 tokens_per_second_per_gpu:6560.816553955841
231
+ Step 231 | loss:0.0016642542323097587 lr:1.7491752763844293e-06 tokens_per_second_per_gpu:5188.038830459843
232
+ Step 232 | loss:3.7601857911795378e-06 lr:1.5708419435684462e-06 tokens_per_second_per_gpu:6876.047844526483
233
+ Step 233 | loss:3.0354982300195843e-05 lr:1.4019499710726913e-06 tokens_per_second_per_gpu:7048.910112085713
234
+ Step 234 | loss:9.097172733163461e-05 lr:1.2425322847218368e-06 tokens_per_second_per_gpu:6620.95106717429
235
+ Step 235 | loss:0.0004588098672684282 lr:1.0926199633097157e-06 tokens_per_second_per_gpu:6866.427218408531
236
+ Step 236 | loss:2.3615913960384205e-05 lr:9.522422325404235e-07 tokens_per_second_per_gpu:6807.550934929809
237
+ Step 237 | loss:8.894444908946753e-05 lr:8.214264593307098e-07 tokens_per_second_per_gpu:6632.915847438242
238
+ Step 238 | loss:1.4141115570964757e-05 lr:7.001981464747565e-07 tokens_per_second_per_gpu:7098.742085761691
239
+ Step 239 | loss:2.696912815736141e-05 lr:5.885809276723608e-07 tokens_per_second_per_gpu:6597.974030159197
240
+ Step 240 | loss:0.0008145094616338611 lr:4.865965629214819e-07 tokens_per_second_per_gpu:6674.75699821072
241
+ Step 241 | loss:7.957598427310586e-05 lr:3.9426493427611177e-07 tokens_per_second_per_gpu:6595.459145926985
242
+ Step 242 | loss:3.828562330454588e-05 lr:3.1160404197018154e-07 tokens_per_second_per_gpu:7101.7020777281305
243
+ Step 243 | loss:8.814628927211743e-06 lr:2.386300009084408e-07 tokens_per_second_per_gpu:7011.342644886534
244
+ Step 244 | loss:0.0004267446347512305 lr:1.753570375247815e-07 tokens_per_second_per_gpu:6545.467889161499
245
+ Step 245 | loss:0.0005327242542989552 lr:1.2179748700879012e-07 tokens_per_second_per_gpu:6635.642602121437
246
+ Step 246 | loss:3.4723900171229616e-05 lr:7.796179090094891e-08 tokens_per_second_per_gpu:7081.141397797378
247
+ Step 247 | loss:3.564795770216733e-05 lr:4.385849505708084e-08 tokens_per_second_per_gpu:6877.282976617243
248
+ Step 248 | loss:0.0013675112277269363 lr:1.949424798228239e-08 tokens_per_second_per_gpu:6899.428748658248
249
+ Step 249 | loss:4.7962246753741056e-05 lr:4.873799534788059e-09 tokens_per_second_per_gpu:6932.006787788585
250
+ Step 250 | loss:9.682981908554211e-05 lr:0.0 tokens_per_second_per_gpu:5238.0422309469095
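The per-task logs above all share one single-line format (Step N | loss:... lr:... tokens_per_second_per_gpu:...). A minimal sketch for pulling those values back out, e.g. for plotting the loss curve; the tolerance for a leading "+ " is only there because of how the diff is rendered here.

import re

# Matches lines such as: "Step 250 | loss:9.68e-05 lr:0.0 tokens_per_second_per_gpu:5238.04"
LOG_LINE = re.compile(
    r"Step (\d+) \| loss:([0-9.eE+-]+) lr:([0-9.eE+-]+) tokens_per_second_per_gpu:([0-9.eE+-]+)"
)

def parse_log(path):
    steps = []
    with open(path) as f:
        for line in f:
            m = LOG_LINE.search(line.lstrip("+ "))  # tolerate a diff-style "+ " prefix
            if m:
                step, loss, lr, tps = m.groups()
                steps.append({"step": int(step), "loss": float(loss),
                              "lr": float(lr), "tokens_per_sec": float(tps)})
    return steps

records = parse_log("0692e18c/log_1730925029.txt")
print(records[0]["loss"], records[-1]["loss"])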
0692e18c/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
0692e18c/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[5 5 0 5 5 0]\n [0 0 5 0 0 5]\n [0 0 5 0 0 5]\n [5 5 0 5 5 0]\n [0 0 5 0 0 5]\n [0 0 5 0 0 5]] -> [[0 0 5 0 0 5 0 0 0 0 0 5 0 0 5 0 0 0]\n [5 5 0 5 5 0 0 0 0 5 5 0 5 5 0 0 0 0]\n [5 5 0 5 5 0 0 0 0 5 5 0 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 5 0 0 5 0 0 0 0 0 5 0 0 5 0 0 0]\n [5 5 0 5 5 0 0 0 0 5 5 0 5 5 0 0 0 0]\n [5 5 0 5 5 0 0 0 0 5 5 0 5 5 0 0 0 0]\n [0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 5]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]\n [0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 5 5 0]]#\n[[0 0 3 0 0 3]\n [0 3 0 0 3 0]\n [3 0 0 3 0 0]\n [0 0 3 0 0 3]\n [0 3 0 0 3 0]\n [3 0 0 3 0 0]] -> [[0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 3 3 0]\n [0 0 0 0 0 0 3 0 3 0 0 0 0 0 0 3 0 3]\n [0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 3 3]\n [0 0 0 3 3 0 0 0 0 0 0 0 3 3 0 0 0 0]\n [0 0 0 3 0 3 0 0 0 0 0 0 3 0 3 0 0 0]\n [0 0 0 0 3 3 0 0 0 0 0 0 0 3 3 0 0 0]\n [3 3 0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0]\n [3 0 3 0 0 0 0 0 0 3 0 3 0 0 0 0 0 0]\n [0 3 3 0 0 0 0 0 0 0 3 3 0 0 0 0 0 0]\n [0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 3 3 0]\n [0 0 0 0 0 0 3 0 3 0 0 0 0 0 0 3 0 3]\n [0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 3 3]\n [0 0 0 3 3 0 0 0 0 0 0 0 3 3 0 0 0 0]\n [0 0 0 3 0 3 0 0 0 0 0 0 3 0 3 0 0 0]\n [0 0 0 0 3 3 0 0 0 0 0 0 0 3 3 0 0 0]\n [3 3 0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0]\n [3 0 3 0 0 0 0 0 0 3 0 3 0 0 0 0 0 0]\n [0 3 3 0 0 0 0 0 0 0 3 3 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 9 0 0 9 0]\n [9 9 9 9 9 9]\n [0 9 0 0 9 0]\n [0 9 0 0 9 0]\n [9 9 9 9 9 9]\n [0 9 0 0 9 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [9 0 9 9 0 9 9 0 9 9 0 9 9 0 9 9 0 9]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [9 0 9 9 0 9 9 0 9 9 0 9 9 0 9 9 0 9]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [9 0 9 9 0 9 9 0 9 9 0 9 9 0 9 9 0 9]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [9 0 9 9 0 9 9 0 9 9 0 9 9 0 9 9 0 9]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 9 0 9 0 0 0 0 0 0 9 0 9 0 0 0]]#"}, "total_tokens": 2297}
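Each td_*.jsonl file stores one chat-formatted record per line: the few-shot transformation prompt lives under "input" (a list of role/content messages ending with the held-out test grid), the expected answer under "output", and "total_tokens" gives the combined length. A minimal sketch for reading such a record back; the field names come from the line above, everything else is assumed.

import json

with open("0692e18c/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl") as f:
    record = json.loads(f.readline())

system_prompt = record["input"][0]["content"]   # task description plus demonstration grids
test_query = record["input"][-1]["content"]     # the input grid the model must transform
target = record["output"]["content"]            # the expected assistant answer (output grid)

print(record["total_tokens"])
print(target[:80])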
070dd51e/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
070dd51e/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfb333dd397963ba7e73a073b7d4d6b8868ef3493cc1391b850b995fe21ebd8
3
+ size 596042770
070dd51e/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 8192, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256}
070dd51e/log_1730925150.txt ADDED
@@ -0,0 +1,72 @@
1
+ Step 1 | loss:0.01213599182665348 lr:1.4285714285714285e-05 tokens_per_second_per_gpu:3404.260116401829
2
+ Step 2 | loss:0.05323003977537155 lr:2.857142857142857e-05 tokens_per_second_per_gpu:2847.5748762569233
3
+ Step 3 | loss:0.006721426732838154 lr:4.2857142857142856e-05 tokens_per_second_per_gpu:3404.5310283094473
4
+ Step 4 | loss:0.03512277454137802 lr:5.714285714285714e-05 tokens_per_second_per_gpu:2151.1299929149804
5
+ Step 5 | loss:0.015561948530375957 lr:7.142857142857143e-05 tokens_per_second_per_gpu:7213.580199077777
6
+ Step 6 | loss:0.008180935867130756 lr:8.571428571428571e-05 tokens_per_second_per_gpu:7133.875309282848
7
+ Step 7 | loss:0.01505997683852911 lr:0.0001 tokens_per_second_per_gpu:2838.1605080364957
8
+ Step 8 | loss:0.017542490735650063 lr:9.994161134161634e-05 tokens_per_second_per_gpu:7064.647511922427
9
+ Step 9 | loss:0.01684839464724064 lr:9.976658173588244e-05 tokens_per_second_per_gpu:7195.1292634894635
10
+ Step 10 | loss:0.011541903018951416 lr:9.947531997255256e-05 tokens_per_second_per_gpu:1273.3526396091336
11
+ Step 11 | loss:0.0019745274912565947 lr:9.906850630697068e-05 tokens_per_second_per_gpu:7202.0862713139295
12
+ Step 12 | loss:0.04754862189292908 lr:9.85470908713026e-05 tokens_per_second_per_gpu:7090.606538899934
13
+ Step 13 | loss:0.00039215420838445425 lr:9.791229145545831e-05 tokens_per_second_per_gpu:7006.873846550338
14
+ Step 14 | loss:0.0008788838749751449 lr:9.716559066288715e-05 tokens_per_second_per_gpu:7192.817561967175
15
+ Step 15 | loss:0.0007529142312705517 lr:9.630873244788883e-05 tokens_per_second_per_gpu:7111.737477943987
16
+ Step 16 | loss:0.0001974945334950462 lr:9.534371804252728e-05 tokens_per_second_per_gpu:7091.69321172515
17
+ Step 17 | loss:0.0016645367722958326 lr:9.42728012826605e-05 tokens_per_second_per_gpu:7176.800912786511
18
+ Step 18 | loss:0.027435433119535446 lr:9.309848334400246e-05 tokens_per_second_per_gpu:7170.110091079389
19
+ Step 19 | loss:0.026739366352558136 lr:9.182350690051133e-05 tokens_per_second_per_gpu:7130.819955305939
20
+ Step 20 | loss:0.0015863939188420773 lr:9.045084971874738e-05 tokens_per_second_per_gpu:7076.8900804429095
21
+ Step 21 | loss:0.0016179593512788415 lr:8.898371770316111e-05 tokens_per_second_per_gpu:7177.145273038502
22
+ Step 22 | loss:0.02591816335916519 lr:8.742553740855506e-05 tokens_per_second_per_gpu:7038.516202651878
23
+ Step 23 | loss:0.000453911314252764 lr:8.577994803720606e-05 tokens_per_second_per_gpu:7076.675124899392
24
+ Step 24 | loss:0.01640825718641281 lr:8.405079293933986e-05 tokens_per_second_per_gpu:7111.868628736731
25
+ Step 25 | loss:0.0001911711151478812 lr:8.224211063680853e-05 tokens_per_second_per_gpu:6996.426639921837
26
+ Step 26 | loss:0.00019689431064762175 lr:8.035812539093557e-05 tokens_per_second_per_gpu:7129.771943389241
27
+ Step 27 | loss:0.0003955479769501835 lr:7.840323733655778e-05 tokens_per_second_per_gpu:6705.522398275781
28
+ Step 28 | loss:0.0016950565623119473 lr:7.638201220530665e-05 tokens_per_second_per_gpu:7164.835243901002
29
+ Step 29 | loss:0.0004549043078441173 lr:7.42991706621303e-05 tokens_per_second_per_gpu:7134.113817493094
30
+ Step 30 | loss:7.907362305559218e-05 lr:7.215957727996207e-05 tokens_per_second_per_gpu:7085.3284701406865
31
+ Step 31 | loss:0.022005649283528328 lr:6.996822917828477e-05 tokens_per_second_per_gpu:7106.993416514684
32
+ Step 32 | loss:0.00033388612791895866 lr:6.773024435212678e-05 tokens_per_second_per_gpu:7087.199547240821
33
+ Step 33 | loss:0.006230715196579695 lr:6.545084971874738e-05 tokens_per_second_per_gpu:6889.225855285138
34
+ Step 34 | loss:0.019058911129832268 lr:6.313536890992935e-05 tokens_per_second_per_gpu:7226.036345262191
35
+ Step 35 | loss:0.0001418855244992301 lr:6.078920983839031e-05 tokens_per_second_per_gpu:7154.760936431576
36
+ Step 36 | loss:0.0005796361365355551 lr:5.841785206735192e-05 tokens_per_second_per_gpu:6891.661354772104
37
+ Step 37 | loss:4.626877125701867e-05 lr:5.602683401276615e-05 tokens_per_second_per_gpu:7002.617612685
38
+ Step 38 | loss:0.005331241525709629 lr:5.3621740008088126e-05 tokens_per_second_per_gpu:7010.927497097903
39
+ Step 39 | loss:0.00010369029769208282 lr:5.1208187261806615e-05 tokens_per_second_per_gpu:7168.5148320560575
40
+ Step 40 | loss:0.00034265266731381416 lr:4.87918127381934e-05 tokens_per_second_per_gpu:7162.935906194933
41
+ Step 41 | loss:0.00016875483561307192 lr:4.6378259991911886e-05 tokens_per_second_per_gpu:7134.073121948486
42
+ Step 42 | loss:6.131715053925291e-06 lr:4.397316598723385e-05 tokens_per_second_per_gpu:6997.651607723751
43
+ Step 43 | loss:0.00643449742347002 lr:4.1582147932648074e-05 tokens_per_second_per_gpu:7054.267604831181
44
+ Step 44 | loss:0.011971571482717991 lr:3.92107901616097e-05 tokens_per_second_per_gpu:6913.706728873295
45
+ Step 45 | loss:0.00011875088239321485 lr:3.6864631090070655e-05 tokens_per_second_per_gpu:7150.46483685172
46
+ Step 46 | loss:0.00037201965460553765 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:7209.474418950389
47
+ Step 47 | loss:6.80260345689021e-05 lr:3.226975564787322e-05 tokens_per_second_per_gpu:7069.455266192888
48
+ Step 48 | loss:0.00014731497503817081 lr:3.003177082171523e-05 tokens_per_second_per_gpu:7163.2810745656725
49
+ Step 49 | loss:0.00013455803855322301 lr:2.784042272003794e-05 tokens_per_second_per_gpu:7122.241765808404
50
+ Step 50 | loss:0.00018143688794225454 lr:2.57008293378697e-05 tokens_per_second_per_gpu:7232.54367922411
51
+ Step 51 | loss:0.00027479001437313855 lr:2.361798779469336e-05 tokens_per_second_per_gpu:6946.3745857220665
52
+ Step 52 | loss:9.438945198780857e-06 lr:2.1596762663442218e-05 tokens_per_second_per_gpu:7064.475268996893
53
+ Step 53 | loss:0.00028414305415935814 lr:1.9641874609064443e-05 tokens_per_second_per_gpu:7004.631675388433
54
+ Step 54 | loss:0.0002906432200688869 lr:1.7757889363191483e-05 tokens_per_second_per_gpu:7099.724473612669
55
+ Step 55 | loss:0.0006885308539494872 lr:1.5949207060660138e-05 tokens_per_second_per_gpu:7223.775075746081
56
+ Step 56 | loss:8.605341281509027e-05 lr:1.422005196279395e-05 tokens_per_second_per_gpu:6082.0778740205715
57
+ Step 57 | loss:0.0063959574326872826 lr:1.257446259144494e-05 tokens_per_second_per_gpu:7081.2743175992555
58
+ Step 58 | loss:0.005889782682061195 lr:1.1016282296838887e-05 tokens_per_second_per_gpu:7068.960681166967
59
+ Step 59 | loss:0.0003120407636743039 lr:9.549150281252633e-06 tokens_per_second_per_gpu:7055.8604218274395
60
+ Step 60 | loss:0.000564471585676074 lr:8.176493099488663e-06 tokens_per_second_per_gpu:7272.444511517603
61
+ Step 61 | loss:8.206569327739999e-05 lr:6.901516655997536e-06 tokens_per_second_per_gpu:7210.362983854608
62
+ Step 62 | loss:0.0002453089982736856 lr:5.727198717339511e-06 tokens_per_second_per_gpu:6921.957697264826
63
+ Step 63 | loss:0.00021397467935457826 lr:4.65628195747273e-06 tokens_per_second_per_gpu:7103.90490201871
64
+ Step 64 | loss:2.379073293923284e-06 lr:3.691267552111183e-06 tokens_per_second_per_gpu:6993.683263965953
65
+ Step 65 | loss:0.0032244210597127676 lr:2.8344093371128424e-06 tokens_per_second_per_gpu:7137.559084787147
66
+ Step 66 | loss:0.0002484503202140331 lr:2.087708544541689e-06 tokens_per_second_per_gpu:7176.5653753533525
67
+ Step 67 | loss:5.681362836185144e-06 lr:1.4529091286973995e-06 tokens_per_second_per_gpu:7011.669433490256
68
+ Step 68 | loss:0.000574558274820447 lr:9.314936930293283e-07 tokens_per_second_per_gpu:7068.591725466086
69
+ Step 69 | loss:0.0002666613145265728 lr:5.246800274474439e-07 tokens_per_second_per_gpu:7109.388717623035
70
+ Step 70 | loss:7.340025331359357e-05 lr:2.334182641175686e-07 tokens_per_second_per_gpu:7215.015175796162
71
+ Step 71 | loss:0.005155441351234913 lr:5.838865838366792e-08 tokens_per_second_per_gpu:7187.734395899015
72
+ Step 72 | loss:0.004279113840311766 lr:0.0 tokens_per_second_per_gpu:7082.0875692297195
070dd51e/td_False_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
070dd51e/td_True_ttd_False_ttdwa_False_ad_True_trd_False.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input": [{"role": "system", "content": "Figure out the underlying transformation in the following examples and apply it to the test case. Here are some examples from this transformation, your answer must follow the format.\nThe input-output grids are provided as python arrays:\n[[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 6 0 0 0 0 6 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 7 0 0 7 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 9 0 0 0 0 9 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]] -> [[0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 6 6 6 6 3 6 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 3 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 7 2 7 7 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 9 2 9 9 9 9 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 2 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0]]#\n"}, {"role": "user", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 5 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 9 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] -> "}], "output": {"role": "assistant", "content": "[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 5 5 8 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 6 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 
0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 9 9 9 4 9 9 9 9 9 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]#"}, "total_tokens": 3363}
08573cc6/adapter_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"base_model_name_or_path": "/raid/lingo/akyurek/git/agi/checkpoints/pretrained/8B_grids_no_lm_generated_tasks/4k/", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "lora_alpha": 16.0, "lora_dropout": 0.0, "modules_to_save": null, "peft_type": "LORA", "r": 128, "target_modules": ["gate_proj", "down_proj", "up_proj", "lm_head", "q_proj", "v_proj"], "task_type": "CAUSAL_LM"}
08573cc6/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:238c7be3530fc7e5c843ce69ce3250a53031003f0c603835dd555b9c952e75e6
3
+ size 596042770