File size: 160,189 Bytes
691b3b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})]
{
    func main<ios16>(tensor<fp16, [128, 64]> cos, tensor<fp16, [1, 448, 1, 1024]> k_cache_0, tensor<fp16, [1, 448, 1, 1024]> k_cache_1, tensor<fp16, [1, 512, 1, 64]> mask, tensor<fp16, [128, 64]> sin, tensor<fp16, [1, 1024, 1, 448]> v_cache_0, tensor<fp16, [1, 1024, 1, 448]> v_cache_1, tensor<fp16, [1, 3072, 8, 8]> x) [CoreML_InputDefaultValues = dict<tensor<string, []>, tensor<fp32, []>>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] {
            tensor<int32, []> var_13 = const()[name = tensor<string, []>("op_13"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_17 = const()[name = tensor<string, []>("op_17"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_19 = const()[name = tensor<string, []>("op_19"), val = tensor<int32, []>(-3)];
            tensor<int32, []> var_52 = const()[name = tensor<string, []>("op_52"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_55 = const()[name = tensor<string, []>("op_55"), val = tensor<bool, []>(true)];
            tensor<bool, []> x_eps_1_interleave_0 = const()[name = tensor<string, []>("x_eps_1_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 1, 8, 8]> eps_chan_1_to_fp16 = const()[name = tensor<string, []>("eps_chan_1_to_fp16"), val = tensor<fp16, [1, 1, 8, 8]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
            tensor<fp16, [1, 3073, 8, 8]> x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor<string, []>("x_eps_1_cast_fp16")];
            tensor<int32, [1]> norm_x_1_axes_0 = const()[name = tensor<string, []>("norm_x_1_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 8, 8]> norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor<string, []>("norm_x_1_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor<string, []>("x_normed_1_cast_fp16")];
            tensor<fp16, []> var_79_to_fp16 = const()[name = tensor<string, []>("op_79_to_fp16"), val = tensor<fp16, []>(0x1.bb8p+5)];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor<string, []>("x_normed_3_cast_fp16")];
            tensor<fp16, [1, 3072, 1, 1]> blocks_0_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(256)))];
            tensor<fp16, [1, 3072, 8, 8]> x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
            tensor<int32, [4]> var_100 = const()[name = tensor<string, []>("op_100"), val = tensor<int32, [4]>([1, 3072, 1, -1])];
            tensor<fp16, [1, 3072, 1, 64]> input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
            tensor<int32, [2]> var_103 = const()[name = tensor<string, []>("op_103"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_105 = const()[name = tensor<string, []>("op_105"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [3072, 3072, 1, 1]> blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [3072, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6464)))];
            tensor<fp16, [1, 3072, 1, 64]> q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("q_1_cast_fp16")];
            tensor<int32, [2]> var_109 = const()[name = tensor<string, []>("op_109"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_111 = const()[name = tensor<string, []>("op_111"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1024, 3072, 1, 1]> blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [1024, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18880896)))];
            tensor<fp16, [1, 1024, 1, 64]> k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
            tensor<int32, [2]> var_115 = const()[name = tensor<string, []>("op_115"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_117 = const()[name = tensor<string, []>("op_117"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> v_1_pad_type_0 = const()[name = tensor<string, []>("v_1_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> v_1_pad_0 = const()[name = tensor<string, []>("v_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1024, 3072, 1, 1]> blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [1024, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25172416)))];
            tensor<fp16, [1, 1024, 1, 64]> v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("v_1_cast_fp16")];
            tensor<int32, [4]> var_120 = const()[name = tensor<string, []>("op_120"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor<string, []>("q_3_cast_fp16")];
            tensor<int32, [4]> var_122 = const()[name = tensor<string, []>("op_122"), val = tensor<int32, [4]>([1, -1, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
            tensor<int32, [4]> var_136_begin_0 = const()[name = tensor<string, []>("op_136_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_136_end_0 = const()[name = tensor<string, []>("op_136_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> var_136_end_mask_0 = const()[name = tensor<string, []>("op_136_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 24, 64, 64]> var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor<string, []>("op_136_cast_fp16")];
            tensor<int32, [4]> var_142_begin_0 = const()[name = tensor<string, []>("op_142_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_142_end_0 = const()[name = tensor<string, []>("op_142_end_0"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<bool, [4]> var_142_end_mask_0 = const()[name = tensor<string, []>("op_142_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor<string, []>("op_142_cast_fp16")];
            tensor<fp16, []> const_10_promoted_to_fp16 = const()[name = tensor<string, []>("const_10_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 24, 64, 64]> var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor<string, []>("op_144_cast_fp16")];
            tensor<bool, []> rotated_1_interleave_0 = const()[name = tensor<string, []>("rotated_1_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 24, 128, 64]> rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor<string, []>("rotated_1_cast_fp16")];
            tensor<fp16, [1, 24, 128, 64]> var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor<string, []>("op_147_cast_fp16")];
            tensor<fp16, [1, 24, 128, 64]> var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor<string, []>("op_148_cast_fp16")];
            tensor<fp16, [1, 24, 128, 64]> roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor<string, []>("roped_1_cast_fp16")];
            tensor<int32, [4]> var_161_begin_0 = const()[name = tensor<string, []>("op_161_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_161_end_0 = const()[name = tensor<string, []>("op_161_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> var_161_end_mask_0 = const()[name = tensor<string, []>("op_161_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 8, 64, 64]> var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_161_cast_fp16")];
            tensor<int32, [4]> var_167_begin_0 = const()[name = tensor<string, []>("op_167_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_167_end_0 = const()[name = tensor<string, []>("op_167_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<bool, [4]> var_167_end_mask_0 = const()[name = tensor<string, []>("op_167_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_167_cast_fp16")];
            tensor<fp16, []> const_12_promoted_to_fp16 = const()[name = tensor<string, []>("const_12_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 8, 64, 64]> var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
            tensor<bool, []> rotated_3_interleave_0 = const()[name = tensor<string, []>("rotated_3_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 8, 128, 64]> rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor<string, []>("rotated_3_cast_fp16")];
            tensor<fp16, [1, 8, 128, 64]> var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor<string, []>("op_172_cast_fp16")];
            tensor<fp16, [1, 8, 128, 64]> var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor<string, []>("op_173_cast_fp16")];
            tensor<fp16, [1, 8, 128, 64]> roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor<string, []>("roped_3_cast_fp16")];
            tensor<int32, [4]> var_176 = const()[name = tensor<string, []>("op_176"), val = tensor<int32, [4]>([1, -1, 1, 64])];
            tensor<fp16, [1, 1024, 1, 64]> k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
            tensor<int32, [4]> var_178 = const()[name = tensor<string, []>("op_178"), val = tensor<int32, [4]>([1, -1, 1, 64])];
            tensor<fp16, [1, 1024, 1, 64]> new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor<string, []>("new_v_cache_0_type_fp32_cast_fp16")];
            tensor<int32, [4]> k_9_perm_0 = const()[name = tensor<string, []>("k_9_perm_0"), val = tensor<int32, [4]>([0, -1, 2, -3])];
            tensor<bool, []> k_11_interleave_0 = const()[name = tensor<string, []>("k_11_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 64, 1, 1024]> new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_1")];
            tensor<fp16, [1, 512, 1, 1024]> k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor<string, []>("k_11_cast_fp16")];
            tensor<bool, []> v_7_interleave_0 = const()[name = tensor<string, []>("v_7_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 1024, 1, 512]> v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor<string, []>("v_7_cast_fp16")];
            tensor<int32, [4]> var_186 = const()[name = tensor<string, []>("op_186"), val = tensor<int32, [4]>([1, 3072, 1, -1])];
            tensor<fp16, [1, 3072, 1, 64]> q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor<string, []>("q_7_cast_fp16")];
            tensor<int32, [4]> var_191_begin_0 = const()[name = tensor<string, []>("op_191_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_191_end_0 = const()[name = tensor<string, []>("op_191_end_0"), val = tensor<int32, [4]>([1, 128, 1, 64])];
            tensor<bool, [4]> var_191_end_mask_0 = const()[name = tensor<string, []>("op_191_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_191_cast_fp16")];
            tensor<int32, [4]> var_195_begin_0 = const()[name = tensor<string, []>("op_195_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
            tensor<int32, [4]> var_195_end_0 = const()[name = tensor<string, []>("op_195_end_0"), val = tensor<int32, [4]>([1, 256, 1, 64])];
            tensor<bool, [4]> var_195_end_mask_0 = const()[name = tensor<string, []>("op_195_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_195_cast_fp16")];
            tensor<int32, [4]> var_199_begin_0 = const()[name = tensor<string, []>("op_199_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
            tensor<int32, [4]> var_199_end_0 = const()[name = tensor<string, []>("op_199_end_0"), val = tensor<int32, [4]>([1, 384, 1, 64])];
            tensor<bool, [4]> var_199_end_mask_0 = const()[name = tensor<string, []>("op_199_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_199_cast_fp16")];
            tensor<int32, [4]> var_203_begin_0 = const()[name = tensor<string, []>("op_203_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
            tensor<int32, [4]> var_203_end_0 = const()[name = tensor<string, []>("op_203_end_0"), val = tensor<int32, [4]>([1, 512, 1, 64])];
            tensor<bool, [4]> var_203_end_mask_0 = const()[name = tensor<string, []>("op_203_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_203_cast_fp16")];
            tensor<int32, [4]> var_207_begin_0 = const()[name = tensor<string, []>("op_207_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
            tensor<int32, [4]> var_207_end_0 = const()[name = tensor<string, []>("op_207_end_0"), val = tensor<int32, [4]>([1, 640, 1, 64])];
            tensor<bool, [4]> var_207_end_mask_0 = const()[name = tensor<string, []>("op_207_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_207_cast_fp16")];
            tensor<int32, [4]> var_211_begin_0 = const()[name = tensor<string, []>("op_211_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
            tensor<int32, [4]> var_211_end_0 = const()[name = tensor<string, []>("op_211_end_0"), val = tensor<int32, [4]>([1, 768, 1, 64])];
            tensor<bool, [4]> var_211_end_mask_0 = const()[name = tensor<string, []>("op_211_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_211_cast_fp16")];
            tensor<int32, [4]> var_215_begin_0 = const()[name = tensor<string, []>("op_215_begin_0"), val = tensor<int32, [4]>([0, 768, 0, 0])];
            tensor<int32, [4]> var_215_end_0 = const()[name = tensor<string, []>("op_215_end_0"), val = tensor<int32, [4]>([1, 896, 1, 64])];
            tensor<bool, [4]> var_215_end_mask_0 = const()[name = tensor<string, []>("op_215_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_215_cast_fp16")];
            tensor<int32, [4]> var_219_begin_0 = const()[name = tensor<string, []>("op_219_begin_0"), val = tensor<int32, [4]>([0, 896, 0, 0])];
            tensor<int32, [4]> var_219_end_0 = const()[name = tensor<string, []>("op_219_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 64])];
            tensor<bool, [4]> var_219_end_mask_0 = const()[name = tensor<string, []>("op_219_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_219_cast_fp16")];
            tensor<int32, [4]> var_223_begin_0 = const()[name = tensor<string, []>("op_223_begin_0"), val = tensor<int32, [4]>([0, 1024, 0, 0])];
            tensor<int32, [4]> var_223_end_0 = const()[name = tensor<string, []>("op_223_end_0"), val = tensor<int32, [4]>([1, 1152, 1, 64])];
            tensor<bool, [4]> var_223_end_mask_0 = const()[name = tensor<string, []>("op_223_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_223_cast_fp16")];
            tensor<int32, [4]> var_227_begin_0 = const()[name = tensor<string, []>("op_227_begin_0"), val = tensor<int32, [4]>([0, 1152, 0, 0])];
            tensor<int32, [4]> var_227_end_0 = const()[name = tensor<string, []>("op_227_end_0"), val = tensor<int32, [4]>([1, 1280, 1, 64])];
            tensor<bool, [4]> var_227_end_mask_0 = const()[name = tensor<string, []>("op_227_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_227_cast_fp16")];
            tensor<int32, [4]> var_231_begin_0 = const()[name = tensor<string, []>("op_231_begin_0"), val = tensor<int32, [4]>([0, 1280, 0, 0])];
            tensor<int32, [4]> var_231_end_0 = const()[name = tensor<string, []>("op_231_end_0"), val = tensor<int32, [4]>([1, 1408, 1, 64])];
            tensor<bool, [4]> var_231_end_mask_0 = const()[name = tensor<string, []>("op_231_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_231_cast_fp16")];
            tensor<int32, [4]> var_235_begin_0 = const()[name = tensor<string, []>("op_235_begin_0"), val = tensor<int32, [4]>([0, 1408, 0, 0])];
            tensor<int32, [4]> var_235_end_0 = const()[name = tensor<string, []>("op_235_end_0"), val = tensor<int32, [4]>([1, 1536, 1, 64])];
            tensor<bool, [4]> var_235_end_mask_0 = const()[name = tensor<string, []>("op_235_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_235_cast_fp16")];
            tensor<int32, [4]> var_239_begin_0 = const()[name = tensor<string, []>("op_239_begin_0"), val = tensor<int32, [4]>([0, 1536, 0, 0])];
            tensor<int32, [4]> var_239_end_0 = const()[name = tensor<string, []>("op_239_end_0"), val = tensor<int32, [4]>([1, 1664, 1, 64])];
            tensor<bool, [4]> var_239_end_mask_0 = const()[name = tensor<string, []>("op_239_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_239_cast_fp16")];
            tensor<int32, [4]> var_243_begin_0 = const()[name = tensor<string, []>("op_243_begin_0"), val = tensor<int32, [4]>([0, 1664, 0, 0])];
            tensor<int32, [4]> var_243_end_0 = const()[name = tensor<string, []>("op_243_end_0"), val = tensor<int32, [4]>([1, 1792, 1, 64])];
            tensor<bool, [4]> var_243_end_mask_0 = const()[name = tensor<string, []>("op_243_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_243_cast_fp16")];
            tensor<int32, [4]> var_247_begin_0 = const()[name = tensor<string, []>("op_247_begin_0"), val = tensor<int32, [4]>([0, 1792, 0, 0])];
            tensor<int32, [4]> var_247_end_0 = const()[name = tensor<string, []>("op_247_end_0"), val = tensor<int32, [4]>([1, 1920, 1, 64])];
            tensor<bool, [4]> var_247_end_mask_0 = const()[name = tensor<string, []>("op_247_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_247_cast_fp16")];
            tensor<int32, [4]> var_251_begin_0 = const()[name = tensor<string, []>("op_251_begin_0"), val = tensor<int32, [4]>([0, 1920, 0, 0])];
            tensor<int32, [4]> var_251_end_0 = const()[name = tensor<string, []>("op_251_end_0"), val = tensor<int32, [4]>([1, 2048, 1, 64])];
            tensor<bool, [4]> var_251_end_mask_0 = const()[name = tensor<string, []>("op_251_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_251_cast_fp16")];
            tensor<int32, [4]> var_255_begin_0 = const()[name = tensor<string, []>("op_255_begin_0"), val = tensor<int32, [4]>([0, 2048, 0, 0])];
            tensor<int32, [4]> var_255_end_0 = const()[name = tensor<string, []>("op_255_end_0"), val = tensor<int32, [4]>([1, 2176, 1, 64])];
            tensor<bool, [4]> var_255_end_mask_0 = const()[name = tensor<string, []>("op_255_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_255_cast_fp16")];
            tensor<int32, [4]> var_259_begin_0 = const()[name = tensor<string, []>("op_259_begin_0"), val = tensor<int32, [4]>([0, 2176, 0, 0])];
            tensor<int32, [4]> var_259_end_0 = const()[name = tensor<string, []>("op_259_end_0"), val = tensor<int32, [4]>([1, 2304, 1, 64])];
            tensor<bool, [4]> var_259_end_mask_0 = const()[name = tensor<string, []>("op_259_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_259_cast_fp16")];
            tensor<int32, [4]> var_263_begin_0 = const()[name = tensor<string, []>("op_263_begin_0"), val = tensor<int32, [4]>([0, 2304, 0, 0])];
            tensor<int32, [4]> var_263_end_0 = const()[name = tensor<string, []>("op_263_end_0"), val = tensor<int32, [4]>([1, 2432, 1, 64])];
            tensor<bool, [4]> var_263_end_mask_0 = const()[name = tensor<string, []>("op_263_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_263_cast_fp16")];
            tensor<int32, [4]> var_267_begin_0 = const()[name = tensor<string, []>("op_267_begin_0"), val = tensor<int32, [4]>([0, 2432, 0, 0])];
            tensor<int32, [4]> var_267_end_0 = const()[name = tensor<string, []>("op_267_end_0"), val = tensor<int32, [4]>([1, 2560, 1, 64])];
            tensor<bool, [4]> var_267_end_mask_0 = const()[name = tensor<string, []>("op_267_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_267_cast_fp16")];
            tensor<int32, [4]> var_271_begin_0 = const()[name = tensor<string, []>("op_271_begin_0"), val = tensor<int32, [4]>([0, 2560, 0, 0])];
            tensor<int32, [4]> var_271_end_0 = const()[name = tensor<string, []>("op_271_end_0"), val = tensor<int32, [4]>([1, 2688, 1, 64])];
            tensor<bool, [4]> var_271_end_mask_0 = const()[name = tensor<string, []>("op_271_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_271_cast_fp16")];
            tensor<int32, [4]> var_275_begin_0 = const()[name = tensor<string, []>("op_275_begin_0"), val = tensor<int32, [4]>([0, 2688, 0, 0])];
            tensor<int32, [4]> var_275_end_0 = const()[name = tensor<string, []>("op_275_end_0"), val = tensor<int32, [4]>([1, 2816, 1, 64])];
            tensor<bool, [4]> var_275_end_mask_0 = const()[name = tensor<string, []>("op_275_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_275_cast_fp16")];
            tensor<int32, [4]> var_279_begin_0 = const()[name = tensor<string, []>("op_279_begin_0"), val = tensor<int32, [4]>([0, 2816, 0, 0])];
            tensor<int32, [4]> var_279_end_0 = const()[name = tensor<string, []>("op_279_end_0"), val = tensor<int32, [4]>([1, 2944, 1, 64])];
            tensor<bool, [4]> var_279_end_mask_0 = const()[name = tensor<string, []>("op_279_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_279_cast_fp16")];
            tensor<int32, [4]> var_283_begin_0 = const()[name = tensor<string, []>("op_283_begin_0"), val = tensor<int32, [4]>([0, 2944, 0, 0])];
            tensor<int32, [4]> var_283_end_0 = const()[name = tensor<string, []>("op_283_end_0"), val = tensor<int32, [4]>([1, 3072, 1, 64])];
            tensor<bool, [4]> var_283_end_mask_0 = const()[name = tensor<string, []>("op_283_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor<string, []>("op_283_cast_fp16")];
            tensor<int32, [4]> var_289_begin_0 = const()[name = tensor<string, []>("op_289_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_289_end_0 = const()[name = tensor<string, []>("op_289_end_0"), val = tensor<int32, [4]>([1, 512, 1, 128])];
            tensor<bool, [4]> var_289_end_mask_0 = const()[name = tensor<string, []>("op_289_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_289_cast_fp16")];
            tensor<int32, [4]> var_301_begin_0 = const()[name = tensor<string, []>("op_301_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
            tensor<int32, [4]> var_301_end_0 = const()[name = tensor<string, []>("op_301_end_0"), val = tensor<int32, [4]>([1, 512, 1, 256])];
            tensor<bool, [4]> var_301_end_mask_0 = const()[name = tensor<string, []>("op_301_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_301_cast_fp16")];
            tensor<int32, [4]> var_313_begin_0 = const()[name = tensor<string, []>("op_313_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
            tensor<int32, [4]> var_313_end_0 = const()[name = tensor<string, []>("op_313_end_0"), val = tensor<int32, [4]>([1, 512, 1, 384])];
            tensor<bool, [4]> var_313_end_mask_0 = const()[name = tensor<string, []>("op_313_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_313_cast_fp16")];
            tensor<int32, [4]> var_325_begin_0 = const()[name = tensor<string, []>("op_325_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
            tensor<int32, [4]> var_325_end_0 = const()[name = tensor<string, []>("op_325_end_0"), val = tensor<int32, [4]>([1, 512, 1, 512])];
            tensor<bool, [4]> var_325_end_mask_0 = const()[name = tensor<string, []>("op_325_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_325_cast_fp16")];
            tensor<int32, [4]> var_337_begin_0 = const()[name = tensor<string, []>("op_337_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
            tensor<int32, [4]> var_337_end_0 = const()[name = tensor<string, []>("op_337_end_0"), val = tensor<int32, [4]>([1, 512, 1, 640])];
            tensor<bool, [4]> var_337_end_mask_0 = const()[name = tensor<string, []>("op_337_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_337_cast_fp16")];
            tensor<int32, [4]> var_349_begin_0 = const()[name = tensor<string, []>("op_349_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
            tensor<int32, [4]> var_349_end_0 = const()[name = tensor<string, []>("op_349_end_0"), val = tensor<int32, [4]>([1, 512, 1, 768])];
            tensor<bool, [4]> var_349_end_mask_0 = const()[name = tensor<string, []>("op_349_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_349_cast_fp16")];
            tensor<int32, [4]> var_361_begin_0 = const()[name = tensor<string, []>("op_361_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 768])];
            tensor<int32, [4]> var_361_end_0 = const()[name = tensor<string, []>("op_361_end_0"), val = tensor<int32, [4]>([1, 512, 1, 896])];
            tensor<bool, [4]> var_361_end_mask_0 = const()[name = tensor<string, []>("op_361_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_361_cast_fp16")];
            tensor<int32, [4]> var_373_begin_0 = const()[name = tensor<string, []>("op_373_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 896])];
            tensor<int32, [4]> var_373_end_0 = const()[name = tensor<string, []>("op_373_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1024])];
            tensor<bool, [4]> var_373_end_mask_0 = const()[name = tensor<string, []>("op_373_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_373_cast_fp16")];
            tensor<int32, [4]> var_383_begin_0 = const()[name = tensor<string, []>("op_383_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_383_end_0 = const()[name = tensor<string, []>("op_383_end_0"), val = tensor<int32, [4]>([1, 128, 1, 512])];
            tensor<bool, [4]> var_383_end_mask_0 = const()[name = tensor<string, []>("op_383_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_383_cast_fp16")];
            tensor<int32, [4]> var_395_begin_0 = const()[name = tensor<string, []>("op_395_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
            tensor<int32, [4]> var_395_end_0 = const()[name = tensor<string, []>("op_395_end_0"), val = tensor<int32, [4]>([1, 256, 1, 512])];
            tensor<bool, [4]> var_395_end_mask_0 = const()[name = tensor<string, []>("op_395_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_395_cast_fp16")];
            tensor<int32, [4]> var_407_begin_0 = const()[name = tensor<string, []>("op_407_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
            tensor<int32, [4]> var_407_end_0 = const()[name = tensor<string, []>("op_407_end_0"), val = tensor<int32, [4]>([1, 384, 1, 512])];
            tensor<bool, [4]> var_407_end_mask_0 = const()[name = tensor<string, []>("op_407_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_407_cast_fp16")];
            tensor<int32, [4]> var_419_begin_0 = const()[name = tensor<string, []>("op_419_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
            tensor<int32, [4]> var_419_end_0 = const()[name = tensor<string, []>("op_419_end_0"), val = tensor<int32, [4]>([1, 512, 1, 512])];
            tensor<bool, [4]> var_419_end_mask_0 = const()[name = tensor<string, []>("op_419_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_419_cast_fp16")];
            tensor<int32, [4]> var_431_begin_0 = const()[name = tensor<string, []>("op_431_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
            tensor<int32, [4]> var_431_end_0 = const()[name = tensor<string, []>("op_431_end_0"), val = tensor<int32, [4]>([1, 640, 1, 512])];
            tensor<bool, [4]> var_431_end_mask_0 = const()[name = tensor<string, []>("op_431_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_431_cast_fp16")];
            tensor<int32, [4]> var_443_begin_0 = const()[name = tensor<string, []>("op_443_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
            tensor<int32, [4]> var_443_end_0 = const()[name = tensor<string, []>("op_443_end_0"), val = tensor<int32, [4]>([1, 768, 1, 512])];
            tensor<bool, [4]> var_443_end_mask_0 = const()[name = tensor<string, []>("op_443_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_443_cast_fp16")];
            tensor<int32, [4]> var_455_begin_0 = const()[name = tensor<string, []>("op_455_begin_0"), val = tensor<int32, [4]>([0, 768, 0, 0])];
            tensor<int32, [4]> var_455_end_0 = const()[name = tensor<string, []>("op_455_end_0"), val = tensor<int32, [4]>([1, 896, 1, 512])];
            tensor<bool, [4]> var_455_end_mask_0 = const()[name = tensor<string, []>("op_455_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_455_cast_fp16")];
            tensor<int32, [4]> var_467_begin_0 = const()[name = tensor<string, []>("op_467_begin_0"), val = tensor<int32, [4]>([0, 896, 0, 0])];
            tensor<int32, [4]> var_467_end_0 = const()[name = tensor<string, []>("op_467_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 512])];
            tensor<bool, [4]> var_467_end_mask_0 = const()[name = tensor<string, []>("op_467_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor<string, []>("op_467_cast_fp16")];
            tensor<string, []> var_479_equation_0 = const()[name = tensor<string, []>("op_479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor<string, []>("op_479_cast_fp16")];
            tensor<fp16, []> var_480_to_fp16 = const()[name = tensor<string, []>("op_480_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor<string, []>("op_481_cast_fp16")];
            tensor<string, []> var_483_equation_0 = const()[name = tensor<string, []>("op_483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor<string, []>("op_483_cast_fp16")];
            tensor<fp16, []> var_484_to_fp16 = const()[name = tensor<string, []>("op_484_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor<string, []>("op_485_cast_fp16")];
            tensor<string, []> var_487_equation_0 = const()[name = tensor<string, []>("op_487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor<string, []>("op_487_cast_fp16")];
            tensor<fp16, []> var_488_to_fp16 = const()[name = tensor<string, []>("op_488_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor<string, []>("op_489_cast_fp16")];
            tensor<string, []> var_491_equation_0 = const()[name = tensor<string, []>("op_491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor<string, []>("op_491_cast_fp16")];
            tensor<fp16, []> var_492_to_fp16 = const()[name = tensor<string, []>("op_492_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor<string, []>("op_493_cast_fp16")];
            tensor<string, []> var_495_equation_0 = const()[name = tensor<string, []>("op_495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor<string, []>("op_495_cast_fp16")];
            tensor<fp16, []> var_496_to_fp16 = const()[name = tensor<string, []>("op_496_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor<string, []>("op_497_cast_fp16")];
            tensor<string, []> var_499_equation_0 = const()[name = tensor<string, []>("op_499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor<string, []>("op_499_cast_fp16")];
            tensor<fp16, []> var_500_to_fp16 = const()[name = tensor<string, []>("op_500_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor<string, []>("op_501_cast_fp16")];
            tensor<string, []> var_503_equation_0 = const()[name = tensor<string, []>("op_503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor<string, []>("op_503_cast_fp16")];
            tensor<fp16, []> var_504_to_fp16 = const()[name = tensor<string, []>("op_504_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor<string, []>("op_505_cast_fp16")];
            tensor<string, []> var_507_equation_0 = const()[name = tensor<string, []>("op_507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor<string, []>("op_507_cast_fp16")];
            tensor<fp16, []> var_508_to_fp16 = const()[name = tensor<string, []>("op_508_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor<string, []>("op_509_cast_fp16")];
            tensor<string, []> var_511_equation_0 = const()[name = tensor<string, []>("op_511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor<string, []>("op_511_cast_fp16")];
            tensor<fp16, []> var_512_to_fp16 = const()[name = tensor<string, []>("op_512_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor<string, []>("op_513_cast_fp16")];
            tensor<string, []> var_515_equation_0 = const()[name = tensor<string, []>("op_515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor<string, []>("op_515_cast_fp16")];
            tensor<fp16, []> var_516_to_fp16 = const()[name = tensor<string, []>("op_516_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor<string, []>("op_517_cast_fp16")];
            tensor<string, []> var_519_equation_0 = const()[name = tensor<string, []>("op_519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor<string, []>("op_519_cast_fp16")];
            tensor<fp16, []> var_520_to_fp16 = const()[name = tensor<string, []>("op_520_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor<string, []>("op_521_cast_fp16")];
            tensor<string, []> var_523_equation_0 = const()[name = tensor<string, []>("op_523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor<string, []>("op_523_cast_fp16")];
            tensor<fp16, []> var_524_to_fp16 = const()[name = tensor<string, []>("op_524_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor<string, []>("op_525_cast_fp16")];
            tensor<string, []> var_527_equation_0 = const()[name = tensor<string, []>("op_527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor<string, []>("op_527_cast_fp16")];
            tensor<fp16, []> var_528_to_fp16 = const()[name = tensor<string, []>("op_528_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor<string, []>("op_529_cast_fp16")];
            tensor<string, []> var_531_equation_0 = const()[name = tensor<string, []>("op_531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor<string, []>("op_531_cast_fp16")];
            tensor<fp16, []> var_532_to_fp16 = const()[name = tensor<string, []>("op_532_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor<string, []>("op_533_cast_fp16")];
            tensor<string, []> var_535_equation_0 = const()[name = tensor<string, []>("op_535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor<string, []>("op_535_cast_fp16")];
            tensor<fp16, []> var_536_to_fp16 = const()[name = tensor<string, []>("op_536_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor<string, []>("op_537_cast_fp16")];
            tensor<string, []> var_539_equation_0 = const()[name = tensor<string, []>("op_539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor<string, []>("op_539_cast_fp16")];
            tensor<fp16, []> var_540_to_fp16 = const()[name = tensor<string, []>("op_540_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor<string, []>("op_541_cast_fp16")];
            tensor<string, []> var_543_equation_0 = const()[name = tensor<string, []>("op_543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor<string, []>("op_543_cast_fp16")];
            tensor<fp16, []> var_544_to_fp16 = const()[name = tensor<string, []>("op_544_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor<string, []>("op_545_cast_fp16")];
            tensor<string, []> var_547_equation_0 = const()[name = tensor<string, []>("op_547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor<string, []>("op_547_cast_fp16")];
            tensor<fp16, []> var_548_to_fp16 = const()[name = tensor<string, []>("op_548_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor<string, []>("op_549_cast_fp16")];
            tensor<string, []> var_551_equation_0 = const()[name = tensor<string, []>("op_551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor<string, []>("op_551_cast_fp16")];
            tensor<fp16, []> var_552_to_fp16 = const()[name = tensor<string, []>("op_552_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor<string, []>("op_553_cast_fp16")];
            tensor<string, []> var_555_equation_0 = const()[name = tensor<string, []>("op_555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor<string, []>("op_555_cast_fp16")];
            tensor<fp16, []> var_556_to_fp16 = const()[name = tensor<string, []>("op_556_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor<string, []>("op_557_cast_fp16")];
            tensor<string, []> var_559_equation_0 = const()[name = tensor<string, []>("op_559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor<string, []>("op_559_cast_fp16")];
            tensor<fp16, []> var_560_to_fp16 = const()[name = tensor<string, []>("op_560_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor<string, []>("op_561_cast_fp16")];
            tensor<string, []> var_563_equation_0 = const()[name = tensor<string, []>("op_563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor<string, []>("op_563_cast_fp16")];
            tensor<fp16, []> var_564_to_fp16 = const()[name = tensor<string, []>("op_564_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor<string, []>("op_565_cast_fp16")];
            tensor<string, []> var_567_equation_0 = const()[name = tensor<string, []>("op_567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor<string, []>("op_567_cast_fp16")];
            tensor<fp16, []> var_568_to_fp16 = const()[name = tensor<string, []>("op_568_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor<string, []>("op_569_cast_fp16")];
            tensor<string, []> var_571_equation_0 = const()[name = tensor<string, []>("op_571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor<string, []>("op_571_cast_fp16")];
            tensor<fp16, []> var_572_to_fp16 = const()[name = tensor<string, []>("op_572_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor<string, []>("op_573_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor<string, []>("aw_1_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor<string, []>("aw_3_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor<string, []>("aw_5_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor<string, []>("aw_7_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor<string, []>("aw_9_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor<string, []>("aw_11_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor<string, []>("aw_13_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor<string, []>("aw_15_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor<string, []>("aw_17_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor<string, []>("aw_19_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor<string, []>("aw_21_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor<string, []>("aw_23_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor<string, []>("aw_25_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor<string, []>("aw_27_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor<string, []>("aw_29_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor<string, []>("aw_31_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor<string, []>("aw_33_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor<string, []>("aw_35_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor<string, []>("aw_37_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor<string, []>("aw_39_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor<string, []>("aw_41_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor<string, []>("aw_43_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor<string, []>("aw_45_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor<string, []>("aw_47_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor<string, []>("op_598_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor<string, []>("op_600_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor<string, []>("op_601_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor<string, []>("op_602_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor<string, []>("op_603_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor<string, []>("op_604_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor<string, []>("op_605_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor<string, []>("op_606_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor<string, []>("op_607_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor<string, []>("op_608_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor<string, []>("op_609_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor<string, []>("op_610_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor<string, []>("op_611_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor<string, []>("op_612_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor<string, []>("op_613_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor<string, []>("op_614_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor<string, []>("op_615_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor<string, []>("op_616_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor<string, []>("op_618_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor<string, []>("op_619_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor<string, []>("op_620_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor<string, []>("op_621_cast_fp16")];
            tensor<string, []> var_623_equation_0 = const()[name = tensor<string, []>("op_623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor<string, []>("op_623_cast_fp16")];
            tensor<string, []> var_625_equation_0 = const()[name = tensor<string, []>("op_625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor<string, []>("op_625_cast_fp16")];
            tensor<string, []> var_627_equation_0 = const()[name = tensor<string, []>("op_627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor<string, []>("op_627_cast_fp16")];
            tensor<string, []> var_629_equation_0 = const()[name = tensor<string, []>("op_629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor<string, []>("op_629_cast_fp16")];
            tensor<string, []> var_631_equation_0 = const()[name = tensor<string, []>("op_631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor<string, []>("op_631_cast_fp16")];
            tensor<string, []> var_633_equation_0 = const()[name = tensor<string, []>("op_633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor<string, []>("op_633_cast_fp16")];
            tensor<string, []> var_635_equation_0 = const()[name = tensor<string, []>("op_635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor<string, []>("op_635_cast_fp16")];
            tensor<string, []> var_637_equation_0 = const()[name = tensor<string, []>("op_637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor<string, []>("op_637_cast_fp16")];
            tensor<string, []> var_639_equation_0 = const()[name = tensor<string, []>("op_639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor<string, []>("op_639_cast_fp16")];
            tensor<string, []> var_641_equation_0 = const()[name = tensor<string, []>("op_641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor<string, []>("op_641_cast_fp16")];
            tensor<string, []> var_643_equation_0 = const()[name = tensor<string, []>("op_643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor<string, []>("op_643_cast_fp16")];
            tensor<string, []> var_645_equation_0 = const()[name = tensor<string, []>("op_645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor<string, []>("op_645_cast_fp16")];
            tensor<string, []> var_647_equation_0 = const()[name = tensor<string, []>("op_647_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor<string, []>("op_647_cast_fp16")];
            tensor<string, []> var_649_equation_0 = const()[name = tensor<string, []>("op_649_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor<string, []>("op_649_cast_fp16")];
            tensor<string, []> var_651_equation_0 = const()[name = tensor<string, []>("op_651_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor<string, []>("op_651_cast_fp16")];
            tensor<string, []> var_653_equation_0 = const()[name = tensor<string, []>("op_653_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor<string, []>("op_653_cast_fp16")];
            tensor<string, []> var_655_equation_0 = const()[name = tensor<string, []>("op_655_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor<string, []>("op_655_cast_fp16")];
            tensor<string, []> var_657_equation_0 = const()[name = tensor<string, []>("op_657_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor<string, []>("op_657_cast_fp16")];
            tensor<string, []> var_659_equation_0 = const()[name = tensor<string, []>("op_659_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor<string, []>("op_659_cast_fp16")];
            tensor<string, []> var_661_equation_0 = const()[name = tensor<string, []>("op_661_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor<string, []>("op_661_cast_fp16")];
            tensor<string, []> var_663_equation_0 = const()[name = tensor<string, []>("op_663_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor<string, []>("op_663_cast_fp16")];
            tensor<string, []> var_665_equation_0 = const()[name = tensor<string, []>("op_665_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor<string, []>("op_665_cast_fp16")];
            tensor<string, []> var_667_equation_0 = const()[name = tensor<string, []>("op_667_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor<string, []>("op_667_cast_fp16")];
            tensor<string, []> var_669_equation_0 = const()[name = tensor<string, []>("op_669_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor<string, []>("op_669_cast_fp16")];
            tensor<bool, []> x_11_interleave_0 = const()[name = tensor<string, []>("x_11_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 3072, 1, 64]> x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor<string, []>("x_11_cast_fp16")];
            tensor<int32, [4]> var_674 = const()[name = tensor<string, []>("op_674"), val = tensor<int32, [4]>([1, 3072, -1, 8])];
            tensor<fp16, [1, 3072, 8, 8]> input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
            tensor<int32, [2]> var_677 = const()[name = tensor<string, []>("op_677"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_679 = const()[name = tensor<string, []>("op_679"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> attention_output_1_pad_type_0 = const()[name = tensor<string, []>("attention_output_1_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> attention_output_1_pad_0 = const()[name = tensor<string, []>("attention_output_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [3072, 3072, 1, 1]> blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_proj_weight_to_fp16"), val = tensor<fp16, [3072, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31463936)))];
            tensor<fp16, [1, 3072, 8, 8]> attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("attention_output_1_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor<string, []>("x_13_cast_fp16")];
            tensor<bool, []> x_eps_3_interleave_0 = const()[name = tensor<string, []>("x_eps_3_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 1, 8, 8]> eps_chan_3_to_fp16 = const()[name = tensor<string, []>("eps_chan_3_to_fp16"), val = tensor<fp16, [1, 1, 8, 8]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50338368)))];
            tensor<fp16, [1, 3073, 8, 8]> x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor<string, []>("x_eps_3_cast_fp16")];
            tensor<int32, [1]> norm_x_3_axes_0 = const()[name = tensor<string, []>("norm_x_3_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 8, 8]> norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor<string, []>("norm_x_3_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor<string, []>("x_normed_7_cast_fp16")];
            tensor<fp16, []> var_705_to_fp16 = const()[name = tensor<string, []>("op_705_to_fp16"), val = tensor<fp16, []>(0x1.bb8p+5)];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor<string, []>("x_normed_9_cast_fp16")];
            tensor<fp16, [1, 3072, 1, 1]> blocks_0_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50338560)))];
            tensor<fp16, [1, 3072, 8, 8]> input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
            tensor<int32, [2]> var_716 = const()[name = tensor<string, []>("op_716"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_718 = const()[name = tensor<string, []>("op_718"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> input_7_pad_type_0 = const()[name = tensor<string, []>("input_7_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> input_7_pad_0 = const()[name = tensor<string, []>("input_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [8192, 3072, 1, 1]> blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor<fp16, [8192, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50344768)))];
            tensor<fp16, [1, 8192, 8, 8]> input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
            tensor<int32, [2]> var_722 = const()[name = tensor<string, []>("op_722"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_724 = const()[name = tensor<string, []>("op_724"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> x_fc_2_1_pad_type_0 = const()[name = tensor<string, []>("x_fc_2_1_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> x_fc_2_1_pad_0 = const()[name = tensor<string, []>("x_fc_2_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [8192, 3072, 1, 1]> blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor<fp16, [8192, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100676480)))];
            tensor<fp16, [1, 8192, 8, 8]> x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("x_fc_2_1_cast_fp16")];
            tensor<fp16, [1, 8192, 8, 8]> var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor<string, []>("op_727_cast_fp16")];
            tensor<fp16, [1, 8192, 8, 8]> input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
            tensor<int32, [2]> var_730 = const()[name = tensor<string, []>("op_730"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_732 = const()[name = tensor<string, []>("op_732"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_734_pad_type_0 = const()[name = tensor<string, []>("op_734_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_734_pad_0 = const()[name = tensor<string, []>("op_734_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [3072, 8192, 1, 1]> blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_proj_weight_to_fp16"), val = tensor<fp16, [3072, 8192, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151008192)))];
            tensor<fp16, [1, 3072, 8, 8]> var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("op_734_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
            tensor<int32, []> var_740 = const()[name = tensor<string, []>("op_740"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_744 = const()[name = tensor<string, []>("op_744"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_746 = const()[name = tensor<string, []>("op_746"), val = tensor<int32, []>(-3)];
            tensor<int32, []> var_779 = const()[name = tensor<string, []>("op_779"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_782 = const()[name = tensor<string, []>("op_782"), val = tensor<bool, []>(true)];
            tensor<bool, []> x_eps_5_interleave_0 = const()[name = tensor<string, []>("x_eps_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 1, 8, 8]> eps_chan_5_to_fp16 = const()[name = tensor<string, []>("eps_chan_5_to_fp16"), val = tensor<fp16, [1, 1, 8, 8]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201339904)))];
            tensor<fp16, [1, 3073, 8, 8]> x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor<string, []>("x_eps_5_cast_fp16")];
            tensor<int32, [1]> norm_x_5_axes_0 = const()[name = tensor<string, []>("norm_x_5_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 8, 8]> norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor<string, []>("norm_x_5_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor<string, []>("x_normed_13_cast_fp16")];
            tensor<fp16, []> var_805_to_fp16 = const()[name = tensor<string, []>("op_805_to_fp16"), val = tensor<fp16, []>(0x1.bb8p+5)];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor<string, []>("x_normed_15_cast_fp16")];
            tensor<fp16, [1, 3072, 1, 1]> blocks_1_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201340096)))];
            tensor<fp16, [1, 3072, 8, 8]> x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor<string, []>("x_21_cast_fp16")];
            tensor<int32, [4]> var_829 = const()[name = tensor<string, []>("op_829"), val = tensor<int32, [4]>([1, 3072, 1, -1])];
            tensor<fp16, [1, 3072, 1, 64]> input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
            tensor<int32, [2]> var_832 = const()[name = tensor<string, []>("op_832"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_834 = const()[name = tensor<string, []>("op_834"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [3072, 3072, 1, 1]> blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [3072, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201346304)))];
            tensor<fp16, [1, 3072, 1, 64]> q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")];
            tensor<int32, [2]> var_838 = const()[name = tensor<string, []>("op_838"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_840 = const()[name = tensor<string, []>("op_840"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1024, 3072, 1, 1]> blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [1024, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220220736)))];
            tensor<fp16, [1, 1024, 1, 64]> k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
            tensor<int32, [2]> var_844 = const()[name = tensor<string, []>("op_844"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_846 = const()[name = tensor<string, []>("op_846"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> v_11_pad_type_0 = const()[name = tensor<string, []>("v_11_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> v_11_pad_0 = const()[name = tensor<string, []>("v_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1024, 3072, 1, 1]> blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [1024, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226512256)))];
            tensor<fp16, [1, 1024, 1, 64]> v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("v_11_cast_fp16")];
            tensor<int32, [4]> var_849 = const()[name = tensor<string, []>("op_849"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor<string, []>("q_11_cast_fp16")];
            tensor<int32, [4]> var_851 = const()[name = tensor<string, []>("op_851"), val = tensor<int32, [4]>([1, -1, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
            tensor<int32, [4]> var_865_begin_0 = const()[name = tensor<string, []>("op_865_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_865_end_0 = const()[name = tensor<string, []>("op_865_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> var_865_end_mask_0 = const()[name = tensor<string, []>("op_865_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 24, 64, 64]> var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor<string, []>("op_865_cast_fp16")];
            tensor<int32, [4]> var_871_begin_0 = const()[name = tensor<string, []>("op_871_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_871_end_0 = const()[name = tensor<string, []>("op_871_end_0"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<bool, [4]> var_871_end_mask_0 = const()[name = tensor<string, []>("op_871_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor<string, []>("op_871_cast_fp16")];
            tensor<fp16, []> const_30_promoted_to_fp16 = const()[name = tensor<string, []>("const_30_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 24, 64, 64]> var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor<string, []>("op_873_cast_fp16")];
            tensor<bool, []> rotated_5_interleave_0 = const()[name = tensor<string, []>("rotated_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 24, 128, 64]> rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor<string, []>("rotated_5_cast_fp16")];
            tensor<fp16, [1, 24, 128, 64]> var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor<string, []>("op_876_cast_fp16")];
            tensor<fp16, [1, 24, 128, 64]> var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor<string, []>("op_877_cast_fp16")];
            tensor<fp16, [1, 24, 128, 64]> roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor<string, []>("roped_5_cast_fp16")];
            tensor<int32, [4]> var_890_begin_0 = const()[name = tensor<string, []>("op_890_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_890_end_0 = const()[name = tensor<string, []>("op_890_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> var_890_end_mask_0 = const()[name = tensor<string, []>("op_890_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 8, 64, 64]> var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor<string, []>("op_890_cast_fp16")];
            tensor<int32, [4]> var_896_begin_0 = const()[name = tensor<string, []>("op_896_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_896_end_0 = const()[name = tensor<string, []>("op_896_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<bool, [4]> var_896_end_mask_0 = const()[name = tensor<string, []>("op_896_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor<string, []>("op_896_cast_fp16")];
            tensor<fp16, []> const_32_promoted_to_fp16 = const()[name = tensor<string, []>("const_32_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 8, 64, 64]> var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor<string, []>("op_898_cast_fp16")];
            tensor<bool, []> rotated_interleave_0 = const()[name = tensor<string, []>("rotated_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 8, 128, 64]> rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor<string, []>("rotated_cast_fp16")];
            tensor<fp16, [1, 8, 128, 64]> var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor<string, []>("op_901_cast_fp16")];
            tensor<fp16, [1, 8, 128, 64]> var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor<string, []>("op_902_cast_fp16")];
            tensor<fp16, [1, 8, 128, 64]> roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor<string, []>("roped_cast_fp16")];
            tensor<int32, [4]> var_905 = const()[name = tensor<string, []>("op_905"), val = tensor<int32, [4]>([1, -1, 1, 64])];
            tensor<fp16, [1, 1024, 1, 64]> k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
            tensor<int32, [4]> var_907 = const()[name = tensor<string, []>("op_907"), val = tensor<int32, [4]>([1, -1, 1, 64])];
            tensor<fp16, [1, 1024, 1, 64]> new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor<string, []>("new_v_cache_1_type_fp32_cast_fp16")];
            tensor<int32, [4]> k_21_perm_0 = const()[name = tensor<string, []>("k_21_perm_0"), val = tensor<int32, [4]>([0, -1, 2, -3])];
            tensor<bool, []> k_interleave_0 = const()[name = tensor<string, []>("k_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 64, 1, 1024]> new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_0")];
            tensor<fp16, [1, 512, 1, 1024]> k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor<string, []>("k_cast_fp16")];
            tensor<bool, []> v_17_interleave_0 = const()[name = tensor<string, []>("v_17_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 1024, 1, 512]> v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor<string, []>("v_17_cast_fp16")];
            tensor<int32, [4]> var_915 = const()[name = tensor<string, []>("op_915"), val = tensor<int32, [4]>([1, 3072, 1, -1])];
            tensor<fp16, [1, 3072, 1, 64]> q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor<string, []>("q_cast_fp16")];
            tensor<int32, [4]> var_920_begin_0 = const()[name = tensor<string, []>("op_920_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_920_end_0 = const()[name = tensor<string, []>("op_920_end_0"), val = tensor<int32, [4]>([1, 128, 1, 64])];
            tensor<bool, [4]> var_920_end_mask_0 = const()[name = tensor<string, []>("op_920_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_920_cast_fp16")];
            tensor<int32, [4]> var_924_begin_0 = const()[name = tensor<string, []>("op_924_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
            tensor<int32, [4]> var_924_end_0 = const()[name = tensor<string, []>("op_924_end_0"), val = tensor<int32, [4]>([1, 256, 1, 64])];
            tensor<bool, [4]> var_924_end_mask_0 = const()[name = tensor<string, []>("op_924_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_924_cast_fp16")];
            tensor<int32, [4]> var_928_begin_0 = const()[name = tensor<string, []>("op_928_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
            tensor<int32, [4]> var_928_end_0 = const()[name = tensor<string, []>("op_928_end_0"), val = tensor<int32, [4]>([1, 384, 1, 64])];
            tensor<bool, [4]> var_928_end_mask_0 = const()[name = tensor<string, []>("op_928_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_928_cast_fp16")];
            tensor<int32, [4]> var_932_begin_0 = const()[name = tensor<string, []>("op_932_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
            tensor<int32, [4]> var_932_end_0 = const()[name = tensor<string, []>("op_932_end_0"), val = tensor<int32, [4]>([1, 512, 1, 64])];
            tensor<bool, [4]> var_932_end_mask_0 = const()[name = tensor<string, []>("op_932_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_932_cast_fp16")];
            tensor<int32, [4]> var_936_begin_0 = const()[name = tensor<string, []>("op_936_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
            tensor<int32, [4]> var_936_end_0 = const()[name = tensor<string, []>("op_936_end_0"), val = tensor<int32, [4]>([1, 640, 1, 64])];
            tensor<bool, [4]> var_936_end_mask_0 = const()[name = tensor<string, []>("op_936_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_936_cast_fp16")];
            tensor<int32, [4]> var_940_begin_0 = const()[name = tensor<string, []>("op_940_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
            tensor<int32, [4]> var_940_end_0 = const()[name = tensor<string, []>("op_940_end_0"), val = tensor<int32, [4]>([1, 768, 1, 64])];
            tensor<bool, [4]> var_940_end_mask_0 = const()[name = tensor<string, []>("op_940_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_940_cast_fp16")];
            tensor<int32, [4]> var_944_begin_0 = const()[name = tensor<string, []>("op_944_begin_0"), val = tensor<int32, [4]>([0, 768, 0, 0])];
            tensor<int32, [4]> var_944_end_0 = const()[name = tensor<string, []>("op_944_end_0"), val = tensor<int32, [4]>([1, 896, 1, 64])];
            tensor<bool, [4]> var_944_end_mask_0 = const()[name = tensor<string, []>("op_944_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_944_cast_fp16")];
            tensor<int32, [4]> var_948_begin_0 = const()[name = tensor<string, []>("op_948_begin_0"), val = tensor<int32, [4]>([0, 896, 0, 0])];
            tensor<int32, [4]> var_948_end_0 = const()[name = tensor<string, []>("op_948_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 64])];
            tensor<bool, [4]> var_948_end_mask_0 = const()[name = tensor<string, []>("op_948_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_948_cast_fp16")];
            tensor<int32, [4]> var_952_begin_0 = const()[name = tensor<string, []>("op_952_begin_0"), val = tensor<int32, [4]>([0, 1024, 0, 0])];
            tensor<int32, [4]> var_952_end_0 = const()[name = tensor<string, []>("op_952_end_0"), val = tensor<int32, [4]>([1, 1152, 1, 64])];
            tensor<bool, [4]> var_952_end_mask_0 = const()[name = tensor<string, []>("op_952_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_952_cast_fp16")];
            tensor<int32, [4]> var_956_begin_0 = const()[name = tensor<string, []>("op_956_begin_0"), val = tensor<int32, [4]>([0, 1152, 0, 0])];
            tensor<int32, [4]> var_956_end_0 = const()[name = tensor<string, []>("op_956_end_0"), val = tensor<int32, [4]>([1, 1280, 1, 64])];
            tensor<bool, [4]> var_956_end_mask_0 = const()[name = tensor<string, []>("op_956_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_956_cast_fp16")];
            tensor<int32, [4]> var_960_begin_0 = const()[name = tensor<string, []>("op_960_begin_0"), val = tensor<int32, [4]>([0, 1280, 0, 0])];
            tensor<int32, [4]> var_960_end_0 = const()[name = tensor<string, []>("op_960_end_0"), val = tensor<int32, [4]>([1, 1408, 1, 64])];
            tensor<bool, [4]> var_960_end_mask_0 = const()[name = tensor<string, []>("op_960_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_960_cast_fp16")];
            tensor<int32, [4]> var_964_begin_0 = const()[name = tensor<string, []>("op_964_begin_0"), val = tensor<int32, [4]>([0, 1408, 0, 0])];
            tensor<int32, [4]> var_964_end_0 = const()[name = tensor<string, []>("op_964_end_0"), val = tensor<int32, [4]>([1, 1536, 1, 64])];
            tensor<bool, [4]> var_964_end_mask_0 = const()[name = tensor<string, []>("op_964_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_964_cast_fp16")];
            tensor<int32, [4]> var_968_begin_0 = const()[name = tensor<string, []>("op_968_begin_0"), val = tensor<int32, [4]>([0, 1536, 0, 0])];
            tensor<int32, [4]> var_968_end_0 = const()[name = tensor<string, []>("op_968_end_0"), val = tensor<int32, [4]>([1, 1664, 1, 64])];
            tensor<bool, [4]> var_968_end_mask_0 = const()[name = tensor<string, []>("op_968_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_968_cast_fp16")];
            tensor<int32, [4]> var_972_begin_0 = const()[name = tensor<string, []>("op_972_begin_0"), val = tensor<int32, [4]>([0, 1664, 0, 0])];
            tensor<int32, [4]> var_972_end_0 = const()[name = tensor<string, []>("op_972_end_0"), val = tensor<int32, [4]>([1, 1792, 1, 64])];
            tensor<bool, [4]> var_972_end_mask_0 = const()[name = tensor<string, []>("op_972_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_972_cast_fp16")];
            tensor<int32, [4]> var_976_begin_0 = const()[name = tensor<string, []>("op_976_begin_0"), val = tensor<int32, [4]>([0, 1792, 0, 0])];
            tensor<int32, [4]> var_976_end_0 = const()[name = tensor<string, []>("op_976_end_0"), val = tensor<int32, [4]>([1, 1920, 1, 64])];
            tensor<bool, [4]> var_976_end_mask_0 = const()[name = tensor<string, []>("op_976_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_976_cast_fp16")];
            tensor<int32, [4]> var_980_begin_0 = const()[name = tensor<string, []>("op_980_begin_0"), val = tensor<int32, [4]>([0, 1920, 0, 0])];
            tensor<int32, [4]> var_980_end_0 = const()[name = tensor<string, []>("op_980_end_0"), val = tensor<int32, [4]>([1, 2048, 1, 64])];
            tensor<bool, [4]> var_980_end_mask_0 = const()[name = tensor<string, []>("op_980_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_980_cast_fp16")];
            tensor<int32, [4]> var_984_begin_0 = const()[name = tensor<string, []>("op_984_begin_0"), val = tensor<int32, [4]>([0, 2048, 0, 0])];
            tensor<int32, [4]> var_984_end_0 = const()[name = tensor<string, []>("op_984_end_0"), val = tensor<int32, [4]>([1, 2176, 1, 64])];
            tensor<bool, [4]> var_984_end_mask_0 = const()[name = tensor<string, []>("op_984_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_984_cast_fp16")];
            tensor<int32, [4]> var_988_begin_0 = const()[name = tensor<string, []>("op_988_begin_0"), val = tensor<int32, [4]>([0, 2176, 0, 0])];
            tensor<int32, [4]> var_988_end_0 = const()[name = tensor<string, []>("op_988_end_0"), val = tensor<int32, [4]>([1, 2304, 1, 64])];
            tensor<bool, [4]> var_988_end_mask_0 = const()[name = tensor<string, []>("op_988_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
            tensor<int32, [4]> var_992_begin_0 = const()[name = tensor<string, []>("op_992_begin_0"), val = tensor<int32, [4]>([0, 2304, 0, 0])];
            tensor<int32, [4]> var_992_end_0 = const()[name = tensor<string, []>("op_992_end_0"), val = tensor<int32, [4]>([1, 2432, 1, 64])];
            tensor<bool, [4]> var_992_end_mask_0 = const()[name = tensor<string, []>("op_992_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_992_cast_fp16")];
            tensor<int32, [4]> var_996_begin_0 = const()[name = tensor<string, []>("op_996_begin_0"), val = tensor<int32, [4]>([0, 2432, 0, 0])];
            tensor<int32, [4]> var_996_end_0 = const()[name = tensor<string, []>("op_996_end_0"), val = tensor<int32, [4]>([1, 2560, 1, 64])];
            tensor<bool, [4]> var_996_end_mask_0 = const()[name = tensor<string, []>("op_996_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_996_cast_fp16")];
            tensor<int32, [4]> var_1000_begin_0 = const()[name = tensor<string, []>("op_1000_begin_0"), val = tensor<int32, [4]>([0, 2560, 0, 0])];
            tensor<int32, [4]> var_1000_end_0 = const()[name = tensor<string, []>("op_1000_end_0"), val = tensor<int32, [4]>([1, 2688, 1, 64])];
            tensor<bool, [4]> var_1000_end_mask_0 = const()[name = tensor<string, []>("op_1000_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_1000_cast_fp16")];
            tensor<int32, [4]> var_1004_begin_0 = const()[name = tensor<string, []>("op_1004_begin_0"), val = tensor<int32, [4]>([0, 2688, 0, 0])];
            tensor<int32, [4]> var_1004_end_0 = const()[name = tensor<string, []>("op_1004_end_0"), val = tensor<int32, [4]>([1, 2816, 1, 64])];
            tensor<bool, [4]> var_1004_end_mask_0 = const()[name = tensor<string, []>("op_1004_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_1004_cast_fp16")];
            tensor<int32, [4]> var_1008_begin_0 = const()[name = tensor<string, []>("op_1008_begin_0"), val = tensor<int32, [4]>([0, 2816, 0, 0])];
            tensor<int32, [4]> var_1008_end_0 = const()[name = tensor<string, []>("op_1008_end_0"), val = tensor<int32, [4]>([1, 2944, 1, 64])];
            tensor<bool, [4]> var_1008_end_mask_0 = const()[name = tensor<string, []>("op_1008_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_1008_cast_fp16")];
            tensor<int32, [4]> var_1012_begin_0 = const()[name = tensor<string, []>("op_1012_begin_0"), val = tensor<int32, [4]>([0, 2944, 0, 0])];
            tensor<int32, [4]> var_1012_end_0 = const()[name = tensor<string, []>("op_1012_end_0"), val = tensor<int32, [4]>([1, 3072, 1, 64])];
            tensor<bool, [4]> var_1012_end_mask_0 = const()[name = tensor<string, []>("op_1012_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 64]> var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor<string, []>("op_1012_cast_fp16")];
            tensor<int32, [4]> var_1018_begin_0 = const()[name = tensor<string, []>("op_1018_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_1018_end_0 = const()[name = tensor<string, []>("op_1018_end_0"), val = tensor<int32, [4]>([1, 512, 1, 128])];
            tensor<bool, [4]> var_1018_end_mask_0 = const()[name = tensor<string, []>("op_1018_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1018_cast_fp16")];
            tensor<int32, [4]> var_1030_begin_0 = const()[name = tensor<string, []>("op_1030_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
            tensor<int32, [4]> var_1030_end_0 = const()[name = tensor<string, []>("op_1030_end_0"), val = tensor<int32, [4]>([1, 512, 1, 256])];
            tensor<bool, [4]> var_1030_end_mask_0 = const()[name = tensor<string, []>("op_1030_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1030_cast_fp16")];
            tensor<int32, [4]> var_1042_begin_0 = const()[name = tensor<string, []>("op_1042_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
            tensor<int32, [4]> var_1042_end_0 = const()[name = tensor<string, []>("op_1042_end_0"), val = tensor<int32, [4]>([1, 512, 1, 384])];
            tensor<bool, [4]> var_1042_end_mask_0 = const()[name = tensor<string, []>("op_1042_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1042_cast_fp16")];
            tensor<int32, [4]> var_1054_begin_0 = const()[name = tensor<string, []>("op_1054_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
            tensor<int32, [4]> var_1054_end_0 = const()[name = tensor<string, []>("op_1054_end_0"), val = tensor<int32, [4]>([1, 512, 1, 512])];
            tensor<bool, [4]> var_1054_end_mask_0 = const()[name = tensor<string, []>("op_1054_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1054_cast_fp16")];
            tensor<int32, [4]> var_1066_begin_0 = const()[name = tensor<string, []>("op_1066_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
            tensor<int32, [4]> var_1066_end_0 = const()[name = tensor<string, []>("op_1066_end_0"), val = tensor<int32, [4]>([1, 512, 1, 640])];
            tensor<bool, [4]> var_1066_end_mask_0 = const()[name = tensor<string, []>("op_1066_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1066_cast_fp16")];
            tensor<int32, [4]> var_1078_begin_0 = const()[name = tensor<string, []>("op_1078_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
            tensor<int32, [4]> var_1078_end_0 = const()[name = tensor<string, []>("op_1078_end_0"), val = tensor<int32, [4]>([1, 512, 1, 768])];
            tensor<bool, [4]> var_1078_end_mask_0 = const()[name = tensor<string, []>("op_1078_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1078_cast_fp16")];
            tensor<int32, [4]> var_1090_begin_0 = const()[name = tensor<string, []>("op_1090_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 768])];
            tensor<int32, [4]> var_1090_end_0 = const()[name = tensor<string, []>("op_1090_end_0"), val = tensor<int32, [4]>([1, 512, 1, 896])];
            tensor<bool, [4]> var_1090_end_mask_0 = const()[name = tensor<string, []>("op_1090_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1090_cast_fp16")];
            tensor<int32, [4]> var_1102_begin_0 = const()[name = tensor<string, []>("op_1102_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 896])];
            tensor<int32, [4]> var_1102_end_0 = const()[name = tensor<string, []>("op_1102_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1024])];
            tensor<bool, [4]> var_1102_end_mask_0 = const()[name = tensor<string, []>("op_1102_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 512, 1, 128]> var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1102_cast_fp16")];
            tensor<int32, [4]> var_1112_begin_0 = const()[name = tensor<string, []>("op_1112_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_1112_end_0 = const()[name = tensor<string, []>("op_1112_end_0"), val = tensor<int32, [4]>([1, 128, 1, 512])];
            tensor<bool, [4]> var_1112_end_mask_0 = const()[name = tensor<string, []>("op_1112_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1112_cast_fp16")];
            tensor<int32, [4]> var_1124_begin_0 = const()[name = tensor<string, []>("op_1124_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
            tensor<int32, [4]> var_1124_end_0 = const()[name = tensor<string, []>("op_1124_end_0"), val = tensor<int32, [4]>([1, 256, 1, 512])];
            tensor<bool, [4]> var_1124_end_mask_0 = const()[name = tensor<string, []>("op_1124_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1124_cast_fp16")];
            tensor<int32, [4]> var_1136_begin_0 = const()[name = tensor<string, []>("op_1136_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
            tensor<int32, [4]> var_1136_end_0 = const()[name = tensor<string, []>("op_1136_end_0"), val = tensor<int32, [4]>([1, 384, 1, 512])];
            tensor<bool, [4]> var_1136_end_mask_0 = const()[name = tensor<string, []>("op_1136_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1136_cast_fp16")];
            tensor<int32, [4]> var_1148_begin_0 = const()[name = tensor<string, []>("op_1148_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
            tensor<int32, [4]> var_1148_end_0 = const()[name = tensor<string, []>("op_1148_end_0"), val = tensor<int32, [4]>([1, 512, 1, 512])];
            tensor<bool, [4]> var_1148_end_mask_0 = const()[name = tensor<string, []>("op_1148_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1148_cast_fp16")];
            tensor<int32, [4]> var_1160_begin_0 = const()[name = tensor<string, []>("op_1160_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
            tensor<int32, [4]> var_1160_end_0 = const()[name = tensor<string, []>("op_1160_end_0"), val = tensor<int32, [4]>([1, 640, 1, 512])];
            tensor<bool, [4]> var_1160_end_mask_0 = const()[name = tensor<string, []>("op_1160_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1160_cast_fp16")];
            tensor<int32, [4]> var_1172_begin_0 = const()[name = tensor<string, []>("op_1172_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
            tensor<int32, [4]> var_1172_end_0 = const()[name = tensor<string, []>("op_1172_end_0"), val = tensor<int32, [4]>([1, 768, 1, 512])];
            tensor<bool, [4]> var_1172_end_mask_0 = const()[name = tensor<string, []>("op_1172_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1172_cast_fp16")];
            tensor<int32, [4]> var_1184_begin_0 = const()[name = tensor<string, []>("op_1184_begin_0"), val = tensor<int32, [4]>([0, 768, 0, 0])];
            tensor<int32, [4]> var_1184_end_0 = const()[name = tensor<string, []>("op_1184_end_0"), val = tensor<int32, [4]>([1, 896, 1, 512])];
            tensor<bool, [4]> var_1184_end_mask_0 = const()[name = tensor<string, []>("op_1184_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1184_cast_fp16")];
            tensor<int32, [4]> var_1196_begin_0 = const()[name = tensor<string, []>("op_1196_begin_0"), val = tensor<int32, [4]>([0, 896, 0, 0])];
            tensor<int32, [4]> var_1196_end_0 = const()[name = tensor<string, []>("op_1196_end_0"), val = tensor<int32, [4]>([1, 1024, 1, 512])];
            tensor<bool, [4]> var_1196_end_mask_0 = const()[name = tensor<string, []>("op_1196_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
            tensor<fp16, [1, 128, 1, 512]> var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor<string, []>("op_1196_cast_fp16")];
            tensor<string, []> var_1208_equation_0 = const()[name = tensor<string, []>("op_1208_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor<string, []>("op_1208_cast_fp16")];
            tensor<fp16, []> var_1209_to_fp16 = const()[name = tensor<string, []>("op_1209_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor<string, []>("op_1210_cast_fp16")];
            tensor<string, []> var_1212_equation_0 = const()[name = tensor<string, []>("op_1212_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor<string, []>("op_1212_cast_fp16")];
            tensor<fp16, []> var_1213_to_fp16 = const()[name = tensor<string, []>("op_1213_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor<string, []>("op_1214_cast_fp16")];
            tensor<string, []> var_1216_equation_0 = const()[name = tensor<string, []>("op_1216_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor<string, []>("op_1216_cast_fp16")];
            tensor<fp16, []> var_1217_to_fp16 = const()[name = tensor<string, []>("op_1217_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor<string, []>("op_1218_cast_fp16")];
            tensor<string, []> var_1220_equation_0 = const()[name = tensor<string, []>("op_1220_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor<string, []>("op_1220_cast_fp16")];
            tensor<fp16, []> var_1221_to_fp16 = const()[name = tensor<string, []>("op_1221_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor<string, []>("op_1222_cast_fp16")];
            tensor<string, []> var_1224_equation_0 = const()[name = tensor<string, []>("op_1224_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor<string, []>("op_1224_cast_fp16")];
            tensor<fp16, []> var_1225_to_fp16 = const()[name = tensor<string, []>("op_1225_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor<string, []>("op_1226_cast_fp16")];
            tensor<string, []> var_1228_equation_0 = const()[name = tensor<string, []>("op_1228_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor<string, []>("op_1228_cast_fp16")];
            tensor<fp16, []> var_1229_to_fp16 = const()[name = tensor<string, []>("op_1229_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor<string, []>("op_1230_cast_fp16")];
            tensor<string, []> var_1232_equation_0 = const()[name = tensor<string, []>("op_1232_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor<string, []>("op_1232_cast_fp16")];
            tensor<fp16, []> var_1233_to_fp16 = const()[name = tensor<string, []>("op_1233_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor<string, []>("op_1234_cast_fp16")];
            tensor<string, []> var_1236_equation_0 = const()[name = tensor<string, []>("op_1236_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor<string, []>("op_1236_cast_fp16")];
            tensor<fp16, []> var_1237_to_fp16 = const()[name = tensor<string, []>("op_1237_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor<string, []>("op_1238_cast_fp16")];
            tensor<string, []> var_1240_equation_0 = const()[name = tensor<string, []>("op_1240_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor<string, []>("op_1240_cast_fp16")];
            tensor<fp16, []> var_1241_to_fp16 = const()[name = tensor<string, []>("op_1241_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor<string, []>("op_1242_cast_fp16")];
            tensor<string, []> var_1244_equation_0 = const()[name = tensor<string, []>("op_1244_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor<string, []>("op_1244_cast_fp16")];
            tensor<fp16, []> var_1245_to_fp16 = const()[name = tensor<string, []>("op_1245_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor<string, []>("op_1246_cast_fp16")];
            tensor<string, []> var_1248_equation_0 = const()[name = tensor<string, []>("op_1248_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor<string, []>("op_1248_cast_fp16")];
            tensor<fp16, []> var_1249_to_fp16 = const()[name = tensor<string, []>("op_1249_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor<string, []>("op_1250_cast_fp16")];
            tensor<string, []> var_1252_equation_0 = const()[name = tensor<string, []>("op_1252_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor<string, []>("op_1252_cast_fp16")];
            tensor<fp16, []> var_1253_to_fp16 = const()[name = tensor<string, []>("op_1253_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor<string, []>("op_1254_cast_fp16")];
            tensor<string, []> var_1256_equation_0 = const()[name = tensor<string, []>("op_1256_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor<string, []>("op_1256_cast_fp16")];
            tensor<fp16, []> var_1257_to_fp16 = const()[name = tensor<string, []>("op_1257_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor<string, []>("op_1258_cast_fp16")];
            tensor<string, []> var_1260_equation_0 = const()[name = tensor<string, []>("op_1260_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor<string, []>("op_1260_cast_fp16")];
            tensor<fp16, []> var_1261_to_fp16 = const()[name = tensor<string, []>("op_1261_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor<string, []>("op_1262_cast_fp16")];
            tensor<string, []> var_1264_equation_0 = const()[name = tensor<string, []>("op_1264_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor<string, []>("op_1264_cast_fp16")];
            tensor<fp16, []> var_1265_to_fp16 = const()[name = tensor<string, []>("op_1265_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor<string, []>("op_1266_cast_fp16")];
            tensor<string, []> var_1268_equation_0 = const()[name = tensor<string, []>("op_1268_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor<string, []>("op_1268_cast_fp16")];
            tensor<fp16, []> var_1269_to_fp16 = const()[name = tensor<string, []>("op_1269_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor<string, []>("op_1270_cast_fp16")];
            tensor<string, []> var_1272_equation_0 = const()[name = tensor<string, []>("op_1272_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor<string, []>("op_1272_cast_fp16")];
            tensor<fp16, []> var_1273_to_fp16 = const()[name = tensor<string, []>("op_1273_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
            tensor<string, []> var_1276_equation_0 = const()[name = tensor<string, []>("op_1276_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor<string, []>("op_1276_cast_fp16")];
            tensor<fp16, []> var_1277_to_fp16 = const()[name = tensor<string, []>("op_1277_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor<string, []>("op_1278_cast_fp16")];
            tensor<string, []> var_1280_equation_0 = const()[name = tensor<string, []>("op_1280_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor<string, []>("op_1280_cast_fp16")];
            tensor<fp16, []> var_1281_to_fp16 = const()[name = tensor<string, []>("op_1281_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor<string, []>("op_1282_cast_fp16")];
            tensor<string, []> var_1284_equation_0 = const()[name = tensor<string, []>("op_1284_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor<string, []>("op_1284_cast_fp16")];
            tensor<fp16, []> var_1285_to_fp16 = const()[name = tensor<string, []>("op_1285_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor<string, []>("op_1286_cast_fp16")];
            tensor<string, []> var_1288_equation_0 = const()[name = tensor<string, []>("op_1288_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor<string, []>("op_1288_cast_fp16")];
            tensor<fp16, []> var_1289_to_fp16 = const()[name = tensor<string, []>("op_1289_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor<string, []>("op_1290_cast_fp16")];
            tensor<string, []> var_1292_equation_0 = const()[name = tensor<string, []>("op_1292_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor<string, []>("op_1292_cast_fp16")];
            tensor<fp16, []> var_1293_to_fp16 = const()[name = tensor<string, []>("op_1293_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor<string, []>("op_1294_cast_fp16")];
            tensor<string, []> var_1296_equation_0 = const()[name = tensor<string, []>("op_1296_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor<string, []>("op_1296_cast_fp16")];
            tensor<fp16, []> var_1297_to_fp16 = const()[name = tensor<string, []>("op_1297_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor<string, []>("op_1298_cast_fp16")];
            tensor<string, []> var_1300_equation_0 = const()[name = tensor<string, []>("op_1300_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
            tensor<fp16, [1, 512, 1, 64]> var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor<string, []>("op_1300_cast_fp16")];
            tensor<fp16, []> var_1301_to_fp16 = const()[name = tensor<string, []>("op_1301_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 512, 1, 64]> var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor<string, []>("op_1302_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor<string, []>("aw_49_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor<string, []>("aw_51_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor<string, []>("aw_53_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor<string, []>("aw_55_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor<string, []>("aw_57_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor<string, []>("aw_59_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor<string, []>("aw_61_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor<string, []>("aw_63_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor<string, []>("aw_65_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor<string, []>("aw_67_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor<string, []>("aw_69_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor<string, []>("aw_71_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor<string, []>("aw_73_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor<string, []>("aw_75_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor<string, []>("aw_77_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor<string, []>("aw_79_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor<string, []>("aw_81_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor<string, []>("aw_83_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor<string, []>("aw_85_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor<string, []>("aw_87_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor<string, []>("aw_89_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor<string, []>("aw_91_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor<string, []>("aw_93_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor<string, []>("aw_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor<string, []>("op_1327_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor<string, []>("op_1328_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor<string, []>("op_1329_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor<string, []>("op_1330_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor<string, []>("op_1331_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor<string, []>("op_1332_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor<string, []>("op_1333_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor<string, []>("op_1334_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor<string, []>("op_1335_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor<string, []>("op_1336_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor<string, []>("op_1337_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor<string, []>("op_1338_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor<string, []>("op_1339_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor<string, []>("op_1340_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor<string, []>("op_1341_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor<string, []>("op_1342_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor<string, []>("op_1343_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor<string, []>("op_1344_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor<string, []>("op_1345_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor<string, []>("op_1346_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor<string, []>("op_1347_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor<string, []>("op_1348_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor<string, []>("op_1349_cast_fp16")];
            tensor<fp16, [1, 512, 1, 64]> var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor<string, []>("op_1350_cast_fp16")];
            tensor<string, []> var_1352_equation_0 = const()[name = tensor<string, []>("op_1352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor<string, []>("op_1352_cast_fp16")];
            tensor<string, []> var_1354_equation_0 = const()[name = tensor<string, []>("op_1354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor<string, []>("op_1354_cast_fp16")];
            tensor<string, []> var_1356_equation_0 = const()[name = tensor<string, []>("op_1356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor<string, []>("op_1356_cast_fp16")];
            tensor<string, []> var_1358_equation_0 = const()[name = tensor<string, []>("op_1358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor<string, []>("op_1358_cast_fp16")];
            tensor<string, []> var_1360_equation_0 = const()[name = tensor<string, []>("op_1360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor<string, []>("op_1360_cast_fp16")];
            tensor<string, []> var_1362_equation_0 = const()[name = tensor<string, []>("op_1362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor<string, []>("op_1362_cast_fp16")];
            tensor<string, []> var_1364_equation_0 = const()[name = tensor<string, []>("op_1364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor<string, []>("op_1364_cast_fp16")];
            tensor<string, []> var_1366_equation_0 = const()[name = tensor<string, []>("op_1366_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor<string, []>("op_1366_cast_fp16")];
            tensor<string, []> var_1368_equation_0 = const()[name = tensor<string, []>("op_1368_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor<string, []>("op_1368_cast_fp16")];
            tensor<string, []> var_1370_equation_0 = const()[name = tensor<string, []>("op_1370_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor<string, []>("op_1370_cast_fp16")];
            tensor<string, []> var_1372_equation_0 = const()[name = tensor<string, []>("op_1372_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor<string, []>("op_1372_cast_fp16")];
            tensor<string, []> var_1374_equation_0 = const()[name = tensor<string, []>("op_1374_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor<string, []>("op_1374_cast_fp16")];
            tensor<string, []> var_1376_equation_0 = const()[name = tensor<string, []>("op_1376_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor<string, []>("op_1376_cast_fp16")];
            tensor<string, []> var_1378_equation_0 = const()[name = tensor<string, []>("op_1378_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor<string, []>("op_1378_cast_fp16")];
            tensor<string, []> var_1380_equation_0 = const()[name = tensor<string, []>("op_1380_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor<string, []>("op_1380_cast_fp16")];
            tensor<string, []> var_1382_equation_0 = const()[name = tensor<string, []>("op_1382_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor<string, []>("op_1382_cast_fp16")];
            tensor<string, []> var_1384_equation_0 = const()[name = tensor<string, []>("op_1384_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor<string, []>("op_1384_cast_fp16")];
            tensor<string, []> var_1386_equation_0 = const()[name = tensor<string, []>("op_1386_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor<string, []>("op_1386_cast_fp16")];
            tensor<string, []> var_1388_equation_0 = const()[name = tensor<string, []>("op_1388_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor<string, []>("op_1388_cast_fp16")];
            tensor<string, []> var_1390_equation_0 = const()[name = tensor<string, []>("op_1390_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor<string, []>("op_1390_cast_fp16")];
            tensor<string, []> var_1392_equation_0 = const()[name = tensor<string, []>("op_1392_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor<string, []>("op_1392_cast_fp16")];
            tensor<string, []> var_1394_equation_0 = const()[name = tensor<string, []>("op_1394_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor<string, []>("op_1394_cast_fp16")];
            tensor<string, []> var_1396_equation_0 = const()[name = tensor<string, []>("op_1396_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor<string, []>("op_1396_cast_fp16")];
            tensor<string, []> var_1398_equation_0 = const()[name = tensor<string, []>("op_1398_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
            tensor<fp16, [1, 128, 1, 64]> var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor<string, []>("op_1398_cast_fp16")];
            tensor<bool, []> x_27_interleave_0 = const()[name = tensor<string, []>("x_27_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 3072, 1, 64]> x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor<string, []>("x_27_cast_fp16")];
            tensor<int32, [4]> var_1403 = const()[name = tensor<string, []>("op_1403"), val = tensor<int32, [4]>([1, 3072, -1, 8])];
            tensor<fp16, [1, 3072, 8, 8]> input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
            tensor<int32, [2]> var_1406 = const()[name = tensor<string, []>("op_1406"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_1408 = const()[name = tensor<string, []>("op_1408"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> attention_output_pad_type_0 = const()[name = tensor<string, []>("attention_output_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> attention_output_pad_0 = const()[name = tensor<string, []>("attention_output_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [3072, 3072, 1, 1]> blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_proj_weight_to_fp16"), val = tensor<fp16, [3072, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(232803776)))];
            tensor<fp16, [1, 3072, 8, 8]> attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("attention_output_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor<string, []>("x_29_cast_fp16")];
            tensor<bool, []> x_eps_interleave_0 = const()[name = tensor<string, []>("x_eps_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 1, 8, 8]> eps_chan_to_fp16 = const()[name = tensor<string, []>("eps_chan_to_fp16"), val = tensor<fp16, [1, 1, 8, 8]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(251678208)))];
            tensor<fp16, [1, 3073, 8, 8]> x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor<string, []>("x_eps_cast_fp16")];
            tensor<int32, [1]> norm_x_axes_0 = const()[name = tensor<string, []>("norm_x_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 8, 8]> norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor<string, []>("norm_x_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor<string, []>("x_normed_19_cast_fp16")];
            tensor<fp16, []> var_1434_to_fp16 = const()[name = tensor<string, []>("op_1434_to_fp16"), val = tensor<fp16, []>(0x1.bb8p+5)];
            tensor<fp16, [1, 3072, 8, 8]> x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor<string, []>("x_normed_21_cast_fp16")];
            tensor<fp16, [1, 3072, 1, 1]> blocks_1_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(251678400)))];
            tensor<fp16, [1, 3072, 8, 8]> input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
            tensor<int32, [2]> var_1445 = const()[name = tensor<string, []>("op_1445"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_1447 = const()[name = tensor<string, []>("op_1447"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [8192, 3072, 1, 1]> blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor<fp16, [8192, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(251684608)))];
            tensor<fp16, [1, 8192, 8, 8]> input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
            tensor<int32, [2]> var_1451 = const()[name = tensor<string, []>("op_1451"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_1453 = const()[name = tensor<string, []>("op_1453"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> x_fc_2_pad_type_0 = const()[name = tensor<string, []>("x_fc_2_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> x_fc_2_pad_0 = const()[name = tensor<string, []>("x_fc_2_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [8192, 3072, 1, 1]> blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor<fp16, [8192, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302016320)))];
            tensor<fp16, [1, 8192, 8, 8]> x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("x_fc_2_cast_fp16")];
            tensor<fp16, [1, 8192, 8, 8]> var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor<string, []>("op_1456_cast_fp16")];
            tensor<fp16, [1, 8192, 8, 8]> input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
            tensor<int32, [2]> var_1459 = const()[name = tensor<string, []>("op_1459"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_1461 = const()[name = tensor<string, []>("op_1461"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_1463_pad_type_0 = const()[name = tensor<string, []>("op_1463_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_1463_pad_0 = const()[name = tensor<string, []>("op_1463_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [3072, 8192, 1, 1]> blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_proj_weight_to_fp16"), val = tensor<fp16, [3072, 8192, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(352348032)))];
            tensor<fp16, [1, 3072, 8, 8]> var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_1463_cast_fp16")];
            tensor<fp16, [1, 3072, 8, 8]> new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor<string, []>("op_1464_cast_fp16")];
        } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1);
}